sanscript 0.8.1 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c50d574983e399ea044b60c2d8ece2a624d561ca
4
- data.tar.gz: 7b8b068883034401079c3671f6ca60b9f777d5df
2
+ SHA256:
3
+ metadata.gz: ef6e9b7e12549b07177b807613493b8bbf5df8dab1d762d6f2cbb5db65c3a710
4
+ data.tar.gz: 9e02c19c6189ca48c95a74b6448cb5f0a2be25fa5fabea8a39082a52d4a8bb41
5
5
  SHA512:
6
- metadata.gz: 39a77f3cc096bb673ad499539f7e8bae0e6300c5401c091391b3dd1e3305f8d7ec278d5dd16b19cf8a9ac1e4144bd3f606ff07e5d1028f198b499585bab893e9
7
- data.tar.gz: 8b9a1fbe21223ca924c62e83c487cd3ea2be68735cee2207b66970644f6a3a8c307173b8287c916286bcddc1e989867abd775b4a43e4a31b14c36bf2882b29d8
6
+ metadata.gz: 11f53ad681749ca4aed397e3401f40c9f97ef1e3f654d3f23c9dde407636739817d6af962f9ed09d18a4375021822e5fbbb99b38790b011986e362e2c4758a8f
7
+ data.tar.gz: 1296c18342ff547c831549e3b17412541a7afe18c471e6f0e1867b1f0bb0c24bc3c00e52a5e0d69241936f3ebb576504dcc5cfea4f8fcfc1f1a4484576578fc2
data/.gitignore CHANGED
@@ -8,6 +8,7 @@
8
8
  /spec/reports/
9
9
  /tmp/
10
10
  /lib/librusty_sanscript.*
11
+ /lib/rusty_sanscript.*
11
12
  /target
12
13
  mkmf.log
13
14
  Cargo.lock
@@ -1,6 +1,5 @@
1
- require: rubocop-rspec
2
1
  AllCops:
3
- TargetRubyVersion: 2.3
2
+ TargetRubyVersion: 2.2
4
3
 
5
4
  # Metrics
6
5
  Metrics/AbcSize:
@@ -17,6 +16,10 @@ Metrics/BlockNesting:
17
16
  Enabled: true
18
17
  Max: 4
19
18
 
19
+ Metrics/BlockLength:
20
+ Exclude:
21
+ - spec/**/*.rb
22
+
20
23
  Metrics/ClassLength:
21
24
  Description: 'Avoid classes longer than 250 lines of code.'
22
25
  Enabled: true
@@ -96,3 +99,12 @@ Style/TrailingCommaInLiteral:
96
99
  Style/TrivialAccessors:
97
100
  ExactNameMatch: true
98
101
  AllowPredicates: true
102
+
103
+ # RSpec
104
+ require: rubocop-rspec
105
+
106
+ RSpec/ContextWording:
107
+ Enabled: false
108
+
109
+ RSpec/NestedGroups:
110
+ Max: 3
@@ -1,9 +1,9 @@
1
1
  sudo: false
2
2
  language: ruby
3
3
  rvm:
4
- - 2.4.0
5
- - 2.3.3
6
- - 2.2.6
4
+ - 2.4.2
5
+ - 2.3.5
6
+ - 2.2.8
7
7
  - ruby-head
8
8
  env:
9
9
  - RUST=yes
data/Cargo.toml CHANGED
@@ -1,13 +1,13 @@
1
1
  [package]
2
2
  name = "rusty_sanscript"
3
- version = "0.4.0"
3
+ version = "0.4.1"
4
4
  authors = ["Tim Bellefleur <nomoon@phoebus.ca>"]
5
5
  publish = false
6
6
 
7
7
  [dependencies]
8
- lazy_static = "^0.2.2"
9
- ruby-sys = "^0.2.17"
10
- regex = "^0.1.80"
8
+ lazy_static = "^0.2.10"
9
+ ruby-sys = "^0.3.0"
10
+ regex = "^0.2.2"
11
11
 
12
12
  [lib]
13
13
  crate-type = ["cdylib"]
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  source "https://rubygems.org"
3
4
 
4
5
  # Specify your gem's dependencies in sanscript.gemspec
data/Rakefile CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "bundler/gem_tasks"
3
4
 
4
5
  require "thermite/tasks"
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "ragabash"
3
4
 
4
5
  require "sanscript/version"
@@ -5,7 +5,7 @@ module Sanscript
5
5
  require "benchmark/ips"
6
6
  rescue LoadError
7
7
  #:nocov:
8
- module ::Benchmark
8
+ module ::Benchmark # rubocop:disable Style/ClassAndModuleChildren
9
9
  def self.ips(*)
10
10
  raise NotImplementedError, "You must install the `benchmark-ips` gem first."
11
11
  end
@@ -15,32 +15,32 @@ module Sanscript
15
15
 
16
16
  # Benchmark/testing module.
17
17
  module Benchmark
18
+ TEST_STRINGS = {
19
+ brahmic: {
20
+ devanagari: "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः",
21
+ malayalam: "നാനാശാസ്ത്രസുഭാഷിതാമൃതരസൈഃ ശ്രോത്രോത്സവം കുര്വതാം യേഷാം യാന്തി ദിനാനി പണ്ഡിതജനവ്യായാമഖിന്നാത്മനാമ് തേഷാം ജന്മ ച ജീവിതം ച സുകൃതം തൈര് ഏവ ഭൂര് ഭൂഷിതാ ശേഷൈഹ് കിം പശുവദ് വിവേകരഹിതൈര് ഭൂഭാരഭൂതൈര് നരഃ",
22
+ }.freeze,
23
+ roman: {
24
+ iast: "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ",
25
+ slp1: "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH",
26
+ hk: "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH",
27
+ }.freeze,
28
+ }.freeze
29
+
30
+ TEST_STRINGS_FLAT = TEST_STRINGS.reduce({}) { |a, (_, v)| a.merge(v) }.freeze
31
+
32
+ private_constant :TEST_STRINGS, :TEST_STRINGS_FLAT
33
+
18
34
  module_function
19
35
 
20
36
  # Runs benchmark-ips test on detection methods.
21
37
  def detect!(time = 2, warmup = 1)
22
- deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
23
- malayalam_string = "നാനാശാസ്ത്രസുഭാഷിതാമൃതരസൈഃ ശ്രോത്രോത്സവം കുര്വതാം യേഷാം യാന്തി ദിനാനി പണ്ഡിതജനവ്യായാമഖിന്നാത്മനാമ് തേഷാം ജന്മ ച ജീവിതം ച സുകൃതം തൈര് ഏവ ഭൂര് ഭൂഷിതാ ശേഷൈഹ് കിം പശുവദ് വിവേകരഹിതൈര് ഭൂഭാരഭൂതൈര് നരഃ"
24
- iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
25
- slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
26
- hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
27
-
28
38
  ::Benchmark.ips do |x|
29
39
  x.config(time: time, warmup: warmup)
30
- x.report("Detect Devanagari") do
31
- Sanscript::Detect.detect_scheme(deva_string)
32
- end
33
- x.report("Detect Malayalam") do
34
- Sanscript::Detect.detect_scheme(malayalam_string)
35
- end
36
- x.report("Detect IAST") do
37
- Sanscript::Detect.detect_scheme(iast_string)
38
- end
39
- x.report("Detect SLP1") do
40
- Sanscript::Detect.detect_scheme(slp1_string)
41
- end
42
- x.report("Detect HK") do
43
- Sanscript::Detect.detect_scheme(hk_string)
40
+ TEST_STRINGS_FLAT.each do |scheme, string|
41
+ x.report("Detect #{scheme}") do
42
+ Sanscript::Detect.detect_scheme(string)
43
+ end
44
44
  end
45
45
  x.compare!
46
46
  end
@@ -49,39 +49,13 @@ module Sanscript
49
49
 
50
50
  # Runs benchmark-ips test on roman-source transliteration methods.
51
51
  def transliterate_roman!(time = 2, warmup = 1)
52
- iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
53
- slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
54
- hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
55
-
56
52
  ::Benchmark.ips do |x|
57
53
  x.config(time: time, warmup: warmup)
58
-
59
- x.report("IAST==>Devanagari") do
60
- Sanscript.transliterate(iast_string, :iast, :devanagari)
61
- end
62
- x.report("IAST==>SLP1") do
63
- Sanscript.transliterate(iast_string, :iast, :slp1)
64
- end
65
- x.report("IAST==>SLP1") do
66
- Sanscript.transliterate(iast_string, :iast, :hk)
67
- end
68
- x.report("SLP1==>Devanagari") do
69
- Sanscript.transliterate(slp1_string, :slp1, :devanagari)
70
- end
71
- x.report("SLP1==>IAST") do
72
- Sanscript.transliterate(slp1_string, :slp1, :iast)
73
- end
74
- x.report("SLP1==>HK") do
75
- Sanscript.transliterate(slp1_string, :slp1, :hk)
76
- end
77
- x.report("HK==>Devanagari") do
78
- Sanscript.transliterate(hk_string, :hk, :devanagari)
79
- end
80
- x.report("HK==>IAST") do
81
- Sanscript.transliterate(hk_string, :hk, :iast)
82
- end
83
- x.report("HK==>SLP1") do
84
- Sanscript.transliterate(hk_string, :hk, :slp1)
54
+ TEST_STRINGS[:roman].to_a.product(TEST_STRINGS_FLAT.keys).each do |(ak, av), bk|
55
+ next if ak == bk
56
+ x.report("#{ak} => #{bk}") do
57
+ Sanscript.transliterate(av, ak, bk)
58
+ end
85
59
  end
86
60
  x.compare!
87
61
  end
@@ -90,18 +64,13 @@ module Sanscript
90
64
 
91
65
  # Runs benchmark-ips test on brahmic-source transliteration methods.
92
66
  def transliterate_brahmic!(time = 2, warmup = 1)
93
- deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
94
-
95
67
  ::Benchmark.ips do |x|
96
68
  x.config(time: time, warmup: warmup)
97
- x.report("Devanagari==>IAST") do
98
- Sanscript.transliterate(deva_string, :devanagari, :iast)
99
- end
100
- x.report("Devanagari==>SLP1") do
101
- Sanscript.transliterate(deva_string, :devanagari, :slp1)
102
- end
103
- x.report("Devanagari==>HK") do
104
- Sanscript.transliterate(deva_string, :devanagari, :hk)
69
+ TEST_STRINGS[:brahmic].to_a.product(TEST_STRINGS_FLAT.keys).each do |(ak, av), bk|
70
+ next if ak == bk
71
+ x.report("#{ak} => #{bk}") do
72
+ Sanscript.transliterate(av, ak, bk)
73
+ end
105
74
  end
106
75
  x.compare!
107
76
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "sanscript/detect/constants"
3
4
 
4
5
  module Sanscript
@@ -6,11 +7,13 @@ module Sanscript
6
7
  # Developed from code available @ https://github.com/sanskrit/detect.js
7
8
  module Detect
8
9
  if Regexp.method_defined?(:match?)
10
+ # :nocov:
9
11
  require "sanscript/detect/ruby24"
10
12
  extend Ruby24
11
13
  else
12
14
  require "sanscript/detect/ruby2x"
13
15
  extend Ruby2x
16
+ # :nocov:
14
17
  end
15
18
 
16
19
  # @!method detect_scheme(text)
@@ -8,24 +8,14 @@ module Sanscript
8
8
  # between Devanagari and Malayalam.
9
9
  RE_BRAHMIC_RANGE = /[\u0900-\u0d7f]/
10
10
 
11
- # Match each individual Brahmic script.
12
- RE_BRAHMIC_SCRIPTS = {
13
- devanagari: /\p{Devanagari}/,
14
- bengali: /\p{Bengali}/,
15
- gurmukhi: /\p{Gurmukhi}/,
16
- gujarati: /\p{Gujarati}/,
17
- oriya: /\p{Oriya}/,
18
- tamil: /\p{Tamil}/,
19
- telugu: /\p{Telugu}/,
20
- kannada: /\p{Kannada}/,
21
- malayalam: /\p{Malayalam}/,
22
- }.freeze
11
+ # The order of individual brahmic scripts in 128 character unicode blocks.
12
+ BRAHMIC_SCRIPTS_ORDER = %i[devanagari bengali gurmukhi gujarati oriya tamil telugu kannada malayalam].freeze
23
13
 
24
14
  # Match on special Roman characters
25
- RE_IAST_OR_KOLKATA_ONLY = /[āīūṛṝḷḹēōṃḥṅñṭḍṇśṣḻ]/i
15
+ RE_IAST_OR_KOLKATA_ONLY = /[āīūṛṝḷḹēōṃḥṅñṭḍṇśṣḻĀĪŪṚṜḶḸĒŌṂḤṄÑṬḌṆŚṢḺ]|[aiueoAIUEO]\u0304|[rlRL]\u0323\u0304?|[mhtdMHTD]\u0323|[nN][\u0307\u0303\u0323]|[sS][\u0301\u0323]|[lL]\u0331/
26
16
 
27
17
  # Match on Kolkata-specific Roman characters
28
- RE_KOLKATA_ONLY = /[ēō]/i
18
+ RE_KOLKATA_ONLY = /[ēōĒŌ]|[eoEO]\u0304/
29
19
 
30
20
  # Match on ITRANS-only
31
21
  RE_ITRANS_ONLY = /ee|oo|\^[iI]|RR[iI]|L[iI]|~N|N\^|Ch|chh|JN|sh|Sh|\.a/
@@ -45,7 +35,7 @@ module Sanscript
45
35
  # Match ##...## or {#...#} control blocks.
46
36
  RE_CONTROL_BLOCK = /(?<!\\)##.*?(?<!\\)##|(?<!\\)\{#.*?(?<!\\)#\}/
47
37
 
48
- private_constant :RE_BRAHMIC_RANGE, :RE_BRAHMIC_SCRIPTS, :RE_IAST_OR_KOLKATA_ONLY,
38
+ private_constant :RE_BRAHMIC_RANGE, :BRAHMIC_SCRIPTS_ORDER, :RE_IAST_OR_KOLKATA_ONLY,
49
39
  :RE_KOLKATA_ONLY, :RE_ITRANS_ONLY, :RE_SLP1_ONLY, :RE_VELTHUIS_ONLY,
50
40
  :RE_ITRANS_OR_VELTHUIS_ONLY, :RE_HARVARD_KYOTO, :RE_CONTROL_BLOCK
51
41
  end
@@ -14,11 +14,8 @@ module Sanscript
14
14
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
15
15
 
16
16
  # Brahmic schemes are all within a specific range of code points.
17
- if RE_BRAHMIC_RANGE.match?(text)
18
- RE_BRAHMIC_SCRIPTS.each do |script, regex|
19
- return script if regex.match?(text)
20
- end
21
- end
17
+ brahmic_char = text[RE_BRAHMIC_RANGE]
18
+ return BRAHMIC_SCRIPTS_ORDER[(brahmic_char.ord - 0x0900) / 0x80] if brahmic_char
22
19
 
23
20
  # Romanizations
24
21
  if RE_IAST_OR_KOLKATA_ONLY.match?(text)
@@ -15,11 +15,8 @@ module Sanscript
15
15
  # rubocop:disable Style/CaseEquality
16
16
 
17
17
  # Brahmic schemes are all within a specific range of code points.
18
- if RE_BRAHMIC_RANGE === text
19
- RE_BRAHMIC_SCRIPTS.each do |script, regex|
20
- return script if regex === text
21
- end
22
- end
18
+ brahmic_char = text[RE_BRAHMIC_RANGE]
19
+ return BRAHMIC_SCRIPTS_ORDER[(brahmic_char.ord - 0x0900) / 0x80] if brahmic_char
23
20
 
24
21
  # Romanizations
25
22
  if RE_IAST_OR_KOLKATA_ONLY === text
@@ -1,10 +1,12 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Sanscript
3
4
  module_function
4
5
 
5
6
  # Attempts to load Rust native extension.
6
7
  # @return [bool] whether the extension loaded.
7
8
  def rust_load!
9
+ # :nocov:
8
10
  return RUST_AVAILABLE if defined?(RUST_AVAILABLE)
9
11
  require "thermite/fiddle"
10
12
  Thermite::Fiddle.load_module("init_rusty_sanscript",
@@ -13,6 +15,7 @@ module Sanscript
13
15
  defined?(Sanscript::Rust) ? true : false
14
16
  rescue Fiddle::DLError
15
17
  false
18
+ # :nocov:
16
19
  end
17
20
 
18
21
  # @return [bool] the enabled status of the Rust extension
@@ -128,7 +128,7 @@ module Sanscript
128
128
  add_roman_scheme(:itrans_dravidian, itrans_dravidian)
129
129
 
130
130
  # ensure deep freeze on alternates
131
- @all_alternates.each { |_, scheme| scheme.deep_freeze }
131
+ @all_alternates.each_value { |alternates| alternates.deep_freeze } # rubocop:disable Style/SymbolProc
132
132
  end
133
133
 
134
134
  # Transliterate from one script to another.
@@ -195,14 +195,14 @@ module Sanscript
195
195
  token_lengths.push(f.length)
196
196
  token_lengths.concat(alts.map(&:length))
197
197
 
198
- if group == :vowel_marks || group == :virama
198
+ if group == :vowel_marks || group == :virama # rubocop:disable MultipleComparison
199
199
  marks[f] = t
200
200
  alts.each { |alt| marks[alt] = t }
201
201
  else
202
202
  letters[f] = t
203
203
  alts.each { |alt| letters[alt] = t }
204
204
 
205
- if group == :consonants || group == :other
205
+ if group == :consonants || group == :other # rubocop:disable MultipleComparison
206
206
  consonants[f] = t
207
207
  alts.each { |alt| consonants[alt] = t }
208
208
  end
@@ -227,7 +227,7 @@ module Sanscript
227
227
  # @param data [String] the string to transliterate
228
228
  # @param map [Hash] map data generated from {#make_map}
229
229
  # @return [String] the transliterated string
230
- def transliterate_roman(data, map, options = {})
230
+ def transliterate_roman(data, map, options = {}) # rubocop:disable MethodLength, CyclomaticComplexity
231
231
  data = data.to_str.chars
232
232
  buf = []
233
233
  token_buffer = []
@@ -239,7 +239,7 @@ module Sanscript
239
239
  until data.empty? && token_buffer.empty?
240
240
  # Match all token substrings to our map.
241
241
  token = data[0, max_token_length].join("")
242
- max_token_length.downto(1) do |j|
242
+ max_token_length.downto(1) do |j| # rubocop:disable BlockLength
243
243
  token = token[0, j] unless j == max_token_length
244
244
  if j == 2
245
245
  if !control_char && token == "##"
@@ -302,7 +302,7 @@ module Sanscript
302
302
  # @param data [String] the string to transliterate
303
303
  # @param map [Hash] map data generated from {#make_map}
304
304
  # @return [String] the transliterated string
305
- def transliterate_brahmic(data, map)
305
+ def transliterate_brahmic(data, map) # rubocop:disable MethodLength, CyclomaticComplexity
306
306
  data = data.to_str.chars
307
307
  buf = []
308
308
  had_roman_consonant = false
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Sanscript
3
4
  # The version number
4
- VERSION = "0.8.1"
5
+ VERSION = "0.9.1".freeze
5
6
 
6
7
  GEM_ROOT = Pathname.new(File.realpath(File.join(__dir__, "..", "..")))
7
8
  private_constant :GEM_ROOT
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  # coding: utf-8
3
+
3
4
  lib = File.expand_path("../lib", __FILE__)
4
5
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
6
  require "sanscript/version"
@@ -22,16 +23,16 @@ Gem::Specification.new do |spec|
22
23
 
23
24
  spec.required_ruby_version = "~> 2.2"
24
25
 
26
+ spec.add_development_dependency "benchmark-ips", "~> 2.7"
25
27
  spec.add_development_dependency "bundler", "~> 1.13"
26
- spec.add_development_dependency "rake", "~> 12"
27
- spec.add_development_dependency "rspec", "~> 3.5"
28
28
  spec.add_development_dependency "coveralls", "~> 0.8"
29
- spec.add_development_dependency "rubocop", "~> 0.43"
30
- spec.add_development_dependency "rubocop-rspec", "~> 1.7"
31
29
  spec.add_development_dependency "pry", "~> 0.10"
32
- spec.add_development_dependency "benchmark-ips", "~> 2.7"
30
+ spec.add_development_dependency "rake", "~> 12"
31
+ spec.add_development_dependency "rspec", "~> 3.5"
32
+ spec.add_development_dependency "rubocop", "~> 0.51"
33
+ spec.add_development_dependency "rubocop-rspec", "~> 1.20"
33
34
  spec.add_development_dependency "yard", "~> 0.9"
34
35
 
35
- spec.add_runtime_dependency "ragabash", "~> 0.2"
36
- spec.add_runtime_dependency "thermite", "~> 0.7"
36
+ spec.add_runtime_dependency "ragabash", "~> 0.3"
37
+ spec.add_runtime_dependency "thermite", "~> 0.13"
37
38
  end
@@ -14,13 +14,13 @@ lazy_static! {
14
14
 
15
15
  // Match any character in the block of Brahmic scripts
16
16
  // between Devanagari and Malayalam.
17
- static ref RE_BRAHMIC_RANGE: Regex = Regex::new(r"[\x{0900}-\x{0d7f}]").unwrap();
17
+ static ref RE_BRAHMIC_RANGE: Regex = Regex::new(r"[\x{0900}-\x{0D7F}]").unwrap();
18
18
 
19
19
  // Match on special Roman characters
20
- static ref RE_IAST_OR_KOLKATA_ONLY: Regex = Regex::new(r"(?i)[āīūṛṝḷḹēōṃḥṅñṭḍṇśṣḻ]").unwrap();
20
+ static ref RE_IAST_OR_KOLKATA_ONLY: Regex = Regex::new(r"[āīūṛṝḷḹēōṃḥṅñṭḍṇśṣḻĀĪŪṚṜḶḸĒŌṂḤṄÑṬḌṆŚṢḺ]|[aiueoAIUEO]\x{0304}|[rlRL]\x{0323}\x{0304}?|[mhtdMHTD]\x{0323}|[nN][\x{0307}\x{0303}\x{0323}]|[sS][\x{0301}\x{0323}]|[lL]\x{0331}").unwrap();
21
21
 
22
22
  // Match on Kolkata-specific Roman characters
23
- static ref RE_KOLKATA_ONLY: Regex = Regex::new(r"(?i)[ēō]").unwrap();
23
+ static ref RE_KOLKATA_ONLY: Regex = Regex::new(r"[ēōĒŌ]|[eoEO]\x{0304}").unwrap();
24
24
 
25
25
  // Match on ITRANS-only
26
26
  static ref RE_ITRANS_ONLY: Regex = Regex::new(r"ee|oo|\^[iI]|RR[iI]|L[iI]|~N|N\^|Ch|chh|JN|sh|Sh|\.a").unwrap();
@@ -44,53 +44,47 @@ lazy_static! {
44
44
  #[no_mangle]
45
45
  pub extern fn detect_scheme(s: &str) -> usize {
46
46
  // Clean-up string of control characters.
47
- let r_str = &RE_CONTROL_BLOCK.replace_all(
48
- &RE_ESCAPED_CONTROL_CHAR.replace_all(s, ""), "");
47
+ let r_str = &RE_ESCAPED_CONTROL_CHAR.replace_all(s, "");
48
+ let r_str = &RE_CONTROL_BLOCK.replace_all(r_str, "");
49
49
 
50
50
  // Brahmic schemes are all within a specific range of code points.
51
51
  let brahmic_match = RE_BRAHMIC_RANGE.find(r_str);
52
52
  if brahmic_match != None {
53
53
  let brahmic_match = brahmic_match.unwrap();
54
- let brahmic_codepoint = r_str.chars().nth(brahmic_match.0).unwrap() as usize;
54
+ let brahmic_codepoint = r_str.chars().nth(brahmic_match.start()).unwrap() as usize;
55
55
 
56
- if brahmic_codepoint < 0x0980 {
57
- return 1; // Devanagari
58
- } else if brahmic_codepoint < 0x0A00 {
59
- return 2; // Bengali
60
- } else if brahmic_codepoint < 0x0A80 {
61
- return 3; // Gurmukhi
62
- } else if brahmic_codepoint < 0x0B00 {
63
- return 4; // Gujarati
64
- } else if brahmic_codepoint < 0x0B80 {
65
- return 5; // Oriya
66
- } else if brahmic_codepoint < 0x0C00 {
67
- return 6; // Tamil
68
- } else if brahmic_codepoint < 0x0C80 {
69
- return 7; // Telugu
70
- } else if brahmic_codepoint < 0x0D00 {
71
- return 8; // Kannada
72
- } else {
73
- return 9; // Malayalam
56
+ return match brahmic_codepoint {
57
+ 0x0900...0x097F => 1, // Devanagari
58
+ 0x0980...0x09FF => 2, // Bengali
59
+ 0x0A00...0x0A7F => 3, // Gurmukhi
60
+ 0x0A80...0x0AFF => 4, // Gujarati
61
+ 0x0B00...0x0B7F => 5, // Oriya
62
+ 0x0B80...0x0BFF => 6, // Tamil
63
+ 0x0C00...0x0C7F => 7, // Telugu
64
+ 0x0C80...0x0CFF => 8, // Kannada
65
+ 0x0D00...0x0D7F => 9, // Malayalam
66
+ _ => 0
74
67
  }
75
68
  }
76
69
 
77
70
  // Romanizations
78
- if RE_IAST_OR_KOLKATA_ONLY.is_match(r_str) {
71
+ return if RE_IAST_OR_KOLKATA_ONLY.is_match(r_str) {
79
72
  if RE_KOLKATA_ONLY.is_match(r_str) {
80
- return 11; // Kolkata
73
+ 11 // Kolkata
81
74
  } else {
82
- return 10; // IAST
75
+ 10 // IAST
83
76
  }
84
77
  } else if RE_ITRANS_ONLY.is_match(r_str) {
85
- return 12; // ITRANS
78
+ 12 // ITRANS
86
79
  } else if RE_SLP1_ONLY.is_match(r_str) {
87
- return 13; // SLP1
80
+ 13 // SLP1
88
81
  } else if RE_VELTHUIS_ONLY.is_match(r_str) {
89
- return 14; // Velthuis
82
+ 14 // Velthuis
90
83
  } else if RE_ITRANS_OR_VELTHUIS_ONLY.is_match(r_str) {
91
- return 12; // ITRANS
84
+ 12 // ITRANS
92
85
  } else if RE_HARVARD_KYOTO.is_match(r_str) {
93
- return 15; // HK
86
+ 15 // HK
87
+ } else {
88
+ 0 // Unknown
94
89
  }
95
- return 0; // Unknown
96
90
  }
data/src/lib.rs CHANGED
@@ -4,3 +4,33 @@ extern crate regex;
4
4
 
5
5
  #[macro_use] mod rb;
6
6
  pub mod detect;
7
+
8
+ #[cfg(test)]
9
+ mod tests {
10
+ use detect::detect_scheme;
11
+
12
+ #[test]
13
+ fn detect_devanagari() {
14
+ assert_eq!(detect_scheme("नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"), 1);
15
+ }
16
+
17
+ #[test]
18
+ fn detect_malayalam() {
19
+ assert_eq!(detect_scheme("നാനാശാസ്ത്രസുഭാഷിതാമൃതരസൈഃ ശ്രോത്രോത്സവം കുര്വതാം യേഷാം യാന്തി ദിനാനി പണ്ഡിതജനവ്യായാമഖിന്നാത്മനാമ് തേഷാം ജന്മ ച ജീവിതം ച സുകൃതം തൈര് ഏവ ഭൂര് ഭൂഷിതാ ശേഷൈഹ് കിം പശുവദ് വിവേകരഹിതൈര് ഭൂഭാരഭൂതൈര് നരഃ"), 9);
20
+ }
21
+
22
+ #[test]
23
+ fn detect_iast() {
24
+ assert_eq!(detect_scheme("nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"), 10);
25
+ }
26
+
27
+ #[test]
28
+ fn detect_slp1() {
29
+ assert_eq!(detect_scheme("nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"), 13);
30
+ }
31
+
32
+ #[test]
33
+ fn detect_hk() {
34
+ assert_eq!(detect_scheme("nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"), 15);
35
+ }
36
+ }
metadata CHANGED
@@ -1,127 +1,127 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-10 00:00:00.000000000 Z
11
+ date: 2017-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
14
+ name: benchmark-ips
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.13'
19
+ version: '2.7'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.13'
26
+ version: '2.7'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '12'
33
+ version: '1.13'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '12'
40
+ version: '1.13'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rspec
42
+ name: coveralls
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.5'
47
+ version: '0.8'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.5'
54
+ version: '0.8'
55
55
  - !ruby/object:Gem::Dependency
56
- name: coveralls
56
+ name: pry
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.8'
61
+ version: '0.10'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0.8'
68
+ version: '0.10'
69
69
  - !ruby/object:Gem::Dependency
70
- name: rubocop
70
+ name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0.43'
75
+ version: '12'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0.43'
82
+ version: '12'
83
83
  - !ruby/object:Gem::Dependency
84
- name: rubocop-rspec
84
+ name: rspec
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '1.7'
89
+ version: '3.5'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '1.7'
96
+ version: '3.5'
97
97
  - !ruby/object:Gem::Dependency
98
- name: pry
98
+ name: rubocop
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '0.10'
103
+ version: '0.51'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '0.10'
110
+ version: '0.51'
111
111
  - !ruby/object:Gem::Dependency
112
- name: benchmark-ips
112
+ name: rubocop-rspec
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '2.7'
117
+ version: '1.20'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '2.7'
124
+ version: '1.20'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: yard
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -142,28 +142,28 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.2'
145
+ version: '0.3'
146
146
  type: :runtime
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.2'
152
+ version: '0.3'
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: thermite
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - "~>"
158
158
  - !ruby/object:Gem::Version
159
- version: '0.7'
159
+ version: '0.13'
160
160
  type: :runtime
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
- version: '0.7'
166
+ version: '0.13'
167
167
  description:
168
168
  email:
169
169
  - nomoon@phoebus.ca
@@ -222,7 +222,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
222
222
  version: '0'
223
223
  requirements: []
224
224
  rubyforge_project:
225
- rubygems_version: 2.6.8
225
+ rubygems_version: 2.7.2
226
226
  signing_key:
227
227
  specification_version: 4
228
228
  summary: Ruby port and extension of Sanscript.js transliterator by learnsanskrit.org