sanscript 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d94fb9c3290ec64af941b806bb8cd78f0b66b442
4
- data.tar.gz: 3e298a3b363a89081fbf603247f2b49a71144b8a
3
+ metadata.gz: 4e9133bd05cf7deb7c03cec7e4c9d46bac3eabaf
4
+ data.tar.gz: e36b9dfe888ee1070d674d72a766130527ce603b
5
5
  SHA512:
6
- metadata.gz: 8ed2a31fa2f140f4e0085638996cbf31693735d07348fb367505fa104a06a1d22834f18ac3cbc0696079ff0503729b0b64192c006ccce1945ad3de5737d8aef3
7
- data.tar.gz: fe535ef6247b9d91ab23643566a2d9e86d7c144cdccf66f3ce6f8b18ba49830813c8d9c20e27c10fcb536c480cee25d504119bf2ba7f04d2676b49f59d4135d4
6
+ metadata.gz: 651f1f92402d0b8507ffeda3df3d93ca9c16316f37f0903cb4034efda2a1e5d92df9c9f40810e34f43466de84e15c22d771f73404248811846b6249f5eefa4ba
7
+ data.tar.gz: c79cc7021d15d791663faf6d3895bea4e80981c5f6061e780d4d38e057df3f6b1e89ff4eab5fcf8298a889f15d2c9289cf49074a7206fbd2ecb4c75444a32c7b
@@ -1,53 +1,79 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sanscript/refinements"
4
- require "benchmark"
4
+ begin
5
+ require "benchmark/ips"
6
+ rescue LoadError
7
+ module Benchmark
8
+ def self.ips(*)
9
+ raise NotImplementedError, "You must install the `benchmark-ips` gem first."
10
+ end
11
+ end
12
+ end
5
13
 
6
14
  module Sanscript
7
15
  using Refinements
8
16
  module Benchmark
9
17
  module_function
10
18
 
11
- def detection!
12
- n = 100_000
19
+ def detect!
13
20
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
14
21
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
22
+ slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
23
+ hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
24
+ malayalam_string = "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ"
15
25
 
16
- ::Benchmark.bmbm(18) do |x|
26
+ ::Benchmark.ips do |x|
27
+ x.config(time: 5, warmup: 1)
28
+ x.report("Detect Devanagari") do
29
+ Sanscript::Detect.detect_script(deva_string) == :devanagari
30
+ end
31
+ x.report("Detect Malayalam") do
32
+ Sanscript::Detect.detect_script(malayalam_string) == :malayalam
33
+ end
17
34
  x.report("Detect IAST") do
18
- n.times { raise unless Sanscript.detect(iast_string) == :iast }
35
+ Sanscript::Detect.detect_script(iast_string) == :iast
19
36
  end
20
- x.report("Detect Devanagari") do
21
- n.times { raise unless Sanscript.detect(deva_string) == :devanagari }
37
+ x.report("Detect SLP1") do
38
+ Sanscript::Detect.detect_script(slp1_string) == :slp1
22
39
  end
40
+ x.report("Detect HK") do
41
+ Sanscript::Detect.detect_script(hk_string) == :hk
42
+ end
43
+ x.compare!
23
44
  end
45
+ true
24
46
  end
25
47
 
26
- def transliteration!
27
- n = 5_000
48
+ def transliterate!
28
49
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
29
-
30
50
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
31
- ::Benchmark.bmbm(18) do |x|
32
- x.report("IAST**>Devanagari") do
33
- n.times { Sanscript.transliterate(iast_string, :devanagari) }
34
- end
51
+ slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
52
+
53
+ ::Benchmark.ips do |x|
54
+ x.config(time: 5, warmup: 2)
55
+
35
56
  x.report("IAST==>Devanagari") do
36
- n.times { Sanscript.transliterate(iast_string, :iast, :devanagari) }
37
- end
38
- x.report("IAST**>SLP1") do
39
- n.times { Sanscript.transliterate(iast_string, :slp1) }
57
+ Sanscript.transliterate(iast_string, :iast, :devanagari) == deva_string
40
58
  end
41
59
  x.report("IAST==>SLP1") do
42
- n.times { Sanscript.transliterate(iast_string, :iast, :slp1) }
60
+ Sanscript.transliterate(iast_string, :iast, :slp1) == slp1_string
61
+ end
62
+ x.report("SLP1==>Devanagari") do
63
+ Sanscript.transliterate(slp1_string, :slp1, :devanagari) == deva_string
64
+ end
65
+ x.report("SLP1==>IAST") do
66
+ Sanscript.transliterate(slp1_string, :slp1, :iast) == iast_string
43
67
  end
44
- x.report("Devanagari**>SLP1") do
45
- n.times { Sanscript.transliterate(deva_string, :slp1) }
68
+ x.report("Devanagari==>SLP1") do
69
+ Sanscript.transliterate(deva_string, :devanagari, :slp1) == slp1_string
46
70
  end
47
- x.report("Devanagari**>IAST") do
48
- n.times { Sanscript.transliterate(deva_string, :iast) }
71
+ x.report("Devanagari==>IAST") do
72
+ Sanscript.transliterate(deva_string, :devanagari, :iast) == iast_string
49
73
  end
74
+ x.compare!
50
75
  end
76
+ true
51
77
  end
52
78
  end
53
79
  end
@@ -1,4 +1,6 @@
1
1
  # frozen_string_literal: true
2
+ # rubocop:disable Style/CaseEquality
3
+
2
4
  #
3
5
  # Developed from code available @ https://github.com/sanskrit/detect.js
4
6
  #
@@ -31,7 +33,7 @@ module Sanscript
31
33
  RE_ITRANS_ONLY = /ee|oo|\^[iI]|RR[iI]|L[iI]|~N|N\^|Ch|chh|JN|sh|Sh|\.a/
32
34
 
33
35
  # Match on SLP1-only characters and bigrams
34
- RE_SLP1_ONLY = /[fFxXEOCYwWqQPB]|kz|Nk|Ng|tT|dD|Sc|Sn|[aAiIuUfFxXeEoO]R|G[yr]|(\\W|^)G'/
36
+ RE_SLP1_ONLY = /[fFxXEOCYwWqQPB]|kz|N[kg]|tT|dD|S[cn]|[aAiIuUeo]R|G[yr]/
35
37
 
36
38
  # Match on Velthuis-only characters
37
39
  RE_VELTHUIS_ONLY = /\.[mhnrlntds]|"n|~s/
@@ -39,7 +41,7 @@ module Sanscript
39
41
  # Match on chars shared by ITRANS and Velthuis
40
42
  RE_ITRANS_OR_VELTHUIS_ONLY = /aa|ii|uu|~n/
41
43
 
42
- # Match on characters unavailable in Harvard-Kyoto
44
+ # Match on characters available in Harvard-Kyoto
43
45
  RE_HARVARD_KYOTO = /[aAiIuUeoRMHkgGcjJTDNtdnpbmyrlvzSsh]/
44
46
 
45
47
  private_constant :RE_BRAHMIC_RANGE, :RE_BRAHMIC_SCRIPTS, :RE_IAST_OR_KOLKATA_ONLY,
@@ -50,24 +52,25 @@ module Sanscript
50
52
 
51
53
  def detect_script(text)
52
54
  # Brahmic schemes are all within a specific range of code points.
53
- if text =~ RE_BRAHMIC_RANGE
55
+ if RE_BRAHMIC_RANGE === text
54
56
  RE_BRAHMIC_SCRIPTS.each do |script, regex|
55
- return script if text =~ regex
57
+ return script if regex === text
56
58
  end
57
59
  end
58
60
 
59
61
  # Romanizations
60
- if text =~ RE_IAST_OR_KOLKATA_ONLY
61
- text =~ RE_KOLKATA_ONLY ? :kolkata : :iast
62
- elsif text =~ RE_ITRANS_ONLY
62
+ if RE_IAST_OR_KOLKATA_ONLY === text
63
+ return :kolkata if RE_KOLKATA_ONLY === text
64
+ :iast
65
+ elsif RE_ITRANS_ONLY === text
63
66
  :itrans
64
- elsif text =~ RE_SLP1_ONLY
67
+ elsif RE_SLP1_ONLY === text
65
68
  :slp1
66
- elsif text =~ RE_VELTHUIS_ONLY
69
+ elsif RE_VELTHUIS_ONLY === text
67
70
  :velthuis
68
- elsif text =~ RE_ITRANS_OR_VELTHUIS_ONLY
71
+ elsif RE_ITRANS_OR_VELTHUIS_ONLY === text
69
72
  :itrans
70
- elsif text =~ RE_HARVARD_KYOTO
73
+ elsif RE_HARVARD_KYOTO === text
71
74
  :hk
72
75
  else
73
76
  :unknown
@@ -14,7 +14,8 @@ module Sanscript
14
14
  using Refinements
15
15
  module Transliterate
16
16
  class << self
17
- attr_reader :defaults, :schemes, :roman_schemes, :all_alternates
17
+ attr_reader :defaults, :schemes, :scheme_names, :brahmic_schemes, :roman_schemes,
18
+ :all_alternates
18
19
  end
19
20
 
20
21
  @defaults = {
@@ -27,12 +28,13 @@ module Sanscript
27
28
  module_function
28
29
 
29
30
  #
30
- # Return a list of available schemes.
31
+ # Check whether the given scheme encodes Brahmic Sanskrit.
31
32
  #
32
- # @return array of scheme identifiers
33
+ # @param name the scheme name
34
+ # @return boolean
33
35
  #
34
- def scheme_names
35
- @schemes.keys.sort!
36
+ def brahmic_scheme?(name)
37
+ @brahmic_schemes.include?(name.to_sym)
36
38
  end
37
39
 
38
40
  #
@@ -64,7 +66,12 @@ module Sanscript
64
66
  # described above.
65
67
  #
66
68
  def add_brahmic_scheme(name, scheme)
67
- @schemes[name.to_sym] = scheme.deep_dup.deep_freeze
69
+ name = name.to_sym
70
+ scheme = scheme.deep_dup
71
+ @schemes[name] = scheme.deep_freeze
72
+ @brahmic_schemes.add(name)
73
+ @scheme_names.add(name)
74
+ scheme
68
75
  end
69
76
 
70
77
  #
@@ -82,6 +89,8 @@ module Sanscript
82
89
  scheme[:vowel_marks] = scheme[:vowels][1..-1] unless scheme.key?(:vowel_marks)
83
90
  @schemes[name] = scheme.deep_freeze
84
91
  @roman_schemes.add(name)
92
+ @scheme_names.add(name)
93
+ scheme
85
94
  end
86
95
 
87
96
  #
@@ -93,15 +102,23 @@ module Sanscript
93
102
 
94
103
  # Set up various schemes
95
104
  begin
105
+ # Re-add existing Brahmic schemes in order to add them to `scheme_names`
106
+ # and to freeze them up.
107
+ brahmic_scheme_names = %i[bengali devanagari gujarati gurmukhi kannada malayalam
108
+ oriya tamil telugu]
109
+ brahmic_scheme_names.each do |name|
110
+ add_brahmic_scheme(name, @schemes[name])
111
+ end
112
+
96
113
  # Set up roman schemes
97
114
  kolkata = @schemes[:kolkata] = @schemes[:iast].deep_dup
98
- scheme_names = %i[iast itrans hk kolkata slp1 velthuis wx]
115
+ roman_scheme_names = %i[iast itrans hk kolkata slp1 velthuis wx]
99
116
  kolkata[:vowels] = %w[a ā i ī u ū ṛ ṝ ḷ ḹ e ē ai o ō au]
100
117
 
101
118
  # These schemes already belong to Sanscript.schemes. But by adding
102
- # them again with `addRomanScheme`, we automatically build up
119
+ # them again with `add_roman_scheme`, we automatically build up
103
120
  # `roman_schemes` and define a `vowel_marks` field for each one.
104
- scheme_names.each do |name|
121
+ roman_scheme_names.each do |name|
105
122
  add_roman_scheme(name, @schemes[name])
106
123
  end
107
124
 
@@ -112,8 +129,7 @@ module Sanscript
112
129
  @all_alternates[:itrans_dravidian] = @all_alternates[:itrans]
113
130
  add_roman_scheme(:itrans_dravidian, itrans_dravidian)
114
131
 
115
- # ensure deep freeze on all existing schemes and alternates
116
- @schemes.each { |_, scheme| scheme.deep_freeze }
132
+ # ensure deep freeze on alternates
117
133
  @all_alternates.each { |_, scheme| scheme.deep_freeze }
118
134
  end
119
135
 
@@ -275,9 +275,15 @@ module Sanscript
275
275
  },
276
276
  }
277
277
 
278
- # Set of names of schemes
278
+ # Set of names of Roman schemes
279
279
  @roman_schemes = Set.new
280
280
 
281
+ # Set of names of Brahmic schemes
282
+ @brahmic_schemes = Set.new
283
+
284
+ # Set of names of all schemes
285
+ @scheme_names = Set.new
286
+
281
287
  # Map of alternate encodings.
282
288
  @all_alternates = {
283
289
  itrans: {
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Sanscript
3
- VERSION = "0.1.0"
3
+ VERSION = "0.2.0"
4
4
  end
data/sanscript.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "rspec", "~> 3.5"
25
25
  spec.add_development_dependency "codeclimate-test-reporter", "~> 0.6"
26
26
  spec.add_development_dependency "pry", "~> 0.10"
27
+ spec.add_development_dependency "benchmark-ips", "~> 2.6"
27
28
 
28
29
  spec.add_runtime_dependency "ice_nine", "~> 0.11"
29
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-13 00:00:00.000000000 Z
11
+ date: 2016-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.10'
83
+ - !ruby/object:Gem::Dependency
84
+ name: benchmark-ips
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.6'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.6'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: ice_nine
85
99
  requirement: !ruby/object:Gem::Requirement