sanscript 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d94fb9c3290ec64af941b806bb8cd78f0b66b442
4
- data.tar.gz: 3e298a3b363a89081fbf603247f2b49a71144b8a
3
+ metadata.gz: 4e9133bd05cf7deb7c03cec7e4c9d46bac3eabaf
4
+ data.tar.gz: e36b9dfe888ee1070d674d72a766130527ce603b
5
5
  SHA512:
6
- metadata.gz: 8ed2a31fa2f140f4e0085638996cbf31693735d07348fb367505fa104a06a1d22834f18ac3cbc0696079ff0503729b0b64192c006ccce1945ad3de5737d8aef3
7
- data.tar.gz: fe535ef6247b9d91ab23643566a2d9e86d7c144cdccf66f3ce6f8b18ba49830813c8d9c20e27c10fcb536c480cee25d504119bf2ba7f04d2676b49f59d4135d4
6
+ metadata.gz: 651f1f92402d0b8507ffeda3df3d93ca9c16316f37f0903cb4034efda2a1e5d92df9c9f40810e34f43466de84e15c22d771f73404248811846b6249f5eefa4ba
7
+ data.tar.gz: c79cc7021d15d791663faf6d3895bea4e80981c5f6061e780d4d38e057df3f6b1e89ff4eab5fcf8298a889f15d2c9289cf49074a7206fbd2ecb4c75444a32c7b
@@ -1,53 +1,79 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sanscript/refinements"
4
- require "benchmark"
4
+ begin
5
+ require "benchmark/ips"
6
+ rescue LoadError
7
+ module Benchmark
8
+ def self.ips(*)
9
+ raise NotImplementedError, "You must install the `benchmark-ips` gem first."
10
+ end
11
+ end
12
+ end
5
13
 
6
14
  module Sanscript
7
15
  using Refinements
8
16
  module Benchmark
9
17
  module_function
10
18
 
11
- def detection!
12
- n = 100_000
19
+ def detect!
13
20
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
14
21
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
22
+ slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
23
+ hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
24
+ malayalam_string = "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ"
15
25
 
16
- ::Benchmark.bmbm(18) do |x|
26
+ ::Benchmark.ips do |x|
27
+ x.config(time: 5, warmup: 1)
28
+ x.report("Detect Devanagari") do
29
+ Sanscript::Detect.detect_script(deva_string) == :devanagari
30
+ end
31
+ x.report("Detect Malayalam") do
32
+ Sanscript::Detect.detect_script(malayalam_string) == :malayalam
33
+ end
17
34
  x.report("Detect IAST") do
18
- n.times { raise unless Sanscript.detect(iast_string) == :iast }
35
+ Sanscript::Detect.detect_script(iast_string) == :iast
19
36
  end
20
- x.report("Detect Devanagari") do
21
- n.times { raise unless Sanscript.detect(deva_string) == :devanagari }
37
+ x.report("Detect SLP1") do
38
+ Sanscript::Detect.detect_script(slp1_string) == :slp1
22
39
  end
40
+ x.report("Detect HK") do
41
+ Sanscript::Detect.detect_script(hk_string) == :hk
42
+ end
43
+ x.compare!
23
44
  end
45
+ true
24
46
  end
25
47
 
26
- def transliteration!
27
- n = 5_000
48
+ def transliterate!
28
49
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
29
-
30
50
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
31
- ::Benchmark.bmbm(18) do |x|
32
- x.report("IAST**>Devanagari") do
33
- n.times { Sanscript.transliterate(iast_string, :devanagari) }
34
- end
51
+ slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
52
+
53
+ ::Benchmark.ips do |x|
54
+ x.config(time: 5, warmup: 2)
55
+
35
56
  x.report("IAST==>Devanagari") do
36
- n.times { Sanscript.transliterate(iast_string, :iast, :devanagari) }
37
- end
38
- x.report("IAST**>SLP1") do
39
- n.times { Sanscript.transliterate(iast_string, :slp1) }
57
+ Sanscript.transliterate(iast_string, :iast, :devanagari) == deva_string
40
58
  end
41
59
  x.report("IAST==>SLP1") do
42
- n.times { Sanscript.transliterate(iast_string, :iast, :slp1) }
60
+ Sanscript.transliterate(iast_string, :iast, :slp1) == slp1_string
61
+ end
62
+ x.report("SLP1==>Devanagari") do
63
+ Sanscript.transliterate(slp1_string, :slp1, :devanagari) == deva_string
64
+ end
65
+ x.report("SLP1==>IAST") do
66
+ Sanscript.transliterate(slp1_string, :slp1, :iast) == iast_string
43
67
  end
44
- x.report("Devanagari**>SLP1") do
45
- n.times { Sanscript.transliterate(deva_string, :slp1) }
68
+ x.report("Devanagari==>SLP1") do
69
+ Sanscript.transliterate(deva_string, :devanagari, :slp1) == slp1_string
46
70
  end
47
- x.report("Devanagari**>IAST") do
48
- n.times { Sanscript.transliterate(deva_string, :iast) }
71
+ x.report("Devanagari==>IAST") do
72
+ Sanscript.transliterate(deva_string, :devanagari, :iast) == iast_string
49
73
  end
74
+ x.compare!
50
75
  end
76
+ true
51
77
  end
52
78
  end
53
79
  end
@@ -1,4 +1,6 @@
1
1
  # frozen_string_literal: true
2
+ # rubocop:disable Style/CaseEquality
3
+
2
4
  #
3
5
  # Developed from code available @ https://github.com/sanskrit/detect.js
4
6
  #
@@ -31,7 +33,7 @@ module Sanscript
31
33
  RE_ITRANS_ONLY = /ee|oo|\^[iI]|RR[iI]|L[iI]|~N|N\^|Ch|chh|JN|sh|Sh|\.a/
32
34
 
33
35
  # Match on SLP1-only characters and bigrams
34
- RE_SLP1_ONLY = /[fFxXEOCYwWqQPB]|kz|Nk|Ng|tT|dD|Sc|Sn|[aAiIuUfFxXeEoO]R|G[yr]|(\\W|^)G'/
36
+ RE_SLP1_ONLY = /[fFxXEOCYwWqQPB]|kz|N[kg]|tT|dD|S[cn]|[aAiIuUeo]R|G[yr]/
35
37
 
36
38
  # Match on Velthuis-only characters
37
39
  RE_VELTHUIS_ONLY = /\.[mhnrlntds]|"n|~s/
@@ -39,7 +41,7 @@ module Sanscript
39
41
  # Match on chars shared by ITRANS and Velthuis
40
42
  RE_ITRANS_OR_VELTHUIS_ONLY = /aa|ii|uu|~n/
41
43
 
42
- # Match on characters unavailable in Harvard-Kyoto
44
+ # Match on characters available in Harvard-Kyoto
43
45
  RE_HARVARD_KYOTO = /[aAiIuUeoRMHkgGcjJTDNtdnpbmyrlvzSsh]/
44
46
 
45
47
  private_constant :RE_BRAHMIC_RANGE, :RE_BRAHMIC_SCRIPTS, :RE_IAST_OR_KOLKATA_ONLY,
@@ -50,24 +52,25 @@ module Sanscript
50
52
 
51
53
  def detect_script(text)
52
54
  # Brahmic schemes are all within a specific range of code points.
53
- if text =~ RE_BRAHMIC_RANGE
55
+ if RE_BRAHMIC_RANGE === text
54
56
  RE_BRAHMIC_SCRIPTS.each do |script, regex|
55
- return script if text =~ regex
57
+ return script if regex === text
56
58
  end
57
59
  end
58
60
 
59
61
  # Romanizations
60
- if text =~ RE_IAST_OR_KOLKATA_ONLY
61
- text =~ RE_KOLKATA_ONLY ? :kolkata : :iast
62
- elsif text =~ RE_ITRANS_ONLY
62
+ if RE_IAST_OR_KOLKATA_ONLY === text
63
+ return :kolkata if RE_KOLKATA_ONLY === text
64
+ :iast
65
+ elsif RE_ITRANS_ONLY === text
63
66
  :itrans
64
- elsif text =~ RE_SLP1_ONLY
67
+ elsif RE_SLP1_ONLY === text
65
68
  :slp1
66
- elsif text =~ RE_VELTHUIS_ONLY
69
+ elsif RE_VELTHUIS_ONLY === text
67
70
  :velthuis
68
- elsif text =~ RE_ITRANS_OR_VELTHUIS_ONLY
71
+ elsif RE_ITRANS_OR_VELTHUIS_ONLY === text
69
72
  :itrans
70
- elsif text =~ RE_HARVARD_KYOTO
73
+ elsif RE_HARVARD_KYOTO === text
71
74
  :hk
72
75
  else
73
76
  :unknown
@@ -14,7 +14,8 @@ module Sanscript
14
14
  using Refinements
15
15
  module Transliterate
16
16
  class << self
17
- attr_reader :defaults, :schemes, :roman_schemes, :all_alternates
17
+ attr_reader :defaults, :schemes, :scheme_names, :brahmic_schemes, :roman_schemes,
18
+ :all_alternates
18
19
  end
19
20
 
20
21
  @defaults = {
@@ -27,12 +28,13 @@ module Sanscript
27
28
  module_function
28
29
 
29
30
  #
30
- # Return a list of available schemes.
31
+ # Check whether the given scheme encodes Brahmic Sanskrit.
31
32
  #
32
- # @return array of scheme identifiers
33
+ # @param name the scheme name
34
+ # @return boolean
33
35
  #
34
- def scheme_names
35
- @schemes.keys.sort!
36
+ def brahmic_scheme?(name)
37
+ @brahmic_schemes.include?(name.to_sym)
36
38
  end
37
39
 
38
40
  #
@@ -64,7 +66,12 @@ module Sanscript
64
66
  # described above.
65
67
  #
66
68
  def add_brahmic_scheme(name, scheme)
67
- @schemes[name.to_sym] = scheme.deep_dup.deep_freeze
69
+ name = name.to_sym
70
+ scheme = scheme.deep_dup
71
+ @schemes[name] = scheme.deep_freeze
72
+ @brahmic_schemes.add(name)
73
+ @scheme_names.add(name)
74
+ scheme
68
75
  end
69
76
 
70
77
  #
@@ -82,6 +89,8 @@ module Sanscript
82
89
  scheme[:vowel_marks] = scheme[:vowels][1..-1] unless scheme.key?(:vowel_marks)
83
90
  @schemes[name] = scheme.deep_freeze
84
91
  @roman_schemes.add(name)
92
+ @scheme_names.add(name)
93
+ scheme
85
94
  end
86
95
 
87
96
  #
@@ -93,15 +102,23 @@ module Sanscript
93
102
 
94
103
  # Set up various schemes
95
104
  begin
105
+ # Re-add existing Brahmic schemes in order to add them to `scheme_names`
106
+ # and to freeze them up.
107
+ brahmic_scheme_names = %i[bengali devanagari gujarati gurmukhi kannada malayalam
108
+ oriya tamil telugu]
109
+ brahmic_scheme_names.each do |name|
110
+ add_brahmic_scheme(name, @schemes[name])
111
+ end
112
+
96
113
  # Set up roman schemes
97
114
  kolkata = @schemes[:kolkata] = @schemes[:iast].deep_dup
98
- scheme_names = %i[iast itrans hk kolkata slp1 velthuis wx]
115
+ roman_scheme_names = %i[iast itrans hk kolkata slp1 velthuis wx]
99
116
  kolkata[:vowels] = %w[a ā i ī u ū ṛ ṝ ḷ ḹ e ē ai o ō au]
100
117
 
101
118
  # These schemes already belong to Sanscript.schemes. But by adding
102
- # them again with `addRomanScheme`, we automatically build up
119
+ # them again with `add_roman_scheme`, we automatically build up
103
120
  # `roman_schemes` and define a `vowel_marks` field for each one.
104
- scheme_names.each do |name|
121
+ roman_scheme_names.each do |name|
105
122
  add_roman_scheme(name, @schemes[name])
106
123
  end
107
124
 
@@ -112,8 +129,7 @@ module Sanscript
112
129
  @all_alternates[:itrans_dravidian] = @all_alternates[:itrans]
113
130
  add_roman_scheme(:itrans_dravidian, itrans_dravidian)
114
131
 
115
- # ensure deep freeze on all existing schemes and alternates
116
- @schemes.each { |_, scheme| scheme.deep_freeze }
132
+ # ensure deep freeze on alternates
117
133
  @all_alternates.each { |_, scheme| scheme.deep_freeze }
118
134
  end
119
135
 
@@ -275,9 +275,15 @@ module Sanscript
275
275
  },
276
276
  }
277
277
 
278
- # Set of names of schemes
278
+ # Set of names of Roman schemes
279
279
  @roman_schemes = Set.new
280
280
 
281
+ # Set of names of Brahmic schemes
282
+ @brahmic_schemes = Set.new
283
+
284
+ # Set of names of all schemes
285
+ @scheme_names = Set.new
286
+
281
287
  # Map of alternate encodings.
282
288
  @all_alternates = {
283
289
  itrans: {
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Sanscript
3
- VERSION = "0.1.0"
3
+ VERSION = "0.2.0"
4
4
  end
data/sanscript.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "rspec", "~> 3.5"
25
25
  spec.add_development_dependency "codeclimate-test-reporter", "~> 0.6"
26
26
  spec.add_development_dependency "pry", "~> 0.10"
27
+ spec.add_development_dependency "benchmark-ips", "~> 2.6"
27
28
 
28
29
  spec.add_runtime_dependency "ice_nine", "~> 0.11"
29
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-13 00:00:00.000000000 Z
11
+ date: 2016-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.10'
83
+ - !ruby/object:Gem::Dependency
84
+ name: benchmark-ips
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.6'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.6'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: ice_nine
85
99
  requirement: !ruby/object:Gem::Requirement