sanscript 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d1e473866926c96b58966e9a570d1310ae9d8eff
4
- data.tar.gz: b6f0165d110775e8f4adb49de170634638afb372
3
+ metadata.gz: 86f56c7a86cc995ae43b8ccd595937d9199ba620
4
+ data.tar.gz: 319cabddf6e0f49f35ab0e9f7d20dcb5005865da
5
5
  SHA512:
6
- metadata.gz: 6abff0d8586ff49952c7a308d4ddaf142fcc0ae4be41a739ed287a5705b2bca2cf104ef965c37cdc39dd6b0ce34155d2509722275494a87125e8c5b65403f062
7
- data.tar.gz: d66c8bfd4cd7faddf4e544e5287e7c83a8c7d855741aac71f2153f457e4e275a47981cff9559009d7d7ce7cc0b7c3faf20c684d11cf1561f09c6924b500f4e73
6
+ metadata.gz: c874822cef8dc80268b660adbb555ea05c06c2c18d8a57b0c0e3a20e68e7af1dff1f3d1c9570d2987cb7b416f56ee0e9045dc39f0c6fc0fb8f1dab866f342a61
7
+ data.tar.gz: 64e1f0ca20f1de06eb0eff45323e53f20f8a99a1d21edfcaaf210899ceb03d9ea54b94ac66ceebc80c059a771e01ea6118f478ee70fcd88588281d2e2889835b
data/lib/sanscript.rb CHANGED
@@ -4,26 +4,41 @@ require "sanscript/detect"
4
4
  require "sanscript/transliterate"
5
5
  require "sanscript/benchmark"
6
6
 
7
+ # Sanscript.rb detection/transliteration module for Sanskrit.
7
8
  module Sanscript
8
9
  module_function
9
10
 
10
- # Proxies the Detect.detect_script method
11
+ # Attempts to detect the encoding scheme of the provided string.
12
+ # Simple proxy for {Detect.detect_scheme}
11
13
  #
14
+ # @param text [String] a string of Sanskrit text
15
+ # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
12
16
  def detect(text)
13
- Detect.detect_script(text)
17
+ Detect.detect_scheme(text)
14
18
  end
15
19
 
16
- # The transliterate method accepts multiple signatures
17
- # .transliterate(text, to) will auto-detect the source script
18
- # .transliterate(text, to, from) will specify the source and target script
20
+ # Transliterates a string, optionally detecting its source-scheme first.
19
21
  #
20
- # Final Hash arguments are passed along as options.
22
+ # @overload transliterate(text, from, to, **opts)
23
+ # @param text [String] the String to transliterate
24
+ # @param from [Symbol] the name of the scheme to transliterate from
25
+ # @param to [Symbol] the name of the scheme to transliterate to
26
+ # @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
27
+ # @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
28
+ # @return [String] the transliterated String
21
29
  #
22
- def transliterate(text, first, second = nil, **options)
23
- if second.nil?
24
- second = first
25
- first = Detect.detect_script(text)
30
+ # @overload transliterate(text, to, **opts)
31
+ # @param text [String] the String to transliterate
32
+ # @param to [Symbol] the name of the scheme to transliterate to
33
+ # @option opts [Symbol] :default_scheme a default scheme to fall-back to if detection fails
34
+ # @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
35
+ # @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
36
+ # @return [String, nil] the transliterated String, or nil if detection and fallback fail
37
+ def transliterate(text, from, to = nil, **opts)
38
+ if to.nil?
39
+ to = from
40
+ from = Detect.detect_scheme(text) || opts[:default_scheme] || return
26
41
  end
27
- Transliterate.transliterate(text, first, second, options)
42
+ Transliterate.transliterate(text, from, to, opts)
28
43
  end
29
44
  end
@@ -13,9 +13,11 @@ end
13
13
 
14
14
  module Sanscript
15
15
  using Refinements
16
+ # Benchmark/testing module.
16
17
  module Benchmark
17
18
  module_function
18
19
 
20
+ # Runs benchmark-ips test on detection methods.
19
21
  def detect!
20
22
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
21
23
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
@@ -26,25 +28,26 @@ module Sanscript
26
28
  ::Benchmark.ips do |x|
27
29
  x.config(time: 5, warmup: 1)
28
30
  x.report("Detect Devanagari") do
29
- Sanscript::Detect.detect_script(deva_string) == :devanagari
31
+ raise unless Sanscript::Detect.detect_scheme(deva_string) == :devanagari
30
32
  end
31
33
  x.report("Detect Malayalam") do
32
- Sanscript::Detect.detect_script(malayalam_string) == :malayalam
34
+ raise unless Sanscript::Detect.detect_scheme(malayalam_string) == :malayalam
33
35
  end
34
36
  x.report("Detect IAST") do
35
- Sanscript::Detect.detect_script(iast_string) == :iast
37
+ raise unless Sanscript::Detect.detect_scheme(iast_string) == :iast
36
38
  end
37
39
  x.report("Detect SLP1") do
38
- Sanscript::Detect.detect_script(slp1_string) == :slp1
40
+ raise unless Sanscript::Detect.detect_scheme(slp1_string) == :slp1
39
41
  end
40
42
  x.report("Detect HK") do
41
- Sanscript::Detect.detect_script(hk_string) == :hk
43
+ raise unless Sanscript::Detect.detect_scheme(hk_string) == :hk
42
44
  end
43
45
  x.compare!
44
46
  end
45
47
  true
46
48
  end
47
49
 
50
+ # Runs benchmark-ips test on transliteration methods.
48
51
  def transliterate!
49
52
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
50
53
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
@@ -54,22 +57,22 @@ module Sanscript
54
57
  x.config(time: 5, warmup: 2)
55
58
 
56
59
  x.report("IAST==>Devanagari") do
57
- Sanscript.transliterate(iast_string, :iast, :devanagari) == deva_string
60
+ raise unless Sanscript.transliterate(iast_string, :iast, :devanagari) == deva_string
58
61
  end
59
62
  x.report("IAST==>SLP1") do
60
- Sanscript.transliterate(iast_string, :iast, :slp1) == slp1_string
63
+ raise unless Sanscript.transliterate(iast_string, :iast, :slp1) == slp1_string
61
64
  end
62
65
  x.report("SLP1==>Devanagari") do
63
- Sanscript.transliterate(slp1_string, :slp1, :devanagari) == deva_string
66
+ raise unless Sanscript.transliterate(slp1_string, :slp1, :devanagari) == deva_string
64
67
  end
65
68
  x.report("SLP1==>IAST") do
66
- Sanscript.transliterate(slp1_string, :slp1, :iast) == iast_string
69
+ raise unless Sanscript.transliterate(slp1_string, :slp1, :iast) == iast_string
67
70
  end
68
71
  x.report("Devanagari==>SLP1") do
69
- Sanscript.transliterate(deva_string, :devanagari, :slp1) == slp1_string
72
+ raise unless Sanscript.transliterate(deva_string, :devanagari, :slp1) == slp1_string
70
73
  end
71
74
  x.report("Devanagari==>IAST") do
72
- Sanscript.transliterate(deva_string, :devanagari, :iast) == iast_string
75
+ raise unless Sanscript.transliterate(deva_string, :devanagari, :iast) == iast_string
73
76
  end
74
77
  x.compare!
75
78
  end
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
- # rubocop:disable Style/CaseEquality
3
2
 
4
- #
5
- # Developed from code available @ https://github.com/sanskrit/detect.js
6
- #
7
3
  module Sanscript
4
+ # Transliteration scheme detection module.
5
+ # Developed from code available @ https://github.com/sanskrit/detect.js
8
6
  module Detect
7
+ # rubocop:disable Style/CaseEquality
8
+
9
9
  # Match any character in the block of Brahmic scripts
10
10
  # between Devanagari and Malayalam.
11
11
  RE_BRAHMIC_RANGE = /[\u0900-\u0d7f]/
@@ -53,7 +53,11 @@ module Sanscript
53
53
 
54
54
  module_function
55
55
 
56
- def detect_script(text)
56
+ # Attempts to detect the encoding scheme of the provided string.
57
+ #
58
+ # @param text [String] a string of Sanskrit text
59
+ # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
60
+ def detect_scheme(text)
57
61
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
58
62
 
59
63
  # Brahmic schemes are all within a specific range of code points.
@@ -77,14 +81,13 @@ module Sanscript
77
81
  :itrans
78
82
  elsif RE_HARVARD_KYOTO === text
79
83
  :hk
80
- else
81
- :unknown
82
84
  end
83
85
  end
84
86
 
85
- # If Ruby 2.4's Regexp#match? method is found, use it for performance
87
+ # If Ruby 2.4's Regexp#match? method is found, use this version of detect_scheme
86
88
  if Regexp.method_defined?(:match?)
87
- def detect_script(text)
89
+ # @!visibility private
90
+ def detect_scheme(text)
88
91
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
89
92
 
90
93
  # Brahmic schemes are all within a specific range of code points.
@@ -108,8 +111,6 @@ module Sanscript
108
111
  :itrans
109
112
  elsif RE_HARVARD_KYOTO.match?(text)
110
113
  :hk
111
- else
112
- :unknown
113
114
  end
114
115
  end
115
116
  end
@@ -2,6 +2,7 @@
2
2
  require "ice_nine"
3
3
 
4
4
  module Sanscript
5
+ # A set of helpful refinements for duplication and deep freezing.
5
6
  module Refinements
6
7
  refine Object do
7
8
  def deep_dup
@@ -2,20 +2,31 @@
2
2
 
3
3
  require "sanscript/refinements"
4
4
  require "sanscript/transliterate/schemes"
5
- #
6
- # Sanscript
7
- #
8
- # Sanscript is a Sanskrit transliteration library. Currently, it supports
9
- # other Indian languages only incidentally.
10
- #
11
- # Released under the MIT and GPL Licenses.
12
- #
13
5
  module Sanscript
14
6
  using Refinements
7
+ # Sanskrit transliteration module.
8
+ # Derived from Sanscript, released under the MIT and GPL Licenses.
9
+ # "Sanscript is a Sanskrit transliteration library. Currently, it supports
10
+ # other Indian languages only incidentally."
15
11
  module Transliterate
16
12
  class << self
17
- attr_reader :defaults, :schemes, :scheme_names, :brahmic_schemes, :roman_schemes,
18
- :all_alternates
13
+ # @return [Array<Symbol>] the names of all supported schemes
14
+ attr_reader :scheme_names
15
+
16
+ # @return [Array<Symbol>] the names of all Brahmic schemes
17
+ attr_reader :brahmic_schemes
18
+
19
+ # @return [Array<Symbol>] the names of all roman schemes
20
+ attr_reader :roman_schemes
21
+
22
+ # @return [Hash] the data for all schemes
23
+ attr_reader :schemes
24
+
25
+ # @return [Hash] the alternate-character data for all schemes
26
+ attr_reader :all_alternates
27
+
28
+ # @return [Hash] the default transliteration options
29
+ attr_reader :defaults
19
30
  end
20
31
 
21
32
  @defaults = {
@@ -27,44 +38,37 @@ module Sanscript
27
38
 
28
39
  module_function
29
40
 
41
+ # Check whether the given scheme encodes Brahmic Sanskrit.
30
42
  #
31
- # Check whether the given scheme encodes Brahmic Sanskrit.
32
- #
33
- # @param name the scheme name
34
- # @return boolean
35
- #
43
+ # @param name [Symbol] the scheme name
44
+ # @return [Boolean]
36
45
  def brahmic_scheme?(name)
37
46
  @brahmic_schemes.include?(name.to_sym)
38
47
  end
39
48
 
40
- #
41
49
  # Check whether the given scheme encodes romanized Sanskrit.
42
50
  #
43
- # @param name the scheme name
44
- # @return boolean
45
- #
51
+ # @param name [Symbol] the scheme name
52
+ # @return [Boolean]
46
53
  def roman_scheme?(name)
47
54
  @roman_schemes.include?(name.to_sym)
48
55
  end
49
56
 
50
- #
51
57
  # Add a Brahmic scheme to Sanscript.
52
58
  #
53
59
  # Schemes are of two types: "Brahmic" and "roman". Brahmic consonants
54
60
  # have an inherent vowel sound, but roman consonants do not. This is the
55
61
  # main difference between these two types of scheme.
56
62
  #
57
- # A scheme definition is an object ("{}") that maps a group name to a
58
- # list of characters. For illustration, see the "devanagari" scheme at
59
- # the top of this file.
63
+ # A scheme definition is a Hash that maps a group name to a
64
+ # list of characters. For illustration, see `transliterate/schemes.rb`.
60
65
  #
61
66
  # You can use whatever group names you like, but for the best results,
62
67
  # you should use the same group names that Sanscript does.
63
68
  #
64
- # @param name the scheme name
65
- # @param scheme the scheme data itself. This should be constructed as
66
- # described above.
67
- #
69
+ # @param name [Symbol] the scheme name
70
+ # @param scheme [Hash] the scheme data, constructed as described above
71
+ # @return [Hash] the frozen scheme data as it exists inside the module
68
72
  def add_brahmic_scheme(name, scheme)
69
73
  name = name.to_sym
70
74
  scheme = scheme.deep_dup
@@ -74,15 +78,12 @@ module Sanscript
74
78
  scheme
75
79
  end
76
80
 
77
- #
78
81
  # Add a roman scheme to Sanscript.
79
82
  #
80
- # See the comments on Sanscript.add_brahmic_scheme. The "vowel_marks" field
81
- # can be omitted.
82
- #
83
- # @param name the scheme name
84
- # @param scheme the scheme data itself
85
- #
83
+ # @param name [Symbol] the scheme name
84
+ # @param scheme [Hash] the scheme data, constructed as in {add_brahmic_scheme}.
85
+ # The "vowel_marks" field can be omitted
86
+ # @return [Hash] the frozen scheme data as it exists inside the module
86
87
  def add_roman_scheme(name, scheme)
87
88
  name = name.to_sym
88
89
  scheme = scheme.deep_dup
@@ -93,13 +94,6 @@ module Sanscript
93
94
  scheme
94
95
  end
95
96
 
96
- #
97
- # Create a deep copy of an object, for certain kinds of objects.
98
- #
99
- # @param scheme the scheme to copy
100
- # @return the copy
101
- #
102
-
103
97
  # Set up various schemes
104
98
  begin
105
99
  # Re-add existing Brahmic schemes in order to add them to `scheme_names`
@@ -136,16 +130,15 @@ module Sanscript
136
130
  @all_alternates.each { |_, scheme| scheme.deep_freeze }
137
131
  end
138
132
 
139
- # /**
140
133
  # Transliterate from one script to another.
141
- # *
142
- # @param data the string to transliterate
143
- # @param from the source script
144
- # @param to the destination script
145
- # @param options transliteration options
146
- # @return the finished string
147
134
  #
148
- def transliterate(data, from, to, options = {})
135
+ # @param data [String] the String to transliterate
136
+ # @param from [Symbol] the source script
137
+ # @param to [Symbol] the destination script
138
+ # @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
139
+ # @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
140
+ # @return [String] the transliterated string
141
+ def transliterate(data, from, to, **opts)
149
142
  from = from.to_sym
150
143
  to = to.to_sym
151
144
  return data if from == to
@@ -153,7 +146,7 @@ module Sanscript
153
146
  raise "Scheme not known ':#{to}'" unless @schemes.key?(to)
154
147
 
155
148
  data = data.to_str.dup
156
- options = @defaults.merge(options)
149
+ options = @defaults.merge(opts)
157
150
  map = make_map(from, to)
158
151
 
159
152
  data.gsub!(/(<.*?>)/, "##\\1##") if options[:skip_sgml]
@@ -175,13 +168,12 @@ module Sanscript
175
168
  class << self
176
169
  private
177
170
 
178
- #
179
171
  # Create a map from every character in `from` to its partner in `to`.
180
172
  # Also, store any "marks" that `from` might have.
181
173
  #
182
- # @param from input scheme
183
- # @param to output scheme
184
- #
174
+ # @param from [Symbol] name of input scheme
175
+ # @param to [Symbol] name of output scheme
176
+ # @return [Hash] a mapping from one scheme to another
185
177
  def make_map(from, to)
186
178
  @cache[:"#{from}_#{to}"] ||= begin
187
179
  alternates = @all_alternates[from] || {}
@@ -229,16 +221,12 @@ module Sanscript
229
221
  end
230
222
  end
231
223
 
232
- #
233
224
  # Transliterate from a romanized script.
234
225
  #
235
- # @param data the string to transliterate
236
- # @param map map data generated from makeMap()
237
- # @param options transliteration options
238
- # @return the finished string
239
- #
226
+ # @param data [String] the string to transliterate
227
+ # @param map [Hash] map data generated from {#make_map}
228
+ # @return [String] the transliterated string
240
229
  def transliterate_roman(data, map, options = {})
241
- options = @defaults.merge(options)
242
230
  data = data.to_str.dup
243
231
  buf = []
244
232
  token_buffer = String.new
@@ -308,13 +296,11 @@ module Sanscript
308
296
  buf.join("")
309
297
  end
310
298
 
311
- #
312
299
  # Transliterate from a Brahmic script.
313
300
  #
314
- # @param data the string to transliterate
315
- # @param map map data generated from makeMap()
316
- # @return the finished string
317
- #
301
+ # @param data [String] the string to transliterate
302
+ # @param map [Hash] map data generated from {#make_map}
303
+ # @return [String] the transliterated string
318
304
  def transliterate_brahmic(data, map)
319
305
  data = data.to_str.dup
320
306
  buf = []
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  module Sanscript
3
- VERSION = "0.3.3"
3
+ # The version number
4
+ VERSION = "0.4.0"
4
5
  end
data/sanscript.gemspec CHANGED
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency "codeclimate-test-reporter", "~> 0.6"
28
28
  spec.add_development_dependency "pry", "~> 0.10"
29
29
  spec.add_development_dependency "benchmark-ips", "~> 2.6"
30
+ spec.add_development_dependency "yard", "~> 0.9"
30
31
 
31
32
  spec.add_runtime_dependency "ice_nine", "~> 0.11"
32
33
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '2.6'
97
+ - !ruby/object:Gem::Dependency
98
+ name: yard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.9'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.9'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: ice_nine
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -155,7 +169,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
169
  version: '0'
156
170
  requirements: []
157
171
  rubyforge_project:
158
- rubygems_version: 2.6.4
172
+ rubygems_version: 2.5.1
159
173
  signing_key:
160
174
  specification_version: 4
161
175
  summary: Ruby port and extension of Sanscript.js transliterator by learnsanskrit.org