sanscript 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d1e473866926c96b58966e9a570d1310ae9d8eff
4
- data.tar.gz: b6f0165d110775e8f4adb49de170634638afb372
3
+ metadata.gz: 86f56c7a86cc995ae43b8ccd595937d9199ba620
4
+ data.tar.gz: 319cabddf6e0f49f35ab0e9f7d20dcb5005865da
5
5
  SHA512:
6
- metadata.gz: 6abff0d8586ff49952c7a308d4ddaf142fcc0ae4be41a739ed287a5705b2bca2cf104ef965c37cdc39dd6b0ce34155d2509722275494a87125e8c5b65403f062
7
- data.tar.gz: d66c8bfd4cd7faddf4e544e5287e7c83a8c7d855741aac71f2153f457e4e275a47981cff9559009d7d7ce7cc0b7c3faf20c684d11cf1561f09c6924b500f4e73
6
+ metadata.gz: c874822cef8dc80268b660adbb555ea05c06c2c18d8a57b0c0e3a20e68e7af1dff1f3d1c9570d2987cb7b416f56ee0e9045dc39f0c6fc0fb8f1dab866f342a61
7
+ data.tar.gz: 64e1f0ca20f1de06eb0eff45323e53f20f8a99a1d21edfcaaf210899ceb03d9ea54b94ac66ceebc80c059a771e01ea6118f478ee70fcd88588281d2e2889835b
data/lib/sanscript.rb CHANGED
@@ -4,26 +4,41 @@ require "sanscript/detect"
4
4
  require "sanscript/transliterate"
5
5
  require "sanscript/benchmark"
6
6
 
7
+ # Sanscript.rb detection/transliteration module for Sanskrit.
7
8
  module Sanscript
8
9
  module_function
9
10
 
10
- # Proxies the Detect.detect_script method
11
+ # Attempts to detect the encoding scheme of the provided string.
12
+ # Simple proxy for {Detect.detect_scheme}
11
13
  #
14
+ # @param text [String] a string of Sanskrit text
15
+ # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
12
16
  def detect(text)
13
- Detect.detect_script(text)
17
+ Detect.detect_scheme(text)
14
18
  end
15
19
 
16
- # The transliterate method accepts multiple signatures
17
- # .transliterate(text, to) will auto-detect the source script
18
- # .transliterate(text, to, from) will specify the source and target script
20
+ # Transliterates a string, optionally detecting its source-scheme first.
19
21
  #
20
- # Final Hash arguments are passed along as options.
22
+ # @overload transliterate(text, from, to, **opts)
23
+ # @param text [String] the String to transliterate
24
+ # @param from [Symbol] the name of the scheme to transliterate from
25
+ # @param to [Symbol] the name of the scheme to transliterate to
26
+ # @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
27
+ # @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
28
+ # @return [String] the transliterated String
21
29
  #
22
- def transliterate(text, first, second = nil, **options)
23
- if second.nil?
24
- second = first
25
- first = Detect.detect_script(text)
30
+ # @overload transliterate(text, to, **opts)
31
+ # @param text [String] the String to transliterate
32
+ # @param to [Symbol] the name of the scheme to transliterate to
33
+ # @option opts [Symbol] :default_scheme a default scheme to fall-back to if detection fails
34
+ # @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
35
+ # @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
36
+ # @return [String, nil] the transliterated String, or nil if detection and fallback fail
37
+ def transliterate(text, from, to = nil, **opts)
38
+ if to.nil?
39
+ to = from
40
+ from = Detect.detect_scheme(text) || opts[:default_scheme] || return
26
41
  end
27
- Transliterate.transliterate(text, first, second, options)
42
+ Transliterate.transliterate(text, from, to, opts)
28
43
  end
29
44
  end
@@ -13,9 +13,11 @@ end
13
13
 
14
14
  module Sanscript
15
15
  using Refinements
16
+ # Benchmark/testing module.
16
17
  module Benchmark
17
18
  module_function
18
19
 
20
+ # Runs benchmark-ips test on detection methods.
19
21
  def detect!
20
22
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
21
23
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
@@ -26,25 +28,26 @@ module Sanscript
26
28
  ::Benchmark.ips do |x|
27
29
  x.config(time: 5, warmup: 1)
28
30
  x.report("Detect Devanagari") do
29
- Sanscript::Detect.detect_script(deva_string) == :devanagari
31
+ raise unless Sanscript::Detect.detect_scheme(deva_string) == :devanagari
30
32
  end
31
33
  x.report("Detect Malayalam") do
32
- Sanscript::Detect.detect_script(malayalam_string) == :malayalam
34
+ raise unless Sanscript::Detect.detect_scheme(malayalam_string) == :malayalam
33
35
  end
34
36
  x.report("Detect IAST") do
35
- Sanscript::Detect.detect_script(iast_string) == :iast
37
+ raise unless Sanscript::Detect.detect_scheme(iast_string) == :iast
36
38
  end
37
39
  x.report("Detect SLP1") do
38
- Sanscript::Detect.detect_script(slp1_string) == :slp1
40
+ raise unless Sanscript::Detect.detect_scheme(slp1_string) == :slp1
39
41
  end
40
42
  x.report("Detect HK") do
41
- Sanscript::Detect.detect_script(hk_string) == :hk
43
+ raise unless Sanscript::Detect.detect_scheme(hk_string) == :hk
42
44
  end
43
45
  x.compare!
44
46
  end
45
47
  true
46
48
  end
47
49
 
50
+ # Runs benchmark-ips test on transliteration methods.
48
51
  def transliterate!
49
52
  iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
50
53
  deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
@@ -54,22 +57,22 @@ module Sanscript
54
57
  x.config(time: 5, warmup: 2)
55
58
 
56
59
  x.report("IAST==>Devanagari") do
57
- Sanscript.transliterate(iast_string, :iast, :devanagari) == deva_string
60
+ raise unless Sanscript.transliterate(iast_string, :iast, :devanagari) == deva_string
58
61
  end
59
62
  x.report("IAST==>SLP1") do
60
- Sanscript.transliterate(iast_string, :iast, :slp1) == slp1_string
63
+ raise unless Sanscript.transliterate(iast_string, :iast, :slp1) == slp1_string
61
64
  end
62
65
  x.report("SLP1==>Devanagari") do
63
- Sanscript.transliterate(slp1_string, :slp1, :devanagari) == deva_string
66
+ raise unless Sanscript.transliterate(slp1_string, :slp1, :devanagari) == deva_string
64
67
  end
65
68
  x.report("SLP1==>IAST") do
66
- Sanscript.transliterate(slp1_string, :slp1, :iast) == iast_string
69
+ raise unless Sanscript.transliterate(slp1_string, :slp1, :iast) == iast_string
67
70
  end
68
71
  x.report("Devanagari==>SLP1") do
69
- Sanscript.transliterate(deva_string, :devanagari, :slp1) == slp1_string
72
+ raise unless Sanscript.transliterate(deva_string, :devanagari, :slp1) == slp1_string
70
73
  end
71
74
  x.report("Devanagari==>IAST") do
72
- Sanscript.transliterate(deva_string, :devanagari, :iast) == iast_string
75
+ raise unless Sanscript.transliterate(deva_string, :devanagari, :iast) == iast_string
73
76
  end
74
77
  x.compare!
75
78
  end
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
- # rubocop:disable Style/CaseEquality
3
2
 
4
- #
5
- # Developed from code available @ https://github.com/sanskrit/detect.js
6
- #
7
3
  module Sanscript
4
+ # Transliteration scheme detection module.
5
+ # Developed from code available @ https://github.com/sanskrit/detect.js
8
6
  module Detect
7
+ # rubocop:disable Style/CaseEquality
8
+
9
9
  # Match any character in the block of Brahmic scripts
10
10
  # between Devanagari and Malayalam.
11
11
  RE_BRAHMIC_RANGE = /[\u0900-\u0d7f]/
@@ -53,7 +53,11 @@ module Sanscript
53
53
 
54
54
  module_function
55
55
 
56
- def detect_script(text)
56
+ # Attempts to detect the encoding scheme of the provided string.
57
+ #
58
+ # @param text [String] a string of Sanskrit text
59
+ # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
60
+ def detect_scheme(text)
57
61
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
58
62
 
59
63
  # Brahmic schemes are all within a specific range of code points.
@@ -77,14 +81,13 @@ module Sanscript
77
81
  :itrans
78
82
  elsif RE_HARVARD_KYOTO === text
79
83
  :hk
80
- else
81
- :unknown
82
84
  end
83
85
  end
84
86
 
85
- # If Ruby 2.4's Regexp#match? method is found, use it for performance
87
+ # If Ruby 2.4's Regexp#match? method is found, use this version of detect_scheme
86
88
  if Regexp.method_defined?(:match?)
87
- def detect_script(text)
89
+ # @!visibility private
90
+ def detect_scheme(text)
88
91
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
89
92
 
90
93
  # Brahmic schemes are all within a specific range of code points.
@@ -108,8 +111,6 @@ module Sanscript
108
111
  :itrans
109
112
  elsif RE_HARVARD_KYOTO.match?(text)
110
113
  :hk
111
- else
112
- :unknown
113
114
  end
114
115
  end
115
116
  end
@@ -2,6 +2,7 @@
2
2
  require "ice_nine"
3
3
 
4
4
  module Sanscript
5
+ # A set of helpful refinements for duplication and deep freezing.
5
6
  module Refinements
6
7
  refine Object do
7
8
  def deep_dup
@@ -2,20 +2,31 @@
2
2
 
3
3
  require "sanscript/refinements"
4
4
  require "sanscript/transliterate/schemes"
5
- #
6
- # Sanscript
7
- #
8
- # Sanscript is a Sanskrit transliteration library. Currently, it supports
9
- # other Indian languages only incidentally.
10
- #
11
- # Released under the MIT and GPL Licenses.
12
- #
13
5
  module Sanscript
14
6
  using Refinements
7
+ # Sanskrit transliteration module.
8
+ # Derived from Sanscript, released under the MIT and GPL Licenses.
9
+ # "Sanscript is a Sanskrit transliteration library. Currently, it supports
10
+ # other Indian languages only incidentally."
15
11
  module Transliterate
16
12
  class << self
17
- attr_reader :defaults, :schemes, :scheme_names, :brahmic_schemes, :roman_schemes,
18
- :all_alternates
13
+ # @return [Array<Symbol>] the names of all supported schemes
14
+ attr_reader :scheme_names
15
+
16
+ # @return [Array<Symbol>] the names of all Brahmic schemes
17
+ attr_reader :brahmic_schemes
18
+
19
+ # @return [Array<Symbol>] the names of all roman schemes
20
+ attr_reader :roman_schemes
21
+
22
+ # @return [Hash] the data for all schemes
23
+ attr_reader :schemes
24
+
25
+ # @return [Hash] the alternate-character data for all schemes
26
+ attr_reader :all_alternates
27
+
28
+ # @return [Hash] the default transliteration options
29
+ attr_reader :defaults
19
30
  end
20
31
 
21
32
  @defaults = {
@@ -27,44 +38,37 @@ module Sanscript
27
38
 
28
39
  module_function
29
40
 
41
+ # Check whether the given scheme encodes Brahmic Sanskrit.
30
42
  #
31
- # Check whether the given scheme encodes Brahmic Sanskrit.
32
- #
33
- # @param name the scheme name
34
- # @return boolean
35
- #
43
+ # @param name [Symbol] the scheme name
44
+ # @return [Boolean]
36
45
  def brahmic_scheme?(name)
37
46
  @brahmic_schemes.include?(name.to_sym)
38
47
  end
39
48
 
40
- #
41
49
  # Check whether the given scheme encodes romanized Sanskrit.
42
50
  #
43
- # @param name the scheme name
44
- # @return boolean
45
- #
51
+ # @param name [Symbol] the scheme name
52
+ # @return [Boolean]
46
53
  def roman_scheme?(name)
47
54
  @roman_schemes.include?(name.to_sym)
48
55
  end
49
56
 
50
- #
51
57
  # Add a Brahmic scheme to Sanscript.
52
58
  #
53
59
  # Schemes are of two types: "Brahmic" and "roman". Brahmic consonants
54
60
  # have an inherent vowel sound, but roman consonants do not. This is the
55
61
  # main difference between these two types of scheme.
56
62
  #
57
- # A scheme definition is an object ("{}") that maps a group name to a
58
- # list of characters. For illustration, see the "devanagari" scheme at
59
- # the top of this file.
63
+ # A scheme definition is a Hash that maps a group name to a
64
+ # list of characters. For illustration, see `transliterate/schemes.rb`.
60
65
  #
61
66
  # You can use whatever group names you like, but for the best results,
62
67
  # you should use the same group names that Sanscript does.
63
68
  #
64
- # @param name the scheme name
65
- # @param scheme the scheme data itself. This should be constructed as
66
- # described above.
67
- #
69
+ # @param name [Symbol] the scheme name
70
+ # @param scheme [Hash] the scheme data, constructed as described above
71
+ # @return [Hash] the frozen scheme data as it exists inside the module
68
72
  def add_brahmic_scheme(name, scheme)
69
73
  name = name.to_sym
70
74
  scheme = scheme.deep_dup
@@ -74,15 +78,12 @@ module Sanscript
74
78
  scheme
75
79
  end
76
80
 
77
- #
78
81
  # Add a roman scheme to Sanscript.
79
82
  #
80
- # See the comments on Sanscript.add_brahmic_scheme. The "vowel_marks" field
81
- # can be omitted.
82
- #
83
- # @param name the scheme name
84
- # @param scheme the scheme data itself
85
- #
83
+ # @param name [Symbol] the scheme name
84
+ # @param scheme [Hash] the scheme data, constructed as in {add_brahmic_scheme}.
85
+ # The "vowel_marks" field can be omitted
86
+ # @return [Hash] the frozen scheme data as it exists inside the module
86
87
  def add_roman_scheme(name, scheme)
87
88
  name = name.to_sym
88
89
  scheme = scheme.deep_dup
@@ -93,13 +94,6 @@ module Sanscript
93
94
  scheme
94
95
  end
95
96
 
96
- #
97
- # Create a deep copy of an object, for certain kinds of objects.
98
- #
99
- # @param scheme the scheme to copy
100
- # @return the copy
101
- #
102
-
103
97
  # Set up various schemes
104
98
  begin
105
99
  # Re-add existing Brahmic schemes in order to add them to `scheme_names`
@@ -136,16 +130,15 @@ module Sanscript
136
130
  @all_alternates.each { |_, scheme| scheme.deep_freeze }
137
131
  end
138
132
 
139
- # /**
140
133
  # Transliterate from one script to another.
141
- # *
142
- # @param data the string to transliterate
143
- # @param from the source script
144
- # @param to the destination script
145
- # @param options transliteration options
146
- # @return the finished string
147
134
  #
148
- def transliterate(data, from, to, options = {})
135
+ # @param data [String] the String to transliterate
136
+ # @param from [Symbol] the source script
137
+ # @param to [Symbol] the destination script
138
+ # @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
139
+ # @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
140
+ # @return [String] the transliterated string
141
+ def transliterate(data, from, to, **opts)
149
142
  from = from.to_sym
150
143
  to = to.to_sym
151
144
  return data if from == to
@@ -153,7 +146,7 @@ module Sanscript
153
146
  raise "Scheme not known ':#{to}'" unless @schemes.key?(to)
154
147
 
155
148
  data = data.to_str.dup
156
- options = @defaults.merge(options)
149
+ options = @defaults.merge(opts)
157
150
  map = make_map(from, to)
158
151
 
159
152
  data.gsub!(/(<.*?>)/, "##\\1##") if options[:skip_sgml]
@@ -175,13 +168,12 @@ module Sanscript
175
168
  class << self
176
169
  private
177
170
 
178
- #
179
171
  # Create a map from every character in `from` to its partner in `to`.
180
172
  # Also, store any "marks" that `from` might have.
181
173
  #
182
- # @param from input scheme
183
- # @param to output scheme
184
- #
174
+ # @param from [Symbol] name of input scheme
175
+ # @param to [Symbol] name of output scheme
176
+ # @return [Hash] a mapping from one scheme to another
185
177
  def make_map(from, to)
186
178
  @cache[:"#{from}_#{to}"] ||= begin
187
179
  alternates = @all_alternates[from] || {}
@@ -229,16 +221,12 @@ module Sanscript
229
221
  end
230
222
  end
231
223
 
232
- #
233
224
  # Transliterate from a romanized script.
234
225
  #
235
- # @param data the string to transliterate
236
- # @param map map data generated from makeMap()
237
- # @param options transliteration options
238
- # @return the finished string
239
- #
226
+ # @param data [String] the string to transliterate
227
+ # @param map [Hash] map data generated from {#make_map}
228
+ # @return [String] the transliterated string
240
229
  def transliterate_roman(data, map, options = {})
241
- options = @defaults.merge(options)
242
230
  data = data.to_str.dup
243
231
  buf = []
244
232
  token_buffer = String.new
@@ -308,13 +296,11 @@ module Sanscript
308
296
  buf.join("")
309
297
  end
310
298
 
311
- #
312
299
  # Transliterate from a Brahmic script.
313
300
  #
314
- # @param data the string to transliterate
315
- # @param map map data generated from makeMap()
316
- # @return the finished string
317
- #
301
+ # @param data [String] the string to transliterate
302
+ # @param map [Hash] map data generated from {#make_map}
303
+ # @return [String] the transliterated string
318
304
  def transliterate_brahmic(data, map)
319
305
  data = data.to_str.dup
320
306
  buf = []
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  module Sanscript
3
- VERSION = "0.3.3"
3
+ # The version number
4
+ VERSION = "0.4.0"
4
5
  end
data/sanscript.gemspec CHANGED
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency "codeclimate-test-reporter", "~> 0.6"
28
28
  spec.add_development_dependency "pry", "~> 0.10"
29
29
  spec.add_development_dependency "benchmark-ips", "~> 2.6"
30
+ spec.add_development_dependency "yard", "~> 0.9"
30
31
 
31
32
  spec.add_runtime_dependency "ice_nine", "~> 0.11"
32
33
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '2.6'
97
+ - !ruby/object:Gem::Dependency
98
+ name: yard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.9'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.9'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: ice_nine
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -155,7 +169,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
169
  version: '0'
156
170
  requirements: []
157
171
  rubyforge_project:
158
- rubygems_version: 2.6.4
172
+ rubygems_version: 2.5.1
159
173
  signing_key:
160
174
  specification_version: 4
161
175
  summary: Ruby port and extension of Sanscript.js transliterator by learnsanskrit.org