sanscript 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/lib/sanscript.rb +18 -3
- data/lib/sanscript/benchmark.rb +43 -15
- data/lib/sanscript/detect.rb +2 -0
- data/lib/sanscript/exceptions.rb +19 -0
- data/lib/sanscript/transliterate.rb +41 -40
- data/lib/sanscript/transliterate/schemes.rb +80 -81
- data/lib/sanscript/version.rb +1 -1
- data/sanscript.gemspec +1 -1
- metadata +5 -5
- data/lib/sanscript/refinements.rb +0 -95
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 348f8d72cc3d76ba760a4225a4f784324294474a
|
4
|
+
data.tar.gz: f3a1215ad14dc3778795dc0f6563345aaa3015fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8eae2315a8d3a68ce1a873585ab752902287a027a6d45961fef77ad0174d3646d7c7c36107a5879b5b8b36c10afef93428b34098b2ae86dfb6026bb4d644e94
|
7
|
+
data.tar.gz: 1997c7bb6d11f4b139eb5cde903bc17bffb786bcad236351100e55248d57945fbd19e7cf83f409af357c4c36d1dfb43fbb5adc5a0c2997e66622ed1576bb1921
|
data/README.md
CHANGED
@@ -25,7 +25,9 @@ Or install it yourself as:
|
|
25
25
|
|
26
26
|
## Usage
|
27
27
|
|
28
|
-
You can access detection through `Sanscript.detect(text)` and transliteration through `Sanscript.transliterate(text, from, to)`.
|
28
|
+
You can access detection through `Sanscript.detect(text)` and transliteration through `Sanscript.transliterate(text, from, to)`.
|
29
|
+
|
30
|
+
Documentation is provided in YARD format and available online at [rubydoc.info](http://www.rubydoc.info/github/ubcsanskrit/sanscript.rb).
|
29
31
|
|
30
32
|
## Development
|
31
33
|
|
data/lib/sanscript.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require "ragabash"
|
3
|
+
|
2
4
|
require "sanscript/version"
|
5
|
+
require "sanscript/exceptions"
|
3
6
|
require "sanscript/detect"
|
4
7
|
require "sanscript/transliterate"
|
5
8
|
require "sanscript/benchmark"
|
@@ -21,23 +24,35 @@ module Sanscript
|
|
21
24
|
#
|
22
25
|
# @overload transliterate(text, from, to, **opts)
|
23
26
|
# @param text [String] the String to transliterate
|
24
|
-
# @param from [Symbol] the name of the scheme to transliterate from
|
27
|
+
# @param from [Symbol, nil] the name of the scheme to transliterate from, or nil to detect it from the text
|
25
28
|
# @param to [Symbol] the name of the scheme to transliterate to
|
29
|
+
# @option opts [Symbol] :default_scheme a default scheme to fall back to if detection fails
|
26
30
|
# @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
|
27
31
|
# @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
|
28
32
|
# @return [String] the transliterated String
|
29
33
|
#
|
34
|
+
# @raise [DetectionError] if scheme detection and fallback fail
|
35
|
+
# @raise [SchemeNotSupportedError] if a provided transliteration scheme is not supported
|
36
|
+
#
|
30
37
|
# @overload transliterate(text, to, **opts)
|
31
38
|
# @param text [String] the String to transliterate
|
32
39
|
# @param to [Symbol] the name of the scheme to transliterate to
|
33
40
|
# @option opts [Symbol] :default_scheme a default scheme to fall back to if detection fails
|
34
41
|
# @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
|
35
42
|
# @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
|
36
|
-
# @return [String
|
43
|
+
# @return [String] the transliterated String
|
44
|
+
#
|
45
|
+
# @raise [DetectionError] if scheme detection and fallback fail
|
46
|
+
# @raise [SchemeNotSupportedError] if a provided transliteration scheme is not supported
|
47
|
+
#
|
37
48
|
def transliterate(text, from, to = nil, **opts)
|
38
49
|
if to.nil?
|
39
50
|
to = from
|
40
|
-
from =
|
51
|
+
from = nil
|
52
|
+
end
|
53
|
+
if from.nil?
|
54
|
+
from = Detect.detect_scheme(text) || opts[:default_scheme] ||
|
55
|
+
raise(DetectionError, "String detection and fallback failed.")
|
41
56
|
end
|
42
57
|
Transliterate.transliterate(text, from, to, opts)
|
43
58
|
end
|
data/lib/sanscript/benchmark.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
#:nocov:
|
2
3
|
|
3
|
-
require "sanscript/refinements"
|
4
4
|
begin
|
5
5
|
require "benchmark/ips"
|
6
6
|
rescue LoadError
|
@@ -12,18 +12,17 @@ rescue LoadError
|
|
12
12
|
end
|
13
13
|
|
14
14
|
module Sanscript
|
15
|
-
using Refinements
|
16
15
|
# Benchmark/testing module.
|
17
16
|
module Benchmark
|
18
17
|
module_function
|
19
18
|
|
20
19
|
# Runs benchmark-ips test on detection methods.
|
21
20
|
def detect!
|
22
|
-
iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
|
23
21
|
deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
|
22
|
+
malayalam_string = "നാനാശാസ്ത്രസുഭാഷിതാമൃതരസൈഃ ശ്രോത്രോത്സവം കുര്വതാം യേഷാം യാന്തി ദിനാനി പണ്ഡിതജനവ്യായാമഖിന്നാത്മനാമ് തേഷാം ജന്മ ച ജീവിതം ച സുകൃതം തൈര് ഏവ ഭൂര് ഭൂഷിതാ ശേഷൈഹ് കിം പശുവദ് വിവേകരഹിതൈര് ഭൂഭാരഭൂതൈര് നരഃ"
|
23
|
+
iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
|
24
24
|
slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
|
25
25
|
hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
|
26
|
-
malayalam_string = "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ"
|
27
26
|
|
28
27
|
::Benchmark.ips do |x|
|
29
28
|
x.config(time: 5, warmup: 1)
|
@@ -47,32 +46,61 @@ module Sanscript
|
|
47
46
|
true
|
48
47
|
end
|
49
48
|
|
50
|
-
# Runs benchmark-ips test on transliteration methods.
|
51
|
-
def
|
49
|
+
# Runs benchmark-ips test on roman-source transliteration methods.
|
50
|
+
def transliterate_roman!
|
52
51
|
iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
|
53
|
-
deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
|
54
52
|
slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
|
53
|
+
hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
|
55
54
|
|
56
55
|
::Benchmark.ips do |x|
|
57
|
-
x.config(time:
|
56
|
+
x.config(time: 3, warmup: 2)
|
58
57
|
|
59
58
|
x.report("IAST==>Devanagari") do
|
60
|
-
|
59
|
+
Sanscript.transliterate(iast_string, :iast, :devanagari)
|
60
|
+
end
|
61
|
+
x.report("IAST==>SLP1") do
|
62
|
+
Sanscript.transliterate(iast_string, :iast, :slp1)
|
61
63
|
end
|
62
64
|
x.report("IAST==>SLP1") do
|
63
|
-
|
65
|
+
Sanscript.transliterate(iast_string, :iast, :hk)
|
64
66
|
end
|
65
67
|
x.report("SLP1==>Devanagari") do
|
66
|
-
|
68
|
+
Sanscript.transliterate(slp1_string, :slp1, :devanagari)
|
67
69
|
end
|
68
70
|
x.report("SLP1==>IAST") do
|
69
|
-
|
71
|
+
Sanscript.transliterate(slp1_string, :slp1, :iast)
|
70
72
|
end
|
71
|
-
x.report("
|
72
|
-
|
73
|
+
x.report("SLP1==>HK") do
|
74
|
+
Sanscript.transliterate(slp1_string, :slp1, :hk)
|
75
|
+
end
|
76
|
+
x.report("HK==>Devanagari") do
|
77
|
+
Sanscript.transliterate(hk_string, :hk, :devanagari)
|
73
78
|
end
|
79
|
+
x.report("HK==>IAST") do
|
80
|
+
Sanscript.transliterate(hk_string, :hk, :iast)
|
81
|
+
end
|
82
|
+
x.report("HK==>SLP1") do
|
83
|
+
Sanscript.transliterate(hk_string, :hk, :slp1)
|
84
|
+
end
|
85
|
+
x.compare!
|
86
|
+
end
|
87
|
+
true
|
88
|
+
end
|
89
|
+
|
90
|
+
# Runs benchmark-ips test on brahmic-source transliteration methods.
|
91
|
+
def transliterate_brahmic!
|
92
|
+
deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
|
93
|
+
|
94
|
+
::Benchmark.ips do |x|
|
95
|
+
x.config(time: 5, warmup: 2)
|
74
96
|
x.report("Devanagari==>IAST") do
|
75
|
-
|
97
|
+
Sanscript.transliterate(deva_string, :devanagari, :iast)
|
98
|
+
end
|
99
|
+
x.report("Devanagari==>SLP1") do
|
100
|
+
Sanscript.transliterate(deva_string, :devanagari, :slp1)
|
101
|
+
end
|
102
|
+
x.report("Devanagari==>HK") do
|
103
|
+
Sanscript.transliterate(deva_string, :devanagari, :hk)
|
76
104
|
end
|
77
105
|
x.compare!
|
78
106
|
end
|
data/lib/sanscript/detect.rb
CHANGED
@@ -61,6 +61,7 @@ module Sanscript
|
|
61
61
|
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
62
62
|
|
63
63
|
# @!visibility private
|
64
|
+
# :nocov:
|
64
65
|
if Regexp.method_defined?(:match?)
|
65
66
|
require "sanscript/detect/ruby24"
|
66
67
|
extend Ruby24
|
@@ -68,5 +69,6 @@ module Sanscript
|
|
68
69
|
require "sanscript/detect/ruby2x"
|
69
70
|
extend Ruby2x
|
70
71
|
end
|
72
|
+
# :nocov:
|
71
73
|
end
|
72
74
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sanscript
|
4
|
+
using ::Ragabash::Refinements
|
5
|
+
# Error for when transliteration scheme is not supported.
|
6
|
+
class SchemeNotSupportedError < StandardError
|
7
|
+
def initialize(scheme = :unknown)
|
8
|
+
super(":#{scheme} is not supported.")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
# Error for when scheme detection should non-silently fail
|
13
|
+
# (such as inside a transliteration method).
|
14
|
+
class DetectionError < StandardError
|
15
|
+
def initialize(message = "String detection failed.")
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -1,9 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "sanscript/refinements"
|
4
3
|
require "sanscript/transliterate/schemes"
|
5
4
|
module Sanscript
|
6
|
-
using Refinements
|
5
|
+
using ::Ragabash::Refinements
|
7
6
|
# Sanskrit transliteration module.
|
8
7
|
# Derived from Sanscript (https://github.com/sanskrit/sanscript.js), which is
|
9
8
|
# released under the MIT and GPL Licenses.
|
@@ -144,8 +143,8 @@ module Sanscript
|
|
144
143
|
from = from.to_sym
|
145
144
|
to = to.to_sym
|
146
145
|
return data if from == to
|
147
|
-
raise
|
148
|
-
raise
|
146
|
+
raise SchemeNotSupportedError, from unless @schemes.key?(from)
|
147
|
+
raise SchemeNotSupportedError, to unless @schemes.key?(to)
|
149
148
|
|
150
149
|
data = data.to_str.dup
|
151
150
|
options = @defaults.merge(opts)
|
@@ -229,39 +228,40 @@ module Sanscript
|
|
229
228
|
# @param map [Hash] map data generated from {#make_map}
|
230
229
|
# @return [String] the transliterated string
|
231
230
|
def transliterate_roman(data, map, options = {})
|
232
|
-
data = data.to_str.
|
231
|
+
data = data.to_str.chars
|
233
232
|
buf = []
|
234
|
-
token_buffer =
|
233
|
+
token_buffer = []
|
235
234
|
had_consonant = false
|
236
235
|
transliteration_enabled = true
|
237
236
|
control_char = false
|
237
|
+
max_token_length = map[:max_token_length]
|
238
238
|
|
239
239
|
until data.empty? && token_buffer.empty?
|
240
|
-
token_buffer << data.slice!(0, map[:max_token_length] - token_buffer.length)
|
241
|
-
|
242
240
|
# Match all token substrings to our map.
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
if
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
transliteration_enabled
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
241
|
+
token = data[0, max_token_length].join("")
|
242
|
+
max_token_length.downto(1) do |j|
|
243
|
+
token = token[0, j] unless j == max_token_length
|
244
|
+
if j == 2
|
245
|
+
if !control_char && token == "##"
|
246
|
+
transliteration_enabled = !transliteration_enabled
|
247
|
+
data.shift(2)
|
248
|
+
break
|
249
|
+
elsif control_char && token == "#}"
|
250
|
+
transliteration_enabled = true
|
251
|
+
control_char = false
|
252
|
+
buf << token
|
253
|
+
data.shift(2)
|
254
|
+
break
|
255
|
+
elsif transliteration_enabled && token == "{#"
|
256
|
+
transliteration_enabled = false
|
257
|
+
control_char = true
|
258
|
+
buf << token
|
259
|
+
data.shift(2)
|
260
|
+
break
|
261
|
+
end
|
262
262
|
end
|
263
|
-
|
264
|
-
if
|
263
|
+
|
264
|
+
if transliteration_enabled && (temp_letter = map[:letters][token])
|
265
265
|
if map[:to_roman?]
|
266
266
|
buf << temp_letter
|
267
267
|
else
|
@@ -269,18 +269,19 @@ module Sanscript
|
|
269
269
|
# vowels to appear as marks if we've just seen a
|
270
270
|
# consonant.
|
271
271
|
if had_consonant
|
272
|
-
|
273
|
-
if
|
272
|
+
# rubocop:disable Metrics/BlockNesting
|
273
|
+
if (temp_mark = map[:marks][token])
|
274
274
|
buf << temp_mark
|
275
275
|
elsif token != "a"
|
276
|
-
buf
|
276
|
+
buf.push(map[:virama], temp_letter)
|
277
277
|
end
|
278
|
+
# rubocop:enable Metrics/BlockNesting
|
278
279
|
else
|
279
280
|
buf << temp_letter
|
280
281
|
end
|
281
282
|
had_consonant = map[:consonants].key?(token)
|
282
283
|
end
|
283
|
-
|
284
|
+
j > 1 ? data.shift(j) : data.shift
|
284
285
|
break
|
285
286
|
elsif j == 1 # Last iteration
|
286
287
|
if had_consonant
|
@@ -288,7 +289,7 @@ module Sanscript
|
|
288
289
|
buf << map[:virama] unless options[:syncope]
|
289
290
|
end
|
290
291
|
buf << token
|
291
|
-
|
292
|
+
data.shift
|
292
293
|
end
|
293
294
|
end
|
294
295
|
end
|
@@ -302,27 +303,27 @@ module Sanscript
|
|
302
303
|
# @param map [Hash] map data generated from {#make_map}
|
303
304
|
# @return [String] the transliterated string
|
304
305
|
def transliterate_brahmic(data, map)
|
305
|
-
data = data.to_str.
|
306
|
+
data = data.to_str.chars
|
306
307
|
buf = []
|
307
308
|
had_roman_consonant = false
|
308
309
|
transliteration_enabled = true
|
309
310
|
control_char = false
|
310
311
|
|
311
312
|
until data.empty?
|
312
|
-
token = data
|
313
|
+
token = data[0, 2].join("")
|
313
314
|
if !control_char && token == "##"
|
314
315
|
if had_roman_consonant
|
315
316
|
buf << "a" if transliteration_enabled
|
316
317
|
had_roman_consonant = false
|
317
318
|
end
|
318
319
|
transliteration_enabled = !transliteration_enabled
|
319
|
-
data.
|
320
|
+
data.shift(2)
|
320
321
|
next
|
321
322
|
elsif control_char && token == "#}"
|
322
323
|
transliteration_enabled = true
|
323
324
|
control_char = false
|
324
325
|
buf << token
|
325
|
-
data.
|
326
|
+
data.shift(2)
|
326
327
|
next
|
327
328
|
elsif transliteration_enabled && token == "{#"
|
328
329
|
if had_roman_consonant
|
@@ -332,11 +333,11 @@ module Sanscript
|
|
332
333
|
transliteration_enabled = false
|
333
334
|
control_char = true
|
334
335
|
buf << token
|
335
|
-
data.
|
336
|
+
data.shift(2)
|
336
337
|
next
|
337
338
|
end
|
338
339
|
|
339
|
-
l = data.
|
340
|
+
l = data.shift
|
340
341
|
unless transliteration_enabled
|
341
342
|
buf << l
|
342
343
|
next
|
@@ -1,8 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require "sanscript/refinements"
|
3
2
|
|
4
3
|
module Sanscript
|
5
|
-
using Refinements
|
4
|
+
using ::Ragabash::Refinements
|
6
5
|
module Transliterate
|
7
6
|
# Schemes
|
8
7
|
# =======
|
@@ -25,13 +24,13 @@ module Sanscript
|
|
25
24
|
# 'va' and 'ba' are both rendered as ব.
|
26
25
|
#
|
27
26
|
bengali: {
|
28
|
-
vowels: "অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ এ ঐ ও ঔ".
|
29
|
-
vowel_marks: "া ি ী ু ূ ৃ ৄ ৢ ৣ ে ৈ ো ৌ".
|
30
|
-
other_marks: "ং ঃ ঁ".
|
27
|
+
vowels: "অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ এ ঐ ও ঔ".split(/\s/),
|
28
|
+
vowel_marks: "া ি ী ু ূ ৃ ৄ ৢ ৣ ে ৈ ো ৌ".split(/\s/),
|
29
|
+
other_marks: "ং ঃ ঁ".split(/\s/),
|
31
30
|
virama: ["্"],
|
32
|
-
consonants: "ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ত থ দ ধ ন প ফ ব ভ ম য র ল ব শ ষ স হ ळ ক্ষ জ্ঞ".
|
33
|
-
symbols: "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ॐ ঽ । ॥".
|
34
|
-
other: " ড ঢ য ".
|
31
|
+
consonants: "ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ত থ দ ধ ন প ফ ব ভ ম য র ল ব শ ষ স হ ळ ক্ষ জ্ঞ".split(/\s/),
|
32
|
+
symbols: "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ॐ ঽ । ॥".split(/\s/),
|
33
|
+
other: " ড ঢ য ".split(/\s/),
|
35
34
|
},
|
36
35
|
|
37
36
|
# Devanagari
|
@@ -41,15 +40,15 @@ module Sanscript
|
|
41
40
|
devanagari: {
|
42
41
|
# "Independent" forms of the vowels. These are used whenever the
|
43
42
|
# vowel does not immediately follow a consonant.
|
44
|
-
vowels: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ऎ ए ऐ ऒ ओ औ".
|
43
|
+
vowels: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ऎ ए ऐ ऒ ओ औ".split(/\s/),
|
45
44
|
|
46
45
|
# "Dependent" forms of the vowels. These are used whenever the
|
47
46
|
# vowel immediately follows a consonant. If a letter is not
|
48
47
|
# listed in `vowels`, it should not be listed here.
|
49
|
-
vowel_marks: "ा ि ी ु ू ृ ॄ ॢ ॣ ॆ े ै ॊ ो ौ".
|
48
|
+
vowel_marks: "ा ि ी ु ू ृ ॄ ॢ ॣ ॆ े ै ॊ ो ौ".split(/\s/),
|
50
49
|
|
51
50
|
# Miscellaneous marks, all of which are used in Sanskrit.
|
52
|
-
other_marks: "ं ः ँ".
|
51
|
+
other_marks: "ं ः ँ".split(/\s/),
|
53
52
|
|
54
53
|
# In syllabic scripts like Devanagari, consonants have an inherent
|
55
54
|
# vowel that must be suppressed explicitly. We do so by putting a
|
@@ -58,10 +57,10 @@ module Sanscript
|
|
58
57
|
|
59
58
|
# Various Sanskrit consonants and consonant clusters. Every token
|
60
59
|
# here has an explicit vowel. Thus "क" is "ka" instead of "k".
|
61
|
-
consonants: "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ क्ष ज्ञ".
|
60
|
+
consonants: "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ क्ष ज्ञ".split(/\s/),
|
62
61
|
|
63
62
|
# Numbers and punctuation
|
64
|
-
symbols: "० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ऽ । ॥".
|
63
|
+
symbols: "० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ऽ । ॥".split(/\s/),
|
65
64
|
|
66
65
|
# Zero-width joiner. This is used to separate a consonant cluster
|
67
66
|
# and avoid a complex ligature.
|
@@ -76,12 +75,12 @@ module Sanscript
|
|
76
75
|
|
77
76
|
# Accent combined with anusvara and visarga. For compatibility
|
78
77
|
# with ITRANS, which allows the reverse of these four.
|
79
|
-
combo_accent: "ः॑ ः॒ ं॑ ं॒".
|
78
|
+
combo_accent: "ः॑ ः॒ ं॑ ं॒".split(/\s/),
|
80
79
|
|
81
80
|
candra: ["ॅ"],
|
82
81
|
|
83
82
|
# Non-Sanskrit consonants
|
84
|
-
other: "क़ ख़ ग़ ज़ ड़ ढ़ फ़ य़ ऱ".
|
83
|
+
other: "क़ ख़ ग़ ज़ ड़ ढ़ फ़ य़ ऱ".split(/\s/),
|
85
84
|
},
|
86
85
|
|
87
86
|
# Gujarati
|
@@ -89,12 +88,12 @@ module Sanscript
|
|
89
88
|
# Sanskrit-complete.
|
90
89
|
#
|
91
90
|
gujarati: {
|
92
|
-
vowels: "અ આ ઇ ઈ ઉ ઊ ઋ ૠ ઌ ૡ એ ઐ ઓ ઔ".
|
93
|
-
vowel_marks: "ા િ ી ુ ૂ ૃ ૄ ૢ ૣ ે ૈ ો ૌ".
|
94
|
-
other_marks: "ં ઃ ઁ".
|
91
|
+
vowels: "અ આ ઇ ઈ ઉ ઊ ઋ ૠ ઌ ૡ એ ઐ ઓ ઔ".split(/\s/),
|
92
|
+
vowel_marks: "ા િ ી ુ ૂ ૃ ૄ ૢ ૣ ે ૈ ો ૌ".split(/\s/),
|
93
|
+
other_marks: "ં ઃ ઁ".split(/\s/),
|
95
94
|
virama: ["્"],
|
96
|
-
consonants: "ક ખ ગ ઘ ઙ ચ છ જ ઝ ઞ ટ ઠ ડ ઢ ણ ત થ દ ધ ન પ ફ બ ભ મ ય ર લ વ શ ષ સ હ ળ ક્ષ જ્ઞ".
|
97
|
-
symbols: "૦ ૧ ૨ ૩ ૪ ૫ ૬ ૭ ૮ ૯ ૐ ઽ ".
|
95
|
+
consonants: "ક ખ ગ ઘ ઙ ચ છ જ ઝ ઞ ટ ઠ ડ ઢ ણ ત થ દ ધ ન પ ફ બ ભ મ ય ર લ વ શ ષ સ હ ળ ક્ષ જ્ઞ".split(/\s/),
|
96
|
+
symbols: "૦ ૧ ૨ ૩ ૪ ૫ ૬ ૭ ૮ ૯ ૐ ઽ ".split(/\s/),
|
98
97
|
candra: ["ૅ"],
|
99
98
|
},
|
100
99
|
|
@@ -103,13 +102,13 @@ module Sanscript
|
|
103
102
|
# Missing R/RR/lR/lRR
|
104
103
|
#
|
105
104
|
gurmukhi: {
|
106
|
-
vowels: "ਅ ਆ ਇ ਈ ਉ ਊ ਏ ਐ ਓ ਔ".
|
107
|
-
vowel_marks: "ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ".
|
108
|
-
other_marks: "ਂ ਃ ਁ".
|
105
|
+
vowels: "ਅ ਆ ਇ ਈ ਉ ਊ ਏ ਐ ਓ ਔ".split(/\s/),
|
106
|
+
vowel_marks: "ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ".split(/\s/),
|
107
|
+
other_marks: "ਂ ਃ ਁ".split(/\s/),
|
109
108
|
virama: ["੍"],
|
110
|
-
consonants: "ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ਸ਼ ਸ਼ ਸ ਹ ਲ਼ ਕ੍ਸ਼ ਜ੍ਞ".
|
111
|
-
symbols: "੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ॐ ऽ । ॥".
|
112
|
-
other: " ਖ ਗ ਜ ਡ ਫ ".
|
109
|
+
consonants: "ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ਸ਼ ਸ਼ ਸ ਹ ਲ਼ ਕ੍ਸ਼ ਜ੍ਞ".split(/\s/),
|
110
|
+
symbols: "੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ॐ ऽ । ॥".split(/\s/),
|
111
|
+
other: " ਖ ਗ ਜ ਡ ਫ ".split(/\s/),
|
113
112
|
},
|
114
113
|
|
115
114
|
# Kannada
|
@@ -117,13 +116,13 @@ module Sanscript
|
|
117
116
|
# Sanskrit-complete.
|
118
117
|
#
|
119
118
|
kannada: {
|
120
|
-
vowels: "ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ".
|
121
|
-
vowel_marks: "ಾ ಿ ೀ ು ೂ ೃ ೄ ೢ ೣ ೆ ೇ ೈ ೊ ೋ ೌ".
|
122
|
-
other_marks: "ಂ ಃ ँ".
|
119
|
+
vowels: "ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ".split(/\s/),
|
120
|
+
vowel_marks: "ಾ ಿ ೀ ು ೂ ೃ ೄ ೢ ೣ ೆ ೇ ೈ ೊ ೋ ೌ".split(/\s/),
|
121
|
+
other_marks: "ಂ ಃ ँ".split(/\s/),
|
123
122
|
virama: ["್"],
|
124
|
-
consonants: "ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಕ್ಷ ಜ್ಞ".
|
125
|
-
symbols: "೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಓಂ ಽ । ॥".
|
126
|
-
other: " ಫ ಱ".
|
123
|
+
consonants: "ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಕ್ಷ ಜ್ಞ".split(/\s/),
|
124
|
+
symbols: "೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಓಂ ಽ । ॥".split(/\s/),
|
125
|
+
other: " ಫ ಱ".split(/\s/),
|
127
126
|
},
|
128
127
|
|
129
128
|
# Malayalam
|
@@ -131,13 +130,13 @@ module Sanscript
|
|
131
130
|
# Sanskrit-complete.
|
132
131
|
#
|
133
132
|
malayalam: {
|
134
|
-
vowels: "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ".
|
135
|
-
vowel_marks: "ാ ി ീ ു ൂ ൃ ൄ ൢ ൣ െ േ ൈ ൊ ോ ൌ".
|
136
|
-
other_marks: "ം ഃ ँ".
|
133
|
+
vowels: "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ".split(/\s/),
|
134
|
+
vowel_marks: "ാ ി ീ ു ൂ ൃ ൄ ൢ ൣ െ േ ൈ ൊ ോ ൌ".split(/\s/),
|
135
|
+
other_marks: "ം ഃ ँ".split(/\s/),
|
137
136
|
virama: ["്"],
|
138
|
-
consonants: "ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ത ഥ ദ ധ ന പ ഫ ബ ഭ മ യ ര ല വ ശ ഷ സ ഹ ള ക്ഷ ജ്ഞ".
|
139
|
-
symbols: "൦ ൧ ൨ ൩ ൪ ൫ ൬ ൭ ൮ ൯ ഓം ഽ । ॥".
|
140
|
-
other: " റ".
|
137
|
+
consonants: "ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ത ഥ ദ ധ ന പ ഫ ബ ഭ മ യ ര ല വ ശ ഷ സ ഹ ള ക്ഷ ജ്ഞ".split(/\s/),
|
138
|
+
symbols: "൦ ൧ ൨ ൩ ൪ ൫ ൬ ൭ ൮ ൯ ഓം ഽ । ॥".split(/\s/),
|
139
|
+
other: " റ".split(/\s/),
|
141
140
|
},
|
142
141
|
|
143
142
|
# Oriya
|
@@ -145,13 +144,13 @@ module Sanscript
|
|
145
144
|
# Sanskrit-complete.
|
146
145
|
#
|
147
146
|
oriya: {
|
148
|
-
vowels: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ୠ ଌ ୡ ଏ ଐ ଓ ଔ".
|
149
|
-
vowel_marks: "ା ି ୀ ୁ ୂ ୃ ୄ ୢ ୣ େ ୈ ୋ ୌ".
|
150
|
-
other_marks: "ଂ ଃ ଁ".
|
147
|
+
vowels: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ୠ ଌ ୡ ଏ ଐ ଓ ଔ".split(/\s/),
|
148
|
+
vowel_marks: "ା ି ୀ ୁ ୂ ୃ ୄ ୢ ୣ େ ୈ ୋ ୌ".split(/\s/),
|
149
|
+
other_marks: "ଂ ଃ ଁ".split(/\s/),
|
151
150
|
virama: ["୍"],
|
152
|
-
consonants: "କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ ଢ ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ର ଲ ଵ ଶ ଷ ସ ହ ଳ କ୍ଷ ଜ୍ଞ".
|
153
|
-
symbols: "୦ ୧ ୨ ୩ ୪ ୫ ୬ ୭ ୮ ୯ ଓଂ ଽ । ॥".
|
154
|
-
other: " ଡ ଢ ଯ ".
|
151
|
+
consonants: "କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ ଢ ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ର ଲ ଵ ଶ ଷ ସ ହ ଳ କ୍ଷ ଜ୍ଞ".split(/\s/),
|
152
|
+
symbols: "୦ ୧ ୨ ୩ ୪ ୫ ୬ ୭ ୮ ୯ ଓଂ ଽ । ॥".split(/\s/),
|
153
|
+
other: " ଡ ଢ ଯ ".split(/\s/),
|
155
154
|
},
|
156
155
|
|
157
156
|
# Tamil
|
@@ -160,13 +159,13 @@ module Sanscript
|
|
160
159
|
# The most incomplete of the Sanskrit schemes here.
|
161
160
|
#
|
162
161
|
tamil: {
|
163
|
-
vowels: "அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ".
|
164
|
-
vowel_marks: "ா ி ீ ு ூ ெ ே ை ொ ோ ௌ".
|
165
|
-
other_marks: "ஂ ஃ ".
|
162
|
+
vowels: "அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ".split(/\s/),
|
163
|
+
vowel_marks: "ா ி ீ ு ூ ெ ே ை ொ ோ ௌ".split(/\s/),
|
164
|
+
other_marks: "ஂ ஃ ".split(/\s/),
|
166
165
|
virama: ["்"],
|
167
|
-
consonants: "க க க க ங ச ச ஜ ச ஞ ட ட ட ட ண த த த த ந ப ப ப ப ம ய ர ல வ ஶ ஷ ஸ ஹ ள க்ஷ ஜ்ஞ".
|
168
|
-
symbols: "௦ ௧ ௨ ௩ ௪ ௫ ௬ ௭ ௮ ௯ ௐ ऽ । ॥".
|
169
|
-
other: " ற".
|
166
|
+
consonants: "க க க க ங ச ச ஜ ச ஞ ட ட ட ட ண த த த த ந ப ப ப ப ம ய ர ல வ ஶ ஷ ஸ ஹ ள க்ஷ ஜ்ஞ".split(/\s/),
|
167
|
+
symbols: "௦ ௧ ௨ ௩ ௪ ௫ ௬ ௭ ௮ ௯ ௐ ऽ । ॥".split(/\s/),
|
168
|
+
other: " ற".split(/\s/),
|
170
169
|
},
|
171
170
|
|
172
171
|
# Telugu
|
@@ -174,13 +173,13 @@ module Sanscript
|
|
174
173
|
# Sanskrit-complete.
|
175
174
|
#
|
176
175
|
telugu: {
|
177
|
-
vowels: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ".
|
178
|
-
vowel_marks: "ా ి ీ ు ూ ృ ౄ ౢ ౣ ె ే ై ొ ో ౌ".
|
179
|
-
other_marks: "ం ః ఁ".
|
176
|
+
vowels: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ".split(/\s/),
|
177
|
+
vowel_marks: "ా ి ీ ు ూ ృ ౄ ౢ ౣ ె ే ై ొ ో ౌ".split(/\s/),
|
178
|
+
other_marks: "ం ః ఁ".split(/\s/),
|
180
179
|
virama: ["్"],
|
181
|
-
consonants: "క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ల వ శ ష స హ ళ క్ష జ్ఞ".
|
182
|
-
symbols: "౦ ౧ ౨ ౩ ౪ ౫ ౬ ౭ ౮ ౯ ఓం ఽ । ॥".
|
183
|
-
other: " ఱ".
|
180
|
+
consonants: "క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ల వ శ ష స హ ళ క్ష జ్ఞ".split(/\s/),
|
181
|
+
symbols: "౦ ౧ ౨ ౩ ౪ ౫ ౬ ౭ ౮ ౯ ఓం ఽ । ॥".split(/\s/),
|
182
|
+
other: " ఱ".split(/\s/),
|
184
183
|
},
|
185
184
|
|
186
185
|
# International Alphabet of Sanskrit Transliteration
|
@@ -188,11 +187,11 @@ module Sanscript
|
|
188
187
|
# The most "professional" Sanskrit romanization scheme.
|
189
188
|
#
|
190
189
|
iast: {
|
191
|
-
vowels: "a ā i ī u ū ṛ ṝ ḷ ḹ e ai o au".
|
190
|
+
vowels: "a ā i ī u ū ṛ ṝ ḷ ḹ e ai o au".split(/\s/),
|
192
191
|
other_marks: ["ṃ", "ḥ", "~"],
|
193
192
|
virama: [""],
|
194
|
-
consonants: "k kh g gh ṅ c ch j jh ñ ṭ ṭh ḍ ḍh ṇ t th d dh n p ph b bh m y r l v ś ṣ s h ḻ kṣ jñ".
|
195
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 oṃ ' | ||".
|
193
|
+
consonants: "k kh g gh ṅ c ch j jh ñ ṭ ṭh ḍ ḍh ṇ t th d dh n p ph b bh m y r l v ś ṣ s h ḻ kṣ jñ".split(/\s/),
|
194
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 oṃ ' | ||".split(/\s/),
|
196
195
|
},
|
197
196
|
|
198
197
|
# ITRANS
|
@@ -204,17 +203,17 @@ module Sanscript
|
|
204
203
|
# '_' is a "null" letter, which allows adjacent vowels.
|
205
204
|
#
|
206
205
|
itrans: {
|
207
|
-
vowels: "a A i I u U RRi RRI LLi LLI e ai o au".
|
206
|
+
vowels: "a A i I u U RRi RRI LLi LLI e ai o au".split(/\s/),
|
208
207
|
other_marks: ["M", "H", ".N"],
|
209
208
|
virama: [""],
|
210
|
-
consonants: "k kh g gh ~N ch Ch j jh ~n T Th D Dh N t th d dh n p ph b bh m y r l v sh Sh s h L kSh j~n".
|
211
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 OM .a | ||".
|
209
|
+
consonants: "k kh g gh ~N ch Ch j jh ~n T Th D Dh N t th d dh n p ph b bh m y r l v sh Sh s h L kSh j~n".split(/\s/),
|
210
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 OM .a | ||".split(/\s/),
|
212
211
|
candra: [".c"],
|
213
212
|
zwj: ["{}"],
|
214
213
|
skip: ["_"],
|
215
214
|
accent: ["\\'", "\\_"],
|
216
|
-
combo_accent: "\\'H \\_H \\'M \\_M".
|
217
|
-
other: "q K G z .D .Dh f Y R".
|
215
|
+
combo_accent: "\\'H \\_H \\'M \\_M".split(/\s/),
|
216
|
+
other: "q K G z .D .Dh f Y R".split(/\s/),
|
218
217
|
},
|
219
218
|
|
220
219
|
# Harvard-Kyoto
|
@@ -222,11 +221,11 @@ module Sanscript
|
|
222
221
|
# A simple 1:1 mapping.
|
223
222
|
#
|
224
223
|
hk: {
|
225
|
-
vowels: "a A i I u U R RR lR lRR e ai o au".
|
226
|
-
other_marks: "M H ~".
|
224
|
+
vowels: "a A i I u U R RR lR lRR e ai o au".split(/\s/),
|
225
|
+
other_marks: "M H ~".split(/\s/),
|
227
226
|
virama: [""],
|
228
|
-
consonants: "k kh g gh G c ch j jh J T Th D Dh N t th d dh n p ph b bh m y r l v z S s h L kS jJ".
|
229
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 OM ' | ||".
|
227
|
+
consonants: "k kh g gh G c ch j jh J T Th D Dh N t th d dh n p ph b bh m y r l v z S s h L kS jJ".split(/\s/),
|
228
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 OM ' | ||".split(/\s/),
|
230
229
|
},
|
231
230
|
|
232
231
|
# National Library at Kolkata
|
@@ -243,11 +242,11 @@ module Sanscript
|
|
243
242
|
# scheme in use today and is especially suited to computer processing.
|
244
243
|
#
|
245
244
|
slp1: {
|
246
|
-
vowels: "a A i I u U f F x X e E o O".
|
247
|
-
other_marks: "M H ~".
|
245
|
+
vowels: "a A i I u U f F x X e E o O".split(/\s/),
|
246
|
+
other_marks: "M H ~".split(/\s/),
|
248
247
|
virama: [""],
|
249
|
-
consonants: "k K g G N c C j J Y w W q Q R t T d D n p P b B m y r l v S z s h L kz jY".
|
250
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".
|
248
|
+
consonants: "k K g G N c C j J Y w W q Q R t T d D n p P b B m y r l v S z s h L kz jY".split(/\s/),
|
249
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".split(/\s/),
|
251
250
|
},
|
252
251
|
|
253
252
|
# Velthuis
|
@@ -255,11 +254,11 @@ module Sanscript
|
|
255
254
|
# A case-insensitive Sanskrit encoding.
|
256
255
|
#
|
257
256
|
velthuis: {
|
258
|
-
vowels: "a aa i ii u uu .r .rr .li .ll e ai o au".
|
259
|
-
other_marks: ".m .h ".
|
257
|
+
vowels: "a aa i ii u uu .r .rr .li .ll e ai o au".split(/\s/),
|
258
|
+
other_marks: ".m .h ".split(/\s/),
|
260
259
|
virama: [""],
|
261
|
-
consonants: 'k kh g gh "n c ch j jh ~n .t .th .d .dh .n t th d dh n p ph b bh m y r l v ~s .s s h L k.s j~n'.
|
262
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 o.m ' | ||".
|
260
|
+
consonants: 'k kh g gh "n c ch j jh ~n .t .th .d .dh .n t th d dh n p ph b bh m y r l v ~s .s s h L k.s j~n'.split(/\s/),
|
261
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 o.m ' | ||".split(/\s/),
|
263
262
|
},
|
264
263
|
|
265
264
|
# WX
|
@@ -267,11 +266,11 @@ module Sanscript
|
|
267
266
|
# As terse as SLP1.
|
268
267
|
#
|
269
268
|
wx: {
|
270
|
-
vowels: "a A i I u U q Q L e E o O".
|
271
|
-
other_marks: "M H z".
|
269
|
+
vowels: "a A i I u U q Q L e E o O".split(/\s/),
|
270
|
+
other_marks: "M H z".split(/\s/),
|
272
271
|
virama: [""],
|
273
|
-
consonants: "k K g G f c C j J F t T d D N w W x X n p P b B m y r l v S R s h kR jF".
|
274
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".
|
272
|
+
consonants: "k K g G f c C j J F t T d D N w W x X n p P b B m y r l v S R s h kR jF".split(/\s/),
|
273
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".split(/\s/),
|
275
274
|
},
|
276
275
|
}
|
277
276
|
|
@@ -307,7 +306,7 @@ module Sanscript
|
|
307
306
|
"\\_" => ["\\`"],
|
308
307
|
"\\_H" => ["\\`H"],
|
309
308
|
"\\'M" => ["\\'.m", "\\'.n"],
|
310
|
-
"\\_M" => "\\_.m \\_.n \\`M \\`.m \\`.n".
|
309
|
+
"\\_M" => "\\_.m \\_.n \\`M \\`.m \\`.n".split(/\s/),
|
311
310
|
".a" => ["~"],
|
312
311
|
"|" => ["."],
|
313
312
|
"||" => [".."],
|
data/lib/sanscript/version.rb
CHANGED
data/sanscript.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanscript
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Bellefleur
|
@@ -109,19 +109,19 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0.9'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: ragabash
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '0.
|
117
|
+
version: '0.1'
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '0.
|
124
|
+
version: '0.1'
|
125
125
|
description:
|
126
126
|
email:
|
127
127
|
- nomoon@phoebus.ca
|
@@ -146,7 +146,7 @@ files:
|
|
146
146
|
- lib/sanscript/detect.rb
|
147
147
|
- lib/sanscript/detect/ruby24.rb
|
148
148
|
- lib/sanscript/detect/ruby2x.rb
|
149
|
-
- lib/sanscript/
|
149
|
+
- lib/sanscript/exceptions.rb
|
150
150
|
- lib/sanscript/transliterate.rb
|
151
151
|
- lib/sanscript/transliterate/schemes.rb
|
152
152
|
- lib/sanscript/version.rb
|
@@ -1,95 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require "ice_nine"
|
3
|
-
|
4
|
-
module Sanscript
|
5
|
-
# A set of helpful refinements for duplication and deep freezing.
|
6
|
-
module Refinements
|
7
|
-
refine Object do
|
8
|
-
def deep_dup
|
9
|
-
dup
|
10
|
-
rescue TypeError
|
11
|
-
self
|
12
|
-
end
|
13
|
-
|
14
|
-
def deep_freeze
|
15
|
-
IceNine.deep_freeze(self)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
refine NilClass do
|
20
|
-
def deep_dup
|
21
|
-
self
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
refine FalseClass do
|
26
|
-
def deep_dup
|
27
|
-
self
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
refine TrueClass do
|
32
|
-
def deep_dup
|
33
|
-
self
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
refine Symbol do
|
38
|
-
def deep_dup
|
39
|
-
self
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
refine Numeric do
|
44
|
-
def deep_dup
|
45
|
-
self
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Necessary to re-override Numeric
|
50
|
-
require "bigdecimal"
|
51
|
-
refine BigDecimal do
|
52
|
-
def deep_dup
|
53
|
-
dup
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
refine String do
|
58
|
-
def w_split
|
59
|
-
split(/\s/)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
refine Array do
|
64
|
-
def deep_dup
|
65
|
-
map { |value| value.deep_dup } # rubocop:disable Style/SymbolProc
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
refine Hash do
|
70
|
-
def deep_dup
|
71
|
-
hash = dup
|
72
|
-
each_pair do |key, value|
|
73
|
-
if ::String === key # rubocop:disable Style/CaseEquality
|
74
|
-
hash[key] = value.deep_dup
|
75
|
-
else
|
76
|
-
hash.delete(key)
|
77
|
-
hash[key.deep_dup] = value.deep_dup
|
78
|
-
end
|
79
|
-
end
|
80
|
-
hash
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
refine Set do
|
85
|
-
def deep_dup
|
86
|
-
set_a = to_a
|
87
|
-
set_a.map! do |val|
|
88
|
-
next val if ::String === val # rubocop:disable Style/CaseEquality
|
89
|
-
val.deep_dup
|
90
|
-
end
|
91
|
-
self.class[set_a]
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|