sanscript 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/lib/sanscript.rb +18 -3
- data/lib/sanscript/benchmark.rb +43 -15
- data/lib/sanscript/detect.rb +2 -0
- data/lib/sanscript/exceptions.rb +19 -0
- data/lib/sanscript/transliterate.rb +41 -40
- data/lib/sanscript/transliterate/schemes.rb +80 -81
- data/lib/sanscript/version.rb +1 -1
- data/sanscript.gemspec +1 -1
- metadata +5 -5
- data/lib/sanscript/refinements.rb +0 -95
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 348f8d72cc3d76ba760a4225a4f784324294474a
|
4
|
+
data.tar.gz: f3a1215ad14dc3778795dc0f6563345aaa3015fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8eae2315a8d3a68ce1a873585ab752902287a027a6d45961fef77ad0174d3646d7c7c36107a5879b5b8b36c10afef93428b34098b2ae86dfb6026bb4d644e94
|
7
|
+
data.tar.gz: 1997c7bb6d11f4b139eb5cde903bc17bffb786bcad236351100e55248d57945fbd19e7cf83f409af357c4c36d1dfb43fbb5adc5a0c2997e66622ed1576bb1921
|
data/README.md
CHANGED
@@ -25,7 +25,9 @@ Or install it yourself as:
|
|
25
25
|
|
26
26
|
## Usage
|
27
27
|
|
28
|
-
You can access detection through `Sanscript.detect(text)` and transliteration through `Sanscript.transliterate(text, from, to)`.
|
28
|
+
You can access detection through `Sanscript.detect(text)` and transliteration through `Sanscript.transliterate(text, from, to)`.
|
29
|
+
|
30
|
+
Documentation is provided in YARD format and available online at [rubydoc.info](http://www.rubydoc.info/github/ubcsanskrit/sanscript.rb).
|
29
31
|
|
30
32
|
## Development
|
31
33
|
|
data/lib/sanscript.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require "ragabash"
|
3
|
+
|
2
4
|
require "sanscript/version"
|
5
|
+
require "sanscript/exceptions"
|
3
6
|
require "sanscript/detect"
|
4
7
|
require "sanscript/transliterate"
|
5
8
|
require "sanscript/benchmark"
|
@@ -21,23 +24,35 @@ module Sanscript
|
|
21
24
|
#
|
22
25
|
# @overload transliterate(text, from, to, **opts)
|
23
26
|
# @param text [String] the String to transliterate
|
24
|
-
# @param from [Symbol] the name of the scheme to transliterate from
|
27
|
+
# @param from [Symbol, nil] the name of the scheme to transliterate from, or nil to detect
|
25
28
|
# @param to [Symbol] the name of the scheme to transliterate to
|
29
|
+
# @option opts [Symbol] :default_scheme a default scheme to fall back to if detection fails
|
26
30
|
# @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
|
27
31
|
# @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
|
28
32
|
# @return [String] the transliterated String
|
29
33
|
#
|
34
|
+
# @raise [DetectionError] if scheme detection and fallback fail
|
35
|
+
# @raise [SchemeNotSupportedError] if a provided transliteration scheme is not supported
|
36
|
+
#
|
30
37
|
# @overload transliterate(text, to, **opts)
|
31
38
|
# @param text [String] the String to transliterate
|
32
39
|
# @param to [Symbol] the name of the scheme to transliterate to
|
33
40
|
# @option opts [Symbol] :default_scheme a default scheme to fall back to if detection fails
|
34
41
|
# @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
|
35
42
|
# @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
|
36
|
-
# @return [String
|
43
|
+
# @return [String] the transliterated String
|
44
|
+
#
|
45
|
+
# @raise [DetectionError] if scheme detection and fallback fail
|
46
|
+
# @raise [SchemeNotSupportedError] if a provided transliteration scheme is not supported
|
47
|
+
#
|
37
48
|
def transliterate(text, from, to = nil, **opts)
|
38
49
|
if to.nil?
|
39
50
|
to = from
|
40
|
-
from =
|
51
|
+
from = nil
|
52
|
+
end
|
53
|
+
if from.nil?
|
54
|
+
from = Detect.detect_scheme(text) || opts[:default_scheme] ||
|
55
|
+
raise(DetectionError, "String detection and fallback failed.")
|
41
56
|
end
|
42
57
|
Transliterate.transliterate(text, from, to, opts)
|
43
58
|
end
|
data/lib/sanscript/benchmark.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
#:nocov:
|
2
3
|
|
3
|
-
require "sanscript/refinements"
|
4
4
|
begin
|
5
5
|
require "benchmark/ips"
|
6
6
|
rescue LoadError
|
@@ -12,18 +12,17 @@ rescue LoadError
|
|
12
12
|
end
|
13
13
|
|
14
14
|
module Sanscript
|
15
|
-
using Refinements
|
16
15
|
# Benchmark/testing module.
|
17
16
|
module Benchmark
|
18
17
|
module_function
|
19
18
|
|
20
19
|
# Runs benchmark-ips test on detection methods.
|
21
20
|
def detect!
|
22
|
-
iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
|
23
21
|
deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
|
22
|
+
malayalam_string = "നാനാശാസ്ത്രസുഭാഷിതാമൃതരസൈഃ ശ്രോത്രോത്സവം കുര്വതാം യേഷാം യാന്തി ദിനാനി പണ്ഡിതജനവ്യായാമഖിന്നാത്മനാമ് തേഷാം ജന്മ ച ജീവിതം ച സുകൃതം തൈര് ഏവ ഭൂര് ഭൂഷിതാ ശേഷൈഹ് കിം പശുവദ് വിവേകരഹിതൈര് ഭൂഭാരഭൂതൈര് നരഃ"
|
23
|
+
iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
|
24
24
|
slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
|
25
25
|
hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
|
26
|
-
malayalam_string = "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ"
|
27
26
|
|
28
27
|
::Benchmark.ips do |x|
|
29
28
|
x.config(time: 5, warmup: 1)
|
@@ -47,32 +46,61 @@ module Sanscript
|
|
47
46
|
true
|
48
47
|
end
|
49
48
|
|
50
|
-
# Runs benchmark-ips test on transliteration methods.
|
51
|
-
def
|
49
|
+
# Runs benchmark-ips test on roman-source transliteration methods.
|
50
|
+
def transliterate_roman!
|
52
51
|
iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
|
53
|
-
deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
|
54
52
|
slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
|
53
|
+
hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
|
55
54
|
|
56
55
|
::Benchmark.ips do |x|
|
57
|
-
x.config(time:
|
56
|
+
x.config(time: 3, warmup: 2)
|
58
57
|
|
59
58
|
x.report("IAST==>Devanagari") do
|
60
|
-
|
59
|
+
Sanscript.transliterate(iast_string, :iast, :devanagari)
|
60
|
+
end
|
61
|
+
x.report("IAST==>SLP1") do
|
62
|
+
Sanscript.transliterate(iast_string, :iast, :slp1)
|
61
63
|
end
|
62
64
|
x.report("IAST==>SLP1") do
|
63
|
-
|
65
|
+
Sanscript.transliterate(iast_string, :iast, :hk)
|
64
66
|
end
|
65
67
|
x.report("SLP1==>Devanagari") do
|
66
|
-
|
68
|
+
Sanscript.transliterate(slp1_string, :slp1, :devanagari)
|
67
69
|
end
|
68
70
|
x.report("SLP1==>IAST") do
|
69
|
-
|
71
|
+
Sanscript.transliterate(slp1_string, :slp1, :iast)
|
70
72
|
end
|
71
|
-
x.report("
|
72
|
-
|
73
|
+
x.report("SLP1==>HK") do
|
74
|
+
Sanscript.transliterate(slp1_string, :slp1, :hk)
|
75
|
+
end
|
76
|
+
x.report("HK==>Devanagari") do
|
77
|
+
Sanscript.transliterate(hk_string, :hk, :devanagari)
|
73
78
|
end
|
79
|
+
x.report("HK==>IAST") do
|
80
|
+
Sanscript.transliterate(hk_string, :hk, :iast)
|
81
|
+
end
|
82
|
+
x.report("HK==>SLP1") do
|
83
|
+
Sanscript.transliterate(hk_string, :hk, :slp1)
|
84
|
+
end
|
85
|
+
x.compare!
|
86
|
+
end
|
87
|
+
true
|
88
|
+
end
|
89
|
+
|
90
|
+
# Runs benchmark-ips test on brahmic-source transliteration methods.
|
91
|
+
def transliterate_brahmic!
|
92
|
+
deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
|
93
|
+
|
94
|
+
::Benchmark.ips do |x|
|
95
|
+
x.config(time: 5, warmup: 2)
|
74
96
|
x.report("Devanagari==>IAST") do
|
75
|
-
|
97
|
+
Sanscript.transliterate(deva_string, :devanagari, :iast)
|
98
|
+
end
|
99
|
+
x.report("Devanagari==>SLP1") do
|
100
|
+
Sanscript.transliterate(deva_string, :devanagari, :slp1)
|
101
|
+
end
|
102
|
+
x.report("Devanagari==>HK") do
|
103
|
+
Sanscript.transliterate(deva_string, :devanagari, :hk)
|
76
104
|
end
|
77
105
|
x.compare!
|
78
106
|
end
|
data/lib/sanscript/detect.rb
CHANGED
@@ -61,6 +61,7 @@ module Sanscript
|
|
61
61
|
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
62
62
|
|
63
63
|
# @!visibility private
|
64
|
+
# :nocov:
|
64
65
|
if Regexp.method_defined?(:match?)
|
65
66
|
require "sanscript/detect/ruby24"
|
66
67
|
extend Ruby24
|
@@ -68,5 +69,6 @@ module Sanscript
|
|
68
69
|
require "sanscript/detect/ruby2x"
|
69
70
|
extend Ruby2x
|
70
71
|
end
|
72
|
+
# :nocov:
|
71
73
|
end
|
72
74
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sanscript
|
4
|
+
using ::Ragabash::Refinements
|
5
|
+
# Error for when transliteration scheme is not supported.
|
6
|
+
class SchemeNotSupportedError < StandardError
|
7
|
+
def initialize(scheme = :unknown)
|
8
|
+
super(":#{scheme} is not supported.")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
# Error for when scheme detection should non-silently fail
|
13
|
+
# (such as inside a transliteration method).
|
14
|
+
class DetectionError < StandardError
|
15
|
+
def initialize(message = "String detection failed.")
|
16
|
+
super
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -1,9 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "sanscript/refinements"
|
4
3
|
require "sanscript/transliterate/schemes"
|
5
4
|
module Sanscript
|
6
|
-
using Refinements
|
5
|
+
using ::Ragabash::Refinements
|
7
6
|
# Sanskrit transliteration module.
|
8
7
|
# Derived from Sanscript (https://github.com/sanskrit/sanscript.js), which is
|
9
8
|
# released under the MIT and GPL Licenses.
|
@@ -144,8 +143,8 @@ module Sanscript
|
|
144
143
|
from = from.to_sym
|
145
144
|
to = to.to_sym
|
146
145
|
return data if from == to
|
147
|
-
raise
|
148
|
-
raise
|
146
|
+
raise SchemeNotSupportedError, from unless @schemes.key?(from)
|
147
|
+
raise SchemeNotSupportedError, to unless @schemes.key?(to)
|
149
148
|
|
150
149
|
data = data.to_str.dup
|
151
150
|
options = @defaults.merge(opts)
|
@@ -229,39 +228,40 @@ module Sanscript
|
|
229
228
|
# @param map [Hash] map data generated from {#make_map}
|
230
229
|
# @return [String] the transliterated string
|
231
230
|
def transliterate_roman(data, map, options = {})
|
232
|
-
data = data.to_str.
|
231
|
+
data = data.to_str.chars
|
233
232
|
buf = []
|
234
|
-
token_buffer =
|
233
|
+
token_buffer = []
|
235
234
|
had_consonant = false
|
236
235
|
transliteration_enabled = true
|
237
236
|
control_char = false
|
237
|
+
max_token_length = map[:max_token_length]
|
238
238
|
|
239
239
|
until data.empty? && token_buffer.empty?
|
240
|
-
token_buffer << data.slice!(0, map[:max_token_length] - token_buffer.length)
|
241
|
-
|
242
240
|
# Match all token substrings to our map.
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
if
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
transliteration_enabled
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
241
|
+
token = data[0, max_token_length].join("")
|
242
|
+
max_token_length.downto(1) do |j|
|
243
|
+
token = token[0, j] unless j == max_token_length
|
244
|
+
if j == 2
|
245
|
+
if !control_char && token == "##"
|
246
|
+
transliteration_enabled = !transliteration_enabled
|
247
|
+
data.shift(2)
|
248
|
+
break
|
249
|
+
elsif control_char && token == "#}"
|
250
|
+
transliteration_enabled = true
|
251
|
+
control_char = false
|
252
|
+
buf << token
|
253
|
+
data.shift(2)
|
254
|
+
break
|
255
|
+
elsif transliteration_enabled && token == "{#"
|
256
|
+
transliteration_enabled = false
|
257
|
+
control_char = true
|
258
|
+
buf << token
|
259
|
+
data.shift(2)
|
260
|
+
break
|
261
|
+
end
|
262
262
|
end
|
263
|
-
|
264
|
-
if
|
263
|
+
|
264
|
+
if transliteration_enabled && (temp_letter = map[:letters][token])
|
265
265
|
if map[:to_roman?]
|
266
266
|
buf << temp_letter
|
267
267
|
else
|
@@ -269,18 +269,19 @@ module Sanscript
|
|
269
269
|
# vowels to appear as marks if we've just seen a
|
270
270
|
# consonant.
|
271
271
|
if had_consonant
|
272
|
-
|
273
|
-
if
|
272
|
+
# rubocop:disable Metrics/BlockNesting
|
273
|
+
if (temp_mark = map[:marks][token])
|
274
274
|
buf << temp_mark
|
275
275
|
elsif token != "a"
|
276
|
-
buf
|
276
|
+
buf.push(map[:virama], temp_letter)
|
277
277
|
end
|
278
|
+
# rubocop:enable Metrics/BlockNesting
|
278
279
|
else
|
279
280
|
buf << temp_letter
|
280
281
|
end
|
281
282
|
had_consonant = map[:consonants].key?(token)
|
282
283
|
end
|
283
|
-
|
284
|
+
j > 1 ? data.shift(j) : data.shift
|
284
285
|
break
|
285
286
|
elsif j == 1 # Last iteration
|
286
287
|
if had_consonant
|
@@ -288,7 +289,7 @@ module Sanscript
|
|
288
289
|
buf << map[:virama] unless options[:syncope]
|
289
290
|
end
|
290
291
|
buf << token
|
291
|
-
|
292
|
+
data.shift
|
292
293
|
end
|
293
294
|
end
|
294
295
|
end
|
@@ -302,27 +303,27 @@ module Sanscript
|
|
302
303
|
# @param map [Hash] map data generated from {#make_map}
|
303
304
|
# @return [String] the transliterated string
|
304
305
|
def transliterate_brahmic(data, map)
|
305
|
-
data = data.to_str.
|
306
|
+
data = data.to_str.chars
|
306
307
|
buf = []
|
307
308
|
had_roman_consonant = false
|
308
309
|
transliteration_enabled = true
|
309
310
|
control_char = false
|
310
311
|
|
311
312
|
until data.empty?
|
312
|
-
token = data
|
313
|
+
token = data[0, 2].join("")
|
313
314
|
if !control_char && token == "##"
|
314
315
|
if had_roman_consonant
|
315
316
|
buf << "a" if transliteration_enabled
|
316
317
|
had_roman_consonant = false
|
317
318
|
end
|
318
319
|
transliteration_enabled = !transliteration_enabled
|
319
|
-
data.
|
320
|
+
data.shift(2)
|
320
321
|
next
|
321
322
|
elsif control_char && token == "#}"
|
322
323
|
transliteration_enabled = true
|
323
324
|
control_char = false
|
324
325
|
buf << token
|
325
|
-
data.
|
326
|
+
data.shift(2)
|
326
327
|
next
|
327
328
|
elsif transliteration_enabled && token == "{#"
|
328
329
|
if had_roman_consonant
|
@@ -332,11 +333,11 @@ module Sanscript
|
|
332
333
|
transliteration_enabled = false
|
333
334
|
control_char = true
|
334
335
|
buf << token
|
335
|
-
data.
|
336
|
+
data.shift(2)
|
336
337
|
next
|
337
338
|
end
|
338
339
|
|
339
|
-
l = data.
|
340
|
+
l = data.shift
|
340
341
|
unless transliteration_enabled
|
341
342
|
buf << l
|
342
343
|
next
|
@@ -1,8 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require "sanscript/refinements"
|
3
2
|
|
4
3
|
module Sanscript
|
5
|
-
using Refinements
|
4
|
+
using ::Ragabash::Refinements
|
6
5
|
module Transliterate
|
7
6
|
# Schemes
|
8
7
|
# =======
|
@@ -25,13 +24,13 @@ module Sanscript
|
|
25
24
|
# 'va' and 'ba' are both rendered as ব.
|
26
25
|
#
|
27
26
|
bengali: {
|
28
|
-
vowels: "অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ এ ঐ ও ঔ".
|
29
|
-
vowel_marks: "া ি ী ু ূ ৃ ৄ ৢ ৣ ে ৈ ো ৌ".
|
30
|
-
other_marks: "ং ঃ ঁ".
|
27
|
+
vowels: "অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ এ ঐ ও ঔ".split(/\s/),
|
28
|
+
vowel_marks: "া ি ী ু ূ ৃ ৄ ৢ ৣ ে ৈ ো ৌ".split(/\s/),
|
29
|
+
other_marks: "ং ঃ ঁ".split(/\s/),
|
31
30
|
virama: ["্"],
|
32
|
-
consonants: "ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ত থ দ ধ ন প ফ ব ভ ম য র ল ব শ ষ স হ ळ ক্ষ জ্ঞ".
|
33
|
-
symbols: "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ॐ ঽ । ॥".
|
34
|
-
other: " ড ঢ য ".
|
31
|
+
consonants: "ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ত থ দ ধ ন প ফ ব ভ ম য র ল ব শ ষ স হ ळ ক্ষ জ্ঞ".split(/\s/),
|
32
|
+
symbols: "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ॐ ঽ । ॥".split(/\s/),
|
33
|
+
other: " ড ঢ য ".split(/\s/),
|
35
34
|
},
|
36
35
|
|
37
36
|
# Devanagari
|
@@ -41,15 +40,15 @@ module Sanscript
|
|
41
40
|
devanagari: {
|
42
41
|
# "Independent" forms of the vowels. These are used whenever the
|
43
42
|
# vowel does not immediately follow a consonant.
|
44
|
-
vowels: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ऎ ए ऐ ऒ ओ औ".
|
43
|
+
vowels: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ऎ ए ऐ ऒ ओ औ".split(/\s/),
|
45
44
|
|
46
45
|
# "Dependent" forms of the vowels. These are used whenever the
|
47
46
|
# vowel immediately follows a consonant. If a letter is not
|
48
47
|
# listed in `vowels`, it should not be listed here.
|
49
|
-
vowel_marks: "ा ि ी ु ू ृ ॄ ॢ ॣ ॆ े ै ॊ ो ौ".
|
48
|
+
vowel_marks: "ा ि ी ु ू ृ ॄ ॢ ॣ ॆ े ै ॊ ो ौ".split(/\s/),
|
50
49
|
|
51
50
|
# Miscellaneous marks, all of which are used in Sanskrit.
|
52
|
-
other_marks: "ं ः ँ".
|
51
|
+
other_marks: "ं ः ँ".split(/\s/),
|
53
52
|
|
54
53
|
# In syllabic scripts like Devanagari, consonants have an inherent
|
55
54
|
# vowel that must be suppressed explicitly. We do so by putting a
|
@@ -58,10 +57,10 @@ module Sanscript
|
|
58
57
|
|
59
58
|
# Various Sanskrit consonants and consonant clusters. Every token
|
60
59
|
# here has an explicit vowel. Thus "क" is "ka" instead of "k".
|
61
|
-
consonants: "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ क्ष ज्ञ".
|
60
|
+
consonants: "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ क्ष ज्ञ".split(/\s/),
|
62
61
|
|
63
62
|
# Numbers and punctuation
|
64
|
-
symbols: "० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ऽ । ॥".
|
63
|
+
symbols: "० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ऽ । ॥".split(/\s/),
|
65
64
|
|
66
65
|
# Zero-width joiner. This is used to separate a consonant cluster
|
67
66
|
# and avoid a complex ligature.
|
@@ -76,12 +75,12 @@ module Sanscript
|
|
76
75
|
|
77
76
|
# Accent combined with anusvara and visarga. For compatibility
|
78
77
|
# with ITRANS, which allows the reverse of these four.
|
79
|
-
combo_accent: "ः॑ ः॒ ं॑ ं॒".
|
78
|
+
combo_accent: "ः॑ ः॒ ं॑ ं॒".split(/\s/),
|
80
79
|
|
81
80
|
candra: ["ॅ"],
|
82
81
|
|
83
82
|
# Non-Sanskrit consonants
|
84
|
-
other: "क़ ख़ ग़ ज़ ड़ ढ़ फ़ य़ ऱ".
|
83
|
+
other: "क़ ख़ ग़ ज़ ड़ ढ़ फ़ य़ ऱ".split(/\s/),
|
85
84
|
},
|
86
85
|
|
87
86
|
# Gujarati
|
@@ -89,12 +88,12 @@ module Sanscript
|
|
89
88
|
# Sanskrit-complete.
|
90
89
|
#
|
91
90
|
gujarati: {
|
92
|
-
vowels: "અ આ ઇ ઈ ઉ ઊ ઋ ૠ ઌ ૡ એ ઐ ઓ ઔ".
|
93
|
-
vowel_marks: "ા િ ી ુ ૂ ૃ ૄ ૢ ૣ ે ૈ ો ૌ".
|
94
|
-
other_marks: "ં ઃ ઁ".
|
91
|
+
vowels: "અ આ ઇ ઈ ઉ ઊ ઋ ૠ ઌ ૡ એ ઐ ઓ ઔ".split(/\s/),
|
92
|
+
vowel_marks: "ા િ ી ુ ૂ ૃ ૄ ૢ ૣ ે ૈ ો ૌ".split(/\s/),
|
93
|
+
other_marks: "ં ઃ ઁ".split(/\s/),
|
95
94
|
virama: ["્"],
|
96
|
-
consonants: "ક ખ ગ ઘ ઙ ચ છ જ ઝ ઞ ટ ઠ ડ ઢ ણ ત થ દ ધ ન પ ફ બ ભ મ ય ર લ વ શ ષ સ હ ળ ક્ષ જ્ઞ".
|
97
|
-
symbols: "૦ ૧ ૨ ૩ ૪ ૫ ૬ ૭ ૮ ૯ ૐ ઽ ".
|
95
|
+
consonants: "ક ખ ગ ઘ ઙ ચ છ જ ઝ ઞ ટ ઠ ડ ઢ ણ ત થ દ ધ ન પ ફ બ ભ મ ય ર લ વ શ ષ સ હ ળ ક્ષ જ્ઞ".split(/\s/),
|
96
|
+
symbols: "૦ ૧ ૨ ૩ ૪ ૫ ૬ ૭ ૮ ૯ ૐ ઽ ".split(/\s/),
|
98
97
|
candra: ["ૅ"],
|
99
98
|
},
|
100
99
|
|
@@ -103,13 +102,13 @@ module Sanscript
|
|
103
102
|
# Missing R/RR/lR/lRR
|
104
103
|
#
|
105
104
|
gurmukhi: {
|
106
|
-
vowels: "ਅ ਆ ਇ ਈ ਉ ਊ ਏ ਐ ਓ ਔ".
|
107
|
-
vowel_marks: "ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ".
|
108
|
-
other_marks: "ਂ ਃ ਁ".
|
105
|
+
vowels: "ਅ ਆ ਇ ਈ ਉ ਊ ਏ ਐ ਓ ਔ".split(/\s/),
|
106
|
+
vowel_marks: "ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ".split(/\s/),
|
107
|
+
other_marks: "ਂ ਃ ਁ".split(/\s/),
|
109
108
|
virama: ["੍"],
|
110
|
-
consonants: "ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ਸ਼ ਸ਼ ਸ ਹ ਲ਼ ਕ੍ਸ਼ ਜ੍ਞ".
|
111
|
-
symbols: "੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ॐ ऽ । ॥".
|
112
|
-
other: " ਖ ਗ ਜ ਡ ਫ ".
|
109
|
+
consonants: "ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ਸ਼ ਸ਼ ਸ ਹ ਲ਼ ਕ੍ਸ਼ ਜ੍ਞ".split(/\s/),
|
110
|
+
symbols: "੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ॐ ऽ । ॥".split(/\s/),
|
111
|
+
other: " ਖ ਗ ਜ ਡ ਫ ".split(/\s/),
|
113
112
|
},
|
114
113
|
|
115
114
|
# Kannada
|
@@ -117,13 +116,13 @@ module Sanscript
|
|
117
116
|
# Sanskrit-complete.
|
118
117
|
#
|
119
118
|
kannada: {
|
120
|
-
vowels: "ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ".
|
121
|
-
vowel_marks: "ಾ ಿ ೀ ು ೂ ೃ ೄ ೢ ೣ ೆ ೇ ೈ ೊ ೋ ೌ".
|
122
|
-
other_marks: "ಂ ಃ ँ".
|
119
|
+
vowels: "ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ".split(/\s/),
|
120
|
+
vowel_marks: "ಾ ಿ ೀ ು ೂ ೃ ೄ ೢ ೣ ೆ ೇ ೈ ೊ ೋ ೌ".split(/\s/),
|
121
|
+
other_marks: "ಂ ಃ ँ".split(/\s/),
|
123
122
|
virama: ["್"],
|
124
|
-
consonants: "ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಕ್ಷ ಜ್ಞ".
|
125
|
-
symbols: "೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಓಂ ಽ । ॥".
|
126
|
-
other: " ಫ ಱ".
|
123
|
+
consonants: "ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಕ್ಷ ಜ್ಞ".split(/\s/),
|
124
|
+
symbols: "೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಓಂ ಽ । ॥".split(/\s/),
|
125
|
+
other: " ಫ ಱ".split(/\s/),
|
127
126
|
},
|
128
127
|
|
129
128
|
# Malayalam
|
@@ -131,13 +130,13 @@ module Sanscript
|
|
131
130
|
# Sanskrit-complete.
|
132
131
|
#
|
133
132
|
malayalam: {
|
134
|
-
vowels: "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ".
|
135
|
-
vowel_marks: "ാ ി ീ ു ൂ ൃ ൄ ൢ ൣ െ േ ൈ ൊ ോ ൌ".
|
136
|
-
other_marks: "ം ഃ ँ".
|
133
|
+
vowels: "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ".split(/\s/),
|
134
|
+
vowel_marks: "ാ ി ീ ു ൂ ൃ ൄ ൢ ൣ െ േ ൈ ൊ ോ ൌ".split(/\s/),
|
135
|
+
other_marks: "ം ഃ ँ".split(/\s/),
|
137
136
|
virama: ["്"],
|
138
|
-
consonants: "ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ത ഥ ദ ധ ന പ ഫ ബ ഭ മ യ ര ല വ ശ ഷ സ ഹ ള ക്ഷ ജ്ഞ".
|
139
|
-
symbols: "൦ ൧ ൨ ൩ ൪ ൫ ൬ ൭ ൮ ൯ ഓം ഽ । ॥".
|
140
|
-
other: " റ".
|
137
|
+
consonants: "ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ത ഥ ദ ധ ന പ ഫ ബ ഭ മ യ ര ല വ ശ ഷ സ ഹ ള ക്ഷ ജ്ഞ".split(/\s/),
|
138
|
+
symbols: "൦ ൧ ൨ ൩ ൪ ൫ ൬ ൭ ൮ ൯ ഓം ഽ । ॥".split(/\s/),
|
139
|
+
other: " റ".split(/\s/),
|
141
140
|
},
|
142
141
|
|
143
142
|
# Oriya
|
@@ -145,13 +144,13 @@ module Sanscript
|
|
145
144
|
# Sanskrit-complete.
|
146
145
|
#
|
147
146
|
oriya: {
|
148
|
-
vowels: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ୠ ଌ ୡ ଏ ଐ ଓ ଔ".
|
149
|
-
vowel_marks: "ା ି ୀ ୁ ୂ ୃ ୄ ୢ ୣ େ ୈ ୋ ୌ".
|
150
|
-
other_marks: "ଂ ଃ ଁ".
|
147
|
+
vowels: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ୠ ଌ ୡ ଏ ଐ ଓ ଔ".split(/\s/),
|
148
|
+
vowel_marks: "ା ି ୀ ୁ ୂ ୃ ୄ ୢ ୣ େ ୈ ୋ ୌ".split(/\s/),
|
149
|
+
other_marks: "ଂ ଃ ଁ".split(/\s/),
|
151
150
|
virama: ["୍"],
|
152
|
-
consonants: "କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ ଢ ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ର ଲ ଵ ଶ ଷ ସ ହ ଳ କ୍ଷ ଜ୍ଞ".
|
153
|
-
symbols: "୦ ୧ ୨ ୩ ୪ ୫ ୬ ୭ ୮ ୯ ଓଂ ଽ । ॥".
|
154
|
-
other: " ଡ ଢ ଯ ".
|
151
|
+
consonants: "କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ ଢ ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ର ଲ ଵ ଶ ଷ ସ ହ ଳ କ୍ଷ ଜ୍ଞ".split(/\s/),
|
152
|
+
symbols: "୦ ୧ ୨ ୩ ୪ ୫ ୬ ୭ ୮ ୯ ଓଂ ଽ । ॥".split(/\s/),
|
153
|
+
other: " ଡ ଢ ଯ ".split(/\s/),
|
155
154
|
},
|
156
155
|
|
157
156
|
# Tamil
|
@@ -160,13 +159,13 @@ module Sanscript
|
|
160
159
|
# The most incomplete of the Sanskrit schemes here.
|
161
160
|
#
|
162
161
|
tamil: {
|
163
|
-
vowels: "அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ".
|
164
|
-
vowel_marks: "ா ி ீ ு ூ ெ ே ை ொ ோ ௌ".
|
165
|
-
other_marks: "ஂ ஃ ".
|
162
|
+
vowels: "அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ".split(/\s/),
|
163
|
+
vowel_marks: "ா ி ீ ு ூ ெ ே ை ொ ோ ௌ".split(/\s/),
|
164
|
+
other_marks: "ஂ ஃ ".split(/\s/),
|
166
165
|
virama: ["்"],
|
167
|
-
consonants: "க க க க ங ச ச ஜ ச ஞ ட ட ட ட ண த த த த ந ப ப ப ப ம ய ர ல வ ஶ ஷ ஸ ஹ ள க்ஷ ஜ்ஞ".
|
168
|
-
symbols: "௦ ௧ ௨ ௩ ௪ ௫ ௬ ௭ ௮ ௯ ௐ ऽ । ॥".
|
169
|
-
other: " ற".
|
166
|
+
consonants: "க க க க ங ச ச ஜ ச ஞ ட ட ட ட ண த த த த ந ப ப ப ப ம ய ர ல வ ஶ ஷ ஸ ஹ ள க்ஷ ஜ்ஞ".split(/\s/),
|
167
|
+
symbols: "௦ ௧ ௨ ௩ ௪ ௫ ௬ ௭ ௮ ௯ ௐ ऽ । ॥".split(/\s/),
|
168
|
+
other: " ற".split(/\s/),
|
170
169
|
},
|
171
170
|
|
172
171
|
# Telugu
|
@@ -174,13 +173,13 @@ module Sanscript
|
|
174
173
|
# Sanskrit-complete.
|
175
174
|
#
|
176
175
|
telugu: {
|
177
|
-
vowels: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ".
|
178
|
-
vowel_marks: "ా ి ీ ు ూ ృ ౄ ౢ ౣ ె ే ై ొ ో ౌ".
|
179
|
-
other_marks: "ం ః ఁ".
|
176
|
+
vowels: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ".split(/\s/),
|
177
|
+
vowel_marks: "ా ి ీ ు ూ ృ ౄ ౢ ౣ ె ే ై ొ ో ౌ".split(/\s/),
|
178
|
+
other_marks: "ం ః ఁ".split(/\s/),
|
180
179
|
virama: ["్"],
|
181
|
-
consonants: "క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ల వ శ ష స హ ళ క్ష జ్ఞ".
|
182
|
-
symbols: "౦ ౧ ౨ ౩ ౪ ౫ ౬ ౭ ౮ ౯ ఓం ఽ । ॥".
|
183
|
-
other: " ఱ".
|
180
|
+
consonants: "క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ల వ శ ష స హ ళ క్ష జ్ఞ".split(/\s/),
|
181
|
+
symbols: "౦ ౧ ౨ ౩ ౪ ౫ ౬ ౭ ౮ ౯ ఓం ఽ । ॥".split(/\s/),
|
182
|
+
other: " ఱ".split(/\s/),
|
184
183
|
},
|
185
184
|
|
186
185
|
# International Alphabet of Sanskrit Transliteration
|
@@ -188,11 +187,11 @@ module Sanscript
|
|
188
187
|
# The most "professional" Sanskrit romanization scheme.
|
189
188
|
#
|
190
189
|
iast: {
|
191
|
-
vowels: "a ā i ī u ū ṛ ṝ ḷ ḹ e ai o au".
|
190
|
+
vowels: "a ā i ī u ū ṛ ṝ ḷ ḹ e ai o au".split(/\s/),
|
192
191
|
other_marks: ["ṃ", "ḥ", "~"],
|
193
192
|
virama: [""],
|
194
|
-
consonants: "k kh g gh ṅ c ch j jh ñ ṭ ṭh ḍ ḍh ṇ t th d dh n p ph b bh m y r l v ś ṣ s h ḻ kṣ jñ".
|
195
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 oṃ ' | ||".
|
193
|
+
consonants: "k kh g gh ṅ c ch j jh ñ ṭ ṭh ḍ ḍh ṇ t th d dh n p ph b bh m y r l v ś ṣ s h ḻ kṣ jñ".split(/\s/),
|
194
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 oṃ ' | ||".split(/\s/),
|
196
195
|
},
|
197
196
|
|
198
197
|
# ITRANS
|
@@ -204,17 +203,17 @@ module Sanscript
|
|
204
203
|
# '_' is a "null" letter, which allows adjacent vowels.
|
205
204
|
#
|
206
205
|
itrans: {
|
207
|
-
vowels: "a A i I u U RRi RRI LLi LLI e ai o au".
|
206
|
+
vowels: "a A i I u U RRi RRI LLi LLI e ai o au".split(/\s/),
|
208
207
|
other_marks: ["M", "H", ".N"],
|
209
208
|
virama: [""],
|
210
|
-
consonants: "k kh g gh ~N ch Ch j jh ~n T Th D Dh N t th d dh n p ph b bh m y r l v sh Sh s h L kSh j~n".
|
211
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 OM .a | ||".
|
209
|
+
consonants: "k kh g gh ~N ch Ch j jh ~n T Th D Dh N t th d dh n p ph b bh m y r l v sh Sh s h L kSh j~n".split(/\s/),
|
210
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 OM .a | ||".split(/\s/),
|
212
211
|
candra: [".c"],
|
213
212
|
zwj: ["{}"],
|
214
213
|
skip: ["_"],
|
215
214
|
accent: ["\\'", "\\_"],
|
216
|
-
combo_accent: "\\'H \\_H \\'M \\_M".
|
217
|
-
other: "q K G z .D .Dh f Y R".
|
215
|
+
combo_accent: "\\'H \\_H \\'M \\_M".split(/\s/),
|
216
|
+
other: "q K G z .D .Dh f Y R".split(/\s/),
|
218
217
|
},
|
219
218
|
|
220
219
|
# Harvard-Kyoto
|
@@ -222,11 +221,11 @@ module Sanscript
|
|
222
221
|
# A simple 1:1 mapping.
|
223
222
|
#
|
224
223
|
hk: {
|
225
|
-
vowels: "a A i I u U R RR lR lRR e ai o au".
|
226
|
-
other_marks: "M H ~".
|
224
|
+
vowels: "a A i I u U R RR lR lRR e ai o au".split(/\s/),
|
225
|
+
other_marks: "M H ~".split(/\s/),
|
227
226
|
virama: [""],
|
228
|
-
consonants: "k kh g gh G c ch j jh J T Th D Dh N t th d dh n p ph b bh m y r l v z S s h L kS jJ".
|
229
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 OM ' | ||".
|
227
|
+
consonants: "k kh g gh G c ch j jh J T Th D Dh N t th d dh n p ph b bh m y r l v z S s h L kS jJ".split(/\s/),
|
228
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 OM ' | ||".split(/\s/),
|
230
229
|
},
|
231
230
|
|
232
231
|
# National Library at Kolkata
|
@@ -243,11 +242,11 @@ module Sanscript
|
|
243
242
|
# scheme in use today and is especially suited to computer processing.
|
244
243
|
#
|
245
244
|
slp1: {
|
246
|
-
vowels: "a A i I u U f F x X e E o O".
|
247
|
-
other_marks: "M H ~".
|
245
|
+
vowels: "a A i I u U f F x X e E o O".split(/\s/),
|
246
|
+
other_marks: "M H ~".split(/\s/),
|
248
247
|
virama: [""],
|
249
|
-
consonants: "k K g G N c C j J Y w W q Q R t T d D n p P b B m y r l v S z s h L kz jY".
|
250
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".
|
248
|
+
consonants: "k K g G N c C j J Y w W q Q R t T d D n p P b B m y r l v S z s h L kz jY".split(/\s/),
|
249
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".split(/\s/),
|
251
250
|
},
|
252
251
|
|
253
252
|
# Velthuis
|
@@ -255,11 +254,11 @@ module Sanscript
|
|
255
254
|
# A case-insensitive Sanskrit encoding.
|
256
255
|
#
|
257
256
|
velthuis: {
|
258
|
-
vowels: "a aa i ii u uu .r .rr .li .ll e ai o au".
|
259
|
-
other_marks: ".m .h ".
|
257
|
+
vowels: "a aa i ii u uu .r .rr .li .ll e ai o au".split(/\s/),
|
258
|
+
other_marks: ".m .h ".split(/\s/),
|
260
259
|
virama: [""],
|
261
|
-
consonants: 'k kh g gh "n c ch j jh ~n .t .th .d .dh .n t th d dh n p ph b bh m y r l v ~s .s s h L k.s j~n'.
|
262
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 o.m ' | ||".
|
260
|
+
consonants: 'k kh g gh "n c ch j jh ~n .t .th .d .dh .n t th d dh n p ph b bh m y r l v ~s .s s h L k.s j~n'.split(/\s/),
|
261
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 o.m ' | ||".split(/\s/),
|
263
262
|
},
|
264
263
|
|
265
264
|
# WX
|
@@ -267,11 +266,11 @@ module Sanscript
|
|
267
266
|
# As terse as SLP1.
|
268
267
|
#
|
269
268
|
wx: {
|
270
|
-
vowels: "a A i I u U q Q L e E o O".
|
271
|
-
other_marks: "M H z".
|
269
|
+
vowels: "a A i I u U q Q L e E o O".split(/\s/),
|
270
|
+
other_marks: "M H z".split(/\s/),
|
272
271
|
virama: [""],
|
273
|
-
consonants: "k K g G f c C j J F t T d D N w W x X n p P b B m y r l v S R s h kR jF".
|
274
|
-
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".
|
272
|
+
consonants: "k K g G f c C j J F t T d D N w W x X n p P b B m y r l v S R s h kR jF".split(/\s/),
|
273
|
+
symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".split(/\s/),
|
275
274
|
},
|
276
275
|
}
|
277
276
|
|
@@ -307,7 +306,7 @@ module Sanscript
|
|
307
306
|
"\\_" => ["\\`"],
|
308
307
|
"\\_H" => ["\\`H"],
|
309
308
|
"\\'M" => ["\\'.m", "\\'.n"],
|
310
|
-
"\\_M" => "\\_.m \\_.n \\`M \\`.m \\`.n".
|
309
|
+
"\\_M" => "\\_.m \\_.n \\`M \\`.m \\`.n".split(/\s/),
|
311
310
|
".a" => ["~"],
|
312
311
|
"|" => ["."],
|
313
312
|
"||" => [".."],
|
data/lib/sanscript/version.rb
CHANGED
data/sanscript.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanscript
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Bellefleur
|
@@ -109,19 +109,19 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0.9'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: ragabash
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '0.
|
117
|
+
version: '0.1'
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '0.
|
124
|
+
version: '0.1'
|
125
125
|
description:
|
126
126
|
email:
|
127
127
|
- nomoon@phoebus.ca
|
@@ -146,7 +146,7 @@ files:
|
|
146
146
|
- lib/sanscript/detect.rb
|
147
147
|
- lib/sanscript/detect/ruby24.rb
|
148
148
|
- lib/sanscript/detect/ruby2x.rb
|
149
|
-
- lib/sanscript/
|
149
|
+
- lib/sanscript/exceptions.rb
|
150
150
|
- lib/sanscript/transliterate.rb
|
151
151
|
- lib/sanscript/transliterate/schemes.rb
|
152
152
|
- lib/sanscript/version.rb
|
@@ -1,95 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
require "ice_nine"
|
3
|
-
|
4
|
-
module Sanscript
|
5
|
-
# A set of helpful refinements for duplication and deep freezing.
|
6
|
-
module Refinements
|
7
|
-
refine Object do
|
8
|
-
def deep_dup
|
9
|
-
dup
|
10
|
-
rescue TypeError
|
11
|
-
self
|
12
|
-
end
|
13
|
-
|
14
|
-
def deep_freeze
|
15
|
-
IceNine.deep_freeze(self)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
refine NilClass do
|
20
|
-
def deep_dup
|
21
|
-
self
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
refine FalseClass do
|
26
|
-
def deep_dup
|
27
|
-
self
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
refine TrueClass do
|
32
|
-
def deep_dup
|
33
|
-
self
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
refine Symbol do
|
38
|
-
def deep_dup
|
39
|
-
self
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
refine Numeric do
|
44
|
-
def deep_dup
|
45
|
-
self
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Necessary to re-override Numeric
|
50
|
-
require "bigdecimal"
|
51
|
-
refine BigDecimal do
|
52
|
-
def deep_dup
|
53
|
-
dup
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
refine String do
|
58
|
-
def w_split
|
59
|
-
split(/\s/)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
refine Array do
|
64
|
-
def deep_dup
|
65
|
-
map { |value| value.deep_dup } # rubocop:disable Style/SymbolProc
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
refine Hash do
|
70
|
-
def deep_dup
|
71
|
-
hash = dup
|
72
|
-
each_pair do |key, value|
|
73
|
-
if ::String === key # rubocop:disable Style/CaseEquality
|
74
|
-
hash[key] = value.deep_dup
|
75
|
-
else
|
76
|
-
hash.delete(key)
|
77
|
-
hash[key.deep_dup] = value.deep_dup
|
78
|
-
end
|
79
|
-
end
|
80
|
-
hash
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
refine Set do
|
85
|
-
def deep_dup
|
86
|
-
set_a = to_a
|
87
|
-
set_a.map! do |val|
|
88
|
-
next val if ::String === val # rubocop:disable Style/CaseEquality
|
89
|
-
val.deep_dup
|
90
|
-
end
|
91
|
-
self.class[set_a]
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|