sanscript 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sanscript/detect.rb +1 -1
- data/lib/sanscript/transliterate.rb +35 -20
- data/lib/sanscript/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b4be971ec110312b8703d756af1c77a7a924c9b
|
4
|
+
data.tar.gz: d512d045decff83a7f8e6aa67a76fe0e805da05d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d242562206727e23f0f37e945d2292deb636965459a598d8601950d4d88a410512a83626c870c9f14bc672b5d40b53fb2014aaf9a11d0b1de4f8dd892fc22d21
|
7
|
+
data.tar.gz: 4d1daa671878bedd23097be18392712fb1a4c96adc71c3cfec74e386e0f916c9c066e5e483bcc6bdfc68141af104d5f7194c16c1d629a37739ad161b46646681
|
data/lib/sanscript/detect.rb
CHANGED
@@ -51,7 +51,7 @@ module Sanscript
|
|
51
51
|
module_function
|
52
52
|
|
53
53
|
def detect_script(text)
|
54
|
-
text = text.to_str.gsub(/(?<!\\)##.*?(?<!\\)##/, "")
|
54
|
+
text = text.to_str.gsub(/(?<!\\)##.*?(?<!\\)##|(?<!\\)\{#.*?(?<!\\)#\}/, "")
|
55
55
|
|
56
56
|
# Brahmic schemes are all within a specific range of code points.
|
57
57
|
if RE_BRAHMIC_RANGE === text
|
data/lib/sanscript/transliterate.rb
CHANGED
@@ -243,6 +243,7 @@ module Sanscript
|
|
243
243
|
token_buffer = String.new
|
244
244
|
had_consonant = false
|
245
245
|
transliteration_enabled = true
|
246
|
+
control_char = false
|
246
247
|
|
247
248
|
until data.empty? && token_buffer.empty?
|
248
249
|
token_buffer << data.slice!(0, map[:max_token_length] - token_buffer.length)
|
@@ -251,10 +252,22 @@ module Sanscript
|
|
251
252
|
(0...map[:max_token_length]).each do |j|
|
252
253
|
token = token_buffer[0, map[:max_token_length] - j]
|
253
254
|
|
254
|
-
if token == "##"
|
255
|
+
if !control_char && token == "##"
|
255
256
|
transliteration_enabled = !transliteration_enabled
|
256
257
|
token_buffer.slice!(0, 2)
|
257
258
|
break
|
259
|
+
elsif control_char && token == "#}"
|
260
|
+
transliteration_enabled = true
|
261
|
+
control_char = false
|
262
|
+
buf << token
|
263
|
+
token_buffer.slice!(0, 2)
|
264
|
+
break
|
265
|
+
elsif transliteration_enabled && token == "{#"
|
266
|
+
transliteration_enabled = false
|
267
|
+
control_char = true
|
268
|
+
buf << token
|
269
|
+
token_buffer.slice!(0, 2)
|
270
|
+
break
|
258
271
|
end
|
259
272
|
temp_letter = map[:letters][token]
|
260
273
|
if !temp_letter.nil? && transliteration_enabled
|
@@ -304,26 +317,32 @@ module Sanscript
|
|
304
317
|
def transliterate_brahmic(data, map)
|
305
318
|
data = data.to_str.dup
|
306
319
|
buf = []
|
307
|
-
dangling_hash = false
|
308
320
|
had_roman_consonant = false
|
309
321
|
transliteration_enabled = true
|
322
|
+
control_char = false
|
310
323
|
|
311
324
|
until data.empty?
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
buf << "a"
|
323
|
-
had_roman_consonant = false
|
324
|
-
end
|
325
|
+
token = data.slice(0, 2)
|
326
|
+
if !control_char && token == "##"
|
327
|
+
transliteration_enabled = !transliteration_enabled
|
328
|
+
data.slice!(0, 2)
|
329
|
+
next
|
330
|
+
elsif control_char && token == "#}"
|
331
|
+
transliteration_enabled = true
|
332
|
+
control_char = false
|
333
|
+
buf << token
|
334
|
+
data.slice!(0, 2)
|
325
335
|
next
|
326
|
-
elsif
|
336
|
+
elsif transliteration_enabled && token == "{#"
|
337
|
+
transliteration_enabled = false
|
338
|
+
control_char = true
|
339
|
+
buf << token
|
340
|
+
data.slice!(0, 2)
|
341
|
+
next
|
342
|
+
end
|
343
|
+
|
344
|
+
l = data.slice!(0, 1)
|
345
|
+
unless transliteration_enabled
|
327
346
|
buf << l
|
328
347
|
next
|
329
348
|
end
|
@@ -333,10 +352,6 @@ module Sanscript
|
|
333
352
|
buf << temp
|
334
353
|
had_roman_consonant = false
|
335
354
|
else
|
336
|
-
if dangling_hash
|
337
|
-
buf << "#"
|
338
|
-
dangling_hash = false
|
339
|
-
end
|
340
355
|
if had_roman_consonant
|
341
356
|
buf << "a"
|
342
357
|
had_roman_consonant = false
|
data/lib/sanscript/version.rb
CHANGED