sanscript 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/sanscript/detect.rb +1 -1
- data/lib/sanscript/transliterate.rb +35 -20
- data/lib/sanscript/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b4be971ec110312b8703d756af1c77a7a924c9b
|
4
|
+
data.tar.gz: d512d045decff83a7f8e6aa67a76fe0e805da05d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d242562206727e23f0f37e945d2292deb636965459a598d8601950d4d88a410512a83626c870c9f14bc672b5d40b53fb2014aaf9a11d0b1de4f8dd892fc22d21
|
7
|
+
data.tar.gz: 4d1daa671878bedd23097be18392712fb1a4c96adc71c3cfec74e386e0f916c9c066e5e483bcc6bdfc68141af104d5f7194c16c1d629a37739ad161b46646681
|
data/lib/sanscript/detect.rb
CHANGED
@@ -51,7 +51,7 @@ module Sanscript
|
|
51
51
|
module_function
|
52
52
|
|
53
53
|
def detect_script(text)
|
54
|
-
text = text.to_str.gsub(/(?<!\\)##.*?(?<!\\)
|
54
|
+
text = text.to_str.gsub(/(?<!\\)##.*?(?<!\\)##|(?<!\\)\{#.*?(?<!\\)#\}/, "")
|
55
55
|
|
56
56
|
# Brahmic schemes are all within a specific range of code points.
|
57
57
|
if RE_BRAHMIC_RANGE === text
|
@@ -243,6 +243,7 @@ module Sanscript
|
|
243
243
|
token_buffer = String.new
|
244
244
|
had_consonant = false
|
245
245
|
transliteration_enabled = true
|
246
|
+
control_char = false
|
246
247
|
|
247
248
|
until data.empty? && token_buffer.empty?
|
248
249
|
token_buffer << data.slice!(0, map[:max_token_length] - token_buffer.length)
|
@@ -251,10 +252,22 @@ module Sanscript
|
|
251
252
|
(0...map[:max_token_length]).each do |j|
|
252
253
|
token = token_buffer[0, map[:max_token_length] - j]
|
253
254
|
|
254
|
-
if token == "##"
|
255
|
+
if !control_char && token == "##"
|
255
256
|
transliteration_enabled = !transliteration_enabled
|
256
257
|
token_buffer.slice!(0, 2)
|
257
258
|
break
|
259
|
+
elsif control_char && token == "#}"
|
260
|
+
transliteration_enabled = true
|
261
|
+
control_char = false
|
262
|
+
buf << token
|
263
|
+
token_buffer.slice!(0, 2)
|
264
|
+
break
|
265
|
+
elsif transliteration_enabled && token == "{#"
|
266
|
+
transliteration_enabled = false
|
267
|
+
control_char = true
|
268
|
+
buf << token
|
269
|
+
token_buffer.slice!(0, 2)
|
270
|
+
break
|
258
271
|
end
|
259
272
|
temp_letter = map[:letters][token]
|
260
273
|
if !temp_letter.nil? && transliteration_enabled
|
@@ -304,26 +317,32 @@ module Sanscript
|
|
304
317
|
def transliterate_brahmic(data, map)
|
305
318
|
data = data.to_str.dup
|
306
319
|
buf = []
|
307
|
-
dangling_hash = false
|
308
320
|
had_roman_consonant = false
|
309
321
|
transliteration_enabled = true
|
322
|
+
control_char = false
|
310
323
|
|
311
324
|
until data.empty?
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
buf << "a"
|
323
|
-
had_roman_consonant = false
|
324
|
-
end
|
325
|
+
token = data.slice(0, 2)
|
326
|
+
if !control_char && token == "##"
|
327
|
+
transliteration_enabled = !transliteration_enabled
|
328
|
+
data.slice!(0, 2)
|
329
|
+
next
|
330
|
+
elsif control_char && token == "#}"
|
331
|
+
transliteration_enabled = true
|
332
|
+
control_char = false
|
333
|
+
buf << token
|
334
|
+
data.slice!(0, 2)
|
325
335
|
next
|
326
|
-
elsif
|
336
|
+
elsif transliteration_enabled && token == "{#"
|
337
|
+
transliteration_enabled = false
|
338
|
+
control_char = true
|
339
|
+
buf << token
|
340
|
+
data.slice!(0, 2)
|
341
|
+
next
|
342
|
+
end
|
343
|
+
|
344
|
+
l = data.slice!(0, 1)
|
345
|
+
unless transliteration_enabled
|
327
346
|
buf << l
|
328
347
|
next
|
329
348
|
end
|
@@ -333,10 +352,6 @@ module Sanscript
|
|
333
352
|
buf << temp
|
334
353
|
had_roman_consonant = false
|
335
354
|
else
|
336
|
-
if dangling_hash
|
337
|
-
buf << "#"
|
338
|
-
dangling_hash = false
|
339
|
-
end
|
340
355
|
if had_roman_consonant
|
341
356
|
buf << "a"
|
342
357
|
had_roman_consonant = false
|
data/lib/sanscript/version.rb
CHANGED