sanscript 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/sanscript/detect.rb +13 -59
- data/lib/sanscript/detect/ruby24.rb +40 -0
- data/lib/sanscript/detect/ruby2x.rb +41 -0
- data/lib/sanscript/transliterate.rb +3 -1
- data/lib/sanscript/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3a0e34eb01c4a5d8e016b323a192fb6de6f82e8
|
4
|
+
data.tar.gz: db4799dc5bf50b86724b19ff139105f307706720
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e13bc3d149f056346bd61ebcfd4abc0798af91998b9f89440b780f6c1ee9f0e79319f157d60e0831a1521e95ffecd0fdc4edb1bf6010da0c5f0a0b8cf36a618
|
7
|
+
data.tar.gz: 4a4f770e5dc1d8c637814b5fb21a5fbbb14c0512d228235d7b8f5cf02b94f9b27112e79758e63306f182d74016dc51cae1b2a0b37ad6d68bab3da3ca83a38af2
|
data/lib/sanscript/detect.rb
CHANGED
@@ -4,8 +4,6 @@ module Sanscript
|
|
4
4
|
# Transliteration scheme detection module.
|
5
5
|
# Developed from code available @ https://github.com/sanskrit/detect.js
|
6
6
|
module Detect
|
7
|
-
# rubocop:disable Style/CaseEquality
|
8
|
-
|
9
7
|
# Match any character in the block of Brahmic scripts
|
10
8
|
# between Devanagari and Malayalam.
|
11
9
|
RE_BRAHMIC_RANGE = /[\u0900-\u0d7f]/
|
@@ -53,66 +51,22 @@ module Sanscript
|
|
53
51
|
|
54
52
|
module_function
|
55
53
|
|
56
|
-
#
|
54
|
+
# @!method detect_scheme(text)
|
55
|
+
# Attempts to detect the encoding scheme of the provided string.
|
57
56
|
#
|
58
|
-
#
|
59
|
-
#
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
# Brahmic schemes are all within a specific range of code points.
|
64
|
-
if RE_BRAHMIC_RANGE === text
|
65
|
-
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
66
|
-
return script if regex === text
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
# Romanizations
|
71
|
-
if RE_IAST_OR_KOLKATA_ONLY === text
|
72
|
-
return :kolkata if RE_KOLKATA_ONLY === text
|
73
|
-
:iast
|
74
|
-
elsif RE_ITRANS_ONLY === text
|
75
|
-
:itrans
|
76
|
-
elsif RE_SLP1_ONLY === text
|
77
|
-
:slp1
|
78
|
-
elsif RE_VELTHUIS_ONLY === text
|
79
|
-
:velthuis
|
80
|
-
elsif RE_ITRANS_OR_VELTHUIS_ONLY === text
|
81
|
-
:itrans
|
82
|
-
elsif RE_HARVARD_KYOTO === text
|
83
|
-
:hk
|
84
|
-
end
|
85
|
-
end
|
57
|
+
# Uses the most efficient implementation for your ruby version
|
58
|
+
# (either {Ruby2x#detect_scheme} or {Ruby24#detect_scheme})
|
59
|
+
#
|
60
|
+
# @param text [String] a string of Sanskrit text
|
61
|
+
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
86
62
|
|
87
|
-
#
|
63
|
+
# @!visibility private
|
88
64
|
if Regexp.method_defined?(:match?)
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if RE_BRAHMIC_RANGE.match?(text)
|
95
|
-
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
96
|
-
return script if regex.match?(text)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
# Romanizations
|
101
|
-
if RE_IAST_OR_KOLKATA_ONLY.match?(text)
|
102
|
-
return :kolkata if RE_KOLKATA_ONLY.match?(text)
|
103
|
-
:iast
|
104
|
-
elsif RE_ITRANS_ONLY.match?(text)
|
105
|
-
:itrans
|
106
|
-
elsif RE_SLP1_ONLY.match?(text)
|
107
|
-
:slp1
|
108
|
-
elsif RE_VELTHUIS_ONLY.match?(text)
|
109
|
-
:velthuis
|
110
|
-
elsif RE_ITRANS_OR_VELTHUIS_ONLY.match?(text)
|
111
|
-
:itrans
|
112
|
-
elsif RE_HARVARD_KYOTO.match?(text)
|
113
|
-
:hk
|
114
|
-
end
|
115
|
-
end
|
65
|
+
require "sanscript/detect/ruby24"
|
66
|
+
extend Ruby24
|
67
|
+
else
|
68
|
+
require "sanscript/detect/ruby2x"
|
69
|
+
extend Ruby2x
|
116
70
|
end
|
117
71
|
end
|
118
72
|
end
|
data/lib/sanscript/detect/ruby24.rb
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sanscript
|
4
|
+
module Detect
|
5
|
+
# Module implementing `detect_scheme` method using Ruby 2.4's faster Regexp#match? method
|
6
|
+
module Ruby24
|
7
|
+
# Attempts to detect the encoding scheme of the provided string.
|
8
|
+
# Uses Ruby 2.4's Regexp#match? method for additional speed.
|
9
|
+
#
|
10
|
+
# @param text [String] a string of Sanskrit text
|
11
|
+
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
12
|
+
def detect_scheme(text)
|
13
|
+
text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
|
14
|
+
|
15
|
+
# Brahmic schemes are all within a specific range of code points.
|
16
|
+
if RE_BRAHMIC_RANGE.match?(text)
|
17
|
+
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
18
|
+
return script if regex.match?(text)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Romanizations
|
23
|
+
if RE_IAST_OR_KOLKATA_ONLY.match?(text)
|
24
|
+
return :kolkata if RE_KOLKATA_ONLY.match?(text)
|
25
|
+
:iast
|
26
|
+
elsif RE_ITRANS_ONLY.match?(text)
|
27
|
+
:itrans
|
28
|
+
elsif RE_SLP1_ONLY.match?(text)
|
29
|
+
:slp1
|
30
|
+
elsif RE_VELTHUIS_ONLY.match?(text)
|
31
|
+
:velthuis
|
32
|
+
elsif RE_ITRANS_OR_VELTHUIS_ONLY.match?(text)
|
33
|
+
:itrans
|
34
|
+
elsif RE_HARVARD_KYOTO.match?(text)
|
35
|
+
:hk
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/sanscript/detect/ruby2x.rb
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sanscript
|
4
|
+
module Detect
|
5
|
+
# Module implementing `detect_scheme` method using Ruby 2.x-compatible syntax.
|
6
|
+
module Ruby2x
|
7
|
+
# Attempts to detect the encoding scheme of the provided string.
|
8
|
+
#
|
9
|
+
# @param text [String] a string of Sanskrit text
|
10
|
+
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
11
|
+
def detect_scheme(text)
|
12
|
+
text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
|
13
|
+
|
14
|
+
# rubocop:disable Style/CaseEquality
|
15
|
+
|
16
|
+
# Brahmic schemes are all within a specific range of code points.
|
17
|
+
if RE_BRAHMIC_RANGE === text
|
18
|
+
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
19
|
+
return script if regex === text
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Romanizations
|
24
|
+
if RE_IAST_OR_KOLKATA_ONLY === text
|
25
|
+
return :kolkata if RE_KOLKATA_ONLY === text
|
26
|
+
:iast
|
27
|
+
elsif RE_ITRANS_ONLY === text
|
28
|
+
:itrans
|
29
|
+
elsif RE_SLP1_ONLY === text
|
30
|
+
:slp1
|
31
|
+
elsif RE_VELTHUIS_ONLY === text
|
32
|
+
:velthuis
|
33
|
+
elsif RE_ITRANS_OR_VELTHUIS_ONLY === text
|
34
|
+
:itrans
|
35
|
+
elsif RE_HARVARD_KYOTO === text
|
36
|
+
:hk
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/sanscript/transliterate.rb
CHANGED
@@ -5,7 +5,9 @@ require "sanscript/transliterate/schemes"
|
|
5
5
|
module Sanscript
|
6
6
|
using Refinements
|
7
7
|
# Sanskrit transliteration module.
|
8
|
-
# Derived from Sanscript,
|
8
|
+
# Derived from Sanscript (https://github.com/sanskrit/sanscript.js), which is
|
9
|
+
# released under the MIT and GPL Licenses.
|
10
|
+
#
|
9
11
|
# "Sanscript is a Sanskrit transliteration library. Currently, it supports
|
10
12
|
# other Indian languages only incidentally."
|
11
13
|
module Transliterate
|
data/lib/sanscript/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanscript
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Bellefleur
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -144,6 +144,8 @@ files:
|
|
144
144
|
- lib/sanscript.rb
|
145
145
|
- lib/sanscript/benchmark.rb
|
146
146
|
- lib/sanscript/detect.rb
|
147
|
+
- lib/sanscript/detect/ruby24.rb
|
148
|
+
- lib/sanscript/detect/ruby2x.rb
|
147
149
|
- lib/sanscript/refinements.rb
|
148
150
|
- lib/sanscript/transliterate.rb
|
149
151
|
- lib/sanscript/transliterate/schemes.rb
|
@@ -169,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
169
171
|
version: '0'
|
170
172
|
requirements: []
|
171
173
|
rubyforge_project:
|
172
|
-
rubygems_version: 2.
|
174
|
+
rubygems_version: 2.6.6
|
173
175
|
signing_key:
|
174
176
|
specification_version: 4
|
175
177
|
summary: Ruby port and extension of Sanscript.js transliterator by learnsanskrit.org
|