sanscript 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sanscript/detect.rb +13 -59
- data/lib/sanscript/detect/ruby24.rb +40 -0
- data/lib/sanscript/detect/ruby2x.rb +41 -0
- data/lib/sanscript/transliterate.rb +3 -1
- data/lib/sanscript/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3a0e34eb01c4a5d8e016b323a192fb6de6f82e8
|
4
|
+
data.tar.gz: db4799dc5bf50b86724b19ff139105f307706720
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e13bc3d149f056346bd61ebcfd4abc0798af91998b9f89440b780f6c1ee9f0e79319f157d60e0831a1521e95ffecd0fdc4edb1bf6010da0c5f0a0b8cf36a618
|
7
|
+
data.tar.gz: 4a4f770e5dc1d8c637814b5fb21a5fbbb14c0512d228235d7b8f5cf02b94f9b27112e79758e63306f182d74016dc51cae1b2a0b37ad6d68bab3da3ca83a38af2
|
data/lib/sanscript/detect.rb
CHANGED
@@ -4,8 +4,6 @@ module Sanscript
|
|
4
4
|
# Transliteration scheme detection module.
|
5
5
|
# Developed from code available @ https://github.com/sanskrit/detect.js
|
6
6
|
module Detect
|
7
|
-
# rubocop:disable Style/CaseEquality
|
8
|
-
|
9
7
|
# Match any character in the block of Brahmic scripts
|
10
8
|
# between Devanagari and Malayalam.
|
11
9
|
RE_BRAHMIC_RANGE = /[\u0900-\u0d7f]/
|
@@ -53,66 +51,22 @@ module Sanscript
|
|
53
51
|
|
54
52
|
module_function
|
55
53
|
|
56
|
-
#
|
54
|
+
# @!method detect_scheme(text)
|
55
|
+
# Attempts to detect the encoding scheme of the provided string.
|
57
56
|
#
|
58
|
-
#
|
59
|
-
#
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
# Brahmic schemes are all within a specific range of code points.
|
64
|
-
if RE_BRAHMIC_RANGE === text
|
65
|
-
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
66
|
-
return script if regex === text
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
# Romanizations
|
71
|
-
if RE_IAST_OR_KOLKATA_ONLY === text
|
72
|
-
return :kolkata if RE_KOLKATA_ONLY === text
|
73
|
-
:iast
|
74
|
-
elsif RE_ITRANS_ONLY === text
|
75
|
-
:itrans
|
76
|
-
elsif RE_SLP1_ONLY === text
|
77
|
-
:slp1
|
78
|
-
elsif RE_VELTHUIS_ONLY === text
|
79
|
-
:velthuis
|
80
|
-
elsif RE_ITRANS_OR_VELTHUIS_ONLY === text
|
81
|
-
:itrans
|
82
|
-
elsif RE_HARVARD_KYOTO === text
|
83
|
-
:hk
|
84
|
-
end
|
85
|
-
end
|
57
|
+
# Uses the most efficient implementation for your ruby version
|
58
|
+
# (either {Ruby2x#detect_scheme} or {Ruby24#detect_scheme})
|
59
|
+
#
|
60
|
+
# @param text [String] a string of Sanskrit text
|
61
|
+
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
86
62
|
|
87
|
-
#
|
63
|
+
# @!visibility private
|
88
64
|
if Regexp.method_defined?(:match?)
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if RE_BRAHMIC_RANGE.match?(text)
|
95
|
-
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
96
|
-
return script if regex.match?(text)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
# Romanizations
|
101
|
-
if RE_IAST_OR_KOLKATA_ONLY.match?(text)
|
102
|
-
return :kolkata if RE_KOLKATA_ONLY.match?(text)
|
103
|
-
:iast
|
104
|
-
elsif RE_ITRANS_ONLY.match?(text)
|
105
|
-
:itrans
|
106
|
-
elsif RE_SLP1_ONLY.match?(text)
|
107
|
-
:slp1
|
108
|
-
elsif RE_VELTHUIS_ONLY.match?(text)
|
109
|
-
:velthuis
|
110
|
-
elsif RE_ITRANS_OR_VELTHUIS_ONLY.match?(text)
|
111
|
-
:itrans
|
112
|
-
elsif RE_HARVARD_KYOTO.match?(text)
|
113
|
-
:hk
|
114
|
-
end
|
115
|
-
end
|
65
|
+
require "sanscript/detect/ruby24"
|
66
|
+
extend Ruby24
|
67
|
+
else
|
68
|
+
require "sanscript/detect/ruby2x"
|
69
|
+
extend Ruby2x
|
116
70
|
end
|
117
71
|
end
|
118
72
|
end
|
data/lib/sanscript/detect/ruby24.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sanscript
|
4
|
+
module Detect
|
5
|
+
# Module implementing `detect_scheme` method using Ruby 2.4's faster Regexp#match? method
|
6
|
+
module Ruby24
|
7
|
+
# Attempts to detect the encoding scheme of the provided string.
|
8
|
+
# Uses Ruby 2.4's Regexp#match? method for additional speed.
|
9
|
+
#
|
10
|
+
# @param text [String] a string of Sanskrit text
|
11
|
+
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
12
|
+
def detect_scheme(text)
|
13
|
+
text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
|
14
|
+
|
15
|
+
# Brahmic schemes are all within a specific range of code points.
|
16
|
+
if RE_BRAHMIC_RANGE.match?(text)
|
17
|
+
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
18
|
+
return script if regex.match?(text)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Romanizations
|
23
|
+
if RE_IAST_OR_KOLKATA_ONLY.match?(text)
|
24
|
+
return :kolkata if RE_KOLKATA_ONLY.match?(text)
|
25
|
+
:iast
|
26
|
+
elsif RE_ITRANS_ONLY.match?(text)
|
27
|
+
:itrans
|
28
|
+
elsif RE_SLP1_ONLY.match?(text)
|
29
|
+
:slp1
|
30
|
+
elsif RE_VELTHUIS_ONLY.match?(text)
|
31
|
+
:velthuis
|
32
|
+
elsif RE_ITRANS_OR_VELTHUIS_ONLY.match?(text)
|
33
|
+
:itrans
|
34
|
+
elsif RE_HARVARD_KYOTO.match?(text)
|
35
|
+
:hk
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/sanscript/detect/ruby2x.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sanscript
|
4
|
+
module Detect
|
5
|
+
# Module implementing `detect_scheme` method using Ruby 2.x-compatible syntax.
|
6
|
+
module Ruby2x
|
7
|
+
# Attempts to detect the encoding scheme of the provided string.
|
8
|
+
#
|
9
|
+
# @param text [String] a string of Sanskrit text
|
10
|
+
# @return [Symbol, nil] the Symbol of the scheme, or nil if no match
|
11
|
+
def detect_scheme(text)
|
12
|
+
text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
|
13
|
+
|
14
|
+
# rubocop:disable Style/CaseEquality
|
15
|
+
|
16
|
+
# Brahmic schemes are all within a specific range of code points.
|
17
|
+
if RE_BRAHMIC_RANGE === text
|
18
|
+
RE_BRAHMIC_SCRIPTS.each do |script, regex|
|
19
|
+
return script if regex === text
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Romanizations
|
24
|
+
if RE_IAST_OR_KOLKATA_ONLY === text
|
25
|
+
return :kolkata if RE_KOLKATA_ONLY === text
|
26
|
+
:iast
|
27
|
+
elsif RE_ITRANS_ONLY === text
|
28
|
+
:itrans
|
29
|
+
elsif RE_SLP1_ONLY === text
|
30
|
+
:slp1
|
31
|
+
elsif RE_VELTHUIS_ONLY === text
|
32
|
+
:velthuis
|
33
|
+
elsif RE_ITRANS_OR_VELTHUIS_ONLY === text
|
34
|
+
:itrans
|
35
|
+
elsif RE_HARVARD_KYOTO === text
|
36
|
+
:hk
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/sanscript/transliterate.rb
CHANGED
@@ -5,7 +5,9 @@ require "sanscript/transliterate/schemes"
|
|
5
5
|
module Sanscript
|
6
6
|
using Refinements
|
7
7
|
# Sanskrit transliteration module.
|
8
|
-
# Derived from Sanscript,
|
8
|
+
# Derived from Sanscript (https://github.com/sanskrit/sanscript.js), which is
|
9
|
+
# released under the MIT and GPL Licenses.
|
10
|
+
#
|
9
11
|
# "Sanscript is a Sanskrit transliteration library. Currently, it supports
|
10
12
|
# other Indian languages only incidentally."
|
11
13
|
module Transliterate
|
data/lib/sanscript/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanscript
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Bellefleur
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -144,6 +144,8 @@ files:
|
|
144
144
|
- lib/sanscript.rb
|
145
145
|
- lib/sanscript/benchmark.rb
|
146
146
|
- lib/sanscript/detect.rb
|
147
|
+
- lib/sanscript/detect/ruby24.rb
|
148
|
+
- lib/sanscript/detect/ruby2x.rb
|
147
149
|
- lib/sanscript/refinements.rb
|
148
150
|
- lib/sanscript/transliterate.rb
|
149
151
|
- lib/sanscript/transliterate/schemes.rb
|
@@ -169,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
169
171
|
version: '0'
|
170
172
|
requirements: []
|
171
173
|
rubyforge_project:
|
172
|
-
rubygems_version: 2.
|
174
|
+
rubygems_version: 2.6.6
|
173
175
|
signing_key:
|
174
176
|
specification_version: 4
|
175
177
|
summary: Ruby port and extension of Sanscript.js transliterator by learnsanskrit.org
|