interscript 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7813dba0d0cc7493ed3b9279c61283c8d305f1e05584a44aa700e9b72acb2f06
|
|
4
|
+
data.tar.gz: f4c87e24d7c2719b4f358198967d55e0c17d8aaac354311cb9eecc800a592b2d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8e23000fe8fb016dec9351241787608c892d7af48682259c10345ce417f94a626ea06a5bc8b1e7a3f084da8c0502d13b96bd06c9a53da31530f5c487b06fe4e9
|
|
7
|
+
data.tar.gz: b2d8cb122b2c1bbb2d989d832802a007a60ecc6d0f2984e323a983ec94ae9505664b98e0d9c33e0b2711f5fa31f4e657a720648dcb572915f69006a1719a9610
|
data/README.adoc
CHANGED
|
@@ -52,9 +52,7 @@ Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g
|
|
|
52
52
|
|
|
53
53
|
[source,sh]
|
|
54
54
|
----
|
|
55
|
-
pip3 install
|
|
56
|
-
curl -sSL -o sequitur-g2p.zip https://github.com/sequitur-g2p/sequitur-g2p/archive/806273f.zip
|
|
57
|
-
pip3 install sequitur-g2p.zip
|
|
55
|
+
pip3 install -r requirments.txt
|
|
58
56
|
----
|
|
59
57
|
|
|
60
58
|
Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
|
data/aliases.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"pan_Deva2Latn_ALA_1997":"alalc-pan-Guru-Latn-1997","kor_Hang2Latn_ALA_1997":"alalc-kor-Hang-Latn-1997","asm_Deva2Latn_ALA_1997":"alalc-asm-Deva-Latn-1997","aze_Cyrl2Latn_ALA_1997":"alalc-aze-Cyrl-Latn-1997","ukr_Cyrl2Latn_GUP_1996":"ua-ukr-Cyrl-Latn-1996","tha_Thai2Latn_RIT_1968":"royin-tha-Thai-Latn-1968","bul_Cyrl2Latn_BGN_1952":"bgnpcgn-bul-Cyrl-Latn-1952","tam_Taml2Latn_ALA_1997":"alalc-tam-Taml-Latn-1997","kor_Hang2Latn_GKN_2002":"kp-kor-Hang-Latn-2002","ell_Grek2Latn_ELOT743_1996":"bgnpcgn-ell-Grek-Latn-1996","zho_Hani2Latn_AcadSin_2002":"acadsin-zho-Hani-Latn-2002","ara_Arab2Latn_SES_1930":"ses-ara-Arab-Latn-1930","tgk_Cyrl2Latn_BGN_1994":"bgnpcgn-tgk-Cyrl-Latn-1994","fas_Arab2Latn_BGN_1958":"bgnpcgn-fas-Arab-Latn-1956","sin_Sinh2Latn_ALA_1997":"alalc-sin-Sinh-Latn-1997","uas_Arab2Latn_BGN_2007":"bgnpcgn-urd-Arab-Latn-2007","ukr_Cyrl2Latn_ALA_1997":"alalc-ukr-Cyrl-Latn-1997","bak_Cyrl2Latn_BGN_2007":"bgnpcgn-bak-Cyrl-Latn-2007","tam_Taml2Latn_ALA_2011":"alalc-tam-Taml-Latn-2011","ara_Arab2Latn_BGN_1956":"bgnpcgn-ara-Arab-Latn-1956","ell_Grek2Latn_ALA_1997":"alalc-ell-Grek-Latn-1997","rus_Cyrl2Latn_GOST_1983":"gost-rus-Cyrl-Latn-16876-71-1983","mar_Deva2Latn_ALA_1997":"alalc-mar-Deva-Latn-1997","bel_Cyrl2Latn_ALA_1997":"alalc-bel-Cyrl-Latn-1997","kat_Geor2Latn_ALA_1997":"alalc-kat-Geor-Latn-1997","bul_Cyrl2Latn_ALA_1997":"alalc-bul-Cyrl-Latn-1997","ara_Arab2Latn_ALA_1997":"alalc-ara-Arab-Latn-1997","mon_Cyrl2Latn_ALA_1997":"alalc-mon-Cyrl-Latn-1997","div_Thaa2Latn_GMV_1988":"bgnpcgn-div-Thaa-Latn-1988","hin_Deva2Latn_ALA_1997":"alalc-hin-Deva-Latn-1997","bel_Cyrl2Latn_GBO_1998":"by-bel-Cyrl-Latn-1998","ukr_Cyrl2Latn_BGN_1965":"bgnpcgn-ukr-Cyrl-Latn-1965","rus_Cyrl2Latn_ALA_1997":"alalc-rus-Cyrl-Latn-1997","tir_Thai2Latn_RIT_2000":"royin-tha-Thai-Latn-1999","guj_Gujr2Latn_ALA_1997":"alalc-guj-Gujr-Latn-1997","tel_Telu2Latn_ALA_1997":"alalc-tel-Telu-Latn-1997","mkd_Cyrl2Latn_BGN_1981":"bgnpcgn-mkd-Cyrl-Latn-1981","ori_Orya2Latn_ALA_2011":"alalc-ori-Orya-Latn-2
011","aze_Arab2Latn_ALA_1997":"alalc-aze-Arab-Latn-1997","ori_Orya2Latn_ALA_1997":"alalc-ori-Orya-Latn-1997","div_Thaa2Latn_ALA_1997":"alalc-div-Thaa-Latn-1997","rue_Cyrl2Latn_BGN_2016":"bgnpcgn-rue-Cyrl-Latn-2016","guj_Gujr2Latn_ALA_2011":"alalc-guj-Gujr-Latn-2011","kat_Geor2Latn_BGN_1981":"bgnpcgn-kat-Geor-Latn-1981","kor_Hang2Latn_MOCT_2000":"moct-kor-Hang-Latn-2000","sin_Sinh2Latn_ALA_2011":"alalc-sin-Sinh-Latn-2011","amh_Ethi2Latn_BGN_1967":"bgnpcgn-amh-Ethi-Latn-1967","srp_Cyrl2Latn_BGN_2005":"bgnpcgn-srp-Cyrl-Latn-2005","srp_Cyrl2Latn_ALA_1997":"alalc-srp-Cyrl-Latn-1997","mal_Mlym2Latn_ALA_2012":"alalc-mal-Mlym-Latn-2012","kat_Geor2Latn_GGG_2002":"ggg-kat-Geor-Latn-2002","mon_Cyrl2Latn_BGN_1964":"bgnpcgn-mon-Cyrl-Latn-1964","mal_Mlym2Latn_ALA_1997":"alalc-mal-Mlym-Latn-1997","ben_Beng2Latn_ALA_1997":"alalc-ben-Beng-Latn-1997","kor_Hang2Latn_MR_1939":"bgn-kor-Hang-Latn-1943","zho_Hani2Latn_GCH_1979":"sac-zho-Hans-Latn-1979","bul_Cyrl2Latn_BGN_2013":"bgnpcgn-bul-Cyrl-Latn-2013","ell_Grek2Latn_BGN_1962":"bgnpcgn-ell-Grek-Latn-1962","amh_Ethi2Latn_ALA_1997":"alalc-amh-Ethi-Latn-1997","pan_Deva2Latn_ALA_2011":"alalc-pan-Guru-Latn-2011","zho_Hani2Latn_WDG_1979":"var-zho-Hani-Latn-wd-1979","rus_Cyrl2Latn_BGN_1947":"bgnpcgn-rus-Cyrl-Latn-1947","bel_Cyrl2Latn_BGN_1979":"bgnpcgn-bel-Cyrl-Latn-1979","tat_Cyrl2Latn_BGN_2005":"bgnpcgn-tat-Cyrl-Latn-2007"}
|
data/lib/interscript.rb
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "interscript/opal/maps" if RUBY_ENGINE == "opal"
|
|
4
3
|
require "interscript/mapping"
|
|
5
4
|
|
|
6
5
|
# Transliteration
|
|
@@ -21,6 +20,8 @@ module Interscript
|
|
|
21
20
|
class << self
|
|
22
21
|
|
|
23
22
|
def transliterate(system_code, string, maps={})
|
|
23
|
+
system_code = map_resolve(system_code)
|
|
24
|
+
|
|
24
25
|
unless maps.has_key? system_code
|
|
25
26
|
maps[system_code] = Interscript::Mapping.for(system_code)
|
|
26
27
|
end
|
|
@@ -39,8 +40,6 @@ module Interscript
|
|
|
39
40
|
title_case = mapping.title_case
|
|
40
41
|
downcase = mapping.downcase
|
|
41
42
|
|
|
42
|
-
# charmap = mapping.characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
|
|
43
|
-
# dictmap = mapping.dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
|
|
44
43
|
charmap = mapping.characters_hash
|
|
45
44
|
dictmap = mapping.dictionary_hash
|
|
46
45
|
trie = mapping.dictionary_trie
|
|
@@ -134,6 +133,12 @@ module Interscript
|
|
|
134
133
|
output.unicode_normalize
|
|
135
134
|
end
|
|
136
135
|
|
|
136
|
+
def map_resolve(map)
|
|
137
|
+
map = aliases[map] if aliases.key? map
|
|
138
|
+
raise ArgumentError, "Map #{map} doesn't exist" unless map_exist? map
|
|
139
|
+
map
|
|
140
|
+
end
|
|
141
|
+
|
|
137
142
|
private
|
|
138
143
|
|
|
139
144
|
def add_separator(separator, pos, result)
|
data/lib/interscript/fs.rb
CHANGED
|
@@ -59,8 +59,35 @@ module Interscript
|
|
|
59
59
|
string
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def aliases (refresh: false)
|
|
63
|
+
file = root_path.join("./aliases.json").to_s
|
|
64
|
+
if !refresh && File.exist?(file)
|
|
65
|
+
JSON.load(File.read(file))
|
|
66
|
+
elsif !refresh && @aliases
|
|
67
|
+
@aliases
|
|
68
|
+
else
|
|
69
|
+
@aliases = {}
|
|
70
|
+
Dir[root_path.join('./maps/*.yaml').to_s].each do |yaml_file|
|
|
71
|
+
org_name = File.basename(yaml_file, ".yaml")
|
|
72
|
+
map = YAML.load_file(yaml_file)
|
|
73
|
+
(map["alias"] || {}).each do |k,v|
|
|
74
|
+
@aliases[v["code"]] = org_name
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Try to save it to a file, but don't force it.
|
|
79
|
+
File.write("aliases.json", JSON.dump(@aliases)) rescue nil
|
|
80
|
+
|
|
81
|
+
@aliases
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
62
85
|
private
|
|
63
86
|
|
|
87
|
+
def map_exist?(map)
|
|
88
|
+
File.exist?(root_path.join("./maps/" + map + ".yaml").to_s)
|
|
89
|
+
end
|
|
90
|
+
|
|
64
91
|
def mkregexp(regexpstring)
|
|
65
92
|
/#{regexpstring}/u
|
|
66
93
|
end
|
data/lib/interscript/mapping.rb
CHANGED
|
@@ -76,7 +76,7 @@ module Interscript
|
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
def load_opal_mappings
|
|
79
|
-
JSON.parse(`InterscriptMaps[#{system_code}]`)
|
|
79
|
+
JSON.parse(`Opal.global.InterscriptMaps[#{system_code}]`)
|
|
80
80
|
end
|
|
81
81
|
|
|
82
82
|
def load_fs_mappings
|
|
@@ -127,6 +127,8 @@ module Interscript
|
|
|
127
127
|
@characters = (inherited_mapping.characters|| {}).merge(characters)
|
|
128
128
|
@dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
|
|
129
129
|
end
|
|
130
|
+
|
|
131
|
+
@characters.compact! # the feature to ignore characters from inherited
|
|
130
132
|
end
|
|
131
133
|
|
|
132
134
|
def build_hashes
|
data/lib/interscript/opal.rb
CHANGED
|
@@ -39,13 +39,152 @@ module Interscript
|
|
|
39
39
|
string
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
JSON.load(json)
|
|
42
|
+
def load_map_json(_, json)
|
|
43
|
+
json = Hash.new(json) if native? json
|
|
44
|
+
json = JSON.load(json) if String === json
|
|
45
|
+
json.each do |k,v|
|
|
45
46
|
`Opal.global.InterscriptMaps[#{k}] = #{JSON.dump(v)}`
|
|
46
47
|
end
|
|
47
48
|
end
|
|
48
49
|
|
|
50
|
+
# Use #on_load_maps if possible. It will be available earlier.
|
|
51
|
+
# See lib/interscript/opal/entrypoint.rb
|
|
52
|
+
def load_maps(opts, &block)
|
|
53
|
+
# Convert arg
|
|
54
|
+
opts = Hash.new(opts) if native? opts
|
|
55
|
+
|
|
56
|
+
defaults = {
|
|
57
|
+
maps: [],
|
|
58
|
+
path: nil,
|
|
59
|
+
node_path: "./maps/",
|
|
60
|
+
ajax_path: "maps/",
|
|
61
|
+
loader: nil,
|
|
62
|
+
processor: proc { |i| i },
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
opts = defaults.merge opts
|
|
66
|
+
opts[:maps] = Array(opts[:maps])
|
|
67
|
+
|
|
68
|
+
%x{
|
|
69
|
+
var ajax_loader = function(map) {
|
|
70
|
+
return new Promise(function(ok, fail) {
|
|
71
|
+
var httpRequest = new XMLHttpRequest();
|
|
72
|
+
httpRequest.onreadystatechange = function() {
|
|
73
|
+
if (httpRequest.readyState === XMLHttpRequest.DONE) {
|
|
74
|
+
if (httpRequest.responseText) {
|
|
75
|
+
ok(JSON.parse(httpRequest.responseText));
|
|
76
|
+
}
|
|
77
|
+
else {
|
|
78
|
+
if (is_local) {
|
|
79
|
+
console.log(httpRequest.responseText);
|
|
80
|
+
fail("Ajax failed load: "+map+". Status: "+httpRequest.statusText+". "+
|
|
81
|
+
"Are you running this locally? Try adding: "+
|
|
82
|
+
"--allow-file-access-from-files to your Chromium command line.")
|
|
83
|
+
}
|
|
84
|
+
else fail("Ajax failed load: "+map+". Status: "+httpRequest.statusText);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
};
|
|
88
|
+
httpRequest.open('GET', #{opts[:path] || opts[:ajax_path]}+map+".json", true);
|
|
89
|
+
httpRequest.send();
|
|
90
|
+
});
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
var fetch_loader = function(map) {
|
|
94
|
+
return fetch(#{opts[:path] || opts[:ajax_path]}+map+".json").then(function(response) {
|
|
95
|
+
return response.json();
|
|
96
|
+
});
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
var node_loader = function(map) {
|
|
100
|
+
var resolve = null, error = null;
|
|
101
|
+
var prom = new Promise(function(ok, fail) {
|
|
102
|
+
resolve = ok;
|
|
103
|
+
error = fail;
|
|
104
|
+
});
|
|
105
|
+
try {
|
|
106
|
+
var node_require = eval("require");
|
|
107
|
+
var data = node_require(#{opts[:path] || opts[:node_path]}+map+'.json');
|
|
108
|
+
resolve(data);
|
|
109
|
+
}
|
|
110
|
+
catch(e) {
|
|
111
|
+
error("Node failed load: "+map+". Error: "+e);
|
|
112
|
+
}
|
|
113
|
+
return prom;
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
var is_local = false;
|
|
117
|
+
if (typeof document !== "undefined" &&
|
|
118
|
+
typeof document.location !== "undefined" &&
|
|
119
|
+
typeof document.location.protocol !== "undefined") {
|
|
120
|
+
is_local = document.location.protocol == "file:";
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
var loader = function(map) {
|
|
124
|
+
if (#{opts[:loader] != nil}) {
|
|
125
|
+
return #{opts[:loader]}(#{opts[:path]}+map+'.json').then(#{opts[:processor]});
|
|
126
|
+
}
|
|
127
|
+
else if (typeof window !== "undefined") {
|
|
128
|
+
return ajax_loader(map);
|
|
129
|
+
}
|
|
130
|
+
else if (typeof global !== "undefined") {
|
|
131
|
+
return node_loader(map);
|
|
132
|
+
}
|
|
133
|
+
else if (!is_local && typeof fetch === "function") {
|
|
134
|
+
return fetch_loader(map);
|
|
135
|
+
}
|
|
136
|
+
else {
|
|
137
|
+
#{raise StandardError, "We couldn't find a good way to load a map"}
|
|
138
|
+
}
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
prom = `new Promise(function(ok, fail) {
|
|
143
|
+
#{
|
|
144
|
+
maps = opts[:maps]
|
|
145
|
+
maps = maps.map { |i| map_resolve i }
|
|
146
|
+
maps = maps.reject { |i| map_loaded? i }
|
|
147
|
+
#p ["Loading:", maps]
|
|
148
|
+
maps = maps.map do |i|
|
|
149
|
+
`loader(#{i})`.JS.then do |map|
|
|
150
|
+
load_map_json(nil, map)
|
|
151
|
+
|
|
152
|
+
m = Native(map)
|
|
153
|
+
inherits = []
|
|
154
|
+
m.each do |mapname, mapvalue|
|
|
155
|
+
inherits += Array(Native(mapvalue)[:map][:inherit])
|
|
156
|
+
inherits += Array(Native(mapvalue)[:chain])
|
|
157
|
+
end
|
|
158
|
+
inherits = inherits.uniq
|
|
159
|
+
inherits = inherits.reject { |i| map_loaded? i }
|
|
160
|
+
|
|
161
|
+
load_maps(opts.merge({maps: inherits})) unless inherits.empty?
|
|
162
|
+
end.JS.catch do |response|
|
|
163
|
+
`fail(#{response})`
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
}
|
|
167
|
+
Promise.all(#{maps}).then(ok).catch(fail);
|
|
168
|
+
})`
|
|
169
|
+
|
|
170
|
+
if block_given?
|
|
171
|
+
prom.JS.then(block)
|
|
172
|
+
else
|
|
173
|
+
prom
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def aliases
|
|
178
|
+
@aliases ||= Hash.new(`Opal.global.InterscriptMapAliases`)
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
def map_exist?(map)
|
|
182
|
+
`typeof(Opal.global.InterscriptMaps[#{map}]) !== 'undefined'`
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def map_loaded?(map)
|
|
186
|
+
`!!Opal.global.InterscriptMaps[#{map}]`
|
|
187
|
+
end
|
|
49
188
|
end
|
|
50
189
|
end
|
|
51
190
|
|
data/lib/interscript/opal/entrypoint.rb
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
1
|
require "opal"
|
|
2
|
+
require "interscript/opal/maps"
|
|
2
3
|
require "onigmo/onigmo-wasm"
|
|
3
4
|
|
|
4
5
|
module Interscript
|
|
5
6
|
def self.on_load(&block)
|
|
6
7
|
WebAssembly.wait_for("onigmo/onigmo-wasm", &block)
|
|
7
8
|
end
|
|
9
|
+
|
|
10
|
+
# on_load + load_maps
|
|
11
|
+
def self.on_load_maps(arg, &block)
|
|
12
|
+
self.on_load.JS.then do
|
|
13
|
+
self.load_maps(arg, &block)
|
|
14
|
+
end
|
|
15
|
+
end
|
|
8
16
|
end
|
|
9
17
|
|
|
10
18
|
Interscript.on_load do
|
data/lib/interscript/opal/exports.rb
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Exports - the methods used publicly. Listed so that
|
|
2
|
+
# opal-optimizer will know the methods it needs to preserve.
|
|
3
|
+
|
|
4
|
+
Interscript.on_load
|
|
5
|
+
Interscript.on_load_maps
|
|
6
|
+
Interscript.load_maps
|
|
7
|
+
Interscript.load_map_json
|
|
8
|
+
Interscript.transliterate
|
|
9
|
+
|
|
10
|
+
# TODO: Fix handling of "ord".$to_proc() in opal-optimizer
|
|
11
|
+
"a".ord
|
data/lib/interscript/opal/maps.js.erb
CHANGED
|
@@ -3,8 +3,6 @@ Opal.global.InterscriptMaps = {
|
|
|
3
3
|
<% Dir['maps/*.yaml'].each do |yaml_file| %>
|
|
4
4
|
"<%= File.basename(yaml_file, ".yaml") %>": null,
|
|
5
5
|
<% end %>
|
|
6
|
-
}
|
|
6
|
+
};
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
JSON.dump(Interscript::OpalMapTranslate.translate_regexp(JSON.dump(YAML.load(File.read(yaml_file)))))
|
|
10
|
-
%>
|
|
8
|
+
Opal.global.InterscriptMapAliases = <%= File.read('aliases.json') %>;
|
data/lib/interscript/version.rb
CHANGED
|
data/maps/alalc-ara-Arab-Latn-1997.yaml
CHANGED
|
@@ -1022,11 +1022,11 @@ map:
|
|
|
1022
1022
|
'\u064f': 'u'
|
|
1023
1023
|
'\u0650': 'i'
|
|
1024
1024
|
'\u064e\u0627': 'ā' # see Rule 5
|
|
1025
|
-
'\
|
|
1026
|
-
'\
|
|
1027
|
-
'\
|
|
1028
|
-
'\
|
|
1029
|
-
'\
|
|
1025
|
+
'\u064e\u0649': 'á' # see Rule 6(a)
|
|
1026
|
+
'\u064f\u0648': 'ū'
|
|
1027
|
+
'\u0650\u064a': 'ī'
|
|
1028
|
+
'\u064e\u0648\u0652': 'aw'
|
|
1029
|
+
'\u064e\u064a\u0652': 'ay'
|
|
1030
1030
|
|
|
1031
1031
|
# Letters Representing Non-Arabic Consonants
|
|
1032
1032
|
# (this list is not exhaustive)
|
data/maps/alalc-asm-Deva-Latn-1997.yaml
CHANGED
|
@@ -35,30 +35,107 @@ notes:
|
|
|
35
35
|
|
|
36
36
|
tests:
|
|
37
37
|
- source: "অসমীয়া কবিতা"
|
|
38
|
-
expected: "
|
|
38
|
+
expected: "asamīẏā kabitā"
|
|
39
39
|
- source: "কবিৰ আজি জন্মদিন"
|
|
40
|
-
expected: "
|
|
40
|
+
expected: "kabira āji janmadina"
|
|
41
41
|
- source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
|
|
42
|
-
expected: "
|
|
42
|
+
expected: "beruṭata emāhara pāchate punara bhayaṃkara agnikāṇḍa"
|
|
43
43
|
- source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
|
|
44
|
-
expected: "
|
|
44
|
+
expected: "bhaṅāra biruddhe āwedana dākhila kaṃganāra"
|
|
45
45
|
- source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
|
|
46
|
-
expected: "
|
|
46
|
+
expected: "āpuni paṛhi bhāla pāba parā bātari"
|
|
47
47
|
- source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
|
|
48
|
-
expected: "
|
|
48
|
+
expected: "śrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
|
|
49
49
|
- source: "কেনে আছে প্ৰাক্তন"
|
|
50
|
-
expected: "
|
|
50
|
+
expected: "kene āche prāktana"
|
|
51
51
|
- source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
|
|
52
|
-
expected: "
|
|
52
|
+
expected: "kamumbāira meẏarara dehata kobhiḍa pajiṭibha"
|
|
53
53
|
- source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
|
|
54
|
-
expected: "
|
|
54
|
+
expected: "ṭuiṭāraẏoge khoda sadarī kare ei kathā"
|
|
55
55
|
- source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
|
|
56
|
-
expected: "
|
|
56
|
+
expected: "lakhimapura jilāra nārāẏaṇapurara barapathārata āji praśānti dhāma nāmere ekhana bṛddhāśramara śubhārambha karā haẏa"
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
map:
|
|
60
60
|
|
|
61
61
|
rules:
|
|
62
|
+
# note[2]
|
|
63
|
+
- pattern: (ক=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
64
|
+
result: 'k'
|
|
65
|
+
- pattern: (খ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
66
|
+
result: 'kh'
|
|
67
|
+
- pattern: (গ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
68
|
+
result: 'g'
|
|
69
|
+
- pattern: (ঘ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
70
|
+
result: 'gh'
|
|
71
|
+
- pattern: (ঙ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
72
|
+
result: 'ṅ'
|
|
73
|
+
- pattern: (চ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
74
|
+
result: 'c'
|
|
75
|
+
- pattern: (ছ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
76
|
+
result: 'ch'
|
|
77
|
+
- pattern: (জ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
78
|
+
result: 'j'
|
|
79
|
+
- pattern: (ঝ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
80
|
+
result: 'jh'
|
|
81
|
+
- pattern: (ঞ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
82
|
+
result: 'ñ'
|
|
83
|
+
- pattern: (ট=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
84
|
+
result: 'ṭ'
|
|
85
|
+
- pattern: (ঠ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
86
|
+
result: 'ṭh'
|
|
87
|
+
- pattern: (ড=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
88
|
+
result: 'ḍ'
|
|
89
|
+
- pattern: (ড়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
90
|
+
result: 'ṛ'
|
|
91
|
+
- pattern: (ঢ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
92
|
+
result: 'ḍh'
|
|
93
|
+
- pattern: (ঢ়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
94
|
+
result: 'ṛh'
|
|
95
|
+
- pattern: (ণ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
96
|
+
result: 'ṇ'
|
|
97
|
+
- pattern: (ত=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
98
|
+
result: 't'
|
|
99
|
+
- pattern: (ৎ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
100
|
+
result: 'ṭ'
|
|
101
|
+
- pattern: (থ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
102
|
+
result: 'th'
|
|
103
|
+
- pattern: (দ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
104
|
+
result: 'd'
|
|
105
|
+
- pattern: (ধ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
106
|
+
result: 'dh'
|
|
107
|
+
- pattern: (ন=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
108
|
+
result: 'n'
|
|
109
|
+
- pattern: (প=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
110
|
+
result: 'p'
|
|
111
|
+
- pattern: (ফ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
112
|
+
result: 'ph'
|
|
113
|
+
- pattern: (ব=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
114
|
+
result: 'b'
|
|
115
|
+
- pattern: (ভ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
116
|
+
result: 'bh'
|
|
117
|
+
- pattern: (ম=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
118
|
+
result: 'm'
|
|
119
|
+
- pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
120
|
+
result: 'y'
|
|
121
|
+
- pattern: (য=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
122
|
+
result: 'ẏ'
|
|
123
|
+
- pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
124
|
+
result: 'ẏ'
|
|
125
|
+
- pattern: (ৰ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
126
|
+
result: 'r'
|
|
127
|
+
- pattern: (ল=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
128
|
+
result: 'l'
|
|
129
|
+
- pattern: (ৱ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
130
|
+
result: 'w'
|
|
131
|
+
- pattern: (শ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
132
|
+
result: 'ś'
|
|
133
|
+
- pattern: (ষ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
134
|
+
result: 'sh'
|
|
135
|
+
- pattern: (স=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
136
|
+
result: 's'
|
|
137
|
+
- pattern: (হ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
|
|
138
|
+
result: 'h'
|
|
62
139
|
# note[3]
|
|
63
140
|
- pattern: \u0981(?=[কখগঘঙচছজঝঞটঠডড়ঢঢ়ণতৎথদধন]) # ঁ before guttural, palatal, cerebral, and dental
|
|
64
141
|
result: ṅ
|
|
@@ -162,4 +239,21 @@ map:
|
|
|
162
239
|
'\u09cb': 'o'
|
|
163
240
|
'\u09cc': 'au'
|
|
164
241
|
'।': '.'
|
|
242
|
+
'्': ''
|
|
165
243
|
'\u09CD': '' # Used for joining
|
|
244
|
+
|
|
245
|
+
# Digits
|
|
246
|
+
|
|
247
|
+
'১': '1'
|
|
248
|
+
'২': '2'
|
|
249
|
+
'৩': '3'
|
|
250
|
+
'৪': '4'
|
|
251
|
+
'৫': '5'
|
|
252
|
+
'৬': '6'
|
|
253
|
+
'৭': '7'
|
|
254
|
+
'৮': '8'
|
|
255
|
+
'৯': '9'
|
|
256
|
+
'০': '0'
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
|