interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82fae2b248d9c86139b7f188da2ac72699696c9768e54a4510a6f1af2b933dc9
4
- data.tar.gz: 3e2cc24b8d33f5a8ed0f8b475e4d109049439221274ecda9ee1b9c7743896e07
3
+ metadata.gz: 7813dba0d0cc7493ed3b9279c61283c8d305f1e05584a44aa700e9b72acb2f06
4
+ data.tar.gz: f4c87e24d7c2719b4f358198967d55e0c17d8aaac354311cb9eecc800a592b2d
5
5
  SHA512:
6
- metadata.gz: 5f9925a97d17f0433446a898f63d18869a73e92f9975d8259c916dae242fc5b15ff93dd6d3c28ca2ff5bcbda29489d4ea59af26d4e16f2e1416d701354a6f6e2
7
- data.tar.gz: 77321c4a1001cabda8cc057950682037b637176fd05637ce78a6ea698d8c55e238e6bbcc850dda97828c719afbdae1192a8e7a93fd2307a9d52196342fb015f5
6
+ metadata.gz: 8e23000fe8fb016dec9351241787608c892d7af48682259c10345ce417f94a626ea06a5bc8b1e7a3f084da8c0502d13b96bd06c9a53da31530f5c487b06fe4e9
7
+ data.tar.gz: b2d8cb122b2c1bbb2d989d832802a007a60ecc6d0f2984e323a983ec94ae9505664b98e0d9c33e0b2711f5fa31f4e657a720648dcb572915f69006a1719a9610
@@ -52,9 +52,7 @@ Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g
52
52
 
53
53
  [source,sh]
54
54
  ----
55
- pip3 install setuptools numpy
56
- curl -sSL -o sequitur-g2p.zip https://github.com/sequitur-g2p/sequitur-g2p/archive/806273f.zip
57
- pip3 install sequitur-g2p.zip
55
+ pip3 install -r requirments.txt
58
56
  ----
59
57
 
60
58
  Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
@@ -0,0 +1 @@
1
+ {"pan_Deva2Latn_ALA_1997":"alalc-pan-Guru-Latn-1997","kor_Hang2Latn_ALA_1997":"alalc-kor-Hang-Latn-1997","asm_Deva2Latn_ALA_1997":"alalc-asm-Deva-Latn-1997","aze_Cyrl2Latn_ALA_1997":"alalc-aze-Cyrl-Latn-1997","ukr_Cyrl2Latn_GUP_1996":"ua-ukr-Cyrl-Latn-1996","tha_Thai2Latn_RIT_1968":"royin-tha-Thai-Latn-1968","bul_Cyrl2Latn_BGN_1952":"bgnpcgn-bul-Cyrl-Latn-1952","tam_Taml2Latn_ALA_1997":"alalc-tam-Taml-Latn-1997","kor_Hang2Latn_GKN_2002":"kp-kor-Hang-Latn-2002","ell_Grek2Latn_ELOT743_1996":"bgnpcgn-ell-Grek-Latn-1996","zho_Hani2Latn_AcadSin_2002":"acadsin-zho-Hani-Latn-2002","ara_Arab2Latn_SES_1930":"ses-ara-Arab-Latn-1930","tgk_Cyrl2Latn_BGN_1994":"bgnpcgn-tgk-Cyrl-Latn-1994","fas_Arab2Latn_BGN_1958":"bgnpcgn-fas-Arab-Latn-1956","sin_Sinh2Latn_ALA_1997":"alalc-sin-Sinh-Latn-1997","uas_Arab2Latn_BGN_2007":"bgnpcgn-urd-Arab-Latn-2007","ukr_Cyrl2Latn_ALA_1997":"alalc-ukr-Cyrl-Latn-1997","bak_Cyrl2Latn_BGN_2007":"bgnpcgn-bak-Cyrl-Latn-2007","tam_Taml2Latn_ALA_2011":"alalc-tam-Taml-Latn-2011","ara_Arab2Latn_BGN_1956":"bgnpcgn-ara-Arab-Latn-1956","ell_Grek2Latn_ALA_1997":"alalc-ell-Grek-Latn-1997","rus_Cyrl2Latn_GOST_1983":"gost-rus-Cyrl-Latn-16876-71-1983","mar_Deva2Latn_ALA_1997":"alalc-mar-Deva-Latn-1997","bel_Cyrl2Latn_ALA_1997":"alalc-bel-Cyrl-Latn-1997","kat_Geor2Latn_ALA_1997":"alalc-kat-Geor-Latn-1997","bul_Cyrl2Latn_ALA_1997":"alalc-bul-Cyrl-Latn-1997","ara_Arab2Latn_ALA_1997":"alalc-ara-Arab-Latn-1997","mon_Cyrl2Latn_ALA_1997":"alalc-mon-Cyrl-Latn-1997","div_Thaa2Latn_GMV_1988":"bgnpcgn-div-Thaa-Latn-1988","hin_Deva2Latn_ALA_1997":"alalc-hin-Deva-Latn-1997","bel_Cyrl2Latn_GBO_1998":"by-bel-Cyrl-Latn-1998","ukr_Cyrl2Latn_BGN_1965":"bgnpcgn-ukr-Cyrl-Latn-1965","rus_Cyrl2Latn_ALA_1997":"alalc-rus-Cyrl-Latn-1997","tir_Thai2Latn_RIT_2000":"royin-tha-Thai-Latn-1999","guj_Gujr2Latn_ALA_1997":"alalc-guj-Gujr-Latn-1997","tel_Telu2Latn_ALA_1997":"alalc-tel-Telu-Latn-1997","mkd_Cyrl2Latn_BGN_1981":"bgnpcgn-mkd-Cyrl-Latn-1981","ori_Orya2Latn_ALA_2011":"alalc-ori-Orya-Latn-2011","aze_Arab2Latn_ALA_1997":"alalc-aze-Arab-Latn-1997","ori_Orya2Latn_ALA_1997":"alalc-ori-Orya-Latn-1997","div_Thaa2Latn_ALA_1997":"alalc-div-Thaa-Latn-1997","rue_Cyrl2Latn_BGN_2016":"bgnpcgn-rue-Cyrl-Latn-2016","guj_Gujr2Latn_ALA_2011":"alalc-guj-Gujr-Latn-2011","kat_Geor2Latn_BGN_1981":"bgnpcgn-kat-Geor-Latn-1981","kor_Hang2Latn_MOCT_2000":"moct-kor-Hang-Latn-2000","sin_Sinh2Latn_ALA_2011":"alalc-sin-Sinh-Latn-2011","amh_Ethi2Latn_BGN_1967":"bgnpcgn-amh-Ethi-Latn-1967","srp_Cyrl2Latn_BGN_2005":"bgnpcgn-srp-Cyrl-Latn-2005","srp_Cyrl2Latn_ALA_1997":"alalc-srp-Cyrl-Latn-1997","mal_Mlym2Latn_ALA_2012":"alalc-mal-Mlym-Latn-2012","kat_Geor2Latn_GGG_2002":"ggg-kat-Geor-Latn-2002","mon_Cyrl2Latn_BGN_1964":"bgnpcgn-mon-Cyrl-Latn-1964","mal_Mlym2Latn_ALA_1997":"alalc-mal-Mlym-Latn-1997","ben_Beng2Latn_ALA_1997":"alalc-ben-Beng-Latn-1997","kor_Hang2Latn_MR_1939":"bgn-kor-Hang-Latn-1943","zho_Hani2Latn_GCH_1979":"sac-zho-Hans-Latn-1979","bul_Cyrl2Latn_BGN_2013":"bgnpcgn-bul-Cyrl-Latn-2013","ell_Grek2Latn_BGN_1962":"bgnpcgn-ell-Grek-Latn-1962","amh_Ethi2Latn_ALA_1997":"alalc-amh-Ethi-Latn-1997","pan_Deva2Latn_ALA_2011":"alalc-pan-Guru-Latn-2011","zho_Hani2Latn_WDG_1979":"var-zho-Hani-Latn-wd-1979","rus_Cyrl2Latn_BGN_1947":"bgnpcgn-rus-Cyrl-Latn-1947","bel_Cyrl2Latn_BGN_1979":"bgnpcgn-bel-Cyrl-Latn-1979","tat_Cyrl2Latn_BGN_2005":"bgnpcgn-tat-Cyrl-Latn-2007"}
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "interscript/opal/maps" if RUBY_ENGINE == "opal"
4
3
  require "interscript/mapping"
5
4
 
6
5
  # Transliteration
@@ -21,6 +20,8 @@ module Interscript
21
20
  class << self
22
21
 
23
22
  def transliterate(system_code, string, maps={})
23
+ system_code = map_resolve(system_code)
24
+
24
25
  unless maps.has_key? system_code
25
26
  maps[system_code] = Interscript::Mapping.for(system_code)
26
27
  end
@@ -39,8 +40,6 @@ module Interscript
39
40
  title_case = mapping.title_case
40
41
  downcase = mapping.downcase
41
42
 
42
- # charmap = mapping.characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
43
- # dictmap = mapping.dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
44
43
  charmap = mapping.characters_hash
45
44
  dictmap = mapping.dictionary_hash
46
45
  trie = mapping.dictionary_trie
@@ -134,6 +133,12 @@ module Interscript
134
133
  output.unicode_normalize
135
134
  end
136
135
 
136
+ def map_resolve(map)
137
+ map = aliases[map] if aliases.key? map
138
+ raise ArgumentError, "Map #{map} doesn't exist" unless map_exist? map
139
+ map
140
+ end
141
+
137
142
  private
138
143
 
139
144
  def add_separator(separator, pos, result)
@@ -59,8 +59,35 @@ module Interscript
59
59
  string
60
60
  end
61
61
 
62
+ def aliases (refresh: false)
63
+ file = root_path.join("./aliases.json").to_s
64
+ if !refresh && File.exist?(file)
65
+ JSON.load(File.read(file))
66
+ elsif !refresh && @aliases
67
+ @aliases
68
+ else
69
+ @aliases = {}
70
+ Dir[root_path.join('./maps/*.yaml').to_s].each do |yaml_file|
71
+ org_name = File.basename(yaml_file, ".yaml")
72
+ map = YAML.load_file(yaml_file)
73
+ (map["alias"] || {}).each do |k,v|
74
+ @aliases[v["code"]] = org_name
75
+ end
76
+ end
77
+
78
+ # Try to save it to a file, but not force it.
79
+ File.write("aliases.json", JSON.dump(@aliases)) rescue nil
80
+
81
+ @aliases
82
+ end
83
+ end
84
+
62
85
  private
63
86
 
87
+ def map_exist?(map)
88
+ File.exist?(root_path.join("./maps/" + map + ".yaml").to_s)
89
+ end
90
+
64
91
  def mkregexp(regexpstring)
65
92
  /#{regexpstring}/u
66
93
  end
@@ -76,7 +76,7 @@ module Interscript
76
76
  end
77
77
 
78
78
  def load_opal_mappings
79
- JSON.parse(`InterscriptMaps[#{system_code}]`)
79
+ JSON.parse(`Opal.global.InterscriptMaps[#{system_code}]`)
80
80
  end
81
81
 
82
82
  def load_fs_mappings
@@ -127,6 +127,8 @@ module Interscript
127
127
  @characters = (inherited_mapping.characters|| {}).merge(characters)
128
128
  @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
129
129
  end
130
+
131
+ @characters.compact! # the feature to ignore characters from inherited
130
132
  end
131
133
 
132
134
  def build_hashes
@@ -39,13 +39,152 @@ module Interscript
39
39
  string
40
40
  end
41
41
 
42
- # name is unused
43
- def load_map_json(name, json)
44
- JSON.load(json).each do |k,v|
42
+ def load_map_json(_, json)
43
+ json = Hash.new(json) if native? json
44
+ json = JSON.load(json) if String === json
45
+ json.each do |k,v|
45
46
  `Opal.global.InterscriptMaps[#{k}] = #{JSON.dump(v)}`
46
47
  end
47
48
  end
48
49
 
50
+ # Use #on_load_maps if possible. It will be available earlier.
51
+ # See lib/interscript/opal/entrypoint.rb
52
+ def load_maps(opts, &block)
53
+ # Convert arg
54
+ opts = Hash.new(opts) if native? opts
55
+
56
+ defaults = {
57
+ maps: [],
58
+ path: nil,
59
+ node_path: "./maps/",
60
+ ajax_path: "maps/",
61
+ loader: nil,
62
+ processor: proc { |i| i },
63
+ }
64
+
65
+ opts = defaults.merge opts
66
+ opts[:maps] = Array(opts[:maps])
67
+
68
+ %x{
69
+ var ajax_loader = function(map) {
70
+ return new Promise(function(ok, fail) {
71
+ var httpRequest = new XMLHttpRequest();
72
+ httpRequest.onreadystatechange = function() {
73
+ if (httpRequest.readyState === XMLHttpRequest.DONE) {
74
+ if (httpRequest.responseText) {
75
+ ok(JSON.parse(httpRequest.responseText));
76
+ }
77
+ else {
78
+ if (is_local) {
79
+ console.log(httpRequest.responseText);
80
+ fail("Ajax failed load: "+map+". Status: "+httpRequest.statusText+". "+
81
+ "Are you running this locally? Try adding: "+
82
+ "--allow-file-access-from-files to your Chromium command line.")
83
+ }
84
+ else fail("Ajax failed load: "+map+". Status: "+httpRequest.statusText);
85
+ }
86
+ }
87
+ };
88
+ httpRequest.open('GET', #{opts[:path] || opts[:ajax_path]}+map+".json", true);
89
+ httpRequest.send();
90
+ });
91
+ };
92
+
93
+ var fetch_loader = function(map) {
94
+ return fetch(#{opts[:path] || opts[:ajax_path]}+map+".json").then(function(response) {
95
+ return response.json();
96
+ });
97
+ };
98
+
99
+ var node_loader = function(map) {
100
+ var resolve = null, error = null;
101
+ var prom = new Promise(function(ok, fail) {
102
+ resolve = ok;
103
+ error = fail;
104
+ });
105
+ try {
106
+ var node_require = eval("require");
107
+ var data = node_require(#{opts[:path] || opts[:node_path]}+map+'.json');
108
+ resolve(data);
109
+ }
110
+ catch(e) {
111
+ error("Node failed load: "+map+". Error: "+e);
112
+ }
113
+ return prom;
114
+ };
115
+
116
+ var is_local = false;
117
+ if (typeof document !== "undefined" &&
118
+ typeof document.location !== "undefined" &&
119
+ typeof document.location.protocol !== "undefined") {
120
+ is_local = document.location.protocol == "file:";
121
+ }
122
+
123
+ var loader = function(map) {
124
+ if (#{opts[:loader] != nil}) {
125
+ return #{opts[:loader]}(#{opts[:path]}+map+'.json').then(#{opts[:processor]});
126
+ }
127
+ else if (typeof window !== "undefined") {
128
+ return ajax_loader(map);
129
+ }
130
+ else if (typeof global !== "undefined") {
131
+ return node_loader(map);
132
+ }
133
+ else if (!is_local && typeof fetch === "function") {
134
+ return fetch_loader(map);
135
+ }
136
+ else {
137
+ #{raise StandardError, "We couldn't find a good way to load a map"}
138
+ }
139
+ };
140
+ }
141
+
142
+ prom = `new Promise(function(ok, fail) {
143
+ #{
144
+ maps = opts[:maps]
145
+ maps = maps.map { |i| map_resolve i }
146
+ maps = maps.reject { |i| map_loaded? i }
147
+ #p ["Loading:", maps]
148
+ maps = maps.map do |i|
149
+ `loader(#{i})`.JS.then do |map|
150
+ load_map_json(nil, map)
151
+
152
+ m = Native(map)
153
+ inherits = []
154
+ m.each do |mapname, mapvalue|
155
+ inherits += Array(Native(mapvalue)[:map][:inherit])
156
+ inherits += Array(Native(mapvalue)[:chain])
157
+ end
158
+ inherits = inherits.uniq
159
+ inherits = inherits.reject { |i| map_loaded? i }
160
+
161
+ load_maps(opts.merge({maps: inherits})) unless inherits.empty?
162
+ end.JS.catch do |response|
163
+ `fail(#{response})`
164
+ end
165
+ end
166
+ }
167
+ Promise.all(#{maps}).then(ok).catch(fail);
168
+ })`
169
+
170
+ if block_given?
171
+ prom.JS.then(block)
172
+ else
173
+ prom
174
+ end
175
+ end
176
+
177
+ def aliases
178
+ @aliases ||= Hash.new(`Opal.global.InterscriptMapAliases`)
179
+ end
180
+
181
+ def map_exist?(map)
182
+ `typeof(Opal.global.InterscriptMaps[#{map}]) !== 'undefined'`
183
+ end
184
+
185
+ def map_loaded?(map)
186
+ `!!Opal.global.InterscriptMaps[#{map}]`
187
+ end
49
188
  end
50
189
  end
51
190
 
@@ -1,10 +1,18 @@
1
1
  require "opal"
2
+ require "interscript/opal/maps"
2
3
  require "onigmo/onigmo-wasm"
3
4
 
4
5
  module Interscript
5
6
  def self.on_load(&block)
6
7
  WebAssembly.wait_for("onigmo/onigmo-wasm", &block)
7
8
  end
9
+
10
+ # on_load + load_maps
11
+ def self.on_load_maps(arg, &block)
12
+ self.on_load.JS.then do
13
+ self.load_maps(arg, &block)
14
+ end
15
+ end
8
16
  end
9
17
 
10
18
  Interscript.on_load do
@@ -0,0 +1,11 @@
1
+ # Exports - the methods used publically. Listed so that
2
+ # opal-optimizer will know the methods it needs to preserve.
3
+
4
+ Interscript.on_load
5
+ Interscript.on_load_maps
6
+ Interscript.load_maps
7
+ Interscript.load_map_json
8
+ Interscript.transliterate
9
+
10
+ # TODO: Fix handling of "ord".$to_proc() in opal-optimizer
11
+ "a".ord
@@ -3,8 +3,6 @@ Opal.global.InterscriptMaps = {
3
3
  <% Dir['maps/*.yaml'].each do |yaml_file| %>
4
4
  "<%= File.basename(yaml_file, ".yaml") %>": null,
5
5
  <% end %>
6
- }
6
+ };
7
7
 
8
- <%#=
9
- JSON.dump(Interscript::OpalMapTranslate.translate_regexp(JSON.dump(YAML.load(File.read(yaml_file)))))
10
- %>
8
+ Opal.global.InterscriptMapAliases = <%= File.read('aliases.json') %>;
@@ -1,3 +1,3 @@
1
1
  module Interscript
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.9"
3
3
  end
@@ -1022,11 +1022,11 @@ map:
1022
1022
  '\u064f': 'u'
1023
1023
  '\u0650': 'i'
1024
1024
  '\u064e\u0627': 'ā' # see Rule 5
1025
- '\ufeef \u064e': 'á' # see Rule 6(a)
1026
- '\ufeed \u064f': 'ū'
1027
- '\ufeef \u0650': 'ī'
1028
- '\ufeed\u0652 \u064e': 'aw'
1029
- '\ufeef\u0652 \u064e': 'ay'
1025
+ '\u064e\u0649': 'á' # see Rule 6(a)
1026
+ '\u064f\u0648': 'ū'
1027
+ '\u0650\u064a': 'ī'
1028
+ '\u064e\u0648\u0652': 'aw'
1029
+ '\u064e\u064a\u0652': 'ay'
1030
1030
 
1031
1031
  # Letters Representing Non-Arabic Consonants
1032
1032
  # (this list in not exhaustive)
@@ -35,30 +35,107 @@ notes:
35
35
 
36
36
  tests:
37
37
  - source: "অসমীয়া কবিতা"
38
- expected: "asamaīẏaā kabaitaā"
38
+ expected: "asamīẏā kabitā"
39
39
  - source: "কবিৰ আজি জন্মদিন"
40
- expected: "kabaira ājai janamadaina"
40
+ expected: "kabira āji janmadina"
41
41
  - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
42
- expected: "baerauṭata emaāhara paāchatae paunara bhayaṃkara aganaikaāṇaḍa"
42
+ expected: "beruṭata emāhara pāchate punara bhayaṃkara agnikāṇḍa"
43
43
  - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
44
- expected: "bhaṅaāra bairaudadhae āwaedana daākhaila kaṃganaāra"
44
+ expected: "bhaṅāra biruddhe āwedana dākhila kaṃganāra"
45
45
  - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
46
- expected: "āpaunai paṛhai bhaāla paāba paraā baātarai"
46
+ expected: "āpuni paṛhi bhāla pāba parā bātari"
47
47
  - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
48
- expected: "śaraīraāmapaurata garaubharatai ṭaraāka jabada, daujanaka āṭaka"
48
+ expected: "śrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
49
49
  - source: "কেনে আছে প্ৰাক্তন"
50
- expected: "kaenae āchae paraākatana"
50
+ expected: "kene āche prāktana"
51
51
  - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
52
- expected: "kamaumabaāira maeẏarara daehata kaobhaiḍa pajaiṭaibha"
52
+ expected: "kamumbāira meẏarara dehata kobhiḍa pajiṭibha"
53
53
  - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
54
- expected: "ṭauiṭaāraẏaogae khaoda sadaraī karae ei kathaā"
54
+ expected: "ṭuiṭāraẏoge khoda sadarī kare ei kathā"
55
55
  - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
56
- expected: "lakhaimapaura jailaāra naāraāẏaṇapaurara barapathaārata ājai paraśaānatai dhaāma naāmaerae ekhana baṛdadhaāśaramara śaubhaāramabha karaā haẏa"
56
+ expected: "lakhimapura jilāra nārāẏaṇapurara barapathārata āji praśānti dhāma nāmere ekhana bṛddhāśramara śubhārambha karā haẏa"
57
57
 
58
58
 
59
59
  map:
60
60
 
61
61
  rules:
62
+ # note[2]
63
+ - pattern: (ক=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
64
+ result: 'k'
65
+ - pattern: (খ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
66
+ result: 'kh'
67
+ - pattern: (গ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
68
+ result: 'g'
69
+ - pattern: (ঘ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
70
+ result: 'gh'
71
+ - pattern: (ঙ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
72
+ result: 'ṅ'
73
+ - pattern: (চ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
74
+ result: 'c'
75
+ - pattern: (ছ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
76
+ result: 'ch'
77
+ - pattern: (জ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
78
+ result: 'j'
79
+ - pattern: (ঝ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
80
+ result: 'jh'
81
+ - pattern: (ঞ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
82
+ result: 'ñ'
83
+ - pattern: (ট=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
84
+ result: 'ṭ'
85
+ - pattern: (ঠ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
86
+ result: 'ṭh'
87
+ - pattern: (ড=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
88
+ result: 'ḍ'
89
+ - pattern: (ড়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
90
+ result: 'ṛ'
91
+ - pattern: (ঢ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
92
+ result: 'ḍh'
93
+ - pattern: (ঢ়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
94
+ result: 'ṛh'
95
+ - pattern: (ণ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
96
+ result: 'ṇ'
97
+ - pattern: (ত=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
98
+ result: 't'
99
+ - pattern: (ৎ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
100
+ result: 'ṭ'
101
+ - pattern: (থ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
102
+ result: 'th'
103
+ - pattern: (দ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
104
+ result: 'd'
105
+ - pattern: (ধ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
106
+ result: 'dh'
107
+ - pattern: (ন=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
108
+ result: 'n'
109
+ - pattern: (প=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
110
+ result: 'p'
111
+ - pattern: (ফ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
112
+ result: 'ph'
113
+ - pattern: (ব=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
114
+ result: 'b'
115
+ - pattern: (ভ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
116
+ result: 'bh'
117
+ - pattern: (ম=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
118
+ result: 'm'
119
+ - pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
120
+ result: 'y'
121
+ - pattern: (য=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
122
+ result: 'ẏ'
123
+ - pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
124
+ result: 'ẏ'
125
+ - pattern: (ৰ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
126
+ result: 'r'
127
+ - pattern: (ল=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
128
+ result: 'l'
129
+ - pattern: (ৱ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
130
+ result: 'w'
131
+ - pattern: (শ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
132
+ result: 'ś'
133
+ - pattern: (ষ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
134
+ result: 'sh'
135
+ - pattern: (স=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
136
+ result: 's'
137
+ - pattern: (হ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
138
+ result: 'h'
62
139
  # note[3]
63
140
  - pattern: \u0981(?=[কখগঘঙচছজঝঞটঠডড়ঢঢ়ণতৎথদধন]) # ঁ before guttural, palatal, cerebral, and dental
64
141
  result: ṅ
@@ -162,4 +239,21 @@ map:
162
239
  '\u09cb': 'o'
163
240
  '\u09cc': 'au'
164
241
  '।': '.'
242
+ '्': ''
165
243
  '\u09CD': '' # Used for joining
244
+
245
+ # Digits
246
+
247
+ '১': '1'
248
+ '২': '2'
249
+ '৩': '3'
250
+ '৪': '4'
251
+ '৫': '5'
252
+ '৬': '6'
253
+ '৭': '7'
254
+ '৮': '8'
255
+ '৯': '9'
256
+ '০': '0'
257
+
258
+
259
+