interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82fae2b248d9c86139b7f188da2ac72699696c9768e54a4510a6f1af2b933dc9
4
- data.tar.gz: 3e2cc24b8d33f5a8ed0f8b475e4d109049439221274ecda9ee1b9c7743896e07
3
+ metadata.gz: 7813dba0d0cc7493ed3b9279c61283c8d305f1e05584a44aa700e9b72acb2f06
4
+ data.tar.gz: f4c87e24d7c2719b4f358198967d55e0c17d8aaac354311cb9eecc800a592b2d
5
5
  SHA512:
6
- metadata.gz: 5f9925a97d17f0433446a898f63d18869a73e92f9975d8259c916dae242fc5b15ff93dd6d3c28ca2ff5bcbda29489d4ea59af26d4e16f2e1416d701354a6f6e2
7
- data.tar.gz: 77321c4a1001cabda8cc057950682037b637176fd05637ce78a6ea698d8c55e238e6bbcc850dda97828c719afbdae1192a8e7a93fd2307a9d52196342fb015f5
6
+ metadata.gz: 8e23000fe8fb016dec9351241787608c892d7af48682259c10345ce417f94a626ea06a5bc8b1e7a3f084da8c0502d13b96bd06c9a53da31530f5c487b06fe4e9
7
+ data.tar.gz: b2d8cb122b2c1bbb2d989d832802a007a60ecc6d0f2984e323a983ec94ae9505664b98e0d9c33e0b2711f5fa31f4e657a720648dcb572915f69006a1719a9610
@@ -52,9 +52,7 @@ Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g
52
52
 
53
53
  [source,sh]
54
54
  ----
55
- pip3 install setuptools numpy
56
- curl -sSL -o sequitur-g2p.zip https://github.com/sequitur-g2p/sequitur-g2p/archive/806273f.zip
57
- pip3 install sequitur-g2p.zip
55
+ pip3 install -r requirements.txt
58
56
  ----
59
57
 
60
58
  Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
@@ -0,0 +1 @@
1
+ {"pan_Deva2Latn_ALA_1997":"alalc-pan-Guru-Latn-1997","kor_Hang2Latn_ALA_1997":"alalc-kor-Hang-Latn-1997","asm_Deva2Latn_ALA_1997":"alalc-asm-Deva-Latn-1997","aze_Cyrl2Latn_ALA_1997":"alalc-aze-Cyrl-Latn-1997","ukr_Cyrl2Latn_GUP_1996":"ua-ukr-Cyrl-Latn-1996","tha_Thai2Latn_RIT_1968":"royin-tha-Thai-Latn-1968","bul_Cyrl2Latn_BGN_1952":"bgnpcgn-bul-Cyrl-Latn-1952","tam_Taml2Latn_ALA_1997":"alalc-tam-Taml-Latn-1997","kor_Hang2Latn_GKN_2002":"kp-kor-Hang-Latn-2002","ell_Grek2Latn_ELOT743_1996":"bgnpcgn-ell-Grek-Latn-1996","zho_Hani2Latn_AcadSin_2002":"acadsin-zho-Hani-Latn-2002","ara_Arab2Latn_SES_1930":"ses-ara-Arab-Latn-1930","tgk_Cyrl2Latn_BGN_1994":"bgnpcgn-tgk-Cyrl-Latn-1994","fas_Arab2Latn_BGN_1958":"bgnpcgn-fas-Arab-Latn-1956","sin_Sinh2Latn_ALA_1997":"alalc-sin-Sinh-Latn-1997","uas_Arab2Latn_BGN_2007":"bgnpcgn-urd-Arab-Latn-2007","ukr_Cyrl2Latn_ALA_1997":"alalc-ukr-Cyrl-Latn-1997","bak_Cyrl2Latn_BGN_2007":"bgnpcgn-bak-Cyrl-Latn-2007","tam_Taml2Latn_ALA_2011":"alalc-tam-Taml-Latn-2011","ara_Arab2Latn_BGN_1956":"bgnpcgn-ara-Arab-Latn-1956","ell_Grek2Latn_ALA_1997":"alalc-ell-Grek-Latn-1997","rus_Cyrl2Latn_GOST_1983":"gost-rus-Cyrl-Latn-16876-71-1983","mar_Deva2Latn_ALA_1997":"alalc-mar-Deva-Latn-1997","bel_Cyrl2Latn_ALA_1997":"alalc-bel-Cyrl-Latn-1997","kat_Geor2Latn_ALA_1997":"alalc-kat-Geor-Latn-1997","bul_Cyrl2Latn_ALA_1997":"alalc-bul-Cyrl-Latn-1997","ara_Arab2Latn_ALA_1997":"alalc-ara-Arab-Latn-1997","mon_Cyrl2Latn_ALA_1997":"alalc-mon-Cyrl-Latn-1997","div_Thaa2Latn_GMV_1988":"bgnpcgn-div-Thaa-Latn-1988","hin_Deva2Latn_ALA_1997":"alalc-hin-Deva-Latn-1997","bel_Cyrl2Latn_GBO_1998":"by-bel-Cyrl-Latn-1998","ukr_Cyrl2Latn_BGN_1965":"bgnpcgn-ukr-Cyrl-Latn-1965","rus_Cyrl2Latn_ALA_1997":"alalc-rus-Cyrl-Latn-1997","tir_Thai2Latn_RIT_2000":"royin-tha-Thai-Latn-1999","guj_Gujr2Latn_ALA_1997":"alalc-guj-Gujr-Latn-1997","tel_Telu2Latn_ALA_1997":"alalc-tel-Telu-Latn-1997","mkd_Cyrl2Latn_BGN_1981":"bgnpcgn-mkd-Cyrl-Latn-1981","ori_Orya2Latn_ALA_2011":"alalc-ori-Orya-Latn
-2011","aze_Arab2Latn_ALA_1997":"alalc-aze-Arab-Latn-1997","ori_Orya2Latn_ALA_1997":"alalc-ori-Orya-Latn-1997","div_Thaa2Latn_ALA_1997":"alalc-div-Thaa-Latn-1997","rue_Cyrl2Latn_BGN_2016":"bgnpcgn-rue-Cyrl-Latn-2016","guj_Gujr2Latn_ALA_2011":"alalc-guj-Gujr-Latn-2011","kat_Geor2Latn_BGN_1981":"bgnpcgn-kat-Geor-Latn-1981","kor_Hang2Latn_MOCT_2000":"moct-kor-Hang-Latn-2000","sin_Sinh2Latn_ALA_2011":"alalc-sin-Sinh-Latn-2011","amh_Ethi2Latn_BGN_1967":"bgnpcgn-amh-Ethi-Latn-1967","srp_Cyrl2Latn_BGN_2005":"bgnpcgn-srp-Cyrl-Latn-2005","srp_Cyrl2Latn_ALA_1997":"alalc-srp-Cyrl-Latn-1997","mal_Mlym2Latn_ALA_2012":"alalc-mal-Mlym-Latn-2012","kat_Geor2Latn_GGG_2002":"ggg-kat-Geor-Latn-2002","mon_Cyrl2Latn_BGN_1964":"bgnpcgn-mon-Cyrl-Latn-1964","mal_Mlym2Latn_ALA_1997":"alalc-mal-Mlym-Latn-1997","ben_Beng2Latn_ALA_1997":"alalc-ben-Beng-Latn-1997","kor_Hang2Latn_MR_1939":"bgn-kor-Hang-Latn-1943","zho_Hani2Latn_GCH_1979":"sac-zho-Hans-Latn-1979","bul_Cyrl2Latn_BGN_2013":"bgnpcgn-bul-Cyrl-Latn-2013","ell_Grek2Latn_BGN_1962":"bgnpcgn-ell-Grek-Latn-1962","amh_Ethi2Latn_ALA_1997":"alalc-amh-Ethi-Latn-1997","pan_Deva2Latn_ALA_2011":"alalc-pan-Guru-Latn-2011","zho_Hani2Latn_WDG_1979":"var-zho-Hani-Latn-wd-1979","rus_Cyrl2Latn_BGN_1947":"bgnpcgn-rus-Cyrl-Latn-1947","bel_Cyrl2Latn_BGN_1979":"bgnpcgn-bel-Cyrl-Latn-1979","tat_Cyrl2Latn_BGN_2005":"bgnpcgn-tat-Cyrl-Latn-2007"}
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "interscript/opal/maps" if RUBY_ENGINE == "opal"
4
3
  require "interscript/mapping"
5
4
 
6
5
  # Transliteration
@@ -21,6 +20,8 @@ module Interscript
21
20
  class << self
22
21
 
23
22
  def transliterate(system_code, string, maps={})
23
+ system_code = map_resolve(system_code)
24
+
24
25
  unless maps.has_key? system_code
25
26
  maps[system_code] = Interscript::Mapping.for(system_code)
26
27
  end
@@ -39,8 +40,6 @@ module Interscript
39
40
  title_case = mapping.title_case
40
41
  downcase = mapping.downcase
41
42
 
42
- # charmap = mapping.characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
43
- # dictmap = mapping.dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
44
43
  charmap = mapping.characters_hash
45
44
  dictmap = mapping.dictionary_hash
46
45
  trie = mapping.dictionary_trie
@@ -134,6 +133,12 @@ module Interscript
134
133
  output.unicode_normalize
135
134
  end
136
135
 
136
+ def map_resolve(map)
137
+ map = aliases[map] if aliases.key? map
138
+ raise ArgumentError, "Map #{map} doesn't exist" unless map_exist? map
139
+ map
140
+ end
141
+
137
142
  private
138
143
 
139
144
  def add_separator(separator, pos, result)
@@ -59,8 +59,35 @@ module Interscript
59
59
  string
60
60
  end
61
61
 
62
+ def aliases (refresh: false)
63
+ file = root_path.join("./aliases.json").to_s
64
+ if !refresh && File.exist?(file)
65
+ JSON.load(File.read(file))
66
+ elsif !refresh && @aliases
67
+ @aliases
68
+ else
69
+ @aliases = {}
70
+ Dir[root_path.join('./maps/*.yaml').to_s].each do |yaml_file|
71
+ org_name = File.basename(yaml_file, ".yaml")
72
+ map = YAML.load_file(yaml_file)
73
+ (map["alias"] || {}).each do |k,v|
74
+ @aliases[v["code"]] = org_name
75
+ end
76
+ end
77
+
78
+ # Try to save it to a file, but don't fail if that isn't possible.
79
+ File.write("aliases.json", JSON.dump(@aliases)) rescue nil
80
+
81
+ @aliases
82
+ end
83
+ end
84
+
62
85
  private
63
86
 
87
+ def map_exist?(map)
88
+ File.exist?(root_path.join("./maps/" + map + ".yaml").to_s)
89
+ end
90
+
64
91
  def mkregexp(regexpstring)
65
92
  /#{regexpstring}/u
66
93
  end
@@ -76,7 +76,7 @@ module Interscript
76
76
  end
77
77
 
78
78
  def load_opal_mappings
79
- JSON.parse(`InterscriptMaps[#{system_code}]`)
79
+ JSON.parse(`Opal.global.InterscriptMaps[#{system_code}]`)
80
80
  end
81
81
 
82
82
  def load_fs_mappings
@@ -127,6 +127,8 @@ module Interscript
127
127
  @characters = (inherited_mapping.characters|| {}).merge(characters)
128
128
  @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
129
129
  end
130
+
131
+ @characters.compact! # the feature to ignore characters from inherited
130
132
  end
131
133
 
132
134
  def build_hashes
@@ -39,13 +39,152 @@ module Interscript
39
39
  string
40
40
  end
41
41
 
42
- # name is unused
43
- def load_map_json(name, json)
44
- JSON.load(json).each do |k,v|
42
+ def load_map_json(_, json)
43
+ json = Hash.new(json) if native? json
44
+ json = JSON.load(json) if String === json
45
+ json.each do |k,v|
45
46
  `Opal.global.InterscriptMaps[#{k}] = #{JSON.dump(v)}`
46
47
  end
47
48
  end
48
49
 
50
+ # Use #on_load_maps if possible. It will be available earlier.
51
+ # See lib/interscript/opal/entrypoint.rb
52
+ def load_maps(opts, &block)
53
+ # Convert arg
54
+ opts = Hash.new(opts) if native? opts
55
+
56
+ defaults = {
57
+ maps: [],
58
+ path: nil,
59
+ node_path: "./maps/",
60
+ ajax_path: "maps/",
61
+ loader: nil,
62
+ processor: proc { |i| i },
63
+ }
64
+
65
+ opts = defaults.merge opts
66
+ opts[:maps] = Array(opts[:maps])
67
+
68
+ %x{
69
+ var ajax_loader = function(map) {
70
+ return new Promise(function(ok, fail) {
71
+ var httpRequest = new XMLHttpRequest();
72
+ httpRequest.onreadystatechange = function() {
73
+ if (httpRequest.readyState === XMLHttpRequest.DONE) {
74
+ if (httpRequest.responseText) {
75
+ ok(JSON.parse(httpRequest.responseText));
76
+ }
77
+ else {
78
+ if (is_local) {
79
+ console.log(httpRequest.responseText);
80
+ fail("Ajax failed load: "+map+". Status: "+httpRequest.statusText+". "+
81
+ "Are you running this locally? Try adding: "+
82
+ "--allow-file-access-from-files to your Chromium command line.")
83
+ }
84
+ else fail("Ajax failed load: "+map+". Status: "+httpRequest.statusText);
85
+ }
86
+ }
87
+ };
88
+ httpRequest.open('GET', #{opts[:path] || opts[:ajax_path]}+map+".json", true);
89
+ httpRequest.send();
90
+ });
91
+ };
92
+
93
+ var fetch_loader = function(map) {
94
+ return fetch(#{opts[:path] || opts[:ajax_path]}+map+".json").then(function(response) {
95
+ return response.json();
96
+ });
97
+ };
98
+
99
+ var node_loader = function(map) {
100
+ var resolve = null, error = null;
101
+ var prom = new Promise(function(ok, fail) {
102
+ resolve = ok;
103
+ error = fail;
104
+ });
105
+ try {
106
+ var node_require = eval("require");
107
+ var data = node_require(#{opts[:path] || opts[:node_path]}+map+'.json');
108
+ resolve(data);
109
+ }
110
+ catch(e) {
111
+ error("Node failed load: "+map+". Error: "+e);
112
+ }
113
+ return prom;
114
+ };
115
+
116
+ var is_local = false;
117
+ if (typeof document !== "undefined" &&
118
+ typeof document.location !== "undefined" &&
119
+ typeof document.location.protocol !== "undefined") {
120
+ is_local = document.location.protocol == "file:";
121
+ }
122
+
123
+ var loader = function(map) {
124
+ if (#{opts[:loader] != nil}) {
125
+ return #{opts[:loader]}(#{opts[:path]}+map+'.json').then(#{opts[:processor]});
126
+ }
127
+ else if (typeof window !== "undefined") {
128
+ return ajax_loader(map);
129
+ }
130
+ else if (typeof global !== "undefined") {
131
+ return node_loader(map);
132
+ }
133
+ else if (!is_local && typeof fetch === "function") {
134
+ return fetch_loader(map);
135
+ }
136
+ else {
137
+ #{raise StandardError, "We couldn't find a good way to load a map"}
138
+ }
139
+ };
140
+ }
141
+
142
+ prom = `new Promise(function(ok, fail) {
143
+ #{
144
+ maps = opts[:maps]
145
+ maps = maps.map { |i| map_resolve i }
146
+ maps = maps.reject { |i| map_loaded? i }
147
+ #p ["Loading:", maps]
148
+ maps = maps.map do |i|
149
+ `loader(#{i})`.JS.then do |map|
150
+ load_map_json(nil, map)
151
+
152
+ m = Native(map)
153
+ inherits = []
154
+ m.each do |mapname, mapvalue|
155
+ inherits += Array(Native(mapvalue)[:map][:inherit])
156
+ inherits += Array(Native(mapvalue)[:chain])
157
+ end
158
+ inherits = inherits.uniq
159
+ inherits = inherits.reject { |i| map_loaded? i }
160
+
161
+ load_maps(opts.merge({maps: inherits})) unless inherits.empty?
162
+ end.JS.catch do |response|
163
+ `fail(#{response})`
164
+ end
165
+ end
166
+ }
167
+ Promise.all(#{maps}).then(ok).catch(fail);
168
+ })`
169
+
170
+ if block_given?
171
+ prom.JS.then(block)
172
+ else
173
+ prom
174
+ end
175
+ end
176
+
177
+ def aliases
178
+ @aliases ||= Hash.new(`Opal.global.InterscriptMapAliases`)
179
+ end
180
+
181
+ def map_exist?(map)
182
+ `typeof(Opal.global.InterscriptMaps[#{map}]) !== 'undefined'`
183
+ end
184
+
185
+ def map_loaded?(map)
186
+ `!!Opal.global.InterscriptMaps[#{map}]`
187
+ end
49
188
  end
50
189
  end
51
190
 
@@ -1,10 +1,18 @@
1
1
  require "opal"
2
+ require "interscript/opal/maps"
2
3
  require "onigmo/onigmo-wasm"
3
4
 
4
5
  module Interscript
5
6
  def self.on_load(&block)
6
7
  WebAssembly.wait_for("onigmo/onigmo-wasm", &block)
7
8
  end
9
+
10
+ # on_load + load_maps
11
+ def self.on_load_maps(arg, &block)
12
+ self.on_load.JS.then do
13
+ self.load_maps(arg, &block)
14
+ end
15
+ end
8
16
  end
9
17
 
10
18
  Interscript.on_load do
@@ -0,0 +1,11 @@
1
+ # Exports - the methods used publicly. Listed so that
2
+ # opal-optimizer will know the methods it needs to preserve.
3
+
4
+ Interscript.on_load
5
+ Interscript.on_load_maps
6
+ Interscript.load_maps
7
+ Interscript.load_map_json
8
+ Interscript.transliterate
9
+
10
+ # TODO: Fix handling of "ord".$to_proc() in opal-optimizer
11
+ "a".ord
@@ -3,8 +3,6 @@ Opal.global.InterscriptMaps = {
3
3
  <% Dir['maps/*.yaml'].each do |yaml_file| %>
4
4
  "<%= File.basename(yaml_file, ".yaml") %>": null,
5
5
  <% end %>
6
- }
6
+ };
7
7
 
8
- <%#=
9
- JSON.dump(Interscript::OpalMapTranslate.translate_regexp(JSON.dump(YAML.load(File.read(yaml_file)))))
10
- %>
8
+ Opal.global.InterscriptMapAliases = <%= File.read('aliases.json') %>;
@@ -1,3 +1,3 @@
1
1
  module Interscript
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.9"
3
3
  end
@@ -1022,11 +1022,11 @@ map:
1022
1022
  '\u064f': 'u'
1023
1023
  '\u0650': 'i'
1024
1024
  '\u064e\u0627': 'ā' # see Rule 5
1025
- '\ufeef \u064e': 'á' # see Rule 6(a)
1026
- '\ufeed \u064f': 'ū'
1027
- '\ufeef \u0650': 'ī'
1028
- '\ufeed\u0652 \u064e': 'aw'
1029
- '\ufeef\u0652 \u064e': 'ay'
1025
+ '\u064e\u0649': 'á' # see Rule 6(a)
1026
+ '\u064f\u0648': 'ū'
1027
+ '\u0650\u064a': 'ī'
1028
+ '\u064e\u0648\u0652': 'aw'
1029
+ '\u064e\u064a\u0652': 'ay'
1030
1030
 
1031
1031
  # Letters Representing Non-Arabic Consonants
1032
1032
  # (this list is not exhaustive)
@@ -35,30 +35,107 @@ notes:
35
35
 
36
36
  tests:
37
37
  - source: "অসমীয়া কবিতা"
38
- expected: "asamaīẏaā kabaitaā"
38
+ expected: "asamīẏā kabitā"
39
39
  - source: "কবিৰ আজি জন্মদিন"
40
- expected: "kabaira ājai janamadaina"
40
+ expected: "kabira āji janmadina"
41
41
  - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
42
- expected: "baerauṭata emaāhara paāchatae paunara bhayaṃkara aganaikaāṇaḍa"
42
+ expected: "beruṭata emāhara pāchate punara bhayaṃkara agnikāṇḍa"
43
43
  - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
44
- expected: "bhaṅaāra bairaudadhae āwaedana daākhaila kaṃganaāra"
44
+ expected: "bhaṅāra biruddhe āwedana dākhila kaṃganāra"
45
45
  - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
46
- expected: "āpaunai paṛhai bhaāla paāba paraā baātarai"
46
+ expected: "āpuni paṛhi bhāla pāba parā bātari"
47
47
  - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
48
- expected: "śaraīraāmapaurata garaubharatai ṭaraāka jabada, daujanaka āṭaka"
48
+ expected: "śrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
49
49
  - source: "কেনে আছে প্ৰাক্তন"
50
- expected: "kaenae āchae paraākatana"
50
+ expected: "kene āche prāktana"
51
51
  - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
52
- expected: "kamaumabaāira maeẏarara daehata kaobhaiḍa pajaiṭaibha"
52
+ expected: "kamumbāira meẏarara dehata kobhiḍa pajiṭibha"
53
53
  - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
54
- expected: "ṭauiṭaāraẏaogae khaoda sadaraī karae ei kathaā"
54
+ expected: "ṭuiṭāraẏoge khoda sadarī kare ei kathā"
55
55
  - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
56
- expected: "lakhaimapaura jailaāra naāraāẏaṇapaurara barapathaārata ājai paraśaānatai dhaāma naāmaerae ekhana baṛdadhaāśaramara śaubhaāramabha karaā haẏa"
56
+ expected: "lakhimapura jilāra nārāẏaṇapurara barapathārata āji praśānti dhāma nāmere ekhana bṛddhāśramara śubhārambha karā haẏa"
57
57
 
58
58
 
59
59
  map:
60
60
 
61
61
  rules:
62
+ # note[2]
63
+ - pattern: (ক=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
64
+ result: 'k'
65
+ - pattern: (খ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
66
+ result: 'kh'
67
+ - pattern: (গ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
68
+ result: 'g'
69
+ - pattern: (ঘ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
70
+ result: 'gh'
71
+ - pattern: (ঙ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
72
+ result: 'ṅ'
73
+ - pattern: (চ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
74
+ result: 'c'
75
+ - pattern: (ছ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
76
+ result: 'ch'
77
+ - pattern: (জ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
78
+ result: 'j'
79
+ - pattern: (ঝ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
80
+ result: 'jh'
81
+ - pattern: (ঞ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
82
+ result: 'ñ'
83
+ - pattern: (ট=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
84
+ result: 'ṭ'
85
+ - pattern: (ঠ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
86
+ result: 'ṭh'
87
+ - pattern: (ড=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
88
+ result: 'ḍ'
89
+ - pattern: (ড়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
90
+ result: 'ṛ'
91
+ - pattern: (ঢ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
92
+ result: 'ḍh'
93
+ - pattern: (ঢ়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
94
+ result: 'ṛh'
95
+ - pattern: (ণ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
96
+ result: 'ṇ'
97
+ - pattern: (ত=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
98
+ result: 't'
99
+ - pattern: (ৎ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
100
+ result: 'ṭ'
101
+ - pattern: (থ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
102
+ result: 'th'
103
+ - pattern: (দ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
104
+ result: 'd'
105
+ - pattern: (ধ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
106
+ result: 'dh'
107
+ - pattern: (ন=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
108
+ result: 'n'
109
+ - pattern: (প=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
110
+ result: 'p'
111
+ - pattern: (ফ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
112
+ result: 'ph'
113
+ - pattern: (ব=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
114
+ result: 'b'
115
+ - pattern: (ভ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
116
+ result: 'bh'
117
+ - pattern: (ম=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
118
+ result: 'm'
119
+ - pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
120
+ result: 'y'
121
+ - pattern: (য=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
122
+ result: 'ẏ'
123
+ - pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
124
+ result: 'ẏ'
125
+ - pattern: (ৰ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
126
+ result: 'r'
127
+ - pattern: (ল=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
128
+ result: 'l'
129
+ - pattern: (ৱ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
130
+ result: 'w'
131
+ - pattern: (শ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
132
+ result: 'ś'
133
+ - pattern: (ষ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
134
+ result: 'sh'
135
+ - pattern: (স=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
136
+ result: 's'
137
+ - pattern: (হ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
138
+ result: 'h'
62
139
  # note[3]
63
140
  - pattern: \u0981(?=[কখগঘঙচছজঝঞটঠডড়ঢঢ়ণতৎথদধন]) # ঁ before guttural, palatal, cerebral, and dental
64
141
  result: ṅ
@@ -162,4 +239,21 @@ map:
162
239
  '\u09cb': 'o'
163
240
  '\u09cc': 'au'
164
241
  '।': '.'
242
+ '्': ''
165
243
  '\u09CD': '' # Used for joining
244
+
245
+ # Digits
246
+
247
+ '১': '1'
248
+ '২': '2'
249
+ '৩': '3'
250
+ '৪': '4'
251
+ '৫': '5'
252
+ '৬': '6'
253
+ '৭': '7'
254
+ '৮': '8'
255
+ '৯': '9'
256
+ '০': '0'
257
+
258
+
259
+