interscript 0.1.6 → 2.1.0a9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -0,0 +1,24 @@
1
#!/usr/bin/env ruby
# Debug helper: compile a given map with the Ruby compiler and print the
# generated code.
#
# Usage: maps_debug_ruby_compile [-b] MAP_NAME
#   -b  print the generated code Base64-encoded instead of verbatim

# Set before interscript is required (presumably read at load time -- confirm).
ENV["INTERSCRIPT_STAGING"] = "1"
require "bundler/setup"
require "interscript"
require "interscript/compiler/ruby"

# A script-local flag is enough here; no global state is needed.
base64_output = ARGV[0] == '-b'
if base64_output
  require 'base64'
  ARGV.shift
end

map = ARGV[0]
# Fail early with a readable message instead of an error from deep inside the library.
abort "usage: #{File.basename($PROGRAM_NAME)} [-b] MAP_NAME" unless map

# The parse result itself is not used below; the call is kept because the
# original script parsed the map before compiling it.
# NOTE(review): confirm whether this call is still required.
Interscript.parse(map)

mr = Interscript::Compiler::Ruby.(map)

puts(base64_output ? Base64.encode64(mr.code) : mr.code)
@@ -0,0 +1,44 @@
1
#!/usr/bin/env ruby
# Debug helper: run every test of a map through the Ruby compiler in debug
# mode and dump the collected debug data for the tests that fail.
#
# This script has been written because there are some differences between
# platforms (ie. windows vs linux) that we wish to find out more about.
#
# Usage: maps_debug_step_by_step [-b] MAP_NAME [IDX,IDX,...]
#   -b        collect all records and print them as one Base64 blob at the end
#   IDX,...   comma-separated test indices that are dumped even when they pass

# Set before interscript is required (presumably read at load time -- confirm).
ENV["INTERSCRIPT_STAGING"] = "1"
require "bundler/setup"
require "interscript"
require "interscript/compiler/ruby"

# nil means "pretty-print immediately"; an Array means "collect for Base64 output".
b64_records = nil
if ARGV[0] == '-b'
  require 'base64'
  b64_records = []
  ARGV.shift
end

map = ARGV[0]
abort "usage: #{File.basename($PROGRAM_NAME)} [-b] MAP_NAME [IDX,IDX,...]" unless map

# Parse the forced indices once instead of re-splitting ARGV[1] for every test.
forced_indices = ARGV[1] ? ARGV[1].split(",").map(&:to_i) : []

m = Interscript.parse(map)
cr = Interscript::Compiler::Ruby
mr = cr.(map, debug: true)

# A map without a tests section simply produces no records.
test_data = (m.tests && m.tests.data) || []

test_data.each_with_index do |(from, expected), idx|
  r = mr.(from)

  # Passing tests are skipped unless their index was explicitly requested.
  if r == expected && !forced_indices.include?(idx)
    cr.reset_debug_data
    next
  end

  dr = cr.read_debug_data

  if b64_records
    b64_records << [idx, dr]
  else
    pp [idx, dr]
  end

  cr.reset_debug_data
end

puts Base64.encode64(b64_records.inspect) if b64_records
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env ruby
2
+ ENV["INTERSCRIPT_STAGING"] = "1"
3
+ require "bundler/setup"
4
+ require "interscript"
5
+ require "interscript/compiler/ruby"
6
+
7
# Optional first argument: the name of a single staged map to work on.
$map_name = ARGV[0]

# Either the one requested staging file, or every staged map in sorted order.
staging_dir = __dir__ + "/../../maps/maps-staging"
filelist =
  if $map_name
    ["#{staging_dir}/#{$map_name}.imp"]
  else
    Dir["#{staging_dir}/*.imp"].sort
  end
14
+
15
+
16
# Levenshtein (edit) distance between two strings, used to measure how far a
# produced transliteration is from the expected one.
#
# Only two rows of the dynamic-programming table are kept at any time.
#
# @param s [String] first string
# @param t [String] second string
# @return [Integer] minimum number of single-character insertions, deletions
#   and substitutions that turn +s+ into +t+
def ld(s, t)
  # Split the target once; it is the same for every row.
  t_chars = t.chars
  # Row for the empty prefix of s: distance to each prefix of t.
  prev_row = (0..t_chars.length).to_a

  s.each_char.with_index do |s_ch, i|
    # First column: deleting the first i + 1 characters of s.
    curr_row = [i + 1]

    t_chars.each_with_index do |t_ch, j|
      cost = s_ch == t_ch ? 0 : 1
      curr_row << [curr_row[j] + 1, prev_row[j + 1] + 1, prev_row[j] + cost].min
    end

    # A fresh row is built each time, so no copy is needed.
    prev_row = curr_row
  end

  prev_row.last
end
35
+
36
+
37
# Score one candidate ordering of the first Parallel group in the :main stage.
#
# Works on a deep copy (Marshal round trip) of the global $interpreter so the
# shared instance is never modified. Reads $interpreter and $parallel_idx.
#
# @param system the parsed map; its tests.data supplies [source, expected] pairs
# @param order  the ordering handed to the Parallel group's apply_order
# @return [Array] two elements: the summed edit distance over all tests and
#   the list of [expected, result] pairs that differ
def score_order(system, order)
  candidate = Marshal.load(Marshal.dump($interpreter))

  group = candidate.map.stages[:main].children.find do |node|
    Interscript::Node::Group::Parallel === node
  end
  group.apply_order(order)
  # Drop the nil slots (if any) left in the children after reordering.
  candidate.map.stages[:main].children[$parallel_idx].children = group.children.compact

  total_distance = 0
  mismatches = []
  system.tests.data.each do |from, expected|
    actual = candidate.(from)
    distance = ld(expected, actual)
    mismatches << [expected, actual] if distance != 0
    total_distance += distance
  end

  [total_distance, mismatches]
end
52
+
53
+
54
# Return a copy of +order+ with two distinct, randomly chosen positions
# swapped. The input array is never modified.
#
# Orders with fewer than two elements are returned as a plain copy. Without
# that guard, rand(0) yields a Float and the swap would insert a nil into an
# empty order.
#
# @param order [Array] current ordering
# @return [Array] a new array holding the same elements with two positions swapped
def mutate_order(order)
  mutated = order.dup
  size = mutated.size
  return mutated if size < 2

  # Draw the second index from the remaining size - 1 positions so the two
  # indices can never coincide (a self-swap would waste a scoring pass).
  a = rand(size)
  b = rand(size - 1)
  b += 1 if b >= a

  mutated[a], mutated[b] = mutated[b], mutated[a]
  mutated
end
61
+
62
# For every staged map that has tests and a Parallel group in its :main stage,
# search for a rule ordering with a lower total edit distance. The search is
# a random hill climb: mutate the best ordering found so far and keep the
# mutation when it scores strictly better.
#
# The inner search never finishes on its own. An Interrupt (Ctrl-C) or an
# error ends the search for the current map, prints the exception and moves
# on to the next map, as the original broad rescue did. Other non-standard
# exceptions (SystemExit, NoMemoryError, ...) are no longer swallowed.
#
# For this code to work, sorting by max_length in interpreter.rb line 46
# needs to be disabled:
#   #r.children.each do |i|
#   r.children.sort_by{ |rule| -rule.max_length }.each do |i|
filelist.each do |path|
  begin
    system_name = File.basename(path, ".imp")
    puts "\ndebugging #{system_name}"

    system = Interscript.parse(system_name)
    next unless system.tests && system.tests.data && system.tests.data.length > 0

    # score_order reads $interpreter and $parallel_idx, so they stay global.
    $interpreter = Interscript::Interpreter.new.compile(system)
    main_children = $interpreter.map.stages[:main].children
    $parallel_idx = main_children.index { |node| Interscript::Node::Group::Parallel === node }
    next unless $parallel_idx

    $orig_parallel = main_children[$parallel_idx].dup

    # The identity order is the baseline; best_order and best_score must
    # always describe the same ordering.
    identity_order = $orig_parallel.children.size.times.to_a
    best_score, best_errors = score_order(system, identity_order)
    best_order = identity_order.dup
    puts "starting_score = #{best_score}"

    # First candidate is a random shuffle. A previously found ordering can be
    # assigned here instead to resume an earlier search.
    curr_order = identity_order.shuffle

    loop do
      curr_score, curr_errors = score_order(system, curr_order)
      puts Time.now.inspect
      puts best_order.inspect
      puts curr_score
      puts best_errors.inspect
      puts best_score
      puts ''

      if curr_score < best_score
        puts ''
        best_score = curr_score
        best_order = curr_order.dup
        best_errors = curr_errors.dup
      end

      curr_order = mutate_order(best_order)
    end
  rescue Interrupt, StandardError => e
    puts e
  end
end
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'yaml'
4
+ require 'fileutils'
5
+ require 'regexp_parser'
6
+
7
# Survey of the regular expressions used by the v1 (YAML) maps: tallies which
# regexp_parser expression classes and which quantifiers occur, most frequent
# first.
old_maps = Dir["../../interscript/maps/*.yaml"]

# Flatten a list of parsed expressions into the classes of all nodes in it,
# nested ones included, recording every quantifier met along the way.
#
# Each pass replaces a node by its class and, for nodes that respond to
# #expressions, also splices in their children. The passes stop once a pass
# changes nothing, i.e. only classes are left.
#
# @param expressions [Array] parsed expression nodes (duck-typed: optional
#   #quantifier and #expressions)
# @param quantifiers [Array] receives every non-nil quantifier (mutated)
# @return [Array<Class>] one class per node encountered
def collect_expression_classes(expressions, quantifiers)
  nodes = expressions
  loop do
    expanded = nodes.map do |node|
      quantifiers << node.quantifier if node.respond_to?(:quantifier) && node.quantifier
      # Nodes already reduced in an earlier pass are classes and stay as they are.
      klass = node.is_a?(Class) ? node : node.class
      node.respond_to?(:expressions) ? [klass, node.expressions] : klass
    end.flatten
    return expanded if expanded == nodes

    nodes = expanded
  end
end

expr_classes = []
quantifiers = []

old_maps.each do |old_map|
  puts old_map
  yaml = YAML.load(File.read(old_map)) || {}
  map = yaml['map'] || {}
  puts map.keys

  # Patterns come from the postrules and from the keys of the characters table.
  patterns = (map['postrules'] || []).map { |rule| rule['pattern'] }
  patterns += (map['characters'] || {}).keys

  patterns.each do |pattern|
    tree = Regexp::Parser.parse(pattern)
    expr_classes.concat(collect_expression_classes(tree.expressions, quantifiers))
  end
end

pp expr_classes.tally.sort_by { |_klass, count| -count }
pp quantifiers.map(&:text).tally.sort_by { |_text, count| -count }
data/bin/maps_v1_to_v2 ADDED
@@ -0,0 +1,426 @@
1
+ #!/usr/bin/env ruby
2
+ # This is a helper script for porting Interscript v1 maps to v2 format. It won't
3
+ # ever be able to port them completely, but it should help bootstrap the process.
4
+
5
+ require 'bundler/setup'
6
+
7
+ require 'yaml'
8
+ require 'fileutils'
9
+
10
# All relative paths below are resolved from the parent of this script's directory.
Dir.chdir("#{__dir__}/../")

# Every run starts from an empty staging directory.
FileUtils.rm_rf(Dir.glob("../maps/maps-staging/*"))
#FileUtils.mkdir_p("../maps/maps-staging/")

# v1 maps to convert. Swap in one of the alternatives to work on a single map.
#old_maps = []
#old_maps = Dir["../../interscript/maps/alalc-aze-Arab-Latn-1997.yaml"]
#old_maps = Dir["../../interscript/maps/mofa-jpn-Hrkt-Latn-1989.yaml"]
old_maps = Dir["../../interscript/maps/*.yaml"]

# Names of the maps that already exist in v2 format.
ex_map_names = Dir["../maps/maps/*.imp"].map { |path| File.basename(path, ".imp") }
22
+
23
+
24
+ require 'regexp_parser'
25
+
26
+ require 'interscript/utils/regexp_converter.rb'
27
+
28
+
29
# Convert one v1 mapping entry into a line of v2 map source.
#
# The pattern is parsed as a regular expression and turned into the "from"
# side via process / process_root; the replacement decides the :to entry.
#
# @param kkk [String] v1 pattern (regular expression source)
# @param vvv [String, Array, Object] v1 replacement
# @param indent [Integer] indentation passed on to stringify_root
# @return whatever stringify_root produces for the assembled rule
def process_line(kkk, vvv, indent: 0)
  root_hash = process_root(process(Regexp::Parser.parse(kkk, 'ruby/2.1')))

  root_hash[:to] =
    if vvv.instance_of?(String)
      if vvv.empty? || vvv == '""' || vvv.include?('~')
        # Empty, literally-quoted-empty and '~' replacements all become an empty string.
        '""'
      elsif vvv == '"'
        '"\""'
      elsif vvv == "?"
        # alalc-ell-Grek-Latn-1997.imp un-ell-Grek-Latn-1987-phonetic have to "?"
        '"?"'
      else
        converted = stringify(process(Regexp::Parser.parse(vvv)))
        # Anything mentioning upcase collapses to the bare upcase marker.
        converted.include?('upcase') ? 'upcase' : converted
      end
    elsif vvv.instance_of?(Array)
      "any(#{vvv.inspect})"
    else
      # Any other replacement type is treated as an empty string.
      '""'
    end

  stringify_root(root_hash, indent: indent)
end
67
+
68
+
69
# Convert every v1 YAML map that has not been ported yet into a draft v2 map
# in ../maps/maps-staging/. Each file is processed line by line in three
# passes: split it into metadata / tests / map sections, emit the metadata
# and tests blocks, then emit the stage with its rules.
#
# NOTE(review): runs of spaces inside the string and regexp literals below are
# reproduced exactly as found; confirm the emitted indentation is as intended.
old_maps.sort.each do |old_map|
  old_map_name = File.basename(old_map, ".yaml")

  if ex_map_names.include? old_map_name
    puts "* Skipping #{old_map_name} as it's already ported"
    next
  end

  print "* Converting #{old_map_name}."

  f = File.read(old_map)
  fl = f.split("\n")

  md = []
  tests = []
  map = []
  chain = nil

  # Section that currently receives lines; metadata until a section header is seen.
  cur = md

  # True while inside a "notes:" block, whose continuation lines go to the metadata.
  bugnotes = false

  # Pass 1: distribute the raw lines over md / tests / map.
  fl.each do |i|
    if i == '---'
      # skip the first line
    elsif i =~ /\A\s+|\A\z/
      # continuation
      if bugnotes
        i = "#{i}"
        md << i
      else
        cur << i
      end
    else
      # Split a trailing comment off the line; it is re-added after the line.
      cmt = nil
      i = i.sub(/(#.*?)\z/) do |j|
        cmt = j
        ""
      end

      # block begin or md
      case i.strip
      when "tests:"
        cur = tests
        bugnotes = false
      when "map:"
        cur = map
        bugnotes = false
      when "notes:"
        md << "notes:"
        bugnotes = true
      when /\Achain:/
        chain = i
      else
        cur << i
      end

      cur << cmt if cmt
    end
  end

  print "."

  # Drop the "alias:" entry together with its indented continuation lines.
  newmd = []
  aliasff = false
  md.each do |i|
    if i.strip == "alias:"
      aliasff = true
    elsif i !~ /\A\s+/
      aliasff = false
    end
    newmd << i unless aliasff
  end
  md = newmd

  print "."

  newmd = md.map { |line| " " + line }
            .join("\n")
            .gsub(" note:", " notes:")
            .gsub("confirmation date:", "confirmation_date:")
            .rstrip
  # Rewrites that were disabled in the original:
  #.gsub("special_rules:", ' - "special rules:"')
  #.gsub("original_description:", " # original description:")
  #.gsub("original_notes:", ' - "original notes:"')
  #.gsub("implementation_notes:", ' - "implementation notes:"')

  new = "metadata {\n"
  new << newmd
  new << "\n}\n\n"

  class MultilineError < StandardError; end

  # Pass 2: tests section.
  if tests.length > 0
    new << "tests {\n"
    cmt = ""

    re_source = /\A(?: ){0,2}- source: (.*?)(\s*#.*?)?\z/m
    re_expect = /\A(?: ){0,3}expected:[ \t](.*?)(\s*#.*?)?\z/m

    iter = 0
    while iter < tests.length
      begin
        test = tests[iter]

        if test =~ /\A\s*#/
          new << " " << test.strip << "\n"
          iter += 1
          next
        end

        # A block scalar ("|") or an unterminated quote continues on the
        # following lines: glue them on until the next source/expected entry.
        if test.rstrip.end_with?("|") ||
           (test =~ /"/ && !test.rstrip.end_with?('"'))
          while iter < tests.length
            xtest = tests[iter+1]
            break if xtest =~ re_source || xtest =~ re_expect
            test << "\n" << (xtest||"")
            iter += 1
          end
        end

        case test
        when re_source
          new << " test #{YAML.load($1).inspect}, "
          cmt = $2 if $2
        when re_expect
          new << "#{YAML.load($1).inspect}".unicode_normalize
          new << cmt
          new << $2 if $2
          new << "\n"
          cmt = ""
        when /\A\s*\z/
          # empty line, ignore
        else
          new << "\n# BUG: #{test}\n"
        end

        iter += 1
      rescue Psych::SyntaxError
        p test
        # Leave a marker in the output and move on; without advancing iter
        # the same entry would be retried forever.
        new << "\n# BUG: #{test}\n"
        iter += 1
      end
    end

    new << "}\n\n"
  end

  print "."

  new << "# This map has been partially converted by the bin/maps_v1_to_v2 script\n"
  new << "# The section below requires human attention. Remember to remove this\n"
  new << "# comment and move the converted map to 'maps/' directory. Please also\n"
  new << "# take note that the maps-staging directory will be cleaned up whenever\n"
  new << "# you run the bin/maps_v1_to_v2 script. You should particularly be\n"
  new << "# concerned about any regular expressions found in this file and about\n"
  new << "# advanced expressions in parallel {} parts, and also about the order\n"
  new << "# of particular parts of the stage.\n\n"

  transcription = nil
  title_case = nil
  downcase = nil
  inherit = nil

  # These arrays stay empty; they only serve as identity markers for `cur`.
  characters, rules, dictionary, postrules = [], [], [], []

  cur = nil
  indent = 0
  stagedone = false

  # One-shot: emits the "run" lines for inherited maps, then disables itself.
  efini = proc do
    if inherit
      new << "\n"
      inherit.each do |dep|
        new << " run map.#{dep}.stage.main\n"
      end
    end
    new << "\n"
    efini=proc{}
  end
  # Closes whatever section is currently open; replaced by each section header.
  fini = proc{}
  # One-shot: opens the stage block on first use.
  begn = proc { new << "stage {\n"; stagedone = true; begn=proc{} }

  # Pass 3: map section.
  iter = -1
  while iter+1 < map.length
    iter += 1
    i = map[iter]

    cmt = nil
    if i =~ /\A [^\s#]/ || i =~ /\A inherit:/
      # Section header or scalar setting.
      i.sub(/(#.*?)\z/) { cmt = $1 }
      if cmt
        new << " "*indent << cmt << "\n"
      end

      maybe_val = YAML.load(i.split(":").last)
      case i.split(":").first.strip
      when "inherit"
        inherit = Array(maybe_val)
        inherit = inherit.map do |inh|
          short = inh.split("-")[2..3].join.downcase
          new << "dependency #{inh.inspect}, as: #{short}\n"
          short
        end
        new << "\n"
        raise "Duplicate items" unless inherit.length == inherit.uniq.length
      when "dictionary"
        begn.()
        fini.()
        new << " # DICTIONARY\n"
        new << " parallel {\n"
        indent = 4
        cur = dictionary
        fini = proc{new << " }\n\n";indent = 2}
      when "rules"
        begn.()
        fini.()
        efini.()
        new << " # RULES\n"
        indent = 2
        cur = rules
        fini = proc{new << "\n"}
      when "characters"
        begn.()
        fini.()
        efini.()
        new << " # CHARACTERS\n"
        new << " parallel {\n"
        indent = 4
        cur = characters
        fini = proc{new << " }\n\n";indent = 2}
      when "postrules"
        begn.()
        fini.()
        efini.()
        new << " # POSTRULES\n"
        indent = 2
        cur = postrules
        fini = proc{new << "\n"}
      when "downcase"
        downcase = maybe_val
      when "title_case"
        title_case = maybe_val
      when "transcription"
        transcription = maybe_val
      # Those we will ignore for now
      when "word_separator", "segmentation", "character_separator", "map"
      # Those are bugs
      when "title-case"
      else
        p i
      end
    else
      # Entry line inside the current section.
      cmt = ""
      i = i.sub(/(#.*?)\z/) { cmt << $1; "" }
      #new << " "*indent << i.strip << "\n"
      if i.strip == ""
        new << " "*indent << cmt << "\n"
        next
      end

      if cur.nil?
        raise "Unexpected line #{i}"
      elsif cur.equal?(characters) || cur.equal?(dictionary)
        k,v = i.split(":", 2).map(&:strip)
        if !v || v == ""
          v = ""
          # Load array: gather the following "- item" lines as the value.
          iter2 = iter + 1
          while iter2 < map.length
            i2 = map[iter2]
            break unless i2.strip =~ /\A-/
            i2 = i2.sub(/(#.*?)\z/) { cmt << $1; "" }
            v << "\n" << i2
            iter2 += 1
          end
          iter = iter2 - 1
        end

        kk,vv = YAML.load(k), YAML.load(v)

        # The original also built inspect-based "any(...)"/"none" forms here
        # and then immediately overwrote them; that dead computation is gone.
        kkk,vvv = kk.dup,vv.dup
        # This worked due to use of regexps... it should remove 1 slash.
        kkk = kkk.gsub("\\\\u", "\\\\u")
        kkk = kkk.gsub("\\\\U", "\\\\u")

        new << process_line( kkk,vvv, indent: indent )
      elsif cur.equal?(rules) || cur.equal?(postrules)
        if i.strip =~ /\A- pattern\s*:/
          _, k = i.split(":", 2)
          # The "result:" line follows, optionally after one blank/comment line.
          ii = map[iter+1]
          ii = ii.sub(/(#.*?)\z/) { cmt << $1; "" }
          if ii.strip == ""
            iter += 1
            ii = map[iter+1]
            ii = ii.sub(/(#.*?)\z/) { cmt << $1; "" }
          end
          if ii.strip =~ /\Aresult\s*:/
            _, v = ii.split(":", 2)
          else
            raise "Unexpected(2): #{ii.strip.inspect}"
          end
          iter += 1
        else
          raise "Unexpected(1): #{i.strip.inspect}"
        end

        kk,vv = YAML.load(k), YAML.load(v)
        # kkk,vvv = kk.inspect, vv.inspect
        kkk,vvv = kk.dup,vv.dup

        new << process_line( kkk,vvv, indent: indent )
      end

      if cmt != ""
        new << " " << cmt << "\n"
      else
        new << "\n"
      end
    end
  end

  fini.()

  unless stagedone
    new << "stage {\n"
    efini.()
  end

  new << " title_case\n" if title_case
  new << " downcase\n" if downcase

  new << "}\n\n"

  if chain
    new << "# This map is chained and probably depends on seq2seq:\n"
    new << "# #{chain}\n\n"
  end

  if transcription
    new << "# This map contains transcription and probably depends on seq2seq:\n"
    new << "# transcription: #{transcription}\n\n"
  end

  new = new.gsub(/ +$/, '') # Cleanup trailing whitespaces

  #new << map.join("\n")

  File.write("../maps/maps-staging/#{old_map_name}.imp", new)

  puts " done!"
end