interscript 0.1.5 → 2.1.0a8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+ ENV["INTERSCRIPT_STAGING"] = "1"
3
+ require "bundler/setup"
4
+ require "interscript"
5
+ require "interscript/compiler/ruby"
6
+
7
+ # Compile a given map with the Ruby compiler for debugging purposes
8
+
9
+ if ARGV[0] == '-b'
10
+ require 'base64'
11
+ $b64 = true
12
+ ARGV.shift
13
+ end
14
+
15
+ map = ARGV[0]
16
+ m = Interscript.parse(map)
17
+ cr = Interscript::Compiler::Ruby
18
+ mr = cr.(map)
19
+
20
+ if $b64
21
+ puts Base64.encode64(mr.code)
22
+ else
23
+ puts mr.code
24
+ end
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+ ENV["INTERSCRIPT_STAGING"] = "1"
3
+ require "bundler/setup"
4
+ require "interscript"
5
+ require "interscript/compiler/ruby"
6
+
7
+ # This script has been written because there are some differences between platforms
8
+ # (ie. windows vs linux) that we wish to find out more about
9
+
10
+ if ARGV[0] == '-b'
11
+ require 'base64'
12
+ $b64 = []
13
+ ARGV.shift
14
+ end
15
+
16
+ map = ARGV[0]
17
+ m = Interscript.parse(map)
18
+ cr = Interscript::Compiler::Ruby
19
+ mr = cr.(map, debug: true)
20
+
21
+ m.tests.data.each_with_index do |(from, expected), idx|
22
+ r = mr.(from)
23
+
24
+ unless ARGV[1] && ARGV[1].split(",").any? { |i| i.to_i == idx }
25
+ if r == expected
26
+ cr.reset_debug_data
27
+ next
28
+ end
29
+ end
30
+
31
+ dr = cr.read_debug_data
32
+
33
+ if $b64
34
+ $b64 << [idx, dr]
35
+ else
36
+ pp [idx, dr]
37
+ end
38
+
39
+ cr.reset_debug_data
40
+ end
41
+
42
+ if $b64
43
+ puts Base64.encode64($b64.inspect)
44
+ end
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env ruby
2
+ ENV["INTERSCRIPT_STAGING"] = "1"
3
+ require "bundler/setup"
4
+ require "interscript"
5
+ require "interscript/compiler/ruby"
6
+
7
+ $map_name = ARGV[0]
8
+
9
+ if $map_name
10
+ filelist = [ __dir__+"/../../maps/maps-staging/#{$map_name}.imp" ]
11
+ else
12
+ filelist = Dir[__dir__+"/../../maps/maps-staging/*.imp"].sort
13
+ end
14
+
15
+
16
+ # levenshtein distance algorithm for comparing string similarity
17
+ def ld(s, t)
18
+ v0 = (0..t.length).to_a
19
+ v1 = []
20
+ #p v0
21
+
22
+ s.chars.each_with_index do |s_ch, i|
23
+ v1[0] = i + 1
24
+
25
+ t.chars.each_with_index do |t_ch, j|
26
+ cost = s_ch == t_ch ? 0 : 1
27
+ v1[j + 1] = [v1[j] + 1, v0[j + 1] + 1, v0[j] + cost].min
28
+ end
29
+ v0 = v1.dup
30
+ #p v1
31
+ end
32
+
33
+ v0[t.length]
34
+ end
35
+
36
+
37
+ def score_order( system, order )
38
+ interpreter = Marshal.load( Marshal.dump( $interpreter ))
39
+ parallel = interpreter.map.stages[:main].children.select{|x| Interscript::Node::Group::Parallel === x}[0]
40
+ parallel.apply_order(order)
41
+ interpreter.map.stages[:main].children[$parallel_idx].children = parallel.children.compact #.reorder_children(source,target)
42
+ delta_sum = 0
43
+ errors = []
44
+ system.tests.data.each do |from, expected|
45
+ result = interpreter.(from)
46
+ delta = ld(expected, result)
47
+ errors << [expected, result] if delta != 0
48
+ delta_sum += delta
49
+ end;
50
+ [delta_sum, errors]
51
+ end
52
+
53
+
54
+ def mutate_order(order)
55
+ order2 = order.dup
56
+ a = rand(order2.size)
57
+ b = rand(order2.size)
58
+ order2[a], order2[b] = order2[b], order2[a]
59
+ order2
60
+ end
61
+
62
+ for i in filelist
63
+
64
+ begin
65
+ system_name = File.basename(i, ".imp")
66
+ puts "\ndebugging #{system_name}"
67
+
68
+ system = Interscript.parse(system_name);
69
+ if system.tests && system.tests.data && system.tests.data.length > 0
70
+
71
+ $interpreter = Interscript::Interpreter.new.compile(system);
72
+ $orig_parallel = $interpreter.map.stages[:main].children.select{|x| Interscript::Node::Group::Parallel === x}[0].dup;
73
+ $parallel_idx = $interpreter.map.stages[:main].children.each_with_index.select{|x,i| Interscript::Node::Group::Parallel === x}.map{|x,i| i}[0]
74
+ next if !$parallel_idx
75
+ starting_score, starting_errors = score_order(system, $orig_parallel.children.size.times.to_a)
76
+ parallel_size = $interpreter.map.stages[:main].children.select{|x| Interscript::Node::Group::Parallel === x}[0].children.size
77
+
78
+ puts "starting_score = #{starting_score}"
79
+ best_score = starting_score
80
+ best_errors = starting_errors
81
+ curr_order = $orig_parallel.children.size.times.to_a.shuffle
82
+ #curr_order = [28, 308, 61, 87, 29, 147, 124, 22, 373, 186, 336, 19, 405, 387, 215, 209, 131, 30, 60, 343, 34, 380, 189, 53, 300, 286, 162, 31, 33, 218, 90, 51, 293, 226, 56, 305, 142, 102, 346, 222, 126, 338, 50, 52, 363, 144, 136, 16, 388, 221, 267, 63, 352, 365, 251, 78, 68, 328, 69, 12, 67, 317, 334, 94, 366, 412, 302, 243, 311, 318, 281, 274, 143, 236, 386, 135, 280, 167, 173, 291, 271, 309, 73, 20, 157, 331, 43, 242, 65, 351, 134, 151, 0, 285, 211, 417, 220, 179, 91, 353, 255, 141, 23, 104, 413, 409, 256, 326, 180, 140, 24, 348, 261, 5, 99, 47, 35, 358, 177, 123, 277, 396, 114, 213, 116, 188, 217, 249, 419, 120, 289, 330, 110, 118, 176, 113, 278, 127, 313, 55, 370, 48, 364, 171, 244, 407, 57, 371, 128, 196, 103, 202, 294, 239, 283, 299, 237, 394, 81, 230, 97, 46, 109, 337, 355, 240, 195, 100, 204, 389, 146, 153, 121, 183, 137, 159, 254, 231, 3, 101, 290, 323, 148, 359, 250, 25, 40, 219, 119, 169, 378, 282, 377, 238, 130, 279, 385, 58, 41, 115, 197, 382, 193, 225, 199, 6, 59, 208, 93, 138, 11, 15, 37, 38, 27, 354, 175, 411, 83, 89, 368, 216, 301, 168, 401, 84, 235, 333, 246, 284, 372, 155, 105, 339, 228, 342, 122, 161, 316, 145, 272, 321, 80, 315, 163, 107, 288, 227, 191, 306, 310, 76, 85, 132, 2, 320, 36, 13, 74, 233, 72, 381, 269, 70, 402, 86, 95, 111, 8, 383, 314, 10, 200, 203, 292, 241, 212, 374, 234, 369, 422, 42, 357, 18, 49, 214, 9, 156, 129, 258, 259, 190, 79, 367, 414, 201, 166, 270, 319, 332, 4, 184, 187, 164, 395, 325, 88, 245, 185, 71, 400, 275, 312, 324, 1, 224, 45, 205, 404, 260, 392, 253, 273, 416, 96, 408, 112, 349, 393, 345, 152, 329, 420, 410, 14, 361, 7, 257, 207, 194, 298, 17, 98, 340, 391, 399, 397, 82, 263, 376, 158, 327, 406, 265, 418, 322, 77, 92, 266, 262, 44, 360, 172, 403, 350, 66, 384, 247, 139, 181, 198, 248, 232, 32, 295, 106, 160, 287, 379, 341, 344, 421, 182, 375, 307, 415, 64, 75, 297, 125, 276, 223, 149, 26, 398, 303, 154, 133, 210, 150, 206, 174, 62, 170, 390, 54, 347, 39, 229, 178, 296, 108, 21, 165, 268, 264, 356, 304, 192, 252, 117, 335, 362]
83
+
84
+ best_order = curr_order.dup
85
+ while true
86
+
87
+ curr_score, curr_errors = score_order(system, curr_order)
88
+ #print "#{source} <-> #{target} = #{curr_score}; "
89
+ puts Time.now.inspect
90
+ puts best_order.inspect
91
+ puts curr_score
92
+ puts best_errors.inspect
93
+ puts best_score
94
+ puts ''
95
+
96
+ if curr_score < best_score
97
+ puts ''
98
+ best_score = curr_score.dup
99
+ best_order = curr_order.dup
100
+ best_errors = curr_errors.dup
101
+ end
102
+ curr_order = mutate_order(best_order)
103
+ end
104
+ end
105
+ rescue Exception => e
106
+ puts e
107
+ end
108
+
109
+ end
110
+ # for this code to work sorting by max_length in interpreter.rb line 46 needs to be disabled
111
+ # #r.children.each do |i|
112
+ # r.children.sort_by{ |rule| -rule.max_length }.each do |i|
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'yaml'
4
+ require 'fileutils'
5
+ require 'regexp_parser'
6
+
7
+ old_maps = Dir["../../interscript/maps/*.yaml"]
8
+
9
+ $expr_classes = []
10
+ $quantifiers = []
11
+ old_maps.each do |old_map|
12
+ old_map_name = File.basename(old_map, ".yaml")
13
+ puts old_map
14
+ f = File.read(old_map)
15
+ yaml = YAML.load(f)
16
+ map_keys = yaml['map'].keys
17
+ puts map_keys
18
+ rs = yaml['map']['postrules']&.map{|h| h['pattern']} || []
19
+ rs += yaml['map']['characters']&.keys || []
20
+ rs.each do |regexp|
21
+ tree = Regexp::Parser.parse( regexp )
22
+
23
+ arr = tree.expressions
24
+ while true
25
+ new_arr = arr.map do |elem|
26
+ if elem.respond_to? :quantifier and elem.quantifier
27
+ $expr_classes << elem.quantifier
28
+ end
29
+ el = el.class == Class ? el : el.class
30
+ if elem.respond_to?(:expressions)
31
+ [el, elem.expressions]
32
+ else
33
+ el
34
+ end
35
+ end.flatten
36
+ break if new_arr == arr
37
+ arr = new_arr
38
+ end
39
+ $expr_classes += arr
40
+ end
41
+ end;
42
+ # $expressions.map{|elem| elem.class.to_s=="Class" ? elem : elem.class}.tally
43
+
44
+ pp $expr_classes.tally.sort_by{|k,v| -v}
45
+ pp $quanitifiers.map{|q| q.text}.tally.sort_by{|k,v| -v}
data/bin/maps_v1_to_v2 ADDED
@@ -0,0 +1,426 @@
1
+ #!/usr/bin/env ruby
2
+ # This is a helper script for porting Interscript v1 maps to v2 format. It won't
3
+ # ever be able to port them completely, but it should help bootstrap the process.
4
+
5
+ require 'bundler/setup'
6
+
7
+ require 'yaml'
8
+ require 'fileutils'
9
+
10
+ Dir.chdir(__dir__ + "/../")
11
+ FileUtils.rm_rf(Dir.glob("../maps/maps-staging/*"))
12
+ #FileUtils.mkdir_p("../maps/maps-staging/")
13
+
14
+ #old_maps = []
15
+ old_maps = Dir["../../interscript/maps/*.yaml"]
16
+ #old_maps = Dir["../../interscript/maps/alalc-aze-Arab-Latn-1997.yaml"]
17
+ #old_maps = Dir["../../interscript/maps/mofa-jpn-Hrkt-Latn-1989.yaml"]
18
+
19
+
20
+ ex_maps = Dir["../maps/maps/*.imp"]
21
+ ex_map_names = ex_maps.map { |i| File.basename(i, ".imp") }
22
+
23
+
24
+ require 'regexp_parser'
25
+
26
+ require 'interscript/utils/regexp_converter.rb'
27
+
28
+
29
+ def process_line( kkk, vvv, indent: 0)
30
+
31
+ parse_kkk = Regexp::Parser.parse(kkk, 'ruby/2.1')
32
+ tokens_kkk = process(parse_kkk)
33
+ # pp conv
34
+ root_hash = process_root(tokens_kkk)
35
+ # pp root_hash
36
+ # puts "vvv = #{vvv.inspect}"
37
+ if vvv.class == String
38
+ if vvv == '' or vvv =='""' or vvv == nil or vvv.include? '~'
39
+ root_hash[:to] = '""'
40
+ elsif vvv == '"'
41
+ root_hash[:to] = '"\""'
42
+ else
43
+ if vvv == "?" #alalc-ell-Grek-Latn-1997.imp un-ell-Grek-Latn-1987-phonetic have to "?"
44
+ root_hash[:to] = "?".inspect
45
+ #if root_hash[:from].to_s.include?('capture') or root_hash[:to] =~ /\\\\([0-9]+)/
46
+ else
47
+ parse_vvv = Regexp::Parser.parse(vvv)
48
+ tokens_vvv = process(parse_vvv)
49
+ string_vvv = stringify(tokens_vvv)
50
+ # puts string_vvv
51
+ root_hash[:to] = string_vvv
52
+ root_hash[:to] = 'upcase' if string_vvv.include? 'upcase'
53
+ end
54
+ end
55
+ elsif vvv.class == Array
56
+ root_hash[:to] = "any(#{vvv.inspect})"
57
+ else
58
+ root_hash[:to] = "\"\""
59
+ # puts "unknown class #{vvv.inspect}"
60
+ end
61
+
62
+ str = stringify_root(root_hash, indent: indent)
63
+ # puts str
64
+ # puts ""
65
+ str
66
+ end
67
+
68
+
69
+ old_maps.sort.each do |old_map|
70
+ old_map_name = File.basename(old_map, ".yaml")
71
+
72
+ if ex_map_names.include? old_map_name
73
+ puts "* Skipping #{old_map_name} as it's already ported"
74
+ next
75
+ end
76
+
77
+ print "* Converting #{old_map_name}."
78
+
79
+ f = File.read(old_map)
80
+ fl = f.split("\n")
81
+
82
+ md = []
83
+ tests = []
84
+ map = []
85
+ chain = nil
86
+
87
+ cur = md
88
+
89
+ bugnotes = false
90
+
91
+ fl.each do |i|
92
+ if i == '---'
93
+ # skip the first line
94
+ elsif i =~ /\A\s+|\A\z/
95
+ # continuation
96
+ if bugnotes
97
+ i = "#{i}"
98
+ md << i
99
+ else
100
+ cur << i
101
+ end
102
+ else
103
+ cmt = nil
104
+ i = i.sub(/(#.*?)\z/) do |j|
105
+ cmt = j
106
+ ""
107
+ end
108
+
109
+ # block begin or md
110
+ case i.strip
111
+ when "tests:"
112
+ cur = tests
113
+ bugnotes = false
114
+ when "map:"
115
+ cur = map
116
+ bugnotes = false
117
+ when "notes:"
118
+ md << "notes:"
119
+ bugnotes = true
120
+ when /\Achain:/
121
+ chain = i
122
+ else
123
+ cur << i
124
+ end
125
+
126
+ cur << cmt if cmt
127
+ end
128
+ end
129
+
130
+ print "."
131
+
132
+ newmd = []
133
+ aliasff = false
134
+ md.each do |i|
135
+ if i.strip == "alias:"
136
+ aliasff = true
137
+ elsif i !~ /\A\s+/
138
+ aliasff = false
139
+ end
140
+ newmd << i unless aliasff
141
+ end
142
+ md = newmd
143
+
144
+ print "."
145
+
146
+ newmd = md.map(&" ".method(:+))
147
+ .join("\n")
148
+ .gsub(" note:", " notes:")
149
+ .gsub("confirmation date:", "confirmation_date:")
150
+ #.gsub("special_rules:", ' - "special rules:"')
151
+ #.gsub("original_description:", " # original description:")
152
+ #.gsub("original_notes:", ' - "original notes:"')
153
+ #.gsub("implementation_notes:", ' - "implementation notes:"')
154
+ .rstrip
155
+
156
+ new = "metadata {\n"
157
+ new << newmd
158
+ new << "\n}\n\n"
159
+
160
+ class MultilineError < StandardError; end
161
+
162
+ if tests.length > 0
163
+ new << "tests {\n"
164
+ cmt = ""
165
+
166
+ iter = 0
167
+ while iter < tests.length; begin
168
+ test = tests[iter]
169
+
170
+ if test =~ /\A\s*#/
171
+ new << " " << test.strip << "\n"
172
+ iter += 1
173
+ next
174
+ end
175
+
176
+ re_source = /\A(?: ){0,2}- source: (.*?)(\s*#.*?)?\z/m
177
+ re_expect = /\A(?: ){0,3}expected:[ \t](.*?)(\s*#.*?)?\z/m
178
+
179
+ if test.rstrip.end_with?("|") ||
180
+ (test =~ /"/ && !test.rstrip.end_with?('"'))
181
+ while iter < tests.length
182
+ xtest = tests[iter+1]
183
+ break if xtest =~ re_source || xtest =~ re_expect
184
+ test << "\n" << (xtest||"")
185
+ iter += 1
186
+ end
187
+ end
188
+
189
+ case test
190
+ when re_source
191
+ new << " test #{YAML.load($1).inspect}, "
192
+ cmt = $2 if $2
193
+ when re_expect
194
+ new << "#{YAML.load($1).inspect}".unicode_normalize
195
+ new << cmt
196
+ new << $2 if $2
197
+ new << "\n"
198
+ cmt = ""
199
+ when /\A\s*\z/
200
+ # empty line, ignore
201
+ else
202
+ new << "\n# BUG: #{test}\n"
203
+ end
204
+
205
+ iter += 1
206
+ rescue Psych::SyntaxError
207
+ p test
208
+ end; end
209
+
210
+ new << "}\n\n"
211
+ end
212
+
213
+ print "."
214
+
215
+ new << "# This map has been partially converted by the bin/maps_v1_to_v2 script\n"
216
+ new << "# The section below requires human attention. Remember to remove this\n"
217
+ new << "# comment and move the converted map to 'maps/' directory. Please also\n"
218
+ new << "# take note that the maps-staging directory will be cleaned up whenever\n"
219
+ new << "# you run the bin/maps_v1_to_v2 script. You should particularly be\n"
220
+ new << "# concerned about any regular expressions found in this file and about\n"
221
+ new << "# advanced expressions in parallel {} parts, and also about the order\n"
222
+ new << "# of particular parts of the stage.\n\n"
223
+
224
+ transcription = nil
225
+ title_case = nil
226
+ downcase = nil
227
+ inherit = nil
228
+
229
+ characters, rules, dictionary, postrules = [], [], [], []
230
+
231
+ cur = nil
232
+ indent = 0
233
+ stagedone = false
234
+ efini = proc do
235
+ if inherit
236
+ new << "\n"
237
+ inherit.each do |i|
238
+ new << " run map.#{i}.stage.main\n"
239
+ end
240
+ end
241
+ new << "\n"
242
+ efini=proc{}
243
+ end
244
+ fini = proc{}
245
+ begn = proc { new << "stage {\n"; stagedone = true; begn=proc{} }
246
+
247
+ iter = -1
248
+ while iter+1 < map.length
249
+ iter += 1
250
+ i = map[iter]
251
+
252
+ cmt = nil
253
+ if i =~ /\A [^\s#]/ || i =~ /\A inherit:/
254
+ i.sub(/(#.*?)\z/) { cmt = $1 }
255
+ if cmt
256
+ new << " "*indent << cmt << "\n"
257
+ end
258
+
259
+ maybe_val = YAML.load(i.split(":").last)
260
+ case i.split(":").first.strip
261
+ when "inherit"
262
+ inherit = Array(maybe_val)
263
+ inherit = inherit.map do |inh|
264
+ short = inh.split("-")[2..3].join.downcase
265
+ new << "dependency #{inh.inspect}, as: #{short}\n"
266
+ short
267
+ end
268
+ new << "\n"
269
+ raise "Duplicate items" unless inherit.length == inherit.uniq.length
270
+ when "dictionary"
271
+ begn.()
272
+ fini.()
273
+ new << " # DICTIONARY\n"
274
+ new << " parallel {\n"
275
+ indent = 4
276
+ cur = dictionary
277
+ fini = proc{new << " }\n\n";indent = 2}
278
+ when "rules"
279
+ begn.()
280
+ fini.()
281
+ efini.()
282
+ new << " # RULES\n"
283
+ indent = 2
284
+ cur = rules
285
+ fini = proc{new << "\n"}
286
+ when "characters"
287
+ begn.()
288
+ fini.()
289
+ efini.()
290
+ new << " # CHARACTERS\n"
291
+ new << " parallel {\n"
292
+ indent = 4
293
+ cur = characters
294
+ fini = proc{new << " }\n\n";indent = 2}
295
+ when "postrules"
296
+ begn.()
297
+ fini.()
298
+ efini.()
299
+ new << " # POSTRULES\n"
300
+ indent = 2
301
+ cur = postrules
302
+ fini = proc{new << "\n"}
303
+ when "downcase"
304
+ downcase = maybe_val
305
+ when "title_case"
306
+ title_case = maybe_val
307
+ when "transcription"
308
+ transcription = maybe_val
309
+ # Those we will ignore for now
310
+ when "word_separator", "segmentation", "character_separator", "map"
311
+ # Those are bugs
312
+ when "title-case"
313
+ else
314
+ p i
315
+ end
316
+ else
317
+ cmt = ""
318
+ i = i.sub(/(#.*?)\z/) { cmt << $1; "" }
319
+ #new << " "*indent << i.strip << "\n"
320
+ if i.strip == ""
321
+ new << " "*indent << cmt << "\n"
322
+ next
323
+ end
324
+
325
+ case cur.object_id
326
+ when nil.object_id
327
+ raise "Unexpected line #{i}"
328
+ when characters.object_id, dictionary.object_id
329
+ k,v = i.split(":", 2).map(&:strip)
330
+ if !v || v == ""
331
+ v = ""
332
+ # Load array
333
+ iter2 = iter + 1
334
+ while iter2 < map.length
335
+ i2 = map[iter2]
336
+ break unless i2.strip =~ /\A-/
337
+ i2 = i2.sub(/(#.*?)\z/) { cmt << $1; "" }
338
+ v << "\n" << i2
339
+ iter2 += 1
340
+ end
341
+ iter = iter2 - 1
342
+ end
343
+
344
+ kk,vv = YAML.load(k), YAML.load(v)
345
+
346
+ kkk,vvv = kk.inspect, vv.inspect
347
+ if vv.class == Array
348
+ if vv.all? { |z| z.length == 1 }
349
+ vvv = "any(" + vv.join.inspect + ")"
350
+ else
351
+ vvv = "any(" + vv.inspect + ")"
352
+ end
353
+ elsif vv.class == NilClass
354
+ vvv = "none"
355
+ end
356
+ kkk,vvv = kk.dup,vv.dup
357
+ # This worked due to use of regexps... it should remove 1 slash.
358
+ kkk = kkk.gsub("\\\\u", "\\\\u")
359
+ kkk = kkk.gsub("\\\\U", "\\\\u")
360
+
361
+ new << process_line( kkk,vvv, indent: indent )
362
+ when rules.object_id, postrules.object_id
363
+ if i.strip =~ /\A- pattern\s*:/
364
+ _, k = i.split(":", 2)
365
+ ii = map[iter+1]
366
+ ii = ii.sub(/(#.*?)\z/) { cmt << $1; "" }
367
+ if ii.strip == ""
368
+ iter += 1
369
+ ii = map[iter+1]
370
+ ii = ii.sub(/(#.*?)\z/) { cmt << $1; "" }
371
+ end
372
+ if ii.strip =~ /\Aresult\s*:/
373
+ _, v = ii.split(":", 2)
374
+ else
375
+ raise "Unexpected(2): #{ii.strip.inspect}"
376
+ end
377
+ iter += 1
378
+ else
379
+ raise "Unexpected(1): #{i.strip.inspect}"
380
+ end
381
+
382
+ kk,vv = YAML.load(k), YAML.load(v)
383
+ # kkk,vvv = kk.inspect, vv.inspect
384
+ kkk,vvv = kk.dup,vv.dup
385
+
386
+ new << process_line( kkk,vvv, indent: indent )
387
+ end
388
+
389
+ if cmt != ""
390
+ new << " " << cmt << "\n"
391
+ else
392
+ new << "\n"
393
+ end
394
+ end
395
+ end
396
+
397
+ fini.()
398
+
399
+ unless stagedone
400
+ new << "stage {\n"
401
+ efini.()
402
+ end
403
+
404
+ new << " title_case\n" if title_case
405
+ new << " downcase\n" if downcase
406
+
407
+ new << "}\n\n"
408
+
409
+ if chain
410
+ new << "# This map is chained and probably depends on seq2seq:\n"
411
+ new << "# #{chain}\n\n"
412
+ end
413
+
414
+ if transcription
415
+ new << "# This map contains transcription and probably depends on seq2seq:\n"
416
+ new << "# transcription: #{transcription}\n\n"
417
+ end
418
+
419
+ new = new.gsub(/ +$/, '') # Cleanup trailing whitespaces
420
+
421
+ #new << map.join("\n")
422
+
423
+ File.write("../maps/maps-staging/#{old_map_name}.imp", new)
424
+
425
+ puts " done!"
426
+ end