interscript 2.1.0 → 2.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +6 -0
  3. data/Rakefile +9 -1
  4. data/bin/console +4 -8
  5. data/interscript.gemspec +2 -1
  6. data/lib/interscript.rb +78 -0
  7. data/lib/interscript/compiler/javascript.rb +6 -1
  8. data/lib/interscript/compiler/ruby.rb +5 -0
  9. data/lib/interscript/detector.rb +62 -0
  10. data/lib/interscript/dsl.rb +35 -2
  11. data/lib/interscript/dsl/document.rb +2 -1
  12. data/lib/interscript/dsl/group.rb +7 -6
  13. data/lib/interscript/dsl/group/parallel.rb +2 -2
  14. data/lib/interscript/dsl/tests.rb +2 -2
  15. data/lib/interscript/interpreter.rb +5 -1
  16. data/lib/interscript/node.rb +4 -0
  17. data/lib/interscript/node/alias_def.rb +6 -0
  18. data/lib/interscript/node/dependency.rb +16 -0
  19. data/lib/interscript/node/document.rb +34 -0
  20. data/lib/interscript/node/group.rb +13 -2
  21. data/lib/interscript/node/item.rb +4 -0
  22. data/lib/interscript/node/item/alias.rb +12 -0
  23. data/lib/interscript/node/item/any.rb +7 -0
  24. data/lib/interscript/node/item/capture.rb +11 -0
  25. data/lib/interscript/node/item/group.rb +29 -1
  26. data/lib/interscript/node/item/repeat.rb +4 -0
  27. data/lib/interscript/node/item/stage.rb +4 -0
  28. data/lib/interscript/node/item/string.rb +7 -0
  29. data/lib/interscript/node/metadata.rb +10 -0
  30. data/lib/interscript/node/rule.rb +3 -0
  31. data/lib/interscript/node/rule/funcall.rb +12 -2
  32. data/lib/interscript/node/rule/run.rb +16 -3
  33. data/lib/interscript/node/rule/sub.rb +165 -4
  34. data/lib/interscript/node/stage.rb +30 -4
  35. data/lib/interscript/node/tests.rb +10 -0
  36. data/lib/interscript/stdlib.rb +45 -3
  37. data/lib/interscript/utils/helpers.rb +39 -0
  38. data/lib/interscript/version.rb +1 -1
  39. data/lib/interscript/visualize/json.rb +12 -4
  40. metadata +17 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f92a8060d3eee8e0eb4815795294c045ccc7ad341c92a6ba48cb825be4598b95
4
- data.tar.gz: 6b977f41c3b1ba80bf25ef1d120666a228bcfd050f31f0a87a9c9ceeb86f5007
3
+ metadata.gz: 8955bc942068792948463573b04d1ba4304da1d7a6aac8cf2182017e7f145b5e
4
+ data.tar.gz: e1f57087a0cb5f1e99f2defbaf24a71eee35db9606b54353aeca7b73a19c5848
5
5
  SHA512:
6
- metadata.gz: 2fc32770358443ca0baae5416a918032b4a169fdc98b32d493fe1a487599fbdf9c5005fb53b98a25e5962f18f2ff52a1667010bda48394e891998437b2a2f1b6
7
- data.tar.gz: a96e1cc807ebc83c9630b1a78f5b9c26558f771954b55622ec55d0f0b0ff17601a27a5753ab407dc49ec0db8d960940c811148c2872ba9c56e1b19baa3a7ccb7
6
+ metadata.gz: ad126dead3c67005ecf110d04d4b48ea98fd4321a7535306da84a98eeb5bfa5aca35a81aee20468ee68d4110afcf113b8241a2d950840f3625192a5150e8e903
7
+ data.tar.gz: 41721c71bfbe2d1bfd6df7b0f6c89ab3ea8a7e386d4dead7a84baa28e4bec52ddcaca68e7c87886d842f29a4a54fe56e6e89f5b617eb749b7d9273d221a98c2c
data/Gemfile CHANGED
@@ -26,4 +26,10 @@ unless ENV["SKIP_JS"]
26
26
  end
27
27
  end
28
28
 
29
+ group :rababa do
30
+ gem 'rababa', github: 'interscript/rababa', ref: '70051da'
31
+ end
32
+
33
+ gem 'pry'
34
+
29
35
  gem 'simplecov', require: false, group: :test
data/Rakefile CHANGED
@@ -20,7 +20,10 @@ task :compile, [:compiler, :target] do |t, args|
20
20
 
21
21
  maplist = {}
22
22
 
23
- Interscript.maps.each do |map|
23
+ maps = Interscript.maps
24
+ maps = Interscript.exclude_maps(maps, compiler: compiler, platform: false)
25
+
26
+ maps.each do |map|
24
27
  code = compiler.(map).code
25
28
  File.write(args[:target] + "/" + map + "." + ext, code)
26
29
  maplist[map] = nil
@@ -68,6 +71,7 @@ task :generate_metadata_json do
68
71
  require "fileutils"
69
72
  require "json"
70
73
  require "interscript"
74
+ require "interscript/compiler/javascript"
71
75
 
72
76
  FileUtils.rm_rf(file = __dir__+"/metadata.json")
73
77
 
@@ -75,6 +79,10 @@ task :generate_metadata_json do
75
79
  parsed_map = Interscript.parse(map)
76
80
  md = parsed_map.metadata.to_hash
77
81
  md["test"] = parsed_map.tests&.data&.first
82
+ md["skip_js"] = Interscript.exclude_maps([map],
83
+ compiler: Interscript::Compiler::Javascript,
84
+ platform: false,
85
+ ).empty?
78
86
  [map, md]
79
87
  end.to_h
80
88
 
data/bin/console CHANGED
@@ -3,12 +3,8 @@
3
3
  require "bundler/setup"
4
4
  require "interscript"
5
5
 
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
6
+ require "interscript/utils/helpers"
7
+ include Interscript::Utils::Helpers
8
8
 
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
12
-
13
- require "irb"
14
- IRB.start(__FILE__)
9
+ require "pry"
10
+ Pry.start
data/interscript.gemspec CHANGED
@@ -27,5 +27,6 @@ Gem::Specification.new do |spec|
27
27
  spec.require_paths = ["lib"]
28
28
 
29
29
  spec.add_dependency "thor"
30
- spec.add_dependency "interscript-maps"
30
+ spec.add_dependency "interscript-maps", "~> #{Interscript::VERSION.split('.')[0,2].join(".")}.0a"
31
+ spec.add_dependency "text"
31
32
  end
data/lib/interscript.rb CHANGED
@@ -53,6 +53,16 @@ module Interscript
53
53
  output_file
54
54
  end
55
55
 
56
+ # Detects the transliteration that gives the closest approximation
57
+ # of transliterating source into destination.
58
+ #
59
+ # Set multiple: true to get a full report.
60
+ def detect(source, destination, **kwargs)
61
+ detector = Detector.new
62
+ detector.set_from_kwargs(**kwargs)
63
+ detector.(source, destination)
64
+ end
65
+
56
66
  def map_gems
57
67
  @map_gems ||= Gem.find_latest_files('interscript-maps.yaml').map do |i|
58
68
  [i, YAML.load_file(i)]
@@ -76,6 +86,56 @@ module Interscript
76
86
  end.compact.flatten
77
87
  end
78
88
 
89
+ def rababa_configs
90
+ @rababa_configs ||= map_gems.map do |i,v|
91
+ v["rababa-configs"]
92
+ end.compact.inject({}) do |a,b|
93
+ a.merge(b)
94
+ end
95
+ end
96
+
97
+ # This code is borrowed from Secryst and should end up in Rababa, but for now,
98
+ # let's keep it here.
99
+ def rababa_provision(model_name, model_uri)
100
+ require 'fileutils'
101
+ require 'open-uri'
102
+
103
+ # We provision the environment in the following way:
104
+ # First, we try the RABABA_DATA environment variable. If that's available,
105
+ # we use it to store the Rababa data we need. Otherwise, we try the following
106
+ # paths:
107
+
108
+ possible_paths = [
109
+ "/var/lib/rababa",
110
+ "/usr/local/share/rababa",
111
+ "/usr/share/rababa",
112
+ File.join(Dir.home, ".local/share/rababa")
113
+ ]
114
+
115
+ # We find the first writable path
116
+
117
+ write_path = nil
118
+
119
+ ([ENV["RABABA_DATA"]] + possible_paths).compact.each do |path|
120
+ FileUtils.mkdir_p(path)
121
+ write_path = path unless write_path
122
+ rescue
123
+ end
124
+
125
+ raise StandardError, "Can't find a writable path for Rababa. Consider setting a RABABA_DATA environment variable" unless write_path
126
+
127
+ model_path = "#{write_path}/model-#{model_name}.onnx"
128
+
129
+ # Redownload every hour
130
+ if File.exist?(model_path) && File.mtime(model_path) + 3600 >= Time.now
131
+ return model_path
132
+ else
133
+ data = URI.open(model_uri).read
134
+ File.write(model_path, data)
135
+ return model_path
136
+ end
137
+ end
138
+
79
139
  def map_aliases
80
140
  return @map_aliases if @map_aliases
81
141
 
@@ -99,6 +159,22 @@ module Interscript
99
159
 
100
160
  basename ? imps.map { |j| File.basename(j, ".#{ext}") } : imps
101
161
  end
162
+
163
+ # Removes the excluded maps for a given compiler and RUBY_PLATFORM.
164
+ # To be used by tests
165
+ # and builders. It uses the `skip` directive in interscript-maps.yaml
166
+ def exclude_maps(maps, compiler:, platform: true)
167
+ map_gems.each do |i,v|
168
+ [compiler.name, (Gem::Platform.local.os if platform)].compact.each do |name|
169
+ skips = v.dig('skip', name) || []
170
+ skips.each do |skip|
171
+ skip_re = /#{Regexp.escape(skip).gsub("\\*", ".*?")}/
172
+ maps = maps.grep_v(skip_re)
173
+ end
174
+ end
175
+ end
176
+ maps
177
+ end
102
178
  end
103
179
  end
104
180
 
@@ -109,3 +185,5 @@ require "interscript/interpreter"
109
185
 
110
186
  require 'interscript/dsl'
111
187
  require 'interscript/node'
188
+
189
+ require 'interscript/detector'
@@ -53,6 +53,7 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
53
53
 
54
54
  def compile_rule(r, map = @map, wrapper = false)
55
55
  c = ""
56
+ return c if r.reverse_run == true
56
57
  case r
57
58
  when Interscript::Node::Stage
58
59
  c += "map.stages.#{r.name} = function(s) {\n"
@@ -75,6 +76,7 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
75
76
  raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before
76
77
  raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after
77
78
 
79
+ next if i.reverse_run == true
78
80
  a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)]
79
81
  end
80
82
  ah = a.hash.abs
@@ -88,7 +90,8 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
88
90
  a = []
89
91
  Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i|
90
92
  raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i
91
-
93
+
94
+ next if i.reverse_run == true
92
95
  a << [build_regexp(i, map), compile_item(i.to, map, :parstr)]
93
96
  end
94
97
  ah = a.hash.abs
@@ -102,6 +105,8 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
102
105
  from = %{"#{build_regexp(r, map).gsub("/", "\\\\/")}"}
103
106
  if r.to == :upcase
104
107
  to = 'function(a){return a.toUpperCase();}'
108
+ elsif r.to == :downcase
109
+ to = 'function(a){return a.toLowerCase();}'
105
110
  else
106
111
  to = compile_item(r.to, map, :str)
107
112
  end
@@ -42,6 +42,7 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
42
42
 
43
43
  def compile_rule(r, map = @map, wrapper = false)
44
44
  c = ""
45
+ return c if r.reverse_run == true
45
46
  case r
46
47
  when Interscript::Node::Stage
47
48
  c += "Interscript::Maps.add_map_stage \"#{@map.name}\", #{r.name.inspect} do |s|\n"
@@ -65,6 +66,7 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
65
66
  raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before
66
67
  raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after
67
68
 
69
+ next if i.reverse_run == true
68
70
  a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)]
69
71
  end
70
72
  ah = a.hash.abs
@@ -79,6 +81,7 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
79
81
  Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i|
80
82
  raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i
81
83
 
84
+ next if i.reverse_run == true
82
85
  a << [build_regexp(i, map), compile_item(i.to, map, :parstr)]
83
86
  end
84
87
  ah = a.hash.abs
@@ -92,6 +95,8 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
92
95
  from = "/#{build_regexp(r, map).gsub("/", "\\\\/")}/"
93
96
  if r.to == :upcase
94
97
  to = '&:upcase'
98
+ elsif r.to == :downcase
99
+ to = '&:downcase'
95
100
  else
96
101
  to = compile_item(r.to, map, :str)
97
102
  end
@@ -0,0 +1,62 @@
1
+ require "text"
2
+
3
+ class Interscript::Detector
4
+ attr_accessor :compiler
5
+ attr_accessor :distance_computer
6
+ attr_accessor :map_pattern
7
+
8
+ # TODO: use transliterate_each
9
+ attr_accessor :each
10
+
11
+ attr_accessor :load_path
12
+ attr_accessor :cache
13
+
14
+ # When true, return a summary of all detected transliterations instead of only the best match
15
+ attr_accessor :multiple
16
+
17
+ def initialize
18
+ @compiler = Interscript::Interpreter
19
+ @distance_computer = DistanceComputer::Levenshtein
20
+ @map_pattern = "*"
21
+
22
+ @each = false
23
+
24
+ @load_path = false
25
+ @cache = CACHE
26
+ end
27
+
28
+ def set_from_kwargs(**kwargs)
29
+ kwargs.each do |k,v|
30
+ self.public_send(:"#{k}=", v)
31
+ end
32
+ end
33
+
34
+ def call(source, destination)
35
+ maps = Interscript.maps(select: @map_pattern, load_path: @load_path)
36
+ maps = Interscript.exclude_maps(maps, compiler: self.class)
37
+ maps = Interscript.exclude_maps(maps, compiler: @compiler)
38
+
39
+ summary = maps.map do |map|
40
+ try_dest = Interscript.transliterate(map, source, compiler: @compiler)
41
+
42
+ [map, try_dest]
43
+ end.map do |map, try_dest|
44
+ dist = @distance_computer.(try_dest, destination)
45
+
46
+ [map, dist]
47
+ end.sort_by(&:last).to_h
48
+
49
+ if @multiple
50
+ summary.to_h
51
+ else
52
+ summary.first.first
53
+ end
54
+ end
55
+
56
+ CACHE = {}
57
+
58
+ # A DistanceComputer needs to respond to #call(source, destination)
59
+ module DistanceComputer
60
+ Levenshtein = Text::Levenshtein.method(:distance)
61
+ end
62
+ end
@@ -2,12 +2,45 @@ require "yaml"
2
2
 
3
3
  module Interscript::DSL
4
4
  @cache = {}
5
- def self.parse(map_name)
5
+ def self.parse(map_name, reverse: true)
6
6
  # map name aliases? here may be a place to wrap it
7
7
 
8
8
  return @cache[map_name] if @cache[map_name]
9
- path = Interscript.locate(map_name)
9
+
10
+ # This is a composition, so let's make a new virtual map
11
+ # that calls all maps in a sequence.
12
+ if map_name.include? "|"
13
+ map_parts = map_name.split("|").map(&:strip)
14
+
15
+ doc = Interscript::DSL::Document.new(map_name) do
16
+ map_parts.each_with_index do |i, idx|
17
+ dependency i, as: :"part#{idx}"
18
+ end
19
+
20
+ stage {
21
+ map_parts.each_with_index do |i, idx|
22
+ run map[:"part#{idx}"].stage.main
23
+ end
24
+ }
25
+ end.node
26
+
27
+ return @cache[map_name] = doc
28
+ end
29
+
30
+ path = begin
31
+ Interscript.locate(map_name)
32
+ rescue Interscript::MapNotFoundError => e
33
+ # But maybe we called the map in a reversed fashion?
34
+ begin
35
+ raise e if reverse == false # Protect from an infinite loop
36
+ reverse_name = Interscript::Node::Document.reverse_name(map_name)
37
+ return @cache[map_name] = parse(reverse_name, reverse: false).reverse
38
+ rescue Interscript::MapNotFoundError
39
+ raise e
40
+ end
41
+ end
10
42
  library = path.end_with?(".iml")
43
+
11
44
  map_name = File.basename(path, ".imp")
12
45
  map_name = File.basename(map_name, ".iml")
13
46
 
@@ -37,10 +37,11 @@ class Interscript::DSL::Document
37
37
  @node.dep_aliases[dep.name] = dep if dep.name
38
38
  end
39
39
 
40
- def stage(name = :main, &block)
40
+ def stage(name = :main, dont_reverse: false, &block)
41
41
  puts "stage(#{name}) from #{self.inspect}" if $DEBUG
42
42
  stage = Interscript::DSL::Stage.new(name, &block)
43
43
  stage.node.doc_name = @node.name
44
+ stage.node.dont_reverse = dont_reverse
44
45
  @node.stages[name] = stage.node
45
46
  end
46
47
  end
@@ -8,16 +8,16 @@ class Interscript::DSL::Group
8
8
  self.instance_exec(&block)
9
9
  end
10
10
 
11
- def run(stage)
11
+ def run(stage, **kwargs)
12
12
  if stage.class != Interscript::Node::Item::Stage
13
13
  raise TypeError, "I::Node::Item::Stage expected, got #{stage.class}"
14
14
  end
15
- @node.children << Interscript::Node::Rule::Run.new(stage)
15
+ @node.children << Interscript::Node::Rule::Run.new(stage, **kwargs)
16
16
  end
17
17
 
18
18
  def sub(from, to, **kwargs, &block)
19
- puts "sub(#{from.inspect},#{to}, kargs = #{
20
- kargs.inspect
19
+ puts "sub(#{from.inspect},#{to}, kwargs = #{
20
+ kwargs.inspect
21
21
  }) from #{self.inspect}" if $DEBUG
22
22
 
23
23
  rule = Interscript::Node::Rule::Sub.new(from, to, **kwargs)
@@ -25,6 +25,7 @@ class Interscript::DSL::Group
25
25
  end
26
26
 
27
27
  def upcase; :upcase; end
28
+ def downcase; :downcase; end
28
29
 
29
30
  Interscript::Stdlib.available_functions.each do |fun|
30
31
  define_method fun do |**kwargs|
@@ -35,9 +36,9 @@ class Interscript::DSL::Group
35
36
  end
36
37
  end
37
38
 
38
- def parallel(&block)
39
+ def parallel(**kwargs, &block)
39
40
  puts "parallel(#{chars.inspect}) from #{self.inspect}" if $DEBUG
40
- group = Interscript::DSL::Group::Parallel.new(&block)
41
+ group = Interscript::DSL::Group::Parallel.new(**kwargs, &block)
41
42
  @node.children << group.node
42
43
  end
43
44
  end
@@ -1,6 +1,6 @@
1
1
  class Interscript::DSL::Group::Parallel < Interscript::DSL::Group
2
- def initialize(&block)
3
- @node = Interscript::Node::Group::Parallel.new
2
+ def initialize(reverse_run: nil, &block)
3
+ @node = Interscript::Node::Group::Parallel.new(reverse_run: reverse_run)
4
4
  self.instance_exec(&block)
5
5
  end
6
6
  end
@@ -6,7 +6,7 @@ class Interscript::DSL::Tests
6
6
  self.instance_exec(&block)
7
7
  end
8
8
 
9
- def test(from,to)
10
- @node << [from, to]
9
+ def test(from, to, reverse_run: nil)
10
+ @node << [from, to, reverse_run]
11
11
  end
12
12
  end
@@ -76,6 +76,7 @@ class Interscript::Interpreter < Interscript::Compiler
76
76
  end
77
77
 
78
78
  def execute_rule r
79
+ return if r.reverse_run == true
79
80
  case r
80
81
  when Interscript::Node::Group::Parallel
81
82
  if r.cached_tree
@@ -96,6 +97,7 @@ class Interscript::Interpreter < Interscript::Compiler
96
97
  raise ArgumentError, "Can't parallelize rules with :after" if i.after
97
98
  raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before
98
99
  raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after
100
+ next if i.reverse_run == true
99
101
  subs_array << [build_item(i.from, :par), build_item(i.to, :parstr)]
100
102
  end
101
103
  tree = Interscript::Stdlib.parallel_replace_compile_tree(subs_array) #.sort_by{|k,v| -k.length})
@@ -108,7 +110,7 @@ class Interscript::Interpreter < Interscript::Compiler
108
110
  subs_array = []
109
111
  Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| # rule.from.max_length gives somewhat better test results, why is that
110
112
  raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i
111
-
113
+ next if i.reverse_run == true
112
114
  subs_array << [build_regexp(i), build_item(i.to, :parstr)]
113
115
  end
114
116
  r.subs_regexp = Interscript::Stdlib.parallel_regexp_compile(subs_array)
@@ -129,6 +131,8 @@ class Interscript::Interpreter < Interscript::Compiler
129
131
  when Interscript::Node::Rule::Sub
130
132
  if r.to == :upcase
131
133
  @str = @str.gsub(Regexp.new(build_regexp(r)), &:upcase)
134
+ elsif r.to == :downcase
135
+ @str = @str.gsub(Regexp.new(build_regexp(r)), &:downcase)
132
136
  else
133
137
  @str = @str.gsub(Regexp.new(build_regexp(r)), build_item(r.to, :str))
134
138
  end