interscript 2.1.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/Gemfile +6 -0
  3. data/Rakefile +9 -1
  4. data/bin/console +4 -8
  5. data/interscript.gemspec +2 -1
  6. data/lib/interscript.rb +78 -0
  7. data/lib/interscript/compiler/javascript.rb +6 -1
  8. data/lib/interscript/compiler/ruby.rb +5 -0
  9. data/lib/interscript/detector.rb +62 -0
  10. data/lib/interscript/dsl.rb +35 -2
  11. data/lib/interscript/dsl/document.rb +2 -1
  12. data/lib/interscript/dsl/group.rb +7 -6
  13. data/lib/interscript/dsl/group/parallel.rb +2 -2
  14. data/lib/interscript/dsl/tests.rb +2 -2
  15. data/lib/interscript/interpreter.rb +5 -1
  16. data/lib/interscript/node.rb +4 -0
  17. data/lib/interscript/node/alias_def.rb +6 -0
  18. data/lib/interscript/node/dependency.rb +16 -0
  19. data/lib/interscript/node/document.rb +34 -0
  20. data/lib/interscript/node/group.rb +13 -2
  21. data/lib/interscript/node/item.rb +4 -0
  22. data/lib/interscript/node/item/alias.rb +12 -0
  23. data/lib/interscript/node/item/any.rb +7 -0
  24. data/lib/interscript/node/item/capture.rb +11 -0
  25. data/lib/interscript/node/item/group.rb +29 -1
  26. data/lib/interscript/node/item/repeat.rb +4 -0
  27. data/lib/interscript/node/item/stage.rb +4 -0
  28. data/lib/interscript/node/item/string.rb +7 -0
  29. data/lib/interscript/node/metadata.rb +10 -0
  30. data/lib/interscript/node/rule.rb +3 -0
  31. data/lib/interscript/node/rule/funcall.rb +12 -2
  32. data/lib/interscript/node/rule/run.rb +16 -3
  33. data/lib/interscript/node/rule/sub.rb +165 -4
  34. data/lib/interscript/node/stage.rb +30 -4
  35. data/lib/interscript/node/tests.rb +10 -0
  36. data/lib/interscript/stdlib.rb +45 -3
  37. data/lib/interscript/utils/helpers.rb +39 -0
  38. data/lib/interscript/version.rb +1 -1
  39. data/lib/interscript/visualize/json.rb +12 -4
  40. metadata +17 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f92a8060d3eee8e0eb4815795294c045ccc7ad341c92a6ba48cb825be4598b95
4
- data.tar.gz: 6b977f41c3b1ba80bf25ef1d120666a228bcfd050f31f0a87a9c9ceeb86f5007
3
+ metadata.gz: 8955bc942068792948463573b04d1ba4304da1d7a6aac8cf2182017e7f145b5e
4
+ data.tar.gz: e1f57087a0cb5f1e99f2defbaf24a71eee35db9606b54353aeca7b73a19c5848
5
5
  SHA512:
6
- metadata.gz: 2fc32770358443ca0baae5416a918032b4a169fdc98b32d493fe1a487599fbdf9c5005fb53b98a25e5962f18f2ff52a1667010bda48394e891998437b2a2f1b6
7
- data.tar.gz: a96e1cc807ebc83c9630b1a78f5b9c26558f771954b55622ec55d0f0b0ff17601a27a5753ab407dc49ec0db8d960940c811148c2872ba9c56e1b19baa3a7ccb7
6
+ metadata.gz: ad126dead3c67005ecf110d04d4b48ea98fd4321a7535306da84a98eeb5bfa5aca35a81aee20468ee68d4110afcf113b8241a2d950840f3625192a5150e8e903
7
+ data.tar.gz: 41721c71bfbe2d1bfd6df7b0f6c89ab3ea8a7e386d4dead7a84baa28e4bec52ddcaca68e7c87886d842f29a4a54fe56e6e89f5b617eb749b7d9273d221a98c2c
data/Gemfile CHANGED
@@ -26,4 +26,10 @@ unless ENV["SKIP_JS"]
26
26
  end
27
27
  end
28
28
 
29
+ group :rababa do
30
+ gem 'rababa', github: 'interscript/rababa', ref: '70051da'
31
+ end
32
+
33
+ gem 'pry'
34
+
29
35
  gem 'simplecov', require: false, group: :test
data/Rakefile CHANGED
@@ -20,7 +20,10 @@ task :compile, [:compiler, :target] do |t, args|
20
20
 
21
21
  maplist = {}
22
22
 
23
- Interscript.maps.each do |map|
23
+ maps = Interscript.maps
24
+ maps = Interscript.exclude_maps(maps, compiler: compiler, platform: false)
25
+
26
+ maps.each do |map|
24
27
  code = compiler.(map).code
25
28
  File.write(args[:target] + "/" + map + "." + ext, code)
26
29
  maplist[map] = nil
@@ -68,6 +71,7 @@ task :generate_metadata_json do
68
71
  require "fileutils"
69
72
  require "json"
70
73
  require "interscript"
74
+ require "interscript/compiler/javascript"
71
75
 
72
76
  FileUtils.rm_rf(file = __dir__+"/metadata.json")
73
77
 
@@ -75,6 +79,10 @@ task :generate_metadata_json do
75
79
  parsed_map = Interscript.parse(map)
76
80
  md = parsed_map.metadata.to_hash
77
81
  md["test"] = parsed_map.tests&.data&.first
82
+ md["skip_js"] = Interscript.exclude_maps([map],
83
+ compiler: Interscript::Compiler::Javascript,
84
+ platform: false,
85
+ ).empty?
78
86
  [map, md]
79
87
  end.to_h
80
88
 
data/bin/console CHANGED
@@ -3,12 +3,8 @@
3
3
  require "bundler/setup"
4
4
  require "interscript"
5
5
 
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
6
+ require "interscript/utils/helpers"
7
+ include Interscript::Utils::Helpers
8
8
 
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
12
-
13
- require "irb"
14
- IRB.start(__FILE__)
9
+ require "pry"
10
+ Pry.start
data/interscript.gemspec CHANGED
@@ -27,5 +27,6 @@ Gem::Specification.new do |spec|
27
27
  spec.require_paths = ["lib"]
28
28
 
29
29
  spec.add_dependency "thor"
30
- spec.add_dependency "interscript-maps"
30
+ spec.add_dependency "interscript-maps", "~> #{Interscript::VERSION.split('.')[0,2].join(".")}.0a"
31
+ spec.add_dependency "text"
31
32
  end
data/lib/interscript.rb CHANGED
@@ -53,6 +53,16 @@ module Interscript
53
53
  output_file
54
54
  end
55
55
 
56
+ # Detects the transliteration that gives the most close approximation
57
+ # of transliterating source into destination.
58
+ #
59
+ # Set multiple: true to get a full report.
60
+ def detect(source, destination, **kwargs)
61
+ detector = Detector.new
62
+ detector.set_from_kwargs(**kwargs)
63
+ detector.(source, destination)
64
+ end
65
+
56
66
  def map_gems
57
67
  @map_gems ||= Gem.find_latest_files('interscript-maps.yaml').map do |i|
58
68
  [i, YAML.load_file(i)]
@@ -76,6 +86,56 @@ module Interscript
76
86
  end.compact.flatten
77
87
  end
78
88
 
89
+ def rababa_configs
90
+ @rababa_configs ||= map_gems.map do |i,v|
91
+ v["rababa-configs"]
92
+ end.compact.inject({}) do |a,b|
93
+ a.merge(b)
94
+ end
95
+ end
96
+
97
+ # This code is borrowed from Secryst and should end up in Rababa, but for now,
98
+ # let's keep it here.
99
+ def rababa_provision(model_name, model_uri)
100
+ require 'fileutils'
101
+ require 'open-uri'
102
+
103
+ # We provision the environment in the following way:
104
+ # First, we try the RABABA_DATA environment variable. If that's available,
105
+ # we use it to store the Rababa data we need. Otherwise, we try the following
106
+ # paths:
107
+
108
+ possible_paths = [
109
+ "/var/lib/rababa",
110
+ "/usr/local/share/rababa",
111
+ "/usr/share/rababa",
112
+ File.join(Dir.home, ".local/share/rababa")
113
+ ]
114
+
115
+ # We find the first writable path
116
+
117
+ write_path = nil
118
+
119
+ ([ENV["RABABA_DATA"]] + possible_paths).compact.each do |path|
120
+ FileUtils.mkdir_p(path)
121
+ write_path = path unless write_path
122
+ rescue
123
+ end
124
+
125
+ raise StandardError, "Can't find a writable path for Rababa. Consider setting a RABABA_DATA environment variable" unless write_path
126
+
127
+ model_path = "#{write_path}/model-#{model_name}.onnx"
128
+
129
+ # Redownload every hour
130
+ if File.exist?(model_path) && File.mtime(model_path) + 3600 >= Time.now
131
+ return model_path
132
+ else
133
+ data = URI.open(model_uri).read
134
+ File.write(model_path, data)
135
+ return model_path
136
+ end
137
+ end
138
+
79
139
  def map_aliases
80
140
  return @map_aliases if @map_aliases
81
141
 
@@ -99,6 +159,22 @@ module Interscript
99
159
 
100
160
  basename ? imps.map { |j| File.basename(j, ".#{ext}") } : imps
101
161
  end
162
+
163
+ # Removes the excluded maps for a given compiler and RUBY_PLATFORM.
164
+ # To be used by tests
165
+ # and builders. It uses the `skip` directive in interscript-maps.yaml
166
+ def exclude_maps(maps, compiler:, platform: true)
167
+ map_gems.each do |i,v|
168
+ [compiler.name, (Gem::Platform.local.os if platform)].compact.each do |name|
169
+ skips = v.dig('skip', name) || []
170
+ skips.each do |skip|
171
+ skip_re = /#{Regexp.escape(skip).gsub("\\*", ".*?")}/
172
+ maps = maps.grep_v(skip_re)
173
+ end
174
+ end
175
+ end
176
+ maps
177
+ end
102
178
  end
103
179
  end
104
180
 
@@ -109,3 +185,5 @@ require "interscript/interpreter"
109
185
 
110
186
  require 'interscript/dsl'
111
187
  require 'interscript/node'
188
+
189
+ require 'interscript/detector'
@@ -53,6 +53,7 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
53
53
 
54
54
  def compile_rule(r, map = @map, wrapper = false)
55
55
  c = ""
56
+ return c if r.reverse_run == true
56
57
  case r
57
58
  when Interscript::Node::Stage
58
59
  c += "map.stages.#{r.name} = function(s) {\n"
@@ -75,6 +76,7 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
75
76
  raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before
76
77
  raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after
77
78
 
79
+ next if i.reverse_run == true
78
80
  a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)]
79
81
  end
80
82
  ah = a.hash.abs
@@ -88,7 +90,8 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
88
90
  a = []
89
91
  Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i|
90
92
  raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i
91
-
93
+
94
+ next if i.reverse_run == true
92
95
  a << [build_regexp(i, map), compile_item(i.to, map, :parstr)]
93
96
  end
94
97
  ah = a.hash.abs
@@ -102,6 +105,8 @@ class Interscript::Compiler::Javascript < Interscript::Compiler
102
105
  from = %{"#{build_regexp(r, map).gsub("/", "\\\\/")}"}
103
106
  if r.to == :upcase
104
107
  to = 'function(a){return a.toUpperCase();}'
108
+ elsif r.to == :downcase
109
+ to = 'function(a){return a.toLowerCase();}'
105
110
  else
106
111
  to = compile_item(r.to, map, :str)
107
112
  end
@@ -42,6 +42,7 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
42
42
 
43
43
  def compile_rule(r, map = @map, wrapper = false)
44
44
  c = ""
45
+ return c if r.reverse_run == true
45
46
  case r
46
47
  when Interscript::Node::Stage
47
48
  c += "Interscript::Maps.add_map_stage \"#{@map.name}\", #{r.name.inspect} do |s|\n"
@@ -65,6 +66,7 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
65
66
  raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before
66
67
  raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after
67
68
 
69
+ next if i.reverse_run == true
68
70
  a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)]
69
71
  end
70
72
  ah = a.hash.abs
@@ -79,6 +81,7 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
79
81
  Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i|
80
82
  raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i
81
83
 
84
+ next if i.reverse_run == true
82
85
  a << [build_regexp(i, map), compile_item(i.to, map, :parstr)]
83
86
  end
84
87
  ah = a.hash.abs
@@ -92,6 +95,8 @@ class Interscript::Compiler::Ruby < Interscript::Compiler
92
95
  from = "/#{build_regexp(r, map).gsub("/", "\\\\/")}/"
93
96
  if r.to == :upcase
94
97
  to = '&:upcase'
98
+ elsif r.to == :downcase
99
+ to = '&:downcase'
95
100
  else
96
101
  to = compile_item(r.to, map, :str)
97
102
  end
@@ -0,0 +1,62 @@
1
+ require "text"
2
+
3
+ class Interscript::Detector
4
+ attr_accessor :compiler
5
+ attr_accessor :distance_computer
6
+ attr_accessor :map_pattern
7
+
8
+ # TODO: use transliterate_each
9
+ attr_accessor :each
10
+
11
+ attr_accessor :load_path
12
+ attr_accessor :cache
13
+
14
+ # Returns a summary of all detected transliterations
15
+ attr_accessor :multiple
16
+
17
+ def initialize
18
+ @compiler = Interscript::Interpreter
19
+ @distance_computer = DistanceComputer::Levenshtein
20
+ @map_pattern = "*"
21
+
22
+ @each = false
23
+
24
+ @load_path = false
25
+ @cache = CACHE
26
+ end
27
+
28
+ def set_from_kwargs(**kwargs)
29
+ kwargs.each do |k,v|
30
+ self.public_send(:"#{k}=", v)
31
+ end
32
+ end
33
+
34
+ def call(source, destination)
35
+ maps = Interscript.maps(select: @map_pattern, load_path: @load_path)
36
+ maps = Interscript.exclude_maps(maps, compiler: self.class)
37
+ maps = Interscript.exclude_maps(maps, compiler: @compiler)
38
+
39
+ summary = maps.map do |map|
40
+ try_dest = Interscript.transliterate(map, source, compiler: @compiler)
41
+
42
+ [map, try_dest]
43
+ end.map do |map, try_dest|
44
+ dist = @distance_computer.(try_dest, destination)
45
+
46
+ [map, dist]
47
+ end.sort_by(&:last).to_h
48
+
49
+ if @multiple
50
+ summary.to_h
51
+ else
52
+ summary.first.first
53
+ end
54
+ end
55
+
56
+ CACHE = {}
57
+
58
+ # A DistanceComputer needs to respond to #call(source, destination)
59
+ module DistanceComputer
60
+ Levenshtein = Text::Levenshtein.method(:distance)
61
+ end
62
+ end
@@ -2,12 +2,45 @@ require "yaml"
2
2
 
3
3
  module Interscript::DSL
4
4
  @cache = {}
5
- def self.parse(map_name)
5
+ def self.parse(map_name, reverse: true)
6
6
  # map name aliases? here may be a place to wrap it
7
7
 
8
8
  return @cache[map_name] if @cache[map_name]
9
- path = Interscript.locate(map_name)
9
+
10
+ # This is a composition, so let's make a new virtual map
11
+ # that calls all maps in a sequence.
12
+ if map_name.include? "|"
13
+ map_parts = map_name.split("|").map(&:strip)
14
+
15
+ doc = Interscript::DSL::Document.new(map_name) do
16
+ map_parts.each_with_index do |i, idx|
17
+ dependency i, as: :"part#{idx}"
18
+ end
19
+
20
+ stage {
21
+ map_parts.each_with_index do |i, idx|
22
+ run map[:"part#{idx}"].stage.main
23
+ end
24
+ }
25
+ end.node
26
+
27
+ return @cache[map_name] = doc
28
+ end
29
+
30
+ path = begin
31
+ Interscript.locate(map_name)
32
+ rescue Interscript::MapNotFoundError => e
33
+ # But maybe we called the map in a reversed fashion?
34
+ begin
35
+ raise e if reverse == false # Protect from an infinite loop
36
+ reverse_name = Interscript::Node::Document.reverse_name(map_name)
37
+ return @cache[map_name] = parse(reverse_name, reverse: false).reverse
38
+ rescue Interscript::MapNotFoundError
39
+ raise e
40
+ end
41
+ end
10
42
  library = path.end_with?(".iml")
43
+
11
44
  map_name = File.basename(path, ".imp")
12
45
  map_name = File.basename(map_name, ".iml")
13
46
 
@@ -37,10 +37,11 @@ class Interscript::DSL::Document
37
37
  @node.dep_aliases[dep.name] = dep if dep.name
38
38
  end
39
39
 
40
- def stage(name = :main, &block)
40
+ def stage(name = :main, dont_reverse: false, &block)
41
41
  puts "stage(#{name}) from #{self.inspect}" if $DEBUG
42
42
  stage = Interscript::DSL::Stage.new(name, &block)
43
43
  stage.node.doc_name = @node.name
44
+ stage.node.dont_reverse = dont_reverse
44
45
  @node.stages[name] = stage.node
45
46
  end
46
47
  end
@@ -8,16 +8,16 @@ class Interscript::DSL::Group
8
8
  self.instance_exec(&block)
9
9
  end
10
10
 
11
- def run(stage)
11
+ def run(stage, **kwargs)
12
12
  if stage.class != Interscript::Node::Item::Stage
13
13
  raise TypeError, "I::Node::Item::Stage expected, got #{stage.class}"
14
14
  end
15
- @node.children << Interscript::Node::Rule::Run.new(stage)
15
+ @node.children << Interscript::Node::Rule::Run.new(stage, **kwargs)
16
16
  end
17
17
 
18
18
  def sub(from, to, **kwargs, &block)
19
- puts "sub(#{from.inspect},#{to}, kargs = #{
20
- kargs.inspect
19
+ puts "sub(#{from.inspect},#{to}, kwargs = #{
20
+ kwargs.inspect
21
21
  }) from #{self.inspect}" if $DEBUG
22
22
 
23
23
  rule = Interscript::Node::Rule::Sub.new(from, to, **kwargs)
@@ -25,6 +25,7 @@ class Interscript::DSL::Group
25
25
  end
26
26
 
27
27
  def upcase; :upcase; end
28
+ def downcase; :downcase; end
28
29
 
29
30
  Interscript::Stdlib.available_functions.each do |fun|
30
31
  define_method fun do |**kwargs|
@@ -35,9 +36,9 @@ class Interscript::DSL::Group
35
36
  end
36
37
  end
37
38
 
38
- def parallel(&block)
39
+ def parallel(**kwargs, &block)
39
40
  puts "parallel(#{chars.inspect}) from #{self.inspect}" if $DEBUG
40
- group = Interscript::DSL::Group::Parallel.new(&block)
41
+ group = Interscript::DSL::Group::Parallel.new(**kwargs, &block)
41
42
  @node.children << group.node
42
43
  end
43
44
  end
@@ -1,6 +1,6 @@
1
1
  class Interscript::DSL::Group::Parallel < Interscript::DSL::Group
2
- def initialize(&block)
3
- @node = Interscript::Node::Group::Parallel.new
2
+ def initialize(reverse_run: nil, &block)
3
+ @node = Interscript::Node::Group::Parallel.new(reverse_run: reverse_run)
4
4
  self.instance_exec(&block)
5
5
  end
6
6
  end
@@ -6,7 +6,7 @@ class Interscript::DSL::Tests
6
6
  self.instance_exec(&block)
7
7
  end
8
8
 
9
- def test(from,to)
10
- @node << [from, to]
9
+ def test(from, to, reverse_run: nil)
10
+ @node << [from, to, reverse_run]
11
11
  end
12
12
  end
@@ -76,6 +76,7 @@ class Interscript::Interpreter < Interscript::Compiler
76
76
  end
77
77
 
78
78
  def execute_rule r
79
+ return if r.reverse_run == true
79
80
  case r
80
81
  when Interscript::Node::Group::Parallel
81
82
  if r.cached_tree
@@ -96,6 +97,7 @@ class Interscript::Interpreter < Interscript::Compiler
96
97
  raise ArgumentError, "Can't parallelize rules with :after" if i.after
97
98
  raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before
98
99
  raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after
100
+ next if i.reverse_run == true
99
101
  subs_array << [build_item(i.from, :par), build_item(i.to, :parstr)]
100
102
  end
101
103
  tree = Interscript::Stdlib.parallel_replace_compile_tree(subs_array) #.sort_by{|k,v| -k.length})
@@ -108,7 +110,7 @@ class Interscript::Interpreter < Interscript::Compiler
108
110
  subs_array = []
109
111
  Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| # rule.from.max_length gives somewhat better test results, why is that
110
112
  raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i
111
-
113
+ next if i.reverse_run == true
112
114
  subs_array << [build_regexp(i), build_item(i.to, :parstr)]
113
115
  end
114
116
  r.subs_regexp = Interscript::Stdlib.parallel_regexp_compile(subs_array)
@@ -129,6 +131,8 @@ class Interscript::Interpreter < Interscript::Compiler
129
131
  when Interscript::Node::Rule::Sub
130
132
  if r.to == :upcase
131
133
  @str = @str.gsub(Regexp.new(build_regexp(r)), &:upcase)
134
+ elsif r.to == :downcase
135
+ @str = @str.gsub(Regexp.new(build_regexp(r)), &:downcase)
132
136
  else
133
137
  @str = @str.gsub(Regexp.new(build_regexp(r)), build_item(r.to, :str))
134
138
  end