calyx 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/calyx.gemspec +1 -1
- data/lib/calyx.rb +11 -8
- data/lib/calyx/modifiers.rb +0 -2
- data/lib/calyx/prefix_tree.rb +191 -0
- data/lib/calyx/production/affix_table.rb +53 -0
- data/lib/calyx/production/uniform_branch.rb +6 -0
- data/lib/calyx/production/weighted_branch.rb +6 -0
- data/lib/calyx/registry.rb +33 -5
- data/lib/calyx/rule.rb +14 -3
- data/lib/calyx/{production → syntax}/choices.rb +1 -1
- data/lib/calyx/{production → syntax}/concat.rb +18 -22
- data/lib/calyx/syntax/expression.rb +87 -0
- data/lib/calyx/{production → syntax}/memo.rb +1 -1
- data/lib/calyx/{production → syntax}/non_terminal.rb +1 -1
- data/lib/calyx/syntax/paired_mapping.rb +53 -0
- data/lib/calyx/{production → syntax}/terminal.rb +1 -1
- data/lib/calyx/syntax/token.rb +9 -0
- data/lib/calyx/{production → syntax}/unique.rb +1 -1
- data/lib/calyx/{production → syntax}/weighted_choices.rb +2 -2
- data/lib/calyx/version.rb +1 -1
- metadata +21 -16
- data/.travis.yml +0 -10
- data/lib/calyx/production/expression.rb +0 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8278055314ef40a029522a8237f5c1956e27b449b31bf4bb8d6d2d7e5b6ff904
|
4
|
+
data.tar.gz: ccc0ebfe719a5bfc88582dfb78225edb7562d6e3a7e23e672dcb2822d1c0b9a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 132390da1638e0b3c4bffc237c9dfe5b565b556269ea55da011aaaddaa7884a6ca6294190c4ceb76bbbd2977b0011601a885df090c2cdf468ad6bac7c657e8ee
|
7
|
+
data.tar.gz: e973005cbb1717949cdda94ea7c1c772301d8fcc1104b38f4a51fa5dd924b3025cf46a960909b7997748db1ee4373e86bd05e01f89eb9488058f703b49015f12
|
data/.github/workflows/ruby.yml
CHANGED
@@ -23,8 +23,8 @@ jobs:
|
|
23
23
|
- name: Set up Ruby
|
24
24
|
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
25
25
|
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
26
|
-
|
27
|
-
uses: ruby/setup-ruby@ec106b438a1ff6ff109590de34ddc62c540232e0
|
26
|
+
uses: ruby/setup-ruby@v1
|
27
|
+
#uses: ruby/setup-ruby@ec106b438a1ff6ff109590de34ddc62c540232e0
|
28
28
|
with:
|
29
29
|
ruby-version: 2.6
|
30
30
|
- name: Install dependencies
|
data/calyx.gemspec
CHANGED
@@ -19,6 +19,6 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
21
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
22
|
-
spec.add_development_dependency 'rake', '~>
|
22
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
23
23
|
spec.add_development_dependency 'rspec', '~> 3.4'
|
24
24
|
end
|
data/lib/calyx.rb
CHANGED
@@ -8,12 +8,15 @@ require 'calyx/errors'
|
|
8
8
|
require 'calyx/format'
|
9
9
|
require 'calyx/registry'
|
10
10
|
require 'calyx/modifiers'
|
11
|
+
require 'calyx/prefix_tree'
|
11
12
|
require 'calyx/mapping'
|
12
|
-
require 'calyx/production/
|
13
|
-
require 'calyx/
|
14
|
-
require 'calyx/
|
15
|
-
require 'calyx/
|
16
|
-
require 'calyx/
|
17
|
-
require 'calyx/
|
18
|
-
require 'calyx/
|
19
|
-
require 'calyx/
|
13
|
+
require 'calyx/production/affix_table'
|
14
|
+
require 'calyx/syntax/token'
|
15
|
+
require 'calyx/syntax/memo'
|
16
|
+
require 'calyx/syntax/unique'
|
17
|
+
require 'calyx/syntax/choices'
|
18
|
+
require 'calyx/syntax/concat'
|
19
|
+
require 'calyx/syntax/expression'
|
20
|
+
require 'calyx/syntax/non_terminal'
|
21
|
+
require 'calyx/syntax/terminal'
|
22
|
+
require 'calyx/syntax/weighted_choices'
|
data/lib/calyx/modifiers.rb
CHANGED
data/lib/calyx/prefix_tree.rb
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
module Calyx
|
2
|
+
PrefixNode = Struct.new(:children, :index)
|
3
|
+
PrefixEdge = Struct.new(:node, :label, :wildcard?)
|
4
|
+
PrefixMatch = Struct.new(:label, :index, :captured)
|
5
|
+
|
6
|
+
class PrefixTree
|
7
|
+
def initialize
|
8
|
+
@root = PrefixNode.new([], nil)
|
9
|
+
end
|
10
|
+
|
11
|
+
def insert(label, index)
|
12
|
+
if @root.children.empty?
|
13
|
+
@root.children << PrefixEdge.new(PrefixNode.new([], index), label, false)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def add_all(elements)
|
18
|
+
elements.each_with_index { |el, i| add(el, i) }
|
19
|
+
end
|
20
|
+
|
21
|
+
def add(label, index)
|
22
|
+
parts = label.split(/(%)/).reject { |p| p.empty? }
|
23
|
+
parts_count = parts.count
|
24
|
+
|
25
|
+
# Can’t use more than one capture symbol which gives the following splits:
|
26
|
+
# - ["literal"]
|
27
|
+
# - ["%", "literal"]
|
28
|
+
# - ["literal", "%"]
|
29
|
+
# - ["literal", "%", "literal"]
|
30
|
+
if parts_count > 3
|
31
|
+
raise "Too many capture patterns: #{label}"
|
32
|
+
end
|
33
|
+
|
34
|
+
current_node = @root
|
35
|
+
|
36
|
+
parts.each_with_index do |part, i|
|
37
|
+
index_slot = (i == parts_count - 1) ? index : nil
|
38
|
+
is_wildcard = part == "%"
|
39
|
+
matched_prefix = false
|
40
|
+
|
41
|
+
current_node.children.each_with_index do |edge, j|
|
42
|
+
prefix = common_prefix(edge.label, part)
|
43
|
+
unless prefix.empty?
|
44
|
+
matched_prefix = true
|
45
|
+
|
46
|
+
if prefix == edge.label
|
47
|
+
# Current prefix matches the edge label so we can continue down the
|
48
|
+
# tree without mutating the current branch
|
49
|
+
next_node = PrefixNode.new([], index_slot)
|
50
|
+
current_node.children << PrefixEdge.new(next_node, label.delete_prefix(prefix), is_wildcard)
|
51
|
+
else
|
52
|
+
# We have a partial match on current edge so replace it with the new
|
53
|
+
# prefix then rejoin the remaining suffix to the existing branch
|
54
|
+
edge.label = edge.label.delete_prefix(prefix)
|
55
|
+
prefix_node = PrefixNode.new([edge], nil)
|
56
|
+
next_node = PrefixNode.new([], index_slot)
|
57
|
+
prefix_node.children << PrefixEdge.new(next_node, label.delete_prefix(prefix), is_wildcard)
|
58
|
+
current_node.children[j] = PrefixEdge.new(prefix_node, prefix, is_wildcard)
|
59
|
+
end
|
60
|
+
|
61
|
+
current_node = next_node
|
62
|
+
break
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# No existing edges have a common prefix so push a new branch onto the tree
|
67
|
+
# at the current level
|
68
|
+
unless matched_prefix
|
69
|
+
next_edge = PrefixEdge.new(PrefixNode.new([], index_slot), part, is_wildcard)
|
70
|
+
current_node.children << next_edge
|
71
|
+
current_node = next_edge.node
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# This was basically ported from the pseudocode found on Wikipedia to Ruby,
|
77
|
+
# with a lot of extra internal state tracking that is totally absent from
|
78
|
+
# most algorithmic descriptions. This ends up making a real mess of the
|
79
|
+
# expression of the algorithm, mostly due to choices and conflicts between
|
80
|
+
# whether to go with the standard iterative and procedural flow of statements
|
81
|
+
# or use a more functional style. A mangle that speaks to the questions
|
82
|
+
# around portability between different languages. Is this codebase a design
|
83
|
+
# prototype? Is it an evolving example that should guide implementations in
|
84
|
+
# other languages?
|
85
|
+
#
|
86
|
+
# The problem with code like this is that it’s a bit of a maintenance burden
|
87
|
+
# if not structured compactly and precisely enough to not matter and having
|
88
|
+
# enough tests passing that it lasts for a few years without becoming a
|
89
|
+
# nuisance or leading to too much nonsense.
|
90
|
+
#
|
91
|
+
# There are several ways to implement this, some of these may work better or
|
92
|
+
# worse, and this might be quite different across multiple languages so what
|
93
|
+
# goes well in one place could suck in other places. The only way to make a
|
94
|
+
# good decision around it is to learn via testing and experiments.
|
95
|
+
#
|
96
|
+
# Alternative possible implementations:
|
97
|
+
# - Regex compilation on registration, use existing legacy mapping code
|
98
|
+
# - Prefix tree, trie, radix tree/trie, compressed bitpatterns, etc
|
99
|
+
# - Split string flip, imperative list processing hacks
|
100
|
+
# (easier for more people to contribute?)
|
101
|
+
def lookup(label)
|
102
|
+
current_node = @root
|
103
|
+
chars_consumed = 0
|
104
|
+
chars_captured = nil
|
105
|
+
label_length = label.length
|
106
|
+
|
107
|
+
# Traverse the tree until reaching a leaf node or all input characters are consumed
|
108
|
+
while current_node != nil && !current_node.children.empty? && chars_consumed < label_length
|
109
|
+
# Candidate edge pointing to the next node to check
|
110
|
+
candidate_edge = nil
|
111
|
+
|
112
|
+
# Traverse from the current node down the tree looking for candidate edges
|
113
|
+
current_node.children.each do |edge|
|
114
|
+
# Generate a suffix based on the prefix already consumed
|
115
|
+
sub_label = label[chars_consumed, label_length]
|
116
|
+
|
117
|
+
# If this edge is a wildcard we check the next level of the tree
|
118
|
+
if edge.wildcard?
|
119
|
+
# Wildcard pattern is anchored to the end of the string so we can
|
120
|
+
# consume all remaining characters and pick this as an edge candidate
|
121
|
+
if edge.node.children.empty?
|
122
|
+
chars_captured = label[chars_consumed, sub_label.length]
|
123
|
+
chars_consumed += sub_label.length
|
124
|
+
candidate_edge = edge
|
125
|
+
break
|
126
|
+
end
|
127
|
+
|
128
|
+
# The wildcard is anchored to the start or embedded in the middle of
|
129
|
+
# the string so we traverse this edge and scan the next level of the
|
130
|
+
# tree with a greedy lookahead. This means we will always match as
|
131
|
+
# much of the wildcard string as possible when there is a trailing
|
132
|
+
# suffix that could be repeated several times within the characters
|
133
|
+
# consumed by the wildcard pattern.
|
134
|
+
#
|
135
|
+
# For example, we expect `"te%s"` to match on `"tests"` rather than
|
136
|
+
# bail out after matching the first three characters `"tes"`.
|
137
|
+
edge.node.children.each do |lookahead_edge|
|
138
|
+
prefix = sub_label.rindex(lookahead_edge.label)
|
139
|
+
if prefix
|
140
|
+
chars_captured = label[chars_consumed, prefix]
|
141
|
+
chars_consumed += prefix + lookahead_edge.label.length
|
142
|
+
candidate_edge = lookahead_edge
|
143
|
+
break
|
144
|
+
end
|
145
|
+
end
|
146
|
+
# We found a candidate so no need to continue checking edges
|
147
|
+
break if candidate_edge
|
148
|
+
else
|
149
|
+
# Look for a common prefix on this current edge label and the remaining suffix
|
150
|
+
if edge.label == common_prefix(edge.label, sub_label)
|
151
|
+
chars_consumed += edge.label.length
|
152
|
+
candidate_edge = edge
|
153
|
+
break
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
if candidate_edge
|
159
|
+
# Traverse to the node our edge candidate points to
|
160
|
+
current_node = candidate_edge.node
|
161
|
+
else
|
162
|
+
# We didn’t find a possible edge candidate so bail out of the loop
|
163
|
+
current_node = nil
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
# In order to return a match, the following postconditions must be true:
|
168
|
+
# - We are pointing to a leaf node
|
169
|
+
# - We have consumed all the input characters
|
170
|
+
if current_node != nil and current_node.index != nil and chars_consumed == label_length
|
171
|
+
PrefixMatch.new(label, current_node.index, chars_captured)
|
172
|
+
else
|
173
|
+
nil
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def common_prefix(a, b)
|
178
|
+
selected_prefix = ""
|
179
|
+
min_index_length = a < b ? a.length : b.length
|
180
|
+
index = 0
|
181
|
+
|
182
|
+
until index == min_index_length
|
183
|
+
return selected_prefix if a[index] != b[index]
|
184
|
+
selected_prefix += a[index]
|
185
|
+
index += 1
|
186
|
+
end
|
187
|
+
|
188
|
+
selected_prefix
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
data/lib/calyx/production/affix_table.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
module Calyx
|
2
|
+
module Production
|
3
|
+
# A type of production rule representing a bidirectional dictionary of
|
4
|
+
# mapping pairs that can be used as a substitution table in template
|
5
|
+
# expressions.
|
6
|
+
class AffixTable
|
7
|
+
def self.parse(productions, registry)
|
8
|
+
# TODO: handle wildcard expressions
|
9
|
+
self.new(productions)
|
10
|
+
end
|
11
|
+
|
12
|
+
# %es
|
13
|
+
# prefix: nil, suffix: 'es'
|
14
|
+
# match: 'buses' -> ends_with(suffix)
|
15
|
+
|
16
|
+
# %y
|
17
|
+
# prefix: nil, suffix: 'ies'
|
18
|
+
|
19
|
+
def initialize(mapping)
|
20
|
+
@lhs_index = PrefixTree.new
|
21
|
+
@rhs_index = PrefixTree.new
|
22
|
+
|
23
|
+
@lhs_list = mapping.keys
|
24
|
+
@rhs_list = mapping.values
|
25
|
+
|
26
|
+
@lhs_index.add_all(@lhs_list)
|
27
|
+
@rhs_index.add_all(@rhs_list)
|
28
|
+
end
|
29
|
+
|
30
|
+
def value_for(key)
|
31
|
+
match = @lhs_index.lookup(key)
|
32
|
+
result = @rhs_list[match.index]
|
33
|
+
|
34
|
+
if match.captured
|
35
|
+
result.sub("%", match.captured)
|
36
|
+
else
|
37
|
+
result
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def key_for(value)
|
42
|
+
match = @rhs_index.lookup(value)
|
43
|
+
result = @lhs_list[match.index]
|
44
|
+
|
45
|
+
if match.captured
|
46
|
+
result.sub("%", match.captured)
|
47
|
+
else
|
48
|
+
result
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/calyx/registry.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
module Calyx
|
2
2
|
# Lookup table of all the available rules in the grammar.
|
3
3
|
class Registry
|
4
|
-
attr_reader :rules, :transforms, :modifiers
|
4
|
+
attr_reader :rules, :dicts, :transforms, :modifiers
|
5
5
|
|
6
6
|
# Construct an empty registry.
|
7
7
|
def initialize
|
8
8
|
@options = Options.new({})
|
9
9
|
@rules = {}
|
10
|
+
@dicts = {}
|
10
11
|
@transforms = {}
|
11
12
|
@modifiers = Modifiers.new
|
12
13
|
end
|
@@ -67,7 +68,17 @@ module Calyx
|
|
67
68
|
# @param [Symbol] name
|
68
69
|
# @param [Array] productions
|
69
70
|
def define_rule(name, trace, productions)
|
70
|
-
|
71
|
+
symbol = name.to_sym
|
72
|
+
|
73
|
+
# TODO: this could be tidied up by consolidating parsing in a single class
|
74
|
+
branch = Rule.build_ast(productions, self)
|
75
|
+
|
76
|
+
# If the static rule is a map of k=>v pairs then add it to the lookup dict
|
77
|
+
if branch.is_a?(Production::AffixTable)
|
78
|
+
dicts[symbol] = branch
|
79
|
+
else
|
80
|
+
rules[symbol] = Rule.new(symbol, branch, trace)
|
81
|
+
end
|
71
82
|
end
|
72
83
|
|
73
84
|
# Defines a rule in the temporary evaluation context.
|
@@ -90,7 +101,7 @@ module Calyx
|
|
90
101
|
if @options.strict?
|
91
102
|
raise Errors::UndefinedRule.new(@last_expansion, symbol)
|
92
103
|
else
|
93
|
-
expansion =
|
104
|
+
expansion = Syntax::Terminal.new('')
|
94
105
|
end
|
95
106
|
end
|
96
107
|
|
@@ -98,12 +109,12 @@ module Calyx
|
|
98
109
|
expansion
|
99
110
|
end
|
100
111
|
|
101
|
-
# Applies the given modifier function to the given value to
|
112
|
+
# Applies the given modifier function to the given value to filter it.
|
102
113
|
#
|
103
114
|
# @param [Symbol] name
|
104
115
|
# @param [String] value
|
105
116
|
# @return [String]
|
106
|
-
def
|
117
|
+
def expand_filter(name, value)
|
107
118
|
if transforms.key?(name)
|
108
119
|
transforms[name].call(value)
|
109
120
|
else
|
@@ -111,6 +122,23 @@ module Calyx
|
|
111
122
|
end
|
112
123
|
end
|
113
124
|
|
125
|
+
# Applies a modifier to substitute the value with a bidirectional map
|
126
|
+
# lookup.
|
127
|
+
#
|
128
|
+
# @param [Symbol] name
|
129
|
+
# @param [String] value
|
130
|
+
# @param [Symbol] direction :left or :right
|
131
|
+
# @return [String]
|
132
|
+
def expand_map(name, value, direction)
|
133
|
+
map_lookup = dicts[name]
|
134
|
+
|
135
|
+
if direction == :left
|
136
|
+
map_lookup.key_for(value)
|
137
|
+
else
|
138
|
+
map_lookup.value_for(value)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
114
142
|
# Expands a memoized rule symbol by evaluating it and storing the result
|
115
143
|
# for later.
|
116
144
|
#
|
data/lib/calyx/rule.rb
CHANGED
@@ -4,11 +4,22 @@ module Calyx
|
|
4
4
|
class Rule
|
5
5
|
def self.build_ast(productions, registry)
|
6
6
|
if productions.first.is_a?(Hash)
|
7
|
-
|
7
|
+
# TODO: test that key is a string
|
8
|
+
|
9
|
+
if productions.first.first.last.is_a?(String)
|
10
|
+
# If value of the production is a strings then this is a
|
11
|
+
# paired mapping production.
|
12
|
+
Production::AffixTable.parse(productions.first, registry)
|
13
|
+
else
|
14
|
+
# Otherwise, we assume this is a weighted choice declaration and
|
15
|
+
# convert the hash to an array
|
16
|
+
Syntax::WeightedChoices.parse(productions.first.to_a, registry)
|
17
|
+
end
|
8
18
|
elsif productions.first.is_a?(Enumerable)
|
9
|
-
|
19
|
+
# TODO: this needs to change to support attributed/tagged grammars
|
20
|
+
Syntax::WeightedChoices.parse(productions, registry)
|
10
21
|
else
|
11
|
-
|
22
|
+
Syntax::Choices.parse(productions, registry)
|
12
23
|
end
|
13
24
|
end
|
14
25
|
|
data/lib/calyx/{production → syntax}/choices.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Calyx
|
2
2
|
# A type of production rule representing a list of possible rules, one of
|
3
3
|
# which will chosen each time the grammar runs.
|
4
|
-
module
|
4
|
+
module Syntax
|
5
5
|
class Choices
|
6
6
|
# Parse a list of productions and return a choice node which is the head
|
7
7
|
# of a syntax tree of child nodes.
|
data/lib/calyx/{production → syntax}/concat.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Calyx
|
2
|
-
module
|
2
|
+
module Syntax
|
3
3
|
# A type of production rule representing a string combining both template
|
4
4
|
# substitutions and raw content.
|
5
5
|
class Concat
|
6
|
-
EXPRESSION = /(\{[A-Za-z0-9_
|
6
|
+
EXPRESSION = /(\{[A-Za-z0-9_@$<>\.]+\})/.freeze
|
7
|
+
DEREF_OP = /([<>\.])/.freeze
|
7
8
|
START_TOKEN = '{'.freeze
|
8
9
|
END_TOKEN = '}'.freeze
|
9
|
-
DEREF_TOKEN = '.'.freeze
|
10
10
|
|
11
11
|
# Parses an interpolated string into fragments combining terminal strings
|
12
12
|
# and non-terminal rules.
|
@@ -16,21 +16,14 @@ module Calyx
|
|
16
16
|
# @param [String] production
|
17
17
|
# @param [Calyx::Registry] registry
|
18
18
|
def self.parse(production, registry)
|
19
|
-
|
19
|
+
expressions = production.split(EXPRESSION).map do |atom|
|
20
20
|
if atom.is_a?(String)
|
21
21
|
if atom.chars.first == START_TOKEN && atom.chars.last == END_TOKEN
|
22
|
-
head, *tail = atom.slice(1, atom.length-2).split(
|
23
|
-
if
|
24
|
-
|
25
|
-
elsif head[0] == Unique::SIGIL
|
26
|
-
rule = Unique.new(head, registry)
|
22
|
+
head, *tail = atom.slice(1, atom.length-2).split(DEREF_OP)
|
23
|
+
if tail.any?
|
24
|
+
ExpressionChain.parse(head, tail, registry)
|
27
25
|
else
|
28
|
-
|
29
|
-
end
|
30
|
-
unless tail.empty?
|
31
|
-
Expression.new(rule, tail, registry)
|
32
|
-
else
|
33
|
-
rule
|
26
|
+
Expression.parse(head, registry)
|
34
27
|
end
|
35
28
|
else
|
36
29
|
Terminal.new(atom)
|
@@ -38,28 +31,31 @@ module Calyx
|
|
38
31
|
end
|
39
32
|
end
|
40
33
|
|
41
|
-
self.new(
|
34
|
+
self.new(expressions)
|
42
35
|
end
|
43
36
|
|
44
37
|
# Initialize the concat node with an expansion of terminal and
|
45
38
|
# non-terminal fragments.
|
46
39
|
#
|
47
40
|
# @param [Array] expansion
|
48
|
-
def initialize(
|
49
|
-
@
|
41
|
+
def initialize(expressions)
|
42
|
+
@expressions = expressions
|
50
43
|
end
|
51
44
|
|
52
|
-
# Evaluate all the child nodes of this node and concatenate
|
53
|
-
# into a single result.
|
45
|
+
# Evaluate all the child nodes of this node and concatenate each expansion
|
46
|
+
# together into a single result.
|
54
47
|
#
|
55
48
|
# @param [Calyx::Options] options
|
56
49
|
# @return [Array]
|
57
50
|
def evaluate(options)
|
58
|
-
|
51
|
+
expansion = @expressions.reduce([]) do |exp, atom|
|
59
52
|
exp << atom.evaluate(options)
|
60
53
|
end
|
61
54
|
|
62
|
-
[:
|
55
|
+
#[:expansion, expansion]
|
56
|
+
# TODO: fix this along with a git rename
|
57
|
+
# Commented out because of a lot of tests depending on :concat symbol
|
58
|
+
[:concat, expansion]
|
63
59
|
end
|
64
60
|
end
|
65
61
|
end
|
data/lib/calyx/syntax/expression.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
module Calyx
|
2
|
+
module Syntax
|
3
|
+
# A symbolic expression representing a single template substitution.
|
4
|
+
class Expression
|
5
|
+
def self.parse(symbol, registry)
|
6
|
+
if symbol[0] == Memo::SIGIL
|
7
|
+
Memo.new(symbol, registry)
|
8
|
+
elsif symbol[0] == Unique::SIGIL
|
9
|
+
Unique.new(symbol, registry)
|
10
|
+
else
|
11
|
+
NonTerminal.new(symbol, registry)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class Modifier < Struct.new(:type, :name, :map_dir)
|
17
|
+
def self.filter(name)
|
18
|
+
new(:filter, name, nil)
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.map_left(name)
|
22
|
+
new(:map, name, :left)
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.map_right(name)
|
26
|
+
new(:map, name, :right)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Handles filter chains that symbolic expressions can pass through to
|
31
|
+
# generate a custom substitution.
|
32
|
+
class ExpressionChain
|
33
|
+
def self.parse(production, production_chain, registry)
|
34
|
+
modifier_chain = production_chain.each_slice(2).map do |op_token, target|
|
35
|
+
rule = target.to_sym
|
36
|
+
case op_token
|
37
|
+
when Token::EXPR_FILTER then Modifier.filter(rule)
|
38
|
+
when Token::EXPR_MAP_LEFT then Modifier.map_left(rule)
|
39
|
+
when Token::EXPR_MAP_RIGHT then Modifier.map_right(rule)
|
40
|
+
else
|
41
|
+
# Should not end up here because the regex excludes it but this
|
42
|
+
# could be a place to add a helpful parse error on any weird
|
43
|
+
# chars used by the expression—current behaviour is to pass
|
44
|
+
# the broken expression through to the result as part of the
|
45
|
+
# text, as if that is what the author meant.
|
46
|
+
raise("unreachable")
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
expression = Expression.parse(production, registry)
|
51
|
+
|
52
|
+
self.new(expression, modifier_chain, registry)
|
53
|
+
end
|
54
|
+
|
55
|
+
# @param [#evaluate] production
|
56
|
+
# @param [Array] modifiers
|
57
|
+
# @param [Calyx::Registry] registry
|
58
|
+
def initialize(production, modifiers, registry)
|
59
|
+
@production = production
|
60
|
+
@modifiers = modifiers
|
61
|
+
@registry = registry
|
62
|
+
end
|
63
|
+
|
64
|
+
# Evaluate the expression by expanding the non-terminal to produce a
|
65
|
+
# terminal string, then passing it through the given modifier chain and
|
66
|
+
# returning the transformed result.
|
67
|
+
#
|
68
|
+
# @param [Calyx::Options] options
|
69
|
+
# @return [Array]
|
70
|
+
def evaluate(options)
|
71
|
+
expanded = @production.evaluate(options).flatten.reject { |o| o.is_a?(Symbol) }.join
|
72
|
+
chain = []
|
73
|
+
|
74
|
+
expression = @modifiers.reduce(expanded) do |value, modifier|
|
75
|
+
case modifier.type
|
76
|
+
when :filter
|
77
|
+
@registry.expand_filter(modifier.name, value)
|
78
|
+
when :map
|
79
|
+
@registry.expand_map(modifier.name, value, modifier.map_dir)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
[:expression, expression]
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
data/lib/calyx/syntax/paired_mapping.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
module Calyx
|
2
|
+
module Syntax
|
3
|
+
# A type of production rule representing a bidirectional dictionary of
|
4
|
+
# mapping pairs that can be used as a substitution table in template
|
5
|
+
# expressions.
|
6
|
+
class PairedMapping
|
7
|
+
def self.parse(productions, registry)
|
8
|
+
# TODO: handle wildcard expressions
|
9
|
+
self.new(productions)
|
10
|
+
end
|
11
|
+
|
12
|
+
# %es
|
13
|
+
# prefix: nil, suffix: 'es'
|
14
|
+
# match: 'buses' -> ends_with(suffix)
|
15
|
+
|
16
|
+
# %y
|
17
|
+
# prefix: nil, suffix: 'ies'
|
18
|
+
|
19
|
+
def initialize(mapping)
|
20
|
+
@lhs_index = PrefixTree.new
|
21
|
+
@rhs_index = PrefixTree.new
|
22
|
+
|
23
|
+
@lhs_list = mapping.keys
|
24
|
+
@rhs_list = mapping.values
|
25
|
+
|
26
|
+
@lhs_index.add_all(@lhs_list)
|
27
|
+
@rhs_index.add_all(@rhs_list)
|
28
|
+
end
|
29
|
+
|
30
|
+
def value_for(key)
|
31
|
+
match = @lhs_index.lookup(key)
|
32
|
+
result = @rhs_list[match.index]
|
33
|
+
|
34
|
+
if match.captured
|
35
|
+
result.sub("%", match.captured)
|
36
|
+
else
|
37
|
+
result
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def key_for(value)
|
42
|
+
match = @rhs_index.lookup(value)
|
43
|
+
result = @lhs_list[match.index]
|
44
|
+
|
45
|
+
if match.captured
|
46
|
+
result.sub("%", match.captured)
|
47
|
+
else
|
48
|
+
result
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/calyx/{production → syntax}/unique.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Calyx
|
2
|
-
module
|
2
|
+
module Syntax
|
3
3
|
# A type of production rule representing a unique substitution which only
|
4
4
|
# returns values that have not previously been selected. The probability
|
5
5
|
# that a given rule will be selected increases as more selections are made
|
data/lib/calyx/{production → syntax}/weighted_choices.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Calyx
|
2
|
-
module
|
2
|
+
module Syntax
|
3
3
|
# A type of production rule representing a map of possible rules with
|
4
4
|
# associated weights that define the expected probability of a rule
|
5
5
|
# being chosen.
|
@@ -12,7 +12,7 @@ module Calyx
|
|
12
12
|
#
|
13
13
|
# @param [Array<Array>, Hash<#to_s, Float>] productions
|
14
14
|
# @param [Calyx::Registry] registry
|
15
|
-
# @return [Calyx::
|
15
|
+
# @return [Calyx::Syntax::WeightedChoices]
|
16
16
|
def self.parse(productions, registry)
|
17
17
|
if productions.first.last.is_a?(Range)
|
18
18
|
range_max = productions.max { |a,b| a.last.max <=> b.last.max }.last.max
|
data/lib/calyx/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: calyx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.22.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Rickerby
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,7 +62,6 @@ extra_rdoc_files: []
|
|
62
62
|
files:
|
63
63
|
- ".github/workflows/ruby.yml"
|
64
64
|
- ".gitignore"
|
65
|
-
- ".travis.yml"
|
66
65
|
- CODE_OF_CONDUCT.md
|
67
66
|
- CONTRIBUTING.md
|
68
67
|
- Gemfile
|
@@ -81,23 +80,29 @@ files:
|
|
81
80
|
- lib/calyx/mapping.rb
|
82
81
|
- lib/calyx/modifiers.rb
|
83
82
|
- lib/calyx/options.rb
|
84
|
-
- lib/calyx/
|
85
|
-
- lib/calyx/production/
|
86
|
-
- lib/calyx/production/
|
87
|
-
- lib/calyx/production/
|
88
|
-
- lib/calyx/production/non_terminal.rb
|
89
|
-
- lib/calyx/production/terminal.rb
|
90
|
-
- lib/calyx/production/unique.rb
|
91
|
-
- lib/calyx/production/weighted_choices.rb
|
83
|
+
- lib/calyx/prefix_tree.rb
|
84
|
+
- lib/calyx/production/affix_table.rb
|
85
|
+
- lib/calyx/production/uniform_branch.rb
|
86
|
+
- lib/calyx/production/weighted_branch.rb
|
92
87
|
- lib/calyx/registry.rb
|
93
88
|
- lib/calyx/result.rb
|
94
89
|
- lib/calyx/rule.rb
|
90
|
+
- lib/calyx/syntax/choices.rb
|
91
|
+
- lib/calyx/syntax/concat.rb
|
92
|
+
- lib/calyx/syntax/expression.rb
|
93
|
+
- lib/calyx/syntax/memo.rb
|
94
|
+
- lib/calyx/syntax/non_terminal.rb
|
95
|
+
- lib/calyx/syntax/paired_mapping.rb
|
96
|
+
- lib/calyx/syntax/terminal.rb
|
97
|
+
- lib/calyx/syntax/token.rb
|
98
|
+
- lib/calyx/syntax/unique.rb
|
99
|
+
- lib/calyx/syntax/weighted_choices.rb
|
95
100
|
- lib/calyx/version.rb
|
96
101
|
homepage: https://github.com/maetl/calyx
|
97
102
|
licenses:
|
98
103
|
- MIT
|
99
104
|
metadata: {}
|
100
|
-
post_install_message:
|
105
|
+
post_install_message:
|
101
106
|
rdoc_options: []
|
102
107
|
require_paths:
|
103
108
|
- lib
|
@@ -113,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
118
|
version: '0'
|
114
119
|
requirements: []
|
115
120
|
rubygems_version: 3.1.2
|
116
|
-
signing_key:
|
121
|
+
signing_key:
|
117
122
|
specification_version: 4
|
118
123
|
summary: Generate text with declarative recursive grammars
|
119
124
|
test_files: []
|
data/.travis.yml
DELETED
data/lib/calyx/production/expression.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
module Calyx
|
2
|
-
module Production
|
3
|
-
# A type of production rule representing a single template substitution.
|
4
|
-
class Expression
|
5
|
-
# Constructs a node representing a single template substitution.
|
6
|
-
#
|
7
|
-
# @param [#evaluate] production
|
8
|
-
# @param [Array] methods
|
9
|
-
# @param [Calyx::Registry] registry
|
10
|
-
def initialize(production, methods, registry)
|
11
|
-
@production = production
|
12
|
-
@methods = methods.map { |m| m.to_sym }
|
13
|
-
@registry = registry
|
14
|
-
end
|
15
|
-
|
16
|
-
# Evaluate the expression by expanding the non-terminal to produce a
|
17
|
-
# terminal string, then passing it through the given modifier chain and
|
18
|
-
# returning the transformed result.
|
19
|
-
#
|
20
|
-
# @param [Calyx::Options] options
|
21
|
-
# @return [Array]
|
22
|
-
def evaluate(options)
|
23
|
-
terminal = @production.evaluate(options).flatten.reject { |o| o.is_a?(Symbol) }.join
|
24
|
-
expression = @methods.reduce(terminal) do |value, method|
|
25
|
-
@registry.transform(method, value)
|
26
|
-
end
|
27
|
-
|
28
|
-
[:expression, expression]
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|