fop_lang 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/fop/compiler.rb +72 -0
- data/lib/fop/nodes.rb +15 -31
- data/lib/fop/parser.rb +130 -149
- data/lib/fop/program.rb +6 -12
- data/lib/fop/tokenizer.rb +136 -105
- data/lib/fop/tokens.rb +13 -0
- data/lib/fop/version.rb +1 -1
- data/lib/fop_lang.rb +12 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1166e1e43fd54ed2263db8a37ff288431a6152543869d961f007a134483f1a4b
|
4
|
+
data.tar.gz: 17e55b17448c38a37afb6e24a5798c87de368a352bdb0c74df13ad865a7e3ad0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbf5d8c7f6c10ca395518cbd7bf0e9083b1f217b3523386a482f19e5cdf47a16aa7ffb50a6290f89f0ec80ba113900c499944fdf35b7e2e71a941d537483a7e1
|
7
|
+
data.tar.gz: bff3c613a575687d0d3223c5bd60bb1128b0ae78accf2e5101228c3ec14d61f46cdc798855af9d07692de0b25ba01737dbda97409d4e8f44881e9bbe6da9c523
|
data/lib/fop/compiler.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require_relative 'parser'
|
2
|
+
|
3
|
+
module Fop
  # Turns Fop source text into a list of callable instructions. Each instruction
  # responds to `call(input)`, consumes its match from the front of `input`
  # (mutating it), and returns the text to emit, or nil when it did not match.
  module Compiler
    # Parses `src` and builds one instruction per parsed node.
    #
    # Returns `[instructions, nil]` on success or `[nil, errors]` when the
    # parser reported any errors. Raises RuntimeError if the parser yields a
    # node type this compiler does not know about.
    def self.compile(src)
      parser = Parser.new(src)
      nodes, errors = parser.parse
      # Check errors first: nodes from a failed parse may be incomplete, so
      # there is no point turning them into instructions.
      return nil, errors if errors.any?

      instructions = nodes.map { |node|
        case node
        when Nodes::Text, Nodes::Regex
          Instructions.regex_match(node.regex)
        when Nodes::Expression
          Instructions::ExpressionMatch.new(node)
        else
          raise "Unknown node type #{node}"
        end
      }
      return instructions, nil
    end

    module Instructions
      BLANK = "".freeze

      # Operator symbol => lambda taking (matched value, argument string).
      OPERATIONS = {
        "=" => ->(_val, arg) { arg || BLANK },
        "+" => ->(val, arg) { val.to_i + arg.to_i },
        "-" => ->(val, arg) { val.to_i - arg.to_i },
        ">" => ->(val, arg) { val + arg },
        "<" => ->(val, arg) { arg + val },
      }.freeze

      # Builds an instruction that slices the first match of `regex` out of
      # the input and returns it (nil when there is no match).
      def self.regex_match(regex)
        ->(input) { input.slice! regex }
      end

      # Instruction for an expression node: matches, consumes the match, and
      # optionally transforms it with the node's operator and argument.
      class ExpressionMatch
        # `node` must respond to `regex` (an object exposing the compiled
        # Regexp via `regex`), `operator`, `regex_match` and `arg`.
        def initialize(node)
          @regex = node.regex&.regex
          @op = node.operator ? OPERATIONS.fetch(node.operator) : nil
          @regex_match = node.regex_match
          if node.arg&.any? { |a| a.is_a? Integer }
            # Integers are capture-group indexes that can only be resolved
            # against an actual match, so keep the parts separate.
            @arg, @arg_with_caps = nil, node.arg
          else
            @arg = node.arg&.join("")
            @arg_with_caps = nil
          end
        end

        # Consumes the match from `input` (in place) and returns the resulting
        # value, or nil when the expression does not match.
        def call(input)
          match = @regex.match(input)
          return nil unless match

          val = match.to_s
          blank = val == BLANK
          # Remove exactly the matched span. Substituting the matched *text*
          # would delete its first literal occurrence, which may sit before
          # the real match position.
          input[match.begin(0), val.length] = BLANK unless blank
          found_val = @regex_match || !blank
          arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
          @op && found_val ? @op.call(val, arg) : val
        end

        private

        # Joins the argument parts, replacing each Integer with the text of
        # the capture group at that (zero-based) index.
        def sub_caps(args, caps)
          args.map { |a|
            a.is_a?(Integer) ? caps[a].to_s : a
          }.join("")
        end
      end
    end
  end
end
|
data/lib/fop/nodes.rb
CHANGED
@@ -1,47 +1,31 @@
|
|
1
1
|
module Fop
  # Node types produced by the parser. Each carries the compiled `regex`
  # needed to match it plus enough source detail to print itself.
  module Nodes
    # A literal run of text; `wildcard` marks a leading "*".
    Text = Struct.new(:wildcard, :str, :regex) do
      def to_s
        w = wildcard ? "*" : nil
        "[#{w}txt] #{str}"
      end
    end

    # A regular expression; `src` is the pattern as written in the program.
    Regex = Struct.new(:wildcard, :src, :regex) do
      def to_s
        w = wildcard ? "*" : nil
        "[#{w}reg] #{src}"
      end
    end

    # An expression: a match plus an optional operator and argument. `arg` is
    # an Array of Strings and Integers, where an Integer is a zero-based
    # capture-group index.
    Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
      def to_s
        w = wildcard ? "*" : nil
        s = "[#{w}exp] #{match}"
        if operator
          # Array() tolerates a node whose operator is set but whose argument
          # has not been filled in yet (arg is nil).
          arg_str = Array(arg)
            .map { |a| a.is_a?(Integer) ? "$#{a + 1}" : a.to_s }
            .join("")
          s << " #{operator} #{arg_str}"
        end
        s
      end
    end
  end
end
|
data/lib/fop/parser.rb
CHANGED
@@ -1,181 +1,162 @@
|
|
1
|
+
require_relative 'tokenizer'
|
1
2
|
require_relative 'nodes'
|
2
3
|
|
3
4
|
module Fop
  # Pulls tokens from a Tokenizer and builds a flat list of Nodes. Problems are
  # collected in `errors` instead of being raised, so one pass can report
  # several of them.
  class Parser
    DIGIT = /^[0-9]$/
    # Capture references are written 1-based ($1..$9); "$0" is not a group.
    CAPTURE_DIGIT = /\A[1-9]\z/
    REGEX_START = "^".freeze
    REGEX_LAZY_WILDCARD = ".*?".freeze
    # Built-in match types and the regex source each one expands to.
    REGEX_MATCHES = {
      "N" => "[0-9]+".freeze,
      "W" => "\\w+".freeze,
      "A" => "[a-zA-Z]+".freeze,
      "*" => ".*".freeze,
    }.freeze
    OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE].freeze
    # Regex used for a trailing wildcard with nothing after it.
    TR_REGEX = /.*/

    Error = Struct.new(:type, :token, :message) do
      def to_s
        "#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
      end
    end

    attr_reader :errors

    # `debug` is accepted but not used by this class.
    def initialize(src, debug: false)
      @tokenizer = Tokenizer.new(src)
      @errors = []
    end

    # Parses the whole source. Returns `[nodes, errors]`; `errors` is empty
    # when the program is valid.
    def parse
      nodes = []
      wildcard = false
      eof = false
      # Top-level parsing. It will always be looking for a String, Regex, or Expression.
      until eof
        @tokenizer.reset_escapes!
        t = @tokenizer.next
        case t.type
        when Tokens::WILDCARD
          errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
          wildcard = true
        when Tokens::TEXT
          reg = build_regex!(wildcard, t, Regexp.escape(t.val))
          nodes << Nodes::Text.new(wildcard, t.val, reg)
          wildcard = false
        when Tokens::EXP_OPEN
          nodes << parse_exp!(wildcard)
          wildcard = false
        when Tokens::REG_DELIM
          nodes << parse_regex!(wildcard)
          wildcard = false
        when Tokens::EOF
          eof = true
        else
          errors << Error.new(:syntax, t, "Unexpected #{t.type}")
        end
      end
      # A wildcard left over at the end of the source matches the rest of the input.
      nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
      return nodes, @errors
    end

    # Parses an expression whose opening token has already been consumed:
    # the match, then an optional operator, then that operator's argument.
    def parse_exp!(wildcard = false)
      exp = Nodes::Expression.new(wildcard)
      parse_exp_match! exp
      op_token = parse_exp_operator! exp
      if exp.operator
        parse_exp_arg! exp, op_token
      end
      return exp
    end

    # Reads the match part of an expression: either a named match type from
    # REGEX_MATCHES or an inline regex.
    def parse_exp_match!(exp)
      # Operators must be tokenized here so they end the match name.
      @tokenizer.escape.operators = false
      t = @tokenizer.next
      case t.type
      when Tokens::TEXT, Tokens::WILDCARD
        exp.match = t.val
        if (src = REGEX_MATCHES[exp.match])
          reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
          exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
        else
          errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
        end
      when Tokens::REG_DELIM
        exp.regex = parse_regex!(exp.wildcard)
        exp.match = exp.regex&.src
        exp.regex_match = true
        @tokenizer.reset_escapes!
      else
        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
      end
    end

    # Reads the token after the match: an operator, or the end of the
    # expression. Returns the token so later errors can point at it.
    def parse_exp_operator!(exp)
      @tokenizer.escape.operators = false
      t = @tokenizer.next
      case t.type
      when Tokens::EXP_CLOSE
        # no op
      when Tokens::OPERATOR
        exp.operator = t.val
      else
        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
      end
      t
    end

    # Reads the operator argument up to the end of the expression into
    # `exp.arg`: an Array of Strings and zero-based capture indexes.
    def parse_exp_arg!(exp, op_token)
      @tokenizer.escape.operators = true
      @tokenizer.escape.regex = true
      # Capture references are only tokenized when the match is a regex.
      @tokenizer.escape.regex_capture = false if exp.regex_match

      exp.arg = []
      found_close, eof = false, false
      until found_close || eof
        t = @tokenizer.next
        case t.type
        when Tokens::TEXT
          exp.arg << t.val
        when Tokens::REG_CAPTURE
          exp.arg << t.val.to_i - 1
          # "$0" would become index -1, which silently means "last capture".
          errors << Error.new(:syntax, t, "Invalid regex capture; must be between 1 and 9 (found #{t.val})") unless CAPTURE_DIGIT.match?(t.val.to_s)
          errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
        when Tokens::EXP_CLOSE
          found_close = true
        when Tokens::EOF
          eof = true
          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
        else
          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
        end
      end

      # An argument may have several parts (text and captures), so only a
      # completely empty one counts as missing.
      if exp.arg.empty? && !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
        errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
      end
    end

    # Parses a regex whose opening delimiter has already been consumed.
    # Always returns a Nodes::Regex; its `regex` stays nil when the body was
    # missing or did not compile.
    def parse_regex!(wildcard)
      @tokenizer.regex_mode!
      t = @tokenizer.next
      reg = Nodes::Regex.new(wildcard, t.val)
      if t.type == Tokens::TEXT
        reg.regex = build_regex!(wildcard, t)
      else
        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
      end

      t = @tokenizer.next
      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected closing '/'") unless t.type == Tokens::REG_DELIM
      reg
    end

    # Compiles `src` into a Regexp anchored at the start, or behind a lazy
    # wildcard prefix when `wildcard` is set. Records an error against
    # `token` and returns nil if the pattern is invalid.
    def build_regex!(wildcard, token, src = token.val)
      Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
    rescue RegexpError => e
      errors << Error.new(:regex, token, e.message)
      nil
    end
  end
end
|
data/lib/fop/program.rb
CHANGED
@@ -1,22 +1,16 @@
|
|
1
|
-
require_relative 'tokenizer'
|
2
|
-
require_relative 'parser'
|
3
|
-
|
4
1
|
module Fop
|
5
2
|
class Program
|
6
|
-
|
7
|
-
|
8
|
-
def initialize(src)
|
9
|
-
tokens = Tokenizer.new(src).tokenize!
|
10
|
-
@nodes = Parser.parse! tokens
|
3
|
+
# `instructions` is a list of objects responding to `call(input)`. Each one
# consumes its match from the string it is given and returns the text to emit,
# or nil when it does not match.
def initialize(instructions)
  @instructions = instructions
end

# Runs every instruction against a private copy of `input`.
#
# Returns the concatenated results, or nil when any instruction fails to
# match or when input is left over after the last instruction.
def apply(input)
  # dup rather than clone: clone keeps the frozen flag, so a frozen input
  # would raise FrozenError when the instructions consume it.
  input = input.dup
  output =
    @instructions.reduce("") { |acc, ins|
      result = ins.call(input)
      # `return` inside the block exits apply itself.
      return nil if result.nil?
      acc + result.to_s
    }
  input.empty? ? output : nil
end
|
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,144 +1,175 @@
|
|
1
|
+
require_relative 'tokens'
|
2
|
+
|
1
3
|
module Fop
  # Hands out tokens one at a time via `next`. Which reserved characters count
  # as syntax and which as plain text depends on the flags in `escape`, which
  # the parser adjusts as it moves through the source.
  class Tokenizer
    Token = Struct.new(:pos, :type, :val)
    Error = Struct.new(:pos, :message)
    # One flag per group of reserved characters; a truthy flag means that
    # group is currently read as ordinary text.
    Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)

    EXP_OPEN = "{".freeze
    EXP_CLOSE = "}".freeze
    ESCAPE = "\\".freeze
    WILDCARD = "*".freeze
    REGEX_DELIM = "/".freeze
    REGEX_CAPTURE = "$".freeze
    OP_REPLACE = "=".freeze
    OP_APPEND = ">".freeze
    OP_PREPEND = "<".freeze
    OP_ADD = "+".freeze
    OP_SUB = "-".freeze

    #
    # Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
    # explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
    # but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
    # or off to auto-escape certain chars for the next token.
    #
    attr_reader :escape

    def initialize(src)
      @src = src
      @end = src.size - 1
      @start_i = 0 # where the token currently being read begins
      @i = 0       # index of the next unread character
      reset_escapes!
    end

    # Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
    def reset_escapes!
      @escape = Escapes.new(true, true)
    end

    # Auto-escape anything you'd find in a regular expression
    def regex_mode!
      @escape.regex = false # look for the final /
      @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
      @escape.wildcards = true
      @escape.operators = true
      @escape.regex_capture = true
      @escape.exp = true
    end

    # Returns the next Token. Once the source is exhausted, every further call
    # returns an EOF token.
    def next
      return Token.new(@i, Tokens::EOF) if @i > @end
      char = @src[@i]
      # Anything that is not active syntax in the current mode starts a text
      # token. Checking every flag here keeps this in step with get_str!, so
      # a regex that begins with "{", "}" or "*" is read as regex text.
      return get_str! if auto_escaped?(char)

      @i += 1
      case char
      when EXP_OPEN
        token! Tokens::EXP_OPEN
      when EXP_CLOSE
        token! Tokens::EXP_CLOSE
      when WILDCARD
        token! Tokens::WILDCARD, WILDCARD
      when REGEX_DELIM
        token! Tokens::REG_DELIM
      when REGEX_CAPTURE
        # The token's value is the single character after "$" (nil when "$"
        # is the last character of the source).
        t = token! Tokens::REG_CAPTURE, @src[@i]
        @i += 1
        @start_i = @i
        t
      else
        token! Tokens::OPERATOR, char
      end
    end

    private

    # Builds a token starting where the previous one ended, then moves the
    # start marker up to the current position.
    def token!(type, val = nil)
      t = Token.new(@start_i, type, val)
      @start_i = @i
      t
    end

    # True when `char` should be read as plain text under the current escape
    # flags. Characters with no special meaning (and the escape character,
    # which get_str! handles itself) are always text.
    def auto_escaped?(char)
      case char
      when EXP_OPEN, EXP_CLOSE
        @escape.exp
      when WILDCARD
        @escape.wildcards
      when REGEX_DELIM
        @escape.regex
      when REGEX_CAPTURE
        @escape.regex_capture
      when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
        @escape.operators
      else
        true
      end
    end

    # Reads a run of text up to the next character that is active syntax, or
    # to the end of the source. Returns a TEXT token, or a TR_ESC token when
    # the source ends right after an escape character.
    def get_str!
      str = ""
      escape = false
      while @i <= @end
        char = @src[@i]

        if escape
          # The previous character was an escape: take this one literally.
          @i += 1
          str << char
          escape = false
        elsif char == ESCAPE
          @i += 1
          if @escape.regex_escape && @src[@i] != REGEX_DELIM
            # Inside a regex the backslash belongs to the regex engine.
            str << char
          else
            escape = true
          end
        elsif auto_escaped?(char)
          @i += 1
          str << char
        else
          break
        end
      end

      return Token.new(@i - 1, Tokens::TR_ESC) if escape
      token! Tokens::TEXT, str
    end
  end
end
|
data/lib/fop/tokens.rb
ADDED
data/lib/fop/version.rb
CHANGED
data/lib/fop_lang.rb
CHANGED
@@ -1,12 +1,22 @@
|
|
1
1
|
require_relative 'fop/version'
|
2
|
+
require_relative 'fop/compiler'
|
2
3
|
require_relative 'fop/program'
|
3
4
|
|
4
5
|
# Shorthand for Fop.compile!: returns a compiled program or raises.
def Fop(src)
  ::Fop.compile!(src)
end

module Fop
  # Compiles `src` and returns the program. Raises RuntimeError listing every
  # error when the source is invalid.
  def self.compile!(src)
    prog, errors = compile(src)
    # TODO better exception
    # Use each error's to_s rather than its bare message, so the exception
    # text keeps whatever extra detail the error object formats for itself.
    raise "Fop errors: " + errors.map(&:to_s).join(",") if errors
    prog
  end

  # Compiles `src` without raising on invalid source. Returns `[program, nil]`
  # on success or `[nil, errors]` on failure.
  def self.compile(src)
    instructions, errors = ::Fop::Compiler.compile(src)
    return nil, errors if errors
    return Program.new(instructions), nil
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fop_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A micro expression language for Filter and OPerations on text
|
14
14
|
email: jordan.hollinger@gmail.com
|
@@ -17,10 +17,12 @@ extensions: []
|
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
19
|
- README.md
|
20
|
+
- lib/fop/compiler.rb
|
20
21
|
- lib/fop/nodes.rb
|
21
22
|
- lib/fop/parser.rb
|
22
23
|
- lib/fop/program.rb
|
23
24
|
- lib/fop/tokenizer.rb
|
25
|
+
- lib/fop/tokens.rb
|
24
26
|
- lib/fop/version.rb
|
25
27
|
- lib/fop_lang.rb
|
26
28
|
homepage: https://jhollinger.github.io/fop-lang-rb/
|