fop_lang 0.4.0 → 0.5.0
- checksums.yaml +4 -4
- data/lib/fop/compiler.rb +72 -0
- data/lib/fop/nodes.rb +15 -31
- data/lib/fop/parser.rb +130 -149
- data/lib/fop/program.rb +6 -12
- data/lib/fop/tokenizer.rb +136 -105
- data/lib/fop/tokens.rb +13 -0
- data/lib/fop/version.rb +1 -1
- data/lib/fop_lang.rb +12 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1166e1e43fd54ed2263db8a37ff288431a6152543869d961f007a134483f1a4b
+  data.tar.gz: 17e55b17448c38a37afb6e24a5798c87de368a352bdb0c74df13ad865a7e3ad0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cbf5d8c7f6c10ca395518cbd7bf0e9083b1f217b3523386a482f19e5cdf47a16aa7ffb50a6290f89f0ec80ba113900c499944fdf35b7e2e71a941d537483a7e1
+  data.tar.gz: bff3c613a575687d0d3223c5bd60bb1128b0ae78accf2e5101228c3ec14d61f46cdc798855af9d07692de0b25ba01737dbda97409d4e8f44881e9bbe6da9c523
data/lib/fop/compiler.rb
ADDED
@@ -0,0 +1,72 @@
+require_relative 'parser'
+
+module Fop
+  module Compiler
+    def self.compile(src)
+      parser = Parser.new(src)
+      nodes, errors = parser.parse
+
+      instructions = nodes.map { |node|
+        case node
+        when Nodes::Text, Nodes::Regex
+          Instructions.regex_match(node.regex)
+        when Nodes::Expression
+          Instructions::ExpressionMatch.new(node)
+        else
+          raise "Unknown node type #{node}"
+        end
+      }
+
+      return nil, errors if errors.any?
+      return instructions, nil
+    end
+
+    module Instructions
+      BLANK = "".freeze
+      OPERATIONS = {
+        "=" => ->(_val, arg) { arg || BLANK },
+        "+" => ->(val, arg) { val.to_i + arg.to_i },
+        "-" => ->(val, arg) { val.to_i - arg.to_i },
+        ">" => ->(val, arg) { val + arg },
+        "<" => ->(val, arg) { arg + val },
+      }
+
+      def self.regex_match(regex)
+        ->(input) { input.slice! regex }
+      end
+
+      class ExpressionMatch
+        def initialize(node)
+          @regex = node.regex&.regex
+          @op = node.operator ? OPERATIONS.fetch(node.operator) : nil
+          @regex_match = node.regex_match
+          if node.arg&.any? { |a| a.is_a? Integer }
+            @arg, @arg_with_caps = nil, node.arg
+          else
+            @arg = node.arg&.join("")
+            @arg_with_caps = nil
+          end
+        end
+
+        def call(input)
+          if (match = @regex.match(input))
+            val = match.to_s
+            blank = val == BLANK
+            input.sub!(val, BLANK) unless blank
+            found_val = @regex_match || !blank
+            arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
+            @op && found_val ? @op.call(val, arg) : val
+          end
+        end
+
+        private
+
+        def sub_caps(args, caps)
+          args.map { |a|
+            a.is_a?(Integer) ? caps[a].to_s : a
+          }.join("")
+        end
+      end
+    end
+  end
+end
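For orientation, here is a rough sketch of how this new compile step is meant to be driven. The program text 'v{N+1}', the inputs, and the expected results below are my own illustration, not something stated in the diff:

  require 'fop_lang'

  # Compiler.compile returns [instructions, nil] on success, or [nil, errors] on failure.
  instructions, errors = Fop::Compiler.compile('v{N+1}')
  if errors
    errors.each { |e| puts e }
  else
    # Each instruction is a callable that consumes part of the input and returns output text.
    puts Fop::Program.new(instructions).apply('v4')   # expected to print "v5"
  end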
data/lib/fop/nodes.rb
CHANGED
@@ -1,47 +1,31 @@
 module Fop
   module Nodes
-    Text = Struct.new(:wildcard, :str) do
-      def consume!(input)
-        @regex ||= Regexp.new((wildcard ? ".*" : "^") + Regexp.escape(str))
-        input.slice!(@regex)
-      end
-
+    Text = Struct.new(:wildcard, :str, :regex) do
       def to_s
         w = wildcard ? "*" : nil
-        "
+        "[#{w}txt] #{str}"
       end
     end
 
-
-      def
-
-
-        blank = val == Parser::BLANK
-        input.sub!(val, Parser::BLANK) unless blank
-        found_val = regex_match || !blank
-        arg = operator_arg_w_caps ? sub_caps(operator_arg_w_caps, match.captures) : operator_arg
-        expression && found_val ? expression.call(val, operator, arg) : val
-      end
+    Regex = Struct.new(:wildcard, :src, :regex) do
+      def to_s
+        w = wildcard ? "*" : nil
+        "[#{w}reg] #{src}"
       end
+    end
 
+    Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
       def to_s
         w = wildcard ? "*" : nil
-        s = "#{w}#{match}"
-
+        s = "[#{w}exp] #{match}"
+        if operator
+          arg_str = arg
+            .map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
+            .join("")
+          s << " #{operator} #{arg_str}"
+        end
         s
       end
-
-      private
-
-      def sub_caps(tokens, caps)
-        tokens.map { |t|
-          case t
-          when String then t
-          when Parser::CaptureGroup then caps[t.index].to_s
-          else raise Parser::Error, "Unexpected #{t} in capture group"
-          end
-        }.join("")
-      end
     end
   end
 end
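The nodes are now plain Structs whose to_s is only used for debugging output. As a hypothetical illustration of the new formats (the node values below are ones a parse of 'v{N+1}' might plausibly produce, not taken from the gem's docs):

  Fop::Nodes::Text.new(false, "v", /^v/).to_s
  # => "[txt] v"
  Fop::Nodes::Expression.new(false, "N", nil, nil, "+", ["1"]).to_s
  # => "[exp] N + 1"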
data/lib/fop/parser.rb
CHANGED
@@ -1,181 +1,162 @@
+require_relative 'tokenizer'
 require_relative 'nodes'
 
 module Fop
-
-
-
+  class Parser
+    DIGIT = /^[0-9]$/
+    REGEX_START = "^".freeze
+    REGEX_LAZY_WILDCARD = ".*?".freeze
+    REGEX_MATCHES = {
+      "N" => "[0-9]+".freeze,
+      "W" => "\\w+".freeze,
+      "A" => "[a-zA-Z]+".freeze,
+      "*" => ".*".freeze,
+    }.freeze
+    OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
+    TR_REGEX = /.*/
+
+    Error = Struct.new(:type, :token, :message) do
+      def to_s
+        "#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
+      end
+    end
 
-
-    MATCH_WORD = "W".freeze
-    MATCH_ALPHA = "A".freeze
-    MATCH_WILD = "*".freeze
-    BLANK = "".freeze
-    OP_REPLACE = "=".freeze
-    OP_APPEND = ">".freeze
-    OP_PREPEND = "<".freeze
-    OP_ADD = "+".freeze
-    OP_SUB = "-".freeze
-    OP_MUL = "*".freeze
-    OP_DIV = "/".freeze
-    VAR = "$".freeze
-    CAP_NUM = /^[1-9]$/
+    attr_reader :errors
 
-
-
-
-
+    def initialize(src, debug: false)
+      @tokenizer = Tokenizer.new(src)
+      @errors = []
+    end
 
-    def
+    def parse
       nodes = []
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      wildcard = false
+      eof = false
+      # Top-level parsing. It will always be looking for a String, Regex, or Expression.
+      until eof
+        @tokenizer.reset_escapes!
+        t = @tokenizer.next
+        case t.type
+        when Tokens::WILDCARD
+          errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
+          wildcard = true
+        when Tokens::TEXT
+          reg = build_regex!(wildcard, t, Regexp.escape(t.val))
+          nodes << Nodes::Text.new(wildcard, t.val, reg)
+          wildcard = false
+        when Tokens::EXP_OPEN
+          nodes << parse_exp!(wildcard)
+          wildcard = false
+        when Tokens::REG_DELIM
+          nodes << parse_regex!(wildcard)
+          wildcard = false
+        when Tokens::EOF
+          eof = true
         else
-
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}")
         end
-      }
-
-      case curr_node
-      when nil
-        # noop
-      when :wildcard
-        nodes << Nodes::Text.new(true, "")
-      when Nodes::Text, Nodes::Op
-        nodes << curr_node
-      else
-        raise Error, "Unexpected end node #{curr_node}"
       end
-
-      nodes
+      nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
+      return nodes, @errors
     end
 
-
+    def parse_exp!(wildcard = false)
+      exp = Nodes::Expression.new(wildcard)
+      parse_exp_match! exp
+      op_token = parse_exp_operator! exp
+      if exp.operator
+        parse_exp_arg! exp, op_token
+      end
+      return exp
+    end
 
-    def
-
-
-
-      when
-
-
-
-
-
+    def parse_exp_match!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::TEXT, Tokens::WILDCARD
+        exp.match = t.val
+        if (src = REGEX_MATCHES[exp.match])
+          reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+          exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
+        else
+          errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
+        end
+      when Tokens::REG_DELIM
+        exp.regex = parse_regex!(exp.wildcard)
+        exp.match = exp.regex&.src
+        exp.regex_match = true
+        @tokenizer.reset_escapes!
       else
-
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
       end
     end
 
-
-
-
-      case
-      when
-
-
-
-        op = new_node token
-        return op, node
-      when :wildcard
-        return :wildcard, node
+    def parse_exp_operator!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::EXP_CLOSE
+        # no op
+      when Tokens::OPERATOR
+        exp.operator = t.val
       else
-
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
       end
+      t
     end
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      when
-
+    def parse_exp_arg!(exp, op_token)
+      @tokenizer.escape.operators = true
+      @tokenizer.escape.regex = true
+      @tokenizer.escape.regex_capture = false if exp.regex_match
+
+      exp.arg = []
+      found_close, eof = false, false
+      until found_close or eof
+        t = @tokenizer.next
+        case t.type
+        when Tokens::TEXT
+          exp.arg << t.val
+        when Tokens::REG_CAPTURE
+          exp.arg << t.val.to_i - 1
+          errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
+          errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
+        when Tokens::EXP_CLOSE
+          found_close = true
+        when Tokens::EOF
+          eof = true
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
         else
-
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
         end
+      end
 
-
-
-      raise Error, "Unexpected #{token.operator} for operator" unless token.operator.is_a? Tokenizer::Char
-      node.operator = token.operator.char
-      node.operator_arg = token.arg if token.arg and token.arg != BLANK
-      node.operator_arg_w_caps = parse_captures! node.operator_arg if node.operator_arg and node.regex_match
-      node.expression =
-        case node.operator
-        when OP_REPLACE
-          EXP_REPLACE
-        when OP_ADD, OP_SUB, OP_MUL, OP_DIV
-          raise Error, "Operator #{node.operator} is only available for numeric matches" unless node.match == MATCH_NUM
-          raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
-          EXP_MATH
-        when OP_APPEND
-          raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
-          EXP_APPEND
-        when OP_PREPEND
-          raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
-          EXP_PREPEND
-        else
-          raise Error, "Unknown operator #{node.operator}"
-        end
+      if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
+        errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
       end
     end
 
-    def
-
-
-
-
-
-
-
-      i += 1
-
-      if escape
-        nodes << char
-        escape = false
-        next
-      end
-
-      case char
-      when Tokenizer::ESCAPE
-        escape = true
-      when VAR
-        num = arg[i].to_s
-        raise Error, "Capture group number must be between 1 and 9; found '#{num}'" unless num =~ CAP_NUM
-        nodes << CaptureGroup.new(num.to_i - 1)
-        i += 1
-      else
-        nodes << char
-      end
+    def parse_regex!(wildcard)
+      @tokenizer.regex_mode!
+      t = @tokenizer.next
+      reg = Nodes::Regex.new(wildcard, t.val)
+      if t.type == Tokens::TEXT
+        reg.regex = build_regex!(wildcard, t)
+      else
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
       end
 
-
-
+      t = @tokenizer.next
+      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
+      reg
+    end
+
+    def build_regex!(wildcard, token, src = token.val)
+      Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+    rescue RegexpError => e
+      errors << Error.new(:regex, token, e.message)
+      nil
     end
   end
 end
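Where the 0.4.0 parser raised on the first problem, the new one accumulates Error structs and returns them alongside the nodes. A minimal sketch; the malformed program below is a made-up example:

  parser = Fop::Parser.new('{N~5}')   # '~' makes the match type unrecognized
  nodes, errors = parser.parse        # collects errors instead of raising
  errors.each { |e| puts e }          # each Error#to_s reports the type, message, and column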
data/lib/fop/program.rb
CHANGED
@@ -1,22 +1,16 @@
-require_relative 'tokenizer'
-require_relative 'parser'
-
 module Fop
   class Program
-
-
-    def initialize(src)
-      tokens = Tokenizer.new(src).tokenize!
-      @nodes = Parser.parse! tokens
+    def initialize(instructions)
+      @instructions = instructions
     end
 
     def apply(input)
       input = input.clone
       output =
-        @
-
-          return nil if
-          acc +
+        @instructions.reduce("") { |acc, ins|
+          result = ins.call(input)
+          return nil if result.nil?
+          acc + result.to_s
         }
       input.empty? ? output : nil
     end
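Program no longer tokenizes or parses; it just runs whatever callables the compiler hands it. A minimal sketch with a hand-rolled instruction standing in for real compiled ones:

  # An instruction consumes part of the (mutable) input and returns the text to emit, or nil on no match.
  eat_foo = ->(input) { input.slice!(/^foo/) }
  prog = Fop::Program.new([eat_foo])
  prog.apply('foo')      # => "foo"
  prog.apply('bar')      # => nil (instruction matched nothing)
  prog.apply('foobar')   # => nil (leftover input means no full match)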
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,144 +1,175 @@
+require_relative 'tokens'
+
 module Fop
   class Tokenizer
-
-
-
-    Error = Class.new(StandardError)
+    Token = Struct.new(:pos, :type, :val)
+    Error = Struct.new(:pos, :message)
+    Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
 
-
-
+    EXP_OPEN = "{".freeze
+    EXP_CLOSE = "}".freeze
     ESCAPE = "\\".freeze
     WILDCARD = "*".freeze
-
+    REGEX_DELIM = "/".freeze
+    REGEX_CAPTURE = "$".freeze
+    OP_REPLACE = "=".freeze
+    OP_APPEND = ">".freeze
+    OP_PREPEND = "<".freeze
+    OP_ADD = "+".freeze
+    OP_SUB = "-".freeze
+
+    #
+    # Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
+    # explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
+    # but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
+    # or off to auto-escape certain chars for the next token.
+    #
+    attr_reader :escape
 
     def initialize(src)
       @src = src
       @end = src.size - 1
+      @start_i = 0
+      @i = 0
+      reset_escapes!
     end
 
-
-
-      escape =
-      i = 0
-      until i > @end do
-        char = @src[i]
-        i += 1
-
-        if escape
-          tokens << Char.new(char)
-          escape = false
-          next
-        end
-
-        case char
-        when ESCAPE
-          escape = true
-        when OP_OPEN
-          i, op = operation! i
-          tokens << op
-        when OP_CLOSE
-          raise "Unexpected #{OP_CLOSE}"
-        when WILDCARD
-          tokens << :wildcard
-        else
-          tokens << Char.new(char)
-        end
-      end
-
-      raise Error, "Trailing escape" if escape
-      tokens
+    # Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
+    def reset_escapes!
+      @escape = Escapes.new(true, true)
     end
 
-
-
-
-
-
+    # Auto-escape anything you'd find in a regular expression
+    def regex_mode!
+      @escape.regex = false # look for the final /
+      @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
+      @escape.wildcards = true
+      @escape.operators = true
+      @escape.regex_capture = true
+      @escape.exp = true
+    end
 
-
-
-
-
-
-
-
-
-
-
+    def next
+      return Token.new(@i, Tokens::EOF) if @i > @end
+      char = @src[@i]
+      case char
+      when EXP_OPEN
+        @i += 1
+        token! Tokens::EXP_OPEN
+      when EXP_CLOSE
+        @i += 1
+        token! Tokens::EXP_CLOSE
+      when WILDCARD
+        @i += 1
+        token! Tokens::WILDCARD, WILDCARD
+      when REGEX_DELIM
+        if @escape.regex
+          get_str!
         else
-
+          @i += 1
+          token! Tokens::REG_DELIM
         end
-
-
-
-      until found_close or op.operator or i > @end do
-        char = @src[i]
-        i += 1
-        case char
-        when OP_CLOSE
-          found_close = true
+      when REGEX_CAPTURE
+        if @escape.regex_capture
+          get_str!
         else
-
-
-
-
-
-          escape = false
-          until found_close or i > @end do
-            char = @src[i]
-            i += 1
-
-            if escape
-              op.arg << char
-              escape = false
-              next
+          @i += 1
+          t = token! Tokens::REG_CAPTURE, @src[@i]
+          @i += 1
+          @start_i = @i
+          t
         end
-
-
-
-              escape = true
-            when OP_OPEN
-              raise "Unexpected #{OP_OPEN}"
-            when OP_CLOSE
-              found_close = true
+      when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+        if @escape.operators
+          get_str!
         else
-
+          @i += 1
+          token! Tokens::OPERATOR, char
         end
+      else
+        get_str!
       end
-
-      raise Error, "Unclosed operation" if !found_close
-      raise Error, "Trailing escape" if escape
-      return i, op
     end
 
-
-
-
-
+    private
+
+    def token!(type, val = nil)
+      t = Token.new(@start_i, type, val)
+      @start_i = @i
+      t
+    end
 
-
-
-
+    def get_str!
+      str = ""
+      escape, found_end = false, false
+      until found_end or @i > @end
+        char = @src[@i]
 
         if escape
-
+          @i += 1
+          str << char
           escape = false
           next
         end
 
         case char
         when ESCAPE
-
-
-
+          @i += 1
+          if @escape.regex_escape and @src[@i] != REGEX_DELIM
+            str << char
+          else
+            escape = true
+          end
+        when EXP_OPEN
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when EXP_CLOSE
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when WILDCARD
+          if @escape.wildcards
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_DELIM
+          if @escape.regex
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_CAPTURE
+          if @escape.regex_capture
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+          if @escape.operators
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
         else
-
+          @i += 1
+          str << char
         end
       end
 
-
-
-      return i, Regex.new(src)
+      return Token.new(@i - 1, Tokens::TR_ESC) if escape
+      token! Tokens::TEXT, str
     end
   end
 end
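The tokenizer is now a streaming, pull-based lexer: the parser calls next repeatedly and flips the escape flags between calls. Driving it by hand looks roughly like this (the exact token stream depends on the constants in the new tokens.rb, which this diff does not show, and on which escape modes are active):

  t = Fop::Tokenizer.new('v{N+1}')
  loop do
    tok = t.next                    # Token is a Struct of (pos, type, val)
    break if tok.type == Fop::Tokens::EOF
    puts [tok.pos, tok.type, tok.val].inspect
  end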
data/lib/fop/tokens.rb
ADDED
data/lib/fop/version.rb
CHANGED
data/lib/fop_lang.rb
CHANGED
@@ -1,12 +1,22 @@
 require_relative 'fop/version'
+require_relative 'fop/compiler'
 require_relative 'fop/program'
 
 def Fop(src)
-  ::Fop
+  ::Fop.compile!(src)
 end
 
 module Fop
+  def self.compile!(src)
+    prog, errors = compile(src)
+    # TODO better exception
+    raise "Fop errors: " + errors.map(&:message).join(",") if errors
+    prog
+  end
+
   def self.compile(src)
-
+    instructions, errors = ::Fop::Compiler.compile(src)
+    return nil, errors if errors
+    return Program.new(instructions), nil
  end
 end
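End to end, the public API now has a raising and a non-raising entry point. The example program and inputs here are hypothetical, and the expected results follow from the code above rather than from the gem's docs:

  require 'fop_lang'

  prog = Fop('v{N+1}')            # compile! raises if there are compile errors
  prog.apply('v4')                # expected: "v5"
  prog.apply('nope')              # expected: nil

  prog, errors = Fop.compile('v{N+1}')   # non-raising variant returns [program, nil] or [nil, errors]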
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fop_lang
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Jordan Hollinger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-
+date: 2021-08-20 00:00:00.000000000 Z
 dependencies: []
 description: A micro expression language for Filter and OPerations on text
 email: jordan.hollinger@gmail.com
@@ -17,10 +17,12 @@ extensions: []
 extra_rdoc_files: []
 files:
 - README.md
+- lib/fop/compiler.rb
 - lib/fop/nodes.rb
 - lib/fop/parser.rb
 - lib/fop/program.rb
 - lib/fop/tokenizer.rb
+- lib/fop/tokens.rb
 - lib/fop/version.rb
 - lib/fop_lang.rb
 homepage: https://jhollinger.github.io/fop-lang-rb/