fop_lang 0.1.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +66 -18
- data/lib/fop/compiler.rb +72 -0
- data/lib/fop/nodes.rb +15 -53
- data/lib/fop/parser.rb +138 -69
- data/lib/fop/program.rb +6 -12
- data/lib/fop/tokenizer.rb +162 -21
- data/lib/fop/tokens.rb +13 -0
- data/lib/fop/version.rb +1 -1
- data/lib/fop_lang.rb +12 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1166e1e43fd54ed2263db8a37ff288431a6152543869d961f007a134483f1a4b
|
4
|
+
data.tar.gz: 17e55b17448c38a37afb6e24a5798c87de368a352bdb0c74df13ad865a7e3ad0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbf5d8c7f6c10ca395518cbd7bf0e9083b1f217b3523386a482f19e5cdf47a16aa7ffb50a6290f89f0ec80ba113900c499944fdf35b7e2e71a941d537483a7e1
|
7
|
+
data.tar.gz: bff3c613a575687d0d3223c5bd60bb1128b0ae78accf2e5101228c3ec14d61f46cdc798855af9d07692de0b25ba01737dbda97409d4e8f44881e9bbe6da9c523
|
data/README.md
CHANGED
@@ -1,44 +1,92 @@
|
|
1
1
|
# fop_lang
|
2
2
|
|
3
|
-
Fop is
|
3
|
+
Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
|
4
4
|
|
5
|
-
|
5
|
+
```ruby
|
6
|
+
gem 'fop_lang'
|
7
|
+
```
|
8
|
+
|
9
|
+
## Release Number Example
|
10
|
+
|
11
|
+
This example takes in GitHub branch names, decides if they're release branches, and if so, increments the version number.
|
6
12
|
|
7
13
|
```ruby
|
8
|
-
f = Fop(
|
14
|
+
f = Fop('release-{N}.{N+1}.{N=0}')
|
9
15
|
|
10
|
-
puts f.apply(
|
11
|
-
=>
|
16
|
+
puts f.apply('release-5.99.1')
|
17
|
+
=> 'release-5.100.0'
|
12
18
|
|
13
|
-
puts f.apply(
|
19
|
+
puts f.apply('release-5')
|
14
20
|
=> nil
|
15
21
|
# doesn't match the pattern
|
16
22
|
```
|
17
23
|
|
24
|
+
## Anatomy of a Fop expression
|
25
|
+
|
26
|
+
`Text Literal {Operation}`
|
27
|
+
|
28
|
+
The above expression contains the only two parts of Fop (except for the wildcard and escape characters).
|
29
|
+
|
30
|
+
**Text Literals**
|
31
|
+
|
32
|
+
A text literal works how it sounds: the input must match it exactly. If it matches, it passes through unchanged. The only exception is the `*` (wildcard) character, which matches 0 or more of anything. Wildcards can be used anywhere except inside `{...}` (operations).
|
33
|
+
|
34
|
+
If `\` (escape) is used before the special characters `*`, `{` or `}`, then that character is treated like a text literal. It's recommended to use single-quoted Ruby strings with Fop expressions so that you don't need to double-escape.
|
35
|
+
|
36
|
+
**Operations**
|
37
|
+
|
38
|
+
Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
|
39
|
+
|
40
|
+
1. Matching class (required): Defines what characters the operation will match and operate on.
|
41
|
+
* `N` is the numeric class and will match one or more digits.
|
42
|
+
* `A` is the alpha class and will match one or more letters (lower or upper case).
|
43
|
+
* `W` is the word class and matches alphanumeric chars and underscores.
|
44
|
+
* `*` is the wildcard class and greedily matches everything after it.
|
45
|
+
* `/.../` matches on the supplied regex between the `/`'s. If your regex contains a `/`, it must be escaped. Capture groups may be referenced in the operator argument as `$1`, `$2`, etc.
|
46
|
+
2. Operator (optional): What to do to the matching characters.
|
47
|
+
* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
|
48
|
+
* `>` Append the following chars to the matching value.
|
49
|
+
* `<` Prepend the following chars to the matching value.
|
50
|
+
* `+` Perform addition on the matching number and the argument (`N` only).
|
51
|
+
* `-` Subtract the argument from the matching number (`N` only).
|
52
|
+
3. Operator argument (required for some operators): meaning varies by operator.
|
53
|
+
|
54
|
+
## More Examples
|
55
|
+
|
18
56
|
```ruby
|
19
|
-
f = Fop(
|
57
|
+
f = Fop('release-{N=5}.{N+1}.{N=0}')
|
58
|
+
|
59
|
+
puts f.apply('release-4.99.1')
|
60
|
+
=> 'release-5.100.0'
|
61
|
+
```
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
|
65
|
+
|
66
|
+
puts f.apply('release-4.99.1')
|
67
|
+
=> 'release-5.100.0'
|
20
68
|
|
21
|
-
puts f.apply(
|
22
|
-
=>
|
69
|
+
puts f.apply('rel-4.99.1')
|
70
|
+
=> 'rel-5.100.0'
|
23
71
|
```
|
24
72
|
|
25
73
|
```ruby
|
26
|
-
f = Fop(
|
74
|
+
f = Fop('release-*{N=5}.{N+100}.{N=0}')
|
27
75
|
|
28
|
-
puts f.apply(
|
29
|
-
=>
|
76
|
+
puts f.apply('release-foo-4.100.1')
|
77
|
+
=> 'release-foo-5.200.0'
|
30
78
|
```
|
31
79
|
|
32
80
|
```ruby
|
33
|
-
f = Fop(
|
81
|
+
f = Fop('release-{N=5}.{N+1}.{N=0}{*=}')
|
34
82
|
|
35
|
-
puts f.apply(
|
36
|
-
=>
|
83
|
+
puts f.apply('release-4.100.1.foo.bar')
|
84
|
+
=> 'release-5.101.0'
|
37
85
|
```
|
38
86
|
|
39
87
|
```ruby
|
40
|
-
f = Fop(
|
88
|
+
f = Fop('{W=version}-{N=5}.{N+1}.{N=0}')
|
41
89
|
|
42
|
-
puts f.apply(
|
43
|
-
=>
|
90
|
+
puts f.apply('release-4.100.1')
|
91
|
+
=> 'version-5.101.0'
|
44
92
|
```
|
data/lib/fop/compiler.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require_relative 'parser'
|
2
|
+
|
3
|
+
module Fop
|
4
|
+
module Compiler
|
5
|
+
def self.compile(src)
|
6
|
+
parser = Parser.new(src)
|
7
|
+
nodes, errors = parser.parse
|
8
|
+
|
9
|
+
instructions = nodes.map { |node|
|
10
|
+
case node
|
11
|
+
when Nodes::Text, Nodes::Regex
|
12
|
+
Instructions.regex_match(node.regex)
|
13
|
+
when Nodes::Expression
|
14
|
+
Instructions::ExpressionMatch.new(node)
|
15
|
+
else
|
16
|
+
raise "Unknown node type #{node}"
|
17
|
+
end
|
18
|
+
}
|
19
|
+
|
20
|
+
return nil, errors if errors.any?
|
21
|
+
return instructions, nil
|
22
|
+
end
|
23
|
+
|
24
|
+
module Instructions
|
25
|
+
BLANK = "".freeze
|
26
|
+
OPERATIONS = {
|
27
|
+
"=" => ->(_val, arg) { arg || BLANK },
|
28
|
+
"+" => ->(val, arg) { val.to_i + arg.to_i },
|
29
|
+
"-" => ->(val, arg) { val.to_i - arg.to_i },
|
30
|
+
">" => ->(val, arg) { val + arg },
|
31
|
+
"<" => ->(val, arg) { arg + val },
|
32
|
+
}
|
33
|
+
|
34
|
+
def self.regex_match(regex)
|
35
|
+
->(input) { input.slice! regex }
|
36
|
+
end
|
37
|
+
|
38
|
+
class ExpressionMatch
|
39
|
+
def initialize(node)
|
40
|
+
@regex = node.regex&.regex
|
41
|
+
@op = node.operator ? OPERATIONS.fetch(node.operator) : nil
|
42
|
+
@regex_match = node.regex_match
|
43
|
+
if node.arg&.any? { |a| a.is_a? Integer }
|
44
|
+
@arg, @arg_with_caps = nil, node.arg
|
45
|
+
else
|
46
|
+
@arg = node.arg&.join("")
|
47
|
+
@arg_with_caps = nil
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def call(input)
|
52
|
+
if (match = @regex.match(input))
|
53
|
+
val = match.to_s
|
54
|
+
blank = val == BLANK
|
55
|
+
input.sub!(val, BLANK) unless blank
|
56
|
+
found_val = @regex_match || !blank
|
57
|
+
arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
|
58
|
+
@op && found_val ? @op.call(val, arg) : val
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def sub_caps(args, caps)
|
65
|
+
args.map { |a|
|
66
|
+
a.is_a?(Integer) ? caps[a].to_s : a
|
67
|
+
}.join("")
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/fop/nodes.rb
CHANGED
@@ -1,68 +1,30 @@
|
|
1
1
|
module Fop
|
2
2
|
module Nodes
|
3
|
-
Text = Struct.new(:wildcard, :str) do
|
4
|
-
def consume!(input)
|
5
|
-
@regex ||= Regexp.new((wildcard ? ".*" : "^") + Regexp.escape(str))
|
6
|
-
input.slice!(@regex)
|
7
|
-
end
|
8
|
-
|
3
|
+
Text = Struct.new(:wildcard, :str, :regex) do
|
9
4
|
def to_s
|
10
5
|
w = wildcard ? "*" : nil
|
11
|
-
"
|
6
|
+
"[#{w}txt] #{str}"
|
12
7
|
end
|
13
8
|
end
|
14
9
|
|
15
|
-
|
16
|
-
NUM = "N".freeze
|
17
|
-
WORD = "W".freeze
|
18
|
-
WILD = "*".freeze
|
19
|
-
BLANK = "".freeze
|
20
|
-
|
21
|
-
def consume!(input)
|
22
|
-
if (val = input.slice!(@regex))
|
23
|
-
@expression && val != BLANK ? @expression.call(val) : val
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
10
|
+
Regex = Struct.new(:wildcard, :src, :regex) do
|
27
11
|
def to_s
|
28
12
|
w = wildcard ? "*" : nil
|
29
|
-
|
13
|
+
"[#{w}reg] #{src}"
|
30
14
|
end
|
15
|
+
end
|
31
16
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
when WILD then /.*/
|
42
|
-
else raise ParserError, "Unknown match type '#{@match}'"
|
43
|
-
end
|
44
|
-
|
45
|
-
if (op = tokens.shift)
|
46
|
-
raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
|
47
|
-
arg = tokens.reduce("") { |acc, t|
|
48
|
-
raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
|
49
|
-
acc + t.char
|
50
|
-
}
|
51
|
-
|
52
|
-
@op = op.char
|
53
|
-
@arg = arg == BLANK ? nil : arg
|
54
|
-
@expression =
|
55
|
-
case @op
|
56
|
-
when "=" then ->(_) { @arg || BLANK }
|
57
|
-
when "+", "-", "*", "/"
|
58
|
-
raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
|
59
|
-
raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
|
60
|
-
->(x) { x.to_i.send(@op, @arg.to_i) }
|
61
|
-
else raise ParserError, "Unknown operator #{@op}"
|
62
|
-
end
|
63
|
-
else
|
64
|
-
@op, @arg, @expression = nil, nil, nil
|
17
|
+
Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
|
18
|
+
def to_s
|
19
|
+
w = wildcard ? "*" : nil
|
20
|
+
s = "[#{w}exp] #{match}"
|
21
|
+
if operator
|
22
|
+
arg_str = arg
|
23
|
+
.map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
|
24
|
+
.join("")
|
25
|
+
s << " #{operator} #{arg_str}"
|
65
26
|
end
|
27
|
+
s
|
66
28
|
end
|
67
29
|
end
|
68
30
|
end
|
data/lib/fop/parser.rb
CHANGED
@@ -1,93 +1,162 @@
|
|
1
|
+
require_relative 'tokenizer'
|
1
2
|
require_relative 'nodes'
|
2
3
|
|
3
4
|
module Fop
|
4
|
-
|
5
|
-
|
5
|
+
class Parser
|
6
|
+
DIGIT = /^[0-9]$/
|
7
|
+
REGEX_START = "^".freeze
|
8
|
+
REGEX_LAZY_WILDCARD = ".*?".freeze
|
9
|
+
REGEX_MATCHES = {
|
10
|
+
"N" => "[0-9]+".freeze,
|
11
|
+
"W" => "\\w+".freeze,
|
12
|
+
"A" => "[a-zA-Z]+".freeze,
|
13
|
+
"*" => ".*".freeze,
|
14
|
+
}.freeze
|
15
|
+
OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
|
16
|
+
TR_REGEX = /.*/
|
6
17
|
|
7
|
-
|
8
|
-
|
9
|
-
|
18
|
+
Error = Struct.new(:type, :token, :message) do
|
19
|
+
def to_s
|
20
|
+
"#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
attr_reader :errors
|
10
25
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
26
|
+
def initialize(src, debug: false)
|
27
|
+
@tokenizer = Tokenizer.new(src)
|
28
|
+
@errors = []
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse
|
32
|
+
nodes = []
|
33
|
+
wildcard = false
|
34
|
+
eof = false
|
35
|
+
# Top-level parsing. It will always be looking for a String, Regex, or Expression.
|
36
|
+
until eof
|
37
|
+
@tokenizer.reset_escapes!
|
38
|
+
t = @tokenizer.next
|
39
|
+
case t.type
|
40
|
+
when Tokens::WILDCARD
|
41
|
+
errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
|
42
|
+
wildcard = true
|
43
|
+
when Tokens::TEXT
|
44
|
+
reg = build_regex!(wildcard, t, Regexp.escape(t.val))
|
45
|
+
nodes << Nodes::Text.new(wildcard, t.val, reg)
|
46
|
+
wildcard = false
|
47
|
+
when Tokens::EXP_OPEN
|
48
|
+
nodes << parse_exp!(wildcard)
|
49
|
+
wildcard = false
|
50
|
+
when Tokens::REG_DELIM
|
51
|
+
nodes << parse_regex!(wildcard)
|
52
|
+
wildcard = false
|
53
|
+
when Tokens::EOF
|
54
|
+
eof = true
|
22
55
|
else
|
23
|
-
|
56
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}")
|
24
57
|
end
|
25
|
-
}
|
26
|
-
|
27
|
-
case current_el
|
28
|
-
when nil
|
29
|
-
# noop
|
30
|
-
when :wildcard
|
31
|
-
stack << Nodes::Text.new(true, "")
|
32
|
-
when Nodes::Text
|
33
|
-
stack << current_el
|
34
|
-
when Nodes::Match
|
35
|
-
raise Error, "Unclosed match"
|
36
58
|
end
|
37
|
-
|
38
|
-
|
59
|
+
nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
|
60
|
+
return nodes, @errors
|
39
61
|
end
|
40
62
|
|
41
|
-
|
63
|
+
def parse_exp!(wildcard = false)
|
64
|
+
exp = Nodes::Expression.new(wildcard)
|
65
|
+
parse_exp_match! exp
|
66
|
+
op_token = parse_exp_operator! exp
|
67
|
+
if exp.operator
|
68
|
+
parse_exp_arg! exp, op_token
|
69
|
+
end
|
70
|
+
return exp
|
71
|
+
end
|
42
72
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
when
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
73
|
+
def parse_exp_match!(exp)
|
74
|
+
@tokenizer.escape.operators = false
|
75
|
+
t = @tokenizer.next
|
76
|
+
case t.type
|
77
|
+
when Tokens::TEXT, Tokens::WILDCARD
|
78
|
+
exp.match = t.val
|
79
|
+
if (src = REGEX_MATCHES[exp.match])
|
80
|
+
reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
|
81
|
+
exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
|
82
|
+
else
|
83
|
+
errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
|
84
|
+
end
|
85
|
+
when Tokens::REG_DELIM
|
86
|
+
exp.regex = parse_regex!(exp.wildcard)
|
87
|
+
exp.match = exp.regex&.src
|
88
|
+
exp.regex_match = true
|
89
|
+
@tokenizer.reset_escapes!
|
53
90
|
else
|
54
|
-
|
91
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
|
55
92
|
end
|
56
93
|
end
|
57
94
|
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
text_el.str << token.char
|
67
|
-
text_el
|
68
|
-
when :wildcard
|
69
|
-
stack << text_el
|
70
|
-
:wildcard
|
95
|
+
def parse_exp_operator!(exp)
|
96
|
+
@tokenizer.escape.operators = false
|
97
|
+
t = @tokenizer.next
|
98
|
+
case t.type
|
99
|
+
when Tokens::EXP_CLOSE
|
100
|
+
# no op
|
101
|
+
when Tokens::OPERATOR
|
102
|
+
exp.operator = t.val
|
71
103
|
else
|
72
|
-
|
104
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
|
73
105
|
end
|
106
|
+
t
|
74
107
|
end
|
75
108
|
|
76
|
-
def
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
109
|
+
def parse_exp_arg!(exp, op_token)
|
110
|
+
@tokenizer.escape.operators = true
|
111
|
+
@tokenizer.escape.regex = true
|
112
|
+
@tokenizer.escape.regex_capture = false if exp.regex_match
|
113
|
+
|
114
|
+
exp.arg = []
|
115
|
+
found_close, eof = false, false
|
116
|
+
until found_close or eof
|
117
|
+
t = @tokenizer.next
|
118
|
+
case t.type
|
119
|
+
when Tokens::TEXT
|
120
|
+
exp.arg << t.val
|
121
|
+
when Tokens::REG_CAPTURE
|
122
|
+
exp.arg << t.val.to_i - 1
|
123
|
+
errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
|
124
|
+
errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
|
125
|
+
when Tokens::EXP_CLOSE
|
126
|
+
found_close = true
|
127
|
+
when Tokens::EOF
|
128
|
+
eof = true
|
129
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
|
130
|
+
else
|
131
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
|
136
|
+
errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def parse_regex!(wildcard)
|
141
|
+
@tokenizer.regex_mode!
|
142
|
+
t = @tokenizer.next
|
143
|
+
reg = Nodes::Regex.new(wildcard, t.val)
|
144
|
+
if t.type == Tokens::TEXT
|
145
|
+
reg.regex = build_regex!(wildcard, t)
|
88
146
|
else
|
89
|
-
|
147
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
|
90
148
|
end
|
149
|
+
|
150
|
+
t = @tokenizer.next
|
151
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
|
152
|
+
reg
|
153
|
+
end
|
154
|
+
|
155
|
+
def build_regex!(wildcard, token, src = token.val)
|
156
|
+
Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
|
157
|
+
rescue RegexpError => e
|
158
|
+
errors << Error.new(:regex, token, e.message)
|
159
|
+
nil
|
91
160
|
end
|
92
161
|
end
|
93
162
|
end
|
data/lib/fop/program.rb
CHANGED
@@ -1,22 +1,16 @@
|
|
1
|
-
require_relative 'tokenizer'
|
2
|
-
require_relative 'parser'
|
3
|
-
|
4
1
|
module Fop
|
5
2
|
class Program
|
6
|
-
|
7
|
-
|
8
|
-
def initialize(src)
|
9
|
-
tokens = Tokenizer.tokenize! src
|
10
|
-
@nodes = Parser.parse! tokens
|
3
|
+
def initialize(instructions)
|
4
|
+
@instructions = instructions
|
11
5
|
end
|
12
6
|
|
13
7
|
def apply(input)
|
14
8
|
input = input.clone
|
15
9
|
output =
|
16
|
-
@
|
17
|
-
|
18
|
-
return nil if
|
19
|
-
acc +
|
10
|
+
@instructions.reduce("") { |acc, ins|
|
11
|
+
result = ins.call(input)
|
12
|
+
return nil if result.nil?
|
13
|
+
acc + result.to_s
|
20
14
|
}
|
21
15
|
input.empty? ? output : nil
|
22
16
|
end
|
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,34 +1,175 @@
|
|
1
|
+
require_relative 'tokens'
|
2
|
+
|
1
3
|
module Fop
|
2
|
-
|
3
|
-
|
4
|
-
Error =
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
class Tokenizer
|
5
|
+
Token = Struct.new(:pos, :type, :val)
|
6
|
+
Error = Struct.new(:pos, :message)
|
7
|
+
Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
|
8
|
+
|
9
|
+
EXP_OPEN = "{".freeze
|
10
|
+
EXP_CLOSE = "}".freeze
|
11
|
+
ESCAPE = "\\".freeze
|
12
|
+
WILDCARD = "*".freeze
|
13
|
+
REGEX_DELIM = "/".freeze
|
14
|
+
REGEX_CAPTURE = "$".freeze
|
15
|
+
OP_REPLACE = "=".freeze
|
16
|
+
OP_APPEND = ">".freeze
|
17
|
+
OP_PREPEND = "<".freeze
|
18
|
+
OP_ADD = "+".freeze
|
19
|
+
OP_SUB = "-".freeze
|
20
|
+
|
21
|
+
#
|
22
|
+
# Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
|
23
|
+
# explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
|
24
|
+
# but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
|
25
|
+
# or off to auto-escape certain chars for the next token.
|
26
|
+
#
|
27
|
+
attr_reader :escape
|
28
|
+
|
29
|
+
def initialize(src)
|
30
|
+
@src = src
|
31
|
+
@end = src.size - 1
|
32
|
+
@start_i = 0
|
33
|
+
@i = 0
|
34
|
+
reset_escapes!
|
35
|
+
end
|
36
|
+
|
37
|
+
# Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
|
38
|
+
def reset_escapes!
|
39
|
+
@escape = Escapes.new(true, true)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Auto-escape anything you'd find in a regular expression
|
43
|
+
def regex_mode!
|
44
|
+
@escape.regex = false # look for the final /
|
45
|
+
@escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
|
46
|
+
@escape.wildcards = true
|
47
|
+
@escape.operators = true
|
48
|
+
@escape.regex_capture = true
|
49
|
+
@escape.exp = true
|
50
|
+
end
|
51
|
+
|
52
|
+
def next
|
53
|
+
return Token.new(@i, Tokens::EOF) if @i > @end
|
54
|
+
char = @src[@i]
|
55
|
+
case char
|
56
|
+
when EXP_OPEN
|
57
|
+
@i += 1
|
58
|
+
token! Tokens::EXP_OPEN
|
59
|
+
when EXP_CLOSE
|
60
|
+
@i += 1
|
61
|
+
token! Tokens::EXP_CLOSE
|
62
|
+
when WILDCARD
|
63
|
+
@i += 1
|
64
|
+
token! Tokens::WILDCARD, WILDCARD
|
65
|
+
when REGEX_DELIM
|
66
|
+
if @escape.regex
|
67
|
+
get_str!
|
68
|
+
else
|
69
|
+
@i += 1
|
70
|
+
token! Tokens::REG_DELIM
|
71
|
+
end
|
72
|
+
when REGEX_CAPTURE
|
73
|
+
if @escape.regex_capture
|
74
|
+
get_str!
|
75
|
+
else
|
76
|
+
@i += 1
|
77
|
+
t = token! Tokens::REG_CAPTURE, @src[@i]
|
78
|
+
@i += 1
|
79
|
+
@start_i = @i
|
80
|
+
t
|
81
|
+
end
|
82
|
+
when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
|
83
|
+
if @escape.operators
|
84
|
+
get_str!
|
85
|
+
else
|
86
|
+
@i += 1
|
87
|
+
token! Tokens::OPERATOR, char
|
88
|
+
end
|
89
|
+
else
|
90
|
+
get_str!
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def token!(type, val = nil)
|
97
|
+
t = Token.new(@start_i, type, val)
|
98
|
+
@start_i = @i
|
99
|
+
t
|
100
|
+
end
|
101
|
+
|
102
|
+
def get_str!
|
103
|
+
str = ""
|
104
|
+
escape, found_end = false, false
|
105
|
+
until found_end or @i > @end
|
106
|
+
char = @src[@i]
|
107
|
+
|
10
108
|
if escape
|
11
|
-
|
109
|
+
@i += 1
|
110
|
+
str << char
|
12
111
|
escape = false
|
13
112
|
next
|
14
113
|
end
|
15
114
|
|
16
115
|
case char
|
17
|
-
when
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
116
|
+
when ESCAPE
|
117
|
+
@i += 1
|
118
|
+
if @escape.regex_escape and @src[@i] != REGEX_DELIM
|
119
|
+
str << char
|
120
|
+
else
|
121
|
+
escape = true
|
122
|
+
end
|
123
|
+
when EXP_OPEN
|
124
|
+
if @escape.exp
|
125
|
+
@i += 1
|
126
|
+
str << char
|
127
|
+
else
|
128
|
+
found_end = true
|
129
|
+
end
|
130
|
+
when EXP_CLOSE
|
131
|
+
if @escape.exp
|
132
|
+
@i += 1
|
133
|
+
str << char
|
134
|
+
else
|
135
|
+
found_end = true
|
136
|
+
end
|
137
|
+
when WILDCARD
|
138
|
+
if @escape.wildcards
|
139
|
+
@i += 1
|
140
|
+
str << char
|
141
|
+
else
|
142
|
+
found_end = true
|
143
|
+
end
|
144
|
+
when REGEX_DELIM
|
145
|
+
if @escape.regex
|
146
|
+
@i += 1
|
147
|
+
str << char
|
148
|
+
else
|
149
|
+
found_end = true
|
150
|
+
end
|
151
|
+
when REGEX_CAPTURE
|
152
|
+
if @escape.regex_capture
|
153
|
+
@i += 1
|
154
|
+
str << char
|
155
|
+
else
|
156
|
+
found_end = true
|
157
|
+
end
|
158
|
+
when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
|
159
|
+
if @escape.operators
|
160
|
+
@i += 1
|
161
|
+
str << char
|
162
|
+
else
|
163
|
+
found_end = true
|
164
|
+
end
|
25
165
|
else
|
26
|
-
|
166
|
+
@i += 1
|
167
|
+
str << char
|
27
168
|
end
|
28
|
-
|
169
|
+
end
|
29
170
|
|
30
|
-
|
31
|
-
|
171
|
+
return Token.new(@i - 1, Tokens::TR_ESC) if escape
|
172
|
+
token! Tokens::TEXT, str
|
32
173
|
end
|
33
174
|
end
|
34
175
|
end
|
data/lib/fop/tokens.rb
ADDED
data/lib/fop/version.rb
CHANGED
data/lib/fop_lang.rb
CHANGED
@@ -1,12 +1,22 @@
|
|
1
1
|
require_relative 'fop/version'
|
2
|
+
require_relative 'fop/compiler'
|
2
3
|
require_relative 'fop/program'
|
3
4
|
|
4
5
|
def Fop(src)
|
5
|
-
::Fop
|
6
|
+
::Fop.compile!(src)
|
6
7
|
end
|
7
8
|
|
8
9
|
module Fop
|
10
|
+
def self.compile!(src)
|
11
|
+
prog, errors = compile(src)
|
12
|
+
# TODO better exception
|
13
|
+
raise "Fop errors: " + errors.map(&:message).join(",") if errors
|
14
|
+
prog
|
15
|
+
end
|
16
|
+
|
9
17
|
def self.compile(src)
|
10
|
-
|
18
|
+
instructions, errors = ::Fop::Compiler.compile(src)
|
19
|
+
return nil, errors if errors
|
20
|
+
return Program.new(instructions), nil
|
11
21
|
end
|
12
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fop_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A micro expression language for Filter and OPerations on text
|
14
14
|
email: jordan.hollinger@gmail.com
|
@@ -17,10 +17,12 @@ extensions: []
|
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
19
|
- README.md
|
20
|
+
- lib/fop/compiler.rb
|
20
21
|
- lib/fop/nodes.rb
|
21
22
|
- lib/fop/parser.rb
|
22
23
|
- lib/fop/program.rb
|
23
24
|
- lib/fop/tokenizer.rb
|
25
|
+
- lib/fop/tokens.rb
|
24
26
|
- lib/fop/version.rb
|
25
27
|
- lib/fop_lang.rb
|
26
28
|
homepage: https://jhollinger.github.io/fop-lang-rb/
|