fop_lang 0.1.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +66 -18
- data/lib/fop/compiler.rb +72 -0
- data/lib/fop/nodes.rb +15 -53
- data/lib/fop/parser.rb +138 -69
- data/lib/fop/program.rb +6 -12
- data/lib/fop/tokenizer.rb +162 -21
- data/lib/fop/tokens.rb +13 -0
- data/lib/fop/version.rb +1 -1
- data/lib/fop_lang.rb +12 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1166e1e43fd54ed2263db8a37ff288431a6152543869d961f007a134483f1a4b
|
4
|
+
data.tar.gz: 17e55b17448c38a37afb6e24a5798c87de368a352bdb0c74df13ad865a7e3ad0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbf5d8c7f6c10ca395518cbd7bf0e9083b1f217b3523386a482f19e5cdf47a16aa7ffb50a6290f89f0ec80ba113900c499944fdf35b7e2e71a941d537483a7e1
|
7
|
+
data.tar.gz: bff3c613a575687d0d3223c5bd60bb1128b0ae78accf2e5101228c3ec14d61f46cdc798855af9d07692de0b25ba01737dbda97409d4e8f44881e9bbe6da9c523
|
data/README.md
CHANGED
@@ -1,44 +1,92 @@
|
|
1
1
|
# fop_lang
|
2
2
|
|
3
|
-
Fop is
|
3
|
+
Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
|
4
4
|
|
5
|
-
|
5
|
+
```ruby
|
6
|
+
gem 'fop_lang'
|
7
|
+
```
|
8
|
+
|
9
|
+
## Release Number Example
|
10
|
+
|
11
|
+
This example takes in GitHub branch names, decides if they're release branches, and if so, increments the version number.
|
6
12
|
|
7
13
|
```ruby
|
8
|
-
f = Fop(
|
14
|
+
f = Fop('release-{N}.{N+1}.{N=0}')
|
9
15
|
|
10
|
-
puts f.apply(
|
11
|
-
=>
|
16
|
+
puts f.apply('release-5.99.1')
|
17
|
+
=> 'release-5.100.0'
|
12
18
|
|
13
|
-
puts f.apply(
|
19
|
+
puts f.apply('release-5')
|
14
20
|
=> nil
|
15
21
|
# doesn't match the pattern
|
16
22
|
```
|
17
23
|
|
24
|
+
## Anatomy of a Fop expression
|
25
|
+
|
26
|
+
`Text Literal {Operation}`
|
27
|
+
|
28
|
+
The above expression contains the only two parts of Fop (except for the wildcard and escape characters).
|
29
|
+
|
30
|
+
**Text Literals**
|
31
|
+
|
32
|
+
A text literal works how it sounds: the input must match it exactly. If it matches, it passes through unchanged. The only exception is the `*` (wildcard) character, which matches 0 or more of anything. Wildcards can be used anywhere except inside `{...}` (operations).
|
33
|
+
|
34
|
+
If `\` (escape) is used before the special characters `*`, `{` or `}`, then that character is treated like a text literal. It's recommended to use single-quoted Ruby strings with Fop expressions so that you don't need to double-escape.
|
35
|
+
|
36
|
+
**Operations**
|
37
|
+
|
38
|
+
Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
|
39
|
+
|
40
|
+
1. Matching class (required): Defines what characters the operation will match and operate on.
|
41
|
+
* `N` is the numeric class and will match one or more digits.
|
42
|
+
* `A` is the alpha class and will match one or more letters (lower or upper case).
|
43
|
+
* `W` is the word class and matches alphanumeric chars and underscores.
|
44
|
+
* `*` is the wildcard class and greedily matches everything after it.
|
45
|
+
* `/.../` matches on the supplied regex between the `/`'s. If your regex contains a `/`, it must be escaped. Capture groups may be referenced in the operator argument as `$1`, `$2`, etc.
|
46
|
+
2. Operator (optional): What to do to the matching characters.
|
47
|
+
* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
|
48
|
+
* `>` Append the following chars to the matching value.
|
49
|
+
* `<` Prepend the following chars to the matching value.
|
50
|
+
* `+` Perform addition on the matching number and the argument (`N` only).
|
51
|
+
* `-` Subtract the argument from the matching number (`N` only).
|
52
|
+
3. Operator argument (required for some operators): meaning varies by operator.
|
53
|
+
|
54
|
+
## More Examples
|
55
|
+
|
18
56
|
```ruby
|
19
|
-
f = Fop(
|
57
|
+
f = Fop('release-{N=5}.{N+1}.{N=0}')
|
58
|
+
|
59
|
+
puts f.apply('release-4.99.1')
|
60
|
+
=> 'release-5.100.0'
|
61
|
+
```
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
|
65
|
+
|
66
|
+
puts f.apply('release-4.99.1')
|
67
|
+
=> 'release-5.100.0'
|
20
68
|
|
21
|
-
puts f.apply(
|
22
|
-
=>
|
69
|
+
puts f.apply('rel-4.99.1')
|
70
|
+
=> 'rel-5.100.0'
|
23
71
|
```
|
24
72
|
|
25
73
|
```ruby
|
26
|
-
f = Fop(
|
74
|
+
f = Fop('release-*{N=5}.{N+100}.{N=0}')
|
27
75
|
|
28
|
-
puts f.apply(
|
29
|
-
=>
|
76
|
+
puts f.apply('release-foo-4.100.1')
|
77
|
+
=> 'release-foo-5.200.0'
|
30
78
|
```
|
31
79
|
|
32
80
|
```ruby
|
33
|
-
f = Fop(
|
81
|
+
f = Fop('release-{N=5}.{N+1}.{N=0}{*=}')
|
34
82
|
|
35
|
-
puts f.apply(
|
36
|
-
=>
|
83
|
+
puts f.apply('release-4.100.1.foo.bar')
|
84
|
+
=> 'release-5.101.0'
|
37
85
|
```
|
38
86
|
|
39
87
|
```ruby
|
40
|
-
f = Fop(
|
88
|
+
f = Fop('{W=version}-{N=5}.{N+1}.{N=0}')
|
41
89
|
|
42
|
-
puts f.apply(
|
43
|
-
=>
|
90
|
+
puts f.apply('release-4.100.1')
|
91
|
+
=> 'version-5.101.0'
|
44
92
|
```
|
data/lib/fop/compiler.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require_relative 'parser'
|
2
|
+
|
3
|
+
module Fop
|
4
|
+
module Compiler
|
5
|
+
def self.compile(src)
|
6
|
+
parser = Parser.new(src)
|
7
|
+
nodes, errors = parser.parse
|
8
|
+
|
9
|
+
instructions = nodes.map { |node|
|
10
|
+
case node
|
11
|
+
when Nodes::Text, Nodes::Regex
|
12
|
+
Instructions.regex_match(node.regex)
|
13
|
+
when Nodes::Expression
|
14
|
+
Instructions::ExpressionMatch.new(node)
|
15
|
+
else
|
16
|
+
raise "Unknown node type #{node}"
|
17
|
+
end
|
18
|
+
}
|
19
|
+
|
20
|
+
return nil, errors if errors.any?
|
21
|
+
return instructions, nil
|
22
|
+
end
|
23
|
+
|
24
|
+
module Instructions
|
25
|
+
BLANK = "".freeze
|
26
|
+
OPERATIONS = {
|
27
|
+
"=" => ->(_val, arg) { arg || BLANK },
|
28
|
+
"+" => ->(val, arg) { val.to_i + arg.to_i },
|
29
|
+
"-" => ->(val, arg) { val.to_i - arg.to_i },
|
30
|
+
">" => ->(val, arg) { val + arg },
|
31
|
+
"<" => ->(val, arg) { arg + val },
|
32
|
+
}
|
33
|
+
|
34
|
+
def self.regex_match(regex)
|
35
|
+
->(input) { input.slice! regex }
|
36
|
+
end
|
37
|
+
|
38
|
+
class ExpressionMatch
|
39
|
+
def initialize(node)
|
40
|
+
@regex = node.regex&.regex
|
41
|
+
@op = node.operator ? OPERATIONS.fetch(node.operator) : nil
|
42
|
+
@regex_match = node.regex_match
|
43
|
+
if node.arg&.any? { |a| a.is_a? Integer }
|
44
|
+
@arg, @arg_with_caps = nil, node.arg
|
45
|
+
else
|
46
|
+
@arg = node.arg&.join("")
|
47
|
+
@arg_with_caps = nil
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def call(input)
|
52
|
+
if (match = @regex.match(input))
|
53
|
+
val = match.to_s
|
54
|
+
blank = val == BLANK
|
55
|
+
input.sub!(val, BLANK) unless blank
|
56
|
+
found_val = @regex_match || !blank
|
57
|
+
arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
|
58
|
+
@op && found_val ? @op.call(val, arg) : val
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def sub_caps(args, caps)
|
65
|
+
args.map { |a|
|
66
|
+
a.is_a?(Integer) ? caps[a].to_s : a
|
67
|
+
}.join("")
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/fop/nodes.rb
CHANGED
@@ -1,68 +1,30 @@
|
|
1
1
|
module Fop
|
2
2
|
module Nodes
|
3
|
-
Text = Struct.new(:wildcard, :str) do
|
4
|
-
def consume!(input)
|
5
|
-
@regex ||= Regexp.new((wildcard ? ".*" : "^") + Regexp.escape(str))
|
6
|
-
input.slice!(@regex)
|
7
|
-
end
|
8
|
-
|
3
|
+
Text = Struct.new(:wildcard, :str, :regex) do
|
9
4
|
def to_s
|
10
5
|
w = wildcard ? "*" : nil
|
11
|
-
"
|
6
|
+
"[#{w}txt] #{str}"
|
12
7
|
end
|
13
8
|
end
|
14
9
|
|
15
|
-
|
16
|
-
NUM = "N".freeze
|
17
|
-
WORD = "W".freeze
|
18
|
-
WILD = "*".freeze
|
19
|
-
BLANK = "".freeze
|
20
|
-
|
21
|
-
def consume!(input)
|
22
|
-
if (val = input.slice!(@regex))
|
23
|
-
@expression && val != BLANK ? @expression.call(val) : val
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
10
|
+
Regex = Struct.new(:wildcard, :src, :regex) do
|
27
11
|
def to_s
|
28
12
|
w = wildcard ? "*" : nil
|
29
|
-
|
13
|
+
"[#{w}reg] #{src}"
|
30
14
|
end
|
15
|
+
end
|
31
16
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
when WILD then /.*/
|
42
|
-
else raise ParserError, "Unknown match type '#{@match}'"
|
43
|
-
end
|
44
|
-
|
45
|
-
if (op = tokens.shift)
|
46
|
-
raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
|
47
|
-
arg = tokens.reduce("") { |acc, t|
|
48
|
-
raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
|
49
|
-
acc + t.char
|
50
|
-
}
|
51
|
-
|
52
|
-
@op = op.char
|
53
|
-
@arg = arg == BLANK ? nil : arg
|
54
|
-
@expression =
|
55
|
-
case @op
|
56
|
-
when "=" then ->(_) { @arg || BLANK }
|
57
|
-
when "+", "-", "*", "/"
|
58
|
-
raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
|
59
|
-
raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
|
60
|
-
->(x) { x.to_i.send(@op, @arg.to_i) }
|
61
|
-
else raise ParserError, "Unknown operator #{@op}"
|
62
|
-
end
|
63
|
-
else
|
64
|
-
@op, @arg, @expression = nil, nil, nil
|
17
|
+
Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
|
18
|
+
def to_s
|
19
|
+
w = wildcard ? "*" : nil
|
20
|
+
s = "[#{w}exp] #{match}"
|
21
|
+
if operator
|
22
|
+
arg_str = arg
|
23
|
+
.map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
|
24
|
+
.join("")
|
25
|
+
s << " #{operator} #{arg_str}"
|
65
26
|
end
|
27
|
+
s
|
66
28
|
end
|
67
29
|
end
|
68
30
|
end
|
data/lib/fop/parser.rb
CHANGED
@@ -1,93 +1,162 @@
|
|
1
|
+
require_relative 'tokenizer'
|
1
2
|
require_relative 'nodes'
|
2
3
|
|
3
4
|
module Fop
|
4
|
-
|
5
|
-
|
5
|
+
class Parser
|
6
|
+
DIGIT = /^[0-9]$/
|
7
|
+
REGEX_START = "^".freeze
|
8
|
+
REGEX_LAZY_WILDCARD = ".*?".freeze
|
9
|
+
REGEX_MATCHES = {
|
10
|
+
"N" => "[0-9]+".freeze,
|
11
|
+
"W" => "\\w+".freeze,
|
12
|
+
"A" => "[a-zA-Z]+".freeze,
|
13
|
+
"*" => ".*".freeze,
|
14
|
+
}.freeze
|
15
|
+
OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
|
16
|
+
TR_REGEX = /.*/
|
6
17
|
|
7
|
-
|
8
|
-
|
9
|
-
|
18
|
+
Error = Struct.new(:type, :token, :message) do
|
19
|
+
def to_s
|
20
|
+
"#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
attr_reader :errors
|
10
25
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
26
|
+
def initialize(src, debug: false)
|
27
|
+
@tokenizer = Tokenizer.new(src)
|
28
|
+
@errors = []
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse
|
32
|
+
nodes = []
|
33
|
+
wildcard = false
|
34
|
+
eof = false
|
35
|
+
# Top-level parsing. It will always be looking for a String, Regex, or Expression.
|
36
|
+
until eof
|
37
|
+
@tokenizer.reset_escapes!
|
38
|
+
t = @tokenizer.next
|
39
|
+
case t.type
|
40
|
+
when Tokens::WILDCARD
|
41
|
+
errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
|
42
|
+
wildcard = true
|
43
|
+
when Tokens::TEXT
|
44
|
+
reg = build_regex!(wildcard, t, Regexp.escape(t.val))
|
45
|
+
nodes << Nodes::Text.new(wildcard, t.val, reg)
|
46
|
+
wildcard = false
|
47
|
+
when Tokens::EXP_OPEN
|
48
|
+
nodes << parse_exp!(wildcard)
|
49
|
+
wildcard = false
|
50
|
+
when Tokens::REG_DELIM
|
51
|
+
nodes << parse_regex!(wildcard)
|
52
|
+
wildcard = false
|
53
|
+
when Tokens::EOF
|
54
|
+
eof = true
|
22
55
|
else
|
23
|
-
|
56
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}")
|
24
57
|
end
|
25
|
-
}
|
26
|
-
|
27
|
-
case current_el
|
28
|
-
when nil
|
29
|
-
# noop
|
30
|
-
when :wildcard
|
31
|
-
stack << Nodes::Text.new(true, "")
|
32
|
-
when Nodes::Text
|
33
|
-
stack << current_el
|
34
|
-
when Nodes::Match
|
35
|
-
raise Error, "Unclosed match"
|
36
58
|
end
|
37
|
-
|
38
|
-
|
59
|
+
nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
|
60
|
+
return nodes, @errors
|
39
61
|
end
|
40
62
|
|
41
|
-
|
63
|
+
def parse_exp!(wildcard = false)
|
64
|
+
exp = Nodes::Expression.new(wildcard)
|
65
|
+
parse_exp_match! exp
|
66
|
+
op_token = parse_exp_operator! exp
|
67
|
+
if exp.operator
|
68
|
+
parse_exp_arg! exp, op_token
|
69
|
+
end
|
70
|
+
return exp
|
71
|
+
end
|
42
72
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
when
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
73
|
+
def parse_exp_match!(exp)
|
74
|
+
@tokenizer.escape.operators = false
|
75
|
+
t = @tokenizer.next
|
76
|
+
case t.type
|
77
|
+
when Tokens::TEXT, Tokens::WILDCARD
|
78
|
+
exp.match = t.val
|
79
|
+
if (src = REGEX_MATCHES[exp.match])
|
80
|
+
reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
|
81
|
+
exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
|
82
|
+
else
|
83
|
+
errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
|
84
|
+
end
|
85
|
+
when Tokens::REG_DELIM
|
86
|
+
exp.regex = parse_regex!(exp.wildcard)
|
87
|
+
exp.match = exp.regex&.src
|
88
|
+
exp.regex_match = true
|
89
|
+
@tokenizer.reset_escapes!
|
53
90
|
else
|
54
|
-
|
91
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
|
55
92
|
end
|
56
93
|
end
|
57
94
|
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
text_el.str << token.char
|
67
|
-
text_el
|
68
|
-
when :wildcard
|
69
|
-
stack << text_el
|
70
|
-
:wildcard
|
95
|
+
def parse_exp_operator!(exp)
|
96
|
+
@tokenizer.escape.operators = false
|
97
|
+
t = @tokenizer.next
|
98
|
+
case t.type
|
99
|
+
when Tokens::EXP_CLOSE
|
100
|
+
# no op
|
101
|
+
when Tokens::OPERATOR
|
102
|
+
exp.operator = t.val
|
71
103
|
else
|
72
|
-
|
104
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
|
73
105
|
end
|
106
|
+
t
|
74
107
|
end
|
75
108
|
|
76
|
-
def
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
109
|
+
def parse_exp_arg!(exp, op_token)
|
110
|
+
@tokenizer.escape.operators = true
|
111
|
+
@tokenizer.escape.regex = true
|
112
|
+
@tokenizer.escape.regex_capture = false if exp.regex_match
|
113
|
+
|
114
|
+
exp.arg = []
|
115
|
+
found_close, eof = false, false
|
116
|
+
until found_close or eof
|
117
|
+
t = @tokenizer.next
|
118
|
+
case t.type
|
119
|
+
when Tokens::TEXT
|
120
|
+
exp.arg << t.val
|
121
|
+
when Tokens::REG_CAPTURE
|
122
|
+
exp.arg << t.val.to_i - 1
|
123
|
+
errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
|
124
|
+
errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
|
125
|
+
when Tokens::EXP_CLOSE
|
126
|
+
found_close = true
|
127
|
+
when Tokens::EOF
|
128
|
+
eof = true
|
129
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
|
130
|
+
else
|
131
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
|
136
|
+
errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def parse_regex!(wildcard)
|
141
|
+
@tokenizer.regex_mode!
|
142
|
+
t = @tokenizer.next
|
143
|
+
reg = Nodes::Regex.new(wildcard, t.val)
|
144
|
+
if t.type == Tokens::TEXT
|
145
|
+
reg.regex = build_regex!(wildcard, t)
|
88
146
|
else
|
89
|
-
|
147
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
|
90
148
|
end
|
149
|
+
|
150
|
+
t = @tokenizer.next
|
151
|
+
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
|
152
|
+
reg
|
153
|
+
end
|
154
|
+
|
155
|
+
def build_regex!(wildcard, token, src = token.val)
|
156
|
+
Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
|
157
|
+
rescue RegexpError => e
|
158
|
+
errors << Error.new(:regex, token, e.message)
|
159
|
+
nil
|
91
160
|
end
|
92
161
|
end
|
93
162
|
end
|
data/lib/fop/program.rb
CHANGED
@@ -1,22 +1,16 @@
|
|
1
|
-
require_relative 'tokenizer'
|
2
|
-
require_relative 'parser'
|
3
|
-
|
4
1
|
module Fop
|
5
2
|
class Program
|
6
|
-
|
7
|
-
|
8
|
-
def initialize(src)
|
9
|
-
tokens = Tokenizer.tokenize! src
|
10
|
-
@nodes = Parser.parse! tokens
|
3
|
+
def initialize(instructions)
|
4
|
+
@instructions = instructions
|
11
5
|
end
|
12
6
|
|
13
7
|
def apply(input)
|
14
8
|
input = input.clone
|
15
9
|
output =
|
16
|
-
@
|
17
|
-
|
18
|
-
return nil if
|
19
|
-
acc +
|
10
|
+
@instructions.reduce("") { |acc, ins|
|
11
|
+
result = ins.call(input)
|
12
|
+
return nil if result.nil?
|
13
|
+
acc + result.to_s
|
20
14
|
}
|
21
15
|
input.empty? ? output : nil
|
22
16
|
end
|
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,34 +1,175 @@
|
|
1
|
+
require_relative 'tokens'
|
2
|
+
|
1
3
|
module Fop
|
2
|
-
|
3
|
-
|
4
|
-
Error =
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
class Tokenizer
|
5
|
+
Token = Struct.new(:pos, :type, :val)
|
6
|
+
Error = Struct.new(:pos, :message)
|
7
|
+
Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
|
8
|
+
|
9
|
+
EXP_OPEN = "{".freeze
|
10
|
+
EXP_CLOSE = "}".freeze
|
11
|
+
ESCAPE = "\\".freeze
|
12
|
+
WILDCARD = "*".freeze
|
13
|
+
REGEX_DELIM = "/".freeze
|
14
|
+
REGEX_CAPTURE = "$".freeze
|
15
|
+
OP_REPLACE = "=".freeze
|
16
|
+
OP_APPEND = ">".freeze
|
17
|
+
OP_PREPEND = "<".freeze
|
18
|
+
OP_ADD = "+".freeze
|
19
|
+
OP_SUB = "-".freeze
|
20
|
+
|
21
|
+
#
|
22
|
+
# Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
|
23
|
+
# explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
|
24
|
+
# but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
|
25
|
+
# or off to auto-escape certain chars for the next token.
|
26
|
+
#
|
27
|
+
attr_reader :escape
|
28
|
+
|
29
|
+
def initialize(src)
|
30
|
+
@src = src
|
31
|
+
@end = src.size - 1
|
32
|
+
@start_i = 0
|
33
|
+
@i = 0
|
34
|
+
reset_escapes!
|
35
|
+
end
|
36
|
+
|
37
|
+
# Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
|
38
|
+
def reset_escapes!
|
39
|
+
@escape = Escapes.new(true, true)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Auto-escape anything you'd find in a regular expression
|
43
|
+
def regex_mode!
|
44
|
+
@escape.regex = false # look for the final /
|
45
|
+
@escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
|
46
|
+
@escape.wildcards = true
|
47
|
+
@escape.operators = true
|
48
|
+
@escape.regex_capture = true
|
49
|
+
@escape.exp = true
|
50
|
+
end
|
51
|
+
|
52
|
+
def next
|
53
|
+
return Token.new(@i, Tokens::EOF) if @i > @end
|
54
|
+
char = @src[@i]
|
55
|
+
case char
|
56
|
+
when EXP_OPEN
|
57
|
+
@i += 1
|
58
|
+
token! Tokens::EXP_OPEN
|
59
|
+
when EXP_CLOSE
|
60
|
+
@i += 1
|
61
|
+
token! Tokens::EXP_CLOSE
|
62
|
+
when WILDCARD
|
63
|
+
@i += 1
|
64
|
+
token! Tokens::WILDCARD, WILDCARD
|
65
|
+
when REGEX_DELIM
|
66
|
+
if @escape.regex
|
67
|
+
get_str!
|
68
|
+
else
|
69
|
+
@i += 1
|
70
|
+
token! Tokens::REG_DELIM
|
71
|
+
end
|
72
|
+
when REGEX_CAPTURE
|
73
|
+
if @escape.regex_capture
|
74
|
+
get_str!
|
75
|
+
else
|
76
|
+
@i += 1
|
77
|
+
t = token! Tokens::REG_CAPTURE, @src[@i]
|
78
|
+
@i += 1
|
79
|
+
@start_i = @i
|
80
|
+
t
|
81
|
+
end
|
82
|
+
when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
|
83
|
+
if @escape.operators
|
84
|
+
get_str!
|
85
|
+
else
|
86
|
+
@i += 1
|
87
|
+
token! Tokens::OPERATOR, char
|
88
|
+
end
|
89
|
+
else
|
90
|
+
get_str!
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def token!(type, val = nil)
|
97
|
+
t = Token.new(@start_i, type, val)
|
98
|
+
@start_i = @i
|
99
|
+
t
|
100
|
+
end
|
101
|
+
|
102
|
+
def get_str!
|
103
|
+
str = ""
|
104
|
+
escape, found_end = false, false
|
105
|
+
until found_end or @i > @end
|
106
|
+
char = @src[@i]
|
107
|
+
|
10
108
|
if escape
|
11
|
-
|
109
|
+
@i += 1
|
110
|
+
str << char
|
12
111
|
escape = false
|
13
112
|
next
|
14
113
|
end
|
15
114
|
|
16
115
|
case char
|
17
|
-
when
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
116
|
+
when ESCAPE
|
117
|
+
@i += 1
|
118
|
+
if @escape.regex_escape and @src[@i] != REGEX_DELIM
|
119
|
+
str << char
|
120
|
+
else
|
121
|
+
escape = true
|
122
|
+
end
|
123
|
+
when EXP_OPEN
|
124
|
+
if @escape.exp
|
125
|
+
@i += 1
|
126
|
+
str << char
|
127
|
+
else
|
128
|
+
found_end = true
|
129
|
+
end
|
130
|
+
when EXP_CLOSE
|
131
|
+
if @escape.exp
|
132
|
+
@i += 1
|
133
|
+
str << char
|
134
|
+
else
|
135
|
+
found_end = true
|
136
|
+
end
|
137
|
+
when WILDCARD
|
138
|
+
if @escape.wildcards
|
139
|
+
@i += 1
|
140
|
+
str << char
|
141
|
+
else
|
142
|
+
found_end = true
|
143
|
+
end
|
144
|
+
when REGEX_DELIM
|
145
|
+
if @escape.regex
|
146
|
+
@i += 1
|
147
|
+
str << char
|
148
|
+
else
|
149
|
+
found_end = true
|
150
|
+
end
|
151
|
+
when REGEX_CAPTURE
|
152
|
+
if @escape.regex_capture
|
153
|
+
@i += 1
|
154
|
+
str << char
|
155
|
+
else
|
156
|
+
found_end = true
|
157
|
+
end
|
158
|
+
when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
|
159
|
+
if @escape.operators
|
160
|
+
@i += 1
|
161
|
+
str << char
|
162
|
+
else
|
163
|
+
found_end = true
|
164
|
+
end
|
25
165
|
else
|
26
|
-
|
166
|
+
@i += 1
|
167
|
+
str << char
|
27
168
|
end
|
28
|
-
|
169
|
+
end
|
29
170
|
|
30
|
-
|
31
|
-
|
171
|
+
return Token.new(@i - 1, Tokens::TR_ESC) if escape
|
172
|
+
token! Tokens::TEXT, str
|
32
173
|
end
|
33
174
|
end
|
34
175
|
end
|
data/lib/fop/tokens.rb
ADDED
data/lib/fop/version.rb
CHANGED
data/lib/fop_lang.rb
CHANGED
@@ -1,12 +1,22 @@
|
|
1
1
|
require_relative 'fop/version'
|
2
|
+
require_relative 'fop/compiler'
|
2
3
|
require_relative 'fop/program'
|
3
4
|
|
4
5
|
def Fop(src)
|
5
|
-
::Fop
|
6
|
+
::Fop.compile!(src)
|
6
7
|
end
|
7
8
|
|
8
9
|
module Fop
|
10
|
+
def self.compile!(src)
|
11
|
+
prog, errors = compile(src)
|
12
|
+
# TODO better exception
|
13
|
+
raise "Fop errors: " + errors.map(&:message).join(",") if errors
|
14
|
+
prog
|
15
|
+
end
|
16
|
+
|
9
17
|
def self.compile(src)
|
10
|
-
|
18
|
+
instructions, errors = ::Fop::Compiler.compile(src)
|
19
|
+
return nil, errors if errors
|
20
|
+
return Program.new(instructions), nil
|
11
21
|
end
|
12
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fop_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A micro expression language for Filter and OPerations on text
|
14
14
|
email: jordan.hollinger@gmail.com
|
@@ -17,10 +17,12 @@ extensions: []
|
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
19
|
- README.md
|
20
|
+
- lib/fop/compiler.rb
|
20
21
|
- lib/fop/nodes.rb
|
21
22
|
- lib/fop/parser.rb
|
22
23
|
- lib/fop/program.rb
|
23
24
|
- lib/fop/tokenizer.rb
|
25
|
+
- lib/fop/tokens.rb
|
24
26
|
- lib/fop/version.rb
|
25
27
|
- lib/fop_lang.rb
|
26
28
|
homepage: https://jhollinger.github.io/fop-lang-rb/
|