fop_lang 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -3
- data/lib/fop/nodes.rb +7 -47
- data/lib/fop/parser.rb +91 -48
- data/lib/fop/program.rb +1 -1
- data/lib/fop/tokenizer.rb +99 -10
- data/lib/fop/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5f19a543b81c0046dc63fcc1c0769989628017d2c1d1da74ef0db9866a0f2f7
|
4
|
+
data.tar.gz: 03b6597f9cab97c95ccda8396693bb43d9da729137cb916cc74f7fbecc314b32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a17c82a561e20cbc5cb8abbad5be4f94f02110d60b6130e3e1e9489672c5c134befc6b1daca2f590f083a67934e600fb5d6fa0ea5433181ba3014514c558232
|
7
|
+
data.tar.gz: 790250c8a79dcf04b381f2dd33cbaa048fd070688ab45446ff87652dcb18844c2d6139d0ead060fa338a57b8590eee0167ea2c25abd84e1d71571f33c49bcbda
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# fop_lang
|
2
2
|
|
3
|
-
Fop (Filter and
|
3
|
+
Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
gem 'fop_lang'
|
7
|
+
```
|
4
8
|
|
5
9
|
## Release Number Example
|
6
10
|
|
@@ -33,13 +37,14 @@ If `\` (escape) is used before the special characters `*`, `{` or `}`, then that
|
|
33
37
|
|
34
38
|
Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
|
35
39
|
|
36
|
-
1. Matching
|
40
|
+
1. Matching class (required): Defines what characters the operation will match and operate on.
|
37
41
|
* `N` is the numeric class and will match one or more digits.
|
38
42
|
* `A` is the alpha class and will match one or more letters (lower or upper case).
|
39
43
|
* `W` is the word class and matches alphanumeric chars and underscores.
|
40
44
|
* `*` is the wildcard class and greedily matches everything after it.
|
45
|
+
* `/.../` matches on the supplied regex between the `/`'s. If your regex contains a `/`, it must be escaped.
|
41
46
|
3. Operator (optional): What to do to the matching characters.
|
42
|
-
* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
|
47
|
+
* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars. Note that any `/` chars must be escaped, so as not to be mistaken for a regex.
|
43
48
|
* `+` Perform addition on the matching number and the argument (`N` only).
|
44
49
|
* `-` Subtract the argument from the matching number (`N` only).
|
45
50
|
5. Operator argument (required for some operators): meaning varies by operator.
|
@@ -53,6 +58,16 @@ Operations are the interesting part of Fop, and are specified between `{` and `}
|
|
53
58
|
=> 'release-5.100.0'
|
54
59
|
```
|
55
60
|
|
61
|
+
```ruby
|
62
|
+
f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
|
63
|
+
|
64
|
+
puts f.apply('release-4.99.1')
|
65
|
+
=> 'release-5.100.0'
|
66
|
+
|
67
|
+
puts f.apply('rel-4.99.1')
|
68
|
+
=> 'rel-5.100.0'
|
69
|
+
```
|
70
|
+
|
56
71
|
```ruby
|
57
72
|
f = Fop('release-*{N=5}.{N+100}.{N=0}')
|
58
73
|
|
data/lib/fop/nodes.rb
CHANGED
@@ -12,59 +12,19 @@ module Fop
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
NUM = "N".freeze
|
17
|
-
WORD = "W".freeze
|
18
|
-
ALPHA = "A".freeze
|
19
|
-
WILD = "*".freeze
|
20
|
-
BLANK = "".freeze
|
21
|
-
|
15
|
+
Op = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :operator_arg, :expression) do
|
22
16
|
def consume!(input)
|
23
|
-
if (val = input.slice!(
|
24
|
-
|
17
|
+
if (val = input.slice!(regex))
|
18
|
+
found_val = regex_match || val != Parser::BLANK
|
19
|
+
expression && found_val ? expression.call(val) : val
|
25
20
|
end
|
26
21
|
end
|
27
22
|
|
28
23
|
def to_s
|
29
24
|
w = wildcard ? "*" : nil
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
def parse!
|
34
|
-
match = tokens.shift || raise(ParserError, "Empty match")
|
35
|
-
raise ParserError, "Unexpected #{match}" unless match.is_a? Tokenizer::Char
|
36
|
-
|
37
|
-
@match = match.char
|
38
|
-
@regex =
|
39
|
-
case @match
|
40
|
-
when NUM then Regexp.new((wildcard ? ".*?" : "^") + "[0-9]+")
|
41
|
-
when WORD then Regexp.new((wildcard ? ".*?" : "^") + "\\w+")
|
42
|
-
when ALPHA then Regexp.new((wildcard ? ".*?" : "^") + "[a-zA-Z]+")
|
43
|
-
when WILD then /.*/
|
44
|
-
else raise ParserError, "Unknown match type '#{@match}'"
|
45
|
-
end
|
46
|
-
|
47
|
-
if (op = tokens.shift)
|
48
|
-
raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
|
49
|
-
arg = tokens.reduce("") { |acc, t|
|
50
|
-
raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
|
51
|
-
acc + t.char
|
52
|
-
}
|
53
|
-
|
54
|
-
@op = op.char
|
55
|
-
@arg = arg == BLANK ? nil : arg
|
56
|
-
@expression =
|
57
|
-
case @op
|
58
|
-
when "=" then ->(_) { @arg || BLANK }
|
59
|
-
when "+", "-", "*", "/"
|
60
|
-
raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
|
61
|
-
raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
|
62
|
-
->(x) { x.to_i.send(@op, @arg.to_i) }
|
63
|
-
else raise ParserError, "Unknown operator #{@op}"
|
64
|
-
end
|
65
|
-
else
|
66
|
-
@op, @arg, @expression = nil, nil, nil
|
67
|
-
end
|
25
|
+
s = "#{w}#{match}"
|
26
|
+
s << " #{operator} #{operator_arg}" if operator
|
27
|
+
s
|
68
28
|
end
|
69
29
|
end
|
70
30
|
end
|
data/lib/fop/parser.rb
CHANGED
@@ -4,89 +4,132 @@ module Fop
|
|
4
4
|
module Parser
|
5
5
|
Error = Class.new(StandardError)
|
6
6
|
|
7
|
+
MATCH_NUM = "N".freeze
|
8
|
+
MATCH_WORD = "W".freeze
|
9
|
+
MATCH_ALPHA = "A".freeze
|
10
|
+
MATCH_WILD = "*".freeze
|
11
|
+
BLANK = "".freeze
|
12
|
+
OP_REPLACE = "=".freeze
|
13
|
+
OP_ADD = "+".freeze
|
14
|
+
OP_SUB = "-".freeze
|
15
|
+
OP_MUL = "*".freeze
|
16
|
+
OP_DIV = "/".freeze
|
17
|
+
|
7
18
|
def self.parse!(tokens)
|
8
|
-
|
9
|
-
|
19
|
+
nodes = []
|
20
|
+
curr_node = nil
|
10
21
|
|
11
22
|
tokens.each { |token|
|
12
|
-
case
|
23
|
+
case curr_node
|
13
24
|
when nil
|
14
|
-
|
25
|
+
curr_node = new_node token
|
15
26
|
when :wildcard
|
16
|
-
|
17
|
-
raise Error, "Unexpected * after wildcard" if
|
27
|
+
curr_node = new_node token, true
|
28
|
+
raise Error, "Unexpected * after wildcard" if curr_node == :wildcard
|
18
29
|
when Nodes::Text
|
19
|
-
|
20
|
-
|
21
|
-
|
30
|
+
curr_node, finished_node = parse_text curr_node, token
|
31
|
+
nodes << finished_node if finished_node
|
32
|
+
when Nodes::Op
|
33
|
+
nodes << curr_node
|
34
|
+
curr_node = new_node token
|
22
35
|
else
|
23
|
-
raise Error, "Unexpected
|
36
|
+
raise Error, "Unexpected node #{curr_node}"
|
24
37
|
end
|
25
38
|
}
|
26
39
|
|
27
|
-
case
|
40
|
+
case curr_node
|
28
41
|
when nil
|
29
42
|
# noop
|
30
43
|
when :wildcard
|
31
|
-
|
32
|
-
when Nodes::Text
|
33
|
-
|
34
|
-
|
35
|
-
raise
|
44
|
+
nodes << Nodes::Text.new(true, "")
|
45
|
+
when Nodes::Text, Nodes::Op
|
46
|
+
nodes << curr_node
|
47
|
+
else
|
48
|
+
raise "Unexpected end node #{curr_node}"
|
36
49
|
end
|
37
50
|
|
38
|
-
|
51
|
+
nodes
|
39
52
|
end
|
40
53
|
|
41
54
|
private
|
42
55
|
|
43
|
-
def self.
|
56
|
+
def self.new_node(token, wildcard = false)
|
44
57
|
case token
|
45
58
|
when Tokenizer::Char
|
46
59
|
Nodes::Text.new(wildcard, token.char.clone)
|
47
|
-
when
|
48
|
-
Nodes::
|
49
|
-
|
50
|
-
|
60
|
+
when Tokenizer::Op
|
61
|
+
op = Nodes::Op.new(wildcard)
|
62
|
+
parse_op! op, token.tokens
|
63
|
+
op
|
51
64
|
when :wildcard
|
52
65
|
:wildcard
|
53
66
|
else
|
54
|
-
raise
|
67
|
+
raise Error, "Unexpected #{token}"
|
55
68
|
end
|
56
69
|
end
|
57
70
|
|
58
|
-
|
71
|
+
# @return current node
|
72
|
+
# @return finished node
|
73
|
+
def self.parse_text(node, token)
|
59
74
|
case token
|
60
|
-
when :match_open
|
61
|
-
stack << text_el
|
62
|
-
Nodes::Match.new(false, [])
|
63
|
-
when :match_close
|
64
|
-
raise ParserError.new, "Unexpected }"
|
65
75
|
when Tokenizer::Char
|
66
|
-
|
67
|
-
|
76
|
+
node.str << token.char
|
77
|
+
return node, nil
|
78
|
+
when Tokenizer::Op
|
79
|
+
op = new_node token
|
80
|
+
return op, node
|
68
81
|
when :wildcard
|
69
|
-
|
70
|
-
:wildcard
|
82
|
+
return :wildcard, node
|
71
83
|
else
|
72
|
-
raise
|
84
|
+
raise Error, "Unexpected #{token}"
|
73
85
|
end
|
74
86
|
end
|
75
87
|
|
76
|
-
def self.
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
88
|
+
def self.parse_op!(node, tokens)
|
89
|
+
t = tokens[0] || raise(Error, "Empty operation")
|
90
|
+
# parse the matching type
|
91
|
+
node.regex =
|
92
|
+
case t
|
93
|
+
when Tokenizer::Char
|
94
|
+
node.match = t.char
|
95
|
+
node.regex_match = false
|
96
|
+
case t.char
|
97
|
+
when MATCH_NUM then Regexp.new((node.wildcard ? ".*?" : "^") + "[0-9]+")
|
98
|
+
when MATCH_WORD then Regexp.new((node.wildcard ? ".*?" : "^") + "\\w+")
|
99
|
+
when MATCH_ALPHA then Regexp.new((node.wildcard ? ".*?" : "^") + "[a-zA-Z]+")
|
100
|
+
when MATCH_WILD then /.*/
|
101
|
+
else raise Error, "Unknown match type '#{t.char}'"
|
102
|
+
end
|
103
|
+
when Tokenizer::Regex
|
104
|
+
node.match = "/#{t.src}/"
|
105
|
+
node.regex_match = true
|
106
|
+
Regexp.new((node.wildcard ? ".*?" : "^") + t.src)
|
107
|
+
else
|
108
|
+
raise Error, "Unexpected token #{t}"
|
109
|
+
end
|
110
|
+
|
111
|
+
# parse the operator (if any)
|
112
|
+
if (op = tokens[1])
|
113
|
+
raise Error, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
|
114
|
+
node.operator = op.char
|
115
|
+
|
116
|
+
arg = tokens[2..-1].reduce("") { |acc, t|
|
117
|
+
raise Error, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
|
118
|
+
acc + t.char
|
119
|
+
}
|
120
|
+
node.operator_arg = arg == BLANK ? nil : arg
|
121
|
+
|
122
|
+
node.expression =
|
123
|
+
case node.operator
|
124
|
+
when OP_REPLACE
|
125
|
+
->(_) { node.operator_arg || BLANK }
|
126
|
+
when OP_ADD, OP_SUB, OP_MUL, OP_DIV
|
127
|
+
raise Error, "Operator #{node.operator} is only available for numeric matches" unless node.match == MATCH_NUM
|
128
|
+
raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
|
129
|
+
->(x) { x.to_i.send(node.operator, node.operator_arg.to_i) }
|
130
|
+
else
|
131
|
+
raise(Error, "Unknown operator #{node.operator}")
|
132
|
+
end
|
90
133
|
end
|
91
134
|
end
|
92
135
|
end
|
data/lib/fop/program.rb
CHANGED
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,34 +1,123 @@
|
|
1
1
|
module Fop
|
2
|
-
|
2
|
+
class Tokenizer
|
3
3
|
Char = Struct.new(:char)
|
4
|
+
Op = Struct.new(:tokens)
|
5
|
+
Regex = Struct.new(:src)
|
4
6
|
Error = Class.new(StandardError)
|
5
7
|
|
6
|
-
|
8
|
+
OP_OPEN = "{".freeze
|
9
|
+
OP_CLOSE = "}".freeze
|
10
|
+
ESCAPE = "\\".freeze
|
11
|
+
WILDCARD = "*".freeze
|
12
|
+
REGEX_MARKER = "/".freeze
|
13
|
+
|
14
|
+
def initialize(src)
|
15
|
+
@src = src
|
16
|
+
@end = src.size - 1
|
17
|
+
end
|
18
|
+
|
19
|
+
def tokenize!
|
7
20
|
tokens = []
|
8
21
|
escape = false
|
9
|
-
|
22
|
+
i = 0
|
23
|
+
until i > @end do
|
24
|
+
char = @src[i]
|
10
25
|
if escape
|
11
26
|
tokens << Char.new(char)
|
12
27
|
escape = false
|
28
|
+
i += 1
|
13
29
|
next
|
14
30
|
end
|
15
31
|
|
16
32
|
case char
|
17
|
-
when
|
33
|
+
when ESCAPE
|
18
34
|
escape = true
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
tokens <<
|
23
|
-
when
|
35
|
+
i += 1
|
36
|
+
when OP_OPEN
|
37
|
+
i, op = operation! i + 1
|
38
|
+
tokens << op
|
39
|
+
when OP_CLOSE
|
40
|
+
raise "Unexpected #{OP_CLOSE}"
|
41
|
+
when WILDCARD
|
24
42
|
tokens << :wildcard
|
43
|
+
i += 1
|
25
44
|
else
|
26
45
|
tokens << Char.new(char)
|
46
|
+
i += 1
|
27
47
|
end
|
28
|
-
|
48
|
+
end
|
29
49
|
|
30
50
|
raise Error, "Trailing escape" if escape
|
31
51
|
tokens
|
32
52
|
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def operation!(i)
|
57
|
+
escape = false
|
58
|
+
found_close = false
|
59
|
+
tokens = []
|
60
|
+
|
61
|
+
until found_close or i > @end do
|
62
|
+
char = @src[i]
|
63
|
+
if escape
|
64
|
+
tokens << Char.new(char)
|
65
|
+
escape = false
|
66
|
+
i += 1
|
67
|
+
next
|
68
|
+
end
|
69
|
+
|
70
|
+
case char
|
71
|
+
when ESCAPE
|
72
|
+
escape = true
|
73
|
+
i += 1
|
74
|
+
when OP_OPEN
|
75
|
+
raise "Unexpected #{OP_OPEN}"
|
76
|
+
when OP_CLOSE
|
77
|
+
found_close = true
|
78
|
+
i += 1
|
79
|
+
when REGEX_MARKER
|
80
|
+
i, reg = regex! i + 1
|
81
|
+
tokens << reg
|
82
|
+
else
|
83
|
+
tokens << Char.new(char)
|
84
|
+
i += 1
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
raise Error, "Unclosed operation" if !found_close
|
89
|
+
raise Error, "Trailing escape" if escape
|
90
|
+
return i, Op.new(tokens)
|
91
|
+
end
|
92
|
+
|
93
|
+
def regex!(i)
|
94
|
+
escape = false
|
95
|
+
found_close = false
|
96
|
+
src = ""
|
97
|
+
|
98
|
+
until found_close or i > @end
|
99
|
+
char = @src[i]
|
100
|
+
i += 1
|
101
|
+
|
102
|
+
if escape
|
103
|
+
src << char
|
104
|
+
escape = false
|
105
|
+
next
|
106
|
+
end
|
107
|
+
|
108
|
+
case char
|
109
|
+
when ESCAPE
|
110
|
+
escape = true
|
111
|
+
when REGEX_MARKER
|
112
|
+
found_close = true
|
113
|
+
else
|
114
|
+
src << char
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
raise Error, "Unclosed regex" if !found_close
|
119
|
+
raise Error, "Trailing escape" if escape
|
120
|
+
return i, Regex.new(src)
|
121
|
+
end
|
33
122
|
end
|
34
123
|
end
|
data/lib/fop/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fop_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A micro expression language for Filter and OPerations on text
|
14
14
|
email: jordan.hollinger@gmail.com
|