fop_lang 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -3
- data/lib/fop/nodes.rb +7 -47
- data/lib/fop/parser.rb +91 -48
- data/lib/fop/program.rb +1 -1
- data/lib/fop/tokenizer.rb +99 -10
- data/lib/fop/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5f19a543b81c0046dc63fcc1c0769989628017d2c1d1da74ef0db9866a0f2f7
|
4
|
+
data.tar.gz: 03b6597f9cab97c95ccda8396693bb43d9da729137cb916cc74f7fbecc314b32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a17c82a561e20cbc5cb8abbad5be4f94f02110d60b6130e3e1e9489672c5c134befc6b1daca2f590f083a67934e600fb5d6fa0ea5433181ba3014514c558232
|
7
|
+
data.tar.gz: 790250c8a79dcf04b381f2dd33cbaa048fd070688ab45446ff87652dcb18844c2d6139d0ead060fa338a57b8590eee0167ea2c25abd84e1d71571f33c49bcbda
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# fop_lang
|
2
2
|
|
3
|
-
Fop (Filter and
|
3
|
+
Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
gem 'fop_lang'
|
7
|
+
```
|
4
8
|
|
5
9
|
## Release Number Example
|
6
10
|
|
@@ -33,13 +37,14 @@ If `\` (escape) is used before the special characters `*`, `{` or `}`, then that
|
|
33
37
|
|
34
38
|
Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
|
35
39
|
|
36
|
-
1. Matching
|
40
|
+
1. Matching class (required): Defines what characters the operation will match and operate on.
|
37
41
|
* `N` is the numeric class and will match one or more digits.
|
38
42
|
* `A` is the alpha class and will match one or more letters (lower or upper case).
|
39
43
|
* `W` is the word class and matches alphanumeric chars and underscores.
|
40
44
|
* `*` is the wildcard class and greedily matches everything after it.
|
45
|
+
* `/.../` matches on the supplied regex between the `/`'s. If your regex contains a `/`, it must be escaped.
|
41
46
|
3. Operator (optional): What to do to the matching characters.
|
42
|
-
* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
|
47
|
+
* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars. Note that any `/` chars must be escaped, so as not to be mistaken for a regex.
|
43
48
|
* `+` Perform addition on the matching number and the argument (`N` only).
|
44
49
|
* `-` Subtract the argument from the matching number (`N` only).
|
45
50
|
5. Operator argument (required for some operators): meaning varies by operator.
|
@@ -53,6 +58,16 @@ Operations are the interesting part of Fop, and are specified between `{` and `}
|
|
53
58
|
=> 'release-5.100.0'
|
54
59
|
```
|
55
60
|
|
61
|
+
```ruby
|
62
|
+
f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
|
63
|
+
|
64
|
+
puts f.apply('release-4.99.1')
|
65
|
+
=> 'release-5.100.0'
|
66
|
+
|
67
|
+
puts f.apply('rel-4.99.1')
|
68
|
+
=> 'rel-5.100.0'
|
69
|
+
```
|
70
|
+
|
56
71
|
```ruby
|
57
72
|
f = Fop('release-*{N=5}.{N+100}.{N=0}')
|
58
73
|
|
data/lib/fop/nodes.rb
CHANGED
@@ -12,59 +12,19 @@ module Fop
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
NUM = "N".freeze
|
17
|
-
WORD = "W".freeze
|
18
|
-
ALPHA = "A".freeze
|
19
|
-
WILD = "*".freeze
|
20
|
-
BLANK = "".freeze
|
21
|
-
|
15
|
+
Op = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :operator_arg, :expression) do
|
22
16
|
def consume!(input)
|
23
|
-
if (val = input.slice!(
|
24
|
-
|
17
|
+
if (val = input.slice!(regex))
|
18
|
+
found_val = regex_match || val != Parser::BLANK
|
19
|
+
expression && found_val ? expression.call(val) : val
|
25
20
|
end
|
26
21
|
end
|
27
22
|
|
28
23
|
def to_s
|
29
24
|
w = wildcard ? "*" : nil
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
def parse!
|
34
|
-
match = tokens.shift || raise(ParserError, "Empty match")
|
35
|
-
raise ParserError, "Unexpected #{match}" unless match.is_a? Tokenizer::Char
|
36
|
-
|
37
|
-
@match = match.char
|
38
|
-
@regex =
|
39
|
-
case @match
|
40
|
-
when NUM then Regexp.new((wildcard ? ".*?" : "^") + "[0-9]+")
|
41
|
-
when WORD then Regexp.new((wildcard ? ".*?" : "^") + "\\w+")
|
42
|
-
when ALPHA then Regexp.new((wildcard ? ".*?" : "^") + "[a-zA-Z]+")
|
43
|
-
when WILD then /.*/
|
44
|
-
else raise ParserError, "Unknown match type '#{@match}'"
|
45
|
-
end
|
46
|
-
|
47
|
-
if (op = tokens.shift)
|
48
|
-
raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
|
49
|
-
arg = tokens.reduce("") { |acc, t|
|
50
|
-
raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
|
51
|
-
acc + t.char
|
52
|
-
}
|
53
|
-
|
54
|
-
@op = op.char
|
55
|
-
@arg = arg == BLANK ? nil : arg
|
56
|
-
@expression =
|
57
|
-
case @op
|
58
|
-
when "=" then ->(_) { @arg || BLANK }
|
59
|
-
when "+", "-", "*", "/"
|
60
|
-
raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
|
61
|
-
raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
|
62
|
-
->(x) { x.to_i.send(@op, @arg.to_i) }
|
63
|
-
else raise ParserError, "Unknown operator #{@op}"
|
64
|
-
end
|
65
|
-
else
|
66
|
-
@op, @arg, @expression = nil, nil, nil
|
67
|
-
end
|
25
|
+
s = "#{w}#{match}"
|
26
|
+
s << " #{operator} #{operator_arg}" if operator
|
27
|
+
s
|
68
28
|
end
|
69
29
|
end
|
70
30
|
end
|
data/lib/fop/parser.rb
CHANGED
@@ -4,89 +4,132 @@ module Fop
|
|
4
4
|
module Parser
|
5
5
|
Error = Class.new(StandardError)
|
6
6
|
|
7
|
+
MATCH_NUM = "N".freeze
|
8
|
+
MATCH_WORD = "W".freeze
|
9
|
+
MATCH_ALPHA = "A".freeze
|
10
|
+
MATCH_WILD = "*".freeze
|
11
|
+
BLANK = "".freeze
|
12
|
+
OP_REPLACE = "=".freeze
|
13
|
+
OP_ADD = "+".freeze
|
14
|
+
OP_SUB = "-".freeze
|
15
|
+
OP_MUL = "*".freeze
|
16
|
+
OP_DIV = "/".freeze
|
17
|
+
|
7
18
|
def self.parse!(tokens)
|
8
|
-
|
9
|
-
|
19
|
+
nodes = []
|
20
|
+
curr_node = nil
|
10
21
|
|
11
22
|
tokens.each { |token|
|
12
|
-
case
|
23
|
+
case curr_node
|
13
24
|
when nil
|
14
|
-
|
25
|
+
curr_node = new_node token
|
15
26
|
when :wildcard
|
16
|
-
|
17
|
-
raise Error, "Unexpected * after wildcard" if
|
27
|
+
curr_node = new_node token, true
|
28
|
+
raise Error, "Unexpected * after wildcard" if curr_node == :wildcard
|
18
29
|
when Nodes::Text
|
19
|
-
|
20
|
-
|
21
|
-
|
30
|
+
curr_node, finished_node = parse_text curr_node, token
|
31
|
+
nodes << finished_node if finished_node
|
32
|
+
when Nodes::Op
|
33
|
+
nodes << curr_node
|
34
|
+
curr_node = new_node token
|
22
35
|
else
|
23
|
-
raise Error, "Unexpected
|
36
|
+
raise Error, "Unexpected node #{curr_node}"
|
24
37
|
end
|
25
38
|
}
|
26
39
|
|
27
|
-
case
|
40
|
+
case curr_node
|
28
41
|
when nil
|
29
42
|
# noop
|
30
43
|
when :wildcard
|
31
|
-
|
32
|
-
when Nodes::Text
|
33
|
-
|
34
|
-
|
35
|
-
raise
|
44
|
+
nodes << Nodes::Text.new(true, "")
|
45
|
+
when Nodes::Text, Nodes::Op
|
46
|
+
nodes << curr_node
|
47
|
+
else
|
48
|
+
raise "Unexpected end node #{curr_node}"
|
36
49
|
end
|
37
50
|
|
38
|
-
|
51
|
+
nodes
|
39
52
|
end
|
40
53
|
|
41
54
|
private
|
42
55
|
|
43
|
-
def self.
|
56
|
+
def self.new_node(token, wildcard = false)
|
44
57
|
case token
|
45
58
|
when Tokenizer::Char
|
46
59
|
Nodes::Text.new(wildcard, token.char.clone)
|
47
|
-
when
|
48
|
-
Nodes::
|
49
|
-
|
50
|
-
|
60
|
+
when Tokenizer::Op
|
61
|
+
op = Nodes::Op.new(wildcard)
|
62
|
+
parse_op! op, token.tokens
|
63
|
+
op
|
51
64
|
when :wildcard
|
52
65
|
:wildcard
|
53
66
|
else
|
54
|
-
raise
|
67
|
+
raise Error, "Unexpected #{token}"
|
55
68
|
end
|
56
69
|
end
|
57
70
|
|
58
|
-
|
71
|
+
# @return current node
|
72
|
+
# @return finished node
|
73
|
+
def self.parse_text(node, token)
|
59
74
|
case token
|
60
|
-
when :match_open
|
61
|
-
stack << text_el
|
62
|
-
Nodes::Match.new(false, [])
|
63
|
-
when :match_close
|
64
|
-
raise ParserError.new, "Unexpected }"
|
65
75
|
when Tokenizer::Char
|
66
|
-
|
67
|
-
|
76
|
+
node.str << token.char
|
77
|
+
return node, nil
|
78
|
+
when Tokenizer::Op
|
79
|
+
op = new_node token
|
80
|
+
return op, node
|
68
81
|
when :wildcard
|
69
|
-
|
70
|
-
:wildcard
|
82
|
+
return :wildcard, node
|
71
83
|
else
|
72
|
-
raise
|
84
|
+
raise Error, "Unexpected #{token}"
|
73
85
|
end
|
74
86
|
end
|
75
87
|
|
76
|
-
def self.
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
88
|
+
def self.parse_op!(node, tokens)
|
89
|
+
t = tokens[0] || raise(Error, "Empty operation")
|
90
|
+
# parse the matching type
|
91
|
+
node.regex =
|
92
|
+
case t
|
93
|
+
when Tokenizer::Char
|
94
|
+
node.match = t.char
|
95
|
+
node.regex_match = false
|
96
|
+
case t.char
|
97
|
+
when MATCH_NUM then Regexp.new((node.wildcard ? ".*?" : "^") + "[0-9]+")
|
98
|
+
when MATCH_WORD then Regexp.new((node.wildcard ? ".*?" : "^") + "\\w+")
|
99
|
+
when MATCH_ALPHA then Regexp.new((node.wildcard ? ".*?" : "^") + "[a-zA-Z]+")
|
100
|
+
when MATCH_WILD then /.*/
|
101
|
+
else raise Error, "Unknown match type '#{t.char}'"
|
102
|
+
end
|
103
|
+
when Tokenizer::Regex
|
104
|
+
node.match = "/#{t.src}/"
|
105
|
+
node.regex_match = true
|
106
|
+
Regexp.new((node.wildcard ? ".*?" : "^") + t.src)
|
107
|
+
else
|
108
|
+
raise Error, "Unexpected token #{t}"
|
109
|
+
end
|
110
|
+
|
111
|
+
# parse the operator (if any)
|
112
|
+
if (op = tokens[1])
|
113
|
+
raise Error, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
|
114
|
+
node.operator = op.char
|
115
|
+
|
116
|
+
arg = tokens[2..-1].reduce("") { |acc, t|
|
117
|
+
raise Error, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
|
118
|
+
acc + t.char
|
119
|
+
}
|
120
|
+
node.operator_arg = arg == BLANK ? nil : arg
|
121
|
+
|
122
|
+
node.expression =
|
123
|
+
case node.operator
|
124
|
+
when OP_REPLACE
|
125
|
+
->(_) { node.operator_arg || BLANK }
|
126
|
+
when OP_ADD, OP_SUB, OP_MUL, OP_DIV
|
127
|
+
raise Error, "Operator #{node.operator} is only available for numeric matches" unless node.match == MATCH_NUM
|
128
|
+
raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
|
129
|
+
->(x) { x.to_i.send(node.operator, node.operator_arg.to_i) }
|
130
|
+
else
|
131
|
+
raise(Error, "Unknown operator #{node.operator}")
|
132
|
+
end
|
90
133
|
end
|
91
134
|
end
|
92
135
|
end
|
data/lib/fop/program.rb
CHANGED
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,34 +1,123 @@
|
|
1
1
|
module Fop
|
2
|
-
|
2
|
+
class Tokenizer
|
3
3
|
Char = Struct.new(:char)
|
4
|
+
Op = Struct.new(:tokens)
|
5
|
+
Regex = Struct.new(:src)
|
4
6
|
Error = Class.new(StandardError)
|
5
7
|
|
6
|
-
|
8
|
+
OP_OPEN = "{".freeze
|
9
|
+
OP_CLOSE = "}".freeze
|
10
|
+
ESCAPE = "\\".freeze
|
11
|
+
WILDCARD = "*".freeze
|
12
|
+
REGEX_MARKER = "/".freeze
|
13
|
+
|
14
|
+
def initialize(src)
|
15
|
+
@src = src
|
16
|
+
@end = src.size - 1
|
17
|
+
end
|
18
|
+
|
19
|
+
def tokenize!
|
7
20
|
tokens = []
|
8
21
|
escape = false
|
9
|
-
|
22
|
+
i = 0
|
23
|
+
until i > @end do
|
24
|
+
char = @src[i]
|
10
25
|
if escape
|
11
26
|
tokens << Char.new(char)
|
12
27
|
escape = false
|
28
|
+
i += 1
|
13
29
|
next
|
14
30
|
end
|
15
31
|
|
16
32
|
case char
|
17
|
-
when
|
33
|
+
when ESCAPE
|
18
34
|
escape = true
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
tokens <<
|
23
|
-
when
|
35
|
+
i += 1
|
36
|
+
when OP_OPEN
|
37
|
+
i, op = operation! i + 1
|
38
|
+
tokens << op
|
39
|
+
when OP_CLOSE
|
40
|
+
raise "Unexpected #{OP_CLOSE}"
|
41
|
+
when WILDCARD
|
24
42
|
tokens << :wildcard
|
43
|
+
i += 1
|
25
44
|
else
|
26
45
|
tokens << Char.new(char)
|
46
|
+
i += 1
|
27
47
|
end
|
28
|
-
|
48
|
+
end
|
29
49
|
|
30
50
|
raise Error, "Trailing escape" if escape
|
31
51
|
tokens
|
32
52
|
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def operation!(i)
|
57
|
+
escape = false
|
58
|
+
found_close = false
|
59
|
+
tokens = []
|
60
|
+
|
61
|
+
until found_close or i > @end do
|
62
|
+
char = @src[i]
|
63
|
+
if escape
|
64
|
+
tokens << Char.new(char)
|
65
|
+
escape = false
|
66
|
+
i += 1
|
67
|
+
next
|
68
|
+
end
|
69
|
+
|
70
|
+
case char
|
71
|
+
when ESCAPE
|
72
|
+
escape = true
|
73
|
+
i += 1
|
74
|
+
when OP_OPEN
|
75
|
+
raise "Unexpected #{OP_OPEN}"
|
76
|
+
when OP_CLOSE
|
77
|
+
found_close = true
|
78
|
+
i += 1
|
79
|
+
when REGEX_MARKER
|
80
|
+
i, reg = regex! i + 1
|
81
|
+
tokens << reg
|
82
|
+
else
|
83
|
+
tokens << Char.new(char)
|
84
|
+
i += 1
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
raise Error, "Unclosed operation" if !found_close
|
89
|
+
raise Error, "Trailing escape" if escape
|
90
|
+
return i, Op.new(tokens)
|
91
|
+
end
|
92
|
+
|
93
|
+
def regex!(i)
|
94
|
+
escape = false
|
95
|
+
found_close = false
|
96
|
+
src = ""
|
97
|
+
|
98
|
+
until found_close or i > @end
|
99
|
+
char = @src[i]
|
100
|
+
i += 1
|
101
|
+
|
102
|
+
if escape
|
103
|
+
src << char
|
104
|
+
escape = false
|
105
|
+
next
|
106
|
+
end
|
107
|
+
|
108
|
+
case char
|
109
|
+
when ESCAPE
|
110
|
+
escape = true
|
111
|
+
when REGEX_MARKER
|
112
|
+
found_close = true
|
113
|
+
else
|
114
|
+
src << char
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
raise Error, "Unclosed regex" if !found_close
|
119
|
+
raise Error, "Trailing escape" if escape
|
120
|
+
return i, Regex.new(src)
|
121
|
+
end
|
33
122
|
end
|
34
123
|
end
|
data/lib/fop/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fop_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A micro expression language for Filter and OPerations on text
|
14
14
|
email: jordan.hollinger@gmail.com
|