fop_lang 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -5
- data/lib/fop/compiler.rb +37 -14
- data/lib/fop/nodes.rb +15 -4
- data/lib/fop/parser.rb +25 -14
- data/lib/fop/tokenizer.rb +22 -3
- data/lib/fop/tokens.rb +1 -0
- data/lib/fop/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e23d8d937f5a4b5e4d74010bb91923dedce019543d4d3baefc228dece938a731
|
4
|
+
data.tar.gz: cc97f6953b708498be169352269b861c73c9dbe52ded1a72f4370a8d18d32d48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2cec9cd47a472298f7af0268a9dc03aacce374ed88da7b505e33cb4536f6f1d04107cce7c33eba4718809d54591a3111bfd26971eef3c52073ba1226be4da4f
|
7
|
+
data.tar.gz: 80b5700d0cdda44dd021fe48d5c134cb992c6967b10681c43488a5a7276fbf03df7d7a9427a9aa92529569eaf0d134fa789df0c8e27cd2250dc50bcb16727d13
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# fop_lang
|
2
2
|
|
3
|
-
Fop (Filter and OPerations language) is a tiny, experimental language for filtering and
|
3
|
+
Fop (Filter and OPerations language) is a tiny, experimental language for filtering and operating on text. Think of it like awk but with the condition and action segments combined.
|
4
4
|
|
5
5
|
This is a Ruby implementation with both a library interface and a bin command.
|
6
6
|
|
@@ -16,10 +16,8 @@ You may use fop in a Ruby script:
|
|
16
16
|
require 'fop_lang'
|
17
17
|
|
18
18
|
f = Fop('foo {N+1}')
|
19
|
-
|
20
19
|
f.apply('foo 1')
|
21
20
|
=> "foo 2"
|
22
|
-
|
23
21
|
f.apply('bar 1')
|
24
22
|
=> nil
|
25
23
|
```
|
@@ -40,18 +38,24 @@ The above program demonstrates a text match, a regex match, and a match expressi
|
|
40
38
|
|
41
39
|
### Text match
|
42
40
|
|
41
|
+
`Text ` and ` ` in the above example.
|
42
|
+
|
43
43
|
The input must match this text exactly. Whitespace is part of the match. Wildcards (`*`) are allowed. Special characters (`*/{}\`) may be escaped with `\`.
|
44
44
|
|
45
45
|
The output of a text match will be the matching input.
|
46
46
|
|
47
47
|
### Regex match
|
48
48
|
|
49
|
+
`/(R|r)egex/` in the above example.
|
50
|
+
|
49
51
|
Regular expressions may be placed between `/`s. If the regular expression contains a `/`, you may escape it with `\`. Special regex characters like `[]()+.*` may also be escaped with `\`.
|
50
52
|
|
51
53
|
The output of a regex match will be the matching input.
|
52
54
|
|
53
55
|
### Match expression
|
54
56
|
|
57
|
+
`{N+1}` in the above example.
|
58
|
+
|
55
59
|
A match expression both matches on input and modifies that input. An expression is made up of 1 - 3 parts:
|
56
60
|
|
57
61
|
1. The match, e.g. `N` for numeric.
|
@@ -76,6 +80,10 @@ The output of a match expression will be the _modified_ matching input. If no op
|
|
76
80
|
* `+` Perform addition on the matching number and the argument (`N` only).
|
77
81
|
* `-` Subtract the argument from the matching number (`N` only).
|
78
82
|
|
83
|
+
**Whitespace**
|
84
|
+
|
85
|
+
Inside of match expressions, whitespace is an optional separator of terms, i.e. `{ N + 1 }` is the same as `{N+1}`. This means that any spaces in string arguments must be escaped. For example, replacing a word with `foo bar` looks like `{W = foo\ bar}`.
|
86
|
+
|
79
87
|
## Examples
|
80
88
|
|
81
89
|
### Release Number Example
|
@@ -103,10 +111,10 @@ This example takes in GitHub branch names, decides if they're release branches,
|
|
103
111
|
```
|
104
112
|
|
105
113
|
```ruby
|
106
|
-
f = Fop('rel{/(ease)
|
114
|
+
f = Fop('rel{/(ease)?/=}-{N=5}.{N+1}.{N=0}')
|
107
115
|
|
108
116
|
puts f.apply('release-4.99.1')
|
109
|
-
=> '
|
117
|
+
=> 'rel-5.100.0'
|
110
118
|
|
111
119
|
puts f.apply('rel-4.99.1')
|
112
120
|
=> 'rel-5.100.0'
|
data/lib/fop/compiler.rb
CHANGED
@@ -11,6 +11,8 @@ module Fop
|
|
11
11
|
when Nodes::Text, Nodes::Regex
|
12
12
|
Instructions.regex_match(node.regex)
|
13
13
|
when Nodes::Expression
|
14
|
+
arg_error = Validations.validate_args(node)
|
15
|
+
errors << arg_error if arg_error
|
14
16
|
Instructions::ExpressionMatch.new(node)
|
15
17
|
else
|
16
18
|
raise "Unknown node type #{node}"
|
@@ -22,13 +24,14 @@ module Fop
|
|
22
24
|
end
|
23
25
|
|
24
26
|
module Instructions
|
27
|
+
Op = Struct.new(:proc, :arity, :max_arity)
|
25
28
|
BLANK = "".freeze
|
26
29
|
OPERATIONS = {
|
27
|
-
"=" => ->(_val,
|
28
|
-
"+" => ->(val,
|
29
|
-
"-" => ->(val,
|
30
|
-
">" => ->(val,
|
31
|
-
"<" => ->(val,
|
30
|
+
"=" => Op.new(->(_val, args) { args[0] || BLANK }, 0, 1),
|
31
|
+
"+" => Op.new(->(val, args) { val.to_i + args[0].to_i }, 1),
|
32
|
+
"-" => Op.new(->(val, args) { val.to_i - args[0].to_i }, 1),
|
33
|
+
">" => Op.new(->(val, args) { val + args[0] }, 1),
|
34
|
+
"<" => Op.new(->(val, args) { args[0] + val }, 1),
|
32
35
|
}
|
33
36
|
|
34
37
|
def self.regex_match(regex)
|
@@ -38,14 +41,11 @@ module Fop
|
|
38
41
|
class ExpressionMatch
|
39
42
|
def initialize(node)
|
40
43
|
@regex = node.regex&.regex
|
41
|
-
@op = node.
|
44
|
+
@op = node.operator_token ? OPERATIONS.fetch(node.operator_token.val) : nil
|
42
45
|
@regex_match = node.regex_match
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@arg = node.arg&.join("")
|
47
|
-
@arg_with_caps = nil
|
48
|
-
end
|
46
|
+
@args = node.args&.map { |arg|
|
47
|
+
arg.has_captures ? arg.segments : arg.segments.join("")
|
48
|
+
}
|
49
49
|
end
|
50
50
|
|
51
51
|
def call(input)
|
@@ -54,8 +54,18 @@ module Fop
|
|
54
54
|
blank = val == BLANK
|
55
55
|
input.sub!(val, BLANK) unless blank
|
56
56
|
found_val = @regex_match || !blank
|
57
|
-
|
58
|
-
|
57
|
+
if @op and @args and found_val
|
58
|
+
args = @args.map { |arg|
|
59
|
+
case arg
|
60
|
+
when String then arg
|
61
|
+
when Array then sub_caps(arg, match.captures)
|
62
|
+
else raise "Unexpected arg type #{arg.class.name}"
|
63
|
+
end
|
64
|
+
}
|
65
|
+
@op.proc.call(val, args)
|
66
|
+
else
|
67
|
+
val
|
68
|
+
end
|
59
69
|
end
|
60
70
|
end
|
61
71
|
|
@@ -68,5 +78,18 @@ module Fop
|
|
68
78
|
end
|
69
79
|
end
|
70
80
|
end
|
81
|
+
|
82
|
+
module Validations
|
83
|
+
def self.validate_args(exp_node)
|
84
|
+
op_token = exp_node.operator_token || return
|
85
|
+
op = Instructions::OPERATIONS.fetch(op_token.val)
|
86
|
+
num = exp_node.args&.size || 0
|
87
|
+
arity = op.arity
|
88
|
+
max_arity = op.max_arity || arity
|
89
|
+
if num < arity or num > max_arity
|
90
|
+
Parser::Error.new(:argument, op_token, "#{op_token.val} expects #{arity}..#{max_arity} arguments; #{num} given")
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
71
94
|
end
|
72
95
|
end
|
data/lib/fop/nodes.rb
CHANGED
@@ -14,18 +14,29 @@ module Fop
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
-
Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :
|
17
|
+
Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator_token, :args) do
|
18
18
|
def to_s
|
19
19
|
w = wildcard ? "*" : nil
|
20
20
|
s = "[#{w}exp] #{match}"
|
21
|
-
if
|
22
|
-
arg_str =
|
21
|
+
if operator_token
|
22
|
+
arg_str = args
|
23
23
|
.map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
|
24
24
|
.join("")
|
25
|
-
s << " #{
|
25
|
+
s << " #{operator_token.val} #{arg_str}"
|
26
26
|
end
|
27
27
|
s
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
Arg = Struct.new(:segments, :has_captures) do
|
32
|
+
def to_s
|
33
|
+
segments.map { |s|
|
34
|
+
case s
|
35
|
+
when Integer then "$#{s + 1}"
|
36
|
+
else s.to_s
|
37
|
+
end
|
38
|
+
}.join("")
|
39
|
+
end
|
40
|
+
end
|
30
41
|
end
|
31
42
|
end
|
data/lib/fop/parser.rb
CHANGED
@@ -12,7 +12,7 @@ module Fop
|
|
12
12
|
"A" => "[a-zA-Z]+".freeze,
|
13
13
|
"*" => ".*".freeze,
|
14
14
|
}.freeze
|
15
|
-
OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
|
15
|
+
#OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
|
16
16
|
TR_REGEX = /.*/
|
17
17
|
|
18
18
|
Error = Struct.new(:type, :token, :message) do
|
@@ -63,14 +63,15 @@ module Fop
|
|
63
63
|
def parse_exp!(wildcard = false)
|
64
64
|
exp = Nodes::Expression.new(wildcard)
|
65
65
|
parse_exp_match! exp
|
66
|
-
|
67
|
-
if exp.
|
68
|
-
parse_exp_arg! exp
|
66
|
+
parse_exp_operator! exp
|
67
|
+
if exp.operator_token
|
68
|
+
parse_exp_arg! exp
|
69
69
|
end
|
70
70
|
return exp
|
71
71
|
end
|
72
72
|
|
73
73
|
def parse_exp_match!(exp)
|
74
|
+
@tokenizer.escape.whitespace = false
|
74
75
|
@tokenizer.escape.operators = false
|
75
76
|
t = @tokenizer.next
|
76
77
|
case t.type
|
@@ -93,35 +94,44 @@ module Fop
|
|
93
94
|
end
|
94
95
|
|
95
96
|
def parse_exp_operator!(exp)
|
97
|
+
@tokenizer.escape.whitespace = false
|
96
98
|
@tokenizer.escape.operators = false
|
97
99
|
t = @tokenizer.next
|
98
100
|
case t.type
|
99
101
|
when Tokens::EXP_CLOSE
|
100
102
|
# no op
|
101
|
-
when Tokens::OPERATOR
|
102
|
-
exp.
|
103
|
+
when Tokens::OPERATOR, Tokens::TEXT
|
104
|
+
exp.operator_token = t
|
103
105
|
else
|
104
106
|
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
|
105
107
|
end
|
106
|
-
t
|
107
108
|
end
|
108
109
|
|
109
|
-
def parse_exp_arg!(exp
|
110
|
+
def parse_exp_arg!(exp)
|
111
|
+
@tokenizer.escape.whitespace = false
|
112
|
+
@tokenizer.escape.whitespace_sep = false
|
110
113
|
@tokenizer.escape.operators = true
|
111
114
|
@tokenizer.escape.regex = true
|
112
115
|
@tokenizer.escape.regex_capture = false if exp.regex_match
|
113
116
|
|
114
|
-
|
117
|
+
arg = Nodes::Arg.new([], false)
|
118
|
+
exp.args = []
|
115
119
|
found_close, eof = false, false
|
116
120
|
until found_close or eof
|
117
121
|
t = @tokenizer.next
|
118
122
|
case t.type
|
119
123
|
when Tokens::TEXT
|
120
|
-
|
124
|
+
arg.segments << t.val
|
121
125
|
when Tokens::REG_CAPTURE
|
122
|
-
|
126
|
+
arg.has_captures = true
|
127
|
+
arg.segments << t.val.to_i - 1
|
123
128
|
errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
|
124
129
|
errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
|
130
|
+
when Tokens::WHITESPACE_SEP
|
131
|
+
if arg.segments.any?
|
132
|
+
exp.args << arg
|
133
|
+
arg = Nodes::Arg.new([])
|
134
|
+
end
|
125
135
|
when Tokens::EXP_CLOSE
|
126
136
|
found_close = true
|
127
137
|
when Tokens::EOF
|
@@ -131,10 +141,11 @@ module Fop
|
|
131
141
|
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
|
132
142
|
end
|
133
143
|
end
|
144
|
+
exp.args << arg if arg.segments.any?
|
134
145
|
|
135
|
-
if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
|
136
|
-
|
137
|
-
end
|
146
|
+
#if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
|
147
|
+
# errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
|
148
|
+
#end
|
138
149
|
end
|
139
150
|
|
140
151
|
def parse_regex!(wildcard)
|
data/lib/fop/tokenizer.rb
CHANGED
@@ -3,8 +3,7 @@ require_relative 'tokens'
|
|
3
3
|
module Fop
|
4
4
|
class Tokenizer
|
5
5
|
Token = Struct.new(:pos, :type, :val)
|
6
|
-
|
7
|
-
Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
|
6
|
+
Escapes = Struct.new(:whitespace, :whitespace_sep, :operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
|
8
7
|
|
9
8
|
EXP_OPEN = "{".freeze
|
10
9
|
EXP_CLOSE = "}".freeze
|
@@ -17,6 +16,7 @@ module Fop
|
|
17
16
|
OP_PREPEND = "<".freeze
|
18
17
|
OP_ADD = "+".freeze
|
19
18
|
OP_SUB = "-".freeze
|
19
|
+
WHITESPACE = " ".freeze
|
20
20
|
|
21
21
|
#
|
22
22
|
# Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
|
@@ -36,11 +36,12 @@ module Fop
|
|
36
36
|
|
37
37
|
# Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
|
38
38
|
def reset_escapes!
|
39
|
-
@escape = Escapes.new(true, true)
|
39
|
+
@escape = Escapes.new(true, true, true, true)
|
40
40
|
end
|
41
41
|
|
42
42
|
# Auto-escape anything you'd find in a regular expression
|
43
43
|
def regex_mode!
|
44
|
+
@escape.whitespace = true
|
44
45
|
@escape.regex = false # look for the final /
|
45
46
|
@escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
|
46
47
|
@escape.wildcards = true
|
@@ -86,6 +87,17 @@ module Fop
|
|
86
87
|
@i += 1
|
87
88
|
token! Tokens::OPERATOR, char
|
88
89
|
end
|
90
|
+
when WHITESPACE
|
91
|
+
if @escape.whitespace
|
92
|
+
get_str!
|
93
|
+
elsif !@escape.whitespace_sep
|
94
|
+
@i += 1
|
95
|
+
token! Tokens::WHITESPACE_SEP
|
96
|
+
else
|
97
|
+
@i += 1
|
98
|
+
@start_i = @i
|
99
|
+
self.next
|
100
|
+
end
|
89
101
|
else
|
90
102
|
get_str!
|
91
103
|
end
|
@@ -162,6 +174,13 @@ module Fop
|
|
162
174
|
else
|
163
175
|
found_end = true
|
164
176
|
end
|
177
|
+
when WHITESPACE
|
178
|
+
if @escape.whitespace
|
179
|
+
@i += 1
|
180
|
+
str << char
|
181
|
+
else
|
182
|
+
found_end = true
|
183
|
+
end
|
165
184
|
else
|
166
185
|
@i += 1
|
167
186
|
str << char
|
data/lib/fop/tokens.rb
CHANGED
data/lib/fop/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fop_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-01 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A micro expression language for Filter and OPerations on text
|
14
14
|
email: jordan.hollinger@gmail.com
|