fop_lang 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -5
- data/lib/fop/compiler.rb +37 -14
- data/lib/fop/nodes.rb +15 -4
- data/lib/fop/parser.rb +25 -14
- data/lib/fop/tokenizer.rb +22 -3
- data/lib/fop/tokens.rb +1 -0
- data/lib/fop/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e23d8d937f5a4b5e4d74010bb91923dedce019543d4d3baefc228dece938a731
|
4
|
+
data.tar.gz: cc97f6953b708498be169352269b861c73c9dbe52ded1a72f4370a8d18d32d48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2cec9cd47a472298f7af0268a9dc03aacce374ed88da7b505e33cb4536f6f1d04107cce7c33eba4718809d54591a3111bfd26971eef3c52073ba1226be4da4f
|
7
|
+
data.tar.gz: 80b5700d0cdda44dd021fe48d5c134cb992c6967b10681c43488a5a7276fbf03df7d7a9427a9aa92529569eaf0d134fa789df0c8e27cd2250dc50bcb16727d13
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# fop_lang
|
2
2
|
|
3
|
-
Fop (Filter and OPerations language) is a tiny, experimental language for filtering and
|
3
|
+
Fop (Filter and OPerations language) is a tiny, experimental language for filtering and operating on text. Think of it like awk but with the condition and action segments combined.
|
4
4
|
|
5
5
|
This is a Ruby implementation with both a library interface and a bin command.
|
6
6
|
|
@@ -16,10 +16,8 @@ You may use fop in a Ruby script:
|
|
16
16
|
require 'fop_lang'
|
17
17
|
|
18
18
|
f = Fop('foo {N+1}')
|
19
|
-
|
20
19
|
f.apply('foo 1')
|
21
20
|
=> "foo 2"
|
22
|
-
|
23
21
|
f.apply('bar 1')
|
24
22
|
=> nil
|
25
23
|
```
|
@@ -40,18 +38,24 @@ The above program demonstrates a text match, a regex match, and a match expressi
|
|
40
38
|
|
41
39
|
### Text match
|
42
40
|
|
41
|
+
`Text ` and ` ` in the above example.
|
42
|
+
|
43
43
|
The input must match this text exactly. Whitespace is part of the match. Wildcards (`*`) are allowed. Special characters (`*/{}\`) may be escaped with `\`.
|
44
44
|
|
45
45
|
The output of a text match will be the matching input.
|
46
46
|
|
47
47
|
### Regex match
|
48
48
|
|
49
|
+
`/(R|r)egex/` in the above example.
|
50
|
+
|
49
51
|
Regular expressions may be placed between `/`s. If the regular expression contains a `/`, you may escape it with `\`. Special regex characters like `[]()+.*` may also be escaped with `\`.
|
50
52
|
|
51
53
|
The output of a regex match will be the matching input.
|
52
54
|
|
53
55
|
### Match expression
|
54
56
|
|
57
|
+
`{N+1}` in the above example.
|
58
|
+
|
55
59
|
A match expression both matches on input and modifies that input. An expression is made up of 1 - 3 parts:
|
56
60
|
|
57
61
|
1. The match, e.g. `N` for numeric.
|
@@ -76,6 +80,10 @@ The output of a match expression will be the _modified_ matching input. If no op
|
|
76
80
|
* `+` Perform addition on the matching number and the argument (`N` only).
|
77
81
|
* `-` Subtract the argument from the matching number (`N` only).
|
78
82
|
|
83
|
+
**Whitespace**
|
84
|
+
|
85
|
+
Inside of match expressions, whitespace is an optional separator of terms, i.e. `{ N + 1 }` is the same as `{N+1}`. This means that any spaces in string arguments must be escaped. For example, replacing a word with `foo bar` looks like `{W = foo\ bar}`.
|
86
|
+
|
79
87
|
## Examples
|
80
88
|
|
81
89
|
### Release Number Example
|
@@ -103,10 +111,10 @@ This example takes in GitHub branch names, decides if they're release branches,
|
|
103
111
|
```
|
104
112
|
|
105
113
|
```ruby
|
106
|
-
f = Fop('rel{/(ease)
|
114
|
+
f = Fop('rel{/(ease)?/=}-{N=5}.{N+1}.{N=0}')
|
107
115
|
|
108
116
|
puts f.apply('release-4.99.1')
|
109
|
-
=> '
|
117
|
+
=> 'rel-5.100.0'
|
110
118
|
|
111
119
|
puts f.apply('rel-4.99.1')
|
112
120
|
=> 'rel-5.100.0'
|
data/lib/fop/compiler.rb
CHANGED
@@ -11,6 +11,8 @@ module Fop
|
|
11
11
|
when Nodes::Text, Nodes::Regex
|
12
12
|
Instructions.regex_match(node.regex)
|
13
13
|
when Nodes::Expression
|
14
|
+
arg_error = Validations.validate_args(node)
|
15
|
+
errors << arg_error if arg_error
|
14
16
|
Instructions::ExpressionMatch.new(node)
|
15
17
|
else
|
16
18
|
raise "Unknown node type #{node}"
|
@@ -22,13 +24,14 @@ module Fop
|
|
22
24
|
end
|
23
25
|
|
24
26
|
module Instructions
|
27
|
+
Op = Struct.new(:proc, :arity, :max_arity)
|
25
28
|
BLANK = "".freeze
|
26
29
|
OPERATIONS = {
|
27
|
-
"=" => ->(_val,
|
28
|
-
"+" => ->(val,
|
29
|
-
"-" => ->(val,
|
30
|
-
">" => ->(val,
|
31
|
-
"<" => ->(val,
|
30
|
+
"=" => Op.new(->(_val, args) { args[0] || BLANK }, 0, 1),
|
31
|
+
"+" => Op.new(->(val, args) { val.to_i + args[0].to_i }, 1),
|
32
|
+
"-" => Op.new(->(val, args) { val.to_i - args[0].to_i }, 1),
|
33
|
+
">" => Op.new(->(val, args) { val + args[0] }, 1),
|
34
|
+
"<" => Op.new(->(val, args) { args[0] + val }, 1),
|
32
35
|
}
|
33
36
|
|
34
37
|
def self.regex_match(regex)
|
@@ -38,14 +41,11 @@ module Fop
|
|
38
41
|
class ExpressionMatch
|
39
42
|
def initialize(node)
|
40
43
|
@regex = node.regex&.regex
|
41
|
-
@op = node.
|
44
|
+
@op = node.operator_token ? OPERATIONS.fetch(node.operator_token.val) : nil
|
42
45
|
@regex_match = node.regex_match
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@arg = node.arg&.join("")
|
47
|
-
@arg_with_caps = nil
|
48
|
-
end
|
46
|
+
@args = node.args&.map { |arg|
|
47
|
+
arg.has_captures ? arg.segments : arg.segments.join("")
|
48
|
+
}
|
49
49
|
end
|
50
50
|
|
51
51
|
def call(input)
|
@@ -54,8 +54,18 @@ module Fop
|
|
54
54
|
blank = val == BLANK
|
55
55
|
input.sub!(val, BLANK) unless blank
|
56
56
|
found_val = @regex_match || !blank
|
57
|
-
|
58
|
-
|
57
|
+
if @op and @args and found_val
|
58
|
+
args = @args.map { |arg|
|
59
|
+
case arg
|
60
|
+
when String then arg
|
61
|
+
when Array then sub_caps(arg, match.captures)
|
62
|
+
else raise "Unexpected arg type #{arg.class.name}"
|
63
|
+
end
|
64
|
+
}
|
65
|
+
@op.proc.call(val, args)
|
66
|
+
else
|
67
|
+
val
|
68
|
+
end
|
59
69
|
end
|
60
70
|
end
|
61
71
|
|
@@ -68,5 +78,18 @@ module Fop
|
|
68
78
|
end
|
69
79
|
end
|
70
80
|
end
|
81
|
+
|
82
|
+
module Validations
|
83
|
+
def self.validate_args(exp_node)
|
84
|
+
op_token = exp_node.operator_token || return
|
85
|
+
op = Instructions::OPERATIONS.fetch(op_token.val)
|
86
|
+
num = exp_node.args&.size || 0
|
87
|
+
arity = op.arity
|
88
|
+
max_arity = op.max_arity || arity
|
89
|
+
if num < arity or num > max_arity
|
90
|
+
Parser::Error.new(:argument, op_token, "#{op_token.val} expects #{arity}..#{max_arity} arguments; #{num} given")
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
71
94
|
end
|
72
95
|
end
|
data/lib/fop/nodes.rb
CHANGED
@@ -14,18 +14,29 @@ module Fop
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
-
Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :
|
17
|
+
Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator_token, :args) do
|
18
18
|
def to_s
|
19
19
|
w = wildcard ? "*" : nil
|
20
20
|
s = "[#{w}exp] #{match}"
|
21
|
-
if
|
22
|
-
arg_str =
|
21
|
+
if operator_token
|
22
|
+
arg_str = args
|
23
23
|
.map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
|
24
24
|
.join("")
|
25
|
-
s << " #{
|
25
|
+
s << " #{operator_token.val} #{arg_str}"
|
26
26
|
end
|
27
27
|
s
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
Arg = Struct.new(:segments, :has_captures) do
|
32
|
+
def to_s
|
33
|
+
segments.map { |s|
|
34
|
+
case s
|
35
|
+
when Integer then "$#{s + 1}"
|
36
|
+
else s.to_s
|
37
|
+
end
|
38
|
+
}.join("")
|
39
|
+
end
|
40
|
+
end
|
30
41
|
end
|
31
42
|
end
|
data/lib/fop/parser.rb
CHANGED
@@ -12,7 +12,7 @@ module Fop
|
|
12
12
|
"A" => "[a-zA-Z]+".freeze,
|
13
13
|
"*" => ".*".freeze,
|
14
14
|
}.freeze
|
15
|
-
OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
|
15
|
+
#OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
|
16
16
|
TR_REGEX = /.*/
|
17
17
|
|
18
18
|
Error = Struct.new(:type, :token, :message) do
|
@@ -63,14 +63,15 @@ module Fop
|
|
63
63
|
def parse_exp!(wildcard = false)
|
64
64
|
exp = Nodes::Expression.new(wildcard)
|
65
65
|
parse_exp_match! exp
|
66
|
-
|
67
|
-
if exp.
|
68
|
-
parse_exp_arg! exp
|
66
|
+
parse_exp_operator! exp
|
67
|
+
if exp.operator_token
|
68
|
+
parse_exp_arg! exp
|
69
69
|
end
|
70
70
|
return exp
|
71
71
|
end
|
72
72
|
|
73
73
|
def parse_exp_match!(exp)
|
74
|
+
@tokenizer.escape.whitespace = false
|
74
75
|
@tokenizer.escape.operators = false
|
75
76
|
t = @tokenizer.next
|
76
77
|
case t.type
|
@@ -93,35 +94,44 @@ module Fop
|
|
93
94
|
end
|
94
95
|
|
95
96
|
def parse_exp_operator!(exp)
|
97
|
+
@tokenizer.escape.whitespace = false
|
96
98
|
@tokenizer.escape.operators = false
|
97
99
|
t = @tokenizer.next
|
98
100
|
case t.type
|
99
101
|
when Tokens::EXP_CLOSE
|
100
102
|
# no op
|
101
|
-
when Tokens::OPERATOR
|
102
|
-
exp.
|
103
|
+
when Tokens::OPERATOR, Tokens::TEXT
|
104
|
+
exp.operator_token = t
|
103
105
|
else
|
104
106
|
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
|
105
107
|
end
|
106
|
-
t
|
107
108
|
end
|
108
109
|
|
109
|
-
def parse_exp_arg!(exp
|
110
|
+
def parse_exp_arg!(exp)
|
111
|
+
@tokenizer.escape.whitespace = false
|
112
|
+
@tokenizer.escape.whitespace_sep = false
|
110
113
|
@tokenizer.escape.operators = true
|
111
114
|
@tokenizer.escape.regex = true
|
112
115
|
@tokenizer.escape.regex_capture = false if exp.regex_match
|
113
116
|
|
114
|
-
|
117
|
+
arg = Nodes::Arg.new([], false)
|
118
|
+
exp.args = []
|
115
119
|
found_close, eof = false, false
|
116
120
|
until found_close or eof
|
117
121
|
t = @tokenizer.next
|
118
122
|
case t.type
|
119
123
|
when Tokens::TEXT
|
120
|
-
|
124
|
+
arg.segments << t.val
|
121
125
|
when Tokens::REG_CAPTURE
|
122
|
-
|
126
|
+
arg.has_captures = true
|
127
|
+
arg.segments << t.val.to_i - 1
|
123
128
|
errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
|
124
129
|
errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
|
130
|
+
when Tokens::WHITESPACE_SEP
|
131
|
+
if arg.segments.any?
|
132
|
+
exp.args << arg
|
133
|
+
arg = Nodes::Arg.new([])
|
134
|
+
end
|
125
135
|
when Tokens::EXP_CLOSE
|
126
136
|
found_close = true
|
127
137
|
when Tokens::EOF
|
@@ -131,10 +141,11 @@ module Fop
|
|
131
141
|
errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
|
132
142
|
end
|
133
143
|
end
|
144
|
+
exp.args << arg if arg.segments.any?
|
134
145
|
|
135
|
-
if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
|
136
|
-
|
137
|
-
end
|
146
|
+
#if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
|
147
|
+
# errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
|
148
|
+
#end
|
138
149
|
end
|
139
150
|
|
140
151
|
def parse_regex!(wildcard)
|
data/lib/fop/tokenizer.rb
CHANGED
@@ -3,8 +3,7 @@ require_relative 'tokens'
|
|
3
3
|
module Fop
|
4
4
|
class Tokenizer
|
5
5
|
Token = Struct.new(:pos, :type, :val)
|
6
|
-
|
7
|
-
Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
|
6
|
+
Escapes = Struct.new(:whitespace, :whitespace_sep, :operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
|
8
7
|
|
9
8
|
EXP_OPEN = "{".freeze
|
10
9
|
EXP_CLOSE = "}".freeze
|
@@ -17,6 +16,7 @@ module Fop
|
|
17
16
|
OP_PREPEND = "<".freeze
|
18
17
|
OP_ADD = "+".freeze
|
19
18
|
OP_SUB = "-".freeze
|
19
|
+
WHITESPACE = " ".freeze
|
20
20
|
|
21
21
|
#
|
22
22
|
# Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
|
@@ -36,11 +36,12 @@ module Fop
|
|
36
36
|
|
37
37
|
# Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
|
38
38
|
def reset_escapes!
|
39
|
-
@escape = Escapes.new(true, true)
|
39
|
+
@escape = Escapes.new(true, true, true, true)
|
40
40
|
end
|
41
41
|
|
42
42
|
# Auto-escape anything you'd find in a regular expression
|
43
43
|
def regex_mode!
|
44
|
+
@escape.whitespace = true
|
44
45
|
@escape.regex = false # look for the final /
|
45
46
|
@escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
|
46
47
|
@escape.wildcards = true
|
@@ -86,6 +87,17 @@ module Fop
|
|
86
87
|
@i += 1
|
87
88
|
token! Tokens::OPERATOR, char
|
88
89
|
end
|
90
|
+
when WHITESPACE
|
91
|
+
if @escape.whitespace
|
92
|
+
get_str!
|
93
|
+
elsif !@escape.whitespace_sep
|
94
|
+
@i += 1
|
95
|
+
token! Tokens::WHITESPACE_SEP
|
96
|
+
else
|
97
|
+
@i += 1
|
98
|
+
@start_i = @i
|
99
|
+
self.next
|
100
|
+
end
|
89
101
|
else
|
90
102
|
get_str!
|
91
103
|
end
|
@@ -162,6 +174,13 @@ module Fop
|
|
162
174
|
else
|
163
175
|
found_end = true
|
164
176
|
end
|
177
|
+
when WHITESPACE
|
178
|
+
if @escape.whitespace
|
179
|
+
@i += 1
|
180
|
+
str << char
|
181
|
+
else
|
182
|
+
found_end = true
|
183
|
+
end
|
165
184
|
else
|
166
185
|
@i += 1
|
167
186
|
str << char
|
data/lib/fop/tokens.rb
CHANGED
data/lib/fop/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fop_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-01 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A micro expression language for Filter and OPerations on text
|
14
14
|
email: jordan.hollinger@gmail.com
|