fop_lang 0.3.0 → 0.7.0
- checksums.yaml +4 -4
- data/README.md +76 -32
- data/bin/fop +42 -0
- data/lib/fop/cli.rb +34 -0
- data/lib/fop/compiler.rb +72 -0
- data/lib/fop/nodes.rb +15 -15
- data/lib/fop/parser.rb +134 -108
- data/lib/fop/program.rb +6 -12
- data/lib/fop/tokenizer.rb +139 -87
- data/lib/fop/tokens.rb +13 -0
- data/lib/fop/version.rb +1 -1
- data/lib/fop_lang.rb +12 -2
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 798fd7c335f394e878fba2f70a9f60372ea356c79f2dc63392398920d0ffce38
+  data.tar.gz: 654786ff77823e8d8dd9a348f958828346e3755e43a04a0f38e711a6c5571ea9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6761f3d7dd602d1c93a2387fc73ea14c11484e88d0d319bbf87df98925977aa15de59a63f23aafffafa384ce3b9def9f81edabae669aabc2012b00d3131e46f4
+  data.tar.gz: 7f5187cd510d691dda996284d5a400804b7573f67506701e39a6d2909c8a4026b58655f6b2800708e911377ccce790885a2238eed7a75d4873e4b599d23e67df
data/README.md
CHANGED
@@ -1,55 +1,99 @@
 # fop_lang
 
-Fop (Filter and OPerations language) is
+Fop (Filter and OPerations language) is a tiny, experimental language for filtering and transforming text. Think of it like awk but with the condition and action segments combined.
 
-
-gem 'fop_lang'
-```
+This is a Ruby implementation with both a library interface and a bin command.
 
-##
+## Installation
 
-
+```bash
+$ gem install fop_lang
+```
+
+You may use fop in a Ruby script:
 
 ```ruby
-
+require 'fop_lang'
 
-
-=> 'release-5.100.0'
+f = Fop('foo {N+1}')
 
-
-
-
+f.apply('foo 1')
+=> "foo 2"
+
+f.apply('bar 1')
+=> nil
+```
+
+or run `fop` from the command line:
+
+```bash
+$ echo 'foo 1' | fop 'foo {N+1}'
+foo 2
+$ echo 'bar 1' | fop 'foo {N+1}'
 ```
 
-##
+## Syntax
+
+`Text /(R|r)egex/ {N+1}`
+
+The above program demonstrates a text match, a regex match, and a match expression. If the input matches all three segments, output is given. If the input was `Text regex 5`, the output would be `Text regex 6`.
+
+### Text match
+
+The input must match this text exactly. Whitespace is part of the match. Wildcards (`*`) are allowed. Special characters (`*/{}\`) may be escaped with `\`.
 
-
+The output of a text match will be the matching input.
 
-
+### Regex match
 
-
+Regular expressions may be placed between `/`s. If the regular expression contains a `/`, you may escape it with `\`. Special regex characters like `[]()+.*` may also be escaped with `\`.
 
-
+The output of a regex match will be the matching input.
 
-
+### Match expression
 
-
+A match expression both matches on input and modifies that input. An expression is made up of 1 - 3 parts:
 
-
+1. The match, e.g. `N` for numeric.
+2. The operator, e.g. `+` for addition (optional).
+3. The argument, e.g `1` for "add one" (required for most operators).
 
-
-
-
-
-
-
-
-
-
-
-
+The output of a match expression will be the _modified_ matching input. If no operator is given, the output will be the matching input.
+
+**Matches**
+
+* `N` matches one or more consecutive digits.
+* `A` matches one or more letters (lower or upper case).
+* `W` matches alphanumeric chars and underscores.
+* `*` greedily matches everything after it.
+* `/regex/` matches on the supplied regex. Capture groups may be referenced in the argument as `$1`, `$2`, etc.
+
+**Operators**
+
+* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
+* `>` Append the argument to the matching value.
+* `<` Prepend the argument to the matching value.
+* `+` Perform addition on the matching number and the argument (`N` only).
+* `-` Subtract the argument from the matching number (`N` only).
+
+## Examples
+
+### Release Number Example
+
+This example takes in GitHub branch names, decides if they're release branches, and if so, increments the version number.
+
+```ruby
+f = Fop('release-{N}.{N+1}.{N=0}')
+
+puts f.apply('release-5.99.1')
+=> 'release-5.100.0'
+
+puts f.apply('release-5')
+=> nil
+# doesn't match the pattern
+```
 
-
+### More Examples
 
 ```ruby
 f = Fop('release-{N=5}.{N+1}.{N=0}')
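The operator and capture-group semantics documented in the README above are easiest to see in a few tiny programs. The snippets below are illustrative sketches written against that documentation; the results in the comments are expectations, not captured from a real run.

```ruby
require 'fop_lang'

Fop('foo {N=}').apply('foo 1')    # '=' with no argument drops the match  => "foo "
Fop('foo {N>0}').apply('foo 1')   # '>' appends the argument              => "foo 10"
Fop('foo {N<v}').apply('foo 1')   # '<' prepends the argument             => "foo v1"
Fop('foo {N-1}').apply('foo 10')  # '-' subtracts (N matches only)        => "foo 9"

# A regex match expression can reference its capture groups in the argument.
Fop('{/(\w+)-(\w+)/=$2-$1}').apply('foo-bar')   # => "bar-foo"
```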
data/bin/fop
ADDED
@@ -0,0 +1,42 @@
+#!/usr/bin/env ruby
+
+# Used for local testing
+# $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+
+require 'fop_lang'
+require 'fop/cli'
+
+opts = Fop::CLI.options!
+
+if opts.version
+  puts Fop::VERSION
+  exit 0
+end
+
+src = opts.src.read.chomp
+if src.empty?
+  $stderr.puts "No expression given"
+  exit 1
+end
+
+fop, errors = Fop.compile(src)
+opts.src.close
+NL = "\n".freeze
+
+if errors
+  $stderr.puts src
+  $stderr.puts errors.join(NL)
+  exit 1
+end
+
+if opts.check
+  $stdout.puts "Syntax OK" unless opts.quiet
+  exit 0
+end
+
+while (line = gets) do
+  line.chomp!
+  if (res = fop.apply(line))
+    print(res << NL)
+  end
+end
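For reference, the `fop` executable above is a thin wrapper around the library API. A minimal sketch of the same loop in plain Ruby, using only the calls shown in this changeset (illustrative only):

```ruby
require 'fop_lang'

prog, errors = Fop.compile('release-{N}.{N+1}.{N=0}')
if errors
  errors.each { |e| warn e.to_s }  # each error message includes the offending column
  exit 1
end

$stdin.each_line do |line|
  result = prog.apply(line.chomp)
  puts result if result            # non-matching lines return nil and are skipped
end
```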
data/lib/fop/cli.rb
ADDED
@@ -0,0 +1,34 @@
+require 'optparse'
+
+module Fop
+  module CLI
+    Options = Struct.new(:src, :check, :quiet, :version)
+
+    def self.options!
+      options = Options.new
+      OptionParser.new do |opts|
+        opts.banner = "Usage: fop [options] [ 'prog' | -f progfile ] [ file ... ]"
+
+        opts.on("-fFILE", "--file=FILE", "Read program from file instead of first argument") do |f|
+          options.src = File.open(f)
+          options.src.advise(:sequential)
+        end
+
+        opts.on("-c", "--check", "Perform a syntax check on the program and exit") do
+          options.check = true
+        end
+
+        opts.on("-q", "--quiet", "Only print errors and output") do
+          options.quiet = true
+        end
+
+        opts.on("--version", "Print version and exit") do
+          options.version = true
+        end
+      end.parse!
+
+      options.src ||= StringIO.new(ARGV.shift || "")
+      options
+    end
+  end
+end
data/lib/fop/compiler.rb
ADDED
@@ -0,0 +1,72 @@
+require_relative 'parser'
+
+module Fop
+  module Compiler
+    def self.compile(src)
+      parser = Parser.new(src)
+      nodes, errors = parser.parse
+
+      instructions = nodes.map { |node|
+        case node
+        when Nodes::Text, Nodes::Regex
+          Instructions.regex_match(node.regex)
+        when Nodes::Expression
+          Instructions::ExpressionMatch.new(node)
+        else
+          raise "Unknown node type #{node}"
+        end
+      }
+
+      return nil, errors if errors.any?
+      return instructions, nil
+    end
+
+    module Instructions
+      BLANK = "".freeze
+      OPERATIONS = {
+        "=" => ->(_val, arg) { arg || BLANK },
+        "+" => ->(val, arg) { val.to_i + arg.to_i },
+        "-" => ->(val, arg) { val.to_i - arg.to_i },
+        ">" => ->(val, arg) { val + arg },
+        "<" => ->(val, arg) { arg + val },
+      }
+
+      def self.regex_match(regex)
+        ->(input) { input.slice! regex }
+      end
+
+      class ExpressionMatch
+        def initialize(node)
+          @regex = node.regex&.regex
+          @op = node.operator ? OPERATIONS.fetch(node.operator) : nil
+          @regex_match = node.regex_match
+          if node.arg&.any? { |a| a.is_a? Integer }
+            @arg, @arg_with_caps = nil, node.arg
+          else
+            @arg = node.arg&.join("")
+            @arg_with_caps = nil
+          end
+        end
+
+        def call(input)
+          if (match = @regex.match(input))
+            val = match.to_s
+            blank = val == BLANK
+            input.sub!(val, BLANK) unless blank
+            found_val = @regex_match || !blank
+            arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
+            @op && found_val ? @op.call(val, arg) : val
+          end
+        end
+
+        private
+
+        def sub_caps(args, caps)
+          args.map { |a|
+            a.is_a?(Integer) ? caps[a].to_s : a
+          }.join("")
+        end
+      end
+    end
+  end
+end
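The compiler turns each parse node into an "instruction": a lambda for text/regex segments and an `ExpressionMatch` for expressions. Each instruction consumes its match from the front of the (mutable) input string and returns the text to emit, or nil when it doesn't match. An illustrative sketch of that contract, with expected values in comments (not output from a real run):

```ruby
instructions, errors = Fop::Compiler.compile('foo {N+1}')
raise errors.join(", ") if errors

input = "foo 41".dup
parts = instructions.map { |ins| ins.call(input) }
# After the text instruction:       input == "41", parts[0] == "foo "
# After the expression instruction: input == "",   parts[1] == 42
puts parts.join  # => "foo 42"
```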
data/lib/fop/nodes.rb
CHANGED
@@ -1,29 +1,29 @@
 module Fop
   module Nodes
-    Text = Struct.new(:wildcard, :str) do
-      def consume!(input)
-        @regex ||= Regexp.new((wildcard ? ".*" : "^") + Regexp.escape(str))
-        input.slice!(@regex)
-      end
-
+    Text = Struct.new(:wildcard, :str, :regex) do
       def to_s
         w = wildcard ? "*" : nil
-        "
+        "[#{w}txt] #{str}"
       end
     end
 
-
-    def
-
-
-        expression && found_val ? expression.call(val) : val
-      end
+    Regex = Struct.new(:wildcard, :src, :regex) do
+      def to_s
+        w = wildcard ? "*" : nil
+        "[#{w}reg] #{src}"
       end
+    end
 
+    Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
       def to_s
         w = wildcard ? "*" : nil
-        s = "#{w}#{match}"
-
+        s = "[#{w}exp] #{match}"
+        if operator
+          arg_str = arg
+            .map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
+            .join("")
+          s << " #{operator} #{arg_str}"
+        end
         s
       end
     end
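These Structs are plain data carriers now (matching moved to the compiler); their `to_s` methods only produce a debug rendering. Illustrative examples derived from the code above, not captured output:

```ruby
Fop::Nodes::Text.new(false, "release-", /^release-/).to_s
# => "[txt] release-"

Fop::Nodes::Expression.new(true, "N", nil, nil, "+", ["1"]).to_s
# => "[*exp] N + 1"
```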
data/lib/fop/parser.rb
CHANGED
@@ -1,136 +1,162 @@
+require_relative 'tokenizer'
 require_relative 'nodes'
 
 module Fop
-
-
+  class Parser
+    DIGIT = /^[0-9]$/
+    REGEX_START = "^".freeze
+    REGEX_LAZY_WILDCARD = ".*?".freeze
+    REGEX_MATCHES = {
+      "N" => "[0-9]+".freeze,
+      "W" => "\\w+".freeze,
+      "A" => "[a-zA-Z]+".freeze,
+      "*" => ".*".freeze,
+    }.freeze
+    OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
+    TR_REGEX = /.*/
 
-
-
-
-
-
-    OP_REPLACE = "=".freeze
-    OP_ADD = "+".freeze
-    OP_SUB = "-".freeze
-    OP_MUL = "*".freeze
-    OP_DIV = "/".freeze
+    Error = Struct.new(:type, :token, :message) do
+      def to_s
+        "#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
+      end
+    end
 
-
-      nodes = []
-      curr_node = nil
+    attr_reader :errors
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def initialize(src, debug: false)
+      @tokenizer = Tokenizer.new(src)
+      @errors = []
+    end
+
+    def parse
+      nodes = []
+      wildcard = false
+      eof = false
+      # Top-level parsing. It will always be looking for a String, Regex, or Expression.
+      until eof
+        @tokenizer.reset_escapes!
+        t = @tokenizer.next
+        case t.type
+        when Tokens::WILDCARD
+          errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
+          wildcard = true
+        when Tokens::TEXT
+          reg = build_regex!(wildcard, t, Regexp.escape(t.val))
+          nodes << Nodes::Text.new(wildcard, t.val, reg)
+          wildcard = false
+        when Tokens::EXP_OPEN
+          nodes << parse_exp!(wildcard)
+          wildcard = false
+        when Tokens::REG_DELIM
+          nodes << parse_regex!(wildcard)
+          wildcard = false
+        when Tokens::EOF
+          eof = true
         else
-
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}")
         end
-      }
-
-      case curr_node
-      when nil
-        # noop
-      when :wildcard
-        nodes << Nodes::Text.new(true, "")
-      when Nodes::Text, Nodes::Op
-        nodes << curr_node
-      else
-        raise "Unexpected end node #{curr_node}"
       end
-
-      nodes
+      nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
+      return nodes, @errors
     end
 
-
+    def parse_exp!(wildcard = false)
+      exp = Nodes::Expression.new(wildcard)
+      parse_exp_match! exp
+      op_token = parse_exp_operator! exp
+      if exp.operator
+        parse_exp_arg! exp, op_token
+      end
+      return exp
+    end
 
-    def
-
-
-
-      when
-
-
-
-
-
+    def parse_exp_match!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::TEXT, Tokens::WILDCARD
+        exp.match = t.val
+        if (src = REGEX_MATCHES[exp.match])
+          reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+          exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
+        else
+          errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
+        end
+      when Tokens::REG_DELIM
+        exp.regex = parse_regex!(exp.wildcard)
+        exp.match = exp.regex&.src
+        exp.regex_match = true
+        @tokenizer.reset_escapes!
       else
-
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
       end
     end
 
-
-
-
-      case
-      when
-
-
-
-        op = new_node token
-        return op, node
-      when :wildcard
-        return :wildcard, node
+    def parse_exp_operator!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::EXP_CLOSE
+        # no op
+      when Tokens::OPERATOR
+        exp.operator = t.val
       else
-
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
       end
+      t
     end
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def parse_exp_arg!(exp, op_token)
+      @tokenizer.escape.operators = true
+      @tokenizer.escape.regex = true
+      @tokenizer.escape.regex_capture = false if exp.regex_match
+
+      exp.arg = []
+      found_close, eof = false, false
+      until found_close or eof
+        t = @tokenizer.next
+        case t.type
+        when Tokens::TEXT
+          exp.arg << t.val
+        when Tokens::REG_CAPTURE
+          exp.arg << t.val.to_i - 1
+          errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
+          errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
+        when Tokens::EXP_CLOSE
+          found_close = true
+        when Tokens::EOF
+          eof = true
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
         else
-
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
         end
+      end
 
-
-
-
-
-
-      arg = tokens[2..-1].reduce("") { |acc, t|
-        raise Error, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
-        acc + t.char
-      }
-      node.operator_arg = arg == BLANK ? nil : arg
+      if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
+        errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
+      end
+    end
 
-
-
-
-
-
-
-
-
-      else
-        raise(Error, "Unknown operator #{node.operator}")
-      end
+    def parse_regex!(wildcard)
+      @tokenizer.regex_mode!
+      t = @tokenizer.next
+      reg = Nodes::Regex.new(wildcard, t.val)
+      if t.type == Tokens::TEXT
+        reg.regex = build_regex!(wildcard, t)
+      else
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
       end
+
+      t = @tokenizer.next
+      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
+      reg
+    end
+
+    def build_regex!(wildcard, token, src = token.val)
+      Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+    rescue RegexpError => e
+      errors << Error.new(:regex, token, e.message)
+      nil
     end
   end
 end
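The rewritten parser collects errors instead of raising, so a caller can report every problem at once. A small illustrative sketch; the message and column shown are expectations derived from the code above, not captured output:

```ruby
parser = Fop::Parser.new('foo {X+1}')
nodes, errors = parser.parse
errors.map(&:to_s)
# => expected: ["Name error: Unknown match type 'X' at column 5"]
```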
data/lib/fop/program.rb
CHANGED
@@ -1,22 +1,16 @@
-require_relative 'tokenizer'
-require_relative 'parser'
-
 module Fop
   class Program
-
-
-    def initialize(src)
-      tokens = Tokenizer.new(src).tokenize!
-      @nodes = Parser.parse! tokens
+    def initialize(instructions)
+      @instructions = instructions
     end
 
     def apply(input)
       input = input.clone
       output =
-        @
-
-        return nil if
-        acc +
+        @instructions.reduce("") { |acc, ins|
+          result = ins.call(input)
+          return nil if result.nil?
+          acc + result.to_s
         }
       input.empty? ? output : nil
     end
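Program#apply clones the input, lets each instruction consume its segment, and only returns the accumulated output if the entire input was consumed; otherwise it returns nil. An illustrative sketch (expected results in comments, not a real run):

```ruby
prog = Fop('release-{N}')
prog.apply('release-12')    # => "release-12"  (everything consumed)
prog.apply('release-12.0')  # => nil           (".0" is left over, so the line is filtered out)
```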
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,123 +1,175 @@
+require_relative 'tokens'
+
 module Fop
   class Tokenizer
-
-
-
-    Error = Class.new(StandardError)
+    Token = Struct.new(:pos, :type, :val)
+    Error = Struct.new(:pos, :message)
+    Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
 
-
-
+    EXP_OPEN = "{".freeze
+    EXP_CLOSE = "}".freeze
     ESCAPE = "\\".freeze
     WILDCARD = "*".freeze
-
+    REGEX_DELIM = "/".freeze
+    REGEX_CAPTURE = "$".freeze
+    OP_REPLACE = "=".freeze
+    OP_APPEND = ">".freeze
+    OP_PREPEND = "<".freeze
+    OP_ADD = "+".freeze
+    OP_SUB = "-".freeze
+
+    #
+    # Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
+    # explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
+    # but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
+    # or off to auto-escape certain chars for the next token.
+    #
+    attr_reader :escape
 
     def initialize(src)
       @src = src
       @end = src.size - 1
+      @start_i = 0
+      @i = 0
+      reset_escapes!
     end
 
-
-
-      escape =
-      i = 0
-      until i > @end do
-        char = @src[i]
-        if escape
-          tokens << Char.new(char)
-          escape = false
-          i += 1
-          next
-        end
-
-        case char
-        when ESCAPE
-          escape = true
-          i += 1
-        when OP_OPEN
-          i, op = operation! i + 1
-          tokens << op
-        when OP_CLOSE
-          raise "Unexpected #{OP_CLOSE}"
-        when WILDCARD
-          tokens << :wildcard
-          i += 1
-        else
-          tokens << Char.new(char)
-          i += 1
-        end
-      end
-
-      raise Error, "Trailing escape" if escape
-      tokens
+    # Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
+    def reset_escapes!
+      @escape = Escapes.new(true, true)
     end
 
-
-
-
-      escape =
-
-
+    # Auto-escape anything you'd find in a regular expression
+    def regex_mode!
+      @escape.regex = false # look for the final /
+      @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
+      @escape.wildcards = true
+      @escape.operators = true
+      @escape.regex_capture = true
+      @escape.exp = true
+    end
 
-
-
-
-
-
-
-
+    def next
+      return Token.new(@i, Tokens::EOF) if @i > @end
+      char = @src[@i]
+      case char
+      when EXP_OPEN
+        @i += 1
+        token! Tokens::EXP_OPEN
+      when EXP_CLOSE
+        @i += 1
+        token! Tokens::EXP_CLOSE
+      when WILDCARD
+        @i += 1
+        token! Tokens::WILDCARD, WILDCARD
+      when REGEX_DELIM
+        if @escape.regex
+          get_str!
+        else
+          @i += 1
+          token! Tokens::REG_DELIM
         end
-
-
-
-          escape = true
-          i += 1
-        when OP_OPEN
-          raise "Unexpected #{OP_OPEN}"
-        when OP_CLOSE
-          found_close = true
-          i += 1
-        when REGEX_MARKER
-          i, reg = regex! i + 1
-          tokens << reg
+      when REGEX_CAPTURE
+        if @escape.regex_capture
+          get_str!
         else
-
-
+          @i += 1
+          t = token! Tokens::REG_CAPTURE, @src[@i]
+          @i += 1
+          @start_i = @i
+          t
         end
+      when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+        if @escape.operators
+          get_str!
+        else
+          @i += 1
+          token! Tokens::OPERATOR, char
+        end
+      else
+        get_str!
       end
-
-      raise Error, "Unclosed operation" if !found_close
-      raise Error, "Trailing escape" if escape
-      return i, Op.new(tokens)
     end
 
-
-      escape = false
-      found_close = false
-      src = ""
+    private
 
-
-
-
+    def token!(type, val = nil)
+      t = Token.new(@start_i, type, val)
+      @start_i = @i
+      t
+    end
+
+    def get_str!
+      str = ""
+      escape, found_end = false, false
+      until found_end or @i > @end
+        char = @src[@i]
 
         if escape
-
+          @i += 1
+          str << char
          escape = false
          next
        end
 
        case char
        when ESCAPE
-
-
-
+          @i += 1
+          if @escape.regex_escape and @src[@i] != REGEX_DELIM
+            str << char
+          else
+            escape = true
+          end
+        when EXP_OPEN
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when EXP_CLOSE
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when WILDCARD
+          if @escape.wildcards
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_DELIM
+          if @escape.regex
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_CAPTURE
+          if @escape.regex_capture
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+          if @escape.operators
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
        else
-
+          @i += 1
+          str << char
        end
      end
 
-
-
-      return i, Regex.new(src)
+      return Token.new(@i - 1, Tokens::TR_ESC) if escape
+      token! Tokens::TEXT, str
    end
  end
 end
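The Escapes flags make the tokenizer mode-driven: under the default (top-level) escapes, operator characters are folded into TEXT tokens, and the parser flips `escape.operators = false` when it actually wants an OPERATOR token. A sketch of the raw token stream, derived from the code above (not captured output):

```ruby
tok = Fop::Tokenizer.new('foo {N+1}')
tok.next  # => Token(pos 0, TEXT, "foo ")
tok.next  # => Token(pos 4, EXP_OPEN, nil)
tok.next  # => Token(pos 5, TEXT, "N+1")   # '+' auto-escaped under default escapes
tok.next  # => Token(pos 8, EXP_CLOSE, nil)
tok.next  # => Token(pos 9, EOF, nil)
```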
data/lib/fop/tokens.rb
ADDED
data/lib/fop/version.rb
CHANGED
data/lib/fop_lang.rb
CHANGED
@@ -1,12 +1,22 @@
 require_relative 'fop/version'
+require_relative 'fop/compiler'
 require_relative 'fop/program'
 
 def Fop(src)
-  ::Fop
+  ::Fop.compile!(src)
 end
 
 module Fop
+  def self.compile!(src)
+    prog, errors = compile(src)
+    # TODO better exception
+    raise "Fop errors: " + errors.map(&:message).join(",") if errors
+    prog
+  end
+
   def self.compile(src)
-
+    instructions, errors = ::Fop::Compiler.compile(src)
+    return nil, errors if errors
+    return Program.new(instructions), nil
   end
 end
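The two entry points differ only in error handling: `Fop()`/`Fop.compile!` raise on a bad program, while `Fop.compile` returns the error list. Illustrative sketch:

```ruby
prog = Fop('release-{N+1}')           # raises "Fop errors: ..." if the program is invalid

prog, errors = Fop.compile('foo {')   # returns [nil, errors] instead of raising
puts errors.join(", ") if errors
```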
metadata
CHANGED
@@ -1,26 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: fop_lang
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.7.0
 platform: ruby
 authors:
 - Jordan Hollinger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-
+date: 2021-08-30 00:00:00.000000000 Z
 dependencies: []
 description: A micro expression language for Filter and OPerations on text
 email: jordan.hollinger@gmail.com
-executables:
+executables:
+- fop
 extensions: []
 extra_rdoc_files: []
 files:
 - README.md
+- bin/fop
+- lib/fop/cli.rb
+- lib/fop/compiler.rb
 - lib/fop/nodes.rb
 - lib/fop/parser.rb
 - lib/fop/program.rb
 - lib/fop/tokenizer.rb
+- lib/fop/tokens.rb
 - lib/fop/version.rb
 - lib/fop_lang.rb
 homepage: https://jhollinger.github.io/fop-lang-rb/