fop_lang 0.3.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +76 -32
- data/bin/fop +42 -0
- data/lib/fop/cli.rb +34 -0
- data/lib/fop/compiler.rb +72 -0
- data/lib/fop/nodes.rb +15 -15
- data/lib/fop/parser.rb +134 -108
- data/lib/fop/program.rb +6 -12
- data/lib/fop/tokenizer.rb +139 -87
- data/lib/fop/tokens.rb +13 -0
- data/lib/fop/version.rb +1 -1
- data/lib/fop_lang.rb +12 -2
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 798fd7c335f394e878fba2f70a9f60372ea356c79f2dc63392398920d0ffce38
+  data.tar.gz: 654786ff77823e8d8dd9a348f958828346e3755e43a04a0f38e711a6c5571ea9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6761f3d7dd602d1c93a2387fc73ea14c11484e88d0d319bbf87df98925977aa15de59a63f23aafffafa384ce3b9def9f81edabae669aabc2012b00d3131e46f4
+  data.tar.gz: 7f5187cd510d691dda996284d5a400804b7573f67506701e39a6d2909c8a4026b58655f6b2800708e911377ccce790885a2238eed7a75d4873e4b599d23e67df
data/README.md
CHANGED
@@ -1,55 +1,99 @@
 # fop_lang
 
-Fop (Filter and OPerations language) is
+Fop (Filter and OPerations language) is a tiny, experimental language for filtering and transforming text. Think of it like awk but with the condition and action segments combined.
 
-
-gem 'fop_lang'
-```
+This is a Ruby implementation with both a library interface and a bin command.
 
-##
+## Installation
 
-
+```bash
+$ gem install fop_lang
+```
+
+You may use fop in a Ruby script:
 
 ```ruby
-
+require 'fop_lang'
 
-
-=> 'release-5.100.0'
+f = Fop('foo {N+1}')
 
-
-
-
+f.apply('foo 1')
+=> "foo 2"
+
+f.apply('bar 1')
+=> nil
+```
+
+or run `fop` from the command line:
+
+```bash
+$ echo 'foo 1' | fop 'foo {N+1}'
+foo 2
+$ echo 'bar 1' | fop 'foo {N+1}'
 ```
 
-##
+## Syntax
+
+`Text /(R|r)egex/ {N+1}`
+
+The above program demonstrates a text match, a regex match, and a match expression. If the input matches all three segments, output is given. If the input was `Text regex 5`, the output would be `Text regex 6`.
+
+### Text match
+
+The input must match this text exactly. Whitespace is part of the match. Wildcards (`*`) are allowed. Special characters (`*/{}\`) may be escaped with `\`.
 
-
+The output of a text match will be the matching input.
 
-
+### Regex match
 
-
+Regular expressions may be placed between `/`s. If the regular expression contains a `/`, you may escape it with `\`. Special regex characters like `[]()+.*` may also be escaped with `\`.
 
-
+The output of a regex match will be the matching input.
 
-
+### Match expression
 
-
+A match expression both matches on input and modifies that input. An expression is made up of 1 - 3 parts:
 
-
+1. The match, e.g. `N` for numeric.
+2. The operator, e.g. `+` for addition (optional).
+3. The argument, e.g `1` for "add one" (required for most operators).
 
-
-
-
-
-
-
-
-
-
-
-
+The output of a match expression will be the _modified_ matching input. If no operator is given, the output will be the matching input.
+
+**Matches**
+
+* `N` matches one or more consecutive digits.
+* `A` matches one or more letters (lower or upper case).
+* `W` matches alphanumeric chars and underscores.
+* `*` greedily matches everything after it.
+* `/regex/` matches on the supplied regex. Capture groups may be referenced in the argument as `$1`, `$2`, etc.
+
+**Operators**
+
+* `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
+* `>` Append the argument to the matching value.
+* `<` Prepend the argument to the matching value.
+* `+` Perform addition on the matching number and the argument (`N` only).
+* `-` Subtract the argument from the matching number (`N` only).
+
+## Examples
+
+### Release Number Example
+
+This example takes in GitHub branch names, decides if they're release branches, and if so, increments the version number.
+
+```ruby
+f = Fop('release-{N}.{N+1}.{N=0}')
+
+puts f.apply('release-5.99.1')
+=> 'release-5.100.0'
+
+puts f.apply('release-5')
+=> nil
+# doesn't match the pattern
+```
 
-
+### More Examples
 
 ```ruby
 f = Fop('release-{N=5}.{N+1}.{N=0}')
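The new README documents five operators (`=`, `>`, `<`, `+`, `-`) and regex capture arguments, but only demonstrates `+` and `=`. A few extra illustrative programs, extrapolated from those tables — the patterns and expected outputs below are assumptions based on the documented semantics, not examples shipped with the gem:

```ruby
require 'fop_lang'

# `A` matches letters; `=` replaces the match with its argument.
Fop('{A=dev}-{N}').apply('release-12')         # expected "dev-12"

# `>` appends to the matched value, `<` would prepend.
Fop('v{N>-rc1}').apply('v3')                   # expected "v3-rc1"

# A /regex/ match can reuse its capture groups in the argument.
Fop('{/(\w+)-(\w+)/=$2-$1}').apply('foo-bar')  # expected "bar-foo"
```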
data/bin/fop
ADDED
@@ -0,0 +1,42 @@
+#!/usr/bin/env ruby
+
+# Used for local testing
+# $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+
+require 'fop_lang'
+require 'fop/cli'
+
+opts = Fop::CLI.options!
+
+if opts.version
+  puts Fop::VERSION
+  exit 0
+end
+
+src = opts.src.read.chomp
+if src.empty?
+  $stderr.puts "No expression given"
+  exit 1
+end
+
+fop, errors = Fop.compile(src)
+opts.src.close
+NL = "\n".freeze
+
+if errors
+  $stderr.puts src
+  $stderr.puts errors.join(NL)
+  exit 1
+end
+
+if opts.check
+  $stdout.puts "Syntax OK" unless opts.quiet
+  exit 0
+end
+
+while (line = gets) do
+  line.chomp!
+  if (res = fop.apply(line))
+    print(res << NL)
+  end
+end
data/lib/fop/cli.rb
ADDED
@@ -0,0 +1,34 @@
+require 'optparse'
+
+module Fop
+  module CLI
+    Options = Struct.new(:src, :check, :quiet, :version)
+
+    def self.options!
+      options = Options.new
+      OptionParser.new do |opts|
+        opts.banner = "Usage: fop [options] [ 'prog' | -f progfile ] [ file ... ]"
+
+        opts.on("-fFILE", "--file=FILE", "Read program from file instead of first argument") do |f|
+          options.src = File.open(f)
+          options.src.advise(:sequential)
+        end
+
+        opts.on("-c", "--check", "Perform a syntax check on the program and exit") do
+          options.check = true
+        end
+
+        opts.on("-q", "--quiet", "Only print errors and output") do
+          options.quiet = true
+        end
+
+        opts.on("--version", "Print version and exit") do
+          options.version = true
+        end
+      end.parse!
+
+      options.src ||= StringIO.new(ARGV.shift || "")
+      options
+    end
+  end
+end
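These options feed `bin/fop` above. A minimal sketch of how they resolve — the ARGV values are illustrative, and `stringio` is required explicitly here since `cli.rb` itself does not:

```ruby
require 'stringio'
require 'fop_lang'
require 'fop/cli'

# Equivalent to running: fop -c 'foo {N+1}'
ARGV.replace(["-c", "foo {N+1}"])
opts = Fop::CLI.options!
opts.check     # => true (syntax-check mode)
opts.src.read  # => "foo {N+1}" -- wrapped in a StringIO when -f is not given
```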
data/lib/fop/compiler.rb
ADDED
@@ -0,0 +1,72 @@
+require_relative 'parser'
+
+module Fop
+  module Compiler
+    def self.compile(src)
+      parser = Parser.new(src)
+      nodes, errors = parser.parse
+
+      instructions = nodes.map { |node|
+        case node
+        when Nodes::Text, Nodes::Regex
+          Instructions.regex_match(node.regex)
+        when Nodes::Expression
+          Instructions::ExpressionMatch.new(node)
+        else
+          raise "Unknown node type #{node}"
+        end
+      }
+
+      return nil, errors if errors.any?
+      return instructions, nil
+    end
+
+    module Instructions
+      BLANK = "".freeze
+      OPERATIONS = {
+        "=" => ->(_val, arg) { arg || BLANK },
+        "+" => ->(val, arg) { val.to_i + arg.to_i },
+        "-" => ->(val, arg) { val.to_i - arg.to_i },
+        ">" => ->(val, arg) { val + arg },
+        "<" => ->(val, arg) { arg + val },
+      }
+
+      def self.regex_match(regex)
+        ->(input) { input.slice! regex }
+      end
+
+      class ExpressionMatch
+        def initialize(node)
+          @regex = node.regex&.regex
+          @op = node.operator ? OPERATIONS.fetch(node.operator) : nil
+          @regex_match = node.regex_match
+          if node.arg&.any? { |a| a.is_a? Integer }
+            @arg, @arg_with_caps = nil, node.arg
+          else
+            @arg = node.arg&.join("")
+            @arg_with_caps = nil
+          end
+        end
+
+        def call(input)
+          if (match = @regex.match(input))
+            val = match.to_s
+            blank = val == BLANK
+            input.sub!(val, BLANK) unless blank
+            found_val = @regex_match || !blank
+            arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
+            @op && found_val ? @op.call(val, arg) : val
+          end
+        end
+
+        private
+
+        def sub_caps(args, caps)
+          args.map { |a|
+            a.is_a?(Integer) ? caps[a].to_s : a
+          }.join("")
+        end
+      end
+    end
+  end
+end
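The compiler turns parse nodes into an array of callable "instructions": a regex-slicing lambda for each text/regex segment and an `ExpressionMatch` object per `{...}` expression. A sketch of driving it directly — the literal return values are expectations inferred from the code above, not verified output:

```ruby
require 'fop_lang'

instructions, errors = Fop::Compiler.compile('release-{N+1}')
errors             # => nil on success, otherwise an array of parser errors
instructions.size  # => 2: a matcher for "release-" plus an ExpressionMatch for {N+1}

# Each instruction consumes its slice of a mutable input string.
input = 'release-4'.dup
instructions.map { |ins| ins.call(input) }  # expected ["release-", 5]
input                                       # expected "" -- fully consumed
```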
data/lib/fop/nodes.rb
CHANGED
@@ -1,29 +1,29 @@
 module Fop
   module Nodes
-    Text = Struct.new(:wildcard, :str) do
-      def consume!(input)
-        @regex ||= Regexp.new((wildcard ? ".*" : "^") + Regexp.escape(str))
-        input.slice!(@regex)
-      end
-
+    Text = Struct.new(:wildcard, :str, :regex) do
       def to_s
         w = wildcard ? "*" : nil
-        "
+        "[#{w}txt] #{str}"
       end
     end
 
-
-      def
-
-
-        expression && found_val ? expression.call(val) : val
-      end
+    Regex = Struct.new(:wildcard, :src, :regex) do
+      def to_s
+        w = wildcard ? "*" : nil
+        "[#{w}reg] #{src}"
       end
+    end
 
+    Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
       def to_s
         w = wildcard ? "*" : nil
-        s = "#{w}#{match}"
-
+        s = "[#{w}exp] #{match}"
+        if operator
+          arg_str = arg
+            .map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
+            .join("")
+          s << " #{operator} #{arg_str}"
+        end
         s
       end
     end
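The structs above carry a precompiled regex (built by the parser) and a `to_s` that is mainly useful for debugging. A small sketch of that stringification — the outputs are assumed from the struct definitions, not run against the gem:

```ruby
require 'fop_lang'

Fop::Nodes::Text.new(false, "release-", /^release-/).to_s
# expected "[txt] release-"

Fop::Nodes::Expression.new(true, "N", nil, nil, "+", ["1"]).to_s
# expected "[*exp] N + 1" -- the "*" flags a preceding wildcard
```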
data/lib/fop/parser.rb
CHANGED
@@ -1,136 +1,162 @@
+require_relative 'tokenizer'
 require_relative 'nodes'
 
 module Fop
-
-
+  class Parser
+    DIGIT = /^[0-9]$/
+    REGEX_START = "^".freeze
+    REGEX_LAZY_WILDCARD = ".*?".freeze
+    REGEX_MATCHES = {
+      "N" => "[0-9]+".freeze,
+      "W" => "\\w+".freeze,
+      "A" => "[a-zA-Z]+".freeze,
+      "*" => ".*".freeze,
+    }.freeze
+    OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
+    TR_REGEX = /.*/
 
-
-
-
-
-
-    OP_REPLACE = "=".freeze
-    OP_ADD = "+".freeze
-    OP_SUB = "-".freeze
-    OP_MUL = "*".freeze
-    OP_DIV = "/".freeze
+    Error = Struct.new(:type, :token, :message) do
+      def to_s
+        "#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
+      end
+    end
 
-
-      nodes = []
-      curr_node = nil
+    attr_reader :errors
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def initialize(src, debug: false)
+      @tokenizer = Tokenizer.new(src)
+      @errors = []
+    end
+
+    def parse
+      nodes = []
+      wildcard = false
+      eof = false
+      # Top-level parsing. It will always be looking for a String, Regex, or Expression.
+      until eof
+        @tokenizer.reset_escapes!
+        t = @tokenizer.next
+        case t.type
+        when Tokens::WILDCARD
+          errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
+          wildcard = true
+        when Tokens::TEXT
+          reg = build_regex!(wildcard, t, Regexp.escape(t.val))
+          nodes << Nodes::Text.new(wildcard, t.val, reg)
+          wildcard = false
+        when Tokens::EXP_OPEN
+          nodes << parse_exp!(wildcard)
+          wildcard = false
+        when Tokens::REG_DELIM
+          nodes << parse_regex!(wildcard)
+          wildcard = false
+        when Tokens::EOF
+          eof = true
         else
-
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}")
         end
-      }
-
-      case curr_node
-      when nil
-        # noop
-      when :wildcard
-        nodes << Nodes::Text.new(true, "")
-      when Nodes::Text, Nodes::Op
-        nodes << curr_node
-      else
-        raise "Unexpected end node #{curr_node}"
       end
-
-      nodes
+      nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
+      return nodes, @errors
     end
 
-
+    def parse_exp!(wildcard = false)
+      exp = Nodes::Expression.new(wildcard)
+      parse_exp_match! exp
+      op_token = parse_exp_operator! exp
+      if exp.operator
+        parse_exp_arg! exp, op_token
+      end
+      return exp
+    end
 
-    def
-
-
-
-      when
-
-
-
-
-
+    def parse_exp_match!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::TEXT, Tokens::WILDCARD
+        exp.match = t.val
+        if (src = REGEX_MATCHES[exp.match])
+          reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+          exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
+        else
+          errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
+        end
+      when Tokens::REG_DELIM
+        exp.regex = parse_regex!(exp.wildcard)
+        exp.match = exp.regex&.src
+        exp.regex_match = true
+        @tokenizer.reset_escapes!
      else
-
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
      end
    end
 
-
-
-
-      case
-      when
-
-
-
-        op = new_node token
-        return op, node
-      when :wildcard
-        return :wildcard, node
+    def parse_exp_operator!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::EXP_CLOSE
+        # no op
+      when Tokens::OPERATOR
+        exp.operator = t.val
      else
-
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
      end
+      t
    end
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def parse_exp_arg!(exp, op_token)
+      @tokenizer.escape.operators = true
+      @tokenizer.escape.regex = true
+      @tokenizer.escape.regex_capture = false if exp.regex_match
+
+      exp.arg = []
+      found_close, eof = false, false
+      until found_close or eof
+        t = @tokenizer.next
+        case t.type
+        when Tokens::TEXT
+          exp.arg << t.val
+        when Tokens::REG_CAPTURE
+          exp.arg << t.val.to_i - 1
+          errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
+          errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
+        when Tokens::EXP_CLOSE
+          found_close = true
+        when Tokens::EOF
+          eof = true
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
        else
-
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
        end
+      end
 
-
-
-
-
-
-      arg = tokens[2..-1].reduce("") { |acc, t|
-        raise Error, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
-        acc + t.char
-      }
-      node.operator_arg = arg == BLANK ? nil : arg
+      if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
+        errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
+      end
+    end
 
-
-
-
-
-
-
-
-
-      else
-        raise(Error, "Unknown operator #{node.operator}")
-      end
+    def parse_regex!(wildcard)
+      @tokenizer.regex_mode!
+      t = @tokenizer.next
+      reg = Nodes::Regex.new(wildcard, t.val)
+      if t.type == Tokens::TEXT
+        reg.regex = build_regex!(wildcard, t)
+      else
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
      end
+
+      t = @tokenizer.next
+      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
+      reg
+    end
+
+    def build_regex!(wildcard, token, src = token.val)
+      Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+    rescue RegexpError => e
+      errors << Error.new(:regex, token, e.message)
+      nil
    end
  end
 end
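The parser no longer raises on bad input; it accumulates `Error` structs and returns them alongside the nodes, which is what lets `fop --check` report every problem at once. A sketch of calling it directly — the node stringifications are assumptions based on `nodes.rb` above:

```ruby
require 'fop_lang'

nodes, errors = Fop::Parser.new('release-{N+1}').parse
errors             # => []
nodes.map(&:to_s)  # expected ["[txt] release-", "[exp] N + 1"]

_, errors = Fop::Parser.new('release-{N+').parse
errors.empty?      # => false -- each error carries a type, message, and column
```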
data/lib/fop/program.rb
CHANGED
@@ -1,22 +1,16 @@
-require_relative 'tokenizer'
-require_relative 'parser'
-
 module Fop
   class Program
-
-
-    def initialize(src)
-      tokens = Tokenizer.new(src).tokenize!
-      @nodes = Parser.parse! tokens
+    def initialize(instructions)
+      @instructions = instructions
     end
 
     def apply(input)
      input = input.clone
      output =
-        @
-
-        return nil if
-          acc +
+        @instructions.reduce("") { |acc, ins|
+          result = ins.call(input)
+          return nil if result.nil?
+          acc + result.to_s
        }
      input.empty? ? output : nil
    end
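`Program` is now a thin wrapper over compiled instructions: each instruction consumes its portion of a cloned input and the results are concatenated; if any input is left unconsumed, the whole line is rejected. A short sketch — the second result is assumed from that leftover-input rule:

```ruby
require 'fop_lang'

prog, _errors = Fop.compile('foo {N+1}')
prog.apply('foo 1')      # => "foo 2"
prog.apply('foo 1 bar')  # expected nil -- the trailing " bar" is never consumed
```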
data/lib/fop/tokenizer.rb
CHANGED
@@ -1,123 +1,175 @@
+require_relative 'tokens'
+
 module Fop
   class Tokenizer
-
-
-
-    Error = Class.new(StandardError)
+    Token = Struct.new(:pos, :type, :val)
+    Error = Struct.new(:pos, :message)
+    Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
 
-
-
+    EXP_OPEN = "{".freeze
+    EXP_CLOSE = "}".freeze
     ESCAPE = "\\".freeze
     WILDCARD = "*".freeze
-
+    REGEX_DELIM = "/".freeze
+    REGEX_CAPTURE = "$".freeze
+    OP_REPLACE = "=".freeze
+    OP_APPEND = ">".freeze
+    OP_PREPEND = "<".freeze
+    OP_ADD = "+".freeze
+    OP_SUB = "-".freeze
+
+    #
+    # Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
+    # explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
+    # but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
+    # or off to auto-escape certain chars for the next token.
+    #
+    attr_reader :escape
 
     def initialize(src)
       @src = src
       @end = src.size - 1
+      @start_i = 0
+      @i = 0
+      reset_escapes!
     end
 
-
-
-      escape =
-      i = 0
-      until i > @end do
-        char = @src[i]
-        if escape
-          tokens << Char.new(char)
-          escape = false
-          i += 1
-          next
-        end
-
-        case char
-        when ESCAPE
-          escape = true
-          i += 1
-        when OP_OPEN
-          i, op = operation! i + 1
-          tokens << op
-        when OP_CLOSE
-          raise "Unexpected #{OP_CLOSE}"
-        when WILDCARD
-          tokens << :wildcard
-          i += 1
-        else
-          tokens << Char.new(char)
-          i += 1
-        end
-      end
-
-      raise Error, "Trailing escape" if escape
-      tokens
+    # Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
+    def reset_escapes!
+      @escape = Escapes.new(true, true)
     end
 
-
-
-
-      escape =
-
-
+    # Auto-escape anything you'd find in a regular expression
+    def regex_mode!
+      @escape.regex = false # look for the final /
+      @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
+      @escape.wildcards = true
+      @escape.operators = true
+      @escape.regex_capture = true
+      @escape.exp = true
+    end
 
-
-
-
-
-
-
-
+    def next
+      return Token.new(@i, Tokens::EOF) if @i > @end
+      char = @src[@i]
+      case char
+      when EXP_OPEN
+        @i += 1
+        token! Tokens::EXP_OPEN
+      when EXP_CLOSE
+        @i += 1
+        token! Tokens::EXP_CLOSE
+      when WILDCARD
+        @i += 1
+        token! Tokens::WILDCARD, WILDCARD
+      when REGEX_DELIM
+        if @escape.regex
+          get_str!
+        else
+          @i += 1
+          token! Tokens::REG_DELIM
        end
-
-
-
-          escape = true
-          i += 1
-        when OP_OPEN
-          raise "Unexpected #{OP_OPEN}"
-        when OP_CLOSE
-          found_close = true
-          i += 1
-        when REGEX_MARKER
-          i, reg = regex! i + 1
-          tokens << reg
+      when REGEX_CAPTURE
+        if @escape.regex_capture
+          get_str!
        else
-
-
+          @i += 1
+          t = token! Tokens::REG_CAPTURE, @src[@i]
+          @i += 1
+          @start_i = @i
+          t
        end
+      when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+        if @escape.operators
+          get_str!
+        else
+          @i += 1
+          token! Tokens::OPERATOR, char
+        end
+      else
+        get_str!
      end
-
-      raise Error, "Unclosed operation" if !found_close
-      raise Error, "Trailing escape" if escape
-      return i, Op.new(tokens)
    end
 
-
-      escape = false
-      found_close = false
-      src = ""
+    private
 
-
-
-
+    def token!(type, val = nil)
+      t = Token.new(@start_i, type, val)
+      @start_i = @i
+      t
+    end
+
+    def get_str!
+      str = ""
+      escape, found_end = false, false
+      until found_end or @i > @end
+        char = @src[@i]
 
        if escape
-
+          @i += 1
+          str << char
          escape = false
          next
        end
 
        case char
        when ESCAPE
-
-
-
+          @i += 1
+          if @escape.regex_escape and @src[@i] != REGEX_DELIM
+            str << char
+          else
+            escape = true
+          end
+        when EXP_OPEN
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when EXP_CLOSE
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when WILDCARD
+          if @escape.wildcards
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_DELIM
+          if @escape.regex
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_CAPTURE
+          if @escape.regex_capture
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+          if @escape.operators
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
        else
-
+          @i += 1
+          str << char
        end
      end
 
-
-
-      return i, Regex.new(src)
+      return Token.new(@i - 1, Tokens::TR_ESC) if escape
+      token! Tokens::TEXT, str
    end
  end
 end
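The tokenizer is now a pull-based stream: the parser calls `next` repeatedly and flips the escape flags between calls to decide which reserved characters count as plain text. A sketch of pulling the first tokens by hand — the token type constants live in `tokens.rb`, which this diff does not expand, so their exact values are not shown:

```ruby
require 'fop_lang'

tok = Fop::Tokenizer.new('foo {N+1}')
t = tok.next
t.class  # => Fop::Tokenizer::Token (a Struct of :pos, :type, :val)
t.val    # expected "foo " -- text is read up to the "{" that opens an expression
tok.next # the "{" itself, emitted as a Tokens::EXP_OPEN token
```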
data/lib/fop/tokens.rb
ADDED
data/lib/fop/version.rb
CHANGED
data/lib/fop_lang.rb
CHANGED
@@ -1,12 +1,22 @@
 require_relative 'fop/version'
+require_relative 'fop/compiler'
 require_relative 'fop/program'
 
 def Fop(src)
-  ::Fop
+  ::Fop.compile!(src)
 end
 
 module Fop
+  def self.compile!(src)
+    prog, errors = compile(src)
+    # TODO better exception
+    raise "Fop errors: " + errors.map(&:message).join(",") if errors
+    prog
+  end
+
  def self.compile(src)
-
+    instructions, errors = ::Fop::Compiler.compile(src)
+    return nil, errors if errors
+    return Program.new(instructions), nil
  end
 end
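There are now two entry points: `Fop(src)` / `Fop.compile!(src)` raise if the program has errors, while `Fop.compile(src)` returns the program and the error list separately (the form `bin/fop` uses). A sketch of both — the second input is deliberately malformed, and the exact error text is not asserted:

```ruby
require 'fop_lang'

# Raising form -- what the Fop(...) helper delegates to.
prog = Fop.compile!('foo {N+1}')
prog.apply('foo 1')  # => "foo 2"

# Non-raising form -- returns [nil, errors] for a bad program.
prog, errors = Fop.compile('foo {N+')
prog          # => nil
errors.any?   # => true
```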
metadata
CHANGED
@@ -1,26 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: fop_lang
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.7.0
 platform: ruby
 authors:
 - Jordan Hollinger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-
+date: 2021-08-30 00:00:00.000000000 Z
 dependencies: []
 description: A micro expression language for Filter and OPerations on text
 email: jordan.hollinger@gmail.com
-executables:
+executables:
+- fop
 extensions: []
 extra_rdoc_files: []
 files:
 - README.md
+- bin/fop
+- lib/fop/cli.rb
+- lib/fop/compiler.rb
 - lib/fop/nodes.rb
 - lib/fop/parser.rb
 - lib/fop/program.rb
 - lib/fop/tokenizer.rb
+- lib/fop/tokens.rb
 - lib/fop/version.rb
 - lib/fop_lang.rb
 homepage: https://jhollinger.github.io/fop-lang-rb/