fop_lang 0.2.0 → 0.6.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 711af4fad2907616e057049dcb84bce16ffdb381b3601a387dc7260cc16057d3
-  data.tar.gz: 40da554ca0cb21f275748593921bdc616dcf8cdfe5d4fa31494f588f2c25d66c
+  metadata.gz: 7cfe0a53d59b00d2fc60f0c0f5ed3e413eb61cf0ae445d49562f0cf861cd9f0b
+  data.tar.gz: 5aa04d8145417d52b42847be17b54a230242143486278d2826adacb7d1143101
 SHA512:
-  metadata.gz: 0253a6446b88b6de112f00a95c81f1d5f710a859998e5b3d8df21d64caecc8e013ab1edec2b68047558f8053ce135b60082078f85db1f7aa16a93b86aa487093
-  data.tar.gz: c7f0cb0387df52a3ea121e3e91a35e0444afe8862aabe054ec433d889955ee9221129fd0c07f7e36d6c71e6b9ab33f30cd16264447dfda24d29babb671112140
+  metadata.gz: 79efdf103e6e33bc508df356e2295ac49c8f112ad44a0c3aad4209bb204fadcd240b12f14f62ee3266d21762afec1e8a3262b767fda42cc7ebf019688bfbe30e
+  data.tar.gz: 1f424bf43332438a3f223af0737fa6570de8e06444669edd8808e5d41e2804fe9139598f58336ca098a1b67fba8741519375e3700916747acb8df6ddace3294d
data/README.md CHANGED
@@ -1,6 +1,10 @@
 # fop_lang
 
-Fop (Filter and OPperations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
+Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
+
+```ruby
+gem 'fop_lang'
+```
 
 ## Release Number Example
 
@@ -25,7 +29,7 @@ The above expression contains the only two parts of Fop (except for the wildcard
 
 **Text Literals**
 
-A text literal works how it sounds: the input must match it exactly. The only exception is the `*` (wildcard) character, which matches 0 or more of anything. Wildcards can be used anywhere except inside `{...}` (operations).
+A text literal works how it sounds: the input must match it exactly. If it matches, it passes through unchanged. The only exception is the `*` (wildcard) character, which matches 0 or more of anything. Wildcards can be used anywhere except inside `{...}` (operations).
 
 If `\` (escape) is used before the special characters `*`, `{` or `}`, then that character is treated like a text literal. It's recommended to use single-quoted Ruby strings with Fop expressions so that you don't need to double-escape.
 
@@ -33,13 +37,16 @@ If `\` (escape) is used before the special characters `*`, `{` or `}`, then that
 
 Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
 
-1. Matching character class (required): Defines what characters the operation will match and operate on.
+1. Matching class (required): Defines what characters the operation will match and operate on.
   * `N` is the numeric class and will match one or more digits.
   * `A` is the alpha class and will match one or more letters (lower or upper case).
   * `W` is the word class and matches alphanumeric chars and underscores.
   * `*` is the wildcard class and greedily matches everything after it.
+  * `/.../` matches on the supplied regex between the `/`'s. If your regex contains a `/`, it must be escaped. Capture groups may be referenced in the operator argument as `$1`, `$2`, etc.
 3. Operator (optional): What to do to the matching characters.
   * `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
+  * `>` Append the following chars to the matching value.
+  * `<` Prepend the following chars to the matching value.
   * `+` Perform addition on the matching number and the argument (`N` only).
   * `-` Subtract the argument from the matching number (`N` only).
 5. Operator argument (required for some operators): meaning varies by operator.
@@ -53,6 +60,16 @@ Operations are the interesting part of Fop, and are specified between `{` and `}
 => 'release-5.100.0'
 ```
 
+```ruby
+f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
+
+puts f.apply('release-4.99.1')
+=> 'release-5.100.0'
+
+puts f.apply('rel-4.99.1')
+=> 'rel-5.100.0'
+```
+
 ```ruby
 f = Fop('release-*{N=5}.{N+100}.{N=0}')
 
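Based on the operator descriptions above, a rough sketch of what the new append (`>`) and prepend (`<`) operators would look like in use (illustrative expressions, not taken from the gem's own examples):

```ruby
# Hypothetical examples of the new append/prepend operators, inferred from the
# README text above rather than from the gem's docs or tests.
f = Fop('release-{N}.{N}.{N>-rc1}')

puts f.apply('release-4.99.1')
# expected: 'release-4.99.1-rc1'   (`>` appends "-rc1" to the matched number)

puts Fop('{A<v}').apply('one')
# expected: 'vone'                 (`<` prepends "v" to the matched letters)
```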
data/bin/fop ADDED
@@ -0,0 +1,31 @@
+#!/usr/bin/env ruby
+
+# Used for local testing
+# $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
+
+require 'fop_lang'
+require 'fop/cli'
+
+opts = Fop::CLI.options!
+src = opts.src.read.chomp
+fop, errors = Fop.compile(src)
+opts.src.close
+NL = "\n".freeze
+
+if errors
+  $stderr.puts src
+  $stderr.puts errors.join(NL)
+  exit 1
+end
+
+if opts.check
+  $stdout.puts "Syntax OK" unless opts.quiet
+  exit 0
+end
+
+while (line = gets) do
+  line.chomp!
+  if (res = fop.apply(line))
+    print(res << NL)
+  end
+end
data/lib/fop/cli.rb ADDED
@@ -0,0 +1,30 @@
+require 'optparse'
+
+module Fop
+  module CLI
+    Options = Struct.new(:src, :check, :quiet)
+
+    def self.options!
+      options = Options.new
+      OptionParser.new do |opts|
+        opts.banner = "Usage: fop [options] [ 'prog' | -f progfile ] [ file ... ]"
+
+        opts.on("-fFILE", "--file=FILE", "Read program from file instead of first argument") do |f|
+          options.src = File.open(f)
+          options.src.advise(:sequential)
+        end
+
+        opts.on("-c", "--check", "Perform a syntax check on the program and exit") do
+          options.check = true
+        end
+
+        opts.on("-q", "--quiet") do
+          options.quiet = true
+        end
+      end.parse!
+
+      options.src ||= StringIO.new(ARGV.shift)
+      options
+    end
+  end
+end
data/lib/fop/compiler.rb ADDED
@@ -0,0 +1,72 @@
+require_relative 'parser'
+
+module Fop
+  module Compiler
+    def self.compile(src)
+      parser = Parser.new(src)
+      nodes, errors = parser.parse
+
+      instructions = nodes.map { |node|
+        case node
+        when Nodes::Text, Nodes::Regex
+          Instructions.regex_match(node.regex)
+        when Nodes::Expression
+          Instructions::ExpressionMatch.new(node)
+        else
+          raise "Unknown node type #{node}"
+        end
+      }
+
+      return nil, errors if errors.any?
+      return instructions, nil
+    end
+
+    module Instructions
+      BLANK = "".freeze
+      OPERATIONS = {
+        "=" => ->(_val, arg) { arg || BLANK },
+        "+" => ->(val, arg) { val.to_i + arg.to_i },
+        "-" => ->(val, arg) { val.to_i - arg.to_i },
+        ">" => ->(val, arg) { val + arg },
+        "<" => ->(val, arg) { arg + val },
+      }
+
+      def self.regex_match(regex)
+        ->(input) { input.slice! regex }
+      end
+
+      class ExpressionMatch
+        def initialize(node)
+          @regex = node.regex&.regex
+          @op = node.operator ? OPERATIONS.fetch(node.operator) : nil
+          @regex_match = node.regex_match
+          if node.arg&.any? { |a| a.is_a? Integer }
+            @arg, @arg_with_caps = nil, node.arg
+          else
+            @arg = node.arg&.join("")
+            @arg_with_caps = nil
+          end
+        end
+
+        def call(input)
+          if (match = @regex.match(input))
+            val = match.to_s
+            blank = val == BLANK
+            input.sub!(val, BLANK) unless blank
+            found_val = @regex_match || !blank
+            arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
+            @op && found_val ? @op.call(val, arg) : val
+          end
+        end
+
+        private
+
+        def sub_caps(args, caps)
+          args.map { |a|
+            a.is_a?(Integer) ? caps[a].to_s : a
+          }.join("")
+        end
+      end
+    end
+  end
+end
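A rough sketch of how the compiler output above plugs into `Program#apply`: each compiled instruction is a callable that slices its match off the front of the mutable input and returns the transformed section. The program string and values are illustrative assumptions, not taken from the gem's tests:

```ruby
require 'fop_lang'

# Illustrative only: compile a program and run its instructions by hand,
# mirroring what Program#apply does internally.
instructions, errors = Fop::Compiler.compile('release-{N+1}')
raise errors.map(&:to_s).join(', ') if errors

input  = 'release-4'.dup
output = instructions.map { |ins| ins.call(input) }.join
# output => "release-5"; input has been fully consumed ("")
```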
data/lib/fop/nodes.rb CHANGED
@@ -1,70 +1,30 @@
 module Fop
   module Nodes
-    Text = Struct.new(:wildcard, :str) do
-      def consume!(input)
-        @regex ||= Regexp.new((wildcard ? ".*" : "^") + Regexp.escape(str))
-        input.slice!(@regex)
-      end
-
+    Text = Struct.new(:wildcard, :str, :regex) do
       def to_s
        w = wildcard ? "*" : nil
-        "Text #{w}#{str}"
+        "[#{w}txt] #{str}"
      end
    end
 
-    Match = Struct.new(:wildcard, :tokens) do
-      NUM = "N".freeze
-      WORD = "W".freeze
-      ALPHA = "A".freeze
-      WILD = "*".freeze
-      BLANK = "".freeze
-
-      def consume!(input)
-        if (val = input.slice!(@regex))
-          @expression && val != BLANK ? @expression.call(val) : val
-        end
-      end
-
+    Regex = Struct.new(:wildcard, :src, :regex) do
      def to_s
        w = wildcard ? "*" : nil
-        @op ? "#{w}#{@match} #{@op} #{@arg}" : "#{w}#{@match}"
+        "[#{w}reg] #{src}"
      end
+    end
 
-      def parse!
-        match = tokens.shift || raise(ParserError, "Empty match")
-        raise ParserError, "Unexpected #{match}" unless match.is_a? Tokenizer::Char
-
-        @match = match.char
-        @regex =
-          case @match
-          when NUM then Regexp.new((wildcard ? ".*?" : "^") + "[0-9]+")
-          when WORD then Regexp.new((wildcard ? ".*?" : "^") + "\\w+")
-          when ALPHA then Regexp.new((wildcard ? ".*?" : "^") + "[a-zA-Z]+")
-          when WILD then /.*/
-          else raise ParserError, "Unknown match type '#{@match}'"
-          end
-
-        if (op = tokens.shift)
-          raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
-          arg = tokens.reduce("") { |acc, t|
-            raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
-            acc + t.char
-          }
-
-          @op = op.char
-          @arg = arg == BLANK ? nil : arg
-          @expression =
-            case @op
-            when "=" then ->(_) { @arg || BLANK }
-            when "+", "-", "*", "/"
-              raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
-              raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
-              ->(x) { x.to_i.send(@op, @arg.to_i) }
-            else raise ParserError, "Unknown operator #{@op}"
-            end
-        else
-          @op, @arg, @expression = nil, nil, nil
+    Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
+      def to_s
+        w = wildcard ? "*" : nil
+        s = "[#{w}exp] #{match}"
+        if operator
+          arg_str = arg
+            .map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
+            .join("")
+          s << " #{operator} #{arg_str}"
        end
+        s
      end
    end
  end
data/lib/fop/parser.rb CHANGED
@@ -1,93 +1,162 @@
+require_relative 'tokenizer'
 require_relative 'nodes'
 
 module Fop
-  module Parser
-    Error = Class.new(StandardError)
+  class Parser
+    DIGIT = /^[0-9]$/
+    REGEX_START = "^".freeze
+    REGEX_LAZY_WILDCARD = ".*?".freeze
+    REGEX_MATCHES = {
+      "N" => "[0-9]+".freeze,
+      "W" => "\\w+".freeze,
+      "A" => "[a-zA-Z]+".freeze,
+      "*" => ".*".freeze,
+    }.freeze
+    OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
+    TR_REGEX = /.*/
 
-    def self.parse!(tokens)
-      stack = []
-      current_el = nil
+    Error = Struct.new(:type, :token, :message) do
+      def to_s
+        "#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
+      end
+    end
+
+    attr_reader :errors
 
-      tokens.each { |token|
-        case current_el
-        when nil
-          current_el = new_element token
-        when :wildcard
-          current_el = new_element token, true
-          raise Error, "Unexpected * after wildcard" if current_el == :wildcard
-        when Nodes::Text
-          current_el = parse_text stack, current_el, token
-        when Nodes::Match
-          current_el = parse_match stack, current_el, token
+    def initialize(src, debug: false)
+      @tokenizer = Tokenizer.new(src)
+      @errors = []
+    end
+
+    def parse
+      nodes = []
+      wildcard = false
+      eof = false
+      # Top-level parsing. It will always be looking for a String, Regex, or Expression.
+      until eof
+        @tokenizer.reset_escapes!
+        t = @tokenizer.next
+        case t.type
+        when Tokens::WILDCARD
+          errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
+          wildcard = true
+        when Tokens::TEXT
+          reg = build_regex!(wildcard, t, Regexp.escape(t.val))
+          nodes << Nodes::Text.new(wildcard, t.val, reg)
+          wildcard = false
+        when Tokens::EXP_OPEN
+          nodes << parse_exp!(wildcard)
+          wildcard = false
+        when Tokens::REG_DELIM
+          nodes << parse_regex!(wildcard)
+          wildcard = false
+        when Tokens::EOF
+          eof = true
        else
-          raise Error, "Unexpected token #{token} in #{current_el}"
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}")
        end
-      }
-
-      case current_el
-      when nil
-        # noop
-      when :wildcard
-        stack << Nodes::Text.new(true, "")
-      when Nodes::Text
-        stack << current_el
-      when Nodes::Match
-        raise Error, "Unclosed match"
      end
-
-      stack
+      nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
+      return nodes, @errors
    end
 
-    private
+    def parse_exp!(wildcard = false)
+      exp = Nodes::Expression.new(wildcard)
+      parse_exp_match! exp
+      op_token = parse_exp_operator! exp
+      if exp.operator
+        parse_exp_arg! exp, op_token
+      end
+      return exp
+    end
 
-    def self.new_element(token, wildcard = false)
-      case token
-      when Tokenizer::Char
-        Nodes::Text.new(wildcard, token.char.clone)
-      when :match_open
-        Nodes::Match.new(wildcard, [])
-      when :match_close
-        raise ParserError, "Unmatched }"
-      when :wildcard
-        :wildcard
+    def parse_exp_match!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::TEXT, Tokens::WILDCARD
+        exp.match = t.val
+        if (src = REGEX_MATCHES[exp.match])
+          reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+          exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
+        else
+          errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
+        end
+      when Tokens::REG_DELIM
+        exp.regex = parse_regex!(exp.wildcard)
+        exp.match = exp.regex&.src
+        exp.regex_match = true
+        @tokenizer.reset_escapes!
      else
-        raise ParserError, "Unexpected #{token}"
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
      end
    end
 
-    def self.parse_text(stack, text_el, token)
-      case token
-      when :match_open
-        stack << text_el
-        Nodes::Match.new(false, [])
-      when :match_close
-        raise ParserError.new, "Unexpected }"
-      when Tokenizer::Char
-        text_el.str << token.char
-        text_el
-      when :wildcard
-        stack << text_el
-        :wildcard
+    def parse_exp_operator!(exp)
+      @tokenizer.escape.operators = false
+      t = @tokenizer.next
+      case t.type
+      when Tokens::EXP_CLOSE
+        # no op
+      when Tokens::OPERATOR
+        exp.operator = t.val
      else
-        raise ParserError, "Unexpected #{token}"
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
      end
+      t
    end
 
-    def self.parse_match(stack, match_el, token)
-      case token
-      when Tokenizer::Char
-        match_el.tokens << token
-        match_el
-      when :wildcard
-        match_el.tokens << Tokenizer::Char.new("*").freeze
-        match_el
-      when :match_close
-        match_el.parse!
-        stack << match_el
-        nil
+    def parse_exp_arg!(exp, op_token)
+      @tokenizer.escape.operators = true
+      @tokenizer.escape.regex = true
+      @tokenizer.escape.regex_capture = false if exp.regex_match
+
+      exp.arg = []
+      found_close, eof = false, false
+      until found_close or eof
+        t = @tokenizer.next
+        case t.type
+        when Tokens::TEXT
+          exp.arg << t.val
+        when Tokens::REG_CAPTURE
+          exp.arg << t.val.to_i - 1
+          errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
+          errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
+        when Tokens::EXP_CLOSE
+          found_close = true
+        when Tokens::EOF
+          eof = true
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
+        else
+          errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
+        end
+      end
+
+      if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
+        errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
+      end
+    end
+
+    def parse_regex!(wildcard)
+      @tokenizer.regex_mode!
+      t = @tokenizer.next
+      reg = Nodes::Regex.new(wildcard, t.val)
+      if t.type == Tokens::TEXT
+        reg.regex = build_regex!(wildcard, t)
      else
-        raise ParserError, "Unexpected #{token}"
+        errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
      end
+
+      t = @tokenizer.next
+      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
+      reg
+    end
+
+    def build_regex!(wildcard, token, src = token.val)
+      Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
+    rescue RegexpError => e
+      errors << Error.new(:regex, token, e.message)
+      nil
    end
  end
 end
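For reference, a sketch of what the new `Parser#parse` API returns; the node strings are inferred from the `to_s` methods in nodes.rb above and should be treated as assumptions rather than documented output:

```ruby
require 'fop_lang'  # pulls in Fop::Parser via the compiler

parser = Fop::Parser.new('release-{N+1}')
nodes, errors = parser.parse

errors             # => [] for a valid program
nodes.map(&:to_s)  # => ["[txt] release-", "[exp] N + 1"] (per Nodes::Text#to_s / Expression#to_s)
```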
data/lib/fop/program.rb CHANGED
@@ -1,22 +1,16 @@
-require_relative 'tokenizer'
-require_relative 'parser'
-
 module Fop
   class Program
-    attr_reader :nodes
-
-    def initialize(src)
-      tokens = Tokenizer.tokenize! src
-      @nodes = Parser.parse! tokens
+    def initialize(instructions)
+      @instructions = instructions
    end
 
    def apply(input)
      input = input.clone
      output =
-        @nodes.reduce("") { |acc, token|
-          section = token.consume!(input)
-          return nil if section.nil?
-          acc + section.to_s
+        @instructions.reduce("") { |acc, ins|
+          result = ins.call(input)
+          return nil if result.nil?
+          acc + result.to_s
        }
      input.empty? ? output : nil
    end
data/lib/fop/tokenizer.rb CHANGED
@@ -1,34 +1,175 @@
+require_relative 'tokens'
+
 module Fop
-  module Tokenizer
-    Char = Struct.new(:char)
-    Error = Class.new(StandardError)
-
-    def self.tokenize!(src)
-      tokens = []
-      escape = false
-      src.each_char { |char|
+  class Tokenizer
+    Token = Struct.new(:pos, :type, :val)
+    Error = Struct.new(:pos, :message)
+    Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
+
+    EXP_OPEN = "{".freeze
+    EXP_CLOSE = "}".freeze
+    ESCAPE = "\\".freeze
+    WILDCARD = "*".freeze
+    REGEX_DELIM = "/".freeze
+    REGEX_CAPTURE = "$".freeze
+    OP_REPLACE = "=".freeze
+    OP_APPEND = ">".freeze
+    OP_PREPEND = "<".freeze
+    OP_ADD = "+".freeze
+    OP_SUB = "-".freeze
+
+    #
+    # Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
+    # explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
+    # but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
+    # or off to auto-escape certain chars for the next token.
+    #
+    attr_reader :escape
+
+    def initialize(src)
+      @src = src
+      @end = src.size - 1
+      @start_i = 0
+      @i = 0
+      reset_escapes!
+    end
+
+    # Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
+    def reset_escapes!
+      @escape = Escapes.new(true, true)
+    end
+
+    # Auto-escape anything you'd find in a regular expression
+    def regex_mode!
+      @escape.regex = false # look for the final /
+      @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
+      @escape.wildcards = true
+      @escape.operators = true
+      @escape.regex_capture = true
+      @escape.exp = true
+    end
+
+    def next
+      return Token.new(@i, Tokens::EOF) if @i > @end
+      char = @src[@i]
+      case char
+      when EXP_OPEN
+        @i += 1
+        token! Tokens::EXP_OPEN
+      when EXP_CLOSE
+        @i += 1
+        token! Tokens::EXP_CLOSE
+      when WILDCARD
+        @i += 1
+        token! Tokens::WILDCARD, WILDCARD
+      when REGEX_DELIM
+        if @escape.regex
+          get_str!
+        else
+          @i += 1
+          token! Tokens::REG_DELIM
+        end
+      when REGEX_CAPTURE
+        if @escape.regex_capture
+          get_str!
+        else
+          @i += 1
+          t = token! Tokens::REG_CAPTURE, @src[@i]
+          @i += 1
+          @start_i = @i
+          t
+        end
+      when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+        if @escape.operators
+          get_str!
+        else
+          @i += 1
+          token! Tokens::OPERATOR, char
+        end
+      else
+        get_str!
+      end
+    end
+
+    private
+
+    def token!(type, val = nil)
+      t = Token.new(@start_i, type, val)
+      @start_i = @i
+      t
+    end
+
+    def get_str!
+      str = ""
+      escape, found_end = false, false
+      until found_end or @i > @end
+        char = @src[@i]
+
        if escape
-          tokens << Char.new(char)
+          @i += 1
+          str << char
          escape = false
          next
        end
 
        case char
-        when "\\".freeze
-          escape = true
-        when "{".freeze
-          tokens << :match_open
-        when "}".freeze
-          tokens << :match_close
-        when "*".freeze
-          tokens << :wildcard
+        when ESCAPE
+          @i += 1
+          if @escape.regex_escape and @src[@i] != REGEX_DELIM
+            str << char
+          else
+            escape = true
+          end
+        when EXP_OPEN
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when EXP_CLOSE
+          if @escape.exp
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when WILDCARD
+          if @escape.wildcards
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_DELIM
+          if @escape.regex
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when REGEX_CAPTURE
+          if @escape.regex_capture
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
+        when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
+          if @escape.operators
+            @i += 1
+            str << char
+          else
+            found_end = true
+          end
        else
-          tokens << Char.new(char)
+          @i += 1
+          str << char
        end
-      }
+      end
 
-      raise Error, "Trailing escape" if escape
-      tokens
+      return Token.new(@i - 1, Tokens::TR_ESC) if escape
+      token! Tokens::TEXT, str
    end
  end
 end
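A small sketch of the tokenizer under its default (top-level) escapes, with token values inferred from the code above rather than taken from the gem's tests:

```ruby
require 'fop_lang'

t = Fop::Tokenizer.new('release-{N+1}')
t.next  # => pos=0, type=:TXT, val="release-"  ("-" is auto-escaped at the top level)
t.next  # => pos=8, type=:"{", val=nil         (EXP_OPEN token)
```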
data/lib/fop/tokens.rb ADDED
@@ -0,0 +1,13 @@
+module Fop
+  module Tokens
+    TEXT = :TXT
+    EXP_OPEN = :"{"
+    EXP_CLOSE = :"}"
+    REG_CAPTURE = :"$"
+    REG_DELIM = :/
+    WILDCARD = :*
+    OPERATOR = :op
+    TR_ESC = :"trailing escape"
+    EOF = :EOF
+  end
+end
data/lib/fop/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Fop
-  VERSION = "0.2.0"
+  VERSION = "0.6.0"
 end
data/lib/fop_lang.rb CHANGED
@@ -1,12 +1,22 @@
 require_relative 'fop/version'
+require_relative 'fop/compiler'
 require_relative 'fop/program'
 
 def Fop(src)
-  ::Fop::Program.new(src)
+  ::Fop.compile!(src)
 end
 
 module Fop
+  def self.compile!(src)
+    prog, errors = compile(src)
+    # TODO better exception
+    raise "Fop errors: " + errors.map(&:message).join(",") if errors
+    prog
+  end
+
  def self.compile(src)
-    Program.new(src)
+    instructions, errors = ::Fop::Compiler.compile(src)
+    return nil, errors if errors
+    return Program.new(instructions), nil
  end
 end
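The two entry points above differ in error handling: `Fop()` raises via `compile!`, while `Fop.compile` returns the errors. An illustrative sketch (the invalid program is made up):

```ruby
require 'fop_lang'

# Raising form: Fop() calls Fop.compile! internally.
prog = Fop('release-{N+1}')
prog.apply('release-4')   # => "release-5"

# Non-raising form: returns [nil, errors] on bad input.
prog, errors = Fop.compile('release-{N !}')   # hypothetical invalid expression
if errors
  errors.each { |e| warn e.to_s }   # e.g. "Syntax error: ... at column ..."
end
```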
metadata CHANGED
@@ -1,26 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: fop_lang
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Jordan Hollinger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-15 00:00:00.000000000 Z
+date: 2021-08-20 00:00:00.000000000 Z
 dependencies: []
 description: A micro expression language for Filter and OPerations on text
 email: jordan.hollinger@gmail.com
-executables: []
+executables:
+- fop
 extensions: []
 extra_rdoc_files: []
 files:
 - README.md
+- bin/fop
+- lib/fop/cli.rb
+- lib/fop/compiler.rb
 - lib/fop/nodes.rb
 - lib/fop/parser.rb
 - lib/fop/program.rb
 - lib/fop/tokenizer.rb
+- lib/fop/tokens.rb
 - lib/fop/version.rb
 - lib/fop_lang.rb
 homepage: https://jhollinger.github.io/fop-lang-rb/