fop_lang 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8ed95bb4708820a186e6485cc29dbb47286b0f309a1caf91af7778d768b0efb3
4
- data.tar.gz: 85d41728ddae13f3667f0a2d55a5c4dcbc26e8217d4f31466e6ed92038859881
3
+ metadata.gz: 1166e1e43fd54ed2263db8a37ff288431a6152543869d961f007a134483f1a4b
4
+ data.tar.gz: 17e55b17448c38a37afb6e24a5798c87de368a352bdb0c74df13ad865a7e3ad0
5
5
  SHA512:
6
- metadata.gz: e650bdf66d8d0b5dcb603eae494f38d4969a19647053b4e81e0612705f5b16a5755d007e4ce01fad9b487224d66eff462738f1e37b011ba2a4bf4a45b0203bb3
7
- data.tar.gz: 99b31736236785cecc85b9bb23ccc3e366713cd23dd04c992a8fff6676a316d5741bfe5058e17cd0812c43d8bf1979aa7546184386018c1f92ed2462a85eb5fb
6
+ metadata.gz: cbf5d8c7f6c10ca395518cbd7bf0e9083b1f217b3523386a482f19e5cdf47a16aa7ffb50a6290f89f0ec80ba113900c499944fdf35b7e2e71a941d537483a7e1
7
+ data.tar.gz: bff3c613a575687d0d3223c5bd60bb1128b0ae78accf2e5101228c3ec14d61f46cdc798855af9d07692de0b25ba01737dbda97409d4e8f44881e9bbe6da9c523
@@ -0,0 +1,72 @@
1
+ require_relative 'parser'
2
+
3
module Fop
  # Compiles Fop source text into a list of callable instructions.
  module Compiler
    # Parses +src+ and turns the resulting nodes into instructions.
    #
    # @param src [String] the Fop program source
    # @return [Array] +[instructions, nil]+ on success, or +[nil, errors]+ when
    #   the parser reported one or more errors
    # @raise [RuntimeError] if the parser yields a node of an unknown type
    def self.compile(src)
      parser = Parser.new(src)
      nodes, errors = parser.parse
      # Report parser errors before compiling anything: nodes that were
      # produced alongside errors may be incomplete.
      return nil, errors if errors.any?

      instructions = nodes.map { |node|
        case node
        when Nodes::Text, Nodes::Regex
          Instructions.regex_match(node.regex)
        when Nodes::Expression
          Instructions::ExpressionMatch.new(node)
        else
          raise "Unknown node type #{node}"
        end
      }

      return instructions, nil
    end

    # The callables a compiled program is made of. Each one takes the
    # remaining input, consumes part of it in place, and returns the text to
    # emit (or nil when it does not match).
    module Instructions
      BLANK = "".freeze

      # Operator symbol => lambda taking (matched value, argument).
      OPERATIONS = {
        "=" => ->(_val, arg) { arg || BLANK },
        "+" => ->(val, arg) { val.to_i + arg.to_i },
        "-" => ->(val, arg) { val.to_i - arg.to_i },
        ">" => ->(val, arg) { val + arg },
        "<" => ->(val, arg) { arg + val },
      }.freeze

      # Builds an instruction that slices the first match of +regex+ out of
      # the input and returns it (nil when there is no match).
      def self.regex_match(regex)
        ->(input) { input.slice! regex }
      end

      # Instruction for an expression node: match, consume, optionally apply
      # an operator to the matched value.
      class ExpressionMatch
        # @param node [#regex, #operator, #regex_match, #arg] an expression
        #   node; +node.regex+ wraps the compiled Regexp in its own +regex+
        #   attribute and +node.arg+ is an array of strings and Integer
        #   capture indexes (or nil)
        # @raise [KeyError] if +node.operator+ is not a known operator
        def initialize(node)
          @regex = node.regex&.regex
          @op = node.operator ? OPERATIONS.fetch(node.operator) : nil
          @regex_match = node.regex_match
          if node.arg&.any? { |a| a.is_a? Integer }
            # Integers are capture-group indexes, resolved per match in #call.
            @arg, @arg_with_caps = nil, node.arg
          else
            @arg = node.arg&.join("")
            @arg_with_caps = nil
          end
        end

        # Consumes the match from +input+ (mutating it) and returns the
        # resulting value.
        #
        # @param input [String] remaining input; the matched span is removed
        # @return [String, Integer, nil] the matched text, the operator's
        #   result, or nil when the regex does not match
        def call(input)
          match = @regex.match(input)
          return nil unless match

          val = match.to_s
          blank = val == BLANK
          # Remove exactly the span that matched; deleting by text could hit
          # an earlier, unrelated occurrence of the same characters.
          input[match.begin(0)...match.end(0)] = BLANK unless blank
          # A blank match only counts as "found" for explicit regex matches.
          found_val = @regex_match || !blank
          arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
          @op && found_val ? @op.call(val, arg) : val
        end

        private

        # Joins +args+ into one string, replacing each Integer with the
        # capture at that index (blank when the capture is missing).
        def sub_caps(args, caps)
          args.map { |a|
            a.is_a?(Integer) ? caps[a].to_s : a
          }.join("")
        end
      end
    end
  end
end
data/lib/fop/nodes.rb CHANGED
@@ -1,47 +1,31 @@
1
1
  module Fop
2
2
  module Nodes
3
- Text = Struct.new(:wildcard, :str) do
4
- def consume!(input)
5
- @regex ||= Regexp.new((wildcard ? ".*" : "^") + Regexp.escape(str))
6
- input.slice!(@regex)
7
- end
8
-
3
+ Text = Struct.new(:wildcard, :str, :regex) do
9
4
  def to_s
10
5
  w = wildcard ? "*" : nil
11
- "Text #{w}#{str}"
6
+ "[#{w}txt] #{str}"
12
7
  end
13
8
  end
14
9
 
15
- Op = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :operator_arg, :operator_arg_w_caps, :expression) do
16
- def consume!(input)
17
- if (match = regex.match(input))
18
- val = match.to_s
19
- blank = val == Parser::BLANK
20
- input.sub!(val, Parser::BLANK) unless blank
21
- found_val = regex_match || !blank
22
- arg = operator_arg_w_caps ? sub_caps(operator_arg_w_caps, match.captures) : operator_arg
23
- expression && found_val ? expression.call(val, operator, arg) : val
24
- end
10
+ Regex = Struct.new(:wildcard, :src, :regex) do
11
+ def to_s
12
+ w = wildcard ? "*" : nil
13
+ "[#{w}reg] #{src}"
25
14
  end
15
+ end
26
16
 
17
+ Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
27
18
  def to_s
28
19
  w = wildcard ? "*" : nil
29
- s = "#{w}#{match}"
30
- s << " #{operator} #{operator_arg}" if operator
20
+ s = "[#{w}exp] #{match}"
21
+ if operator
22
+ arg_str = arg
23
+ .map { |a| a.is_a?(Integer) ? "$#{a+1}" : a.to_s }
24
+ .join("")
25
+ s << " #{operator} #{arg_str}"
26
+ end
31
27
  s
32
28
  end
33
-
34
- private
35
-
36
- def sub_caps(tokens, caps)
37
- tokens.map { |t|
38
- case t
39
- when String then t
40
- when Parser::CaptureGroup then caps[t.index].to_s
41
- else raise Parser::Error, "Unexpected #{t} in capture group"
42
- end
43
- }.join("")
44
- end
45
29
  end
46
30
  end
47
31
  end
data/lib/fop/parser.rb CHANGED
@@ -1,181 +1,162 @@
1
+ require_relative 'tokenizer'
1
2
  require_relative 'nodes'
2
3
 
3
4
  module Fop
4
- module Parser
5
- Error = Class.new(StandardError)
6
- CaptureGroup = Struct.new(:index)
5
+ class Parser
6
+ DIGIT = /^[0-9]$/
7
+ REGEX_START = "^".freeze
8
+ REGEX_LAZY_WILDCARD = ".*?".freeze
9
+ REGEX_MATCHES = {
10
+ "N" => "[0-9]+".freeze,
11
+ "W" => "\\w+".freeze,
12
+ "A" => "[a-zA-Z]+".freeze,
13
+ "*" => ".*".freeze,
14
+ }.freeze
15
+ OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
16
+ TR_REGEX = /.*/
17
+
18
+ Error = Struct.new(:type, :token, :message) do
19
+ def to_s
20
+ "#{type.to_s.capitalize} error: #{message} at column #{token.pos}"
21
+ end
22
+ end
7
23
 
8
- MATCH_NUM = "N".freeze
9
- MATCH_WORD = "W".freeze
10
- MATCH_ALPHA = "A".freeze
11
- MATCH_WILD = "*".freeze
12
- BLANK = "".freeze
13
- OP_REPLACE = "=".freeze
14
- OP_APPEND = ">".freeze
15
- OP_PREPEND = "<".freeze
16
- OP_ADD = "+".freeze
17
- OP_SUB = "-".freeze
18
- OP_MUL = "*".freeze
19
- OP_DIV = "/".freeze
20
- VAR = "$".freeze
21
- CAP_NUM = /^[1-9]$/
24
+ attr_reader :errors
22
25
 
23
- EXP_REPLACE = ->(_val, _op, arg) { arg || BLANK }
24
- EXP_MATH = ->(val, op, arg) { val.to_i.send(op, arg.to_i) }
25
- EXP_APPEND = ->(val, _op, arg) { val + arg }
26
- EXP_PREPEND = ->(val, _op, arg) { arg + val }
26
+ def initialize(src, debug: false)
27
+ @tokenizer = Tokenizer.new(src)
28
+ @errors = []
29
+ end
27
30
 
28
- def self.parse!(tokens)
31
+ def parse
29
32
  nodes = []
30
- curr_node = nil
31
-
32
- tokens.each { |token|
33
- case curr_node
34
- when nil
35
- curr_node = new_node token
36
- when :wildcard
37
- curr_node = new_node token, true
38
- raise Error, "Unexpected * after wildcard" if curr_node == :wildcard
39
- when Nodes::Text
40
- curr_node, finished_node = parse_text curr_node, token
41
- nodes << finished_node if finished_node
42
- when Nodes::Op
43
- nodes << curr_node
44
- curr_node = new_node token
33
+ wildcard = false
34
+ eof = false
35
+ # Top-level parsing. It will always be looking for a String, Regex, or Expression.
36
+ until eof
37
+ @tokenizer.reset_escapes!
38
+ t = @tokenizer.next
39
+ case t.type
40
+ when Tokens::WILDCARD
41
+ errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
42
+ wildcard = true
43
+ when Tokens::TEXT
44
+ reg = build_regex!(wildcard, t, Regexp.escape(t.val))
45
+ nodes << Nodes::Text.new(wildcard, t.val, reg)
46
+ wildcard = false
47
+ when Tokens::EXP_OPEN
48
+ nodes << parse_exp!(wildcard)
49
+ wildcard = false
50
+ when Tokens::REG_DELIM
51
+ nodes << parse_regex!(wildcard)
52
+ wildcard = false
53
+ when Tokens::EOF
54
+ eof = true
45
55
  else
46
- raise Error, "Unexpected node #{curr_node}"
56
+ errors << Error.new(:syntax, t, "Unexpected #{t.type}")
47
57
  end
48
- }
49
-
50
- case curr_node
51
- when nil
52
- # noop
53
- when :wildcard
54
- nodes << Nodes::Text.new(true, "")
55
- when Nodes::Text, Nodes::Op
56
- nodes << curr_node
57
- else
58
- raise Error, "Unexpected end node #{curr_node}"
59
58
  end
60
-
61
- nodes
59
+ nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
60
+ return nodes, @errors
62
61
  end
63
62
 
64
- private
63
+ def parse_exp!(wildcard = false)
64
+ exp = Nodes::Expression.new(wildcard)
65
+ parse_exp_match! exp
66
+ op_token = parse_exp_operator! exp
67
+ if exp.operator
68
+ parse_exp_arg! exp, op_token
69
+ end
70
+ return exp
71
+ end
65
72
 
66
- def self.new_node(token, wildcard = false)
67
- case token
68
- when Tokenizer::Char
69
- Nodes::Text.new(wildcard, token.char.clone)
70
- when Tokenizer::Op
71
- op = Nodes::Op.new(wildcard)
72
- parse_op! op, token
73
- op
74
- when :wildcard
75
- :wildcard
73
+ def parse_exp_match!(exp)
74
+ @tokenizer.escape.operators = false
75
+ t = @tokenizer.next
76
+ case t.type
77
+ when Tokens::TEXT, Tokens::WILDCARD
78
+ exp.match = t.val
79
+ if (src = REGEX_MATCHES[exp.match])
80
+ reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
81
+ exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
82
+ else
83
+ errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
84
+ end
85
+ when Tokens::REG_DELIM
86
+ exp.regex = parse_regex!(exp.wildcard)
87
+ exp.match = exp.regex&.src
88
+ exp.regex_match = true
89
+ @tokenizer.reset_escapes!
76
90
  else
77
- raise Error, "Unexpected #{token}"
91
+ errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
78
92
  end
79
93
  end
80
94
 
81
- # @return current node
82
- # @return finished node
83
- def self.parse_text(node, token)
84
- case token
85
- when Tokenizer::Char
86
- node.str << token.char
87
- return node, nil
88
- when Tokenizer::Op
89
- op = new_node token
90
- return op, node
91
- when :wildcard
92
- return :wildcard, node
95
+ def parse_exp_operator!(exp)
96
+ @tokenizer.escape.operators = false
97
+ t = @tokenizer.next
98
+ case t.type
99
+ when Tokens::EXP_CLOSE
100
+ # no op
101
+ when Tokens::OPERATOR
102
+ exp.operator = t.val
93
103
  else
94
- raise Error, "Unexpected #{token}"
104
+ errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
95
105
  end
106
+ t
96
107
  end
97
108
 
98
- def self.parse_op!(node, token)
99
- # parse the matching type
100
- node.regex =
101
- case token.match
102
- when Tokenizer::Char
103
- node.match = token.match.char
104
- node.regex_match = false
105
- case node.match
106
- when MATCH_NUM then Regexp.new((node.wildcard ? ".*?" : "^") + "[0-9]+")
107
- when MATCH_WORD then Regexp.new((node.wildcard ? ".*?" : "^") + "\\w+")
108
- when MATCH_ALPHA then Regexp.new((node.wildcard ? ".*?" : "^") + "[a-zA-Z]+")
109
- when MATCH_WILD then /.*/
110
- else raise Error, "Unknown match type '#{node.match}'"
111
- end
112
- when Tokenizer::Regex
113
- node.match = "/#{token.match.src}/"
114
- node.regex_match = true
115
- Regexp.new((node.wildcard ? ".*?" : "^") + token.match.src)
116
- when nil
117
- raise Error, "Empty operation"
109
+ def parse_exp_arg!(exp, op_token)
110
+ @tokenizer.escape.operators = true
111
+ @tokenizer.escape.regex = true
112
+ @tokenizer.escape.regex_capture = false if exp.regex_match
113
+
114
+ exp.arg = []
115
+ found_close, eof = false, false
116
+ until found_close or eof
117
+ t = @tokenizer.next
118
+ case t.type
119
+ when Tokens::TEXT
120
+ exp.arg << t.val
121
+ when Tokens::REG_CAPTURE
122
+ exp.arg << t.val.to_i - 1
123
+ errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
124
+ errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
125
+ when Tokens::EXP_CLOSE
126
+ found_close = true
127
+ when Tokens::EOF
128
+ eof = true
129
+ errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
118
130
  else
119
- raise Error, "Unexpected #{token.match}"
131
+ errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
120
132
  end
133
+ end
121
134
 
122
- # parse the operator (if any)
123
- if token.operator
124
- raise Error, "Unexpected #{token.operator} for operator" unless token.operator.is_a? Tokenizer::Char
125
- node.operator = token.operator.char
126
- node.operator_arg = token.arg if token.arg and token.arg != BLANK
127
- node.operator_arg_w_caps = parse_captures! node.operator_arg if node.operator_arg and node.regex_match
128
- node.expression =
129
- case node.operator
130
- when OP_REPLACE
131
- EXP_REPLACE
132
- when OP_ADD, OP_SUB, OP_MUL, OP_DIV
133
- raise Error, "Operator #{node.operator} is only available for numeric matches" unless node.match == MATCH_NUM
134
- raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
135
- EXP_MATH
136
- when OP_APPEND
137
- raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
138
- EXP_APPEND
139
- when OP_PREPEND
140
- raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
141
- EXP_PREPEND
142
- else
143
- raise Error, "Unknown operator #{node.operator}"
144
- end
135
+ if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
136
+ errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
145
137
  end
146
138
  end
147
139
 
148
- def self.parse_captures!(arg)
149
- i = 0
150
- iend = arg.size - 1
151
- escape = false
152
- nodes = []
153
-
154
- until i > iend
155
- char = arg[i]
156
- i += 1
157
-
158
- if escape
159
- nodes << char
160
- escape = false
161
- next
162
- end
163
-
164
- case char
165
- when Tokenizer::ESCAPE
166
- escape = true
167
- when VAR
168
- num = arg[i].to_s
169
- raise Error, "Capture group number must be between 1 and 9; found '#{num}'" unless num =~ CAP_NUM
170
- nodes << CaptureGroup.new(num.to_i - 1)
171
- i += 1
172
- else
173
- nodes << char
174
- end
140
+ def parse_regex!(wildcard)
141
+ @tokenizer.regex_mode!
142
+ t = @tokenizer.next
143
+ reg = Nodes::Regex.new(wildcard, t.val)
144
+ if t.type == Tokens::TEXT
145
+ reg.regex = build_regex!(wildcard, t)
146
+ else
147
+ errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
175
148
  end
176
149
 
177
- raise Error, "Trailing escape" if escape
178
- nodes
150
+ t = @tokenizer.next
151
+ errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
152
+ reg
153
+ end
154
+
155
+ def build_regex!(wildcard, token, src = token.val)
156
+ Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
157
+ rescue RegexpError => e
158
+ errors << Error.new(:regex, token, e.message)
159
+ nil
179
160
  end
180
161
  end
181
162
  end
data/lib/fop/program.rb CHANGED
@@ -1,22 +1,16 @@
1
- require_relative 'tokenizer'
2
- require_relative 'parser'
3
-
4
1
  module Fop
5
2
  class Program
6
- attr_reader :nodes
7
-
8
- def initialize(src)
9
- tokens = Tokenizer.new(src).tokenize!
10
- @nodes = Parser.parse! tokens
3
# Stores the compiled instruction list; each instruction is later invoked
# with +call(input)+ by #apply.
#
# @param instructions [Array<#call>] instructions in execution order
def initialize(instructions)
  @instructions = instructions
end
12
6
 
13
7
  def apply(input)
14
8
  input = input.clone
15
9
  output =
16
- @nodes.reduce("") { |acc, token|
17
- section = token.consume!(input)
18
- return nil if section.nil?
19
- acc + section.to_s
10
+ @instructions.reduce("") { |acc, ins|
11
+ result = ins.call(input)
12
+ return nil if result.nil?
13
+ acc + result.to_s
20
14
  }
21
15
  input.empty? ? output : nil
22
16
  end
data/lib/fop/tokenizer.rb CHANGED
@@ -1,144 +1,175 @@
1
+ require_relative 'tokens'
2
+
1
3
  module Fop
2
4
  class Tokenizer
3
- Char = Struct.new(:char)
4
- Op = Struct.new(:match, :operator, :arg)
5
- Regex = Struct.new(:src)
6
- Error = Class.new(StandardError)
5
+ Token = Struct.new(:pos, :type, :val)
6
+ Error = Struct.new(:pos, :message)
7
+ Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
7
8
 
8
- OP_OPEN = "{".freeze
9
- OP_CLOSE = "}".freeze
9
+ EXP_OPEN = "{".freeze
10
+ EXP_CLOSE = "}".freeze
10
11
  ESCAPE = "\\".freeze
11
12
  WILDCARD = "*".freeze
12
- REGEX_MARKER = "/".freeze
13
+ REGEX_DELIM = "/".freeze
14
+ REGEX_CAPTURE = "$".freeze
15
+ OP_REPLACE = "=".freeze
16
+ OP_APPEND = ">".freeze
17
+ OP_PREPEND = "<".freeze
18
+ OP_ADD = "+".freeze
19
+ OP_SUB = "-".freeze
20
+
21
+ #
22
+ # Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
23
+ # explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
24
+ # but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
25
+ # or off to auto-escape certain chars for the next token.
26
+ #
27
+ attr_reader :escape
13
28
 
14
29
  def initialize(src)
15
30
  @src = src
16
31
  @end = src.size - 1
32
+ @start_i = 0
33
+ @i = 0
34
+ reset_escapes!
17
35
  end
18
36
 
19
- def tokenize!
20
- tokens = []
21
- escape = false
22
- i = 0
23
- until i > @end do
24
- char = @src[i]
25
- i += 1
26
-
27
- if escape
28
- tokens << Char.new(char)
29
- escape = false
30
- next
31
- end
32
-
33
- case char
34
- when ESCAPE
35
- escape = true
36
- when OP_OPEN
37
- i, op = operation! i
38
- tokens << op
39
- when OP_CLOSE
40
- raise "Unexpected #{OP_CLOSE}"
41
- when WILDCARD
42
- tokens << :wildcard
43
- else
44
- tokens << Char.new(char)
45
- end
46
- end
47
-
48
- raise Error, "Trailing escape" if escape
49
- tokens
37
+ # Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
38
+ def reset_escapes!
39
+ @escape = Escapes.new(true, true)
50
40
  end
51
41
 
52
- private
53
-
54
- def operation!(i)
55
- found_close = false
56
- op = Op.new(nil, nil, "")
42
+ # Auto-escape anything you'd find in a regular expression
43
+ def regex_mode!
44
+ @escape.regex = false # look for the final /
45
+ @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
46
+ @escape.wildcards = true
47
+ @escape.operators = true
48
+ @escape.regex_capture = true
49
+ @escape.exp = true
50
+ end
57
51
 
58
- # Find matcher
59
- until found_close or op.match or i > @end do
60
- char = @src[i]
61
- i += 1
62
- case char
63
- when OP_CLOSE
64
- found_close = true
65
- when REGEX_MARKER
66
- i, reg = regex! i
67
- op.match = reg
52
+ def next
53
+ return Token.new(@i, Tokens::EOF) if @i > @end
54
+ char = @src[@i]
55
+ case char
56
+ when EXP_OPEN
57
+ @i += 1
58
+ token! Tokens::EXP_OPEN
59
+ when EXP_CLOSE
60
+ @i += 1
61
+ token! Tokens::EXP_CLOSE
62
+ when WILDCARD
63
+ @i += 1
64
+ token! Tokens::WILDCARD, WILDCARD
65
+ when REGEX_DELIM
66
+ if @escape.regex
67
+ get_str!
68
68
  else
69
- op.match = Char.new(char)
69
+ @i += 1
70
+ token! Tokens::REG_DELIM
70
71
  end
71
- end
72
-
73
- # Find operator
74
- until found_close or op.operator or i > @end do
75
- char = @src[i]
76
- i += 1
77
- case char
78
- when OP_CLOSE
79
- found_close = true
72
+ when REGEX_CAPTURE
73
+ if @escape.regex_capture
74
+ get_str!
80
75
  else
81
- op.operator = Char.new(char)
82
- end
83
- end
84
-
85
- # Find operator arg
86
- escape = false
87
- until found_close or i > @end do
88
- char = @src[i]
89
- i += 1
90
-
91
- if escape
92
- op.arg << char
93
- escape = false
94
- next
76
+ @i += 1
77
+ t = token! Tokens::REG_CAPTURE, @src[@i]
78
+ @i += 1
79
+ @start_i = @i
80
+ t
95
81
  end
96
-
97
- case char
98
- when ESCAPE
99
- escape = true
100
- when OP_OPEN
101
- raise "Unexpected #{OP_OPEN}"
102
- when OP_CLOSE
103
- found_close = true
82
+ when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
83
+ if @escape.operators
84
+ get_str!
104
85
  else
105
- op.arg << char
86
+ @i += 1
87
+ token! Tokens::OPERATOR, char
106
88
  end
89
+ else
90
+ get_str!
107
91
  end
108
-
109
- raise Error, "Unclosed operation" if !found_close
110
- raise Error, "Trailing escape" if escape
111
- return i, op
112
92
  end
113
93
 
114
- def regex!(i)
115
- escape = false
116
- found_close = false
117
- src = ""
94
+ private
95
+
96
+ def token!(type, val = nil)
97
+ t = Token.new(@start_i, type, val)
98
+ @start_i = @i
99
+ t
100
+ end
118
101
 
119
- until found_close or i > @end
120
- char = @src[i]
121
- i += 1
102
+ def get_str!
103
+ str = ""
104
+ escape, found_end = false, false
105
+ until found_end or @i > @end
106
+ char = @src[@i]
122
107
 
123
108
  if escape
124
- src << char
109
+ @i += 1
110
+ str << char
125
111
  escape = false
126
112
  next
127
113
  end
128
114
 
129
115
  case char
130
116
  when ESCAPE
131
- escape = true
132
- when REGEX_MARKER
133
- found_close = true
117
+ @i += 1
118
+ if @escape.regex_escape and @src[@i] != REGEX_DELIM
119
+ str << char
120
+ else
121
+ escape = true
122
+ end
123
+ when EXP_OPEN
124
+ if @escape.exp
125
+ @i += 1
126
+ str << char
127
+ else
128
+ found_end = true
129
+ end
130
+ when EXP_CLOSE
131
+ if @escape.exp
132
+ @i += 1
133
+ str << char
134
+ else
135
+ found_end = true
136
+ end
137
+ when WILDCARD
138
+ if @escape.wildcards
139
+ @i += 1
140
+ str << char
141
+ else
142
+ found_end = true
143
+ end
144
+ when REGEX_DELIM
145
+ if @escape.regex
146
+ @i += 1
147
+ str << char
148
+ else
149
+ found_end = true
150
+ end
151
+ when REGEX_CAPTURE
152
+ if @escape.regex_capture
153
+ @i += 1
154
+ str << char
155
+ else
156
+ found_end = true
157
+ end
158
+ when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
159
+ if @escape.operators
160
+ @i += 1
161
+ str << char
162
+ else
163
+ found_end = true
164
+ end
134
165
  else
135
- src << char
166
+ @i += 1
167
+ str << char
136
168
  end
137
169
  end
138
170
 
139
- raise Error, "Unclosed regex" if !found_close
140
- raise Error, "Trailing escape" if escape
141
- return i, Regex.new(src)
171
+ return Token.new(@i - 1, Tokens::TR_ESC) if escape
172
+ token! Tokens::TEXT, str
142
173
  end
143
174
  end
144
175
  end
data/lib/fop/tokens.rb ADDED
@@ -0,0 +1,13 @@
1
module Fop
  # Symbols identifying the kinds of token. Most are spelled as the source
  # character they stand for.
  module Tokens
    # A run of ordinary text
    TEXT        = :TXT
    # Expression delimiters
    EXP_OPEN    = :"{"
    EXP_CLOSE   = :"}"
    # Regex capture variable and regex delimiter
    REG_CAPTURE = :"$"
    REG_DELIM   = :"/"
    WILDCARD    = :"*"
    OPERATOR    = :op
    # The source ended right after an escape character
    TR_ESC      = :"trailing escape"
    EOF         = :EOF
  end
end
data/lib/fop/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Fop
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/fop_lang.rb CHANGED
@@ -1,12 +1,22 @@
1
1
  require_relative 'fop/version'
2
+ require_relative 'fop/compiler'
2
3
  require_relative 'fop/program'
3
4
 
4
5
  def Fop(src)
5
- ::Fop::Program.new(src)
6
+ ::Fop.compile!(src)
6
7
  end
7
8
 
8
9
  module Fop
10
+ def self.compile!(src)
11
+ prog, errors = compile(src)
12
+ # TODO better exception
13
+ raise "Fop errors: " + errors.map(&:message).join(",") if errors
14
+ prog
15
+ end
16
+
9
17
  def self.compile(src)
10
- Program.new(src)
18
+ instructions, errors = ::Fop::Compiler.compile(src)
19
+ return nil, errors if errors
20
+ return Program.new(instructions), nil
11
21
  end
12
22
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fop_lang
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jordan Hollinger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-16 00:00:00.000000000 Z
11
+ date: 2021-08-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A micro expression language for Filter and OPerations on text
14
14
  email: jordan.hollinger@gmail.com
@@ -17,10 +17,12 @@ extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
19
  - README.md
20
+ - lib/fop/compiler.rb
20
21
  - lib/fop/nodes.rb
21
22
  - lib/fop/parser.rb
22
23
  - lib/fop/program.rb
23
24
  - lib/fop/tokenizer.rb
25
+ - lib/fop/tokens.rb
24
26
  - lib/fop/version.rb
25
27
  - lib/fop_lang.rb
26
28
  homepage: https://jhollinger.github.io/fop-lang-rb/