fop_lang 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8ed95bb4708820a186e6485cc29dbb47286b0f309a1caf91af7778d768b0efb3
4
- data.tar.gz: 85d41728ddae13f3667f0a2d55a5c4dcbc26e8217d4f31466e6ed92038859881
3
+ metadata.gz: 1166e1e43fd54ed2263db8a37ff288431a6152543869d961f007a134483f1a4b
4
+ data.tar.gz: 17e55b17448c38a37afb6e24a5798c87de368a352bdb0c74df13ad865a7e3ad0
5
5
  SHA512:
6
- metadata.gz: e650bdf66d8d0b5dcb603eae494f38d4969a19647053b4e81e0612705f5b16a5755d007e4ce01fad9b487224d66eff462738f1e37b011ba2a4bf4a45b0203bb3
7
- data.tar.gz: 99b31736236785cecc85b9bb23ccc3e366713cd23dd04c992a8fff6676a316d5741bfe5058e17cd0812c43d8bf1979aa7546184386018c1f92ed2462a85eb5fb
6
+ metadata.gz: cbf5d8c7f6c10ca395518cbd7bf0e9083b1f217b3523386a482f19e5cdf47a16aa7ffb50a6290f89f0ec80ba113900c499944fdf35b7e2e71a941d537483a7e1
7
+ data.tar.gz: bff3c613a575687d0d3223c5bd60bb1128b0ae78accf2e5101228c3ec14d61f46cdc798855af9d07692de0b25ba01737dbda97409d4e8f44881e9bbe6da9c523
@@ -0,0 +1,72 @@
1
+ require_relative 'parser'
2
+
3
module Fop
  # Turns Fop source code into a list of callable instructions.
  module Compiler
    # Parses +src+ and converts every parsed node into an instruction.
    #
    # @param src [String] Fop source code
    # @return [Array] +[instructions, nil]+ on success, or +[nil, errors]+ when
    #   the parser reported any errors
    # @raise [RuntimeError] if the parser yields a node of an unknown type
    def self.compile(src)
      nodes, errors = Parser.new(src).parse
      # Stop before building instructions that would only be thrown away.
      return nil, errors if errors.any?

      instructions = nodes.map { |node|
        case node
        when Nodes::Text, Nodes::Regex
          Instructions.regex_match(node.regex)
        when Nodes::Expression
          Instructions::ExpressionMatch.new(node)
        else
          raise "Unknown node type #{node}"
        end
      }
      return instructions, nil
    end

    # Callable building blocks. Every instruction responds to +call(input)+,
    # removes what it matched from +input+ (mutating it) and returns the
    # resulting value, or nil when nothing matched.
    module Instructions
      BLANK = "".freeze

      # Operator character => lambda taking (matched value, argument).
      OPERATIONS = {
        "=" => ->(_val, arg) { arg || BLANK },
        "+" => ->(val, arg) { val.to_i + arg.to_i },
        "-" => ->(val, arg) { val.to_i - arg.to_i },
        ">" => ->(val, arg) { val + arg },
        "<" => ->(val, arg) { arg + val },
      }.freeze

      # @param regex [Regexp]
      # @return [Proc] lambda that slices the first match of +regex+ out of its
      #   input and returns it (nil when there is no match)
      def self.regex_match(regex)
        ->(input) { input.slice! regex }
      end

      # Matches an expression node against the input and applies its operator.
      class ExpressionMatch
        # @param node [#regex, #operator, #regex_match, #arg] expression node
        # @raise [KeyError] if the node's operator is not in OPERATIONS
        def initialize(node)
          @regex = node.regex&.regex
          @op = node.operator ? OPERATIONS.fetch(node.operator) : nil
          @regex_match = node.regex_match
          # Integer entries are capture-group indexes and must be resolved per
          # match; an all-text argument can be joined once, up front.
          if node.arg&.any? { |a| a.is_a? Integer }
            @arg, @arg_with_caps = nil, node.arg
          else
            @arg = node.arg&.join("")
            @arg_with_caps = nil
          end
        end

        # @param input [String] remaining input; the matched span is removed
        # @return [Object, nil] the operator result, or the matched text when
        #   there is no operator (or a non-regex match was blank); nil when the
        #   regex does not match
        def call(input)
          match = @regex.match(input)
          return nil unless match

          val = match.to_s
          blank = val == BLANK
          # Remove exactly the span the regex matched. Deleting the first
          # occurrence of the matched *text* would cut the wrong place when
          # that text also appears earlier in the input.
          input[match.begin(0), val.length] = BLANK unless blank
          found_val = @regex_match || !blank
          arg = @arg_with_caps ? sub_caps(@arg_with_caps, match.captures) : @arg
          @op && found_val ? @op.call(val, arg) : val
        end

        private

        # Replaces Integer entries in +args+ with the corresponding capture and
        # joins everything into one string.
        def sub_caps(args, caps)
          args.map { |a|
            a.is_a?(Integer) ? caps[a].to_s : a
          }.join("")
        end
      end
    end
  end
end
data/lib/fop/nodes.rb CHANGED
@@ -1,47 +1,31 @@
1
1
module Fop
  # Node types produced by the parser. Each one renders a short debug label
  # through #to_s; a leading "*" in the label marks a wildcard-prefixed node.
  module Nodes
    # Literal text together with the regex built for it.
    Text = Struct.new(:wildcard, :str, :regex) do
      def to_s
        "[#{'*' if wildcard}txt] #{str}"
      end
    end

    # A regular expression: its source text and the built regex.
    Regex = Struct.new(:wildcard, :src, :regex) do
      def to_s
        "[#{'*' if wildcard}reg] #{src}"
      end
    end

    # A match with an optional operator and argument. Integer entries in +arg+
    # are zero-based capture indexes and are rendered one-based ("$1").
    Expression = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :arg) do
      def to_s
        label = "[#{'*' if wildcard}exp] #{match}"
        return label unless operator

        rendered = arg.map { |part| part.is_a?(Integer) ? "$#{part + 1}" : part.to_s }.join("")
        "#{label} #{operator} #{rendered}"
      end
    end
  end
end
data/lib/fop/parser.rb CHANGED
@@ -1,181 +1,162 @@
1
+ require_relative 'tokenizer'
1
2
  require_relative 'nodes'
2
3
 
3
4
  module Fop
4
- module Parser
5
- Error = Class.new(StandardError)
6
- CaptureGroup = Struct.new(:index)
5
+ class Parser
6
+ DIGIT = /^[0-9]$/
7
+ REGEX_START = "^".freeze
8
+ REGEX_LAZY_WILDCARD = ".*?".freeze
9
+ REGEX_MATCHES = {
10
+ "N" => "[0-9]+".freeze,
11
+ "W" => "\\w+".freeze,
12
+ "A" => "[a-zA-Z]+".freeze,
13
+ "*" => ".*".freeze,
14
+ }.freeze
15
+ OPS_WITH_OPTIONAL_ARGS = [Tokenizer::OP_REPLACE]
16
+ TR_REGEX = /.*/
17
+
18
# A parse error: its category, the token it was detected at, and a message.
Error = Struct.new(:type, :token, :message) do
  # Renders the capitalized error type, the message and the token's position.
  def to_s
    kind = type.to_s.capitalize
    column = token.pos
    "#{kind} error: #{message} at column #{column}"
  end
end
7
23
 
8
- MATCH_NUM = "N".freeze
9
- MATCH_WORD = "W".freeze
10
- MATCH_ALPHA = "A".freeze
11
- MATCH_WILD = "*".freeze
12
- BLANK = "".freeze
13
- OP_REPLACE = "=".freeze
14
- OP_APPEND = ">".freeze
15
- OP_PREPEND = "<".freeze
16
- OP_ADD = "+".freeze
17
- OP_SUB = "-".freeze
18
- OP_MUL = "*".freeze
19
- OP_DIV = "/".freeze
20
- VAR = "$".freeze
21
- CAP_NUM = /^[1-9]$/
24
+ attr_reader :errors
22
25
 
23
- EXP_REPLACE = ->(_val, _op, arg) { arg || BLANK }
24
- EXP_MATH = ->(val, op, arg) { val.to_i.send(op, arg.to_i) }
25
- EXP_APPEND = ->(val, _op, arg) { val + arg }
26
- EXP_PREPEND = ->(val, _op, arg) { arg + val }
26
# Sets up a parser over +src+ with an empty error list.
#
# @param src [String] Fop source code, handed to the tokenizer
# @param debug [Boolean] accepted but not used by this method
def initialize(src, debug: false)
  @errors = []
  @tokenizer = Tokenizer.new(src)
end
27
30
 
28
- def self.parse!(tokens)
31
# Top-level parse loop. Reads tokens until EOF, building Text, Regex and
# Expression nodes. Syntax problems are appended to the error list rather
# than raised.
#
# @return [Array] +[nodes, errors]+
def parse
  nodes = []
  pending_wildcard = false

  loop do
    # Each top-level token is read with the default escapes.
    @tokenizer.reset_escapes!
    token = @tokenizer.next
    break if token.type == Tokens::EOF

    case token.type
    when Tokens::WILDCARD
      errors << Error.new(:syntax, token, "Consecutive wildcards") if pending_wildcard
      pending_wildcard = true
    when Tokens::TEXT
      regex = build_regex!(pending_wildcard, token, Regexp.escape(token.val))
      nodes << Nodes::Text.new(pending_wildcard, token.val, regex)
      pending_wildcard = false
    when Tokens::EXP_OPEN
      nodes << parse_exp!(pending_wildcard)
      pending_wildcard = false
    when Tokens::REG_DELIM
      nodes << parse_regex!(pending_wildcard)
      pending_wildcard = false
    else
      errors << Error.new(:syntax, token, "Unexpected #{token.type}")
    end
  end

  # A wildcard left over at the end of the source becomes a catch-all node.
  nodes << Nodes::Text.new(true, "", TR_REGEX) if pending_wildcard
  return nodes, @errors
end
63
62
 
64
- private
63
# Parses one expression: its match, then an optional operator and argument.
#
# @param wildcard [Boolean] whether a wildcard preceded the expression
# @return [Nodes::Expression]
def parse_exp!(wildcard = false)
  expression = Nodes::Expression.new(wildcard)
  parse_exp_match!(expression)
  operator_token = parse_exp_operator!(expression)
  # Without an operator the expression is already closed; no argument follows.
  parse_exp_arg!(expression, operator_token) if expression.operator
  expression
end
65
72
 
66
- def self.new_node(token, wildcard = false)
67
- case token
68
- when Tokenizer::Char
69
- Nodes::Text.new(wildcard, token.char.clone)
70
- when Tokenizer::Op
71
- op = Nodes::Op.new(wildcard)
72
- parse_op! op, token
73
- op
74
- when :wildcard
75
- :wildcard
73
# Parses the match part of an expression and stores it on +exp+. The match is
# either a named match type from REGEX_MATCHES or a /regex/. Problems are
# appended to the error list.
#
# @param exp [Nodes::Expression] expression being filled in
def parse_exp_match!(exp)
  @tokenizer.escape.operators = false
  token = @tokenizer.next

  if [Tokens::TEXT, Tokens::WILDCARD].include?(token.type)
    exp.match = token.val
    pattern = REGEX_MATCHES[exp.match]
    if pattern
      prefix = exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START
      exp.regex = Nodes::Regex.new(exp.wildcard, pattern, Regexp.new(prefix + pattern))
    elsif exp.regex.nil?
      errors << Error.new(:name, token, "Unknown match type '#{exp.match}'")
    end
  elsif token.type == Tokens::REG_DELIM
    exp.regex = parse_regex!(exp.wildcard)
    exp.match = exp.regex&.src
    exp.regex_match = true
    # Leave regex mode before the operator is read.
    @tokenizer.reset_escapes!
  else
    errors << Error.new(:syntax, token, "Unexpected #{token.type}; expected a string or a regex")
  end
end
80
94
 
81
- # @return current node
82
- # @return finished node
83
- def self.parse_text(node, token)
84
- case token
85
- when Tokenizer::Char
86
- node.str << token.char
87
- return node, nil
88
- when Tokenizer::Op
89
- op = new_node token
90
- return op, node
91
- when :wildcard
92
- return :wildcard, node
95
# Reads the token after the match. An operator is stored on +exp+, a closing
# brace is accepted silently, and anything else is recorded as a syntax error.
#
# @param exp [Nodes::Expression] expression being filled in
# @return [Tokenizer::Token] the token that was read
def parse_exp_operator!(exp)
  @tokenizer.escape.operators = false
  token = @tokenizer.next

  if token.type == Tokens::OPERATOR
    exp.operator = token.val
  elsif token.type != Tokens::EXP_CLOSE
    errors << Error.new(:syntax, token, "Unexpected #{token.type}; expected an operator")
  end

  token
end
97
108
 
98
- def self.parse_op!(node, token)
99
- # parse the matching type
100
- node.regex =
101
- case token.match
102
- when Tokenizer::Char
103
- node.match = token.match.char
104
- node.regex_match = false
105
- case node.match
106
- when MATCH_NUM then Regexp.new((node.wildcard ? ".*?" : "^") + "[0-9]+")
107
- when MATCH_WORD then Regexp.new((node.wildcard ? ".*?" : "^") + "\\w+")
108
- when MATCH_ALPHA then Regexp.new((node.wildcard ? ".*?" : "^") + "[a-zA-Z]+")
109
- when MATCH_WILD then /.*/
110
- else raise Error, "Unknown match type '#{node.match}'"
111
- end
112
- when Tokenizer::Regex
113
- node.match = "/#{token.match.src}/"
114
- node.regex_match = true
115
- Regexp.new((node.wildcard ? ".*?" : "^") + token.match.src)
116
- when nil
117
- raise Error, "Empty operation"
109
# Parses the operator argument of an expression, up to the closing brace.
#
# The argument is collected into +exp.arg+ as an array of text pieces
# (Strings) and regex capture references (zero-based Integers). Problems are
# appended to the error list instead of being raised.
#
# @param exp [Nodes::Expression] expression being filled in
# @param op_token [Tokenizer::Token] the operator token, used for error reporting
def parse_exp_arg!(exp, op_token)
  @tokenizer.escape.operators = true
  @tokenizer.escape.regex = true
  # Capture references ($1..$9) are only meaningful after a regex match.
  @tokenizer.escape.regex_capture = false if exp.regex_match

  exp.arg = []
  found_close, eof = false, false
  until found_close || eof
    t = @tokenizer.next
    case t.type
    when Tokens::TEXT
      exp.arg << t.val
    when Tokens::REG_CAPTURE
      # Captures are written one-based but stored as zero-based indexes.
      exp.arg << t.val.to_i - 1
      # "$0" would become index -1 and silently select the last capture
      # group, so it is rejected along with non-digits.
      unless t.val =~ DIGIT && t.val != "0"
        errors << Error.new(:syntax, t, "Invalid regex capture; must be between 1 and 9 (found #{t.val})")
      end
      errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
    when Tokens::EXP_CLOSE
      found_close = true
    when Tokens::EOF
      eof = true
      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
    else
      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
    end
  end

  # An argument may be several pieces (e.g. text plus a capture), so only a
  # completely empty argument counts as missing.
  if exp.arg.empty? && !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
    errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
  end
end
147
139
 
148
- def self.parse_captures!(arg)
149
- i = 0
150
- iend = arg.size - 1
151
- escape = false
152
- nodes = []
153
-
154
- until i > iend
155
- char = arg[i]
156
- i += 1
157
-
158
- if escape
159
- nodes << char
160
- escape = false
161
- next
162
- end
163
-
164
- case char
165
- when Tokenizer::ESCAPE
166
- escape = true
167
- when VAR
168
- num = arg[i].to_s
169
- raise Error, "Capture group number must be between 1 and 9; found '#{num}'" unless num =~ CAP_NUM
170
- nodes << CaptureGroup.new(num.to_i - 1)
171
- i += 1
172
- else
173
- nodes << char
174
- end
140
# Parses the body of a /regex/ after its opening delimiter has been read, and
# consumes the closing delimiter. Problems are appended to the error list.
#
# @param wildcard [Boolean] whether a wildcard preceded the regex
# @return [Nodes::Regex] the node; its +regex+ is nil when the body was
#   missing or could not be built
def parse_regex!(wildcard)
  @tokenizer.regex_mode!
  t = @tokenizer.next
  reg = Nodes::Regex.new(wildcard, t.val)
  if t.type == Tokens::TEXT
    reg.regex = build_regex!(wildcard, t)
  else
    errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
  end

  # The regex body must be followed by its closing delimiter.
  t = @tokenizer.next
  errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a closing '/'") unless t.type == Tokens::REG_DELIM
  reg
end
154
+
155
# Builds the regex for a node, prefixed with a lazy wildcard or a start anchor.
#
# @param wildcard [Boolean] selects the lazy-wildcard prefix over the anchor
# @param token [Tokenizer::Token] token the source came from (for error reporting)
# @param src [String] regex source; defaults to the token's value
# @return [Regexp, nil] nil when +src+ is not a valid regex, in which case a
#   :regex error is recorded
def build_regex!(wildcard, token, src = token.val)
  prefix = wildcard ? REGEX_LAZY_WILDCARD : REGEX_START
  begin
    Regexp.new(prefix + src)
  rescue RegexpError => e
    errors << Error.new(:regex, token, e.message)
    nil
  end
end
180
161
  end
181
162
  end
data/lib/fop/program.rb CHANGED
@@ -1,22 +1,16 @@
1
- require_relative 'tokenizer'
2
- require_relative 'parser'
3
-
4
1
  module Fop
5
2
  class Program
6
- attr_reader :nodes
7
-
8
- def initialize(src)
9
- tokens = Tokenizer.new(src).tokenize!
10
- @nodes = Parser.parse! tokens
3
# @param instructions [Array<#call>] instructions that #apply runs in order
def initialize(instructions)
  @instructions = instructions
end
12
6
 
13
7
# Runs every instruction against a copy of +input+ and concatenates the results.
#
# @param input [String] text to transform; the caller's string is not modified
# @return [String, nil] the combined output, or nil when an instruction fails
#   to match or input is left over afterwards
def apply(input)
  remaining = input.clone
  output = ""
  @instructions.each do |instruction|
    piece = instruction.call(remaining)
    return nil if piece.nil?

    output += piece.to_s
  end
  # Leftover input means the program did not describe the whole string.
  remaining.empty? ? output : nil
end
data/lib/fop/tokenizer.rb CHANGED
@@ -1,144 +1,175 @@
1
+ require_relative 'tokens'
2
+
1
3
  module Fop
2
4
  class Tokenizer
3
- Char = Struct.new(:char)
4
- Op = Struct.new(:match, :operator, :arg)
5
- Regex = Struct.new(:src)
6
- Error = Class.new(StandardError)
5
+ Token = Struct.new(:pos, :type, :val)
6
+ Error = Struct.new(:pos, :message)
7
+ Escapes = Struct.new(:operators, :regex_capture, :regex, :regex_escape, :wildcards, :exp)
7
8
 
8
- OP_OPEN = "{".freeze
9
- OP_CLOSE = "}".freeze
9
+ EXP_OPEN = "{".freeze
10
+ EXP_CLOSE = "}".freeze
10
11
  ESCAPE = "\\".freeze
11
12
  WILDCARD = "*".freeze
12
- REGEX_MARKER = "/".freeze
13
+ REGEX_DELIM = "/".freeze
14
+ REGEX_CAPTURE = "$".freeze
15
+ OP_REPLACE = "=".freeze
16
+ OP_APPEND = ">".freeze
17
+ OP_PREPEND = "<".freeze
18
+ OP_ADD = "+".freeze
19
+ OP_SUB = "-".freeze
20
+
21
+ #
22
+ # Controls which "mode" the tokenizer is currently in. This is a necessary result of the syntax lacking
23
+ # explicit string delimiters. That *could* be worked around by requiring users to escape all reserved chars,
24
+ # but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on
25
+ # or off to auto-escape certain chars for the next token.
26
+ #
27
+ attr_reader :escape
13
28
 
14
29
# @param src [String] source text to tokenize
def initialize(src)
  @src = src
  @end = src.size - 1 # index of the last character
  @i = 0              # read position
  @start_i = 0        # position where the next token starts
  reset_escapes!
end
18
36
 
19
- def tokenize!
20
- tokens = []
21
- escape = false
22
- i = 0
23
- until i > @end do
24
- char = @src[i]
25
- i += 1
26
-
27
- if escape
28
- tokens << Char.new(char)
29
- escape = false
30
- next
31
- end
32
-
33
- case char
34
- when ESCAPE
35
- escape = true
36
- when OP_OPEN
37
- i, op = operation! i
38
- tokens << op
39
- when OP_CLOSE
40
- raise "Unexpected #{OP_CLOSE}"
41
- when WILDCARD
42
- tokens << :wildcard
43
- else
44
- tokens << Char.new(char)
45
- end
46
- end
47
-
48
- raise Error, "Trailing escape" if escape
49
- tokens
37
+ # Auto-escape operators and regex capture vars. Appropriate for top-level syntax.
38
def reset_escapes!
  # Positional fields: operators and regex_capture are on, the rest stay nil.
  escape_operators = true
  escape_regex_capture = true
  @escape = Escapes.new(escape_operators, escape_regex_capture)
end
51
41
 
52
- private
53
-
54
- def operation!(i)
55
- found_close = false
56
- op = Op.new(nil, nil, "")
42
+ # Auto-escape anything you'd find in a regular expression
43
# Switches the escape flags so that everything found inside a regular
# expression is read as text, except the closing delimiter.
def regex_mode!
  @escape.regex = false # look for the final /
  @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
  @escape.operators = true
  @escape.regex_capture = true
  @escape.exp = true
  @escape.wildcards = true
end
57
51
 
58
- # Find matcher
59
- until found_close or op.match or i > @end do
60
- char = @src[i]
61
- i += 1
62
- case char
63
- when OP_CLOSE
64
- found_close = true
65
- when REGEX_MARKER
66
- i, reg = regex! i
67
- op.match = reg
52
# Reads and returns the next token from the current position.
#
# A reserved character becomes its own token only when it is not being
# auto-escaped by the current escape flags; otherwise it is read as the start
# of a text token. This includes braces and wildcards, so a regex body that
# starts with "{", "}" or "*" is read as text, the same way #get_str! treats
# those characters in the middle of a string.
#
# @return [Token] an EOF token once the end of the source has been passed
def next
  return Token.new(@i, Tokens::EOF) if @i > @end

  char = @src[@i]
  case char
  when EXP_OPEN
    return get_str! if @escape.exp

    @i += 1
    token! Tokens::EXP_OPEN
  when EXP_CLOSE
    return get_str! if @escape.exp

    @i += 1
    token! Tokens::EXP_CLOSE
  when WILDCARD
    return get_str! if @escape.wildcards

    @i += 1
    token! Tokens::WILDCARD, WILDCARD
  when REGEX_DELIM
    return get_str! if @escape.regex

    @i += 1
    token! Tokens::REG_DELIM
  when REGEX_CAPTURE
    return get_str! if @escape.regex_capture

    # The token's value is the single character after the "$"
    # (nil when the "$" is the last character of the source).
    @i += 1
    t = token! Tokens::REG_CAPTURE, @src[@i]
    @i += 1
    @start_i = @i
    t
  when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
    return get_str! if @escape.operators

    @i += 1
    token! Tokens::OPERATOR, char
  else
    get_str!
  end
end
113
93
 
114
- def regex!(i)
115
- escape = false
116
- found_close = false
117
- src = ""
94
+ private
95
+
96
# Builds a token positioned at the current token start, then moves the token
# start up to the read position.
#
# @param type [Symbol] token type
# @param val [String, nil] token value
# @return [Token]
def token!(type, val = nil)
  Token.new(@start_i, type, val).tap { @start_i = @i }
end
118
101
 
119
- until found_close or i > @end
120
- char = @src[i]
121
- i += 1
102
# Reads a run of text starting at the current position. A reserved character
# ends the run unless its escape flag is on, in which case it is part of the
# text. A backslash escapes the next character; in regex-escape mode it is
# kept in the text unless it is followed by the regex delimiter.
#
# @return [Token] a TEXT token, or a trailing-escape token when the source
#   ends right after an escaping backslash
def get_str!
  buffer = ""
  pending_escape = false

  while @i <= @end
    char = @src[@i]

    if pending_escape
      # The previous backslash makes this character literal.
      @i += 1
      buffer << char
      pending_escape = false
    elsif char == ESCAPE
      @i += 1
      if @escape.regex_escape && @src[@i] != REGEX_DELIM
        buffer << char
      else
        pending_escape = true
      end
    else
      # Decide whether this character is currently read as plain text.
      literal =
        case char
        when EXP_OPEN, EXP_CLOSE then @escape.exp
        when WILDCARD then @escape.wildcards
        when REGEX_DELIM then @escape.regex
        when REGEX_CAPTURE then @escape.regex_capture
        when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB then @escape.operators
        else true
        end
      break unless literal

      @i += 1
      buffer << char
    end
  end

  return Token.new(@i - 1, Tokens::TR_ESC) if pending_escape
  token! Tokens::TEXT, buffer
end
143
174
  end
144
175
  end
data/lib/fop/tokens.rb ADDED
@@ -0,0 +1,13 @@
1
module Fop
  # Token type identifiers. Each one is a Symbol whose text is also what
  # appears when a token type is interpolated into an error message.
  module Tokens
    # Plain text.
    TEXT = :TXT

    # Single-character syntax.
    EXP_OPEN = :"{"
    EXP_CLOSE = :"}"
    REG_CAPTURE = :"$"
    REG_DELIM = :"/"
    WILDCARD = :"*"

    # One of the operator characters.
    OPERATOR = :op

    # Source ended right after an escaping backslash.
    TR_ESC = :"trailing escape"

    # End of source.
    EOF = :EOF
  end
end
data/lib/fop/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Fop
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/fop_lang.rb CHANGED
@@ -1,12 +1,22 @@
1
1
  require_relative 'fop/version'
2
+ require_relative 'fop/compiler'
2
3
  require_relative 'fop/program'
3
4
 
4
5
# Shorthand for Fop.compile!: compiles +src+ and returns what it returns.
#
# @param src [String] Fop source code
def Fop(src)
  ::Fop.compile!(src)
end
7
8
 
8
9
module Fop
  # Compiles +src+ into a Program, raising if there were any errors.
  #
  # @param src [String] Fop source code
  # @return [Program]
  # @raise [RuntimeError] listing every error message, comma-separated
  def self.compile!(src)
    program, errors = compile(src)
    return program unless errors

    # TODO better exception
    raise "Fop errors: " + errors.map(&:message).join(",")
  end

  # Compiles +src+ into a Program without raising on errors.
  #
  # @param src [String] Fop source code
  # @return [Array] +[program, nil]+ on success, +[nil, errors]+ otherwise
  def self.compile(src)
    instructions, errors = ::Fop::Compiler.compile(src)
    errors ? [nil, errors] : [Program.new(instructions), nil]
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fop_lang
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jordan Hollinger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-16 00:00:00.000000000 Z
11
+ date: 2021-08-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A micro expression language for Filter and OPerations on text
14
14
  email: jordan.hollinger@gmail.com
@@ -17,10 +17,12 @@ extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
19
  - README.md
20
+ - lib/fop/compiler.rb
20
21
  - lib/fop/nodes.rb
21
22
  - lib/fop/parser.rb
22
23
  - lib/fop/program.rb
23
24
  - lib/fop/tokenizer.rb
25
+ - lib/fop/tokens.rb
24
26
  - lib/fop/version.rb
25
27
  - lib/fop_lang.rb
26
28
  homepage: https://jhollinger.github.io/fop-lang-rb/