fop_lang 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 711af4fad2907616e057049dcb84bce16ffdb381b3601a387dc7260cc16057d3
4
- data.tar.gz: 40da554ca0cb21f275748593921bdc616dcf8cdfe5d4fa31494f588f2c25d66c
3
+ metadata.gz: b5f19a543b81c0046dc63fcc1c0769989628017d2c1d1da74ef0db9866a0f2f7
4
+ data.tar.gz: 03b6597f9cab97c95ccda8396693bb43d9da729137cb916cc74f7fbecc314b32
5
5
  SHA512:
6
- metadata.gz: 0253a6446b88b6de112f00a95c81f1d5f710a859998e5b3d8df21d64caecc8e013ab1edec2b68047558f8053ce135b60082078f85db1f7aa16a93b86aa487093
7
- data.tar.gz: c7f0cb0387df52a3ea121e3e91a35e0444afe8862aabe054ec433d889955ee9221129fd0c07f7e36d6c71e6b9ab33f30cd16264447dfda24d29babb671112140
6
+ metadata.gz: 3a17c82a561e20cbc5cb8abbad5be4f94f02110d60b6130e3e1e9489672c5c134befc6b1daca2f590f083a67934e600fb5d6fa0ea5433181ba3014514c558232
7
+ data.tar.gz: 790250c8a79dcf04b381f2dd33cbaa048fd070688ab45446ff87652dcb18844c2d6139d0ead060fa338a57b8590eee0167ea2c25abd84e1d71571f33c49bcbda
data/README.md CHANGED
@@ -1,6 +1,10 @@
1
1
  # fop_lang
2
2
 
3
- Fop (Filter and OPperations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
3
+ Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
4
+
5
+ ```ruby
6
+ gem 'fop_lang'
7
+ ```
4
8
 
5
9
  ## Release Number Example
6
10
 
@@ -33,13 +37,14 @@ If `\` (escape) is used before the special characters `*`, `{` or `}`, then that
33
37
 
34
38
  Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
35
39
 
36
- 1. Matching character class (required): Defines what characters the operation will match and operate on.
40
+ 1. Matching class (required): Defines what characters the operation will match and operate on.
37
41
  * `N` is the numeric class and will match one or more digits.
38
42
  * `A` is the alpha class and will match one or more letters (lower or upper case).
39
43
  * `W` is the word class and matches alphanumeric chars and underscores.
40
44
  * `*` is the wildcard class and greedily matches everything after it.
45
+ * `/.../` matches on the supplied regex between the `/`'s. If your regex contains a `/`, it must be escaped.
41
46
  3. Operator (optional): What to do to the matching characters.
42
- * `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
47
+ * `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars. Note that any `/` chars must be escaped, so as not to be mistaken for a regex.
43
48
  * `+` Perform addition on the matching number and the argument (`N` only).
44
49
  * `-` Subtract the argument from the matching number (`N` only).
45
50
  5. Operator argument (required for some operators): meaning varies by operator.
@@ -53,6 +58,16 @@ Operations are the interesting part of Fop, and are specified between `{` and `}
53
58
  => 'release-5.100.0'
54
59
  ```
55
60
 
61
+ ```ruby
62
+ f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
63
+
64
+ puts f.apply('release-4.99.1')
65
+ => 'release-5.100.0'
66
+
67
+ puts f.apply('rel-4.99.1')
68
+ => 'rel-5.100.0'
69
+ ```
70
+
56
71
  ```ruby
57
72
  f = Fop('release-*{N=5}.{N+100}.{N=0}')
58
73
 
data/lib/fop/nodes.rb CHANGED
@@ -12,59 +12,19 @@ module Fop
12
12
  end
13
13
  end
14
14
 
15
- Match = Struct.new(:wildcard, :tokens) do
16
- NUM = "N".freeze
17
- WORD = "W".freeze
18
- ALPHA = "A".freeze
19
- WILD = "*".freeze
20
- BLANK = "".freeze
21
-
15
+ Op = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :operator_arg, :expression) do
22
16
  def consume!(input)
23
- if (val = input.slice!(@regex))
24
- @expression && val != BLANK ? @expression.call(val) : val
17
+ if (val = input.slice!(regex))
18
+ found_val = regex_match || val != Parser::BLANK
19
+ expression && found_val ? expression.call(val) : val
25
20
  end
26
21
  end
27
22
 
28
23
  def to_s
29
24
  w = wildcard ? "*" : nil
30
- @op ? "#{w}#{@match} #{@op} #{@arg}" : "#{w}#{@match}"
31
- end
32
-
33
- def parse!
34
- match = tokens.shift || raise(ParserError, "Empty match")
35
- raise ParserError, "Unexpected #{match}" unless match.is_a? Tokenizer::Char
36
-
37
- @match = match.char
38
- @regex =
39
- case @match
40
- when NUM then Regexp.new((wildcard ? ".*?" : "^") + "[0-9]+")
41
- when WORD then Regexp.new((wildcard ? ".*?" : "^") + "\\w+")
42
- when ALPHA then Regexp.new((wildcard ? ".*?" : "^") + "[a-zA-Z]+")
43
- when WILD then /.*/
44
- else raise ParserError, "Unknown match type '#{@match}'"
45
- end
46
-
47
- if (op = tokens.shift)
48
- raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
49
- arg = tokens.reduce("") { |acc, t|
50
- raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
51
- acc + t.char
52
- }
53
-
54
- @op = op.char
55
- @arg = arg == BLANK ? nil : arg
56
- @expression =
57
- case @op
58
- when "=" then ->(_) { @arg || BLANK }
59
- when "+", "-", "*", "/"
60
- raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
61
- raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
62
- ->(x) { x.to_i.send(@op, @arg.to_i) }
63
- else raise ParserError, "Unknown operator #{@op}"
64
- end
65
- else
66
- @op, @arg, @expression = nil, nil, nil
67
- end
25
+ s = "#{w}#{match}"
26
+ s << " #{operator} #{operator_arg}" if operator
27
+ s
68
28
  end
69
29
  end
70
30
  end
data/lib/fop/parser.rb CHANGED
@@ -4,89 +4,132 @@ module Fop
4
4
  module Parser
5
5
  Error = Class.new(StandardError)
6
6
 
7
+ MATCH_NUM = "N".freeze
8
+ MATCH_WORD = "W".freeze
9
+ MATCH_ALPHA = "A".freeze
10
+ MATCH_WILD = "*".freeze
11
+ BLANK = "".freeze
12
+ OP_REPLACE = "=".freeze
13
+ OP_ADD = "+".freeze
14
+ OP_SUB = "-".freeze
15
+ OP_MUL = "*".freeze
16
+ OP_DIV = "/".freeze
17
+
7
18
  def self.parse!(tokens)
8
- stack = []
9
- current_el = nil
19
+ nodes = []
20
+ curr_node = nil
10
21
 
11
22
  tokens.each { |token|
12
- case current_el
23
+ case curr_node
13
24
  when nil
14
- current_el = new_element token
25
+ curr_node = new_node token
15
26
  when :wildcard
16
- current_el = new_element token, true
17
- raise Error, "Unexpected * after wildcard" if current_el == :wildcard
27
+ curr_node = new_node token, true
28
+ raise Error, "Unexpected * after wildcard" if curr_node == :wildcard
18
29
  when Nodes::Text
19
- current_el = parse_text stack, current_el, token
20
- when Nodes::Match
21
- current_el = parse_match stack, current_el, token
30
+ curr_node, finished_node = parse_text curr_node, token
31
+ nodes << finished_node if finished_node
32
+ when Nodes::Op
33
+ nodes << curr_node
34
+ curr_node = new_node token
22
35
  else
23
- raise Error, "Unexpected token #{token} in #{current_el}"
36
+ raise Error, "Unexpected node #{curr_node}"
24
37
  end
25
38
  }
26
39
 
27
- case current_el
40
+ case curr_node
28
41
  when nil
29
42
  # noop
30
43
  when :wildcard
31
- stack << Nodes::Text.new(true, "")
32
- when Nodes::Text
33
- stack << current_el
34
- when Nodes::Match
35
- raise Error, "Unclosed match"
44
+ nodes << Nodes::Text.new(true, "")
45
+ when Nodes::Text, Nodes::Op
46
+ nodes << curr_node
47
+ else
48
+ raise "Unexpected end node #{curr_node}"
36
49
  end
37
50
 
38
- stack
51
+ nodes
39
52
  end
40
53
 
41
54
  private
42
55
 
43
- def self.new_element(token, wildcard = false)
56
+ def self.new_node(token, wildcard = false)
44
57
  case token
45
58
  when Tokenizer::Char
46
59
  Nodes::Text.new(wildcard, token.char.clone)
47
- when :match_open
48
- Nodes::Match.new(wildcard, [])
49
- when :match_close
50
- raise ParserError, "Unmatched }"
60
+ when Tokenizer::Op
61
+ op = Nodes::Op.new(wildcard)
62
+ parse_op! op, token.tokens
63
+ op
51
64
  when :wildcard
52
65
  :wildcard
53
66
  else
54
- raise ParserError, "Unexpected #{token}"
67
+ raise Error, "Unexpected #{token}"
55
68
  end
56
69
  end
57
70
 
58
- def self.parse_text(stack, text_el, token)
71
+ # @return current node
72
+ # @return finished node
73
+ def self.parse_text(node, token)
59
74
  case token
60
- when :match_open
61
- stack << text_el
62
- Nodes::Match.new(false, [])
63
- when :match_close
64
- raise ParserError.new, "Unexpected }"
65
75
  when Tokenizer::Char
66
- text_el.str << token.char
67
- text_el
76
+ node.str << token.char
77
+ return node, nil
78
+ when Tokenizer::Op
79
+ op = new_node token
80
+ return op, node
68
81
  when :wildcard
69
- stack << text_el
70
- :wildcard
82
+ return :wildcard, node
71
83
  else
72
- raise ParserError, "Unexpected #{token}"
84
+ raise Error, "Unexpected #{token}"
73
85
  end
74
86
  end
75
87
 
76
- def self.parse_match(stack, match_el, token)
77
- case token
78
- when Tokenizer::Char
79
- match_el.tokens << token
80
- match_el
81
- when :wildcard
82
- match_el.tokens << Tokenizer::Char.new("*").freeze
83
- match_el
84
- when :match_close
85
- match_el.parse!
86
- stack << match_el
87
- nil
88
- else
89
- raise ParserError, "Unexpected #{token}"
88
+ def self.parse_op!(node, tokens)
89
+ t = tokens[0] || raise(Error, "Empty operation")
90
+ # parse the matching type
91
+ node.regex =
92
+ case t
93
+ when Tokenizer::Char
94
+ node.match = t.char
95
+ node.regex_match = false
96
+ case t.char
97
+ when MATCH_NUM then Regexp.new((node.wildcard ? ".*?" : "^") + "[0-9]+")
98
+ when MATCH_WORD then Regexp.new((node.wildcard ? ".*?" : "^") + "\\w+")
99
+ when MATCH_ALPHA then Regexp.new((node.wildcard ? ".*?" : "^") + "[a-zA-Z]+")
100
+ when MATCH_WILD then /.*/
101
+ else raise Error, "Unknown match type '#{t.char}'"
102
+ end
103
+ when Tokenizer::Regex
104
+ node.match = "/#{t.src}/"
105
+ node.regex_match = true
106
+ Regexp.new((node.wildcard ? ".*?" : "^") + t.src)
107
+ else
108
+ raise Error, "Unexpected token #{t}"
109
+ end
110
+
111
+ # parse the operator (if any)
112
+ if (op = tokens[1])
113
+ raise Error, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
114
+ node.operator = op.char
115
+
116
+ arg = tokens[2..-1].reduce("") { |acc, t|
117
+ raise Error, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
118
+ acc + t.char
119
+ }
120
+ node.operator_arg = arg == BLANK ? nil : arg
121
+
122
+ node.expression =
123
+ case node.operator
124
+ when OP_REPLACE
125
+ ->(_) { node.operator_arg || BLANK }
126
+ when OP_ADD, OP_SUB, OP_MUL, OP_DIV
127
+ raise Error, "Operator #{node.operator} is only available for numeric matches" unless node.match == MATCH_NUM
128
+ raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
129
+ ->(x) { x.to_i.send(node.operator, node.operator_arg.to_i) }
130
+ else
131
+ raise(Error, "Unknown operator #{node.operator}")
132
+ end
90
133
  end
91
134
  end
92
135
  end
data/lib/fop/program.rb CHANGED
@@ -6,7 +6,7 @@ module Fop
6
6
  attr_reader :nodes
7
7
 
8
8
  def initialize(src)
9
- tokens = Tokenizer.tokenize! src
9
+ tokens = Tokenizer.new(src).tokenize!
10
10
  @nodes = Parser.parse! tokens
11
11
  end
12
12
 
data/lib/fop/tokenizer.rb CHANGED
@@ -1,34 +1,123 @@
1
1
  module Fop
2
- module Tokenizer
2
+ class Tokenizer
3
3
  Char = Struct.new(:char)
4
+ Op = Struct.new(:tokens)
5
+ Regex = Struct.new(:src)
4
6
  Error = Class.new(StandardError)
5
7
 
6
- def self.tokenize!(src)
8
+ OP_OPEN = "{".freeze
9
+ OP_CLOSE = "}".freeze
10
+ ESCAPE = "\\".freeze
11
+ WILDCARD = "*".freeze
12
+ REGEX_MARKER = "/".freeze
13
+
14
+ def initialize(src)
15
+ @src = src
16
+ @end = src.size - 1
17
+ end
18
+
19
+ def tokenize!
7
20
  tokens = []
8
21
  escape = false
9
- src.each_char { |char|
22
+ i = 0
23
+ until i > @end do
24
+ char = @src[i]
10
25
  if escape
11
26
  tokens << Char.new(char)
12
27
  escape = false
28
+ i += 1
13
29
  next
14
30
  end
15
31
 
16
32
  case char
17
- when "\\".freeze
33
+ when ESCAPE
18
34
  escape = true
19
- when "{".freeze
20
- tokens << :match_open
21
- when "}".freeze
22
- tokens << :match_close
23
- when "*".freeze
35
+ i += 1
36
+ when OP_OPEN
37
+ i, op = operation! i + 1
38
+ tokens << op
39
+ when OP_CLOSE
40
+ raise "Unexpected #{OP_CLOSE}"
41
+ when WILDCARD
24
42
  tokens << :wildcard
43
+ i += 1
25
44
  else
26
45
  tokens << Char.new(char)
46
+ i += 1
27
47
  end
28
- }
48
+ end
29
49
 
30
50
  raise Error, "Trailing escape" if escape
31
51
  tokens
32
52
  end
53
+
54
+ private
55
+
56
+ def operation!(i)
57
+ escape = false
58
+ found_close = false
59
+ tokens = []
60
+
61
+ until found_close or i > @end do
62
+ char = @src[i]
63
+ if escape
64
+ tokens << Char.new(char)
65
+ escape = false
66
+ i += 1
67
+ next
68
+ end
69
+
70
+ case char
71
+ when ESCAPE
72
+ escape = true
73
+ i += 1
74
+ when OP_OPEN
75
+ raise "Unexpected #{OP_OPEN}"
76
+ when OP_CLOSE
77
+ found_close = true
78
+ i += 1
79
+ when REGEX_MARKER
80
+ i, reg = regex! i + 1
81
+ tokens << reg
82
+ else
83
+ tokens << Char.new(char)
84
+ i += 1
85
+ end
86
+ end
87
+
88
+ raise Error, "Unclosed operation" if !found_close
89
+ raise Error, "Trailing escape" if escape
90
+ return i, Op.new(tokens)
91
+ end
92
+
93
+ def regex!(i)
94
+ escape = false
95
+ found_close = false
96
+ src = ""
97
+
98
+ until found_close or i > @end
99
+ char = @src[i]
100
+ i += 1
101
+
102
+ if escape
103
+ src << char
104
+ escape = false
105
+ next
106
+ end
107
+
108
+ case char
109
+ when ESCAPE
110
+ escape = true
111
+ when REGEX_MARKER
112
+ found_close = true
113
+ else
114
+ src << char
115
+ end
116
+ end
117
+
118
+ raise Error, "Unclosed regex" if !found_close
119
+ raise Error, "Trailing escape" if escape
120
+ return i, Regex.new(src)
121
+ end
33
122
  end
34
123
  end
data/lib/fop/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Fop
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fop_lang
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jordan Hollinger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-15 00:00:00.000000000 Z
11
+ date: 2021-08-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A micro expression language for Filter and OPerations on text
14
14
  email: jordan.hollinger@gmail.com