fop_lang 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 711af4fad2907616e057049dcb84bce16ffdb381b3601a387dc7260cc16057d3
4
- data.tar.gz: 40da554ca0cb21f275748593921bdc616dcf8cdfe5d4fa31494f588f2c25d66c
3
+ metadata.gz: b5f19a543b81c0046dc63fcc1c0769989628017d2c1d1da74ef0db9866a0f2f7
4
+ data.tar.gz: 03b6597f9cab97c95ccda8396693bb43d9da729137cb916cc74f7fbecc314b32
5
5
  SHA512:
6
- metadata.gz: 0253a6446b88b6de112f00a95c81f1d5f710a859998e5b3d8df21d64caecc8e013ab1edec2b68047558f8053ce135b60082078f85db1f7aa16a93b86aa487093
7
- data.tar.gz: c7f0cb0387df52a3ea121e3e91a35e0444afe8862aabe054ec433d889955ee9221129fd0c07f7e36d6c71e6b9ab33f30cd16264447dfda24d29babb671112140
6
+ metadata.gz: 3a17c82a561e20cbc5cb8abbad5be4f94f02110d60b6130e3e1e9489672c5c134befc6b1daca2f590f083a67934e600fb5d6fa0ea5433181ba3014514c558232
7
+ data.tar.gz: 790250c8a79dcf04b381f2dd33cbaa048fd070688ab45446ff87652dcb18844c2d6139d0ead060fa338a57b8590eee0167ea2c25abd84e1d71571f33c49bcbda
data/README.md CHANGED
@@ -1,6 +1,10 @@
1
1
  # fop_lang
2
2
 
3
- Fop (Filter and OPperations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
3
+ Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
4
+
5
+ ```ruby
6
+ gem 'fop_lang'
7
+ ```
4
8
 
5
9
  ## Release Number Example
6
10
 
@@ -33,13 +37,14 @@ If `\` (escape) is used before the special characters `*`, `{` or `}`, then that
33
37
 
34
38
  Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
35
39
 
36
- 1. Matching character class (required): Defines what characters the operation will match and operate on.
40
+ 1. Matching class (required): Defines what characters the operation will match and operate on.
37
41
  * `N` is the numeric class and will match one or more digits.
38
42
  * `A` is the alpha class and will match one or more letters (lower or upper case).
39
43
  * `W` is the word class and matches alphanumeric chars and underscores.
40
44
  * `*` is the wildcard class and greedily matches everything after it.
45
+ * `/.../` matches on the supplied regex between the `/` characters. If your regex contains a `/`, it must be escaped (`\/`).
41
46
  3. Operator (optional): What to do to the matching characters.
42
- * `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
47
+ * `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars. Note that any `/` chars must be escaped, so as not to be mistaken for a regex.
43
48
  * `+` Perform addition on the matching number and the argument (`N` only).
44
49
  * `-` Subtract the argument from the matching number (`N` only).
45
50
  5. Operator argument (required for some operators): meaning varies by operator.
@@ -53,6 +58,16 @@ Operations are the interesting part of Fop, and are specified between `{` and `}
53
58
  => 'release-5.100.0'
54
59
  ```
55
60
 
61
+ ```ruby
62
+ f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
63
+
64
+ puts f.apply('release-4.99.1')
65
+ => 'release-5.100.0'
66
+
67
+ puts f.apply('rel-4.99.1')
68
+ => 'rel-5.100.0'
69
+ ```
70
+
56
71
  ```ruby
57
72
  f = Fop('release-*{N=5}.{N+100}.{N=0}')
58
73
 
data/lib/fop/nodes.rb CHANGED
@@ -12,59 +12,19 @@ module Fop
12
12
  end
13
13
  end
14
14
 
15
- Match = Struct.new(:wildcard, :tokens) do
16
- NUM = "N".freeze
17
- WORD = "W".freeze
18
- ALPHA = "A".freeze
19
- WILD = "*".freeze
20
- BLANK = "".freeze
21
-
15
+ Op = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :operator_arg, :expression) do
22
16
  def consume!(input)
23
- if (val = input.slice!(@regex))
24
- @expression && val != BLANK ? @expression.call(val) : val
17
+ if (val = input.slice!(regex))
18
+ found_val = regex_match || val != Parser::BLANK
19
+ expression && found_val ? expression.call(val) : val
25
20
  end
26
21
  end
27
22
 
28
23
  def to_s
29
24
  w = wildcard ? "*" : nil
30
- @op ? "#{w}#{@match} #{@op} #{@arg}" : "#{w}#{@match}"
31
- end
32
-
33
- def parse!
34
- match = tokens.shift || raise(ParserError, "Empty match")
35
- raise ParserError, "Unexpected #{match}" unless match.is_a? Tokenizer::Char
36
-
37
- @match = match.char
38
- @regex =
39
- case @match
40
- when NUM then Regexp.new((wildcard ? ".*?" : "^") + "[0-9]+")
41
- when WORD then Regexp.new((wildcard ? ".*?" : "^") + "\\w+")
42
- when ALPHA then Regexp.new((wildcard ? ".*?" : "^") + "[a-zA-Z]+")
43
- when WILD then /.*/
44
- else raise ParserError, "Unknown match type '#{@match}'"
45
- end
46
-
47
- if (op = tokens.shift)
48
- raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
49
- arg = tokens.reduce("") { |acc, t|
50
- raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
51
- acc + t.char
52
- }
53
-
54
- @op = op.char
55
- @arg = arg == BLANK ? nil : arg
56
- @expression =
57
- case @op
58
- when "=" then ->(_) { @arg || BLANK }
59
- when "+", "-", "*", "/"
60
- raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
61
- raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
62
- ->(x) { x.to_i.send(@op, @arg.to_i) }
63
- else raise ParserError, "Unknown operator #{@op}"
64
- end
65
- else
66
- @op, @arg, @expression = nil, nil, nil
67
- end
25
+ s = "#{w}#{match}"
26
+ s << " #{operator} #{operator_arg}" if operator
27
+ s
68
28
  end
69
29
  end
70
30
  end
data/lib/fop/parser.rb CHANGED
@@ -4,89 +4,132 @@ module Fop
4
4
  module Parser
5
5
  Error = Class.new(StandardError)
6
6
 
7
+ MATCH_NUM = "N".freeze
8
+ MATCH_WORD = "W".freeze
9
+ MATCH_ALPHA = "A".freeze
10
+ MATCH_WILD = "*".freeze
11
+ BLANK = "".freeze
12
+ OP_REPLACE = "=".freeze
13
+ OP_ADD = "+".freeze
14
+ OP_SUB = "-".freeze
15
+ OP_MUL = "*".freeze
16
+ OP_DIV = "/".freeze
17
+
7
18
  def self.parse!(tokens)
8
- stack = []
9
- current_el = nil
19
+ nodes = []
20
+ curr_node = nil
10
21
 
11
22
  tokens.each { |token|
12
- case current_el
23
+ case curr_node
13
24
  when nil
14
- current_el = new_element token
25
+ curr_node = new_node token
15
26
  when :wildcard
16
- current_el = new_element token, true
17
- raise Error, "Unexpected * after wildcard" if current_el == :wildcard
27
+ curr_node = new_node token, true
28
+ raise Error, "Unexpected * after wildcard" if curr_node == :wildcard
18
29
  when Nodes::Text
19
- current_el = parse_text stack, current_el, token
20
- when Nodes::Match
21
- current_el = parse_match stack, current_el, token
30
+ curr_node, finished_node = parse_text curr_node, token
31
+ nodes << finished_node if finished_node
32
+ when Nodes::Op
33
+ nodes << curr_node
34
+ curr_node = new_node token
22
35
  else
23
- raise Error, "Unexpected token #{token} in #{current_el}"
36
+ raise Error, "Unexpected node #{curr_node}"
24
37
  end
25
38
  }
26
39
 
27
- case current_el
40
+ case curr_node
28
41
  when nil
29
42
  # noop
30
43
  when :wildcard
31
- stack << Nodes::Text.new(true, "")
32
- when Nodes::Text
33
- stack << current_el
34
- when Nodes::Match
35
- raise Error, "Unclosed match"
44
+ nodes << Nodes::Text.new(true, "")
45
+ when Nodes::Text, Nodes::Op
46
+ nodes << curr_node
47
+ else
48
+ raise "Unexpected end node #{curr_node}"
36
49
  end
37
50
 
38
- stack
51
+ nodes
39
52
  end
40
53
 
41
54
  private
42
55
 
43
- def self.new_element(token, wildcard = false)
56
+ def self.new_node(token, wildcard = false)
44
57
  case token
45
58
  when Tokenizer::Char
46
59
  Nodes::Text.new(wildcard, token.char.clone)
47
- when :match_open
48
- Nodes::Match.new(wildcard, [])
49
- when :match_close
50
- raise ParserError, "Unmatched }"
60
+ when Tokenizer::Op
61
+ op = Nodes::Op.new(wildcard)
62
+ parse_op! op, token.tokens
63
+ op
51
64
  when :wildcard
52
65
  :wildcard
53
66
  else
54
- raise ParserError, "Unexpected #{token}"
67
+ raise Error, "Unexpected #{token}"
55
68
  end
56
69
  end
57
70
 
58
- def self.parse_text(stack, text_el, token)
71
+ # @return current node
72
+ # @return finished node
73
+ def self.parse_text(node, token)
59
74
  case token
60
- when :match_open
61
- stack << text_el
62
- Nodes::Match.new(false, [])
63
- when :match_close
64
- raise ParserError.new, "Unexpected }"
65
75
  when Tokenizer::Char
66
- text_el.str << token.char
67
- text_el
76
+ node.str << token.char
77
+ return node, nil
78
+ when Tokenizer::Op
79
+ op = new_node token
80
+ return op, node
68
81
  when :wildcard
69
- stack << text_el
70
- :wildcard
82
+ return :wildcard, node
71
83
  else
72
- raise ParserError, "Unexpected #{token}"
84
+ raise Error, "Unexpected #{token}"
73
85
  end
74
86
  end
75
87
 
76
- def self.parse_match(stack, match_el, token)
77
- case token
78
- when Tokenizer::Char
79
- match_el.tokens << token
80
- match_el
81
- when :wildcard
82
- match_el.tokens << Tokenizer::Char.new("*").freeze
83
- match_el
84
- when :match_close
85
- match_el.parse!
86
- stack << match_el
87
- nil
88
- else
89
- raise ParserError, "Unexpected #{token}"
88
+ def self.parse_op!(node, tokens)
89
+ t = tokens[0] || raise(Error, "Empty operation")
90
+ # parse the matching type
91
+ node.regex =
92
+ case t
93
+ when Tokenizer::Char
94
+ node.match = t.char
95
+ node.regex_match = false
96
+ case t.char
97
+ when MATCH_NUM then Regexp.new((node.wildcard ? ".*?" : "^") + "[0-9]+")
98
+ when MATCH_WORD then Regexp.new((node.wildcard ? ".*?" : "^") + "\\w+")
99
+ when MATCH_ALPHA then Regexp.new((node.wildcard ? ".*?" : "^") + "[a-zA-Z]+")
100
+ when MATCH_WILD then /.*/
101
+ else raise Error, "Unknown match type '#{t.char}'"
102
+ end
103
+ when Tokenizer::Regex
104
+ node.match = "/#{t.src}/"
105
+ node.regex_match = true
106
+ Regexp.new((node.wildcard ? ".*?" : "^") + t.src)
107
+ else
108
+ raise Error, "Unexpected token #{t}"
109
+ end
110
+
111
+ # parse the operator (if any)
112
+ if (op = tokens[1])
113
+ raise Error, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
114
+ node.operator = op.char
115
+
116
+ arg = tokens[2..-1].reduce("") { |acc, t|
117
+ raise Error, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
118
+ acc + t.char
119
+ }
120
+ node.operator_arg = arg == BLANK ? nil : arg
121
+
122
+ node.expression =
123
+ case node.operator
124
+ when OP_REPLACE
125
+ ->(_) { node.operator_arg || BLANK }
126
+ when OP_ADD, OP_SUB, OP_MUL, OP_DIV
127
+ raise Error, "Operator #{node.operator} is only available for numeric matches" unless node.match == MATCH_NUM
128
+ raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
129
+ ->(x) { x.to_i.send(node.operator, node.operator_arg.to_i) }
130
+ else
131
+ raise(Error, "Unknown operator #{node.operator}")
132
+ end
90
133
  end
91
134
  end
92
135
  end
data/lib/fop/program.rb CHANGED
@@ -6,7 +6,7 @@ module Fop
6
6
  attr_reader :nodes
7
7
 
8
8
  def initialize(src)
9
- tokens = Tokenizer.tokenize! src
9
+ tokens = Tokenizer.new(src).tokenize!
10
10
  @nodes = Parser.parse! tokens
11
11
  end
12
12
 
data/lib/fop/tokenizer.rb CHANGED
@@ -1,34 +1,123 @@
1
1
  module Fop
2
- module Tokenizer
2
+ class Tokenizer
3
3
  Char = Struct.new(:char)
4
+ Op = Struct.new(:tokens)
5
+ Regex = Struct.new(:src)
4
6
  Error = Class.new(StandardError)
5
7
 
6
- def self.tokenize!(src)
8
+ OP_OPEN = "{".freeze
9
+ OP_CLOSE = "}".freeze
10
+ ESCAPE = "\\".freeze
11
+ WILDCARD = "*".freeze
12
+ REGEX_MARKER = "/".freeze
13
+
14
+ def initialize(src)
15
+ @src = src
16
+ @end = src.size - 1
17
+ end
18
+
19
+ def tokenize!
7
20
  tokens = []
8
21
  escape = false
9
- src.each_char { |char|
22
+ i = 0
23
+ until i > @end do
24
+ char = @src[i]
10
25
  if escape
11
26
  tokens << Char.new(char)
12
27
  escape = false
28
+ i += 1
13
29
  next
14
30
  end
15
31
 
16
32
  case char
17
- when "\\".freeze
33
+ when ESCAPE
18
34
  escape = true
19
- when "{".freeze
20
- tokens << :match_open
21
- when "}".freeze
22
- tokens << :match_close
23
- when "*".freeze
35
+ i += 1
36
+ when OP_OPEN
37
+ i, op = operation! i + 1
38
+ tokens << op
39
+ when OP_CLOSE
40
+ raise "Unexpected #{OP_CLOSE}"
41
+ when WILDCARD
24
42
  tokens << :wildcard
43
+ i += 1
25
44
  else
26
45
  tokens << Char.new(char)
46
+ i += 1
27
47
  end
28
- }
48
+ end
29
49
 
30
50
  raise Error, "Trailing escape" if escape
31
51
  tokens
32
52
  end
53
+
54
+ private
55
+
56
+ def operation!(i)
57
+ escape = false
58
+ found_close = false
59
+ tokens = []
60
+
61
+ until found_close or i > @end do
62
+ char = @src[i]
63
+ if escape
64
+ tokens << Char.new(char)
65
+ escape = false
66
+ i += 1
67
+ next
68
+ end
69
+
70
+ case char
71
+ when ESCAPE
72
+ escape = true
73
+ i += 1
74
+ when OP_OPEN
75
+ raise "Unexpected #{OP_OPEN}"
76
+ when OP_CLOSE
77
+ found_close = true
78
+ i += 1
79
+ when REGEX_MARKER
80
+ i, reg = regex! i + 1
81
+ tokens << reg
82
+ else
83
+ tokens << Char.new(char)
84
+ i += 1
85
+ end
86
+ end
87
+
88
+ raise Error, "Unclosed operation" if !found_close
89
+ raise Error, "Trailing escape" if escape
90
+ return i, Op.new(tokens)
91
+ end
92
+
93
+ def regex!(i)
94
+ escape = false
95
+ found_close = false
96
+ src = ""
97
+
98
+ until found_close or i > @end
99
+ char = @src[i]
100
+ i += 1
101
+
102
+ if escape
103
+ src << char
104
+ escape = false
105
+ next
106
+ end
107
+
108
+ case char
109
+ when ESCAPE
110
+ escape = true
111
+ when REGEX_MARKER
112
+ found_close = true
113
+ else
114
+ src << char
115
+ end
116
+ end
117
+
118
+ raise Error, "Unclosed regex" if !found_close
119
+ raise Error, "Trailing escape" if escape
120
+ return i, Regex.new(src)
121
+ end
33
122
  end
34
123
  end
data/lib/fop/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Fop
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fop_lang
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jordan Hollinger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-15 00:00:00.000000000 Z
11
+ date: 2021-08-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A micro expression language for Filter and OPerations on text
14
14
  email: jordan.hollinger@gmail.com