cannonbol 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +420 -0
- data/Rakefile +3 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/cannonbol.gemspec +36 -0
- data/lib/cannonbol.rb +664 -0
- data/lib/cannonbol/version.rb +3 -0
- data/tasks/rspec.rake +3 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f21caaed4e70356c5f043a524241bf103d8eddd8
|
4
|
+
data.tar.gz: 4ce959eb2fa8b569860d59bbb3de119b2b5c11ae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 98cf5aa3d8ea84c36503b5cfde8d22f833957ac44c650bf0629bf8ca44b672cc56d66408e0ce732ebc83c6ee3d71923a7497ca7c2cecd70fa8e37e149db0f5b4
|
7
|
+
data.tar.gz: 0ac7d0acbb284131ab62eca5456ad3152877949c51282cf40cc11a896fb1e752028bc1f66f036e0e4157e35a4afca933c936c3846212c2d3cb175c7bf4b494a8
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
|
4
|
+
|
5
|
+
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, age, or religion.
|
6
|
+
|
7
|
+
Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
|
8
|
+
|
9
|
+
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
|
10
|
+
|
11
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
|
12
|
+
|
13
|
+
This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 catmando
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,420 @@
|
|
1
|
+
# CannonBol
|
2
|
+
|
3
|
+
CannonBol is a ruby DSL for pattern matching based on SNOBOL4 and SPITBOL.
|
4
|
+
|
5
|
+
* Makes complex patterns easier to read and write!
|
6
|
+
* Combine regexes, plain strings and powerful new primitive match functions!
|
7
|
+
* Makes capturing match results easy!
|
8
|
+
* Allows recursive patterns!
|
9
|
+
* Complete SNOBOL4 + SPITBOL extensions!
|
10
|
+
* Based on the well documented, proven SNOBOL4 language!
|
11
|
+
* Simple syntax looks great alongside ruby!
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'cannonbol'
|
19
|
+
```
|
20
|
+
|
21
|
+
And then execute:
|
22
|
+
|
23
|
+
$ bundle
|
24
|
+
|
25
|
+
Or install it yourself as:
|
26
|
+
|
27
|
+
$ gem install cannonbol
|
28
|
+
|
29
|
+
## Lets Go!
|
30
|
+
|
31
|
+
### Basic Matching `- &, |, capture?, match_any, match_all`
|
32
|
+
|
33
|
+
Strings, Regexes and primitives are combined using & (concatenation) and | (alternation) operators
|
34
|
+
|
35
|
+
Here is a simple pattern that matches a simple noun clause:
|
36
|
+
|
37
|
+
("a" | "the") & /\s+/ & ("boy" | "girl")
|
38
|
+
|
39
|
+
So we will match either "a" or "the" followed by white space and then by "boy" or "girl". Okay! Let's use it!
|
40
|
+
|
41
|
+
(("a" | "the") & /\s+/ & ("boy" | "girl")).match?("he saw a boy going home")
|
42
|
+
=> "a boy"
|
43
|
+
(("a" | "the") & /\s+/ & ("boy" | "girl")).match?("he saw a big boy going home")
|
44
|
+
=> nil
|
45
|
+
|
46
|
+
Now let's save the pieces of the match using the capture? (pronounced _capture IF_) method:
|
47
|
+
|
48
|
+
article, noun = nil, nil
|
49
|
+
pattern = ("a" | "the").capture? { |m| article = m } & /\s+/ & ("boy" | "girl").capture? { |m| noun = m }
|
50
|
+
pattern.match?("he saw the girl going home")
|
51
|
+
noun
|
52
|
+
=> girl
|
53
|
+
article
|
54
|
+
=> the
|
55
|
+
|
56
|
+
The capture? method and its friend capture! (pronounced _capture NOW_) have many powerful features. As shown above it can take a block which is passed the matching substring, _IF the match succeeds_. The other features of the capture method will be detailed [below](#advanced-capture-techniques).
|
57
|
+
|
58
|
+
Arrays can be turned into patterns using the match_any and match_all methods:
|
59
|
+
|
60
|
+
ARTICLES = ["a", "the"]
|
61
|
+
NOUNS = ["boy", "girl", "dog", "cat"]
|
62
|
+
ADJECTIVES = ["big", "small", "fierce", "friendly"]
|
63
|
+
WS = /\s+/
|
64
|
+
[ARTICLES.match_any, [WS, [WS, ADJECTIVES.match_any, WS].match_all].match_any, NOUNS.match_any].match_all
|
65
|
+
|
66
|
+
This is equivalent to
|
67
|
+
|
68
|
+
("a" | "the") & (WS | (WS & ("big" | "small" | "fierce" | "friendly") & WS)) & ("boy" | "girl" | "dog" | "cat")
|
69
|
+
|
70
|
+
### match? options
|
71
|
+
|
72
|
+
The match? method shown above takes a couple of options to globally control the match process:
|
73
|
+
|
74
|
+
option | default | meaning
|
75
|
+
------|-----|-----
|
76
|
+
ignore_case | false | When on, the basic regex and string pattern will NOT be case sensitive.
|
77
|
+
anchor | false | When on, pattern matching must begin at the first character. Normally the matcher will keep moving the starting character to the right, until the match succeeds.
|
78
|
+
raise_error | false | When on, a match failure will raise Cannonbol::MatchFailed.
|
79
|
+
replace_with | nil | When a non-falsy value is supplied, the value will replace the matched portion of the string, and the entire string will be returned. Normally only the matched portion of the string is returned.
|
80
|
+
|
81
|
+
Example of replace with:
|
82
|
+
|
83
|
+
"hello".match?("She said hello!")
|
84
|
+
=> hello
|
85
|
+
"hello".match?("She said hello!", replace_with: "goodby")
|
86
|
+
=> She said goodby!
|
87
|
+
|
88
|
+
### Patterns, Subjects, Cursors, Alternatives, and Backtracking
|
89
|
+
|
90
|
+
A pattern is an object that responds to the match? method. Cannonbol adds the match? method to Ruby strings, and regexes, and provides a number of _primitive_ patterns. A pattern can be combined with another pattern using the &, and | operators. There are also several primitive patterns that take a pattern and create a new pattern. Here are some example patterns:
|
91
|
+
|
92
|
+
"hello" # matches any string containing hello
|
93
|
+
/\s+/ # matches one or more white space characters
|
94
|
+
"hello" & /\s+/ & "there" # matches "hello" and "there" separated by white space
|
95
|
+
"hello" | "goodby" # matches EITHER "hello" or "goodby"
|
96
|
+
ARB # a primitive pattern that matches anything (similar to /.*/)
|
97
|
+
("hello" | "goodby") & ARB & "Fred" # matches "hello" or "goodby" followed by any characters and finally "Fred"
|
98
|
+
|
99
|
+
Patterns are just objects, so they can be assigned to variables:
|
100
|
+
|
101
|
+
greeting = "hello" | "goodby"
|
102
|
+
names = "Fred" | "Suzy"
|
103
|
+
ws = /\s+/
|
104
|
+
greeting & ws & names # matches "hello Fred" or "goodby Suzy"
|
105
|
+
|
106
|
+
The first parameter of the match? method is the subject string. The subject string is matched left to right driven by the pattern object. Normally the matcher will attempt to match starting at the first character. If no match is found, then
|
107
|
+
matching begins again one character to the right. This continues until a match is made, or there are insufficient characters to make a match. This behavior can be turned off by specifying `anchor: true` in the match? options hash.
|
108
|
+
|
109
|
+
The current position of the matcher in the string is the _cursor_. The cursor begins at zero and as each character is matched it moves to the right. If the match fails (and anchor is false) then the match is restarted with the cursor at position 1, etc.
|
110
|
+
|
111
|
+
Alternatives are considered left to right as specified in the pattern. Once an alternative is matched, the matcher moves on to the next part of the match, but it does remember the alternative, and if matching fails at a later component, the matcher will back up and try the next alternative. For example:
|
112
|
+
|
113
|
+
a_pattern = "a" | "aaa" | "aa"
|
114
|
+
b_pattern = "b" | "aaabb" | "abbbc"
|
115
|
+
c_pattern = "cc"
|
116
|
+
(a_pattern & b_pattern & c_pattern).match?("aaabbbccc")
|
117
|
+
|
118
|
+
* "a" is matched from a_pattern, and then we move to b_pattern.
|
119
|
+
* None of the alternatives in b_pattern can match, so we backtrack and try the next alternative in the a_pattern,
|
120
|
+
* "aaa" now matches, and so we move back to the b_pattern and start at the first alternative,
|
121
|
+
* "b" now matches, and so we move to the c_pattern,
|
122
|
+
* None of the alternatives in the c_pattern can match, so we move back to the b_pattern,
|
123
|
+
* None of the remaining alternatives in the b_pattern match, so we move back to the a_pattern,
|
124
|
+
* "aa" now matches, and so we move to the b_pattern, which can only match its last alternative, and
|
125
|
+
* finally we complete the match!
|
126
|
+
|
127
|
+
For a more complete explanation see the [SNOBOL4 manual Chapter 2](http://www.math.bas.bg/bantchev/place/snobol/gpp-2ed.pdf)
|
128
|
+
|
129
|
+
Bottom line is the matcher will try every possible option until a match is made or the match fails.
|
130
|
+
|
131
|
+
### Basic Primitive Patterns
|
132
|
+
|
133
|
+
Cannonbol includes the complete set of SNOBOL4 + SPITBOL primitive patterns and functions. These are added to the ruby name space via the Object class, and so are available everywhere.
|
134
|
+
|
135
|
+
`REM` Match 0 or more characters to the end of the subject string.
|
136
|
+
|
137
|
+
`("the" & REM).match?("he saw the small boy") === "the small boy"`
|
138
|
+
|
139
|
+
`ARB` Match 0 or more characters. ARB first tries to match zero characters, then 1 character, then 2 until the match succeeds. It is roughly equivalent to `/.*/`, except the regex will NOT backtrack like ARB will.
|
140
|
+
|
141
|
+
`("the" & ARB & "boy").match?("he saw the small boy running") === "the small boy"`
|
142
|
+
|
143
|
+
`LEN(n)` Match any n characters. Equivalent to `/.{n}/`
|
144
|
+
|
145
|
+
`POS(x)` Match ONLY if current cursor is at x. POS(0) is the start of the string.
|
146
|
+
|
147
|
+
`(POS(5) & ARB & POS(7)).match?("01234567") === "56"`
|
148
|
+
|
149
|
+
`RPOS(x)` Just like POS except measured from the end of the string. I.e. RPOS(0) is just after the last character.
|
150
|
+
|
151
|
+
`("hello" & RPOS(0)).match?("she said hello!")` would fail.
|
152
|
+
|
153
|
+
`TAB(x)` Is equivalent to `ARB & POS(x)`. In other words, match zero or more characters up to the x'th character. Fails if x < the current cursor.
|
154
|
+
|
155
|
+
`RTAB(x)` You guessed it === `ARB & RPOS(x)`
|
156
|
+
|
157
|
+
`ANY(s)` Will match 1 character in s. So if s = "ABC" it will match A or B or C. Regexes are generally more useful.
|
158
|
+
|
159
|
+
`NOTANY(s)` Will match 1 character as long as its NOT in s.
|
160
|
+
|
161
|
+
`SPAN(s)` Matches 1 or more from s. Again regexes are generally easier to write.
|
162
|
+
|
163
|
+
`BREAK(s)` Matches 0 or more characters until a character in s is hit.
|
164
|
+
|
165
|
+
`BREAKX(s)` Woah... like BREAK, but if the match fails, then it will skip the character and try again. Huh!
|
166
|
+
|
167
|
+
`ARBNO(pat)` Match pat zero or more times.
|
168
|
+
|
169
|
+
`POS(0) & /\w+/ & ARBNO(/\s*,\s*/ & /\w+/) & /\s*/ & RPOS(0)` will match a list of identifiers separated by commas.
|
170
|
+
|
171
|
+
### Delayed Evaluation of Primitive Pattern Parameters
|
172
|
+
|
173
|
+
There are several cases where it is useful to delay the evaluation of a primitive pattern arguments until the match is
|
174
|
+
being made, rather than when the pattern is created.
|
175
|
+
|
176
|
+
To allow for this all primitive patterns can take a block. The block is evaluated when the matcher encounters the primitive, and the result of the block is used as the argument to the pattern.
|
177
|
+
|
178
|
+
Here is a method that will parse a set of fixed width fields, where the widths are supplied as arguments to the method:
|
179
|
+
|
180
|
+
def parse(s, *widths)
|
181
|
+
fields = []
|
182
|
+
(ARBNO(LEN {widths.shift}.capture? {|field| fields << field}) & RPOS(0)).match?(s)
|
183
|
+
fields
|
184
|
+
end
|
185
|
+
|
186
|
+
To really get into the power of delayed evaluation however we need to add two more concepts:
|
187
|
+
|
188
|
+
The MATCH primitive, and the capture! (pronounced _capture NOW_) method.
|
189
|
+
|
190
|
+
The capture? (pronounced _capture IF_) method executes when the match has completed successfully. In contrast the capture! method calls its block as soon as its sub-pattern matches. Using capture! allows you to pick up values during one phase of the match and then use those values later.
|
191
|
+
|
192
|
+
Meanwhile MATCH takes a pattern as its argument (like ARBNO) but will only match the pattern once. The power in MATCH is when it is used with a delayed evaluation block. Together MATCH and capture! allow for patterns that are much more powerful than simple regexes. For example here is a palindrome matcher:
|
193
|
+
|
194
|
+
palindrome = MATCH do | ; c|
|
195
|
+
/\W*/ & LEN(1).capture! { |m| c = m } & /\W*/ & ( palindrome | LEN(1) | LEN(0)) & /\W*/ & MATCH { c }
|
196
|
+
end
|
197
|
+
|
198
|
+
Lets see it again with some comments
|
199
|
+
|
200
|
+
palindrome = MATCH do | ; c |
|
201
|
+
# By putting the MATCH pattern in a block to be evaluated later we can use palindrome in its definition.
|
202
|
+
# Just to keep things clean and robust we declare c (the character matched) as local to the block.
|
203
|
+
|
204
|
+
/\W*/ & # skip any white space
|
205
|
+
LEN(1).capture! { |m| c = m } & # grab the next character now and save it in c
|
206
|
+
/\W*/ & # skip more white space
|
207
|
+
( # now there are three possibilities:
|
208
|
+
palindrome | # there are more characters on the left side of the palindrome OR
|
209
|
+
LEN(1) | # we are at the middle ODD character OR
|
210
|
+
LEN(0) # the palindrome has an even number of characters
|
211
|
+
) & # now that we have the left half matched, we match the right half
|
212
|
+
/\W*/ & # skip any white space and finally
|
213
|
+
MATCH { c } # match the same character on the left now on the far right
|
214
|
+
|
215
|
+
end
|
216
|
+
|
217
|
+
palindrome.match?("A man, a plan, a canal, Panama!")
|
218
|
+
|
219
|
+
Using MATCH to define recursive patterns makes Cannonbol into a full blown BNF parser. See the example [email address parser](#a-complete-real-world-example).
|
220
|
+
|
221
|
+
### Advanced capture techniques
|
222
|
+
|
223
|
+
Both capture? and capture! have a number of useful features.
|
224
|
+
|
225
|
+
* They can take a block which is passed the matching substring.
|
226
|
+
* As well as the current match, they can pass the current cursor position and the current value of capture variable.
|
227
|
+
* They can take a symbol parameter i.e. `capture?(:data)` which will save the value under the name :data.
|
228
|
+
* The block can be used to update the capture variable before it is saved.
|
229
|
+
* They can capture an array of values in a single capture variable.
|
230
|
+
|
231
|
+
#### Passing a block to a capture method
|
232
|
+
|
233
|
+
This is the most general way of capturing a submatch. For example
|
234
|
+
|
235
|
+
word = /\W*/ & /\w+/.capture? { |match| words << match } & /\W*/
|
236
|
+
|
237
|
+
will shovel each word it matches into the words array. You could use it like this:
|
238
|
+
|
239
|
+
words = []
|
240
|
+
ARBNO(word).match?("a big strange, long sentence!")
|
241
|
+
|
242
|
+
Using `capture? { |m| puts m }` is handy for debugging your patterns.
|
243
|
+
|
244
|
+
#### Current cursor position
|
245
|
+
|
246
|
+
The second parameter of the capture block will receive the current cursor position. For example
|
247
|
+
|
248
|
+
("i".capture! { |m, p| puts "i found at #{p-1}"} & RPOS(0)).match?("I said hello!", ignore_case: true)
|
249
|
+
=> i found at 0
|
250
|
+
=> i found at 4
|
251
|
+
|
252
|
+
Notice the use of RPOS(0) which will force the pattern to look at every character in the subject, until the pattern finally fails. By using capture! (capture NOW) we record every hit, even though the pattern fails in the end.
|
253
|
+
|
254
|
+
#### Using capture variables
|
255
|
+
|
256
|
+
If the capture methods are supplied with a symbol, then the captured value will be saved in an internal capture variable. For example:
|
257
|
+
|
258
|
+
some_pattern.capture!(:value)
|
259
|
+
|
260
|
+
would save the string matched by some_pattern into the capture variable called :value.
|
261
|
+
|
262
|
+
There are a couple of ways to retrieve the capture variables:
|
263
|
+
|
264
|
+
Any primitive pattern that takes a parameter can use the value of a capture variable. So for example `LEN(:foo)` means
|
265
|
+
take the current value of the capture variable :foo as the parameter to LEN.
|
266
|
+
|
267
|
+
We can use this to clean up the palindrome pattern a little bit:
|
268
|
+
|
269
|
+
palindrome = /\W*/ & LEN(1).capture!(:c) & /\W*/ & ( MATCH{palindrome} | LEN(1) | LEN(0) ) & /\W*/ & MATCH(:c)
|
270
|
+
|
271
|
+
Another way to get the capture variables is to interrogate the value returned by match?. The value returned by match? is a subclass of string, that has some extra methods. One of these is the captured method which gives a hash of all the captured variables. For example:
|
272
|
+
|
273
|
+
("dog" | "cat").capture?(:pet).match?("He had a dog named Spot.").captured[:pet]
|
274
|
+
=> dog
|
275
|
+
|
276
|
+
You can also give a block to the match? method which will be called whether the match passes or not. For example:
|
277
|
+
|
278
|
+
("dog" | "cat").capture?(:pet).match?("He had a dog named Spot."){ |match| match.captured[:pet] if match}
|
279
|
+
=> dog
|
280
|
+
|
281
|
+
The match? block can also explicitly name any capture variables you need to get the values of. So for example:
|
282
|
+
|
283
|
+
pet_data = (POS(0) & ARBNO(("big" | "small").capture?(:size) | ("dog" | "cat").capture?(:pet) | LEN(1)) & RPOS(0))
|
284
|
+
pet_data.match?("He has a big dog!") { |m, pet, size| "type of pet: #{pet.upcase}, size: #{size.upcase}"}
|
285
|
+
=> type of pet: DOG, size: BIG
|
286
|
+
|
287
|
+
If the match? block mentions capture variables that were not assigned in the match they get nil.
|
288
|
+
|
289
|
+
#### Initializing capture variables
|
290
|
+
|
291
|
+
When used as a parameter to a primitive the capture variable may be given an initial value. For example:
|
292
|
+
|
293
|
+
LEN(baz: 12)
|
294
|
+
|
295
|
+
would match LEN(12) if :baz had not yet been set.
|
296
|
+
|
297
|
+
A second way to initialize (or update capture variables) is to combine capture variables with a capture block like this:
|
298
|
+
|
299
|
+
some_pattern.capture!(:baz) { |match, position, baz| baz || position * 2 } initializes :baz to position * 2
|
300
|
+
|
301
|
+
If a symbol is specified in a capture!, and there is a block, then the symbol will be set to the value returned by the block.
|
302
|
+
|
303
|
+
#### Capturing arrays of data
|
304
|
+
|
305
|
+
To capture all the words into a capture variable as an array you could do this:
|
306
|
+
|
307
|
+
words = []
|
308
|
+
word = /\W*/ & /\w+/.capture?(:words) { |match| words << match } & /\W*/
|
309
|
+
|
310
|
+
This can be shortened to:
|
311
|
+
|
312
|
+
word = /\W*/ & /\w+/.capture?(:words => []) & /\W*/
|
313
|
+
|
314
|
+
This works because anytime there is a 1) capture with a capture variable that is 2) holding an array, 3) that does NOT have a block, the capture method will go ahead and shovel the captured value into the capture variable. Note this behavior can be overridden if needed by including a block.
|
315
|
+
|
316
|
+
#### Capture variables and nested patterns
|
317
|
+
|
318
|
+
Each time MATCH, or ARBNO is called the current state of any known capture variables are saved, and those values will be restored when the MATCH/ARBNO exits. If new capture variables are introduced by the nested pattern, these new values will be merged with the existing set of variables.
|
319
|
+
|
320
|
+
More powerful yet is the fact that every match string sent to a capture variable has access to all the values captured so far via the captured method. For example:
|
321
|
+
|
322
|
+
subject_clause = article & noun.capture!(:subject)
|
323
|
+
object_clause = article & noun.capture!(:object)
|
324
|
+
verb_clause = ...
|
325
|
+
sentence = (subject_clause & verb_clause & object_clause & ".")
|
326
|
+
sentences = ARBNO(sentence.capture?(:sentences => [])) & RPOS(0)
|
327
|
+
sentences.match?(file_stream).captured[:sentences].collect(&:captured)
|
328
|
+
=> [{:subject => "dog", :object => "man"}, {:subject => "man", :object => "dog"} ...]
|
329
|
+
|
330
|
+
As each noun is matched, it is captured and saved in :subject or :object. When the sentence is captured, the match is shoveled away into the :sentences variable. Because the match value itself responds to the captured method we end up with all the data collected in a nice array.
|
331
|
+
|
332
|
+
Note that capture! is used for capturing the nouns. This is cheaper and does not hurt anything since the value of
|
333
|
+
the capture variable will just be overwritten.
|
334
|
+
|
335
|
+
### Advanced PRIMITIVES
|
336
|
+
|
337
|
+
There are a few more SNOBOL4 + SPITBOL primitives that are included for completeness.
|
338
|
+
|
339
|
+
`FENCE` matches the empty string, but will fail if there is an attempt to backtrack through the FENCE.
|
340
|
+
`FENCE(pattern)` will attempt to match pattern, but if an attempt is made to backtrack through the FENCE the pattern will fail.
|
341
|
+
|
342
|
+
The difference is that FENCE will fail the whole match, but FENCE(pattern) will just fail the subpattern.
|
343
|
+
|
344
|
+
`ABORT` unconditionally will exit the match.
|
345
|
+
|
346
|
+
`FAIL` will never match anything, and will force the matcher to backtrack and retry the next alternative.
|
347
|
+
|
348
|
+
`SUCCEED` will force the match to retry. The only thing that gets past `SUCCEED` is `ABORT`.
|
349
|
+
|
350
|
+
These can be used together to do some interesting things. For example
|
351
|
+
|
352
|
+
pattern = POS(0) & SUCCEED & (FENCE(TAB(n: 1).capture!(:n) { |m, p, n| puts m; p+1 } | ABORT)) & FAIL
|
353
|
+
pattern.match?("abcd")
|
354
|
+
|
355
|
+
prints
|
356
|
+
|
357
|
+
a
|
358
|
+
ab
|
359
|
+
abc
|
360
|
+
abcd
|
361
|
+
|
362
|
+
The SUCCEED and FAIL primitives keep forcing the matcher to retry. Eventually the TAB will fail causing the ABORT alternative to execute the matcher.
|
363
|
+
|
364
|
+
So it goes like this
|
365
|
+
|
366
|
+
SUCCEED
|
367
|
+
TAB(1)
|
368
|
+
FAIL
|
369
|
+
SUCCEED
|
370
|
+
TAB(2)
|
371
|
+
etc...
|
372
|
+
|
373
|
+
The FENCE keeps the matcher from backtracking into the ABORT option too early. Otherwise when the matcher hit fail, it would try different alternatives, and would hit the ABORT.
|
374
|
+
|
375
|
+
### A complete real world example
|
376
|
+
|
377
|
+
Cannonbol can be used to easily translate the email BNF spec into an email address parser.
|
378
|
+
|
379
|
+
ws = /\s*/
|
380
|
+
quoted_string = ws & '"' & ARBNO(NOTANY('"\\') | '\\"' | '\\\n' | '\\\\') & '"' & ws
|
381
|
+
atom = ws & SPAN("!#$%&'*+-/0123456789=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~") & ws
|
382
|
+
word = (atom | quoted_string)
|
383
|
+
phrase = word & ARBNO(word)
|
384
|
+
domain_ref = atom
|
385
|
+
domain_literal = "[" & /[0-9]+/ & ARBNO(/\.[0-9]+/) & "]"
|
386
|
+
sub_domain = domain_ref | domain_literal
|
387
|
+
domain = (sub_domain & ARBNO("." & sub_domain)).capture?(:domain) { |m| m.strip }
|
388
|
+
local_part = (word & ARBNO("." & word)).capture?(:local_part) { |m| m.strip }
|
389
|
+
addr_spec = (local_part & "@" & domain)
|
390
|
+
route = (ws & "@" & domain & ARBNO("@" & domain)).capture?(:route) { |m| m.strip } & ":"
|
391
|
+
route_addr = "<" & ((route | "") & addr_spec).capture?(:mailbox) { |m| m.strip } & ">"
|
392
|
+
mailbox = (addr_spec.capture?(:mailbox) { |m| m.strip } |
|
393
|
+
(phrase.capture?(:display_name) { |m| m.strip } & route_addr))
|
394
|
+
group = (phrase.capture?(:group_name) { |m| m.strip } & ":" &
|
395
|
+
(( mailbox.capture?(group_mailboxes: []) & ARBNO("," & mailbox.capture?(:group_mailboxes) ) ) | ws)) & ";"
|
396
|
+
address = POS(0) & (mailbox | group ) & RPOS(0)
|
397
|
+
|
398
|
+
So for example we can even parse an obscure email with groups and routes
|
399
|
+
|
400
|
+
email = 'here is my "big fat \\\n groupen" : someone@catprint.com, Fred Nurph<@sub1.sub2@sub3.sub4:fred.nurph@catprint.com>;'
|
401
|
+
match = address.match?(email)
|
402
|
+
match.captured[:group_mailboxes].first.captured[:mailbox]
|
403
|
+
=> someone@catprint.com
|
404
|
+
match.captured[:group_name]
|
405
|
+
=> here is my "big fat \\\n groupen"
|
406
|
+
|
407
|
+
|
408
|
+
## Development
|
409
|
+
|
410
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
411
|
+
|
412
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
413
|
+
|
414
|
+
## Contributing
|
415
|
+
|
416
|
+
1. Fork it ( https://github.com/[my-github-username]/cannonbol/fork )
|
417
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
418
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
419
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
420
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "cannonbol"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/cannonbol.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'cannonbol/version'
|
5
|
+
|
6
|
+
# Gem specification for cannonbol: name, version, authorship, packaged files
# and development-time dependencies.
Gem::Specification.new do |spec|
  spec.name          = "cannonbol"
  spec.version       = Cannonbol::VERSION
  spec.authors       = ["catmando"]
  spec.email         = ["mitch@catprint.com"]

  spec.summary       = %q{Cannonbol is a ruby dsl for pattern matching based on SNOBOL4 and SPITBOL}
  spec.description   = %q{
Makes complex patterns easier to read and write!
Combine regexes, plain strings and powerful new primitive match functions!
Makes capturing match results easy!
Allows recursive patterns!
Complete SNOBOL4 + SPITBOL extensions!
Based on the well documented, proven SNOBOL4 language!
Simple syntax looks great alongside ruby!
}
  spec.homepage      = "https://github.com/catprintlabs/cannonbol"
  spec.license       = "MIT"

  # Package every git-tracked file except test/spec/feature directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  # Executables are whatever tracked files live under exe/.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.8"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
end
|
data/lib/cannonbol.rb
ADDED
@@ -0,0 +1,664 @@
|
|
1
|
+
require "cannonbol/version"
|
2
|
+
|
3
|
+
module Cannonbol
|
4
|
+
|
5
|
+
# Raised when a match fails and the caller asked for an error instead of nil.
# Inherits from StandardError (not Exception) so that a plain `rescue` can
# handle it without also trapping interrupts and other system-level exceptions.
class MatchFailed < StandardError; end
|
6
|
+
|
7
|
+
# A String whose content is the matched portion of a subject string. It also
# remembers the subject it came from, the inclusive start/end indexes of the
# match, and a copy of the captured values handed to the constructor.
class MatchString < String

  attr_reader :captured
  attr_reader :match_start
  attr_reader :match_end

  # string      - the full subject that was matched against
  # match_start - index of the first matched character
  # match_end   - index of the last matched character (inclusive); a value
  #               below zero yields an empty match string
  # captured    - hash of captured values; a shallow copy is stored
  def initialize(string, match_start, match_end, captured)
    @cannonbol_string = string
    @match_start = match_start
    @match_end = match_end
    @captured = captured.dup
    super(@match_end < 0 ? "" : string[@match_start..@match_end])
  end

  # Returns a copy of the original subject with the matched span replaced by
  # s. The subject itself is not modified. s is converted with to_s, so
  # non-String replacement values (symbols, numbers, ...) are accepted.
  def replace_match_with(s)
    # An empty match has match_end == match_start - 1 (or -1 at the very
    # start), which must replace zero characters rather than a negative count.
    matched_length = [@match_end - @match_start + 1, 0].max
    @cannonbol_string.dup.tap do |new_s|
      new_s[@match_start, matched_length] = s.to_s
    end
  end

end
|
29
|
+
|
30
|
+
# Holds the state of a match over one subject string: the cursor, the index
# where the current attempt started matching, the captured values, the blocks
# to run once the whole match has succeeded, and the case-sensitivity flag.
class Needle

  attr_reader :cursor
  attr_reader :string
  attr_accessor :captures
  attr_accessor :match_failed
  attr_accessor :ignore_case

  # string - the subject to be matched.
  def initialize(string)
    @string = string
  end

  # Runs pattern against the subject, retrying from each successive start
  # position until it matches, the needle is flagged as failed, or the last
  # character has been tried.
  #
  # opts:
  #   :anchor       - stop after the first start position if it does not match
  #   :raise_error  - raise MatchFailed instead of returning nil on failure
  #   :replace_with - return the subject with the matched span replaced by this
  #                   value (:replace_match_with is accepted as an alias)
  #   :ignore_case  - value installed as ignore_case before every attempt
  #
  # If a block is given it is always called, with the match (or nil) followed
  # by the captured values named by its remaining parameters, and its result is
  # returned. Otherwise returns a MatchString, the replaced subject, or nil.
  def thread(pattern, opts = {}, &match_block)
    @captures = {}
    anchor = opts[:anchor]
    raise_error = opts[:raise_error]
    # The README documents :replace_with; the original key is kept as an alias.
    replace_with = opts[:replace_with] || opts[:replace_match_with]
    ignore_case = opts[:ignore_case]
    @cursor = -1
    match = nil
    begin
      # NOTE(review): the scan stops at the last character, so an empty subject
      # is never attempted -- confirm this is intended.
      while !match && !match_failed && @cursor < @string.length - 1
        @cursor += 1
        # Each attempt starts from a clean slate.
        @starting_character = nil
        @success_blocks = []
        @ignore_case = ignore_case
        match = pattern._match?(self)
        break if anchor && !match
      end
    rescue MatchFailed
      # A MatchFailed raised while matching simply ends the scan; whatever
      # `match` holds at that point (normally nil) is used below.
    end
    if match
      @success_blocks.each(&:call)
      match = MatchString.new(@string, @starting_character || @cursor, @cursor - 1, @captures)
    elsif raise_error
      raise MatchFailed
    end
    if match_block
      # Parameters after the first name the capture variables to pass in;
      # unnamed parameters and unknown variables receive nil.
      extra_params = match_block.parameters[1..-1] || []
      captured_args = extra_params.map { |(_kind, name)| name && @captures[name.to_sym] }
      match = match_block.call(match, *captured_args)
    elsif replace_with && match
      # Only replace when there is a match; a failed match stays nil.
      match = match.replace_match_with(replace_with)
    end
    match
  end

  # Stores value under name (converted to a symbol) when name is truthy.
  # Always returns value.
  def capture(name, value)
    @captures[name.to_sym] = value if name
    value
  end

  # The part of the subject from the cursor to the end.
  def remaining_string
    @string[@cursor..-1]
  end

  # Advances the cursor by length, recording the start of the match if this is
  # the first advance of the attempt, and queues success_block (if given) to
  # run when the whole match succeeds. Returns the state as it was before the
  # advance so that it can later be handed to pull.
  def push(length, &success_block)
    thread_state = [@starting_character, @cursor, @success_blocks.dup, @ignore_case]
    @starting_character ||= @cursor
    @cursor += length
    @success_blocks << success_block if success_block
    thread_state
  end

  # Restores a state previously returned by push (a nil state is ignored).
  # Always returns nil.
  def pull(thread_state)
    @starting_character, @cursor, @success_blocks, @ignore_case = thread_state if thread_state
    nil
  end

end
|
99
|
+
|
100
|
+
# Mixin that gives any object implementing __match? the pattern operators
# and the public entry points for matching.
module Operators
  # Internal entry point used while matching: refuses to do any work once
  # the needle has been flagged as failed, otherwise delegates to __match?.
  def _match?(needle, *args, &block)
    needle.match_failed ? nil : __match?(needle, *args, &block)
  end

  # Matches this pattern against the string s. See Needle#thread for the
  # meaning of opts, the block, and the return value.
  def match?(s, opts = {}, &match_block)
    needle = Needle.new(s)
    needle.thread(self, opts, &match_block)
  end

  # Alternation: this pattern or the given one.
  def |(pattern)
    Choose.new(self, pattern)
  end

  # Concatenation: this pattern followed by the given one.
  def &(pattern)
    Concat.new(self, pattern)
  end

  # Unary minus: this pattern matched with case ignored.
  def -@
    CaseSensitiveOff.new(self)
  end

  # Wraps this pattern in an OnSuccess with the given options and block.
  def capture?(opts = {}, &block)
    OnSuccess.new(self, opts, &block)
  end

  # Wraps this pattern in an OnMatch with the given options and block.
  def capture!(opts = {}, &block)
    OnMatch.new(self, opts, &block)
  end
end
|
132
|
+
|
133
|
+
# Base class for all pattern primitives. It is an Array so that composite
# patterns can hold their sub-patterns as elements.
class Pattern < Array
  include Operators

  # Renders as ClassName[element, element, ...].
  def to_s
    members = map(&:to_s).join(', ')
    "#{self.class.name}[#{members}]"
  end

  # Default match: succeeds with an empty state array and leaves the needle
  # untouched.
  def __match?(needle)
    []
  end
end
|
146
|
+
|
147
|
+
# Alternation of patterns: tries each element in order.
class Choose < Pattern
  # needle - the Needle being matched
  # i      - index of the alternative to (re)start with
  # s      - saved state to re-enter alternative i with (backtracking)
  #
  # Returns [index, state] for the first alternative that matches, or nil
  # when every remaining alternative fails.
  def __match?(needle, i = 0, s = [])
    state = s
    (i...length).each do |index|
      state = self[index]._match?(needle, *state)
      return [index, state] if state

      # Later alternatives always start fresh.
      state = []
    end
    nil
  end

  def initialize(p1, p2)
    push(p1, p2)
  end
end
|
163
|
+
|
164
|
+
# Sequence of patterns: every element must match, one after the other.
class Concat < Pattern
  # needle - the Needle being matched
  # i      - index of the element to (re)start with
  # s      - per-element saved states; updated in place
  #
  # Walks forward while elements match. When element i fails, steps back to
  # element i-1 and re-enters it with its saved state so it can try another
  # way to match. Returns [last_index, states] once every element matched,
  # or nil when the walk backs out past the first element.
  def __match?(needle, i = 0, s = [])
    index = i
    while index >= 0 && index < length
      prior = s[index] || []
      s[index] = self[index]._match?(needle, *prior)
      index += s[index] ? 1 : -1
    end
    index == length ? [index - 1, s] : nil
  end

  def initialize(p1, p2)
    push(p1, p2)
  end
end
|
179
|
+
|
180
|
+
# Wraps a pattern so that it is matched with the needle's ignore_case flag on.
class CaseSensitiveOff < Pattern
  def initialize(pattern)
    @pattern = pattern
  end

  # needle - the Needle being matched
  # thread - needle snapshot from a previous call (restored first)
  # s      - saved state of the wrapped pattern
  #
  # Returns [snapshot, inner_state] when the wrapped pattern matches,
  # otherwise nil.
  def __match?(needle, thread = nil, s = [])
    needle.pull(thread)
    # A zero-length push records the current flags so they can be restored.
    saved_state = needle.push(0)
    needle.ignore_case = true
    inner_state = @pattern._match?(needle, *s)
    [saved_state, inner_state] if inner_state
  end
end
|
195
|
+
|
196
|
+
# Wraps a pattern and, once the overall match has succeeded, records the
# text the wrapped pattern matched as a capture.
class OnSuccess < Pattern
  # pattern - the wrapped pattern
  # opts    - either a capture name, or a Hash whose first pair is
  #           capture name => initial capture value
  # block   - optional; called with (match_string, position_after_match,
  #           current_capture_value), its result becomes the captured value
  def initialize(pattern, opts, &block)
    if opts.instance_of?(Hash)
      @capture_name, @initial_capture_value = opts.first unless opts.empty?
    else
      @capture_name = opts
    end
    @pattern = pattern
    @block = block
  end

  # needle          - the Needle being matched
  # thread_state    - needle snapshot from a previous call (restored first)
  # starting_cursor - where the wrapped pattern started
  # s               - saved state of the wrapped pattern
  #
  # Returns [snapshot, starting_cursor, inner_state] on success, nil otherwise.
  def __match?(needle, thread_state = nil, starting_cursor = nil, s = [])
    needle.pull(thread_state)
    starting_cursor ||= needle.cursor
    inner_state = @pattern._match?(needle, *s)
    return nil unless inner_state

    ending_cursor = needle.cursor - 1
    # The capture is deferred: the block is queued on the needle and only
    # runs if the whole match succeeds.
    deferred = needle.push(0) do
      captured = MatchString.new(needle.string, starting_cursor, ending_cursor, needle.captures)
      previous = @capture_name && begin
        if needle.captures.has_key?(@capture_name)
          needle.captures[@capture_name]
        else
          @initial_capture_value
        end
      end
      if @block
        captured = @block.call(captured, ending_cursor + 1, previous)
      elsif previous.instance_of?(Array)
        # An Array-valued capture accumulates successive matches.
        captured = previous + [captured]
      end
      needle.capture(@capture_name, captured)
    end
    [deferred, starting_cursor, inner_state]
  end
end
|
231
|
+
|
232
|
+
# Like OnSuccess, but records the capture immediately when the wrapped
# pattern matches rather than waiting for the overall match to succeed.
class OnMatch < OnSuccess
  # needle          - the Needle being matched
  # starting_cursor - where the wrapped pattern started
  # s               - saved state of the wrapped pattern
  #
  # Returns [starting_cursor, inner_state] on success, nil otherwise.
  def __match?(needle, starting_cursor = nil, s = [])
    starting_cursor ||= needle.cursor
    inner_state = @pattern._match?(needle, *s)
    return nil unless inner_state

    matched = MatchString.new(needle.string, starting_cursor, needle.cursor - 1, needle.captures)
    previous = @capture_name && begin
      if needle.captures.has_key?(@capture_name)
        needle.captures[@capture_name]
      else
        @initial_capture_value
      end
    end
    matched = @block.call(matched, needle.cursor, previous) if @block
    needle.capture(@capture_name, matched)
    [starting_cursor, inner_state]
  end
end
|
246
|
+
|
247
|
+
# Matches a pattern that is only determined at match time: produced by a
# block, looked up from a named capture, or given directly.
class Match < Pattern
  # sub_pattern_or_name - a Symbol naming a capture, an object that responds
  #                       to _match?, or anything else (converted with to_s)
  # block               - when given, takes precedence; called at match time
  #                       to produce the pattern
  def initialize(sub_pattern_or_name = nil, &block)
    if block
      @block = block
    elsif !sub_pattern_or_name
      # Nothing supplied: no pattern source is recorded.
    elsif sub_pattern_or_name.instance_of?(Symbol)
      @name = sub_pattern_or_name
    elsif sub_pattern_or_name.respond_to?("_match?")
      @pattern = sub_pattern_or_name
    elsif sub_pattern_or_name.respond_to?("to_s")
      @pattern = sub_pattern_or_name.to_s
    end
  end

  # needle  - the Needle being matched
  # pattern - the pattern resolved on a previous call (reused on re-entry)
  # s       - saved state of that pattern
  #
  # Returns [pattern, inner_state] on success, nil otherwise.
  def __match?(needle, pattern = nil, s = [])
    unless pattern
      pattern = if @block
                  @block.call
                elsif @name
                  needle.captures[@name] || ""
                else
                  @pattern
                end
    end
    saved_captures = needle.captures.dup
    inner_state = pattern._match?(needle, *s)
    # Captures that existed before the inner match keep their prior values.
    needle.captures = needle.captures.merge(saved_captures)
    [pattern, inner_state] if inner_state
  end
end
|
276
|
+
|
277
|
+
# Matches everything from the cursor to the end of the subject.
class Rem < Pattern
  # needle       - the Needle being matched
  # thread_state - needle snapshot from a previous successful call
  #
  # First call: consumes the rest of the subject and returns [snapshot].
  # Re-entry (backtracking): restores the snapshot and returns nil, since
  # there is only one way to match the remainder.
  def __match?(needle, thread_state = nil)
    if thread_state
      # Was `needle_pull(thread_state)`, an undefined method, so
      # backtracking into REM raised NoMethodError.
      needle.pull(thread_state)
    else
      [needle.push(needle.string.length - needle.cursor)]
    end
  end
end
|
288
|
+
|
289
|
+
# Matches an arbitrary run of characters, shortest first.
class Arb < Pattern
  # needle       - the Needle being matched
  # match_length - number of characters to consume on this attempt
  # thread_state - needle snapshot from the previous attempt (restored first)
  #
  # Returns [next_length, snapshot] so that re-entry tries one character
  # more, or nil when the subject has fewer than match_length characters left.
  def __match?(needle, match_length = 0, thread_state = nil)
    needle.pull(thread_state)
    return nil if needle.remaining_string.length < match_length

    snapshot = needle.push(match_length)
    [match_length + 1, snapshot]
  end
end
|
301
|
+
|
302
|
+
# Base class for primitives that take one parameter (a length, a position,
# a character set, ...) whose value may come from a capture or a block.
class ParameterizedPattern < Pattern
  # opts  - either the parameter value itself, or a Hash whose first pair is
  #         capture name => initial parameter value
  # block - optional; called with the current value to compute the one used
  def initialize(opts = nil, &block)
    if opts.instance_of?(Hash)
      @param_name, @initial_param_value = opts.first unless opts.empty?
    else
      @initial_param_value = opts
    end
    @block = block
    # NOTE(review): @param is not assigned anywhere visible in this file, so
    # the first element stored here is nil - confirm what was intended.
    push(@param, @block)
  end

  # Defines an instance method `name(needle)` returning the parameter value:
  # the capture named by the parameter if present, else the initial value,
  # passed through the instance's block when there is one. The resulting
  # value is also stored back as a capture.
  def self.parameter(name, &post_processor)
    @post_processor = post_processor
    define_method(name) do |needle|
      from_capture = @param_name && needle.captures.has_key?(@param_name)
      value = from_capture ? needle.captures[@param_name] : @initial_param_value
      value = @block.call(value) if @block
      needle.capture(@param_name, value)
      # NOTE(review): inside define_method, @post_processor is the pattern
      # instance's variable, not the class-level one assigned above, and
      # nothing visible here sets it on instances - so this line appears
      # never to run. Subclasses that pass &:split currently rely on
      # receiving the raw value; confirm intent before changing.
      value = post_processor.call(value) if @post_processor
      value
    end
  end
end
|
329
|
+
|
330
|
+
# Matches exactly `len` characters.
class Len < ParameterizedPattern
  parameter :len

  # First call: consumes len characters and returns [snapshot], or nil when
  # fewer than len characters remain. Re-entry restores the snapshot and
  # returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    wanted = len(needle)
    [needle.push(wanted)] if needle.remaining_string.length >= wanted
  end
end
|
346
|
+
|
347
|
+
# Zero-width test: succeeds when the cursor is at position `pos`.
class Pos < ParameterizedPattern
  parameter :pos

  # matched - truthy on re-entry, in which case the test fails (there is no
  #           second way to match)
  #
  # Returns [true] on success, nil otherwise.
  def __match?(needle, matched = nil)
    at_position = needle.cursor == pos(needle)
    [true] if at_position && !matched
  end
end
|
356
|
+
|
357
|
+
# Zero-width test: succeeds when exactly `pos` characters remain after the
# cursor.
class RPos < ParameterizedPattern
  parameter :pos

  # matched - truthy on re-entry, in which case the test fails
  #
  # Returns [true] on success, nil otherwise.
  def __match?(needle, matched = nil)
    characters_left = needle.string.length - needle.cursor
    at_position = characters_left == pos(needle)
    [true] if at_position && !matched
  end
end
|
366
|
+
|
367
|
+
# Matches everything from the cursor up to position `pos`.
class Tab < ParameterizedPattern
  parameter :pos

  # First call: consumes up to position pos and returns [snapshot]; returns
  # nil when pos is behind the cursor or beyond the end of the subject.
  # Re-entry restores the snapshot and returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    distance = pos(needle) - needle.cursor
    return nil if distance < 0

    [needle.push(distance)] if needle.remaining_string.length >= distance
  end
end
|
382
|
+
|
383
|
+
# Matches everything from the cursor up to the point where `pos` characters
# remain.
class RTab < ParameterizedPattern
  parameter :pos

  # First call: consumes all but the last pos characters and returns
  # [snapshot]; returns nil when fewer than pos characters remain.
  # Re-entry restores the snapshot and returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    distance = needle.remaining_string.length - pos(needle)
    return nil if distance < 0

    [needle.push(distance)] if needle.remaining_string.length >= distance
  end
end
|
397
|
+
|
398
|
+
# Matches a single character that is one of `chars`.
class Any < ParameterizedPattern
  parameter :chars, &:split

  # First call: consumes one character and returns [snapshot] when the next
  # character is in chars, otherwise nil. Re-entry restores the snapshot and
  # returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    candidates = chars(needle)
    next_char = needle.remaining_string[0..0]
    # At the end of the subject next_char is "", and String#include?("") is
    # always true - without this guard ANY "matched" a character that is not
    # there and pushed the cursor past the end of the subject.
    [needle.push(1)] if !next_char.empty? && candidates.include?(next_char)
  end
end
|
411
|
+
|
412
|
+
# Matches a single character that is not one of `chars`.
class NotAny < ParameterizedPattern
  parameter :chars, &:split

  # First call: consumes one character and returns [snapshot] unless chars
  # includes the next character, in which case it returns nil. Re-entry
  # restores the snapshot and returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    excluded = chars(needle)
    [needle.push(1)] unless excluded.include?(needle.remaining_string[0..0])
  end
end
|
425
|
+
|
426
|
+
# Matches a non-empty run of characters drawn from `chars`, longest first.
class Span < ParameterizedPattern
  parameter :chars, &:split

  # needle       - the Needle being matched
  # match_length - run length to try; computed on the first call
  # thread_state - needle snapshot from the previous attempt (restored first)
  #
  # Returns [next_shorter_length, snapshot], so each re-entry gives back one
  # character, or nil once the length to try has dropped to zero.
  def __match?(needle, match_length = nil, thread_state = nil)
    unless match_length
      allowed = chars(needle)
      tail = needle.remaining_string
      match_length = 0
      match_length += 1 while tail.length > match_length && allowed.include?(tail[match_length..match_length])
    end
    needle.pull(thread_state)
    return nil unless match_length > 0

    snapshot = needle.push(match_length)
    [match_length - 1, snapshot]
  end
end
|
446
|
+
|
447
|
+
# Matches the run of characters up to (not including) the first character
# found in `chars`, or up to the end of the subject if there is none.
class Break < ParameterizedPattern
  parameter :chars, &:split

  # First call: consumes the run and returns [snapshot]. Re-entry restores
  # the snapshot and returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    stops = chars(needle)
    tail = needle.remaining_string
    run = 0
    run += 1 while tail.length > run && !stops.include?(tail[run..run])
    [needle.push(run)]
  end
end
|
464
|
+
|
465
|
+
|
466
|
+
# Like Break, but on re-entry it extends the run past the break character it
# stopped at and continues to the next one.
class BreakX < ParameterizedPattern
  parameter :chars, &:split

  # needle       - the Needle being matched
  # len          - offset to resume scanning from
  # thread_state - needle snapshot from the previous attempt (restored first)
  #
  # Returns [resume_offset, snapshot], or nil once the resume offset lies
  # beyond the end of the subject.
  def __match?(needle, len = 0, thread_state = nil)
    needle.pull(thread_state)
    stops = chars(needle)
    tail = needle.remaining_string
    offset = len
    offset += 1 while tail.length > offset && !stops.include?(tail[offset..offset])
    [offset + 1, needle.push(offset)] if tail.length >= offset
  end
end
|
480
|
+
|
481
|
+
# Matches zero or more repetitions of a pattern, fewest first.
class Arbno < Match
  # needle  - the Needle being matched
  # pattern - the repeated pattern as resolved on the first call
  # s       - list of saved states, one per repetition, whose last entry is
  #           the slot for the repetition to attempt next
  #
  # First call (no pattern yet): resolves the pattern and succeeds without
  # consuming anything. Re-entry: runs the pattern with the last saved state;
  # on success a fresh slot is appended, on failure the last slot is dropped.
  # Returns [pattern, states], or nil when no slots are left.
  def __match?(needle, pattern = nil, s = [[]])
    return nil if s.empty?

    if pattern
      saved_captures = needle.captures.dup
      s[-1] = pattern._match?(needle, *s.last)
      s = s.last ? s + [[]] : s[0..-2]
      # Captures that existed before this repetition keep their prior values.
      needle.captures = needle.captures.merge(saved_captures)
    elsif @block
      pattern = @block.call
    elsif @name
      pattern = needle.captures[@name] || ""
    else
      pattern = @pattern
    end
    [pattern, s]
  end
end
|
503
|
+
|
504
|
+
# A pattern that never matches.
class FailPat < Pattern
  # Always returns nil (failure).
  def __match?(needle)
    nil
  end
end
|
510
|
+
|
511
|
+
# A pattern that abandons the entire match as soon as it is reached.
class Abort < Pattern
  # Raises MatchFailed instead of returning.
  def __match?(needle)
    raise(MatchFailed)
  end
end
|
518
|
+
|
519
|
+
# Prevents backtracking. With no pattern, backtracking into the fence fails
# the whole match; with a pattern, the pattern is matched once and is not
# re-entered for alternatives.
class Fence < Match
  # needle       - the Needle being matched
  # on_backtrack - nil on the first call; on re-entry, the marker this
  #                method returned earlier (:fail_match or :return_nil)
  #
  # Returns [:fail_match] (bare fence), [:return_nil] (wrapped pattern
  # matched), or nil.
  def __match?(needle, on_backtrack = nil)
    case on_backtrack
    when :fail_match
      # Flag the needle so that every later _match? call gives up.
      needle.match_failed = true
      return nil
    when :return_nil
      return nil
    end

    return [:fail_match] unless @block || @name || @pattern

    pattern = if @block
                @block.call
              elsif @name
                needle.captures[@name] || ""
              else
                @pattern
              end
    [:return_nil] if pattern._match?(needle)
  end
end
|
540
|
+
|
541
|
+
# A zero-width pattern that always matches, including every time it is
# re-entered on backtracking.
class Succeed < Pattern
  # needle       - the Needle being matched
  # thread_state - needle snapshot from the previous call (restored first)
  #
  # Returns [snapshot].
  #
  # This was defined as `_match?`, which replaced the Operators#_match?
  # wrapper and so skipped its `needle.match_failed` check; as `__match?` it
  # is reached through that wrapper like every other pattern in this file.
  def __match?(needle, thread_state = nil)
    needle.pull(thread_state)
    [needle.push(0)]
  end
end
|
547
|
+
|
548
|
+
end
|
549
|
+
|
550
|
+
# Lets a plain String be used as a pattern that matches itself literally.
class String
  include Cannonbol::Operators

  # First call: if the subject continues with this string at the cursor
  # (compared upper-cased when the needle's ignore_case flag is set), or this
  # string is empty, consumes it and returns [snapshot]; otherwise nil.
  # Re-entry restores the snapshot and returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    matched = empty? || begin
      head = needle.remaining_string[0..length - 1]
      needle.ignore_case ? head.upcase == upcase : head == self
    end
    [needle.push(length)] if matched
  end
end
|
565
|
+
|
566
|
+
# Lets a Regexp be used as a pattern that must match at the cursor.
class Regexp
  include Cannonbol::Operators

  # First call: if this regexp matches the subject starting exactly at the
  # cursor, consumes the matched text and returns [snapshot]; otherwise nil.
  # Re-entry restores the snapshot and returns nil.
  def __match?(needle, thread_state = nil)
    return needle.pull(thread_state) if thread_state

    flags = options
    flags |= Regexp::IGNORECASE if needle.ignore_case
    # The anchored copy is built per call rather than memoized on self:
    # Regexp literals are frozen on Ruby 3.0+, so assigning an instance
    # variable raises FrozenError, and a memoized copy would also keep the
    # ignore_case setting of whichever match ran first.
    # \A anchors to the start of the remaining text (^ would also match
    # after any newline), and the group keeps the anchor applied to every
    # alternative of the source.
    anchored = Regexp.new("\\A(?:#{source})", flags)
    m = anchored.match(needle.remaining_string)
    [needle.push(m[0].length)] if m
  end
end
|
580
|
+
|
581
|
+
# Helpers for building a pattern out of a collection of patterns.
module Enumerable
  # Combines the elements with `|` (alternation), left to right.
  # Returns FAIL when the first element is nil/false, which includes the
  # empty collection.
  #
  # Uses drop(1) instead of self[1..-1] so that it works for any Enumerable
  # (Range, Set, ...), not only for receivers that implement #[].
  def match_any
    head = first
    return FAIL unless head

    drop(1).inject(head) { |memo, item| memo | item }
  end

  # Combines the elements with `&` (concatenation), left to right, starting
  # from the empty string. Returns "" for an empty collection.
  def match_all
    inject("") { |memo, item| memo & item }
  end
end
|
596
|
+
|
597
|
+
|
598
|
+
# Makes the pattern primitives available everywhere as upper-case constants
# (for the parameterless ones) and upper-case factory methods.
class Object
  # Parameterless primitives, one shared instance each.
  REM     = Cannonbol::Rem.new
  ARB     = Cannonbol::Arb.new
  FAIL    = Cannonbol::FailPat.new
  ABORT   = Cannonbol::Abort.new
  FENCE   = Cannonbol::Fence.new
  SUCCEED = Cannonbol::Succeed.new

  # Each factory below forwards its argument and block unchanged to the
  # constructor of the corresponding Cannonbol class.

  # Builds a Cannonbol::Len.
  def LEN(p={}, &block)
    Cannonbol::Len.new(p, &block)
  end

  # Builds a Cannonbol::Pos.
  def POS(p=nil, &block)
    Cannonbol::Pos.new(p, &block)
  end

  # Builds a Cannonbol::RPos.
  def RPOS(p=nil, &block)
    Cannonbol::RPos.new(p, &block)
  end

  # Builds a Cannonbol::Tab.
  def TAB(p=nil, &block)
    Cannonbol::Tab.new(p, &block)
  end

  # Builds a Cannonbol::RTab.
  def RTAB(p=nil, &block)
    Cannonbol::RTab.new(p, &block)
  end

  # Builds a Cannonbol::Any.
  def ANY(p=nil, &block)
    Cannonbol::Any.new(p, &block)
  end

  # Builds a Cannonbol::NotAny.
  def NOTANY(p=nil, &block)
    Cannonbol::NotAny.new(p, &block)
  end

  # Builds a Cannonbol::Span.
  def SPAN(p=nil, &block)
    Cannonbol::Span.new(p, &block)
  end

  # Builds a Cannonbol::Break.
  def BREAK(p=nil, &block)
    Cannonbol::Break.new(p, &block)
  end

  # Builds a Cannonbol::BreakX.
  def BREAKX(p=nil, &block)
    Cannonbol::BreakX.new(p, &block)
  end

  # Builds a Cannonbol::Match.
  def MATCH(p=nil, &block)
    Cannonbol::Match.new(p, &block)
  end

  # Builds a Cannonbol::Arbno.
  def ARBNO(p=nil, &block)
    Cannonbol::Arbno.new(p, &block)
  end

  # Builds a Cannonbol::Fence around a pattern; the bare FENCE constant
  # above is the form with no pattern.
  def FENCE(p=nil, &block)
    Cannonbol::Fence.new(p, &block)
  end
end
|
data/tasks/rspec.rake
ADDED
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cannonbol
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- catmando
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-03-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.8'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.8'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: "\nMakes complex patterns easier to read and write!\nCombine regexes,
|
56
|
+
plain strings and powerful new primitive match functions!\nMakes capturing match
|
57
|
+
results easy!\nAllows recursive patterns!\nComplete SNOBOL4 + SPITBOL extensions!\nBased
|
58
|
+
on the well documented, proven SNOBOL4 language!\nSimple syntax looks great alongside
|
59
|
+
ruby!\n "
|
60
|
+
email:
|
61
|
+
- mitch@catprint.com
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- ".gitignore"
|
67
|
+
- ".rspec"
|
68
|
+
- ".travis.yml"
|
69
|
+
- CODE_OF_CONDUCT.md
|
70
|
+
- Gemfile
|
71
|
+
- LICENSE.txt
|
72
|
+
- README.md
|
73
|
+
- Rakefile
|
74
|
+
- bin/console
|
75
|
+
- bin/setup
|
76
|
+
- cannonbol.gemspec
|
77
|
+
- lib/cannonbol.rb
|
78
|
+
- lib/cannonbol/version.rb
|
79
|
+
- tasks/rspec.rake
|
80
|
+
homepage: https://github.com/catprintlabs/cannonbol
|
81
|
+
licenses:
|
82
|
+
- MIT
|
83
|
+
metadata: {}
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project:
|
100
|
+
rubygems_version: 2.2.2
|
101
|
+
signing_key:
|
102
|
+
specification_version: 4
|
103
|
+
summary: Cannonbol is a Ruby DSL for pattern matching based on SNOBOL4 and SPITBOL
|
104
|
+
test_files: []
|