regexp_parser 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  require File.expand_path("../../../helpers", __FILE__)
2
2
 
3
- %w{1.8 1.9.1 1.9.3}.each do|tc|
3
+ %w{1.8 1.9.1 1.9.3 2.x}.each do|tc|
4
4
  require File.expand_path("../test_#{tc}", __FILE__)
5
5
  end
6
6
 
metadata CHANGED
@@ -1,23 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-14 00:00:00.000000000 Z
11
+ date: 2014-10-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
- email: ammarabuali@gmail.com
14
+ email:
15
+ - ammarabuali@gmail.com
15
16
  executables: []
16
17
  extensions: []
17
- extra_rdoc_files:
18
- - ChangeLog
19
- - LICENSE
20
- - README.rdoc
18
+ extra_rdoc_files: []
21
19
  files:
22
20
  - lib/regexp_parser/ctype.rb
23
21
  - lib/regexp_parser/expression/classes/alternation.rb
@@ -44,7 +42,11 @@ files:
44
42
  - lib/regexp_parser/syntax/ruby/1.9.3.rb
45
43
  - lib/regexp_parser/syntax/ruby/1.9.rb
46
44
  - lib/regexp_parser/syntax/ruby/2.0.0.rb
45
+ - lib/regexp_parser/syntax/ruby/2.0.rb
47
46
  - lib/regexp_parser/syntax/ruby/2.1.0.rb
47
+ - lib/regexp_parser/syntax/ruby/2.1.2.rb
48
+ - lib/regexp_parser/syntax/ruby/2.1.3.rb
49
+ - lib/regexp_parser/syntax/ruby/2.1.rb
48
50
  - lib/regexp_parser/syntax/tokens/anchor.rb
49
51
  - lib/regexp_parser/syntax/tokens/assertion.rb
50
52
  - lib/regexp_parser/syntax/tokens/backref.rb
@@ -80,7 +82,6 @@ files:
80
82
  - test/parser/test_sets.rb
81
83
  - test/scanner/test_all.rb
82
84
  - test/scanner/test_anchors.rb
83
- - test/scanner/test_conditionals.rb
84
85
  - test/scanner/test_errors.rb
85
86
  - test/scanner/test_escapes.rb
86
87
  - test/scanner/test_groups.rb
@@ -88,7 +89,6 @@ files:
88
89
  - test/scanner/test_meta.rb
89
90
  - test/scanner/test_properties.rb
90
91
  - test/scanner/test_quantifiers.rb
91
- - test/scanner/test_quoting.rb
92
92
  - test/scanner/test_refcalls.rb
93
93
  - test/scanner/test_scripts.rb
94
94
  - test/scanner/test_sets.rb
@@ -96,6 +96,7 @@ files:
96
96
  - test/syntax/ruby/test_1.8.rb
97
97
  - test/syntax/ruby/test_1.9.1.rb
98
98
  - test/syntax/ruby/test_1.9.3.rb
99
+ - test/syntax/ruby/test_2.x.rb
99
100
  - test/syntax/ruby/test_all.rb
100
101
  - test/syntax/test_all.rb
101
102
  - test/test_all.rb
@@ -104,12 +105,13 @@ files:
104
105
  - VERSION.yml
105
106
  - Rakefile
106
107
  - LICENSE
107
- - README.rdoc
108
+ - README.md
108
109
  - ChangeLog
109
110
  homepage: http://github.com/ammar/regexp_parser
110
111
  licenses:
111
112
  - MIT
112
- metadata: {}
113
+ metadata:
114
+ issue_tracker: https://github.com/ammar/regexp_parser/issues
113
115
  post_install_message:
114
116
  rdoc_options:
115
117
  - --inline-source
@@ -120,7 +122,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
120
122
  requirements:
121
123
  - - '>='
122
124
  - !ruby/object:Gem::Version
123
- version: '0'
125
+ version: 1.8.7
124
126
  required_rubygems_version: !ruby/object:Gem::Requirement
125
127
  requirements:
126
128
  - - '>='
@@ -154,7 +156,6 @@ test_files:
154
156
  - test/parser/test_sets.rb
155
157
  - test/scanner/test_all.rb
156
158
  - test/scanner/test_anchors.rb
157
- - test/scanner/test_conditionals.rb
158
159
  - test/scanner/test_errors.rb
159
160
  - test/scanner/test_escapes.rb
160
161
  - test/scanner/test_groups.rb
@@ -162,7 +163,6 @@ test_files:
162
163
  - test/scanner/test_meta.rb
163
164
  - test/scanner/test_properties.rb
164
165
  - test/scanner/test_quantifiers.rb
165
- - test/scanner/test_quoting.rb
166
166
  - test/scanner/test_refcalls.rb
167
167
  - test/scanner/test_scripts.rb
168
168
  - test/scanner/test_sets.rb
@@ -170,6 +170,7 @@ test_files:
170
170
  - test/syntax/ruby/test_1.8.rb
171
171
  - test/syntax/ruby/test_1.9.1.rb
172
172
  - test/syntax/ruby/test_1.9.3.rb
173
+ - test/syntax/ruby/test_2.x.rb
173
174
  - test/syntax/ruby/test_all.rb
174
175
  - test/syntax/test_all.rb
175
176
  - test/test_all.rb
data/README.rdoc DELETED
@@ -1,307 +0,0 @@
1
- = Regexp::Parser
2
-
3
- == What?
4
- A ruby library to help with lexing, parsing, and transforming regular expressions.
5
-
6
- * Multilayered
7
-
8
- * A scanner based on ragel[http://www.complang.org/ragel/]
9
- * A lexer that produces a "stream" of tokens
10
- * A parser that produces a "tree" of Regexp::Expression objects (OO API)
11
-
12
- * Lexes and parses both 1.8 and 1.9 regular expression flavors
13
- * Supports ruby 1.8 and 1.9 runtime
14
-
15
- For an example of regexp_parser in use, see the meta_re project[https://github.com/ammar/meta_re]
16
-
17
- ---
18
- == Requirements
19
-
20
- * ruby '1.8.6'..'1.9.2'
21
- * ragel, but only if you want to hack on the scanner
22
-
23
-
24
- ---
25
- == Install
26
-
27
- gem install regexp_parser
28
-
29
- ---
30
- == Components
31
- === Scanner
32
- A ragel generated scanner that recognizes the cumulative syntax of both
33
- supported flavors. Breaks the expression's text into tokens, including
34
- their type, token, text, and start/end offsets within the original
35
- pattern.
36
-
37
- ==== Example
38
- The following scans the given pattern and prints out the type, token, text and
39
- start/end offsets for each token found.
40
-
41
- require 'regexp_parser'
42
-
43
- Regexp::Scanner.scan /(ab?(cd)*[e-h]+)/ do |type, token, text, ts, te|
44
- puts "type: #{type}, token: #{token}, text: '#{text}' [#{ts}..#{te}]"
45
- end
46
-
47
- A one-liner that returns an array of the textual parts of the given pattern:
48
-
49
- Regexp::Scanner.scan( /(cat?([b]at)){3,5}/ ).map {|token| token[2]}
50
- #=> ["(", "cat", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
51
-
52
-
53
- ==== Notes
54
- * The scanner performs basic syntax error checking, like detecting missing
55
- balancing punctuation and premature end of pattern. Flavor validity checks
56
- are performed in the lexer.
57
-
58
- * To keep the scanner simple(r) and fairly reusable for other uses, it
59
- does not perform lexical analysis on the tokens, sticking to the task
60
- of tokenizing and leaving lexical analysis up to the lexer.
61
-
62
- * If the input is a ruby Regexp object, the scanner calls #source on it to
63
- get its string representation. #source does not include the options of
64
- expression (m, i, and x). To include the options in the scan, #to_s should
65
- be called on the Regexp before passing it to the scanner, or any of the
66
- higher layers.
67
-
68
-
69
- ---
70
- === Syntax
71
- Defines the supported tokens for a specific engine implementation (aka a
72
- flavor). Syntax classes act as lookup tables, and are layered to create
73
- flavor variations. Syntax only comes into play in the lexer.
74
-
75
- ==== Example
76
- The following instantiates the syntax for Ruby 1.9 and checks a couple of its
77
- implementation's features, and then does the same for Ruby 1.8:
78
-
79
- require 'regexp_parser'
80
-
81
- ruby_19 = Regexp::Syntax.new 'ruby/1.9'
82
- ruby_19.implements? :quantifier, :zero_or_one # => true
83
- ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
84
- ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
85
-
86
- ruby_18 = Regexp::Syntax.new 'ruby/1.8'
87
- ruby_18.implements? :quantifier, :zero_or_one # => true
88
- ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
89
- ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
90
-
91
-
92
- ==== Notes
93
- * Variations on a token, for example a named group with < and > vs one with a
94
- pair of single quotes, are specified with an underscore followed by two
95
- characters appended to the base token. In the previous named group example,
96
- the tokens would be :named_ab (angle brackets) and :named_sq (single quotes).
97
- These variations are normalized by the syntax to :named.
98
-
99
- ==== TODO
100
- * Add flavor limits: like Ruby 1.8's maximum allowed number of grouped
101
- expressions (253).
102
-
103
-
104
- ---
105
- === Lexer
106
- Sits on top of the scanner and performs lexical analysis on the tokens that
107
- it emits. Among its tasks are breaking quantified literal runs, collecting the
108
- emitted token structures into an array of Token objects, calculating their
109
- nesting depth, normalizing tokens for the parser, and checking if the tokens
110
- are implemented by the given syntax flavor.
111
-
112
- Token objects are Structs, basically data objects, with a few helper methods,
113
- like #next, #previous, #offsets and #length.
114
-
115
- ==== Example
116
- The following example scans the given pattern, checks it against the ruby 1.8
117
- syntax, and prints the token objects' text.
118
-
119
- require 'regexp_parser'
120
-
121
- Regexp::Lexer.scan(/a?(b)*[c]+/, 'ruby/1.8') do |token|
122
- puts "#{' ' * token.depth}#{token.text}"
123
- end
124
-
125
- A one-liner that returns an array of the textual parts of the given pattern.
126
- Compare the output with that of the one-liner example of the Scanner.
127
-
128
- Regexp::Lexer.scan( /(cat?([b]at)){3,5}/ ).map {|token| token.text}
129
- #=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
130
-
131
- ==== Notes
132
- * The default syntax is that of the latest released version of ruby.
133
-
134
- * The lexer performs some basic parsing to determine the depth of the
135
- emitted tokens. This responsibility might be relegated to the scanner.
136
-
137
-
138
- ---
139
- === Parser
140
- Sits on top of the lexer and transforms the "stream" of Token objects emitted
141
- by it into a tree of Expression objects represented by an instance of the
142
- Expression::Root class. See Expression below for more information.
143
-
144
- ==== Example
145
-
146
- require 'regexp_parser'
147
-
148
- regex = /a?(b)*[c]+/m
149
-
150
- # using #to_s on the Regexp object to include options
151
- root = Regexp::Parser.parse( regex.to_s, 'ruby/1.8')
152
-
153
- root.multiline? # => true (aliased as m?)
154
- root.case_insensitive? # => false (aliased as i?)
155
-
156
- # simple tree walking method
157
- def walk(e, depth = 0)
158
- puts "#{' ' * depth}> #{e.class}"
159
- unless e.expressions.empty?
160
- e.each {|s| walk(s, depth+1) }
161
- end
162
- end
163
-
164
- walk(root)
165
-
166
- # output
167
- > Regexp::Expression::Root
168
- > Regexp::Expression::Literal
169
- > Regexp::Expression::Group::Capture
170
- > Regexp::Expression::Literal
171
- > Regexp::Expression::CharacterSet
172
-
173
- Note: quantifiers do not appear in the output because they are members of the
174
- Expression class. See the next section for more details.
175
-
176
- ---
177
- === Expression
178
- The base class of all objects returned by the parser, implements most of the
179
- functions that are common to all expression classes.
180
-
181
- Each Expression object contains the following members:
182
-
183
- * quantifier: an instance of Expression::Quantifier that holds the details
184
- of repetition for the Expression. Has a nil value if the expression is not
185
- quantified.
186
-
187
- * expressions: an array, holds the sub-expressions for the expression if it
188
- is a group or alternation expression. Empty if the expression doesn't have
189
- sub-expressions.
190
-
191
- * options: a hash, holds the keys :i, :m, and :x with a boolean value that
192
- indicates if the expression has a given option.
193
-
194
- Expressions also contain the following "lower level" members
195
- (from the scanner/lexer)
196
-
197
- * type: a symbol, denoting the expression type, such as :group, :quantifier
198
- * token: a symbol, for the object's token, or opening token (in the case of
199
- groups and sets)
200
- * text: a string, the text of the expression (same as token for nesting expressions)
201
-
202
- Every expression also has the following methods:
203
-
204
- * to_s: returns the string representation of the expression.
205
- * <<: adds sub-expressions to the expression.
206
- * each: iterates over the expression's sub-expressions, if any.
207
- * []: access sub-expressions by index.
208
- * quantified?: returns true if the expression was followed by a quantifier.
209
- * quantity: returns an array of the expression's min and max repetitions.
210
- * greedy?: returns true if the expression's quantifier is greedy.
211
- * reluctant? or lazy?: returns true if the expression's quantifier is
212
- reluctant.
213
- * possessive?: returns true if the expression's quantifier is possessive.
214
- * multiline? or m?: returns true if the expression has the m option
215
- * case_insensitive? or ignore_case? or i?: returns true if the expression
216
- has the i option
217
- * free_spacing? or extended? or x?: returns true if the expression has the x
218
- option
219
-
220
- A special expression class Expression::Sequence is used to hold the array of
221
- possible alternatives within an Expression::Alternation expression.
222
-
223
-
224
- == Scanner Syntax
225
- The following syntax elements are supported by the scanner.
226
-
227
- - Alternation: a|b|c, etc.
228
- - Anchors: ^, $, \b, etc.
229
- - Character Classes (aka Sets): [abc], [^\]]
230
- - Character Types: \d, \H, \s, etc.
231
- - Escape Sequences: \t, \+, \?, etc.
232
- - Grouped Expressions
233
- - Assertions
234
- - Lookahead: (?=abc)
235
- - Negative Lookahead: (?!abc)
236
- - Lookbehind: (?<=abc)
237
- - Negative Lookbehind: (?<\!abc)
238
- - Atomic: (?>abc)
239
- - Back-references:
240
- - Named: \k<name>
241
- - Nest Level: \k<n-1>
242
- - Numbered: \k<1>
243
- - Relative: \k<-2>
244
- - Capturing: (abc)
245
- - Comment: (?# comment)
246
- - Named: (?<name>abc)
247
- - Options: (?mi-x:abc)
248
- - Passive: (?:abc)
249
- - Sub-expression Calls: \g<name>, \g<1>
250
- - Literals: abc, def?, etc.
251
- - POSIX classes: [:alpha:], [:print:], etc.
252
- - Quantifiers
253
- - Greedy: ?, *, +, {m,M}
254
- - Reluctant: ??, *?, +?, {m,M}?
255
- - Possessive: ?+, *+, ++, {m,M}+
256
- - String Escapes
257
- - Control: \C-C, \cD, etc.
258
- - Hex: \x20, \x{701230}, etc.
259
- - Meta: \M-c, \M-\C-C etc.
260
- - Octal: \0, \01, \012
261
- - Unicode: \uHHHH, \u{H+ H+}
262
- - Traditional Back-references: \1 thru \9
263
- - Unicode Properties:
264
- - Age: \p{Age=2.1}, \P{age=5.2}, etc.
265
- - Classes: \p{Alpha}, \P{Space}, etc.
266
- - Derived Properties: \p{Math}, \P{Lowercase}, etc.
267
- - General Categories: \p{Lu}, \P{Cs}, etc.
268
- - Scripts: \p{Arabic}, \P{Hiragana}, etc.
269
- - Simple Properties: \p{Dash}, \p{Extender}, etc.
270
-
271
- See something missing? Please submit an issue[https://github.com/ammar/regexp_parser/issues]
272
-
273
- == References
274
- Documentation and information being read while working on this project.
275
-
276
- ==== Ruby Flavors
277
- * Oniguruma Regular Expressions link[http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt]
278
- * Read Ruby > Regexps link[http://ruby.runpaint.org/regexps]
279
-
280
-
281
- ==== General
282
- * Enumerating the strings of regular languages link[http://www.cs.dartmouth.edu/~doug/nfa.ps.gz]
283
- * Mastering Regular Expressions, By Jeffrey E.F. Friedl (2nd Edition) book[http://oreilly.com/catalog/9781565922570/]
284
- * Regular Expression Flavor Comparison link[http://www.regular-expressions.info/refflavors.html]
285
-
286
-
287
- ==== Unicode
288
- * Unicode Derived Properties link[http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt]
289
- * Unicode Explained, By Jukka K. Korpela. book[http://oreilly.com/catalog/9780596101213]
290
- * Unicode Property Aliases link[http://www.unicode.org/Public/UNIDATA/PropertyAliases.txt]
291
- * Unicode Regular Expressions link[http://www.unicode.org/reports/tr18/]
292
- * Unicode Standard Annex #44 link[http://www.unicode.org/reports/tr44/]
293
-
294
- == Thanks
295
- This work is based on and inspired by the hard work and ideas of many people,
296
- directly or indirectly. The following are only a few of those that should be
297
- thanked.
298
-
299
- * Adrian Thurston, for developing ragel[http://www.complang.org/ragel/].
300
- * Caleb Clausen, for feedback, which inspired this, valuable insights on structuring the parser,
301
- and lots of cool code[http://github.com/coatl].
302
- * Jan Goyvaerts, for his excellent resource[http://www.regular-expressions.info] on regular expressions. I owe him a "steak dinner", at least.
303
- * Run Paint Run Run, for his work on Read[http://ruby.runpaint.org/] Ruby
304
- * Yukihiro Matsumoto, of course! For "The Ruby", of course!
305
-
306
- == Copyright
307
- Copyright (c) 2010 Ammar Ali. See LICENSE file for details.
@@ -1,31 +0,0 @@
1
- require File.expand_path("../../helpers", __FILE__)
2
-
3
- class ScannerConditionals < Test::Unit::TestCase
4
-
5
- tests = {
6
- /(?(1)Y|N)/ => [0, :conditional, :open, '(?(', 0, 3],
7
- /(?(2)Y|N)/ => [1, :conditional, :condition, '2', 3, 4],
8
- /(?(3)Y|N)/ => [2, :conditional, :yes, 'Y', 5, 6],
9
-
10
- #"(?(<name>)Y|N)" => [0, :conditional, :condition, '(?(<name>)Y|N)', 0, 14],
11
- #"(?('name')Y|N)" => [0, :conditional, :yes, "(?('name')Y|N)", 0, 14],
12
- }
13
-
14
- count = 0
15
- tests.each do |pattern, test|
16
- define_method "test_scan_#{test[1]}_#{test[2]}_#{count+=1}" do
17
-
18
- tokens = RS.scan(pattern)
19
- token = tokens[test[0]]
20
- assert_equal( test[1,5], token )
21
-
22
- end
23
- end
24
-
25
- #def test_scanner_quote
26
- # tokens = RS.scan('a\QX\Eb\QX\Ec')
27
- # puts tokens.inspect
28
- # #assert_equal( tokens[0] )
29
- #end
30
-
31
- end