regexp_parser 0.1.5 → 0.1.6

@@ -1,6 +1,6 @@
  require File.expand_path("../../../helpers", __FILE__)
 
- %w{1.8 1.9.1 1.9.3}.each do|tc|
+ %w{1.8 1.9.1 1.9.3 2.x}.each do|tc|
    require File.expand_path("../test_#{tc}", __FILE__)
  end
 
metadata CHANGED
@@ -1,23 +1,21 @@
  --- !ruby/object:Gem::Specification
  name: regexp_parser
  version: !ruby/object:Gem::Version
- version: 0.1.5
+ version: 0.1.6
  platform: ruby
  authors:
  - Ammar Ali
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-01-14 00:00:00.000000000 Z
+ date: 2014-10-06 00:00:00.000000000 Z
  dependencies: []
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
- email: ammarabuali@gmail.com
+ email:
+ - ammarabuali@gmail.com
  executables: []
  extensions: []
- extra_rdoc_files:
- - ChangeLog
- - LICENSE
- - README.rdoc
+ extra_rdoc_files: []
  files:
  - lib/regexp_parser/ctype.rb
  - lib/regexp_parser/expression/classes/alternation.rb
@@ -44,7 +42,11 @@ files:
  - lib/regexp_parser/syntax/ruby/1.9.3.rb
  - lib/regexp_parser/syntax/ruby/1.9.rb
  - lib/regexp_parser/syntax/ruby/2.0.0.rb
+ - lib/regexp_parser/syntax/ruby/2.0.rb
  - lib/regexp_parser/syntax/ruby/2.1.0.rb
+ - lib/regexp_parser/syntax/ruby/2.1.2.rb
+ - lib/regexp_parser/syntax/ruby/2.1.3.rb
+ - lib/regexp_parser/syntax/ruby/2.1.rb
  - lib/regexp_parser/syntax/tokens/anchor.rb
  - lib/regexp_parser/syntax/tokens/assertion.rb
  - lib/regexp_parser/syntax/tokens/backref.rb
@@ -80,7 +82,6 @@ files:
  - test/parser/test_sets.rb
  - test/scanner/test_all.rb
  - test/scanner/test_anchors.rb
- - test/scanner/test_conditionals.rb
  - test/scanner/test_errors.rb
  - test/scanner/test_escapes.rb
  - test/scanner/test_groups.rb
@@ -88,7 +89,6 @@ files:
  - test/scanner/test_meta.rb
  - test/scanner/test_properties.rb
  - test/scanner/test_quantifiers.rb
- - test/scanner/test_quoting.rb
  - test/scanner/test_refcalls.rb
  - test/scanner/test_scripts.rb
  - test/scanner/test_sets.rb
@@ -96,6 +96,7 @@ files:
  - test/syntax/ruby/test_1.8.rb
  - test/syntax/ruby/test_1.9.1.rb
  - test/syntax/ruby/test_1.9.3.rb
+ - test/syntax/ruby/test_2.x.rb
  - test/syntax/ruby/test_all.rb
  - test/syntax/test_all.rb
  - test/test_all.rb
@@ -104,12 +105,13 @@ files:
  - VERSION.yml
  - Rakefile
  - LICENSE
- - README.rdoc
+ - README.md
  - ChangeLog
  homepage: http://github.com/ammar/regexp_parser
  licenses:
  - MIT
- metadata: {}
+ metadata:
+ issue_tracker: https://github.com/ammar/regexp_parser/issues
  post_install_message:
  rdoc_options:
  - --inline-source
@@ -120,7 +122,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
  - !ruby/object:Gem::Version
- version: '0'
+ version: 1.8.7
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
@@ -154,7 +156,6 @@ test_files:
  - test/parser/test_sets.rb
  - test/scanner/test_all.rb
  - test/scanner/test_anchors.rb
- - test/scanner/test_conditionals.rb
  - test/scanner/test_errors.rb
  - test/scanner/test_escapes.rb
  - test/scanner/test_groups.rb
@@ -162,7 +163,6 @@ test_files:
  - test/scanner/test_meta.rb
  - test/scanner/test_properties.rb
  - test/scanner/test_quantifiers.rb
- - test/scanner/test_quoting.rb
  - test/scanner/test_refcalls.rb
  - test/scanner/test_scripts.rb
  - test/scanner/test_sets.rb
@@ -170,6 +170,7 @@ test_files:
  - test/syntax/ruby/test_1.8.rb
  - test/syntax/ruby/test_1.9.1.rb
  - test/syntax/ruby/test_1.9.3.rb
+ - test/syntax/ruby/test_2.x.rb
  - test/syntax/ruby/test_all.rb
  - test/syntax/test_all.rb
  - test/test_all.rb
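The metadata above adds syntax definition files for Ruby 2.0.x and 2.1.x (lib/regexp_parser/syntax/ruby/2.0.rb, 2.1.rb, 2.1.2.rb and 2.1.3.rb) plus a test/syntax/ruby/test_2.x.rb test. A minimal sketch of how such a definition would be used, assuming the new files are reached through the same Regexp::Syntax.new name lookup shown for 'ruby/1.8' and 'ruby/1.9' in the deleted README below; the exact token coverage of the 2.x files is not part of this diff, so the results are assumptions:

  require 'regexp_parser'

  # Assumption: the new lib/regexp_parser/syntax/ruby/2.1.rb is addressable by
  # name, just like the existing 1.8/1.9 syntax files.
  ruby_21 = Regexp::Syntax.new 'ruby/2.1'

  # Same implements? check the README demonstrates for 1.8 and 1.9; the
  # returned values for 2.1 are assumed, not taken from this diff.
  ruby_21.implements? :quantifier, :zero_or_one             # => true (assumed)
  ruby_21.implements? :quantifier, :zero_or_one_possessive  # => true (assumed)
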
data/README.rdoc DELETED
@@ -1,307 +0,0 @@
- = Regexp::Parser
-
- == What?
- A ruby library to help with lexing, parsing, and transforming regular expressions.
-
- * Multilayered
-
-   * A scanner based on ragel[http://www.complang.org/ragel/]
-   * A lexer that produces a "stream" of tokens
-   * A parser that produces a "tree" of Regexp::Expression objects (OO API)
-
- * Lexes and parses both 1.8 and 1.9 regular expression flavors
- * Supports ruby 1.8 and 1.9 runtime
-
- For an example of regexp_parser in use, see the meta_re project[https://github.com/ammar/meta_re]
-
- ---
- == Requirements
-
- * ruby '1.8.6'..'1.9.2'
- * ragel, but only if you want to hack on the scanner
-
-
- ---
- == Install
-
-   gem install regexp_parser
-
- ---
- == Components
- === Scanner
- A ragel generated scanner that recognizes the cumulative syntax of both
- supported flavors. Breaks the expression's text into tokens, including
- their type, token, text, and start/end offsets within the original
- pattern.
-
- ==== Example
- The following scans the given pattern and prints out the type, token, text, and
- start/end offsets for each token found.
-
-   require 'regexp_parser'
-
-   Regexp::Scanner.scan /(ab?(cd)*[e-h]+)/ do |type, token, text, ts, te|
-     puts "type: #{type}, token: #{token}, text: '#{text}' [#{ts}..#{te}]"
-   end
-
- A one-liner that returns an array of the textual parts of the given pattern:
-
-   Regexp::Scanner.scan( /(cat?([b]at)){3,5}/ ).map {|token| token[2]}
-   #=> ["(", "cat", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
-
-
- ==== Notes
- * The scanner performs basic syntax error checking, like detecting missing
-   balancing punctuation and premature end of pattern. Flavor validity checks
-   are performed in the lexer.
-
- * To keep the scanner simple(r) and fairly reusable for other uses, it
-   does not perform lexical analysis on the tokens, sticking to the task
-   of tokenizing and leaving lexical analysis up to the lexer.
-
- * If the input is a ruby Regexp object, the scanner calls #source on it to
-   get its string representation. #source does not include the options of the
-   expression (m, i, and x). To include the options in the scan, #to_s should
-   be called on the Regexp before passing it to the scanner, or any of the
-   higher layers.
-
-
- ---
- === Syntax
- Defines the supported tokens for a specific engine implementation (aka a
- flavor). Syntax classes act as lookup tables, and are layered to create
- flavor variations. Syntax only comes into play in the lexer.
-
- ==== Example
- The following instantiates the syntax for Ruby 1.9 and checks a couple of its
- implementation features, and then does the same for Ruby 1.8:
-
-   require 'regexp_parser'
-
-   ruby_19 = Regexp::Syntax.new 'ruby/1.9'
-   ruby_19.implements? :quantifier, :zero_or_one            # => true
-   ruby_19.implements? :quantifier, :zero_or_one_reluctant  # => true
-   ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
-
-   ruby_18 = Regexp::Syntax.new 'ruby/1.8'
-   ruby_18.implements? :quantifier, :zero_or_one            # => true
-   ruby_18.implements? :quantifier, :zero_or_one_reluctant  # => true
-   ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
-
-
- ==== Notes
- * Variations on a token, for example a named group with < and > vs one with a
-   pair of single quotes, are specified with an underscore followed by two
-   characters appended to the base token. In the previous named group example,
-   the tokens would be :named_ab (angle brackets) and :named_sq (single quotes).
-   These variations are normalized by the syntax to :named.
-
- ==== TODO
- * Add flavor limits: like Ruby 1.8's maximum allowed number of grouped
-   expressions (253).
-
-
- ---
- === Lexer
- Sits on top of the scanner and performs lexical analysis on the tokens that
- it emits. Among its tasks are breaking quantified literal runs, collecting the
- emitted token structures into an array of Token objects, calculating their
- nesting depth, normalizing tokens for the parser, and checking if the tokens
- are implemented by the given syntax flavor.
-
- Token objects are Structs, basically data objects, with a few helper methods,
- like #next, #previous, #offsets and #length.
-
- ==== Example
- The following example scans the given pattern, checks it against the ruby 1.8
- syntax, and prints the token objects' text.
-
-   require 'regexp_parser'
-
-   Regexp::Lexer.scan(/a?(b)*[c]+/, 'ruby/1.8') do |token|
-     puts "#{' ' * token.depth}#{token.text}"
-   end
-
- A one-liner that returns an array of the textual parts of the given pattern.
- Compare the output with that of the one-liner example of the Scanner.
-
-   Regexp::Lexer.scan( /(cat?([b]at)){3,5}/ ).map {|token| token.text}
-   #=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
-
- ==== Notes
- * The default syntax is that of the latest released version of ruby.
-
- * The lexer performs some basic parsing to determine the depth of the
-   emitted tokens. This responsibility might be relegated to the scanner.
-
-
- ---
- === Parser
- Sits on top of the lexer and transforms the "stream" of Token objects emitted
- by it into a tree of Expression objects represented by an instance of the
- Expression::Root class. See Expression below for more information.
-
- ==== Example
-
-   require 'regexp_parser'
-
-   regex = /a?(b)*[c]+/m
-
-   # using #to_s on the Regexp object to include options
-   root = Regexp::Parser.parse( regex.to_s, 'ruby/1.8')
-
-   root.multiline?        # => true (aliased as m?)
-   root.case_insensitive? # => false (aliased as i?)
-
-   # simple tree walking method
-   def walk(e, depth = 0)
-     puts "#{' ' * depth}> #{e.class}"
-     unless e.expressions.empty?
-       e.each {|s| walk(s, depth+1) }
-     end
-   end
-
-   walk(root)
-
-   # output
-   > Regexp::Expression::Root
-    > Regexp::Expression::Literal
-    > Regexp::Expression::Group::Capture
-     > Regexp::Expression::Literal
-    > Regexp::Expression::CharacterSet
-
- Note: quantifiers do not appear in the output because they are members of the
- Expression class. See the next section for more details.
-
- ---
- === Expression
- The base class of all objects returned by the parser, implements most of the
- functions that are common to all expression classes.
-
- Each Expression object contains the following members:
-
- * quantifier: an instance of Expression::Quantifier that holds the details
-   of repetition for the Expression. Has a nil value if the expression is not
-   quantified.
-
- * expressions: an array, holds the sub-expressions for the expression if it
-   is a group or alternation expression. Empty if the expression doesn't have
-   sub-expressions.
-
- * options: a hash, holds the keys :i, :m, and :x with a boolean value that
-   indicates if the expression has a given option.
-
- Expressions also contain the following "lower level" members
- (from the scanner/lexer):
-
- * type: a symbol, denoting the expression type, such as :group, :quantifier
- * token: a symbol, for the object's token, or opening token (in the case of
-   groups and sets)
- * text: a string, the text of the expression (same as token for nesting expressions)
-
- Every expression also has the following methods:
-
- * to_s: returns the string representation of the expression.
- * <<: adds sub-expressions to the expression.
- * each: iterates over the expression's sub-expressions, if any.
- * []: accesses sub-expressions by index.
- * quantified?: returns true if the expression was followed by a quantifier.
- * quantity: returns an array of the expression's min and max repetitions.
- * greedy?: returns true if the expression's quantifier is greedy.
- * reluctant? or lazy?: returns true if the expression's quantifier is
-   reluctant.
- * possessive?: returns true if the expression's quantifier is possessive.
- * multiline? or m?: returns true if the expression has the m option.
- * case_insensitive? or ignore_case? or i?: returns true if the expression
-   has the i option.
- * free_spacing? or extended? or x?: returns true if the expression has the x
-   option.
-
- A special expression class Expression::Sequence is used to hold the array of
- possible alternatives within an Expression::Alternation expression.
-
-
- == Scanner Syntax
- The following syntax elements are supported by the scanner.
-
- - Alternation: a|b|c, etc.
- - Anchors: ^, $, \b, etc.
- - Character Classes (aka Sets): [abc], [^\]]
- - Character Types: \d, \H, \s, etc.
- - Escape Sequences: \t, \+, \?, etc.
- - Grouped Expressions
-   - Assertions
-     - Lookahead: (?=abc)
-     - Negative Lookahead: (?!abc)
-     - Lookbehind: (?<=abc)
-     - Negative Lookbehind: (?<!abc)
-   - Atomic: (?>abc)
-   - Back-references:
-     - Named: \k<name>
-     - Nest Level: \k<n-1>
-     - Numbered: \k<1>
-     - Relative: \k<-2>
-   - Capturing: (abc)
-   - Comment: (?# comment)
-   - Named: (?<name>abc)
-   - Options: (?mi-x:abc)
-   - Passive: (?:abc)
-   - Sub-expression Calls: \g<name>, \g<1>
- - Literals: abc, def?, etc.
- - POSIX classes: [:alpha:], [:print:], etc.
- - Quantifiers
-   - Greedy: ?, *, +, {m,M}
-   - Reluctant: ??, *?, +?, {m,M}?
-   - Possessive: ?+, *+, ++, {m,M}+
- - String Escapes
-   - Control: \C-C, \cD, etc.
-   - Hex: \x20, \x{701230}, etc.
-   - Meta: \M-c, \M-\C-C etc.
-   - Octal: \0, \01, \012
-   - Unicode: \uHHHH, \u{H+ H+}
- - Traditional Back-references: \1 thru \9
- - Unicode Properties:
-   - Age: \p{Age=2.1}, \P{age=5.2}, etc.
-   - Classes: \p{Alpha}, \P{Space}, etc.
-   - Derived Properties: \p{Math}, \P{Lowercase}, etc.
-   - General Categories: \p{Lu}, \P{Cs}, etc.
-   - Scripts: \p{Arabic}, \P{Hiragana}, etc.
-   - Simple Properties: \p{Dash}, \p{Extender}, etc.
-
- See something missing? Please submit an issue[https://github.com/ammar/regexp_parser/issues]
-
- == References
- Documentation and information read while working on this project.
-
- ==== Ruby Flavors
- * Oniguruma Regular Expressions link[http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt]
- * Read Ruby > Regexps link[http://ruby.runpaint.org/regexps]
-
-
- ==== General
- * Enumerating the strings of regular languages link[http://www.cs.dartmouth.edu/~doug/nfa.ps.gz]
- * Mastering Regular Expressions, By Jeffrey E.F. Friedl (2nd Edition) book[http://oreilly.com/catalog/9781565922570/]
- * Regular Expression Flavor Comparison link[http://www.regular-expressions.info/refflavors.html]
-
-
- ==== Unicode
- * Unicode Derived Properties link[http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt]
- * Unicode Explained, By Jukka K. Korpela. book[http://oreilly.com/catalog/9780596101213]
- * Unicode Property Aliases link[http://www.unicode.org/Public/UNIDATA/PropertyAliases.txt]
- * Unicode Regular Expressions link[http://www.unicode.org/reports/tr18/]
- * Unicode Standard Annex #44 link[http://www.unicode.org/reports/tr44/]
-
- == Thanks
- This work is based on and inspired by the hard work and ideas of many people,
- directly or indirectly. The following are only a few of those that should be
- thanked.
-
- * Adrian Thurston, for developing ragel[http://www.complang.org/ragel/].
- * Caleb Clausen, for feedback, which inspired this, valuable insights on structuring the parser,
-   and lots of cool code[http://github.com/coatl].
- * Jan Goyvaerts, for his excellent resource[http://www.regular-expressions.info] on regular expressions. I owe him a "steak dinner", at least.
- * Run Paint Run Run, for his work on Read[http://ruby.runpaint.org/] Ruby.
- * Yukihiro Matsumoto, of course! For "The Ruby", of course!
-
- == Copyright
- Copyright (c) 2010 Ammar Ali. See LICENSE file for details.
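The Expression section of the deleted README above describes the members and predicate methods without a combined example. The following is a small sketch of how those accessors fit together, reusing the /a?(b)*[c]+/ pattern from the Parser example; the return values shown are assumptions derived from the method descriptions, not output captured from this gem:

  require 'regexp_parser'

  root = Regexp::Parser.parse('a?(b)*[c]+', 'ruby/1.8')

  literal = root[0]        # [] accessor: first sub-expression, the literal 'a'
  literal.quantified?      # => true, presumably ('a' is followed by ?)
  literal.quantity         # => [0, 1] would be expected for the ? quantifier
  literal.greedy?          # => true, presumably (no reluctant/possessive suffix)

  group = root[1]          # the capture group (b)
  group.expressions.size   # => 1, presumably (the literal 'b' inside the group)
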
@@ -1,31 +0,0 @@
- require File.expand_path("../../helpers", __FILE__)
-
- class ScannerConditionals < Test::Unit::TestCase
-
-   tests = {
-     /(?(1)Y|N)/ => [0, :conditional, :open, '(?(', 0, 3],
-     /(?(2)Y|N)/ => [1, :conditional, :condition, '2', 3, 4],
-     /(?(3)Y|N)/ => [2, :conditional, :yes, 'Y', 5, 6],
-
-     #"(?(<name>)Y|N)" => [0, :conditional, :condition, '(?(<name>)Y|N)', 0, 14],
-     #"(?('name')Y|N)" => [0, :conditional, :yes, "(?('name')Y|N)", 0, 14],
-   }
-
-   count = 0
-   tests.each do |pattern, test|
-     define_method "test_scan_#{test[1]}_#{test[2]}_#{count+=1}" do
-
-       tokens = RS.scan(pattern)
-       token = tokens[test[0]]
-       assert_equal( test[1,5], token )
-
-     end
-   end
-
-   #def test_scanner_quote
-   #  tokens = RS.scan('a\QX\Eb\QX\Ec')
-   #  puts tokens.inspect
-   #  #assert_equal( tokens[0] )
-   #end
-
- end