regexp-examples 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 749811525558a012bc616a34cc04f017270d04e8
4
- data.tar.gz: 31de4f7cbd8f7aed68d2c4efd370cfdc85f9bd0f
3
+ metadata.gz: 062a1310c8b7c861a7724fd75745c1e9bff9257f
4
+ data.tar.gz: b05ce36dbb3c0afee079091d5c1016a429f1d099
5
5
  SHA512:
6
- metadata.gz: fbc3bf4111f29daec1c550dbdf03fc97fcb3fde5069a7a0259547d5df0e47972e3a112473f2a1a9c95788ec8a39c885b71a19d1b22717d15e674b71864d55020
7
- data.tar.gz: 6082e8ed18458b1eca2c47b18cc3e3484a8cb93c1111215f3565578dada06fc2deb388fb3813cae24343c6e2d1c41cdc377359d87876bc14e8dc53ffa30b700d
6
+ metadata.gz: 5519ec6e710a257c165b35f6b4138bc03d37973c31fcb4a7c6091713ab347438dd8e8bbde722c5ab21f63d6ede0565be2b77de0747d42f9f1e83413312b71d14
7
+ data.tar.gz: 58d67c5e25de2dbd238cb53a192c784047b8ab7e8a836d87c47c9cf5316133d3383a8fdec0a7c9fe07eec0518266b3c66a66c8953e830277b41e1bf9ca53e525
data/README.md CHANGED
@@ -43,6 +43,7 @@ For more detail on this, see [configuration options](#configuration-options).
43
43
  * Escape sequences, e.g. `/\x42/`, `/\x5word/`, `/#{"\x80".force_encoding("ASCII-8BIT")}/`
44
44
  * Unicode characters, e.g. `/\u0123/`, `/\uabcd/`, `/\u{789}/`
45
45
  * Octal characters, e.g. `/\10/`, `/\177/`
46
+ * POSIX bracket expressions (including negation), e.g. `/[[:alnum:]]/`, `/[[:^space:]]/`
46
47
  * **Arbitrarily complex combinations of all the above!**
47
48
 
48
49
  * Regexp options can also be used:
@@ -54,14 +55,13 @@ For more detail on this, see [configuration options](#configuration-options).
54
55
  ## Bugs and Not-Yet-Supported syntax
55
56
 
56
57
  * Nested character classes, and the use of set intersection ([See here](http://www.ruby-doc.org/core-2.2.0/Regexp.html#class-Regexp-label-Character+Classes) for the official documentation on this.) For example:
57
- * `/[[abc]]/.examples` (which _should_ return `["a", "b", "c"]`)
58
+ * `/[[abc]de]/.examples` (which _should_ return `["a", "b", "c", "d", "e"]`)
58
59
  * `/[[a-d]&&[c-f]]/.examples` (which _should_ return: `["c", "d"]`)
59
60
 
60
61
  * Conditional capture groups, such as `/(group1) (?(1)yes|no)/`
61
62
 
62
63
  Using any of the following will raise a RegexpExamples::UnsupportedSyntaxError exception (until such time as they are implemented!):
63
64
 
64
- * POSIX bracket expressions, e.g. `/[[:alnum:]]/`, `/[[:space:]]/`
65
65
  * Named properties, e.g. `/\p{L}/` ("Letter"), `/\p{Arabic}/` ("Arabic character"), `/\p{^Ll}/` ("Not a lowercase letter")
66
66
  * Subexpression calls, e.g. `/(?<name> ... \g<name>* )/` (Note: These could get _really_ ugly to implement, and may even be impossible, so I highly doubt it's worth the effort!)
67
67
 
@@ -0,0 +1,70 @@
1
+ module RegexpExamples
2
+ # Given an array of chars from inside a character set,
3
+ # Interprets all backslashes, ranges and negations
4
+ # TODO: This needs a bit of a rewrite because:
5
+ # A) It's ugly
6
+ # B) It doesn't take into account nested character groups, or set intersection
7
+ # To achieve this, the algorithm needs to be recursive, like the main Parser.
8
+ class ChargroupParser
9
+ def initialize(chars)
10
+ @chars = chars
11
+ if @chars[0] == "^"
12
+ @negative = true
13
+ @chars = @chars[1..-1]
14
+ else
15
+ @negative = false
16
+ end
17
+
18
+ init_backslash_chars
19
+ init_ranges
20
+ end
21
+
22
+ def result
23
+ @negative ? (CharSets::Any - @chars) : @chars
24
+ end
25
+
26
+ private
27
+ def init_backslash_chars
28
+ @chars.each_with_index do |char, i|
29
+ if char == "\\"
30
+ if BackslashCharMap.keys.include?(@chars[i+1])
31
+ @chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
32
+ elsif @chars[i+1] == 'b'
33
+ @chars[i..i+1] = "\b"
34
+ elsif @chars[i+1] == "\\"
35
+ @chars.delete_at(i+1)
36
+ else
37
+ @chars.delete_at(i)
38
+ end
39
+ end
40
+ end
41
+ end
42
+
43
+ def init_ranges
44
+ # remove hyphen ("-") from front/back, if present
45
+ hyphen = nil
46
+ hyphen = @chars.shift if @chars.first == "-"
47
+ hyphen ||= @chars.pop if @chars.last == "-"
48
+ # Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
49
+ while i = @chars.index("-")
50
+ # Prevent infinite loops from expanding [",", "-", "."] to itself
51
+ # (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
52
+ if (@chars[i-1] == ',' && @chars[i+1] == '.')
53
+ hyphen = @chars.delete_at(i)
54
+ else
55
+ @chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
56
+ end
57
+ end
58
+ # restore hyphen, if stripped out earlier
59
+ @chars.unshift(hyphen) if hyphen
60
+ end
61
+
62
+ def move_backslash_to_front(chars)
63
+ if index = chars.index { |char| char == '\\' }
64
+ chars.unshift chars.delete_at(index)
65
+ end
66
+ chars
67
+ end
68
+ end
69
+ end
70
+
@@ -32,21 +32,25 @@ module RegexpExamples
32
32
  end
33
33
 
34
34
  module CharSets
35
- Lower = Array('a'..'z')
36
- Upper = Array('A'..'Z')
37
- Digit = Array('0'..'9')
38
- Punct = [33..47, 58..64, 91..96, 123..126].map { |r| r.map { |val| val.chr } }.flatten
39
- Hex = Array('a'..'f') | Array('A'..'F') | Digit
40
- Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"]
41
- Any = Lower | Upper | Digit | Punct
42
- end
35
+ Lower = Array('a'..'z')
36
+ Upper = Array('A'..'Z')
37
+ Digit = Array('0'..'9')
38
+ Punct = %w(! " # % & ' ( ) * , - . / : ; ? @ [ \\ ] _ { })
39
+ Hex = Array('a'..'f') | Array('A'..'F') | Digit
40
+ Word = Lower | Upper | Digit | ['_']
41
+ Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"]
42
+ Control = (0..31).map(&:chr) | ["\x7f"]
43
+ # Ensure that the "common" characters appear first in the array
44
+ Any = Lower | Upper | Digit | Punct | (0..127).map(&:chr)
45
+ AnyNoNewLine = Any - ["\n"]
46
+ end.freeze
43
47
 
44
48
  # Map of special regex characters, to their associated character sets
45
49
  BackslashCharMap = {
46
50
  'd' => CharSets::Digit,
47
- 'D' => CharSets::Lower | CharSets::Upper | CharSets::Punct,
48
- 'w' => CharSets::Lower | CharSets::Upper | CharSets::Digit | ['_'],
49
- 'W' => CharSets::Punct.reject { |val| val == '_' },
51
+ 'D' => CharSets::Any - CharSets::Digit,
52
+ 'w' => CharSets::Word,
53
+ 'W' => CharSets::Any - CharSets::Word,
50
54
  's' => CharSets::Whitespace,
51
55
  'S' => CharSets::Any - CharSets::Whitespace,
52
56
  'h' => CharSets::Hex,
@@ -59,6 +63,23 @@ module RegexpExamples
59
63
  'a' => ["\a"], # alarm
60
64
  'v' => ["\v"], # vertical tab
61
65
  'e' => ["\e"], # escape
62
- }
66
+ }.freeze
67
+
68
+ POSIXCharMap = {
69
+ 'alnum' => CharSets::Upper | CharSets::Lower | CharSets::Digit,
70
+ 'alpha' => CharSets::Upper | CharSets::Lower,
71
+ 'blank' => [" ", "\t"],
72
+ 'cntrl' => CharSets::Control,
73
+ 'digit' => CharSets::Digit,
74
+ 'graph' => (CharSets::Any - CharSets::Control) - [" "], # Visible chars
75
+ 'lower' => CharSets::Lower,
76
+ 'print' => CharSets::Any - CharSets::Control,
77
+ 'punct' => CharSets::Punct,
78
+ 'space' => CharSets::Whitespace,
79
+ 'upper' => CharSets::Upper,
80
+ 'xdigit' => CharSets::Hex,
81
+ 'word' => CharSets::Word,
82
+ 'ascii' => CharSets::Any
83
+ }.freeze
63
84
  end
64
85
 
@@ -63,69 +63,14 @@ module RegexpExamples
63
63
  def initialize(chars, ignorecase)
64
64
  @chars = chars
65
65
  @ignorecase = ignorecase
66
- if chars[0] == "^"
67
- @negative = true
68
- @chars = @chars[1..-1]
69
- else
70
- @negative = false
71
- end
72
-
73
- init_backslash_chars
74
- init_ranges
75
- end
76
-
77
- def init_ranges
78
- # save first and last "-" if present
79
-
80
- first = nil
81
- last = nil
82
- first = @chars.shift if @chars.first == "-"
83
- last = @chars.pop if @chars.last == "-"
84
- # Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
85
- while i = @chars.index("-")
86
- # Prevent infinite loops from expanding [",", "-", "."] to itself
87
- # (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
88
- if (@chars[i-1] == ',' && @chars[i+1] == '.')
89
- first = '-'
90
- @chars.delete_at(i)
91
- else
92
- @chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
93
- end
94
- end
95
- # restore them back
96
- @chars.unshift(first) if first
97
- @chars.push(last) if last
98
- end
99
-
100
- def init_backslash_chars
101
- @chars.each_with_index do |char, i|
102
- if char == "\\"
103
- if BackslashCharMap.keys.include?(@chars[i+1])
104
- @chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
105
- elsif @chars[i+1] == 'b'
106
- @chars[i..i+1] = "\b"
107
- elsif @chars[i+1] == "\\"
108
- @chars.delete_at(i+1)
109
- else
110
- @chars.delete_at(i)
111
- end
112
- end
113
- end
114
66
  end
115
67
 
116
68
  def result
117
- (@negative ? (CharSets::Any - @chars) : @chars).map do |result|
69
+ @chars.map do |result|
118
70
  GroupResult.new(result)
119
71
  end
120
72
  end
121
73
 
122
- private
123
- def move_backslash_to_front(chars)
124
- if index = chars.index { |char| char == '\\' }
125
- chars.unshift chars.delete_at(index)
126
- end
127
- chars
128
- end
129
74
  end
130
75
 
131
76
  class DotGroup
@@ -135,8 +80,7 @@ module RegexpExamples
135
80
  end
136
81
 
137
82
  def result
138
- chars = CharSets::Any
139
- chars = (["\n"] | chars) if multiline
83
+ chars = multiline ? CharSets::Any : CharSets::AnyNoNewLine
140
84
  chars.map do |result|
141
85
  GroupResult.new(result)
142
86
  end
@@ -218,8 +218,11 @@ module RegexpExamples
218
218
  end
219
219
 
220
220
  def parse_char_group
221
- if rest_of_string =~ /\A\[\[:[^:]+:\]\]/
222
- raise UnsupportedSyntaxError, "POSIX bracket expressions are not yet implemented"
221
+ # TODO: Extract all this logic into ChargroupParser
222
+ if rest_of_string =~ /\A\[\[:(\^?)([^:]+):\]\]/
223
+ @current_position += (6 + $1.length + $2.length)
224
+ chars = $1.empty? ? POSIXCharMap[$2] : CharSets::Any - POSIXCharMap[$2]
225
+ return CharGroup.new(chars, @ignorecase)
223
226
  end
224
227
  chars = []
225
228
  @current_position += 1
@@ -238,7 +241,8 @@ module RegexpExamples
238
241
  chars << next_char
239
242
  @current_position += 1
240
243
  end
241
- CharGroup.new(chars, @ignorecase)
244
+ parsed_chars = ChargroupParser.new(chars).result
245
+ CharGroup.new(parsed_chars, @ignorecase)
242
246
  end
243
247
 
244
248
  def parse_dot_group
@@ -1,3 +1,3 @@
1
1
  module RegexpExamples
2
- VERSION = '0.5.4'
2
+ VERSION = '0.6.0'
3
3
  end
@@ -2,10 +2,16 @@ RSpec.describe Regexp, "#examples" do
2
2
  def self.examples_exist_and_match(*regexps)
3
3
  regexps.each do |regexp|
4
4
  it do
5
- regexp_examples = regexp.examples
6
- expect(regexp_examples).not_to be_empty
5
+ begin
6
+ regexp_examples = regexp.examples(max_group_results: 999)
7
+ rescue
8
+ # TODO: Find a nicer way to display this?
9
+ puts "Error generating examples for /#{regexp.source}/"
10
+ raise $!
11
+ end
12
+ expect(regexp_examples).not_to be_empty, "No examples were generated for regexp: /#{regexp.source}/"
7
13
  regexp_examples.each { |example| expect(example).to match(/\A(?:#{regexp.source})\z/) }
8
- # Note: /\A...\z/ is used, to prevent misleading examples from passing the test.
14
+ # Note: /\A...\z/ is used to prevent misleading examples from passing the test.
9
15
  # For example, we don't want things like:
10
16
  # /a*/.examples to include "xyz"
11
17
  # /a|b/.examples to include "bad"
@@ -32,7 +38,7 @@ RSpec.describe Regexp, "#examples" do
32
38
  def self.examples_are_empty(*regexps)
33
39
  regexps.each do |regexp|
34
40
  it do
35
- expect(regexp.examples).to be_empty
41
+ expect(regexp.examples).to be_empty, "Unexpected examples for regexp: /#{regexp.source}/"
36
42
  end
37
43
  end
38
44
  end
@@ -181,8 +187,7 @@ RSpec.describe Regexp, "#examples" do
181
187
  /\p{L}/,
182
188
  /\p{Arabic}/,
183
189
  /\p{^Ll}/,
184
- /(?<name> ... \g<name>*)/,
185
- /[[:space:]]/
190
+ /(?<name> ... \g<name>*)/
186
191
  )
187
192
  end
188
193
 
@@ -230,13 +235,33 @@ RSpec.describe Regexp, "#examples" do
230
235
  )
231
236
  end
232
237
 
233
- context "comment group" do
238
+ context "for comment groups" do
234
239
  examples_exist_and_match(
235
240
  /a(?#comment)b/,
236
241
  /a(?#ugly backslashy\ comment\\\))b/
237
242
  )
238
243
  end
239
244
 
245
+ context "for POSIX groups" do
246
+ examples_exist_and_match(
247
+ /[[:alnum:]]/,
248
+ /[[:alpha:]]/,
249
+ /[[:blank:]]/,
250
+ /[[:cntrl:]]/,
251
+ /[[:digit:]]/,
252
+ /[[:graph:]]/,
253
+ /[[:lower:]]/,
254
+ /[[:print:]]/,
255
+ /[[:punct:]]/,
256
+ /[[:space:]]/,
257
+ /[[:upper:]]/,
258
+ /[[:xdigit:]]/,
259
+ /[[:word:]]/,
260
+ /[[:ascii:]]/,
261
+ /[[:^alnum:]]/ # Negated
262
+ )
263
+ end
264
+
240
265
  context "exact examples match" do
241
266
  # More rigorous tests to assert that ALL examples are being listed
242
267
  context "default config options" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp-examples
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Lord
@@ -55,6 +55,7 @@ files:
55
55
  - coverage/coverage-badge.png
56
56
  - lib/regexp-examples.rb
57
57
  - lib/regexp-examples/backreferences.rb
58
+ - lib/regexp-examples/chargroup_parser.rb
58
59
  - lib/regexp-examples/constants.rb
59
60
  - lib/regexp-examples/core_extensions/regexp/examples.rb
60
61
  - lib/regexp-examples/exceptions.rb