regexp-examples 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 749811525558a012bc616a34cc04f017270d04e8
4
- data.tar.gz: 31de4f7cbd8f7aed68d2c4efd370cfdc85f9bd0f
3
+ metadata.gz: 062a1310c8b7c861a7724fd75745c1e9bff9257f
4
+ data.tar.gz: b05ce36dbb3c0afee079091d5c1016a429f1d099
5
5
  SHA512:
6
- metadata.gz: fbc3bf4111f29daec1c550dbdf03fc97fcb3fde5069a7a0259547d5df0e47972e3a112473f2a1a9c95788ec8a39c885b71a19d1b22717d15e674b71864d55020
7
- data.tar.gz: 6082e8ed18458b1eca2c47b18cc3e3484a8cb93c1111215f3565578dada06fc2deb388fb3813cae24343c6e2d1c41cdc377359d87876bc14e8dc53ffa30b700d
6
+ metadata.gz: 5519ec6e710a257c165b35f6b4138bc03d37973c31fcb4a7c6091713ab347438dd8e8bbde722c5ab21f63d6ede0565be2b77de0747d42f9f1e83413312b71d14
7
+ data.tar.gz: 58d67c5e25de2dbd238cb53a192c784047b8ab7e8a836d87c47c9cf5316133d3383a8fdec0a7c9fe07eec0518266b3c66a66c8953e830277b41e1bf9ca53e525
data/README.md CHANGED
@@ -43,6 +43,7 @@ For more detail on this, see [configuration options](#configuration-options).
43
43
  * Escape sequences, e.g. `/\x42/`, `/\x5word/`, `/#{"\x80".force_encoding("ASCII-8BIT")}/`
44
44
  * Unicode characters, e.g. `/\u0123/`, `/\uabcd/`, `/\u{789}/`
45
45
  * Octal characters, e.g. `/\10/`, `/\177/`
46
+ * POSIX bracket expressions (including negation), e.g. `/[[:alnum:]]/`, `/[[:^space:]]/`
46
47
  * **Arbitrarily complex combinations of all the above!**
47
48
 
48
49
  * Regexp options can also be used:
@@ -54,14 +55,13 @@ For more detail on this, see [configuration options](#configuration-options).
54
55
  ## Bugs and Not-Yet-Supported syntax
55
56
 
56
57
  * Nested character classes, and the use of set intersection ([See here](http://www.ruby-doc.org/core-2.2.0/Regexp.html#class-Regexp-label-Character+Classes) for the official documentation on this.) For example:
57
- * `/[[abc]]/.examples` (which _should_ return `["a", "b", "c"]`)
58
+ * `/[[abc]de]/.examples` (which _should_ return `["a", "b", "c", "d", "e"]`)
58
59
  * `/[[a-d]&&[c-f]]/.examples` (which _should_ return: `["c", "d"]`)
59
60
 
60
61
  * Conditional capture groups, such as `/(group1) (?(1)yes|no)/`
61
62
 
62
63
  Using any of the following will raise a RegexpExamples::UnsupportedSyntax exception (until such time as they are implemented!):
63
64
 
64
- * POSIX bracket expressions, e.g. `/[[:alnum:]]/`, `/[[:space:]]/`
65
65
  * Named properties, e.g. `/\p{L}/` ("Letter"), `/\p{Arabic}/` ("Arabic character"), `/\p{^Ll}/` ("Not a lowercase letter")
66
66
  * Subexpression calls, e.g. `/(?<name> ... \g<name>* )/` (Note: These could get _really_ ugly to implement, and may even be impossible, so I highly doubt it's worth the effort!)
67
67
 
@@ -0,0 +1,70 @@
1
module RegexpExamples
  # Interprets the raw characters found between the square brackets of a
  # regexp character group (e.g. the `a-z\d` of `/[a-z\d]/`) and produces the
  # list of characters that the group stands for.
  #
  # The input is read once, left to right, and handles:
  # * a leading "^" (negation, resolved against CharSets::Any),
  # * backslash escapes (BackslashCharMap shorthands, "\b", escaped literals),
  # * ranges such as "a-z".
  #
  # Anything produced by an escape is never re-read as syntax, so an escaped
  # hyphen (`\-`), or a hyphen contained in an expanded shorthand set, is
  # always treated as a literal character rather than as a range operator.
  #
  # TODO: Nested character groups and set intersection are still unsupported.
  # Supporting them needs a recursive algorithm, like the main Parser.
  class ChargroupParser
    # @param chars [Array<String>] single-character strings taken from inside
    #   the brackets, in source order. The array is left unmodified.
    def initialize(chars)
      remaining = chars.dup
      @negative = remaining.first == '^'
      remaining.shift if @negative
      @chars = expand_ranges(tokenize(remaining))
    end

    # @return [Array<String>] the characters represented by the group. For a
    #   negated group this is CharSets::Any minus the listed characters.
    #   Unescaped hyphens that do not form a range appear once, at the front.
    def result
      @negative ? (CharSets::Any - @chars) : @chars
    end

    private

    # Splits the raw characters into [kind, values] pairs, where kind is:
    #   :literal - one character to be taken as-is
    #   :hyphen  - an unescaped "-", i.e. a potential range operator
    #   :set     - the expansion of a BackslashCharMap shorthand
    def tokenize(chars)
      tokens = []
      index = 0
      while index < chars.length
        char = chars[index]
        if char == '\\'
          escaped = chars[index + 1]
          # A lone trailing backslash has nothing to escape and is dropped.
          tokens << escape_token(escaped) unless escaped.nil?
          index += 2
        else
          tokens << [(char == '-' ? :hyphen : :literal), [char]]
          index += 1
        end
      end
      tokens
    end

    # Builds the token for the character that followed a backslash.
    def escape_token(char)
      if BackslashCharMap.key?(char)
        # Copy the set, so the shared constant is never handed out for mutation
        [:set, BackslashCharMap[char].dup]
      elsif char == 'b'
        [:literal, ["\b"]] # Inside a character group, \b means backspace
      else
        [:literal, [char]] # e.g. "\\\\" => "\\", "\\-" => "-", "\\]" => "]"
      end
    end

    # Flattens the tokens into characters, replacing e.g. "a", "-", "z" with
    # "a", "b", ..., "z".
    def expand_ranges(tokens)
      expanded = []
      stray_hyphen = false
      index = 0
      while index < tokens.length
        kind, values = tokens[index]
        upper = range_end(tokens, index)
        if upper
          expanded.concat((values.first..upper).to_a)
          index += 3
        else
          if kind == :hyphen
            stray_hyphen = true
          else
            expanded.concat(values)
          end
          index += 1
        end
      end
      # A hyphen that is not part of a range is a literal; list it first
      expanded.unshift('-') if stray_hyphen
      expanded
    end

    # Returns the upper endpoint when tokens[index] begins a "x-y" range,
    # or nil otherwise. A shorthand set can never be a range endpoint.
    def range_end(tokens, index)
      return nil unless tokens[index][0] == :literal
      operator = tokens[index + 1]
      upper = tokens[index + 2]
      return nil unless operator && operator[0] == :hyphen
      return nil unless upper && upper[0] != :set
      upper[1].first
    end
  end
end
70
+
@@ -32,21 +32,25 @@ module RegexpExamples
32
32
  end
33
33
 
34
34
  module CharSets
35
- Lower = Array('a'..'z')
36
- Upper = Array('A'..'Z')
37
- Digit = Array('0'..'9')
38
- Punct = [33..47, 58..64, 91..96, 123..126].map { |r| r.map { |val| val.chr } }.flatten
39
- Hex = Array('a'..'f') | Array('A'..'F') | Digit
40
- Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"]
41
- Any = Lower | Upper | Digit | Punct
42
- end
35
+ Lower = Array('a'..'z')
36
+ Upper = Array('A'..'Z')
37
+ Digit = Array('0'..'9')
38
+ Punct = %w(! " # % & ' ( ) * , - . / : ; ? @ [ \\ ] _ { })
39
+ Hex = Array('a'..'f') | Array('A'..'F') | Digit
40
+ Word = Lower | Upper | Digit | ['_']
41
+ Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"]
42
+ Control = (0..31).map(&:chr) | ["\x7f"]
43
+ # Ensure that the "common" characters appear first in the array
44
+ Any = Lower | Upper | Digit | Punct | (0..127).map(&:chr)
45
+ AnyNoNewLine = Any - ["\n"]
46
+ end.freeze
43
47
 
44
48
  # Map of special regex characters, to their associated character sets
45
49
  BackslashCharMap = {
46
50
  'd' => CharSets::Digit,
47
- 'D' => CharSets::Lower | CharSets::Upper | CharSets::Punct,
48
- 'w' => CharSets::Lower | CharSets::Upper | CharSets::Digit | ['_'],
49
- 'W' => CharSets::Punct.reject { |val| val == '_' },
51
+ 'D' => CharSets::Any - CharSets::Digit,
52
+ 'w' => CharSets::Word,
53
+ 'W' => CharSets::Any - CharSets::Word,
50
54
  's' => CharSets::Whitespace,
51
55
  'S' => CharSets::Any - CharSets::Whitespace,
52
56
  'h' => CharSets::Hex,
@@ -59,6 +63,23 @@ module RegexpExamples
59
63
  'a' => ["\a"], # alarm
60
64
  'v' => ["\v"], # vertical tab
61
65
  'e' => ["\e"], # escape
62
- }
66
+ }.freeze
67
+
68
+ POSIXCharMap = {
69
+ 'alnum' => CharSets::Upper | CharSets::Lower | CharSets::Digit,
70
+ 'alpha' => CharSets::Upper | CharSets::Lower,
71
+ 'blank' => [" ", "\t"],
72
+ 'cntrl' => CharSets::Control,
73
+ 'digit' => CharSets::Digit,
74
+ 'graph' => (CharSets::Any - CharSets::Control) - [" "], # Visible chars
75
+ 'lower' => CharSets::Lower,
76
+ 'print' => CharSets::Any - CharSets::Control,
77
+ 'punct' => CharSets::Punct,
78
+ 'space' => CharSets::Whitespace,
79
+ 'upper' => CharSets::Upper,
80
+ 'xdigit' => CharSets::Hex,
81
+ 'word' => CharSets::Word,
82
+ 'ascii' => CharSets::Any
83
+ }.freeze
63
84
  end
64
85
 
@@ -63,69 +63,14 @@ module RegexpExamples
63
63
  def initialize(chars, ignorecase)
64
64
  @chars = chars
65
65
  @ignorecase = ignorecase
66
- if chars[0] == "^"
67
- @negative = true
68
- @chars = @chars[1..-1]
69
- else
70
- @negative = false
71
- end
72
-
73
- init_backslash_chars
74
- init_ranges
75
- end
76
-
77
- def init_ranges
78
- # save first and last "-" if present
79
-
80
- first = nil
81
- last = nil
82
- first = @chars.shift if @chars.first == "-"
83
- last = @chars.pop if @chars.last == "-"
84
- # Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
85
- while i = @chars.index("-")
86
- # Prevent infinite loops from expanding [",", "-", "."] to itself
87
- # (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
88
- if (@chars[i-1] == ',' && @chars[i+1] == '.')
89
- first = '-'
90
- @chars.delete_at(i)
91
- else
92
- @chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
93
- end
94
- end
95
- # restore them back
96
- @chars.unshift(first) if first
97
- @chars.push(last) if last
98
- end
99
-
100
- def init_backslash_chars
101
- @chars.each_with_index do |char, i|
102
- if char == "\\"
103
- if BackslashCharMap.keys.include?(@chars[i+1])
104
- @chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
105
- elsif @chars[i+1] == 'b'
106
- @chars[i..i+1] = "\b"
107
- elsif @chars[i+1] == "\\"
108
- @chars.delete_at(i+1)
109
- else
110
- @chars.delete_at(i)
111
- end
112
- end
113
- end
114
66
  end
115
67
 
116
68
  def result
117
- (@negative ? (CharSets::Any - @chars) : @chars).map do |result|
69
+ @chars.map do |result|
118
70
  GroupResult.new(result)
119
71
  end
120
72
  end
121
73
 
122
- private
123
- def move_backslash_to_front(chars)
124
- if index = chars.index { |char| char == '\\' }
125
- chars.unshift chars.delete_at(index)
126
- end
127
- chars
128
- end
129
74
  end
130
75
 
131
76
  class DotGroup
@@ -135,8 +80,7 @@ module RegexpExamples
135
80
  end
136
81
 
137
82
  def result
138
- chars = CharSets::Any
139
- chars = (["\n"] | chars) if multiline
83
+ chars = multiline ? CharSets::Any : CharSets::AnyNoNewLine
140
84
  chars.map do |result|
141
85
  GroupResult.new(result)
142
86
  end
@@ -218,8 +218,11 @@ module RegexpExamples
218
218
  end
219
219
 
220
220
  def parse_char_group
221
- if rest_of_string =~ /\A\[\[:[^:]+:\]\]/
222
- raise UnsupportedSyntaxError, "POSIX bracket expressions are not yet implemented"
221
+ # TODO: Extract all this logic into ChargroupParser
222
+ if rest_of_string =~ /\A\[\[:(\^?)([^:]+):\]\]/
223
+ @current_position += (6 + $1.length + $2.length)
224
+ chars = $1.empty? ? POSIXCharMap[$2] : CharSets::Any - POSIXCharMap[$2]
225
+ return CharGroup.new(chars, @ignorecase)
223
226
  end
224
227
  chars = []
225
228
  @current_position += 1
@@ -238,7 +241,8 @@ module RegexpExamples
238
241
  chars << next_char
239
242
  @current_position += 1
240
243
  end
241
- CharGroup.new(chars, @ignorecase)
244
+ parsed_chars = ChargroupParser.new(chars).result
245
+ CharGroup.new(parsed_chars, @ignorecase)
242
246
  end
243
247
 
244
248
  def parse_dot_group
@@ -1,3 +1,3 @@
1
1
  module RegexpExamples
2
- VERSION = '0.5.4'
2
+ VERSION = '0.6.0'
3
3
  end
@@ -2,10 +2,16 @@ RSpec.describe Regexp, "#examples" do
2
2
  def self.examples_exist_and_match(*regexps)
3
3
  regexps.each do |regexp|
4
4
  it do
5
- regexp_examples = regexp.examples
6
- expect(regexp_examples).not_to be_empty
5
+ begin
6
+ regexp_examples = regexp.examples(max_group_results: 999)
7
+ rescue
8
+ # TODO: Find a nicer way to display this?
9
+ puts "Error generating examples for /#{regexp.source}/"
10
+ raise $!
11
+ end
12
+ expect(regexp_examples).not_to be_empty, "No examples were generated for regexp: /#{regexp.source}/"
7
13
  regexp_examples.each { |example| expect(example).to match(/\A(?:#{regexp.source})\z/) }
8
- # Note: /\A...\z/ is used, to prevent misleading examples from passing the test.
14
+ # Note: /\A...\z/ is used to prevent misleading examples from passing the test.
9
15
  # For example, we don't want things like:
10
16
  # /a*/.examples to include "xyz"
11
17
  # /a|b/.examples to include "bad"
@@ -32,7 +38,7 @@ RSpec.describe Regexp, "#examples" do
32
38
  def self.examples_are_empty(*regexps)
33
39
  regexps.each do |regexp|
34
40
  it do
35
- expect(regexp.examples).to be_empty
41
+ expect(regexp.examples).to be_empty, "Unexpected examples for regexp: /#{regexp.source}/"
36
42
  end
37
43
  end
38
44
  end
@@ -181,8 +187,7 @@ RSpec.describe Regexp, "#examples" do
181
187
  /\p{L}/,
182
188
  /\p{Arabic}/,
183
189
  /\p{^Ll}/,
184
- /(?<name> ... \g<name>*)/,
185
- /[[:space:]]/
190
+ /(?<name> ... \g<name>*)/
186
191
  )
187
192
  end
188
193
 
@@ -230,13 +235,33 @@ RSpec.describe Regexp, "#examples" do
230
235
  )
231
236
  end
232
237
 
233
- context "comment group" do
238
+ context "for comment groups" do
234
239
  examples_exist_and_match(
235
240
  /a(?#comment)b/,
236
241
  /a(?#ugly backslashy\ comment\\\))b/
237
242
  )
238
243
  end
239
244
 
245
+ context "for POSIX groups" do
246
+ examples_exist_and_match(
247
+ /[[:alnum:]]/,
248
+ /[[:alpha:]]/,
249
+ /[[:blank:]]/,
250
+ /[[:cntrl:]]/,
251
+ /[[:digit:]]/,
252
+ /[[:graph:]]/,
253
+ /[[:lower:]]/,
254
+ /[[:print:]]/,
255
+ /[[:punct:]]/,
256
+ /[[:space:]]/,
257
+ /[[:upper:]]/,
258
+ /[[:xdigit:]]/,
259
+ /[[:word:]]/,
260
+ /[[:ascii:]]/,
261
+ /[[:^alnum:]]/ # Negated
262
+ )
263
+ end
264
+
240
265
  context "exact examples match" do
241
266
  # More rigorous tests to assert that ALL examples are being listed
242
267
  context "default config options" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp-examples
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Lord
@@ -55,6 +55,7 @@ files:
55
55
  - coverage/coverage-badge.png
56
56
  - lib/regexp-examples.rb
57
57
  - lib/regexp-examples/backreferences.rb
58
+ - lib/regexp-examples/chargroup_parser.rb
58
59
  - lib/regexp-examples/constants.rb
59
60
  - lib/regexp-examples/core_extensions/regexp/examples.rb
60
61
  - lib/regexp-examples/exceptions.rb