regexp-examples 1.1.0 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -9
- data/Rakefile +3 -3
- data/db/unicode_ranges_2.0.pstore +0 -0
- data/db/unicode_ranges_2.1.pstore +0 -0
- data/db/unicode_ranges_2.2.pstore +0 -0
- data/lib/{regexp-examples/core_extensions → core_extensions}/regexp/examples.rb +3 -3
- data/lib/regexp-examples.rb +11 -2
- data/lib/regexp-examples/backreferences.rb +3 -4
- data/lib/regexp-examples/chargroup_parser.rb +14 -14
- data/lib/regexp-examples/constants.rb +5 -156
- data/lib/regexp-examples/groups.rb +20 -12
- data/lib/regexp-examples/helpers.rb +5 -5
- data/lib/regexp-examples/parser.rb +52 -42
- data/lib/regexp-examples/repeaters.rb +5 -5
- data/lib/regexp-examples/unicode_char_ranges.rb +45 -0
- data/lib/regexp-examples/version.rb +1 -1
- data/regexp-examples.gemspec +4 -4
- data/scripts/unicode_lister.rb +34 -150
- data/spec/regexp-examples_spec.rb +81 -59
- data/spec/regexp-random_example_spec.rb +2 -2
- data/spec/spec_helper.rb +1 -1
- metadata +8 -4
@@ -8,7 +8,7 @@ module RegexpExamples
|
|
8
8
|
def self.permutations_of_strings(arrays_of_strings)
|
9
9
|
first = arrays_of_strings.shift
|
10
10
|
return first if arrays_of_strings.empty?
|
11
|
-
first.product(
|
11
|
+
first.product(permutations_of_strings(arrays_of_strings)).map do |result|
|
12
12
|
join_preserving_capture_groups(result)
|
13
13
|
end
|
14
14
|
end
|
@@ -16,8 +16,8 @@ module RegexpExamples
|
|
16
16
|
def self.join_preserving_capture_groups(result)
|
17
17
|
result.flatten!
|
18
18
|
subgroups = result
|
19
|
-
|
20
|
-
|
19
|
+
.map(&:all_subgroups)
|
20
|
+
.flatten
|
21
21
|
|
22
22
|
# Only save the LAST group from repeated capture groups, e.g. /([ab]){2}/
|
23
23
|
subgroups.delete_if do |subgroup|
|
@@ -35,12 +35,12 @@ module RegexpExamples
|
|
35
35
|
end
|
36
36
|
|
37
37
|
private
|
38
|
+
|
38
39
|
def self.generic_map_result(repeaters, method)
|
39
40
|
repeaters
|
40
|
-
.map {|repeater| repeater.public_send(method)}
|
41
|
+
.map { |repeater| repeater.public_send(method) }
|
41
42
|
.instance_eval do |partial_results|
|
42
43
|
RegexpExamples.permutations_of_strings(partial_results)
|
43
44
|
end
|
44
45
|
end
|
45
46
|
end
|
46
|
-
|
@@ -15,9 +15,7 @@ module RegexpExamples
|
|
15
15
|
repeaters = []
|
16
16
|
until end_of_regexp
|
17
17
|
group = parse_group(repeaters)
|
18
|
-
if group.is_a? OrGroup
|
19
|
-
return [OneTimeRepeater.new(group)]
|
20
|
-
end
|
18
|
+
return [OneTimeRepeater.new(group)] if group.is_a? OrGroup
|
21
19
|
@current_position += 1
|
22
20
|
repeaters << parse_repeater(group)
|
23
21
|
end
|
@@ -101,12 +99,16 @@ module RegexpExamples
|
|
101
99
|
@current_position += 1
|
102
100
|
case
|
103
101
|
when rest_of_string =~ /\A(\d{1,3})/
|
104
|
-
@current_position += (
|
105
|
-
group = parse_backreference_group(
|
102
|
+
@current_position += (Regexp.last_match(1).length - 1) # In case of 10+ backrefs!
|
103
|
+
group = parse_backreference_group(Regexp.last_match(1))
|
106
104
|
when rest_of_string =~ /\Ak['<]([\w-]+)['>]/ # Named capture group
|
107
|
-
@current_position += (
|
108
|
-
|
109
|
-
|
105
|
+
@current_position += (Regexp.last_match(1).length + 2)
|
106
|
+
group_id = if Regexp.last_match(1).to_i < 0
|
107
|
+
# RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
|
108
|
+
@num_groups + Regexp.last_match(1).to_i + 1
|
109
|
+
else
|
110
|
+
Regexp.last_match(1)
|
111
|
+
end
|
110
112
|
group = parse_backreference_group(group_id)
|
111
113
|
when BackslashCharMap.keys.include?(next_char)
|
112
114
|
group = CharGroup.new(
|
@@ -114,32 +116,39 @@ module RegexpExamples
|
|
114
116
|
@ignorecase
|
115
117
|
)
|
116
118
|
when rest_of_string =~ /\A(c|C-)(.)/ # Control character
|
117
|
-
@current_position +=
|
118
|
-
group = parse_single_char_group(
|
119
|
+
@current_position += Regexp.last_match(1).length
|
120
|
+
group = parse_single_char_group(parse_control_character(Regexp.last_match(2)))
|
119
121
|
when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
|
120
|
-
@current_position +=
|
121
|
-
group = parse_single_char_group(
|
122
|
+
@current_position += Regexp.last_match(1).length
|
123
|
+
group = parse_single_char_group(parse_escape_sequence(Regexp.last_match(1)))
|
122
124
|
when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
|
123
|
-
@current_position +=
|
124
|
-
sequence =
|
125
|
-
group = parse_single_char_group(
|
125
|
+
@current_position += Regexp.last_match(1).length
|
126
|
+
sequence = Regexp.last_match(1).match(/\h{1,4}/)[0] # Strip off "{" and "}"
|
127
|
+
group = parse_single_char_group(parse_unicode_sequence(sequence))
|
126
128
|
when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i # Named properties
|
127
|
-
@current_position += (
|
128
|
-
|
129
|
+
@current_position += (Regexp.last_match(2).length + # 0 or 1, if '^' is present
|
130
|
+
Regexp.last_match(3).length + # Length of the property name
|
131
|
+
2) # Length of opening and closing brackets (always 2)
|
132
|
+
# Beware of double negatives! E.g. /\P{^Space}/
|
133
|
+
is_negative = (Regexp.last_match(1) == 'P') ^ (Regexp.last_match(2) == '^')
|
129
134
|
group = CharGroup.new(
|
130
135
|
if is_negative
|
131
|
-
CharSets::Any.dup - NamedPropertyCharMap[
|
136
|
+
CharSets::Any.dup - NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
132
137
|
else
|
133
|
-
NamedPropertyCharMap[
|
138
|
+
NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
134
139
|
end,
|
135
140
|
@ignorecase
|
136
141
|
)
|
137
142
|
when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
|
138
143
|
group = PlaceHolderGroup.new
|
139
144
|
when next_char == 'R' # Linebreak
|
140
|
-
group = CharGroup.new(
|
145
|
+
group = CharGroup.new(
|
146
|
+
["\r\n", "\n", "\v", "\f", "\r"],
|
147
|
+
@ignorecase
|
148
|
+
) # Using "\r\n" as one character is a little bit hacky...
|
141
149
|
when next_char == 'g' # Subexpression call
|
142
|
-
|
150
|
+
fail IllegalSyntaxError,
|
151
|
+
'Subexpression calls (\\g) cannot be supported, as they are not regular'
|
143
152
|
when next_char =~ /[bB]/ # Anchors
|
144
153
|
raise_anchors_exception!
|
145
154
|
when next_char =~ /[AG]/ # Start of string
|
@@ -155,7 +164,7 @@ module RegexpExamples
|
|
155
164
|
raise_anchors_exception!
|
156
165
|
end
|
157
166
|
else
|
158
|
-
group = parse_single_char_group(
|
167
|
+
group = parse_single_char_group(next_char)
|
159
168
|
end
|
160
169
|
group
|
161
170
|
end
|
@@ -193,7 +202,7 @@ module RegexpExamples
|
|
193
202
|
comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
|
194
203
|
@current_position += comment_group.length
|
195
204
|
when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
|
196
|
-
regexp_options_toggle(
|
205
|
+
regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
|
197
206
|
@num_groups -= 1 # Toggle "groups" should not increase backref group count
|
198
207
|
@current_position += $&.length + 1
|
199
208
|
if next_char == ':' # e.g. /(?i:subexpr)/
|
@@ -202,9 +211,11 @@ module RegexpExamples
|
|
202
211
|
return PlaceHolderGroup.new
|
203
212
|
end
|
204
213
|
when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
|
205
|
-
|
214
|
+
fail IllegalSyntaxError,
|
215
|
+
'Lookaheads are not regular; cannot generate examples'
|
206
216
|
when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
|
207
|
-
|
217
|
+
fail IllegalSyntaxError,
|
218
|
+
'Lookbehinds are not regular; cannot generate examples'
|
208
219
|
else # e.g. /(?<name>namedgroup)/
|
209
220
|
@current_position += (match[3].length + 3)
|
210
221
|
group_id = match[3]
|
@@ -226,12 +237,12 @@ module RegexpExamples
|
|
226
237
|
end
|
227
238
|
|
228
239
|
def regexp_options_toggle(on, off)
|
229
|
-
@ignorecase = true if
|
230
|
-
@ignorecase = false if
|
231
|
-
@multiline = true if
|
232
|
-
@multiline = false if
|
233
|
-
@extended = true if
|
234
|
-
@extended = false if
|
240
|
+
@ignorecase = true if on.include? 'i'
|
241
|
+
@ignorecase = false if off.include? 'i'
|
242
|
+
@multiline = true if on.include? 'm'
|
243
|
+
@multiline = false if off.include? 'm'
|
244
|
+
@extended = true if on.include? 'x'
|
245
|
+
@extended = false if off.include? 'x'
|
235
246
|
end
|
236
247
|
|
237
248
|
def parse_char_group
|
@@ -252,7 +263,6 @@ module RegexpExamples
|
|
252
263
|
OrGroup.new(left_repeaters, right_repeaters)
|
253
264
|
end
|
254
265
|
|
255
|
-
|
256
266
|
def parse_single_char_group(char)
|
257
267
|
SingleCharGroup.new(char, @ignorecase)
|
258
268
|
end
|
@@ -310,17 +320,18 @@ module RegexpExamples
|
|
310
320
|
end
|
311
321
|
|
312
322
|
def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
323
|
+
# .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
|
324
|
+
if min && !has_comma && !max && next_char == '?'
|
325
|
+
repeater = parse_question_mark_repeater(repeater)
|
326
|
+
else
|
327
|
+
parse_reluctant_or_possessive_repeater
|
328
|
+
end
|
329
|
+
repeater
|
320
330
|
end
|
321
331
|
|
322
332
|
def raise_anchors_exception!
|
323
|
-
|
333
|
+
fail IllegalSyntaxError,
|
334
|
+
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
|
324
335
|
end
|
325
336
|
|
326
337
|
def parse_one_time_repeater(group)
|
@@ -336,8 +347,7 @@ module RegexpExamples
|
|
336
347
|
end
|
337
348
|
|
338
349
|
def end_of_regexp
|
339
|
-
next_char ==
|
350
|
+
next_char == ')' || @current_position >= regexp_string.length
|
340
351
|
end
|
341
352
|
end
|
342
353
|
end
|
343
|
-
|
@@ -6,11 +6,11 @@ module RegexpExamples
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def result
|
9
|
-
group_results = group.result
|
9
|
+
group_results = group.result.first(RegexpExamples.MaxGroupResults)
|
10
10
|
results = []
|
11
11
|
min_repeats.upto(max_repeats) do |repeats|
|
12
12
|
if repeats.zero?
|
13
|
-
results << [
|
13
|
+
results << [GroupResult.new('')]
|
14
14
|
else
|
15
15
|
results << RegexpExamples.permutations_of_strings(
|
16
16
|
[group_results] * repeats
|
@@ -23,8 +23,8 @@ module RegexpExamples
|
|
23
23
|
def random_result
|
24
24
|
result = []
|
25
25
|
rand(min_repeats..max_repeats).times { result << group.random_result }
|
26
|
-
result << [
|
27
|
-
RegexpExamples
|
26
|
+
result << [GroupResult.new('')] if result.empty? # in case of 0.times
|
27
|
+
RegexpExamples.permutations_of_strings(result)
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
@@ -74,9 +74,9 @@ module RegexpExamples
|
|
74
74
|
end
|
75
75
|
|
76
76
|
private
|
77
|
+
|
77
78
|
def smallest(x, y)
|
78
79
|
(x < y) ? x : y
|
79
80
|
end
|
80
81
|
end
|
81
82
|
end
|
82
|
-
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'pstore'
|
2
|
+
|
3
|
+
module RegexpExamples
|
4
|
+
class UnicodeCharRanges
|
5
|
+
# These values were generated by: scripts/unicode_lister.rb
|
6
|
+
# Note: Only the first 128 results are listed, for performance.
|
7
|
+
# Also, some groups seem to have no matches (weird!)
|
8
|
+
# (The store is keyed on the first three characters of RUBY_VERSION, e.g. "2.2"; the micro version number is ignored)
|
9
|
+
STORE_FILENAME = "unicode_ranges_#{RUBY_VERSION[0..2]}.pstore"
|
10
|
+
|
11
|
+
attr_reader :range_store
|
12
|
+
|
13
|
+
def initialize(filename = STORE_FILENAME)
|
14
|
+
@range_store = PStore.new(File.expand_path("../../../db/#{filename}", __FILE__))
|
15
|
+
end
|
16
|
+
|
17
|
+
def get(key)
|
18
|
+
range_store.transaction(true) do
|
19
|
+
ranges_to_unicode(range_store[key])
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
alias_method :[], :get
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
# Example: [97..99, 101] => ["a", "b", "c", "e"] (each entry is one code point, or a range of code points)
|
28
|
+
# It's pretty simple, but this code is a little confusing!!
|
29
|
+
def ranges_to_unicode(ranges)
|
30
|
+
result = []
|
31
|
+
ranges.each do |range|
|
32
|
+
if range.is_a? Fixnum # Small hack to increase data compression
|
33
|
+
result << hex_to_unicode(range.to_s(16))
|
34
|
+
else
|
35
|
+
range.each { |num| result << hex_to_unicode(num.to_s(16)) }
|
36
|
+
end
|
37
|
+
end
|
38
|
+
result
|
39
|
+
end
|
40
|
+
|
41
|
+
def hex_to_unicode(hex)
|
42
|
+
eval("?\\u{#{hex}}")
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/regexp-examples.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require File.expand_path(
|
1
|
+
require File.expand_path('../lib/regexp-examples/version', __FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'regexp-examples'
|
@@ -11,11 +11,11 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.files = `git ls-files -z`.split("\x0")
|
12
12
|
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
13
13
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
14
|
-
s.require_paths = [
|
14
|
+
s.require_paths = ['lib']
|
15
15
|
s.homepage =
|
16
16
|
'http://rubygems.org/gems/regexp-examples'
|
17
|
-
s.add_development_dependency
|
18
|
-
s.add_development_dependency
|
17
|
+
s.add_development_dependency 'bundler', '~> 1.7'
|
18
|
+
s.add_development_dependency 'rake', '~> 10.0'
|
19
19
|
s.license = 'MIT'
|
20
20
|
s.required_ruby_version = '>= 2.0.0'
|
21
21
|
end
|
data/scripts/unicode_lister.rb
CHANGED
@@ -1,180 +1,64 @@
|
|
1
|
+
require 'pstore'
|
2
|
+
require_relative '../lib/regexp-examples/unicode_char_ranges'
|
1
3
|
# A script to generate lists of all unicode characters
|
2
4
|
# that match all named group/character properties regexps.
|
3
5
|
# For use in e.g. /\p{Arabic}/.examples
|
4
6
|
|
5
7
|
# To (re-)generate this list, simply run this file!
|
6
8
|
# > ruby scripts/unicode_lister.rb
|
7
|
-
OutputFilename = 'unicode_result'
|
8
9
|
|
9
10
|
# Taken from ruby documentation:
|
10
11
|
# http://ruby-doc.org//core-2.2.0/Regexp.html#class-Regexp-label-Character+Properties
|
11
12
|
NamedGroups = %w(
|
12
|
-
Alnum
|
13
|
-
Alpha
|
14
|
-
Blank
|
15
|
-
Cntrl
|
16
|
-
Digit
|
17
|
-
Graph
|
18
|
-
Lower
|
19
|
-
Print
|
20
|
-
Punct
|
21
|
-
Space
|
22
|
-
Upper
|
23
|
-
XDigit
|
24
|
-
Word
|
25
|
-
ASCII
|
26
|
-
Any
|
27
|
-
Assigned
|
13
|
+
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII Any Assigned
|
28
14
|
|
29
|
-
L
|
30
|
-
Ll
|
31
|
-
Lm
|
32
|
-
Lo
|
33
|
-
Lt
|
34
|
-
Lu
|
35
|
-
M
|
36
|
-
Mn
|
37
|
-
Mc
|
38
|
-
Me
|
39
|
-
N
|
40
|
-
Nd
|
41
|
-
Nl
|
42
|
-
No
|
43
|
-
P
|
44
|
-
Pc
|
45
|
-
Pd
|
46
|
-
Ps
|
47
|
-
Pe
|
48
|
-
Pi
|
49
|
-
Pf
|
50
|
-
Po
|
51
|
-
S
|
52
|
-
Sm
|
53
|
-
Sc
|
54
|
-
Sk
|
55
|
-
So
|
56
|
-
Z
|
57
|
-
Zs
|
58
|
-
Zl
|
59
|
-
Zp
|
60
|
-
C
|
61
|
-
Cc
|
62
|
-
Cf
|
63
|
-
Cn
|
64
|
-
Co
|
65
|
-
Cs
|
15
|
+
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Cs
|
66
16
|
|
67
|
-
Arabic
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
Buginese
|
74
|
-
Buhid
|
75
|
-
Canadian_Aboriginal
|
76
|
-
Carian
|
77
|
-
Cham
|
78
|
-
Cherokee
|
79
|
-
Common
|
80
|
-
Coptic
|
81
|
-
Cuneiform
|
82
|
-
Cypriot
|
83
|
-
Cyrillic
|
84
|
-
Deseret
|
85
|
-
Devanagari
|
86
|
-
Ethiopic
|
87
|
-
Georgian
|
88
|
-
Glagolitic
|
89
|
-
Gothic
|
90
|
-
Greek
|
91
|
-
Gujarati
|
92
|
-
Gurmukhi
|
93
|
-
Han
|
94
|
-
Hangul
|
95
|
-
Hanunoo
|
96
|
-
Hebrew
|
97
|
-
Hiragana
|
98
|
-
Inherited
|
99
|
-
Kannada
|
100
|
-
Katakana
|
101
|
-
Kayah_Li
|
102
|
-
Kharoshthi
|
103
|
-
Khmer
|
104
|
-
Lao
|
105
|
-
Latin
|
106
|
-
Lepcha
|
107
|
-
Limbu
|
108
|
-
Linear_B
|
109
|
-
Lycian
|
110
|
-
Lydian
|
111
|
-
Malayalam
|
112
|
-
Mongolian
|
113
|
-
Myanmar
|
114
|
-
New_Tai_Lue
|
115
|
-
Nko
|
116
|
-
Ogham
|
117
|
-
Ol_Chiki
|
118
|
-
Old_Italic
|
119
|
-
Old_Persian
|
120
|
-
Oriya
|
121
|
-
Osmanya
|
122
|
-
Phags_Pa
|
123
|
-
Phoenician
|
124
|
-
Rejang
|
125
|
-
Runic
|
126
|
-
Saurashtra
|
127
|
-
Shavian
|
128
|
-
Sinhala
|
129
|
-
Sundanese
|
130
|
-
Syloti_Nagri
|
131
|
-
Syriac
|
132
|
-
Tagalog
|
133
|
-
Tagbanwa
|
134
|
-
Tai_Le
|
135
|
-
Tamil
|
136
|
-
Telugu
|
137
|
-
Thaana
|
138
|
-
Thai
|
139
|
-
Tibetan
|
140
|
-
Tifinagh
|
141
|
-
Ugaritic
|
142
|
-
Vai
|
143
|
-
Yi
|
17
|
+
Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal Carian Cham Cherokee
|
18
|
+
Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari Ethiopic Georgian Glagolitic Gothic Greek
|
19
|
+
Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana Inherited Kannada Katakana Kayah_Li Kharoshthi Khmer
|
20
|
+
Lao Latin Lepcha Limbu Linear_B Lycian Lydian Malayalam Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
|
21
|
+
Old_Italic Old_Persian Oriya Osmanya Phags_Pa Phoenician Rejang Runic Saurashtra Shavian Sinhala Sundanese
|
22
|
+
Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi
|
144
23
|
)
|
145
24
|
|
146
|
-
# Note: For
|
147
|
-
#
|
148
|
-
#
|
149
|
-
#
|
150
|
-
#
|
151
|
-
# If anyone ever cares about this (I doubt it), I'll look into fixing/improving it.
|
25
|
+
# Note: The values in the range 55296..57343 (U+D800..U+DFFF, the surrogates) are reserved and are not legal
|
26
|
+
# unicode characters.
|
27
|
+
# I.e. a character encoding-related exception gets raised when you do:
|
28
|
+
# `/regex/ =~ eval("?\\u{#{x.to_s(16)}}")`
|
29
|
+
# TODO: Add a link to somewhere that explains this better.
|
152
30
|
|
153
|
-
#
|
154
|
-
# Example
|
31
|
+
# "Compresses" the values in an array by using ranges.
|
32
|
+
# Example input: [1, 2, 3, 4, 6, 7, 12, 14]
|
33
|
+
# Example output: [1..4, 6..7, 12, 14]
|
155
34
|
def calculate_ranges(matching_codes)
|
156
|
-
return
|
35
|
+
return [] if matching_codes.empty?
|
157
36
|
first = matching_codes.shift
|
158
|
-
matching_codes.inject([first..first]) do |r,x|
|
37
|
+
matching_codes.inject([first..first]) do |r, x|
|
159
38
|
if r.last.last.succ != x
|
160
39
|
r << (x..x) # Start new range
|
161
40
|
else
|
162
41
|
r[0..-2] << (r.last.first..x) # Update last range
|
163
42
|
end
|
164
43
|
end
|
165
|
-
.map { |range| range.size == 1 ? range.first : range}
|
166
|
-
.join(", ")
|
44
|
+
.map { |range| range.size == 1 ? range.first : range } # Replace `int..int` with `int`
|
167
45
|
end
|
168
46
|
|
169
47
|
count = 0
|
170
|
-
|
48
|
+
filename = RegexpExamples::UnicodeCharRanges::STORE_FILENAME
|
49
|
+
store = PStore.new(filename)
|
50
|
+
store.transaction do
|
171
51
|
NamedGroups.each do |name|
|
172
|
-
|
173
|
-
|
174
|
-
|
52
|
+
count += 1
|
53
|
+
# Only generating first 128 matches, for performance...
|
54
|
+
# (I have tried this with generating ALL examples, and it makes the ruby gem
|
55
|
+
# painfully slow and bloated... Especially the test suite.)
|
56
|
+
matching_codes = [(0..55_295), (57_344..65_535)].map(&:to_a).flatten.lazy
|
57
|
+
.find { |x| /\p{#{name}}/ =~ eval("?\\u{#{x.to_s(16)}}") }
|
58
|
+
(128)
|
59
|
+
store[name.downcase] = calculate_ranges(matching_codes)
|
175
60
|
puts "(#{count}/#{NamedGroups.length}) Finished property: #{name}"
|
176
61
|
end
|
177
|
-
puts
|
178
|
-
puts "Finished! Result stored in:
|
62
|
+
puts '*' * 50
|
63
|
+
puts "Finished! Result stored in: ./db/#{filename}"
|
179
64
|
end
|
180
|
-
|