regexp-examples 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -9
- data/Rakefile +3 -3
- data/db/unicode_ranges_2.0.pstore +0 -0
- data/db/unicode_ranges_2.1.pstore +0 -0
- data/db/unicode_ranges_2.2.pstore +0 -0
- data/lib/{regexp-examples/core_extensions → core_extensions}/regexp/examples.rb +3 -3
- data/lib/regexp-examples.rb +11 -2
- data/lib/regexp-examples/backreferences.rb +3 -4
- data/lib/regexp-examples/chargroup_parser.rb +14 -14
- data/lib/regexp-examples/constants.rb +5 -156
- data/lib/regexp-examples/groups.rb +20 -12
- data/lib/regexp-examples/helpers.rb +5 -5
- data/lib/regexp-examples/parser.rb +52 -42
- data/lib/regexp-examples/repeaters.rb +5 -5
- data/lib/regexp-examples/unicode_char_ranges.rb +45 -0
- data/lib/regexp-examples/version.rb +1 -1
- data/regexp-examples.gemspec +4 -4
- data/scripts/unicode_lister.rb +34 -150
- data/spec/regexp-examples_spec.rb +81 -59
- data/spec/regexp-random_example_spec.rb +2 -2
- data/spec/spec_helper.rb +1 -1
- metadata +8 -4
@@ -8,7 +8,7 @@ module RegexpExamples
|
|
8
8
|
def self.permutations_of_strings(arrays_of_strings)
|
9
9
|
first = arrays_of_strings.shift
|
10
10
|
return first if arrays_of_strings.empty?
|
11
|
-
first.product(
|
11
|
+
first.product(permutations_of_strings(arrays_of_strings)).map do |result|
|
12
12
|
join_preserving_capture_groups(result)
|
13
13
|
end
|
14
14
|
end
|
@@ -16,8 +16,8 @@ module RegexpExamples
|
|
16
16
|
def self.join_preserving_capture_groups(result)
|
17
17
|
result.flatten!
|
18
18
|
subgroups = result
|
19
|
-
|
20
|
-
|
19
|
+
.map(&:all_subgroups)
|
20
|
+
.flatten
|
21
21
|
|
22
22
|
# Only save the LAST group from repeated capture groups, e.g. /([ab]){2}/
|
23
23
|
subgroups.delete_if do |subgroup|
|
@@ -35,12 +35,12 @@ module RegexpExamples
|
|
35
35
|
end
|
36
36
|
|
37
37
|
private
|
38
|
+
|
38
39
|
def self.generic_map_result(repeaters, method)
|
39
40
|
repeaters
|
40
|
-
.map {|repeater| repeater.public_send(method)}
|
41
|
+
.map { |repeater| repeater.public_send(method) }
|
41
42
|
.instance_eval do |partial_results|
|
42
43
|
RegexpExamples.permutations_of_strings(partial_results)
|
43
44
|
end
|
44
45
|
end
|
45
46
|
end
|
46
|
-
|
@@ -15,9 +15,7 @@ module RegexpExamples
|
|
15
15
|
repeaters = []
|
16
16
|
until end_of_regexp
|
17
17
|
group = parse_group(repeaters)
|
18
|
-
if group.is_a? OrGroup
|
19
|
-
return [OneTimeRepeater.new(group)]
|
20
|
-
end
|
18
|
+
return [OneTimeRepeater.new(group)] if group.is_a? OrGroup
|
21
19
|
@current_position += 1
|
22
20
|
repeaters << parse_repeater(group)
|
23
21
|
end
|
@@ -101,12 +99,16 @@ module RegexpExamples
|
|
101
99
|
@current_position += 1
|
102
100
|
case
|
103
101
|
when rest_of_string =~ /\A(\d{1,3})/
|
104
|
-
@current_position += (
|
105
|
-
group = parse_backreference_group(
|
102
|
+
@current_position += (Regexp.last_match(1).length - 1) # In case of 10+ backrefs!
|
103
|
+
group = parse_backreference_group(Regexp.last_match(1))
|
106
104
|
when rest_of_string =~ /\Ak['<]([\w-]+)['>]/ # Named capture group
|
107
|
-
@current_position += (
|
108
|
-
|
109
|
-
|
105
|
+
@current_position += (Regexp.last_match(1).length + 2)
|
106
|
+
group_id = if Regexp.last_match(1).to_i < 0
|
107
|
+
# RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
|
108
|
+
@num_groups + Regexp.last_match(1).to_i + 1
|
109
|
+
else
|
110
|
+
Regexp.last_match(1)
|
111
|
+
end
|
110
112
|
group = parse_backreference_group(group_id)
|
111
113
|
when BackslashCharMap.keys.include?(next_char)
|
112
114
|
group = CharGroup.new(
|
@@ -114,32 +116,39 @@ module RegexpExamples
|
|
114
116
|
@ignorecase
|
115
117
|
)
|
116
118
|
when rest_of_string =~ /\A(c|C-)(.)/ # Control character
|
117
|
-
@current_position +=
|
118
|
-
group = parse_single_char_group(
|
119
|
+
@current_position += Regexp.last_match(1).length
|
120
|
+
group = parse_single_char_group(parse_control_character(Regexp.last_match(2)))
|
119
121
|
when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
|
120
|
-
@current_position +=
|
121
|
-
group = parse_single_char_group(
|
122
|
+
@current_position += Regexp.last_match(1).length
|
123
|
+
group = parse_single_char_group(parse_escape_sequence(Regexp.last_match(1)))
|
122
124
|
when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
|
123
|
-
@current_position +=
|
124
|
-
sequence =
|
125
|
-
group = parse_single_char_group(
|
125
|
+
@current_position += Regexp.last_match(1).length
|
126
|
+
sequence = Regexp.last_match(1).match(/\h{1,4}/)[0] # Strip off "{" and "}"
|
127
|
+
group = parse_single_char_group(parse_unicode_sequence(sequence))
|
126
128
|
when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i # Named properties
|
127
|
-
@current_position += (
|
128
|
-
|
129
|
+
@current_position += (Regexp.last_match(2).length + # 0 or 1, if '^' is present
|
130
|
+
Regexp.last_match(3).length + # Length of the property name
|
131
|
+
2) # Length of opening and closing brackets (always 2)
|
132
|
+
# Beware of double negatives! E.g. /\P{^Space}/
|
133
|
+
is_negative = (Regexp.last_match(1) == 'P') ^ (Regexp.last_match(2) == '^')
|
129
134
|
group = CharGroup.new(
|
130
135
|
if is_negative
|
131
|
-
CharSets::Any.dup - NamedPropertyCharMap[
|
136
|
+
CharSets::Any.dup - NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
132
137
|
else
|
133
|
-
NamedPropertyCharMap[
|
138
|
+
NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
134
139
|
end,
|
135
140
|
@ignorecase
|
136
141
|
)
|
137
142
|
when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
|
138
143
|
group = PlaceHolderGroup.new
|
139
144
|
when next_char == 'R' # Linebreak
|
140
|
-
group = CharGroup.new(
|
145
|
+
group = CharGroup.new(
|
146
|
+
["\r\n", "\n", "\v", "\f", "\r"],
|
147
|
+
@ignorecase
|
148
|
+
) # Using "\r\n" as one character is a little bit hacky...
|
141
149
|
when next_char == 'g' # Subexpression call
|
142
|
-
|
150
|
+
fail IllegalSyntaxError,
|
151
|
+
'Subexpression calls (\\g) cannot be supported, as they are not regular'
|
143
152
|
when next_char =~ /[bB]/ # Anchors
|
144
153
|
raise_anchors_exception!
|
145
154
|
when next_char =~ /[AG]/ # Start of string
|
@@ -155,7 +164,7 @@ module RegexpExamples
|
|
155
164
|
raise_anchors_exception!
|
156
165
|
end
|
157
166
|
else
|
158
|
-
group = parse_single_char_group(
|
167
|
+
group = parse_single_char_group(next_char)
|
159
168
|
end
|
160
169
|
group
|
161
170
|
end
|
@@ -193,7 +202,7 @@ module RegexpExamples
|
|
193
202
|
comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
|
194
203
|
@current_position += comment_group.length
|
195
204
|
when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
|
196
|
-
regexp_options_toggle(
|
205
|
+
regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
|
197
206
|
@num_groups -= 1 # Toggle "groups" should not increase backref group count
|
198
207
|
@current_position += $&.length + 1
|
199
208
|
if next_char == ':' # e.g. /(?i:subexpr)/
|
@@ -202,9 +211,11 @@ module RegexpExamples
|
|
202
211
|
return PlaceHolderGroup.new
|
203
212
|
end
|
204
213
|
when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
|
205
|
-
|
214
|
+
fail IllegalSyntaxError,
|
215
|
+
'Lookaheads are not regular; cannot generate examples'
|
206
216
|
when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
|
207
|
-
|
217
|
+
fail IllegalSyntaxError,
|
218
|
+
'Lookbehinds are not regular; cannot generate examples'
|
208
219
|
else # e.g. /(?<name>namedgroup)/
|
209
220
|
@current_position += (match[3].length + 3)
|
210
221
|
group_id = match[3]
|
@@ -226,12 +237,12 @@ module RegexpExamples
|
|
226
237
|
end
|
227
238
|
|
228
239
|
def regexp_options_toggle(on, off)
|
229
|
-
@ignorecase = true if
|
230
|
-
@ignorecase = false if
|
231
|
-
@multiline = true if
|
232
|
-
@multiline = false if
|
233
|
-
@extended = true if
|
234
|
-
@extended = false if
|
240
|
+
@ignorecase = true if on.include? 'i'
|
241
|
+
@ignorecase = false if off.include? 'i'
|
242
|
+
@multiline = true if on.include? 'm'
|
243
|
+
@multiline = false if off.include? 'm'
|
244
|
+
@extended = true if on.include? 'x'
|
245
|
+
@extended = false if off.include? 'x'
|
235
246
|
end
|
236
247
|
|
237
248
|
def parse_char_group
|
@@ -252,7 +263,6 @@ module RegexpExamples
|
|
252
263
|
OrGroup.new(left_repeaters, right_repeaters)
|
253
264
|
end
|
254
265
|
|
255
|
-
|
256
266
|
def parse_single_char_group(char)
|
257
267
|
SingleCharGroup.new(char, @ignorecase)
|
258
268
|
end
|
@@ -310,17 +320,18 @@ module RegexpExamples
|
|
310
320
|
end
|
311
321
|
|
312
322
|
def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
323
|
+
# .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
|
324
|
+
if min && !has_comma && !max && next_char == '?'
|
325
|
+
repeater = parse_question_mark_repeater(repeater)
|
326
|
+
else
|
327
|
+
parse_reluctant_or_possessive_repeater
|
328
|
+
end
|
329
|
+
repeater
|
320
330
|
end
|
321
331
|
|
322
332
|
def raise_anchors_exception!
|
323
|
-
|
333
|
+
fail IllegalSyntaxError,
|
334
|
+
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
|
324
335
|
end
|
325
336
|
|
326
337
|
def parse_one_time_repeater(group)
|
@@ -336,8 +347,7 @@ module RegexpExamples
|
|
336
347
|
end
|
337
348
|
|
338
349
|
def end_of_regexp
|
339
|
-
next_char ==
|
350
|
+
next_char == ')' || @current_position >= regexp_string.length
|
340
351
|
end
|
341
352
|
end
|
342
353
|
end
|
343
|
-
|
@@ -6,11 +6,11 @@ module RegexpExamples
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def result
|
9
|
-
group_results = group.result
|
9
|
+
group_results = group.result.first(RegexpExamples.MaxGroupResults)
|
10
10
|
results = []
|
11
11
|
min_repeats.upto(max_repeats) do |repeats|
|
12
12
|
if repeats.zero?
|
13
|
-
results << [
|
13
|
+
results << [GroupResult.new('')]
|
14
14
|
else
|
15
15
|
results << RegexpExamples.permutations_of_strings(
|
16
16
|
[group_results] * repeats
|
@@ -23,8 +23,8 @@ module RegexpExamples
|
|
23
23
|
def random_result
|
24
24
|
result = []
|
25
25
|
rand(min_repeats..max_repeats).times { result << group.random_result }
|
26
|
-
result << [
|
27
|
-
RegexpExamples
|
26
|
+
result << [GroupResult.new('')] if result.empty? # in case of 0.times
|
27
|
+
RegexpExamples.permutations_of_strings(result)
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
@@ -74,9 +74,9 @@ module RegexpExamples
|
|
74
74
|
end
|
75
75
|
|
76
76
|
private
|
77
|
+
|
77
78
|
def smallest(x, y)
|
78
79
|
(x < y) ? x : y
|
79
80
|
end
|
80
81
|
end
|
81
82
|
end
|
82
|
-
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'pstore'
|
2
|
+
|
3
|
+
module RegexpExamples
  # Read-only lookup of example characters for named unicode properties,
  # backed by a PStore file.
  #
  # Each stored value is a compact list whose entries are either a single
  # Integer code point or a Range of Integer code points.
  # (Keys are presumably the lowercased property names written by
  # scripts/unicode_lister.rb - confirm against the generated store.)
  class UnicodeCharRanges
    # These values were generated by: scripts/unicode_lister.rb
    # Note: Only the first 128 results are listed, for performance.
    # Also, some groups seem to have no matches (weird!)
    # (Don't care about ruby micro version number)
    STORE_FILENAME = "unicode_ranges_#{RUBY_VERSION[0..2]}.pstore"

    attr_reader :range_store

    # @param filename [String] name of the PStore file; it is resolved via
    #   "../../../db/" relative to this source file.
    def initialize(filename = STORE_FILENAME)
      @range_store = PStore.new(File.expand_path("../../../db/#{filename}", __FILE__))
    end

    # Fetches the characters stored under +key+, inside a read-only transaction.
    #
    # An unknown key is not handled specially: PStore returns nil for it and
    # the conversion then raises NoMethodError.
    #
    # @param key [String] key of the stored range list
    # @return [Array<String>] one single-character string per stored code point
    def get(key)
      range_store.transaction(true) do
        ranges_to_unicode(range_store[key])
      end
    end

    alias_method :[], :get

    private

    # Expands a compact list of code points into the characters themselves.
    #
    #   ranges_to_unicode([65, 97..99]) # => ["A", "a", "b", "c"]
    #
    # @param ranges [Array<Integer, Range>] single code points and/or ranges
    # @return [Array<String>]
    def ranges_to_unicode(ranges)
      ranges.flat_map do |range|
        # Lone code points are stored as bare integers ("small hack to increase
        # data compression"). Integer (rather than Fixnum, which no longer
        # exists in current rubies) matches them on every ruby version.
        if range.is_a? Integer
          [codepoint_to_unicode(range)]
        else
          range.map { |codepoint| codepoint_to_unicode(codepoint) }
        end
      end
    end

    # @param hex [String] hexadecimal code point, e.g. "41"
    # @return [String] the corresponding character, e.g. "A"
    # @raise [ArgumentError] if +hex+ is not a valid hexadecimal number
    def hex_to_unicode(hex)
      codepoint_to_unicode(Integer(hex, 16))
    end

    # Builds the character directly from its code point, without eval.
    #
    # @param codepoint [Integer]
    # @return [String] a one-character UTF-8 string
    def codepoint_to_unicode(codepoint)
      [codepoint].pack('U')
    end
  end
end
|
data/regexp-examples.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require File.expand_path(
|
1
|
+
require File.expand_path('../lib/regexp-examples/version', __FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'regexp-examples'
|
@@ -11,11 +11,11 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.files = `git ls-files -z`.split("\x0")
|
12
12
|
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
13
13
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
14
|
-
s.require_paths = [
|
14
|
+
s.require_paths = ['lib']
|
15
15
|
s.homepage =
|
16
16
|
'http://rubygems.org/gems/regexp-examples'
|
17
|
-
s.add_development_dependency
|
18
|
-
s.add_development_dependency
|
17
|
+
s.add_development_dependency 'bundler', '~> 1.7'
|
18
|
+
s.add_development_dependency 'rake', '~> 10.0'
|
19
19
|
s.license = 'MIT'
|
20
20
|
s.required_ruby_version = '>= 2.0.0'
|
21
21
|
end
|
data/scripts/unicode_lister.rb
CHANGED
@@ -1,180 +1,64 @@
|
|
1
|
+
require 'pstore'
|
2
|
+
require_relative '../lib/regexp-examples/unicode_char_ranges'
|
1
3
|
# A script to generate lists of all unicode characters
|
2
4
|
# that match all named group/character properties regexps.
|
3
5
|
# For use in e.g. /\p{Arabic}/.examples
|
4
6
|
|
5
7
|
# To (re-)generate this list, simply run this file!
|
6
8
|
# > ruby scripts/unicode_lister.rb
|
7
|
-
OutputFilename = 'unicode_result'
|
8
9
|
|
9
10
|
# Taken from ruby documentation:
|
10
11
|
# http://ruby-doc.org//core-2.2.0/Regexp.html#class-Regexp-label-Character+Properties
|
11
12
|
NamedGroups = %w(
|
12
|
-
Alnum
|
13
|
-
Alpha
|
14
|
-
Blank
|
15
|
-
Cntrl
|
16
|
-
Digit
|
17
|
-
Graph
|
18
|
-
Lower
|
19
|
-
Print
|
20
|
-
Punct
|
21
|
-
Space
|
22
|
-
Upper
|
23
|
-
XDigit
|
24
|
-
Word
|
25
|
-
ASCII
|
26
|
-
Any
|
27
|
-
Assigned
|
13
|
+
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII Any Assigned
|
28
14
|
|
29
|
-
L
|
30
|
-
Ll
|
31
|
-
Lm
|
32
|
-
Lo
|
33
|
-
Lt
|
34
|
-
Lu
|
35
|
-
M
|
36
|
-
Mn
|
37
|
-
Mc
|
38
|
-
Me
|
39
|
-
N
|
40
|
-
Nd
|
41
|
-
Nl
|
42
|
-
No
|
43
|
-
P
|
44
|
-
Pc
|
45
|
-
Pd
|
46
|
-
Ps
|
47
|
-
Pe
|
48
|
-
Pi
|
49
|
-
Pf
|
50
|
-
Po
|
51
|
-
S
|
52
|
-
Sm
|
53
|
-
Sc
|
54
|
-
Sk
|
55
|
-
So
|
56
|
-
Z
|
57
|
-
Zs
|
58
|
-
Zl
|
59
|
-
Zp
|
60
|
-
C
|
61
|
-
Cc
|
62
|
-
Cf
|
63
|
-
Cn
|
64
|
-
Co
|
65
|
-
Cs
|
15
|
+
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Cs
|
66
16
|
|
67
|
-
Arabic
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
Buginese
|
74
|
-
Buhid
|
75
|
-
Canadian_Aboriginal
|
76
|
-
Carian
|
77
|
-
Cham
|
78
|
-
Cherokee
|
79
|
-
Common
|
80
|
-
Coptic
|
81
|
-
Cuneiform
|
82
|
-
Cypriot
|
83
|
-
Cyrillic
|
84
|
-
Deseret
|
85
|
-
Devanagari
|
86
|
-
Ethiopic
|
87
|
-
Georgian
|
88
|
-
Glagolitic
|
89
|
-
Gothic
|
90
|
-
Greek
|
91
|
-
Gujarati
|
92
|
-
Gurmukhi
|
93
|
-
Han
|
94
|
-
Hangul
|
95
|
-
Hanunoo
|
96
|
-
Hebrew
|
97
|
-
Hiragana
|
98
|
-
Inherited
|
99
|
-
Kannada
|
100
|
-
Katakana
|
101
|
-
Kayah_Li
|
102
|
-
Kharoshthi
|
103
|
-
Khmer
|
104
|
-
Lao
|
105
|
-
Latin
|
106
|
-
Lepcha
|
107
|
-
Limbu
|
108
|
-
Linear_B
|
109
|
-
Lycian
|
110
|
-
Lydian
|
111
|
-
Malayalam
|
112
|
-
Mongolian
|
113
|
-
Myanmar
|
114
|
-
New_Tai_Lue
|
115
|
-
Nko
|
116
|
-
Ogham
|
117
|
-
Ol_Chiki
|
118
|
-
Old_Italic
|
119
|
-
Old_Persian
|
120
|
-
Oriya
|
121
|
-
Osmanya
|
122
|
-
Phags_Pa
|
123
|
-
Phoenician
|
124
|
-
Rejang
|
125
|
-
Runic
|
126
|
-
Saurashtra
|
127
|
-
Shavian
|
128
|
-
Sinhala
|
129
|
-
Sundanese
|
130
|
-
Syloti_Nagri
|
131
|
-
Syriac
|
132
|
-
Tagalog
|
133
|
-
Tagbanwa
|
134
|
-
Tai_Le
|
135
|
-
Tamil
|
136
|
-
Telugu
|
137
|
-
Thaana
|
138
|
-
Thai
|
139
|
-
Tibetan
|
140
|
-
Tifinagh
|
141
|
-
Ugaritic
|
142
|
-
Vai
|
143
|
-
Yi
|
17
|
+
Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal Carian Cham Cherokee
|
18
|
+
Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari Ethiopic Georgian Glagolitic Gothic Greek
|
19
|
+
Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana Inherited Kannada Katakana Kayah_Li Kharoshthi Khmer
|
20
|
+
Lao Latin Lepcha Limbu Linear_B Lycian Lydian Malayalam Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
|
21
|
+
Old_Italic Old_Persian Oriya Osmanya Phags_Pa Phoenician Rejang Runic Saurashtra Shavian Sinhala Sundanese
|
22
|
+
Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi
|
144
23
|
)
|
145
24
|
|
146
|
-
# Note: For
|
147
|
-
#
|
148
|
-
#
|
149
|
-
#
|
150
|
-
#
|
151
|
-
# If anyone ever cares about this (I doubt it), I'll look into fixing/improving it.
|
25
|
+
# Note: For the range 55296..57343, these are reserved values that are not legal
|
26
|
+
# unicode characters.
|
27
|
+
# I.e. a character encoding-related exception gets raised when you do:
|
28
|
+
# `/regex/ =~ eval("?\\u{#{x.to_s(16)}}")`
|
29
|
+
# TODO: Add a link to somewhere that explains this better.
|
152
30
|
|
153
|
-
#
|
154
|
-
# Example
|
31
|
+
# "Compresses" the values in an array by using ranges.
|
32
|
+
# Example input: [1, 2, 3, 4, 6, 7, 12, 14]
|
33
|
+
# Example output: [1..4, 6..7, 12, 14]
|
155
34
|
# Collapses runs of consecutive integers into ranges; isolated values stay
# as plain integers.
#
# The input is expected to be in ascending order: a value only extends the
# current run when it is the successor of the run's last value. The input
# array is not modified.
#
# @param matching_codes [Array<Integer>] code points, ascending
# @return [Array<Integer, Range>] e.g. [1, 2, 3, 5] => [1..3, 5]
def calculate_ranges(matching_codes)
  runs = []
  matching_codes.each do |code|
    if runs.any? && runs.last.last.succ == code
      runs[-1] = (runs.last.first..code) # Extend the current run in place
    else
      runs << (code..code) # Start new range
    end
  end
  runs.map { |run| run.first == run.last ? run.first : run } # Replace `int..int` with `int`
end
|
168
46
|
|
169
47
|
# Generate the store: for every named property, record the first 128 matching
# code points (compressed into ranges) under the lowercased property name.
filename = RegexpExamples::UnicodeCharRanges::STORE_FILENAME
# Write into db/ next to this script's parent directory: that is where
# RegexpExamples::UnicodeCharRanges#initialize resolves its store, and where
# the final message says the result lives.
store = PStore.new(File.expand_path("../../db/#{filename}", __FILE__))
# 55296..57343 is skipped: those values are not legal unicode characters.
candidate_codes = [(0..55_295), (57_344..65_535)].flat_map(&:to_a)
store.transaction do
  NamedGroups.each.with_index(1) do |name, count|
    # Build the property regexp once per group instead of once per candidate.
    property = /\p{#{name}}/
    # Only generating first 128 matches, for performance...
    # (I have tried this with generating ALL examples, and it makes the ruby gem
    # painfully slow and bloated... Especially the test suite.)
    # `select` + `first(128)` on a lazy enumerator stops scanning as soon as
    # 128 matches are found; `find` would yield only a single code point.
    matching_codes = candidate_codes.lazy
                                    .select { |code| property =~ [code].pack('U') }
                                    .first(128)
    store[name.downcase] = calculate_ranges(matching_codes)
    puts "(#{count}/#{NamedGroups.length}) Finished property: #{name}"
  end
  puts '*' * 50
  puts "Finished! Result stored in: ./db/#{filename}"
end
|
180
|
-
|