twitter_cldr 3.0.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/Gemfile +4 -2
  3. data/History.txt +4 -0
  4. data/README.md +17 -6
  5. data/lib/twitter_cldr/resources/postal_codes_importer.rb +12 -1
  6. data/lib/twitter_cldr/resources/regexp_ast_generator.rb +41 -0
  7. data/lib/twitter_cldr/resources.rb +1 -0
  8. data/lib/twitter_cldr/shared/postal_code_generator.rb +50 -0
  9. data/lib/twitter_cldr/shared/postal_codes.rb +48 -9
  10. data/lib/twitter_cldr/shared.rb +15 -14
  11. data/lib/twitter_cldr/utils/regexp_ast.rb +115 -0
  12. data/lib/twitter_cldr/utils/regexp_sampler.rb +149 -0
  13. data/lib/twitter_cldr/utils.rb +5 -3
  14. data/lib/twitter_cldr/version.rb +1 -1
  15. data/resources/shared/postal_codes.yml +1442 -159
  16. data/spec/bidi/bidi_spec.rb +1 -1
  17. data/spec/collation/collation_spec.rb +1 -1
  18. data/spec/collation/collator_spec.rb +31 -31
  19. data/spec/collation/implicit_collation_elements_spec.rb +6 -6
  20. data/spec/collation/sort_key_builder_spec.rb +28 -26
  21. data/spec/collation/tailoring_spec.rb +1 -1
  22. data/spec/collation/trie_builder_spec.rb +16 -16
  23. data/spec/collation/trie_dumps_spec.rb +2 -2
  24. data/spec/collation/trie_loader_spec.rb +8 -8
  25. data/spec/collation/trie_spec.rb +61 -61
  26. data/spec/collation/trie_with_fallback_spec.rb +5 -5
  27. data/spec/core_ext_spec.rb +1 -1
  28. data/spec/data_readers/additional_date_format_selector_spec.rb +38 -38
  29. data/spec/data_readers/date_time_data_reader_spec.rb +2 -2
  30. data/spec/data_readers/number_data_reader_spec.rb +1 -1
  31. data/spec/formatters/calendars/datetime_formatter_spec.rb +218 -218
  32. data/spec/formatters/list_formatter_spec.rb +8 -8
  33. data/spec/formatters/numbers/abbreviated/abbreviated_number_formatter_spec.rb +14 -14
  34. data/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb +4 -4
  35. data/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +4 -4
  36. data/spec/formatters/numbers/currency_formatter_spec.rb +11 -11
  37. data/spec/formatters/numbers/decimal_formatter_spec.rb +3 -3
  38. data/spec/formatters/numbers/helpers/fraction_spec.rb +3 -3
  39. data/spec/formatters/numbers/helpers/integer_spec.rb +16 -16
  40. data/spec/formatters/numbers/number_formatter_spec.rb +21 -21
  41. data/spec/formatters/numbers/percent_formatter_spec.rb +3 -3
  42. data/spec/formatters/numbers/rbnf/rbnf_spec.rb +2 -2
  43. data/spec/formatters/plurals/plural_formatter_spec.rb +41 -41
  44. data/spec/formatters/plurals/rules_spec.rb +13 -13
  45. data/spec/localized/localized_array_spec.rb +12 -12
  46. data/spec/localized/localized_date_spec.rb +33 -33
  47. data/spec/localized/localized_datetime_spec.rb +11 -11
  48. data/spec/localized/localized_hash_spec.rb +4 -4
  49. data/spec/localized/localized_number_spec.rb +36 -36
  50. data/spec/localized/localized_object_spec.rb +8 -8
  51. data/spec/localized/localized_string_spec.rb +53 -53
  52. data/spec/localized/localized_symbol_spec.rb +9 -9
  53. data/spec/localized/localized_time_spec.rb +10 -10
  54. data/spec/localized/localized_timespan_spec.rb +8 -8
  55. data/spec/normalization_spec.rb +6 -6
  56. data/spec/parsers/number_parser_spec.rb +36 -36
  57. data/spec/parsers/parser_spec.rb +5 -5
  58. data/spec/parsers/segmentation_parser_spec.rb +19 -19
  59. data/spec/parsers/symbol_table_spec.rb +4 -4
  60. data/spec/parsers/unicode_regex/character_class_spec.rb +19 -19
  61. data/spec/parsers/unicode_regex/character_range_spec.rb +1 -1
  62. data/spec/parsers/unicode_regex/character_set_spec.rb +8 -8
  63. data/spec/parsers/unicode_regex/literal_spec.rb +5 -5
  64. data/spec/parsers/unicode_regex/unicode_string_spec.rb +2 -2
  65. data/spec/parsers/unicode_regex_parser_spec.rb +28 -28
  66. data/spec/resources/loader_spec.rb +32 -32
  67. data/spec/shared/break_iterator_spec.rb +13 -13
  68. data/spec/shared/calendar_spec.rb +59 -59
  69. data/spec/shared/casefolder_spec.rb +5 -5
  70. data/spec/shared/code_point_spec.rb +46 -46
  71. data/spec/shared/currencies_spec.rb +7 -7
  72. data/spec/shared/language_codes_spec.rb +34 -34
  73. data/spec/shared/languages_spec.rb +30 -30
  74. data/spec/shared/numbering_system_spec.rb +7 -7
  75. data/spec/shared/numbers_spec.rb +4 -4
  76. data/spec/shared/phone_codes_spec.rb +7 -7
  77. data/spec/shared/postal_code_generator_spec.rb +76 -0
  78. data/spec/shared/postal_codes_spec.rb +35 -29
  79. data/spec/shared/territories_spec.rb +40 -40
  80. data/spec/shared/unicode_regex_spec.rb +71 -71
  81. data/spec/spec_helper.rb +2 -2
  82. data/spec/tokenizers/calendars/date_tokenizer_spec.rb +1 -1
  83. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +6 -6
  84. data/spec/tokenizers/composite_token_spec.rb +3 -3
  85. data/spec/tokenizers/token_spec.rb +3 -3
  86. data/spec/twitter_cldr_spec.rb +72 -72
  87. data/spec/utils/code_points_spec.rb +10 -10
  88. data/spec/utils/interpolation_spec.rb +32 -32
  89. data/spec/utils/range_set_spec.rb +36 -36
  90. data/spec/utils/regexp_ast_spec.rb +44 -0
  91. data/spec/utils/regexp_sampler_spec.rb +182 -0
  92. data/spec/utils/yaml/yaml_spec.rb +23 -23
  93. data/spec/utils_spec.rb +19 -19
  94. metadata +263 -258
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d66b9de7cc80964cf816c2981bf6aac51ec26134
4
- data.tar.gz: e5039d2b57b98ea896830872a6d6e2ea72229d9f
3
+ metadata.gz: a4b16ad78f734a596fa4488a61e58b0e971cf2ce
4
+ data.tar.gz: 1179820a11c171adab4e841cf14efe0e6534a6a7
5
5
  SHA512:
6
- metadata.gz: ba0506579dd20f7d92d480c4d46834bb25eae55a622e3bb28ffdb59cc3039a9c74f7571072a4b38686620e56b7a7c8fccade116a9ce4cfd5e16c269ad6819c66
7
- data.tar.gz: 06a830ff2fa877e6ee2e98d0d3acbfced0e7529181418c247e56b07980750acd364cbdc93fbf247218f022ee3557fdae0b5f92f228e2cca1abae52fffb363ee2
6
+ metadata.gz: e4268cc2df5dda027c6b3d2fad2398a0ac5f0597fc620931f0e86cc4dabc775a3d6d0b731290a042ec1114f088244bbd8cc457f2a36066ba8c7c5d8b2810f0b3
7
+ data.tar.gz: 635c217f690dabc37991f4eff3d215e1312c66a543b2aa7e665e0e3469f0cd10bd37fee2452901ba8c5665f331b0cac2cd059a93f451f03f9118dfc1b97e1217
data/Gemfile CHANGED
@@ -14,6 +14,8 @@ group :development, :test do
14
14
  if RUBY_VERSION <= "1.8.7"
15
15
  gem 'oniguruma'
16
16
  end
17
+
18
+ gem 'regexp_parser', '~> 0.1.5'
17
19
  end
18
20
 
19
21
  group :development do
@@ -25,8 +27,8 @@ group :development do
25
27
  end
26
28
 
27
29
  group :test do
28
- gem 'rspec', '~> 2.11.0'
29
- gem 'rr', '~> 1.0.4'
30
+ gem 'rspec', '~> 2.14.0'
31
+ gem 'rr', '~> 1.1.2'
30
32
 
31
33
  if RUBY_VERSION >= "1.9"
32
34
  gem 'rubyzip'
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ == 3.0.2
2
+
3
+ * Adding ability to generate sample postal codes from their regexes.
4
+
1
5
  == 3.0.1
2
6
 
3
7
  * Fixing abbreviated timespan formats for en-GB (backport from 2.4.3).
data/README.md CHANGED
@@ -439,17 +439,21 @@ The CLDR contains postal code validation regexes for a number of countries.
439
439
 
440
440
  ```ruby
441
441
  # United States
442
- TwitterCldr::Shared::PostalCodes.valid?(:us, "94103") # true
443
- TwitterCldr::Shared::PostalCodes.valid?(:us, "9410") # false
442
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
443
+ postal_code.valid?("94103") # true
444
+ postal_code.valid?("9410") # false
444
445
 
445
446
  # England (Great Britain)
446
- TwitterCldr::Shared::PostalCodes.valid?(:gb, "BS98 1TL") # true
447
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
448
+ postal_code.valid?("BS98 1TL") # true
447
449
 
448
450
  # Sweden
449
- TwitterCldr::Shared::PostalCodes.valid?(:se, "280 12") # true
451
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
452
+ postal_code.valid?("280 12") # true
450
453
 
451
454
  # Canada
452
- TwitterCldr::Shared::PostalCodes.valid?(:ca, "V3H 1Z7") # true
455
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
456
+ postal_code.valid?("V3H 1Z7") # true
453
457
  ```
454
458
 
455
459
  Get a list of supported territories by using the `#territories` method:
@@ -461,7 +465,14 @@ TwitterCldr::Shared::PostalCodes.territories # [:ad, :am, :ar, :as, :at, ... ]
461
465
  Just want the regex? No problem:
462
466
 
463
467
  ```ruby
464
- TwitterCldr::Shared::PostalCodes.regex_for_territory(:us) # /\d{5}([ \-]\d{4})?/
468
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
469
+ postal_code.regexp # /\d{5}([ \-]\d{4})?/
470
+ ```
471
+
472
+ Get a sample of valid postal codes with the `#sample` method:
473
+
474
+ ```ruby
475
+ postal_code.sample(5) # ["83526", "31748-8754", "55851", "25788-4914", "55335"]
465
476
  ```
466
477
 
467
478
  ### Phone Codes
data/lib/twitter_cldr/resources/postal_codes_importer.rb CHANGED
@@ -35,7 +35,18 @@ module TwitterCldr
35
35
 
36
36
  postal_codes = Hash[postal_codes.sort_by(&:first)]
37
37
 
38
- File.open(File.join(@output_path, 'postal_codes.yml'), 'w') { |output| output.write(YAML.dump(postal_codes)) }
38
+ postal_codes = postal_codes.each_with_object({}) do |(territory, regex), memo|
39
+ memo[territory] = {
40
+ :regex => regex,
41
+ :ast => TwitterCldr::Utils::RegexpAst.dump(
42
+ RegexpAstGenerator.generate(regex.source)
43
+ )
44
+ }
45
+ end
46
+
47
+ File.open(File.join(@output_path, 'postal_codes.yml'), 'w') do |output|
48
+ output.write(YAML.dump(postal_codes))
49
+ end
39
50
  end
40
51
 
41
52
  end
data/lib/twitter_cldr/resources/regexp_ast_generator.rb ADDED
@@ -0,0 +1,41 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'regexp_parser'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+
11
+ class RegexpAstGenerator
12
+ class << self
13
+
14
+ def generate(regexp_str)
15
+ tree = Regexp::Parser.parse(regexp_str)
16
+ walk(tree)
17
+ end
18
+
19
+ private
20
+
21
+ def walk(node)
22
+ expressions = if node.respond_to?(:expressions)
23
+ node.expressions.map { |expr| walk(expr) }
24
+ else
25
+ []
26
+ end
27
+
28
+ class_for(node).from_parser_node(node, expressions)
29
+ end
30
+
31
+ def class_for(klass)
32
+ TwitterCldr::Utils::RegexpAst.const_get(
33
+ klass.class.to_s.split("::").last.to_sym
34
+ )
35
+ end
36
+
37
+ end
38
+ end
39
+
40
+ end
41
+ end
data/lib/twitter_cldr/resources.rb CHANGED
@@ -26,5 +26,6 @@ module TwitterCldr
26
26
  autoload :RbnfTestImporter, 'twitter_cldr/resources/rbnf_test_importer'
27
27
  autoload :ReadmeRenderer, 'twitter_cldr/resources/readme_renderer'
28
28
  autoload :CasefolderClassGenerator, 'twitter_cldr/resources/casefolder_class_generator'
29
+ autoload :RegexpAstGenerator, 'twitter_cldr/resources/regexp_ast_generator'
29
30
  end
30
31
  end
data/lib/twitter_cldr/shared/postal_code_generator.rb ADDED
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'set'
7
+
8
+ module TwitterCldr
9
+ module Shared
10
+ class PostalCodeGenerator
11
+
12
+ SAMPLE_MULTIPLIER = 4
13
+
14
+ def initialize(regexp_ast)
15
+ @regexp_generator = TwitterCldr::Utils::RegexpSampler.new(regexp_ast)
16
+ end
17
+
18
+ def generate
19
+ clean_result(@regexp_generator.generate)
20
+ end
21
+
22
+ def sample(sample_size = 1)
23
+ sample_set = Set.new
24
+ counter = 1
25
+
26
+ until sample_set.size == sample_size
27
+ sample = generate
28
+ sample_set << sample unless sample.empty?
29
+ counter += 1
30
+
31
+ # Stop if the number of attempted generations is
32
+ # n times more than requested. Some territories only
33
+ # have one postal code, so if the user asks for 10
34
+ # they'll get an infinite loop.
35
+ break if counter > sample_size * SAMPLE_MULTIPLIER
36
+ end
37
+
38
+ sample_set.to_a
39
+ end
40
+
41
+ private
42
+
43
+ # remove spaces that trail a dash
44
+ def clean_result(str)
45
+ str.gsub(/- /, '-')
46
+ end
47
+
48
+ end
49
+ end
50
+ end
data/lib/twitter_cldr/shared/postal_codes.rb CHANGED
@@ -5,7 +5,10 @@
5
5
 
6
6
  module TwitterCldr
7
7
  module Shared
8
- module PostalCodes
8
+
9
+ class InvalidTerritoryError < StandardError; end
10
+
11
+ class PostalCodes
9
12
 
10
13
  class << self
11
14
 
@@ -13,23 +16,59 @@ module TwitterCldr
13
16
  resource.keys
14
17
  end
15
18
 
16
- def regex_for_territory(territory)
17
- resource[territory.to_s.downcase.to_sym]
18
- end
19
-
20
- def valid?(territory, postal_code)
21
- regexp = regex_for_territory(territory)
22
- !!(regexp && regexp =~ postal_code)
19
+ def for_territory(territory)
20
+ key = territory.to_s.downcase.to_sym
21
+ if res = resource[key]
22
+ territory_cache[key] ||= begin
23
+ new(
24
+ territory,
25
+ res[:regex],
26
+ TwitterCldr::Utils::RegexpAst.load(res[:ast])
27
+ )
28
+ end
29
+ else
30
+ raise InvalidTerritoryError, "invalid territory"
31
+ end
23
32
  end
24
33
 
25
34
  private
26
35
 
36
+ def territory_cache
37
+ @territory_cache ||= {}
38
+ end
39
+
27
40
  def resource
28
41
  @resource ||= TwitterCldr.get_resource(:shared, :postal_codes)
29
42
  end
30
43
 
31
44
  end
32
45
 
46
+ attr_reader :territory, :regexp, :ast
47
+
48
+ def initialize(territory, regexp, ast)
49
+ @territory = territory
50
+ @regexp = regexp
51
+ @ast = ast
52
+ end
53
+
54
+ def valid?(postal_code)
55
+ !!(regexp && regexp =~ postal_code)
56
+ end
57
+
58
+ def sample(sample_size = 1)
59
+ generator.sample(sample_size)
60
+ end
61
+
62
+ private
63
+
64
+ def generator
65
+ generator_cache[territory] ||= PostalCodeGenerator.new(ast)
66
+ end
67
+
68
+ def generator_cache
69
+ @@generator_cache ||= {}
70
+ end
71
+
33
72
  end
34
73
  end
35
- end
74
+ end
data/lib/twitter_cldr/shared.rb CHANGED
@@ -5,19 +5,20 @@
5
5
 
6
6
  module TwitterCldr
7
7
  module Shared
8
- autoload :Calendar, 'twitter_cldr/shared/calendar'
9
- autoload :CodePoint, 'twitter_cldr/shared/code_point'
10
- autoload :Currencies, 'twitter_cldr/shared/currencies'
11
- autoload :LanguageCodes, 'twitter_cldr/shared/language_codes'
12
- autoload :Languages, 'twitter_cldr/shared/languages'
13
- autoload :Numbers, 'twitter_cldr/shared/numbers'
14
- autoload :PhoneCodes, 'twitter_cldr/shared/phone_codes'
15
- autoload :PostalCodes, 'twitter_cldr/shared/postal_codes'
16
- autoload :Bidi, 'twitter_cldr/shared/bidi'
17
- autoload :Territories, 'twitter_cldr/shared/territories'
18
- autoload :NumberingSystem, 'twitter_cldr/shared/numbering_system'
19
- autoload :Casefolder, 'twitter_cldr/shared/casefolder'
20
- autoload :UnicodeRegex, 'twitter_cldr/shared/unicode_regex'
21
- autoload :BreakIterator, 'twitter_cldr/shared/break_iterator'
8
+ autoload :Calendar, 'twitter_cldr/shared/calendar'
9
+ autoload :CodePoint, 'twitter_cldr/shared/code_point'
10
+ autoload :Currencies, 'twitter_cldr/shared/currencies'
11
+ autoload :LanguageCodes, 'twitter_cldr/shared/language_codes'
12
+ autoload :Languages, 'twitter_cldr/shared/languages'
13
+ autoload :Numbers, 'twitter_cldr/shared/numbers'
14
+ autoload :PhoneCodes, 'twitter_cldr/shared/phone_codes'
15
+ autoload :PostalCodes, 'twitter_cldr/shared/postal_codes'
16
+ autoload :PostalCodeGenerator, 'twitter_cldr/shared/postal_code_generator'
17
+ autoload :Bidi, 'twitter_cldr/shared/bidi'
18
+ autoload :Territories, 'twitter_cldr/shared/territories'
19
+ autoload :NumberingSystem, 'twitter_cldr/shared/numbering_system'
20
+ autoload :Casefolder, 'twitter_cldr/shared/casefolder'
21
+ autoload :UnicodeRegex, 'twitter_cldr/shared/unicode_regex'
22
+ autoload :BreakIterator, 'twitter_cldr/shared/break_iterator'
22
23
  end
23
24
  end
data/lib/twitter_cldr/utils/regexp_ast.rb ADDED
@@ -0,0 +1,115 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'base64'
7
+
8
+ module TwitterCldr
9
+ module Utils
10
+ module RegexpAst
11
+
12
+ def self.load(ast_str)
13
+ Marshal.load(Base64.decode64(ast_str))
14
+ end
15
+
16
+ def self.dump(ast)
17
+ Base64.encode64(Marshal.dump(ast))
18
+ end
19
+
20
+ class Node
21
+ attr_reader :expressions, :quantifier
22
+
23
+ def initialize(expressions, quantifier)
24
+ @expressions = expressions
25
+ @quantifier = quantifier
26
+ end
27
+
28
+ def quantified?
29
+ !!quantifier
30
+ end
31
+
32
+ def self.from_parser_node(node, expressions)
33
+ new(expressions, Quantifier.from_parser_node(node))
34
+ end
35
+ end
36
+
37
+ class CharacterSet < Node
38
+ attr_reader :members, :negated
39
+ alias :negated? :negated
40
+
41
+ def initialize(expressions, quantifier, members, negated)
42
+ @members = members; @negated = negated
43
+ super(expressions, quantifier)
44
+ end
45
+
46
+ def self.from_parser_node(node, expressions)
47
+ new(
48
+ expressions, Quantifier.from_parser_node(node),
49
+ fix_members(node.members), node.negative?
50
+ )
51
+ end
52
+
53
+ private
54
+
55
+ # CLDR occasionally uses \d and other escapes in character classes
56
+ # to signify 0-9 and friends. This is legal regex syntax, but the
57
+ # regexp_parser gem doesn't handle it correctly, so we have to
58
+ # repair things here.
59
+ def self.fix_members(members)
60
+ members.join.scan(/(\\[wd]|\w-\w|\w|-)/).to_a.flatten.inject([]) do |ret, member|
61
+ case member
62
+ when '\d' then ret << '0-9'
63
+ when '\w' then ret += ['A-Z', 'a-z', '0-9', '_']
64
+ else ret << member
65
+ end
66
+
67
+ ret
68
+ end
69
+ end
70
+ end
71
+
72
+ class Literal < Node
73
+ attr_reader :text
74
+
75
+ def initialize(expressions, quantifier, text)
76
+ @text = text
77
+ super(expressions, quantifier)
78
+ end
79
+
80
+ def self.from_parser_node(node, expressions)
81
+ new(
82
+ expressions, Quantifier.from_parser_node(node), node.text
83
+ )
84
+ end
85
+ end
86
+
87
+ class Quantifier
88
+ attr_reader :max, :min
89
+
90
+ def initialize(max, min)
91
+ @max = max; @min = min
92
+ end
93
+
94
+ def self.from_parser_node(node)
95
+ if node.quantifier
96
+ new(
97
+ node.quantifier.max,
98
+ node.quantifier.min
99
+ )
100
+ end
101
+ end
102
+ end
103
+
104
+ class EscapeSequence < Literal; end
105
+ class Word < Node; end
106
+ class Digit < Node; end
107
+ class Sequence < Node; end
108
+ class Alternation < Node; end
109
+ class Capture < Node; end
110
+ class Passive < Node; end
111
+ class Root < Node; end
112
+
113
+ end
114
+ end
115
+ end
data/lib/twitter_cldr/utils/regexp_sampler.rb ADDED
@@ -0,0 +1,149 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Utils
8
+
9
+ # Generates a valid string that would match the given regexp ast.
10
+ class RegexpSampler
11
+
12
+ attr_reader :regexp_ast
13
+
14
+ DIGITS = ('0'..'9').to_a
15
+ WORD_LETTERS = ('a'..'z').to_a + ('A'..'Z').to_a + ['_']
16
+
17
+ def initialize(regexp_ast)
18
+ @regexp_ast = regexp_ast
19
+ end
20
+
21
+ def generate
22
+ walk_children(regexp_ast)
23
+ end
24
+
25
+ private
26
+
27
+ def walk(node)
28
+ method = :"walk_#{class_name_for(node)}"
29
+ puts method unless respond_to?(method, true)
30
+ respond_to?(method, true) ? send(method, node) : ""
31
+ end
32
+
33
+ def walk_children(node)
34
+ node.expressions.map { |expr| walk(expr) }.join
35
+ end
36
+
37
+ def walk_digit(node)
38
+ if node.quantified?
39
+ quantifier_sample(DIGITS, node.quantifier)
40
+ else
41
+ [single_sample(DIGITS)]
42
+ end.join + walk_children(node)
43
+ end
44
+
45
+ def walk_word(node)
46
+ if node.quantified?
47
+ quantifier_sample(WORD_LETTERS, node.quantifier)
48
+ else
49
+ [single_sample(WORD_LETTERS)]
50
+ end.join + walk_children(node)
51
+ end
52
+
53
+ def walk_literal(node)
54
+ node.text * if node.quantified?
55
+ rand_in_quantifier(node.quantifier)
56
+ else
57
+ 1
58
+ end + walk_children(node)
59
+ end
60
+
61
+ def walk_character_set(node)
62
+ charset = expand_charset(node.members)
63
+
64
+ if node.quantified?
65
+ quantifier_sample(charset, node.quantifier)
66
+ else
67
+ [single_sample(charset)]
68
+ end.join + walk_children(node)
69
+ end
70
+
71
+ def walk_capture(node)
72
+ if node.quantified?
73
+ rand_in_quantifier(node.quantifier).times.map do
74
+ walk_children(node)
75
+ end.join
76
+ else
77
+ walk_children(node)
78
+ end
79
+ end
80
+
81
+ # "passive" means non-capturing group.
82
+ # Since we don't need to distinguish between
83
+ # captures/non-captures, we can just delegate
84
+ # to the walk_capture method.
85
+ def walk_passive(node)
86
+ walk_capture(node)
87
+ end
88
+
89
+ def walk_alternation(node)
90
+ if node.quantified?
91
+ rand_in_quantifier(node.quantifier).times.map do
92
+ walk(single_sample(node.expressions))
93
+ end.join
94
+ else
95
+ walk(single_sample(node.expressions))
96
+ end
97
+ end
98
+
99
+ def walk_sequence(node)
100
+ if node.quantified?
101
+ rand_in_quantifier(node.quantifier).times.map do
102
+ node.expressions.map { |expr| walk(expr) }.join
103
+ end.join
104
+ else
105
+ node.expressions.map { |expr| walk(expr) }.join
106
+ end
107
+ end
108
+
109
+ def expand_charset(members)
110
+ members.inject([]) do |ret, member|
111
+ ret + expand_charset_member(member)
112
+ end
113
+ end
114
+
115
+ def expand_charset_member(member)
116
+ left, right = member.scan(/([^\\])-?/).flatten
117
+ right ? (left..right).to_a : [left]
118
+ end
119
+
120
+ def quantifier_sample(arr, quantifier)
121
+ sample_size = if quantifier.min == quantifier.max
122
+ quantifier.min
123
+ else
124
+ rand_in_quantifier(quantifier)
125
+ end
126
+
127
+ sample_size.times.map { single_sample(arr) }
128
+ end
129
+
130
+ def single_sample(arr)
131
+ arr[rand(arr.size)]
132
+ end
133
+
134
+ def rand_in_quantifier(quantifier)
135
+ rand_in_range(quantifier.min, quantifier.max)
136
+ end
137
+
138
+ def rand_in_range(min, max)
139
+ min + rand((max - min) + 1)
140
+ end
141
+
142
+ def class_name_for(node)
143
+ name = node.class.to_s.split("::").last
144
+ name.gsub(/\A|([A-Z])/) { $1 ? "_#{$1.downcase}" : "" }.downcase
145
+ end
146
+
147
+ end
148
+ end
149
+ end
data/lib/twitter_cldr/utils.rb CHANGED
@@ -6,9 +6,11 @@
6
6
  module TwitterCldr
7
7
  module Utils
8
8
 
9
- autoload :CodePoints, 'twitter_cldr/utils/code_points'
10
- autoload :YAML, 'twitter_cldr/utils/yaml'
11
- autoload :RangeSet, 'twitter_cldr/utils/range_set'
9
+ autoload :CodePoints, 'twitter_cldr/utils/code_points'
10
+ autoload :YAML, 'twitter_cldr/utils/yaml'
11
+ autoload :RangeSet, 'twitter_cldr/utils/range_set'
12
+ autoload :RegexpAst, 'twitter_cldr/utils/regexp_ast'
13
+ autoload :RegexpSampler, 'twitter_cldr/utils/regexp_sampler'
12
14
 
13
15
  class << self
14
16
 
data/lib/twitter_cldr/version.rb CHANGED
@@ -4,5 +4,5 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- VERSION = "3.0.1"
7
+ VERSION = "3.0.2"
8
8
  end