twitter_cldr 3.0.1 → 3.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/Gemfile +4 -2
  3. data/History.txt +4 -0
  4. data/README.md +17 -6
  5. data/lib/twitter_cldr/resources/postal_codes_importer.rb +12 -1
  6. data/lib/twitter_cldr/resources/regexp_ast_generator.rb +41 -0
  7. data/lib/twitter_cldr/resources.rb +1 -0
  8. data/lib/twitter_cldr/shared/postal_code_generator.rb +50 -0
  9. data/lib/twitter_cldr/shared/postal_codes.rb +48 -9
  10. data/lib/twitter_cldr/shared.rb +15 -14
  11. data/lib/twitter_cldr/utils/regexp_ast.rb +115 -0
  12. data/lib/twitter_cldr/utils/regexp_sampler.rb +149 -0
  13. data/lib/twitter_cldr/utils.rb +5 -3
  14. data/lib/twitter_cldr/version.rb +1 -1
  15. data/resources/shared/postal_codes.yml +1442 -159
  16. data/spec/bidi/bidi_spec.rb +1 -1
  17. data/spec/collation/collation_spec.rb +1 -1
  18. data/spec/collation/collator_spec.rb +31 -31
  19. data/spec/collation/implicit_collation_elements_spec.rb +6 -6
  20. data/spec/collation/sort_key_builder_spec.rb +28 -26
  21. data/spec/collation/tailoring_spec.rb +1 -1
  22. data/spec/collation/trie_builder_spec.rb +16 -16
  23. data/spec/collation/trie_dumps_spec.rb +2 -2
  24. data/spec/collation/trie_loader_spec.rb +8 -8
  25. data/spec/collation/trie_spec.rb +61 -61
  26. data/spec/collation/trie_with_fallback_spec.rb +5 -5
  27. data/spec/core_ext_spec.rb +1 -1
  28. data/spec/data_readers/additional_date_format_selector_spec.rb +38 -38
  29. data/spec/data_readers/date_time_data_reader_spec.rb +2 -2
  30. data/spec/data_readers/number_data_reader_spec.rb +1 -1
  31. data/spec/formatters/calendars/datetime_formatter_spec.rb +218 -218
  32. data/spec/formatters/list_formatter_spec.rb +8 -8
  33. data/spec/formatters/numbers/abbreviated/abbreviated_number_formatter_spec.rb +14 -14
  34. data/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb +4 -4
  35. data/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +4 -4
  36. data/spec/formatters/numbers/currency_formatter_spec.rb +11 -11
  37. data/spec/formatters/numbers/decimal_formatter_spec.rb +3 -3
  38. data/spec/formatters/numbers/helpers/fraction_spec.rb +3 -3
  39. data/spec/formatters/numbers/helpers/integer_spec.rb +16 -16
  40. data/spec/formatters/numbers/number_formatter_spec.rb +21 -21
  41. data/spec/formatters/numbers/percent_formatter_spec.rb +3 -3
  42. data/spec/formatters/numbers/rbnf/rbnf_spec.rb +2 -2
  43. data/spec/formatters/plurals/plural_formatter_spec.rb +41 -41
  44. data/spec/formatters/plurals/rules_spec.rb +13 -13
  45. data/spec/localized/localized_array_spec.rb +12 -12
  46. data/spec/localized/localized_date_spec.rb +33 -33
  47. data/spec/localized/localized_datetime_spec.rb +11 -11
  48. data/spec/localized/localized_hash_spec.rb +4 -4
  49. data/spec/localized/localized_number_spec.rb +36 -36
  50. data/spec/localized/localized_object_spec.rb +8 -8
  51. data/spec/localized/localized_string_spec.rb +53 -53
  52. data/spec/localized/localized_symbol_spec.rb +9 -9
  53. data/spec/localized/localized_time_spec.rb +10 -10
  54. data/spec/localized/localized_timespan_spec.rb +8 -8
  55. data/spec/normalization_spec.rb +6 -6
  56. data/spec/parsers/number_parser_spec.rb +36 -36
  57. data/spec/parsers/parser_spec.rb +5 -5
  58. data/spec/parsers/segmentation_parser_spec.rb +19 -19
  59. data/spec/parsers/symbol_table_spec.rb +4 -4
  60. data/spec/parsers/unicode_regex/character_class_spec.rb +19 -19
  61. data/spec/parsers/unicode_regex/character_range_spec.rb +1 -1
  62. data/spec/parsers/unicode_regex/character_set_spec.rb +8 -8
  63. data/spec/parsers/unicode_regex/literal_spec.rb +5 -5
  64. data/spec/parsers/unicode_regex/unicode_string_spec.rb +2 -2
  65. data/spec/parsers/unicode_regex_parser_spec.rb +28 -28
  66. data/spec/resources/loader_spec.rb +32 -32
  67. data/spec/shared/break_iterator_spec.rb +13 -13
  68. data/spec/shared/calendar_spec.rb +59 -59
  69. data/spec/shared/casefolder_spec.rb +5 -5
  70. data/spec/shared/code_point_spec.rb +46 -46
  71. data/spec/shared/currencies_spec.rb +7 -7
  72. data/spec/shared/language_codes_spec.rb +34 -34
  73. data/spec/shared/languages_spec.rb +30 -30
  74. data/spec/shared/numbering_system_spec.rb +7 -7
  75. data/spec/shared/numbers_spec.rb +4 -4
  76. data/spec/shared/phone_codes_spec.rb +7 -7
  77. data/spec/shared/postal_code_generator_spec.rb +76 -0
  78. data/spec/shared/postal_codes_spec.rb +35 -29
  79. data/spec/shared/territories_spec.rb +40 -40
  80. data/spec/shared/unicode_regex_spec.rb +71 -71
  81. data/spec/spec_helper.rb +2 -2
  82. data/spec/tokenizers/calendars/date_tokenizer_spec.rb +1 -1
  83. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +6 -6
  84. data/spec/tokenizers/composite_token_spec.rb +3 -3
  85. data/spec/tokenizers/token_spec.rb +3 -3
  86. data/spec/twitter_cldr_spec.rb +72 -72
  87. data/spec/utils/code_points_spec.rb +10 -10
  88. data/spec/utils/interpolation_spec.rb +32 -32
  89. data/spec/utils/range_set_spec.rb +36 -36
  90. data/spec/utils/regexp_ast_spec.rb +44 -0
  91. data/spec/utils/regexp_sampler_spec.rb +182 -0
  92. data/spec/utils/yaml/yaml_spec.rb +23 -23
  93. data/spec/utils_spec.rb +19 -19
  94. metadata +263 -258
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d66b9de7cc80964cf816c2981bf6aac51ec26134
4
- data.tar.gz: e5039d2b57b98ea896830872a6d6e2ea72229d9f
3
+ metadata.gz: a4b16ad78f734a596fa4488a61e58b0e971cf2ce
4
+ data.tar.gz: 1179820a11c171adab4e841cf14efe0e6534a6a7
5
5
  SHA512:
6
- metadata.gz: ba0506579dd20f7d92d480c4d46834bb25eae55a622e3bb28ffdb59cc3039a9c74f7571072a4b38686620e56b7a7c8fccade116a9ce4cfd5e16c269ad6819c66
7
- data.tar.gz: 06a830ff2fa877e6ee2e98d0d3acbfced0e7529181418c247e56b07980750acd364cbdc93fbf247218f022ee3557fdae0b5f92f228e2cca1abae52fffb363ee2
6
+ metadata.gz: e4268cc2df5dda027c6b3d2fad2398a0ac5f0597fc620931f0e86cc4dabc775a3d6d0b731290a042ec1114f088244bbd8cc457f2a36066ba8c7c5d8b2810f0b3
7
+ data.tar.gz: 635c217f690dabc37991f4eff3d215e1312c66a543b2aa7e665e0e3469f0cd10bd37fee2452901ba8c5665f331b0cac2cd059a93f451f03f9118dfc1b97e1217
data/Gemfile CHANGED
@@ -14,6 +14,8 @@ group :development, :test do
14
14
  if RUBY_VERSION <= "1.8.7"
15
15
  gem 'oniguruma'
16
16
  end
17
+
18
+ gem 'regexp_parser', '~> 0.1.5'
17
19
  end
18
20
 
19
21
  group :development do
@@ -25,8 +27,8 @@ group :development do
25
27
  end
26
28
 
27
29
  group :test do
28
- gem 'rspec', '~> 2.11.0'
29
- gem 'rr', '~> 1.0.4'
30
+ gem 'rspec', '~> 2.14.0'
31
+ gem 'rr', '~> 1.1.2'
30
32
 
31
33
  if RUBY_VERSION >= "1.9"
32
34
  gem 'rubyzip'
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ == 3.0.2
2
+
3
+ * Adding ability to generate sample postal codes from their regexes.
4
+
1
5
  == 3.0.1
2
6
 
3
7
  * Fixing abbreviated timespan formats for en-GB (backport from 2.4.3).
data/README.md CHANGED
@@ -439,17 +439,21 @@ The CLDR contains postal code validation regexes for a number of countries.
439
439
 
440
440
  ```ruby
441
441
  # United States
442
- TwitterCldr::Shared::PostalCodes.valid?(:us, "94103") # true
443
- TwitterCldr::Shared::PostalCodes.valid?(:us, "9410") # false
442
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
443
+ postal_code.valid?("94103") # true
444
+ postal_code.valid?("9410") # false
444
445
 
445
446
  # England (Great Britain)
446
- TwitterCldr::Shared::PostalCodes.valid?(:gb, "BS98 1TL") # true
447
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
448
+ postal_code.valid?("BS98 1TL") # true
447
449
 
448
450
  # Sweden
449
- TwitterCldr::Shared::PostalCodes.valid?(:se, "280 12") # true
451
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
452
+ postal_code.valid?("280 12") # true
450
453
 
451
454
  # Canada
452
- TwitterCldr::Shared::PostalCodes.valid?(:ca, "V3H 1Z7") # true
455
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
456
+ postal_code.valid?("V3H 1Z7") # true
453
457
  ```
454
458
 
455
459
  Get a list of supported territories by using the `#territories` method:
@@ -461,7 +465,14 @@ TwitterCldr::Shared::PostalCodes.territories # [:ad, :am, :ar, :as, :at, ... ]
461
465
  Just want the regex? No problem:
462
466
 
463
467
  ```ruby
464
- TwitterCldr::Shared::PostalCodes.regex_for_territory(:us) # /\d{5}([ \-]\d{4})?/
468
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
469
+ postal_code.regexp # /\d{5}([ \-]\d{4})?/
470
+ ```
471
+
472
+ Get a sample of valid postal codes with the `#sample` method:
473
+
474
+ ```ruby
475
+ postal_code.sample(5) # ["83526", "31748-8754", "55851", "25788-4914", "55335"]
465
476
  ```
466
477
 
467
478
  ### Phone Codes
data/lib/twitter_cldr/resources/postal_codes_importer.rb CHANGED
@@ -35,7 +35,18 @@ module TwitterCldr
35
35
 
36
36
  postal_codes = Hash[postal_codes.sort_by(&:first)]
37
37
 
38
- File.open(File.join(@output_path, 'postal_codes.yml'), 'w') { |output| output.write(YAML.dump(postal_codes)) }
38
+ postal_codes = postal_codes.each_with_object({}) do |(territory, regex), memo|
39
+ memo[territory] = {
40
+ :regex => regex,
41
+ :ast => TwitterCldr::Utils::RegexpAst.dump(
42
+ RegexpAstGenerator.generate(regex.source)
43
+ )
44
+ }
45
+ end
46
+
47
+ File.open(File.join(@output_path, 'postal_codes.yml'), 'w') do |output|
48
+ output.write(YAML.dump(postal_codes))
49
+ end
39
50
  end
40
51
 
41
52
  end
data/lib/twitter_cldr/resources/regexp_ast_generator.rb ADDED
@@ -0,0 +1,41 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'regexp_parser'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+
11
+ class RegexpAstGenerator
12
+ class << self
13
+
14
+ def generate(regexp_str)
15
+ tree = Regexp::Parser.parse(regexp_str)
16
+ walk(tree)
17
+ end
18
+
19
+ private
20
+
21
+ def walk(node)
22
+ expressions = if node.respond_to?(:expressions)
23
+ node.expressions.map { |expr| walk(expr) }
24
+ else
25
+ []
26
+ end
27
+
28
+ class_for(node).from_parser_node(node, expressions)
29
+ end
30
+
31
+ def class_for(klass)
32
+ TwitterCldr::Utils::RegexpAst.const_get(
33
+ klass.class.to_s.split("::").last.to_sym
34
+ )
35
+ end
36
+
37
+ end
38
+ end
39
+
40
+ end
41
+ end
data/lib/twitter_cldr/resources.rb CHANGED
@@ -26,5 +26,6 @@ module TwitterCldr
26
26
  autoload :RbnfTestImporter, 'twitter_cldr/resources/rbnf_test_importer'
27
27
  autoload :ReadmeRenderer, 'twitter_cldr/resources/readme_renderer'
28
28
  autoload :CasefolderClassGenerator, 'twitter_cldr/resources/casefolder_class_generator'
29
+ autoload :RegexpAstGenerator, 'twitter_cldr/resources/regexp_ast_generator'
29
30
  end
30
31
  end
data/lib/twitter_cldr/shared/postal_code_generator.rb ADDED
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'set'
7
+
8
+ module TwitterCldr
9
+ module Shared
10
+ class PostalCodeGenerator
11
+
12
+ SAMPLE_MULTIPLIER = 4
13
+
14
+ def initialize(regexp_ast)
15
+ @regexp_generator = TwitterCldr::Utils::RegexpSampler.new(regexp_ast)
16
+ end
17
+
18
+ def generate
19
+ clean_result(@regexp_generator.generate)
20
+ end
21
+
22
+ def sample(sample_size = 1)
23
+ sample_set = Set.new
24
+ counter = 1
25
+
26
+ until sample_set.size == sample_size
27
+ sample = generate
28
+ sample_set << sample unless sample.empty?
29
+ counter += 1
30
+
31
+ # Stop if the number of attempted generations is
32
+ # n times more than requested. Some territories only
33
+ # have one postal code, so if the user asks for 10
34
+ # they'll get an infinite loop.
35
+ break if counter > sample_size * SAMPLE_MULTIPLIER
36
+ end
37
+
38
+ sample_set.to_a
39
+ end
40
+
41
+ private
42
+
43
+ # remove spaces that trail a dash
44
+ def clean_result(str)
45
+ str.gsub(/- /, '-')
46
+ end
47
+
48
+ end
49
+ end
50
+ end
data/lib/twitter_cldr/shared/postal_codes.rb CHANGED
@@ -5,7 +5,10 @@
5
5
 
6
6
  module TwitterCldr
7
7
  module Shared
8
- module PostalCodes
8
+
9
+ class InvalidTerritoryError < StandardError; end
10
+
11
+ class PostalCodes
9
12
 
10
13
  class << self
11
14
 
@@ -13,23 +16,59 @@ module TwitterCldr
13
16
  resource.keys
14
17
  end
15
18
 
16
- def regex_for_territory(territory)
17
- resource[territory.to_s.downcase.to_sym]
18
- end
19
-
20
- def valid?(territory, postal_code)
21
- regexp = regex_for_territory(territory)
22
- !!(regexp && regexp =~ postal_code)
19
+ def for_territory(territory)
20
+ key = territory.to_s.downcase.to_sym
21
+ if res = resource[key]
22
+ territory_cache[key] ||= begin
23
+ new(
24
+ territory,
25
+ res[:regex],
26
+ TwitterCldr::Utils::RegexpAst.load(res[:ast])
27
+ )
28
+ end
29
+ else
30
+ raise InvalidTerritoryError, "invalid territory"
31
+ end
23
32
  end
24
33
 
25
34
  private
26
35
 
36
+ def territory_cache
37
+ @territory_cache ||= {}
38
+ end
39
+
27
40
  def resource
28
41
  @resource ||= TwitterCldr.get_resource(:shared, :postal_codes)
29
42
  end
30
43
 
31
44
  end
32
45
 
46
+ attr_reader :territory, :regexp, :ast
47
+
48
+ def initialize(territory, regexp, ast)
49
+ @territory = territory
50
+ @regexp = regexp
51
+ @ast = ast
52
+ end
53
+
54
+ def valid?(postal_code)
55
+ !!(regexp && regexp =~ postal_code)
56
+ end
57
+
58
+ def sample(sample_size = 1)
59
+ generator.sample(sample_size)
60
+ end
61
+
62
+ private
63
+
64
+ def generator
65
+ generator_cache[territory] ||= PostalCodeGenerator.new(ast)
66
+ end
67
+
68
+ def generator_cache
69
+ @@generator_cache ||= {}
70
+ end
71
+
33
72
  end
34
73
  end
35
- end
74
+ end
data/lib/twitter_cldr/shared.rb CHANGED
@@ -5,19 +5,20 @@
5
5
 
6
6
  module TwitterCldr
7
7
  module Shared
8
- autoload :Calendar, 'twitter_cldr/shared/calendar'
9
- autoload :CodePoint, 'twitter_cldr/shared/code_point'
10
- autoload :Currencies, 'twitter_cldr/shared/currencies'
11
- autoload :LanguageCodes, 'twitter_cldr/shared/language_codes'
12
- autoload :Languages, 'twitter_cldr/shared/languages'
13
- autoload :Numbers, 'twitter_cldr/shared/numbers'
14
- autoload :PhoneCodes, 'twitter_cldr/shared/phone_codes'
15
- autoload :PostalCodes, 'twitter_cldr/shared/postal_codes'
16
- autoload :Bidi, 'twitter_cldr/shared/bidi'
17
- autoload :Territories, 'twitter_cldr/shared/territories'
18
- autoload :NumberingSystem, 'twitter_cldr/shared/numbering_system'
19
- autoload :Casefolder, 'twitter_cldr/shared/casefolder'
20
- autoload :UnicodeRegex, 'twitter_cldr/shared/unicode_regex'
21
- autoload :BreakIterator, 'twitter_cldr/shared/break_iterator'
8
+ autoload :Calendar, 'twitter_cldr/shared/calendar'
9
+ autoload :CodePoint, 'twitter_cldr/shared/code_point'
10
+ autoload :Currencies, 'twitter_cldr/shared/currencies'
11
+ autoload :LanguageCodes, 'twitter_cldr/shared/language_codes'
12
+ autoload :Languages, 'twitter_cldr/shared/languages'
13
+ autoload :Numbers, 'twitter_cldr/shared/numbers'
14
+ autoload :PhoneCodes, 'twitter_cldr/shared/phone_codes'
15
+ autoload :PostalCodes, 'twitter_cldr/shared/postal_codes'
16
+ autoload :PostalCodeGenerator, 'twitter_cldr/shared/postal_code_generator'
17
+ autoload :Bidi, 'twitter_cldr/shared/bidi'
18
+ autoload :Territories, 'twitter_cldr/shared/territories'
19
+ autoload :NumberingSystem, 'twitter_cldr/shared/numbering_system'
20
+ autoload :Casefolder, 'twitter_cldr/shared/casefolder'
21
+ autoload :UnicodeRegex, 'twitter_cldr/shared/unicode_regex'
22
+ autoload :BreakIterator, 'twitter_cldr/shared/break_iterator'
22
23
  end
23
24
  end
data/lib/twitter_cldr/utils/regexp_ast.rb ADDED
@@ -0,0 +1,115 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'base64'
7
+
8
+ module TwitterCldr
9
+ module Utils
10
+ module RegexpAst
11
+
12
+ def self.load(ast_str)
13
+ Marshal.load(Base64.decode64(ast_str))
14
+ end
15
+
16
+ def self.dump(ast)
17
+ Base64.encode64(Marshal.dump(ast))
18
+ end
19
+
20
+ class Node
21
+ attr_reader :expressions, :quantifier
22
+
23
+ def initialize(expressions, quantifier)
24
+ @expressions = expressions
25
+ @quantifier = quantifier
26
+ end
27
+
28
+ def quantified?
29
+ !!quantifier
30
+ end
31
+
32
+ def self.from_parser_node(node, expressions)
33
+ new(expressions, Quantifier.from_parser_node(node))
34
+ end
35
+ end
36
+
37
+ class CharacterSet < Node
38
+ attr_reader :members, :negated
39
+ alias :negated? :negated
40
+
41
+ def initialize(expressions, quantifier, members, negated)
42
+ @members = members; @negated = negated
43
+ super(expressions, quantifier)
44
+ end
45
+
46
+ def self.from_parser_node(node, expressions)
47
+ new(
48
+ expressions, Quantifier.from_parser_node(node),
49
+ fix_members(node.members), node.negative?
50
+ )
51
+ end
52
+
53
+ private
54
+
55
+ # CLDR occasionally uses \d and other escapes in character classes
56
+ # to signify 0-9 and friends. This is legal regex syntax, but the
57
+ # regexp_parser gem doesn't handle it correctly, so we have to
58
+ # repair things here.
59
+ def self.fix_members(members)
60
+ members.join.scan(/(\\[wd]|\w-\w|\w|-)/).to_a.flatten.inject([]) do |ret, member|
61
+ case member
62
+ when '\d' then ret << '0-9'
63
+ when '\w' then ret += ['A-Z', 'a-z', '0-9', '_']
64
+ else ret << member
65
+ end
66
+
67
+ ret
68
+ end
69
+ end
70
+ end
71
+
72
+ class Literal < Node
73
+ attr_reader :text
74
+
75
+ def initialize(expressions, quantifier, text)
76
+ @text = text
77
+ super(expressions, quantifier)
78
+ end
79
+
80
+ def self.from_parser_node(node, expressions)
81
+ new(
82
+ expressions, Quantifier.from_parser_node(node), node.text
83
+ )
84
+ end
85
+ end
86
+
87
+ class Quantifier
88
+ attr_reader :max, :min
89
+
90
+ def initialize(max, min)
91
+ @max = max; @min = min
92
+ end
93
+
94
+ def self.from_parser_node(node)
95
+ if node.quantifier
96
+ new(
97
+ node.quantifier.max,
98
+ node.quantifier.min
99
+ )
100
+ end
101
+ end
102
+ end
103
+
104
+ class EscapeSequence < Literal; end
105
+ class Word < Node; end
106
+ class Digit < Node; end
107
+ class Sequence < Node; end
108
+ class Alternation < Node; end
109
+ class Capture < Node; end
110
+ class Passive < Node; end
111
+ class Root < Node; end
112
+
113
+ end
114
+ end
115
+ end
data/lib/twitter_cldr/utils/regexp_sampler.rb ADDED
@@ -0,0 +1,149 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Utils
8
+
9
+ # Generates a valid string that would match the given regexp ast.
10
+ class RegexpSampler
11
+
12
+ attr_reader :regexp_ast
13
+
14
+ DIGITS = ('0'..'9').to_a
15
+ WORD_LETTERS = ('a'..'z').to_a + ('A'..'Z').to_a + ['_']
16
+
17
+ def initialize(regexp_ast)
18
+ @regexp_ast = regexp_ast
19
+ end
20
+
21
+ def generate
22
+ walk_children(regexp_ast)
23
+ end
24
+
25
+ private
26
+
27
+ def walk(node)
28
+ method = :"walk_#{class_name_for(node)}"
29
+ puts method unless respond_to?(method, true)
30
+ respond_to?(method, true) ? send(method, node) : ""
31
+ end
32
+
33
+ def walk_children(node)
34
+ node.expressions.map { |expr| walk(expr) }.join
35
+ end
36
+
37
+ def walk_digit(node)
38
+ if node.quantified?
39
+ quantifier_sample(DIGITS, node.quantifier)
40
+ else
41
+ [single_sample(DIGITS)]
42
+ end.join + walk_children(node)
43
+ end
44
+
45
+ def walk_word(node)
46
+ if node.quantified?
47
+ quantifier_sample(WORD_LETTERS, node.quantifier)
48
+ else
49
+ [single_sample(WORD_LETTERS)]
50
+ end.join + walk_children(node)
51
+ end
52
+
53
+ def walk_literal(node)
54
+ node.text * if node.quantified?
55
+ rand_in_quantifier(node.quantifier)
56
+ else
57
+ 1
58
+ end + walk_children(node)
59
+ end
60
+
61
+ def walk_character_set(node)
62
+ charset = expand_charset(node.members)
63
+
64
+ if node.quantified?
65
+ quantifier_sample(charset, node.quantifier)
66
+ else
67
+ [single_sample(charset)]
68
+ end.join + walk_children(node)
69
+ end
70
+
71
+ def walk_capture(node)
72
+ if node.quantified?
73
+ rand_in_quantifier(node.quantifier).times.map do
74
+ walk_children(node)
75
+ end.join
76
+ else
77
+ walk_children(node)
78
+ end
79
+ end
80
+
81
+ # "passive" means non-capturing group.
82
+ # Since we don't need to distinguish between
83
+ # captures/non-captures, we can just delegate
84
+ # to the walk_capture method.
85
+ def walk_passive(node)
86
+ walk_capture(node)
87
+ end
88
+
89
+ def walk_alternation(node)
90
+ if node.quantified?
91
+ rand_in_quantifier(node.quantifier).times.map do
92
+ walk(single_sample(node.expressions))
93
+ end.join
94
+ else
95
+ walk(single_sample(node.expressions))
96
+ end
97
+ end
98
+
99
+ def walk_sequence(node)
100
+ if node.quantified?
101
+ rand_in_quantifier(node.quantifier).times.map do
102
+ node.expressions.map { |expr| walk(expr) }.join
103
+ end.join
104
+ else
105
+ node.expressions.map { |expr| walk(expr) }.join
106
+ end
107
+ end
108
+
109
+ def expand_charset(members)
110
+ members.inject([]) do |ret, member|
111
+ ret + expand_charset_member(member)
112
+ end
113
+ end
114
+
115
+ def expand_charset_member(member)
116
+ left, right = member.scan(/([^\\])-?/).flatten
117
+ right ? (left..right).to_a : [left]
118
+ end
119
+
120
+ def quantifier_sample(arr, quantifier)
121
+ sample_size = if quantifier.min == quantifier.max
122
+ quantifier.min
123
+ else
124
+ rand_in_quantifier(quantifier)
125
+ end
126
+
127
+ sample_size.times.map { single_sample(arr) }
128
+ end
129
+
130
+ def single_sample(arr)
131
+ arr[rand(arr.size)]
132
+ end
133
+
134
+ def rand_in_quantifier(quantifier)
135
+ rand_in_range(quantifier.min, quantifier.max)
136
+ end
137
+
138
+ def rand_in_range(min, max)
139
+ min + rand((max - min) + 1)
140
+ end
141
+
142
+ def class_name_for(node)
143
+ name = node.class.to_s.split("::").last
144
+ name.gsub(/\A|([A-Z])/) { $1 ? "_#{$1.downcase}" : "" }.downcase
145
+ end
146
+
147
+ end
148
+ end
149
+ end
data/lib/twitter_cldr/utils.rb CHANGED
@@ -6,9 +6,11 @@
6
6
  module TwitterCldr
7
7
  module Utils
8
8
 
9
- autoload :CodePoints, 'twitter_cldr/utils/code_points'
10
- autoload :YAML, 'twitter_cldr/utils/yaml'
11
- autoload :RangeSet, 'twitter_cldr/utils/range_set'
9
+ autoload :CodePoints, 'twitter_cldr/utils/code_points'
10
+ autoload :YAML, 'twitter_cldr/utils/yaml'
11
+ autoload :RangeSet, 'twitter_cldr/utils/range_set'
12
+ autoload :RegexpAst, 'twitter_cldr/utils/regexp_ast'
13
+ autoload :RegexpSampler, 'twitter_cldr/utils/regexp_sampler'
12
14
 
13
15
  class << self
14
16
 
data/lib/twitter_cldr/version.rb CHANGED
@@ -4,5 +4,5 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- VERSION = "3.0.1"
7
+ VERSION = "3.0.2"
8
8
  end