LittleWeasel 3.0.3 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +3 -0
  3. data/.reek.yml +17 -0
  4. data/.rspec +4 -2
  5. data/.rubocop.yml +187 -0
  6. data/.ruby-version +1 -1
  7. data/.yardopts +2 -0
  8. data/CHANGELOG.md +22 -1
  9. data/Gemfile +3 -1
  10. data/Jenkinsfile +20 -0
  11. data/LittleWeasel.gemspec +31 -18
  12. data/README.md +408 -42
  13. data/Rakefile +296 -3
  14. data/lib/LittleWeasel/block_results.rb +81 -0
  15. data/lib/LittleWeasel/configure.rb +98 -0
  16. data/lib/LittleWeasel/dictionary.rb +125 -0
  17. data/lib/LittleWeasel/dictionary_key.rb +48 -0
  18. data/lib/LittleWeasel/dictionary_manager.rb +91 -0
  19. data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
  20. data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
  21. data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
  22. data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
  23. data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
  24. data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
  25. data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
  26. data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
  27. data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
  28. data/lib/LittleWeasel/filters/word_filter.rb +59 -0
  29. data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
  30. data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
  31. data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
  32. data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
  33. data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
  34. data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
  35. data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
  36. data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
  37. data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
  38. data/lib/LittleWeasel/metadata/metadatable.rb +134 -0
  39. data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
  40. data/lib/LittleWeasel/modules/configurable.rb +26 -0
  41. data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
  42. data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
  43. data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
  44. data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +18 -0
  45. data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
  46. data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
  47. data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +17 -0
  48. data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
  49. data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
  50. data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +15 -0
  51. data/lib/LittleWeasel/modules/dictionary_source_validatable.rb +15 -0
  52. data/lib/LittleWeasel/modules/dictionary_sourceable.rb +86 -0
  53. data/lib/LittleWeasel/modules/dictionary_validatable.rb +18 -0
  54. data/lib/LittleWeasel/modules/language.rb +24 -0
  55. data/lib/LittleWeasel/modules/language_validatable.rb +14 -0
  56. data/lib/LittleWeasel/modules/locale.rb +23 -0
  57. data/lib/LittleWeasel/modules/order_validatable.rb +16 -0
  58. data/lib/LittleWeasel/modules/orderable.rb +17 -0
  59. data/lib/LittleWeasel/modules/region.rb +24 -0
  60. data/lib/LittleWeasel/modules/region_validatable.rb +14 -0
  61. data/lib/LittleWeasel/modules/tag_validatable.rb +14 -0
  62. data/lib/LittleWeasel/modules/taggable.rb +31 -0
  63. data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
  64. data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
  65. data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +29 -0
  66. data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +56 -0
  67. data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +59 -0
  68. data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +28 -0
  69. data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
  70. data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +123 -0
  71. data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
  72. data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
  73. data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
  74. data/lib/LittleWeasel/services/dictionary_cache_service.rb +211 -0
  75. data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
  76. data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
  77. data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
  78. data/lib/LittleWeasel/services/dictionary_metadata_service.rb +116 -0
  79. data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
  80. data/lib/LittleWeasel/version.rb +3 -1
  81. data/lib/LittleWeasel/word_results.rb +146 -0
  82. data/lib/LittleWeasel.rb +5 -184
  83. data/spec/factories/dictionary.rb +43 -0
  84. data/spec/factories/dictionary_cache_service.rb +95 -0
  85. data/spec/factories/dictionary_creator_service.rb +16 -0
  86. data/spec/factories/dictionary_file_loader_service.rb +13 -0
  87. data/spec/factories/dictionary_hash.rb +39 -0
  88. data/spec/factories/dictionary_key.rb +14 -0
  89. data/spec/factories/dictionary_killer_service.rb +14 -0
  90. data/spec/factories/dictionary_manager.rb +10 -0
  91. data/spec/factories/dictionary_metadata.rb +16 -0
  92. data/spec/factories/dictionary_metadata_service.rb +16 -0
  93. data/spec/factories/numeric_filter.rb +12 -0
  94. data/spec/factories/preprocessed_word.rb +16 -0
  95. data/spec/factories/preprocessed_words.rb +41 -0
  96. data/spec/factories/single_character_word_filter.rb +12 -0
  97. data/spec/factories/word_results.rb +16 -0
  98. data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
  99. data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
  100. data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
  101. data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +166 -0
  102. data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
  103. data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
  104. data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
  105. data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
  106. data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
  107. data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
  108. data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
  109. data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
  110. data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
  111. data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
  112. data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
  113. data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
  114. data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
  115. data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
  116. data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
  117. data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
  118. data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +81 -0
  119. data/spec/lib/LittleWeasel/modules/language_spec.rb +112 -0
  120. data/spec/lib/LittleWeasel/modules/locale_spec.rb +95 -0
  121. data/spec/lib/LittleWeasel/modules/region_spec.rb +112 -0
  122. data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
  123. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
  124. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
  125. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
  126. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
  127. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +242 -0
  128. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +218 -0
  129. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
  130. data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
  131. data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
  132. data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
  133. data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
  134. data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
  135. data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
  136. data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
  137. data/spec/spec_helper.rb +117 -6
  138. data/spec/support/factory_bot.rb +15 -0
  139. data/spec/support/file_helpers.rb +46 -0
  140. data/spec/support/files/empty-dictionary.txt +0 -0
  141. data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
  142. data/spec/support/files/en-US-tagged.txt +26 -0
  143. data/spec/support/files/en-US.txt +26 -0
  144. data/spec/support/files/en.txt +26 -0
  145. data/spec/support/files/es-ES.txt +27 -0
  146. data/spec/support/files/es.txt +27 -0
  147. data/spec/support/general_helpers.rb +68 -0
  148. data/spec/support/shared_contexts.rb +107 -0
  149. data/spec/support/shared_examples.rb +105 -0
  150. metadata +378 -38
  151. data/spec/checker/checker_spec.rb +0 -286
data/Rakefile CHANGED
@@ -1,13 +1,306 @@
1
- require "bundler/gem_tasks"
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/core_ext/object/try.rb'
4
+ require 'active_support/inflector'
5
+ require 'benchmark/ips'
6
+ require 'bundler/gem_tasks'
7
+ require 'pry'
8
+
9
+ require_relative 'lib/LittleWeasel'
10
+ require_relative 'spec/support/file_helpers'
11
+ require_relative 'spec/support/general_helpers'
12
+
13
+ DictionaryResultsHelpers = Support::GeneralHelpers::DictionaryResultsHelpers
14
+
15
+ def file_from(dictionary_key)
16
+ Support::FileHelpers.dictionary_path_for(file_name: dictionary_key.key)
17
+ end
2
18
 
3
19
  begin
4
20
  require 'rspec/core/rake_task'
5
21
  RSpec::Core::RakeTask.new(:spec)
6
22
  rescue LoadError => e
7
- task "spec" do
23
+ task 'spec' do
8
24
  puts "RSpec not loaded - make sure it's installed and you're using bundle exec"
9
25
  exit 1
10
26
  end
11
27
  end
12
28
 
13
- task :default => :spec
29
+ #
30
+ # Tasks related to the #word_results API
31
+
32
+ namespace 'word_results' do
33
+ # Creates a dictionary from a file on disk
34
+ task :basic do
35
+ LittleWeasel.configure do |config|
36
+ # TODO: Configure as needed here.
37
+ end
38
+
39
+ # Create a Dictionary Manager.
40
+ dictionary_manager = LittleWeasel::DictionaryManager.new
41
+
42
+ # Create our unique key for the dictionary.
43
+ en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)
44
+
45
+ file = Support::FileHelpers.dictionary_path_for file_name: en_us_key.key
46
+
47
+ # Create a dictionary from a file on disk.
48
+ en_us_names_dictionary = dictionary_manager.create_dictionary_from_file(
49
+ dictionary_key: en_us_key,
50
+ file: file)
51
+
52
+ # Get some word results...
53
+
54
+ # Get results for a word we know exists.
55
+ word = 'apple'
56
+ word_results = en_us_names_dictionary.word_results word
57
+ DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"
58
+
59
+ # Get results for a word we know DOES NOT exist.
60
+ word = 'dapple'
61
+ word_results = en_us_names_dictionary.word_results word
62
+ DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
63
+ rescue StandardError => e
64
+ task 'word_results:basic' do
65
+ puts "LittleWeasel task word_results:basic not loaded: #{e.message}"
66
+ exit 1
67
+ end
68
+ end
69
+
70
+ # Creates a dictionary of names from memory
71
+ task :from_memory do
72
+ LittleWeasel.configure do |config|
73
+ # TODO: Configure as needed here.
74
+ end
75
+
76
+ # Create a Dictionary Manager.
77
+ dictionary_manager = LittleWeasel::DictionaryManager.new
78
+
79
+ # Create our unique key for the dictionary.
80
+ en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)
81
+
82
+ # Create a dictionary of names from memory.
83
+ en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
84
+ dictionary_key: en_us_names_key, dictionary_words: %w(Abel Bartholomew Cain Deborah Elijah))
85
+
86
+ # Get some word results...
87
+
88
+ # Get results for a name we know exists.
89
+ word = 'Abel'
90
+ word_results = en_us_names_dictionary.word_results word
91
+ DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"
92
+
93
+ # Get results for a name we know DOES NOT exist.
94
+ word = 'Henry'
95
+ word_results = en_us_names_dictionary.word_results word
96
+ DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
97
+ rescue StandardError => e
98
+ task 'word_results:from_memory' do
99
+ puts "LittleWeasel task word_results:from_memory not loaded: #{e.message}"
100
+ exit 1
101
+ end
102
+ end
103
+
104
+ # Shows application of word filters and word preprocessors.
105
+ task :advanced do
106
+ LittleWeasel.configure do |config|
107
+ # TODO: Configure as needed here.
108
+ end
109
+
110
+ # Create a Dictionary Manager.
111
+ dictionary_manager = LittleWeasel::DictionaryManager.new
112
+
113
+ # Create our unique key for the dictionary.
114
+ en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)
115
+
116
+ # Create a Henry word filter.
117
+ class HenryFilter < LittleWeasel::Filters::WordFilter
118
+ class << self
119
+ def filter_match?(word)
120
+ word== 'Henry'
121
+ end
122
+ end
123
+ end
124
+ word_filters = [HenryFilter.new]
125
+
126
+ # Add a word preprocessor.
127
+ word_preprocessors = [LittleWeasel::Preprocessors::EnUs::CapitalizePreprocessor.new]
128
+
129
+ # Create a dictionary of names from memory.
130
+ en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
131
+ dictionary_key: en_us_names_key,
132
+ dictionary_words: %w(Abel Bartholomew Cain Deborah Elijah),
133
+ word_filters: word_filters,
134
+ word_preprocessors: word_preprocessors)
135
+
136
+ puts '# Turning off our word filters and word preprocessors to start...'
137
+ puts
138
+
139
+ en_us_names_dictionary.filters_on = false
140
+ en_us_names_dictionary.preprocessors_on = false
141
+
142
+ # Get results for a name we know DOES NOT exist.
143
+ word = 'Henry'
144
+ word_results = en_us_names_dictionary.word_results word
145
+ DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false, word_valid? == false (#{word} is not in the dictionary)"
146
+
147
+ puts '# Turning word filters on...'
148
+ puts
149
+
150
+ en_us_names_dictionary.filters_on = true
151
+
152
+ # Get results for Henry again - it should be found due to the filter.
153
+ word = 'Henry'
154
+ word_results = en_us_names_dictionary.word_results word
155
+ DictionaryResultsHelpers.print_word_results word, word_results, '#success? == true due to the HenryFilter'
156
+
157
+ # Get results for a name we know DOES NOT exist.
158
+ word = 'henry'
159
+ word_results = en_us_names_dictionary.word_results word
160
+ DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false (#{word} is not in the dictionary and henry is lower case, no filter match)"
161
+
162
+ puts '# Turning preprocessors on so that henry is converted to Henry '
163
+ puts "# and consequently, the filter will match..."
164
+ puts
165
+
166
+ en_us_names_dictionary.preprocessors_on = true
167
+
168
+ word = 'henry'
169
+ word_results = en_us_names_dictionary.word_results word
170
+ DictionaryResultsHelpers.print_word_results word, word_results, "#success? == true, #filter_match? == true (#{word} is not in the dictionary but the word preprocessor and word filter work together to get a filter match and consider the name valid)"
171
+ rescue StandardError => e
172
+ task 'word_results:advanced' do
173
+ puts "LittleWeasel task word_results:advanced not loaded: #{e.message}"
174
+ exit 1
175
+ end
176
+ end
177
+
178
+ task :word_filters do
179
+ LittleWeasel.configure do |config|
180
+ # TODO: Configure as needed here.
181
+ end
182
+ dictionary_manager = LittleWeasel::DictionaryManager.new
183
+ dictionary_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)
184
+ file = Support::FileHelpers.dictionary_path_for file_name: dictionary_key.key
185
+ word_filters = [
186
+ LittleWeasel::Filters::EnUs::NumericFilter.new,
187
+ LittleWeasel::Filters::EnUs::CurrencyFilter.new,
188
+ LittleWeasel::Filters::EnUs::SingleCharacterWordFilter.new
189
+ ]
190
+ word_preprocessors = nil
191
+ dictionary_words = Support::FileHelpers.dictionary_words_for dictionary_file_path: file
192
+ dictionary = dictionary_manager.create_dictionary_from_memory(dictionary_key: dictionary_key, dictionary_words: dictionary_words, word_filters: word_filters, word_preprocessors: word_preprocessors)
193
+ dictionary_words << 'A'.dup
194
+ dictionary_words << 'I'.dup
195
+ dictionary_words << '1000'.dup
196
+ dictionary_words << '1,000'.dup
197
+ dictionary_words << '10,000.00'.dup
198
+ dictionary_words << '+100.00'.dup
199
+ dictionary_words << '-200,000.00'.dup
200
+ dictionary_words << '$100,000'.dup
201
+ dictionary_words << '+$100,000,000.10'.dup
202
+ dictionary_words << '-$999,000,000.10'.dup
203
+ dictionary_words.each do |word|
204
+ word.strip!
205
+ word_results = dictionary.word_results word
206
+ DictionaryResultsHelpers.print_word_results word, word_results
207
+ end
208
+ rescue StandardError => e
209
+ task 'word_results:word_filters' do
210
+ puts "LittleWeasel task word_results:word_filters not loaded: #{e.message}"
211
+ exit 1
212
+ end
213
+ end
214
+ end
215
+
216
+ #
217
+ # Tasks related to the #block_results API
218
+
219
+ namespace 'block_results' do
220
+ task :basic do
221
+ LittleWeasel.configure do |config|
222
+ # TODO: Configure as needed here.
223
+ end
224
+
225
+ # Create a Dictionary Manager.
226
+ dictionary_manager = LittleWeasel::DictionaryManager.new
227
+
228
+ # Create our unique key for the dictionary.
229
+ en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :big)
230
+
231
+ # Create a dictionary from a file on disk. The below assumes the
232
+ # dictionary file name matches the dictionary key (e.g. en-US-big).
233
+ en_us_dictionary = dictionary_manager.create_dictionary_from_file(
234
+ dictionary_key: en_us_key, file: file_from(en_us_key))
235
+
236
+ word_block = "This is a word-block of 8 words and 2 numbers."
237
+
238
+ # Add a word filter so that numbers are considered valid.
239
+ en_us_dictionary.add_filters word_filters: [
240
+ LittleWeasel::Filters::EnUs::NumericFilter.new
241
+ ]
242
+
243
+ block_results = en_us_dictionary.block_results word_block
244
+
245
+ # Returns a LittleWeasel::BlockResults object.
246
+ DictionaryResultsHelpers.print_block_results word_block, block_results
247
+ rescue StandardError => e
248
+ task 'block_results:basic' do
249
+ puts "LittleWeasel task block_results:basic not loaded: #{e.message}"
250
+ exit 1
251
+ end
252
+ end
253
+ end
254
+
255
+ namespace :bm do
256
+ task :hash do
257
+ STRING_LOCALE = { 'en-US' => 'en-us' }
258
+ SYMBOL_LOCALE = { 'en-US' => :enUS }
259
+
260
+ puts 'String variable vs. normal String.'
261
+ Benchmark.ips do |x|
262
+ string_variable = 'string_variable'
263
+ x.report('string variable') { STRING_LOCALE[string_variable] }
264
+ x.report('normal') { STRING_LOCALE['en-US'] }
265
+ end
266
+
267
+ puts 'String#freeze vs. normal String.'
268
+ Benchmark.ips do |x|
269
+ x.report('freeze') { STRING_LOCALE['en-US'.freeze] }
270
+ x.report('normal') { STRING_LOCALE['en-US'] }
271
+ end
272
+
273
+ puts 'String vs Symbol'
274
+ Benchmark.ips do |x|
275
+ x.report('string') { STRING_LOCALE['en-US'] }
276
+ x.report('symbol') { SYMBOL_LOCALE[:enUS] }
277
+ end
278
+
279
+ puts 'String#freeze vs. Symbol'
280
+ Benchmark.ips do |x|
281
+ x.report('string') { STRING_LOCALE['en-US'.freeze] }
282
+ x.report('symbol') { SYMBOL_LOCALE[:enUS] }
283
+ end
284
+ rescue StandardError => e
285
+ task 'hash' do
286
+ puts "LittleWeasel task bm:hash not loaded: #{e.message}"
287
+ exit 1
288
+ end
289
+ end
290
+
291
+ task :dictionary_key do
292
+ puts 'DictionaryKey test'
293
+ Benchmark.ips do |x|
294
+ x.report('DictionaryKey') do
295
+ DictionaryKey.key(language: :en, region: :us, tag: :tag)
296
+ end
297
+ end
298
+ rescue StandardError => e
299
+ task 'locale' do
300
+ puts "LittleWeasel task bm:dictionary_key not loaded: #{e.message}"
301
+ exit 1
302
+ end
303
+ end
304
+ end
305
+
306
+ task default: :spec
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/core_ext/module/delegation'
4
+ require_relative 'word_results'
5
+
6
+ module LittleWeasel
7
+ # This class represents the results of gathering information about a word
8
+ # block (group of words).
9
+ class BlockResults
10
+ # :reek:Attribute - Ignored, it doesn't make sense to create a formal setter method.
11
+ attr_reader :original_word_block, :word_results
12
+
13
+ def initialize(original_word_block:)
14
+ self.original_word_block = original_word_block
15
+ self.word_results = []
16
+ end
17
+
18
+ def <<(word_result)
19
+ unless word_result.is_a? WordResults
20
+ raise ArgumentError, "Argument word_result is not a WordResults object: #{word_result.class}"
21
+ end
22
+
23
+ word_results << word_result
24
+ end
25
+
26
+ # Calls #success? on all WordResults objects. Returns true if all
27
+ # WordResults return true; false is returned otherwise.
28
+ def success?
29
+ return false unless word_results.present?
30
+
31
+ word_results.all?(&:success?)
32
+ end
33
+
34
+ # Returns true if all WordResults object words are valid (#word_valid?);
35
+ # false otherwise.
36
+ def words_valid?
37
+ return false unless word_results.present?
38
+
39
+ word_results.all?(&:word_valid?)
40
+ end
41
+
42
+ # Returns true if all WordResults object words have filter matches (#filter_match?);
43
+ # false otherwise.
44
+ def filters_match?
45
+ return false unless word_results.present?
46
+
47
+ word_results.all?(&:filter_match?)
48
+ end
49
+
50
+ # Returns true if all WordResults object words have been preprocessed (#preprocessed_word?);
51
+ # false otherwise.
52
+ def preprocessed_words?
53
+ return false unless word_results.present?
54
+
55
+ word_results.all?(&:preprocessed_word?)
56
+ end
57
+
58
+ # Returns an Array of the results of calling
59
+ # #preprocessed_word_or_original_word on all WordResults objects.
60
+
61
+ # Calls #preprocessed_word_or_original_word on all WordResults objects.
62
+ # An Array of the results is returned.
63
+ def preprocessed_words_or_original_words
64
+ return [] unless word_results.present?
65
+
66
+ word_results.map(&:preprocessed_word_or_original_word)
67
+ end
68
+
69
+ # Returns true if all WordResults object words have been cached (#word_cached?);
70
+ # false otherwise.
71
+ def words_cached?
72
+ return false unless word_results.present?
73
+
74
+ word_results.all?(&:word_cached?)
75
+ end
76
+
77
+ private
78
+
79
+ attr_writer :original_word_block, :word_results
80
+ end
81
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This is the configuration for LittleWeasel.
4
+ module LittleWeasel
5
+ class << self
6
+ attr_reader :configuration
7
+
8
+ # Lazily creates the application configuration object and yields it to the given block.
9
+ #
10
+ # @yieldparam configuration [Configuration] the application Configuration object.
11
+ def configure
12
+ self.configuration ||= Configuration.new
13
+ yield(configuration)
14
+ end
15
+
16
+ private
17
+
18
+ attr_writer :configuration
19
+ end
20
+
21
+ # This class encapsulates the configuration properties for this gem and
22
+ # provides methods and attributes that allow for management of the same.
23
+ #
24
+ # attr_reader :max_dictionary_file_megabytes, :max_invalid_words_bytesize, :metadata_observers
25
+ class Configuration
26
+ attr_reader :max_dictionary_file_megabytes,
27
+ :max_invalid_words_bytesize, :metadata_observers, :word_block_regex
28
+
29
+ # The constructor; calls {#reset}.
30
+ def initialize
31
+ reset
32
+ end
33
+
34
+ # Resets the configuration settings to their default values.
35
+ #
36
+ # @return [void]
37
+ def reset
38
+ @max_dictionary_file_megabytes = 5
39
+ @max_invalid_words_bytesize = 25_000
40
+ @metadata_observers = [
41
+ LittleWeasel::Metadata::InvalidWordsMetadata
42
+ ]
43
+ # TODO: Is this the correct regex to use, or is there something better?
44
+ # @word_block_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
45
+ # @word_block_regex = /(?:(?:[\-A-Za-z0-9]|\d(?!\d|\b))+(?:'[\-A-Za-z0-9]+)?)/
46
+ # @word_block_regex = /(?:(?:[\-a-z0-9]|\d(?!\d|\b))+(?:'[\-a-z0-9]+)?)/i
47
+ @word_block_regex = /[[[:word:]]'-]+/
48
+ end
49
+
50
+ # Returns the maximum consumable dictionary size in bytes. Dictionaries
51
+ # larger than {#max_dictionary_file_bytes} will raise an error.
52
+ #
53
+ # The default is 5 megabytes.
54
+ #
55
+ # @return [Integer] the maximum number of bytes for a dictionary.
56
+ def max_dictionary_file_bytes
57
+ @max_dictionary_file_megabytes * 1_000_000
58
+ end
59
+
60
+ # If {#max_invalid_words_bytesize} is > 0, true will be returned; false
61
+ # otherwise.
62
+ #
63
+ # @return [true, false] based on {#max_invalid_words_bytesize}.
64
+ def max_invalid_words_bytesize?
65
+ max_invalid_words_bytesize.positive?
66
+ end
67
+
68
+ # rubocop: disable Style/TrivialAccessors
69
+ def max_dictionary_file_megabytes=(value)
70
+ @max_dictionary_file_megabytes = value
71
+ end
72
+
73
+ # Sets the maximum cache size (in bytes) for invalid words. If
74
+ # less than or equal to 0, invalid words will NOT be cached.
75
+ #
76
+ # If greater than 0, invalid words will be cached up to and including
77
+ # {#max_invalid_words_bytesize} bytes.
78
+ #
79
+ # @see #max_invalid_words_bytesize?
80
+ def max_invalid_words_bytesize=(value)
81
+ value = 0 if value.negative?
82
+ @max_invalid_words_bytesize = value
83
+ end
84
+
85
+ def metadata_observers=(value)
86
+ raise ArgumentError, "Argument value is not an Array: #{value.class}" unless value.is_a? Array
87
+
88
+ # TODO: Limit the amount of observer classes, exploits?
89
+
90
+ @metadata_observers = value
91
+ end
92
+
93
+ def word_block_regex=(value)
94
+ @word_block_regex = value
95
+ end
96
+ # rubocop: enable Style/TrivialAccessors
97
+ end
98
+ end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'block_results'
4
+ require_relative 'filters/word_filter_managable'
5
+ require_relative 'metadata/dictionary_metadata'
6
+ require_relative 'modules/configurable'
7
+ require_relative 'modules/dictionary_cache_servicable'
8
+ require_relative 'modules/dictionary_keyable'
9
+ require_relative 'modules/dictionary_metadata_servicable'
10
+ require_relative 'preprocessors/word_preprocessor_managable'
11
+ require_relative 'word_results'
12
+
13
+ module LittleWeasel
14
+ class Dictionary
15
+ include Filters::WordFilterManagable
16
+ include Modules::Configurable
17
+ include Modules::DictionaryCacheServicable
18
+ include Modules::DictionaryKeyable
19
+ include Modules::DictionaryMetadataServicable
20
+ include Preprocessors::WordPreprocessorManagable
21
+
22
+ attr_reader :dictionary_metadata_object, :dictionary_words
23
+
24
+ def initialize(dictionary_key:, dictionary_words:, dictionary_cache:,
25
+ dictionary_metadata:, word_filters: nil, word_preprocessors: nil)
26
+ validate_dictionary_key dictionary_key: dictionary_key
27
+ self.dictionary_key = dictionary_key
28
+
29
+ validate_dictionary_cache dictionary_cache: dictionary_cache
30
+ self.dictionary_cache = dictionary_cache
31
+
32
+ validate_dictionary_metadata dictionary_metadata: dictionary_metadata
33
+ self.dictionary_metadata = dictionary_metadata
34
+
35
+ unless dictionary_words.is_a?(Array)
36
+ raise ArgumentError,
37
+ "Argument dictionary_words is not an Array: #{dictionary_words.class}"
38
+ end
39
+
40
+ # Set up the dictionary metadata object and observers
41
+ self.dictionary_words = self.class.to_hash(dictionary_words: dictionary_words)
42
+ self.dictionary_metadata_object = create_dictionary_metadata
43
+ dictionary_metadata_object.add_observers
44
+
45
+ add_filters word_filters: word_filters || []
46
+ add_preprocessors word_preprocessors: word_preprocessors || []
47
+ end
48
+
49
+ class << self
50
+ def to_hash(dictionary_words:)
51
+ dictionary_words.each_with_object(Hash.new(false)) { |word, hash| hash[word] = true; }
52
+ end
53
+ end
54
+
55
+ def word_results(word)
56
+ # TODO: Make max word size configurable.
57
+ raise ArgumentError, "Argument word is not a String: #{word.class}" unless word.is_a?(String)
58
+
59
+ preprocessed_words = preprocess(word: word)
60
+ preprocessed_word = preprocessed_words.preprocessed_word
61
+ filters_matched = filters_matched(preprocessed_word || word)
62
+ word_results = WordResults.new(original_word: word,
63
+ filters_matched: filters_matched,
64
+ preprocessed_words: preprocessed_words,
65
+ word_cached: dictionary_words.include?(preprocessed_word || word),
66
+ word_valid: dictionary_words[preprocessed_word || word] || false)
67
+
68
+ dictionary_metadata_object.notify(action: :word_search,
69
+ params: { word_results: word_results })
70
+
71
+ word_results
72
+ end
73
+
74
+ def block_results(word_block)
75
+ # TODO: Make max word_block size configurable.
76
+ raise ArgumentError, "Argument word_block is not a String: #{word_block.class}" unless word_block.is_a?(String)
77
+ raise ArgumentError, "Argument word_block is empty: #{word_block.class}" unless word_block.present?
78
+
79
+ BlockResults.new(original_word_block: word_block).tap do |block_results|
80
+ word_block.scan(config.word_block_regex)&.map do |word|
81
+ block_results << word_results(word)
82
+ end
83
+ end
84
+ end
85
+
86
+ # This method returns true if this dictionary object is detached from the
87
+ # dictionary cache; this can happen if the dictionary object is unloaded
88
+ # from the dictionary cache (DictionaryManager#unload_dictionary) or the
89
+ # dictionary is killed (DictionaryManager#kill_dictionary).
90
+ def detached?
91
+ !dictionary_cache_service.dictionary_object?
92
+ end
93
+
94
+ # This method returns a count of VALID words in the dictionary.
95
+ def count
96
+ dictionary_words.each_pair.count { |_word, valid| valid }
97
+ end
98
+
99
+ # This method returns a count of all VALID and INVALID words in
100
+ # the dictionary.
101
+ def count_all_words
102
+ dictionary_words.count
103
+ end
104
+
105
+ # This method returns a count of all INVALID words in the dictionary.
106
+ def count_invalid_words
107
+ dictionary_words.each_pair.count { |_word, valid| !valid }
108
+ end
109
+
110
+ private
111
+
112
+ attr_writer :dictionary_metadata_object, :dictionary_words
113
+
114
+ def create_dictionary_metadata
115
+ # We unconditionally attach metadata to this dictionary. DictionaryMetadata
116
+ # only attaches the metadata services that are turned "on".
117
+ Metadata::DictionaryMetadata.new(
118
+ dictionary_words: dictionary_words,
119
+ dictionary_key: dictionary_key,
120
+ dictionary_cache: dictionary_cache,
121
+ dictionary_metadata: dictionary_metadata
122
+ )
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'modules/language_validatable'
4
+ require_relative 'modules/locale'
5
+ require_relative 'modules/region_validatable'
6
+ require_relative 'modules/taggable'
7
+
8
+ module LittleWeasel
9
+ # This class describes a unique key associated with a particular dictionary
10
+ # file. Dictionary keys are used to identify a dictionary on which an action
11
+ # should be performed.
12
+ class DictionaryKey
13
+ include Modules::LanguageValidatable
14
+ include Modules::Locale
15
+ include Modules::RegionValidatable
16
+ include Modules::Taggable
17
+
18
+ attr_reader :language, :region
19
+
20
+ def initialize(language:, region: nil, tag: nil)
21
+ validate_language language: language
22
+ self.language = normalize_language language
23
+
24
+ validate_region region: region
25
+ self.region = normalize_region region
26
+
27
+ validate_tag tag: tag
28
+ self.tag = tag
29
+ end
30
+
31
+ def key
32
+ return locale unless tagged?
33
+
34
+ "#{locale}-#{tag}"
35
+ end
36
+ alias to_s key
37
+
38
+ class << self
39
+ def key(language:, region: nil, tag: nil)
40
+ new(language: language, region: region, tag: tag).key
41
+ end
42
+ end
43
+
44
+ private
45
+
46
+ attr_writer :language, :region
47
+ end
48
+ end