LittleWeasel 3.0.4 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (153) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.gitignore +19 -17
  4. data/.reek.yml +17 -0
  5. data/.rspec +4 -2
  6. data/.rubocop.yml +187 -0
  7. data/.ruby-version +1 -1
  8. data/.yardopts +2 -0
  9. data/CHANGELOG.md +21 -1
  10. data/Gemfile +3 -1
  11. data/Gemfile.lock +114 -0
  12. data/Jenkinsfile +20 -0
  13. data/LittleWeasel.gemspec +31 -18
  14. data/README.md +408 -42
  15. data/Rakefile +296 -3
  16. data/lib/LittleWeasel/block_results.rb +81 -0
  17. data/lib/LittleWeasel/configure.rb +98 -0
  18. data/lib/LittleWeasel/dictionary.rb +125 -0
  19. data/lib/LittleWeasel/dictionary_key.rb +48 -0
  20. data/lib/LittleWeasel/dictionary_manager.rb +91 -0
  21. data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
  22. data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
  23. data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
  24. data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
  25. data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
  26. data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
  27. data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
  28. data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
  29. data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
  30. data/lib/LittleWeasel/filters/word_filter.rb +59 -0
  31. data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
  32. data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
  33. data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
  34. data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
  35. data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
  36. data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
  37. data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
  38. data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
  39. data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
  40. data/lib/LittleWeasel/metadata/metadatable.rb +134 -0
  41. data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
  42. data/lib/LittleWeasel/modules/configurable.rb +26 -0
  43. data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
  44. data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
  45. data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
  46. data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +18 -0
  47. data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
  48. data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
  49. data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +17 -0
  50. data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
  51. data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
  52. data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +15 -0
  53. data/lib/LittleWeasel/modules/dictionary_source_validatable.rb +15 -0
  54. data/lib/LittleWeasel/modules/dictionary_sourceable.rb +86 -0
  55. data/lib/LittleWeasel/modules/dictionary_validatable.rb +18 -0
  56. data/lib/LittleWeasel/modules/language.rb +24 -0
  57. data/lib/LittleWeasel/modules/language_validatable.rb +14 -0
  58. data/lib/LittleWeasel/modules/locale.rb +23 -0
  59. data/lib/LittleWeasel/modules/order_validatable.rb +16 -0
  60. data/lib/LittleWeasel/modules/orderable.rb +17 -0
  61. data/lib/LittleWeasel/modules/region.rb +24 -0
  62. data/lib/LittleWeasel/modules/region_validatable.rb +14 -0
  63. data/lib/LittleWeasel/modules/tag_validatable.rb +14 -0
  64. data/lib/LittleWeasel/modules/taggable.rb +31 -0
  65. data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
  66. data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
  67. data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +29 -0
  68. data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +56 -0
  69. data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +59 -0
  70. data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +28 -0
  71. data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
  72. data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +123 -0
  73. data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
  74. data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
  75. data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
  76. data/lib/LittleWeasel/services/dictionary_cache_service.rb +211 -0
  77. data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
  78. data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
  79. data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
  80. data/lib/LittleWeasel/services/dictionary_metadata_service.rb +116 -0
  81. data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
  82. data/lib/LittleWeasel/version.rb +3 -1
  83. data/lib/LittleWeasel/word_results.rb +146 -0
  84. data/lib/LittleWeasel.rb +72 -186
  85. data/spec/factories/dictionary.rb +43 -0
  86. data/spec/factories/dictionary_cache_service.rb +95 -0
  87. data/spec/factories/dictionary_creator_service.rb +16 -0
  88. data/spec/factories/dictionary_file_loader_service.rb +13 -0
  89. data/spec/factories/dictionary_hash.rb +39 -0
  90. data/spec/factories/dictionary_key.rb +14 -0
  91. data/spec/factories/dictionary_killer_service.rb +14 -0
  92. data/spec/factories/dictionary_manager.rb +10 -0
  93. data/spec/factories/dictionary_metadata.rb +16 -0
  94. data/spec/factories/dictionary_metadata_service.rb +16 -0
  95. data/spec/factories/numeric_filter.rb +12 -0
  96. data/spec/factories/preprocessed_word.rb +16 -0
  97. data/spec/factories/preprocessed_words.rb +41 -0
  98. data/spec/factories/single_character_word_filter.rb +12 -0
  99. data/spec/factories/word_results.rb +16 -0
  100. data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
  101. data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
  102. data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
  103. data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +166 -0
  104. data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
  105. data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
  106. data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
  107. data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
  108. data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
  109. data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
  110. data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
  111. data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
  112. data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
  113. data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
  114. data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
  115. data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
  116. data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
  117. data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
  118. data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
  119. data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
  120. data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +81 -0
  121. data/spec/lib/LittleWeasel/modules/language_spec.rb +112 -0
  122. data/spec/lib/LittleWeasel/modules/locale_spec.rb +95 -0
  123. data/spec/lib/LittleWeasel/modules/region_spec.rb +112 -0
  124. data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
  125. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
  126. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
  127. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
  128. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
  129. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +242 -0
  130. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +218 -0
  131. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
  132. data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
  133. data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
  134. data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
  135. data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
  136. data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
  137. data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
  138. data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
  139. data/spec/spec_helper.rb +117 -6
  140. data/spec/support/factory_bot.rb +15 -0
  141. data/spec/support/file_helpers.rb +46 -0
  142. data/spec/support/files/empty-dictionary.txt +0 -0
  143. data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
  144. data/spec/support/files/en-US-tagged.txt +26 -0
  145. data/spec/support/files/en-US.txt +26 -0
  146. data/spec/support/files/en.txt +26 -0
  147. data/spec/support/files/es-ES.txt +27 -0
  148. data/spec/support/files/es.txt +27 -0
  149. data/spec/support/general_helpers.rb +68 -0
  150. data/spec/support/shared_contexts.rb +107 -0
  151. data/spec/support/shared_examples.rb +105 -0
  152. metadata +418 -70
  153. data/spec/checker/checker_spec.rb +0 -286
data/Rakefile CHANGED
@@ -1,13 +1,306 @@
1
- require "bundler/gem_tasks"
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/core_ext/object/try.rb'
4
+ require 'active_support/inflector'
5
+ require 'benchmark/ips'
6
+ require 'bundler/gem_tasks'
7
+ require 'pry'
8
+
9
+ require_relative 'lib/LittleWeasel'
10
+ require_relative 'spec/support/file_helpers'
11
+ require_relative 'spec/support/general_helpers'
12
+
13
+ DictionaryResultsHelpers = Support::GeneralHelpers::DictionaryResultsHelpers
14
+
15
# Looks up the path of the dictionary file whose name is the given
# dictionary key's #key value.
#
# @param dictionary_key [#key] the key identifying the dictionary file.
# @return the result of Support::FileHelpers.dictionary_path_for.
def file_from(dictionary_key)
  file_name = dictionary_key.key
  Support::FileHelpers.dictionary_path_for file_name: file_name
end
2
18
 
3
19
  # Defines the :spec task through RSpec's Rake integration. If RSpec cannot
  # be loaded (LoadError), a stand-in 'spec' task is defined instead that
  # prints a hint and exits with status 1.
  begin
4
20
  require 'rspec/core/rake_task'
5
21
  RSpec::Core::RakeTask.new(:spec)
6
22
  rescue LoadError => e
7
- task "spec" do
23
+ task 'spec' do
8
24
  puts "RSpec not loaded - make sure it's installed and you're using bundle exec"
9
25
  exit 1
10
26
  end
11
27
  end
12
28
 
13
- task :default => :spec
29
+ #
30
+ # Tasks related to the #word_results API
31
+
32
# Example tasks that exercise Dictionary#word_results.
#
# Each task rescues StandardError and aborts with a message and a non-zero
# exit status. (Defining another task inside the rescue clause, as was done
# previously, only registers an action that never runs, so the failure was
# silently swallowed.)
namespace 'word_results' do
  # Creates a dictionary from a file on disk
  task :basic do
    LittleWeasel.configure do |_config|
      # TODO: Configure as needed here.
    end

    # Create a Dictionary Manager.
    dictionary_manager = LittleWeasel::DictionaryManager.new

    # Create our unique key for the dictionary.
    en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)

    # Create a dictionary from a file on disk; the file is looked up by the
    # dictionary key.
    en_us_dictionary = dictionary_manager.create_dictionary_from_file(
      dictionary_key: en_us_key,
      file: Support::FileHelpers.dictionary_path_for(file_name: en_us_key.key)
    )

    # Get some word results...

    # Get results for a word we know exists.
    word = 'apple'
    word_results = en_us_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"

    # Get results for a word we know DOES NOT exist.
    word = 'dapple'
    word_results = en_us_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
  rescue StandardError => e
    abort "LittleWeasel task word_results:basic not loaded: #{e.message}"
  end

  # Creates a dictionary of names from memory
  task :from_memory do
    LittleWeasel.configure do |_config|
      # TODO: Configure as needed here.
    end

    # Create a Dictionary Manager.
    dictionary_manager = LittleWeasel::DictionaryManager.new

    # Create our unique key for the dictionary.
    en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)

    # Create a dictionary of names from memory.
    en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
      dictionary_key: en_us_names_key,
      dictionary_words: %w[Abel Bartholomew Cain Deborah Elijah]
    )

    # Get some word results...

    # Get results for a name we know exists.
    word = 'Abel'
    word_results = en_us_names_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"

    # Get results for a name we know DOES NOT exist.
    word = 'Henry'
    word_results = en_us_names_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
  rescue StandardError => e
    abort "LittleWeasel task word_results:from_memory not loaded: #{e.message}"
  end

  # Shows application of word filters and word preprocessors.
  task :advanced do
    LittleWeasel.configure do |_config|
      # TODO: Configure as needed here.
    end

    # Create a Dictionary Manager.
    dictionary_manager = LittleWeasel::DictionaryManager.new

    # Create our unique key for the dictionary.
    en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)

    # Create a Henry word filter: it matches the exact word 'Henry' only.
    class HenryFilter < LittleWeasel::Filters::WordFilter
      class << self
        def filter_match?(word)
          word == 'Henry'
        end
      end
    end
    word_filters = [HenryFilter.new]

    # Add a word preprocessor.
    word_preprocessors = [LittleWeasel::Preprocessors::EnUs::CapitalizePreprocessor.new]

    # Create a dictionary of names from memory.
    en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
      dictionary_key: en_us_names_key,
      dictionary_words: %w[Abel Bartholomew Cain Deborah Elijah],
      word_filters: word_filters,
      word_preprocessors: word_preprocessors
    )

    puts '# Turning off our word filters and word preprocessors to start...'
    puts

    en_us_names_dictionary.filters_on = false
    en_us_names_dictionary.preprocessors_on = false

    # Get results for a name we know DOES NOT exist.
    word = 'Henry'
    word_results = en_us_names_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false, word_valid? == false (#{word} is not in the dictionary)"

    puts '# Turning word filters on...'
    puts

    en_us_names_dictionary.filters_on = true

    # Get results for Henry again - it should be found due to the filter.
    word = 'Henry'
    word_results = en_us_names_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, '#success? == true due to the HenryFilter'

    # Get results for a name we know DOES NOT exist.
    word = 'henry'
    word_results = en_us_names_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false (#{word} is not in the dictionary and henry is lower case, no filter match)"

    puts '# Turning preprocessors on so that henry is converted to Henry '
    puts "# and consequently, the filter will match..."
    puts

    en_us_names_dictionary.preprocessors_on = true

    word = 'henry'
    word_results = en_us_names_dictionary.word_results word
    DictionaryResultsHelpers.print_word_results word, word_results, "#success? == true, #filter_match? == true (#{word} is not in the dictionary but the word preprocessor and word filter work together to get a filter match and consider the name valid)"
  rescue StandardError => e
    abort "LittleWeasel task word_results:advanced not loaded: #{e.message}"
  end

  # Prints word results for every word in the en-US dictionary file plus a
  # handful of numeric, currency and single-character samples, with the
  # numeric, currency and single-character word filters attached.
  task :word_filters do
    LittleWeasel.configure do |_config|
      # TODO: Configure as needed here.
    end

    dictionary_manager = LittleWeasel::DictionaryManager.new
    dictionary_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)
    file = Support::FileHelpers.dictionary_path_for file_name: dictionary_key.key
    word_filters = [
      LittleWeasel::Filters::EnUs::NumericFilter.new,
      LittleWeasel::Filters::EnUs::CurrencyFilter.new,
      LittleWeasel::Filters::EnUs::SingleCharacterWordFilter.new
    ]
    dictionary_words = Support::FileHelpers.dictionary_words_for dictionary_file_path: file
    dictionary = dictionary_manager.create_dictionary_from_memory(
      dictionary_key: dictionary_key,
      dictionary_words: dictionary_words,
      word_filters: word_filters,
      word_preprocessors: nil
    )

    # These samples are deliberately NOT part of the dictionary created
    # above; they are only looked up afterwards.
    sample_words = %w[
      A I 1000 1,000 10,000.00 +100.00 -200,000.00
      $100,000 +$100,000,000.10 -$999,000,000.10
    ]

    (dictionary_words + sample_words).each do |raw_word|
      # Non-mutating strip, so frozen strings are fine and the source Array
      # is left untouched.
      word = raw_word.strip
      word_results = dictionary.word_results word
      DictionaryResultsHelpers.print_word_results word, word_results
    end
  rescue StandardError => e
    abort "LittleWeasel task word_results:word_filters not loaded: #{e.message}"
  end
end
215
+
216
+ #
217
+ # Tasks related to the #block_results API
218
+
219
# Example tasks that exercise Dictionary#block_results.
namespace 'block_results' do
  # Creates a dictionary from the "big" en-US dictionary file, adds a numeric
  # word filter and prints the block results for a sample word block.
  task :basic do
    LittleWeasel.configure do |_config|
      # TODO: Configure as needed here.
    end

    # Create a Dictionary Manager.
    dictionary_manager = LittleWeasel::DictionaryManager.new

    # Create our unique key for the dictionary.
    en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :big)

    # Create a dictionary from a file on disk. The below assumes the
    # dictionary file name matches the dictionary key (e.g. en-US-big).
    en_us_dictionary = dictionary_manager.create_dictionary_from_file(
      dictionary_key: en_us_key,
      file: file_from(en_us_key)
    )

    word_block = "This is a word-block of 8 words and 2 numbers."

    # Add a word filter so that numbers are considered valid.
    en_us_dictionary.add_filters word_filters: [
      LittleWeasel::Filters::EnUs::NumericFilter.new
    ]

    # Returns a LittleWeasel::BlockResults object.
    block_results = en_us_dictionary.block_results word_block

    DictionaryResultsHelpers.print_block_results word_block, block_results
  rescue StandardError => e
    # Report the failure and exit non-zero. Defining a task here (as was
    # done previously) only registers an action that never runs.
    abort "LittleWeasel task block_results:basic not loaded: #{e.message}"
  end
end
254
+
255
# Micro-benchmarks (benchmark-ips).
namespace :bm do
  # Compares Hash lookups using String keys (literal, variable, explicitly
  # frozen) and Symbol keys.
  #
  # NOTE(review): this Rakefile enables `frozen_string_literal`, so
  # 'en-US' and 'en-US'.freeze are expected to behave alike here - confirm
  # the "freeze" comparisons still measure what is intended.
  task :hash do
    # Locals instead of constants: constants defined inside a block leak
    # into Object and are re-assigned if the task body runs again.
    string_locale = { 'en-US' => 'en-us' }.freeze
    # Keyed by the Symbol that is actually looked up below, so the benchmark
    # measures a hit (it was keyed by the String 'en-US' before).
    symbol_locale = { enUS: 'en-us' }.freeze

    puts 'String variable vs. normal String.'
    Benchmark.ips do |x|
      # Holds an existing key so both reports perform the same lookup.
      string_variable = 'en-US'
      x.report('string variable') { string_locale[string_variable] }
      x.report('normal') { string_locale['en-US'] }
    end

    puts 'String#freeze vs. normal String.'
    Benchmark.ips do |x|
      x.report('freeze') { string_locale['en-US'.freeze] }
      x.report('normal') { string_locale['en-US'] }
    end

    puts 'String vs Symbol'
    Benchmark.ips do |x|
      x.report('string') { string_locale['en-US'] }
      x.report('symbol') { symbol_locale[:enUS] }
    end

    puts 'String#freeze vs. Symbol'
    Benchmark.ips do |x|
      x.report('string') { string_locale['en-US'.freeze] }
      x.report('symbol') { symbol_locale[:enUS] }
    end
  rescue StandardError => e
    # Report the failure and exit non-zero instead of registering a task
    # that never runs.
    abort "LittleWeasel task bm:hash not loaded: #{e.message}"
  end

  # Measures building a dictionary key String via DictionaryKey.key.
  task :dictionary_key do
    puts 'DictionaryKey test'
    Benchmark.ips do |x|
      x.report('DictionaryKey') do
        # Fully qualified, like every other reference in this Rakefile.
        LittleWeasel::DictionaryKey.key(language: :en, region: :us, tag: :tag)
      end
    end
  rescue StandardError => e
    abort "LittleWeasel task bm:dictionary_key not loaded: #{e.message}"
  end
end
305
+
306
+ task default: :spec
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/core_ext/module/delegation'
4
+ require_relative 'word_results'
5
+
6
module LittleWeasel
  # This class represents the results of gathering information about a word
  # block (group of words). It collects one WordResults object per word and
  # answers aggregate questions about the collection.
  #
  # Every aggregate predicate returns false while no WordResults have been
  # added.
  class BlockResults
    # :reek:Attribute - Ignored, it doesn't make sense to create a formal setter method.
    attr_reader :original_word_block, :word_results

    # @param original_word_block the word block these results describe; it
    #   is stored as given, without validation.
    def initialize(original_word_block:)
      self.original_word_block = original_word_block
      self.word_results = []
    end

    # Appends a WordResults object to #word_results.
    #
    # @param word_result [WordResults] the results to add.
    # @return [Array] the #word_results Array, including the new element.
    # @raise [ArgumentError] if word_result is not a WordResults object.
    def <<(word_result)
      unless word_result.is_a? WordResults
        raise ArgumentError, "Argument word_result is not a WordResults object: #{word_result.class}"
      end

      word_results << word_result
    end

    # Calls #success? on all WordResults objects. Returns true if all
    # WordResults return true; false is returned otherwise.
    def success?
      every_word_result?(&:success?)
    end

    # Returns true if all WordResults object words are valid (#word_valid?);
    # false otherwise.
    def words_valid?
      every_word_result?(&:word_valid?)
    end

    # Returns true if all WordResults object words have filter matches
    # (#filter_match?); false otherwise.
    def filters_match?
      every_word_result?(&:filter_match?)
    end

    # Returns true if all WordResults object words have been preprocessed
    # (#preprocessed_word?); false otherwise.
    def preprocessed_words?
      every_word_result?(&:preprocessed_word?)
    end

    # Calls #preprocessed_word_or_original_word on all WordResults objects.
    # An Array of the results is returned; the Array is empty when there are
    # no WordResults.
    def preprocessed_words_or_original_words
      word_results.map(&:preprocessed_word_or_original_word)
    end

    # Returns true if all WordResults object words have been cached
    # (#word_cached?); false otherwise.
    def words_cached?
      every_word_result?(&:word_cached?)
    end

    private

    attr_writer :original_word_block, :word_results

    # Returns true only when there is at least one WordResults object and the
    # given predicate holds for every one of them. The explicit emptiness
    # check is needed because Array#all? is true for an empty Array.
    def every_word_result?(&predicate)
      return false if word_results.empty?

      word_results.all?(&predicate)
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This is the configuration for LittleWeasel.
4
module LittleWeasel
  class << self
    # The application Configuration object, or nil until {configure} has
    # been called.
    attr_reader :configuration

    # Creates the application configuration object on first use and yields
    # it to the given block (if any) so settings can be changed.
    #
    # Calling this method without a block is allowed and simply returns the
    # configuration.
    #
    # @yieldparam configuration [Configuration] the application
    #   Configuration object.
    # @return [Configuration] the application Configuration object.
    def configure
      self.configuration ||= Configuration.new
      yield(configuration) if block_given?
      configuration
    end

    private

    attr_writer :configuration
  end

  # This class encapsulates the configuration properties for this gem and
  # provides methods and attributes that allow for management of the same.
  class Configuration
    # max_dictionary_file_megabytes and word_block_regex are stored as
    # given, without validation.
    attr_accessor :max_dictionary_file_megabytes, :word_block_regex
    attr_reader :max_invalid_words_bytesize, :metadata_observers

    # The constructor; calls {#reset}.
    def initialize
      reset
    end

    # Resets the configuration settings to their default values.
    #
    # @return [void]
    def reset
      @max_dictionary_file_megabytes = 5
      @max_invalid_words_bytesize = 25_000
      @metadata_observers = [
        LittleWeasel::Metadata::InvalidWordsMetadata
      ]
      # TODO: Is this the correct regex to use, or is there something better?
      # @word_block_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
      # @word_block_regex = /(?:(?:[\-A-Za-z0-9]|\d(?!\d|\b))+(?:'[\-A-Za-z0-9]+)?)/
      # @word_block_regex = /(?:(?:[\-a-z0-9]|\d(?!\d|\b))+(?:'[\-a-z0-9]+)?)/i
      @word_block_regex = /[[[:word:]]'-]+/
    end

    # Returns the maximum consumable dictionary size in bytes, computed as
    # {#max_dictionary_file_megabytes} * 1,000,000.
    #
    # The default is 5 megabytes.
    #
    # @return [Integer] the maximum number of bytes for a dictionary.
    def max_dictionary_file_bytes
      max_dictionary_file_megabytes * 1_000_000
    end

    # If {#max_invalid_words_bytesize} is > 0, true will be returned; false
    # otherwise.
    #
    # @return [true, false] based on {#max_invalid_words_bytesize}.
    def max_invalid_words_bytesize?
      max_invalid_words_bytesize.positive?
    end

    # Sets the maximum cache size (in bytes) for invalid words. If
    # less than or equal to 0, invalid words will NOT be cached.
    #
    # If greater than 0, invalid words will be cached up to and including
    # {#max_invalid_words_bytesize} bytes.
    #
    # Negative values are stored as 0.
    #
    # @see #max_invalid_words_bytesize?
    def max_invalid_words_bytesize=(value)
      value = 0 if value.negative?
      @max_invalid_words_bytesize = value
    end

    # Replaces the list of metadata observers.
    #
    # @param value [Array] the new list of observers.
    # @raise [ArgumentError] if value is not an Array.
    def metadata_observers=(value)
      raise ArgumentError, "Argument value is not an Array: #{value.class}" unless value.is_a? Array

      # TODO: Limit the amount of observer classes, exploits?

      @metadata_observers = value
    end
  end
end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'block_results'
4
+ require_relative 'filters/word_filter_managable'
5
+ require_relative 'metadata/dictionary_metadata'
6
+ require_relative 'modules/configurable'
7
+ require_relative 'modules/dictionary_cache_servicable'
8
+ require_relative 'modules/dictionary_keyable'
9
+ require_relative 'modules/dictionary_metadata_servicable'
10
+ require_relative 'preprocessors/word_preprocessor_managable'
11
+ require_relative 'word_results'
12
+
13
module LittleWeasel
  # A dictionary of words that can be searched one word at a time
  # (#word_results) or as a block of words (#block_results).
  #
  # The words are held in a Hash mapping each word to true; looking up a
  # word that is not a key yields false.
  class Dictionary
    include Filters::WordFilterManagable
    include Modules::Configurable
    include Modules::DictionaryCacheServicable
    include Modules::DictionaryKeyable
    include Modules::DictionaryMetadataServicable
    include Preprocessors::WordPreprocessorManagable

    attr_reader :dictionary_metadata_object, :dictionary_words

    # @param dictionary_key the key identifying this dictionary.
    # @param dictionary_words [Array] the words that make up the dictionary.
    # @param dictionary_cache the dictionary cache.
    # @param dictionary_metadata the dictionary metadata.
    # @param word_filters [Array, nil] word filters to add; nil adds none.
    # @param word_preprocessors [Array, nil] word preprocessors to add; nil
    #   adds none.
    # @raise [ArgumentError] if dictionary_words is not an Array.
    def initialize(dictionary_key:, dictionary_words:, dictionary_cache:,
      dictionary_metadata:, word_filters: nil, word_preprocessors: nil)
      validate_dictionary_key dictionary_key: dictionary_key
      self.dictionary_key = dictionary_key

      validate_dictionary_cache dictionary_cache: dictionary_cache
      self.dictionary_cache = dictionary_cache

      validate_dictionary_metadata dictionary_metadata: dictionary_metadata
      self.dictionary_metadata = dictionary_metadata

      unless dictionary_words.is_a?(Array)
        raise ArgumentError,
          "Argument dictionary_words is not an Array: #{dictionary_words.class}"
      end

      # Set up the dictionary metadata object and observers
      self.dictionary_words = self.class.to_hash(dictionary_words: dictionary_words)
      self.dictionary_metadata_object = create_dictionary_metadata
      dictionary_metadata_object.add_observers

      add_filters word_filters: word_filters || []
      add_preprocessors word_preprocessors: word_preprocessors || []
    end

    class << self
      # Converts an Array of words into a Hash whose keys are the words and
      # whose values are all true. The Hash's default value is false.
      #
      # @param dictionary_words [Array] the words to convert.
      # @return [Hash] word => true.
      def to_hash(dictionary_words:)
        dictionary_words.each_with_object(Hash.new(false)) { |word, hash| hash[word] = true }
      end
    end

    # Gathers the results for a single word and notifies the dictionary
    # metadata object of the search (action :word_search).
    #
    # The preprocessed word is used for filter matching and dictionary
    # lookups when there is one; the original word is used otherwise.
    #
    # @param word [String] the word to search for.
    # @return [WordResults] the results for word.
    # @raise [ArgumentError] if word is not a String.
    def word_results(word)
      # TODO: Make max word size configurable.
      raise ArgumentError, "Argument word is not a String: #{word.class}" unless word.is_a?(String)

      preprocessed_words = preprocess(word: word)
      search_word = preprocessed_words.preprocessed_word || word
      results = WordResults.new(original_word: word,
        filters_matched: filters_matched(search_word),
        preprocessed_words: preprocessed_words,
        word_cached: dictionary_words.include?(search_word),
        word_valid: dictionary_words[search_word] || false)

      dictionary_metadata_object.notify(action: :word_search,
        params: { word_results: results })

      results
    end

    # Splits word_block into words using the configured word_block_regex and
    # gathers the #word_results for each of them.
    #
    # @param word_block [String] the block of words to search.
    # @return [BlockResults] one WordResults per word found in word_block.
    # @raise [ArgumentError] if word_block is not a String or is not
    #   #present?.
    def block_results(word_block)
      # TODO: Make max word_block size configurable.
      raise ArgumentError, "Argument word_block is not a String: #{word_block.class}" unless word_block.is_a?(String)
      raise ArgumentError, "Argument word_block is empty: #{word_block.class}" unless word_block.present?

      BlockResults.new(original_word_block: word_block).tap do |results|
        # String#scan always returns an Array, so no nil check is needed.
        word_block.scan(config.word_block_regex).each do |word|
          results << word_results(word)
        end
      end
    end

    # This method returns true if this dictionary object is detached from the
    # dictionary cache; this can happen if the dictionary object is unloaded
    # from the dictionary cache (DictionaryManager#unload_dictionary) or the
    # dictionary is killed (DictionaryManager#kill_dictionary).
    def detached?
      !dictionary_cache_service.dictionary_object?
    end

    # This method returns a count of VALID words in the dictionary.
    def count
      dictionary_words.count { |_word, valid| valid }
    end

    # This method returns a count of all VALID and INVALID words in
    # the dictionary.
    def count_all_words
      dictionary_words.size
    end

    # This method returns a count of all INVALID words in the dictionary.
    def count_invalid_words
      dictionary_words.count { |_word, valid| !valid }
    end

    private

    attr_writer :dictionary_metadata_object, :dictionary_words

    def create_dictionary_metadata
      # We unconditionally attach metadata to this dictionary. DictionaryMetadata
      # only attaches the metadata services that are turned "on".
      Metadata::DictionaryMetadata.new(
        dictionary_words: dictionary_words,
        dictionary_key: dictionary_key,
        dictionary_cache: dictionary_cache,
        dictionary_metadata: dictionary_metadata
      )
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'modules/language_validatable'
4
+ require_relative 'modules/locale'
5
+ require_relative 'modules/region_validatable'
6
+ require_relative 'modules/taggable'
7
+
8
module LittleWeasel
  # A DictionaryKey is the unique identifier of a particular dictionary
  # file; it tells other objects which dictionary an action applies to.
  #
  # The key String is the locale, followed by "-<tag>" when the key is
  # tagged.
  class DictionaryKey
    include Modules::LanguageValidatable
    include Modules::Locale
    include Modules::RegionValidatable
    include Modules::Taggable

    attr_reader :language, :region

    # Convenience shortcut: builds a DictionaryKey from the given arguments
    # and returns its #key.
    def self.key(language:, region: nil, tag: nil)
      new(language: language, region: region, tag: tag).key
    end

    # Each argument is validated before it is stored; language and region
    # are stored in normalized form, tag is stored as given.
    def initialize(language:, region: nil, tag: nil)
      validate_language language: language
      self.language = normalize_language language

      validate_region region: region
      self.region = normalize_region region

      validate_tag tag: tag
      self.tag = tag
    end

    # Returns the locale when this key is not tagged; otherwise the locale
    # and the tag joined by a hyphen.
    def key
      tagged? ? "#{locale}-#{tag}" : locale
    end
    alias_method :to_s, :key

    private

    attr_writer :language, :region
  end
end