LittleWeasel 3.0.3 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/.reek.yml +17 -0
- data/.rspec +4 -2
- data/.rubocop.yml +187 -0
- data/.ruby-version +1 -1
- data/.yardopts +2 -0
- data/CHANGELOG.md +22 -1
- data/Gemfile +3 -1
- data/Jenkinsfile +20 -0
- data/LittleWeasel.gemspec +31 -18
- data/README.md +408 -42
- data/Rakefile +296 -3
- data/lib/LittleWeasel/block_results.rb +81 -0
- data/lib/LittleWeasel/configure.rb +98 -0
- data/lib/LittleWeasel/dictionary.rb +125 -0
- data/lib/LittleWeasel/dictionary_key.rb +48 -0
- data/lib/LittleWeasel/dictionary_manager.rb +91 -0
- data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
- data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
- data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
- data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
- data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
- data/lib/LittleWeasel/filters/word_filter.rb +59 -0
- data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
- data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
- data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
- data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
- data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
- data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
- data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
- data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
- data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
- data/lib/LittleWeasel/metadata/metadatable.rb +134 -0
- data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
- data/lib/LittleWeasel/modules/configurable.rb +26 -0
- data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
- data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
- data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
- data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +17 -0
- data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +15 -0
- data/lib/LittleWeasel/modules/dictionary_source_validatable.rb +15 -0
- data/lib/LittleWeasel/modules/dictionary_sourceable.rb +86 -0
- data/lib/LittleWeasel/modules/dictionary_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/language.rb +24 -0
- data/lib/LittleWeasel/modules/language_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/locale.rb +23 -0
- data/lib/LittleWeasel/modules/order_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/orderable.rb +17 -0
- data/lib/LittleWeasel/modules/region.rb +24 -0
- data/lib/LittleWeasel/modules/region_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/tag_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/taggable.rb +31 -0
- data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +29 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +56 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +59 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +123 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
- data/lib/LittleWeasel/services/dictionary_cache_service.rb +211 -0
- data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
- data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
- data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
- data/lib/LittleWeasel/services/dictionary_metadata_service.rb +116 -0
- data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
- data/lib/LittleWeasel/version.rb +3 -1
- data/lib/LittleWeasel/word_results.rb +146 -0
- data/lib/LittleWeasel.rb +5 -184
- data/spec/factories/dictionary.rb +43 -0
- data/spec/factories/dictionary_cache_service.rb +95 -0
- data/spec/factories/dictionary_creator_service.rb +16 -0
- data/spec/factories/dictionary_file_loader_service.rb +13 -0
- data/spec/factories/dictionary_hash.rb +39 -0
- data/spec/factories/dictionary_key.rb +14 -0
- data/spec/factories/dictionary_killer_service.rb +14 -0
- data/spec/factories/dictionary_manager.rb +10 -0
- data/spec/factories/dictionary_metadata.rb +16 -0
- data/spec/factories/dictionary_metadata_service.rb +16 -0
- data/spec/factories/numeric_filter.rb +12 -0
- data/spec/factories/preprocessed_word.rb +16 -0
- data/spec/factories/preprocessed_words.rb +41 -0
- data/spec/factories/single_character_word_filter.rb +12 -0
- data/spec/factories/word_results.rb +16 -0
- data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
- data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
- data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
- data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +166 -0
- data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
- data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
- data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
- data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
- data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
- data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
- data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
- data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
- data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
- data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
- data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
- data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
- data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
- data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
- data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +81 -0
- data/spec/lib/LittleWeasel/modules/language_spec.rb +112 -0
- data/spec/lib/LittleWeasel/modules/locale_spec.rb +95 -0
- data/spec/lib/LittleWeasel/modules/region_spec.rb +112 -0
- data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +242 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +218 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
- data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
- data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
- data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
- data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
- data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
- data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
- data/spec/spec_helper.rb +117 -6
- data/spec/support/factory_bot.rb +15 -0
- data/spec/support/file_helpers.rb +46 -0
- data/spec/support/files/empty-dictionary.txt +0 -0
- data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
- data/spec/support/files/en-US-tagged.txt +26 -0
- data/spec/support/files/en-US.txt +26 -0
- data/spec/support/files/en.txt +26 -0
- data/spec/support/files/es-ES.txt +27 -0
- data/spec/support/files/es.txt +27 -0
- data/spec/support/general_helpers.rb +68 -0
- data/spec/support/shared_contexts.rb +107 -0
- data/spec/support/shared_examples.rb +105 -0
- metadata +378 -38
- data/spec/checker/checker_spec.rb +0 -286
data/Rakefile
CHANGED
|
@@ -1,13 +1,306 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/object/try.rb'
|
|
4
|
+
require 'active_support/inflector'
|
|
5
|
+
require 'benchmark/ips'
|
|
6
|
+
require 'bundler/gem_tasks'
|
|
7
|
+
require 'pry'
|
|
8
|
+
|
|
9
|
+
require_relative 'lib/LittleWeasel'
|
|
10
|
+
require_relative 'spec/support/file_helpers'
|
|
11
|
+
require_relative 'spec/support/general_helpers'
|
|
12
|
+
|
|
13
|
+
DictionaryResultsHelpers = Support::GeneralHelpers::DictionaryResultsHelpers
|
|
14
|
+
|
|
15
|
+
def file_from(dictionary_key)
|
|
16
|
+
Support::FileHelpers.dictionary_path_for(file_name: dictionary_key.key)
|
|
17
|
+
end
|
|
2
18
|
|
|
3
19
|
begin
|
|
4
20
|
require 'rspec/core/rake_task'
|
|
5
21
|
RSpec::Core::RakeTask.new(:spec)
|
|
6
22
|
rescue LoadError => e
|
|
7
|
-
task
|
|
23
|
+
task 'spec' do
|
|
8
24
|
puts "RSpec not loaded - make sure it's installed and you're using bundle exec"
|
|
9
25
|
exit 1
|
|
10
26
|
end
|
|
11
27
|
end
|
|
12
28
|
|
|
13
|
-
|
|
29
|
+
#
|
|
30
|
+
# Tasks related to the #word_results API
|
|
31
|
+
|
|
32
|
+
namespace 'word_results' do
|
|
33
|
+
# Creates a dictionary from a file on disk
|
|
34
|
+
task :basic do
|
|
35
|
+
LittleWeasel.configure do |config|
|
|
36
|
+
# TODO: Configure as needed here.
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Create a Dictionary Manager.
|
|
40
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
41
|
+
|
|
42
|
+
# Create our unique key for the dictionary.
|
|
43
|
+
en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)
|
|
44
|
+
|
|
45
|
+
file = Support::FileHelpers.dictionary_path_for file_name: en_us_key.key
|
|
46
|
+
|
|
47
|
+
# Create a dictionary from a file on disk.
|
|
48
|
+
en_us_names_dictionary = dictionary_manager.create_dictionary_from_file(
|
|
49
|
+
dictionary_key: en_us_key,
|
|
50
|
+
file: file)
|
|
51
|
+
|
|
52
|
+
# Get some word results...
|
|
53
|
+
|
|
54
|
+
# Get results for a word we know exists.
|
|
55
|
+
word = 'apple'
|
|
56
|
+
word_results = en_us_names_dictionary.word_results word
|
|
57
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"
|
|
58
|
+
|
|
59
|
+
# Get results for a word we know DOES NOT exist.
|
|
60
|
+
word = 'dapple'
|
|
61
|
+
word_results = en_us_names_dictionary.word_results word
|
|
62
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
|
|
63
|
+
rescue StandardError => e
|
|
64
|
+
task 'word_results:basic' do
|
|
65
|
+
puts "LittleWeasel task word_results:basic not loaded: #{e.message}"
|
|
66
|
+
exit 1
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Creates a dictionary of names from memory
|
|
71
|
+
task :from_memory do
|
|
72
|
+
LittleWeasel.configure do |config|
|
|
73
|
+
# TODO: Configure as needed here.
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Create a Dictionary Manager.
|
|
77
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
78
|
+
|
|
79
|
+
# Create our unique key for the dictionary.
|
|
80
|
+
en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)
|
|
81
|
+
|
|
82
|
+
# Create a dictionary of names from memory.
|
|
83
|
+
en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
|
|
84
|
+
dictionary_key: en_us_names_key, dictionary_words: %w(Abel Bartholomew Cain Deborah Elijah))
|
|
85
|
+
|
|
86
|
+
# Get some word results...
|
|
87
|
+
|
|
88
|
+
# Get results for a name we know exists.
|
|
89
|
+
word = 'Abel'
|
|
90
|
+
word_results = en_us_names_dictionary.word_results word
|
|
91
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"
|
|
92
|
+
|
|
93
|
+
# Get results for a name we know DOES NOT exist.
|
|
94
|
+
word = 'Henry'
|
|
95
|
+
word_results = en_us_names_dictionary.word_results word
|
|
96
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
|
|
97
|
+
rescue StandardError => e
|
|
98
|
+
task 'word_results:from_memory' do
|
|
99
|
+
puts "LittleWeasel task word_results:from_memory not loaded: #{e.message}"
|
|
100
|
+
exit 1
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Shows application of word filters and word preprocessors.
|
|
105
|
+
task :advanced do
|
|
106
|
+
LittleWeasel.configure do |config|
|
|
107
|
+
# TODO: Configure as needed here.
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Create a Dictionary Manager.
|
|
111
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
112
|
+
|
|
113
|
+
# Create our unique key for the dictionary.
|
|
114
|
+
en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)
|
|
115
|
+
|
|
116
|
+
# Create a Henry word filter.
|
|
117
|
+
class HenryFilter < LittleWeasel::Filters::WordFilter
|
|
118
|
+
class << self
|
|
119
|
+
def filter_match?(word)
|
|
120
|
+
word== 'Henry'
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
word_filters = [HenryFilter.new]
|
|
125
|
+
|
|
126
|
+
# Add a word preprocessor.
|
|
127
|
+
word_preprocessors = [LittleWeasel::Preprocessors::EnUs::CapitalizePreprocessor.new]
|
|
128
|
+
|
|
129
|
+
# Create a dictionary of names from memory.
|
|
130
|
+
en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
|
|
131
|
+
dictionary_key: en_us_names_key,
|
|
132
|
+
dictionary_words: %w(Abel Bartholomew Cain Deborah Elijah),
|
|
133
|
+
word_filters: word_filters,
|
|
134
|
+
word_preprocessors: word_preprocessors)
|
|
135
|
+
|
|
136
|
+
puts '# Turning off our word filters and word preprocessors to start...'
|
|
137
|
+
puts
|
|
138
|
+
|
|
139
|
+
en_us_names_dictionary.filters_on = false
|
|
140
|
+
en_us_names_dictionary.preprocessors_on = false
|
|
141
|
+
|
|
142
|
+
# Get results for a name we know DOES NOT exist.
|
|
143
|
+
word = 'Henry'
|
|
144
|
+
word_results = en_us_names_dictionary.word_results word
|
|
145
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false, word_valid? == false (#{word} is not in the dictionary)"
|
|
146
|
+
|
|
147
|
+
puts '# Turning word filters on...'
|
|
148
|
+
puts
|
|
149
|
+
|
|
150
|
+
en_us_names_dictionary.filters_on = true
|
|
151
|
+
|
|
152
|
+
# Get results for Henry again - it should be found due to the filter.
|
|
153
|
+
word = 'Henry'
|
|
154
|
+
word_results = en_us_names_dictionary.word_results word
|
|
155
|
+
DictionaryResultsHelpers.print_word_results word, word_results, '#success? == true due to the HenryFilter'
|
|
156
|
+
|
|
157
|
+
# Get results for a name we know DOES NOT exist.
|
|
158
|
+
word = 'henry'
|
|
159
|
+
word_results = en_us_names_dictionary.word_results word
|
|
160
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false (#{word} is not in the dictionary and henry is lower case, no filter match)"
|
|
161
|
+
|
|
162
|
+
puts '# Turning preprocessors on so that henry is converted to Henry '
|
|
163
|
+
puts "# and consequently, the filter will match..."
|
|
164
|
+
puts
|
|
165
|
+
|
|
166
|
+
en_us_names_dictionary.preprocessors_on = true
|
|
167
|
+
|
|
168
|
+
word = 'henry'
|
|
169
|
+
word_results = en_us_names_dictionary.word_results word
|
|
170
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "#success? == true, #filter_match? == true (#{word} is not in the dictionary but the word preprocessor and word filter work together to get a filter match and consider the name valid)"
|
|
171
|
+
rescue StandardError => e
|
|
172
|
+
task 'word_results:advanced' do
|
|
173
|
+
puts "LittleWeasel task word_results:advanced not loaded: #{e.message}"
|
|
174
|
+
exit 1
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
task :word_filters do
|
|
179
|
+
LittleWeasel.configure do |config|
|
|
180
|
+
# TODO: Configure as needed here.
|
|
181
|
+
end
|
|
182
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
183
|
+
dictionary_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)
|
|
184
|
+
file = Support::FileHelpers.dictionary_path_for file_name: dictionary_key.key
|
|
185
|
+
word_filters = [
|
|
186
|
+
LittleWeasel::Filters::EnUs::NumericFilter.new,
|
|
187
|
+
LittleWeasel::Filters::EnUs::CurrencyFilter.new,
|
|
188
|
+
LittleWeasel::Filters::EnUs::SingleCharacterWordFilter.new
|
|
189
|
+
]
|
|
190
|
+
word_preprocessors = nil
|
|
191
|
+
dictionary_words = Support::FileHelpers.dictionary_words_for dictionary_file_path: file
|
|
192
|
+
dictionary = dictionary_manager.create_dictionary_from_memory(dictionary_key: dictionary_key, dictionary_words: dictionary_words, word_filters: word_filters, word_preprocessors: word_preprocessors)
|
|
193
|
+
dictionary_words << 'A'.dup
|
|
194
|
+
dictionary_words << 'I'.dup
|
|
195
|
+
dictionary_words << '1000'.dup
|
|
196
|
+
dictionary_words << '1,000'.dup
|
|
197
|
+
dictionary_words << '10,000.00'.dup
|
|
198
|
+
dictionary_words << '+100.00'.dup
|
|
199
|
+
dictionary_words << '-200,000.00'.dup
|
|
200
|
+
dictionary_words << '$100,000'.dup
|
|
201
|
+
dictionary_words << '+$100,000,000.10'.dup
|
|
202
|
+
dictionary_words << '-$999,000,000.10'.dup
|
|
203
|
+
dictionary_words.each do |word|
|
|
204
|
+
word.strip!
|
|
205
|
+
word_results = dictionary.word_results word
|
|
206
|
+
DictionaryResultsHelpers.print_word_results word, word_results
|
|
207
|
+
end
|
|
208
|
+
rescue StandardError => e
|
|
209
|
+
task 'word_results:word_filters' do
|
|
210
|
+
puts "LittleWeasel task word_results:word_filters not loaded: #{e.message}"
|
|
211
|
+
exit 1
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
#
|
|
217
|
+
# Tasks related to the #block_results API
|
|
218
|
+
|
|
219
|
+
namespace 'block_results' do
|
|
220
|
+
task :basic do
|
|
221
|
+
LittleWeasel.configure do |config|
|
|
222
|
+
# TODO: Configure as needed here.
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
# Create a Dictionary Manager.
|
|
226
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
227
|
+
|
|
228
|
+
# Create our unique key for the dictionary.
|
|
229
|
+
en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :big)
|
|
230
|
+
|
|
231
|
+
# Create a dictionary from a file on disk. The below assumes the
|
|
232
|
+
# dictionary file name matches the dictionary key (e.g. en-US-big).
|
|
233
|
+
en_us_dictionary = dictionary_manager.create_dictionary_from_file(
|
|
234
|
+
dictionary_key: en_us_key, file: file_from(en_us_key))
|
|
235
|
+
|
|
236
|
+
word_block = "This is a word-block of 8 words and 2 numbers."
|
|
237
|
+
|
|
238
|
+
# Add a word filter so that numbers are considered valid.
|
|
239
|
+
en_us_dictionary.add_filters word_filters: [
|
|
240
|
+
LittleWeasel::Filters::EnUs::NumericFilter.new
|
|
241
|
+
]
|
|
242
|
+
|
|
243
|
+
block_results = en_us_dictionary.block_results word_block
|
|
244
|
+
|
|
245
|
+
# Returns a LittleWeasel::BlockResults object.
|
|
246
|
+
DictionaryResultsHelpers.print_block_results word_block, block_results
|
|
247
|
+
rescue StandardError => e
|
|
248
|
+
task 'block_results:basic' do
|
|
249
|
+
puts "LittleWeasel task block_results:basic not loaded: #{e.message}"
|
|
250
|
+
exit 1
|
|
251
|
+
end
|
|
252
|
+
end
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
namespace :bm do
|
|
256
|
+
task :hash do
|
|
257
|
+
STRING_LOCALE = { 'en-US' => 'en-us' }
|
|
258
|
+
SYMBOL_LOCALE = { 'en-US' => :enUS }
|
|
259
|
+
|
|
260
|
+
puts 'String variable vs. normal String.'
|
|
261
|
+
Benchmark.ips do |x|
|
|
262
|
+
string_variable = 'string_variable'
|
|
263
|
+
x.report('string variable') { STRING_LOCALE[string_variable] }
|
|
264
|
+
x.report('normal') { STRING_LOCALE['en-US'] }
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
puts 'String#freeze vs. normal String.'
|
|
268
|
+
Benchmark.ips do |x|
|
|
269
|
+
x.report('freeze') { STRING_LOCALE['en-US'.freeze] }
|
|
270
|
+
x.report('normal') { STRING_LOCALE['en-US'] }
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
puts 'String vs Symbol'
|
|
274
|
+
Benchmark.ips do |x|
|
|
275
|
+
x.report('string') { STRING_LOCALE['en-US'] }
|
|
276
|
+
x.report('symbol') { SYMBOL_LOCALE[:enUS] }
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
puts 'String#freeze vs. Symbol'
|
|
280
|
+
Benchmark.ips do |x|
|
|
281
|
+
x.report('string') { STRING_LOCALE['en-US'.freeze] }
|
|
282
|
+
x.report('symbol') { SYMBOL_LOCALE[:enUS] }
|
|
283
|
+
end
|
|
284
|
+
rescue StandardError => e
|
|
285
|
+
task 'hash' do
|
|
286
|
+
puts "LittleWeasel task bm:hash not loaded: #{e.message}"
|
|
287
|
+
exit 1
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
task :dictionary_key do
|
|
292
|
+
puts 'DictionaryKey test'
|
|
293
|
+
Benchmark.ips do |x|
|
|
294
|
+
x.report('DictionaryKey') do
|
|
295
|
+
DictionaryKey.key(language: :en, region: :us, tag: :tag)
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
rescue StandardError => e
|
|
299
|
+
task 'locale' do
|
|
300
|
+
puts "LittleWeasel task bm:dictionary_key not loaded: #{e.message}"
|
|
301
|
+
exit 1
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
task default: :spec
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/module/delegation'
|
|
4
|
+
require_relative 'word_results'
|
|
5
|
+
|
|
6
|
+
module LittleWeasel
|
|
7
|
+
# This class represents the results of gathering information about a word
|
|
8
|
+
# block (group of words).
|
|
9
|
+
class BlockResults
|
|
10
|
+
# :reek:Attribute - Ignored, it doesn't make sense to create a formal setter method.
|
|
11
|
+
attr_reader :original_word_block, :word_results
|
|
12
|
+
|
|
13
|
+
def initialize(original_word_block:)
|
|
14
|
+
self.original_word_block = original_word_block
|
|
15
|
+
self.word_results = []
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def <<(word_result)
|
|
19
|
+
unless word_result.is_a? WordResults
|
|
20
|
+
raise ArgumentError, "Argument word_result is not a WordResults object: #{word_result.class}"
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
word_results << word_result
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Calls #success? on all WordResults objects. Returns true if all
|
|
27
|
+
# WordResults return true; false is returned otherwise.
|
|
28
|
+
def success?
|
|
29
|
+
return false unless word_results.present?
|
|
30
|
+
|
|
31
|
+
word_results.all?(&:success?)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Returns true if all WordResults object words are valid (#word_valid?);
|
|
35
|
+
# false otherwise.
|
|
36
|
+
def words_valid?
|
|
37
|
+
return false unless word_results.present?
|
|
38
|
+
|
|
39
|
+
word_results.all?(&:word_valid?)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Returns true if all WordResults object words have filter matches (#filter_match?);
|
|
43
|
+
# false otherwise.
|
|
44
|
+
def filters_match?
|
|
45
|
+
return false unless word_results.present?
|
|
46
|
+
|
|
47
|
+
word_results.all?(&:filter_match?)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Returns true if all WordResults object words have been preprocessed (#preprocessed_word?);
|
|
51
|
+
# false otherwise.
|
|
52
|
+
def preprocessed_words?
|
|
53
|
+
return false unless word_results.present?
|
|
54
|
+
|
|
55
|
+
word_results.all?(&:preprocessed_word?)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Returns an Array of the results of calling
|
|
59
|
+
# #preprocessed_word_or_original_word on all WordResults objects.
|
|
60
|
+
|
|
61
|
+
# Calls #preprocessed_word_or_original_word on all WordResults objects.
|
|
62
|
+
# An Array of the results is returned.
|
|
63
|
+
def preprocessed_words_or_original_words
|
|
64
|
+
return [] unless word_results.present?
|
|
65
|
+
|
|
66
|
+
word_results.map(&:preprocessed_word_or_original_word)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Returns true if all WordResults object words have been cached (#word_cached?);
|
|
70
|
+
# false otherwise.
|
|
71
|
+
def words_cached?
|
|
72
|
+
return false unless word_results.present?
|
|
73
|
+
|
|
74
|
+
word_results.all?(&:word_cached?)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
private
|
|
78
|
+
|
|
79
|
+
attr_writer :original_word_block, :word_results
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This is the configuration for LittleWeasel.
|
|
4
|
+
module LittleWeasel
|
|
5
|
+
class << self
|
|
6
|
+
attr_reader :configuration
|
|
7
|
+
|
|
8
|
+
# Yields the application configuration object to the given block, creating it on first use.
|
|
9
|
+
#
|
|
10
|
+
# @yieldparam configuration [Configuration] the application Configuration object.
|
|
11
|
+
def configure
|
|
12
|
+
self.configuration ||= Configuration.new
|
|
13
|
+
yield(configuration)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
private
|
|
17
|
+
|
|
18
|
+
attr_writer :configuration
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# This class encapsulates the configuration properties for this gem and
|
|
22
|
+
# provides methods and attributes that allow for management of the same.
|
|
23
|
+
#
|
|
24
|
+
# attr_reader :max_dictionary_file_megabytes, :max_invalid_words_bytesize, :metadata_observers
|
|
25
|
+
class Configuration
|
|
26
|
+
attr_reader :max_dictionary_file_megabytes,
|
|
27
|
+
:max_invalid_words_bytesize, :metadata_observers, :word_block_regex
|
|
28
|
+
|
|
29
|
+
# The constructor; calls {#reset}.
|
|
30
|
+
def initialize
|
|
31
|
+
reset
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Resets the configuration settings to their default values.
|
|
35
|
+
#
|
|
36
|
+
# @return [void]
|
|
37
|
+
def reset
|
|
38
|
+
@max_dictionary_file_megabytes = 5
|
|
39
|
+
@max_invalid_words_bytesize = 25_000
|
|
40
|
+
@metadata_observers = [
|
|
41
|
+
LittleWeasel::Metadata::InvalidWordsMetadata
|
|
42
|
+
]
|
|
43
|
+
# TODO: Is this the correct regex to use, or is there something better?
|
|
44
|
+
# @word_block_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
|
|
45
|
+
# @word_block_regex = /(?:(?:[\-A-Za-z0-9]|\d(?!\d|\b))+(?:'[\-A-Za-z0-9]+)?)/
|
|
46
|
+
# @word_block_regex = /(?:(?:[\-a-z0-9]|\d(?!\d|\b))+(?:'[\-a-z0-9]+)?)/i
|
|
47
|
+
@word_block_regex = /[[[:word:]]'-]+/
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Returns the maximum consumable dictionary size in bytes. Dictionaries
|
|
51
|
+
# larger than {#max_dictionary_file_bytes} will raise an error.
|
|
52
|
+
#
|
|
53
|
+
# The default is 5 megabytes.
|
|
54
|
+
#
|
|
55
|
+
# @return [Integer] the maximum number of bytes for a dictionary.
|
|
56
|
+
def max_dictionary_file_bytes
|
|
57
|
+
@max_dictionary_file_megabytes * 1_000_000
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# If {#max_invalid_words_bytesize} is > 0, true will be returned; false
|
|
61
|
+
# otherwise.
|
|
62
|
+
#
|
|
63
|
+
# @return [true, false] based on {#max_invalid_words_bytesize}.
|
|
64
|
+
def max_invalid_words_bytesize?
|
|
65
|
+
max_invalid_words_bytesize.positive?
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# rubocop: disable Style/TrivialAccessors
|
|
69
|
+
def max_dictionary_file_megabytes=(value)
|
|
70
|
+
@max_dictionary_file_megabytes = value
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Sets the maximum cache size (in bytes) for invalid words. If
|
|
74
|
+
# less than or equal to 0, invalid words will NOT be cached.
|
|
75
|
+
#
|
|
76
|
+
# If greater than 0, invalid words will be cached up to and including
|
|
77
|
+
# {#max_invalid_words_bytesize} bytes.
|
|
78
|
+
#
|
|
79
|
+
# @see #max_invalid_words_bytesize?
|
|
80
|
+
def max_invalid_words_bytesize=(value)
|
|
81
|
+
value = 0 if value.negative?
|
|
82
|
+
@max_invalid_words_bytesize = value
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def metadata_observers=(value)
|
|
86
|
+
raise ArgumentError, "Argument value is not an Array: #{value.class}" unless value.is_a? Array
|
|
87
|
+
|
|
88
|
+
# TODO: Limit the amount of observer classes, exploits?
|
|
89
|
+
|
|
90
|
+
@metadata_observers = value
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def word_block_regex=(value)
|
|
94
|
+
@word_block_regex = value
|
|
95
|
+
end
|
|
96
|
+
# rubocop: enable Style/TrivialAccessors
|
|
97
|
+
end
|
|
98
|
+
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'block_results'
|
|
4
|
+
require_relative 'filters/word_filter_managable'
|
|
5
|
+
require_relative 'metadata/dictionary_metadata'
|
|
6
|
+
require_relative 'modules/configurable'
|
|
7
|
+
require_relative 'modules/dictionary_cache_servicable'
|
|
8
|
+
require_relative 'modules/dictionary_keyable'
|
|
9
|
+
require_relative 'modules/dictionary_metadata_servicable'
|
|
10
|
+
require_relative 'preprocessors/word_preprocessor_managable'
|
|
11
|
+
require_relative 'word_results'
|
|
12
|
+
|
|
13
|
+
module LittleWeasel
|
|
14
|
+
class Dictionary
|
|
15
|
+
include Filters::WordFilterManagable
|
|
16
|
+
include Modules::Configurable
|
|
17
|
+
include Modules::DictionaryCacheServicable
|
|
18
|
+
include Modules::DictionaryKeyable
|
|
19
|
+
include Modules::DictionaryMetadataServicable
|
|
20
|
+
include Preprocessors::WordPreprocessorManagable
|
|
21
|
+
|
|
22
|
+
attr_reader :dictionary_metadata_object, :dictionary_words
|
|
23
|
+
|
|
24
|
+
def initialize(dictionary_key:, dictionary_words:, dictionary_cache:,
|
|
25
|
+
dictionary_metadata:, word_filters: nil, word_preprocessors: nil)
|
|
26
|
+
validate_dictionary_key dictionary_key: dictionary_key
|
|
27
|
+
self.dictionary_key = dictionary_key
|
|
28
|
+
|
|
29
|
+
validate_dictionary_cache dictionary_cache: dictionary_cache
|
|
30
|
+
self.dictionary_cache = dictionary_cache
|
|
31
|
+
|
|
32
|
+
validate_dictionary_metadata dictionary_metadata: dictionary_metadata
|
|
33
|
+
self.dictionary_metadata = dictionary_metadata
|
|
34
|
+
|
|
35
|
+
unless dictionary_words.is_a?(Array)
|
|
36
|
+
raise ArgumentError,
|
|
37
|
+
"Argument dictionary_words is not an Array: #{dictionary_words.class}"
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Set up the dictionary metadata object and observers
|
|
41
|
+
self.dictionary_words = self.class.to_hash(dictionary_words: dictionary_words)
|
|
42
|
+
self.dictionary_metadata_object = create_dictionary_metadata
|
|
43
|
+
dictionary_metadata_object.add_observers
|
|
44
|
+
|
|
45
|
+
add_filters word_filters: word_filters || []
|
|
46
|
+
add_preprocessors word_preprocessors: word_preprocessors || []
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
class << self
|
|
50
|
+
def to_hash(dictionary_words:)
|
|
51
|
+
dictionary_words.each_with_object(Hash.new(false)) { |word, hash| hash[word] = true; }
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Looks up a single word in the dictionary and reports the outcome.
#
# The word is first passed to #preprocess. When that yields a preprocessed
# word, the preprocessed word (rather than the original) is used for filter
# matching and for the dictionary lookup. The results are then published to
# the dictionary metadata object as a :word_search action.
#
# @param word [String] the word to look up.
# @return [WordResults] the results for the word.
# @raise [ArgumentError] if word is not a String.
def word_results(word)
  # TODO: Make max word size configurable.
  raise ArgumentError, "Argument word is not a String: #{word.class}" unless word.is_a?(String)

  preprocessed_words = preprocess(word: word)
  # Fall back to the original word when preprocessing produced nothing.
  lookup_word = preprocessed_words.preprocessed_word || word
  matched_filters = filters_matched(lookup_word)

  WordResults.new(
    original_word: word,
    filters_matched: matched_filters,
    preprocessed_words: preprocessed_words,
    word_cached: dictionary_words.include?(lookup_word),
    word_valid: dictionary_words[lookup_word] || false
  ).tap do |results|
    dictionary_metadata_object.notify(action: :word_search,
                                      params: { word_results: results })
  end
end
|
|
73
|
+
|
|
74
|
+
# Splits a block of text into words and looks each one up in the dictionary.
#
# Words are extracted with config.word_block_regex and passed, in order, to
# #word_results; each result is appended to the returned BlockResults.
#
# @param word_block [String] the text to check.
# @return [BlockResults] the results for every word found in word_block.
# @raise [ArgumentError] if word_block is not a String, or if
#   word_block.present? is false.
def block_results(word_block)
  # TODO: Make max word_block size configurable.
  raise ArgumentError, "Argument word_block is not a String: #{word_block.class}" unless word_block.is_a?(String)
  # NOTE(review): present? is not core Ruby; presumably ActiveSupport's
  # Object#present? (which treats whitespace-only strings as blank) - confirm.
  raise ArgumentError, "Argument word_block is empty: #{word_block.class}" unless word_block.present?

  BlockResults.new(original_word_block: word_block).tap do |results|
    # String#scan always returns an Array, so no nil guard is needed, and the
    # iteration is for its side effect only, so each is used rather than map.
    word_block.scan(config.word_block_regex).each do |word|
      results << word_results(word)
    end
  end
end
|
|
85
|
+
|
|
86
|
+
# Returns true if this dictionary object is detached from the dictionary
# cache. This can happen when the dictionary object is unloaded from the
# dictionary cache (DictionaryManager#unload_dictionary) or when the
# dictionary is killed (DictionaryManager#kill_dictionary).
#
# @return [Boolean] the negation of
#   dictionary_cache_service.dictionary_object?.
def detached?
  attached = dictionary_cache_service.dictionary_object?
  !attached
end
|
|
93
|
+
|
|
94
|
+
# Returns the number of VALID words in the dictionary, i.e. the entries of
# dictionary_words whose value is truthy.
#
# @return [Integer]
def count
  dictionary_words.each_value.count { |valid| valid }
end
|
|
98
|
+
|
|
99
|
+
# Returns the total number of words in the dictionary, VALID and INVALID
# alike, i.e. the number of entries in dictionary_words.
#
# @return [Integer]
def count_all_words
  # Hash#size reads the stored entry count; Hash#count without a block comes
  # from Enumerable and walks every entry to produce the same number.
  dictionary_words.size
end
|
|
104
|
+
|
|
105
|
+
# Returns the number of INVALID words in the dictionary, i.e. the entries of
# dictionary_words whose value is false or nil.
#
# @return [Integer]
def count_invalid_words
  dictionary_words.each_value.count { |valid| !valid }
end
|
|
109
|
+
|
|
110
|
+
private
|
|
111
|
+
|
|
112
|
+
attr_writer :dictionary_metadata_object, :dictionary_words
|
|
113
|
+
|
|
114
|
+
# Creates the Metadata::DictionaryMetadata object for this dictionary from
# its current words, key, cache and metadata.
#
# @return [Metadata::DictionaryMetadata]
def create_dictionary_metadata
  # We unconditionally attach metadata to this dictionary. DictionaryMetadata
  # only attaches the metadata services that are turned "on".
  metadata_arguments = {
    dictionary_words: dictionary_words,
    dictionary_key: dictionary_key,
    dictionary_cache: dictionary_cache,
    dictionary_metadata: dictionary_metadata
  }
  Metadata::DictionaryMetadata.new(**metadata_arguments)
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'modules/language_validatable'
|
|
4
|
+
require_relative 'modules/locale'
|
|
5
|
+
require_relative 'modules/region_validatable'
|
|
6
|
+
require_relative 'modules/taggable'
|
|
7
|
+
|
|
8
|
+
module LittleWeasel
|
|
9
|
+
# This class describes a unique key associated with a particular dictionary
|
|
10
|
+
# file. Dictionary keys are used to identify a dictionary on which an action
|
|
11
|
+
# should be performed.
|
|
12
|
+
class DictionaryKey
|
|
13
|
+
include Modules::LanguageValidatable
|
|
14
|
+
include Modules::Locale
|
|
15
|
+
include Modules::RegionValidatable
|
|
16
|
+
include Modules::Taggable
|
|
17
|
+
|
|
18
|
+
attr_reader :language, :region
|
|
19
|
+
|
|
20
|
+
# Builds a dictionary key from a language and an optional region and tag.
#
# language and region are validated and then stored in normalized form
# (normalize_language / normalize_region); tag is validated and stored as
# given.
#
# @param language the language of the dictionary (required).
# @param region the region of the dictionary, or nil.
# @param tag an optional tag, or nil.
def initialize(language:, region: nil, tag: nil)
  validate_language(language: language)
  self.language = normalize_language(language)

  validate_region(region: region)
  self.region = normalize_region(region)

  validate_tag(tag: tag)
  self.tag = tag
end
|
|
30
|
+
|
|
31
|
+
# Returns the key for this dictionary: the locale alone when the key is not
# tagged, otherwise the locale and the tag joined by a hyphen.
#
# @return the locale, or a String of the form "<locale>-<tag>".
def key
  tagged? ? "#{locale}-#{tag}" : locale
end
|
|
36
|
+
alias to_s key
|
|
37
|
+
|
|
38
|
+
class << self
  # Convenience method: builds a new instance from the given arguments and
  # returns its #key.
  #
  # @param language the language of the dictionary (required).
  # @param region the region of the dictionary, or nil.
  # @param tag an optional tag, or nil.
  # @return the value of #key for the newly built instance.
  def key(language:, region: nil, tag: nil)
    dictionary_key = new(language: language, region: region, tag: tag)
    dictionary_key.key
  end
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
attr_writer :language, :region
|
|
47
|
+
end
|
|
48
|
+
end
|