LittleWeasel 3.0.4 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/.reek.yml +17 -0
- data/.rspec +4 -2
- data/.rubocop.yml +187 -0
- data/.ruby-version +1 -1
- data/.yardopts +2 -0
- data/Gemfile +3 -1
- data/LittleWeasel.gemspec +31 -18
- data/README.md +408 -42
- data/Rakefile +296 -3
- data/lib/LittleWeasel.rb +5 -184
- data/lib/LittleWeasel/block_results.rb +81 -0
- data/lib/LittleWeasel/configure.rb +98 -0
- data/lib/LittleWeasel/dictionary.rb +125 -0
- data/lib/LittleWeasel/dictionary_key.rb +48 -0
- data/lib/LittleWeasel/dictionary_manager.rb +85 -0
- data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
- data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
- data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
- data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
- data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
- data/lib/LittleWeasel/filters/word_filter.rb +59 -0
- data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
- data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
- data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
- data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
- data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
- data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
- data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
- data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
- data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
- data/lib/LittleWeasel/metadata/metadatable.rb +136 -0
- data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
- data/lib/LittleWeasel/modules/configurable.rb +26 -0
- data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
- data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
- data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +20 -0
- data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
- data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +19 -0
- data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
- data/lib/LittleWeasel/modules/dictionary_loader_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +17 -0
- data/lib/LittleWeasel/modules/dictionary_sourceable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_validatable.rb +30 -0
- data/lib/LittleWeasel/modules/language.rb +23 -0
- data/lib/LittleWeasel/modules/language_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/locale.rb +40 -0
- data/lib/LittleWeasel/modules/order_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/orderable.rb +17 -0
- data/lib/LittleWeasel/modules/region.rb +23 -0
- data/lib/LittleWeasel/modules/region_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/tag_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/taggable.rb +31 -0
- data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +28 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +55 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +55 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +27 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +122 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
- data/lib/LittleWeasel/services/dictionary_cache_service.rb +262 -0
- data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
- data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
- data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
- data/lib/LittleWeasel/services/dictionary_loader_service.rb +59 -0
- data/lib/LittleWeasel/services/dictionary_metadata_service.rb +114 -0
- data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
- data/lib/LittleWeasel/version.rb +3 -1
- data/lib/LittleWeasel/word_results.rb +146 -0
- data/spec/factories/dictionary.rb +43 -0
- data/spec/factories/dictionary_cache_service.rb +95 -0
- data/spec/factories/dictionary_creator_service.rb +16 -0
- data/spec/factories/dictionary_file_loader_service.rb +13 -0
- data/spec/factories/dictionary_hash.rb +39 -0
- data/spec/factories/dictionary_key.rb +14 -0
- data/spec/factories/dictionary_killer_service.rb +14 -0
- data/spec/factories/dictionary_loader_service.rb +14 -0
- data/spec/factories/dictionary_manager.rb +10 -0
- data/spec/factories/dictionary_metadata.rb +16 -0
- data/spec/factories/dictionary_metadata_service.rb +16 -0
- data/spec/factories/numeric_filter.rb +12 -0
- data/spec/factories/preprocessed_word.rb +16 -0
- data/spec/factories/preprocessed_words.rb +41 -0
- data/spec/factories/single_character_word_filter.rb +12 -0
- data/spec/factories/word_results.rb +16 -0
- data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
- data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
- data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
- data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +116 -0
- data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
- data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
- data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
- data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
- data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
- data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
- data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
- data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
- data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
- data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
- data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
- data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
- data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
- data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
- data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +44 -0
- data/spec/lib/LittleWeasel/modules/language_spec.rb +52 -0
- data/spec/lib/LittleWeasel/modules/locale_spec.rb +140 -0
- data/spec/lib/LittleWeasel/modules/region_spec.rb +52 -0
- data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +216 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +175 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
- data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
- data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
- data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
- data/spec/lib/LittleWeasel/services/dictionary_loader_service_spec.rb +50 -0
- data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
- data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
- data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
- data/spec/spec_helper.rb +117 -6
- data/spec/support/factory_bot.rb +15 -0
- data/spec/support/file_helpers.rb +32 -0
- data/spec/support/files/empty-dictionary.txt +0 -0
- data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
- data/spec/support/files/en-US-tagged.txt +26 -0
- data/spec/support/files/en-US.txt +26 -0
- data/spec/support/files/en.txt +26 -0
- data/spec/support/files/es-ES.txt +27 -0
- data/spec/support/files/es.txt +27 -0
- data/spec/support/general_helpers.rb +68 -0
- data/spec/support/shared_contexts.rb +108 -0
- data/spec/support/shared_examples.rb +105 -0
- metadata +408 -65
- data/spec/checker/checker_spec.rb +0 -286
data/Rakefile
CHANGED
|
@@ -1,13 +1,306 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/object/try.rb'
|
|
4
|
+
require 'active_support/inflector'
|
|
5
|
+
require 'benchmark/ips'
|
|
6
|
+
require 'bundler/gem_tasks'
|
|
7
|
+
require 'pry'
|
|
8
|
+
|
|
9
|
+
require_relative 'lib/LittleWeasel'
|
|
10
|
+
require_relative 'spec/support/file_helpers'
|
|
11
|
+
require_relative 'spec/support/general_helpers'
|
|
12
|
+
|
|
13
|
+
DictionaryResultsHelpers = Support::GeneralHelpers::DictionaryResultsHelpers
|
|
14
|
+
|
|
15
|
+
def file_from(dictionary_key)
|
|
16
|
+
Support::FileHelpers.dictionary_path_for(file_name: dictionary_key.key)
|
|
17
|
+
end
|
|
2
18
|
|
|
3
19
|
begin
|
|
4
20
|
require 'rspec/core/rake_task'
|
|
5
21
|
RSpec::Core::RakeTask.new(:spec)
|
|
6
22
|
rescue LoadError => e
|
|
7
|
-
task
|
|
23
|
+
task 'spec' do
|
|
8
24
|
puts "RSpec not loaded - make sure it's installed and you're using bundle exec"
|
|
9
25
|
exit 1
|
|
10
26
|
end
|
|
11
27
|
end
|
|
12
28
|
|
|
13
|
-
|
|
29
|
+
#
|
|
30
|
+
# Tasks related to the #word_results API
|
|
31
|
+
|
|
32
|
+
namespace 'word_results' do
|
|
33
|
+
# Creates a dictionary from a file on disk
|
|
34
|
+
task :basic do
|
|
35
|
+
LittleWeasel.configure do |config|
|
|
36
|
+
# TODO: Configure as needed here.
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Create a Dictionary Manager.
|
|
40
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
41
|
+
|
|
42
|
+
# Create our unique key for the dictionary.
|
|
43
|
+
en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)
|
|
44
|
+
|
|
45
|
+
file = Support::FileHelpers.dictionary_path_for file_name: en_us_key.key
|
|
46
|
+
|
|
47
|
+
# Create a dictionary from a file on disk.
|
|
48
|
+
en_us_names_dictionary = dictionary_manager.create_dictionary_from_file(
|
|
49
|
+
dictionary_key: en_us_key,
|
|
50
|
+
file: file)
|
|
51
|
+
|
|
52
|
+
# Get some word results...
|
|
53
|
+
|
|
54
|
+
# Get results for a word we know exists.
|
|
55
|
+
word = 'apple'
|
|
56
|
+
word_results = en_us_names_dictionary.word_results word
|
|
57
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"
|
|
58
|
+
|
|
59
|
+
# Get results for a word we know DOES NOT exist.
|
|
60
|
+
word = 'dapple'
|
|
61
|
+
word_results = en_us_names_dictionary.word_results word
|
|
62
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
|
|
63
|
+
rescue StandardError => e
|
|
64
|
+
task 'word_results:basic' do
|
|
65
|
+
puts "LittleWeasel task word_results:basic not loaded: #{e.message}"
|
|
66
|
+
exit 1
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Creates a dictionary of names from memory
|
|
71
|
+
task :from_memory do
|
|
72
|
+
LittleWeasel.configure do |config|
|
|
73
|
+
# TODO: Configure as needed here.
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Create a Dictionary Manager.
|
|
77
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
78
|
+
|
|
79
|
+
# Create our unique key for the dictionary.
|
|
80
|
+
en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)
|
|
81
|
+
|
|
82
|
+
# Create a dictionary of names from memory.
|
|
83
|
+
en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
|
|
84
|
+
dictionary_key: en_us_names_key, dictionary_words: %w(Abel Bartholomew Cain Deborah Elijah))
|
|
85
|
+
|
|
86
|
+
# Get some word results...
|
|
87
|
+
|
|
88
|
+
# Get results for a name we know exists.
|
|
89
|
+
word = 'Abel'
|
|
90
|
+
word_results = en_us_names_dictionary.word_results word
|
|
91
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "found (#{word} is in the dictionary)"
|
|
92
|
+
|
|
93
|
+
# Get results for a name we know DOES NOT exist.
|
|
94
|
+
word = 'Henry'
|
|
95
|
+
word_results = en_us_names_dictionary.word_results word
|
|
96
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found (#{word} is not in the dictionary)"
|
|
97
|
+
rescue StandardError => e
|
|
98
|
+
task 'word_results:from_memory' do
|
|
99
|
+
puts "LittleWeasel task word_results:from_memory not loaded: #{e.message}"
|
|
100
|
+
exit 1
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Shows application of word filters and word preprocessors.
|
|
105
|
+
task :advanced do
|
|
106
|
+
LittleWeasel.configure do |config|
|
|
107
|
+
# TODO: Configure as needed here.
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Create a Dictionary Manager.
|
|
111
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
112
|
+
|
|
113
|
+
# Create our unique key for the dictionary.
|
|
114
|
+
en_us_names_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :names)
|
|
115
|
+
|
|
116
|
+
# Create a Henry word filter.
|
|
117
|
+
class HenryFilter < LittleWeasel::Filters::WordFilter
|
|
118
|
+
class << self
|
|
119
|
+
def filter_match?(word)
|
|
120
|
+
word== 'Henry'
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
word_filters = [HenryFilter.new]
|
|
125
|
+
|
|
126
|
+
# Add a word preprocessor.
|
|
127
|
+
word_preprocessors = [LittleWeasel::Preprocessors::EnUs::CapitalizePreprocessor.new]
|
|
128
|
+
|
|
129
|
+
# Create a dictionary of names from memory.
|
|
130
|
+
en_us_names_dictionary = dictionary_manager.create_dictionary_from_memory(
|
|
131
|
+
dictionary_key: en_us_names_key,
|
|
132
|
+
dictionary_words: %w(Abel Bartholomew Cain Deborah Elijah),
|
|
133
|
+
word_filters: word_filters,
|
|
134
|
+
word_preprocessors: word_preprocessors)
|
|
135
|
+
|
|
136
|
+
puts '# Turning off our word filters and word preprocessors to start...'
|
|
137
|
+
puts
|
|
138
|
+
|
|
139
|
+
en_us_names_dictionary.filters_on = false
|
|
140
|
+
en_us_names_dictionary.preprocessors_on = false
|
|
141
|
+
|
|
142
|
+
# Get results for a name we know DOES NOT exist.
|
|
143
|
+
word = 'Henry'
|
|
144
|
+
word_results = en_us_names_dictionary.word_results word
|
|
145
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false, word_valid? == false (#{word} is not in the dictionary)"
|
|
146
|
+
|
|
147
|
+
puts '# Turning word filters on...'
|
|
148
|
+
puts
|
|
149
|
+
|
|
150
|
+
en_us_names_dictionary.filters_on = true
|
|
151
|
+
|
|
152
|
+
# Get results for Henry again - it should be found due to the filter.
|
|
153
|
+
word = 'Henry'
|
|
154
|
+
word_results = en_us_names_dictionary.word_results word
|
|
155
|
+
DictionaryResultsHelpers.print_word_results word, word_results, '#success? == true due to the HenryFilter'
|
|
156
|
+
|
|
157
|
+
# Get results for a name we know DOES NOT exist.
|
|
158
|
+
word = 'henry'
|
|
159
|
+
word_results = en_us_names_dictionary.word_results word
|
|
160
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "not found, #success? == false (#{word} is not in the dictionary and henry is lower case, no filter match)"
|
|
161
|
+
|
|
162
|
+
puts '# Turning preprocessors on so that henry is converted to Henry '
|
|
163
|
+
puts "# and consequently, the filter will match..."
|
|
164
|
+
puts
|
|
165
|
+
|
|
166
|
+
en_us_names_dictionary.preprocessors_on = true
|
|
167
|
+
|
|
168
|
+
word = 'henry'
|
|
169
|
+
word_results = en_us_names_dictionary.word_results word
|
|
170
|
+
DictionaryResultsHelpers.print_word_results word, word_results, "#success? == true, #filter_match? == true (#{word} is not in the dictionary but the word preprocessor and word filter work together to get a filter match and consider the name valid)"
|
|
171
|
+
rescue StandardError => e
|
|
172
|
+
task 'word_results:advanced' do
|
|
173
|
+
puts "LittleWeasel task word_results:advanced not loaded: #{e.message}"
|
|
174
|
+
exit 1
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
task :word_filters do
|
|
179
|
+
LittleWeasel.configure do |config|
|
|
180
|
+
# TODO: Configure as needed here.
|
|
181
|
+
end
|
|
182
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
183
|
+
dictionary_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us)
|
|
184
|
+
file = Support::FileHelpers.dictionary_path_for file_name: dictionary_key.key
|
|
185
|
+
word_filters = [
|
|
186
|
+
LittleWeasel::Filters::EnUs::NumericFilter.new,
|
|
187
|
+
LittleWeasel::Filters::EnUs::CurrencyFilter.new,
|
|
188
|
+
LittleWeasel::Filters::EnUs::SingleCharacterWordFilter.new
|
|
189
|
+
]
|
|
190
|
+
word_preprocessors = nil
|
|
191
|
+
dictionary_words = Support::FileHelpers.dictionary_words_for dictionary_file_path: file
|
|
192
|
+
dictionary = dictionary_manager.create_dictionary_from_memory(dictionary_key: dictionary_key, dictionary_words: dictionary_words, word_filters: word_filters, word_preprocessors: word_preprocessors)
|
|
193
|
+
dictionary_words << 'A'.dup
|
|
194
|
+
dictionary_words << 'I'.dup
|
|
195
|
+
dictionary_words << '1000'.dup
|
|
196
|
+
dictionary_words << '1,000'.dup
|
|
197
|
+
dictionary_words << '10,000.00'.dup
|
|
198
|
+
dictionary_words << '+100.00'.dup
|
|
199
|
+
dictionary_words << '-200,000.00'.dup
|
|
200
|
+
dictionary_words << '$100,000'.dup
|
|
201
|
+
dictionary_words << '+$100,000,000.10'.dup
|
|
202
|
+
dictionary_words << '-$999,000,000.10'.dup
|
|
203
|
+
dictionary_words.each do |word|
|
|
204
|
+
word.strip!
|
|
205
|
+
word_results = dictionary.word_results word
|
|
206
|
+
DictionaryResultsHelpers.print_word_results word, word_results
|
|
207
|
+
end
|
|
208
|
+
rescue StandardError => e
|
|
209
|
+
task 'word_results:word_filters' do
|
|
210
|
+
puts "LittleWeasel task word_results:word_filters not loaded: #{e.message}"
|
|
211
|
+
exit 1
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
#
|
|
217
|
+
# Tasks related to the #block_results API
|
|
218
|
+
|
|
219
|
+
namespace 'block_results' do
|
|
220
|
+
task :basic do
|
|
221
|
+
LittleWeasel.configure do |config|
|
|
222
|
+
# TODO: Configure as needed here.
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
# Create a Dictionary Manager.
|
|
226
|
+
dictionary_manager = LittleWeasel::DictionaryManager.new
|
|
227
|
+
|
|
228
|
+
# Create our unique key for the dictionary.
|
|
229
|
+
en_us_key = LittleWeasel::DictionaryKey.new(language: :en, region: :us, tag: :big)
|
|
230
|
+
|
|
231
|
+
# Create a dictionary from a file on disk. The below assumes the
|
|
232
|
+
# dictionary file name matches the dictionary key (e.g. en-US-big).
|
|
233
|
+
en_us_dictionary = dictionary_manager.create_dictionary_from_file(
|
|
234
|
+
dictionary_key: en_us_key, file: file_from(en_us_key))
|
|
235
|
+
|
|
236
|
+
word_block = "This is a word-block of 8 words and 2 numbers."
|
|
237
|
+
|
|
238
|
+
# Add a word filter so that numbers are considered valid.
|
|
239
|
+
en_us_dictionary.add_filters word_filters: [
|
|
240
|
+
LittleWeasel::Filters::EnUs::NumericFilter.new
|
|
241
|
+
]
|
|
242
|
+
|
|
243
|
+
block_results = en_us_dictionary.block_results word_block
|
|
244
|
+
|
|
245
|
+
# Returns a LittleWeasel::BlockResults object.
|
|
246
|
+
DictionaryResultsHelpers.print_block_results word_block, block_results
|
|
247
|
+
rescue StandardError => e
|
|
248
|
+
task 'block_results:basic' do
|
|
249
|
+
puts "LittleWeasel task block_results:basic not loaded: #{e.message}"
|
|
250
|
+
exit 1
|
|
251
|
+
end
|
|
252
|
+
end
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
namespace :bm do
|
|
256
|
+
task :hash do
|
|
257
|
+
STRING_LOCALE = { 'en-US' => 'en-us' }
|
|
258
|
+
SYMBOL_LOCALE = { 'en-US' => :enUS }
|
|
259
|
+
|
|
260
|
+
puts 'String variable vs. normal String.'
|
|
261
|
+
Benchmark.ips do |x|
|
|
262
|
+
string_variable = 'string_variable'
|
|
263
|
+
x.report('string variable') { STRING_LOCALE[string_variable] }
|
|
264
|
+
x.report('normal') { STRING_LOCALE['en-US'] }
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
puts 'String#freeze vs. normal String.'
|
|
268
|
+
Benchmark.ips do |x|
|
|
269
|
+
x.report('freeze') { STRING_LOCALE['en-US'.freeze] }
|
|
270
|
+
x.report('normal') { STRING_LOCALE['en-US'] }
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
puts 'String vs Symbol'
|
|
274
|
+
Benchmark.ips do |x|
|
|
275
|
+
x.report('string') { STRING_LOCALE['en-US'] }
|
|
276
|
+
x.report('symbol') { SYMBOL_LOCALE[:enUS] }
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
puts 'String#freeze vs. Symbol'
|
|
280
|
+
Benchmark.ips do |x|
|
|
281
|
+
x.report('string') { STRING_LOCALE['en-US'.freeze] }
|
|
282
|
+
x.report('symbol') { SYMBOL_LOCALE[:enUS] }
|
|
283
|
+
end
|
|
284
|
+
rescue StandardError => e
|
|
285
|
+
task 'hash' do
|
|
286
|
+
puts "LittleWeasel task bm:hash not loaded: #{e.message}"
|
|
287
|
+
exit 1
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
task :dictionary_key do
|
|
292
|
+
puts 'DictionaryKey test'
|
|
293
|
+
Benchmark.ips do |x|
|
|
294
|
+
x.report('DictionaryKey') do
|
|
295
|
+
DictionaryKey.key(language: :en, region: :us, tag: :tag)
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
rescue StandardError => e
|
|
299
|
+
task 'locale' do
|
|
300
|
+
puts "LittleWeasel task bm:dictionary_key not loaded: #{e.message}"
|
|
301
|
+
exit 1
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
task default: :spec
|
data/lib/LittleWeasel.rb
CHANGED
|
@@ -1,186 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
require "LittleWeasel/version"
|
|
3
|
-
require 'active_support/inflector'
|
|
1
|
+
# frozen_string_literal: true
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
require 'active_support/core_ext/object/blank'
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
# Returns the dictionary.
|
|
12
|
-
#
|
|
13
|
-
# @return [Hash] the dictionary.
|
|
14
|
-
attr_reader :dictionary
|
|
15
|
-
|
|
16
|
-
private
|
|
17
|
-
|
|
18
|
-
attr_reader :alphabet_exclusion_list
|
|
19
|
-
|
|
20
|
-
# Keep these private...will expose as options later.
|
|
21
|
-
attr_accessor :word_regex, :numeric_regex, :non_wordchar_regex
|
|
22
|
-
|
|
23
|
-
public
|
|
24
|
-
|
|
25
|
-
# The constructor
|
|
26
|
-
def initialize
|
|
27
|
-
@options = { exclude_alphabet: false, strip_whitespace: false, ignore_numeric: true, single_word_mode: false }
|
|
28
|
-
@alphabet_exclusion_list = %w{ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z }
|
|
29
|
-
@numeric_regex = /^[-+]?[0-9]?(\.[0-9]+)?$+/
|
|
30
|
-
@word_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
|
|
31
|
-
@non_wordchar_regex = /\W+/
|
|
32
|
-
@dictionary = Hash.new(1)
|
|
33
|
-
load
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Interrogates the dictionary to determine whether or not [word] exists.
|
|
37
|
-
#
|
|
38
|
-
# @param [String] word the word or words to interrogate
|
|
39
|
-
# @param [Hash] options options to apply to this query (see #options=). Options passed to this
|
|
40
|
-
# method are applied for this query only.
|
|
41
|
-
#
|
|
42
|
-
# @return [Boolean] true if the word/words in *word* exists, false otherwise.
|
|
43
|
-
#
|
|
44
|
-
# @example
|
|
45
|
-
#
|
|
46
|
-
# LittleWeasel::Checker.instance.exists?('C') # true (default options, :exclude_alphabet => false)
|
|
47
|
-
# LittleWeasel::Checker.instance.exists?('A', {exclude_alphabet:true}) # false
|
|
48
|
-
# LittleWeasel::Checker.instance.exists?('X', {exclude_alphabet:false}) # true
|
|
49
|
-
# LittleWeasel::Checker.instance.exists?('Hello') # true
|
|
50
|
-
#
|
|
51
|
-
# LittleWeasel::Checker.instance.exists?(' Hello ') # false (default options, :strip_whitespace => false)
|
|
52
|
-
# LittleWeasel::Checker.instance.exists?(' Yes ', {strip_whitespace:true}) # true
|
|
53
|
-
# LittleWeasel::Checker.instance.exists?('No ', {strip_whitespace:false}) # false
|
|
54
|
-
# LittleWeasel::Checker.instance.exists?('How dy', {strip_whitespace:true}) # false, strip_whitespace only removes leading and trailing spaces
|
|
55
|
-
#
|
|
56
|
-
# LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
|
|
57
|
-
# LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
|
|
58
|
-
#
|
|
59
|
-
# LittleWeasel::Checker.instance.exists?('Hello World') # true, we're accepting multiple words now by default (default options, single_word_mode => false) :)
|
|
60
|
-
# LittleWeasel::Checker.instance.exists?("hello, mister; did I \'mention\'' that lemon cake is \"great?\" It's just wonderful!") # true
|
|
61
|
-
#
|
|
62
|
-
# LittleWeasel::Checker.instance.exists?('I love ice cream', {single_word_mode:true}) # false; while all the words are valid, more than one word will return false
|
|
63
|
-
#
|
|
64
|
-
def exists?(word, options=nil)
|
|
65
|
-
options = options || @options
|
|
66
|
-
|
|
67
|
-
return false unless word.is_a?(String)
|
|
68
|
-
|
|
69
|
-
word = word.dup
|
|
70
|
-
word.strip! if options[:strip_whitespace]
|
|
71
|
-
|
|
72
|
-
return false if word.empty?
|
|
73
|
-
|
|
74
|
-
if block? word
|
|
75
|
-
return false if options[:single_word_mode]
|
|
76
|
-
return block_exists? word
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
return true if options[:ignore_numeric] && number?(word)
|
|
80
|
-
return false if options[:exclude_alphabet] && word.length == 1 && @alphabet_exclusion_list.include?(word.upcase)
|
|
81
|
-
|
|
82
|
-
valid_word? word
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# Sets the global options for this gem.
|
|
86
|
-
#
|
|
87
|
-
# @param [Hash] options options that should apply to all subsequent calls to method *exists?* (see #exists?).
|
|
88
|
-
# Options set via this property apply to all subsequent queries.
|
|
89
|
-
#
|
|
90
|
-
# @option options [Boolean] :exclude_alphabet (false) If false, letters of the alphabet are considered words.
|
|
91
|
-
# @option options [Boolean] :strip_whitespace (false) If true, leading and trailing spaces are removed before checking to see if the word exists.
|
|
92
|
-
# @option options [Boolean] :ignore_numeric (true) If true, numeric values are considered valid words.
|
|
93
|
-
# @option options [Boolean] :single_word_mode (false) If false, word blocks (more than one word) are considered valid if all the words exist in the dictionary.
|
|
94
|
-
#
|
|
95
|
-
# @return [Hash] The options
|
|
96
|
-
#
|
|
97
|
-
# @example
|
|
98
|
-
# LittleWeasel::Checker.instance.options({exclude_alphabet:true})
|
|
99
|
-
# LittleWeasel::Checker.instance.exists?('A') # false
|
|
100
|
-
#
|
|
101
|
-
# LittleWeasel::Checker.instance.options({exclude_alphabet:false})
|
|
102
|
-
# LittleWeasel::Checker.instance.exists?('A') # true
|
|
103
|
-
#
|
|
104
|
-
# LittleWeasel::Checker.instance.options({strip_whitespace:false})
|
|
105
|
-
# LittleWeasel::Checker.instance.exists?(' Hello ') # false
|
|
106
|
-
# LittleWeasel::Checker.instance.exists?('No ') # false
|
|
107
|
-
# LittleWeasel::Checker.instance.exists?(' No') # false
|
|
108
|
-
#
|
|
109
|
-
# LittleWeasel::Checker.instance.options({strip_whitespace:true})
|
|
110
|
-
# LittleWeasel::Checker.instance.exists?(' Yes ') # true
|
|
111
|
-
# LittleWeasel::Checker.instance.exists?('How dy') # false, strip_whitespace only removes leading and trailing spaces
|
|
112
|
-
#
|
|
113
|
-
# LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
|
|
114
|
-
# LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
|
|
115
|
-
# LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210') # true (default options, ignore_numeric => true)
|
|
116
|
-
# LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210', {ignore_numeric:false}) # false
|
|
117
|
-
#
|
|
118
|
-
# LittleWeasel::Checker.instance.options({single_word_mode:true})
|
|
119
|
-
# LittleWeasel::Checker.instance.exists?('I love ice cream') # false; while all the words are valid, more than one word will return false
|
|
120
|
-
# LittleWeasel::Checker.instance.exists?('Baby') # true
|
|
121
|
-
#
|
|
122
|
-
def options=(options)
|
|
123
|
-
@options = options
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
# Gets the global options currently set for this gem.
|
|
127
|
-
#
|
|
128
|
-
# @return [Hash] The options
|
|
129
|
-
def options
|
|
130
|
-
@options
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
protected
|
|
134
|
-
|
|
135
|
-
def number?(word)
|
|
136
|
-
word.strip.gsub(@numeric_regex).count > 0
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
def block?(string)
|
|
140
|
-
string = string.dup
|
|
141
|
-
return false unless string.is_a?(String)
|
|
142
|
-
string.gsub!(@numeric_regex, "")
|
|
143
|
-
return false unless string.length > 1
|
|
144
|
-
string.strip.scan(/[\w'-]+/).length > 1
|
|
145
|
-
end
|
|
146
|
-
|
|
147
|
-
def block_exists?(word_block)
|
|
148
|
-
word_block = word_block.dup
|
|
149
|
-
|
|
150
|
-
word_block.gsub!(@numeric_regex, "") if options[:ignore_numeric]
|
|
151
|
-
return false if word_block.nil?
|
|
152
|
-
word_block.strip! unless word_block.nil?
|
|
153
|
-
word_block.gsub!(@non_wordchar_regex, " ")
|
|
154
|
-
word_block.split(@word_regex).uniq.each { |word|
|
|
155
|
-
return false unless valid_block_word?(word)
|
|
156
|
-
}
|
|
157
|
-
return true
|
|
158
|
-
end
|
|
159
|
-
|
|
160
|
-
def valid_word?(word)
|
|
161
|
-
word = word.dup.downcase
|
|
162
|
-
exists = dictionary.has_key?(word)
|
|
163
|
-
exists = dictionary.has_key?(word.singularize) unless exists
|
|
164
|
-
exists
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
def valid_block_word?(word)
|
|
168
|
-
return true if word.length == 1
|
|
169
|
-
valid_word? word.strip
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
private
|
|
173
|
-
|
|
174
|
-
def dictionary_path
|
|
175
|
-
File.expand_path(File.dirname(__FILE__) + '/dictionary')
|
|
176
|
-
end
|
|
177
|
-
|
|
178
|
-
def load
|
|
179
|
-
File.open(dictionary_path) do |io|
|
|
180
|
-
io.each { |line| line.chomp!; @dictionary[line] = line }
|
|
181
|
-
end
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
end
|
|
185
|
-
|
|
186
|
-
end
|
|
5
|
+
Dir[File.join('.', 'lib/LittleWeasel/**/*.rb')].each do |f|
|
|
6
|
+
require f
|
|
7
|
+
end
|