LittleWeasel 3.0.4 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/.reek.yml +17 -0
- data/.rspec +4 -2
- data/.rubocop.yml +187 -0
- data/.ruby-version +1 -1
- data/.yardopts +2 -0
- data/Gemfile +3 -1
- data/LittleWeasel.gemspec +31 -18
- data/README.md +408 -42
- data/Rakefile +296 -3
- data/lib/LittleWeasel.rb +5 -184
- data/lib/LittleWeasel/block_results.rb +81 -0
- data/lib/LittleWeasel/configure.rb +98 -0
- data/lib/LittleWeasel/dictionary.rb +125 -0
- data/lib/LittleWeasel/dictionary_key.rb +48 -0
- data/lib/LittleWeasel/dictionary_manager.rb +85 -0
- data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
- data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
- data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
- data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
- data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
- data/lib/LittleWeasel/filters/word_filter.rb +59 -0
- data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
- data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
- data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
- data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
- data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
- data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
- data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
- data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
- data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
- data/lib/LittleWeasel/metadata/metadatable.rb +136 -0
- data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
- data/lib/LittleWeasel/modules/configurable.rb +26 -0
- data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
- data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
- data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +20 -0
- data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
- data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +19 -0
- data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
- data/lib/LittleWeasel/modules/dictionary_loader_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +17 -0
- data/lib/LittleWeasel/modules/dictionary_sourceable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_validatable.rb +30 -0
- data/lib/LittleWeasel/modules/language.rb +23 -0
- data/lib/LittleWeasel/modules/language_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/locale.rb +40 -0
- data/lib/LittleWeasel/modules/order_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/orderable.rb +17 -0
- data/lib/LittleWeasel/modules/region.rb +23 -0
- data/lib/LittleWeasel/modules/region_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/tag_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/taggable.rb +31 -0
- data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +28 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +55 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +55 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +27 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +122 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
- data/lib/LittleWeasel/services/dictionary_cache_service.rb +262 -0
- data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
- data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
- data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
- data/lib/LittleWeasel/services/dictionary_loader_service.rb +59 -0
- data/lib/LittleWeasel/services/dictionary_metadata_service.rb +114 -0
- data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
- data/lib/LittleWeasel/version.rb +3 -1
- data/lib/LittleWeasel/word_results.rb +146 -0
- data/spec/factories/dictionary.rb +43 -0
- data/spec/factories/dictionary_cache_service.rb +95 -0
- data/spec/factories/dictionary_creator_service.rb +16 -0
- data/spec/factories/dictionary_file_loader_service.rb +13 -0
- data/spec/factories/dictionary_hash.rb +39 -0
- data/spec/factories/dictionary_key.rb +14 -0
- data/spec/factories/dictionary_killer_service.rb +14 -0
- data/spec/factories/dictionary_loader_service.rb +14 -0
- data/spec/factories/dictionary_manager.rb +10 -0
- data/spec/factories/dictionary_metadata.rb +16 -0
- data/spec/factories/dictionary_metadata_service.rb +16 -0
- data/spec/factories/numeric_filter.rb +12 -0
- data/spec/factories/preprocessed_word.rb +16 -0
- data/spec/factories/preprocessed_words.rb +41 -0
- data/spec/factories/single_character_word_filter.rb +12 -0
- data/spec/factories/word_results.rb +16 -0
- data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
- data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
- data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
- data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +116 -0
- data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
- data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
- data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
- data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
- data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
- data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
- data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
- data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
- data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
- data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
- data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
- data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
- data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
- data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
- data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +44 -0
- data/spec/lib/LittleWeasel/modules/language_spec.rb +52 -0
- data/spec/lib/LittleWeasel/modules/locale_spec.rb +140 -0
- data/spec/lib/LittleWeasel/modules/region_spec.rb +52 -0
- data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +216 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +175 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
- data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
- data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
- data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
- data/spec/lib/LittleWeasel/services/dictionary_loader_service_spec.rb +50 -0
- data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
- data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
- data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
- data/spec/spec_helper.rb +117 -6
- data/spec/support/factory_bot.rb +15 -0
- data/spec/support/file_helpers.rb +32 -0
- data/spec/support/files/empty-dictionary.txt +0 -0
- data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
- data/spec/support/files/en-US-tagged.txt +26 -0
- data/spec/support/files/en-US.txt +26 -0
- data/spec/support/files/en.txt +26 -0
- data/spec/support/files/es-ES.txt +27 -0
- data/spec/support/files/es.txt +27 -0
- data/spec/support/general_helpers.rb +68 -0
- data/spec/support/shared_contexts.rb +108 -0
- data/spec/support/shared_examples.rb +105 -0
- metadata +408 -65
- data/spec/checker/checker_spec.rb +0 -286
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'word_preprocessable'
|
|
4
|
+
require_relative 'word_preprocessor_validatable'
|
|
5
|
+
|
|
6
|
+
module LittleWeasel
|
|
7
|
+
module Preprocessors
|
|
8
|
+
# This module provides methods and functionality to manage word
|
|
9
|
+
# preprocessors. A "word preprocessor" is an object that manipulates a word
|
|
10
|
+
# before it is passed to any word filters and before it is compared against
|
|
11
|
+
# the dictionary for validity.
|
|
12
|
+
#
|
|
13
|
+
# When creating your own word preprocessors, here are some things you
|
|
14
|
+
# need to consider:
|
|
15
|
+
#
|
|
16
|
+
# Multiple word preprocessors can be applied to a given word. Word
|
|
17
|
+
# preprocessors will be applied to a word in
|
|
18
|
+
# Preprocessors::WordPreprocessor#order order (ascending). Even though this
|
|
19
|
+
# is the case, it doesn't mean you should seek to apply more than one word
|
|
20
|
+
# preprocessor at a time. However, if you do, write and order your word
|
|
21
|
+
# preprocessors in such a way that each preprocessor manipulates the word
|
|
22
|
+
# in a complementary rather than contradictory way. For example,
|
|
23
|
+
# applying one word preprocessor that converts a word to uppercase and a
|
|
24
|
+
# second that converts the word to lowercase, contradict each other.
|
|
25
|
+
#
|
|
26
|
+
# Another thing you need to consider, is whether or not metadata observers
|
|
27
|
+
# should be notified of the preprocessed word (now that it has been
|
|
28
|
+
# potentially manipulated) or if they should be notified of the original
|
|
29
|
+
# word; this is because, the original word may not be found as a valid word
|
|
30
|
+
# in the dictionary, while the preprocessed word might and vice versa.
|
|
31
|
+
module WordPreprocessorManagable
|
|
32
|
+
include WordPreprocessable
|
|
33
|
+
include WordPreprocessorsValidatable
|
|
34
|
+
|
|
35
|
+
# Override attr_reader word_preprocessors found in WordPreprocessable
|
|
36
|
+
# so that we don't raise nil errors when using word_preprocessors.
|
|
37
|
+
def word_preprocessors
|
|
38
|
+
@word_preprocessors ||= []
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def clear_preprocessors
|
|
42
|
+
self.word_preprocessors = []
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Appends word preprocessors to the #word_preprocessors Array.
|
|
46
|
+
#
|
|
47
|
+
# If argument word_preprocessors is nil, a block must be passed to populate
|
|
48
|
+
# the word_preprocessors with an Array of valid word preprocessor objects.
|
|
49
|
+
#
|
|
50
|
+
# This method is used for adding/appending word preprocessors to the
|
|
51
|
+
# word_preprocessors Array. To replace word preprocessors, use #replace_preprocessors;
|
|
52
|
+
# to perform any other manipulation of the word_preprocessors Array,
|
|
53
|
+
# use #word_preprocessors directly.
|
|
54
|
+
def add_preprocessors(word_preprocessors: nil)
|
|
55
|
+
return if word_preprocessors.is_a?(Array) && word_preprocessors.blank?
|
|
56
|
+
|
|
57
|
+
unless word_preprocessors.present? || block_given?
|
|
58
|
+
raise 'A block is required if argument word_preprocessors is nil'
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
word_preprocessors ||= []
|
|
62
|
+
yield word_preprocessors if block_given?
|
|
63
|
+
|
|
64
|
+
concat_and_sort_word_preprocessors! word_preprocessors
|
|
65
|
+
end
|
|
66
|
+
alias append_preprocessors add_preprocessors
|
|
67
|
+
|
|
68
|
+
def replace_preprocessors(word_preprocessors:)
|
|
69
|
+
clear_preprocessors
|
|
70
|
+
add_preprocessors word_preprocessors: word_preprocessors
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def preprocessors_on=(on)
|
|
74
|
+
raise ArgumentError, "Argument on is not true or false: #{on.class}" unless [true, false].include?(on)
|
|
75
|
+
|
|
76
|
+
word_preprocessors.each { |word_preprocessor| word_preprocessor.preprocessor_on = on }
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Returns a Preprocessors::PreprocessedWords object.
|
|
80
|
+
def preprocess(word:)
|
|
81
|
+
preprocessed_words = preprocessed_words word: word
|
|
82
|
+
PreprocessedWords.new(original_word: word, preprocessed_words: preprocessed_words)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def preprocessed_words(word:)
|
|
86
|
+
word_preprocessors.map do |word_preprocessor|
|
|
87
|
+
word_preprocessor.preprocess(word).tap do |processed_word|
|
|
88
|
+
word = processed_word.preprocessed_word
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Returns the final (or last) preprocessed word in the Array of
|
|
94
|
+
# preprocessed words. The final preprocessed word is the word that has
|
|
95
|
+
# passed through all the word preprocessors.
|
|
96
|
+
def preprocessed_word(word:)
|
|
97
|
+
preprocessed_words = self.preprocessed_words word: word
|
|
98
|
+
preprocessed_words.max_by(&:preprocessor_order).preprocessed_word unless preprocessed_words.blank?
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
private
|
|
102
|
+
|
|
103
|
+
# This method concatenates preprocessors to #word_preprocessors,
|
|
104
|
+
# sorts #word_preprocessors by WordPreprocessor#order and
|
|
105
|
+
# returns the results.
|
|
106
|
+
def concat_and_sort_word_preprocessors!(preprocessors)
|
|
107
|
+
validate_word_preprocessors word_preprocessors: preprocessors
|
|
108
|
+
|
|
109
|
+
word_preprocessors.concat preprocessors
|
|
110
|
+
word_preprocessors.sort_by!(&:order)
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LittleWeasel
|
|
4
|
+
module Preprocessors
|
|
5
|
+
# This module validates word preprocessor types.
|
|
6
|
+
# rubocop: disable Layout/LineLength
|
|
7
|
+
module WordPreprocessorValidatable
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# :reek:ManualDispatch - ignored, this is duck-typing not 'simulated polymorphism'
|
|
11
|
+
# :reek:TooManyStatements - ignored, "too many statements" is easier to understand than arbitrarily breaking all this down into individual methods
|
|
12
|
+
def validate_word_preprocessor(word_preprocessor:)
|
|
13
|
+
# You can use your own word preprocessor types as long as they quack
|
|
14
|
+
# correctly; however, you are responsible for the behavior of these
|
|
15
|
+
# required methods/attributes. It's probably better to follow the
|
|
16
|
+
# pattern of existing word preprocessor objects and inherit from
|
|
17
|
+
# Preprocessors::WordPreprocessor.
|
|
18
|
+
|
|
19
|
+
word_preprocessor_class = word_preprocessor.class
|
|
20
|
+
|
|
21
|
+
# class methods
|
|
22
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '.preprocess') unless word_preprocessor_class.respond_to?(:preprocess)
|
|
23
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '.preprocess?') unless word_preprocessor_class.respond_to?(:preprocess?)
|
|
24
|
+
|
|
25
|
+
# instance methods
|
|
26
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '#preprocess') unless word_preprocessor.respond_to?(:preprocess)
|
|
27
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '#preprocess?') unless word_preprocessor.respond_to?(:preprocess?)
|
|
28
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '#preprocessor_off?') unless word_preprocessor.respond_to?(:preprocessor_off?)
|
|
29
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '#preprocessor_on') unless word_preprocessor.respond_to?(:preprocessor_on)
|
|
30
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '#preprocessor_on=') unless word_preprocessor.respond_to?(:preprocessor_on=)
|
|
31
|
+
raise validation_error_message(object: word_preprocessor_class, respond_to: '#preprocessor_on?') unless word_preprocessor.respond_to?(:preprocessor_on?)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def validation_error_message(object:, respond_to:)
|
|
35
|
+
"Argument word_preprocessor: does not respond to: #{object}#{respond_to}"
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
# rubocop: enable Layout/LineLength
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'word_preprocessor_validatable'
|
|
4
|
+
|
|
5
|
+
module LittleWeasel
|
|
6
|
+
module Preprocessors
|
|
7
|
+
# This module provides methods to validate an Array of word preprocessor
|
|
8
|
+
# objects.
|
|
9
|
+
module WordPreprocessorsValidatable
|
|
10
|
+
module_function
|
|
11
|
+
|
|
12
|
+
def validate_word_preprocessors(word_preprocessors:)
|
|
13
|
+
return if word_preprocessors.blank?
|
|
14
|
+
|
|
15
|
+
raise ArgumentError, "Argument word_preprocessors is not an Array: #{word_preprocessors.class}" \
|
|
16
|
+
unless word_preprocessors.is_a? Array
|
|
17
|
+
|
|
18
|
+
word_preprocessors.each do |word_preprocessor|
|
|
19
|
+
WordPreprocessorValidatable.validate_word_preprocessor word_preprocessor: word_preprocessor
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../modules/dictionary_cache_keys'
|
|
4
|
+
require_relative '../modules/dictionary_cache_validatable'
|
|
5
|
+
require_relative '../modules/dictionary_keyable'
|
|
6
|
+
require_relative '../modules/dictionary_sourceable'
|
|
7
|
+
require_relative '../modules/dictionary_validatable'
|
|
8
|
+
|
|
9
|
+
module LittleWeasel
|
|
10
|
+
module Services
|
|
11
|
+
# This class provides methods and attributes that can be used to manage the
|
|
12
|
+
# dictionary cache. The "dictionary cache" is a simple Hash that provides
|
|
13
|
+
# access to information related to dictionaries through a dictionary "key".
|
|
14
|
+
# A dictionary "key" is a unique key comprised of a locale and
|
|
15
|
+
# optional "tag" (see Modules::Taggable and DictionaryKey for more
|
|
16
|
+
# information). The dictionary cache also provides a way for dictionary
|
|
17
|
+
# objects to share dictionary information, in particular, the dictionary
|
|
18
|
+
# file and dictionary metadata.
|
|
19
|
+
class DictionaryCacheService
|
|
20
|
+
include Modules::DictionaryKeyable
|
|
21
|
+
include Modules::DictionaryCacheValidatable
|
|
22
|
+
include Modules::DictionaryCacheKeys
|
|
23
|
+
include Modules::DictionarySourceable
|
|
24
|
+
include Modules::DictionaryValidatable
|
|
25
|
+
|
|
26
|
+
attr_reader :dictionary_cache
|
|
27
|
+
|
|
28
|
+
# This class produces the following (example) Hash that represents the
|
|
29
|
+
# dictionary cache structure:
|
|
30
|
+
#
|
|
31
|
+
# @example This is an example:
|
|
32
|
+
#
|
|
33
|
+
# {
|
|
34
|
+
# 'dictionary_cache' =>
|
|
35
|
+
# {
|
|
36
|
+
# 'dictionary_references' =>
|
|
37
|
+
# {
|
|
38
|
+
# 'en' =>
|
|
39
|
+
# {
|
|
40
|
+
# 'dictionary_id' => 19ec7845
|
|
41
|
+
# },
|
|
42
|
+
# 'en-US' =>
|
|
43
|
+
# {
|
|
44
|
+
# 'dictionary_id' => 0987a3f2
|
|
45
|
+
# },
|
|
46
|
+
# 'en-US-temp' =>
|
|
47
|
+
# {
|
|
48
|
+
# 'dictionary_id' => 9273eac6
|
|
49
|
+
# }
|
|
50
|
+
# },
|
|
51
|
+
# 'dictionaries' =>
|
|
52
|
+
# {
|
|
53
|
+
# 19ec7845 =>
|
|
54
|
+
# {
|
|
55
|
+
# 'source' => '/en.txt',
|
|
56
|
+
# 'dictionary_object' => {}
|
|
57
|
+
# },
|
|
58
|
+
# 0987a3f2 =>
|
|
59
|
+
# {
|
|
60
|
+
# 'source' => '/en-US.txt',
|
|
61
|
+
# 'dictionary_object' => {}
|
|
62
|
+
# },
|
|
63
|
+
# 9273eac6 =>
|
|
64
|
+
# {
|
|
65
|
+
# 'source' => '*736ed423',
|
|
66
|
+
# 'dictionary_object' => {}
|
|
67
|
+
# }
|
|
68
|
+
# }
|
|
69
|
+
# }
|
|
70
|
+
# }
|
|
71
|
+
def initialize(dictionary_key:, dictionary_cache:)
|
|
72
|
+
validate_dictionary_key dictionary_key: dictionary_key
|
|
73
|
+
self.dictionary_key = dictionary_key
|
|
74
|
+
|
|
75
|
+
validate_dictionary_cache dictionary_cache: dictionary_cache
|
|
76
|
+
self.dictionary_cache = dictionary_cache
|
|
77
|
+
|
|
78
|
+
self.class.init(dictionary_cache: dictionary_cache) unless dictionary_cache[DICTIONARY_CACHE]
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
class << self
|
|
82
|
+
# This method resets dictionary_cache to its initialized state.
|
|
83
|
+
# This class method is different from the #init instance method
|
|
84
|
+
# in that ALL dictionary references and ALL dictionaries are
|
|
85
|
+
# initialized.
|
|
86
|
+
def init(dictionary_cache:)
|
|
87
|
+
Modules::DictionaryCacheKeys.initialize_dictionary_cache dictionary_cache: dictionary_cache
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Returns true if the dictionary cache is initialized; that
|
|
91
|
+
# is, if the given dictionary_cache is in the same state the
|
|
92
|
+
# dictionary cache would be in after .init were called.
|
|
93
|
+
def init?(dictionary_cache:)
|
|
94
|
+
initialized_dictionary_cache = init(dictionary_cache: {})
|
|
95
|
+
dictionary_cache.eql?(initialized_dictionary_cache)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Returns the number of dictionaries currently in the cache.
|
|
99
|
+
def count(dictionary_cache:)
|
|
100
|
+
dictionary_cache.dig(self::DICTIONARY_CACHE, self::DICTIONARIES)&.keys&.count || 0
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# This method resets the dictionary cache for the given key. This method
|
|
105
|
+
# is different from the .init class method in that ONLY the dictionary
|
|
106
|
+
# reference and dictionary specific to the given key is initialized.
|
|
107
|
+
def init
|
|
108
|
+
# TODO: Do not delete the dictionary if it is being pointed to by
|
|
109
|
+
# another dictionary reference.
|
|
110
|
+
dictionary_cache_hash = dictionary_cache[DICTIONARY_CACHE]
|
|
111
|
+
dictionary_cache_hash[DICTIONARIES]&.delete(dictionary_id)
|
|
112
|
+
dictionary_cache_hash[DICTIONARY_REFERENCES]&.delete(key)
|
|
113
|
+
self
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Returns true if the dictionary reference exists for the given key; false
|
|
117
|
+
# otherwise. This method is only concerned with the dictionary reference
|
|
118
|
+
# and has nothing to do with whether or not the associated dictionary
|
|
119
|
+
# is actually loaded into the dictionary cache.
|
|
120
|
+
def dictionary_reference?
|
|
121
|
+
dictionary_reference&.present? || false
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Returns true if a dictionaries Hash key exists for the given dictionary_id
|
|
125
|
+
# in the dictionary cache. This method is only concerned with the existence of
|
|
126
|
+
# the key and has nothing to do with whether or not file/memory sources are
|
|
127
|
+
# present or the presence of a dictionary object.
|
|
128
|
+
def dictionary?
|
|
129
|
+
dictionary_cache[DICTIONARY_CACHE][DICTIONARIES].key? dictionary_id
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Adds a dictionary source. A "dictionary source" specifies the source from which
|
|
133
|
+
# the dictionary ultimately obtains its words.
|
|
134
|
+
#
|
|
135
|
+
# @param source [String] the dictionary source. This can be a file path
|
|
136
|
+
# or a memory source indicator to signify that the dictionary was created
|
|
137
|
+
# dynamically from memory.
|
|
138
|
+
def add_dictionary_source(source:)
|
|
139
|
+
validate_dictionary_source_does_not_exist dictionary_cache_service: self
|
|
140
|
+
|
|
141
|
+
dictionary_id = dictionary_id_for_dictionary_source(source: source)
|
|
142
|
+
self.dictionary_reference = dictionary_id
|
|
143
|
+
# Only set the dictionary source if it doesn't already exist because setting
|
|
144
|
+
# the dictionary source wipes out the #dictionary_object; dictionary objects
|
|
145
|
+
# can have more than one dictionary reference pointing to them, and we don't
|
|
146
|
+
# want to blow away the #dictionary_object, metadata, or any other data
|
|
147
|
+
# associated with it if it already exists.
|
|
148
|
+
self.dictionary_source = source unless dictionary?
|
|
149
|
+
self
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
# Returns the dictionary id if there is a dictionary id in the dictionary
|
|
153
|
+
# cache associated with the given key; nil otherwise.
|
|
154
|
+
def dictionary_id
|
|
155
|
+
dictionary_cache.dig(DICTIONARY_CACHE, DICTIONARY_REFERENCES, key, DICTIONARY_ID)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Returns the dictionary id if there is a dictionary id in the dictionary
|
|
159
|
+
# cache associated with the given key. This method raises an error if the
|
|
160
|
+
# dictionary id cannot be found.
|
|
161
|
+
def dictionary_id!
|
|
162
|
+
return dictionary_id if dictionary_id?
|
|
163
|
+
|
|
164
|
+
raise ArgumentError, "A dictionary id could not be found for key '#{key}'."
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
def dictionary_source!
|
|
168
|
+
raise ArgumentError, "A dictionary source could not be found for key '#{key}'." unless dictionary_reference?
|
|
169
|
+
|
|
170
|
+
dictionary_cache[DICTIONARY_CACHE][DICTIONARIES][dictionary_id!][SOURCE]
|
|
171
|
+
end
|
|
172
|
+
alias dictionary_file! dictionary_source!
|
|
173
|
+
|
|
174
|
+
def dictionary_source
|
|
175
|
+
dictionary_cache.dig(DICTIONARY_CACHE, DICTIONARIES, dictionary_id, SOURCE)
|
|
176
|
+
end
|
|
177
|
+
alias dictionary_file dictionary_source
|
|
178
|
+
|
|
179
|
+
# This method returns true if the dictionary associated with the
|
|
180
|
+
# given dictionary key is loaded/cached. If this is the case,
|
|
181
|
+
# a dictionary object is available in the dictionary cache.
|
|
182
|
+
def dictionary_object?
|
|
183
|
+
dictionary_object.present?
|
|
184
|
+
end
|
|
185
|
+
alias dictionary_exists? dictionary_object?
|
|
186
|
+
|
|
187
|
+
# Returns the dictionary object from the dictionary cache for the given
|
|
188
|
+
# key. This method raises an error if the dictionary is not in the cache;
|
|
189
|
+
# that is, if the dictionary was not previously loaded from disk or memory.
|
|
190
|
+
def dictionary_object!
|
|
191
|
+
unless dictionary_object?
|
|
192
|
+
raise ArgumentError,
|
|
193
|
+
"The dictionary object associated with argument key '#{key}' is not in the cache."
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
dictionary_object
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def dictionary_object
|
|
200
|
+
dictionary_cache.dig(DICTIONARY_CACHE, DICTIONARIES, dictionary_id, DICTIONARY_OBJECT)
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def dictionary_object=(object)
|
|
204
|
+
raise ArgumentError, 'Argument object is not a Dictionary object' unless object.is_a? Dictionary
|
|
205
|
+
|
|
206
|
+
unless dictionary_reference?
|
|
207
|
+
raise ArgumentError,
|
|
208
|
+
"The dictionary reference associated with key '#{key}' could not be found."
|
|
209
|
+
end
|
|
210
|
+
return if object.equal? dictionary_object
|
|
211
|
+
|
|
212
|
+
if dictionary_exists?
|
|
213
|
+
raise ArgumentError,
|
|
214
|
+
"The dictionary is already loaded/cached for key '#{key}'; use #unload or #kill first."
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
dictionary_cache[DICTIONARY_CACHE][DICTIONARIES][dictionary_id!][DICTIONARY_OBJECT] = object
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
private
|
|
221
|
+
|
|
222
|
+
attr_writer :dictionary_cache
|
|
223
|
+
|
|
224
|
+
def dictionary_reference
|
|
225
|
+
dictionary_cache.dig(DICTIONARY_CACHE, DICTIONARY_REFERENCES, key)
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def dictionary_reference=(dictionary_id)
|
|
229
|
+
dictionary_cache[DICTIONARY_CACHE][DICTIONARY_REFERENCES][key] = {
|
|
230
|
+
DICTIONARY_ID => dictionary_id
|
|
231
|
+
}
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# Returns the dictionary_id for the source if it exists in dictionaries;
|
|
235
|
+
# otherwise, returns the new dictionary id that should be used.
|
|
236
|
+
def dictionary_id_for_dictionary_source(source:)
|
|
237
|
+
dictionary_source?(source: source) || SecureRandom.uuid[0..7]
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
# Returns the dictionary_id associated with source if source exists;
|
|
241
|
+
# nil otherwise.
|
|
242
|
+
def dictionary_source?(source:)
|
|
243
|
+
dictionaries = dictionary_cache.dig(DICTIONARY_CACHE, DICTIONARIES)
|
|
244
|
+
dictionaries&.each_pair do |dictionary_id, dictionary_hash|
|
|
245
|
+
return dictionary_id if source == dictionary_hash[SOURCE]
|
|
246
|
+
end
|
|
247
|
+
nil
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def dictionary_source=(source)
|
|
251
|
+
dictionary_cache[DICTIONARY_CACHE][DICTIONARIES][dictionary_id!] = {
|
|
252
|
+
SOURCE => source,
|
|
253
|
+
DICTIONARY_OBJECT => {}
|
|
254
|
+
}
|
|
255
|
+
end
|
|
256
|
+
|
|
257
|
+
def dictionary_id?
|
|
258
|
+
dictionary_id.present?
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../dictionary'
|
|
4
|
+
require_relative '../filters/word_filterable'
|
|
5
|
+
require_relative '../filters/word_filters_validatable'
|
|
6
|
+
require_relative '../metadata/dictionary_metadata'
|
|
7
|
+
require_relative '../modules/dictionary_cache_servicable'
|
|
8
|
+
require_relative '../modules/dictionary_creator_servicable'
|
|
9
|
+
require_relative '../modules/dictionary_keyable'
|
|
10
|
+
require_relative '../modules/dictionary_metadata_servicable'
|
|
11
|
+
require_relative '../modules/dictionary_sourceable'
|
|
12
|
+
require_relative '../preprocessors/word_preprocessor_managable'
|
|
13
|
+
require_relative 'dictionary_file_loader_service'
|
|
14
|
+
|
|
15
|
+
module LittleWeasel
|
|
16
|
+
module Services
|
|
17
|
+
# This class provides a service to load dictionaries from disk, create
|
|
18
|
+
# and return a Dictionary object.
|
|
19
|
+
class DictionaryCreatorService
|
|
20
|
+
include Filters::WordFilterable
|
|
21
|
+
include Filters::WordFiltersValidatable
|
|
22
|
+
include Modules::DictionaryCacheServicable
|
|
23
|
+
include Modules::DictionaryKeyable
|
|
24
|
+
include Modules::DictionaryMetadataServicable
|
|
25
|
+
include Modules::DictionarySourceable
|
|
26
|
+
include Preprocessors::WordPreprocessorManagable
|
|
27
|
+
|
|
28
|
+
def initialize(dictionary_key:, dictionary_cache:, dictionary_metadata:,
|
|
29
|
+
word_filters: nil, word_preprocessors: nil)
|
|
30
|
+
validate_dictionary_key dictionary_key: dictionary_key
|
|
31
|
+
self.dictionary_key = dictionary_key
|
|
32
|
+
|
|
33
|
+
validate_dictionary_cache dictionary_cache: dictionary_cache
|
|
34
|
+
self.dictionary_cache = dictionary_cache
|
|
35
|
+
|
|
36
|
+
validate_dictionary_metadata dictionary_metadata: dictionary_metadata
|
|
37
|
+
self.dictionary_metadata = dictionary_metadata
|
|
38
|
+
|
|
39
|
+
validate_word_filters word_filters: word_filters unless word_filters.blank?
|
|
40
|
+
self.word_filters = word_filters
|
|
41
|
+
|
|
42
|
+
validate_word_preprocessors word_preprocessors: word_preprocessors unless word_preprocessors.blank?
|
|
43
|
+
self.word_preprocessors = word_preprocessors
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def from_file_source(file:)
|
|
47
|
+
add_dictionary_file_source file: file
|
|
48
|
+
dictionary_words = dictionary_file_loader_service.execute
|
|
49
|
+
create_dictionary dictionary_words: dictionary_words
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def from_memory_source(dictionary_words:)
|
|
53
|
+
add_dictionary_memory_source
|
|
54
|
+
create_dictionary dictionary_words: dictionary_words
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
private
|
|
58
|
+
|
|
59
|
+
def dictionary_file_loader_service
|
|
60
|
+
Services::DictionaryFileLoaderService.new dictionary_key: dictionary_key, dictionary_cache: dictionary_cache
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def create_dictionary(dictionary_words:)
|
|
64
|
+
Dictionary.new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache,
|
|
65
|
+
dictionary_metadata: dictionary_metadata, dictionary_words: dictionary_words, word_filters: word_filters,
|
|
66
|
+
word_preprocessors: word_preprocessors).tap do |dictionary|
|
|
67
|
+
dictionary_cache_service.dictionary_object = dictionary
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Adds a dictionary file source. A "file source" is a file path that
|
|
72
|
+
# indicates that the dictionary words associated with this dictionary are
|
|
73
|
+
# located on disk. This file path is used to locate and load the
|
|
74
|
+
# dictionary words into the dictionary cache for use.
|
|
75
|
+
#
|
|
76
|
+
# @param file [String] a file path pointing to the dictionary file to load and use.
|
|
77
|
+
#
|
|
78
|
+
# @return returns a reference to self.
|
|
79
|
+
def add_dictionary_file_source(file:)
|
|
80
|
+
dictionary_cache_service.add_dictionary_source(source: file)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Adds a dictionary memory source. A "memory source" indicates that the
|
|
84
|
+
# dictionary words associated with this dictionary were created
|
|
85
|
+
# dynamically and will be located in memory, as opposed to loaded from
|
|
86
|
+
# a file on disk.
|
|
87
|
+
#
|
|
88
|
+
# @return returns a reference to self.
|
|
89
|
+
def add_dictionary_memory_source
|
|
90
|
+
dictionary_cache_service.add_dictionary_source(source: memory_source)
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|