LittleWeasel 3.0.4 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/.reek.yml +17 -0
- data/.rspec +4 -2
- data/.rubocop.yml +187 -0
- data/.ruby-version +1 -1
- data/.yardopts +2 -0
- data/Gemfile +3 -1
- data/LittleWeasel.gemspec +31 -18
- data/README.md +408 -42
- data/Rakefile +296 -3
- data/lib/LittleWeasel.rb +5 -184
- data/lib/LittleWeasel/block_results.rb +81 -0
- data/lib/LittleWeasel/configure.rb +98 -0
- data/lib/LittleWeasel/dictionary.rb +125 -0
- data/lib/LittleWeasel/dictionary_key.rb +48 -0
- data/lib/LittleWeasel/dictionary_manager.rb +85 -0
- data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
- data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
- data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
- data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
- data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
- data/lib/LittleWeasel/filters/word_filter.rb +59 -0
- data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
- data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
- data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
- data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
- data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
- data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
- data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
- data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
- data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
- data/lib/LittleWeasel/metadata/metadatable.rb +136 -0
- data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
- data/lib/LittleWeasel/modules/configurable.rb +26 -0
- data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
- data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
- data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +20 -0
- data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
- data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +19 -0
- data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
- data/lib/LittleWeasel/modules/dictionary_loader_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +17 -0
- data/lib/LittleWeasel/modules/dictionary_sourceable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_validatable.rb +30 -0
- data/lib/LittleWeasel/modules/language.rb +23 -0
- data/lib/LittleWeasel/modules/language_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/locale.rb +40 -0
- data/lib/LittleWeasel/modules/order_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/orderable.rb +17 -0
- data/lib/LittleWeasel/modules/region.rb +23 -0
- data/lib/LittleWeasel/modules/region_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/tag_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/taggable.rb +31 -0
- data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +28 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +55 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +55 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +27 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +122 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
- data/lib/LittleWeasel/services/dictionary_cache_service.rb +262 -0
- data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
- data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
- data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
- data/lib/LittleWeasel/services/dictionary_loader_service.rb +59 -0
- data/lib/LittleWeasel/services/dictionary_metadata_service.rb +114 -0
- data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
- data/lib/LittleWeasel/version.rb +3 -1
- data/lib/LittleWeasel/word_results.rb +146 -0
- data/spec/factories/dictionary.rb +43 -0
- data/spec/factories/dictionary_cache_service.rb +95 -0
- data/spec/factories/dictionary_creator_service.rb +16 -0
- data/spec/factories/dictionary_file_loader_service.rb +13 -0
- data/spec/factories/dictionary_hash.rb +39 -0
- data/spec/factories/dictionary_key.rb +14 -0
- data/spec/factories/dictionary_killer_service.rb +14 -0
- data/spec/factories/dictionary_loader_service.rb +14 -0
- data/spec/factories/dictionary_manager.rb +10 -0
- data/spec/factories/dictionary_metadata.rb +16 -0
- data/spec/factories/dictionary_metadata_service.rb +16 -0
- data/spec/factories/numeric_filter.rb +12 -0
- data/spec/factories/preprocessed_word.rb +16 -0
- data/spec/factories/preprocessed_words.rb +41 -0
- data/spec/factories/single_character_word_filter.rb +12 -0
- data/spec/factories/word_results.rb +16 -0
- data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
- data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
- data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
- data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +116 -0
- data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
- data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
- data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
- data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
- data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
- data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
- data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
- data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
- data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
- data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
- data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
- data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
- data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
- data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
- data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +44 -0
- data/spec/lib/LittleWeasel/modules/language_spec.rb +52 -0
- data/spec/lib/LittleWeasel/modules/locale_spec.rb +140 -0
- data/spec/lib/LittleWeasel/modules/region_spec.rb +52 -0
- data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +216 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +175 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
- data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
- data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
- data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
- data/spec/lib/LittleWeasel/services/dictionary_loader_service_spec.rb +50 -0
- data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
- data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
- data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
- data/spec/spec_helper.rb +117 -6
- data/spec/support/factory_bot.rb +15 -0
- data/spec/support/file_helpers.rb +32 -0
- data/spec/support/files/empty-dictionary.txt +0 -0
- data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
- data/spec/support/files/en-US-tagged.txt +26 -0
- data/spec/support/files/en-US.txt +26 -0
- data/spec/support/files/en.txt +26 -0
- data/spec/support/files/es-ES.txt +27 -0
- data/spec/support/files/es.txt +27 -0
- data/spec/support/general_helpers.rb +68 -0
- data/spec/support/shared_contexts.rb +108 -0
- data/spec/support/shared_examples.rb +105 -0
- metadata +408 -65
- data/spec/checker/checker_spec.rb +0 -286
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/module/delegation'
|
|
4
|
+
require_relative 'word_results'
|
|
5
|
+
|
|
6
|
+
module LittleWeasel
|
|
7
|
+
# This class represents the results of gathering information about a word
# block (group of words). It collects one WordResults object per word and
# answers aggregate questions about the whole collection.
class BlockResults
  # :reek:Attribute - Ignored, it doesn't make sense to create a formal setter method.
  attr_reader :original_word_block, :word_results

  # @param original_word_block the word block these results belong to; it is
  #   stored as given.
  def initialize(original_word_block:)
    self.original_word_block = original_word_block
    self.word_results = []
  end

  # Appends a WordResults object to the collection.
  #
  # @param word_result [WordResults] the results for a single word.
  # @raise [ArgumentError] if word_result is not a WordResults object.
  # @return [Array<WordResults>] the collection, including the new element.
  def <<(word_result)
    unless word_result.is_a? WordResults
      raise ArgumentError, "Argument word_result is not a WordResults object: #{word_result.class}"
    end

    word_results << word_result
  end

  # Calls #success? on all WordResults objects. Returns true if all
  # WordResults return true; false is returned otherwise (including when
  # there are no WordResults objects at all).
  def success?
    every_word_result?(&:success?)
  end

  # Returns true if all WordResults object words are valid (#word_valid?);
  # false otherwise, or if there are no WordResults objects.
  def words_valid?
    every_word_result?(&:word_valid?)
  end

  # Returns true if all WordResults object words have filter matches
  # (#filter_match?); false otherwise, or if there are no WordResults objects.
  def filters_match?
    every_word_result?(&:filter_match?)
  end

  # Returns true if all WordResults object words have been preprocessed
  # (#preprocessed_word?); false otherwise, or if there are no WordResults
  # objects.
  def preprocessed_words?
    every_word_result?(&:preprocessed_word?)
  end

  # Calls #preprocessed_word_or_original_word on all WordResults objects.
  # An Array of the results is returned; the Array is empty when there are
  # no WordResults objects.
  def preprocessed_words_or_original_words
    word_results.map(&:preprocessed_word_or_original_word)
  end

  # Returns true if all WordResults object words have been cached
  # (#word_cached?); false otherwise, or if there are no WordResults objects.
  def words_cached?
    every_word_result?(&:word_cached?)
  end

  private

  attr_writer :original_word_block, :word_results

  # Returns true only when the collection is non-empty AND the predicate
  # holds for every element. Plain Array#empty? is used on purpose so this
  # class does not depend on ActiveSupport's Object#present? being loaded.
  def every_word_result?(&predicate)
    !word_results.empty? && word_results.all?(&predicate)
  end
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This is the configuration for LittleWeasel.
|
|
4
|
+
module LittleWeasel
|
|
5
|
+
class << self
  # Returns the application configuration object. It is created with its
  # default settings on first access, so this never returns nil even if
  # {configure} has not been called yet.
  #
  # @return [Configuration] the application Configuration object.
  def configuration
    @configuration ||= Configuration.new
  end

  # Yields the application configuration object so that its settings can
  # be changed. When no block is given, nothing is changed and the
  # configuration object itself is returned.
  #
  # @yieldparam configuration [Configuration] the application
  #   Configuration object.
  # @return [Object] the value returned by the block, or the Configuration
  #   object when called without a block.
  def configure
    return configuration unless block_given?

    yield(configuration)
  end

  private

  attr_writer :configuration
end
|
|
20
|
+
|
|
21
|
+
# Holds the configuration properties for this gem, together with the
# methods used to read, change and reset them.
class Configuration
  attr_reader :max_dictionary_file_megabytes, :max_invalid_words_bytesize,
              :metadata_observers, :word_block_regex

  # Plain setters: the value is stored exactly as given.
  attr_writer :max_dictionary_file_megabytes, :word_block_regex

  # The constructor; calls {#reset}.
  def initialize
    reset
  end

  # Puts every configuration setting back to its default value.
  #
  # @return [void]
  def reset
    @max_dictionary_file_megabytes = 5
    @max_invalid_words_bytesize = 25_000
    @metadata_observers = [LittleWeasel::Metadata::InvalidWordsMetadata]
    # TODO: Is this the correct regex to use, or is there something better?
    # @word_block_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
    # @word_block_regex = /(?:(?:[\-A-Za-z0-9]|\d(?!\d|\b))+(?:'[\-A-Za-z0-9]+)?)/
    # @word_block_regex = /(?:(?:[\-a-z0-9]|\d(?!\d|\b))+(?:'[\-a-z0-9]+)?)/i
    @word_block_regex = /[[[:word:]]'-]+/
  end

  # The dictionary file size limit expressed in bytes, i.e.
  # {#max_dictionary_file_megabytes} multiplied by 1,000,000. The limit
  # itself is enforced outside this class.
  #
  # The default is 5 megabytes.
  #
  # @return [Integer] the maximum number of bytes for a dictionary.
  def max_dictionary_file_bytes
    max_dictionary_file_megabytes * 1_000_000
  end

  # Tells whether {#max_invalid_words_bytesize} is greater than 0.
  #
  # @return [true, false] based on {#max_invalid_words_bytesize}.
  def max_invalid_words_bytesize?
    max_invalid_words_bytesize.positive?
  end

  # Sets the maximum cache size (in bytes) for invalid words. Negative
  # values are stored as 0; with a value of 0, invalid words will NOT be
  # cached.
  #
  # If greater than 0, invalid words will be cached up to and including
  # {#max_invalid_words_bytesize} bytes.
  #
  # @see #max_invalid_words_bytesize?
  def max_invalid_words_bytesize=(value)
    @max_invalid_words_bytesize = value.negative? ? 0 : value
  end

  # Replaces the list of metadata observer classes.
  #
  # @param value [Array] the observer classes.
  # @raise [ArgumentError] if value is not an Array.
  def metadata_observers=(value)
    unless value.is_a? Array
      raise ArgumentError, "Argument value is not an Array: #{value.class}"
    end

    # TODO: Limit the amount of observer classes, exploits?

    @metadata_observers = value
  end
end
|
|
98
|
+
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'block_results'
|
|
4
|
+
require_relative 'filters/word_filter_managable'
|
|
5
|
+
require_relative 'metadata/dictionary_metadata'
|
|
6
|
+
require_relative 'modules/configurable'
|
|
7
|
+
require_relative 'modules/dictionary_cache_servicable'
|
|
8
|
+
require_relative 'modules/dictionary_keyable'
|
|
9
|
+
require_relative 'modules/dictionary_metadata_servicable'
|
|
10
|
+
require_relative 'preprocessors/word_preprocessor_managable'
|
|
11
|
+
require_relative 'word_results'
|
|
12
|
+
|
|
13
|
+
module LittleWeasel
|
|
14
|
+
# Wraps the words of one dictionary and answers lookups for single words
# (#word_results) and for blocks of words (#block_results). The validate_*,
# preprocess, filters_matched, config and dictionary_cache_service helpers
# used below are not defined in this class; presumably they come from the
# included modules.
class Dictionary
  include Filters::WordFilterManagable
  include Modules::Configurable
  include Modules::DictionaryCacheServicable
  include Modules::DictionaryKeyable
  include Modules::DictionaryMetadataServicable
  include Preprocessors::WordPreprocessorManagable

  attr_reader :dictionary_metadata_object, :dictionary_words

  # Builds the word lookup table: every given word maps to true, and any
  # other key reads as false without being stored.
  #
  # @param dictionary_words the words to put in the table.
  # @return [Hash] the lookup table.
  def self.to_hash(dictionary_words:)
    lookup = Hash.new(false)
    dictionary_words.each { |entry| lookup[entry] = true }
    lookup
  end

  # Validates and stores the collaborators, converts the word list into a
  # lookup table, attaches the metadata observers and finally registers the
  # given word filters and preprocessors.
  #
  # @raise [ArgumentError] if dictionary_words is not an Array.
  def initialize(dictionary_key:, dictionary_words:, dictionary_cache:,
    dictionary_metadata:, word_filters: nil, word_preprocessors: nil)
    validate_dictionary_key dictionary_key: dictionary_key
    self.dictionary_key = dictionary_key

    validate_dictionary_cache dictionary_cache: dictionary_cache
    self.dictionary_cache = dictionary_cache

    validate_dictionary_metadata dictionary_metadata: dictionary_metadata
    self.dictionary_metadata = dictionary_metadata

    raise ArgumentError, "Argument dictionary_words is not an Array: #{dictionary_words.class}" unless dictionary_words.is_a?(Array)

    # The word table must exist before the metadata object is created,
    # because it is handed to the metadata object.
    self.dictionary_words = self.class.to_hash(dictionary_words: dictionary_words)
    self.dictionary_metadata_object = create_dictionary_metadata
    dictionary_metadata_object.add_observers

    add_filters word_filters: word_filters || []
    add_preprocessors word_preprocessors: word_preprocessors || []
  end

  # Looks a single word up and returns a WordResults object describing it.
  # The preprocessed form of the word is used for filtering and lookup when
  # there is one; otherwise the word itself is used. Observers are notified
  # of the search before the results are returned.
  #
  # @param word [String] the word to look up.
  # @raise [ArgumentError] if word is not a String.
  # @return [WordResults]
  def word_results(word)
    # TODO: Make max word size configurable.
    unless word.is_a?(String)
      raise ArgumentError, "Argument word is not a String: #{word.class}"
    end

    preprocessed_words = preprocess(word: word)
    lookup_word = preprocessed_words.preprocessed_word || word
    results = WordResults.new(
      original_word: word,
      filters_matched: filters_matched(lookup_word),
      preprocessed_words: preprocessed_words,
      word_cached: dictionary_words.include?(lookup_word),
      word_valid: dictionary_words[lookup_word] || false
    )

    dictionary_metadata_object.notify(action: :word_search, params: { word_results: results })

    results
  end

  # Splits a word block into words using the configured word_block_regex
  # and gathers the #word_results of each word into a BlockResults object.
  #
  # @param word_block [String] the block of words to look up.
  # @raise [ArgumentError] if word_block is not a String, or is blank.
  # @return [BlockResults]
  def block_results(word_block)
    # TODO: Make max word_block size configurable.
    unless word_block.is_a?(String)
      raise ArgumentError, "Argument word_block is not a String: #{word_block.class}"
    end
    unless word_block.present?
      raise ArgumentError, "Argument word_block is empty: #{word_block.class}"
    end

    gathered = BlockResults.new(original_word_block: word_block)
    word_block.scan(config.word_block_regex).each do |word|
      gathered << word_results(word)
    end
    gathered
  end

  # This method returns true if this dictionary object is detached from the
  # dictionary cache; this can happen if the dictionary object is unloaded
  # from the dictionary cache (DictionaryManager#unload_dictionary) or the
  # dictionary is killed (DictionaryManager#kill_dictionary).
  def detached?
    !dictionary_cache_service.dictionary_object?
  end

  # Counts the VALID words in the dictionary, i.e. the table entries whose
  # value is truthy.
  def count
    dictionary_words.each_value.count { |valid| valid }
  end

  # Counts all words in the dictionary table, VALID and INVALID alike.
  def count_all_words
    dictionary_words.size
  end

  # Counts the INVALID words in the dictionary, i.e. the table entries
  # whose value is falsy.
  def count_invalid_words
    dictionary_words.each_value.count { |valid| !valid }
  end

  private

  attr_writer :dictionary_metadata_object, :dictionary_words

  # We unconditionally attach metadata to this dictionary. DictionaryMetadata
  # only attaches the metadata services that are turned "on".
  def create_dictionary_metadata
    Metadata::DictionaryMetadata.new(dictionary_words: dictionary_words,
      dictionary_key: dictionary_key,
      dictionary_cache: dictionary_cache,
      dictionary_metadata: dictionary_metadata)
  end
end
|
|
125
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'modules/language_validatable'
|
|
4
|
+
require_relative 'modules/locale'
|
|
5
|
+
require_relative 'modules/region_validatable'
|
|
6
|
+
require_relative 'modules/taggable'
|
|
7
|
+
|
|
8
|
+
module LittleWeasel
|
|
9
|
+
# A unique key associated with a particular dictionary file. Dictionary
# keys identify the dictionary on which an action should be performed.
# The validate_*, normalize_*, locale, tag and tagged? helpers used below
# are not defined in this class; presumably they come from the included
# modules.
class DictionaryKey
  include Modules::LanguageValidatable
  include Modules::Locale
  include Modules::RegionValidatable
  include Modules::Taggable

  attr_reader :language, :region

  # Convenience shortcut: builds a DictionaryKey from the arguments and
  # returns its #key.
  def self.key(language:, region: nil, tag: nil)
    new(language: language, region: region, tag: tag).key
  end

  # Validates each argument before storing it; language and region are
  # stored in their normalized form, tag is stored as given.
  def initialize(language:, region: nil, tag: nil)
    validate_language language: language
    self.language = self.class.normalize_language language

    validate_region region: region
    self.region = self.class.normalize_region region

    validate_tag tag: tag
    self.tag = tag
  end

  # The key: the locale on its own, or the locale and the tag joined by a
  # dash when the key is tagged.
  def key
    tagged? ? "#{locale}-#{tag}" : locale
  end
  alias_method :to_s, :key

  private

  attr_writer :language, :region
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/module/delegation'
|
|
4
|
+
require_relative 'dictionary_key'
|
|
5
|
+
require_relative 'modules/dictionary_key_validatable'
|
|
6
|
+
|
|
7
|
+
module LittleWeasel
|
|
8
|
+
# This class provides dictionary management functionality. It owns the
# dictionary cache and dictionary metadata Hashes and hands them to the
# services that create, fetch and kill dictionaries.
class DictionaryManager
  include Modules::DictionaryKeyValidatable

  attr_reader :dictionary_cache, :dictionary_metadata

  # Creates the empty cache and metadata Hashes and initializes them
  # through #init.
  def initialize
    self.dictionary_cache = {}
    self.dictionary_metadata = {}
    init
  end

  # Returns the Dictionary object associated with the dictionary key.
  #
  # What happens when no dictionary exists for the key is decided by the
  # cache service's #dictionary_object!, not by this method.
  # TODO: Raise an error here or let the service handle it?
  def dictionary_for(dictionary_key:)
    validate_dictionary_key dictionary_key: dictionary_key

    dictionary_cache_service(dictionary_key: dictionary_key).dictionary_object!
  end

  # Adds a dictionary file source, creates the dictionary and returns the
  # Dictionary object.
  def create_dictionary_from_file(dictionary_key:, file:, word_filters: nil, word_preprocessors: nil)
    validate_dictionary_key dictionary_key: dictionary_key

    dictionary_creator_service(dictionary_key: dictionary_key, word_filters: word_filters,
      word_preprocessors: word_preprocessors).from_file_source file: file
  end

  # Adds a dictionary memory source, creates the dictionary and returns the
  # Dictionary object.
  def create_dictionary_from_memory(dictionary_key:, dictionary_words:, word_filters: nil, word_preprocessors: nil)
    validate_dictionary_key dictionary_key: dictionary_key

    dictionary_creator_service(dictionary_key: dictionary_key, word_filters: word_filters,
      word_preprocessors: word_preprocessors).from_memory_source dictionary_words: dictionary_words
  end

  # Removes any and all traces of the dictionary associated with the
  # dictionary key from the dictionary cache - the Dictionary object, file
  # reference and any metadata associated with the dictionary are completely
  # removed from the dictionary cache.
  #
  # @return [DictionaryManager] self, so calls can be chained.
  def kill_dictionary(dictionary_key:)
    validate_dictionary_key dictionary_key: dictionary_key

    dictionary_killer_service(dictionary_key: dictionary_key).execute
    self
  end

  # Resets the cache and metadata by clearing it out completely.
  #
  # @return [DictionaryManager] self, so calls can be chained.
  def init
    Services::DictionaryCacheService.init dictionary_cache: dictionary_cache
    Services::DictionaryMetadataService.init dictionary_metadata: dictionary_metadata
    self
  end

  private

  attr_writer :dictionary_cache, :dictionary_metadata

  # Builds a cache service bound to the dictionary key and this manager's
  # cache.
  def dictionary_cache_service(dictionary_key:)
    Services::DictionaryCacheService.new dictionary_key: dictionary_key, dictionary_cache: dictionary_cache
  end

  # Builds a creator service bound to the dictionary key and this manager's
  # cache and metadata.
  def dictionary_creator_service(dictionary_key:, word_filters:, word_preprocessors:)
    Services::DictionaryCreatorService.new dictionary_key: dictionary_key, dictionary_cache: dictionary_cache,
      dictionary_metadata: dictionary_metadata, word_filters: word_filters,
      word_preprocessors: word_preprocessors
  end

  # Builds a killer service bound to the dictionary key and this manager's
  # cache and metadata.
  def dictionary_killer_service(dictionary_key:)
    Services::DictionaryKillerService.new dictionary_key: dictionary_key, dictionary_cache: dictionary_cache,
      dictionary_metadata: dictionary_metadata
  end
end
|
|
85
|
+
end
|