LittleWeasel 3.0.3 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/.reek.yml +17 -0
- data/.rspec +4 -2
- data/.rubocop.yml +187 -0
- data/.ruby-version +1 -1
- data/.yardopts +2 -0
- data/CHANGELOG.md +22 -1
- data/Gemfile +3 -1
- data/Jenkinsfile +20 -0
- data/LittleWeasel.gemspec +31 -18
- data/README.md +408 -42
- data/Rakefile +296 -3
- data/lib/LittleWeasel/block_results.rb +81 -0
- data/lib/LittleWeasel/configure.rb +98 -0
- data/lib/LittleWeasel/dictionary.rb +125 -0
- data/lib/LittleWeasel/dictionary_key.rb +48 -0
- data/lib/LittleWeasel/dictionary_manager.rb +91 -0
- data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
- data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
- data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
- data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
- data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
- data/lib/LittleWeasel/filters/word_filter.rb +59 -0
- data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
- data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
- data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
- data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
- data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
- data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
- data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
- data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
- data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
- data/lib/LittleWeasel/metadata/metadatable.rb +134 -0
- data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
- data/lib/LittleWeasel/modules/configurable.rb +26 -0
- data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
- data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
- data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
- data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +17 -0
- data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +15 -0
- data/lib/LittleWeasel/modules/dictionary_source_validatable.rb +15 -0
- data/lib/LittleWeasel/modules/dictionary_sourceable.rb +86 -0
- data/lib/LittleWeasel/modules/dictionary_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/language.rb +24 -0
- data/lib/LittleWeasel/modules/language_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/locale.rb +23 -0
- data/lib/LittleWeasel/modules/order_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/orderable.rb +17 -0
- data/lib/LittleWeasel/modules/region.rb +24 -0
- data/lib/LittleWeasel/modules/region_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/tag_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/taggable.rb +31 -0
- data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +29 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +56 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +59 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +123 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
- data/lib/LittleWeasel/services/dictionary_cache_service.rb +211 -0
- data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
- data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
- data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
- data/lib/LittleWeasel/services/dictionary_metadata_service.rb +116 -0
- data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
- data/lib/LittleWeasel/version.rb +3 -1
- data/lib/LittleWeasel/word_results.rb +146 -0
- data/lib/LittleWeasel.rb +5 -184
- data/spec/factories/dictionary.rb +43 -0
- data/spec/factories/dictionary_cache_service.rb +95 -0
- data/spec/factories/dictionary_creator_service.rb +16 -0
- data/spec/factories/dictionary_file_loader_service.rb +13 -0
- data/spec/factories/dictionary_hash.rb +39 -0
- data/spec/factories/dictionary_key.rb +14 -0
- data/spec/factories/dictionary_killer_service.rb +14 -0
- data/spec/factories/dictionary_manager.rb +10 -0
- data/spec/factories/dictionary_metadata.rb +16 -0
- data/spec/factories/dictionary_metadata_service.rb +16 -0
- data/spec/factories/numeric_filter.rb +12 -0
- data/spec/factories/preprocessed_word.rb +16 -0
- data/spec/factories/preprocessed_words.rb +41 -0
- data/spec/factories/single_character_word_filter.rb +12 -0
- data/spec/factories/word_results.rb +16 -0
- data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
- data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
- data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
- data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +166 -0
- data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
- data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
- data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
- data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
- data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
- data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
- data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
- data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
- data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
- data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
- data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
- data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
- data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
- data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
- data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +81 -0
- data/spec/lib/LittleWeasel/modules/language_spec.rb +112 -0
- data/spec/lib/LittleWeasel/modules/locale_spec.rb +95 -0
- data/spec/lib/LittleWeasel/modules/region_spec.rb +112 -0
- data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +242 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +218 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
- data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
- data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
- data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
- data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
- data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
- data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
- data/spec/spec_helper.rb +117 -6
- data/spec/support/factory_bot.rb +15 -0
- data/spec/support/file_helpers.rb +46 -0
- data/spec/support/files/empty-dictionary.txt +0 -0
- data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
- data/spec/support/files/en-US-tagged.txt +26 -0
- data/spec/support/files/en-US.txt +26 -0
- data/spec/support/files/en.txt +26 -0
- data/spec/support/files/es-ES.txt +27 -0
- data/spec/support/files/es.txt +27 -0
- data/spec/support/general_helpers.rb +68 -0
- data/spec/support/shared_contexts.rb +107 -0
- data/spec/support/shared_examples.rb +105 -0
- metadata +378 -38
- data/spec/checker/checker_spec.rb +0 -286
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../modules/dictionary_cache_servicable'
|
|
4
|
+
require_relative '../modules/dictionary_keyable'
|
|
5
|
+
require_relative '../modules/dictionary_metadata_validatable'
|
|
6
|
+
|
|
7
|
+
module LittleWeasel
|
|
8
|
+
module Services
|
|
9
|
+
# This class provides methods for managing and manipulating the
|
|
10
|
+
# dictionary metadata associated with the given dictionary
|
|
11
|
+
# (dictionary_key) for the supplied metadata_key.
|
|
12
|
+
class DictionaryMetadataService
|
|
13
|
+
include Modules::DictionaryKeyable
|
|
14
|
+
include Modules::DictionaryCacheServicable
|
|
15
|
+
include Modules::DictionaryMetadataValidatable
|
|
16
|
+
|
|
17
|
+
attr_reader :dictionary_metadata
|
|
18
|
+
|
|
19
|
+
# @example metadata Hash structure:
|
|
20
|
+
#
|
|
21
|
+
# {
|
|
22
|
+
# <dictionary_id!> =>
|
|
23
|
+
# {
|
|
24
|
+
# :<metadata_key> => <metadata_object>
|
|
25
|
+
# },
|
|
26
|
+
# ...
|
|
27
|
+
# }
|
|
28
|
+
# }
|
|
29
|
+
def initialize(dictionary_key:, dictionary_cache:, dictionary_metadata:)
|
|
30
|
+
validate_dictionary_key dictionary_key: dictionary_key
|
|
31
|
+
self.dictionary_key = dictionary_key
|
|
32
|
+
|
|
33
|
+
validate_dictionary_cache dictionary_cache: dictionary_cache
|
|
34
|
+
self.dictionary_cache = dictionary_cache
|
|
35
|
+
|
|
36
|
+
validate_dictionary_metadata dictionary_metadata: dictionary_metadata
|
|
37
|
+
self.dictionary_metadata = dictionary_metadata
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
class << self
|
|
41
|
+
# This method initializes the dictionary_metadata object to its
|
|
42
|
+
# initialized state - all data is lost, but the object reference is
|
|
43
|
+
# maintained.
|
|
44
|
+
def init(dictionary_metadata:)
|
|
45
|
+
Modules::DictionaryMetadataValidatable.validate_dictionary_metadata \
|
|
46
|
+
dictionary_metadata: dictionary_metadata
|
|
47
|
+
|
|
48
|
+
dictionary_metadata.each_key { |key| dictionary_metadata.delete(key) }
|
|
49
|
+
dictionary_metadata
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Returns true if the dictionary metadata is initialized; that is, if
|
|
53
|
+
# it's in the same state the dictionary metadata would be in if #init
|
|
54
|
+
# were called.
|
|
55
|
+
def init?(dictionary_metadata:)
|
|
56
|
+
Modules::DictionaryMetadataValidatable.validate_dictionary_metadata \
|
|
57
|
+
dictionary_metadata: dictionary_metadata
|
|
58
|
+
|
|
59
|
+
initialized_dictionary_metadata = init(dictionary_metadata: {})
|
|
60
|
+
dictionary_metadata.eql?(initialized_dictionary_metadata)
|
|
61
|
+
end
|
|
62
|
+
alias initialized? init?
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# This method initializes the dictionary metadata
|
|
66
|
+
# associated with the dictionary_id! and metadata_key.
|
|
67
|
+
def init(metadata_key:)
|
|
68
|
+
metadata = dictionary_metadata[dictionary_id!]
|
|
69
|
+
metadata&.delete(metadata_key)
|
|
70
|
+
metadata = dictionary_metadata_init_if
|
|
71
|
+
metadata[metadata_key] = nil
|
|
72
|
+
self
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# This method will return true if metadata exists for the dictionary
|
|
76
|
+
# associated with the given dictionary key, for the given metadata key.
|
|
77
|
+
def dictionary_metadata?(metadata_key:)
|
|
78
|
+
dictionary_metadata.dig(dictionary_id, metadata_key)&.present? || false
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def get_dictionary_metadata(metadata_key:)
|
|
82
|
+
dictionary_metadata.dig(dictionary_id!, metadata_key)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def set_dictionary_metadata(value:, metadata_key:)
|
|
86
|
+
dictionary_metadata[dictionary_id!][metadata_key] = value
|
|
87
|
+
self
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
private
|
|
91
|
+
|
|
92
|
+
attr_writer :dictionary_metadata
|
|
93
|
+
|
|
94
|
+
def dictionary_metadata_init_needed?
|
|
95
|
+
dictionary_metadata[dictionary_id!].blank?
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# This method initializes the metadata for the
|
|
99
|
+
# dictionary_id! if it is not already initialized.
|
|
100
|
+
# The metadata for the given dictionary_id! is returned.
|
|
101
|
+
def dictionary_metadata_init_if
|
|
102
|
+
return dictionary_metadata[dictionary_id!] unless dictionary_metadata_init_needed?
|
|
103
|
+
|
|
104
|
+
dictionary_metadata[dictionary_id!] = {}
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def dictionary_id
|
|
108
|
+
dictionary_cache_service.dictionary_id
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def dictionary_id!
|
|
112
|
+
dictionary_cache_service.dictionary_id!
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../metadata/invalid_words_service_results'
|
|
4
|
+
|
|
5
|
+
module LittleWeasel
|
|
6
|
+
module Services
|
|
7
|
+
# This class calculates the total amount of bytes cached invalid words take
|
|
8
|
+
# up in the given dictionary and returns the results. In addition to this,
|
|
9
|
+
# metadata is also compiled to determine how many more bytes of invalid
|
|
10
|
+
# word data can be cached before the cache is depleted and shut down.
|
|
11
|
+
class InvalidWordsService
|
|
12
|
+
def initialize(dictionary)
|
|
13
|
+
self.dictionary = dictionary
|
|
14
|
+
self.current_bytesize = 0
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def execute
|
|
18
|
+
return build_return unless max_invalid_words_bytesize?
|
|
19
|
+
|
|
20
|
+
self.current_bytesize = calculate_current_bytesize
|
|
21
|
+
build_return
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
private
|
|
25
|
+
|
|
26
|
+
attr_accessor :current_bytesize, :dictionary
|
|
27
|
+
|
|
28
|
+
# Sums the bytesize of every cached invalid word in the dictionary.
#
# The dictionary is enumerated as [word, found] pairs; only pairs whose
# last element is falsy (i.e. invalid words) contribute to the total.
# Enumeration stops as soon as the running total is no longer below
# max_invalid_words_bytesize.
#
# @return [Integer] the accumulated bytesize; always an Integer, even
#   when enumeration stops early because the limit was reached.
def calculate_current_bytesize
  dictionary.reduce(0) do |bytesize, word_and_found|
    # Valid (found) words take up no invalid-word cache space.
    next bytesize if word_and_found.last

    bytesize += word_and_found.first.bytesize
    # A bare `break` would make #reduce return nil; pass the total through
    # so callers always receive the bytesize accumulated so far.
    break bytesize unless bytesize < max_invalid_words_bytesize

    bytesize
  end
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def build_return
|
|
39
|
+
Metadata::InvalidWordsServiceResults.new(
|
|
40
|
+
max_invalid_words_bytesize_on: max_invalid_words_bytesize?,
|
|
41
|
+
current_invalid_word_bytesize: current_bytesize,
|
|
42
|
+
max_invalid_words_bytesize: max_invalid_words_bytesize
|
|
43
|
+
)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def max_invalid_words_bytesize
|
|
47
|
+
@max_invalid_words_bytesize ||= config.max_invalid_words_bytesize
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def max_invalid_words_bytesize?
|
|
51
|
+
config.max_invalid_words_bytesize?
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def config
|
|
55
|
+
@config ||= LittleWeasel.configuration
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
data/lib/LittleWeasel/version.rb
CHANGED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/module/delegation'
|
|
4
|
+
require_relative 'modules/word_results_validatable'
|
|
5
|
+
require_relative 'preprocessors/preprocessed_words_validatable'
|
|
6
|
+
|
|
7
|
+
module LittleWeasel
|
|
8
|
+
# This class represents the results of gathering information about a word.
|
|
9
|
+
class WordResults
|
|
10
|
+
include Modules::WordResultsValidatable
|
|
11
|
+
include Preprocessors::PreprocessedWordsValidatable
|
|
12
|
+
|
|
13
|
+
attr_reader :filters_matched, :original_word, :preprocessed_words, :word_cached, :word_valid
|
|
14
|
+
|
|
15
|
+
delegate :preprocessed_word, to: :preprocessed_words, allow_nil: true
|
|
16
|
+
|
|
17
|
+
# Important: Regarding Boolean Methods
|
|
18
|
+
#
|
|
19
|
+
# The return value of some of the boolean methods (i.e. methods ending with
|
|
20
|
+
# a '?') of this class depend on whether or not #original_word
|
|
21
|
+
# has passed through any preprocessing. If #original_word has passed
|
|
22
|
+
# through preprocessing, the following boolean methods will reflect
|
|
23
|
+
# that of #preprocessed_word; if #original_word has NOT passed through
|
|
24
|
+
# any preprocessing, the following methods will reflect that of
|
|
25
|
+
# #original_word:
|
|
26
|
+
#
|
|
27
|
+
# #success?
|
|
28
|
+
# #filter_match?
|
|
29
|
+
# #word_cached?
|
|
30
|
+
# #word_valid?
|
|
31
|
+
#
|
|
32
|
+
# In other words, if #original_word has passed through preprocessing
|
|
33
|
+
# and has been altered by any of the preprocessing modules, it is the
|
|
34
|
+
# #preprocessed_word that is passed through any subsequent word filters,
|
|
35
|
+
# checked against the dictionary for validity, and cached, NOT
|
|
36
|
+
# #original_word.
|
|
37
|
+
# :reek:BooleanParameter - ignored, boolean params do not determine logic path, but only report status.
|
|
38
|
+
def initialize(original_word:, filters_matched: [],
|
|
39
|
+
preprocessed_words: nil, word_cached: false, word_valid: false)
|
|
40
|
+
|
|
41
|
+
self.original_word = original_word
|
|
42
|
+
self.filters_matched = filters_matched
|
|
43
|
+
self.word_cached = word_cached
|
|
44
|
+
self.word_valid = word_valid
|
|
45
|
+
self.preprocessed_words = preprocessed_words
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def original_word=(value)
|
|
49
|
+
@original_word = value
|
|
50
|
+
validate_original_word
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def filters_matched=(value)
|
|
54
|
+
@filters_matched = value
|
|
55
|
+
validate_filters_matched
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def word_cached=(value)
|
|
59
|
+
@word_cached = value
|
|
60
|
+
validate_word_cached
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def word_valid=(value)
|
|
64
|
+
@word_valid = value
|
|
65
|
+
vaidate_word_valid
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def preprocessed_words=(value)
|
|
69
|
+
if value.present?
|
|
70
|
+
validate_prepreprocessed_words preprocessed_words: value
|
|
71
|
+
@preprocessed_words = value
|
|
72
|
+
else
|
|
73
|
+
@preprocessed_words = nil
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Returns true if the word is valid (found in the dictionary), or
|
|
78
|
+
# the word was matched against at least one filter; false, otherwise.
|
|
79
|
+
#
|
|
80
|
+
# Use the results of this method if you want to consider a word's
|
|
81
|
+
# validity as having been found in the dictionary as a valid word OR
|
|
82
|
+
# if the word has at least one word filter match. If the word has
|
|
83
|
+
# NOT passed through any word filters, or if word DID NOT match any
|
|
84
|
+
# filters, yet, it was found as a valid word in the dictionary, this
|
|
85
|
+
# method will return true and vice versa.
|
|
86
|
+
#
|
|
87
|
+
# See "Important: Regarding Boolean Methods" notes at the top of this
|
|
88
|
+
# class definition for more detail.
|
|
89
|
+
def success?
|
|
90
|
+
filter_match? || word_valid?
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Returns true if the word was found in the dictionary; false, otherwise.
|
|
94
|
+
#
|
|
95
|
+
# Use the results of this method if you want to consider a word's
|
|
96
|
+
# validity irrespective of whether or not the word has matched any word
|
|
97
|
+
# filters (if any).
|
|
98
|
+
#
|
|
99
|
+
# See "Important: Regarding Boolean Methods" notes at the top of this
|
|
100
|
+
# class definition for more detail.
|
|
101
|
+
def word_valid?
|
|
102
|
+
word_valid
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Returns true if the word was matched against at least one filter;
|
|
106
|
+
# false, otherwise.
|
|
107
|
+
#
|
|
108
|
+
# See "Important: Regarding Boolean Methods" notes at the top of this
|
|
109
|
+
# class definition for more detail.
|
|
110
|
+
def filter_match?
|
|
111
|
+
filters_matched.present?
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Returns true if #original_word passed through any preprocessing. If
|
|
115
|
+
# this is the case, #preprocessed_word may be different than
|
|
116
|
+
# #original_word. Preprocessing should take place before any filtering
|
|
117
|
+
# takes place.
|
|
118
|
+
#
|
|
119
|
+
# #word_cached, #word_valid and #filters_matched should all
|
|
120
|
+
# reflect that of the #preprocessed_word if #preprocessed_word is
|
|
121
|
+
# present?; otherwise, they should all reflect that of #original_word.
|
|
122
|
+
def preprocessed_word?
|
|
123
|
+
preprocessed_word.present?
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Returns #preprocessed_word (if available) or #original_word.
|
|
127
|
+
# #preprocessed_word will be present if #original_word has
|
|
128
|
+
# met the criteria for preprocessing and passed through at least
|
|
129
|
+
# one preprocessor.
|
|
130
|
+
#
|
|
131
|
+
# See "Important: Regarding Boolean Methods" notes at the top of this
|
|
132
|
+
# class definition for more detail.
|
|
133
|
+
def preprocessed_word_or_original_word
|
|
134
|
+
preprocessed_word || original_word
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
# Returns true if the word was found in the dictionary as a valid word
|
|
138
|
+
# OR if the word was found in the cache as an invalid word.
|
|
139
|
+
#
|
|
140
|
+
# See "Important: Regarding Boolean Methods" notes at the top of this
|
|
141
|
+
# class definition for more detail.
|
|
142
|
+
def word_cached?
|
|
143
|
+
word_cached
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
end
|
data/lib/LittleWeasel.rb
CHANGED
|
@@ -1,186 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
require "LittleWeasel/version"
|
|
3
|
-
require 'active_support/inflector'
|
|
1
|
+
# frozen_string_literal: true
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
require 'active_support/core_ext/object/blank'
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
# Returns the dictionary.
|
|
12
|
-
#
|
|
13
|
-
# @return [Hash] the dictionary.
|
|
14
|
-
attr_reader :dictionary
|
|
15
|
-
|
|
16
|
-
private
|
|
17
|
-
|
|
18
|
-
attr_reader :alphabet_exclusion_list
|
|
19
|
-
|
|
20
|
-
# Keep these private...will expose as options later.
|
|
21
|
-
attr_accessor :word_regex, :numeric_regex, :non_wordchar_regex
|
|
22
|
-
|
|
23
|
-
public
|
|
24
|
-
|
|
25
|
-
# The constructor
|
|
26
|
-
def initialize
|
|
27
|
-
@options = { exclude_alphabet: false, strip_whitespace: false, ignore_numeric: true, single_word_mode: false }
|
|
28
|
-
@alphabet_exclusion_list = %w{ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z }
|
|
29
|
-
@numeric_regex = /^[-+]?[0-9]?(\.[0-9]+)?$+/
|
|
30
|
-
@word_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
|
|
31
|
-
@non_wordchar_regex = /\W+/
|
|
32
|
-
@dictionary = Hash.new(1)
|
|
33
|
-
load
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Interrogates the dictionary to determine whether or not [word] exists.
|
|
37
|
-
#
|
|
38
|
-
# @param [String] word the word or words to interrogate
|
|
39
|
-
# @param [Hash] options options to apply to this query (see #options=). Options passed to this
|
|
40
|
-
# method are applied for this query only.
|
|
41
|
-
#
|
|
42
|
-
# @return [Boolean] true if the word/words in *word* exists, false otherwise.
|
|
43
|
-
#
|
|
44
|
-
# @example
|
|
45
|
-
#
|
|
46
|
-
# LittleWeasel::Checker.instance.exists?('C') # true (default options, :exclude_alphabet => false)
|
|
47
|
-
# LittleWeasel::Checker.instance.exists?('A', {exclude_alphabet:true}) # false
|
|
48
|
-
# LittleWeasel::Checker.instance.exists?('X', {exclude_alphabet:false}) # true
|
|
49
|
-
# LittleWeasel::Checker.instance.exists?('Hello') # true
|
|
50
|
-
#
|
|
51
|
-
# LittleWeasel::Checker.instance.exists?(' Hello ') # false (default options, :strip_whitespace => false)
|
|
52
|
-
# LittleWeasel::Checker.instance.exists?(' Yes ', {strip_whitespace:true}) # true
|
|
53
|
-
# LittleWeasel::Checker.instance.exists?('No ', {strip_whitespace:false}) # false
|
|
54
|
-
# LittleWeasel::Checker.instance.exists?('How dy', {strip_whitespace:true}) # false, strip_whitespace only removes leading and trailing spaces
|
|
55
|
-
#
|
|
56
|
-
# LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
|
|
57
|
-
# LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
|
|
58
|
-
#
|
|
59
|
-
# LittleWeasel::Checker.instance.exists?('Hello World') # true, we're accepting multiple words now by default (default options, single_word_mode => false) :)
|
|
60
|
-
# LittleWeasel::Checker.instance.exists?("hello, mister; did I \'mention\'' that lemon cake is \"great?\" It's just wonderful!") # true
|
|
61
|
-
#
|
|
62
|
-
# LittleWeasel::Checker.instance.exists?('I love ice cream', {single_word_mode:true}) # false; while all the words are valid, more than one word will return false
|
|
63
|
-
#
|
|
64
|
-
def exists?(word, options=nil)
|
|
65
|
-
options = options || @options
|
|
66
|
-
|
|
67
|
-
return false unless word.is_a?(String)
|
|
68
|
-
|
|
69
|
-
word = word.dup
|
|
70
|
-
word.strip! if options[:strip_whitespace]
|
|
71
|
-
|
|
72
|
-
return false if word.empty?
|
|
73
|
-
|
|
74
|
-
if block? word
|
|
75
|
-
return false if options[:single_word_mode]
|
|
76
|
-
return block_exists? word
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
return true if options[:ignore_numeric] && number?(word)
|
|
80
|
-
return false if options[:exclude_alphabet] && word.length == 1 && @alphabet_exclusion_list.include?(word.upcase)
|
|
81
|
-
|
|
82
|
-
valid_word? word
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# Sets the global options for this gem.
|
|
86
|
-
#
|
|
87
|
-
# @param [Hash] options options that should apply to all subsequent calls to method *exists?* (see #exists?).
|
|
88
|
-
# Options set via this property apply to all subsequent queries.
|
|
89
|
-
#
|
|
90
|
-
# @option options [Boolean] :exclude_alphabet (false) If false, letters of the alphabet are considered words.
|
|
91
|
-
# @option options [Boolean] :strip_whitespace (false) If true, leading and trailing spaces are removed before checking to see if the word exists.
|
|
92
|
-
# @option options [Boolean] :ignore_numeric (true) If true, numeric values are considered valid words.
|
|
93
|
-
# @option options [Boolean] :single_word_mode (false) If false, word blocks (more than one word) are considered valid if all the words exist in the dictionary.
|
|
94
|
-
#
|
|
95
|
-
# @return [Hash] The options
|
|
96
|
-
#
|
|
97
|
-
# @example
|
|
98
|
-
# LittleWeasel::Checker.instance.options({exclude_alphabet:true})
|
|
99
|
-
# LittleWeasel::Checker.instance.exists?('A') # false
|
|
100
|
-
#
|
|
101
|
-
# LittleWeasel::Checker.instance.options({exclude_alphabet:false})
|
|
102
|
-
# LittleWeasel::Checker.instance.exists?('A') # true
|
|
103
|
-
#
|
|
104
|
-
# LittleWeasel::Checker.instance.options({strip_whitespace:false})
|
|
105
|
-
# LittleWeasel::Checker.instance.exists?(' Hello ') # false
|
|
106
|
-
# LittleWeasel::Checker.instance.exists?('No ') # false
|
|
107
|
-
# LittleWeasel::Checker.instance.exists?(' No') # false
|
|
108
|
-
#
|
|
109
|
-
# LittleWeasel::Checker.instance.options({strip_whitespace:true})
|
|
110
|
-
# LittleWeasel::Checker.instance.exists?(' Yes ') # true
|
|
111
|
-
# LittleWeasel::Checker.instance.exists?('How dy') # false, strip_whitespace only removes leading and trailing spaces
|
|
112
|
-
#
|
|
113
|
-
# LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
|
|
114
|
-
# LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
|
|
115
|
-
# LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210') # true (default options, ignore_numeric => true)
|
|
116
|
-
# LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210', {ignore_numeric:false}) # false
|
|
117
|
-
#
|
|
118
|
-
# LittleWeasel::Checker.instance.options({single_word_mode:true})
|
|
119
|
-
# LittleWeasel::Checker.instance.exists?('I love ice cream') # false; while all the words are valid, more than one word will return false
|
|
120
|
-
# LittleWeasel::Checker.instance.exists?('Baby') # true
|
|
121
|
-
#
|
|
122
|
-
def options=(options)
|
|
123
|
-
@options = options
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
# Gets the global options currently set for this gem.
|
|
127
|
-
#
|
|
128
|
-
# @return [Hash] The options
|
|
129
|
-
def options
|
|
130
|
-
@options
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
protected
|
|
134
|
-
|
|
135
|
-
def number?(word)
|
|
136
|
-
word.strip.gsub(@numeric_regex).count > 0
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
def block?(string)
|
|
140
|
-
string = string.dup
|
|
141
|
-
return false unless string.is_a?(String)
|
|
142
|
-
string.gsub!(@numeric_regex, "")
|
|
143
|
-
return false unless string.length > 1
|
|
144
|
-
string.strip.scan(/[\w'-]+/).length > 1
|
|
145
|
-
end
|
|
146
|
-
|
|
147
|
-
def block_exists?(word_block)
|
|
148
|
-
word_block = word_block.dup
|
|
149
|
-
|
|
150
|
-
word_block.gsub!(@numeric_regex, "") if options[:ignore_numeric]
|
|
151
|
-
return false if word_block.nil?
|
|
152
|
-
word_block.strip! unless word_block.nil?
|
|
153
|
-
word_block.gsub!(@non_wordchar_regex, " ")
|
|
154
|
-
word_block.split(@word_regex).uniq.each { |word|
|
|
155
|
-
return false unless valid_block_word?(word)
|
|
156
|
-
}
|
|
157
|
-
return true
|
|
158
|
-
end
|
|
159
|
-
|
|
160
|
-
def valid_word?(word)
|
|
161
|
-
word = word.dup.downcase
|
|
162
|
-
exists = dictionary.has_key?(word)
|
|
163
|
-
exists = dictionary.has_key?(word.singularize) unless exists
|
|
164
|
-
exists
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
def valid_block_word?(word)
|
|
168
|
-
return true if word.length == 1
|
|
169
|
-
valid_word? word.strip
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
private
|
|
173
|
-
|
|
174
|
-
def dictionary_path
|
|
175
|
-
File.expand_path(File.dirname(__FILE__) + '/dictionary')
|
|
176
|
-
end
|
|
177
|
-
|
|
178
|
-
def load
|
|
179
|
-
File.open(dictionary_path) do |io|
|
|
180
|
-
io.each { |line| line.chomp!; @dictionary[line] = line }
|
|
181
|
-
end
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
end
|
|
185
|
-
|
|
186
|
-
end
|
|
5
|
+
# Load every source file that lives under the LittleWeasel directory next
# to this file. The glob is anchored to this file's own directory (__dir__)
# instead of the process's current working directory, so the library loads
# regardless of where the host application was started from.
# Sorting gives a deterministic load order on every Ruby version/platform.
Dir[File.join(__dir__, 'LittleWeasel', '**', '*.rb')].sort.each do |file|
  require file
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: false
|
|
2
|
+
|
|
3
|
+
FactoryBot.define do
|
|
4
|
+
factory :dictionary, class: LittleWeasel::Dictionary do
|
|
5
|
+
dictionary_key { create(:dictionary_key) }
|
|
6
|
+
dictionary_cache { {} }
|
|
7
|
+
dictionary_metadata { {} }
|
|
8
|
+
word_filters {}
|
|
9
|
+
dictionary_words do
|
|
10
|
+
%w(apple
|
|
11
|
+
better
|
|
12
|
+
cat
|
|
13
|
+
dog
|
|
14
|
+
everyone
|
|
15
|
+
fat
|
|
16
|
+
game
|
|
17
|
+
help
|
|
18
|
+
italic
|
|
19
|
+
jasmine
|
|
20
|
+
kelp
|
|
21
|
+
love
|
|
22
|
+
man
|
|
23
|
+
nope
|
|
24
|
+
octopus
|
|
25
|
+
popeye
|
|
26
|
+
queue
|
|
27
|
+
ruby
|
|
28
|
+
stop
|
|
29
|
+
top
|
|
30
|
+
ultimate
|
|
31
|
+
very
|
|
32
|
+
was
|
|
33
|
+
xylophone
|
|
34
|
+
yes
|
|
35
|
+
zebra)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
skip_create
|
|
39
|
+
initialize_with do
|
|
40
|
+
new dictionary_key: dictionary_key, dictionary_cache: dictionary_cache, dictionary_metadata: dictionary_metadata, dictionary_words: dictionary_words, word_filters: word_filters
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: false
|
|
2
|
+
|
|
3
|
+
require 'pry'
|
|
4
|
+
|
|
5
|
+
FactoryBot.define do
|
|
6
|
+
factory :dictionary_cache_service, class: LittleWeasel::Services::DictionaryCacheService do
|
|
7
|
+
dictionary_key { create(:dictionary_key) }
|
|
8
|
+
dictionary_cache { {} }
|
|
9
|
+
|
|
10
|
+
transient do
|
|
11
|
+
# The dictionary reference created in the cache will point to a MEMORY source.
|
|
12
|
+
#
|
|
13
|
+
# Valid values: nil | true | false | <Array of dictionary words>
|
|
14
|
+
#
|
|
15
|
+
# If nil or false - No memory source will be added to the dictionary cache.
|
|
16
|
+
# If true - A memory source will be added to the dictionary cache.
|
|
17
|
+
# If <An Array of dictionary words> - A memory source will be added to the dictionary cache.
|
|
18
|
+
# This only makes sense if load == true.
|
|
19
|
+
dictionary_memory_source {}
|
|
20
|
+
|
|
21
|
+
# The dictionary reference created in the cache will point to a FILE source.
|
|
22
|
+
#
|
|
23
|
+
# Important: dictionary_file_source takes precedence; dictionary_memory_source
|
|
24
|
+
# is only used if dictionary_file_source is nil or false.
|
|
25
|
+
#
|
|
26
|
+
# Valid values: nil | true | false | <Path to dictionary file>
|
|
27
|
+
#
|
|
28
|
+
# If nil or false - No file source will be added to the dictionary cache.
|
|
29
|
+
#
|
|
30
|
+
# If true - A file source will be added to the dictionary cache.
|
|
31
|
+
# dictionary_key.key will be used to create the dictionary
|
|
32
|
+
# file path.
|
|
33
|
+
# If <Path to dictionary file> - A file source will be added to the dictionary cache.
|
|
34
|
+
# The file source will point to <Path to dictionary file>.
|
|
35
|
+
dictionary_file_source {}
|
|
36
|
+
|
|
37
|
+
# If load == true - A dictionary object will be created and added to the dictionary cache
|
|
38
|
+
# depending on the dictionary source (file or memory).
|
|
39
|
+
load { false }
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
skip_create
|
|
43
|
+
initialize_with do
|
|
44
|
+
new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
after :create do |dictionary_cache_service, evaluator|
|
|
48
|
+
dictionary_key = dictionary_cache_service.dictionary_key
|
|
49
|
+
dictionary_cache = dictionary_cache_service.dictionary_cache
|
|
50
|
+
|
|
51
|
+
# Initialize the dictionary cache unless the user already passed an
|
|
52
|
+
# initialized dictionary cache; otherwise, just use what they passed us.
|
|
53
|
+
dictionary_cache_service.class.init(dictionary_cache: dictionary_cache) \
|
|
54
|
+
unless dictionary_cache_service.class.count(dictionary_cache: dictionary_cache).positive?
|
|
55
|
+
|
|
56
|
+
load = evaluator.load
|
|
57
|
+
dictionary_memory_source = evaluator.dictionary_memory_source
|
|
58
|
+
dictionary_file_source = evaluator.dictionary_file_source
|
|
59
|
+
|
|
60
|
+
if load
|
|
61
|
+
unless dictionary_memory_source.present? || dictionary_file_source.present?
|
|
62
|
+
raise 'Transient attributes dictionary_memory_source or dictionary_file_source ' \
|
|
63
|
+
"must be present if transient attribute load is true: #{dictionary_reference}"
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
if dictionary_file_source
|
|
68
|
+
file_name = if dictionary_file_source == true
|
|
69
|
+
dictionary_key.key
|
|
70
|
+
else
|
|
71
|
+
dictionary_file_source
|
|
72
|
+
end
|
|
73
|
+
dictionary_cache_service.add_dictionary_source(dictionary_source: dictionary_path_for(file_name: file_name))
|
|
74
|
+
elsif dictionary_memory_source
|
|
75
|
+
dictionary_cache_service.add_dictionary_source(dictionary_source: LittleWeasel::Modules::DictionarSourceable.memory_source)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
if load
|
|
79
|
+
dictionary_words = if dictionary_file_source
|
|
80
|
+
dictionary_file_loader_service = create(:dictionary_file_loader_service, dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
|
|
81
|
+
dictionary_file_loader_service.execute
|
|
82
|
+
else
|
|
83
|
+
unless dictionary_memory_source.is_a? Array
|
|
84
|
+
raise 'Transient attribute dictionary_memory_source must be an Array of words ' \
|
|
85
|
+
"if transient attribute load == true: #{dictionary_memory_source}"
|
|
86
|
+
end
|
|
87
|
+
dictionary_memory_source
|
|
88
|
+
end
|
|
89
|
+
dictionary_cache_service.dictionary_object = create(:dictionary, dictionary_key: dictionary_key, dictionary_cache: dictionary_cache, dictionary_words: dictionary_words)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
dictionary_cache_service
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: false
|
|
2
|
+
|
|
3
|
+
FactoryBot.define do
|
|
4
|
+
factory :dictionary_creator_service, class: LittleWeasel::Services::DictionaryCreatorService do
|
|
5
|
+
dictionary_key { create(:dictionary_key) }
|
|
6
|
+
dictionary_cache { {} }
|
|
7
|
+
dictionary_metadata { {} }
|
|
8
|
+
word_filters {}
|
|
9
|
+
word_preprocessors {}
|
|
10
|
+
|
|
11
|
+
skip_create
|
|
12
|
+
initialize_with do
|
|
13
|
+
new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache, dictionary_metadata: dictionary_metadata, word_filters: word_filters, word_preprocessors: word_preprocessors)
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: false
|
|
2
|
+
|
|
3
|
+
FactoryBot.define do
|
|
4
|
+
factory :dictionary_file_loader_service, class: LittleWeasel::Services::DictionaryFileLoaderService do
|
|
5
|
+
dictionary_key { create(:dictionary_key) }
|
|
6
|
+
dictionary_cache { {} }
|
|
7
|
+
|
|
8
|
+
skip_create
|
|
9
|
+
initialize_with do
|
|
10
|
+
new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|