LittleWeasel 3.0.4 → 5.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/codeql-analysis.yml +72 -0
- data/.gitignore +19 -17
- data/.reek.yml +17 -0
- data/.rspec +4 -2
- data/.rubocop.yml +187 -0
- data/.ruby-version +1 -1
- data/.yardopts +2 -0
- data/CHANGELOG.md +21 -1
- data/Gemfile +3 -1
- data/Gemfile.lock +114 -0
- data/Jenkinsfile +20 -0
- data/LittleWeasel.gemspec +31 -18
- data/README.md +408 -42
- data/Rakefile +296 -3
- data/lib/LittleWeasel/block_results.rb +81 -0
- data/lib/LittleWeasel/configure.rb +98 -0
- data/lib/LittleWeasel/dictionary.rb +125 -0
- data/lib/LittleWeasel/dictionary_key.rb +48 -0
- data/lib/LittleWeasel/dictionary_manager.rb +91 -0
- data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
- data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
- data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
- data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
- data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
- data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
- data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
- data/lib/LittleWeasel/filters/word_filter.rb +59 -0
- data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
- data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
- data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
- data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
- data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
- data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
- data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
- data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
- data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
- data/lib/LittleWeasel/metadata/metadatable.rb +134 -0
- data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
- data/lib/LittleWeasel/modules/configurable.rb +26 -0
- data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
- data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
- data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
- data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
- data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
- data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +17 -0
- data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
- data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +15 -0
- data/lib/LittleWeasel/modules/dictionary_source_validatable.rb +15 -0
- data/lib/LittleWeasel/modules/dictionary_sourceable.rb +86 -0
- data/lib/LittleWeasel/modules/dictionary_validatable.rb +18 -0
- data/lib/LittleWeasel/modules/language.rb +24 -0
- data/lib/LittleWeasel/modules/language_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/locale.rb +23 -0
- data/lib/LittleWeasel/modules/order_validatable.rb +16 -0
- data/lib/LittleWeasel/modules/orderable.rb +17 -0
- data/lib/LittleWeasel/modules/region.rb +24 -0
- data/lib/LittleWeasel/modules/region_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/tag_validatable.rb +14 -0
- data/lib/LittleWeasel/modules/taggable.rb +31 -0
- data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +29 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +56 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +59 -0
- data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +28 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +123 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
- data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
- data/lib/LittleWeasel/services/dictionary_cache_service.rb +211 -0
- data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
- data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
- data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
- data/lib/LittleWeasel/services/dictionary_metadata_service.rb +116 -0
- data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
- data/lib/LittleWeasel/version.rb +3 -1
- data/lib/LittleWeasel/word_results.rb +146 -0
- data/lib/LittleWeasel.rb +72 -186
- data/spec/factories/dictionary.rb +43 -0
- data/spec/factories/dictionary_cache_service.rb +95 -0
- data/spec/factories/dictionary_creator_service.rb +16 -0
- data/spec/factories/dictionary_file_loader_service.rb +13 -0
- data/spec/factories/dictionary_hash.rb +39 -0
- data/spec/factories/dictionary_key.rb +14 -0
- data/spec/factories/dictionary_killer_service.rb +14 -0
- data/spec/factories/dictionary_manager.rb +10 -0
- data/spec/factories/dictionary_metadata.rb +16 -0
- data/spec/factories/dictionary_metadata_service.rb +16 -0
- data/spec/factories/numeric_filter.rb +12 -0
- data/spec/factories/preprocessed_word.rb +16 -0
- data/spec/factories/preprocessed_words.rb +41 -0
- data/spec/factories/single_character_word_filter.rb +12 -0
- data/spec/factories/word_results.rb +16 -0
- data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
- data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
- data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
- data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +166 -0
- data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
- data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
- data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
- data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
- data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
- data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
- data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
- data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
- data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
- data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
- data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
- data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
- data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
- data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
- data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
- data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +81 -0
- data/spec/lib/LittleWeasel/modules/language_spec.rb +112 -0
- data/spec/lib/LittleWeasel/modules/locale_spec.rb +95 -0
- data/spec/lib/LittleWeasel/modules/region_spec.rb +112 -0
- data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
- data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +242 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +218 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
- data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
- data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
- data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
- data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
- data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
- data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
- data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
- data/spec/spec_helper.rb +117 -6
- data/spec/support/factory_bot.rb +15 -0
- data/spec/support/file_helpers.rb +46 -0
- data/spec/support/files/empty-dictionary.txt +0 -0
- data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
- data/spec/support/files/en-US-tagged.txt +26 -0
- data/spec/support/files/en-US.txt +26 -0
- data/spec/support/files/en.txt +26 -0
- data/spec/support/files/es-ES.txt +27 -0
- data/spec/support/files/es.txt +27 -0
- data/spec/support/general_helpers.rb +68 -0
- data/spec/support/shared_contexts.rb +107 -0
- data/spec/support/shared_examples.rb +105 -0
- metadata +418 -70
- data/spec/checker/checker_spec.rb +0 -286
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../modules/dictionary_cache_servicable'
|
4
|
+
require_relative '../modules/dictionary_keyable'
|
5
|
+
require_relative '../modules/dictionary_metadata_validatable'
|
6
|
+
|
7
|
+
module LittleWeasel
|
8
|
+
module Services
|
9
|
+
# This class provides methods for managing and manipulating the
|
10
|
+
# dictionary metadata associated with the given dictionary,
|
11
|
+
# (dictionary_key) for the supplied metadata_key.
|
12
|
+
class DictionaryMetadataService
  include Modules::DictionaryKeyable
  include Modules::DictionaryCacheServicable
  include Modules::DictionaryMetadataValidatable

  attr_reader :dictionary_metadata

  # Validates each argument and then stores it, in the order
  # dictionary_key, dictionary_cache, dictionary_metadata. The first
  # validation that fails aborts construction.
  #
  # NOTE(review): the validate_* helpers and the dictionary_key= /
  # dictionary_cache= writers are not defined in this class; presumably
  # they are supplied by the included modules - confirm.
  #
  # @example dictionary_metadata Hash structure:
  #
  #   {
  #     <dictionary_id!> =>
  #       {
  #         :<metadata_key> => <metadata_object>
  #       },
  #     ...
  #   }
  def initialize(dictionary_key:, dictionary_cache:, dictionary_metadata:)
    validate_dictionary_key dictionary_key: dictionary_key
    self.dictionary_key = dictionary_key

    validate_dictionary_cache dictionary_cache: dictionary_cache
    self.dictionary_cache = dictionary_cache

    validate_dictionary_metadata dictionary_metadata: dictionary_metadata
    self.dictionary_metadata = dictionary_metadata
  end

  class << self
    # Resets the given dictionary_metadata object to its initialized
    # (empty) state. All data is lost, but the object reference is
    # maintained, so every holder of the Hash observes the reset.
    #
    # @return the same dictionary_metadata object, now empty.
    def init(dictionary_metadata:)
      Modules::DictionaryMetadataValidatable.validate_dictionary_metadata \
        dictionary_metadata: dictionary_metadata

      # Hash#clear empties the Hash in place; this avoids deleting keys
      # from the Hash while it is being iterated.
      dictionary_metadata.clear
      dictionary_metadata
    end

    # Returns true if the dictionary metadata is initialized; that is, if
    # it is in the same state it would be in had .init been called on it.
    def init?(dictionary_metadata:)
      Modules::DictionaryMetadataValidatable.validate_dictionary_metadata \
        dictionary_metadata: dictionary_metadata

      initialized_dictionary_metadata = init(dictionary_metadata: {})
      dictionary_metadata.eql?(initialized_dictionary_metadata)
    end
    alias initialized? init?
  end

  # Initializes the metadata entry for metadata_key under this
  # dictionary's dictionary_id!: any existing entry is removed, the
  # per-dictionary Hash is created if needed, and the entry is set to nil.
  #
  # @return [DictionaryMetadataService] self, to allow chaining.
  def init(metadata_key:)
    metadata = dictionary_metadata[dictionary_id!]
    metadata&.delete(metadata_key)
    metadata = dictionary_metadata_init_if
    metadata[metadata_key] = nil
    self
  end

  # Returns true if metadata exists for the dictionary associated with
  # the given dictionary key, for the given metadata key; false otherwise.
  # Uses the non-raising dictionary_id lookup.
  def dictionary_metadata?(metadata_key:)
    dictionary_metadata.dig(dictionary_id, metadata_key)&.present? || false
  end

  # Returns the metadata object stored under metadata_key for this
  # dictionary, or nil if either level of the lookup is missing.
  def get_dictionary_metadata(metadata_key:)
    dictionary_metadata.dig(dictionary_id!, metadata_key)
  end

  # Stores value under metadata_key for this dictionary.
  #
  # NOTE(review): this indexes dictionary_metadata[dictionary_id!]
  # directly, so it assumes the per-dictionary Hash already exists
  # (e.g. via #init) - confirm callers guarantee that.
  #
  # @return [DictionaryMetadataService] self, to allow chaining.
  def set_dictionary_metadata(value:, metadata_key:)
    dictionary_metadata[dictionary_id!][metadata_key] = value
    self
  end

  private

  attr_writer :dictionary_metadata

  # True when there is no (or only blank) metadata for dictionary_id!.
  def dictionary_metadata_init_needed?
    dictionary_metadata[dictionary_id!].blank?
  end

  # Initializes the metadata for the dictionary_id! if it is not already
  # initialized. The metadata for the given dictionary_id! is returned.
  def dictionary_metadata_init_if
    return dictionary_metadata[dictionary_id!] unless dictionary_metadata_init_needed?

    dictionary_metadata[dictionary_id!] = {}
  end

  # Delegates to dictionary_cache_service.dictionary_id.
  def dictionary_id
    dictionary_cache_service.dictionary_id
  end

  # Delegates to dictionary_cache_service.dictionary_id!.
  def dictionary_id!
    dictionary_cache_service.dictionary_id!
  end
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../metadata/invalid_words_service_results'
|
4
|
+
|
5
|
+
module LittleWeasel
|
6
|
+
module Services
|
7
|
+
# This class calculates the total amount of bytes cached invalid words take
|
8
|
+
# up in the given dictionary and returns the results. In addition to this,
|
9
|
+
# metadata is also compiled to determine how many more bytes of invalid
|
10
|
+
# word data can be cached before the cache is depleted and shut down.
|
11
|
+
class InvalidWordsService
  # @param dictionary an enumerable of [word, found] pairs (e.g. a Hash of
  #   word => boolean); pairs whose last element is falsy are counted as
  #   cached invalid words.
  def initialize(dictionary)
    self.dictionary = dictionary
    self.current_bytesize = 0
  end

  # Builds the results object. The invalid-word bytesize is only
  # calculated when the max_invalid_words_bytesize feature is on;
  # otherwise the reported current bytesize stays at 0.
  #
  # @return [Metadata::InvalidWordsServiceResults]
  def execute
    return build_return unless max_invalid_words_bytesize?

    self.current_bytesize = calculate_current_bytesize
    build_return
  end

  private

  attr_accessor :current_bytesize, :dictionary

  # Sums the bytesize of every invalid word, stopping as soon as the
  # running total is no longer below max_invalid_words_bytesize.
  #
  # @return [Integer] the running total at the point iteration stopped.
  def calculate_current_bytesize
    dictionary.reduce(0) do |bytesize, word_and_found|
      # Valid (found) words do not count against the invalid-word budget.
      next bytesize if word_and_found.last

      bytesize += word_and_found.first.bytesize
      # A bare `break` would make #reduce return nil; hand back the total
      # accumulated so far instead.
      break bytesize unless bytesize < max_invalid_words_bytesize

      bytesize
    end
  end

  # Packages the configuration values and the calculated bytesize.
  def build_return
    Metadata::InvalidWordsServiceResults.new(
      max_invalid_words_bytesize_on: max_invalid_words_bytesize?,
      current_invalid_word_bytesize: current_bytesize,
      max_invalid_words_bytesize: max_invalid_words_bytesize
    )
  end

  # Configured byte limit, memoized per service instance.
  def max_invalid_words_bytesize
    @max_invalid_words_bytesize ||= config.max_invalid_words_bytesize
  end

  # Whether the configuration reports the byte limit as enabled.
  def max_invalid_words_bytesize?
    config.max_invalid_words_bytesize?
  end

  # LittleWeasel configuration, memoized per service instance.
  def config
    @config ||= LittleWeasel.configuration
  end
end
|
58
|
+
end
|
59
|
+
end
|
data/lib/LittleWeasel/version.rb
CHANGED
@@ -0,0 +1,146 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'active_support/core_ext/module/delegation'
|
4
|
+
require_relative 'modules/word_results_validatable'
|
5
|
+
require_relative 'preprocessors/preprocessed_words_validatable'
|
6
|
+
|
7
|
+
module LittleWeasel
|
8
|
+
# This class represents the results of gathering information about a word.
|
9
|
+
class WordResults
  include Modules::WordResultsValidatable
  include Preprocessors::PreprocessedWordsValidatable

  attr_reader :filters_matched,
              :original_word,
              :preprocessed_words,
              :word_cached,
              :word_valid

  delegate :preprocessed_word, to: :preprocessed_words, allow_nil: true

  # Important: Regarding Boolean Methods
  #
  # Several predicate methods of this class describe whichever form of
  # the word was actually processed. When #original_word has passed
  # through preprocessing, they describe #preprocessed_word; when it has
  # NOT passed through any preprocessing, they describe #original_word.
  # The affected methods are:
  #
  #   #success?
  #   #filter_match?
  #   #word_cached?
  #   #word_valid?
  #
  # Put differently: if preprocessing altered #original_word, it is
  # #preprocessed_word that is run through the word filters, checked
  # against the dictionary, and cached - NOT #original_word.
  # :reek:BooleanParameter - ignored, boolean params do not determine logic path, but only report status.
  def initialize(original_word:, filters_matched: [],
                 preprocessed_words: nil, word_cached: false, word_valid: false)
    self.original_word = original_word
    self.filters_matched = filters_matched
    self.word_cached = word_cached
    self.word_valid = word_valid
    self.preprocessed_words = preprocessed_words
  end

  # Stores the word exactly as supplied, then validates it.
  def original_word=(value)
    @original_word = value
    validate_original_word
  end

  # Stores the collection of matched filters, then validates it.
  def filters_matched=(value)
    @filters_matched = value
    validate_filters_matched
  end

  # Stores the cached flag, then validates it.
  def word_cached=(value)
    @word_cached = value
    validate_word_cached
  end

  # Stores the validity flag, then validates it.
  # NOTE(review): the helper name is spelled "vaidate_word_valid";
  # presumably it is defined under that spelling in
  # Modules::WordResultsValidatable - verify before renaming.
  def word_valid=(value)
    @word_valid = value
    vaidate_word_valid
  end

  # Stores the preprocessed words. A present value is validated before it
  # is stored; a blank value is stored as nil without validation.
  def preprocessed_words=(value)
    validate_prepreprocessed_words preprocessed_words: value if value.present?
    @preprocessed_words = value.presence
  end

  # True when the word matched at least one filter OR was found in the
  # dictionary; false otherwise.
  #
  # Use this when a filter match should count as success just as much as
  # a dictionary hit.
  #
  # See "Important: Regarding Boolean Methods" notes at the top of this
  # class definition for more detail.
  def success?
    filter_match? || word_valid?
  end

  # True when the word was found in the dictionary; false otherwise.
  # Filter matches (if any) play no part in this result.
  #
  # See "Important: Regarding Boolean Methods" notes at the top of this
  # class definition for more detail.
  def word_valid?
    word_valid
  end

  # True when the word matched at least one filter; false otherwise.
  #
  # See "Important: Regarding Boolean Methods" notes at the top of this
  # class definition for more detail.
  def filter_match?
    filters_matched.present?
  end

  # True when #original_word passed through preprocessing, in which case
  # #preprocessed_word may differ from #original_word. Preprocessing is
  # expected to happen before any filtering.
  #
  # #word_cached, #word_valid and #filters_matched describe
  # #preprocessed_word when it is present, and #original_word otherwise.
  def preprocessed_word?
    preprocessed_word.present?
  end

  # Returns #preprocessed_word when there is one, otherwise
  # #original_word.
  #
  # See "Important: Regarding Boolean Methods" notes at the top of this
  # class definition for more detail.
  def preprocessed_word_or_original_word
    preprocessed_word || original_word
  end

  # True when the word was found in the dictionary as a valid word OR was
  # found in the cache as an invalid word.
  #
  # See "Important: Regarding Boolean Methods" notes at the top of this
  # class definition for more detail.
  def word_cached?
    word_cached
  end
end
|
146
|
+
end
|
data/lib/LittleWeasel.rb
CHANGED
@@ -1,186 +1,72 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'active_support/
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
if block? word
|
75
|
-
return false if options[:single_word_mode]
|
76
|
-
return block_exists? word
|
77
|
-
end
|
78
|
-
|
79
|
-
return true if options[:ignore_numeric] && number?(word)
|
80
|
-
return false if options[:exclude_alphabet] && word.length == 1 && @alphabet_exclusion_list.include?(word.upcase)
|
81
|
-
|
82
|
-
valid_word? word
|
83
|
-
end
|
84
|
-
|
85
|
-
# Sets the global options for this gem.
|
86
|
-
#
|
87
|
-
# @param [Hash] options options that should apply to all subsequent calls to method *exists?* (see #exists?).
|
88
|
-
# Options set via this property apply to all subsequent queries.
|
89
|
-
#
|
90
|
-
# @option options [Boolean] :exclude_alphabet (false) If false, letters of the alphabet are considered words.
|
91
|
-
# @option options [Boolean] :strip_whitespace (false) If true, leading and trailing spaces are removed before checking to see if the word exists.
|
92
|
-
# @option options [Boolean] :ignore_numeric (true) If true, numeric values are considered valid words.
|
93
|
-
# @option options [Boolean] :single_word_mode (false) If false, word blocks (more than one word) are considered valid if all the words exist in the dictionary.
|
94
|
-
#
|
95
|
-
# @return [Hash] The options
|
96
|
-
#
|
97
|
-
# @example
|
98
|
-
# LittleWeasel::Checker.instance.options({exclude_alphabet:true})
|
99
|
-
# LittleWeasel::Checker.instance.exists?('A') # false
|
100
|
-
#
|
101
|
-
# LittleWeasel::Checker.instance.options({exclude_alphabet:false})
|
102
|
-
# LittleWeasel::Checker.instance.exists?('A') # true
|
103
|
-
#
|
104
|
-
# LittleWeasel::Checker.instance.options({strip_whitespace:false})
|
105
|
-
# LittleWeasel::Checker.instance.exists?(' Hello ') # false
|
106
|
-
# LittleWeasel::Checker.instance.exists?('No ') # false
|
107
|
-
# LittleWeasel::Checker.instance.exists?(' No') # false
|
108
|
-
#
|
109
|
-
# LittleWeasel::Checker.instance.options({strip_whitespace:true})
|
110
|
-
# LittleWeasel::Checker.instance.exists?(' Yes ') # true
|
111
|
-
# LittleWeasel::Checker.instance.exists?('How dy') # false, strip_whitespace only removes leading and trailing spaces
|
112
|
-
#
|
113
|
-
# LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
|
114
|
-
# LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
|
115
|
-
# LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210') # true (default options, ignore_numeric => true)
|
116
|
-
# LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210', {ignore_numeric:false}) # false
|
117
|
-
#
|
118
|
-
# LittleWeasel::Checker.instance.options({single_word_mode:true})
|
119
|
-
# LittleWeasel::Checker.instance.exists?('I love ice cream') # false; while all the words are valid, more than one word will return false
|
120
|
-
# LittleWeasel::Checker.instance.exists?('Baby') # true
|
121
|
-
#
|
122
|
-
# Stores the given options object as the current global options; the
# object is kept as given (it is neither copied nor merged here).
# NOTE(review): the YARD examples preceding this method call
# `options({...})`, but this writer is invoked as `options = {...}` -
# the examples look stale; confirm against the callers.
def options=(options)
  @options = options
end
|
125
|
-
|
126
|
-
# Gets the global options currently set for this gem.
|
127
|
-
#
|
128
|
-
# @return [Hash] The options
|
129
|
-
# Reader for the options object last stored in @options.
# NOTE(review): returns nil if @options was never assigned; presumably
# defaults are set during initialization (not visible here) - confirm.
def options
  @options
end
|
132
|
-
|
133
|
-
protected
|
134
|
-
|
135
|
-
# Reports whether the word, with surrounding whitespace removed,
# contains at least one match of @numeric_regex.
#
# @param word [String]
# @return [Boolean]
def number?(word)
  trimmed = word.strip
  match_count = trimmed.gsub(@numeric_regex).count
  match_count.positive?
end
|
138
|
-
|
139
|
-
# Reports whether the argument is a word block, i.e. a String that still
# contains more than one word once @numeric_regex matches are removed.
#
# @param string [Object] candidate text; anything that is not a String
#   is never a block.
# @return [Boolean]
def block?(string)
  # Check the type first so non-String input (including nil) is rejected
  # before any String-only method is called on it.
  return false unless string.is_a?(String)

  # Non-mutating gsub leaves the caller's String untouched.
  without_numbers = string.gsub(@numeric_regex, '')
  return false unless without_numbers.length > 1

  without_numbers.strip.scan(/[\w'-]+/).length > 1
end
|
146
|
-
|
147
|
-
# Reports whether every distinct word in the block passes
# valid_block_word?.
#
# When options[:ignore_numeric] is set, matches of @numeric_regex are
# removed first. The text is then stripped, matches of
# @non_wordchar_regex become spaces, and the result is split on
# @word_regex.
#
# @param word_block [String, nil]
# @return [Boolean] false for nil input; true when no word fails
#   (including when there are no words at all).
def block_exists?(word_block)
  # Reject nil before any String method is called on it.
  return false if word_block.nil?

  text = word_block
  text = text.gsub(@numeric_regex, '') if options[:ignore_numeric]
  text = text.strip.gsub(@non_wordchar_regex, ' ')

  text.split(@word_regex).uniq.all? { |word| valid_block_word?(word) }
end
|
159
|
-
|
160
|
-
# Reports whether the lower-cased word, or failing that its singularized
# form, is a key of the dictionary.
#
# @param word [String]
# @return [Boolean]
def valid_word?(word)
  # String#downcase already returns a new String, so no dup is needed.
  normalized = word.downcase
  dictionary.key?(normalized) || dictionary.key?(normalized.singularize)
end
|
166
|
-
|
167
|
-
# A word inside a block is accepted when it is a single character, or
# when its stripped form passes valid_word?.
#
# @param word [String]
def valid_block_word?(word)
  word.length == 1 || valid_word?(word.strip)
end
|
171
|
-
|
172
|
-
private
|
173
|
-
|
174
|
-
# Absolute path of the file named "dictionary" that sits in the same
# directory as this source file.
#
# @return [String]
def dictionary_path
  source_directory = File.dirname(__FILE__)
  File.expand_path('dictionary', source_directory)
end
|
177
|
-
|
178
|
-
# Reads the file at dictionary_path line by line and records each line,
# minus its trailing line terminator, in @dictionary as both key and
# value. The block form of File.open closes the file afterwards.
def load
  File.open(dictionary_path) do |io|
    io.each_line do |line|
      entry = line.chomp
      @dictionary[entry] = entry
    end
  end
end
|
183
|
-
|
184
|
-
end
|
185
|
-
|
186
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'active_support/core_ext/object/blank'
|
4
|
+
|
5
|
+
require_relative 'LittleWeasel/block_results'
|
6
|
+
require_relative 'LittleWeasel/configure'
|
7
|
+
require_relative 'LittleWeasel/dictionary'
|
8
|
+
require_relative 'LittleWeasel/dictionary_key'
|
9
|
+
require_relative 'LittleWeasel/dictionary_manager'
|
10
|
+
require_relative 'LittleWeasel/errors/dictionary_file_already_loaded_error'
|
11
|
+
require_relative 'LittleWeasel/errors/dictionary_file_empty_error'
|
12
|
+
require_relative 'LittleWeasel/errors/dictionary_file_not_found_error'
|
13
|
+
require_relative 'LittleWeasel/errors/dictionary_file_too_large_error'
|
14
|
+
require_relative 'LittleWeasel/errors/language_required_error'
|
15
|
+
require_relative 'LittleWeasel/errors/must_override_error'
|
16
|
+
require_relative 'LittleWeasel/filters/en_us/currency_filter'
|
17
|
+
require_relative 'LittleWeasel/filters/en_us/numeric_filter'
|
18
|
+
require_relative 'LittleWeasel/filters/en_us/single_character_word_filter'
|
19
|
+
require_relative 'LittleWeasel/filters/word_filter'
|
20
|
+
require_relative 'LittleWeasel/filters/word_filter_managable'
|
21
|
+
require_relative 'LittleWeasel/filters/word_filter_validatable'
|
22
|
+
require_relative 'LittleWeasel/filters/word_filterable'
|
23
|
+
require_relative 'LittleWeasel/filters/word_filters_validatable'
|
24
|
+
require_relative 'LittleWeasel/metadata/dictionary_metadata'
|
25
|
+
require_relative 'LittleWeasel/metadata/invalid_words_metadata'
|
26
|
+
require_relative 'LittleWeasel/metadata/invalid_words_service_results'
|
27
|
+
require_relative 'LittleWeasel/metadata/metadata_observable_validatable'
|
28
|
+
require_relative 'LittleWeasel/metadata/metadata_observerable'
|
29
|
+
require_relative 'LittleWeasel/metadata/metadatable'
|
30
|
+
require_relative 'LittleWeasel/modules/class_name_to_symbol'
|
31
|
+
require_relative 'LittleWeasel/modules/configurable'
|
32
|
+
require_relative 'LittleWeasel/modules/deep_dup'
|
33
|
+
require_relative 'LittleWeasel/modules/dictionary_cache_keys'
|
34
|
+
require_relative 'LittleWeasel/modules/dictionary_cache_servicable'
|
35
|
+
require_relative 'LittleWeasel/modules/dictionary_cache_validatable'
|
36
|
+
require_relative 'LittleWeasel/modules/dictionary_creator_servicable'
|
37
|
+
require_relative 'LittleWeasel/modules/dictionary_file_loader'
|
38
|
+
require_relative 'LittleWeasel/modules/dictionary_key_validatable'
|
39
|
+
require_relative 'LittleWeasel/modules/dictionary_keyable'
|
40
|
+
require_relative 'LittleWeasel/modules/dictionary_metadata_servicable'
|
41
|
+
require_relative 'LittleWeasel/modules/dictionary_metadata_validatable'
|
42
|
+
require_relative 'LittleWeasel/modules/dictionary_source_validatable'
|
43
|
+
require_relative 'LittleWeasel/modules/dictionary_sourceable'
|
44
|
+
require_relative 'LittleWeasel/modules/dictionary_validatable'
|
45
|
+
require_relative 'LittleWeasel/modules/language'
|
46
|
+
require_relative 'LittleWeasel/modules/language_validatable'
|
47
|
+
require_relative 'LittleWeasel/modules/locale'
|
48
|
+
require_relative 'LittleWeasel/modules/order_validatable'
|
49
|
+
require_relative 'LittleWeasel/modules/orderable'
|
50
|
+
require_relative 'LittleWeasel/modules/region'
|
51
|
+
require_relative 'LittleWeasel/modules/region_validatable'
|
52
|
+
require_relative 'LittleWeasel/modules/tag_validatable'
|
53
|
+
require_relative 'LittleWeasel/modules/taggable'
|
54
|
+
require_relative 'LittleWeasel/modules/word_results_validatable'
|
55
|
+
require_relative 'LittleWeasel/preprocessors/en_us/capitalize_preprocessor'
|
56
|
+
require_relative 'LittleWeasel/preprocessors/preprocessed_word'
|
57
|
+
require_relative 'LittleWeasel/preprocessors/preprocessed_word_validatable'
|
58
|
+
require_relative 'LittleWeasel/preprocessors/preprocessed_words'
|
59
|
+
require_relative 'LittleWeasel/preprocessors/preprocessed_words_validatable'
|
60
|
+
require_relative 'LittleWeasel/preprocessors/word_preprocessable'
|
61
|
+
require_relative 'LittleWeasel/preprocessors/word_preprocessor'
|
62
|
+
require_relative 'LittleWeasel/preprocessors/word_preprocessor_managable'
|
63
|
+
require_relative 'LittleWeasel/preprocessors/word_preprocessor_validatable'
|
64
|
+
require_relative 'LittleWeasel/preprocessors/word_preprocessors_validatable'
|
65
|
+
require_relative 'LittleWeasel/services/dictionary_cache_service'
|
66
|
+
require_relative 'LittleWeasel/services/dictionary_creator_service'
|
67
|
+
require_relative 'LittleWeasel/services/dictionary_file_loader_service'
|
68
|
+
require_relative 'LittleWeasel/services/dictionary_killer_service'
|
69
|
+
require_relative 'LittleWeasel/services/dictionary_metadata_service'
|
70
|
+
require_relative 'LittleWeasel/services/invalid_words_service'
|
71
|
+
require_relative 'LittleWeasel/version'
|
72
|
+
require_relative 'LittleWeasel/word_results'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
|
3
|
+
FactoryBot.define do
  # Builds a LittleWeasel::Dictionary through its keyword constructor
  # (see initialize_with below), seeded with one word per letter a-z.
  factory :dictionary, class: LittleWeasel::Dictionary do
    dictionary_key { create(:dictionary_key) }
    dictionary_cache { {} }
    dictionary_metadata { {} }
    # Evaluates to nil: no word filters are supplied by default.
    word_filters { nil }
    dictionary_words do
      %w[
        apple better cat dog everyone fat game help italic
        jasmine kelp love man nope octopus popeye queue ruby
        stop top ultimate very was xylophone yes zebra
      ]
    end

    skip_create
    initialize_with do
      new(
        dictionary_key: dictionary_key,
        dictionary_cache: dictionary_cache,
        dictionary_metadata: dictionary_metadata,
        dictionary_words: dictionary_words,
        word_filters: word_filters
      )
    end
  end
end
|