LittleWeasel 3.0.3 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +3 -0
  3. data/.reek.yml +17 -0
  4. data/.rspec +4 -2
  5. data/.rubocop.yml +187 -0
  6. data/.ruby-version +1 -1
  7. data/.yardopts +2 -0
  8. data/CHANGELOG.md +22 -1
  9. data/Gemfile +3 -1
  10. data/Jenkinsfile +20 -0
  11. data/LittleWeasel.gemspec +31 -18
  12. data/README.md +408 -42
  13. data/Rakefile +296 -3
  14. data/lib/LittleWeasel/block_results.rb +81 -0
  15. data/lib/LittleWeasel/configure.rb +98 -0
  16. data/lib/LittleWeasel/dictionary.rb +125 -0
  17. data/lib/LittleWeasel/dictionary_key.rb +48 -0
  18. data/lib/LittleWeasel/dictionary_manager.rb +91 -0
  19. data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
  20. data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
  21. data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
  22. data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
  23. data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
  24. data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
  25. data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
  26. data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
  27. data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
  28. data/lib/LittleWeasel/filters/word_filter.rb +59 -0
  29. data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
  30. data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
  31. data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
  32. data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
  33. data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
  34. data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
  35. data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
  36. data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
  37. data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
  38. data/lib/LittleWeasel/metadata/metadatable.rb +134 -0
  39. data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
  40. data/lib/LittleWeasel/modules/configurable.rb +26 -0
  41. data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
  42. data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
  43. data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
  44. data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +18 -0
  45. data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
  46. data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
  47. data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +17 -0
  48. data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
  49. data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
  50. data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +15 -0
  51. data/lib/LittleWeasel/modules/dictionary_source_validatable.rb +15 -0
  52. data/lib/LittleWeasel/modules/dictionary_sourceable.rb +86 -0
  53. data/lib/LittleWeasel/modules/dictionary_validatable.rb +18 -0
  54. data/lib/LittleWeasel/modules/language.rb +24 -0
  55. data/lib/LittleWeasel/modules/language_validatable.rb +14 -0
  56. data/lib/LittleWeasel/modules/locale.rb +23 -0
  57. data/lib/LittleWeasel/modules/order_validatable.rb +16 -0
  58. data/lib/LittleWeasel/modules/orderable.rb +17 -0
  59. data/lib/LittleWeasel/modules/region.rb +24 -0
  60. data/lib/LittleWeasel/modules/region_validatable.rb +14 -0
  61. data/lib/LittleWeasel/modules/tag_validatable.rb +14 -0
  62. data/lib/LittleWeasel/modules/taggable.rb +31 -0
  63. data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
  64. data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
  65. data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +29 -0
  66. data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +56 -0
  67. data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +59 -0
  68. data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +28 -0
  69. data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
  70. data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +123 -0
  71. data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
  72. data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
  73. data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
  74. data/lib/LittleWeasel/services/dictionary_cache_service.rb +211 -0
  75. data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
  76. data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
  77. data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
  78. data/lib/LittleWeasel/services/dictionary_metadata_service.rb +116 -0
  79. data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
  80. data/lib/LittleWeasel/version.rb +3 -1
  81. data/lib/LittleWeasel/word_results.rb +146 -0
  82. data/lib/LittleWeasel.rb +5 -184
  83. data/spec/factories/dictionary.rb +43 -0
  84. data/spec/factories/dictionary_cache_service.rb +95 -0
  85. data/spec/factories/dictionary_creator_service.rb +16 -0
  86. data/spec/factories/dictionary_file_loader_service.rb +13 -0
  87. data/spec/factories/dictionary_hash.rb +39 -0
  88. data/spec/factories/dictionary_key.rb +14 -0
  89. data/spec/factories/dictionary_killer_service.rb +14 -0
  90. data/spec/factories/dictionary_manager.rb +10 -0
  91. data/spec/factories/dictionary_metadata.rb +16 -0
  92. data/spec/factories/dictionary_metadata_service.rb +16 -0
  93. data/spec/factories/numeric_filter.rb +12 -0
  94. data/spec/factories/preprocessed_word.rb +16 -0
  95. data/spec/factories/preprocessed_words.rb +41 -0
  96. data/spec/factories/single_character_word_filter.rb +12 -0
  97. data/spec/factories/word_results.rb +16 -0
  98. data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
  99. data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
  100. data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
  101. data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +166 -0
  102. data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
  103. data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
  104. data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
  105. data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
  106. data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
  107. data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
  108. data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
  109. data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
  110. data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
  111. data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
  112. data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
  113. data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
  114. data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
  115. data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
  116. data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
  117. data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
  118. data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +81 -0
  119. data/spec/lib/LittleWeasel/modules/language_spec.rb +112 -0
  120. data/spec/lib/LittleWeasel/modules/locale_spec.rb +95 -0
  121. data/spec/lib/LittleWeasel/modules/region_spec.rb +112 -0
  122. data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
  123. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
  124. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
  125. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
  126. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
  127. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +242 -0
  128. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +218 -0
  129. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
  130. data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
  131. data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
  132. data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
  133. data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
  134. data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
  135. data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
  136. data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
  137. data/spec/spec_helper.rb +117 -6
  138. data/spec/support/factory_bot.rb +15 -0
  139. data/spec/support/file_helpers.rb +46 -0
  140. data/spec/support/files/empty-dictionary.txt +0 -0
  141. data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
  142. data/spec/support/files/en-US-tagged.txt +26 -0
  143. data/spec/support/files/en-US.txt +26 -0
  144. data/spec/support/files/en.txt +26 -0
  145. data/spec/support/files/es-ES.txt +27 -0
  146. data/spec/support/files/es.txt +27 -0
  147. data/spec/support/general_helpers.rb +68 -0
  148. data/spec/support/shared_contexts.rb +107 -0
  149. data/spec/support/shared_examples.rb +105 -0
  150. metadata +378 -38
  151. data/spec/checker/checker_spec.rb +0 -286
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../modules/dictionary_cache_servicable'
4
+ require_relative '../modules/dictionary_keyable'
5
+ require_relative '../modules/dictionary_metadata_validatable'
6
+
7
module LittleWeasel
  module Services
    # This class provides methods for managing and manipulating the
    # dictionary metadata associated with the given dictionary
    # (dictionary_key) for the supplied metadata_key.
    class DictionaryMetadataService
      include Modules::DictionaryKeyable
      include Modules::DictionaryCacheServicable
      include Modules::DictionaryMetadataValidatable

      attr_reader :dictionary_metadata

      # Validates and stores the three collaborators, in the order
      # dictionary_key, dictionary_cache, dictionary_metadata.
      #
      # @example metadata Hash structure:
      #
      #   {
      #     <dictionary_id!> =>
      #       {
      #         :<metadata_key> => <metadata_object>
      #       },
      #     ...
      #   }
      #
      # @param dictionary_key the key identifying the dictionary.
      # @param dictionary_cache the dictionary cache.
      # @param dictionary_metadata the metadata Hash (see the example above).
      def initialize(dictionary_key:, dictionary_cache:, dictionary_metadata:)
        validate_dictionary_key dictionary_key: dictionary_key
        self.dictionary_key = dictionary_key

        validate_dictionary_cache dictionary_cache: dictionary_cache
        self.dictionary_cache = dictionary_cache

        validate_dictionary_metadata dictionary_metadata: dictionary_metadata
        self.dictionary_metadata = dictionary_metadata
      end

      class << self
        # This method initializes the dictionary_metadata object to its
        # initialized state - all data is lost, but the object reference is
        # maintained.
        #
        # @param dictionary_metadata the metadata object to reset.
        # @return the same dictionary_metadata object, now empty.
        def init(dictionary_metadata:)
          Modules::DictionaryMetadataValidatable.validate_dictionary_metadata(
            dictionary_metadata: dictionary_metadata
          )

          # #clear empties the object in place (same reference) without
          # deleting keys from the Hash while iterating over it.
          dictionary_metadata.clear
          dictionary_metadata
        end

        # Returns true if the dictionary metadata is initialized; that is, if
        # it's in the same state the dictionary metadata would be in if #init
        # were called.
        #
        # @param dictionary_metadata the metadata object to inspect.
        # @return [Boolean] true if dictionary_metadata is eql? to a freshly
        #   initialized metadata object; false otherwise.
        def init?(dictionary_metadata:)
          Modules::DictionaryMetadataValidatable.validate_dictionary_metadata(
            dictionary_metadata: dictionary_metadata
          )

          initialized_dictionary_metadata = init(dictionary_metadata: {})
          dictionary_metadata.eql?(initialized_dictionary_metadata)
        end
        alias initialized? init?
      end

      # This method initializes the dictionary metadata associated with the
      # dictionary_id! and metadata_key: any existing entry for metadata_key
      # is removed, the per-dictionary Hash is created if needed, and the
      # metadata_key entry is set to nil.
      #
      # @param metadata_key the key of the metadata to initialize.
      # @return [self]
      def init(metadata_key:)
        metadata = dictionary_metadata[dictionary_id!]
        metadata&.delete(metadata_key)
        metadata = dictionary_metadata_init_if
        metadata[metadata_key] = nil
        self
      end

      # This method will return true if metadata exists for the dictionary
      # associated with the given dictionary key, for the given metadata key.
      #
      # Uses the non-bang #dictionary_id, unlike the other accessors.
      #
      # @param metadata_key the key of the metadata to look up.
      # @return [Boolean]
      def dictionary_metadata?(metadata_key:)
        dictionary_metadata.dig(dictionary_id, metadata_key)&.present? || false
      end

      # Returns the metadata object stored for the dictionary_id! and
      # metadata_key, or nil if there is none.
      #
      # @param metadata_key the key of the metadata to return.
      def get_dictionary_metadata(metadata_key:)
        dictionary_metadata.dig(dictionary_id!, metadata_key)
      end

      # Stores value as the metadata object for the dictionary_id! and
      # metadata_key.
      #
      # NOTE(review): this raises NoMethodError when no metadata Hash exists
      # yet for dictionary_id!; callers are presumably expected to call #init
      # first - confirm.
      #
      # @param value the metadata object to store.
      # @param metadata_key the key to store the metadata object under.
      # @return [self]
      def set_dictionary_metadata(value:, metadata_key:)
        dictionary_metadata[dictionary_id!][metadata_key] = value
        self
      end

      private

      attr_writer :dictionary_metadata

      # Returns true if there is no (or only blank) metadata for the
      # dictionary_id!.
      def dictionary_metadata_init_needed?
        dictionary_metadata[dictionary_id!].blank?
      end

      # This method initializes the metadata for the
      # dictionary_id! if it is not already initialized.
      # The metadata for the given dictionary_id! is returned.
      def dictionary_metadata_init_if
        return dictionary_metadata[dictionary_id!] unless dictionary_metadata_init_needed?

        dictionary_metadata[dictionary_id!] = {}
      end

      # The dictionary id as reported by the dictionary cache service.
      def dictionary_id
        dictionary_cache_service.dictionary_id
      end

      # Bang variant of #dictionary_id, forwarded to the dictionary cache
      # service (presumably raises if there is no id - confirm).
      def dictionary_id!
        dictionary_cache_service.dictionary_id!
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../metadata/invalid_words_service_results'
4
+
5
module LittleWeasel
  module Services
    # This class calculates the total amount of bytes cached invalid words take
    # up in the given dictionary and returns the results. In addition to this,
    # metadata is also compiled to determine how many more bytes of invalid
    # word data can be cached before the cache is depleted and shutdown.
    class InvalidWordsService
      # @param dictionary an enumerable of [word, found] pairs (for example a
      #   Hash of word => Boolean); pairs whose second element is falsey are
      #   counted as invalid words.
      def initialize(dictionary)
        self.dictionary = dictionary
        self.current_bytesize = 0
      end

      # Builds the results object. The current bytesize is only calculated
      # when the max_invalid_words_bytesize configuration option is on;
      # otherwise it is reported as 0.
      #
      # @return [Metadata::InvalidWordsServiceResults]
      def execute
        return build_return unless max_invalid_words_bytesize?

        self.current_bytesize = calculate_current_bytesize
        build_return
      end

      private

      attr_accessor :current_bytesize, :dictionary

      # Sums the bytesize of every invalid word, stopping as soon as the
      # running total reaches max_invalid_words_bytesize.
      #
      # @return [Integer] the bytes counted so far.
      def calculate_current_bytesize
        dictionary.reduce(0) do |bytesize, (word, found)|
          next bytesize if found

          bytesize += word.bytesize
          # A bare `break` would make #reduce return nil; hand back the
          # running total so the results always receive an Integer.
          break bytesize unless bytesize < max_invalid_words_bytesize

          bytesize
        end
      end

      # Wraps the configuration values and the calculated bytesize in a
      # results object.
      def build_return
        Metadata::InvalidWordsServiceResults.new(
          max_invalid_words_bytesize_on: max_invalid_words_bytesize?,
          current_invalid_word_bytesize: current_bytesize,
          max_invalid_words_bytesize: max_invalid_words_bytesize
        )
      end

      # The configured byte limit, memoized for the life of this service.
      def max_invalid_words_bytesize
        @max_invalid_words_bytesize ||= config.max_invalid_words_bytesize
      end

      # Whether the configuration reports the byte limit as on.
      def max_invalid_words_bytesize?
        config.max_invalid_words_bytesize?
      end

      # The gem configuration, memoized for the life of this service.
      def config
        @config ||= LittleWeasel.configuration
      end
    end
  end
end
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # The version of this gem
2
4
  module LittleWeasel
3
- VERSION = "3.0.3"
5
+ VERSION = '5.0.0'
4
6
  end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/core_ext/module/delegation'
4
+ require_relative 'modules/word_results_validatable'
5
+ require_relative 'preprocessors/preprocessed_words_validatable'
6
+
7
module LittleWeasel
  # Holds everything that was learned about a single word: the word as it
  # was supplied, the result of any preprocessing, the filters it matched,
  # and whether it was found valid and/or cached.
  class WordResults
    include Modules::WordResultsValidatable
    include Preprocessors::PreprocessedWordsValidatable

    attr_reader :filters_matched,
                :original_word,
                :preprocessed_words,
                :word_cached,
                :word_valid

    delegate :preprocessed_word, to: :preprocessed_words, allow_nil: true

    # Important: Regarding Boolean Methods
    #
    # Several predicate methods of this class (methods ending with '?')
    # report on a different word depending on whether #original_word went
    # through preprocessing. When #original_word was preprocessed, the
    # predicates below describe #preprocessed_word; when it was NOT
    # preprocessed, they describe #original_word:
    #
    #   #success?
    #   #filter_match?
    #   #word_cached?
    #   #word_valid?
    #
    # Put differently: once a preprocessing module has altered
    # #original_word, it is #preprocessed_word - NOT #original_word - that
    # is run through subsequent word filters, looked up in the dictionary,
    # and cached.
    # :reek:BooleanParameter - ignored, boolean params do not determine logic path, but only report status.
    def initialize(original_word:, filters_matched: [],
      preprocessed_words: nil, word_cached: false, word_valid: false)
      # Each writer validates as it assigns, so the order below determines
      # which validation runs first.
      self.original_word = original_word
      self.filters_matched = filters_matched
      self.word_cached = word_cached
      self.word_valid = word_valid
      self.preprocessed_words = preprocessed_words
    end

    # Assigns, then validates, the original word.
    def original_word=(value)
      @original_word = value
      validate_original_word
    end

    # Assigns, then validates, the matched filters.
    def filters_matched=(value)
      @filters_matched = value
      validate_filters_matched
    end

    # Assigns, then validates, the word-cached flag.
    def word_cached=(value)
      @word_cached = value
      validate_word_cached
    end

    # Assigns, then validates, the word-valid flag.
    #
    # NOTE(review): the validator name is spelled "vaidate_word_valid";
    # presumably it is defined under that exact name in
    # Modules::WordResultsValidatable - confirm before renaming.
    def word_valid=(value)
      @word_valid = value
      vaidate_word_valid
    end

    # Stores the preprocessed words after validating them; anything that is
    # not present? is stored as nil without validation.
    def preprocessed_words=(value)
      unless value.present?
        @preprocessed_words = nil
        return
      end

      validate_prepreprocessed_words preprocessed_words: value
      @preprocessed_words = value
    end

    # True when the word matched at least one filter OR was found in the
    # dictionary; false otherwise.
    #
    # Use this when a word should count as acceptable either because it is
    # a valid dictionary word or because a word filter matched it. A word
    # that matched no filters (or passed through none) but was found in the
    # dictionary yields true, and vice versa.
    #
    # See "Important: Regarding Boolean Methods" notes at the top of this
    # class definition for more detail.
    def success?
      filter_match? || word_valid?
    end

    # True when the word was found in the dictionary; false otherwise.
    #
    # Use this when validity should be judged regardless of whether any
    # word filters matched.
    #
    # See "Important: Regarding Boolean Methods" notes at the top of this
    # class definition for more detail.
    def word_valid?
      word_valid
    end

    # True when the word matched at least one filter; false otherwise.
    #
    # See "Important: Regarding Boolean Methods" notes at the top of this
    # class definition for more detail.
    def filter_match?
      !filters_matched.blank?
    end

    # True when #original_word passed through any preprocessing, in which
    # case #preprocessed_word may differ from #original_word. Preprocessing
    # should happen before any filtering.
    #
    # #word_cached, #word_valid and #filters_matched should all describe
    # #preprocessed_word when it is present?; otherwise they should all
    # describe #original_word.
    def preprocessed_word?
      !preprocessed_word.blank?
    end

    # Returns #preprocessed_word when available, otherwise #original_word.
    # #preprocessed_word is available when #original_word met the criteria
    # for preprocessing and passed through at least one preprocessor.
    #
    # See "Important: Regarding Boolean Methods" notes at the top of this
    # class definition for more detail.
    def preprocessed_word_or_original_word
      preprocessed_word || original_word
    end

    # True when the word was found in the dictionary as a valid word OR was
    # found in the cache as an invalid word.
    #
    # See "Important: Regarding Boolean Methods" notes at the top of this
    # class definition for more detail.
    def word_cached?
      word_cached
    end
  end
end
data/lib/LittleWeasel.rb CHANGED
@@ -1,186 +1,7 @@
1
- require 'singleton'
2
- require "LittleWeasel/version"
3
- require 'active_support/inflector'
1
+ # frozen_string_literal: true
4
2
 
5
- module LittleWeasel
3
+ require 'active_support/core_ext/object/blank'
6
4
 
7
- # Provides methods to interrogate the dictionary.
8
- class Checker
9
- include Singleton
10
-
11
- # Returns the dictionary.
12
- #
13
- # @return [Hash] the dictionary.
14
- attr_reader :dictionary
15
-
16
- private
17
-
18
- attr_reader :alphabet_exclusion_list
19
-
20
- # Keep these private...will expose as options later.
21
- attr_accessor :word_regex, :numeric_regex, :non_wordchar_regex
22
-
23
- public
24
-
25
- # The constructor
26
- def initialize
27
- @options = { exclude_alphabet: false, strip_whitespace: false, ignore_numeric: true, single_word_mode: false }
28
- @alphabet_exclusion_list = %w{ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z }
29
- @numeric_regex = /^[-+]?[0-9]?(\.[0-9]+)?$+/
30
- @word_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
31
- @non_wordchar_regex = /\W+/
32
- @dictionary = Hash.new(1)
33
- load
34
- end
35
-
36
- # Interrogates the dictionary to determine whether or not [word] exists.
37
- #
38
- # @param [String] word the word or words to interrogate
39
- # @param [Hash] options options to apply to this query (see #options=). Options passed to this
40
- # method are applied for this query only.
41
- #
42
- # @return [Boolean] true if the word/words in *word* exists, false otherwise.
43
- #
44
- # @example
45
- #
46
- # LittleWeasel::Checker.instance.exists?('C') # true (default options, :exclude_alphabet => false)
47
- # LittleWeasel::Checker.instance.exists?('A', {exclude_alphabet:true}) # false
48
- # LittleWeasel::Checker.instance.exists?('X', {exclude_alphabet:false}) # true
49
- # LittleWeasel::Checker.instance.exists?('Hello') # true
50
- #
51
- # LittleWeasel::Checker.instance.exists?(' Hello ') # false (default options, :strip_whitespace => false)
52
- # LittleWeasel::Checker.instance.exists?(' Yes ', {strip_whitespace:true}) # true
53
- # LittleWeasel::Checker.instance.exists?('No ', {strip_whitespace:false}) # false
54
- # LittleWeasel::Checker.instance.exists?('How dy', {strip_whitespace:true}) # false, strip_whitespace only removes leading and trailing spaces
55
- #
56
- # LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
57
- # LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
58
- #
59
- # LittleWeasel::Checker.instance.exists?('Hello World') # true, we're accepting multiple words now by default (default options, single_word_mode => false) :)
60
- # LittleWeasel::Checker.instance.exists?("hello, mister; did I \'mention\'' that lemon cake is \"great?\" It's just wonderful!") # true
61
- #
62
- # LittleWeasel::Checker.instance.exists?('I love ice cream', {single_word_mode:true}) # false; while all the words are valid, more than one word will return false
63
- #
64
- def exists?(word, options=nil)
65
- options = options || @options
66
-
67
- return false unless word.is_a?(String)
68
-
69
- word = word.dup
70
- word.strip! if options[:strip_whitespace]
71
-
72
- return false if word.empty?
73
-
74
- if block? word
75
- return false if options[:single_word_mode]
76
- return block_exists? word
77
- end
78
-
79
- return true if options[:ignore_numeric] && number?(word)
80
- return false if options[:exclude_alphabet] && word.length == 1 && @alphabet_exclusion_list.include?(word.upcase)
81
-
82
- valid_word? word
83
- end
84
-
85
- # Sets the global options for this gem.
86
- #
87
- # @param [Hash] options options that should apply to all subsequent calls to method *exists?* (see #exists?).
88
- # Options set via this property apply to all subsequent queries.
89
- #
90
- # @option options [Boolean] :exclude_alphabet (false) If false, letters of the alphabet are considered words.
91
- # @option options [Boolean] :strip_whitespace (false) If true, leading and trailing spaces are removed before checking to see if the word exists.
92
- # @option options [Boolean] :ignore_numeric (true) If true, numeric values are considered valid words.
93
- # @option options [Boolean] :single_word_mode (false) If false, word blocks (more than one word) are considered valid if all the words exist in the dictionary.
94
- #
95
- # @return [Hash] The options
96
- #
97
- # @example
98
- # LittleWeasel::Checker.instance.options({exclude_alphabet:true})
99
- # LittleWeasel::Checker.instance.exists?('A') # false
100
- #
101
- # LittleWeasel::Checker.instance.options({exclude_alphabet:false})
102
- # LittleWeasel::Checker.instance.exists?('A') # true
103
- #
104
- # LittleWeasel::Checker.instance.options({strip_whitespace:false})
105
- # LittleWeasel::Checker.instance.exists?(' Hello ') # false
106
- # LittleWeasel::Checker.instance.exists?('No ') # false
107
- # LittleWeasel::Checker.instance.exists?(' No') # false
108
- #
109
- # LittleWeasel::Checker.instance.options({strip_whitespace:true})
110
- # LittleWeasel::Checker.instance.exists?(' Yes ') # true
111
- # LittleWeasel::Checker.instance.exists?('How dy') # false, strip_whitespace only removes leading and trailing spaces
112
- #
113
- # LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
114
- # LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
115
- # LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210') # true (default options, ignore_numeric => true)
116
- # LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210', {ignore_numeric:false}) # false
117
- #
118
- # LittleWeasel::Checker.instance.options({single_word_mode:true})
119
- # LittleWeasel::Checker.instance.exists?('I love ice cream') # false; while all the words are valid, more than one word will return false
120
- # LittleWeasel::Checker.instance.exists?('Baby') # true
121
- #
122
- def options=(options)
123
- @options = options
124
- end
125
-
126
- # Gets the global options currently set for this gem.
127
- #
128
- # @return [Hash] The options
129
- def options
130
- @options
131
- end
132
-
133
- protected
134
-
135
- def number?(word)
136
- word.strip.gsub(@numeric_regex).count > 0
137
- end
138
-
139
- def block?(string)
140
- string = string.dup
141
- return false unless string.is_a?(String)
142
- string.gsub!(@numeric_regex, "")
143
- return false unless string.length > 1
144
- string.strip.scan(/[\w'-]+/).length > 1
145
- end
146
-
147
- def block_exists?(word_block)
148
- word_block = word_block.dup
149
-
150
- word_block.gsub!(@numeric_regex, "") if options[:ignore_numeric]
151
- return false if word_block.nil?
152
- word_block.strip! unless word_block.nil?
153
- word_block.gsub!(@non_wordchar_regex, " ")
154
- word_block.split(@word_regex).uniq.each { |word|
155
- return false unless valid_block_word?(word)
156
- }
157
- return true
158
- end
159
-
160
- def valid_word?(word)
161
- word = word.dup.downcase
162
- exists = dictionary.has_key?(word)
163
- exists = dictionary.has_key?(word.singularize) unless exists
164
- exists
165
- end
166
-
167
- def valid_block_word?(word)
168
- return true if word.length == 1
169
- valid_word? word.strip
170
- end
171
-
172
- private
173
-
174
- def dictionary_path
175
- File.expand_path(File.dirname(__FILE__) + '/dictionary')
176
- end
177
-
178
- def load
179
- File.open(dictionary_path) do |io|
180
- io.each { |line| line.chomp!; @dictionary[line] = line }
181
- end
182
- end
183
-
184
- end
185
-
186
- end
5
# Require every file under lib/LittleWeasel. The glob is anchored on this
# file's directory (__dir__) rather than the process working directory, so
# the library loads no matter where the requiring program is started from.
# The list is sorted so the load order is the same on every platform.
Dir[File.join(__dir__, 'LittleWeasel', '**', '*.rb')].sort.each do |file|
  require file
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: false
2
+
3
FactoryBot.define do
  # Builds a LittleWeasel::Dictionary from a small, fixed list of words (one
  # per letter of the alphabet). Nothing is persisted (skip_create); the
  # object is constructed directly through its keyword-argument initializer.
  factory :dictionary, class: LittleWeasel::Dictionary do
    dictionary_key { create(:dictionary_key) }
    dictionary_cache { {} }
    dictionary_metadata { {} }
    word_filters { nil }
    dictionary_words do
      %w[
        apple better cat dog everyone fat game help italic
        jasmine kelp love man nope octopus popeye queue ruby
        stop top ultimate very was xylophone yes zebra
      ]
    end

    skip_create
    initialize_with do
      new(
        dictionary_key: dictionary_key,
        dictionary_cache: dictionary_cache,
        dictionary_metadata: dictionary_metadata,
        dictionary_words: dictionary_words,
        word_filters: word_filters
      )
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: false
2
+
3
+ require 'pry'
4
+
5
FactoryBot.define do
  # Builds a LittleWeasel::Services::DictionaryCacheService and, depending on
  # the transient attributes below, adds a dictionary source to the cache and
  # optionally loads a dictionary object into it.
  factory :dictionary_cache_service, class: LittleWeasel::Services::DictionaryCacheService do
    dictionary_key { create(:dictionary_key) }
    dictionary_cache { {} }

    transient do
      # The dictionary reference created in the cache will point to a MEMORY source.
      #
      # Valid values: nil | true | false | <Array of dictionary words>
      #
      # If nil or false - No memory source will be added to the dictionary cache.
      # If true - A memory source will be added to the dictionary cache.
      # If <An Array of dictionary words> - A memory source will be added to the dictionary cache.
      # This only makes sense if load == true.
      dictionary_memory_source { nil }

      # The dictionary reference created in the cache will point to a FILE source.
      #
      # Important: when both are supplied, dictionary_file_source takes
      # precedence over dictionary_memory_source.
      #
      # Valid values: nil | true | false | <Path to dictionary file>
      #
      # If nil or false - No file source will be added to the dictionary cache.
      #
      # If true - A file source will be added to the dictionary cache.
      # dictionary_key.key will be used to create the dictionary
      # file path.
      # If <Path to dictionary file> - A file source will be added to the dictionary cache.
      # The file source will point to <Path to dictionary file>.
      dictionary_file_source { nil }

      # If load == true - A dictionary object will be created and added to the dictionary cache
      # depending on the dictionary source (file or memory).
      load { false }
    end

    skip_create
    initialize_with do
      new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
    end

    after :create do |dictionary_cache_service, evaluator|
      dictionary_key = dictionary_cache_service.dictionary_key
      dictionary_cache = dictionary_cache_service.dictionary_cache

      # Initialize the dictionary cache unless the user passed one that
      # already contains dictionaries; in that case use it as it is.
      dictionary_cache_service.class.init(dictionary_cache: dictionary_cache) \
        unless dictionary_cache_service.class.count(dictionary_cache: dictionary_cache).positive?

      load = evaluator.load
      dictionary_memory_source = evaluator.dictionary_memory_source
      dictionary_file_source = evaluator.dictionary_file_source

      if load && !(dictionary_memory_source.present? || dictionary_file_source.present?)
        # The dictionary key is reported so the failing factory call can be
        # identified.
        raise 'Transient attributes dictionary_memory_source or dictionary_file_source ' \
              "must be present if transient attribute load is true: #{dictionary_key.key}"
      end

      if dictionary_file_source
        # true means "derive the file name from the dictionary key".
        file_name = dictionary_file_source == true ? dictionary_key.key : dictionary_file_source
        dictionary_cache_service.add_dictionary_source(dictionary_source: dictionary_path_for(file_name: file_name))
      elsif dictionary_memory_source
        dictionary_cache_service.add_dictionary_source(dictionary_source: LittleWeasel::Modules::DictionarySourceable.memory_source)
      end

      if load
        dictionary_words = if dictionary_file_source
          dictionary_file_loader_service = create(:dictionary_file_loader_service, dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
          dictionary_file_loader_service.execute
        else
          unless dictionary_memory_source.is_a? Array
            raise 'Transient attribute dictionary_memory_source must be an Array of words ' \
                  "if transient attribute load == true: #{dictionary_memory_source}"
          end
          dictionary_memory_source
        end
        dictionary_cache_service.dictionary_object = create(:dictionary, dictionary_key: dictionary_key, dictionary_cache: dictionary_cache, dictionary_words: dictionary_words)
      end

      dictionary_cache_service
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: false
2
+
3
FactoryBot.define do
  # Builds a LittleWeasel::Services::DictionaryCreatorService. Nothing is
  # persisted (skip_create); the service is constructed directly through its
  # keyword-argument initializer. word_filters and word_preprocessors default
  # to nil.
  factory :dictionary_creator_service, class: LittleWeasel::Services::DictionaryCreatorService do
    dictionary_key { create(:dictionary_key) }
    dictionary_cache { {} }
    dictionary_metadata { {} }
    word_filters { nil }
    word_preprocessors { nil }

    skip_create
    initialize_with do
      new(
        dictionary_key: dictionary_key,
        dictionary_cache: dictionary_cache,
        dictionary_metadata: dictionary_metadata,
        word_filters: word_filters,
        word_preprocessors: word_preprocessors
      )
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: false
2
+
3
FactoryBot.define do
  # Builds a LittleWeasel::Services::DictionaryFileLoaderService. Nothing is
  # persisted (skip_create); the service is constructed directly from a
  # dictionary key and a dictionary cache.
  factory :dictionary_file_loader_service, class: LittleWeasel::Services::DictionaryFileLoaderService do
    dictionary_key { create(:dictionary_key) }
    dictionary_cache { {} }

    skip_create
    initialize_with do
      new(
        dictionary_key: dictionary_key,
        dictionary_cache: dictionary_cache
      )
    end
  end
end