LittleWeasel 3.0.3 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (151) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +3 -0
  3. data/.reek.yml +17 -0
  4. data/.rspec +4 -2
  5. data/.rubocop.yml +187 -0
  6. data/.ruby-version +1 -1
  7. data/.yardopts +2 -0
  8. data/CHANGELOG.md +22 -1
  9. data/Gemfile +3 -1
  10. data/Jenkinsfile +20 -0
  11. data/LittleWeasel.gemspec +31 -18
  12. data/README.md +408 -42
  13. data/Rakefile +296 -3
  14. data/lib/LittleWeasel/block_results.rb +81 -0
  15. data/lib/LittleWeasel/configure.rb +98 -0
  16. data/lib/LittleWeasel/dictionary.rb +125 -0
  17. data/lib/LittleWeasel/dictionary_key.rb +48 -0
  18. data/lib/LittleWeasel/dictionary_manager.rb +91 -0
  19. data/lib/LittleWeasel/errors/dictionary_file_already_loaded_error.rb +9 -0
  20. data/lib/LittleWeasel/errors/dictionary_file_empty_error.rb +8 -0
  21. data/lib/LittleWeasel/errors/dictionary_file_not_found_error.rb +8 -0
  22. data/lib/LittleWeasel/errors/dictionary_file_too_large_error.rb +16 -0
  23. data/lib/LittleWeasel/errors/language_required_error.rb +8 -0
  24. data/lib/LittleWeasel/errors/must_override_error.rb +8 -0
  25. data/lib/LittleWeasel/filters/en_us/currency_filter.rb +19 -0
  26. data/lib/LittleWeasel/filters/en_us/numeric_filter.rb +19 -0
  27. data/lib/LittleWeasel/filters/en_us/single_character_word_filter.rb +21 -0
  28. data/lib/LittleWeasel/filters/word_filter.rb +59 -0
  29. data/lib/LittleWeasel/filters/word_filter_managable.rb +80 -0
  30. data/lib/LittleWeasel/filters/word_filter_validatable.rb +31 -0
  31. data/lib/LittleWeasel/filters/word_filterable.rb +19 -0
  32. data/lib/LittleWeasel/filters/word_filters_validatable.rb +29 -0
  33. data/lib/LittleWeasel/metadata/dictionary_metadata.rb +145 -0
  34. data/lib/LittleWeasel/metadata/invalid_words_metadata.rb +134 -0
  35. data/lib/LittleWeasel/metadata/invalid_words_service_results.rb +45 -0
  36. data/lib/LittleWeasel/metadata/metadata_observable_validatable.rb +22 -0
  37. data/lib/LittleWeasel/metadata/metadata_observerable.rb +90 -0
  38. data/lib/LittleWeasel/metadata/metadatable.rb +134 -0
  39. data/lib/LittleWeasel/modules/class_name_to_symbol.rb +26 -0
  40. data/lib/LittleWeasel/modules/configurable.rb +26 -0
  41. data/lib/LittleWeasel/modules/deep_dup.rb +11 -0
  42. data/lib/LittleWeasel/modules/dictionary_cache_keys.rb +34 -0
  43. data/lib/LittleWeasel/modules/dictionary_cache_servicable.rb +26 -0
  44. data/lib/LittleWeasel/modules/dictionary_cache_validatable.rb +18 -0
  45. data/lib/LittleWeasel/modules/dictionary_creator_servicable.rb +27 -0
  46. data/lib/LittleWeasel/modules/dictionary_file_loader.rb +67 -0
  47. data/lib/LittleWeasel/modules/dictionary_key_validatable.rb +17 -0
  48. data/lib/LittleWeasel/modules/dictionary_keyable.rb +24 -0
  49. data/lib/LittleWeasel/modules/dictionary_metadata_servicable.rb +29 -0
  50. data/lib/LittleWeasel/modules/dictionary_metadata_validatable.rb +15 -0
  51. data/lib/LittleWeasel/modules/dictionary_source_validatable.rb +15 -0
  52. data/lib/LittleWeasel/modules/dictionary_sourceable.rb +86 -0
  53. data/lib/LittleWeasel/modules/dictionary_validatable.rb +18 -0
  54. data/lib/LittleWeasel/modules/language.rb +24 -0
  55. data/lib/LittleWeasel/modules/language_validatable.rb +14 -0
  56. data/lib/LittleWeasel/modules/locale.rb +23 -0
  57. data/lib/LittleWeasel/modules/order_validatable.rb +16 -0
  58. data/lib/LittleWeasel/modules/orderable.rb +17 -0
  59. data/lib/LittleWeasel/modules/region.rb +24 -0
  60. data/lib/LittleWeasel/modules/region_validatable.rb +14 -0
  61. data/lib/LittleWeasel/modules/tag_validatable.rb +14 -0
  62. data/lib/LittleWeasel/modules/taggable.rb +31 -0
  63. data/lib/LittleWeasel/modules/word_results_validatable.rb +28 -0
  64. data/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor.rb +22 -0
  65. data/lib/LittleWeasel/preprocessors/preprocessed_word.rb +29 -0
  66. data/lib/LittleWeasel/preprocessors/preprocessed_word_validatable.rb +56 -0
  67. data/lib/LittleWeasel/preprocessors/preprocessed_words.rb +59 -0
  68. data/lib/LittleWeasel/preprocessors/preprocessed_words_validatable.rb +28 -0
  69. data/lib/LittleWeasel/preprocessors/word_preprocessable.rb +19 -0
  70. data/lib/LittleWeasel/preprocessors/word_preprocessor.rb +123 -0
  71. data/lib/LittleWeasel/preprocessors/word_preprocessor_managable.rb +114 -0
  72. data/lib/LittleWeasel/preprocessors/word_preprocessor_validatable.rb +40 -0
  73. data/lib/LittleWeasel/preprocessors/word_preprocessors_validatable.rb +24 -0
  74. data/lib/LittleWeasel/services/dictionary_cache_service.rb +211 -0
  75. data/lib/LittleWeasel/services/dictionary_creator_service.rb +94 -0
  76. data/lib/LittleWeasel/services/dictionary_file_loader_service.rb +37 -0
  77. data/lib/LittleWeasel/services/dictionary_killer_service.rb +35 -0
  78. data/lib/LittleWeasel/services/dictionary_metadata_service.rb +116 -0
  79. data/lib/LittleWeasel/services/invalid_words_service.rb +59 -0
  80. data/lib/LittleWeasel/version.rb +3 -1
  81. data/lib/LittleWeasel/word_results.rb +146 -0
  82. data/lib/LittleWeasel.rb +5 -184
  83. data/spec/factories/dictionary.rb +43 -0
  84. data/spec/factories/dictionary_cache_service.rb +95 -0
  85. data/spec/factories/dictionary_creator_service.rb +16 -0
  86. data/spec/factories/dictionary_file_loader_service.rb +13 -0
  87. data/spec/factories/dictionary_hash.rb +39 -0
  88. data/spec/factories/dictionary_key.rb +14 -0
  89. data/spec/factories/dictionary_killer_service.rb +14 -0
  90. data/spec/factories/dictionary_manager.rb +10 -0
  91. data/spec/factories/dictionary_metadata.rb +16 -0
  92. data/spec/factories/dictionary_metadata_service.rb +16 -0
  93. data/spec/factories/numeric_filter.rb +12 -0
  94. data/spec/factories/preprocessed_word.rb +16 -0
  95. data/spec/factories/preprocessed_words.rb +41 -0
  96. data/spec/factories/single_character_word_filter.rb +12 -0
  97. data/spec/factories/word_results.rb +16 -0
  98. data/spec/lib/LittleWeasel/block_results_spec.rb +248 -0
  99. data/spec/lib/LittleWeasel/configure_spec.rb +74 -0
  100. data/spec/lib/LittleWeasel/dictionary_key_spec.rb +118 -0
  101. data/spec/lib/LittleWeasel/dictionary_manager_spec.rb +166 -0
  102. data/spec/lib/LittleWeasel/dictionary_spec.rb +289 -0
  103. data/spec/lib/LittleWeasel/filters/en_us/currency_filter_spec.rb +80 -0
  104. data/spec/lib/LittleWeasel/filters/en_us/numeric_filter_spec.rb +66 -0
  105. data/spec/lib/LittleWeasel/filters/en_us/single_character_word_filter_spec.rb +58 -0
  106. data/spec/lib/LittleWeasel/filters/word_filter_managable_spec.rb +180 -0
  107. data/spec/lib/LittleWeasel/filters/word_filter_spec.rb +151 -0
  108. data/spec/lib/LittleWeasel/filters/word_filter_validatable_spec.rb +94 -0
  109. data/spec/lib/LittleWeasel/filters/word_filters_validatable_spec.rb +48 -0
  110. data/spec/lib/LittleWeasel/integraton_tests/dictionary_integration_spec.rb +201 -0
  111. data/spec/lib/LittleWeasel/metadata/dictionary_creator_servicable_spec.rb +54 -0
  112. data/spec/lib/LittleWeasel/metadata/dictionary_metadata_spec.rb +209 -0
  113. data/spec/lib/LittleWeasel/metadata/invalid_words_metadata_spec.rb +155 -0
  114. data/spec/lib/LittleWeasel/metadata/metadata_observerable_spec.rb +31 -0
  115. data/spec/lib/LittleWeasel/metadata/metadatable_spec.rb +35 -0
  116. data/spec/lib/LittleWeasel/modules/class_name_to_symbol_spec.rb +21 -0
  117. data/spec/lib/LittleWeasel/modules/dictionary_file_loader_spec.rb +125 -0
  118. data/spec/lib/LittleWeasel/modules/dictionary_sourceable_spec.rb +81 -0
  119. data/spec/lib/LittleWeasel/modules/language_spec.rb +112 -0
  120. data/spec/lib/LittleWeasel/modules/locale_spec.rb +95 -0
  121. data/spec/lib/LittleWeasel/modules/region_spec.rb +112 -0
  122. data/spec/lib/LittleWeasel/preprocessors/en_us/capitalize_preprocessor_spec.rb +34 -0
  123. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_spec.rb +105 -0
  124. data/spec/lib/LittleWeasel/preprocessors/preprocessed_word_validatable_spec.rb +143 -0
  125. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_spec.rb +77 -0
  126. data/spec/lib/LittleWeasel/preprocessors/preprocessed_words_validatable_spec.rb +58 -0
  127. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_managable_spec.rb +242 -0
  128. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_spec.rb +218 -0
  129. data/spec/lib/LittleWeasel/preprocessors/word_preprocessor_validatable_spec.rb +109 -0
  130. data/spec/lib/LittleWeasel/preprocessors/word_preprocessors_validatable_spec.rb +49 -0
  131. data/spec/lib/LittleWeasel/services/dictionary_cache_service_spec.rb +444 -0
  132. data/spec/lib/LittleWeasel/services/dictionary_creator_service_spec.rb +119 -0
  133. data/spec/lib/LittleWeasel/services/dictionary_file_loader_service_spec.rb +71 -0
  134. data/spec/lib/LittleWeasel/services/dictionary_metadata_service_spec.rb +279 -0
  135. data/spec/lib/LittleWeasel/word_results_spec.rb +275 -0
  136. data/spec/lib/LittleWeasel/workflow/workflow_spec.rb +20 -0
  137. data/spec/spec_helper.rb +117 -6
  138. data/spec/support/factory_bot.rb +15 -0
  139. data/spec/support/file_helpers.rb +46 -0
  140. data/spec/support/files/empty-dictionary.txt +0 -0
  141. data/{lib/dictionary → spec/support/files/en-US-big.txt} +262156 -31488
  142. data/spec/support/files/en-US-tagged.txt +26 -0
  143. data/spec/support/files/en-US.txt +26 -0
  144. data/spec/support/files/en.txt +26 -0
  145. data/spec/support/files/es-ES.txt +27 -0
  146. data/spec/support/files/es.txt +27 -0
  147. data/spec/support/general_helpers.rb +68 -0
  148. data/spec/support/shared_contexts.rb +107 -0
  149. data/spec/support/shared_examples.rb +105 -0
  150. metadata +378 -38
  151. data/spec/checker/checker_spec.rb +0 -286
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../modules/dictionary_cache_servicable'
4
+ require_relative '../modules/dictionary_keyable'
5
+ require_relative '../modules/dictionary_metadata_validatable'
6
+
7
+ module LittleWeasel
8
+ module Services
9
+ # This class provides methods for managing and manipulating the
10
+ # dictionary metadata associated with the given dictionary
11
+ # (dictionary_key) for the supplied metadata_key.
12
+ class DictionaryMetadataService
13
+ include Modules::DictionaryKeyable
14
+ include Modules::DictionaryCacheServicable
15
+ include Modules::DictionaryMetadataValidatable
16
+
17
+ attr_reader :dictionary_metadata
18
+
19
+ # @example metadata Hash structure:
20
+ #
21
+ # {
22
+ # <dictionary_id!> =>
23
+ # {
24
+ # :<metadata_key> => <metadata_object>
25
+ # },
26
+ # ...
27
+ # }
28
+ #
29
+ def initialize(dictionary_key:, dictionary_cache:, dictionary_metadata:)
30
+ validate_dictionary_key dictionary_key: dictionary_key
31
+ self.dictionary_key = dictionary_key
32
+
33
+ validate_dictionary_cache dictionary_cache: dictionary_cache
34
+ self.dictionary_cache = dictionary_cache
35
+
36
+ validate_dictionary_metadata dictionary_metadata: dictionary_metadata
37
+ self.dictionary_metadata = dictionary_metadata
38
+ end
39
+
40
+ class << self
41
+ # This method initializes the dictionary_metadata object to its
42
+ # initialized state - all data is lost, but the object reference is
43
+ # maintained.
44
+ def init(dictionary_metadata:)
45
+ Modules::DictionaryMetadataValidatable.validate_dictionary_metadata \
46
+ dictionary_metadata: dictionary_metadata
47
+
48
+ dictionary_metadata.each_key { |key| dictionary_metadata.delete(key) }
49
+ dictionary_metadata
50
+ end
51
+
52
+ # Returns true if the dictionary metadata is initialized; that is, if
53
+ # it's in the same state the dictionary metadata would be in if #init
54
+ # were called.
55
+ def init?(dictionary_metadata:)
56
+ Modules::DictionaryMetadataValidatable.validate_dictionary_metadata \
57
+ dictionary_metadata: dictionary_metadata
58
+
59
+ initialized_dictionary_metadata = init(dictionary_metadata: {})
60
+ dictionary_metadata.eql?(initialized_dictionary_metadata)
61
+ end
62
+ alias initialized? init?
63
+ end
64
+
65
+ # This method resets the metadata stored under metadata_key for the
66
+ # dictionary identified by dictionary_id!; the entry is set to nil.
67
+ def init(metadata_key:)
68
+ metadata = dictionary_metadata[dictionary_id!]
69
+ metadata&.delete(metadata_key)
70
+ metadata = dictionary_metadata_init_if
71
+ metadata[metadata_key] = nil
72
+ self
73
+ end
74
+
75
+ # This method will return true if metadata exists for the dictionary
76
+ # associated with the given dictionary key, for the given metadata key.
77
+ def dictionary_metadata?(metadata_key:)
78
+ dictionary_metadata.dig(dictionary_id, metadata_key)&.present? || false
79
+ end
80
+
81
+ def get_dictionary_metadata(metadata_key:)
82
+ dictionary_metadata.dig(dictionary_id!, metadata_key)
83
+ end
84
+
85
+ def set_dictionary_metadata(value:, metadata_key:)
86
+ dictionary_metadata[dictionary_id!][metadata_key] = value
87
+ self
88
+ end
89
+
90
+ private
91
+
92
+ attr_writer :dictionary_metadata
93
+
94
+ def dictionary_metadata_init_needed?
95
+ dictionary_metadata[dictionary_id!].blank?
96
+ end
97
+
98
+ # This method initializes the metadata for the
99
+ # dictionary_id! if it is not already initialized.
100
+ # The metadata for the given dictionary_id! is returned.
101
+ def dictionary_metadata_init_if
102
+ return dictionary_metadata[dictionary_id!] unless dictionary_metadata_init_needed?
103
+
104
+ dictionary_metadata[dictionary_id!] = {}
105
+ end
106
+
107
+ def dictionary_id
108
+ dictionary_cache_service.dictionary_id
109
+ end
110
+
111
+ def dictionary_id!
112
+ dictionary_cache_service.dictionary_id!
113
+ end
114
+ end
115
+ end
116
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../metadata/invalid_words_service_results'
4
+
5
+ module LittleWeasel
6
+ module Services
7
+ # This class calculates the total number of bytes that cached invalid words
8
+ # take up in the given dictionary and returns the results. In addition,
9
+ # metadata is compiled to determine how many more bytes of invalid
10
+ # word data can be cached before the cache is depleted and shut down.
11
+ class InvalidWordsService
12
+ def initialize(dictionary)
13
+ self.dictionary = dictionary
14
+ self.current_bytesize = 0
15
+ end
16
+
17
+ def execute
18
+ return build_return unless max_invalid_words_bytesize?
19
+
20
+ self.current_bytesize = calculate_current_bytesize
21
+ build_return
22
+ end
23
+
24
+ private
25
+
26
+ attr_accessor :current_bytesize, :dictionary
27
+
28
+ def calculate_current_bytesize
29
+ dictionary.reduce(0) do |bytesize, word_and_found|
30
+ unless word_and_found.last
31
+ bytesize += word_and_found.first.bytesize
32
+ break unless bytesize < max_invalid_words_bytesize
33
+ end
34
+ bytesize
35
+ end
36
+ end
37
+
38
+ def build_return
39
+ Metadata::InvalidWordsServiceResults.new(
40
+ max_invalid_words_bytesize_on: max_invalid_words_bytesize?,
41
+ current_invalid_word_bytesize: current_bytesize,
42
+ max_invalid_words_bytesize: max_invalid_words_bytesize
43
+ )
44
+ end
45
+
46
+ def max_invalid_words_bytesize
47
+ @max_invalid_words_bytesize ||= config.max_invalid_words_bytesize
48
+ end
49
+
50
+ def max_invalid_words_bytesize?
51
+ config.max_invalid_words_bytesize?
52
+ end
53
+
54
+ def config
55
+ @config ||= LittleWeasel.configuration
56
+ end
57
+ end
58
+ end
59
+ end
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # The version of this gem
2
4
  module LittleWeasel
3
- VERSION = "3.0.3"
5
+ VERSION = '5.0.0'
4
6
  end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/core_ext/module/delegation'
4
+ require_relative 'modules/word_results_validatable'
5
+ require_relative 'preprocessors/preprocessed_words_validatable'
6
+
7
+ module LittleWeasel
8
+ # This class represents the results of gathering information about a word.
9
+ class WordResults
10
+ include Modules::WordResultsValidatable
11
+ include Preprocessors::PreprocessedWordsValidatable
12
+
13
+ attr_reader :filters_matched, :original_word, :preprocessed_words, :word_cached, :word_valid
14
+
15
+ delegate :preprocessed_word, to: :preprocessed_words, allow_nil: true
16
+
17
+ # Important: Regarding Boolean Methods
18
+ #
19
+ # The return value of some of the boolean methods (i.e. methods ending with
20
+ # a '?') of this class depend on whether or not #original_word
21
+ # has passed through any preprocessing. If #original_word has passed
22
+ # through preprocessing, the following boolean methods will reflect
23
+ # that of #preprocessed_word; if #original_word has NOT passed through
24
+ # any preprocessing, the following methods will reflect that of
25
+ # #original_word:
26
+ #
27
+ # #success?
28
+ # #filter_match?
29
+ # #word_cached?
30
+ # #word_valid?
31
+ #
32
+ # In other words, if #original_word has passed through preprocessing
33
+ # and has been altered by any of the preprocessing modules, it is the
34
+ # #preprocessed_word that is passed through any subsequent word filters,
35
+ # checked against the dictionary for validity, and cached, NOT
36
+ # #original_word.
37
+ # :reek:BooleanParameter - ignored, boolean params do not determine logic path, but only report status.
38
+ def initialize(original_word:, filters_matched: [],
39
+ preprocessed_words: nil, word_cached: false, word_valid: false)
40
+
41
+ self.original_word = original_word
42
+ self.filters_matched = filters_matched
43
+ self.word_cached = word_cached
44
+ self.word_valid = word_valid
45
+ self.preprocessed_words = preprocessed_words
46
+ end
47
+
48
+ def original_word=(value)
49
+ @original_word = value
50
+ validate_original_word
51
+ end
52
+
53
+ def filters_matched=(value)
54
+ @filters_matched = value
55
+ validate_filters_matched
56
+ end
57
+
58
+ def word_cached=(value)
59
+ @word_cached = value
60
+ validate_word_cached
61
+ end
62
+
63
+ def word_valid=(value)
64
+ @word_valid = value
65
+ vaidate_word_valid
66
+ end
67
+
68
+ def preprocessed_words=(value)
69
+ if value.present?
70
+ validate_prepreprocessed_words preprocessed_words: value
71
+ @preprocessed_words = value
72
+ else
73
+ @preprocessed_words = nil
74
+ end
75
+ end
76
+
77
+ # Returns true if the word is valid (found in the dictionary), or
78
+ # the word was matched against at least one filter; false, otherwise.
79
+ #
80
+ # Use the results of this method if you want to consider a word's
81
+ # validity as having been found in the dictionary as a valid word OR
82
+ # if the word has at least one word filter match. If the word has
83
+ # NOT passed through any word filters, or if the word DID NOT match any
84
+ # filters, yet it was found as a valid word in the dictionary, this
85
+ # method will return true and vice versa.
86
+ #
87
+ # See "Important: Regarding Boolean Methods" notes at the top of this
88
+ # class definition for more detail.
89
+ def success?
90
+ filter_match? || word_valid?
91
+ end
92
+
93
+ # Returns true if the word was found in the dictionary; false, otherwise.
94
+ #
95
+ # Use the results of this method if you want to consider a word's
96
+ # validity irrespective of whether or not the word has matched any word
97
+ # filters (if any).
98
+ #
99
+ # See "Important: Regarding Boolean Methods" notes at the top of this
100
+ # class definition for more detail.
101
+ def word_valid?
102
+ word_valid
103
+ end
104
+
105
+ # Returns true if the word was matched against at least one filter;
106
+ # false, otherwise.
107
+ #
108
+ # See "Important: Regarding Boolean Methods" notes at the top of this
109
+ # class definition for more detail.
110
+ def filter_match?
111
+ filters_matched.present?
112
+ end
113
+
114
+ # Returns true if #original_word passed through any preprocessing. If
115
+ # this is the case, #preprocessed_word may be different than
116
+ # #original_word. Preprocessing should take place before any filtering
117
+ # takes place.
118
+ #
119
+ # #word_cached, #word_valid and #filters_matched should all
120
+ # reflect that of the #preprocessed_word if #preprocessed_word is
121
+ # present?; otherwise, they should all reflect that of #original_word.
122
+ def preprocessed_word?
123
+ preprocessed_word.present?
124
+ end
125
+
126
+ # Returns #preprocessed_word (if available) or #original_word.
127
+ # #preprocessed_word will be present if #original_word has
128
+ # met the criteria for preprocessing and passed through at least
129
+ # one preprocessor.
130
+ #
131
+ # See "Important: Regarding Boolean Methods" notes at the top of this
132
+ # class definition for more detail.
133
+ def preprocessed_word_or_original_word
134
+ preprocessed_word || original_word
135
+ end
136
+
137
+ # Returns true if the word was found in the dictionary as a valid word
138
+ # OR if the word was found in the cache as an invalid word.
139
+ #
140
+ # See "Important: Regarding Boolean Methods" notes at the top of this
141
+ # class definition for more detail.
142
+ def word_cached?
143
+ word_cached
144
+ end
145
+ end
146
+ end
data/lib/LittleWeasel.rb CHANGED
@@ -1,186 +1,7 @@
1
- require 'singleton'
2
- require "LittleWeasel/version"
3
- require 'active_support/inflector'
1
+ # frozen_string_literal: true
4
2
 
5
- module LittleWeasel
3
+ require 'active_support/core_ext/object/blank'
6
4
 
7
- # Provides methods to interrogate the dictionary.
8
- class Checker
9
- include Singleton
10
-
11
- # Returns the dictionary.
12
- #
13
- # @return [Hash] the dictionary.
14
- attr_reader :dictionary
15
-
16
- private
17
-
18
- attr_reader :alphabet_exclusion_list
19
-
20
- # Keep these private...will expose as options later.
21
- attr_accessor :word_regex, :numeric_regex, :non_wordchar_regex
22
-
23
- public
24
-
25
- # The constructor
26
- def initialize
27
- @options = { exclude_alphabet: false, strip_whitespace: false, ignore_numeric: true, single_word_mode: false }
28
- @alphabet_exclusion_list = %w{ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z }
29
- @numeric_regex = /^[-+]?[0-9]?(\.[0-9]+)?$+/
30
- @word_regex = /\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/
31
- @non_wordchar_regex = /\W+/
32
- @dictionary = Hash.new(1)
33
- load
34
- end
35
-
36
- # Interrogates the dictionary to determine whether or not [word] exists.
37
- #
38
- # @param [String] word the word or words to interrogate
39
- # @param [Hash] options options to apply to this query (see #options=). Options passed to this
40
- # method are applied for this query only.
41
- #
42
- # @return [Boolean] true if the word/words in *word* exists, false otherwise.
43
- #
44
- # @example
45
- #
46
- # LittleWeasel::Checker.instance.exists?('C') # true (default options, :exclude_alphabet => false)
47
- # LittleWeasel::Checker.instance.exists?('A', {exclude_alphabet:true}) # false
48
- # LittleWeasel::Checker.instance.exists?('X', {exclude_alphabet:false}) # true
49
- # LittleWeasel::Checker.instance.exists?('Hello') # true
50
- #
51
- # LittleWeasel::Checker.instance.exists?(' Hello ') # false (default options, :strip_whitespace => false)
52
- # LittleWeasel::Checker.instance.exists?(' Yes ', {strip_whitespace:true}) # true
53
- # LittleWeasel::Checker.instance.exists?('No ', {strip_whitespace:false}) # false
54
- # LittleWeasel::Checker.instance.exists?('How dy', {strip_whitespace:true}) # false, strip_whitespace only removes leading and trailing spaces
55
- #
56
- # LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
57
- # LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
58
- #
59
- # LittleWeasel::Checker.instance.exists?('Hello World') # true, we're accepting multiple words now by default (default options, single_word_mode => false) :)
60
- # LittleWeasel::Checker.instance.exists?("hello, mister; did I \'mention\'' that lemon cake is \"great?\" It's just wonderful!") # true
61
- #
62
- # LittleWeasel::Checker.instance.exists?('I love ice cream', {single_word_mode:true}) # false; while all the words are valid, more than one word will return false
63
- #
64
- def exists?(word, options=nil)
65
- options = options || @options
66
-
67
- return false unless word.is_a?(String)
68
-
69
- word = word.dup
70
- word.strip! if options[:strip_whitespace]
71
-
72
- return false if word.empty?
73
-
74
- if block? word
75
- return false if options[:single_word_mode]
76
- return block_exists? word
77
- end
78
-
79
- return true if options[:ignore_numeric] && number?(word)
80
- return false if options[:exclude_alphabet] && word.length == 1 && @alphabet_exclusion_list.include?(word.upcase)
81
-
82
- valid_word? word
83
- end
84
-
85
- # Sets the global options for this gem.
86
- #
87
- # @param [Hash] options options that should apply to all subsequent calls to method *exists?* (see #exists?).
88
- # Options set via this property apply to all subsequent queries.
89
- #
90
- # @option options [Boolean] :exclude_alphabet (false) If false, letters of the alphabet are considered words.
91
- # @option options [Boolean] :strip_whitespace (false) If true, leading and trailing spaces are removed before checking to see if the word exists.
92
- # @option options [Boolean] :ignore_numeric (true) If true, numeric values are considered valid words.
93
- # @option options [Boolean] :single_word_mode (false) If false, word blocks (more than one word) are considered valid if all the words exist in the dictionary.
94
- #
95
- # @return [Hash] The options
96
- #
97
- # @example
98
- # LittleWeasel::Checker.instance.options({exclude_alphabet:true})
99
- # LittleWeasel::Checker.instance.exists?('A') # false
100
- #
101
- # LittleWeasel::Checker.instance.options({exclude_alphabet:false})
102
- # LittleWeasel::Checker.instance.exists?('A') # true
103
- #
104
- # LittleWeasel::Checker.instance.options({strip_whitespace:false})
105
- # LittleWeasel::Checker.instance.exists?(' Hello ') # false
106
- # LittleWeasel::Checker.instance.exists?('No ') # false
107
- # LittleWeasel::Checker.instance.exists?(' No') # false
108
- #
109
- # LittleWeasel::Checker.instance.options({strip_whitespace:true})
110
- # LittleWeasel::Checker.instance.exists?(' Yes ') # true
111
- # LittleWeasel::Checker.instance.exists?('How dy') # false, strip_whitespace only removes leading and trailing spaces
112
- #
113
- # LittleWeasel::Checker.instance.exists?('90210') # true (default options, ignore_numeric => true)
114
- # LittleWeasel::Checker.instance.exists?('90210', {ignore_numeric:false}) # false
115
- # LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210') # true (default options, ignore_numeric => true)
116
- # LittleWeasel::Checker.instance.exists?('I watch Beverly Hills 90210', {ignore_numeric:false}) # false
117
- #
118
- # LittleWeasel::Checker.instance.options({single_word_mode:true})
119
- # LittleWeasel::Checker.instance.exists?('I love ice cream') # false; while all the words are valid, more than one word will return false
120
- # LittleWeasel::Checker.instance.exists?('Baby') # true
121
- #
122
- def options=(options)
123
- @options = options
124
- end
125
-
126
- # Gets the global options currently set for this gem.
127
- #
128
- # @return [Hash] The options
129
- def options
130
- @options
131
- end
132
-
133
- protected
134
-
135
- def number?(word)
136
- word.strip.gsub(@numeric_regex).count > 0
137
- end
138
-
139
- def block?(string)
140
- string = string.dup
141
- return false unless string.is_a?(String)
142
- string.gsub!(@numeric_regex, "")
143
- return false unless string.length > 1
144
- string.strip.scan(/[\w'-]+/).length > 1
145
- end
146
-
147
- def block_exists?(word_block)
148
- word_block = word_block.dup
149
-
150
- word_block.gsub!(@numeric_regex, "") if options[:ignore_numeric]
151
- return false if word_block.nil?
152
- word_block.strip! unless word_block.nil?
153
- word_block.gsub!(@non_wordchar_regex, " ")
154
- word_block.split(@word_regex).uniq.each { |word|
155
- return false unless valid_block_word?(word)
156
- }
157
- return true
158
- end
159
-
160
- def valid_word?(word)
161
- word = word.dup.downcase
162
- exists = dictionary.has_key?(word)
163
- exists = dictionary.has_key?(word.singularize) unless exists
164
- exists
165
- end
166
-
167
- def valid_block_word?(word)
168
- return true if word.length == 1
169
- valid_word? word.strip
170
- end
171
-
172
- private
173
-
174
- def dictionary_path
175
- File.expand_path(File.dirname(__FILE__) + '/dictionary')
176
- end
177
-
178
- def load
179
- File.open(dictionary_path) do |io|
180
- io.each { |line| line.chomp!; @dictionary[line] = line }
181
- end
182
- end
183
-
184
- end
185
-
186
- end
5
+ Dir[File.join('.', 'lib/LittleWeasel/**/*.rb')].each do |f|
6
+ require f
7
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: false
2
+
3
+ FactoryBot.define do
4
+ factory :dictionary, class: LittleWeasel::Dictionary do
5
+ dictionary_key { create(:dictionary_key) }
6
+ dictionary_cache { {} }
7
+ dictionary_metadata { {} }
8
+ word_filters {}
9
+ dictionary_words do
10
+ %w(apple
11
+ better
12
+ cat
13
+ dog
14
+ everyone
15
+ fat
16
+ game
17
+ help
18
+ italic
19
+ jasmine
20
+ kelp
21
+ love
22
+ man
23
+ nope
24
+ octopus
25
+ popeye
26
+ queue
27
+ ruby
28
+ stop
29
+ top
30
+ ultimate
31
+ very
32
+ was
33
+ xylophone
34
+ yes
35
+ zebra)
36
+ end
37
+
38
+ skip_create
39
+ initialize_with do
40
+ new dictionary_key: dictionary_key, dictionary_cache: dictionary_cache, dictionary_metadata: dictionary_metadata, dictionary_words: dictionary_words, word_filters: word_filters
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: false
2
+
3
+ require 'pry'
4
+
5
+ FactoryBot.define do
6
+ factory :dictionary_cache_service, class: LittleWeasel::Services::DictionaryCacheService do
7
+ dictionary_key { create(:dictionary_key) }
8
+ dictionary_cache { {} }
9
+
10
+ transient do
11
+ # The dictionary reference created in the cache will point to a MEMORY source.
12
+ #
13
+ # Valid values: nil | true | false | <Array of dictionary words>
14
+ #
15
+ # If nil or false - No memory source will be added to the dictionary cache.
16
+ # If true - A memory source will be added to the dictionary cache.
17
+ # If <An Array of dictionary words> - A memory source will be added to the dictionary cache.
18
+ # This only makes sense if load == true.
19
+ dictionary_memory_source {}
20
+
21
+ # The dictionary reference created in the cache will point to a FILE source.
22
+ #
23
+ # Important: when dictionary_file_source is truthy it takes precedence over
24
+ # dictionary_memory_source, which is then ignored.
25
+ #
26
+ # Valid values: nil | true | false | <Path to dictionary file>
27
+ #
28
+ # If nil or false - No file source will be added to the dictionary cache.
29
+ #
30
+ # If true - A file source will be added to the dictionary cache.
31
+ # dictionary_key.key will be used to create the dictionary
32
+ # file path.
33
+ # If <Path to dictionary file> - A file source will be added to the dictionary cache.
34
+ # The file source will point to <Path to dictionary file>.
35
+ dictionary_file_source {}
36
+
37
+ # If load == true - A dictionary object will be created and added to the dictionary cache
38
+ # depending on the dictionary source (file or memory).
39
+ load { false }
40
+ end
41
+
42
+ skip_create
43
+ initialize_with do
44
+ new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
45
+ end
46
+
47
+ after :create do |dictionary_cache_service, evaluator|
48
+ dictionary_key = dictionary_cache_service.dictionary_key
49
+ dictionary_cache = dictionary_cache_service.dictionary_cache
50
+
51
+ # Initialize the dictionary cache unless the user passed a dictionary cache
52
+ # whose count is already positive; in that case, just use what they passed us.
53
+ dictionary_cache_service.class.init(dictionary_cache: dictionary_cache) \
54
+ unless dictionary_cache_service.class.count(dictionary_cache: dictionary_cache).positive?
55
+
56
+ load = evaluator.load
57
+ dictionary_memory_source = evaluator.dictionary_memory_source
58
+ dictionary_file_source = evaluator.dictionary_file_source
59
+
60
+ if load
61
+ unless dictionary_memory_source.present? || dictionary_file_source.present?
62
+ raise 'Transient attributes dictionary_memory_source or dictionary_file_source ' \
63
+ "must be present if transient attribute load is true: #{dictionary_reference}"
64
+ end
65
+ end
66
+
67
+ if dictionary_file_source
68
+ file_name = if dictionary_file_source == true
69
+ dictionary_key.key
70
+ else
71
+ dictionary_file_source
72
+ end
73
+ dictionary_cache_service.add_dictionary_source(dictionary_source: dictionary_path_for(file_name: file_name))
74
+ elsif dictionary_memory_source
75
+ dictionary_cache_service.add_dictionary_source(dictionary_source: LittleWeasel::Modules::DictionarSourceable.memory_source)
76
+ end
77
+
78
+ if load
79
+ dictionary_words = if dictionary_file_source
80
+ dictionary_file_loader_service = create(:dictionary_file_loader_service, dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
81
+ dictionary_file_loader_service.execute
82
+ else
83
+ unless dictionary_memory_source.is_a? Array
84
+ raise 'Transient attribute dictionary_memory_source must be an Array of words ' \
85
+ "if transient attribute load == true: #{dictionary_memory_source}"
86
+ end
87
+ dictionary_memory_source
88
+ end
89
+ dictionary_cache_service.dictionary_object = create(:dictionary, dictionary_key: dictionary_key, dictionary_cache: dictionary_cache, dictionary_words: dictionary_words)
90
+ end
91
+
92
+ dictionary_cache_service
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: false
2
+
3
+ FactoryBot.define do
4
+ factory :dictionary_creator_service, class: LittleWeasel::Services::DictionaryCreatorService do
5
+ dictionary_key { create(:dictionary_key) }
6
+ dictionary_cache { {} }
7
+ dictionary_metadata { {} }
8
+ word_filters {}
9
+ word_preprocessors {}
10
+
11
+ skip_create
12
+ initialize_with do
13
+ new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache, dictionary_metadata: dictionary_metadata, word_filters: word_filters, word_preprocessors: word_preprocessors)
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: false
2
+
3
+ FactoryBot.define do
4
+ factory :dictionary_file_loader_service, class: LittleWeasel::Services::DictionaryFileLoaderService do
5
+ dictionary_key { create(:dictionary_key) }
6
+ dictionary_cache { {} }
7
+
8
+ skip_create
9
+ initialize_with do
10
+ new(dictionary_key: dictionary_key, dictionary_cache: dictionary_cache)
11
+ end
12
+ end
13
+ end