kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'vocabulary'
4
+ require_relative 'onnx_runtime_model'
5
+ require_relative 'similarity_engine'
6
+ require_relative 'search'
7
+ require_relative 'lru_cache'
8
+
9
# EmbeddingPipeline - Unified API for embedding-based similarity search
#
# Bundles a vocabulary, an embedding model, a SimilarityEngine and a Search
# instance behind one object and delegates every query to them. This is the
# recommended entry point.
#
# @example Simple usage (one line)
#   pipeline = EmbeddingPipeline.from_cache(language: 'en')
#
# @example Full configuration
#   pipeline = EmbeddingPipeline.new(
#     vocabulary: vocab,
#     model: model,
#     preload: true
#   )
#
# @example Finding similar words
#   neighbors = pipeline.find_nearest('semantic', k: 5)
#   neighbors.each { |r| puts "#{r[:word]}: #{r[:similarity].round(4)}" }
#
class EmbeddingPipeline
  # @return [Vocabulary]
  attr_reader :vocabulary

  # @return [EmbeddingModel]
  attr_reader :model

  # @return [SimilarityEngine]
  attr_reader :similarity_engine

  # @return [Search]
  attr_reader :search

  # Create pipeline from cache (one-line initialization)
  #
  # Asks the cache for the vocabulary path and the ONNX model path of the
  # language and hands both to {from_files}.
  #
  # @param language [String] ISO 639-1 language code
  # @param cache [Cache::ModelCache, nil] Cache instance; any object that
  #   responds to +find_vocab(language)+ and +find_model(language, :onnx)+
  #   works. When nil, a default Cache::ModelCache is built.
  # @param preload [Boolean] Preload embeddings into memory
  # @param index [Symbol] Search index type, forwarded to {#initialize}
  # @return [EmbeddingPipeline]
  #
  # @raise [ArgumentError] If no cached vocabulary or model is found for language
  #
  def self.from_cache(language:, cache: nil, preload: false, index: :exact)
    # The cache implementation is only loaded when the caller did not
    # supply one, so injected caches never depend on that file.
    cache ||= default_model_cache

    vocab_path = cache.find_vocab(language)
    model_path = cache.find_model(language, :onnx)

    if vocab_path.nil? || model_path.nil? || vocab_path == false || model_path == false
      raise ArgumentError, "No cached model for language: #{language}. " \
                           "Run: ruby scripts/extract_vocabularies.rb --languages=#{language}"
    end

    from_files(
      vocab_path: vocab_path,
      model_path: model_path,
      language: language,
      preload: preload,
      index: index
    )
  end

  # Create pipeline from files
  #
  # @param vocab_path [String] Path to vocabulary JSON file
  # @param model_path [String] Path to ONNX model file
  # @param language [String] Language code
  # @param preload [Boolean] Preload embeddings
  # @param index [Symbol] Search index type, forwarded to {#initialize}
  # @return [EmbeddingPipeline]
  #
  def self.from_files(vocab_path:, model_path:, language:, preload: false, index: :exact)
    vocab = Vocabulary.from_file(vocab_path, language_code: language)
    model = OnnxRuntimeModel.from_file(model_path, language_code: language)

    new(vocabulary: vocab, model: model, preload: preload, index: index)
  end

  # Build the default model cache, loading its implementation on demand.
  #
  # The path is relative to this file (lib/kotoshu/embeddings/), so the cache
  # implementation in lib/kotoshu/cache/ is one directory up.
  # NOTE(review): assumes the loaded file makes +Cache::ModelCache+ resolvable
  # from this top-level class - confirm against cache/model_cache.rb.
  #
  # @return [Cache::ModelCache]
  #
  def self.default_model_cache
    require_relative '../cache/model_cache'
    Cache::ModelCache.new
  end
  private_class_method :default_model_cache

  # Create pipeline with full configuration
  #
  # @param vocabulary [Vocabulary] Vocabulary instance
  # @param model [EmbeddingModel] Model instance
  # @param preload [Boolean] Preload embeddings (calls {#preload_embeddings!})
  # @param index [Symbol] Search index type. Accepted for API compatibility;
  #   this class does not currently consult it or forward it to Search.
  # @param pre_normalize [Boolean] Pre-normalize vectors (passed to both the
  #   SimilarityEngine and the Search instance)
  # @param cache_size [Integer] Embedding cache size. Stored on the pipeline
  #   but not currently forwarded to Search.
  #
  def initialize(vocabulary:, model:, preload: false, index: :exact, pre_normalize: false, cache_size: 1000)
    @vocabulary = vocabulary
    @model = model
    @similarity_engine = SimilarityEngine.new(pre_normalize: pre_normalize)
    @cache_size = cache_size

    # The search engine shares the vocabulary, model and similarity engine.
    @search = Search.new(
      vocabulary: vocabulary,
      model: model,
      similarity_engine: @similarity_engine,
      pre_normalize: pre_normalize
    )

    preload_embeddings! if preload
  end

  # Find k nearest neighbors for a word (delegates to Search#find_nearest)
  #
  # @param word [String] Query word
  # @param k [Integer] Number of neighbors
  # @param exclude_self [Boolean] Exclude query word
  # @param min_similarity [Float] Minimum similarity threshold
  # @return [Array<Hash>] Array of {word, similarity, index}
  #
  def find_nearest(word, k: 10, exclude_self: true, min_similarity: 0.0)
    @search.find_nearest(word, k: k, exclude_self: exclude_self, min_similarity: min_similarity)
  end

  # Find nearest neighbors for multiple words (delegates to Search#find_nearest_batch)
  #
  # @param words [Array<String>] Query words
  # @param k [Integer] Neighbors per word
  # @return [Hash<String, Array<Hash>>]
  #
  def find_nearest_batch(words, k: 10)
    @search.find_nearest_batch(words, k: k)
  end

  # Compute similarity between two words (delegates to Search#similarity)
  #
  # @param word1 [String] First word
  # @param word2 [String] Second word
  # @return [Float, nil] Similarity or nil if either word not found
  #
  def similarity(word1, word2)
    @search.similarity(word1, word2)
  end

  # Get embedding for a word, resolved through this pipeline's vocabulary
  #
  # @param word [String] Word
  # @return [Array<Float>, nil]
  #
  def get_embedding(word)
    @model.get_embedding_for_word(word, @vocabulary)
  end

  # Get embedding by index
  #
  # @param index [Integer] Word index
  # @return [Array<Float>, nil]
  #
  def get_embedding_by_index(index)
    @model.get_embedding(index)
  end

  # Check if word exists in vocabulary
  #
  # @param word [String] Word
  # @return [Boolean]
  #
  def include?(word)
    @vocabulary.include?(word)
  end

  # Preload all embeddings into memory
  #
  # Loads the model first, then asks the search engine to preload.
  #
  # @return [self]
  #
  def preload_embeddings!
    @model.load!
    @search.preload_embeddings!
    self
  end

  # Unload model from memory and clear the search engine's cache
  #
  # @return [self]
  #
  def unload!
    @model.unload!
    @search.clear_cache
    self
  end

  # Get pipeline statistics
  #
  # @return [Hash] Keys: :language, :vocabulary_size, :embedding_dimension,
  #   :model_loaded, :embeddings_preloaded, :cache_stats
  #
  def stats
    # Search exposes no reader for its embedding cache here, so the ivar is
    # read directly; nil (no cache) yields nil cache_stats.
    embedding_cache = @search.instance_variable_get(:@embedding_cache)

    {
      language: @vocabulary.language_code,
      vocabulary_size: @vocabulary.size,
      embedding_dimension: @model.dimension,
      model_loaded: @model.loaded?,
      embeddings_preloaded: @search.embeddings_loaded,
      cache_stats: embedding_cache&.stats
    }
  end

  # Get model information
  #
  # @return [Hash]
  #
  def model_info
    @model.model_info
  end

  # String representation
  #
  # @return [String]
  #
  def to_s
    "EmbeddingPipeline(language: #{@vocabulary.language_code}, " \
      "vocab_size: #{@vocabulary.size}, " \
      "dimension: #{@model.dimension}, " \
      "loaded: #{@model.loaded?})"
  end
  alias inspect to_s

  # Convenience class methods
  class << self
    # Create pipeline for a specific language (shortcut for from_cache)
    #
    # Takes the same keyword arguments as from_cache, e.g.
    # EmbeddingPipeline[language: 'en'].
    #
    # @return [EmbeddingPipeline]
    #
    alias :[] :from_cache
  end
end
@@ -0,0 +1,233 @@
1
+ # frozen_string_literal: true
2
+
3
# LruCache - Least Recently Used Cache
#
# Provides O(1) get/set LRU caching with optional TTL support.
# Used for caching embeddings during similarity search.
#
# Recency is tracked with Ruby's insertion-ordered Hash: the first entry is
# the least recently used one, and touching an entry re-inserts it at the
# end. Expired entries are purged lazily, when they are next looked up.
#
# @example Basic usage
#   cache = LruCache.new(max_size: 1000)
#   cache[:key] = value
#   cache[:key] # => value
#
# @example With TTL
#   cache = LruCache.new(max_size: 1000, ttl: 300) # 5 minutes
#
class LruCache
  # @return [Integer] Maximum number of entries
  attr_reader :max_size

  # @return [Integer, nil] TTL in seconds
  attr_reader :ttl

  # @return [Integer] Number of cache hits
  attr_reader :hits

  # @return [Integer] Number of cache misses
  attr_reader :misses

  # Create a new LRU cache
  #
  # @param max_size [Integer] Maximum number of entries (default: 1000).
  #   A value below 1 still keeps the single most recent entry.
  # @param ttl [Numeric, nil] Time-to-live in seconds (default: nil = no expiry)
  #
  def initialize(max_size: 1000, ttl: nil)
    @max_size = max_size
    @ttl = ttl
    # key -> {value:, accessed_at:, created_at:}; iteration order runs from
    # least recently used to most recently used.
    @cache = {}
    @hits = 0
    @misses = 0
  end

  # Get value for key
  #
  # A successful read counts as a hit and marks the key as most recently
  # used. An absent or expired key counts as a miss.
  #
  # @param key [Object] Cache key
  # @return [Object, nil] Cached value or nil if not found/expired
  #
  def [](key)
    entry = read_entry(key)
    entry && entry[:value]
  end

  # Set value for key
  #
  # Overwriting an existing key replaces its value, restarts its TTL and
  # marks it as most recently used. Inserting a new key at capacity first
  # evicts the least recently used entry.
  #
  # @param key [Object] Cache key
  # @param value [Object] Value to cache
  # @return [Object] The value
  #
  def []=(key, value)
    now = Time.now

    if @cache.key?(key)
      # Drop the old slot so the re-insert below lands at the MRU end.
      @cache.delete(key)
    elsif @cache.size >= @max_size
      evict_lru
    end

    @cache[key] = { value: value, accessed_at: now, created_at: now }
    value
  end

  # Check if key exists
  #
  # Does not affect recency or hit/miss statistics. An expired entry is
  # removed as a side effect.
  #
  # @param key [Object] Cache key
  # @return [Boolean] True if key exists and not expired
  #
  def key?(key)
    !fresh_entry(key).nil?
  end

  # Delete key from cache
  #
  # @param key [Object] Cache key
  # @return [Object, nil] Deleted value or nil
  #
  def delete(key)
    removed = @cache.delete(key)
    removed && removed[:value]
  end

  # Clear all entries (hit/miss counters are kept)
  #
  # @return [self]
  #
  def clear
    @cache.clear
    self
  end

  # Get current size
  #
  # @return [Integer] Number of entries
  #
  def size
    @cache.size
  end

  # Check if empty
  #
  # @return [Boolean]
  #
  def empty?
    @cache.empty?
  end

  # Get least recently used key-value pair
  #
  # @return [Array<Object, Object>, nil]
  #
  def lru
    return nil if @cache.empty?

    key, entry = @cache.first
    [key, entry[:value]]
  end

  # Get most recently used key-value pair
  #
  # Walks the key list, so this is O(n); intended for inspection rather
  # than hot paths.
  #
  # @return [Array<Object, Object>, nil]
  #
  def mru
    return nil if @cache.empty?

    key = @cache.keys.last
    [key, @cache[key][:value]]
  end

  # Get all keys
  #
  # @return [Array<Object>] Array of keys, most recently used first
  #
  def keys
    @cache.keys.reverse
  end

  # Get all values
  #
  # @return [Array<Object>] Array of values, most recently used first
  #
  def values
    @cache.each_value.map { |entry| entry[:value] }.reverse
  end

  # Get cache statistics
  #
  # @return [Hash] Statistics (:size, :max_size, :hits, :misses, :hit_rate, :ttl)
  #
  def stats
    lookups = @hits + @misses
    {
      size: size,
      max_size: @max_size,
      hits: @hits,
      misses: @misses,
      hit_rate: lookups.zero? ? 0.0 : @hits.to_f / lookups,
      ttl: @ttl
    }
  end

  # Fetch with block (cache-aside pattern)
  #
  # Returns the cached value when the key is present (even if that value is
  # nil or false); otherwise stores and returns the block's result. Each
  # call records exactly one hit or one miss.
  #
  # @param key [Object] Cache key
  # @yieldreturn [Object] Value to cache when the key is missing
  # @return [Object] Cached value or block result
  # @raise [KeyError] If the key is missing and no block is given
  #
  def fetch(key, &block)
    entry = read_entry(key)
    return entry[:value] if entry

    raise KeyError, "key not found: #{key.inspect}" unless block

    value = block.call
    self[key] = value
    value
  end

  private

  # Look up an entry for a read: updates hit/miss counters and recency.
  #
  # @return [Hash, nil] The live entry, or nil on a miss
  #
  def read_entry(key)
    entry = fresh_entry(key)
    if entry.nil?
      @misses += 1
      return nil
    end

    @hits += 1
    # Re-insert so the key moves to the most-recently-used end.
    @cache.delete(key)
    entry[:accessed_at] = Time.now
    @cache[key] = entry
  end

  # Return the entry for key unless it is absent or expired; an expired
  # entry is purged. Leaves counters and recency untouched.
  #
  # @return [Hash, nil]
  #
  def fresh_entry(key)
    entry = @cache[key]
    return nil unless entry
    return entry unless expired?(entry)

    @cache.delete(key)
    nil
  end

  # Whether the entry has outlived the configured TTL.
  #
  def expired?(entry)
    return false unless @ttl

    (Time.now - entry[:created_at]) > @ttl
  end

  # Evict least recently used entry (the first one in hash order)
  #
  def evict_lru
    @cache.shift unless @cache.empty?
  end
end