kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,356 @@
1
+ # frozen_string_literal: true
2
+
3
require "fileutils"
require "json"
require "time"

require "thor"

require_relative "../cache/language_cache"
require_relative "../configuration"
7
+
8
+ module Kotoshu
9
+ module Cli
10
+ # Cache management commands.
11
+ #
12
+ # Provides CLI commands for managing the dictionary cache
13
+ # with automatic GitHub download support.
14
+ #
15
+ # @example List available languages
16
+ # kotoshu cache list
17
+ #
18
+ # @example Download a specific language
19
+ # kotoshu cache download de
20
+ #
21
+ # @example Show cache status
22
+ # kotoshu cache status
23
+ #
24
+ # @example Remove cached data
25
+ # kotoshu cache purge
26
+ class CacheCommand < Thor
27
+ class_option :verbose,
28
+ type: :boolean,
29
+ default: false,
30
+ desc: "Enable verbose output",
31
+ aliases: ["-v"]
32
+
33
+ desc "list", "List available languages and their cache status"
34
+ method_option :verbose, type: :boolean, aliases: '-v', desc: 'Show detailed information'
35
# Print every language known to the cache, grouped into "cached" and
# "not cached" sections. A section is omitted entirely when it is empty.
#
# With the verbose option set, each entry also shows its word count, and
# cached entries additionally show their license.
#
# @return [void]
def list
  report = create_cache.cache_status
  detailed = options[:verbose]

  puts "Available languages:"
  puts

  # Cached languages come first and are marked with a check.
  unless report[:cached].empty?
    puts "Cached languages:"
    report[:cached].each do |entry|
      line = " #{entry[:code]}: #{entry[:name]}"
      line += " (#{entry[:word_count]} words)" if detailed
      line += " [#{entry[:license]}]" if detailed
      puts "#{line} ✓"
    end
    puts
  end

  # Languages that still have to be fetched.
  unless report[:not_cached].empty?
    puts "Not cached (will be downloaded on first use):"
    report[:not_cached].each do |entry|
      line = " #{entry[:code]}: #{entry[:name]}"
      line += " (#{entry[:word_count]} words)" if detailed
      print "#{line}\n"
    end
  end
end
64
+
65
+ desc "status", "Show cache status and statistics"
66
# Print cache statistics: cache directory, language counts, total size of
# all files under the cache directory, and the oldest/newest downloads.
#
# Download times are read from each cached language's
# `spelling/metadata.json`. Entries whose metadata cannot be read, is not a
# JSON object, or has no valid ISO8601 `downloaded_at` are skipped, so one
# damaged entry cannot abort the whole report.
#
# @return [void]
def status
  cache = create_cache
  all_status = cache.cache_status

  total_languages = cache.available_languages.size
  cached_count = all_status[:cached].size
  not_cached_count = all_status[:not_cached].size

  puts "Cache Status:"
  puts " Cache directory: #{cache.cache_path}"
  puts " Total languages: #{total_languages}"
  puts " Cached: #{cached_count}"
  puts " Not cached: #{not_cached_count}"
  puts

  # Sum the size of every regular file below the cache directory.
  cache_size = Dir.glob(File.join(cache.cache_path, '**', '*'))
                  .select { |f| File.file?(f) }
                  .sum { |f| File.size(f) }

  puts "Cache size: #{format_bytes(cache_size)}"

  # Collect [code, download time] pairs; nil marks an entry to skip.
  all_cached = all_status[:cached].map do |info|
    spelling_meta = File.join(cache.cache_path, info[:code], 'spelling', 'metadata.json')
    next unless File.file?(spelling_meta)

    begin
      metadata = JSON.parse(File.read(spelling_meta, encoding: 'UTF-8'))
      next unless metadata.is_a?(Hash)

      # to_s turns a missing timestamp into "", which iso8601 rejects.
      [info[:code], Time.iso8601(metadata['downloaded_at'].to_s)]
    rescue JSON::ParserError, ArgumentError, SystemCallError
      nil
    end
  end.compact

  return if all_cached.empty?

  oldest = all_cached.min_by { |_, time| time }
  newest = all_cached.max_by { |_, time| time }

  puts
  puts "Oldest cache: #{oldest[0]} (#{oldest[1].strftime('%Y-%m-%d %H:%M')})"
  puts "Newest cache: #{newest[0]} (#{newest[1].strftime('%Y-%m-%d %H:%M')})"
end
107
+
108
+ desc "download LANGUAGE", "Download dictionary for a language from GitHub"
109
+ method_option :force, type: :boolean, aliases: '-f', desc: 'Force re-download even if cached'
110
# Fetch (or reuse) the Hunspell dictionary for a language, then attempt the
# optional frequency data.
#
# Exits with status 1 when the language is not offered by the cache or when
# the dictionary step raises. A failure in the frequency step only prints a
# warning and the command still succeeds.
#
# @param language [String] Language code
# @return [void]
def download(language)
  cache = create_cache

  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  begin
    puts "Downloading #{language} dictionary from GitHub..."

    # Get dictionary (download if needed)
    dictionary = cache.get_dictionary(language, force_download: options[:force])
    fresh = options[:force] || !dictionary[:metadata]['downloaded_at']

    puts(fresh ? " ✓ Hunspell dictionary downloaded" : " ✓ Using cached Hunspell dictionary")
    puts " Location: #{File.dirname(dictionary[:dic_path])}"
    if fresh
      puts " Version: #{dictionary[:metadata]['version']}"
    else
      puts " Cached: #{dictionary[:metadata]['downloaded_at']}"
    end

    # Frequency data is best-effort: it may not be published for this language.
    begin
      frequency = cache.get_frequency_data(language, force_download: options[:force])
      reused = !options[:force] && frequency[:metadata]['downloaded_at']
      puts(reused ? " ✓ Using cached frequency data" : " ✓ Frequency data downloaded")
    rescue StandardError => e
      puts " ⚠ Frequency data not available (#{e.message})"
    end

    puts
    puts "Dictionary for '#{language}' is ready to use!"
  rescue StandardError => e
    puts "Error downloading dictionary: #{e.message}"
    exit(1)
  end
end
156
+
157
+ desc "info LANGUAGE", "Show information about a language"
158
# Print the catalog details of a language and, when it is cached, the
# download time and checksum recorded in its metadata files.
#
# Exits with status 1 when the language is not offered by the cache.
#
# @param language [String] Language code
# @return [void]
def info(language)
  cache = create_cache

  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  details = cache.get_language_info(language)

  puts "Language: #{details[:name]}"
  puts "Code: #{language}"
  { "Word count" => :word_count, "License" => :license, "Source" => :source }.each do |label, key|
    puts "#{label}: #{details[key]}"
  end

  cached = details[:downloaded]
  puts "Cached: #{cached ? 'Yes' : 'No'}"
  return unless cached

  # Each resource kind keeps its own metadata.json below the language directory.
  language_dir = File.join(cache.cache_path, language)
  [['spelling', "Hunspell Dictionary:"], ['frequency', "Frequency Data:"]].each do |subdir, heading|
    metadata_file = File.join(language_dir, subdir, 'metadata.json')
    next unless File.exist?(metadata_file)

    metadata = JSON.parse(File.read(metadata_file, encoding: 'UTF-8'))
    puts
    puts heading
    puts " Downloaded: #{metadata['downloaded_at']}"
    puts " Checksum: #{metadata['checksum']}"
  end

  nil
end
202
+
203
+ desc "purge [LANGUAGE]", "Remove cached dictionary data (for a language or all languages)"
204
+ method_option :confirm, type: :boolean, default: false, desc: "Skip confirmation"
205
# Remove cached data, either for one language or for the whole cache.
#
# Without a language the whole cache is purged. Unless the `confirm` option
# is set, the user is first asked on standard input, and only an answer
# starting with "y" or "Y" proceeds. End of input (closed or empty stdin)
# counts as "no".
#
# With a language, that language's cache directory is deleted. The command
# exits with status 1 when the language is not offered by the cache.
#
# @param language [String, nil] Language code, or nil to purge everything
# @return [void]
def purge(language = nil)
  cache = create_cache

  if language.nil?
    # Purge all
    unless options[:confirm]
      puts "This will remove all cached dictionaries and frequency data."
      print "Are you sure? [y/N] "
      # gets returns nil at EOF; to_s keeps that from raising NoMethodError.
      answer = $stdin.gets.to_s
      return unless answer.start_with?('y', 'Y')
    end

    count = cache.purge_all
    puts "Purged #{count} files from cache"
  else
    # Purge specific language
    unless cache.available_languages.include?(language)
      puts "Error: Unknown language '#{language}'"
      puts
      puts "Available languages: #{cache.available_languages.join(', ')}"
      exit(1)
    end

    lang_path = File.join(cache.cache_path, language)

    if File.exist?(lang_path)
      # Count before deleting so the summary can report what was removed.
      count = Dir.glob(File.join(lang_path, '**', '*')).count { |f| File.file?(f) }
      FileUtils.rm_rf(lang_path)
      puts "Purged #{language} cache (#{count} files)"
    else
      puts "No cached data for #{language}"
    end
  end
end
238
+
239
+ desc "validate LANGUAGE", "Validate cached resources for a language"
240
# Report the health of the cached resources of one language.
#
# For the spelling dictionary this prints whether the AFF and DIC files
# exist, whether the metadata file is usable, whether the DIC file matches
# the recorded checksum (only when one is recorded), and whether the entry
# is expired. Metadata that is not a parseable JSON object is reported as
# invalid instead of aborting the command. Frequency data is optional and
# only checked for presence of its metadata file.
#
# @param language [String] Language code
# @return [void]
def validate(language)
  cache = create_cache

  puts "Validating #{language}..."

  unless cache.available_languages.include?(language)
    puts " ✗ Unknown language"
    return
  end

  # Check spelling
  spelling_path = File.join(cache.cache_path, language, 'spelling')
  spelling_meta = File.join(spelling_path, 'metadata.json')

  if File.exist?(spelling_meta)
    metadata = begin
      JSON.parse(File.read(spelling_meta, encoding: 'UTF-8'))
    rescue JSON::ParserError, SystemCallError
      nil
    end
    aff_file = File.join(spelling_path, 'index.aff')
    dic_file = File.join(spelling_path, 'index.dic')

    puts " Spelling:"
    puts " AFF file: #{File.exist?(aff_file) ? '✓' : '✗'}"
    puts " DIC file: #{File.exist?(dic_file) ? '✓' : '✗'}"
    if metadata.is_a?(Hash)
      puts " Metadata: ✓"
      puts " Checksum: #{verify_checksum(dic_file, metadata['checksum']) ? '✓' : '✗'}" if metadata['checksum']
      puts " Expired: #{expired?(metadata) ? 'Yes' : 'No'}"
    else
      # Corrupt metadata is exactly what this command should surface.
      puts " Metadata: ✗ Invalid"
    end
  else
    puts " Spelling: ✗ Not cached"
  end

  # Check frequency
  freq_path = File.join(cache.cache_path, language, 'frequency')
  freq_meta = File.join(freq_path, 'metadata.json')

  if File.exist?(freq_meta)
    puts " Frequency: ✓"
  else
    puts " Frequency: ✗ Not cached (optional)"
  end
end
279
+
280
+ private
281
+
282
# Build the language cache used by the commands of this class.
#
# The cache location is taken from the `:cache_path` entry of the command
# options and handed to the constructor unchanged.
# NOTE(review): no `cache_path` option is declared in the visible part of
# this class, so the value is presumably nil here — confirm.
#
# @return [Cache::LanguageCache] The cache instance
def create_cache
  location = options[:cache_path]
  Cache::LanguageCache.new(cache_path: location)
end
290
+
291
# Format a byte count as a human-readable string with 1024-based units.
#
# The unit is chosen by repeated division instead of a floating-point
# logarithm, so the choice at exact powers of 1024 does not depend on float
# rounding, and a negative count is formatted instead of raising
# Math::DomainError. Values beyond the largest unit stay in that unit.
#
# @param bytes [Integer, nil] Bytes
# @return [String] Formatted string such as "1.50 KB"; "0 B" for nil or zero
def format_bytes(bytes)
  return "0 B" if bytes.nil? || bytes.zero?

  units = %w[B KB MB GB TB]
  value = bytes.to_f
  unit_index = 0

  # Dividing a float by 1024 (a power of two) is exact, so no error builds up.
  while value.abs >= 1024 && unit_index < units.size - 1
    value /= 1024
    unit_index += 1
  end

  "#{format('%.2f', value)} #{units[unit_index]}"
end
302
+
303
# Render an ISO8601 timestamp as a coarse relative age such as "5m ago".
#
# Months are approximated as 30 days and years as 12 such months. A
# timestamp in the future yields "just now" because its elapsed time is
# negative.
#
# @param iso_time [String, nil] ISO8601 timestamp
# @return [String] Time ago string, or "unknown" when the timestamp is
#   missing or cannot be parsed
def time_ago(iso_time)
  return "unknown" unless iso_time

  begin
    time = Time.iso8601(iso_time.to_s)
  rescue ArgumentError
    # A malformed timestamp is reported like a missing one.
    return "unknown"
  end

  seconds = Time.now - time
  return "just now" if seconds < 60

  # seconds is a Float; everything below is integer division.
  minutes = (seconds / 60).to_i
  return "#{minutes}m ago" if minutes < 60

  hours = minutes / 60
  return "#{hours}h ago" if hours < 24

  days = hours / 24
  return "#{days}d ago" if days < 30

  months = days / 30
  return "#{months}mo ago" if months < 12

  "#{months / 12}y ago"
end
330
+
331
# Verify the SHA256 checksum of a file.
#
# The comparison ignores letter case and surrounding whitespace in the
# expected value, since both are insignificant in a hex digest. A missing
# expected value, a missing file, or a path that is not a regular file
# (for example a directory) all yield false.
#
# @param file_path [String] Path to file
# @param expected_checksum [String, nil] Expected SHA256 checksum (hex)
# @return [Boolean] True if checksum matches
def verify_checksum(file_path, expected_checksum)
  return false unless expected_checksum
  return false unless File.file?(file_path)

  # Loaded lazily: only the validation path needs the digest library.
  require "digest"
  actual = Digest::SHA256.file(file_path).hexdigest
  actual == expected_checksum.to_s.strip.downcase
end
343
+
344
# Check if metadata is expired, i.e. its timestamp is more than 7 days old.
#
# The timestamp is read from the `version` entry and parsed as ISO8601.
# Metadata without a `version`, or with a `version` that is not a valid
# ISO8601 timestamp, is treated as not expired instead of raising.
# NOTE(review): the `status` command reads the download time from
# `downloaded_at`, while this check reads `version` — confirm which field
# the cache actually writes its timestamp to.
#
# @param metadata [Hash] Metadata hash
# @return [Boolean] True if expired
def expired?(metadata)
  stamp = metadata['version']
  return false unless stamp

  max_age_seconds = 7 * 24 * 60 * 60 # 7 days
  cached_time = Time.iso8601(stamp.to_s)
  Time.now.utc - cached_time > max_age_seconds
rescue ArgumentError
  # Not a timestamp (e.g. a plain version number): age is unknown.
  false
end
354
+ end
355
+ end
356
+ end