kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,424 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../cache/language_cache'
4
+ require_relative '../cache/model_cache'
5
+ require_relative '../cache/frequency_cache'
6
+ require 'json'
7
+
8
+ module Kotoshu
9
+ class CacheCommand < Thor
10
+ namespace :cache
11
+
12
+ desc 'list [TYPE]', 'List available resources and their cache status'
13
+ method_option :verbose, type: :boolean, aliases: '-v', desc: 'Show detailed information'
14
+ method_option :type, type: :string, aliases: '-t', desc: 'Cache type: language, model, frequency (default: all)'
15
# List cached resources for a single cache type, or for every type.
#
# @param type [String, nil] positional cache type; when omitted the --type
#   option is consulted, and 'all' is used if neither is given
def list(type = nil)
  selected = type || options[:type] || 'all'
  return list_all_types if selected == 'all'

  list_type(selected)
end
24
+
25
+ desc 'download TYPE RESOURCE', 'Download a resource from GitHub'
26
+ method_option :force, type: :boolean, aliases: '-f', desc: 'Force re-download even if cached'
27
# Download a resource into the cache selected by +type+.
#
# The cache object is only built once the type has matched a known alias.
# cache_for_type raises on unknown types, so building it up front made the
# friendly error message in the else branch unreachable.
#
# @param type [String] cache type or alias (language/lang/l, model/m, frequency/freq/f)
# @param resource [String] resource identifier handed to the type-specific downloader
def download(type, resource)
  case type
  when 'language', 'lang', 'l'
    download_language(cache_for_type(type), resource)
  when 'model', 'm'
    download_model(cache_for_type(type), resource)
  when 'frequency', 'freq', 'f'
    download_frequency(cache_for_type(type), resource)
  else
    puts "Error: Unknown cache type '#{type}'"
    puts "Available types: language, model, frequency"
    exit(1)
  end
end
43
+
44
+ desc 'info TYPE RESOURCE', 'Show information about a cached resource'
45
# Show information about a resource in the cache selected by +type+.
#
# The cache object is only built once the type has matched a known alias.
# cache_for_type raises on unknown types, so building it up front made the
# friendly error message in the else branch unreachable.
#
# @param type [String] cache type or alias (language/lang/l, model/m, frequency/freq/f)
# @param resource [String] resource identifier handed to the type-specific reporter
def info(type, resource)
  case type
  when 'language', 'lang', 'l'
    info_language(cache_for_type(type), resource)
  when 'model', 'm'
    info_model(cache_for_type(type), resource)
  when 'frequency', 'freq', 'f'
    info_frequency(cache_for_type(type), resource)
  else
    puts "Error: Unknown cache type '#{type}'"
    puts "Available types: language, model, frequency"
    exit(1)
  end
end
61
+
62
+ desc 'purge TYPE [RESOURCE]', 'Remove cached data (for a resource or all resources)'
63
+ method_option :type, type: :string, aliases: '-t', desc: 'Cache type: language, model, frequency, or all'
64
# Remove cached data for one resource, one cache type, or everything.
#
# @param type [String, nil] cache type; falls back to the --type option.
#   A missing type or the literal 'all' purges every cache type.
# @param resource [String, nil] specific resource to purge within the type
def purge(type = nil, resource = nil)
  target = type || options[:type]
  everything = target.nil? || target == 'all'

  everything ? purge_all_types : purge_type(target, resource)
end
73
+
74
+ desc 'status [TYPE]', 'Show cache status and statistics'
75
# Print cache statistics for a single cache type, or for every type.
#
# @param type [String, nil] cache type; falls back to options[:type] and
#   then to 'all'. NOTE(review): no --type method_option is declared for this
#   command in the visible source, so that fallback presumably yields nil.
def status(type = nil)
  selected = type || options[:type] || 'all'
  return status_all_types if selected == 'all'

  status_type(selected)
end
84
+
85
+ desc 'clean', 'Clean expired cache entries'
86
# Run the clean step on every cache type and print the combined totals.
#
# Each cache's #clean result is read for :expired_entries_removed and
# :bytes_reclaimed; both are summed across the three cache types.
def clean
  removed, reclaimed = %w[language model frequency].reduce([0, 0]) do |(entries, bytes), kind|
    outcome = cache_for_type(kind).clean
    [entries + outcome[:expired_entries_removed], bytes + outcome[:bytes_reclaimed]]
  end

  puts "Cleaned cache:"
  puts " Entries removed: #{removed}"
  puts " Bytes reclaimed: #{format_bytes(reclaimed)}"
end
101
+
102
+ private
103
+
104
# Build a new cache object for the given cache type or alias.
#
# @param type [String] 'language'/'lang'/'l', 'model'/'m' or 'frequency'/'freq'/'f'
# @return [Cache::LanguageCache, Cache::ModelCache, Cache::FrequencyCache]
# @raise [RuntimeError] when the type matches none of the known aliases
def cache_for_type(type)
  return Cache::LanguageCache.new if %w[language lang l].include?(type)
  return Cache::ModelCache.new if %w[model m].include?(type)
  return Cache::FrequencyCache.new if %w[frequency freq f].include?(type)

  raise "Unknown cache type: #{type}"
end
116
+
117
# Print a banner followed by a short summary of every cache type.
#
# With the --verbose option the summary also includes hit/miss counters
# and the cache size taken from the cache's #stats hash.
def list_all_types
  rule = "=" * 70
  puts rule
  puts "Kotoshu Cache Status"
  puts rule
  puts

  %w[language model frequency].each do |kind|
    store = cache_for_type(kind)

    puts "#{kind.capitalize} Cache:"
    puts " Directory: #{store.cache_path}"
    puts " Resources: #{store.cached_resources.size} cached"

    if options[:verbose]
      figures = store.stats
      hits = figures[:hits]
      misses = figures[:misses]
      hit_percent = (figures[:hit_rate] * 100).round(1)
      puts " Stats: #{hits} hits, #{misses} misses (#{hit_percent}% hit rate)"
      puts " Size: #{format_bytes(figures[:size_bytes])}"
    end

    puts
  end
end
137
+
138
# Print the directory and the cached resource names of one cache type.
#
# @param type [String] cache type or alias understood by cache_for_type
def list_type(type)
  store = cache_for_type(type)
  entries = store.cached_resources

  puts "#{type.capitalize} Cache:"
  puts " Directory: #{store.cache_path}"
  puts " Resources: #{entries.size} cached"

  if entries.none?
    puts " (no resources cached yet)"
  else
    puts
    entries.each { |entry| puts " - #{entry}" }
  end
end
155
+
156
# Fetch the spelling dictionary and, where applicable, frequency data for a
# language. Failures of either step are reported and do not abort the command.
#
# @param cache [Object] language cache; must respond to available_languages
#   and get_spelling
# @param language [String] language code to download
def download_language(cache, language)
  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  puts "Downloading resources for #{language}..."

  # Step 1: spelling dictionary
  begin
    spelling = cache.get_spelling(language, force_download: options[:force])
    origin = spelling[:cached] ? 'cached' : 'downloaded'
    puts " ✓ Spelling dictionary: #{origin}"
    puts " Location: #{File.dirname(spelling[:dic_path])}"
  rescue StandardError => error
    puts " ✗ Spelling dictionary failed: #{error.message}"
  end

  # Step 2: frequency data (Kelly)
  begin
    frequency_store = Cache::FrequencyCache.new
    # NOTE(review): frequency data is skipped only when it is not yet cached
    # AND --force was given — confirm this is the intended condition.
    skip_frequency = !frequency_store.available?(language) && options[:force]
    unless skip_frequency
      frequency = frequency_store.get(language, force_download: options[:force])
      outcome = frequency ? 'loaded' : 'not available'
      puts " ✓ Frequency data: #{outcome}"
    end
  rescue StandardError => error
    puts " ℹ Frequency data: #{error.message}"
  end
end
185
+
186
# Download a model identified as 'language:type' and report where it landed.
#
# @param cache [Object] model cache; must respond to available_models_for and get
# @param resource_id [String] identifier in 'language:type' form, e.g. 'en:fasttext'
def download_model(cache, resource_id)
  segments = resource_id.split(':')
  unless segments.size == 2
    puts "Error: Resource must be in format 'language:type' (e.g., 'en:fasttext')"
    exit(1)
  end

  language, model_type = segments

  unless cache.available_models_for(language).include?(model_type.to_sym)
    puts "Error: Unknown model '#{model_type}' for language '#{language}'"
    puts "Available models for #{language}: #{cache.available_models_for(language).join(', ')}"
    exit(1)
  end

  puts "Downloading #{model_type} model for #{language}..."

  fetched = cache.get(resource_id, force_download: options[:force])
  unless fetched
    puts " ✗ Download failed"
    exit(1)
  end

  # The size line is only printed when the reported file is present on disk.
  size_on_disk = File.exist?(fetched[:model_path]) ? File.size(fetched[:model_path]) : nil
  puts " ✓ Model downloaded: #{fetched[:model_path]}"
  puts " Size: #{format_bytes(size_on_disk)}" if size_on_disk
end
213
+
214
# Download Kelly frequency data for a language and print the tier sizes.
#
# @param cache [Object] frequency cache; must respond to available_languages and get
# @param language [String] language code to download
def download_frequency(cache, language)
  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  puts "Downloading Kelly frequency data for #{language}..."

  payload = cache.get(language, force_download: options[:force])
  unless payload
    puts " ✗ Download failed"
    exit(1)
  end

  puts " ✓ Frequency data downloaded: #{payload[:frequency_path]}"
  tier_summary = %i[top_50 top_200 top_1000].map do |tier|
    "#{tier}=#{payload[:tiers][tier].size}"
  end
  puts " Tiers: #{tier_summary.join(', ')}"
end
233
+
234
# Print descriptive information and the cache status of a language's
# spelling dictionary.
#
# @param cache [Object] language cache; must respond to available_languages,
#   language_info and available?
# @param language [String] language code to describe
def info_language(cache, language)
  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  info_data = cache.language_info(language)

  puts "Language: #{info_data[:name]}"
  puts "Code: #{language}"
  puts "Word count: #{info_data[:word_count]}"
  puts "License: #{info_data[:license]}"
  puts "Source: #{info_data[:source]}"

  # Show cache status
  resource_id = "#{language}:spelling"
  unless cache.available?(resource_id)
    puts "Cache Status: Not cached"
    return
  end

  # Both metadata helpers are reached through send, the same way info_model
  # does it: a direct call raises NoMethodError if metadata_path_for is not
  # public, while send works whatever its visibility.
  metadata_path = cache.send(:metadata_path_for, resource_id)
  metadata = cache.send(:read_metadata, metadata_path)
  return unless metadata

  puts
  puts "Cache Status: Cached"
  puts " Cached at: #{metadata['cached_at']}"
  puts " Version: #{metadata['version']}"
  puts " Checksum: #{metadata['checksum']}"
end
265
+
266
# Print descriptive information and the cache status of a model.
#
# @param cache [Object] model cache; must respond to model_info and available?
# @param resource_id [String] identifier in 'language:type' form, e.g. 'en:fasttext'
def info_model(cache, resource_id)
  segments = resource_id.split(':')
  unless segments.size == 2
    puts "Error: Resource must be in format 'language:type' (e.g., 'en:fasttext')"
    exit(1)
  end

  language, model_type = segments

  details = cache.model_info(language, model_type.to_sym)
  unless details
    puts "Error: Unknown model '#{model_type}' for language '#{language}'"
    exit(1)
  end

  puts "Model: #{details[:file]}"
  puts "Language: #{language}"
  puts "Type: #{model_type}"
  puts "Source: #{details[:source]}"
  puts "Size: #{details[:size]}"

  # Cache status section
  return puts("Cache Status: Not cached") unless cache.available?(resource_id)

  # The metadata helpers are invoked through send on the cache object.
  record = cache.send(:read_metadata, cache.send(:metadata_path_for, resource_id))
  return unless record

  puts
  puts "Cache Status: Cached"
  puts " Cached at: #{record['cached_at']}"
  puts " Checksum: #{record['checksum']}"
end
301
+
302
# Print the cache status, metadata and tier sizes of a language's Kelly
# frequency data.
#
# @param cache [Object] frequency cache; must respond to available_languages,
#   available? and get
# @param language [String] language code to describe
def info_frequency(cache, language)
  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  puts "Kelly Frequency Data"
  puts "Language: #{language}"

  # Nothing cached: point the user at the download command and stop.
  unless cache.available?(language)
    puts "Cache Status: Not cached"
    puts " Download with: kotoshu cache download frequency #{language}"
    return
  end

  payload = cache.get(language)
  record = payload[:metadata]

  puts
  puts "Cache Status: Cached"
  puts " Cached at: #{record['cached_at']}"
  puts " Version: #{record['version']}"
  puts " Checksum: #{record['checksum']}"
  puts " URL: #{record['url']}"
  puts
  puts "Frequency Tiers:"
  puts " Top 50: #{payload[:tiers][:top_50].size} words"
  puts " Top 200: #{payload[:tiers][:top_200].size} words"
  puts " Top 1000: #{payload[:tiers][:top_1000].size} words"
end
333
+
334
# Clear every cache type that currently holds resources, reporting per type.
#
# Caches with no cached resources are reported but not cleared.
def purge_all_types
  puts "Purging all cache types..."

  %w[language model frequency].each do |kind|
    store = cache_for_type(kind)
    held = store.cached_resources.size
    label = kind.capitalize

    next puts(" - #{label}: no cached resources") unless held.positive?

    store.clear_all
    puts " ✓ #{label}: #{held} resources purged"
  end
end
349
+
350
# Purge one resource, or every resource, of a single cache type.
#
# @param type [String] cache type or alias understood by cache_for_type
# @param resource [String, nil] resource to remove; nil clears the whole type
def purge_type(type, resource)
  store = cache_for_type(type)

  # A specific resource was named: clear just that one.
  unless resource.nil?
    message = store.clear(resource) ? "Purged #{type} resource: #{resource}" : "No cached data for #{type}:#{resource}"
    puts message
    return
  end

  # No resource named: wipe everything held by this cache type.
  held = store.cached_resources.size
  store.clear_all
  puts "Purged #{held} #{type} resources"
end
367
+
368
# Print statistics for every cache type followed by combined totals.
#
# The overall hit rate is derived from the summed hit and miss counters and
# is reported as 0 when no lookups were recorded at all.
def status_all_types
  rule = "=" * 70
  puts rule
  puts "Kotoshu Cache Status"
  puts rule
  puts

  size_sum = 0
  hit_sum = 0
  miss_sum = 0

  %w[language model frequency].each do |kind|
    store = cache_for_type(kind)
    figures = store.stats

    size_sum += figures[:size_bytes]
    hit_sum += figures[:hits]
    miss_sum += figures[:misses]

    puts "#{kind.capitalize} Cache:"
    puts " Directory: #{store.cache_path}"
    puts " Resources cached: #{figures[:cached_resources].size}"
    puts " Size: #{format_bytes(figures[:size_bytes])}"
    puts " Hits: #{figures[:hits]}, Misses: #{figures[:misses]}"
    puts " Hit rate: #{(figures[:hit_rate] * 100).round(1)}%"
    puts
  end

  puts "Total:"
  puts " Total size: #{format_bytes(size_sum)}"

  lookups = hit_sum + miss_sum
  overall_hit_rate =
    if lookups > 0
      (hit_sum.to_f / lookups * 100).round(1)
    else
      0
    end
  puts " Overall hit rate: #{overall_hit_rate}%"
end
400
+
401
# Print statistics for a single cache type.
#
# @param type [String] cache type or alias understood by cache_for_type
def status_type(type)
  store = cache_for_type(type)
  figures = store.stats
  hit_percent = (figures[:hit_rate] * 100).round(1)

  puts "#{type.capitalize} Cache:"
  puts " Directory: #{store.cache_path}"
  puts " Resources cached: #{figures[:cached_resources].size}"
  puts " Size: #{format_bytes(figures[:size_bytes])}"
  puts " Hits: #{figures[:hits]}, Misses: #{figures[:misses]}"
  puts " Hit rate: #{hit_percent}%"
end
412
+
413
# Format a byte count as a human-readable size (B, KB, MB or GB).
#
# The unit is found by repeated division by 1024 rather than a ratio of
# floating-point logarithms, so exact powers of 1024 cannot fall into the
# wrong unit through rounding, and negative or fractional inputs no longer
# raise or select a wrong unit.
#
# @param bytes [Numeric, nil] size in bytes; nil is treated as zero
# @return [String] e.g. '0 B', '1.50 KB'; values of 1024 GB and above stay in GB
def format_bytes(bytes)
  return '0 B' if bytes.nil? || bytes.zero?

  units = %w[B KB MB GB]
  magnitude = bytes.abs.to_f
  exp = 0

  # Dividing by 1024 (a power of two) is exact in floating point.
  while magnitude >= 1024 && exp < units.size - 1
    magnitude /= 1024
    exp += 1
  end

  magnitude = -magnitude if bytes.negative?
  format('%.2f %s', magnitude, units[exp])
end
423
+ end
424
+ end