kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
require "json"
require "time"

require "thor"

require_relative "../cache/language_cache"
require_relative "../configuration"
|
|
7
|
+
|
|
8
|
+
module Kotoshu
|
|
9
|
+
module Cli
|
|
10
|
+
# Cache management commands.
|
|
11
|
+
#
|
|
12
|
+
# Provides CLI commands for managing the dictionary cache
|
|
13
|
+
# with automatic GitHub download support.
|
|
14
|
+
#
|
|
15
|
+
# @example List available languages
|
|
16
|
+
# kotoshu cache list
|
|
17
|
+
#
|
|
18
|
+
# @example Download a specific language
|
|
19
|
+
# kotoshu cache download de
|
|
20
|
+
#
|
|
21
|
+
# @example Show cache status
|
|
22
|
+
# kotoshu cache status
|
|
23
|
+
#
|
|
24
|
+
# @example Remove cached data
|
|
25
|
+
# kotoshu cache purge
|
|
26
|
+
class CacheCommand < Thor
|
|
27
|
+
class_option :verbose,
|
|
28
|
+
type: :boolean,
|
|
29
|
+
default: false,
|
|
30
|
+
desc: "Enable verbose output",
|
|
31
|
+
aliases: ["-v"]
|
|
32
|
+
|
|
33
|
+
desc "list", "List available languages and their cache status"
|
|
34
|
+
method_option :verbose, type: :boolean, aliases: '-v', desc: 'Show detailed information'
|
|
35
|
+
# Print every language the cache reports, grouped into those already
# cached and those not yet cached. When the verbose option is set, the
# word count is appended to each line (plus the license for cached ones).
def list
  report = create_cache.cache_status

  puts "Available languages:"
  puts

  # Languages already present in the cache come first.
  cached = report[:cached]
  unless cached.empty?
    puts "Cached languages:"
    cached.each do |entry|
      pieces = ["  #{entry[:code]}: #{entry[:name]}"]
      pieces << " (#{entry[:word_count]} words)" << " [#{entry[:license]}]" if options[:verbose]
      puts "#{pieces.join} ✓"
    end
    puts
  end

  # Then the languages that still have to be fetched.
  missing = report[:not_cached]
  unless missing.empty?
    puts "Not cached (will be downloaded on first use):"
    missing.each do |entry|
      line = "  #{entry[:code]}: #{entry[:name]}"
      line = "#{line} (#{entry[:word_count]} words)" if options[:verbose]
      print "#{line}\n"
    end
  end
end
|
|
64
|
+
|
|
65
|
+
desc "status", "Show cache status and statistics"
|
|
66
|
+
# Print a summary of the cache: its directory, how many languages are
# cached / not cached, the total size of the files below the cache
# directory, and the oldest and newest cached language according to the
# `downloaded_at` field of each language's spelling/metadata.json.
#
# Languages whose metadata file is missing, is not valid JSON, or lacks a
# parseable ISO8601 `downloaded_at` are left out of the oldest/newest
# lines instead of aborting the whole report.
def status
  cache = create_cache
  all_status = cache.cache_status

  total_languages = cache.available_languages.size
  cached_count = all_status[:cached].size
  not_cached_count = all_status[:not_cached].size

  puts "Cache Status:"
  puts "  Cache directory: #{cache.cache_path}"
  puts "  Total languages: #{total_languages}"
  puts "  Cached: #{cached_count}"
  puts "  Not cached: #{not_cached_count}"
  puts

  # Sum the size of every regular file below the cache directory.
  cache_size = Dir.glob(File.join(cache.cache_path, '**', '*'))
                  .select { |f| File.file?(f) }
                  .sum { |f| File.size(f) }

  puts "Cache size: #{format_bytes(cache_size)}"

  # Collect [code, download time] pairs for languages with usable metadata.
  all_cached = all_status[:cached].map do |info|
    spelling_meta = File.join(cache.cache_path, info[:code], 'spelling', 'metadata.json')
    next unless File.exist?(spelling_meta)

    begin
      metadata = JSON.parse(File.read(spelling_meta, encoding: 'UTF-8'))
      # to_s turns a missing timestamp into "", which iso8601 rejects with
      # ArgumentError and is therefore skipped like any other bad value.
      [info[:code], Time.iso8601(metadata['downloaded_at'].to_s)]
    rescue JSON::ParserError, ArgumentError, TypeError
      # Corrupt JSON, unparseable timestamp, or metadata that is not an object.
      nil
    end
  end.compact

  if all_cached.any?
    oldest = all_cached.min_by { |_, time| time }
    newest = all_cached.max_by { |_, time| time }

    puts
    puts "Oldest cache: #{oldest[0]} (#{oldest[1].strftime('%Y-%m-%d %H:%M')})"
    puts "Newest cache: #{newest[0]} (#{newest[1].strftime('%Y-%m-%d %H:%M')})"
  end
end
|
|
107
|
+
|
|
108
|
+
desc "download LANGUAGE", "Download dictionary for a language from GitHub"
|
|
109
|
+
method_option :force, type: :boolean, aliases: '-f', desc: 'Force re-download even if cached'
|
|
110
|
+
# Fetch the dictionary for +language+ through the cache and report whether
# it was downloaded or served from the cache, then attempt the same for the
# frequency data. A failure while fetching frequency data is reported as a
# warning only; any other failure prints an error and exits with status 1.
#
# @param language [String] Language code; must be one of the cache's
#   available languages, otherwise the command exits with status 1
def download(language)
  cache = create_cache

  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  begin
    puts "Downloading #{language} dictionary from GitHub..."

    force = options[:force]
    dictionary = cache.get_dictionary(language, force_download: force)

    # Reported as a download when forced or when the metadata carries no
    # download timestamp; otherwise reported as a cache hit.
    fresh = force || !dictionary[:metadata]['downloaded_at']
    puts(fresh ? "  ✓ Hunspell dictionary downloaded" : "  ✓ Using cached Hunspell dictionary")
    puts "    Location: #{File.dirname(dictionary[:dic_path])}"
    if fresh
      puts "    Version: #{dictionary[:metadata]['version']}"
    else
      puts "    Cached: #{dictionary[:metadata]['downloaded_at']}"
    end

    # Frequency data is best-effort: report the problem and carry on.
    begin
      frequency = cache.get_frequency_data(language, force_download: force)
      freq_fresh = force || !frequency[:metadata]['downloaded_at']
      puts(freq_fresh ? "  ✓ Frequency data downloaded" : "  ✓ Using cached frequency data")
    rescue StandardError => freq_error
      puts "  ⚠ Frequency data not available (#{freq_error.message})"
    end

    puts
    puts "Dictionary for '#{language}' is ready to use!"
  rescue StandardError => error
    puts "Error downloading dictionary: #{error.message}"
    exit(1)
  end
end
|
|
156
|
+
|
|
157
|
+
desc "info LANGUAGE", "Show information about a language"
|
|
158
|
+
# Print the catalogue details the cache holds for +language+ and, when the
# language is marked as downloaded, the download timestamp and checksum
# recorded in its spelling and frequency metadata files (each shown only
# if the file exists).
#
# @param language [String] Language code; must be one of the cache's
#   available languages, otherwise the command exits with status 1
def info(language)
  cache = create_cache

  unless cache.available_languages.include?(language)
    puts "Error: Unknown language '#{language}'"
    puts
    puts "Available languages: #{cache.available_languages.join(', ')}"
    exit(1)
  end

  details = cache.get_language_info(language)

  puts "Language: #{details[:name]}",
       "Code: #{language}",
       "Word count: #{details[:word_count]}",
       "License: #{details[:license]}",
       "Source: #{details[:source]}",
       "Cached: #{details[:downloaded] ? 'Yes' : 'No'}"

  return unless details[:downloaded]

  lang_dir = File.join(cache.cache_path, language)

  # Prints one metadata section; silently skipped when the file is absent.
  show_section = lambda do |subdir, heading|
    meta_file = File.join(lang_dir, subdir, 'metadata.json')
    return nil unless File.exist?(meta_file)

    meta = JSON.parse(File.read(meta_file, encoding: 'UTF-8'))
    puts
    puts heading
    puts "  Downloaded: #{meta['downloaded_at']}"
    puts "  Checksum: #{meta['checksum']}"
  end

  show_section.call('spelling', "Hunspell Dictionary:")
  show_section.call('frequency', "Frequency Data:")
end
|
|
202
|
+
|
|
203
|
+
desc "purge [LANGUAGE]", "Remove cached dictionary data (for a language or all languages)"
|
|
204
|
+
method_option :confirm, type: :boolean, default: false, desc: "Skip confirmation"
|
|
205
|
+
# Remove cached data, either for every language or for a single one.
#
# Without +language+ the whole cache is purged via the cache object, after
# an interactive y/N confirmation unless the :confirm option is set. With
# +language+ only that language's directory below the cache path is
# deleted (no confirmation is asked in that case).
#
# @param language [String, nil] Language code, or nil to purge everything
def purge(language = nil)
  cache = create_cache

  if language.nil?
    # Purge all
    unless options[:confirm]
      puts "This will remove all cached dictionaries and frequency data."
      print "Are you sure? [y/N] "
      # gets returns nil at end of input (closed or non-interactive stdin);
      # to_s turns that into "" so it counts as "no" instead of raising.
      answer = $stdin.gets.to_s
      return unless answer.match?(/\A[Yy]/)
    end

    count = cache.purge_all
    puts "Purged #{count} files from cache"
  else
    # Purge specific language
    unless cache.available_languages.include?(language)
      puts "Error: Unknown language '#{language}'"
      puts
      puts "Available languages: #{cache.available_languages.join(', ')}"
      exit(1)
    end

    lang_path = File.join(cache.cache_path, language)

    if File.exist?(lang_path)
      # Count regular files first so the report reflects what was removed.
      count = Dir.glob(File.join(lang_path, '**', '*')).count { |f| File.file?(f) }
      FileUtils.rm_rf(lang_path)
      puts "Purged #{language} cache (#{count} files)"
    else
      puts "No cached data for #{language}"
    end
  end
end
|
|
238
|
+
|
|
239
|
+
desc "validate LANGUAGE", "Validate cached resources for a language"
|
|
240
|
+
# Report the state of the cached resources for +language+: presence of the
# Hunspell .aff/.dic files, whether the spelling metadata is readable, the
# checksum result for the .dic file (only when the metadata records a
# checksum), the expiry flag, and whether frequency metadata exists.
#
# Metadata that is not a valid JSON object is reported as a failed check
# rather than raising, so a damaged cache can still be diagnosed.
#
# @param language [String] Language code; an unknown code is reported and
#   the method returns without checking anything
def validate(language)
  cache = create_cache

  puts "Validating #{language}..."

  unless cache.available_languages.include?(language)
    puts "  ✗ Unknown language"
    return
  end

  # Check spelling
  spelling_path = File.join(cache.cache_path, language, 'spelling')
  spelling_meta = File.join(spelling_path, 'metadata.json')

  if File.exist?(spelling_meta)
    metadata = begin
      JSON.parse(File.read(spelling_meta, encoding: 'UTF-8'))
    rescue JSON::ParserError
      nil
    end
    aff_file = File.join(spelling_path, 'index.aff')
    dic_file = File.join(spelling_path, 'index.dic')

    puts "  Spelling:"
    puts "    AFF file: #{File.exist?(aff_file) ? '✓' : '✗'}"
    puts "    DIC file: #{File.exist?(dic_file) ? '✓' : '✗'}"
    if metadata.is_a?(Hash)
      puts "    Metadata: ✓"
      puts "    Checksum: #{verify_checksum(dic_file, metadata['checksum']) ? '✓' : '✗'}" if metadata['checksum']
      puts "    Expired: #{expired?(metadata) ? 'Yes' : 'No'}"
    else
      # Unparseable file, or JSON that is not an object: nothing further
      # can be checked against it.
      puts "    Metadata: ✗ Invalid JSON"
    end
  else
    puts "  Spelling: ✗ Not cached"
  end

  # Check frequency
  freq_path = File.join(cache.cache_path, language, 'frequency')
  freq_meta = File.join(freq_path, 'metadata.json')

  if File.exist?(freq_meta)
    puts "  Frequency: ✓"
  else
    puts "  Frequency: ✗ Not cached (optional)"
  end
end
|
|
279
|
+
|
|
280
|
+
private
|
|
281
|
+
|
|
282
|
+
# Build the language cache object the commands operate on, passing along
# whatever the :cache_path option holds.
#
# @return [Cache::LanguageCache] A new cache instance
def create_cache
  location = options[:cache_path]
  Cache::LanguageCache.new(cache_path: location)
end
|
|
290
|
+
|
|
291
|
+
# Format a byte count as a human-readable string with two decimals,
# using 1024-based units from B up to TB (larger values stay in TB).
#
# The unit is picked by comparing against exact powers of 1024 rather
# than taking a floating-point logarithm, so boundary values land in the
# right unit and negative or fractional inputs do not raise or pick a
# wrong unit.
#
# @param bytes [Numeric, nil] Byte count
# @return [String] Formatted string, e.g. "1.50 KB"; "0 B" for nil or zero
def format_bytes(bytes)
  return "0 B" if bytes.nil? || bytes.zero?

  units = %w[B KB MB GB TB]
  magnitude = bytes.abs
  # Number of unit steps the magnitude clears, capped at the largest unit.
  exp = (1...units.size).take_while { |step| magnitude >= 1024**step }.size
  "#{format('%.2f', bytes.to_f / 1024**exp)} #{units[exp]}"
end
|
|
302
|
+
|
|
303
|
+
# Describe how long ago an ISO8601 timestamp was, relative to Time.now,
# in the coarsest fitting unit: "just now" (under a minute), then whole
# minutes, hours, days, 30-day months and 12-month years.
#
# @param iso_time [String, nil] ISO8601 timestamp
# @return [String] e.g. "5m ago", "3h ago", "2mo ago"; "unknown" when the
#   timestamp is nil or cannot be parsed as ISO8601
def time_ago(iso_time)
  return "unknown" unless iso_time

  begin
    time = Time.iso8601(iso_time.to_s)
  rescue ArgumentError
    # Malformed timestamp: same answer as for a missing one.
    return "unknown"
  end

  seconds = Time.now - time
  return "just now" if seconds < 60

  # Walk up the units; each step integer-divides into the next larger one.
  amount = (seconds / 60).to_i
  [[60, "m"], [24, "h"], [30, "d"], [12, "mo"]].each do |limit, suffix|
    return "#{amount}#{suffix} ago" if amount < limit

    amount /= limit
  end
  "#{amount}y ago"
end
|
|
330
|
+
|
|
331
|
+
# Check a file's SHA256 digest against an expected hex string.
#
# The comparison ignores letter case and surrounding whitespace in the
# expected value. Anything that is not a regular file (missing path,
# directory) and a nil expected value yield false instead of raising.
#
# @param file_path [String] Path to file
# @param expected_checksum [String, nil] Expected SHA256 checksum (hex)
# @return [Boolean] True if the file exists and its digest matches
def verify_checksum(file_path, expected_checksum)
  return false unless expected_checksum && File.file?(file_path)

  # Loaded lazily: only this helper needs the digest library.
  require "digest"
  actual = Digest::SHA256.file(file_path).hexdigest
  actual == expected_checksum.to_s.strip.downcase
end
|
|
343
|
+
|
|
344
|
+
# Decide whether cached metadata is older than seven days, based on the
# ISO8601 timestamp stored under its 'version' key.
#
# NOTE(review): the age is read from 'version', not 'downloaded_at' —
# presumably 'version' holds a timestamp; confirm against the code that
# writes the metadata.
#
# @param metadata [Hash] Parsed metadata (string keys)
# @return [Boolean] True if the 'version' timestamp is more than 7 days
#   old; false when it is absent or is not a parseable ISO8601 timestamp
def expired?(metadata)
  version = metadata['version']
  return false unless version

  max_age = 604_800 # 7 days, in seconds
  Time.now.utc - Time.iso8601(version.to_s) > max_age
rescue ArgumentError
  # A non-timestamp version (e.g. "1.2.0") gives no age to compare, so it
  # is treated like a missing one rather than aborting the caller.
  false
end
|
|
354
|
+
end
|
|
355
|
+
end
|
|
356
|
+
end
|