kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lookup_cache"
|
|
4
|
+
|
|
5
|
+
module Kotoshu
|
|
6
|
+
module Cache
|
|
7
|
+
# LRU cache specifically for suggestion results.
|
|
8
|
+
#
|
|
9
|
+
# Extends LookupCache with suggestion-specific features like
|
|
10
|
+
# caching by word + max_results combination.
|
|
11
|
+
#
|
|
12
|
+
# @example Caching suggestions
|
|
13
|
+
# cache = SuggestionCache.new(max_size: 5000)
|
|
14
|
+
# cache.write("helo", ["hello", "help"], max_results: 10)
|
|
15
|
+
# cache.read("helo", max_results: 10) # => ["hello", "help"]
|
|
16
|
+
class SuggestionCache < LookupCache
|
|
17
|
+
# Default maximum cache size for suggestions
|
|
18
|
+
DEFAULT_MAX_SIZE = 5000
|
|
19
|
+
|
|
20
|
+
# Build a suggestion cache bounded to a maximum number of entries.
#
# All storage is delegated to the parent cache; this subclass only swaps in
# its own default size.
#
# @param max_size [Integer] Upper bound on stored entries (default: 5000)
def initialize(max_size: DEFAULT_MAX_SIZE)
  super(max_size: max_size)
end
|
|
26
|
+
|
|
27
|
+
# Store suggestions under the composite (word, max_results) key.
#
# @param word [String] The misspelled word
# @param suggestions [Array<String>] Suggested words
# @param max_results [Integer] Result limit the suggestions were computed for
# @return [Object] Whatever the parent cache's #write returns
def write(word, suggestions, max_results: 10)
  super(cache_key_for(word, max_results), suggestions)
end
|
|
37
|
+
|
|
38
|
+
# Look up suggestions stored under the composite (word, max_results) key.
#
# @param word [String] The misspelled word
# @param max_results [Integer] Result limit the suggestions were computed for
# @return [Object] Whatever the parent cache's #read returns for that key
def read(word, max_results: 10)
  super(cache_key_for(word, max_results))
end
|
|
47
|
+
|
|
48
|
+
# Return cached suggestions, computing and storing them on a miss.
#
# On a hit the entry's access stamp (slot 1 of the stored pair) is bumped and
# the cached value (slot 0) is returned. On a miss the block's result is
# written through #write and returned.
#
# @param word [String] The misspelled word
# @param max_results [Integer] Result limit for this query
# @yield Computes the suggestions when nothing is cached
# @return [Array<String>] Cached or freshly computed suggestions
def fetch(word, max_results: 10)
  key = cache_key_for(word, max_results)

  unless @data.key?(key)
    record_miss
    computed = yield
    write(word, computed, max_results: max_results)
    return computed
  end

  record_hit
  @access_order += 1
  @data[key][1] = @access_order # refresh recency stamp
  @data[key][0]
end
|
|
69
|
+
|
|
70
|
+
# Remove the entry stored under the composite (word, max_results) key.
#
# @param word [String] The misspelled word
# @param max_results [Integer] Result limit the suggestions were computed for
# @return [Object] Whatever the parent cache's #delete returns for that key
def delete(word, max_results: 10)
  super(cache_key_for(word, max_results))
end
|
|
79
|
+
|
|
80
|
+
# Tell whether an entry exists for the composite (word, max_results) key.
#
# @param word [String] The misspelled word
# @param max_results [Integer] Result limit the suggestions were computed for
# @return [Boolean] Result of the parent cache's #key? for that key
def key?(word, max_results: 10)
  super(cache_key_for(word, max_results))
end
|
|
89
|
+
|
|
90
|
+
# Invalidate every cached suggestion list for a word, whatever result limit
# it was stored under.
#
# Keys are written as "<downcased word>|<max_results>", so the word is
# down-cased here as well; matching on the raw word would miss every entry
# for input such as "Helo".
#
# @param word [String] The word to invalidate (case-insensitive)
# @return [self] Self for chaining
def invalidate_word(word)
  prefix = "#{word.downcase}|"

  # Iterate over a snapshot of the keys so deleting while scanning is safe.
  @data.keys.each do |key|
    @data.delete(key) if key.start_with?(prefix)
  end

  self
end
|
|
100
|
+
|
|
101
|
+
private
|
|
102
|
+
|
|
103
|
+
# Compose the storage key: the down-cased word and the result limit joined
# by a pipe, so lookups are case-insensitive per limit.
#
# @param word [String] The word
# @param max_results [Integer] Max results
# @return [String] Cache key, e.g. "helo|10"
def cache_key_for(word, max_results)
  format("%s|%s", word.downcase, max_results)
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'cache/cache'
|
|
4
|
+
require_relative 'cache/language_cache'
|
|
5
|
+
require_relative 'cache/model_cache'
|
|
6
|
+
|
|
7
|
+
module Kotoshu
|
|
8
|
+
# Cache module for Kotoshu
|
|
9
|
+
#
|
|
10
|
+
# This module provides access to various cache implementations for
|
|
11
|
+
# dictionaries, models, and other resources.
|
|
12
|
+
#
|
|
13
|
+
# @example Using the language cache
|
|
14
|
+
# cache = Kotoshu::Cache::LanguageCache.new
|
|
15
|
+
# dict = cache.get_spelling('en')
|
|
16
|
+
# # => { dic_path: "~/.cache/kotoshu/languages/en/spelling/index.dic",
|
|
17
|
+
# # aff_path: "~/.cache/kotoshu/languages/en/spelling/index.aff",
|
|
18
|
+
# # metadata: { ... } }
|
|
19
|
+
#
|
|
20
|
+
module Cache
|
|
21
|
+
class << self
|
|
22
|
+
# Convenience factory for a LanguageCache.
#
# @param cache_path [String, nil] optional custom cache directory
# @param url_base [String, nil] optional custom GitHub URL
# @return [LanguageCache] new language cache instance
def language_cache(cache_path: nil, url_base: nil)
  options = { cache_path: cache_path, url_base: url_base }
  LanguageCache.new(**options)
end
|
|
30
|
+
|
|
31
|
+
# Convenience factory for a ModelCache.
#
# @param cache_path [String, nil] optional custom cache directory
# @return [ModelCache] new model cache instance
def model_cache(cache_path: nil)
  options = { cache_path: cache_path }
  ModelCache.new(**options)
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "io/console"
|
|
4
|
+
|
|
5
|
+
module Kotoshu
|
|
6
|
+
module Cli
|
|
7
|
+
# Interactive prompt that wraps the strict two-stage setup/resolve flow
|
|
8
|
+
# for the human-facing CLI.
|
|
9
|
+
#
|
|
10
|
+
# The library API (`Kotoshu.correct?`, `Kotoshu.suggest`) still raises
|
|
11
|
+
# `ResourceNotSetupError` strictly — no surprise downloads on metered
|
|
12
|
+
# networks. This class catches that error in the CLI dispatcher, asks
|
|
13
|
+
# the user once, and retries the original command. Programmatic users
|
|
14
|
+
# never see it.
|
|
15
|
+
#
|
|
16
|
+
# Non-TTY contexts (pipes, CI) and offline mode never prompt. The caller
|
|
17
|
+
# decides how to surface a nil result — the CLI dispatcher raises
|
|
18
|
+
# Errors::ResourceUnavailable so scripts see stable exit codes.
|
|
19
|
+
class AutoSetup
|
|
20
|
+
# Both streams are injectable so tests can drive the prompt without a
# terminal.
#
# @param input [IO] Stream the answer is read from (default: $stdin)
# @param output [IO] Stream the prompt is written to (default: $stderr)
def initialize(input: $stdin, output: $stderr)
  @input, @output = input, output
end
|
|
26
|
+
|
|
27
|
+
# Ask the user whether the missing language should be downloaded, and run
# the setup when they agree.
#
# @param error [Kotoshu::ResourceNotSetupError] The error raised by resolve;
#   supplies the language and the missing resource type
# @param want [Array<Symbol>] Resource types to fetch (default [:spelling])
# @return [String, nil] Language code after a successful setup; nil when the
#   prompt is skipped or the answer is not affirmative
def call(error, want: %i[spelling])
  language = error.language
  return if skip_prompt?

  @output.puts(prompt_message(language, error.resource_type, want))

  # A nil reply (no line read) is passed through unchanged to #affirmative?.
  reply = @input.gets
  reply = reply.strip.downcase unless reply.nil?
  return unless affirmative?(reply)

  Kotoshu.setup(language, want: want)
  language
end
|
|
44
|
+
|
|
45
|
+
private
|
|
46
|
+
|
|
47
|
+
# Whether prompting must be skipped: offline mode is configured, or the
# input stream is not a terminal.
#
# @return [Boolean] truthy when no prompt should be shown
def skip_prompt?
  offline = Kotoshu.configuration.offline
  return offline if offline

  !@input.tty?
end
|
|
50
|
+
|
|
51
|
+
# Build the two-line prompt: what is missing, then the download question
# with an approximate size.
#
# @param language [String] Language code that is not set up
# @param resource [Symbol, String] The missing resource type
# @param want [Array<Symbol>] Resource types that would be fetched
# @return [String] Prompt text (no trailing newline)
def prompt_message(language, resource, want)
  download_size = size_hint_for(want)

  [
    "Language '#{language}' is not set up (missing #{resource}).",
    "Download now (~#{download_size} from github.com/kotoshu/dictionaries)? [Y/n]"
  ].join("\n")
end
|
|
56
|
+
|
|
57
|
+
# Approximate download size for a set of resource types.
#
# The request is normalized (wrapped in an array, de-duplicated, sorted by
# name) before matching, so the hint does not depend on the order in which
# the caller listed the types, and a bare symbol works too.
#
# @param want [Array<Symbol>, Symbol, nil] Resource types to fetch
# @return [String] Human-readable size, or "unknown size" for any
#   combination without a known estimate
def size_hint_for(want)
  requested = Array(want).uniq.sort_by(&:to_s)

  case requested
  when %i[spelling] then "5 MB"
  when %i[frequency spelling] then "6 MB"
  when %i[frequency model spelling] then "120 MB"
  else "unknown size"
  end
end
|
|
65
|
+
|
|
66
|
+
# Decide whether the user's reply consents to the download.
#
# An empty reply (just Enter) takes the "[Y/n]" default of yes, and anything
# starting with "y" is a yes. A nil reply means no line could be read (end of
# input), which is not consent: nothing is downloaded unless the user
# actually answered.
#
# @param answer [String, nil] Stripped, down-cased reply; nil at end of input
# @return [Boolean] true only for an explicit or default yes
def affirmative?(answer)
  return false if answer.nil?

  answer.empty? || answer.start_with?("y")
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'display_formatter'
|
|
4
|
+
require_relative 'navigation_manager'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'csv'
|
|
7
|
+
|
|
8
|
+
module Kotoshu
|
|
9
|
+
module Cli
|
|
10
|
+
# Batch reporter for non-interactive error reporting.
|
|
11
|
+
#
|
|
12
|
+
# Outputs error reports in various formats (JSON, YAML, CSV, SARIF, text).
|
|
13
|
+
# Used for automated checking and CI/CD integration.
|
|
14
|
+
#
|
|
15
|
+
# @example Generate JSON report
|
|
16
|
+
# reporter = BatchReporter.new(document, navigation)
|
|
17
|
+
# reporter.to_json(filepath: 'errors.json')
|
|
18
|
+
#
|
|
19
|
+
# @example Generate CSV report
|
|
20
|
+
# reporter.to_csv(filepath: 'errors.csv')
|
|
21
|
+
#
|
|
22
|
+
# @example Generate text summary
|
|
23
|
+
# puts reporter.to_text
|
|
24
|
+
class BatchReporter
|
|
25
|
+
attr_reader :document, :navigation, :formatter
|
|
26
|
+
|
|
27
|
+
# Set up a reporter for one document and its navigation state.
#
# @param document [Documents::Document] Document being reported
# @param navigation [NavigationManager] Navigation state
# @param formatter [DisplayFormatter, nil] Display formatter; a default
#   DisplayFormatter is created when none is given
def initialize(document, navigation, formatter: nil)
  @document = document
  @navigation = navigation
  @formatter = formatter
  @formatter ||= DisplayFormatter.new
end
|
|
37
|
+
|
|
38
|
+
# Generate JSON report.
#
# The JSON library serializes arbitrary objects by calling
# `obj.to_json(state)`, so a leading positional argument is accepted and
# ignored. That lets a reporter nested inside another structure be passed to
# `JSON.generate` without raising. A positional String is still rejected,
# because it is almost certainly a path that belongs in `filepath:`.
#
# @param filepath [String, nil] Output file path (returns the string if nil)
# @param pretty [Boolean] Pretty-print JSON (default: true)
# @return [String, nil] JSON string, or nil if written to file
# @raise [ArgumentError] If a String is passed positionally
def to_json(*generator_args, filepath: nil, pretty: true)
  if generator_args.any? { |arg| arg.is_a?(String) }
    raise ArgumentError, "output path must be passed as filepath:"
  end

  data = generate_report_data
  json = pretty ? JSON.pretty_generate(data) : JSON.generate(data)
  return json unless filepath

  File.write(filepath, json)
  nil
end
|
|
54
|
+
|
|
55
|
+
# Serialize the report data as YAML.
#
# The YAML library is loaded on first use rather than at file load.
#
# @param filepath [String, nil] Output file path (returns the string if nil)
# @return [String, nil] YAML string, or nil if written to file
def to_yaml(filepath: nil)
  require 'yaml'

  serialized = generate_report_data.to_yaml
  return serialized unless filepath

  File.write(filepath, serialized)
  nil
end
|
|
72
|
+
|
|
73
|
+
# Render one CSV row per error, preceded by a header row.
#
# Confidence is written as a percentage with one decimal; the suggestion
# column holds the recommended suggestion's word, or an empty string when
# there is none.
#
# @param filepath [String, nil] Output file path (returns the string if nil)
# @return [String, nil] CSV string, or nil if written to file
def to_csv(filepath: nil)
  header = ['ID', 'Line', 'Original', 'Suggestion', 'Confidence', 'Error Type']

  report = CSV.generate do |csv|
    csv << header

    @navigation.errors.each do |error|
      recommended = error.recommended_suggestion
      percent = (error.confidence * 100).round(1)

      csv << [
        error.id,
        error.location.line,
        error.original,
        recommended&.word || '',
        "#{percent}%",
        error.error_type.to_s.capitalize
      ]
    end
  end

  return report unless filepath

  File.write(filepath, report)
  nil
end
|
|
103
|
+
|
|
104
|
+
# Generate text summary.
#
# Layout: a boxed banner, document details, error totals by confidence
# band, an optional per-type breakdown, and the first ten errors with their
# top suggestion. The three banner rows are built from one width so the
# title row always lines up with the border.
#
# @return [String] Formatted text summary (lines joined with "\n")
def to_text
  banner_width = 63
  rule = "─" * 70

  lines = [
    "",
    @formatter.colorize("╔#{'═' * banner_width}╗", :bold),
    @formatter.colorize("║#{'Batch Error Report'.center(banner_width)}║", :bold),
    @formatter.colorize("╚#{'═' * banner_width}╝", :bold),
    "",
    "Document: #{@document.name}",
    "Format: #{Documents::Document::FORMATS[@document.format]}",
    "Language: #{@document.language_code}",
    "",
    @formatter.colorize("Summary", :bold),
    rule
  ]

  stats = @navigation.statistics
  lines << "Total errors: #{stats[:total]}"
  lines << " • High confidence (>0.8): #{stats[:by_confidence][:high]}"
  lines << " • Medium confidence (0.5-0.8): #{stats[:by_confidence][:medium]}"
  lines << " • Low confidence (≤0.5): #{stats[:by_confidence][:low]}"
  lines << ""

  # Breakdown by type; falls back to the capitalized type name when the
  # type has no label in ERROR_TYPES.
  if stats[:by_type]&.any?
    lines << @formatter.colorize("By Type", :bold)
    stats[:by_type].each do |type, count|
      label = Models::SemanticError::ERROR_TYPES[type] || type.to_s.capitalize
      lines << " • #{label}: #{count}"
    end
    lines << ""
  end

  # Only the first ten errors are listed; the rest are summarized as a count.
  errors = @navigation.errors
  if errors.any?
    lines << @formatter.colorize("Top Errors", :bold)
    lines << rule

    errors.first(10).each_with_index do |error, idx|
      lines << "#{idx + 1}. [#{error.location}] #{error.original}"
      lines << " Type: #{error.error_type}"
      lines << " Confidence: #{(error.confidence * 100).round(1)}%"

      if error.suggestions&.any?
        top_suggestion = error.suggestions.first
        lines << " Suggestion: #{top_suggestion.word} (#{(top_suggestion.confidence * 100).round(0)}%)"
      end

      lines << ""
    end

    if errors.size > 10
      lines << "... and #{errors.size - 10} more"
      lines << ""
    end
  end

  lines.join("\n")
end
|
|
164
|
+
|
|
165
|
+
# Generate SARIF report (Static Analysis Results Interchange Format).
#
# SARIF is a standard format for static analysis tools.
# Useful for CI/CD integration and IDE integration.
#
# Two details keep the output valid SARIF 2.1.0:
# * line and column numbers are 1-based, so a missing value defaults to 1;
# * `suggestions` is not a standard result property, so the list is emitted
#   inside the result's `properties` bag (an empty list when there are none).
#
# @param filepath [String, nil] Output file path (returns the string if nil)
# @return [String, nil] SARIF JSON string, or nil if written to file
def to_sarif(filepath: nil)
  results = @navigation.errors.map do |error|
    {
      ruleId: error.error_type.to_s,
      level: error.high_confidence? ? "error" : "warning",
      message: {
        text: "Potential #{error.error_type} error: '#{error.original}'"
      },
      locations: [
        {
          physicalLocation: {
            artifactLocation: { uri: @document.name },
            region: {
              startLine: error.location.line || 1,
              startColumn: error.location.column || 1
            }
          }
        }
      ],
      properties: {
        suggestions: (error.suggestions || []).map { |sugg| { text: sugg.word } }
      }
    }
  end

  sarif = {
    version: "2.1.0",
    "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
    runs: [
      {
        tool: {
          driver: {
            name: "Kotoshu",
            version: Kotoshu::VERSION,
            informationUri: "https://github.com/kotoshu/kotoshu",
            rules: []
          }
        },
        results: results
      }
    ]
  }

  json = JSON.pretty_generate(sarif)
  return json unless filepath

  File.write(filepath, json)
  nil
end
|
|
226
|
+
|
|
227
|
+
# Map the number of errors to a process exit status for CI/CD pipelines.
#
# @param max_errors [Integer] Maximum errors allowed (default: 0)
# @return [Integer] 0 when the error count is within the limit, 1 otherwise
def exit_code(max_errors: 0)
  @navigation.errors.size <= max_errors ? 0 : 1
end
|
|
238
|
+
|
|
239
|
+
# Navigation statistics extended with document details and an error flag.
#
# @return [Hash] The statistics hash merged with :document (name, format,
#   language) and :has_errors
def summary
  stats = @navigation.statistics
  document_info = {
    name: @document.name,
    format: @document.format,
    language: @document.language_code
  }

  stats.merge(document: document_info, has_errors: @navigation.errors.any?)
end
|
|
252
|
+
|
|
253
|
+
# Print report to stdout.
#
# Every format the reporter can render as a string is available here,
# including CSV and SARIF.
#
# @param format [Symbol] Output format (:text, :json, :yaml, :csv, :sarif)
# @return [nil]
# @raise [ArgumentError] If the format is not one of the supported symbols
def print(format: :text)
  report =
    case format
    when :text then to_text
    when :json then to_json
    when :yaml then to_yaml
    when :csv then to_csv
    when :sarif then to_sarif
    else raise ArgumentError, "Unknown format: #{format}"
    end

  puts report
end
|
|
268
|
+
|
|
269
|
+
private
|
|
270
|
+
|
|
271
|
+
# Generate report data hash shared by the JSON and YAML outputs.
#
# The timestamp is formatted with core Time#strftime. It yields the same
# UTC ISO 8601 text as Time#iso8601 without depending on the `time` standard
# library having been loaded.
#
# @return [Hash] Report data with :metadata, :document, :statistics,
#   :errors (one hash per error) and :corrections
def generate_report_data
  {
    metadata: {
      tool: "Kotoshu",
      version: Kotoshu::VERSION,
      generated_at: Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
    },
    document: {
      name: @document.name,
      format: @document.format.to_s,
      language: @document.language_code,
      word_count: @document.word_count,
      line_count: @document.line_count
    },
    statistics: @navigation.statistics,
    errors: @navigation.errors.map do |error|
      {
        id: error.id,
        location: {
          line: error.location.line,
          column: error.location.column,
          node_path: error.location.node_path
        },
        original: error.original,
        # Stays nil when the error carries no suggestion list.
        suggestions: error.suggestions&.map do |sugg|
          {
            word: sugg.word,
            confidence: sugg.confidence,
            source: sugg.source
          }
        end,
        error_type: error.error_type.to_s,
        confidence: error.confidence,
        recommended_suggestion: error.recommended_suggestion&.word
      }
    end,
    corrections: @navigation.export_corrections
  }
end
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
end
|