kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lookup_cache"
4
+
5
module Kotoshu
  module Cache
    # LRU cache specifically for suggestion results.
    #
    # Extends LookupCache by keying every entry on the lower-cased word
    # combined with the +max_results+ value used for the query, so the same
    # word requested with different limits is cached independently.
    #
    # @example Caching suggestions
    #   cache = SuggestionCache.new(max_size: 5000)
    #   cache.write("helo", ["hello", "help"], max_results: 10)
    #   cache.read("helo", max_results: 10) # => ["hello", "help"]
    class SuggestionCache < LookupCache
      # Default maximum cache size for suggestions
      DEFAULT_MAX_SIZE = 5000

      # Create a new suggestion cache.
      #
      # @param max_size [Integer] Maximum number of entries (default: 5000)
      def initialize(max_size: DEFAULT_MAX_SIZE)
        super(max_size: max_size)
      end

      # Write suggestions to cache.
      #
      # @param word [String] The misspelled word
      # @param suggestions [Array<String>] Suggested words
      # @param max_results [Integer] Max results used for this query
      # @return [Array<String>] The stored suggestions
      def write(word, suggestions, max_results: 10)
        cache_key = cache_key_for(word, max_results)
        super(cache_key, suggestions)
      end

      # Read suggestions from cache.
      #
      # @param word [String] The misspelled word
      # @param max_results [Integer] Max results used for this query
      # @return [Array<String>, nil] Cached suggestions or nil
      def read(word, max_results: 10)
        cache_key = cache_key_for(word, max_results)
        super(cache_key)
      end

      # Fetch suggestions from cache or compute them.
      #
      # On a hit the entry's access stamp is refreshed and the stored value is
      # returned; on a miss the block is called and its result is stored via
      # {#write}.
      #
      # @param word [String] The misspelled word
      # @param max_results [Integer] Max results for this query
      # @yield Block to compute suggestions on cache miss
      # @return [Array<String>] Cached or computed suggestions
      # @raise [LocalJumpError] On a cache miss when no block is given
      def fetch(word, max_results: 10)
        cache_key = cache_key_for(word, max_results)

        if @data.key?(cache_key)
          record_hit
          @access_order += 1
          # Entries are indexed as [value, access_stamp] pairs.
          @data[cache_key][1] = @access_order # Update access order
          @data[cache_key][0] # Return value
        else
          record_miss
          suggestions = yield
          write(word, suggestions, max_results: max_results)
          suggestions
        end
      end

      # Delete suggestions from cache.
      #
      # @param word [String] The misspelled word
      # @param max_results [Integer] Max results for this query
      # @return [Array<String>, nil] Deleted suggestions or nil
      def delete(word, max_results: 10)
        cache_key = cache_key_for(word, max_results)
        super(cache_key)
      end

      # Check if suggestions are cached for this word.
      #
      # @param word [String] The misspelled word
      # @param max_results [Integer] Max results for this query
      # @return [Boolean] True if cached
      def key?(word, max_results: 10)
        cache_key = cache_key_for(word, max_results)
        super(cache_key)
      end

      # Invalidate all cached suggestions for a word, whatever +max_results+
      # value they were stored under.
      #
      # The word is normalised the same way as in {#cache_key_for}, so the
      # lookup is case-insensitive just like every other operation on this
      # cache.
      #
      # @param word [String] The word to invalidate
      # @return [self] Self for chaining
      def invalidate_word(word)
        normalized = word.downcase

        # Compare against everything before the LAST separator so a word that
        # itself contains "|" is neither missed nor confused with a shorter
        # word sharing the same prefix.
        stale_keys = @data.keys.select { |key| key.rpartition("|").first == normalized }
        stale_keys.each { |key| @data.delete(key) }
        self
      end

      private

      # Generate cache key for word + max_results.
      #
      # @param word [String] The word
      # @param max_results [Integer] Max results
      # @return [String] Cache key in the form "<downcased word>|<max_results>"
      def cache_key_for(word, max_results)
        "#{word.downcase}|#{max_results}"
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'cache/cache'
4
+ require_relative 'cache/language_cache'
5
+ require_relative 'cache/model_cache'
6
+
7
module Kotoshu
  # Entry point for Kotoshu's cache implementations (dictionaries, models
  # and other resources).
  #
  # The module-level helpers below are thin factories: each call builds and
  # returns a brand-new cache object.
  #
  # @example Using the language cache
  #   cache = Kotoshu::Cache::LanguageCache.new
  #   dict = cache.get_spelling('en')
  #   # => { dic_path: "~/.cache/kotoshu/languages/en/spelling/index.dic",
  #   #      aff_path: "~/.cache/kotoshu/languages/en/spelling/index.aff",
  #   #      metadata: { ... } }
  #
  module Cache
    # Build a language cache.
    #
    # @param cache_path [String, nil] optional custom cache directory
    # @param url_base [String, nil] optional custom GitHub URL
    # @return [LanguageCache] new language cache instance
    def self.language_cache(cache_path: nil, url_base: nil)
      options = { cache_path: cache_path, url_base: url_base }
      LanguageCache.new(**options)
    end

    # Build a model cache.
    #
    # @param cache_path [String, nil] optional custom cache directory
    # @return [ModelCache] new model cache instance
    def self.model_cache(cache_path: nil)
      ModelCache.new(cache_path: cache_path)
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "io/console"
4
+
5
module Kotoshu
  module Cli
    # Interactive prompt that wraps the strict two-stage setup/resolve flow
    # for the human-facing CLI.
    #
    # The library API (`Kotoshu.correct?`, `Kotoshu.suggest`) still raises
    # `ResourceNotSetupError` strictly — no surprise downloads on metered
    # networks. This class catches that error in the CLI dispatcher, asks
    # the user once, and retries the original command. Programmatic users
    # never see it.
    #
    # Non-TTY contexts (pipes, CI) and offline mode never prompt. The caller
    # decides how to surface a nil result — the CLI dispatcher raises
    # Errors::ResourceUnavailable so scripts see stable exit codes.
    class AutoSetup
      # Download-size hints shown in the prompt, keyed by the requested
      # resource types sorted by name so lookups are order-insensitive.
      SIZE_HINTS = {
        %i[spelling] => "5 MB",
        %i[frequency spelling] => "6 MB",
        %i[frequency model spelling] => "120 MB"
      }.freeze
      private_constant :SIZE_HINTS

      # @param input [IO] Stdin (or override for tests)
      # @param output [IO] Stderr (or override for tests)
      def initialize(input: $stdin, output: $stderr)
        @input = input
        @output = output
      end

      # Prompt the user to set up the missing language.
      #
      # @param error [Kotoshu::ResourceNotSetupError] The error raised by resolve
      # @param want [Array<Symbol>] Resource types to fetch (default [:spelling])
      # @return [String, nil] Language code on success; nil when non-TTY,
      #   offline, or user declined.
      def call(error, want: %i[spelling])
        language = error.language
        return nil if skip_prompt?

        @output.puts prompt_message(language, error.resource_type, want)
        answer = @input.gets&.strip&.downcase
        return nil unless affirmative?(answer)

        Kotoshu.setup(language, want: want)
        language
      end

      private

      # True when prompting must be skipped: offline mode is configured or
      # the input stream is not a terminal.
      def skip_prompt?
        Kotoshu.configuration.offline || !@input.tty?
      end

      # Build the two-line question shown to the user.
      def prompt_message(language, resource, want)
        size_hint = size_hint_for(want)
        "Language '#{language}' is not set up (missing #{resource}).\n" \
          "Download now (~#{size_hint} from github.com/kotoshu/dictionaries)? [Y/n]"
      end

      # Look up the size hint for a set of resource types.
      #
      # The request is de-duplicated and sorted by name before the lookup so
      # that e.g. %i[frequency spelling] and %i[spelling frequency] yield the
      # same hint. Anything not in the table falls back to "unknown size".
      def size_hint_for(want)
        normalized = Array(want).uniq.sort_by(&:to_s)
        SIZE_HINTS.fetch(normalized, "unknown size")
      end

      # The prompt defaults to "yes": no input (nil), an empty line, or
      # anything starting with "y" counts as consent.
      def affirmative?(answer)
        answer.nil? || answer.empty? || answer.start_with?("y")
      end
    end
  end
end
@@ -0,0 +1,315 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'display_formatter'
4
+ require_relative 'navigation_manager'
5
+ require 'json'
6
+ require 'csv'
7
+
8
module Kotoshu
  module Cli
    # Batch reporter for non-interactive error reporting.
    #
    # Outputs error reports in various formats (JSON, YAML, CSV, SARIF, text).
    # Used for automated checking and CI/CD integration.
    #
    # Every `to_*` exporter that accepts `filepath:` either writes the report
    # to that path and returns nil, or returns the report as a String when no
    # path is given.
    #
    # @example Generate JSON report
    #   reporter = BatchReporter.new(document, navigation)
    #   reporter.to_json(filepath: 'errors.json')
    #
    # @example Generate CSV report
    #   reporter.to_csv(filepath: 'errors.csv')
    #
    # @example Generate text summary
    #   puts reporter.to_text
    class BatchReporter
      attr_reader :document, :navigation, :formatter

      # Create a new batch reporter.
      #
      # @param document [Documents::Document] Document being reported
      # @param navigation [NavigationManager] Navigation state
      # @param formatter [DisplayFormatter, nil] Display formatter
      #   (a new DisplayFormatter is created when nil)
      def initialize(document, navigation, formatter: nil)
        @document = document
        @navigation = navigation
        @formatter = formatter || DisplayFormatter.new
      end

      # Generate JSON report.
      #
      # @param filepath [String, nil] Output file path (optional, returns string if nil)
      # @param pretty [Boolean] Pretty-print JSON (default: true)
      # @return [String, nil] JSON string or nil if written to file
      def to_json(filepath: nil, pretty: true)
        data = generate_report_data
        json = pretty ? JSON.pretty_generate(data) : JSON.generate(data)
        deliver(json, filepath)
      end

      # Generate YAML report.
      #
      # @param filepath [String, nil] Output file path (optional, returns string if nil)
      # @return [String, nil] YAML string or nil if written to file
      def to_yaml(filepath: nil)
        # Loaded lazily: YAML is only needed for this one exporter.
        require 'yaml'

        deliver(generate_report_data.to_yaml, filepath)
      end

      # Generate CSV report with one header row and one row per error.
      #
      # @param filepath [String, nil] Output file path (optional, returns string if nil)
      # @return [String, nil] CSV string or nil if written to file
      def to_csv(filepath: nil)
        csv_string = CSV.generate do |csv|
          csv << ['ID', 'Line', 'Original', 'Suggestion', 'Confidence', 'Error Type']

          @navigation.errors.each do |error|
            suggestion = error.recommended_suggestion
            csv << [
              error.id,
              error.location.line,
              error.original,
              suggestion&.word || '',
              "#{(error.confidence * 100).round(1)}%",
              error.error_type.to_s.capitalize
            ]
          end
        end

        deliver(csv_string, filepath)
      end

      # Generate text summary: banner, document info, statistics, per-type
      # breakdown and the first ten errors.
      #
      # @return [String] Formatted text summary
      def to_text
        top_border = "╔═══════════════════════════════════════════════════════════════╗"
        bottom_border = "╚═══════════════════════════════════════════════════════════════╝"
        # Pad the title to the border's inner width so the box lines up.
        title_row = "║#{'Batch Error Report'.center(top_border.length - 2)}║"

        lines = []
        lines << ""
        lines << @formatter.colorize(top_border, :bold)
        lines << @formatter.colorize(title_row, :bold)
        lines << @formatter.colorize(bottom_border, :bold)
        lines << ""
        lines << "Document: #{@document.name}"
        lines << "Format: #{Documents::Document::FORMATS[@document.format]}"
        lines << "Language: #{@document.language_code}"
        lines << ""
        lines << @formatter.colorize("Summary", :bold)
        lines << "─" * 70

        stats = @navigation.statistics
        lines << "Total errors: #{stats[:total]}"
        lines << " • High confidence (>0.8): #{stats[:by_confidence][:high]}"
        lines << " • Medium confidence (0.5-0.8): #{stats[:by_confidence][:medium]}"
        lines << " • Low confidence (≤0.5): #{stats[:by_confidence][:low]}"
        lines << ""

        # Breakdown by type
        if stats[:by_type]&.any?
          lines << @formatter.colorize("By Type", :bold)
          stats[:by_type].each do |type, count|
            label = Models::SemanticError::ERROR_TYPES[type] || type.to_s.capitalize
            lines << " • #{label}: #{count}"
          end
          lines << ""
        end

        # Top errors (first ten only; the rest are summarised as a count)
        errors = @navigation.errors
        if errors.any?
          lines << @formatter.colorize("Top Errors", :bold)
          lines << "─" * 70

          errors.first(10).each_with_index do |error, idx|
            lines << "#{idx + 1}. [#{error.location}] #{error.original}"
            lines << " Type: #{error.error_type}"
            lines << " Confidence: #{(error.confidence * 100).round(1)}%"

            if error.suggestions&.any?
              top_suggestion = error.suggestions.first
              lines << " Suggestion: #{top_suggestion.word} (#{(top_suggestion.confidence * 100).round(0)}%)"
            end

            lines << ""
          end

          if errors.size > 10
            lines << "... and #{errors.size - 10} more"
            lines << ""
          end
        end

        lines.join("\n")
      end

      # Generate SARIF report (Static Analysis Results Interchange Format).
      #
      # SARIF is a standard format for static analysis tools.
      # Useful for CI/CD integration and IDE integration.
      #
      # @param filepath [String, nil] Output file path (optional, returns string if nil)
      # @return [String, nil] SARIF JSON string or nil if written to file
      def to_sarif(filepath: nil)
        sarif = {
          version: "2.1.0",
          "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
          runs: [
            {
              tool: {
                driver: {
                  name: "Kotoshu",
                  version: Kotoshu::VERSION,
                  informationUri: "https://github.com/kotoshu/kotoshu",
                  rules: []
                }
              },
              results: @navigation.errors.map { |error| sarif_result(error) }
            }
          ]
        }

        deliver(JSON.pretty_generate(sarif), filepath)
      end

      # Get exit code based on error severity.
      #
      # Useful for CI/CD pipelines.
      #
      # @param max_errors [Integer] Maximum errors allowed (default: 0)
      # @return [Integer] Exit code (0 = success, 1 = errors found)
      def exit_code(max_errors: 0)
        @navigation.errors.size <= max_errors ? 0 : 1
      end

      # Get report summary as hash: the navigation statistics plus document
      # details and a `has_errors` flag.
      #
      # @return [Hash] Report summary
      def summary
        @navigation.statistics.merge(
          document: {
            name: @document.name,
            format: @document.format,
            language: @document.language_code
          },
          has_errors: @navigation.errors.any?
        )
      end

      # Print report to stdout.
      #
      # @param format [Symbol] Output format (:text, :json, :yaml)
      # @raise [ArgumentError] When the format is not one of the above
      def print(format: :text)
        case format
        when :text
          puts to_text
        when :json
          puts to_json
        when :yaml
          puts to_yaml
        else
          raise ArgumentError, "Unknown format: #{format}"
        end
      end

      private

      # Write +content+ to +filepath+ and return nil, or return +content+
      # unchanged when no path was given.
      #
      # @param content [String] Rendered report
      # @param filepath [String, nil] Destination path
      # @return [String, nil]
      def deliver(content, filepath)
        return content unless filepath

        File.write(filepath, content)
        nil
      end

      # Build one SARIF `result` object for an error.
      #
      # @param error [Object] Error taken from the navigation manager
      # @return [Hash] SARIF result
      def sarif_result(error)
        {
          ruleId: error.error_type.to_s,
          level: error.high_confidence? ? "error" : "warning",
          message: {
            text: "Potential #{error.error_type} error: '#{error.original}'"
          },
          locations: [
            {
              physicalLocation: {
                artifactLocation: {
                  uri: @document.name
                },
                region: {
                  # SARIF line and column numbers are 1-based, so a missing
                  # position falls back to 1 (never 0).
                  startLine: error.location.line || 1,
                  startColumn: error.location.column || 1
                }
              }
            }
          ],
          # NOTE(review): `suggestions` does not look like a standard SARIF
          # result property (`fixes` or a `properties` bag would be) — kept
          # as-is for existing consumers; confirm before changing.
          suggestions: error.suggestions&.map { |sugg| { text: sugg.word } }
        }
      end

      # Generate report data hash shared by the JSON and YAML exporters.
      #
      # @return [Hash] Report data
      def generate_report_data
        {
          metadata: {
            tool: "Kotoshu",
            version: Kotoshu::VERSION,
            # ISO 8601 UTC timestamp, formatted with strftime so it does not
            # depend on the `time` stdlib extension being loaded.
            generated_at: Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
          },
          document: {
            name: @document.name,
            format: @document.format.to_s,
            language: @document.language_code,
            word_count: @document.word_count,
            line_count: @document.line_count
          },
          statistics: @navigation.statistics,
          errors: @navigation.errors.map { |error| error_entry(error) },
          corrections: @navigation.export_corrections
        }
      end

      # Serialise a single error for the JSON/YAML report.
      #
      # @param error [Object] Error taken from the navigation manager
      # @return [Hash] Plain-data description of the error
      def error_entry(error)
        {
          id: error.id,
          location: {
            line: error.location.line,
            column: error.location.column,
            node_path: error.location.node_path
          },
          original: error.original,
          suggestions: error.suggestions&.map do |sugg|
            {
              word: sugg.word,
              confidence: sugg.confidence,
              source: sugg.source
            }
          end,
          error_type: error.error_type.to_s,
          confidence: error.confidence,
          recommended_suggestion: error.recommended_suggestion&.word
        }
      end
    end
  end
end