kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
module Kotoshu
  module Documents
    # Unified location reference for errors in documents.
    #
    # Supports both line/column locations (plain text) and node paths
    # (structured formats like Markdown, AsciiDoc). Instances freeze
    # themselves at the end of +initialize+.
    #
    # @example Plain text location
    #   Location.new(line: 5, column: 12)
    #
    # @example Node path location
    #   Location.new(node_path: [:paragraph, 3, :text, 2])
    #
    # @example Mixed location
    #   Location.new(line: 5, column: 12, node_path: [:paragraph, 3])
    class Location
      attr_reader :line, :column, :node_path, :offset

      # Create a new location.
      #
      # @param line [Integer, nil] Line number (1-indexed)
      # @param column [Integer, nil] Column number (0-indexed)
      # @param node_path [Array<Symbol, Integer>, nil] Path to node in AST
      # @param offset [Integer, nil] Byte offset in content
      def initialize(line: nil, column: nil, node_path: nil, offset: nil)
        @line = line
        @column = column
        # Freeze a private copy so the caller's array is left untouched.
        @node_path = node_path&.dup&.freeze
        @offset = offset
        freeze
      end

      # Check if this is a line/column location.
      #
      # @return [Boolean] True if has line and column
      def line_column?
        !@line.nil? && !@column.nil?
      end

      # Check if this is a node path location.
      #
      # @return [Boolean] True if has a non-empty node path
      def node_location?
        !@node_path.nil? && !@node_path.empty?
      end

      # Comparison for sorting.
      #
      # Line/column locations sort by line, then column, and always come
      # before locations that lack a line/column. Locations without a
      # line/column are ordered by node path; a missing path is treated as
      # an empty one, and path elements that Ruby cannot compare directly
      # (e.g. Integer vs Symbol) are still given a deterministic order so
      # that sorting never raises.
      #
      # @param other [Object] Another location
      # @return [Integer, nil] -1, 0 or 1; nil when +other+ is not a Location
      def <=>(other)
        # Follow the Ruby convention: nil means "not comparable".
        return nil unless other.is_a?(Location)

        if line_column? && other.line_column?
          [@line, @column] <=> [other.line, other.column]
        elsif line_column?
          -1
        elsif other.line_column?
          1
        else
          compare_node_paths(other)
        end
      end

      # Check if this equals another location.
      #
      # @param other [Object] Another object
      # @return [Boolean] True if line, column, node path and offset all match
      def ==(other)
        return false unless other.is_a?(Location)

        @line == other.line &&
          @column == other.column &&
          @node_path == other.node_path &&
          @offset == other.offset
      end
      alias_method :eql?, :==

      # Hash code for hash table usage (consistent with #eql?).
      #
      # @return [Integer] Hash code
      def hash
        [@line, @column, @node_path, @offset].hash
      end

      # String representation.
      #
      # @return [String] Human-readable representation
      def to_s
        if line_column?
          "Line #{@line}:#{@column}"
        elsif node_location?
          "Path: #{@node_path.join('.')}"
        elsif @offset
          "Offset #{@offset}"
        else
          "Unknown"
        end
      end
      alias_method :inspect, :to_s

      # Create a location for a text node.
      #
      # @param node_path [Array] Path to the text node
      # @param start_offset [Integer] Starting character offset
      # @param length [Integer] Length of the text (accepted for interface
      #   compatibility; it is not stored on the location)
      # @return [Location] New location
      def self.for_text_node(node_path, start_offset:, length:)
        new(
          node_path: node_path,
          offset: start_offset
        )
      end

      # Create a line/column location.
      #
      # @param line [Integer] Line number
      # @param column [Integer] Column number
      # @return [Location] New location
      def self.for_line_column(line, column)
        new(line: line, column: column)
      end

      # Create a line-only location (column is set to 0).
      #
      # @param line [Integer] Line number
      # @return [Location] New location
      def self.for_line(line)
        new(line: line, column: 0)
      end

      private

      # Lexicographically compare this node path with another location's.
      #
      # @param other [Location] Another location
      # @return [Integer] -1, 0 or 1
      def compare_node_paths(other)
        mine = @node_path || []
        theirs = other.node_path || []

        mine.each_with_index do |element, idx|
          # Equal prefix but we have more elements: we sort after.
          return 1 if idx >= theirs.size

          result = compare_path_elements(element, theirs[idx])
          return result unless result.zero?
        end

        mine.size <=> theirs.size
      end

      # Compare two path elements, falling back to a deterministic order
      # when they are of incomparable types.
      #
      # @param left [Object] Element from this path
      # @param right [Object] Element from the other path
      # @return [Integer] -1, 0 or 1
      def compare_path_elements(left, right)
        direct = left <=> right
        return direct unless direct.nil?

        # Mixed types: indices (numbers) sort before names.
        return -1 if left.is_a?(Numeric) && !right.is_a?(Numeric)
        return 1 if right.is_a?(Numeric) && !left.is_a?(Numeric)

        left.to_s <=> right.to_s
      end
    end
  end
end
@@ -0,0 +1,389 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'document'
4
+ require_relative 'location'
5
+
6
module Kotoshu
  module Documents
    # Markdown document implementation.
    #
    # Handles Markdown files with AST parsing for structured navigation.
    #
    # NOTE(review): the AST shape this class walks (:document / :p / :h1
    # nodes whose children live under :value) and the Kramdown calls it
    # makes (+Kramdown::Document#to_hash+, +Kramdown::Converter.new+) should
    # be checked against the kramdown version in use -- they do not
    # obviously match kramdown's documented converters. Those calls are left
    # unchanged here.
    #
    # @example Creating a markdown document
    #   doc = MarkdownDocument.new("# Title\n\nParagraph text")
    #   doc.text_nodes.each { |node| puts node.text }
    class MarkdownDocument < Document
      require 'kramdown' if ENV['KOTOSHU_REQUIRE_MARKDOWN']

      # Node types whose :value holds a list of inline children.
      BLOCK_TEXT_TYPES = %i[p h1 h2 h3 h4 h5 h6].freeze
      private_constant :BLOCK_TEXT_TYPES

      # Create a new markdown document.
      #
      # @param content [String] The document content
      # @param format [Symbol] Document format (must be :markdown)
      # @param language_code [String] Language code
      # @raise [ArgumentError] If format is not :markdown
      def initialize(content, format: :markdown, language_code: 'en')
        raise ArgumentError, "Format must be :markdown" unless format == :markdown

        super(content, format: format, language_code: language_code)
        @parsed = false
        @ast = nil
      end

      # Parse the markdown document into an AST (memoized).
      #
      # @return [Hash] The parsed AST
      # @raise [RuntimeError] If the kramdown gem cannot be loaded
      def parse
        return @ast if @parsed

        # Lazy-load so the gem is only required when markdown is used.
        begin
          require 'kramdown'
        rescue LoadError
          raise "Kramdown gem not available. Add 'kramdown' to Gemfile"
        end

        kd = Kramdown::Document.new(content)
        # NOTE(review): confirm +to_hash+ resolves to a kramdown converter.
        @ast = kd.to_hash
        @parsed = true

        @ast
      end

      # Get all text nodes for spell checking.
      #
      # Extracts text from the AST, skipping code blocks.
      #
      # @return [Array<TextNode>] Text nodes in the document
      def text_nodes
        extract_text_nodes
      end

      # Get node at a specific path in the AST.
      #
      # @param path [Array] Node path (e.g., [:document, :p, 1])
      # @return [Object, nil] The node or nil
      def get_node(path)
        parse unless @parsed

        navigate_ast(@ast, path)
      end

      # Get context around a location.
      #
      # For markdown, navigates the AST to find surrounding context.
      # Line/column locations fall back to a line-based context with a
      # fixed window of 5 lines.
      #
      # @param location [Location] The error location
      # @param window [Integer] Number of sibling elements before/after
      # @return [Models::Context] Context object (empty strings when the
      #   location cannot be resolved in the AST)
      def context_for(location, window: 2)
        return plain_text_context(location, window: 5) if location.line_column?

        parse unless @parsed

        path = location.node_path
        # Offset-only locations carry no path to navigate.
        return empty_context(location, window) if path.nil? || path.empty?

        # For node-based locations, find parent and siblings
        parent = navigate_ast(@ast, path[0..-2])
        return empty_context(location, window) unless parent

        # Find siblings around current element
        siblings = extract_siblings(parent)
        current_type = path.last
        current_idx = siblings.find_index { |s| s[:type] == current_type }
        return empty_context(location, window) unless current_idx

        # Exclusive upper bound: an inclusive `0..current_idx - 1` becomes
        # `0..-1` (the whole array) when current_idx is 0.
        first_idx = [0, current_idx - window].max
        before_sibs = siblings[first_idx...current_idx]
        after_sibs = siblings[(current_idx + 1)..(current_idx + window)] || []

        Models::Context.new(
          before: before_sibs.map { |s| text_from_node(s) }.join("\n"),
          current: text_from_node(parent),
          after: after_sibs.map { |s| text_from_node(s) }.join("\n"),
          location: location,
          window: window
        )
      end

      # Replace text at a specific location.
      #
      # Navigates the AST to find the text node and replaces it,
      # then regenerates markdown.
      #
      # @param location [Location] The location to replace
      # @param new_text [String] The new text
      # @return [MarkdownDocument] New document with replacement
      # @raise [ArgumentError] If the location has no node path
      # @raise [RuntimeError] If the kramdown gem cannot be loaded
      def replace_node(location, new_text)
        raise ArgumentError, "Location must have a node path" if location.node_path.nil?

        parse unless @parsed

        # Navigate to the node and replace its text
        modified_ast = replace_in_ast(@ast, location.node_path, new_text)

        # Regenerate markdown from modified AST
        begin
          require 'kramdown'
          # NOTE(review): confirm this converter call against kramdown's API.
          new_content = Kramdown::Converter.new(modified_ast).to_kramdown
        rescue LoadError
          raise "Kramdown gem not available. Add 'kramdown' to Gemfile"
        end

        # The constructor takes keyword arguments; positional ones raise.
        MarkdownDocument.new(new_content, format: @format, language_code: @language_code)
      end

      # Apply corrections and return new document.
      #
      # Corrections are applied one at a time, each to the document
      # produced by the previous one. Corrections without a recommended
      # suggestion are skipped.
      #
      # @param corrections [Array<Models::SemanticError>] Errors to fix
      # @return [MarkdownDocument] New document with corrections
      def apply(corrections)
        return self if corrections.empty?

        corrections.reduce(self) do |doc, error|
          suggestion = error.recommended_suggestion
          # Nothing to substitute when no suggestion is available.
          suggestion ? doc.replace_node(error.location, suggestion.word) : doc
        end
      end

      # Document name for display.
      #
      # @return [String] Document name
      def name
        "markdown"
      end

      private

      # Extract text nodes from AST.
      #
      # @return [Array<TextNode>] Text nodes
      def extract_text_nodes
        parse unless @parsed
        extract_from_ast(@ast)
      end

      # Extract text nodes recursively from AST.
      #
      # @param ast [Hash] The AST or node (anything else yields no nodes)
      # @param path [Array] Current path
      # @return [Array<TextNode>] Text nodes
      def extract_from_ast(ast, path: [])
        return [] unless ast.is_a?(Hash)

        nodes = []

        case ast[:type]
        when :text
          text = ast[:value].to_s
          nodes << TextNode.new(
            text.strip,
            location: Location.for_text_node(path, start_offset: 0, length: text.length),
            node_path: path
          )
        when *BLOCK_TEXT_TYPES
          # Check paragraph/header content
          child_list(ast[:value]).each_with_index do |child, idx|
            nodes.concat(extract_from_ast(child, path: path + [:content, ast[:type], idx]))
          end
        when :blockquote
          nodes.concat(extract_from_ast(ast[:value], path: path + [:blockquote]))
        when :code_block
          # Skip code blocks (don't check code)
        when :link
          # Check link text but not URL
          inner = ast[:value]
          link_text = inner.is_a?(Hash) ? inner[:value] : nil
          if link_text && !link_text.empty?
            link_path = path + [:link_text]
            nodes << TextNode.new(
              link_text,
              location: Location.for_text_node(link_path, start_offset: 0, length: link_text.length),
              node_path: link_path
            )
          end
        when :strong, :em
          # Check emphasis content
          nodes.concat(extract_from_ast(ast[:value], path: path + [:emphasis]))
        when :document
          child_list(ast[:children]).each_with_index do |child, idx|
            nodes.concat(extract_from_ast(child, path: path + [:child, idx]))
          end
        when :list
          # Check list items
          child_list(ast[:value]).each_with_index do |item, idx|
            nodes.concat(extract_from_ast(item, path: path + [:item, idx]))
          end
        end

        nodes
      end

      # Normalize a children slot to an Array (nil or scalars become []).
      #
      # @param value [Object] Raw :value / :children entry
      # @return [Array] Children to iterate
      def child_list(value)
        value.is_a?(Array) ? value : []
      end

      # Navigate AST to find node at path.
      #
      # Integer elements index into Arrays; Symbol/String elements are
      # looked up as (symbolized) Hash keys. An empty path returns +ast+.
      #
      # @param ast [Hash] The AST
      # @param path [Array] Node path
      # @return [Object, nil] The node or nil
      def navigate_ast(ast, path)
        return nil unless path.is_a?(Array)

        current = ast
        path.each do |element|
          case element
          when Integer
            # Array index
            return nil unless current.is_a?(Array)
            return nil if element >= current.size

            current = current[element]
          when Symbol, String
            # Hash key
            return nil unless current.is_a?(Hash)

            current = current[element.to_sym]
          else
            return nil
          end
        end

        current
      end

      # Extract sibling nodes from a parent node.
      #
      # @param parent [Hash] Parent node
      # @return [Array<Hash>] Sibling nodes
      def extract_siblings(parent)
        return [] unless parent.is_a?(Hash)

        case parent[:type]
        when :document
          parent[:children] || []
        when :blockquote
          [parent[:value]].compact
        when :list, *BLOCK_TEXT_TYPES
          parent[:value] || []
        else
          []
        end
      end

      # Extract text content from a node.
      #
      # @param node [Hash] AST node
      # @return [String, nil] Text content (nil for code blocks)
      def text_from_node(node)
        return "" unless node.is_a?(Hash)

        case node[:type]
        when :text
          node[:value]
        when *BLOCK_TEXT_TYPES
          # Extract text from inline elements
          extract_inline_text(node[:value])
        when :code_block
          # Don't check code
          nil
        else
          ""
        end
      end

      # Extract text from inline markup.
      #
      # @param content [Array, Hash, String, nil] Content with inline markup
      # @return [String] Extracted text
      def extract_inline_text(content)
        case content
        when String
          content
        when Array
          content.map { |elem| extract_inline_text(elem) }.join
        when Hash
          text = content[:value]
          text ? extract_inline_text(text) : ""
        else
          ""
        end
      end

      # Replace text in AST at a specific path.
      #
      # Works on a deep copy; the AST passed in is not modified. Navigates
      # to the path minus its last element and replaces the text there.
      #
      # @param ast [Hash] The AST
      # @param path [Array] Node path to the text node
      # @param new_text [String] The replacement text
      # @return [Hash] Modified copy of the AST (+ast+ itself for an empty path)
      def replace_in_ast(ast, path, new_text)
        return ast if path.nil? || path.empty?

        # Clone the AST (deep copy)
        modified_ast = deep_clone_ast(ast)

        # Navigate to the parent of the text node (all but last element)
        target = navigate_ast(modified_ast, path[0..-2])
        return modified_ast unless target

        if target.is_a?(Hash) && target[:type] == :text
          # Replace the text value
          target[:value] = new_text
        elsif target.is_a?(Array)
          # Array of elements - replace the first text node found
          first_text = target.find { |elem| elem.is_a?(Hash) && elem[:type] == :text }
          first_text[:value] = new_text if first_text
        end

        modified_ast
      end

      # Deep clone an AST (Hashes and Arrays are copied; leaves are shared).
      #
      # @param ast [Hash] The AST to clone
      # @return [Hash] Cloned AST
      def deep_clone_ast(ast)
        case ast
        when Hash
          ast.transform_values { |v| deep_clone_ast(v) }
        when Array
          ast.map { |v| deep_clone_ast(v) }
        else
          ast
        end
      end

      # Build a context with empty before/current/after text.
      #
      # @param location [Location] The location being described
      # @param window [Integer] Requested window size
      # @return [Models::Context] Empty context
      def empty_context(location, window)
        Models::Context.new(before: "", current: "", after: "", location: location, window: window)
      end

      # Get plain text context for line/column locations.
      #
      # Fallback for line/column locations in structured documents.
      #
      # @param location [Location] The line/column location
      # @param window [Integer] Number of lines before/after
      # @return [Models::Context] Context object
      def plain_text_context(location, window: 5)
        # This class never assigns @lines itself; use it if the superclass
        # provides it, otherwise split the content.
        lines = @lines || content.lines

        idx = location.line - 1 # location.line is 1-indexed
        first_idx = [0, idx - window].max
        last_idx = [lines.size - 1, idx + window].min

        Models::Context.new(
          before: context_lines(lines, first_idx, idx - 1),
          current: lines[idx],
          # Starts at the line directly after the current one.
          after: context_lines(lines, idx + 1, last_idx),
          location: location,
          window: window
        )
      end

      # Join an inclusive range of lines, tolerating out-of-range bounds.
      #
      # Line terminators are removed before joining so the result does not
      # contain doubled newlines.
      #
      # @param lines [Array<String>] All lines
      # @param from [Integer] First index (inclusive)
      # @param to [Integer] Last index (inclusive)
      # @return [String] Joined lines, or "" when the range is empty
      def context_lines(lines, from, to)
        return "" if from.negative? || from > to

        (lines[from..to] || []).map(&:chomp).join("\n")
      end
    end
  end
end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'document'
4
+ require_relative '../models/context'
5
+
6
module Kotoshu
  module Documents
    # Plain text document implementation.
    #
    # Handles plain text files with line-based navigation and correction.
    #
    # @example Creating a plain text document
    #   doc = PlainTextDocument.new("Hello world\nHow are you?")
    #   doc.text_nodes.each { |node| puts node.text }
    class PlainTextDocument < Document
      # Characters treated as part of the word being replaced.
      WORD_AT_START = /\A[[:alnum:]'\u2019\-]+/.freeze
      private_constant :WORD_AT_START

      # Create a new plain text document.
      #
      # @param content [String] The document content
      # @param format [Symbol] Document format (must be :text)
      # @param language_code [String] Language code
      # @raise [ArgumentError] If format is not :text
      def initialize(content, format: :text, language_code: 'en')
        raise ArgumentError, "Format must be :text" unless format == :text

        super(content, format: format, language_code: language_code)
        # String#lines keeps each line's terminator.
        @lines = content.lines
      end

      # Get all text nodes for spell checking.
      #
      # Each non-blank line becomes a text node located at column 0 of
      # its (1-indexed) line.
      #
      # @return [Array<TextNode>] Text nodes (one per non-blank line)
      def text_nodes
        @lines.each_with_index.map do |line, idx|
          # Drop trailing whitespace only; leading indentation is kept.
          text = line.rstrip
          next if text.empty?

          TextNode.new(
            text,
            location: Location.for_line_column(idx + 1, 0),
            node_path: [:line, idx]
          )
        end.compact
      end

      # Get context around a location.
      #
      # Returns lines before and after the error location.
      #
      # @param location [Location] The error location (must be line/column)
      # @param window [Integer] Number of lines before/after (default: 5)
      # @return [Models::Context] Context object
      # @raise [ArgumentError] If the location is not line/column
      def context_for(location, window: 5)
        raise ArgumentError, "Location must be line/column" unless location.line_column?

        idx = location.line - 1 # location.line is 1-indexed
        first_idx = [0, idx - window].max
        last_idx = [@lines.size - 1, idx + window].min

        Models::Context.new(
          before: context_lines(first_idx, idx - 1),
          current: @lines[idx],
          # Starts at the line directly after the current one.
          after: context_lines(idx + 1, last_idx),
          location: location,
          window: window
        )
      end

      # Get node at path (for plain text, just returns line).
      #
      # @param path [Array] Node path (e.g., [:line, 5]); index is 0-based
      # @return [String, nil] The line content, or nil for an invalid path
      def get_node(path)
        return nil unless path.is_a?(Array) && path.first == :line

        line_idx = path[1]
        return nil unless line_idx.is_a?(Integer) && (0...@lines.size).cover?(line_idx)

        @lines[line_idx]
      end

      # Replace text at a specific location.
      #
      # For plain text, modifies a specific line. When the column lies
      # strictly inside the line, the word starting at that column is
      # replaced; otherwise (column 0 or past the end) the whole line's
      # text is replaced. The line's terminator is preserved either way.
      #
      # @param location [Location] The location to replace
      # @param new_text [String] The new text
      # @return [PlainTextDocument] New document with replacement
      # @raise [ArgumentError] If the location is not line/column or the
      #   line does not exist
      def replace_node(location, new_text)
        raise ArgumentError, "Location must be line/column" unless location.line_column?

        line_idx = location.line - 1
        line = line_idx.negative? ? nil : @lines[line_idx]
        raise ArgumentError, "Line #{location.line} is out of range" if line.nil?

        # Separate the text from its terminator so the terminator survives.
        terminator = line[/\r?\n\z/].to_s
        text = line[0, line.length - terminator.length]
        column = location.column

        replaced =
          if column.positive? && column < text.length
            # The original word's length is not passed in (and @original is
            # never assigned in this class), so measure the word found at
            # the column instead.
            # NOTE(review): confirm this matches the checker's tokenization.
            word_length = text[column..-1][WORD_AT_START].to_s.length
            "#{text[0...column]}#{new_text}#{text[(column + word_length)..-1]}"
          else
            new_text
          end

        new_lines = @lines.dup
        new_lines[line_idx] = "#{replaced}#{terminator}"

        # Lines still carry their terminators, so join without a separator.
        PlainTextDocument.new(new_lines.join, format: @format, language_code: @language_code)
      end

      # Apply corrections and return new document.
      #
      # Corrections are applied in reverse order (last line and column
      # first) so that earlier replacements do not shift the columns of
      # corrections still to be applied. Corrections without a recommended
      # suggestion are skipped.
      #
      # @param corrections [Array<Models::SemanticError>] Errors to fix
      # @return [PlainTextDocument] New document with corrections
      def apply(corrections)
        return self if corrections.empty?

        ordered = corrections.sort_by { |c| [c.location.line, c.location.column] }.reverse

        ordered.reduce(self) do |doc, error|
          suggestion = error.recommended_suggestion
          # Nothing to substitute when no suggestion is available.
          suggestion ? doc.replace_node(error.location, suggestion.word) : doc
        end
      end

      # Document name for display.
      #
      # @return [String] Document name
      def name
        "plain_text"
      end

      # Get lines as array.
      #
      # @return [Array<String>] Lines (each including its terminator)
      def lines
        @lines
      end

      private

      # Join an inclusive range of lines, tolerating out-of-range bounds.
      #
      # Line terminators are removed before joining so the result does not
      # contain doubled newlines.
      #
      # @param from [Integer] First index (inclusive)
      # @param to [Integer] Last index (inclusive)
      # @return [String] Joined lines, or "" when the range is empty
      def context_lines(from, to)
        return "" if from.negative? || from > to

        (@lines[from..to] || []).map(&:chomp).join("\n")
      end
    end
  end
end