kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
|
|
4
|
+
module Documents
|
|
5
|
+
# Unified location reference for errors in documents.
|
|
6
|
+
#
|
|
7
|
+
# Supports both line/column locations (plain text) and node paths
|
|
8
|
+
# (structured formats like Markdown, AsciiDoc).
|
|
9
|
+
#
|
|
10
|
+
# @example Plain text location
|
|
11
|
+
# Location.new(line: 5, column: 12)
|
|
12
|
+
#
|
|
13
|
+
# @example Node path location
|
|
14
|
+
# Location.new(node_path: [:paragraph, 3, :text, 2])
|
|
15
|
+
#
|
|
16
|
+
# @example Mixed location
|
|
17
|
+
# Location.new(line: 5, column: 12, node_path: [:paragraph, 3])
|
|
18
|
+
class Location
  attr_reader :line, :column, :node_path, :offset

  # Create a new, immutable location.
  #
  # @param line [Integer, nil] Line number (1-indexed)
  # @param column [Integer, nil] Column number (0-indexed)
  # @param node_path [Array<Symbol, Integer>, nil] Path to node in AST
  # @param offset [Integer, nil] Offset in content
  def initialize(line: nil, column: nil, node_path: nil, offset: nil)
    @line = line
    @column = column
    # Freeze a private copy so the caller's array is not frozen as a side effect.
    @node_path = node_path&.dup&.freeze
    @offset = offset
    freeze
  end

  # Check if this is a line/column location.
  #
  # @return [Boolean] True if both line and column are set
  def line_column?
    !@line.nil? && !@column.nil?
  end

  # Check if this is a node path location.
  #
  # @return [Boolean] True if a non-empty node path is set
  def node_location?
    !@node_path.nil? && !@node_path.empty?
  end

  # Comparison for sorting. Line/column locations order by line then column
  # and sort before node-path-only locations; two node-path locations compare
  # their paths element by element.
  #
  # @param other [Object] Another location
  # @return [Integer, nil] -1, 0 or 1; nil when +other+ is not a Location
  #   (or the node paths themselves are not comparable)
  def <=>(other)
    # Follow the <=> convention: incomparable operands yield nil, not "equal".
    return nil unless other.is_a?(Location)

    if line_column? && other.line_column?
      [@line, @column] <=> [other.line, other.column]
    elsif line_column?
      -1
    elsif other.line_column?
      1
    else
      @node_path <=> other.node_path
    end
  end

  # Check if this equals another location (all four fields must match).
  #
  # @param other [Object] Another object
  # @return [Boolean] True if locations match
  def ==(other)
    return false unless other.is_a?(Location)

    @line == other.line &&
      @column == other.column &&
      @node_path == other.node_path &&
      @offset == other.offset
  end
  alias_method :eql?, :==

  # Hash code consistent with #== / #eql?, for use as a Hash key.
  #
  # @return [Integer] Hash code
  def hash
    [@line, @column, @node_path, @offset].hash
  end

  # String representation. Prefers line/column, then node path, then offset.
  #
  # @return [String] Human-readable representation
  def to_s
    if line_column?
      "Line #{@line}:#{@column}"
    elsif node_location?
      "Path: #{@node_path.join('.')}"
    elsif @offset
      "Offset #{@offset}"
    else
      "Unknown"
    end
  end
  alias_method :inspect, :to_s

  # Create a location for a text node.
  #
  # @param node_path [Array] Path to the text node
  # @param start_offset [Integer] Starting offset, stored as #offset
  # @param length [Integer] Length of the text (accepted for the interface;
  #   a Location has no field to store it)
  # @return [Location] New location
  def self.for_text_node(node_path, start_offset:, length:)
    new(node_path: node_path, offset: start_offset)
  end

  # Create a line/column location.
  #
  # @param line [Integer] Line number
  # @param column [Integer] Column number
  # @return [Location] New location
  def self.for_line_column(line, column)
    new(line: line, column: column)
  end

  # Create a line-only location (column is set to 0).
  #
  # @param line [Integer] Line number
  # @return [Location] New location
  def self.for_line(line)
    new(line: line, column: 0)
  end
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'document'
|
|
4
|
+
require_relative 'location'
|
|
5
|
+
|
|
6
|
+
module Kotoshu
|
|
7
|
+
module Documents
|
|
8
|
+
# Markdown document implementation.
|
|
9
|
+
#
|
|
10
|
+
# Handles Markdown files with AST parsing for structured navigation.
|
|
11
|
+
#
|
|
12
|
+
# @example Creating a markdown document
|
|
13
|
+
# doc = MarkdownDocument.new("# Title\n\nParagraph text")
|
|
14
|
+
# doc.text_nodes.each { |node| puts node.text }
|
|
15
|
+
class MarkdownDocument < Document
|
|
16
|
+
require 'kramdown' if ENV['KOTOSHU_REQUIRE_MARKDOWN']
|
|
17
|
+
|
|
18
|
+
# Create a new markdown document.
|
|
19
|
+
#
|
|
20
|
+
# @param content [String] The document content
|
|
21
|
+
# @param format [Symbol] Document format (must be :markdown)
|
|
22
|
+
# @param language_code [String] Language code
|
|
23
|
+
def initialize(content, format: :markdown, language_code: 'en')
  # This class only handles Markdown; reject any other format up front.
  if format != :markdown
    raise ArgumentError, "Format must be :markdown"
  end

  super(content, format: format, language_code: language_code)

  # The AST is built lazily by #parse, so the document starts out unparsed.
  @ast = nil
  @parsed = false
end
|
|
30
|
+
|
|
31
|
+
# Parse the markdown document into an AST.
|
|
32
|
+
#
|
|
33
|
+
# @return [Hash] The parsed AST
|
|
34
|
+
def parse
  unless @parsed
    # kramdown is loaded on demand so it stays an optional dependency.
    begin
      require 'kramdown'
    rescue LoadError
      raise "Kramdown gem not available. Add 'kramdown' to Gemfile"
    end

    # NOTE(review): confirm Kramdown::Document responds to #to_hash and that
    # the resulting node types match the ones the AST walkers here expect.
    @ast = Kramdown::Document.new(content).to_hash
    @parsed = true
  end

  # Cached result on every later call.
  @ast
end
|
|
49
|
+
|
|
50
|
+
# Get all text nodes for spell checking.
|
|
51
|
+
#
|
|
52
|
+
# Extracts text from the AST, skipping code blocks.
|
|
53
|
+
#
|
|
54
|
+
# @return [Array<TextNode>] Text nodes in the document
|
|
55
|
+
def text_nodes
  # Public entry point; the private helper parses on demand and walks the AST.
  extract_text_nodes
end
|
|
58
|
+
|
|
59
|
+
# Get node at a specific path in the AST.
|
|
60
|
+
#
|
|
61
|
+
# @param path [Array] Node path (e.g., [:document, :p, 1])
|
|
62
|
+
# @return [Object, nil] The node or nil
|
|
63
|
+
def get_node(path)
  # Build the AST first if nobody has asked for it yet.
  parse if !@parsed

  navigate_ast(@ast, path)
end
|
|
68
|
+
|
|
69
|
+
# Get context around a location.
|
|
70
|
+
#
|
|
71
|
+
# For markdown, navigates the AST to find surrounding context.
|
|
72
|
+
#
|
|
73
|
+
# @param location [Location] The error location
|
|
74
|
+
# @param window [Integer] Number of sibling elements before/after
|
|
75
|
+
# @return [Models::Context] Context object
|
|
76
|
+
def context_for(location, window: 2)
  # Line/column locations carry no AST information; use the line-based
  # fallback (which always uses a 5-line window).
  return plain_text_context(location, window: 5) if location.line_column?

  parse unless @parsed

  # Shared "nothing found" result.
  empty_context = lambda do
    Models::Context.new(before: "", current: "", after: "", location: location, window: window)
  end

  # A location without a node path cannot be resolved in the AST.
  node_path = location.node_path
  return empty_context.call if node_path.nil? || node_path.empty?

  # The parent is addressed by the path minus its last element; the last
  # element is matched against the siblings' :type.
  parent = navigate_ast(@ast, node_path[0..-2])
  return empty_context.call unless parent

  siblings = extract_siblings(parent)
  current_type = node_path.last
  current_idx = siblings.find_index { |s| s[:type] == current_type }
  return empty_context.call unless current_idx

  # Exclusive upper bound: an inclusive `..current_idx - 1` turns into
  # `0..-1` when current_idx is 0 and would select every sibling.
  first_before = [0, current_idx - window].max
  before_sibs = siblings[first_before...current_idx] || []
  after_sibs = siblings[(current_idx + 1)..(current_idx + window)] || []

  Models::Context.new(
    before: before_sibs.map { |s| text_from_node(s) }.join("\n"),
    current: text_from_node(parent),
    after: after_sibs.map { |s| text_from_node(s) }.join("\n"),
    location: location,
    window: window
  )
end
|
|
109
|
+
|
|
110
|
+
# Replace text at a specific location.
|
|
111
|
+
#
|
|
112
|
+
# Navigates the AST to find the text node and replaces it,
|
|
113
|
+
# then regenerates markdown.
|
|
114
|
+
#
|
|
115
|
+
# @param location [Location] The location to replace
|
|
116
|
+
# @param new_text [String] The new text
|
|
117
|
+
# @return [MarkdownDocument] New document with replacement
|
|
118
|
+
def replace_node(location, new_text)
  parse unless @parsed

  # Replace the text in a copy of the AST; the cached @ast is left as is.
  modified_ast = replace_in_ast(@ast, location.node_path, new_text)

  # Regenerate markdown from the modified AST.
  begin
    require 'kramdown'
    # NOTE(review): confirm this converter call against the kramdown API;
    # it is kept exactly as written.
    new_content = Kramdown::Converter.new(modified_ast).to_kramdown
  rescue LoadError
    raise "Kramdown gem not available. Add 'kramdown' to Gemfile"
  end

  # #initialize declares format and language_code as keyword arguments;
  # passing them positionally raises ArgumentError.
  MarkdownDocument.new(new_content, format: @format, language_code: @language_code)
end
|
|
134
|
+
|
|
135
|
+
# Apply corrections and return new document.
|
|
136
|
+
#
|
|
137
|
+
# @param corrections [Array<Models::SemanticError>] Errors to fix
|
|
138
|
+
# @return [MarkdownDocument] New document with corrections
|
|
139
|
+
def apply(corrections)
  return self if corrections.empty?

  # Thread the document through every correction in the given order; each
  # step yields the document the next correction is applied to.
  corrections.reduce(self) do |doc, error|
    suggestion = error.recommended_suggestion
    doc.replace_node(error.location, suggestion.word)
  end
end
|
|
151
|
+
|
|
152
|
+
# Document name for display.
|
|
153
|
+
#
|
|
154
|
+
# @return [String] Document name
|
|
155
|
+
def name
  # Fixed display label for this document type.
  "markdown"
end
|
|
158
|
+
|
|
159
|
+
private
|
|
160
|
+
|
|
161
|
+
# Extract text nodes from AST.
|
|
162
|
+
#
|
|
163
|
+
# @return [Array<TextNode>] Text nodes
|
|
164
|
+
def extract_text_nodes
  # Parse lazily, then walk the whole tree from the root.
  parse if !@parsed

  extract_from_ast(@ast)
end
|
|
168
|
+
|
|
169
|
+
# Extract text nodes recursively from AST.
|
|
170
|
+
#
|
|
171
|
+
# @param ast [Hash] The AST or node
|
|
172
|
+
# @param path [Array] Current path
|
|
173
|
+
# @return [Array<TextNode>] Text nodes
|
|
174
|
+
def extract_from_ast(ast, path: [])
  # A container may hand over a list of child nodes: walk each one and record
  # its index in the path.
  if ast.is_a?(Array)
    return ast.each_with_index.flat_map { |child, idx| extract_from_ast(child, path: path + [idx]) }
  end

  # Anything that is not a node hash (nil, bare strings, ...) has no text nodes.
  return [] unless ast.is_a?(Hash)

  # Normalise a :value / :children payload into a list of children.
  as_list = ->(payload) { payload.is_a?(Array) ? payload : [payload].compact }

  nodes = []

  # NOTE: `path` is a keyword parameter, so every recursive call must pass it
  # as `path:`; a positional second argument raises ArgumentError.
  case ast[:type]
  when :text
    raw = ast[:value].to_s
    nodes << TextNode.new(
      raw.strip,
      location: Location.for_text_node(path, start_offset: 0, length: raw.length),
      node_path: path
    )
  when :p, :h1, :h2, :h3, :h4, :h5, :h6
    # Paragraph/header content
    as_list.call(ast[:value]).each_with_index do |child, idx|
      nodes.concat(extract_from_ast(child, path: path + [:content, ast[:type], idx]))
    end
  when :blockquote
    nodes.concat(extract_from_ast(ast[:value], path: path + [:blockquote]))
  when :code_block
    # Skip code blocks (don't check code)
  when :link
    # Check link text but not URL
    link = ast[:value]
    link_text = link.is_a?(Hash) ? link[:value] : nil
    if link_text && !link_text.empty?
      link_path = path + [:link_text]
      nodes << TextNode.new(
        link_text,
        location: Location.for_text_node(link_path, start_offset: 0, length: link_text.length),
        node_path: link_path
      )
    end
  when :strong, :em
    # Emphasis content
    nodes.concat(extract_from_ast(ast[:value], path: path + [:emphasis])) if ast[:value]
  when :document
    as_list.call(ast[:children]).each_with_index do |child, idx|
      nodes.concat(extract_from_ast(child, path: path + [:child, idx]))
    end
  when :list
    # List items
    as_list.call(ast[:value]).each_with_index do |item, idx|
      nodes.concat(extract_from_ast(item, path: path + [:item, idx]))
    end
  end

  nodes
end
|
|
227
|
+
|
|
228
|
+
# Navigate AST to find node at path.
|
|
229
|
+
#
|
|
230
|
+
# @param ast [Hash] The AST
|
|
231
|
+
# @param path [Array] Node path
|
|
232
|
+
# @return [Object, nil] The node or nil
|
|
233
|
+
def navigate_ast(ast, path)
  # Only an Array can be walked. (The previous `is_a?(Array) || empty?` guard
  # called #empty? on non-arrays, so a nil path raised NoMethodError.)
  # An empty path addresses the root and returns +ast+ itself.
  return nil unless path.is_a?(Array)

  path.reduce(ast) do |current, element|
    case element
    when Integer
      # Array index; out-of-range (including negative) indices resolve to nil
      # rather than wrapping around to the end of the array.
      return nil unless current.is_a?(Array)
      return nil if element.negative? || element >= current.size

      current[element]
    when Symbol, String
      # Hash key (string keys are looked up as symbols)
      return nil unless current.is_a?(Hash)

      current[element.to_sym]
    else
      return nil
    end
  end
end
|
|
255
|
+
|
|
256
|
+
# Extract sibling nodes from a parent node.
|
|
257
|
+
#
|
|
258
|
+
# @param parent [Hash] Parent node
|
|
259
|
+
# @return [Array<Hash>] Sibling nodes
|
|
260
|
+
def extract_siblings(parent)
  kind = parent[:type]

  if kind == :document
    # The root keeps its children under :children.
    parent[:children] || []
  elsif kind == :blockquote
    # A blockquote wraps a single value (or nothing).
    [parent[:value]].compact
  elsif %i[p h1 h2 h3 h4 h5 h6 list].include?(kind)
    # Paragraphs, headers and lists keep their children under :value.
    parent[:value] || []
  else
    []
  end
end
|
|
274
|
+
|
|
275
|
+
# Extract text content from a node.
|
|
276
|
+
#
|
|
277
|
+
# @param node [Hash] AST node
|
|
278
|
+
# @return [String] Text content
|
|
279
|
+
def text_from_node(node)
  kind = node[:type]

  # Plain text: the value is the text.
  return node[:value] if kind == :text
  # Paragraphs/headers: flatten the inline elements.
  return extract_inline_text(node[:value]) if %i[p h1 h2 h3 h4 h5 h6].include?(kind)
  # Code is not checked, so it contributes nothing (nil, not "").
  return nil if kind == :code_block

  ""
end
|
|
293
|
+
|
|
294
|
+
# Extract text from inline markup.
|
|
295
|
+
#
|
|
296
|
+
# @param content [Array, String] Content with inline markup
|
|
297
|
+
# @return [String] Extracted text
|
|
298
|
+
def extract_inline_text(content)
  if content.is_a?(String)
    # Already plain text.
    content
  elsif content.is_a?(Array)
    # Flatten every element and concatenate without a separator.
    content.map { |part| extract_inline_text(part) }.join
  elsif content.is_a?(Hash)
    # Inline node: its text lives (possibly nested) under :value.
    inner = content[:value]
    inner ? extract_inline_text(inner) : ""
  else
    # nil, false and any other type carry no text.
    ""
  end
end
|
|
313
|
+
|
|
314
|
+
# Replace text in AST at a specific path.
|
|
315
|
+
#
|
|
316
|
+
# @param ast [Hash] The AST
|
|
317
|
+
# @param path [Array] Node path to the text node
|
|
318
|
+
# @param new_text [String] The replacement text
|
|
319
|
+
# @return [Hash] Modified deep copy of the AST (not frozen; the input AST is left unchanged)
|
|
320
|
+
def replace_in_ast(ast, path, new_text)
  # Nothing to address: hand the original AST back untouched.
  return ast if path.nil? || path.empty?

  # Work on a deep copy so the caller's AST is never mutated.
  modified_ast = deep_clone_ast(ast)

  # Prefer the node the path actually addresses. Previously only the parent
  # was inspected, so the first text sibling was always rewritten, whichever
  # sibling the path pointed at.
  target = navigate_ast(modified_ast, path)
  if target.is_a?(Hash) && target[:type] == :text
    target[:value] = new_text
    return modified_ast
  end

  # Fallback: the path's last element is a label rather than a navigable
  # key/index, so look at the container addressed by the rest of the path.
  container = navigate_ast(modified_ast, path[0..-2])
  if container.is_a?(Hash) && container[:type] == :text
    container[:value] = new_text
  elsif container.is_a?(Array)
    # Replace the first text node in the list.
    first_text = container.find { |elem| elem.is_a?(Hash) && elem[:type] == :text }
    first_text[:value] = new_text if first_text
  end

  modified_ast
end
|
|
348
|
+
|
|
349
|
+
# Deep clone an AST.
|
|
350
|
+
#
|
|
351
|
+
# @param ast [Hash] The AST to clone
|
|
352
|
+
# @return [Hash] Cloned AST
|
|
353
|
+
def deep_clone_ast(ast)
  # Hashes and arrays are rebuilt recursively; every other value (strings,
  # symbols, numbers, nil, ...) is shared with the source tree, not copied.
  if ast.is_a?(Hash)
    ast.transform_values { |child| deep_clone_ast(child) }
  elsif ast.is_a?(Array)
    ast.map { |child| deep_clone_ast(child) }
  else
    ast
  end
end
|
|
363
|
+
|
|
364
|
+
# Get plain text context for line/column locations.
|
|
365
|
+
#
|
|
366
|
+
# Fallback for line/column locations in structured documents.
|
|
367
|
+
#
|
|
368
|
+
# @param location [Location] The line/column location
|
|
369
|
+
# @param window [Integer] Number of lines before/after
|
|
370
|
+
# @return [Models::Context] Context object
|
|
371
|
+
def plain_text_context(location, window: 5)
  # NOTE(review): nothing visible in this class assigns @lines; fall back to
  # splitting the content so this does not depend on the parent class.
  lines = @lines || content.lines

  current_idx = location.line - 1 # location.line is 1-indexed
  first_idx = [0, current_idx - window].max
  last_idx = [lines.size - 1, current_idx + window].min

  # Slices past the end of the array are nil, hence the `|| []`.
  before_lines = lines[first_idx...current_idx] || []
  # "after" starts on the line right after the current one; starting at
  # `location.line + 1` skipped that line.
  after_lines = lines[(current_idx + 1)..last_idx] || []

  # Lines may still carry their terminators; chomp before joining so the
  # context does not contain doubled newlines.
  Models::Context.new(
    before: before_lines.map(&:chomp).join("\n"),
    current: lines[current_idx],
    after: after_lines.map(&:chomp).join("\n"),
    location: location,
    window: window
  )
end
|
|
387
|
+
end
|
|
388
|
+
end
|
|
389
|
+
end
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'document'
|
|
4
|
+
require_relative '../models/context'
|
|
5
|
+
|
|
6
|
+
module Kotoshu
|
|
7
|
+
module Documents
|
|
8
|
+
# Plain text document implementation.
|
|
9
|
+
#
|
|
10
|
+
# Handles plain text files with line-based navigation and correction.
|
|
11
|
+
#
|
|
12
|
+
# @example Creating a plain text document
|
|
13
|
+
# doc = PlainTextDocument.new("Hello world\nHow are you?")
|
|
14
|
+
# doc.text_nodes.each { |node| puts node.text }
|
|
15
|
+
class PlainTextDocument < Document
|
|
16
|
+
# Create a new plain text document.
|
|
17
|
+
#
|
|
18
|
+
# @param content [String] The document content
|
|
19
|
+
# @param format [Symbol] Document format (must be :text)
|
|
20
|
+
# @param language_code [String] Language code
|
|
21
|
+
def initialize(content, format: :text, language_code: 'en')
  # This class only handles plain text; reject any other format up front.
  if format != :text
    raise ArgumentError, "Format must be :text"
  end

  super(content, format: format, language_code: language_code)

  # String#lines keeps each line's terminator.
  @lines = content.lines
end
|
|
27
|
+
|
|
28
|
+
# Get all text nodes for spell checking.
|
|
29
|
+
#
|
|
30
|
+
# Each line becomes a text node.
|
|
31
|
+
#
|
|
32
|
+
# @return [Array<TextNode>] Text nodes (one per line)
|
|
33
|
+
def text_nodes
  nodes = []

  @lines.each_with_index do |raw_line, index|
    # Only trailing whitespace (including the line terminator) is removed;
    # leading indentation is preserved.
    text = raw_line.rstrip
    # Blank lines produce no node.
    next if text.empty?

    nodes << TextNode.new(
      text,
      location: Location.for_line_column(index + 1, 0),
      node_path: [:line, index]
    )
  end

  nodes
end
|
|
44
|
+
|
|
45
|
+
# Get context around a location.
|
|
46
|
+
#
|
|
47
|
+
# Returns lines before and after the error location.
|
|
48
|
+
#
|
|
49
|
+
# @param location [Location] The error location (must be line/column)
|
|
50
|
+
# @param window [Integer] Number of lines before/after (default: 5)
|
|
51
|
+
# @return [Models::Context] Context object
|
|
52
|
+
def context_for(location, window: 5)
  raise ArgumentError, "Location must be line/column" unless location.line_column?

  current_idx = location.line - 1 # location.line is 1-indexed
  first_idx = [0, current_idx - window].max
  last_idx = [@lines.size - 1, current_idx + window].min

  # Slices past the end of the array are nil, hence the `|| []`.
  before_lines = @lines[first_idx...current_idx] || []
  # "after" starts on the line right after the current one; starting at
  # `location.line + 1` skipped that line and crashed on the last lines.
  after_lines = @lines[(current_idx + 1)..last_idx] || []

  # @lines comes from String#lines and keeps terminators; chomp before
  # joining so the context does not contain doubled newlines.
  Models::Context.new(
    before: before_lines.map(&:chomp).join("\n"),
    current: @lines[current_idx],
    after: after_lines.map(&:chomp).join("\n"),
    location: location,
    window: window
  )
end
|
|
70
|
+
|
|
71
|
+
# Get node at path (for plain text, just returns line).
|
|
72
|
+
#
|
|
73
|
+
# @param path [Array] Node path (e.g., [:line, 5])
|
|
74
|
+
# @return [String, nil] The line content
|
|
75
|
+
def get_node(path)
  return nil unless path.is_a?(Array) && path.first == :line

  line_idx = path[1]
  # A missing or non-integer index cannot address a line; return nil like any
  # other unresolvable path instead of raising on the comparison below.
  return nil unless line_idx.is_a?(Integer)
  return nil if line_idx.negative? || line_idx >= @lines.size

  # The index is zero-based, matching the [:line, idx] paths built by #text_nodes.
  @lines[line_idx]
end
|
|
83
|
+
|
|
84
|
+
# Replace text at a specific location.
|
|
85
|
+
#
|
|
86
|
+
# For plain text, modifies a specific line.
|
|
87
|
+
#
|
|
88
|
+
# @param location [Location] The location to replace
|
|
89
|
+
# @param new_text [String] The new text
|
|
90
|
+
# @return [PlainTextDocument] New document with replacement
|
|
91
|
+
def replace_node(location, new_text)
  raise ArgumentError, "Location must be line/column" unless location.line_column?

  line_idx = location.line - 1 # location.line is 1-indexed
  unless line_idx >= 0 && line_idx < @lines.size
    raise ArgumentError, "Line #{location.line} is out of range"
  end

  line = @lines[line_idx]
  column = location.column

  replacement =
    if column > 0 && column < line.length
      # A location carries no length, so the word being replaced is taken to
      # be the run of word characters (with inner apostrophes/hyphens) that
      # starts at the column. (The previous code read an @original ivar that
      # is never assigned, so this branch always raised NoMethodError.)
      tail = line[column..-1]
      word_length = tail[/\A[[:word:]]+(?:['\u2019-][[:word:]]+)*/].to_s.length
      "#{line[0...column]}#{new_text}#{tail[word_length..-1]}"
    else
      # Column 0 (or past the end) addresses the whole line, as produced by
      # #text_nodes; keep the line terminator so lines do not merge.
      "#{new_text}#{line[/\r?\n\z/]}"
    end

  new_lines = @lines.dup
  new_lines[line_idx] = replacement

  # @lines keeps each line's terminator, so a plain join restores the content;
  # joining with "\n" doubled every newline. format/language_code are keyword
  # arguments of #initialize.
  PlainTextDocument.new(new_lines.join, format: @format, language_code: @language_code)
end
|
|
110
|
+
|
|
111
|
+
# Apply corrections and return new document.
|
|
112
|
+
#
|
|
113
|
+
# Corrections are applied in reverse order to preserve offsets.
|
|
114
|
+
#
|
|
115
|
+
# @param corrections [Array<Models::SemanticError>] Errors to fix
|
|
116
|
+
# @return [PlainTextDocument] New document with corrections
|
|
117
|
+
def apply(corrections)
  return self if corrections.empty?

  # Apply from the end of the document backwards (last line first, and within
  # a line the rightmost column first) so that an earlier replacement cannot
  # shift the position of one that is still pending. The sorted list was
  # previously computed but never used.
  ordered = corrections.sort_by do |c|
    [c.location.line.to_i, c.location.column.to_i]
  end.reverse

  new_doc = self
  ordered.each do |error|
    suggestion = error.recommended_suggestion
    # Nothing to apply when the error carries no suggestion.
    next unless suggestion

    new_doc = new_doc.replace_node(error.location, suggestion.word)
  end

  new_doc
end
|
|
131
|
+
|
|
132
|
+
# Document name for display.
|
|
133
|
+
#
|
|
134
|
+
# @return [String] Document name
|
|
135
|
+
def name
  # Fixed display label for this document type.
  "plain_text"
end
|
|
138
|
+
|
|
139
|
+
# Get lines as array.
|
|
140
|
+
#
|
|
141
|
+
# @return [Array<String>] Lines
|
|
142
|
+
def lines
  # Returns the internal array itself (not a copy); entries keep their
  # line terminators because they come from String#lines.
  @lines
end
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
end
|