kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
  module Language
    module Normalizer
      # Abstract base class for text normalizers.
      #
      # Normalizers transform text to a standard form for comparison.
      # Different languages use different normalization strategies.
      #
      # Examples of normalization:
      # - Accent removal (café -> cafe)
      # - Case folding (Hello -> hello)
      # - Whitespace normalization
      # - Punctuation normalization
      #
      # @example Implement a normalizer
      #   class MyNormalizer < Normalizer::Base
      #     def normalize(text)
      #       super.downcase.gsub(/[áàâä]/, 'a')
      #     end
      #   end
      class Base
        # Option values {#normalize} falls back to for keys the caller omits.
        DEFAULT_OPTIONS = {
          downcase: true,
          strip_punct: false,
          collapse_ws: true
        }.freeze

        # Normalize text.
        #
        # Steps, in the order they are applied:
        # - Downcase (unless disabled)
        # - Remove punctuation (only when requested)
        # - Collapse runs of whitespace to a single space (unless disabled)
        # - Strip leading/trailing whitespace (always)
        #
        # Punctuation is removed *before* the whitespace clean-up so that the
        # gaps left by deleted characters are tidied as well
        # ("a - b" becomes "a b", not "a  b").
        #
        # @param text [String, nil] Text to normalize
        # @param options [Hash] Normalization options
        # @option options [Boolean] :downcase (true) Convert to lowercase
        # @option options [Boolean] :strip_punct (false) Remove punctuation
        # @option options [Boolean] :collapse_ws (true) Collapse whitespace
        # @return [String] Normalized text ("" when +text+ is nil)
        def normalize(text, options = {})
          return "" if text.nil?

          opts = DEFAULT_OPTIONS.merge(options)

          result = text
          result = result.downcase if opts[:downcase]
          result = strip_punctuation(result) if opts[:strip_punct]
          result = result.gsub(/\s+/, " ") if opts[:collapse_ws]

          # String#strip always returns a new string, so the caller's
          # argument is never handed back or mutated.
          result.strip
        end

        # Normalize a word.
        #
        # @param word [String] Word to normalize
        # @return [String] Normalized word
        def normalize_word(word)
          normalize(word)
        end

        # Check if two strings are equal after normalization with the
        # default options.
        #
        # @param str1 [String] First string
        # @param str2 [String] Second string
        # @return [Boolean] True if equal after normalization
        def normalized_eql?(str1, str2)
          normalize(str1) == normalize(str2)
        end

        protected

        # Strip punctuation from text.
        #
        # Letters, numbers, combining marks and whitespace (including
        # non-ASCII spaces such as U+00A0) are kept; everything else is
        # deleted. Combining marks are kept because they are part of the
        # preceding letter (decomposed accents, Arabic or Indic vowel signs).
        # Marks that directly follow a deleted character are deleted with it
        # so that no orphaned marks are left behind.
        #
        # @param text [String] Text to strip
        # @return [String] Text without punctuation
        def strip_punctuation(text)
          text.gsub(/[^\p{L}\p{M}\p{N}\p{Space}]\p{M}*/, "")
        end

        # Remove accents from characters.
        #
        # @param text [String] Text with accents
        # @return [String] Text without accents, in composed (NFC) form
        def remove_accents(text)
          # Decompose (NFD) so that accents become separate combining
          # characters, then drop the Combining Diacritical Marks block.
          without_marks = text.unicode_normalize(:nfd).gsub(/[\u0300-\u036F]/, "")

          # Recompose: NFD also splits characters that carry no accent in this
          # range (e.g. Hangul syllables); returning them decomposed would
          # make the result differ from ordinary composed text.
          without_marks.unicode_normalize(:nfc)
        end

        # Normalize quotes to standard ASCII.
        #
        # @param text [String] Text with quotes
        # @return [String] Text with normalized quotes
        def normalize_quotes(text)
          text
            # Curly double quotes and double angle quotes (guillemets) -> "
            .gsub(/[\u201C\u201D\u00AB\u00BB]/, '"')
            # Curly single quotes, single angle quotes and backticks -> '
            .gsub(/[\u2018\u2019\u2039\u203A`]/, "'")
        end

        # Normalize whitespace.
        #
        # Every run of whitespace becomes one ASCII space and the ends are
        # trimmed. "Whitespace" here means any Unicode whitespace character
        # (no-break, thin, ideographic spaces, ...) plus the zero-width space
        # U+200B.
        #
        # @param text [String] Text with irregular whitespace
        # @return [String] Text with normalized whitespace
        def normalize_whitespace(text)
          text.gsub(/[\p{Space}\u200B]+/, " ").strip
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
  module Language
    # Central registry for language registration and retrieval.
    #
    # Uses Registry pattern for dynamic language discovery and management.
    # Languages register themselves on load, making the system extensible.
    #
    # Codes are stored and looked up as Strings, so Symbol codes (e.g. +:en+)
    # are accepted everywhere and are interchangeable with their String form.
    #
    # @example Register a language
    #   Kotoshu::Language::Registry.register("en-US", English::American)
    #
    # @example Retrieve a language
    #   lang = Kotoshu::Language::Registry.get("en-US")
    #
    # @example List supported languages
    #   codes = Kotoshu::Language::Registry.supported_codes
    class Registry
      @languages = {}
      @detectors = []

      class << self
        # Register a language class with its code.
        #
        # Registering the same code again replaces the previous class.
        #
        # @param code [String, Symbol] Language code (e.g., "en-US", "de-DE")
        # @param language_class [Class] Class implementing Kotoshu::Language::Base
        # @return [void]
        #
        # @example
        #   Registry.register("en-US", English::American)
        def register(code, language_class)
          # Keys are always Strings: mixed Symbol/String keys could not be
          # sorted by #supported_codes and would not match each other in #get.
          @languages[code.to_s] = language_class
        end

        # Register a detector for auto-detection.
        #
        # Detectors are tried in order of registration.
        #
        # @param detector [#detect] Object with detect method
        # @return [void]
        def register_detector(detector)
          @detectors << detector
        end

        # Get language class by code.
        #
        # Supports fallback to base language if variant not found.
        # Also supports finding variants when asking for base language.
        # For example:
        # - "en-GB" falls back to "en" if "en-GB" not registered
        # - "en" returns "en-US" if only "en-US" is registered
        #
        # When several variants of a base language are registered, the one
        # registered first is returned.
        #
        # @param code [String, Symbol, nil] Language code
        # @return [Class, nil] Language class or nil if not found
        def get(code)
          return nil unless code

          key = code.to_s

          # Try exact match first
          return @languages[key] if @languages.key?(key)

          base = key.split("-").first

          # A variant code (e.g., "en-GB") may only fall back to its base
          # language; it never resolves to a sibling variant.
          return @languages[base] if key.include?("-")

          # A bare base code (e.g., "en") resolves to any registered variant.
          match = @languages.find do |registered_code, _klass|
            registered_code.split("-").first == base
          end
          match&.last
        end

        # Check if a language can be resolved.
        #
        # Uses the same fallback rules as {.get}, so a code counts as
        # registered whenever {.get} would return a class for it.
        #
        # @param code [String, Symbol, nil] Language code
        # @return [Boolean] True if {.get} finds a class for the code
        def registered?(code)
          !get(code).nil?
        end

        # Get all supported language codes.
        #
        # @return [Array<String>] Sorted list of language codes
        def supported_codes
          @languages.keys.sort
        end

        # Get all registered language classes.
        #
        # @return [Hash] Copy of the hash mapping codes to classes
        def all
          @languages.dup
        end

        # Detect language from text.
        #
        # Tries registered detectors in order and returns the first truthy
        # result.
        #
        # @param text [String] Text to analyze
        # @return [String, nil] Detected language code or nil
        def detect(text)
          return nil if text.nil? || text.empty?

          @detectors.each do |detector|
            detected = detector.detect(text)
            return detected if detected
          end

          nil
        end

        # Clear all registrations (mainly for testing).
        #
        # @return [void]
        def clear
          @languages.clear
          @detectors.clear
        end

        # Get language info by code.
        #
        # The class is resolved with {.get}; the +:code+ entry of the result
        # is the code that was passed in, not the code the class was
        # registered under.
        #
        # @param code [String, Symbol] Language code
        # @return [Hash, nil] Language info or nil
        def info(code)
          klass = get(code)
          return nil unless klass

          # Use the class's own +instance+ accessor when it offers one,
          # otherwise build a fresh object.
          language = (klass.instance if klass.respond_to?(:instance)) || klass.new

          {
            code: code,
            name: language.name,
            variant: language.variant,
            region: language.region,
            encoding: language.encoding,
            rtl?: language.rtl?,
            script_type: language.script_type
          }
        end
      end
    end
  end
end
|