kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
|
|
4
|
+
module Cli
|
|
5
|
+
# Renders download progress to an output IO.
#
# Two rendering strategies, picked at construction time:
#   - TTY: a bar that rewrites itself in place via "\r"
#   - Non-TTY: one line every REPORT_INTERVAL_BYTES (only when the total
#     size is known)
#
# Both share #start, #update and #finish so callers don't care which mode
# they're in. Pass a ProgressReporter::Null (or anything that quacks the
# same) to silence output.
#
# The reporter knows nothing about HTTP, files, or chunks — callers
# feed it cumulative byte counts. This keeps it pure and testable.
class ProgressReporter
  REPORT_INTERVAL_BYTES = 10 * 1024 * 1024 # 10 MB between non-TTY line prints
  BAR_WIDTH = 20 # characters between the brackets of the determinate bar
  CLEAR_WIDTH = 80 # spaces used to blank the bar line in #finish
  BYTE_UNITS = %w[B KB MB GB TB].freeze

  # @param output [IO] Where to render. Usually $stderr.
  # @param label [String] Short prefix shown in TTY mode (e.g. "en model").
  # @param tty [Boolean, nil] Override the auto-detected TTY check; nil
  #   means "ask output.tty?".
  def initialize(output:, label: "download", tty: nil)
    @output = output
    @label = label
    @tty = tty.nil? ? output.tty? : tty
    @total = nil
    @received = 0
    @last_reported_at = 0
  end

  # Reset the counters for a new transfer and, in TTY mode, print the
  # initial bar line.
  #
  # @param total_bytes [Integer, nil] Total size from Content-Length, or nil if unknown.
  def start(total_bytes)
    @total = total_bytes
    @received = 0
    @last_reported_at = 0
    return unless @tty

    # The bar is chosen from @total so a known size is never announced as
    # "size unknown". `puts` ends this line; later renders use "\r" on the
    # line below it.
    @output.puts "#{@label}: #{current_bar}"
  end

  # Record progress and render it in whichever mode is active.
  #
  # @param received_bytes [Integer] Cumulative bytes received so far.
  # @return [nil]
  def update(received_bytes)
    @received = received_bytes
    if @tty
      render_tty
    else
      # Non-TTY output is throttled; this prints at most one line per
      # REPORT_INTERVAL_BYTES of progress.
      maybe_report_periodic
    end
    nil
  end

  # Print a line in non-TTY mode if enough bytes have flowed since
  # the last print. Called by #update in non-TTY mode; calling it again
  # right after #update is a no-op because the threshold has moved.
  # Nothing is printed when the total size is unknown.
  def maybe_report_periodic
    return if @tty
    return if @total.nil?
    return unless @received >= @last_reported_at + REPORT_INTERVAL_BYTES

    @output.puts " downloaded #{format_bytes(@received)} of #{format_bytes(@total)}"
    @last_reported_at = @received
  end

  # In TTY mode, blank the bar line and print a final summary line.
  # Does nothing in non-TTY mode.
  def finish
    return unless @tty

    # Clear the bar line and print final newline.
    @output.print "\r#{' ' * CLEAR_WIDTH}\r"
    @output.puts "#{@label}: done (#{format_bytes(@received)})"
  end

  # Null-object variant. Use when callers want to silence progress
  # (e.g., quiet mode or programmatic API).
  class Null
    def start(_total_bytes); end

    def update(_received_bytes); end

    def maybe_report_periodic; end

    def finish; end
  end

  private

  # Rewrite the current terminal line with the label and the latest bar.
  def render_tty
    @output.print "\r#{@label}: #{current_bar}"
  end

  # Bar text for the current state: determinate when a positive total is
  # known, indeterminate when the total is nil or zero.
  def current_bar
    if @total.nil? || @total.zero?
      indeterminate_bar(@received)
    else
      determinate_bar(@received, @total)
    end
  end

  # @param received [Integer] bytes received so far
  # @param total [Integer] non-zero total size in bytes
  # @return [String] fixed-width bar, rounded percentage, and byte counts
  def determinate_bar(received, total)
    pct = (received.to_f / total * 100).clamp(0, 100)
    # Each bar cell stands for 100 / BAR_WIDTH percent.
    filled = (pct / (100 / BAR_WIDTH)).round
    bar_str = ("=" * filled).ljust(BAR_WIDTH, " ")
    "[#{bar_str}] #{pct.round(0)}% #{format_bytes(received)}/#{format_bytes(total)}"
  end

  # @param received [Integer] bytes received so far
  # @return [String] static bar used when no percentage can be computed
  def indeterminate_bar(received)
    "[##########] #{format_bytes(received)} (size unknown)"
  end

  # Human-readable byte count using 1024-based steps.
  #
  # @param bytes [Integer, nil]
  # @return [String] e.g. "0 B", "512 B", "1.5 KB"; plain bytes have no
  #   decimals, larger units have one.
  def format_bytes(bytes)
    return "0 B" if bytes.nil? || bytes.zero?

    size = bytes.to_f
    unit_index = 0
    while size >= 1024 && unit_index < BYTE_UNITS.length - 1
      size /= 1024
      unit_index += 1
    end
    precision = unit_index.zero? ? "%.0f" : "%.1f"
    "#{format(precision, size)} #{BYTE_UNITS[unit_index]}"
  end
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
|
|
4
|
+
module Cli
|
|
5
|
+
# Pure data model describing what the `kotoshu status` command reports.
#
# Knows nothing about presentation — the CLI command formats it as
# text or JSON. Construction is split from presentation so both
# outputs share one source of truth (MECE).
class StatusReport
  # One row per (language, resource) pair. `size_bytes` and `cached_at`
  # are nil when the resource is not available.
  ResourceStatus = Struct.new(:language, :resource, :available, :size_bytes, :cached_at, keyword_init: true)

  attr_reader :version, :languages_setup, :resources, :cache_path,
              :cache_size_bytes, :audit_log_path, :audit_log_size_bytes,
              :onnx_loaded, :default_language, :offline

  # Plain value constructor; every field is stored as given.
  def initialize(version:, languages_setup:, resources:, cache_path:, cache_size_bytes:,
                 audit_log_path:, audit_log_size_bytes:, onnx_loaded:, default_language:, offline:)
    @version = version
    @languages_setup = languages_setup
    @resources = resources
    @cache_path = cache_path
    @cache_size_bytes = cache_size_bytes
    @audit_log_path = audit_log_path
    @audit_log_size_bytes = audit_log_size_bytes
    @onnx_loaded = onnx_loaded
    @default_language = default_language
    @offline = offline
  end

  # @return [Array] sorted, de-duplicated languages whose :model resource
  #   is marked available
  def languages_with_model
    resources.select { |r| r.resource == :model && r.available }.map(&:language).uniq.sort
  end

  # Probe the live system and produce a report. Each collaborator is
  # injectable for tests; defaults pull from the live configuration.
  #
  # @param version [String] version string to embed in the report
  # @param resource_manager [#languages_setup, #setup?] source of per-language availability
  # @param paths [#cache_path, #audit_log_path] filesystem locations to inspect
  # @param configuration [#default_language, #offline] active configuration
  # @param onnx_loaded [Boolean] whether the ONNX runtime was loaded
  # @return [StatusReport]
  def self.build(version:, resource_manager: Kotoshu::ResourceManager,
                 paths: Kotoshu::Paths, configuration: Kotoshu.configuration,
                 onnx_loaded: Kotoshu::Models::OnnxModel::ONNX_LOADED)
    langs = resource_manager.languages_setup
    cache_path = paths.cache_path
    cache_size = directory_size(cache_path)
    audit = audit_info(paths.audit_log_path)

    new(
      version: version,
      languages_setup: langs,
      resources: langs.flat_map { |lang| statuses_for(lang, resource_manager, cache_path) },
      cache_path: cache_path,
      cache_size_bytes: cache_size,
      audit_log_path: audit[:path],
      audit_log_size_bytes: audit[:size],
      onnx_loaded: onnx_loaded,
      default_language: configuration.default_language,
      offline: configuration.offline
    )
  end

  # Sum every regular file's size under `dir`, including hidden files.
  # Returns 0 if `dir` is nil or not a directory.
  #
  # @param dir [String, nil]
  # @return [Integer] total size in bytes
  def self.directory_size(dir)
    regular_files_under(dir).sum { |path| File.size(path) }
  end

  # Human-readable byte format (KB / MB / GB).
  # @param bytes [Integer, nil]
  # @return [String]
  def self.format_bytes(bytes)
    return "0 B" if bytes.nil? || bytes.zero?

    units = %w[B KB MB GB TB]
    size = bytes.to_f
    i = 0
    while size >= 1024 && i < units.length - 1
      size /= 1024
      i += 1
    end
    template = i.zero? ? "%.0f" : "%.1f"
    "#{format(template, size)} #{units[i]}"
  end

  class << self
    private

    # Every regular file below `dir`, as paths prefixed with `dir`.
    #
    # The directory is passed via `base:` rather than spliced into the
    # pattern, so glob metacharacters in the path ([, {, *, ?) are taken
    # literally. FNM_DOTMATCH makes hidden files count too.
    def regular_files_under(dir)
      return [] unless dir && File.directory?(dir)

      Dir.glob("**/*", File::FNM_DOTMATCH, base: dir)
         .map { |relative| File.join(dir, relative) }
         .select { |path| File.file?(path) }
    end

    # Build one ResourceStatus per known resource kind for `lang`.
    # Size and mtime are only probed for resources the manager reports
    # as set up.
    def statuses_for(lang, rm, cache_root)
      %i[spelling frequency model].map do |res|
        available = rm.setup?(lang, resource: res)
        ResourceStatus.new(
          language: lang,
          resource: res,
          available: available,
          size_bytes: available ? resource_size(cache_root, lang, res) : nil,
          cached_at: available ? resource_mtime(cache_root, lang, res) : nil
        )
      end
    end

    # Total bytes on disk for one resource directory.
    def resource_size(cache_root, lang, resource)
      directory_size(resource_dir(cache_root, lang, resource))
    end

    # Most recent modification time of any file in the resource
    # directory, or nil when it holds no files.
    def resource_mtime(cache_root, lang, resource)
      regular_files_under(resource_dir(cache_root, lang, resource))
        .map { |path| File.mtime(path) }
        .max
    end

    # Cache sub-directory for a resource kind; nil for an unknown kind.
    def resource_dir(cache_root, lang, resource)
      case resource
      when :spelling then File.join(cache_root, "languages", lang, "spelling")
      when :frequency then File.join(cache_root, "frequency-lists", lang)
      when :model then File.join(cache_root, "models", lang)
      end
    end

    # Path and size of the audit log; both nil when the path is nil or
    # the file does not exist.
    def audit_info(path)
      return { path: nil, size: nil } unless path && File.exist?(path)

      { path: path, size: File.size(path) }
    end
  end
end
|
|
129
|
+
end
|
|
130
|
+
end
|