jekyll-l10n 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/lib/jekyll-l10n/constants.rb +136 -0
- data/lib/jekyll-l10n/errors.rb +60 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
- data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
- data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
- data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
- data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
- data/lib/jekyll-l10n/extraction/logger.rb +48 -0
- data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
- data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
- data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
- data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
- data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
- data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
- data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
- data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
- data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
- data/lib/jekyll-l10n/po_file/loader.rb +64 -0
- data/lib/jekyll-l10n/po_file/manager.rb +160 -0
- data/lib/jekyll-l10n/po_file/merger.rb +80 -0
- data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
- data/lib/jekyll-l10n/po_file/reader.rb +518 -0
- data/lib/jekyll-l10n/po_file/writer.rb +232 -0
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
- data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
- data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
- data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
- data/lib/jekyll-l10n/translation/translator.rb +179 -0
- data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
- data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
- data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
- data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
- data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
- data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
- data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
- data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
- data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
- data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
- data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
- data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
- data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
- data/lib/jekyll-l10n/version.rb +10 -0
- data/lib/jekyll-l10n.rb +268 -0
- metadata +200 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "uri"
|
|
6
|
+
require_relative "../utils/logger_formatter"
|
|
7
|
+
|
|
8
|
+
module Jekyll
|
|
9
|
+
module L10n
|
|
10
|
+
# Translates compendium entries using the LibreTranslate API.
|
|
11
|
+
#
|
|
12
|
+
# LibreTranslator integrates with the LibreTranslate API to automatically
|
|
13
|
+
# translate untranslated entries in compendium PO files. It handles batching,
|
|
14
|
+
# retries with exponential backoff, progress logging, and API error handling.
|
|
15
|
+
# Translations are written directly to POEntry objects in-place.
|
|
16
|
+
#
|
|
17
|
+
# Key responsibilities:
|
|
18
|
+
# * Send translation requests to LibreTranslate API
|
|
19
|
+
# * Batch translations for efficiency
|
|
20
|
+
# * Handle API timeouts and failures with retries
|
|
21
|
+
# * Log translation progress at configurable intervals
|
|
22
|
+
# * Update POEntry msgstr with translated text
|
|
23
|
+
# * Parse JSON API responses
|
|
24
|
+
#
|
|
25
|
+
# @example
|
|
26
|
+
# translator = LibreTranslator.new(config)
|
|
27
|
+
# translator.translate_compendium(po_entries, 'es')
|
|
28
|
+
# # po_entries now have msgstr filled from API translations
|
|
29
|
+
class LibreTranslator
  TranslationError = Class.new(StandardError) unless defined?(TranslationError)

  # Create a translator bound to a configuration object.
  #
  # @param config [PageLocalesConfig] Object answering the libretranslate_*
  #   readers used in this class: api_url, api_key, timeout, batch_size,
  #   retry_attempts, retry_delay, progress_interval, source_locale, format
  #   and stop_on_error?
  def initialize(config)
    @config = config
  end

  # Fill in the msgstr of every untranslated entry via the LibreTranslate API.
  #
  # Entries whose msgid is blank are ignored. Entries are updated in place.
  # Nothing is logged or requested when there is nothing to translate.
  #
  # @param po_entries [Array<GetText::POEntry>] Entries from the compendium
  # @param target_locale [String] Target locale code (e.g., 'es', 'fr')
  # @return [void]
  # @raise [TranslationError] If a request fails and stop_on_error is set,
  #   or once the configured number of retry attempts is exceeded
  def translate_compendium(po_entries, target_locale)
    translatable_count, empty_entries = count_translatable_entries(po_entries)
    return if empty_entries.empty?

    log_translation_progress(empty_entries.length, translatable_count, target_locale)
    start_time = Time.now
    process_translation_batches(empty_entries, target_locale, start_time)
    log_translation_complete(empty_entries.length, target_locale, start_time)
  end

  private

  # Run the given block, retrying it on any StandardError.
  #
  # The wait before retry k is retry_delay * 2**(k - 1) plus up to one second
  # of random jitter, capped at 30 seconds.
  #
  # @yield The operation to attempt
  # @return [Object] Whatever the block returns on its first success
  # @raise [TranslationError] When attempts exceed libretranslate_retry_attempts,
  #   or immediately (with the original message) when stop_on_error is set
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  def retry_with_backoff
    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
    attempts = 0
    begin
      yield
    rescue StandardError => e
      attempts += 1
      max_attempts = @config.libretranslate_retry_attempts

      raise TranslationError, "Max retry attempts reached" if attempts > max_attempts
      raise TranslationError, e.message if @config.libretranslate_stop_on_error?

      base_delay = @config.libretranslate_retry_delay
      exponential_delay = base_delay * (2**(attempts - 1))
      jitter = rand(0..1000) / 1000.0 # spreads out simultaneous retries
      delay = [exponential_delay + jitter, 30].min

      Jekyll.logger.warn(
        "LibreTranslator",
        "Retrying translation (attempt #{attempts}/#{max_attempts}) after #{delay.round(2)}s: " \
        "#{e.message}"
      )
      sleep(delay)
      retry
    end
  end

  # Split entries into "has something to translate" and "still untranslated".
  #
  # A nil msgid or msgstr is treated like an empty string so that entries
  # built without a msgstr do not abort the run.
  #
  # @param po_entries [Array<#msgid, #msgstr>] Entries to inspect
  # @return [Array(Integer, Array)] Count of entries with a non-blank msgid,
  #   and the subset of those entries whose msgstr is empty
  def count_translatable_entries(po_entries)
    translatable = po_entries.reject { |entry| entry.msgid.to_s.strip.empty? }
    untranslated = translatable.select { |entry| entry.msgstr.to_s.empty? }
    [translatable.length, untranslated]
  end

  # Announce how many entries are about to be translated.
  #
  # @param empty_count [Integer] Number of untranslated entries
  # @param _translatable_count [Integer] Unused; kept for the call signature
  # @param target_locale [String] Target locale code
  # @return [void]
  def log_translation_progress(empty_count, _translatable_count, target_locale)
    Jekyll.logger.info "Localization",
                       "Translating #{empty_count} entries for #{target_locale}"
  end

  # Log a progress line with count, percentage and elapsed seconds.
  #
  # @param current_num [Integer] 1-based position of the entry just handled
  # @param total_entries [Integer] Total number of entries in this run
  # @param target_locale [String] Target locale code
  # @param start_time [Time] When the run started
  # @return [void]
  def log_periodic_progress(current_num, total_entries, target_locale, start_time)
    elapsed = Time.now - start_time
    percent = (current_num.to_f / total_entries * 100).round

    Jekyll.logger.info "Localization",
                       "[LibreTranslator] Progress: #{current_num}/#{total_entries} " \
                       "(#{percent}%) translated for #{target_locale} [#{elapsed.round(1)}s]"
  end

  # Log the final summary line, including approximate throughput.
  #
  # @param entries_translated [Integer] Number of entries processed
  # @param target_locale [String] Target locale code
  # @param start_time [Time] When the run started
  # @return [void]
  def log_translation_complete(entries_translated, target_locale, start_time)
    elapsed = Time.now - start_time
    # Guard the division: a zero elapsed time would otherwise print "Infinity".
    measurable = entries_translated.positive? && elapsed.positive?
    throughput = measurable ? (entries_translated / elapsed).round(1) : 0

    Jekyll.logger.info "Localization",
                       "[LibreTranslator] Completed: #{entries_translated} entries " \
                       "translated for #{target_locale} [#{elapsed.round(1)}s, " \
                       "~#{throughput} entries/sec]"
  end

  # Walk the untranslated entries in slices of libretranslate_batch_size,
  # wrapping each slice in retry_with_backoff.
  #
  # @param empty_entries [Array] Entries that still need a translation
  # @param target_locale [String] Target locale code
  # @param start_time [Time] When the run started (for progress logging)
  # @return [void]
  def process_translation_batches(empty_entries, target_locale, start_time)
    batch_size = @config.libretranslate_batch_size
    progress_interval = @config.libretranslate_progress_interval
    total = empty_entries.length

    empty_entries.each_slice(batch_size).with_index do |batch, batch_num|
      start_num = (batch_num * batch_size) + 1
      retry_with_backoff do
        translate_batch(batch, target_locale, start_num, total, start_time, progress_interval)
      end
    end
  end

  # Translate each entry of one batch, one API request per entry.
  #
  # Because retry_with_backoff re-runs the whole batch after a failure,
  # entries that already received a translation on an earlier attempt are
  # skipped rather than requested again. Progress is logged after an entry
  # has been translated so the reported count is accurate.
  #
  # @param batch [Array] Entries in this batch
  # @param target_locale [String] Target locale code
  # @param start_num [Integer] 1-based position of the batch's first entry
  # @param total_entries [Integer] Total number of entries in this run
  # @param start_time [Time] When the run started
  # @param progress_interval [Integer] Log every N entries; 0 or less disables
  # @return [void]
  # @raise [StandardError] Re-raises whatever the request raised, after logging
  # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
  def translate_batch(batch, target_locale, start_num, total_entries, start_time,
                      progress_interval)
    # rubocop:enable Metrics/ParameterLists, Metrics/AbcSize
    batch.each_with_index do |entry, index|
      next unless entry.msgstr.to_s.empty?

      current_num = start_num + index

      # Per-entry detail is only emitted at TRACE level.
      LoggerFormatter.trace_if_enabled(
        @config, "LibreTranslator",
        "Translating entry #{current_num}/#{total_entries}: #{entry.msgid[0..50]}"
      )

      translations = make_api_request([entry.msgid], target_locale)
      # Array() also copes with a server that answers with a bare String;
      # indexing a String with [0] would yield only its first character.
      entry.msgstr = Array(translations).first || ""

      next unless progress_interval.positive?
      next unless (current_num % progress_interval).zero? || current_num == total_entries

      log_periodic_progress(current_num, total_entries, target_locale, start_time)
    end
  rescue StandardError => e
    Jekyll.logger.error "LibreTranslator", "Error in translate_batch: #{e.class}: #{e.message}"
    Jekyll.logger.error "LibreTranslator", Array(e.backtrace).first(5).join("\n")
    raise
  end

  # Memoized Net::HTTP client for the configured API URL.
  #
  # TLS is enabled when the URL scheme is https; without this an https
  # endpoint would be contacted in plaintext on port 443.
  #
  # @return [Net::HTTP] Client with read/open timeouts from the configuration
  def http_config
    @http_config ||= begin
      uri = URI(@config.libretranslate_api_url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = uri.scheme == "https"
      http.read_timeout = @config.libretranslate_timeout
      http.open_timeout = @config.libretranslate_timeout
      http
    end
  end

  # Request headers: JSON content type, plus a Bearer token when an API key
  # is configured.
  #
  # NOTE(review): LibreTranslate's own API documents the key as an `api_key`
  # request parameter; confirm the target server accepts a Bearer header.
  #
  # @return [Hash{String => String}]
  def api_headers
    headers = { "Content-Type" => "application/json" }
    api_key = @config.libretranslate_api_key
    headers["Authorization"] = "Bearer #{api_key}" if api_key
    headers
  end

  # POST the given text(s) to "<api_url>/translate" and return the result.
  #
  # @param text [String, Array<String>] Value sent as the `q` field
  # @param target_locale [String] Value sent as the `target` field
  # @return [String, Array<String>] The response's "translatedText" value
  # @raise [TranslationError] On read/open timeout or an unusable response
  def make_api_request(text, target_locale)
    # chomp avoids a "//translate" path when the base URL ends with a slash.
    uri = URI("#{@config.libretranslate_api_url.to_s.chomp('/')}/translate")
    Jekyll.logger.debug "LibreTranslator", "Requesting #{uri}"
    request = Net::HTTP::Post.new(uri, api_headers)
    request.body = {
      :q => text,
      :source => @config.libretranslate_source_locale,
      :target => target_locale,
      :format => @config.libretranslate_format,
    }.to_json
    response = http_config.request(request)
    Jekyll.logger.debug "LibreTranslator", "Response code: #{response.code}"
    handle_api_response(response)
  rescue Net::ReadTimeout, Net::OpenTimeout => e
    raise TranslationError, "API timeout: #{e.message}"
  end

  # Extract "translatedText" from a JSON response.
  #
  # A response without that key (for example an `{"error": ...}` payload) is
  # reported as a TranslationError carrying the server's message, instead of
  # returning nil and failing later with an unrelated NoMethodError.
  #
  # @param response [#body, #code] HTTP response
  # @return [String, Array<String>] The translated text
  # @raise [TranslationError] If the body is not JSON or lacks translatedText
  def handle_api_response(response)
    response_data = JSON.parse(response.body.to_s)

    unless response_data.is_a?(Hash) && response_data.key?("translatedText")
      detail = response_data["error"] if response_data.is_a?(Hash)
      raise TranslationError,
            "API error (HTTP #{response.code}): " \
            "#{detail || 'translatedText missing from response'}"
    end

    translated = response_data["translatedText"]
    translation_str = translated.is_a?(Array) ? translated.inspect : translated
    Jekyll.logger.debug "LibreTranslator", "Parsed translation: #{translation_str}"
    translated
  rescue JSON::ParserError, NoMethodError => e
    raise TranslationError, "Invalid API response: #{e.message}"
  end
end
|
|
225
|
+
end
|
|
226
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../constants"
|
|
4
|
+
require_relative "../po_file/manager"
|
|
5
|
+
require_relative "../utils/page_locales_config"
|
|
6
|
+
require_relative "../utils/url_path_builder"
|
|
7
|
+
require_relative "../utils/logger_formatter"
|
|
8
|
+
|
|
9
|
+
module Jekyll
|
|
10
|
+
module L10n
|
|
11
|
+
# Loads and merges page-specific and compendium translations.
|
|
12
|
+
#
|
|
13
|
+
# PageTranslationLoader combines compendium (site-wide) translations with
|
|
14
|
+
# page-specific translations for a given locale and URL. It loads both
|
|
15
|
+
# translation sources from PO files, merges them (page-specific takes
|
|
16
|
+
# precedence), and returns the combined translation hash for use by
|
|
17
|
+
# HtmlTranslator.
|
|
18
|
+
#
|
|
19
|
+
# Key responsibilities:
|
|
20
|
+
# * Load compendium translations for a locale
|
|
21
|
+
# * Convert page URLs to PO file paths
|
|
22
|
+
# * Load page-specific translations
|
|
23
|
+
# * Merge compendium and page-specific translations
|
|
24
|
+
# * Filter empty translations (untranslated entries)
|
|
25
|
+
# * Log loading progress at debug level
|
|
26
|
+
#
|
|
27
|
+
# @example
|
|
28
|
+
# config = PageLocalesConfig.new(page.data)
|
|
29
|
+
# translations = PageTranslationLoader.load(site, 'es', '/docs/index.html', config)
|
|
30
|
+
# # Returns merged translations for that page in Spanish
|
|
31
|
+
class PageTranslationLoader
  # Load and merge translations for a page in a specific locale.
  #
  # Untranslated entries (nil or empty msgstr) are removed from each source
  # BEFORE merging. Merging first would let an untranslated page-specific
  # entry overwrite a translated compendium entry and then be filtered out,
  # silently losing the compendium translation.
  #
  # @param site [Jekyll::Site] Jekyll site object, passed to PoFileManager
  # @param locale [String] Target locale code (e.g., 'es', 'fr')
  # @param original_url [String] Original page URL (e.g., '/docs/index.html')
  # @param config [PageLocalesConfig] Provides locales_dir
  # @return [Hash] { msgid => msgstr } containing only non-empty translations;
  #   page-specific translations win over compendium ones
  def self.load(site, locale, original_url, config)
    po_manager = PoFileManager.new(site, config.locales_dir)
    page_path = construct_po_page_path(original_url)

    compendium = po_manager.load_compendium(locale)
    log_compendium_loaded(locale, compendium)

    page_specific = po_manager.load_po_file(locale, page_path)
    log_page_specific_loaded(page_path, page_specific)

    translations = drop_untranslated(compendium).merge(drop_untranslated(page_specific))
    log_translations_merged(translations)

    translations
  end

  # Convert a page URL to the PO page path used by PoFileManager.
  #
  # @param url [String] Page URL
  # @return [Object] Whatever UrlPathBuilder.url_to_po_page_path returns
  def self.construct_po_page_path(url)
    UrlPathBuilder.url_to_po_page_path(url)
  end

  # Return a copy of the hash without entries whose msgstr is nil or empty.
  #
  # @param entries [Hash] { msgid => msgstr }
  # @return [Hash]
  def self.drop_untranslated(entries)
    entries.reject { |_msgid, msgstr| msgstr.nil? || msgstr.empty? }
  end

  # Debug-log how many compendium entries were loaded.
  def self.log_compendium_loaded(locale, compendium)
    LoggerFormatter.debug_if_enabled("PageTranslationLoader",
                                     format_compendium_message(locale, compendium.size))
  end

  # Debug-log how many page-specific entries were loaded.
  def self.log_page_specific_loaded(page_path, page_specific)
    LoggerFormatter.debug_if_enabled("PageTranslationLoader",
                                     format_page_specific_message(page_path, page_specific.size))
  end

  # Debug-log the size of the merged translation hash.
  def self.log_translations_merged(translations)
    LoggerFormatter.debug_if_enabled("PageTranslationLoader",
                                     format_merged_message(translations.size))
  end

  def self.format_compendium_message(locale, size)
    "[PageTranslationLoader] Loaded compendium for #{locale}: #{size} entries"
  end

  def self.format_page_specific_message(page_path, size)
    "[PageTranslationLoader] Loaded page-specific for #{page_path}: #{size} entries"
  end

  def self.format_merged_message(size)
    "[PageTranslationLoader] Merged translations: #{size} entries"
  end

  private_class_method :drop_untranslated,
                       :log_compendium_loaded,
                       :log_page_specific_loaded,
                       :log_translations_merged,
                       :format_compendium_message,
                       :format_page_specific_message,
                       :format_merged_message
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../utils/page_locales_config"
|
|
4
|
+
require_relative "../utils/html_parser"
|
|
5
|
+
require_relative "../utils/logger_formatter"
|
|
6
|
+
require_relative "../utils/external_link_icon_preserver"
|
|
7
|
+
require_relative "html_translator"
|
|
8
|
+
require_relative "page_translation_loader"
|
|
9
|
+
|
|
10
|
+
module Jekyll
|
|
11
|
+
module L10n
|
|
12
|
+
# Translation Orchestrator - Applies PO file translations to localized pages
|
|
13
|
+
#
|
|
14
|
+
# The Translator is the main entry point for applying translations to pages. It's invoked
|
|
15
|
+
# during the post_render phase of the Jekyll build (via a Jekyll hook) for each localized
|
|
16
|
+
# page variant. The translator loads appropriate translations from PO files and applies
|
|
17
|
+
# them to the page's HTML content.
|
|
18
|
+
#
|
|
19
|
+
# The translation workflow:
|
|
20
|
+
# 1. Check if the page is localized and has required metadata
|
|
21
|
+
# 2. Load translations from PO files (page-specific and compendium)
|
|
22
|
+
# 3. Apply translations to text nodes and attributes in HTML
|
|
23
|
+
# 4. Handle missing translations with configured fallback modes
|
|
24
|
+
# 5. Preserve special elements like external link icons
|
|
25
|
+
#
|
|
26
|
+
# Key responsibilities:
|
|
27
|
+
# - Load translations for the page's locale
|
|
28
|
+
# - Apply translations to HTML with DOM manipulation
|
|
29
|
+
# - Handle fallback modes (english, marker, empty)
|
|
30
|
+
# - Preserve special formatting and elements (icons, badges, etc.)
|
|
31
|
+
# - Log translation progress and errors
|
|
32
|
+
#
|
|
33
|
+
# This runs in phase 3 of the Jekyll build pipeline (Post-Render Phase). See the
|
|
34
|
+
# "Build Pipeline" section in lib/jekyll-l10n.rb for the complete workflow.
|
|
35
|
+
#
|
|
36
|
+
# @example Usage (typically invoked via Jekyll hook)
|
|
37
|
+
# # Automatically invoked for localized pages in post_render phase
|
|
38
|
+
# translator = Translator.new(localized_page)
|
|
39
|
+
# translator.translate
|
|
40
|
+
#
|
|
41
|
+
# @see Jekyll::L10n for Build Pipeline documentation
|
|
42
|
+
# @see Jekyll::L10n::HtmlTranslator for low-level DOM translation logic
|
|
43
|
+
# @see Jekyll::L10n::PageTranslationLoader for loading translations from PO files
|
|
44
|
+
#
|
|
45
|
+
class Translator
  # @!attribute [r] page
  #   The Jekyll page being translated
  #   @return [Jekyll::Page]
  attr_reader :page

  # Initialize the translator
  #
  # @param page [Jekyll::Page] The page to translate; its #site is retained
  def initialize(page)
    @page = page
    @site = page.site
  end

  # Apply translations to the page.
  #
  # Does nothing unless the page passes should_translate?, the original
  # (non-localized) page can be found by URL, and at least one translation
  # was loaded. On success the page's output is replaced.
  #
  # @return [void]
  def translate
    return unless should_translate?

    locale = @page.data["locale"]
    original_url = @page.data["original_url"]
    baseurl = @site.config["baseurl"] || ""

    original_page = find_and_log_original_page(original_url)
    return unless original_page

    translations = load_and_log_translations(locale, original_url, original_page)
    return if translations.nil? || translations.empty?

    apply_translations_to_page(original_page, translations, locale, baseurl)
  end

  private

  # Load merged translations using the original page's localization config.
  #
  # @return [Hash, nil] Result of PageTranslationLoader.load
  def load_translations_for_page(locale, original_url, original_page)
    config = PageLocalesConfig.new(original_page.data)
    PageTranslationLoader.load(@site, locale, original_url, config)
  end

  # Translate the page output and, when that succeeds, store the result.
  #
  # @param original_page [Jekyll::Page] Source page (config and icon source)
  # @param translations [Hash] Translation hash
  # @param locale [String] Target locale code
  # @param baseurl [String] Site baseurl
  # @return [void]
  def apply_translations_to_page(original_page, translations, locale, baseurl)
    config = PageLocalesConfig.new(original_page.data)
    translator = HtmlTranslator.new(
      config.fallback_mode,
      config.translatable_attributes,
      :debug_logging => config.debug_logging?
    )

    translated_html = apply_translations(translator, translations, locale, baseurl)
    return unless translated_html

    # Preserve external link icons from the original page after translation
    @page.output = preserve_external_link_icons(original_page.output, translated_html)
  end

  def preserve_external_link_icons(original_html, translated_html)
    ExternalLinkIconPreserver.preserve(original_html, translated_html)
  end

  # Run HtmlTranslator over the page output.
  #
  # Locale and baseurl are only forwarded when a locale other than "en" is
  # given; otherwise the translator is called with output and translations
  # alone.
  #
  # @param translator [HtmlTranslator] The translator instance to use
  # @param translations [Hash] Translation hash mapping text to translations
  # @param locale [String, nil] Target locale code
  # @param baseurl [String] Base URL passed through to the translator
  # @return [String, nil] Translated HTML, or nil if translating raised
  def apply_translations(translator, translations, locale = nil, baseurl = "")
    if locale && locale != "en"
      translator.translate(@page.output, translations, locale, baseurl)
    else
      translator.translate(@page.output, translations)
    end
  rescue StandardError => e
    Jekyll.logger.error "Localization", "Error translating #{@page.url}: #{e.message}"
    nil
  end

  # Whether the page carries everything needed for translation: data with
  # "locale" and "original_url", and non-empty output.
  #
  # Each check is reduced to a boolean before it is logged. Interpolating the
  # raw values would embed the entire data hash and the entire rendered HTML
  # in the debug message for every page.
  #
  # @return [Boolean]
  def should_translate?
    data = @page.data
    output = @page.output

    has_data = data ? true : false
    has_locale = has_data && (data["locale"] ? true : false)
    has_original_url = has_data && (data["original_url"] ? true : false)
    has_output = output ? true : false
    output_not_empty = has_output && !output.empty?

    msg = "should_translate? data=#{has_data}, locale=#{has_locale}, " \
          "original_url=#{has_original_url}, output=#{has_output}, " \
          "not_empty=#{output_not_empty}"
    LoggerFormatter.debug_if_enabled("Translator", msg)

    has_data && has_locale && has_original_url && has_output && output_not_empty
  end

  # First site page with the given URL that is not flagged localized: true.
  #
  # @param url [String] URL to look for
  # @return [Jekyll::Page, nil]
  def find_original_page_by_url(url)
    @site.pages.find { |page| page.data["localized"] != true && page.url == url }
  end

  # Look up the original page, debug-logging the attempt and a hit.
  #
  # @param original_url [String] URL of the source page
  # @return [Jekyll::Page, nil]
  def find_and_log_original_page(original_url)
    LoggerFormatter.debug_if_enabled("Translator",
                                     "Translating page: locale=#{@page.data["locale"]}, " \
                                     "original_url=#{original_url}")
    original_page = find_original_page_by_url(original_url)
    if original_page
      LoggerFormatter.debug_if_enabled("Translator",
                                       "Original page found: #{original_page.url}")
    end
    original_page
  end

  # Load translations and debug-log how many were found.
  #
  # @return [Hash, nil]
  def load_and_log_translations(locale, original_url, original_page)
    translations = load_translations_for_page(locale, original_url, original_page)
    LoggerFormatter.debug_if_enabled("Translator",
                                     "Loaded #{translations&.length || 0} translations")
    translations
  end
end
|
|
178
|
+
end
|
|
179
|
+
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../constants"
|
|
4
|
+
|
|
5
|
+
module Jekyll
|
|
6
|
+
module L10n
|
|
7
|
+
# Logs detailed translation debug information.
|
|
8
|
+
#
|
|
9
|
+
# DebugLogger provides detailed logging of translation attempts for
|
|
10
|
+
# debugging normalization and matching issues. It logs when translations
|
|
11
|
+
# are found, when translations are missing, character-level differences,
|
|
12
|
+
# and similarity analysis for debugging.
|
|
13
|
+
#
|
|
14
|
+
# Key responsibilities:
|
|
15
|
+
# * Log translation match details
|
|
16
|
+
# * Log missing translations with similar key analysis
|
|
17
|
+
# * Log character-level differences for debugging
|
|
18
|
+
# * Provide context around matching failures
|
|
19
|
+
# * Support verbose translation debugging
|
|
20
|
+
#
|
|
21
|
+
# @see Jekyll::L10n::HtmlTranslator for translation workflow context
|
|
22
|
+
# @see Jekyll::L10n::TextNormalizer for text normalization logic
|
|
23
|
+
class DebugLogger
  # Data structure for translation debugging information.
  TranslationData = Struct.new(:text, :normalized_text, :translated, :translations,
                               :keyword_init => true)

  # Log detailed translation information.
  #
  # Does nothing unless translator.debug_logging is truthy. Untranslated
  # text no longer than LOG_THRESHOLD_SHORT is skipped to keep the log quiet.
  #
  # @param translator [#debug_logging] Translator with debug_logging setting
  # @param translation_data [TranslationData] Translation details to log
  # @return [void]
  def self.log_translation_details(translator, translation_data)
    return unless translator.debug_logging

    threshold = Jekyll::L10n::Constants::LOG_THRESHOLD_SHORT
    return if translation_data.translated.nil? && translation_data.text.length <= threshold

    log_missing_translation(translation_data.text, translation_data.normalized_text,
                            translation_data.translations)
    log_alert_details(translation_data.text, translation_data.normalized_text,
                      translation_data.translations)
    log_found_translation(translation_data.text, translation_data.translated)
  end

  # Log a miss when the normalized text has no entry but some keys share the
  # text's leading characters (a hint that normalization differs).
  #
  # @param text [String] Raw text from the page
  # @param normalized_text [String] Text after normalization (lookup key)
  # @param translations [Hash] Translation hash
  # @return [void]
  def self.log_missing_translation(text, normalized_text, translations)
    return unless translations[normalized_text].nil?
    return unless text.length > Jekyll::L10n::Constants::LOG_THRESHOLD_SHORT

    # Computed once; it does not change per key.
    prefix = text[0..Jekyll::L10n::Constants::LOG_TRUNCATE_SHORT]
    similar_keys = translations.keys.select { |key| key.start_with?(prefix) }
    return unless similar_keys.any?

    log_no_translation_found(text, normalized_text, similar_keys)
  end

  # Extra comparison output for one specific sentence ("Alert component is an
  # inline message box"); all other text is ignored.
  #
  # @param text [String] Raw text from the page
  # @param normalized_text [String] Text after normalization
  # @param translations [Hash] Translation hash
  # @return [void]
  def self.log_alert_details(text, normalized_text, translations)
    return unless text.include?("Alert component is an inline message box")

    key_from_hash = translations.keys.find { |key| key.include?("Alert component is") }
    return unless key_from_hash

    log_alert_comparison(text, normalized_text, key_from_hash)
  end

  # Report the first position at which two strings differ.
  #
  # The comparison runs over the longer of the two strings, so a pure length
  # difference is reported as a difference at the shorter string's end. The
  # "matches after normalization" line is logged only when the strings are
  # identical; it is no longer emitted after a difference was reported.
  #
  # @param normalized_text [String] Normalized page text
  # @param key_from_hash [String] Candidate key from the translation hash
  # @return [void]
  def self.log_first_difference(normalized_text, key_from_hash)
    longest = [normalized_text.length, key_from_hash.length].max
    diff_index = (0...longest).find { |i| normalized_text[i] != key_from_hash[i] }

    if diff_index
      log_difference_at_position(normalized_text, key_from_hash, diff_index)
      log_context_strings(normalized_text, key_from_hash, diff_index)
    else
      log_normalization_match
    end
  end

  # Log a successful lookup when the translation is longer than
  # LOG_THRESHOLD_SHORT.
  #
  # @param text [String] Raw text from the page
  # @param translated [String, nil] Translation that was found, if any
  # @return [void]
  def self.log_found_translation(text, translated)
    return unless translated && translated.length > Jekyll::L10n::Constants::LOG_THRESHOLD_SHORT

    truncate_length = Jekyll::L10n::Constants::LOG_TRUNCATE_LONG
    log_message(
      "[HtmlTranslator] ✓ TRANSLATION FOUND: #{text[0..truncate_length]}... => " \
      "#{translated[0..truncate_length]}..."
    )
  end

  # The helpers below are made private via private_class_method at the end
  # of the class; a bare `private` has no effect on `def self.` methods.

  def self.log_no_translation_found(text, normalized_text, similar_keys)
    truncate_length = Jekyll::L10n::Constants::LOG_TRUNCATE_LONG
    log_message("[HtmlTranslator] ✗ NO TRANSLATION FOUND for: #{text[0..truncate_length]}...")
    log_message("[HtmlTranslator]   Normalized length: #{normalized_text.length}")
    log_message("[HtmlTranslator]   Similar keys found: #{similar_keys.length}")
  end

  def self.log_alert_comparison(text, normalized_text, key_from_hash)
    log_message(
      "[HtmlTranslator]   LENGTH match: HTML=#{text.length} vs KEY=#{key_from_hash.length}"
    )
    log_message("[HtmlTranslator]   EQUAL: #{text == key_from_hash}")
    log_message("[HtmlTranslator]   NORMALIZED match: #{normalized_text == key_from_hash}")
  end

  # Log the two characters found at +index+. Either string may end before
  # +index+; that side is shown as "<end of string>" instead of raising on
  # nil.ord.
  def self.log_difference_at_position(text1, text2, index)
    describe = lambda do |char|
      char ? "#{char.inspect} (#{char.ord})" : "<end of string>"
    end
    log_message(
      "[HtmlTranslator]   DIFF at position #{index}: " \
      "NORMALIZED=#{describe.call(text1[index])} vs KEY=#{describe.call(text2[index])}"
    )
  end

  def self.log_context_strings(normalized_text, key_from_hash, index)
    normalized_context = context_string(normalized_text, index)
    key_context = context_string(key_from_hash, index)

    log_message("[HtmlTranslator]   Context NORMALIZED: ...#{normalized_context.inspect}...")
    log_message("[HtmlTranslator]   Context KEY:        ...#{key_context.inspect}...")
  end

  def self.log_normalization_match
    log_message(
      "[HtmlTranslator]   ✓ Text matches after normalization - translation will be applied"
    )
  end

  def self.log_message(message)
    Jekyll.logger.info "Localization", message
  end

  # Slice of +text+ spanning BACKTRACE_CONTEXT_LENGTH characters on either
  # side of +index+, clamped to the string bounds.
  def self.context_string(text, index)
    radius = Jekyll::L10n::Constants::BACKTRACE_CONTEXT_LENGTH
    start_idx = [0, index - radius].max
    end_idx = [index + radius, text.length - 1].min
    text[start_idx..end_idx]
  end

  private_class_method :log_no_translation_found,
                       :log_alert_comparison,
                       :log_difference_at_position,
                       :log_context_strings,
                       :log_normalization_match,
                       :log_message,
                       :context_string
end
|
|
152
|
+
end
|
|
153
|
+
end
|