jekyll-l10n 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +94 -0
  4. data/lib/jekyll-l10n/constants.rb +136 -0
  5. data/lib/jekyll-l10n/errors.rb +60 -0
  6. data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
  7. data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
  8. data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
  9. data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
  10. data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
  11. data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
  12. data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
  13. data/lib/jekyll-l10n/extraction/logger.rb +48 -0
  14. data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
  15. data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
  16. data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
  17. data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
  18. data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
  19. data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
  20. data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
  21. data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
  22. data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
  23. data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
  24. data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
  25. data/lib/jekyll-l10n/po_file/loader.rb +64 -0
  26. data/lib/jekyll-l10n/po_file/manager.rb +160 -0
  27. data/lib/jekyll-l10n/po_file/merger.rb +80 -0
  28. data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
  29. data/lib/jekyll-l10n/po_file/reader.rb +518 -0
  30. data/lib/jekyll-l10n/po_file/writer.rb +232 -0
  31. data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
  32. data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
  33. data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
  34. data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
  35. data/lib/jekyll-l10n/translation/translator.rb +179 -0
  36. data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
  37. data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
  38. data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
  39. data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
  40. data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
  41. data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
  42. data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
  43. data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
  44. data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
  45. data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
  46. data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
  47. data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
  48. data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
  49. data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
  50. data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
  51. data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
  52. data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
  53. data/lib/jekyll-l10n/version.rb +10 -0
  54. data/lib/jekyll-l10n.rb +268 -0
  55. metadata +200 -0
@@ -0,0 +1,226 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "net/http"
5
+ require "uri"
6
+ require_relative "../utils/logger_formatter"
7
+
8
+ module Jekyll
9
+ module L10n
10
# Translates compendium entries using the LibreTranslate API.
#
# LibreTranslator integrates with the LibreTranslate API to automatically
# translate untranslated entries in compendium PO files. It handles batching,
# retries with exponential backoff, progress logging, and API error handling.
# Translations are written directly to POEntry objects in-place.
#
# Key responsibilities:
# * Send translation requests to LibreTranslate API
# * Batch translations for efficiency
# * Handle API timeouts and failures with retries
# * Log translation progress at configurable intervals
# * Update POEntry msgstr with translated text
# * Parse JSON API responses
#
# @example
#   translator = LibreTranslator.new(config)
#   translator.translate_compendium(po_entries, 'es')
#   # po_entries now have msgstr filled from API translations
class LibreTranslator
  TranslationError = Class.new(StandardError) unless defined?(TranslationError)

  # Initialize a new LibreTranslator.
  #
  # @param config [PageLocalesConfig] Configuration with LibreTranslate settings:
  #   - libretranslate_api_url [String] API endpoint URL
  #   - libretranslate_api_key [String, nil] Optional API key
  #   - libretranslate_timeout [Integer] Request timeout in seconds
  #   - libretranslate_batch_size [Integer] Entries per batch request
  #   - libretranslate_retry_attempts [Integer] Max retry attempts
  #   - libretranslate_retry_delay [Integer] Delay between retries in seconds
  #   - libretranslate_progress_interval [Integer] Log progress every N entries
  #   - libretranslate_source_locale [String] Source language sent to the API
  #   - libretranslate_format [String] Value sent as the API "format" field
  #   - libretranslate_stop_on_error? [Boolean] Raise on the first failure instead of retrying
  def initialize(config)
    @config = config
  end

  # Translate a compendium to a target locale.
  #
  # Identifies untranslated entries (empty msgstr, non-blank msgid) and sends
  # them to the LibreTranslate API. Updates the entry objects in-place with the
  # translated text. Handles batching, retries, and progress logging.
  #
  # @param po_entries [Array<GetText::POEntry>] Array of PO entries from compendium
  # @param target_locale [String] Target locale code (e.g., 'es', 'fr')
  # @return [void]
  # @raise [TranslationError] If API request fails and stop_on_error is true
  # @raise [TranslationError] If max retry attempts exceeded
  def translate_compendium(po_entries, target_locale)
    translatable_count, empty_entries = count_translatable_entries(po_entries)
    return if empty_entries.empty?

    log_translation_progress(empty_entries.length, translatable_count, target_locale)
    start_time = Time.now
    process_translation_batches(empty_entries, target_locale, start_time)
    log_translation_complete(empty_entries.length, target_locale, start_time)
  end

  private

  # Run the given block, retrying on any StandardError.
  #
  # Raises TranslationError once the number of failures exceeds
  # libretranslate_retry_attempts, or immediately (with the original message)
  # when libretranslate_stop_on_error? is set.
  #
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  def retry_with_backoff
    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
    attempts = 0
    begin
      yield
    rescue StandardError => e
      attempts += 1

      if attempts > @config.libretranslate_retry_attempts
        raise TranslationError,
              "Max retry attempts reached"
      end

      raise TranslationError, e.message if @config.libretranslate_stop_on_error?

      max_attempts = @config.libretranslate_retry_attempts

      # === EXPONENTIAL BACKOFF WITH JITTER ===
      # Calculate delay using exponential backoff: base_delay * (2 ^ (attempt - 1))
      # Add jitter (random 0-1 seconds) to prevent thundering herd problem
      # Cap maximum delay at 30 seconds to prevent extremely long waits
      base_delay = @config.libretranslate_retry_delay
      exponential_delay = base_delay * (2**(attempts - 1))
      jitter = rand(0..1000) / 1000.0 # Random jitter between 0-1 second
      delay = [exponential_delay + jitter, 30].min # Cap at 30 seconds max

      delay_str = delay.round(2)
      Jekyll.logger.warn(
        "LibreTranslator",
        "Retrying translation (attempt #{attempts}/#{max_attempts}) after #{delay_str}s: " \
        "#{e.message}"
      )
      sleep(delay)
      retry
    end
  end

  # Split entries into "has a msgid" and "has a msgid but no msgstr".
  #
  # nil msgid/msgstr values are treated like empty strings so a sparsely
  # populated entry cannot crash the scan.
  #
  # @return [Array(Integer, Array)] translatable entry count and the list of
  #   entries that still need a translation
  def count_translatable_entries(po_entries)
    translatable = po_entries.reject { |entry| entry.msgid.to_s.strip.empty? }
    untranslated = translatable.select { |entry| entry.msgstr.to_s.empty? }
    [translatable.length, untranslated]
  end

  def log_translation_progress(empty_count, _translatable_count, target_locale)
    Jekyll.logger.info "Localization",
                       "Translating #{empty_count} entries for #{target_locale}"
  end

  def log_periodic_progress(current_num, total_entries, target_locale, start_time)
    elapsed = Time.now - start_time
    percent = (current_num.to_f / total_entries * 100).round

    Jekyll.logger.info "Localization",
                       "[LibreTranslator] Progress: #{current_num}/#{total_entries} " \
                       "(#{percent}%) translated for #{target_locale} [#{elapsed.round(1)}s]"
  end

  def log_translation_complete(entries_translated, target_locale, start_time)
    elapsed = Time.now - start_time
    # Guard the division so a zero elapsed time cannot produce Infinity in the log.
    measurable = entries_translated.positive? && elapsed.positive?
    throughput = measurable ? (entries_translated / elapsed).round(1) : 0

    Jekyll.logger.info "Localization",
                       "[LibreTranslator] Completed: #{entries_translated} entries " \
                       "translated for #{target_locale} [#{elapsed.round(1)}s, " \
                       "~#{throughput} entries/sec]"
  end

  # Walk the untranslated entries in slices, retrying each slice independently.
  def process_translation_batches(empty_entries, target_locale, start_time)
    # each_slice raises on a size below 1, so clamp a missing or zero setting.
    batch_size = [@config.libretranslate_batch_size.to_i, 1].max
    progress_interval = @config.libretranslate_progress_interval.to_i

    empty_entries.each_slice(batch_size).with_index do |batch, batch_num|
      start_num = (batch_num * batch_size) + 1
      retry_with_backoff do
        translate_batch(batch, target_locale, start_num, empty_entries.length,
                        start_time, progress_interval)
      end
    end
  end

  # Translate every entry of one batch, one API request per entry.
  #
  # Any error is logged and re-raised so retry_with_backoff can decide whether
  # to run the batch again.
  #
  # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
  def translate_batch(batch, target_locale, start_num, total_entries, start_time,
                      progress_interval)
    # rubocop:enable Metrics/ParameterLists, Metrics/AbcSize
    batch.each_with_index do |entry, index|
      current_num = start_num + index

      # Every entry starts out empty, so a filled msgstr means a previous
      # attempt of this batch already translated it: do not request it again.
      if entry.msgstr.to_s.empty?
        # Move per-entry logs to TRACE level for deep debugging
        LoggerFormatter.trace_if_enabled(
          @config, "LibreTranslator",
          "Translating entry #{current_num}/#{total_entries}: #{entry.msgid[0..50]}"
        )

        translations = make_api_request([entry.msgid], target_locale)
        # Array() also copes with a bare String response; indexing a String
        # with [0] would have stored only its first character.
        entry.msgstr = Array(translations).first || ""
      end

      # Log progress at intervals and at the end, once the entry is processed
      if progress_interval.positive? &&
         ((current_num % progress_interval).zero? || current_num == total_entries)
        log_periodic_progress(current_num, total_entries, target_locale, start_time)
      end
    end
  rescue StandardError => e
    Jekyll.logger.error "LibreTranslator", "Error in translate_batch: #{e.class}: #{e.message}"
    Jekyll.logger.error "LibreTranslator", Array(e.backtrace).first(5).join("\n")
    raise
  end

  # Memoized Net::HTTP client for the configured API host.
  #
  # TLS is enabled when the configured URL uses the https scheme; without it
  # Net::HTTP would send plaintext to the TLS port and the request would fail.
  def http_config
    @http_config ||= begin
      uri = URI(@config.libretranslate_api_url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = uri.scheme == "https"
      http.read_timeout = @config.libretranslate_timeout
      http.open_timeout = @config.libretranslate_timeout
      http
    end
  end

  # The configured API key, or nil when it is missing or blank.
  def configured_api_key
    key = @config.libretranslate_api_key.to_s
    key.empty? ? nil : key
  end

  # Request headers. The Bearer header is kept for setups that authenticate
  # at a proxy; the key is additionally sent in the request body.
  def api_headers
    headers = { "Content-Type" => "application/json" }
    api_key = configured_api_key
    headers["Authorization"] = "Bearer #{api_key}" if api_key
    headers
  end

  # POST one translation request and return the parsed "translatedText" value.
  #
  # @param text [Array<String>, String] Text(s) sent as the "q" field
  # @param target_locale [String] Target language code
  # @raise [TranslationError] On timeout, non-2xx status, or malformed response
  def make_api_request(text, target_locale)
    # Tolerate a trailing slash in the configured URL (avoids "//translate").
    uri = URI("#{@config.libretranslate_api_url.to_s.chomp("/")}/translate")
    Jekyll.logger.debug "LibreTranslator", "Requesting #{uri}"
    request = Net::HTTP::Post.new(uri, api_headers)
    payload = {
      :q => text,
      :source => @config.libretranslate_source_locale,
      :target => target_locale,
      :format => @config.libretranslate_format,
    }
    api_key = configured_api_key
    # LibreTranslate reads the key from the "api_key" request field.
    payload[:api_key] = api_key if api_key
    request.body = payload.to_json
    response = http_config.request(request)
    Jekyll.logger.debug "LibreTranslator", "Response code: #{response.code}"
    handle_api_response(response)
  rescue Net::ReadTimeout, Net::OpenTimeout => e
    raise TranslationError, "API timeout: #{e.message}"
  end

  # Validate the HTTP status and extract "translatedText" from the JSON body.
  #
  # @param response [#code, #body] HTTP response
  # @return [Array<String>, String] The "translatedText" value as sent by the API
  # @raise [TranslationError] On a non-2xx status or a malformed/incomplete body
  def handle_api_response(response)
    status = response.code.to_i
    unless status.between?(200, 299)
      raise TranslationError,
            "API request failed (HTTP #{response.code}): #{api_error_detail(response.body)}"
    end

    response_data = JSON.parse(response.body)
    translated = response_data["translatedText"]
    raise TranslationError, "Invalid API response: missing translatedText" if translated.nil?

    translation_str = translated.is_a?(Array) ? translated.inspect : translated
    Jekyll.logger.debug "LibreTranslator", "Parsed translation: #{translation_str}"
    translated
  rescue JSON::ParserError, NoMethodError, TypeError => e
    raise TranslationError, "Invalid API response: #{e.message}"
  end

  # Short description of an error response: the JSON "error" field when the
  # body has one, otherwise the (truncated) raw body.
  def api_error_detail(body)
    parsed = JSON.parse(body.to_s)
    detail = parsed.is_a?(Hash) ? parsed["error"] : nil
    (detail || body).to_s[0, 200]
  rescue JSON::ParserError
    body.to_s[0, 200]
  end
end
225
+ end
226
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../constants"
4
+ require_relative "../po_file/manager"
5
+ require_relative "../utils/page_locales_config"
6
+ require_relative "../utils/url_path_builder"
7
+ require_relative "../utils/logger_formatter"
8
+
9
+ module Jekyll
10
+ module L10n
11
# Loads and merges page-specific and compendium translations.
#
# PageTranslationLoader combines compendium (site-wide) translations with
# page-specific translations for a given locale and URL. It loads both
# translation sources from PO files, merges them (page-specific takes
# precedence), and returns the combined translation hash for use by
# HtmlTranslator.
#
# Key responsibilities:
# * Load compendium translations for a locale
# * Convert page URLs to PO file paths
# * Load page-specific translations
# * Merge compendium and page-specific translations
# * Filter empty translations (untranslated entries)
# * Log loading progress at debug level
#
# @example
#   config = PageLocalesConfig.new(page.data)
#   translations = PageTranslationLoader.load(site, 'es', '/docs/index.html', config)
#   # Returns merged translations for that page in Spanish
class PageTranslationLoader
  # Load and merge translations for a page in a specific locale.
  #
  # Loads the compendium (site-wide) translations and page-specific translations,
  # merges them, and returns the combined hash with untranslated entries
  # (nil or empty msgstr) removed.
  #
  # A page-specific entry overrides the compendium entry only when it actually
  # carries a translation. An untranslated page-specific entry no longer hides
  # a translated compendium entry for the same msgid.
  #
  # @param site [Jekyll::Site] Jekyll site object
  # @param locale [String] Target locale code (e.g., 'es', 'fr')
  # @param original_url [String] Original page URL (e.g., '/docs/index.html')
  # @param config [PageLocalesConfig] Localization configuration for the page
  # @return [Hash] Merged translation hash { msgid => msgstr }, filtered to only
  #   include non-empty translations
  def self.load(site, locale, original_url, config)
    po_manager = PoFileManager.new(site, config.locales_dir)
    page_path = construct_po_page_path(original_url)

    # NOTE(review): the `|| {}` fallbacks assume a missing PO file may yield
    # nil - confirm against PoFileManager.
    compendium = po_manager.load_compendium(locale) || {}
    log_compendium_loaded(locale, compendium)

    page_specific = po_manager.load_po_file(locale, page_path) || {}
    log_page_specific_loaded(page_path, page_specific)

    # The block only runs for msgids present in both sources.
    translations = compendium.merge(page_specific) do |_msgid, compendium_msgstr, page_msgstr|
      untranslated?(page_msgstr) ? compendium_msgstr : page_msgstr
    end
    log_translations_merged(translations)

    translations.reject { |_msgid, msgstr| untranslated?(msgstr) }
  end

  # Convert a page URL into the page path used to locate its PO file.
  #
  # @param url [String] Original page URL
  # @return [String] Value returned by UrlPathBuilder.url_to_po_page_path
  def self.construct_po_page_path(url)
    UrlPathBuilder.url_to_po_page_path(url)
  end

  # True when a msgstr carries no translation (nil or empty string).
  def self.untranslated?(msgstr)
    msgstr.nil? || msgstr.empty?
  end

  def self.log_compendium_loaded(locale, compendium)
    message = format_compendium_message(locale, compendium.size)
    LoggerFormatter.debug_if_enabled("PageTranslationLoader", message)
  end

  def self.log_page_specific_loaded(page_path, page_specific)
    message = format_page_specific_message(page_path, page_specific.size)
    LoggerFormatter.debug_if_enabled("PageTranslationLoader", message)
  end

  def self.log_translations_merged(translations)
    message = format_merged_message(translations.size)
    LoggerFormatter.debug_if_enabled("PageTranslationLoader", message)
  end

  def self.format_compendium_message(locale, size)
    "[PageTranslationLoader] Loaded compendium for #{locale}: #{size} entries"
  end

  def self.format_page_specific_message(page_path, size)
    "[PageTranslationLoader] Loaded page-specific for #{page_path}: #{size} entries"
  end

  def self.format_merged_message(size)
    "[PageTranslationLoader] Merged translations: #{size} entries"
  end

  private_class_method :untranslated?,
                       :log_compendium_loaded,
                       :log_page_specific_loaded,
                       :log_translations_merged,
                       :format_compendium_message,
                       :format_page_specific_message,
                       :format_merged_message
end
98
+ end
99
+ end
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../utils/page_locales_config"
4
+ require_relative "../utils/html_parser"
5
+ require_relative "../utils/logger_formatter"
6
+ require_relative "../utils/external_link_icon_preserver"
7
+ require_relative "html_translator"
8
+ require_relative "page_translation_loader"
9
+
10
+ module Jekyll
11
+ module L10n
12
# Translation Orchestrator - Applies PO file translations to localized pages
#
# The Translator is the main entry point for applying translations to pages. It's invoked
# during the post_render phase of the Jekyll build (via a Jekyll hook) for each localized
# page variant. The translator loads appropriate translations from PO files and applies
# them to the page's HTML content.
#
# The translation workflow:
# 1. Check if the page is localized and has required metadata
# 2. Load translations from PO files (page-specific and compendium)
# 3. Apply translations to text nodes and attributes in HTML
# 4. Handle missing translations with configured fallback modes
# 5. Preserve special elements like external link icons
#
# Key responsibilities:
# - Load translations for the page's locale
# - Apply translations to HTML with DOM manipulation
# - Handle fallback modes (english, marker, empty)
# - Preserve special formatting and elements (icons, badges, etc.)
# - Log translation progress and errors
#
# This runs in phase 3 of the Jekyll build pipeline (Post-Render Phase). See the
# "Build Pipeline" section in lib/jekyll-l10n.rb for the complete workflow.
#
# @example Usage (typically invoked via Jekyll hook)
#   # Automatically invoked for localized pages in post_render phase
#   translator = Translator.new(localized_page)
#   translator.translate
#
# @see Jekyll::L10n for Build Pipeline documentation
# @see Jekyll::L10n::HtmlTranslator for low-level DOM translation logic
# @see Jekyll::L10n::PageTranslationLoader for loading translations from PO files
#
class Translator
  # @!attribute [r] page
  #   The Jekyll page being translated
  #   @return [Jekyll::Page]
  attr_reader :page

  # Initialize the translator
  #
  # @param page [Jekyll::Page] The page to translate (should be a LocalizedPage)
  def initialize(page)
    @page = page
    @site = page.site
  end

  # Apply translations to the page
  #
  # Main entry point for translation. Checks if the page should be translated,
  # loads the appropriate translations from PO files, applies them to the HTML,
  # and updates the page's output.
  #
  # This method is called automatically by Jekyll's post_render hook for each
  # LocalizedPage during the build process.
  #
  # @return [void]
  # @note Only translates pages that carry a locale, an original_url and non-empty output
  # @note Does nothing when the original page cannot be found or no translations are loaded
  def translate
    return unless should_translate?

    locale = @page.data["locale"]
    original_url = @page.data["original_url"]
    baseurl = @site.config["baseurl"] || ""

    original_page = find_and_log_original_page(original_url)
    return unless original_page

    translations = load_and_log_translations(locale, original_url, original_page)
    return if translations.nil? || translations.empty?

    apply_translations_to_page(original_page, translations, locale, baseurl)
  end

  private

  # Load the merged translation hash, configured from the original page's data.
  def load_translations_for_page(locale, original_url, original_page)
    config = PageLocalesConfig.new(original_page.data)
    PageTranslationLoader.load(@site, locale, original_url, config)
  end

  # Translate @page.output and store the result back on the page.
  # The page output is left untouched when translation fails (nil result).
  def apply_translations_to_page(original_page, translations, locale, baseurl)
    config = PageLocalesConfig.new(original_page.data)
    translator = HtmlTranslator.new(
      config.fallback_mode,
      config.translatable_attributes,
      :debug_logging => config.debug_logging?
    )

    translated_html = apply_translations(translator, translations, locale, baseurl)
    return unless translated_html

    # Preserve external link icons from the original page after translation
    @page.output = preserve_external_link_icons(original_page.output, translated_html)
  end

  def preserve_external_link_icons(original_html, translated_html)
    ExternalLinkIconPreserver.preserve(original_html, translated_html)
  end

  # Apply translations to page output
  #
  # Internal helper that uses HtmlTranslator to apply translations to the page's
  # HTML output. Locale and baseurl are only forwarded for a non-"en" locale.
  #
  # @param translator [HtmlTranslator] The translator instance to use
  # @param translations [Hash] Translation hash mapping text to translations
  # @param locale [String, nil] Target locale code (defaults to nil)
  # @param baseurl [String] Base URL for relative URL transformation (defaults to "")
  # @return [String, nil] Translated HTML string, or nil on error
  def apply_translations(translator, translations, locale = nil, baseurl = "")
    if locale && locale != "en"
      translator.translate(@page.output, translations, locale, baseurl)
    else
      translator.translate(@page.output, translations)
    end
  rescue StandardError => e
    Jekyll.logger.error "Localization", "Error translating #{@page.url}: #{e.message}"
    nil
  end

  # Decide whether the page has everything needed for translation: data with
  # a locale and an original_url, plus non-empty rendered output.
  #
  # The debug message reports only the presence of the data hash and of the
  # output; interpolating the values themselves would put the whole front
  # matter and the whole rendered HTML into the log string for every page.
  #
  # @return [Boolean, nil] truthy when the page should be translated
  def should_translate?
    data = @page.data
    output = @page.output

    has_data = data ? true : false
    has_locale = has_data && data["locale"]
    has_original_url = has_data && data["original_url"]
    has_output = output ? true : false
    output_not_empty = has_output && !output.empty?

    msg = "should_translate? data=#{has_data}, locale=#{has_locale}, " \
          "original_url=#{has_original_url}, output=#{has_output}, " \
          "not_empty=#{output_not_empty}"
    LoggerFormatter.debug_if_enabled("Translator", msg)

    has_data && has_locale && has_original_url && has_output && output_not_empty
  end

  # First non-localized site page whose URL equals the given URL, or nil.
  def find_original_page_by_url(url)
    @site.pages.find do |candidate|
      candidate.data["localized"] != true && candidate.url == url
    end
  end

  def find_and_log_original_page(original_url)
    LoggerFormatter.debug_if_enabled("Translator",
                                     "Translating page: locale=#{@page.data["locale"]}, " \
                                     "original_url=#{original_url}")
    original_page = find_original_page_by_url(original_url)
    if original_page
      LoggerFormatter.debug_if_enabled("Translator",
                                       "Original page found: #{original_page.url}")
    end
    original_page
  end

  def load_and_log_translations(locale, original_url, original_page)
    translations = load_translations_for_page(locale, original_url, original_page)
    LoggerFormatter.debug_if_enabled("Translator",
                                     "Loaded #{translations&.length || 0} translations")
    translations
  end
end
178
+ end
179
+ end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../constants"
4
+
5
+ module Jekyll
6
+ module L10n
7
# Logs detailed translation debug information.
#
# DebugLogger provides detailed logging of translation attempts for
# debugging normalization and matching issues. It logs when translations
# are found, when translations are missing, character-level differences,
# and similarity analysis for debugging.
#
# Key responsibilities:
# * Log translation match details
# * Log missing translations with similar key analysis
# * Log character-level differences for debugging
# * Provide context around matching failures
# * Support verbose translation debugging
#
# @see Jekyll::L10n::HtmlTranslator for translation workflow context
# @see Jekyll::L10n::TextNormalizer for text normalization logic
class DebugLogger
  # Data structure for translation debugging information.
  TranslationData = Struct.new(:text, :normalized_text, :translated, :translations,
                               :keyword_init => true)

  # Log detailed translation information.
  #
  # Logs the text being translated, how it was normalized, whether a translation
  # was found, and provides debugging information if no translation matched.
  # Only logs if translator has debug_logging enabled. Short texts without a
  # translation are skipped entirely.
  #
  # @param translator [HtmlTranslator] Translator with debug_logging setting
  # @param translation_data [TranslationData] Translation details to log
  # @return [void]
  def self.log_translation_details(translator, translation_data)
    return unless translator.debug_logging

    threshold = Jekyll::L10n::Constants::LOG_THRESHOLD_SHORT
    return if translation_data.translated.nil? && translation_data.text.length <= threshold

    log_missing_translation(translation_data.text, translation_data.normalized_text,
                            translation_data.translations)
    log_alert_details(translation_data.text, translation_data.normalized_text,
                      translation_data.translations)
    log_found_translation(translation_data.text, translation_data.translated)
  end

  # Report a missing translation, but only for texts longer than the short
  # threshold and only when some translation key shares the text's prefix
  # (a hint that normalization, not the catalog, is the problem).
  def self.log_missing_translation(text, normalized_text, translations)
    return unless translations[normalized_text].nil?
    return unless text.length > Jekyll::L10n::Constants::LOG_THRESHOLD_SHORT

    similar_keys = translations.keys.select do |key|
      key.start_with?(text[0..Jekyll::L10n::Constants::LOG_TRUNCATE_SHORT])
    end
    return unless similar_keys.any?

    log_no_translation_found(text, normalized_text, similar_keys)
  end

  # Compare one specific, hard-coded sample text against its catalog key.
  # NOTE(review): this looks like a leftover targeted debugging aid - confirm
  # whether it is still needed.
  def self.log_alert_details(text, normalized_text, translations)
    return unless text.include?("Alert component is an inline message box")

    matching_keys = translations.keys.select { |key| key.include?("Alert component is") }
    return unless matching_keys.any?

    key_from_hash = matching_keys[0]
    log_alert_comparison(text, normalized_text, key_from_hash)
  end

  # Log the first position at which the normalized text and a catalog key differ.
  #
  # The comparison covers the longer of the two strings, so a key that is a
  # strict prefix of the text (or vice versa) is reported as a difference at
  # the position where the shorter one ends. The "matches after normalization"
  # message is logged only when no difference exists at all.
  #
  # @param normalized_text [String] Normalized text from the HTML
  # @param key_from_hash [String] Candidate key from the translations hash
  # @return [void]
  def self.log_first_difference(normalized_text, key_from_hash)
    longest = [normalized_text.length, key_from_hash.length].max
    diff_index = (0...longest).find { |i| normalized_text[i] != key_from_hash[i] }

    if diff_index
      log_difference_at_position(normalized_text, key_from_hash, diff_index)
      log_context_strings(normalized_text, key_from_hash, diff_index)
    else
      log_normalization_match
    end
  end

  # Log a found translation when it is longer than the short threshold.
  def self.log_found_translation(text, translated)
    return unless translated && translated.length > Jekyll::L10n::Constants::LOG_THRESHOLD_SHORT

    truncate_length = Jekyll::L10n::Constants::LOG_TRUNCATE_LONG
    log_message(
      "[HtmlTranslator] ✓ TRANSLATION FOUND: #{text[0..truncate_length]}... => " \
      "#{translated[0..truncate_length]}..."
    )
  end

  # The helpers below are made private with private_class_method at the end
  # of the class; a bare `private` has no effect on `def self.` methods.

  def self.log_no_translation_found(text, normalized_text, similar_keys)
    truncate_length = Jekyll::L10n::Constants::LOG_TRUNCATE_LONG
    log_message("[HtmlTranslator] ✗ NO TRANSLATION FOUND for: #{text[0..truncate_length]}...")
    log_message("[HtmlTranslator] Normalized length: #{normalized_text.length}")
    log_message("[HtmlTranslator] Similar keys found: #{similar_keys.length}")
  end

  def self.log_alert_comparison(text, normalized_text, key_from_hash)
    log_message(
      "[HtmlTranslator] LENGTH match: HTML=#{text.length} vs KEY=#{key_from_hash.length}"
    )
    log_message("[HtmlTranslator] EQUAL: #{text == key_from_hash}")
    log_message("[HtmlTranslator] NORMALIZED match: #{normalized_text == key_from_hash}")
  end

  def self.log_difference_at_position(text1, text2, index)
    log_message(
      "[HtmlTranslator] DIFF at position #{index}: " \
      "NORMALIZED=#{describe_char(text1[index])} vs KEY=#{describe_char(text2[index])}"
    )
  end

  # Render a character as `"x" (codepoint)`; nil means the string ended
  # before this position.
  def self.describe_char(char)
    return "nil (end of text)" if char.nil?

    "#{char.inspect} (#{char.ord})"
  end

  def self.log_context_strings(normalized_text, key_from_hash, index)
    normalized_context = context_string(normalized_text, index)
    key_context = context_string(key_from_hash, index)

    log_message("[HtmlTranslator] Context NORMALIZED: ...#{normalized_context.inspect}...")
    log_message("[HtmlTranslator] Context KEY: ...#{key_context.inspect}...")
  end

  def self.log_normalization_match
    log_message(
      "[HtmlTranslator] ✓ Text matches after normalization - translation will be applied"
    )
  end

  def self.log_message(message)
    Jekyll.logger.info "Localization", message
  end

  # Slice of text around index, BACKTRACE_CONTEXT_LENGTH characters each side.
  def self.context_string(text, index)
    start_idx = [0, index - Jekyll::L10n::Constants::BACKTRACE_CONTEXT_LENGTH].max
    end_idx = [index + Jekyll::L10n::Constants::BACKTRACE_CONTEXT_LENGTH, text.length - 1].min
    text[start_idx..end_idx]
  end

  private_class_method :log_no_translation_found,
                       :log_alert_comparison,
                       :log_difference_at_position,
                       :describe_char,
                       :log_context_strings,
                       :log_normalization_match,
                       :log_message,
                       :context_string
end
152
+ end
153
+ end