jekyll-l10n 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/lib/jekyll-l10n/constants.rb +136 -0
- data/lib/jekyll-l10n/errors.rb +60 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
- data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
- data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
- data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
- data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
- data/lib/jekyll-l10n/extraction/logger.rb +48 -0
- data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
- data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
- data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
- data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
- data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
- data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
- data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
- data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
- data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
- data/lib/jekyll-l10n/po_file/loader.rb +64 -0
- data/lib/jekyll-l10n/po_file/manager.rb +160 -0
- data/lib/jekyll-l10n/po_file/merger.rb +80 -0
- data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
- data/lib/jekyll-l10n/po_file/reader.rb +518 -0
- data/lib/jekyll-l10n/po_file/writer.rb +232 -0
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
- data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
- data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
- data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
- data/lib/jekyll-l10n/translation/translator.rb +179 -0
- data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
- data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
- data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
- data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
- data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
- data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
- data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
- data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
- data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
- data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
- data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
- data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
- data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
- data/lib/jekyll-l10n/version.rb +10 -0
- data/lib/jekyll-l10n.rb +268 -0
- metadata +200 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "gettext/po"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require_relative "reader"
|
|
6
|
+
require_relative "../constants"
|
|
7
|
+
require_relative "../utils/file_operations"
|
|
8
|
+
|
|
9
|
+
module Jekyll
|
|
10
|
+
module L10n
|
|
11
|
+
# Writes extraction entries to GNU Gettext PO files.
|
|
12
|
+
#
|
|
13
|
+
# PoFileWriter serializes extraction entries into standard PO file format with
|
|
14
|
+
# proper escaping, metadata, and optional merging of existing translations. It
|
|
15
|
+
# handles multi-line strings, special characters, fuzzy flags, and reference
|
|
16
|
+
# comments. When merging is enabled, existing translations are preserved while
|
|
17
|
+
# new strings are added or marked as fuzzy.
|
|
18
|
+
#
|
|
19
|
+
# Key responsibilities:
|
|
20
|
+
# * Create PO file entries from extraction data
|
|
21
|
+
# * Merge new entries with existing translations
|
|
22
|
+
# * Escape special characters and line breaks
|
|
23
|
+
# * Format multi-line and long strings properly
|
|
24
|
+
# * Add reference comments (file location references)
|
|
25
|
+
# * Add fuzzy flags for merge operations
|
|
26
|
+
# * Setup PO file headers with encoding information
|
|
27
|
+
# * Write UTF-8 encoded output
|
|
28
|
+
#
|
|
29
|
+
# @example
|
|
30
|
+
# entries = [{ msgid: "Hello", msgstr: "", reference: "html/body/p[1]" }]
|
|
31
|
+
# PoFileWriter.write('_locales/es.po', entries, 'es')
|
|
32
|
+
# # Writes PO file with proper header and formatted entries
|
|
33
|
+
class PoFileWriter
  # Write entries to a PO file.
  #
  # Builds a fresh PO object with a header for +locale+, adds one entry per
  # element of +entries+ (reusing the msgstr / fuzzy flag found in the existing
  # file unless +skip_merge+ is set), serializes it and writes it as UTF-8.
  #
  # If serialization fails (serialize_po_file returns an empty string) nothing
  # is written, so an existing PO file is never replaced by an empty one.
  #
  # @param po_path [String] Full path to output PO file
  # @param entries [Array<Hash>] Extraction entries with :msgid, :msgstr, :reference
  # @param locale [String] Locale code (e.g., 'es', 'fr')
  # @param skip_merge [Boolean] If true, overwrite without merging existing translations
  # @return [Boolean] True if the file was written, false if serialization failed
  def self.write(po_path, entries, locale, skip_merge: false)
    existing_po = if skip_merge || !File.exist?(po_path)
                    {}
                  else
                    PoFileReader.parse_for_merge(po_path)
                  end

    po_file = ::GetText::PO.new
    setup_po_header(po_file, locale)
    merge_entries_preserving_translations(po_file, entries, existing_po)

    output = serialize_po_file(po_file)
    if output.empty?
      # The header entry alone always produces output, so an empty string
      # means serialization failed; keep whatever is on disk.
      Jekyll.logger.warn "Localization",
                         "PO serialization produced no output; not overwriting #{po_path}"
      return false
    end

    FileOperations.write_utf8(po_path, output)
    true
  end

  # Add every extraction entry to +po_file+, collecting (not raising) per-entry
  # failures and reporting them in a single warning at the end.
  #
  # @param po_file [GetText::PO] PO object being built
  # @param entries [Array<Hash>] Extraction entries
  # @param existing_po [Hash] Previously parsed translations keyed by msgid
  # @return [void]
  def self.merge_entries_preserving_translations(po_file, entries, existing_po)
    truncate_length = Jekyll::L10n::Constants::LOG_TRUNCATE_LONG
    errors = []

    entries.each do |entry|
      po_entry = create_po_entry(entry, existing_po)
      po_entry.add_comment(entry[:reference]) if entry[:reference]

      po_file[entry[:msgid]] = po_entry
    rescue StandardError => e
      # msgid may be nil or the entry may not be a Hash at all - which is a
      # likely reason for landing here - so build the label defensively.
      label = entry.is_a?(Hash) ? entry[:msgid].to_s : entry.inspect
      errors << "Error adding '#{label[0..truncate_length]}': " \
                "#{e.class} - #{e.message}"
    end

    return unless errors.any?

    Jekyll.logger.warn "Localization",
                       "#{errors.length} errors during merging: #{errors.join(", ")}"
  end

  # Build a PO entry for one extraction entry, taking msgstr (and fuzzy flag)
  # from +existing_po+ when that msgid is already known.
  #
  # @param entry [Hash] Extraction entry (:msgid, :msgstr)
  # @param existing_po [Hash] Previously parsed translations keyed by msgid
  # @return [GetText::POEntry]
  def self.create_po_entry(entry, existing_po)
    po_entry = ::GetText::POEntry.new(:normal)
    po_entry.msgid = entry[:msgid]

    set_po_entry_msgstr(po_entry, existing_po[entry[:msgid]], entry[:msgstr])

    po_entry
  end

  # Assign msgstr on +po_entry+.
  #
  # An existing entry may be a Hash (:msgstr, :fuzzy) or a plain value; an
  # existing value always wins over +new_msgstr+, and "" is the last resort.
  #
  # @param po_entry [GetText::POEntry]
  # @param existing_entry [Hash, String, nil]
  # @param new_msgstr [String, nil]
  # @return [void]
  def self.set_po_entry_msgstr(po_entry, existing_entry, new_msgstr)
    if existing_entry.is_a?(Hash)
      po_entry.msgstr = existing_entry[:msgstr] || ""
      po_entry.flag = "fuzzy" if existing_entry[:fuzzy]
    else
      po_entry.msgstr = existing_entry || (new_msgstr || "")
    end
  end

  # Header text stored as the msgstr of the empty-msgid entry.
  #
  # @param locale [String] Locale code
  # @return [String]
  def self.header(locale)
    "Language: #{locale}\n" \
      "MIME-Version: 1.0\n" \
      "Content-Type: text/plain; charset=UTF-8\n" \
      "Content-Transfer-Encoding: 8bit\n"
  end

  # Install the header entry (msgid "") into +po_file+.
  #
  # @param po_file [GetText::PO]
  # @param locale [String]
  # @return [void]
  def self.setup_po_header(po_file, locale)
    header_entry = ::GetText::POEntry.new(:normal)
    header_entry.msgid = ""
    header_entry.msgstr = header(locale)
    po_file[""] = header_entry
  end

  # Render all entries of +po_file+ as PO text.
  #
  # Any error is logged and turned into an empty string; callers treat ""
  # as "serialization failed".
  #
  # @param po_file [#entries] PO object
  # @return [String] PO text, or "" on failure
  def self.serialize_po_file(po_file)
    lines = []
    get_entries_list(po_file).each { |entry| serialize_po_entry(entry, lines) }
    lines.join("\n")
  rescue StandardError => e
    error_msg = "DEBUG serialize_po_file: Error iterating entries: #{e.class} - #{e.message}"
    Jekyll.logger.info "Localization", error_msg
    ""
  end

  # Append the textual form of one entry (comments, msgid, msgstr and a
  # blank separator line) to +lines+.
  #
  # @param entry [GetText::POEntry]
  # @param lines [Array<String>] Output accumulator (mutated)
  # @return [void]
  def self.serialize_po_entry(entry, lines)
    add_translator_comments(entry, lines)
    add_flag_comment(entry, lines)
    add_reference_comment(entry, lines)
    add_msgid_msgstr(entry, lines)
    lines << ""
  end

  # Emit "# ..." lines for each non-empty line of the translator comment.
  def self.add_translator_comments(entry, lines)
    comment = entry.translator_comment
    return if comment.nil? || comment.empty?

    comment.split("\n").each do |comment_line|
      lines << "# #{comment_line}" unless comment_line.empty?
    end
  end

  # Emit a "#, flag" line when the entry carries a flag.
  def self.add_flag_comment(entry, lines)
    flags = entry.flag.to_s.strip
    lines << "#, #{flags}" unless flags.empty?
  end

  # Emit "#: ..." lines from the entry's extracted comment (where `write`
  # stores the entry reference via add_comment). Each line of a multi-line
  # comment gets its own prefix so the output stays one comment per line.
  def self.add_reference_comment(entry, lines)
    comment = entry.extracted_comment
    return if comment.nil? || comment.empty?

    comment.split("\n").each do |reference_line|
      lines << "#: #{reference_line}" unless reference_line.empty?
    end
  end

  # Emit the msgid and msgstr lines for +entry+.
  def self.add_msgid_msgstr(entry, lines)
    lines << escape_po_string("msgid", entry.msgid)
    lines << escape_po_string("msgstr", entry.msgstr)
  end

  # Fetch the entries of +po_file+, falling back to #values when #entries
  # raises; logs and returns [] if both fail.
  #
  # @param po_file [#entries, #values]
  # @return [Array]
  def self.get_entries_list(po_file)
    begin
      po_file.entries
    rescue StandardError
      # entries method failed - try values as fallback
      po_file.values
    end
  rescue StandardError => e
    # Both methods failed - log and return empty
    Jekyll.logger.warn "Localization", "Could not retrieve PO entries: #{e.class}"
    []
  end

  # Format +value+ as a PO keyword line ("msgid ..." / "msgstr ...").
  #
  # The value is stripped, backslashes and the chosen delimiter are escaped,
  # and the result is laid out on one line, as a multi-line block (when it
  # contains line breaks) or as a wrapped block (when it is long).
  #
  # NOTE(review): line breaks are written as separate quoted lines without a
  # "\n" escape, and a single quote may be used as delimiter. Both differ from
  # GNU PO conventions; presumably the project's own reader expects this -
  # confirm before changing.
  #
  # @param prefix [String] "msgid" or "msgstr"
  # @param value [String, nil] Raw text (nil is treated as "")
  # @return [String]
  def self.escape_po_string(prefix, value)
    escaped = escape_backslashes(value.to_s.strip)
    delimiter, escaped = escape_quotes_and_get_delimiter(escaped)

    if escaped.include?("\n")
      format_multiline_string(prefix, delimiter, escaped)
    elsif escaped.length < Jekyll::L10n::Constants::PO_SHORT_LINE_LENGTH
      "#{prefix} #{delimiter}#{escaped}#{delimiter}"
    else
      format_long_string(prefix, delimiter, escaped)
    end
  end

  # Double every backslash in +value+.
  #
  # The block form is required: in a gsub replacement *string*, "\\\\" is
  # itself read as an escaped single backslash, which would make the
  # substitution a no-op.
  #
  # @param value [String]
  # @return [String]
  def self.escape_backslashes(value)
    value.gsub("\\") { "\\\\" }
  end

  # Choose the quoting delimiter and escape it inside the text.
  #
  # Single quotes are used only when the text contains double quotes and no
  # single quotes (so nothing needs escaping); otherwise double quotes are
  # used and embedded double quotes are backslash-escaped.
  #
  # @param escaped [String] Text with backslashes already escaped
  # @return [Array(String, String)] delimiter and escaped text
  def self.escape_quotes_and_get_delimiter(escaped)
    has_double = escaped.include?('"')
    has_single = escaped.include?("'")

    if has_double && !has_single
      ["'", escaped.dup]
    else
      ['"', escaped.gsub('"') { '\\"' }]
    end
  end

  # Lay out text containing line breaks: an empty first string followed by
  # one quoted string per non-blank line.
  #
  # @return [String]
  def self.format_multiline_string(prefix, delimiter, escaped)
    lines = ["#{prefix} #{delimiter}#{delimiter}"]
    escaped.split("\n").each do |line|
      next if line.strip.empty?

      lines << "#{delimiter}#{line}#{delimiter}"
    end
    lines << "#{delimiter}#{delimiter}" if lines.length == 1
    lines.join("\n")
  end

  # Lay out long single-line text: an empty first string followed by quoted
  # chunks of PO_LINE_LENGTH characters.
  #
  # A chunk is extended by one character when it would otherwise end in the
  # middle of an escape pair (odd number of trailing backslashes), because a
  # dangling backslash would escape the closing delimiter.
  #
  # @return [String]
  def self.format_long_string(prefix, delimiter, escaped)
    width = [Jekyll::L10n::Constants::PO_LINE_LENGTH, 1].max
    lines = ["#{prefix} #{delimiter}#{delimiter}"]
    offset = 0

    while offset < escaped.length
      chunk = escaped[offset, width]
      splits_escape = chunk[/\\+\z/].to_s.length.odd?
      chunk = escaped[offset, width + 1] if splits_escape && offset + chunk.length < escaped.length

      lines << "#{delimiter}#{chunk}#{delimiter}"
      offset += chunk.length
    end

    lines.join("\n")
  end
end
|
|
231
|
+
end
|
|
232
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../utils/text_normalizer"
|
|
4
|
+
require_relative "../utils/html_text_utils"
|
|
5
|
+
require_relative "../utils/html_elements"
|
|
6
|
+
require_relative "../utils/text_validator"
|
|
7
|
+
|
|
8
|
+
module Jekyll
|
|
9
|
+
module L10n
|
|
10
|
+
# Extracts normalized text from block-level HTML elements.
|
|
11
|
+
#
|
|
12
|
+
# BlockTextExtractor extracts the complete text content from a block element
|
|
13
|
+
# while removing nested block-level elements and empty icon tags. This is used
|
|
14
|
+
# to match against block-level translations where the entire element has a
|
|
15
|
+
# single translation rather than individual text node translations.
|
|
16
|
+
#
|
|
17
|
+
# Key responsibilities:
|
|
18
|
+
# * Extract text from extractable block elements
|
|
19
|
+
# * Remove nested block elements from text
|
|
20
|
+
# * Remove empty icon tags (external link markers)
|
|
21
|
+
# * Normalize and validate extracted text
|
|
22
|
+
# * Decode HTML entities
|
|
23
|
+
#
|
|
24
|
+
# @example
|
|
25
|
+
# text = BlockTextExtractor.extract(paragraph_node)
|
|
26
|
+
# # Returns normalized text from paragraph, useful for finding block translations
|
|
27
|
+
module BlockTextExtractor
  extend self

  # Produce the normalized text of a block element, or nil.
  #
  # Works on a copy of +node+: HtmlTextUtils strips nested block elements
  # and empty icon tags from the copy, then the copy's inner HTML is
  # normalized, stripped and entity-decoded. The result is returned only
  # when TextValidator accepts it.
  #
  # @param node [Nokogiri::XML::Element] DOM element to extract from
  # @return [String, nil] Normalized text, or nil when the node is not
  #   extractable or the text is not valid
  def extract(node)
    return unless extractable?(node)

    working_copy = node.dup
    HtmlTextUtils.remove_block_elements(working_copy)
    HtmlTextUtils.remove_empty_icon_tags(working_copy)

    normalized = TextNormalizer.normalize(working_copy.inner_html).strip
    decoded = HtmlTextUtils.decode_html_entities(normalized)

    decoded if TextValidator.valid?(decoded)
  end

  # Whether +node+ is an element whose tag name is listed in
  # HtmlElements::CONTENT_ELEMENTS.
  #
  # @param node [Nokogiri::XML::Node]
  # @return [Boolean]
  def extractable?(node)
    is_element = node.element?
    is_element && HtmlElements::CONTENT_ELEMENTS.include?(node.name)
  end
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../constants"
|
|
4
|
+
require_relative "../utils/text_normalizer"
|
|
5
|
+
require_relative "../utils/debug_logger"
|
|
6
|
+
require_relative "../utils/translation_resolver"
|
|
7
|
+
require_relative "../utils/url_transformer"
|
|
8
|
+
require_relative "../utils/html_elements"
|
|
9
|
+
require_relative "../utils/html_parser"
|
|
10
|
+
require_relative "../utils/text_validator"
|
|
11
|
+
require_relative "block_text_extractor"
|
|
12
|
+
|
|
13
|
+
module Jekyll
|
|
14
|
+
module L10n
|
|
15
|
+
# Applies translations from PO files to HTML text nodes and DOM attributes.
|
|
16
|
+
#
|
|
17
|
+
# HtmlTranslator walks the DOM tree of parsed HTML documents and applies translations
|
|
18
|
+
# to text content and configurable HTML attributes (title, alt, aria-label, etc.).
|
|
19
|
+
# It supports three fallback modes for missing translations: using original text,
|
|
20
|
+
# marking untranslated content, or leaving blank. It also handles block-level
|
|
21
|
+
# translations for elements with complete translations and preserves URL transformations.
|
|
22
|
+
#
|
|
23
|
+
# Key responsibilities:
|
|
24
|
+
# * Parse full HTML documents while preserving DOCTYPE and structure
|
|
25
|
+
# * Translate text nodes using normalized text for matching
|
|
26
|
+
# * Translate HTML attributes (title, alt, aria-label, placeholder, aria-description)
|
|
27
|
+
# * Apply fallback modes when translations are missing (english/marker/empty)
|
|
28
|
+
# * Handle block-level translations for content elements
|
|
29
|
+
# * Transform relative URLs to locale-prefixed URLs
|
|
30
|
+
# * Remove auto-inserted meta charset tags from serialized HTML
|
|
31
|
+
#
|
|
32
|
+
# @example
|
|
33
|
+
# translator = HtmlTranslator.new('english', ['title', 'alt'])
|
|
34
|
+
# translated = translator.translate(html, translations, 'es', '/baseurl')
|
|
35
|
+
# # Returns HTML with text and attributes translated to Spanish
|
|
36
|
+
#
|
|
37
|
+
# @see Jekyll::L10n::TranslationResolver for fallback mode logic
|
|
38
|
+
class HtmlTranslator
  attr_reader :fallback_mode, :translatable_attrs, :debug_logging

  # Initialize a new HtmlTranslator.
  #
  # @param fallback_mode [String, Symbol] How to handle missing translations:
  #   'english' / :english - use original text (default branch)
  #   'marker'  / :marker  - prefix with the untranslated marker
  #   'empty'   / :empty   - leave blank
  # @param translatable_attrs [Array<String>] HTML attributes to translate
  #   (e.g., ['title', 'alt', 'aria-label'])
  # @param debug_logging [Boolean] Enable detailed debug logging
  def initialize(fallback_mode, translatable_attrs, debug_logging: false)
    @fallback_mode = fallback_mode
    @translatable_attrs = translatable_attrs
    @debug_logging = debug_logging
  end

  # Translate an HTML document to a specific locale.
  #
  # Parses the document, translates text nodes and attributes in place,
  # rewrites URLs on the parsed document (before serialization, so the HTML
  # is parsed only once), serializes, and strips the meta charset tag via
  # HtmlParser.remove_meta_charset.
  #
  # @param html [String] Full HTML document to translate
  # @param translations [Hash] Maps normalized text to translated strings or
  #   to metadata hashes with :msgstr, :reference, :fuzzy keys
  # @param locale [String] Target locale code (defaults to "en")
  # @param baseurl [String] Base URL for URL transformation (defaults to "")
  # @return [String] Translated HTML
  def translate(html, translations, locale = "en", baseurl = "")
    doc = HtmlParser.parse_document(html)

    translate_node(doc, translations)
    UrlTransformer.transform_document(doc, locale, baseurl)

    HtmlParser.remove_meta_charset(doc.to_html)
  end

  private

  # Depth-first walk: translate the node itself, then recurse into children.
  def translate_node(node, translations)
    return if node.nil?

    translate_text_node(node, translations) if node.text? && node.parent
    translate_node_attributes(node, translations) if node.element?
    node.children.each { |child| translate_node(child, translations) }
  end

  # Translate one text node, unless a block-level translation replaced the
  # whole parent element instead.
  def translate_text_node(node, translations)
    original_content = node.content
    text = original_content.strip
    return if should_skip_translation?(text)

    log_text_node_debug(text) if should_log_text_debug?(text)

    normalized_text = TextNormalizer.normalize(text)
    translated = TranslationResolver.resolve(node, normalized_text, translations)

    return if apply_block_level_translation?(node, normalized_text, translated)

    log_translation_debug_info(text, normalized_text, translated, translations) if @debug_logging

    node.content = apply_fallback(original_content, translated)
  end

  # Translate every configured attribute present on +node+. Attribute values
  # are looked up directly in +translations+ by their normalized text.
  def translate_node_attributes(node, translations)
    Array(@translatable_attrs).each do |attr_name|
      value = node[attr_name]
      next if value.nil? || value.empty?

      value = value.strip
      next unless TextValidator.valid?(value)

      translated = translations[TextNormalizer.normalize(value)]
      node[attr_name] = apply_fallback(value, translated)
    end
  end

  # Whether +node+ is an element listed in HtmlElements::CONTENT_ELEMENTS.
  def content_element?(node)
    return false unless node.element?

    HtmlElements::CONTENT_ELEMENTS.include?(node.name)
  end

  # Replace the entire content of +parent+ with +translation+ (as HTML).
  def apply_block_level_translation(parent, translation)
    if @debug_logging
      truncate_length = Jekyll::L10n::Constants::LOG_TRUNCATE_LONG
      Jekyll.logger.info "Localization",
                         "[HtmlTranslator] BLOCK-LEVEL TRANSLATION for <#{parent.name}>"
      Jekyll.logger.info "Localization",
                         "[HtmlTranslator] Translation: " \
                         "#{translation.to_s[0..truncate_length]}..."
    end

    parent.children.each(&:remove)
    parent.inner_html = translation
  end

  # Return the translation, or a fallback when it is missing or empty.
  #
  # +msgstr+ may be a plain string or a metadata Hash (see #translate); for
  # a Hash the :msgstr value is used, so the Hash itself is never written
  # into the document.
  # NOTE(review): the :fuzzy key of a metadata Hash is not consulted here -
  # confirm whether fuzzy entries should count as untranslated.
  #
  # Fallback modes:
  # * marker  - "<UNTRANSLATED_MARKER> <msgid>"
  # * empty   - ""
  # * anything else (english) - the original +msgid+
  #
  # @param msgid [String] Original text
  # @param msgstr [String, Hash, nil] Translation candidate
  # @return [String]
  def apply_fallback(msgid, msgstr)
    text = translation_text(msgstr)
    return text if text && !text.empty?

    case @fallback_mode
    when Jekyll::L10n::Constants::FALLBACK_MODE_MARKER, :marker
      "#{Jekyll::L10n::Constants::UNTRANSLATED_MARKER} #{msgid}"
    when Jekyll::L10n::Constants::FALLBACK_MODE_EMPTY, :empty
      ""
    else
      msgid
    end
  end

  # Unwrap the translated string from a metadata Hash; pass anything else
  # through unchanged.
  def translation_text(translation)
    return translation unless translation.is_a?(Hash)

    translation[:msgstr] || translation["msgstr"]
  end

  # Log the (truncated) text of the node being processed.
  def log_text_node_debug(text)
    truncate_length = Jekyll::L10n::Constants::LOG_TRUNCATE_MEDIUM
    message = "[HtmlTranslator] Processing text node: " \
              "#{text[0..truncate_length]}... (#{text.length} chars)"
    Jekyll.logger.info "Localization", message
  end

  # Text that TextValidator rejects is left untouched.
  def should_skip_translation?(text)
    !TextValidator.valid?(text)
  end

  # Per-node debug output is limited to text containing "attribute".
  def should_log_text_debug?(text)
    @debug_logging && text.include?("attribute")
  end

  # Apply a block-level translation to the node's parent when appropriate.
  #
  # Despite the predicate name this method has a side effect: when it returns
  # true the parent's content has already been replaced. It applies only when
  # a translation exists, the parent is a content element without protected
  # elements, and the parent's block text differs from this node's text.
  #
  # @return [Boolean] true if the parent was replaced
  def apply_block_level_translation?(node, normalized_text, translated)
    parent = node.parent
    return false unless translated && parent && content_element?(parent)
    return false if TranslationResolver.contains_protected_elements?(parent)

    block_text = BlockTextExtractor.extract(parent)
    return false unless block_text && block_text != normalized_text

    apply_block_level_translation(parent, translated)
    true
  end

  # Hand the details of one lookup to DebugLogger.
  def log_translation_debug_info(text, normalized_text, translated, translations)
    translation_data = DebugLogger::TranslationData.new(text: text,
                                                        normalized_text: normalized_text,
                                                        translated: translated,
                                                        translations: translations)
    DebugLogger.log_translation_details(self, translation_data)
  end
end
|
|
228
|
+
end
|
|
229
|
+
end
|