jekyll-l10n 1.3.14 → 1.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-l10n/po_file/writer.rb +1 -1
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +2 -3
- data/lib/jekyll-l10n/translation/html_translator.rb +37 -7
- data/lib/jekyll-l10n/utils/html_text_utils.rb +4 -3
- data/lib/jekyll-l10n/utils/translation_resolver.rb +15 -5
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e9b11bbd19ea0a06c6282c11b22319d49ec9a25060d7324d667e14c78f397881
|
|
4
|
+
data.tar.gz: 0b7c89696c593e771c2694090d0fcebb4c9414a07fc2474f568c5d298953667b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fee23453215a6576799f774d9de4f94e870554b0c9fe9044ec7b0fd20477f20cb99d4d56d026005224774cdb4d332f0e493eb0bf8e8cf0750289a0028850fd8b
|
|
7
|
+
data.tar.gz: 0ac080acebbd08f462ee480f771804890111603e94f9c17aae24a12ec7e7d984659096c64e513e7c34c535fc5c2f0d127dad9a6038704ca9f4107b0c6d110115
|
|
@@ -19,7 +19,6 @@ module Jekyll
|
|
|
19
19
|
# * Remove nested block elements from text
|
|
20
20
|
# * Remove empty icon tags (external link markers)
|
|
21
21
|
# * Normalize and validate extracted text
|
|
22
|
-
# * Decode HTML entities
|
|
23
22
|
#
|
|
24
23
|
# @example
|
|
25
24
|
# text = BlockTextExtractor.extract(paragraph_node)
|
|
@@ -31,7 +30,8 @@ module Jekyll
|
|
|
31
30
|
#
|
|
32
31
|
# Returns nil if element is not extractable or if extracted text fails
|
|
33
32
|
# validation. Clones the node, removes nested block elements and empty
|
|
34
|
-
# icon tags, normalizes whitespace,
|
|
33
|
+
# icon tags, normalizes whitespace, and validates. HTML entities are
|
|
34
|
+
# preserved verbatim to match the keys produced by the extraction pipeline.
|
|
35
35
|
#
|
|
36
36
|
# @param node [Nokogiri::XML::Element] DOM element to extract from
|
|
37
37
|
# @return [String, nil] Normalized text from element, or nil if not valid
|
|
@@ -43,7 +43,6 @@ module Jekyll
|
|
|
43
43
|
HtmlTextUtils.remove_empty_icon_tags(clone)
|
|
44
44
|
|
|
45
45
|
text = TextNormalizer.normalize(clone.inner_html).strip
|
|
46
|
-
text = HtmlTextUtils.decode_html_entities(text)
|
|
47
46
|
|
|
48
47
|
TextValidator.valid?(text) ? text : nil
|
|
49
48
|
end
|
|
@@ -108,7 +108,7 @@ module Jekyll
|
|
|
108
108
|
normalized_text = TextNormalizer.normalize(text)
|
|
109
109
|
translated = TranslationResolver.resolve(node, normalized_text, translations)
|
|
110
110
|
|
|
111
|
-
return if apply_block_level_translation?(node, normalized_text,
|
|
111
|
+
return if apply_block_level_translation?(node, normalized_text, translations)
|
|
112
112
|
|
|
113
113
|
if @debug_logging
|
|
114
114
|
log_translation_debug_info(text, normalized_text, translated,
|
|
@@ -147,8 +147,10 @@ module Jekyll
|
|
|
147
147
|
"[HtmlTranslator] Translation: #{translation[0..truncate_length]}..."
|
|
148
148
|
end
|
|
149
149
|
|
|
150
|
+
preserved_anchors = parent.css('.heading-anchor').map(&:dup)
|
|
150
151
|
parent.children.each(&:remove)
|
|
151
152
|
parent.inner_html = translation
|
|
153
|
+
preserved_anchors.each { |a| parent.add_child(a) } if parent.css('.heading-anchor').empty?
|
|
152
154
|
end
|
|
153
155
|
|
|
154
156
|
# Apply fallback when translation is missing or empty.
|
|
@@ -202,18 +204,45 @@ module Jekyll
|
|
|
202
204
|
@debug_logging && text.include?('attribute')
|
|
203
205
|
end
|
|
204
206
|
|
|
205
|
-
def apply_block_level_translation?(node, normalized_text,
|
|
206
|
-
|
|
207
|
+
def apply_block_level_translation?(node, normalized_text, translations)
|
|
208
|
+
ancestor = find_content_element_ancestor(node)
|
|
209
|
+
return false unless ancestor
|
|
207
210
|
|
|
208
|
-
return false if TranslationResolver.contains_protected_elements?(
|
|
211
|
+
return false if TranslationResolver.contains_protected_elements?(ancestor)
|
|
209
212
|
|
|
210
|
-
block_text = BlockTextExtractor.extract(
|
|
213
|
+
block_text = BlockTextExtractor.extract(ancestor)
|
|
211
214
|
return false unless block_text && block_text != normalized_text
|
|
212
215
|
|
|
213
|
-
|
|
216
|
+
translation = select_block_translation(node, ancestor, normalized_text, block_text,
|
|
217
|
+
translations)
|
|
218
|
+
return false unless translation
|
|
219
|
+
|
|
220
|
+
apply_block_level_translation(ancestor, translation)
|
|
214
221
|
true
|
|
215
222
|
end
|
|
216
223
|
|
|
224
|
+
# When text is directly inside a content element, prefer its direct translation
|
|
225
|
+
# (the msgstr may itself contain HTML, e.g. a heading with an anchor).
|
|
226
|
+
# When text is nested inside inline element(s), only apply if the full block
|
|
227
|
+
# text is an explicit translation key.
|
|
228
|
+
def select_block_translation(node, ancestor, normalized_text, block_text, translations)
|
|
229
|
+
if node.parent == ancestor
|
|
230
|
+
translations[normalized_text] || translations[block_text]
|
|
231
|
+
else
|
|
232
|
+
translations[block_text]
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
def find_content_element_ancestor(node)
|
|
237
|
+
current = node&.parent
|
|
238
|
+
while current&.element?
|
|
239
|
+
return current if content_element?(current)
|
|
240
|
+
|
|
241
|
+
current = current.parent
|
|
242
|
+
end
|
|
243
|
+
nil
|
|
244
|
+
end
|
|
245
|
+
|
|
217
246
|
def log_translation_debug_info(text, normalized_text, translated, translations)
|
|
218
247
|
translation_data = DebugLogger::TranslationData.new(text: text,
|
|
219
248
|
normalized_text: normalized_text,
|
|
@@ -223,7 +252,8 @@ module Jekyll
|
|
|
223
252
|
end
|
|
224
253
|
|
|
225
254
|
private :log_text_node_debug, :should_skip_translation?, :should_log_text_debug?,
|
|
226
|
-
:apply_block_level_translation?, :log_translation_debug_info
|
|
255
|
+
:apply_block_level_translation?, :select_block_translation,
|
|
256
|
+
:find_content_element_ancestor, :log_translation_debug_info
|
|
227
257
|
end
|
|
228
258
|
end
|
|
229
259
|
end
|
|
@@ -89,8 +89,10 @@ module Jekyll
|
|
|
89
89
|
# Extract text with inline tags preserved.
|
|
90
90
|
#
|
|
91
91
|
# Extracts text from an element, removes block elements and empty icons,
|
|
92
|
-
# normalizes whitespace
|
|
93
|
-
#
|
|
92
|
+
# and normalizes whitespace. HTML entities (e.g. &lt;, &gt;) are preserved
|
|
93
|
+
# verbatim so that entity-encoded content inside inline elements (such as
|
|
94
|
+
# <code>&lt;p&gt;</code>) is written to PO msgids as-is and does not
|
|
95
|
+
# become a live HTML tag when the msgstr is later injected via inner_html.
|
|
94
96
|
#
|
|
95
97
|
# @param node [Nokogiri::XML::Node] Element to extract from
|
|
96
98
|
# @return [String] Extracted and normalized text
|
|
@@ -100,7 +102,6 @@ module Jekyll
|
|
|
100
102
|
remove_empty_icon_tags(clone)
|
|
101
103
|
|
|
102
104
|
text = TextNormalizer.normalize(clone.inner_html)
|
|
103
|
-
text = decode_html_entities(text)
|
|
104
105
|
text&.then { |t| TextNormalizer.normalize(t).strip }
|
|
105
106
|
end
|
|
106
107
|
|
|
@@ -59,13 +59,12 @@ module Jekyll
|
|
|
59
59
|
# # Returns nil (protected element present, prevents block translation)
|
|
60
60
|
# TranslationResolver.try_block_level_translation(node, "text", translations)
|
|
61
61
|
def self.try_block_level_translation(node, text, translations)
|
|
62
|
-
|
|
62
|
+
ancestor = find_content_element_ancestor(node)
|
|
63
|
+
return nil unless ancestor
|
|
63
64
|
|
|
64
|
-
|
|
65
|
-
# (script, style, pre). These cannot be safely applied at block level.
|
|
66
|
-
return nil if contains_protected_elements?(node.parent)
|
|
65
|
+
return nil if contains_protected_elements?(ancestor)
|
|
67
66
|
|
|
68
|
-
block_text = BlockTextExtractor.extract(
|
|
67
|
+
block_text = BlockTextExtractor.extract(ancestor)
|
|
69
68
|
return nil unless block_text && block_text != text
|
|
70
69
|
|
|
71
70
|
translations[block_text]
|
|
@@ -78,6 +77,17 @@ module Jekyll
|
|
|
78
77
|
HtmlElements::CONTENT_ELEMENTS.include?(node.name)
|
|
79
78
|
end
|
|
80
79
|
|
|
80
|
+
def self.find_content_element_ancestor(node)
|
|
81
|
+
current = node&.parent
|
|
82
|
+
while current&.element?
|
|
83
|
+
return current if content_element?(current)
|
|
84
|
+
|
|
85
|
+
current = current.parent
|
|
86
|
+
end
|
|
87
|
+
nil
|
|
88
|
+
end
|
|
89
|
+
private_class_method :find_content_element_ancestor
|
|
90
|
+
|
|
81
91
|
# Check if an element contains protected child elements that block translations.
|
|
82
92
|
#
|
|
83
93
|
# Protected elements (script, style, pre) cannot have their surrounding text
|