jekyll-l10n 1.3.15 → 1.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-l10n/po_file/writer.rb +1 -1
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +2 -3
- data/lib/jekyll-l10n/translation/html_translator.rb +35 -7
- data/lib/jekyll-l10n/utils/html_text_utils.rb +4 -3
- data/lib/jekyll-l10n/utils/translation_resolver.rb +15 -5
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e9b11bbd19ea0a06c6282c11b22319d49ec9a25060d7324d667e14c78f397881
|
|
4
|
+
data.tar.gz: 0b7c89696c593e771c2694090d0fcebb4c9414a07fc2474f568c5d298953667b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fee23453215a6576799f774d9de4f94e870554b0c9fe9044ec7b0fd20477f20cb99d4d56d026005224774cdb4d332f0e493eb0bf8e8cf0750289a0028850fd8b
|
|
7
|
+
data.tar.gz: 0ac080acebbd08f462ee480f771804890111603e94f9c17aae24a12ec7e7d984659096c64e513e7c34c535fc5c2f0d127dad9a6038704ca9f4107b0c6d110115
|
|
@@ -19,7 +19,6 @@ module Jekyll
|
|
|
19
19
|
# * Remove nested block elements from text
|
|
20
20
|
# * Remove empty icon tags (external link markers)
|
|
21
21
|
# * Normalize and validate extracted text
|
|
22
|
-
# * Decode HTML entities
|
|
23
22
|
#
|
|
24
23
|
# @example
|
|
25
24
|
# text = BlockTextExtractor.extract(paragraph_node)
|
|
@@ -31,7 +30,8 @@ module Jekyll
|
|
|
31
30
|
#
|
|
32
31
|
# Returns nil if element is not extractable or if extracted text fails
|
|
33
32
|
# validation. Clones the node, removes nested block elements and empty
|
|
34
|
-
# icon tags, normalizes whitespace,
|
|
33
|
+
# icon tags, normalizes whitespace, and validates. HTML entities are
|
|
34
|
+
# preserved verbatim to match the keys produced by the extraction pipeline.
|
|
35
35
|
#
|
|
36
36
|
# @param node [Nokogiri::XML::Element] DOM element to extract from
|
|
37
37
|
# @return [String, nil] Normalized text from element, or nil if not valid
|
|
@@ -43,7 +43,6 @@ module Jekyll
|
|
|
43
43
|
HtmlTextUtils.remove_empty_icon_tags(clone)
|
|
44
44
|
|
|
45
45
|
text = TextNormalizer.normalize(clone.inner_html).strip
|
|
46
|
-
text = HtmlTextUtils.decode_html_entities(text)
|
|
47
46
|
|
|
48
47
|
TextValidator.valid?(text) ? text : nil
|
|
49
48
|
end
|
|
@@ -108,7 +108,7 @@ module Jekyll
|
|
|
108
108
|
normalized_text = TextNormalizer.normalize(text)
|
|
109
109
|
translated = TranslationResolver.resolve(node, normalized_text, translations)
|
|
110
110
|
|
|
111
|
-
return if apply_block_level_translation?(node, normalized_text,
|
|
111
|
+
return if apply_block_level_translation?(node, normalized_text, translations)
|
|
112
112
|
|
|
113
113
|
if @debug_logging
|
|
114
114
|
log_translation_debug_info(text, normalized_text, translated,
|
|
@@ -204,18 +204,45 @@ module Jekyll
|
|
|
204
204
|
@debug_logging && text.include?('attribute')
|
|
205
205
|
end
|
|
206
206
|
|
|
207
|
-
def apply_block_level_translation?(node, normalized_text,
|
|
208
|
-
|
|
207
|
+
def apply_block_level_translation?(node, normalized_text, translations)
|
|
208
|
+
ancestor = find_content_element_ancestor(node)
|
|
209
|
+
return false unless ancestor
|
|
209
210
|
|
|
210
|
-
return false if TranslationResolver.contains_protected_elements?(
|
|
211
|
+
return false if TranslationResolver.contains_protected_elements?(ancestor)
|
|
211
212
|
|
|
212
|
-
block_text = BlockTextExtractor.extract(
|
|
213
|
+
block_text = BlockTextExtractor.extract(ancestor)
|
|
213
214
|
return false unless block_text && block_text != normalized_text
|
|
214
215
|
|
|
215
|
-
|
|
216
|
+
translation = select_block_translation(node, ancestor, normalized_text, block_text,
|
|
217
|
+
translations)
|
|
218
|
+
return false unless translation
|
|
219
|
+
|
|
220
|
+
apply_block_level_translation(ancestor, translation)
|
|
216
221
|
true
|
|
217
222
|
end
|
|
218
223
|
|
|
224
|
+
# When text is directly inside a content element, prefer its direct translation
|
|
225
|
+
# (the msgstr may itself contain HTML, e.g. a heading with an anchor).
|
|
226
|
+
# When text is nested inside inline element(s), only apply if the full block
|
|
227
|
+
# text is an explicit translation key.
|
|
228
|
+
def select_block_translation(node, ancestor, normalized_text, block_text, translations)
|
|
229
|
+
if node.parent == ancestor
|
|
230
|
+
translations[normalized_text] || translations[block_text]
|
|
231
|
+
else
|
|
232
|
+
translations[block_text]
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
def find_content_element_ancestor(node)
|
|
237
|
+
current = node&.parent
|
|
238
|
+
while current&.element?
|
|
239
|
+
return current if content_element?(current)
|
|
240
|
+
|
|
241
|
+
current = current.parent
|
|
242
|
+
end
|
|
243
|
+
nil
|
|
244
|
+
end
|
|
245
|
+
|
|
219
246
|
def log_translation_debug_info(text, normalized_text, translated, translations)
|
|
220
247
|
translation_data = DebugLogger::TranslationData.new(text: text,
|
|
221
248
|
normalized_text: normalized_text,
|
|
@@ -225,7 +252,8 @@ module Jekyll
|
|
|
225
252
|
end
|
|
226
253
|
|
|
227
254
|
private :log_text_node_debug, :should_skip_translation?, :should_log_text_debug?,
|
|
228
|
-
:apply_block_level_translation?, :
|
|
255
|
+
:apply_block_level_translation?, :select_block_translation,
|
|
256
|
+
:find_content_element_ancestor, :log_translation_debug_info
|
|
229
257
|
end
|
|
230
258
|
end
|
|
231
259
|
end
|
|
@@ -89,8 +89,10 @@ module Jekyll
|
|
|
89
89
|
# Extract text with inline tags preserved.
|
|
90
90
|
#
|
|
91
91
|
# Extracts text from an element, removes block elements and empty icons,
|
|
92
|
-
# normalizes whitespace
|
|
93
|
-
#
|
|
92
|
+
# and normalizes whitespace. HTML entities (e.g. &lt;, &gt;) are preserved
|
|
93
|
+
# verbatim so that entity-encoded content inside inline elements (such as
|
|
94
|
+
# <code>&lt;p&gt;</code>) is written to PO msgids as-is and does not
|
|
95
|
+
# become a live HTML tag when the msgstr is later injected via inner_html.
|
|
94
96
|
#
|
|
95
97
|
# @param node [Nokogiri::XML::Node] Element to extract from
|
|
96
98
|
# @return [String] Extracted and normalized text
|
|
@@ -100,7 +102,6 @@ module Jekyll
|
|
|
100
102
|
remove_empty_icon_tags(clone)
|
|
101
103
|
|
|
102
104
|
text = TextNormalizer.normalize(clone.inner_html)
|
|
103
|
-
text = decode_html_entities(text)
|
|
104
105
|
text&.then { |t| TextNormalizer.normalize(t).strip }
|
|
105
106
|
end
|
|
106
107
|
|
|
@@ -59,13 +59,12 @@ module Jekyll
|
|
|
59
59
|
# # Returns nil (protected element present, prevents block translation)
|
|
60
60
|
# TranslationResolver.try_block_level_translation(node, "text", translations)
|
|
61
61
|
def self.try_block_level_translation(node, text, translations)
|
|
62
|
-
|
|
62
|
+
ancestor = find_content_element_ancestor(node)
|
|
63
|
+
return nil unless ancestor
|
|
63
64
|
|
|
64
|
-
|
|
65
|
-
# (script, style, pre). These cannot be safely applied at block level.
|
|
66
|
-
return nil if contains_protected_elements?(node.parent)
|
|
65
|
+
return nil if contains_protected_elements?(ancestor)
|
|
67
66
|
|
|
68
|
-
block_text = BlockTextExtractor.extract(
|
|
67
|
+
block_text = BlockTextExtractor.extract(ancestor)
|
|
69
68
|
return nil unless block_text && block_text != text
|
|
70
69
|
|
|
71
70
|
translations[block_text]
|
|
@@ -78,6 +77,17 @@ module Jekyll
|
|
|
78
77
|
HtmlElements::CONTENT_ELEMENTS.include?(node.name)
|
|
79
78
|
end
|
|
80
79
|
|
|
80
|
+
def self.find_content_element_ancestor(node)
|
|
81
|
+
current = node&.parent
|
|
82
|
+
while current&.element?
|
|
83
|
+
return current if content_element?(current)
|
|
84
|
+
|
|
85
|
+
current = current.parent
|
|
86
|
+
end
|
|
87
|
+
nil
|
|
88
|
+
end
|
|
89
|
+
private_class_method :find_content_element_ancestor
|
|
90
|
+
|
|
81
91
|
# Check if an element contains protected child elements that block translations.
|
|
82
92
|
#
|
|
83
93
|
# Protected elements (script, style, pre) cannot have their surrounding text
|