jekyll-l10n 1.3.15 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4dfd489e89732a8083e339c87e7ac32f3ceff7723be4a963303649eff9e397f9
4
- data.tar.gz: aa3f4cdc2756e7923307e9fa9f26ad37ee06709c9ce7a47cb3a20a35fbb304dd
3
+ metadata.gz: 1c8edcdac30ed6d89b587d914f1b8038ffd3b10a95a0e15cc43965dd915e39f2
4
+ data.tar.gz: 513218e1e01c3814a2f7130f60a6b35463d0836659ee13d85ae3d7c9a0aaf6c5
5
5
  SHA512:
6
- metadata.gz: ec106808864ebc0bdf6e278e4dcdbd29d4a56e58b8474cfa706738dd4065b71b65f844815061a86b6335dd64b26ca5c669bb4bdcc4447d4b5a2ce7c16b6b80d5
7
- data.tar.gz: e5e4a5a0af33c57cb526c2a9de8ecd84845d60b4eddd44f39d417e3f7bab6e1bf39a2da09a54566b919216e3cc4a058de09762038b695287827625f6eda7dd04
6
+ metadata.gz: 24586cc374d7ad819cbda298c98d9dc70438b60bc7afafcc6d695190bb62dc6294ddfd4bb02fb4f04c57c3231cea3fe12047dc9bdf3eb0de5390c6e4fb7a0970
7
+ data.tar.gz: f5f5bb5755c8354794fd81090c74c0519f06fb15c216cc1b9ba748e5837dcef7b7a05c324d5f67feef8c544884e17a936d492c76132ac26cb7f0d6f0bafa22e6
@@ -189,7 +189,7 @@ module Jekyll
189
189
  end
190
190
 
191
191
  def self.escape_backslashes(value)
192
- value.gsub('\\', '\\\\')
192
+ value.gsub('\\') { '\\\\' }
193
193
  end
194
194
 
195
195
  def self.escape_quotes_and_get_delimiter(escaped)
@@ -19,7 +19,6 @@ module Jekyll
19
19
  # * Remove nested block elements from text
20
20
  # * Remove empty icon tags (external link markers)
21
21
  # * Normalize and validate extracted text
22
- # * Decode HTML entities
23
22
  #
24
23
  # @example
25
24
  # text = BlockTextExtractor.extract(paragraph_node)
@@ -31,7 +30,8 @@ module Jekyll
31
30
  #
32
31
  # Returns nil if element is not extractable or if extracted text fails
33
32
  # validation. Clones the node, removes nested block elements and empty
34
- # icon tags, normalizes whitespace, decodes HTML entities, and validates.
33
+ # icon tags, normalizes whitespace, and validates. HTML entities are
34
+ # preserved verbatim to match the keys produced by the extraction pipeline.
35
35
  #
36
36
  # @param node [Nokogiri::XML::Element] DOM element to extract from
37
37
  # @return [String, nil] Normalized text from element, or nil if not valid
@@ -39,11 +39,11 @@ module Jekyll
39
39
  return nil unless extractable?(node)
40
40
 
41
41
  clone = node.dup
42
+ HtmlTextUtils.remove_code_blocks(clone)
42
43
  HtmlTextUtils.remove_block_elements(clone)
43
44
  HtmlTextUtils.remove_empty_icon_tags(clone)
44
45
 
45
46
  text = TextNormalizer.normalize(clone.inner_html).strip
46
- text = HtmlTextUtils.decode_html_entities(text)
47
47
 
48
48
  TextValidator.valid?(text) ? text : nil
49
49
  end
@@ -108,7 +108,7 @@ module Jekyll
108
108
  normalized_text = TextNormalizer.normalize(text)
109
109
  translated = TranslationResolver.resolve(node, normalized_text, translations)
110
110
 
111
- return if apply_block_level_translation?(node, normalized_text, translated)
111
+ return if apply_block_level_translation?(node, normalized_text, translations)
112
112
 
113
113
  if @debug_logging
114
114
  log_translation_debug_info(text, normalized_text, translated,
@@ -147,9 +147,15 @@ module Jekyll
147
147
  "[HtmlTranslator] Translation: #{translation[0..truncate_length]}..."
148
148
  end
149
149
 
150
+ inject_translation_preserving_structure(parent, translation)
151
+ end
152
+
153
+ def inject_translation_preserving_structure(parent, translation)
154
+ preserved_blocks = parent.css('pre').map(&:dup)
150
155
  preserved_anchors = parent.css('.heading-anchor').map(&:dup)
151
156
  parent.children.each(&:remove)
152
157
  parent.inner_html = translation
158
+ preserved_blocks.each { |b| parent.add_child(b) }
153
159
  preserved_anchors.each { |a| parent.add_child(a) } if parent.css('.heading-anchor').empty?
154
160
  end
155
161
 
@@ -204,18 +210,45 @@ module Jekyll
204
210
  @debug_logging && text.include?('attribute')
205
211
  end
206
212
 
207
- def apply_block_level_translation?(node, normalized_text, translated)
208
- return false unless translated && node.parent && content_element?(node.parent)
213
+ def apply_block_level_translation?(node, normalized_text, translations)
214
+ ancestor = find_content_element_ancestor(node)
215
+ return false unless ancestor
209
216
 
210
- return false if TranslationResolver.contains_protected_elements?(node.parent)
217
+ return false if TranslationResolver.contains_protected_elements?(ancestor)
211
218
 
212
- block_text = BlockTextExtractor.extract(node.parent)
219
+ block_text = BlockTextExtractor.extract(ancestor)
213
220
  return false unless block_text && block_text != normalized_text
214
221
 
215
- apply_block_level_translation(node.parent, translated)
222
+ translation = select_block_translation(node, ancestor, normalized_text, block_text,
223
+ translations)
224
+ return false unless translation
225
+
226
+ apply_block_level_translation(ancestor, translation)
216
227
  true
217
228
  end
218
229
 
230
+ # When text is directly inside a content element, prefer its direct translation
231
+ # (the msgstr may itself contain HTML, e.g. a heading with an anchor).
232
+ # When text is nested inside inline element(s), only apply if the full block
233
+ # text is an explicit translation key.
234
+ def select_block_translation(node, ancestor, normalized_text, block_text, translations)
235
+ if node.parent == ancestor
236
+ translations[normalized_text] || translations[block_text]
237
+ else
238
+ translations[block_text]
239
+ end
240
+ end
241
+
242
+ def find_content_element_ancestor(node)
243
+ current = node&.parent
244
+ while current&.element?
245
+ return current if content_element?(current)
246
+
247
+ current = current.parent
248
+ end
249
+ nil
250
+ end
251
+
219
252
  def log_translation_debug_info(text, normalized_text, translated, translations)
220
253
  translation_data = DebugLogger::TranslationData.new(text: text,
221
254
  normalized_text: normalized_text,
@@ -225,7 +258,9 @@ module Jekyll
225
258
  end
226
259
 
227
260
  private :log_text_node_debug, :should_skip_translation?, :should_log_text_debug?,
228
- :apply_block_level_translation?, :log_translation_debug_info
261
+ :apply_block_level_translation?, :select_block_translation,
262
+ :find_content_element_ancestor, :log_translation_debug_info,
263
+ :inject_translation_preserving_structure
229
264
  end
230
265
  end
231
266
  end
@@ -48,6 +48,22 @@ module Jekyll
48
48
  .gsub('&#39;', "'")
49
49
  end
50
50
 
51
+ # Remove preformatted code blocks from a node.
52
+ #
53
+ # Removes all <pre> elements entirely. With highlighter: none in Jekyll config,
54
+ # fenced code blocks produce plain <pre><code> as direct children of content
55
+ # elements — no Rouge wrappers. Removing <pre> before extraction ensures raw
56
+ # code never appears in PO msgids.
57
+ #
58
+ # Must run before remove_block_elements_from_node so that <code> inside <pre>
59
+ # is gone before the general flattening pass.
60
+ #
61
+ # @param node [Nokogiri::XML::Node] Node to process (modified in place)
62
+ # @return [void]
63
+ def self.remove_code_blocks(node)
64
+ node.css('pre').each(&:remove)
65
+ end
66
+
51
67
  # Remove block-level elements from a cloned node.
52
68
  #
53
69
  # Replaces block-level element nodes with their children (flattening structure).
@@ -89,18 +105,20 @@ module Jekyll
89
105
  # Extract text with inline tags preserved.
90
106
  #
91
107
  # Extracts text from an element, removes block elements and empty icons,
92
- # normalizes whitespace, and decodes HTML entities. Returns plain text
93
- # suitable for translation.
108
+ # and normalizes whitespace. HTML entities (e.g. &lt;, &gt;) are preserved
109
+ # verbatim so that entity-encoded content inside inline elements (such as
110
+ # <code>&lt;p&gt;</code>) is written to PO msgids as-is and does not
111
+ # become a live HTML tag when the msgstr is later injected via inner_html.
94
112
  #
95
113
  # @param node [Nokogiri::XML::Node] Element to extract from
96
114
  # @return [String] Extracted and normalized text
97
115
  def self.extract_with_inline_tags(node)
98
116
  clone = node.dup
117
+ remove_code_blocks(clone)
99
118
  remove_block_elements_from_node(clone)
100
119
  remove_empty_icon_tags(clone)
101
120
 
102
121
  text = TextNormalizer.normalize(clone.inner_html)
103
- text = decode_html_entities(text)
104
122
  text&.then { |t| TextNormalizer.normalize(t).strip }
105
123
  end
106
124
 
@@ -47,7 +47,7 @@ module Jekyll
47
47
  # node alone doesn't have a direct translation but the entire block does.
48
48
  #
49
49
  # Security consideration: Returns nil if the block contains protected elements
50
- # (script, style, pre tags) to prevent unsafe translation application.
50
+ # (script, style) to prevent unsafe translation application.
51
51
  #
52
52
  # @param node [Nokogiri::XML::Node] Text node being translated
53
53
  # @param text [String] Normalized text of the node
@@ -59,13 +59,12 @@ module Jekyll
59
59
  # # Returns nil (protected element present, prevents block translation)
60
60
  # TranslationResolver.try_block_level_translation(node, "text", translations)
61
61
  def self.try_block_level_translation(node, text, translations)
62
- return nil unless node.parent && content_element?(node.parent)
62
+ ancestor = find_content_element_ancestor(node)
63
+ return nil unless ancestor
63
64
 
64
- # Don't attempt block-level translation if parent contains protected elements
65
- # (script, style, pre). These cannot be safely applied at block level.
66
- return nil if contains_protected_elements?(node.parent)
65
+ return nil if contains_protected_elements?(ancestor)
67
66
 
68
- block_text = BlockTextExtractor.extract(node.parent)
67
+ block_text = BlockTextExtractor.extract(ancestor)
69
68
  return nil unless block_text && block_text != text
70
69
 
71
70
  translations[block_text]
@@ -78,15 +77,24 @@ module Jekyll
78
77
  HtmlElements::CONTENT_ELEMENTS.include?(node.name)
79
78
  end
80
79
 
80
+ def self.find_content_element_ancestor(node)
81
+ current = node&.parent
82
+ while current&.element?
83
+ return current if content_element?(current)
84
+
85
+ current = current.parent
86
+ end
87
+ nil
88
+ end
89
+ private_class_method :find_content_element_ancestor
90
+
81
91
  # Check if an element contains protected child elements that block translations.
82
92
  #
83
- # Protected elements (script, style, pre) cannot have their surrounding text
84
- # translated at the block level because:
85
- # * script/style: Security and functionality reasons (executable content)
86
- # * pre: Multi-line code blocks where translations break formatting
87
- #
88
- # This is a shared utility used by both HtmlTranslator and TranslationResolver
89
- # to ensure consistent protection of sensitive content across the codebase.
93
+ # Protected elements (script, style) cannot have their surrounding text
94
+ # translated at the block level for security and functionality reasons.
95
+ # <pre> is not protected here — HtmlTextUtils.remove_code_blocks strips it
96
+ # before extraction so code content never reaches PO msgids, and
97
+ # HtmlTranslator preserves <pre> verbatim across translation injection.
90
98
  #
91
99
  # @param node [Nokogiri::XML::Node] Element to check
92
100
  # @return [Boolean] true if node contains protected elements, false otherwise
@@ -101,13 +109,12 @@ module Jekyll
101
109
  # doc = Nokogiri::HTML('<p><code>inline</code> text</p>')
102
110
  # para = doc.xpath('//p').first
103
111
  # TranslationResolver.contains_protected_elements?(para)
104
- # # => false (code is allowed, only script/style/pre are protected)
112
+ # # => false (code is allowed, only script/style are protected)
105
113
  def self.contains_protected_elements?(node)
106
114
  return false unless node.element?
107
115
 
108
- # Block block-level translation for script, style (security/functionality),
109
- # and pre (multi-line code blocks). These cannot be safely applied at block level.
110
- protected_elements = %w[script style pre]
116
+ # Block block-level translation for script and style (security/functionality).
117
+ protected_elements = %w[script style]
111
118
  node.children.any? { |child| child.element? && protected_elements.include?(child.name) }
112
119
  end
113
120
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-l10n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.15
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - ReleaseBot