jekyll-l10n 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/lib/jekyll-l10n/constants.rb +136 -0
- data/lib/jekyll-l10n/errors.rb +60 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
- data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
- data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
- data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
- data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
- data/lib/jekyll-l10n/extraction/logger.rb +48 -0
- data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
- data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
- data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
- data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
- data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
- data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
- data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
- data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
- data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
- data/lib/jekyll-l10n/po_file/loader.rb +64 -0
- data/lib/jekyll-l10n/po_file/manager.rb +160 -0
- data/lib/jekyll-l10n/po_file/merger.rb +80 -0
- data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
- data/lib/jekyll-l10n/po_file/reader.rb +518 -0
- data/lib/jekyll-l10n/po_file/writer.rb +232 -0
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
- data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
- data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
- data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
- data/lib/jekyll-l10n/translation/translator.rb +179 -0
- data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
- data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
- data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
- data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
- data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
- data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
- data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
- data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
- data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
- data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
- data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
- data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
- data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
- data/lib/jekyll-l10n/version.rb +10 -0
- data/lib/jekyll-l10n.rb +268 -0
- metadata +200 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
  module L10n
    # Handles errors with graceful fallback and logging.
    #
    # ErrorHandler runs a block, catches any StandardError it raises, reports the
    # failure through Jekyll's logger and hands back a fallback value, so that a
    # single failing operation does not abort the surrounding work.
    #
    # Key responsibilities:
    # * Execute code with error catching
    # * Log errors with context information
    # * Provide fallback default values on error
    # * Optional debug backtraces (when the DEBUG environment variable is set)
    #
    # @example
    #   result = ErrorHandler.handle_with_default('file loading', {}) do
    #     load_file('config.yml')
    #   end
    #   # Returns loaded data or {} on error
    class ErrorHandler
      # Execute code with error catching and fallback logging.
      #
      # Runs the block; if it raises a StandardError the error is logged with
      # the given context and nil is returned instead.
      #
      # @param context [String] Context describing what was being done
      # @yield Code to execute
      # @return [Object, nil] Result of block or nil on error
      def self.handle_with_logging(context, &block)
        # Same behaviour as handle_with_default with a nil fallback.
        handle_with_default(context, nil, &block)
      end

      # Execute code with error catching and fallback value.
      #
      # Runs the block; if it raises a StandardError the error is logged with
      # the given context and default_value is returned instead.
      #
      # @param context [String] Context describing what was being done
      # @param default_value [Object] Default value to return on error
      # @yield Code to execute
      # @return [Object] Result of block or default_value on error
      def self.handle_with_default(context, default_value)
        yield
      rescue StandardError => e
        log_error(context, e)
        default_value
      end

      # Log an error with context.
      #
      # Always logs the error message at error level. When the DEBUG
      # environment variable is set, the backtrace is additionally logged at
      # debug level, provided the error has one.
      #
      # @param context [String] Context describing what was being done
      # @param error [StandardError] The error that occurred
      # @return [void]
      def self.log_error(context, error)
        Jekyll.logger.error "Localization", "Error in #{context}: #{error.message}"
        return unless ENV["DEBUG"]

        # Exception#backtrace is nil for an exception object that was never
        # raised; skip the backtrace line instead of failing on nil.join.
        backtrace = error.backtrace
        return if backtrace.nil?

        Jekyll.logger.debug "Localization", backtrace.join("\n")
      end
    end
  end
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require_relative "html_parser"
|
|
5
|
+
|
|
6
|
+
module Jekyll
  module L10n
    # Restores inline elements inside links (external link icons and similar)
    # that are present in the original HTML but missing from the translated HTML.
    #
    # Links are paired between the two documents by their exact href value; the
    # first link with a matching href in the translated document is used.
    module ExternalLinkIconPreserver
      # CSS selector identifying an external link icon.
      ICON_SELECTOR = "i.fa-external-link"

      # Preserves external link icons from original HTML in translated HTML
      #
      # External link icons (e.g., FontAwesome's fa-external-link) are often
      # lost during translation because:
      # 1. Block-level translation replaces entire element content
      # 2. Empty icon tags are removed during text extraction
      #
      # This method restores icons by:
      # 1. Finding all i.fa-external-link icons in the original HTML
      # 2. Matching links by href attribute
      # 3. Copying icons to the translated HTML
      #
      # If the original contains no such icons, translated_html is returned
      # untouched (not re-serialized). Any StandardError is logged and
      # translated_html is returned unchanged.
      #
      # @param original_html [String] Original HTML with icons
      # @param translated_html [String] Translated HTML potentially missing icons
      # @return [String] Translated HTML with icons restored
      def self.preserve(original_html, translated_html)
        # Use Nokogiri::HTML() instead of DocumentFragment.parse() to preserve the full
        # HTML document structure including DOCTYPE, html, head, and body tags.
        # DocumentFragment.parse() is appropriate for partial HTML only, and would strip
        # document-level structure.
        original_doc = Nokogiri::HTML(original_html)
        original_icons = original_doc.css(ICON_SELECTOR)
        return translated_html if original_icons.empty?

        # Only parse the translated document once we know there is work to do.
        translated_doc = Nokogiri::HTML(translated_html)
        restore_icons_to_translated(original_icons, translated_doc)

        # Remove the auto-inserted meta tag by libxml2 during HTML serialization
        HtmlParser.remove_meta_charset(translated_doc.to_html)
      rescue StandardError => e
        Jekyll.logger.error "Localization", "Error preserving external link icons: #{e.message}"
        translated_html
      end

      # Copy each original icon into the matching translated link.
      #
      # An icon is copied only when its parent is an <a> with an href, a link
      # with the same href exists in translated_doc, and that link does not
      # already contain an external link icon.
      #
      # @param original_icons [Enumerable<Nokogiri::XML::Node>] Icons found in the original
      # @param translated_doc [Nokogiri::HTML::Document] Translated document (modified in place)
      # @return [Enumerable<Nokogiri::XML::Node>] The original_icons collection
      def self.restore_icons_to_translated(original_icons, translated_doc)
        original_icons.each do |icon|
          link = icon.parent
          next unless link&.name == "a"

          translated_link = find_link_by_href(translated_doc, link["href"])
          next unless translated_link

          # Check if icon already exists
          next if translated_link.css(ICON_SELECTOR).any?

          translated_link.add_child(icon.dup)
        end
      end

      # Alternative method that preserves ALL inline elements, not just icons
      # Useful for preserving SVG icons, badges, etc.
      #
      # Any StandardError is logged and translated_html is returned unchanged.
      #
      # @param original_html [String] Original HTML
      # @param translated_html [String] Translated HTML
      # @return [String] Translated HTML with all inline elements preserved
      def self.preserve_all_inline_elements(original_html, translated_html)
        original_doc = Nokogiri::HTML(original_html)
        translated_doc = Nokogiri::HTML(translated_html)

        original_doc.css("a[href]").each do |original_link|
          preserve_link_inline_elements(original_link, translated_doc)
        end

        HtmlParser.remove_meta_charset(translated_doc.to_html)
      rescue StandardError => e
        Jekyll.logger.error "Localization", "Error preserving inline elements: #{e.message}"
        translated_html
      end

      # Copy the element children of one original link into its translated
      # counterpart, skipping children for which a similar element already exists.
      #
      # @param original_link [Nokogiri::XML::Node] Link from the original document
      # @param translated_doc [Nokogiri::HTML::Document] Translated document (modified in place)
      # @return [void]
      def self.preserve_link_inline_elements(original_link, translated_doc)
        # Get all non-text children (i, svg, span, etc.)
        inline_children = original_link.children.select(&:element?)
        return if inline_children.empty?

        translated_link = find_link_by_href(translated_doc, original_link["href"])
        return unless translated_link

        inline_children.each { |child| add_child_if_not_exists(child, translated_link) }
      end

      # Append a copy of child to translated_link unless a similar element is
      # already inside it.
      #
      # "Similar" means a descendant with the same tag name that also carries
      # the child's first CSS class. When the child has no class, any descendant
      # with the same tag name counts. The comparison is done in Ruby rather
      # than through a generated "tag.class" selector, so class-less children
      # and class names containing selector metacharacters are handled.
      #
      # @param child [Nokogiri::XML::Node] Element from the original link
      # @param translated_link [Nokogiri::XML::Node] Link to receive the copy
      # @return [Nokogiri::XML::Node, nil] The added copy, or nil when skipped
      def self.add_child_if_not_exists(child, translated_link)
        marker_class = child["class"].to_s.split.first
        same_tag = translated_link.css(child.name)
        already_present = same_tag.any? do |candidate|
          marker_class.nil? || candidate["class"].to_s.split.include?(marker_class)
        end
        return if already_present

        translated_link.add_child(child.dup)
      end

      # Find the first link in doc whose href equals the given value exactly.
      #
      # The href is compared as a plain string instead of being interpolated
      # into a CSS selector, so quotes and backslashes in URLs cannot produce an
      # invalid selector.
      #
      # @param doc [Nokogiri::XML::Node] Document or node to search
      # @param href [String, nil] href value to look for
      # @return [Nokogiri::XML::Node, nil] Matching link, or nil if none (or href is nil)
      def self.find_link_by_href(doc, href)
        return nil unless href

        doc.css("a[href]").find { |candidate| candidate["href"] == href }
      end
      private_class_method :find_link_by_href
    end
  end
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module Jekyll
  module L10n
    # File I/O operations with UTF-8 encoding.
    #
    # FileOperations is the single place where files are read and written and
    # where parent directories are created. Reads and writes always use the
    # encoding named by ENCODING.
    #
    # Key responsibilities:
    # * Read files with UTF-8 encoding
    # * Write files with UTF-8 encoding
    # * Create directory structures as needed
    #
    # @example
    #   content = FileOperations.read_utf8('_locales/es.po')
    #   FileOperations.write_utf8('output.po', content)
    #   FileOperations.ensure_directory('output/path/file.po')
    module FileOperations
      ENCODING = "UTF-8"

      class << self
        # Read a whole file as UTF-8.
        #
        # @param path [String] Path to file to read
        # @return [String] File contents as UTF-8 string
        def read_utf8(path)
          ::File.read(path, encoding: ENCODING)
        end

        # Write content to a file as UTF-8.
        #
        # @param path [String] Path to file to write
        # @param content [String] Content to write
        # @return [Integer] Number of bytes written
        def write_utf8(path, content)
          ::File.write(path, content, encoding: ENCODING)
        end

        # Make sure the directory that will contain file_path exists.
        #
        # The directory part of file_path is created together with any missing
        # parents. Nothing is done when it is already there.
        #
        # @param file_path [String] File path (directory is extracted from this)
        # @return [void]
        def ensure_directory(file_path)
          parent = ::File.dirname(file_path)
          return if ::Dir.exist?(parent)

          ::FileUtils.mkdir_p(parent)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
  module L10n
    # Constants defining HTML element categories for extraction and translation.
    #
    # HtmlElements groups tag names into three frozen lists: content elements
    # (tags that hold translatable text), container elements (structural
    # grouping tags) and block elements (layout-related tags).
    #
    # Key responsibilities:
    # * Define content elements (paragraphs, headings, lists, etc.)
    # * Define container elements (div, section, article, etc.)
    # * Define block elements (all layout-related elements)
    module HtmlElements
      # Content elements that contain translatable text
      CONTENT_ELEMENTS = %w[
        p
        h1 h2 h3 h4 h5 h6
        li dd dt
        blockquote figcaption
      ].freeze

      # Container elements for structural grouping
      CONTAINER_ELEMENTS = %w[
        div figure section article aside
      ].freeze

      # Block elements for layout
      BLOCK_ELEMENTS = %w[
        p div section article aside
        figure figcaption blockquote pre
        ul ol li
        dl dt dd
        form table
        header footer nav address
      ].freeze
    end
  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
  module L10n
    # Parses HTML content using Nokogiri.
    #
    # HtmlParser wraps the two Nokogiri entry points used for HTML: parsing a
    # complete document and parsing a fragment. It also strips the Content-Type
    # meta tag that shows up in serialized output.
    #
    # Key responsibilities:
    # * Parse full HTML documents with DOCTYPE preservation
    # * Parse HTML fragments for partial content
    # * Remove auto-inserted meta charset tags
    class HtmlParser
      class << self
        # Parse HTML as a full document.
        #
        # Use this for complete HTML documents. The meta tag added on
        # serialization can be removed afterwards with remove_meta_charset.
        #
        # @param html [String] HTML content to parse
        # @return [Nokogiri::HTML::Document] Parsed HTML document
        def parse_document(html)
          Nokogiri::HTML(html)
        end

        # Parse HTML as a fragment.
        #
        # Use this for pieces of HTML that are not a complete document.
        #
        # @param html [String] HTML fragment to parse
        # @return [Nokogiri::HTML::DocumentFragment] Parsed HTML fragment
        def parse_fragment(html)
          Nokogiri::HTML.fragment(html)
        end

        # Remove auto-inserted meta charset tag from serialized HTML.
        #
        # Deletes every occurrence of the exact UTF-8 Content-Type meta tag,
        # together with one directly following newline if present. The input
        # string is not modified; a new string is returned.
        #
        # @param html_string [String] Serialized HTML
        # @return [String] HTML with meta charset tag removed
        def remove_meta_charset(html_string)
          auto_meta = %r{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n?}
          html_string.gsub(auto_meta, "")
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "text_normalizer"
|
|
4
|
+
require_relative "html_elements"
|
|
5
|
+
require_relative "text_validator"
|
|
6
|
+
|
|
7
|
+
module Jekyll
  module L10n
    # Utilities for extracting and manipulating HTML text content.
    #
    # HtmlTextUtils provides helpers for extracting text from HTML elements while
    # preserving inline formatting, removing block-level elements, decoding HTML
    # entities, and cleaning up icon tags. These utilities support the extraction
    # and translation pipelines.
    #
    # Key responsibilities:
    # * Extract text with inline HTML tags preserved
    # * Remove block-level elements from cloned nodes
    # * Remove empty icon tags
    # * Decode HTML entities to plain text
    # * Validate extracted text content
    module HtmlTextUtils
      # Extended content elements for text extraction (includes inline elements)
      CONTENT_ELEMENTS = %w(
        p h1 h2 h3 h4 h5 h6
        li dd dt blockquote figcaption
        button span a label
      ).freeze

      CONTAINER_ELEMENTS = HtmlElements::CONTAINER_ELEMENTS
      ALL_BLOCK_ELEMENTS = (CONTENT_ELEMENTS + CONTAINER_ELEMENTS).freeze

      # Entities handled by the manual fallback of decode_html_entities.
      FALLBACK_ENTITIES = {
        "&amp;"  => "&",
        "&lt;"   => "<",
        "&gt;"   => ">",
        "&quot;" => '"',
        "&#39;"  => "'",
        "&apos;" => "'",
      }.freeze

      # Matches any single entity listed in FALLBACK_ENTITIES.
      FALLBACK_ENTITY_PATTERN = Regexp.union(FALLBACK_ENTITIES.keys).freeze

      # Decode HTML entities to plain text.
      #
      # Converts HTML entities (&amp;, &lt;, etc.) to their plain text equivalents.
      # Uses CGI.unescape_html if available, falls back to manual replacement of
      # the entities in FALLBACK_ENTITIES when loading or calling CGI fails.
      #
      # @param text [String, nil] Text with HTML entities
      # @return [String, nil] Text with entities decoded; nil when text is nil
      def self.decode_html_entities(text)
        # extract_with_inline_tags may pass nil; there is nothing to decode.
        return text if text.nil?

        require "cgi"
        CGI.unescape_html(text)
      rescue StandardError, LoadError
        # LoadError is not a StandardError, so it is listed explicitly to make
        # the "CGI not available" fallback reachable.
        # Single pass: each entity is decoded exactly once, so "&amp;lt;"
        # becomes "&lt;" rather than being decoded twice into "<".
        text.gsub(FALLBACK_ENTITY_PATTERN, FALLBACK_ENTITIES)
      end

      # Remove block-level elements from a cloned node.
      #
      # Replaces every descendant whose tag is listed in
      # HtmlElements::BLOCK_ELEMENTS with its own children (flattening the
      # structure). Used to extract text while preserving inline elements.
      #
      # @param node [Nokogiri::XML::Node] Node to process (modified in place)
      # @return [void]
      def self.remove_block_elements_from_node(node)
        HtmlElements::BLOCK_ELEMENTS.each do |tag|
          node.xpath(".//#{tag}").each do |elem|
            elem.replace(elem.children)
          end
        end
      end

      # Remove block-level elements from a node.
      #
      # Alias for remove_block_elements_from_node for convenience.
      #
      # @param node [Nokogiri::XML::Node] Node to process (modified in place)
      # @return [void]
      def self.remove_block_elements(node)
        remove_block_elements_from_node(node)
      end

      # Remove empty icon tags from a node.
      #
      # Removes every descendant <i> element whose text is empty or whitespace
      # only. Used to clean up external link icon markers before text extraction.
      #
      # @param node [Nokogiri::XML::Node] Node to process (modified in place)
      # @return [void]
      def self.remove_empty_icon_tags(node)
        node.xpath(".//i").each do |elem|
          elem.remove if elem.text.strip.empty?
        end
      end

      # Extract text with inline tags preserved.
      #
      # Works on a copy of the node: block elements are flattened and empty
      # icons removed, then the copy's inner HTML is passed through
      # TextNormalizer.normalize, stripped, and entity-decoded. The node passed
      # in is not modified.
      #
      # @param node [Nokogiri::XML::Node] Element to extract from
      # @return [String, nil] Extracted and normalized text; nil when the
      #   normalizer returns nil
      def self.extract_with_inline_tags(node)
        clone = node.dup
        remove_block_elements_from_node(clone)
        remove_empty_icon_tags(clone)

        text = TextNormalizer.normalize(clone.inner_html)
        decode_html_entities(text&.strip)
      end

      # Extract and validate text from a node.
      #
      # Extracts text only when the node is a content element (see
      # extractable?), then returns it only if TextValidator.valid? accepts it.
      #
      # @param node [Nokogiri::XML::Node] Node to extract from
      # @return [String, nil] Validated text, or nil if not extractable or invalid
      def self.extract_and_validate_text(node)
        return nil unless extractable?(node)

        text = extract_with_inline_tags(node)
        TextValidator.valid?(text) ? text : nil
      end

      # Check if a node is extractable (content element).
      #
      # @param node [Nokogiri::XML::Node] Node to check
      # @return [Boolean] True if node is an element listed in CONTENT_ELEMENTS
      def self.extractable?(node)
        node.element? && CONTENT_ELEMENTS.include?(node.name)
      end
    end
  end
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
  module L10n
    # Formats and conditionally logs messages with component prefixes.
    #
    # LoggerFormatter is a thin layer over Jekyll.logger. Every message is sent
    # under the "Localization" topic and prefixed with the component name in
    # square brackets (e.g., "[HtmlTranslator]"). It also offers helpers that
    # log only when debug or trace output has been switched on.
    #
    # Key responsibilities:
    # * Log messages with component prefixes
    # * Conditionally log at debug level
    # * Check debug and trace logging configuration
    # * Support both Jekyll log levels and configuration-based trace mode
    #
    # @example
    #   LoggerFormatter.info("HtmlTranslator", "Starting translation")
    #   LoggerFormatter.debug_if_enabled("Extractor", "Extracting from file")
    module LoggerFormatter
      class << self
        # Log an info message with component prefix.
        #
        # @param component [String] Component name (e.g., 'HtmlTranslator')
        # @param message [String] Message to log
        # @return [void]
        def info(component, message)
          Jekyll.logger.info("Localization", "[#{component}] #{message}")
        end

        # Log a debug message with component prefix.
        #
        # @param component [String] Component name
        # @param message [String] Message to log
        # @return [void]
        def debug(component, message)
          Jekyll.logger.debug("Localization", "[#{component}] #{message}")
        end

        # Log a warning message with component prefix.
        #
        # @param component [String] Component name
        # @param message [String] Message to log
        # @return [void]
        def warn(component, message)
          Jekyll.logger.warn("Localization", "[#{component}] #{message}")
        end

        # Log an error message with component prefix.
        #
        # @param component [String] Component name
        # @param message [String] Message to log
        # @return [void]
        def error(component, message)
          Jekyll.logger.error("Localization", "[#{component}] #{message}")
        end

        # Check if debug logging is enabled.
        #
        # Reads Jekyll.logger.level. A Symbol level enables debug when it is
        # :debug or :trace; any other level value is compared numerically and
        # enables debug when it is 0 or lower.
        #
        # @return [Boolean] True if debug logging is enabled
        def debug?
          current = Jekyll.logger.level
          case current
          when Symbol
            current == :debug || current == :trace
          else
            current <= 0 # DEBUG = 0
          end
        end

        # Check if trace logging is enabled in configuration.
        #
        # Objects responding to trace_logging? are asked directly; anything else
        # is treated as a hash and must have config["logging"]["trace"] set to
        # exactly true. Returns false for nil and whenever the lookup raises.
        #
        # @param config [PageLocalesConfig, Hash, nil] Configuration object or hash
        # @return [Boolean] True if trace logging is enabled
        def trace?(config)
          if config.nil?
            false
          elsif config.respond_to?(:trace_logging?)
            config.trace_logging?
          else
            config.dig("logging", "trace") == true
          end
        rescue StandardError
          false
        end

        # Conditionally log at debug level if debug is enabled.
        #
        # Logs only when debug? reports true.
        #
        # @param component [String] Component name
        # @param message [String] Message to log
        # @return [void]
        def debug_if_enabled(component, message)
          return unless debug?

          debug(component, message)
        end

        # Conditionally log at debug level if trace mode is enabled.
        #
        # Logs only when trace?(config) reports true.
        #
        # @param config [PageLocalesConfig, Hash, nil] Configuration
        # @param component [String] Component name
        # @param message [String] Message to log
        # @return [void]
        def trace_if_enabled(config, component, message)
          return unless trace?(config)

          debug(component, message)
        end
      end
    end
  end
end
|