jekyll-l10n 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/lib/jekyll-l10n/constants.rb +136 -0
- data/lib/jekyll-l10n/errors.rb +60 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
- data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
- data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
- data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
- data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
- data/lib/jekyll-l10n/extraction/logger.rb +48 -0
- data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
- data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
- data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
- data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
- data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
- data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
- data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
- data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
- data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
- data/lib/jekyll-l10n/po_file/loader.rb +64 -0
- data/lib/jekyll-l10n/po_file/manager.rb +160 -0
- data/lib/jekyll-l10n/po_file/merger.rb +80 -0
- data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
- data/lib/jekyll-l10n/po_file/reader.rb +518 -0
- data/lib/jekyll-l10n/po_file/writer.rb +232 -0
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
- data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
- data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
- data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
- data/lib/jekyll-l10n/translation/translator.rb +179 -0
- data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
- data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
- data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
- data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
- data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
- data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
- data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
- data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
- data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
- data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
- data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
- data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
- data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
- data/lib/jekyll-l10n/version.rb +10 -0
- data/lib/jekyll-l10n.rb +268 -0
- metadata +200 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "text_normalizer"
|
|
4
|
+
require_relative "html_elements"
|
|
5
|
+
|
|
6
|
+
module Jekyll
  module L10n
    # Resolves translations for text nodes with fallback to block-level translations.
    #
    # TranslationResolver looks up translations for normalized text, first trying
    # direct text node matches, then falling back to block-level translations when
    # text is part of a larger block element with its own translation. This enables
    # translating entire paragraphs as single units instead of word-by-word.
    #
    # Key responsibilities:
    # * Look up direct translation for text node
    # * Fall back to block-level translation if available
    # * Return appropriate translation or nil if none found
    #
    # @example
    #   translation = TranslationResolver.resolve(node, "Hello", translations)
    #   # Returns translated text if available, nil otherwise
    class TranslationResolver
      # Tag names whose presence as a direct child disables block-level
      # translation of the parent element. Defined once and frozen so the list
      # is not re-allocated on every call.
      PROTECTED_ELEMENTS = %w(script style pre).freeze

      # Resolve a translation for a text node.
      #
      # Attempts direct lookup of the normalized text in translations hash.
      # If not found, checks if the text is part of a block element with
      # a block-level translation and returns that.
      #
      # @param node [Nokogiri::XML::Node] Text node being translated
      # @param text [String] Normalized text content
      # @param translations [Hash] Translation hash mapping text to translations
      # @return [String, nil] Translated text if found, nil otherwise
      def self.resolve(node, text, translations)
        return nil unless node && text && translations

        # A direct hit always wins; the block lookup is only a fallback.
        translations[text] || try_block_level_translation(node, text, translations)
      end

      # Attempt block-level translation for text nodes that are part of larger blocks.
      #
      # Checks if a text node is part of a larger block element (like a paragraph)
      # that has a complete translation. The block translation is returned only
      # when the block text differs from the node's own text, i.e. when the lookup
      # is not just a repeat of the direct lookup.
      #
      # Security consideration: Returns nil if the block contains protected elements
      # (script, style, pre tags) to prevent unsafe translation application.
      #
      # @param node [Nokogiri::XML::Node, nil] Text node being translated
      # @param text [String] Normalized text of the node
      # @param translations [Hash, nil] Translation hash mapping text to translations
      # @return [String, nil] Block-level translation if available, nil otherwise
      #
      # @example
      #   # For text "text" in "<p><script>...</script> text</p>"
      #   # Returns nil (protected element present, prevents block translation)
      #   TranslationResolver.try_block_level_translation(node, "text", translations)
      def self.try_block_level_translation(node, text, translations)
        # This method is public, so guard against nil the same way resolve does.
        return nil unless node && translations

        parent = node.parent
        return nil unless content_element?(parent)

        # Don't attempt block-level translation if parent contains protected elements
        # (script, style, pre). These cannot be safely applied at block level.
        return nil if contains_protected_elements?(parent)

        block_text = BlockTextExtractor.extract(parent)
        return nil unless block_text && block_text != text

        translations[block_text]
      end

      # Check whether a node is an element listed in HtmlElements::CONTENT_ELEMENTS.
      #
      # @param node [Nokogiri::XML::Node, nil] Node to check
      # @return [Boolean] true only for element nodes whose name is a content element
      def self.content_element?(node)
        return false unless node&.element?

        HtmlElements::CONTENT_ELEMENTS.include?(node.name)
      end

      # Check if an element contains protected child elements that block translations.
      #
      # Only direct children are inspected. Protected elements (script, style, pre)
      # cannot have their surrounding text translated at the block level because:
      # * script/style: Security and functionality reasons (executable content)
      # * pre: Multi-line code blocks where translations break formatting
      #
      # This is a shared utility used by both HtmlTranslator and TranslationResolver
      # to ensure consistent protection of sensitive content across the codebase.
      #
      # @param node [Nokogiri::XML::Node, nil] Element to check
      # @return [Boolean] true if node contains protected elements, false otherwise
      #   (including when node is nil or not an element)
      #
      # @example
      #   doc = Nokogiri::HTML('<p><script>alert("xss")</script> text</p>')
      #   para = doc.xpath('//p').first
      #   TranslationResolver.contains_protected_elements?(para)
      #   # => true
      #
      # @example
      #   doc = Nokogiri::HTML('<p><code>inline</code> text</p>')
      #   para = doc.xpath('//p').first
      #   TranslationResolver.contains_protected_elements?(para)
      #   # => false (code is allowed, only script/style/pre are protected)
      def self.contains_protected_elements?(node)
        # Nil-safe, matching content_element?, because callers outside this class
        # use this helper directly.
        return false unless node&.element?

        node.children.any? { |child| child.element? && PROTECTED_ELEMENTS.include?(child.name) }
      end
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
  module L10n
    # Builds file paths from URLs and vice versa.
    #
    # UrlPathBuilder provides conversions between Jekyll URLs and file system
    # paths, handling root paths specially and normalizing separators. It supports
    # both regular URLs and PO page paths used for translation files.
    #
    # Key responsibilities:
    # * Normalize URLs (strip leading/trailing slashes)
    # * Convert URLs to file paths
    # * Convert URLs to PO page paths
    # * Calculate relative paths from absolute paths
    #
    # @example
    #   file_path = UrlPathBuilder.url_to_file_path('/docs/page.html')
    #   # Returns 'docs/page.html/index.html'
    module UrlPathBuilder
      extend self

      # Normalize a URL by removing one leading and one trailing slash.
      #
      # Uses whole-string prefix/suffix removal, so slashes next to embedded
      # newlines are never touched.
      #
      # @param url [String] URL to normalize
      # @return [String] Normalized URL ('' for '/')
      def normalize_url(url)
        url.delete_prefix("/").delete_suffix("/")
      end

      # Convert a URL to a file system path.
      #
      # Appends '/index.html' to the normalized URL, whatever the URL's own
      # extension is. Note that the root URL '/' therefore yields '/index.html'
      # (with a leading slash), because the normalized root is the empty string.
      #
      # @param url [String] Jekyll URL (e.g., '/docs/page.html')
      # @return [String] File path (e.g., 'docs/page.html/index.html')
      def url_to_file_path(url)
        "#{normalize_url(url)}/index.html"
      end

      # Convert a URL to a PO page path for translation files.
      #
      # Handles root path specially (converts to 'index'). Used when saving
      # page-specific translation files.
      #
      # @param url [String] Jekyll URL
      # @return [String] PO page path (e.g., 'docs/page.html/index.html',
      #   or 'index/index.html' for the root URL)
      def url_to_po_page_path(url)
        path = normalize_url(url)
        path = "index" if path.empty?
        "#{path}/index.html"
      end

      # Calculate relative path from destination directory.
      #
      # The destination is removed only when it is a real leading directory of
      # file_path (equal to it, or followed by '/'). A destination that merely
      # occurs somewhere inside the path, or that is a partial directory name
      # (e.g. '/srv/_site' vs '/srv/_site_old'), leaves the path untouched apart
      # from the leading slash.
      #
      # @param file_path [String] Absolute file path
      # @param dest [String] Destination directory to remove from path
      # @return [String] Relative path without a leading slash
      def relative_path(file_path, dest)
        # Tolerate a destination written with a trailing slash.
        base = dest.to_s.chomp("/")

        remainder =
          if base.empty?
            file_path
          elsif file_path == base
            ""
          elsif file_path.start_with?("#{base}/")
            file_path.delete_prefix(base)
          else
            file_path
          end

        remainder.delete_prefix("/")
      end
    end
  end
end
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
module Jekyll
  module L10n
    # Transforms root-relative URLs in HTML to include locale prefixes.
    #
    # UrlTransformer modifies href attributes in links to prefix them with
    # the target locale (e.g., /docs/page.html becomes /es/docs/page.html).
    # It preserves external links, anchors, scheme URLs (mailto, tel,
    # javascript, ...), document-relative links, and links already containing
    # locale prefixes. Skips English locale (default language).
    #
    # Key responsibilities:
    # * Identify root-relative links to transform
    # * Add locale prefix to href values
    # * Preserve external links and special URLs
    # * Skip already-localized URLs (with or without the baseurl in front)
    # * Handle baseurl paths correctly
    # * Remove auto-inserted meta tags
    #
    # @example
    #   html = '<a href="/docs/page.html">Link</a>'
    #   transformed = UrlTransformer.transform(html, 'es', '/baseurl')
    #   # Returns '<a href="/baseurl/es/docs/page.html">Link</a>'
    class UrlTransformer
      # Leading URI scheme such as "https:", "mailto:", "tel:" or "javascript:".
      URI_SCHEME_PATTERN = %r!\A[a-z][a-z0-9+.-]*:!i

      # A whole path segment that looks like a locale code ("es", "pt_BR").
      LOCALE_SEGMENT_PATTERN = %r!\A[a-z]{2}(?:_[A-Z]{2})?\z!

      # Content-Type meta tag stripped from the serialized document.
      # Matches: <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
      AUTO_META_TAG_PATTERN =
        %r!<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n?!

      private_constant :URI_SCHEME_PATTERN, :LOCALE_SEGMENT_PATTERN, :AUTO_META_TAG_PATTERN

      class << self
        # Transform all root-relative URLs in HTML to include locale prefix.
        #
        # Parses HTML document, identifies transformable links, adds locale prefix,
        # removes auto-inserted meta tags, and returns transformed HTML. The input
        # is returned untouched when the locale is nil, empty or "en".
        #
        # @param html [String] HTML content with URLs to transform
        # @param locale [String] Target locale code (e.g., 'es', 'fr')
        # @param baseurl [String, nil] Base URL for site (e.g., '/baseurl')
        # @return [String] HTML with locale-prefixed URLs
        def transform(html, locale, baseurl)
          return html if should_skip_transform?(locale)

          # Use Nokogiri::HTML to properly parse full HTML documents while preserving
          # DOCTYPE, html tag, and document structure. Auto-inserted meta tags are
          # removed via regex post-processing (same approach as HtmlTranslator).
          # See: spec/regression/nokogiri_meta_tag_spec.rb for regression tests
          doc = Nokogiri::HTML(html)
          transform_document(doc, locale, baseurl)

          # Remove the auto-inserted meta tag by libxml2 during HTML serialization.
          doc.to_html.gsub(AUTO_META_TAG_PATTERN, "")
        end

        # Transform URLs in a parsed HTML document.
        #
        # Modifies href attributes of links in the parsed document in place.
        # Useful when document is already parsed to avoid re-parsing. Links
        # carrying the "dropdown-item" class are left alone.
        #
        # @param doc [Nokogiri::HTML::Document] Parsed HTML document
        # @param locale [String] Target locale code
        # @param baseurl [String, nil] Base URL for site
        # @return [void]
        def transform_document(doc, locale, baseurl)
          return if should_skip_transform?(locale)

          doc.css("a[href]").each do |link|
            href = link["href"]
            next unless should_transform_href?(href, locale, baseurl)
            next if language_dropdown_link?(link)

            link["href"] = add_locale_prefix(href, locale, baseurl)
          end
        end

        private

        # True when no prefixing should happen at all: missing locale or "en".
        def should_skip_transform?(locale)
          locale.nil? || locale.empty? || locale == "en"
        end

        # Decide whether a single href is a root-relative, not-yet-localized
        # site link that should receive the locale prefix.
        def should_transform_href?(href, locale, baseurl)
          return false if invalid_href?(href)
          return false if external_or_special_link?(href)
          return false if relative_path?(href)
          return false if already_localized?(href, locale, baseurl)

          true
        end

        def invalid_href?(href)
          href.nil? || href.empty?
        end

        # Fragment links, protocol-relative URLs ("//host/path") and anything
        # with a URI scheme (http, https, mailto, tel, javascript, data, ...)
        # point outside the localized page tree and must be kept verbatim.
        def external_or_special_link?(href)
          href.start_with?("#", "//") || href.match?(URI_SCHEME_PATTERN)
        end

        # Anything not starting with "/" is resolved by the browser against the
        # current (already localized) page, so prefixing it would corrupt it
        # (e.g. "page.html" would become "/espage.html").
        def relative_path?(href)
          !href.start_with?("/")
        end

        # A link is already localized when, after removing a leading baseurl,
        # it starts with the target locale or with any locale-looking segment.
        def already_localized?(href, locale, baseurl = nil)
          path = strip_baseurl(href, baseurl)
          path.start_with?("/#{locale}/") || locale_prefix?(path)
        end

        def language_dropdown_link?(link)
          (link["class"] || "").split.include?("dropdown-item")
        end

        # True when the first path segment looks like a locale code.
        def locale_prefix?(href)
          first_segment = href.delete_prefix("/").split("/").first
          !first_segment.nil? && first_segment.match?(LOCALE_SEGMENT_PATTERN)
        end

        # Insert the locale right after the baseurl (or at the root when the
        # site has no baseurl). An href that already starts with the baseurl
        # does not get the baseurl duplicated.
        def add_locale_prefix(href, locale, baseurl)
          "#{normalize_baseurl(baseurl)}/#{locale}#{strip_baseurl(href, baseurl)}"
        end

        # Baseurl without a trailing slash; nil and "/" both become "".
        def normalize_baseurl(baseurl)
          baseurl.to_s.chomp("/")
        end

        # Remove the baseurl from the front of href, but only at a path-segment
        # boundary so that "/base" is not stripped from "/base-other/page".
        def strip_baseurl(href, baseurl)
          base = normalize_baseurl(baseurl)
          return href if base.empty?
          return "" if href == base
          return href.delete_prefix(base) if href.start_with?("#{base}/")

          href
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "url_path_builder"
|
|
4
|
+
|
|
5
|
+
module Jekyll
  module L10n
    # Produces "where did this string come from" references for extracted text.
    #
    # Each reference has the form "file_path:line_number" and is written into
    # PO files as a comment so translators can find the text in the generated
    # HTML. Despite the module's name, the result is a plain file location,
    # not an XPath expression.
    #
    # What it does:
    # * Turns a DOM node plus its file into a location reference
    # * Makes the file path relative to the destination directory
    # * Appends the node's line number
    #
    # @example
    #   ref = XPathReferenceGenerator.generate(node, 'docs/index.html', '_site')
    #   # Returns 'docs/index.html:42' (42 is line number in HTML)
    module XPathReferenceGenerator
      extend self

      # Build the location reference for one extracted string.
      #
      # @param node [Nokogiri::XML::Node] DOM node the string was found in
      # @param file_path [String] Path of the HTML file being processed
      # @param dest [String] Destination directory, removed from file_path
      # @param _attr_name [String, nil] Ignored; accepted only so existing
      #   callers that pass an attribute name keep working
      # @return [String] Reference formatted as "relative_path:line_number"
      def generate(node, file_path, dest, _attr_name = nil)
        location = UrlPathBuilder.relative_path(file_path, dest)

        [location, node.line].join(":")
      end
    end
  end
end
|
data/lib/jekyll-l10n.rb
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# # jekyll-l10n: Complete Site Localization for Jekyll
|
|
4
|
+
#
|
|
5
|
+
# A comprehensive localization plugin for Jekyll that provides GNU Gettext-based translation
|
|
6
|
+
# management. The plugin automatically creates locale-prefixed page variants and applies
|
|
7
|
+
# translations using PO (Portable Object) files, the standard format for open-source translations.
|
|
8
|
+
#
|
|
9
|
+
# ## Core Features
|
|
10
|
+
#
|
|
11
|
+
# - **Automatic Page Duplication**: Creates locale-specific page variants with proper URL prefixes
|
|
12
|
+
# - **HTML String Extraction**: Automatically finds translatable strings and attributes in
|
|
13
|
+
# generated HTML
|
|
14
|
+
# - **PO File Management**: Reads, writes, and merges GNU Gettext PO files with caching
|
|
15
|
+
# - **Flexible Translation Application**: Applies translations with configurable fallback modes
|
|
16
|
+
# - **LibreTranslate Integration**: Automatic translation via LibreTranslate API (optional)
|
|
17
|
+
# - **Compendium Support**: Shared translation files across multiple pages
|
|
18
|
+
# - **Incremental Builds**: Optimizes performance by skipping unchanged content
|
|
19
|
+
# - **Liquid Filters**: Template helpers for locale-aware URL generation
|
|
20
|
+
#
|
|
21
|
+
# ## Architecture Overview
|
|
22
|
+
#
|
|
23
|
+
# The plugin consists of five major modules, each handling a specific aspect of localization:
|
|
24
|
+
#
|
|
25
|
+
# ### 1. Jekyll Integration (`Jekyll::L10n::Jekyll`)
|
|
26
|
+
# Integrates localization into the Jekyll build pipeline through hooks and generators.
|
|
27
|
+
# - **Generator**: Creates localized page variants during generation phase
|
|
28
|
+
# - **LocalizedPage**: Represents a page in a specific locale
|
|
29
|
+
# - **PostWriteProcessor**: Extracts strings and reprocesses translations after build
|
|
30
|
+
# - **UrlFilter**: Liquid filters for locale-aware URL generation
|
|
31
|
+
#
|
|
32
|
+
# ### 2. HTML Extraction (`Jekyll::L10n::Extraction`)
|
|
33
|
+
# Extracts translatable content from generated HTML and maintains PO files.
|
|
34
|
+
# - **Extractor**: Main orchestrator for extraction workflow
|
|
35
|
+
# - **HtmlStringExtractor**: Finds translatable strings and attributes
|
|
36
|
+
# - **DomTextExtractor**: Extracts text nodes with file location references
|
|
37
|
+
# - **DomAttributeExtractor**: Extracts configurable HTML attributes
|
|
38
|
+
# - **ResultSaver**: Writes extracted strings to PO files
|
|
39
|
+
#
|
|
40
|
+
# ### 3. HTML Translation (`Jekyll::L10n::Translation`)
|
|
41
|
+
# Applies translations from PO files to HTML documents.
|
|
42
|
+
# - **Translator**: Main orchestrator for translation workflow
|
|
43
|
+
# - **HtmlTranslator**: Applies translations to HTML DOM nodes
|
|
44
|
+
# - **PageTranslationLoader**: Loads translations with compendium support
|
|
45
|
+
# - **BlockTextExtractor**: Context-aware text extraction
|
|
46
|
+
# - **LibreTranslator**: LibreTranslate API integration
|
|
47
|
+
#
|
|
48
|
+
# ### 4. PO File Management (`Jekyll::L10n::PoFile`)
|
|
49
|
+
# Manages GNU Gettext PO file operations with caching.
|
|
50
|
+
# - **Manager**: Orchestrates PO file operations and caching
|
|
51
|
+
# - **Reader**: Parses PO files into Ruby objects
|
|
52
|
+
# - **Writer**: Serializes PO entries back to file format
|
|
53
|
+
# - **Merger**: Merges translations from compendium files
|
|
54
|
+
# - **Loader**: Loads and validates PO files
|
|
55
|
+
#
|
|
56
|
+
# ### 5. Utilities (`Jekyll::L10n`)
|
|
57
|
+
# Common utilities supporting all other modules.
|
|
58
|
+
# - **TextNormalizer**: Normalizes whitespace for consistent matching
|
|
59
|
+
# - **TextValidator**: Validates extractable text
|
|
60
|
+
# - **TranslationResolver**: Selects best translation with fallback
|
|
61
|
+
# - **HtmlParser**: DOM parsing with safety guards
|
|
62
|
+
# - **PathBuilder**: Constructs locale-specific file paths
|
|
63
|
+
#
|
|
64
|
+
# ## Configuration
|
|
65
|
+
#
|
|
66
|
+
# Configuration can be specified in Jekyll `_config.yml` or in individual page front matter:
|
|
67
|
+
#
|
|
68
|
+
# ```yaml
|
|
69
|
+
# plugins:
|
|
70
|
+
# - jekyll-l10n
|
|
71
|
+
#
|
|
72
|
+
# localization_gettext:
|
|
73
|
+
# with_locales_data:
|
|
74
|
+
# locales: [es, fr, pt, de]
|
|
75
|
+
# extract_on_build: true
|
|
76
|
+
# translation:
|
|
77
|
+
# fallback: english
|
|
78
|
+
# libretranslate_enabled: true
|
|
79
|
+
# libretranslate_api_url: "https://api.libretranslate.de"
|
|
80
|
+
# logging:
|
|
81
|
+
# debug: false
|
|
82
|
+
# ```
|
|
83
|
+
#
|
|
84
|
+
# Page-level configuration overrides site-level configuration:
|
|
85
|
+
#
|
|
86
|
+
# ```markdown
|
|
87
|
+
# ---
|
|
88
|
+
# with_locales: true
|
|
89
|
+
# with_locales_data:
|
|
90
|
+
# locales: [es, fr]
|
|
91
|
+
# ---
|
|
92
|
+
# ```
|
|
93
|
+
#
|
|
94
|
+
# ## Usage Examples
|
|
95
|
+
#
|
|
96
|
+
# ### Enable localization for a page
|
|
97
|
+
#
|
|
98
|
+
# ```markdown
|
|
99
|
+
# ---
|
|
100
|
+
# title: About Us
|
|
101
|
+
# with_locales: true
|
|
102
|
+
# with_locales_data:
|
|
103
|
+
# locales: [es, fr, pt_BR]
|
|
104
|
+
# ---
|
|
105
|
+
# Content here...
|
|
106
|
+
# ```
|
|
107
|
+
#
|
|
108
|
+
# ### Use locale-aware URLs in templates
|
|
109
|
+
#
|
|
110
|
+
# ```liquid
|
|
111
|
+
# <!-- Link with current locale -->
|
|
112
|
+
# <a href="{{ '/about/' | locale_url }}">About</a>
|
|
113
|
+
#
|
|
114
|
+
# <!-- Switch to different locale -->
|
|
115
|
+
# <a href="{{ page.url | switch_locale_url: 'es' }}">Español</a>
|
|
116
|
+
#
|
|
117
|
+
# <!-- Build language switcher -->
|
|
118
|
+
# {% for locale in page.with_locales_data.locales %}
|
|
119
|
+
# <a href="{{ page.url | switch_locale_url: locale }}">
|
|
120
|
+
# {{ locale | upcase }}
|
|
121
|
+
# </a>
|
|
122
|
+
# {% endfor %}
|
|
123
|
+
# ```
|
|
124
|
+
#
|
|
125
|
+
# ## Build Pipeline
|
|
126
|
+
#
|
|
127
|
+
# 1. **Generate Phase**: Generator creates LocalizedPage instances for each locale
|
|
128
|
+
# 2. **Render Phase**: Jekyll renders all pages (including localized variants)
|
|
129
|
+
# 3. **Post-Render Phase**: Translator hook applies translations to localized pages
|
|
130
|
+
# 4. **Write Phase**: Jekyll writes HTML to disk
|
|
131
|
+
# 5. **Post-Write Phase**: PostWriteProcessor extracts new strings and updates translations
|
|
132
|
+
#
|
|
133
|
+
# ## Translation Workflow Timing
|
|
134
|
+
#
|
|
135
|
+
# The plugin applies translations in two phases:
|
|
136
|
+
#
|
|
137
|
+
# 1. **Initial Build (post_render)**: Localized pages are translated during Jekyll's
|
|
138
|
+
# render phase using existing PO files
|
|
139
|
+
# 2. **Post-Extraction (post_write)**: When new strings are extracted and PO files
|
|
140
|
+
# updated, localized HTML is immediately reprocessed with new translations
|
|
141
|
+
#
|
|
142
|
+
# This dual-phase approach ensures both incremental builds (using cached translations)
|
|
143
|
+
# and immediate translation updates (when new strings are found).
|
|
144
|
+
#
|
|
145
|
+
# ## File Locations
|
|
146
|
+
#
|
|
147
|
+
# - **Source pages**: `src/**/*.md` (marked with `with_locales: true`)
|
|
148
|
+
# - **Page-specific PO files**: `_locales/{locale}/{page_path}.po`
|
|
149
|
+
# - **Compendium PO files**: `_locales/{locale}.po` (shared translations for locale)
|
|
150
|
+
# - **Output**: `_site/[locale]/path/to/page/index.html`
|
|
151
|
+
#
|
|
152
|
+
# ## Terminology
|
|
153
|
+
#
|
|
154
|
+
# - **Source pages**: Pages marked with `with_locales: true` in front matter
|
|
155
|
+
# - **Localized pages**: Generated LocalizedPage instances (marked with `localized: true`)
|
|
156
|
+
# - **Compendium**: Locale-level shared translations at `_locales/{locale}.po`
|
|
157
|
+
# - **Page-specific PO files**: Per-page translations at `_locales/{locale}/{page_path}.po`
|
|
158
|
+
# - **String extraction**: Finding translatable text content and HTML attribute values
|
|
159
|
+
#
|
|
160
|
+
# @see Jekyll::L10n::Generator for page generation
|
|
161
|
+
# @see Jekyll::L10n::Translator for translation application
|
|
162
|
+
# @see Jekyll::L10n::Extractor for string extraction
|
|
163
|
+
# @see Jekyll::L10n::UrlFilter for liquid filters
|
|
164
|
+
|
|
165
|
+
require_relative "jekyll-l10n/version"
|
|
166
|
+
require_relative "jekyll-l10n/constants"
|
|
167
|
+
require_relative "jekyll-l10n/errors"
|
|
168
|
+
require_relative "jekyll-l10n/po_file/reader"
|
|
169
|
+
require_relative "jekyll-l10n/po_file/writer"
|
|
170
|
+
require_relative "jekyll-l10n/po_file/loader"
|
|
171
|
+
require_relative "jekyll-l10n/po_file/merger"
|
|
172
|
+
require_relative "jekyll-l10n/po_file/path_builder"
|
|
173
|
+
require_relative "jekyll-l10n/po_file/manager"
|
|
174
|
+
require_relative "jekyll-l10n/extraction/dom_text_extractor"
|
|
175
|
+
require_relative "jekyll-l10n/extraction/dom_attribute_extractor"
|
|
176
|
+
require_relative "jekyll-l10n/extraction/html_string_extractor"
|
|
177
|
+
require_relative "jekyll-l10n/extraction/config_loader"
|
|
178
|
+
require_relative "jekyll-l10n/extraction/result_saver"
|
|
179
|
+
require_relative "jekyll-l10n/extraction/compendium_translator"
|
|
180
|
+
require_relative "jekyll-l10n/extraction/compendium_merger"
|
|
181
|
+
require_relative "jekyll-l10n/extraction/logger"
|
|
182
|
+
require_relative "jekyll-l10n/extraction/extractor"
|
|
183
|
+
require_relative "jekyll-l10n/translation/html_translator"
|
|
184
|
+
require_relative "jekyll-l10n/translation/page_translation_loader"
|
|
185
|
+
require_relative "jekyll-l10n/translation/translator"
|
|
186
|
+
require_relative "jekyll-l10n/translation/block_text_extractor"
|
|
187
|
+
require_relative "jekyll-l10n/jekyll/localized_page"
|
|
188
|
+
require_relative "jekyll-l10n/jekyll/regeneration_checker"
|
|
189
|
+
require_relative "jekyll-l10n/jekyll/generator"
|
|
190
|
+
require_relative "jekyll-l10n/jekyll/post_write_processor"
|
|
191
|
+
require_relative "jekyll-l10n/jekyll/post_write_html_reprocessor"
|
|
192
|
+
require_relative "jekyll-l10n/jekyll/file_sync"
|
|
193
|
+
require_relative "jekyll-l10n/jekyll/page_locator"
|
|
194
|
+
require_relative "jekyll-l10n/jekyll/page_writer"
|
|
195
|
+
require_relative "jekyll-l10n/jekyll/url_filter"
|
|
196
|
+
require_relative "jekyll-l10n/utils/text_normalizer"
|
|
197
|
+
require_relative "jekyll-l10n/utils/translation_resolver"
|
|
198
|
+
require_relative "jekyll-l10n/utils/html_elements"
|
|
199
|
+
require_relative "jekyll-l10n/utils/html_parser"
|
|
200
|
+
require_relative "jekyll-l10n/utils/html_text_utils"
|
|
201
|
+
require_relative "jekyll-l10n/utils/debug_logger"
|
|
202
|
+
require_relative "jekyll-l10n/utils/url_path_builder"
|
|
203
|
+
require_relative "jekyll-l10n/utils/xpath_reference_generator"
|
|
204
|
+
require_relative "jekyll-l10n/utils/page_locales_config"
|
|
205
|
+
require_relative "jekyll-l10n/utils/text_validator"
|
|
206
|
+
require_relative "jekyll-l10n/utils/url_transformer"
|
|
207
|
+
require_relative "jekyll-l10n/utils/site_config_accessor"
|
|
208
|
+
require_relative "jekyll-l10n/utils/error_handler"
|
|
209
|
+
require_relative "jekyll-l10n/utils/logger_formatter"
|
|
210
|
+
require_relative "jekyll-l10n/utils/external_link_icon_preserver"
|
|
211
|
+
|
|
212
|
+
module Jekyll
  # Namespace of the jekyll-l10n plugin.
  module L10n
    # Entry point of the plugin: GNU Gettext-based localization for Jekyll.
    #
    # The plugin duplicates pages per locale, extracts translatable strings
    # from the generated HTML and applies translations to the localized
    # pages. This module ties those parts together.
    #
    # Main public classes and modules:
    # - {Jekyll::L10n::Generator} - Creates locale-prefixed page variants
    # - {Jekyll::L10n::Translator} - Applies translations to localized pages
    # - {Jekyll::L10n::Extractor} - Extracts translatable strings from HTML
    # - {Jekyll::L10n::UrlFilter} - Liquid filters for locale-aware URLs
    #
    # @see lib/jekyll-l10n.rb for complete architecture overview and build pipeline

    # Mix in the shared constant definitions.
    include Constants

    # Shortcuts so these values can be read directly from Jekyll::L10n.
    MIN_TRANSLATABLE_LENGTH = Constants::MIN_TRANSLATABLE_LENGTH
    DEFAULT_LOCALES_DIR = Constants::DEFAULT_LOCALES_DIR
    DEFAULT_FALLBACK_MODE = Constants::DEFAULT_FALLBACK_MODE
    DEFAULT_TRANSLATABLE_ATTRIBUTES = Constants::DEFAULT_TRANSLATABLE_ATTRIBUTES

    class << self
      # Add a locale prefix to the URLs found in an HTML string.
      #
      # Thin convenience wrapper that forwards all arguments to
      # UrlTransformer.transform and returns its result.
      #
      # @param html [String] HTML content to transform
      # @param locale [String] Locale code for URL prefix (e.g., 'es', 'fr')
      # @param baseurl [String] Base URL for relative URL transformation
      # @return [String] HTML with transformed URLs
      # @example
      #   html_with_locale_urls = Jekyll::L10n.transform(html, 'es', '/mysite')
      def transform(html, locale, baseurl)
        UrlTransformer.transform(html, locale, baseurl)
      end
    end
  end
end
|
|
256
|
+
|
|
257
|
+
# After a page is rendered, translate it if it is a generated localized page
# (front matter flag "localized" set to true).
Jekyll::Hooks.register :pages, :post_render do |page|
  Jekyll::L10n::Translator.new(page).translate if page.data["localized"] == true
end

# After the site is written to disk, run the post-write localization processing.
Jekyll::Hooks.register :site, :post_write do |site|
  processor = Jekyll::L10n::PostWriteProcessor.new(site)
  processor.process_localizations
end

# Make the plugin's URL helpers available as Liquid filters.
Liquid::Template.register_filter(Jekyll::L10n::UrlFilter)
|