jekyll-l10n 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/lib/jekyll-l10n/constants.rb +136 -0
- data/lib/jekyll-l10n/errors.rb +60 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
- data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
- data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
- data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
- data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
- data/lib/jekyll-l10n/extraction/logger.rb +48 -0
- data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
- data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
- data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
- data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
- data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
- data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
- data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
- data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
- data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
- data/lib/jekyll-l10n/po_file/loader.rb +64 -0
- data/lib/jekyll-l10n/po_file/manager.rb +160 -0
- data/lib/jekyll-l10n/po_file/merger.rb +80 -0
- data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
- data/lib/jekyll-l10n/po_file/reader.rb +518 -0
- data/lib/jekyll-l10n/po_file/writer.rb +232 -0
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
- data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
- data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
- data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
- data/lib/jekyll-l10n/translation/translator.rb +179 -0
- data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
- data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
- data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
- data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
- data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
- data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
- data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
- data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
- data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
- data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
- data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
- data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
- data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
- data/lib/jekyll-l10n/version.rb +10 -0
- data/lib/jekyll-l10n.rb +268 -0
- metadata +200 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../translation/translator"
|
|
4
|
+
require_relative "page_writer"
|
|
5
|
+
require_relative "localized_page_mapper"
|
|
6
|
+
require_relative "../utils/url_path_builder"
|
|
7
|
+
require_relative "../utils/logger_formatter"
|
|
8
|
+
|
|
9
|
+
module Jekyll
|
|
10
|
+
module L10n
|
|
11
|
+
# File Synchronization Manager - Syncs localized pages and applies translations
|
|
12
|
+
#
|
|
13
|
+
# This class manages the synchronization of generated HTML content to localized page variants
|
|
14
|
+
# and applies translations to each variant. It works in coordination with Jekyll's file
|
|
15
|
+
# writing system to ensure all locale-specific content is properly translated.
|
|
16
|
+
#
|
|
17
|
+
# The sync process:
|
|
18
|
+
# 1. Builds a map of localized pages grouped by original URL
|
|
19
|
+
# 2. For each original page, finds its localized variants
|
|
20
|
+
# 3. Reads the original HTML file content
|
|
21
|
+
# 4. Applies translations to each localized variant
|
|
22
|
+
# 5. Writes the translated HTML to disk
|
|
23
|
+
#
|
|
24
|
+
# This class is typically used during the post-write phase to handle final translation
|
|
25
|
+
# application after Jekyll has written all files to disk.
|
|
26
|
+
#
|
|
27
|
+
# @example Usage (typically internal)
|
|
28
|
+
# file_reader = SomeFileReader.new
|
|
29
|
+
# page_writer = LocalizedPageWriter.new(site.dest)
|
|
30
|
+
# syncer = LocalizationFileSync.new(site, file_reader, page_writer)
|
|
31
|
+
# syncer.sync
|
|
32
|
+
#
|
|
33
|
+
class LocalizationFileSync
  # Create a synchronizer wired to the collaborators it drives.
  #
  # @param site [Jekyll::Site] site whose `pages` and `config["baseurl"]` are read
  # @param file_reader [#read] object whose `read(page)` returns the HTML of an
  #   original page, or a falsy value when that page should be skipped
  # @param page_writer [#translate_and_write] writer invoked as
  #   `translate_and_write(localized_page, translator, locale, baseurl)`
  def initialize(site, file_reader, page_writer)
    @site = site
    @file_reader = file_reader
    @page_writer = page_writer
  end

  # Translate and write every localized variant of every opted-in page.
  #
  # Builds the original-URL => localized-pages map through LocalizedPageMapper,
  # logs how many original URLs have variants, then processes the site's pages.
  #
  # @return [void]
  def sync
    Jekyll.logger.info "Localization", "Starting file sync process"
    localized_pages_by_url = LocalizedPageMapper.build_map(@site)
    Jekyll.logger.info "Localization",
                       "Found #{localized_pages_by_url.size} pages with localized versions"
    sync_files_and_apply_translations(localized_pages_by_url)
  end

  private

  # Walk the site's pages and process each original page that opted in.
  #
  # @param localized_pages_by_url [Hash{String => Array}] localized variants keyed
  #   by the URL of the page they were derived from
  # @return [void]
  def sync_files_and_apply_translations(localized_pages_by_url)
    @site.pages.each do |page|
      with_locales = page.data["with_locales"]
      is_localized = page.data["localized"]
      log_page_check(page.url, with_locales, is_localized)

      # Strict comparisons on purpose: only a literal `true` opts a page in, and
      # only a literal `true` marks a page as an already-localized variant.
      next unless with_locales == true && is_localized != true

      LoggerFormatter.debug_if_enabled("FileSync",
                                       "Processing page for translation: #{page.url}")
      original_content = @file_reader.read(page)
      next unless original_content

      apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
    end
  end

  # Translate and write each localized variant registered for +page+.
  #
  # Does nothing when the map has no entry for the page's URL.
  #
  # @param page [Jekyll::Page] the original page
  # @param localized_pages_by_url [Hash{String => Array}] variants keyed by original URL
  # @param original_content [String] HTML of the original page
  # @return [void]
  def apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
    variants = localized_pages_by_url[page.url]
    return unless variants

    variants.each do |localized_page|
      translate_and_write_localized_page(localized_page, original_content)
    end
  end

  # Seed one localized variant with content and hand it to the page writer.
  #
  # A variant that already has non-empty output keeps it; otherwise it receives
  # the original page's HTML. The assignment goes through the public `output=`
  # writer, the same accessor the page writer uses, instead of poking the
  # instance variable directly.
  #
  # @param localized_page [Jekyll::Page] localized variant to translate and write
  # @param original_content [String] HTML of the page the variant was derived from
  # @return [void]
  def translate_and_write_localized_page(localized_page, original_content)
    current_output = localized_page.output
    localized_page.output = original_content if !current_output || current_output.empty?

    @page_writer.translate_and_write(
      localized_page,
      Translator.new(localized_page),
      localized_page.data["locale"],
      @site.config["baseurl"]
    )
  end

  # Emit a debug line describing the front-matter flags of a page being examined.
  #
  # @param url [String] URL of the page
  # @param with_locales [Object] raw `with_locales` front-matter value
  # @param is_localized [Object] raw `localized` front-matter value
  # @return [void]
  def log_page_check(url, with_locales, is_localized)
    LoggerFormatter.debug_if_enabled("FileSync",
                                     "Checking page: #{url} (with_locales: #{with_locales}, " \
                                     "localized: #{is_localized})")
  end
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "regeneration_checker"
|
|
4
|
+
|
|
5
|
+
module Jekyll
|
|
6
|
+
module L10n
|
|
7
|
+
# Localized Page Generator - Creates locale-prefixed copies of pages during Jekyll build
|
|
8
|
+
#
|
|
9
|
+
# This is the main Jekyll integration point for the localization plugin. It runs as a
|
|
10
|
+
# low-priority generator during the Jekyll build process, creating duplicate pages for each
|
|
11
|
+
# configured locale. Each localized page inherits content and metadata from the source page
|
|
12
|
+
# but includes locale-prefixed URLs.
|
|
13
|
+
#
|
|
14
|
+
# Key responsibilities:
|
|
15
|
+
# - Identify pages marked for localization (with_locales: true)
|
|
16
|
+
# - Extract configured locales from page front matter
|
|
17
|
+
# - Create LocalizedPage instances for each locale variant
|
|
18
|
+
# - Optimize regeneration by skipping unchanged pages
|
|
19
|
+
#
|
|
20
|
+
# The generator respects Jekyll's incremental build mode and only regenerates pages when
|
|
21
|
+
# the source page or configuration has changed, improving rebuild performance.
|
|
22
|
+
#
|
|
23
|
+
# @example Usage in Jekyll site configuration
|
|
24
|
+
# # _config.yml
|
|
25
|
+
# plugins:
|
|
26
|
+
# - jekyll-l10n
|
|
27
|
+
#
|
|
28
|
+
# @example Marking pages for localization
|
|
29
|
+
# # src/page.md
|
|
30
|
+
# ---
|
|
31
|
+
# with_locales: true
|
|
32
|
+
# with_locales_data:
|
|
33
|
+
# locales: [es, fr, pt, de]
|
|
34
|
+
# ---
|
|
35
|
+
#
|
|
36
|
+
class Generator < Jekyll::Generator
|
|
37
|
+
priority :low
|
|
38
|
+
|
|
39
|
+
# Generator entry point, called by Jekyll during the generate phase.
#
# Logs an informational "Localization" message and then delegates all of the
# work to #generate_localized_pages.
#
# @param site [Jekyll::Site] the site whose pages are processed
# @return [void]
# @see LocalizedPage for details on page structure
def generate(site)
  Jekyll.logger.info("Localization", "Generating localized pages...")
  generate_localized_pages(site)
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
# Append one LocalizedPage per (page, locale) pair that needs generating.
#
# Iterates over a snapshot of `site.pages`, because the loop appends to
# `site.pages` while it runs. A pair is generated only when the page opted in,
# the locale code is valid, and the regeneration checker asks for it.
#
# @param site [Jekyll::Site] site whose page list is read and extended
# @return [void]
def generate_localized_pages(site)
  snapshot = site.pages.dup
  checker = Jekyll::L10n::RegenerationChecker.new(site)

  snapshot.each do |page|
    next unless should_localize_page?(page)

    get_page_locales(page).each do |locale|
      next unless valid_locale_code?(locale)
      next unless checker.should_regenerate?(page, locale)

      site.pages << create_localized_page(site, page, locale)
    end
  end
end
|
|
74
|
+
|
|
75
|
+
# Decide whether +page+ is an original page that opted into localization.
#
# Both checks compare against the literal `true`: a page qualifies when its
# `localized` front matter is not `true` and its `with_locales` is `true`.
#
# @param page [Jekyll::Page] page whose front matter is inspected
# @return [Boolean]
def should_localize_page?(page)
  front_matter = page.data
  front_matter["localized"] != true && front_matter["with_locales"] == true
end
|
|
80
|
+
|
|
81
|
+
# Read the valid locale codes configured in a page's front matter.
#
# Looks at `with_locales_data.locales`. The value is coerced with `Array()`, so
# a single scalar (`locales: es`) is treated as a one-element list instead of
# raising NoMethodError on `String#select`. Entries rejected by
# #valid_locale_code? are dropped.
#
# @param page [Jekyll::Page] page whose front matter is inspected
# @return [Array<String>] valid locale codes, in front-matter order; empty when
#   `with_locales_data` is not a Hash or lists no valid locale
def get_page_locales(page)
  settings = page.data["with_locales_data"]
  return [] unless settings.is_a?(Hash)

  Array(settings["locales"]).select { |locale| valid_locale_code?(locale) }
end
|
|
92
|
+
|
|
93
|
+
# Build the LocalizedPage for +page+ in +locale+.
#
# The new page's directory is the source page's `dir` prefixed with
# `/<locale>`; its base is the site's source directory.
#
# @param site [Jekyll::Site] owning site
# @param page [Jekyll::Page] source page
# @param locale [String] locale code
# @return [LocalizedPage]
def create_localized_page(site, page, locale)
  LocalizedPage.new(
    site: site,
    base: site.source,
    dir: "/#{locale}#{page.dir}",
    page: page,
    locale: locale
  )
end
|
|
98
|
+
|
|
99
|
+
# Check that +locale+ is a two-letter lowercase language code, optionally
# followed by an underscore and a two-letter uppercase region ("es", "pt_BR").
#
# The pattern is anchored with `\A` and `\z` so the WHOLE string must match.
# `^` and `$` anchor per line in Ruby, which would accept values such as
# "es\n../x" — and the accepted value is later interpolated into an output
# directory.
#
# @param locale [Object] candidate value from front matter
# @return [Boolean] false for any non-String
def valid_locale_code?(locale)
  return false unless locale.is_a?(String)

  locale.match?(%r!\A[a-z]{2}(_[A-Z]{2})?\z!)
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
|
|
4
|
+
module L10n
|
|
5
|
+
# Localized Page - Represents a Jekyll page in a specific locale
|
|
6
|
+
#
|
|
7
|
+
# LocalizedPage extends Jekyll::Page to represent a page content variant for a
|
|
8
|
+
# specific language.
|
|
9
|
+
# It inherits content and metadata from the source page while maintaining locale-specific URLs
|
|
10
|
+
# and data attributes. The locale is included as a URL prefix (e.g., /es/path/to/page/).
|
|
11
|
+
#
|
|
12
|
+
# Key responsibilities:
|
|
13
|
+
# - Maintain both original and localized URLs and paths
|
|
14
|
+
# - Inherit content and front matter from source page
|
|
15
|
+
# - Set locale and lang attributes for Liquid templates
|
|
16
|
+
# - Generate locale-prefixed output paths
|
|
17
|
+
#
|
|
18
|
+
# During the post-render phase, LocalizedPage content is translated using translations from
|
|
19
|
+
# PO files. The translation process happens in the post_render Jekyll hook.
|
|
20
|
+
#
|
|
21
|
+
# @example Creating a localized page variant
|
|
22
|
+
# page = LocalizedPage.new(site: site, base: site.source, dir: '/es',
|
|
23
|
+
# page: source_page, locale: 'es')
|
|
24
|
+
# # Resulting URL: /es/path/to/page/
|
|
25
|
+
# # Resulting locale: es
|
|
26
|
+
#
|
|
27
|
+
class LocalizedPage < Jekyll::Page
|
|
28
|
+
# @!attribute [rw] locale
|
|
29
|
+
# The locale code for this page variant: a language code, optionally followed by an underscore and region (e.g., 'es', 'pt_BR')
|
|
30
|
+
# @return [String]
|
|
31
|
+
# @!attribute [rw] lang
|
|
32
|
+
# Alias for locale, used in Liquid templates
|
|
33
|
+
# @return [String]
|
|
34
|
+
# @!attribute [rw] original_page
|
|
35
|
+
# Reference to the source page before localization
|
|
36
|
+
# @return [Jekyll::Page]
|
|
37
|
+
# @!attribute [rw] original_url
|
|
38
|
+
# The URL of the source page before locale prefix was added
|
|
39
|
+
# @return [String]
|
|
40
|
+
attr_accessor :locale, :lang, :original_page, :original_url
|
|
41
|
+
|
|
42
|
+
# Build the variant of +page+ for one locale.
#
# File identity (name, extensions, paths), content and output are copied from
# the source page; the locale is stored as both `locale` and `lang`; the source
# page and its URL are remembered; finally the front matter is cloned and
# tagged by #setup_localized_data.
#
# `@output` is copied as-is from the source page, so it is whatever the source
# page held at the moment this object was created.
#
# NOTE(review): this constructor does not call `super`, so anything
# Jekyll::Page#initialize would set up beyond the fields assigned here is
# skipped — confirm this is intentional.
#
# @param site [Jekyll::Site] (keyword) the Jekyll site object
# @param base [String] (keyword) the base directory
# @param dir [String] (keyword) the directory path, already locale-prefixed
#   (e.g., '/es/path')
# @param page [Jekyll::Page] (keyword) the source page to localize
# @param locale [String] (keyword) the locale code (e.g., 'es', 'pt_BR')
# @return [LocalizedPage] a new localized page instance
# @note All parameters are keyword arguments and must be passed by name
def initialize(site:, base:, dir:, page:, locale:) # rubocop:disable Metrics/ParameterLists
  # Where the page lives.
  @site = site
  @base = base
  @dir = dir

  # File identity mirrors the source page.
  @name = page.name
  @ext = page.ext
  @output_ext = page.output_ext

  # Locale, exposed under both names.
  @locale = @lang = locale

  # Link back to the source page. @original_url must be set before
  # setup_localized_data runs, because that method reads it.
  @original_url = page.url
  @original_page = page

  # Paths and content are shared with the source page.
  @path = page.path
  @relative_path = page.relative_path
  @content = page.content
  @output = page.output

  setup_localized_data(page, locale)
end
|
|
81
|
+
|
|
82
|
+
# URL placeholder substitutions for this localized page.
#
# Takes the placeholders computed by the superclass and, when a `:path` entry
# is present, prefixes it with `/<locale>`.
#
# @return [Hash] the superclass's placeholders with a locale-prefixed `:path`
def url_placeholders
  super.tap do |placeholders|
    base_path = placeholders[:path]
    placeholders[:path] = "/#{@locale}#{base_path}" if base_path
  end
end
|
|
93
|
+
|
|
94
|
+
# The locale-prefixed URL of this page.
#
# Prepends `/<locale>` to the source page's URL and guarantees a trailing
# slash, e.g. '/path/to/page/' becomes '/es/path/to/page/' for locale 'es'.
#
# @return [String] the locale-prefixed URL, always ending in '/'
def url
  prefixed = "/#{@locale}#{@original_url}"
  prefixed.end_with?("/") ? prefixed : "#{prefixed}/"
end
|
|
106
|
+
|
|
107
|
+
# File system path where this page's HTML is written.
#
# The result is the destination root, followed by the page URL (with a
# trailing slash ensured), followed by 'index.html'. For URL '/es/path/' and
# root 'dest' this is 'dest/es/path/index.html'. The pieces are concatenated
# as plain strings.
#
# @param dest [String, nil] destination root; when nil (or false) the site's
#   configured "destination" is used
# @return [String] path of the output HTML file
def destination(dest = nil)
  root = dest || @site.config["destination"]
  page_url = url
  directory = page_url.end_with?("/") ? page_url : "#{page_url}/"
  "#{root}#{directory}index.html"
end
|
|
121
|
+
|
|
122
|
+
# Liquid representation of this localized page.
#
# Starts from the superclass's result and overwrites/sets three keys: "url"
# (the locale-prefixed URL), "locale" and "lang". The URL is computed before
# the superclass is consulted, as in the original implementation.
#
# @return [Hash<String, Object>] page data for Liquid, including locale information
def to_liquid
  localized_url = url

  super.tap do |payload|
    payload["url"] = localized_url
    payload["locale"] = @locale
    payload["lang"] = @lang
  end
end
|
|
137
|
+
|
|
138
|
+
private
|
|
139
|
+
|
|
140
|
+
# Populate @data with a tagged copy of the source page's front matter.
#
# The copy is shallow (`dup`), so the source page's own hash gains no keys.
# Added keys: "locale", "lang", "localized" (true), "original_url", and —
# only when the source page has a truthy "permalink" — "original_permalink".
#
# @param page [Jekyll::Page] source page whose front matter is copied
# @param locale [String] locale code stored under "locale" and "lang"
# @return [void]
def setup_localized_data(page, locale)
  @data = page.data.dup
  @data.update(
    "locale"       => locale,
    "lang"         => locale,
    "localized"    => true,
    "original_url" => @original_url
  )

  source_permalink = page.data["permalink"]
  @data["original_permalink"] = source_permalink if source_permalink
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../utils/logger_formatter"
|
|
4
|
+
|
|
5
|
+
module Jekyll
|
|
6
|
+
module L10n
|
|
7
|
+
# Localized Page Mapper - Indexes generated localized pages by their original URL
|
|
8
|
+
#
|
|
9
|
+
# This module builds a mapping of original URLs to their localized page variants.
|
|
10
|
+
# It scans all pages in the Jekyll site, identifies those marked as localized,
|
|
11
|
+
# and groups them by their original_url for quick lookup during processing.
|
|
12
|
+
#
|
|
13
|
+
# The map structure is: { original_url => [localized_page_1, localized_page_2, ...] }
|
|
14
|
+
# This enables efficient matching of localized variants back to their source pages.
|
|
15
|
+
#
|
|
16
|
+
# @example Usage
|
|
17
|
+
# localized_map = LocalizedPageMapper.build_map(site)
|
|
18
|
+
# # => { "/about/" => [<LocalizedPage locale="es">, <LocalizedPage locale="fr">] }
|
|
19
|
+
#
|
|
20
|
+
module LocalizedPageMapper
  extend self

  # Group the site's localized pages by the URL of the page they came from.
  #
  # A page counts as localized when its front matter has `localized` set to
  # exactly `true`; its grouping key is its `original_url` front-matter value.
  # Each localized page found is reported through the debug logger. Pages keep
  # the order in which they appear in `site.pages`.
  #
  # @param site [Jekyll::Site] the Jekyll site object with all generated pages
  # @return [Hash<String, Array<LocalizedPage>>] map of original URL to localized variants
  # @example
  #   pages_map = LocalizedPageMapper.build_map(site)
  #   spanish_pages = pages_map["/about/"] # => [<LocalizedPage locale="es">]
  def build_map(site)
    site.pages.each_with_object({}) do |page, variants_by_url|
      next unless page.data["localized"] == true

      source_url = page.data["original_url"]
      LoggerFormatter.debug_if_enabled(
        "LocalizedPageMapper",
        "Found localized page: #{page.url} (original: #{source_url})"
      )
      (variants_by_url[source_url] ||= []) << page
    end
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
|
|
4
|
+
module L10n
|
|
5
|
+
# Locates original (non-localized) pages by URL.
|
|
6
|
+
#
|
|
7
|
+
# OriginalPageLocator builds an index of original pages in the site and
|
|
8
|
+
# provides fast lookup by URL. This is used during translation to find the
|
|
9
|
+
# original page configuration when processing localized variants.
|
|
10
|
+
#
|
|
11
|
+
# Key responsibilities:
|
|
12
|
+
# * Index original pages by URL
|
|
13
|
+
# * Exclude localized page variants from index
|
|
14
|
+
# * Provide O(1) lookup by URL
|
|
15
|
+
# * Lazily build index on first use
|
|
16
|
+
#
|
|
17
|
+
# @example
|
|
18
|
+
# locator = OriginalPageLocator.new(site)
|
|
19
|
+
# original_page = locator.find_by_url('/docs/index.html')
|
|
20
|
+
class OriginalPageLocator
  # Create a locator for +site+. No index is built until the first lookup.
  #
  # @param site [Jekyll::Site] Jekyll site object
  def initialize(site)
    @site = site
    @index = nil
  end

  # Look up an original (non-localized) page by its URL.
  #
  # The URL index is built on the first call and reused afterwards, so pages
  # added to the site after that first call are not visible to this locator.
  #
  # @param url [String] page URL (e.g., '/docs/index.html')
  # @return [Jekyll::Page, nil] the original page, or nil when none has that URL
  def find_by_url(url)
    @index ||= build_index
    @index[url]
  end

  private

  # Index every page whose `localized` front matter is not exactly `true`,
  # keyed by URL. When two pages share a URL the later one wins.
  #
  # @return [Hash{String => Jekyll::Page}] the freshly built index
  def build_index
    @index = @site.pages.each_with_object({}) do |page, by_url|
      by_url[page.url] = page unless page.data["localized"] == true
    end
  end
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../utils/url_path_builder"
|
|
4
|
+
require_relative "../translation/html_translator"
|
|
5
|
+
require_relative "../utils/file_operations"
|
|
6
|
+
require_relative "../utils/logger_formatter"
|
|
7
|
+
|
|
8
|
+
module Jekyll
|
|
9
|
+
module L10n
|
|
10
|
+
# Writes localized pages with metadata to disk.
|
|
11
|
+
#
|
|
12
|
+
# LocalizedPageWriter translates page output and updates locale metadata
|
|
13
|
+
# (html lang attribute) before writing localized pages to the build output
|
|
14
|
+
# directory. It ensures proper directory structure and updates locale information
|
|
15
|
+
# in the HTML tag.
|
|
16
|
+
#
|
|
17
|
+
# Key responsibilities:
|
|
18
|
+
# * Apply translations to localized page output
|
|
19
|
+
# * Update HTML lang attribute to target locale
|
|
20
|
+
# * Create necessary directory structure
|
|
21
|
+
# * Write localized HTML to disk with UTF-8 encoding
|
|
22
|
+
# * Clean up auto-inserted meta charset tags
|
|
23
|
+
# * Handle parse errors gracefully
|
|
24
|
+
#
|
|
25
|
+
# @example
|
|
26
|
+
# writer = LocalizedPageWriter.new('_site')
|
|
27
|
+
# writer.translate_and_write(page, translator, 'es', '/baseurl')
|
|
28
|
+
# # Localized page written to disk with translations and lang attribute updated
|
|
29
|
+
class LocalizedPageWriter
  # Create a writer that places files under +dest+.
  #
  # @param dest [String] destination build directory
  def initialize(dest)
    @dest = dest
  end

  # Translate a localized page, stamp its locale on the <html> tag and write it.
  #
  # Steps, each followed by a debug log line: run `translator.translate`,
  # replace the page's output with the lang-fixed HTML, write the output to disk.
  #
  # @param localized_page [Jekyll::Page] localized page to write
  # @param translator [#translate] translator for the page
  # @param locale [String] target locale code
  # @param _baseurl [String] base URL (accepted for compatibility, not used)
  # @return [void]
  def translate_and_write(localized_page, translator, locale, _baseurl)
    log_debug_info(localized_page, locale, "start")

    translator.translate
    log_debug_info(localized_page, locale, "after translate")

    localized_page.output = fix_locale_metadata(localized_page.output, locale)
    log_debug_info(localized_page, locale, "after fix_locale")

    write_localized_page(localized_page)
  end

  private

  # Emit a debug line with the page URL, locale and current output size
  # (0 when the page has no output).
  #
  # @param localized_page [Jekyll::Page] page being processed
  # @param locale [String] target locale code
  # @param phase [String] label of the processing step just completed
  # @return [void]
  def log_debug_info(localized_page, locale, phase)
    output_size = localized_page.output&.length || 0
    LoggerFormatter.debug_if_enabled(
      "PageWriter",
      "#{phase}: URL=#{localized_page.url}, locale=#{locale}, output_size=#{output_size}"
    )
  end

  # Write the page's output under the destination directory.
  #
  # The relative file path comes from UrlPathBuilder.url_to_file_path; the
  # containing directory is ensured through FileOperations before writing.
  #
  # @param localized_page [Jekyll::Page] page whose output is written
  # @return [void]
  def write_localized_page(localized_page)
    relative_path = UrlPathBuilder.url_to_file_path(localized_page.url)
    target = File.join(@dest, relative_path)
    FileOperations.ensure_directory(target)

    LoggerFormatter.debug_if_enabled("PageWriter", "Writing to #{relative_path}")
    FileOperations.write_utf8(target, localized_page.output)
  end

  # Return +html+ with the <html> tag's lang attribute set to +locale+.
  #
  # Returns the input untouched when either argument is falsy. Any
  # StandardError raised while parsing, updating or serializing is logged and
  # the original HTML is returned instead, so a bad document never aborts the
  # build.
  #
  # @param html [String, nil] page HTML
  # @param locale [String, nil] target locale code
  # @return [String, nil] updated HTML, or +html+ itself on skip/failure
  def fix_locale_metadata(html, locale)
    return html if !html || !locale

    begin
      document = parse_html(html)
      update_html_lang_attribute(document, locale)
      cleanup_auto_inserted_meta_tag(serialize_html(document))
    rescue StandardError => e
      Jekyll.logger.error "Localization",
                          "Failed to parse HTML for locale #{locale}: #{e.message}"
      html
    end
  end

  # Parse +html+ as a full HTML document.
  #
  # CRITICAL: Nokogiri::HTML auto-inserts <meta http-equiv="Content-Type">.
  # The document parser is still used because the <html> tag must be
  # reachable; the inserted tag is stripped after serialization.
  # See: spec/regression/nokogiri_meta_tag_spec.rb
  #
  # @param html [String] page HTML
  # @return [Nokogiri::HTML::Document]
  def parse_html(html)
    Nokogiri::HTML(html)
  end

  # Set `lang` on the document's <html> element, or warn when there is none.
  #
  # @param doc [Nokogiri::HTML::Document] parsed document
  # @param locale [String] target locale code
  # @return [void]
  def update_html_lang_attribute(doc, locale)
    root = doc.at("html")
    unless root
      return Jekyll.logger.warn(
        "Localization",
        "No <html> tag found for locale #{locale}, skipping lang attribute"
      )
    end

    root["lang"] = locale
  end

  # Serialize the document back to an HTML string.
  #
  # @param doc [Nokogiri::HTML::Document] parsed document
  # @return [String]
  def serialize_html(doc)
    doc.to_html
  end

  # Strip the Content-Type meta tag that serialization adds.
  #
  # Removes every occurrence of exactly
  #   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  # together with one following newline, if present.
  # NOTE(review): an identical tag written by the page author would be removed
  # as well — confirm that is acceptable.
  # See: spec/regression/nokogiri_meta_tag_spec.rb
  #
  # @param result [String] serialized HTML
  # @return [String] HTML without the tag
  def cleanup_auto_inserted_meta_tag(result)
    result.gsub(%r!<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n?!, "")
  end
end
|
|
119
|
+
end
|
|
120
|
+
end
|