jekyll-l10n 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-l10n/constants.rb +2 -2
- data/lib/jekyll-l10n/extraction/extractor.rb +4 -1
- data/lib/jekyll-l10n/po_file/path_builder.rb +22 -2
- data/lib/jekyll-l10n/translation/libre_translator.rb +13 -5
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +1 -1
- data/lib/jekyll-l10n.rb +7 -3
- metadata +43 -5
- data/lib/jekyll-l10n/jekyll/file_sync.rb +0 -110
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +0 -51
- data/lib/jekyll-l10n/jekyll/page_locator.rb +0 -59
- data/lib/jekyll-l10n/jekyll/page_writer.rb +0 -120
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d59b40394ccd291d468bfd045b6b53ffd71204d6b42575f232e7159316c14561
|
|
4
|
+
data.tar.gz: c3c0bc5a22a1ce203a7623be5217f8477b9d4b57f39918ca221a10f693bbd053
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b022fa06243f0d8150be0f53bcbd9861751634fb1d3af4d5cd16ae36bf11cbfb19fd3e86e3af63cad783fea3d21f56338ad4f14cde2fb68165ffe6b9146e12ad
|
|
7
|
+
data.tar.gz: 74a6fa21f10fe87de0c8f891f9eedb77d36d0df2d50ccd23a794cc20d3a127c183ad6edd35b6284189beb7d0472928ee0bbea8a075fbdfd1abd016586354dbbc
|
|
data/lib/jekyll-l10n/constants.rb
CHANGED
|
@@ -114,8 +114,8 @@ module Jekyll
|
|
|
114
114
|
|
|
115
115
|
# Default behavior when LibreTranslate API returns an error
|
|
116
116
|
# If true, translation stops immediately. If false, continues with remaining entries.
|
|
117
|
-
# @return [Boolean]
|
|
118
|
-
DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = true
|
|
117
|
+
# @return [Boolean] false
|
|
118
|
+
DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = false
|
|
119
119
|
|
|
120
120
|
# Default interval for logging LibreTranslate translation progress
|
|
121
121
|
# Progress is logged every N entries. Set to 0 to disable.
|
|
data/lib/jekyll-l10n/extraction/extractor.rb
CHANGED
|
@@ -6,6 +6,7 @@ require_relative "logger"
|
|
|
6
6
|
require_relative "html_string_extractor"
|
|
7
7
|
require_relative "../utils/file_operations"
|
|
8
8
|
require_relative "../utils/site_config_accessor"
|
|
9
|
+
require_relative "../utils/error_handler"
|
|
9
10
|
|
|
10
11
|
module Jekyll
|
|
11
12
|
module L10n
|
|
@@ -99,7 +100,9 @@ module Jekyll
|
|
|
99
100
|
config = find_libretranslate_config
|
|
100
101
|
return unless config
|
|
101
102
|
|
|
102
|
-
|
|
103
|
+
ErrorHandler.handle_with_logging("machine translation") do
|
|
104
|
+
@result_saver.translate_compendia(config)
|
|
105
|
+
end
|
|
103
106
|
end
|
|
104
107
|
|
|
105
108
|
def process_file(file_path)
|
|
data/lib/jekyll-l10n/po_file/path_builder.rb
CHANGED
|
@@ -12,7 +12,8 @@ module Jekyll
|
|
|
12
12
|
# Key responsibilities:
|
|
13
13
|
# * Build paths for compendium PO files
|
|
14
14
|
# * Build paths for page-specific PO files
|
|
15
|
-
# * Handle file path normalization
|
|
15
|
+
# * Handle file path normalization and validation
|
|
16
|
+
# * Prevent path traversal attacks
|
|
16
17
|
#
|
|
17
18
|
# @example
|
|
18
19
|
# compendium = PoPathBuilder.build('_site', '_locales', 'es', nil)
|
|
@@ -25,16 +26,35 @@ module Jekyll
|
|
|
25
26
|
# For compendium (page_path nil): {source}/{locales_dir}/{locale}.po
|
|
26
27
|
# For page-specific: {source}/{locales_dir}/{locale}/{page_path}.po
|
|
27
28
|
#
|
|
29
|
+
# Validates that page_path doesn't escape the base directory using path
|
|
30
|
+
# traversal techniques (e.g., ../../../etc/passwd).
|
|
31
|
+
#
|
|
28
32
|
# @param source [String] Site source directory
|
|
29
33
|
# @param locales_dir [String] Locales directory name (e.g., '_locales')
|
|
30
34
|
# @param locale [String] Locale code (e.g., 'es', 'fr')
|
|
31
35
|
# @param page_path [String, nil] Page path for page-specific file, nil for compendium
|
|
32
36
|
# @return [String] Full path to PO file
|
|
37
|
+
# @raise [Jekyll::L10n::Errors::PoFileError] if page_path attempts to escape base directory
|
|
33
38
|
def self.build(source, locales_dir, locale, page_path)
|
|
34
39
|
if page_path.nil?
|
|
35
40
|
File.join(source, locales_dir, "#{locale}.po")
|
|
36
41
|
else
|
|
37
|
-
|
|
42
|
+
# Build the normal path
|
|
43
|
+
normal_path = File.join(source, locales_dir, locale, "#{page_path}.po")
|
|
44
|
+
|
|
45
|
+
# Validate against path traversal using expanded paths
|
|
46
|
+
base_path = File.expand_path(File.join(source, locales_dir))
|
|
47
|
+
expanded_path = File.expand_path(normal_path)
|
|
48
|
+
|
|
49
|
+
# Security: Validate path stays within base directory
|
|
50
|
+
valid_path = expanded_path.start_with?(base_path + File::SEPARATOR) ||
|
|
51
|
+
expanded_path == base_path
|
|
52
|
+
unless valid_path
|
|
53
|
+
raise Jekyll::L10n::Errors::PoFileError,
|
|
54
|
+
"Path traversal attempt detected in page_path: #{page_path}"
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
normal_path
|
|
38
58
|
end
|
|
39
59
|
end
|
|
40
60
|
end
|
|
data/lib/jekyll-l10n/translation/libre_translator.rb
CHANGED
|
@@ -198,6 +198,18 @@ module Jekyll
|
|
|
198
198
|
def make_api_request(text, target_locale)
|
|
199
199
|
uri = URI("#{@config.libretranslate_api_url}/translate")
|
|
200
200
|
Jekyll.logger.debug "LibreTranslator", "Requesting #{uri}"
|
|
201
|
+
request = build_request(uri, text, target_locale)
|
|
202
|
+
response = http_config.request(request)
|
|
203
|
+
Jekyll.logger.debug "LibreTranslator", "Response code: #{response.code}"
|
|
204
|
+
handle_api_response(response)
|
|
205
|
+
rescue Net::ReadTimeout, Net::OpenTimeout => e
|
|
206
|
+
raise TranslationError, "API timeout: #{e.message}"
|
|
207
|
+
rescue Errno::ECONNREFUSED, Errno::ETIMEDOUT, SocketError => e
|
|
208
|
+
Jekyll.logger.warn "LibreTranslator", "#{e.class.name}: #{e.message}"
|
|
209
|
+
nil
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def build_request(uri, text, target_locale)
|
|
201
213
|
request = Net::HTTP::Post.new(uri, api_headers)
|
|
202
214
|
request.body = {
|
|
203
215
|
:q => text,
|
|
@@ -205,11 +217,7 @@ module Jekyll
|
|
|
205
217
|
:target => target_locale,
|
|
206
218
|
:format => @config.libretranslate_format,
|
|
207
219
|
}.to_json
|
|
208
|
-
|
|
209
|
-
Jekyll.logger.debug "LibreTranslator", "Response code: #{response.code}"
|
|
210
|
-
handle_api_response(response)
|
|
211
|
-
rescue Net::ReadTimeout, Net::OpenTimeout => e
|
|
212
|
-
raise TranslationError, "API timeout: #{e.message}"
|
|
220
|
+
request
|
|
213
221
|
end
|
|
214
222
|
|
|
215
223
|
def handle_api_response(response)
|
|
data/lib/jekyll-l10n/utils/site_config_accessor.rb
CHANGED
|
@@ -21,7 +21,7 @@ module Jekyll
|
|
|
21
21
|
class SiteConfigAccessor
|
|
22
22
|
# Extract localization configuration from site.
|
|
23
23
|
#
|
|
24
|
-
# Accesses the
|
|
24
|
+
# Accesses the with_locales_data configuration which contains the
|
|
25
25
|
# locales, extraction settings, and other localization options.
|
|
26
26
|
#
|
|
27
27
|
# @param site [Jekyll::Site, Hash] Jekyll site object or hash double
|
data/lib/jekyll-l10n.rb
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# Require runtime dependencies explicitly
|
|
4
|
+
# These are declared in gemspec and must be loaded before using their classes
|
|
5
|
+
require "jekyll"
|
|
6
|
+
require "liquid"
|
|
7
|
+
require "nokogiri"
|
|
8
|
+
require "gettext/po"
|
|
9
|
+
|
|
3
10
|
# # jekyll-l10n: Complete Site Localization for Jekyll
|
|
4
11
|
#
|
|
5
12
|
# A comprehensive localization plugin for Jekyll that provides GNU Gettext-based translation
|
|
@@ -189,9 +196,6 @@ require_relative "jekyll-l10n/jekyll/regeneration_checker"
|
|
|
189
196
|
require_relative "jekyll-l10n/jekyll/generator"
|
|
190
197
|
require_relative "jekyll-l10n/jekyll/post_write_processor"
|
|
191
198
|
require_relative "jekyll-l10n/jekyll/post_write_html_reprocessor"
|
|
192
|
-
require_relative "jekyll-l10n/jekyll/file_sync"
|
|
193
|
-
require_relative "jekyll-l10n/jekyll/page_locator"
|
|
194
|
-
require_relative "jekyll-l10n/jekyll/page_writer"
|
|
195
199
|
require_relative "jekyll-l10n/jekyll/url_filter"
|
|
196
200
|
require_relative "jekyll-l10n/utils/text_normalizer"
|
|
197
201
|
require_relative "jekyll-l10n/utils/translation_resolver"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jekyll-l10n
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.1
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- ReleaseBot
|
|
@@ -111,6 +111,48 @@ dependencies:
|
|
|
111
111
|
- - "~>"
|
|
112
112
|
- !ruby/object:Gem::Version
|
|
113
113
|
version: '3.13'
|
|
114
|
+
- !ruby/object:Gem::Dependency
|
|
115
|
+
name: simplecov
|
|
116
|
+
requirement: !ruby/object:Gem::Requirement
|
|
117
|
+
requirements:
|
|
118
|
+
- - "~>"
|
|
119
|
+
- !ruby/object:Gem::Version
|
|
120
|
+
version: '0.22'
|
|
121
|
+
type: :development
|
|
122
|
+
prerelease: false
|
|
123
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
124
|
+
requirements:
|
|
125
|
+
- - "~>"
|
|
126
|
+
- !ruby/object:Gem::Version
|
|
127
|
+
version: '0.22'
|
|
128
|
+
- !ruby/object:Gem::Dependency
|
|
129
|
+
name: simplecov-console
|
|
130
|
+
requirement: !ruby/object:Gem::Requirement
|
|
131
|
+
requirements:
|
|
132
|
+
- - "~>"
|
|
133
|
+
- !ruby/object:Gem::Version
|
|
134
|
+
version: '0.9'
|
|
135
|
+
type: :development
|
|
136
|
+
prerelease: false
|
|
137
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
138
|
+
requirements:
|
|
139
|
+
- - "~>"
|
|
140
|
+
- !ruby/object:Gem::Version
|
|
141
|
+
version: '0.9'
|
|
142
|
+
- !ruby/object:Gem::Dependency
|
|
143
|
+
name: webmock
|
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
|
145
|
+
requirements:
|
|
146
|
+
- - "~>"
|
|
147
|
+
- !ruby/object:Gem::Version
|
|
148
|
+
version: '3.23'
|
|
149
|
+
type: :development
|
|
150
|
+
prerelease: false
|
|
151
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
152
|
+
requirements:
|
|
153
|
+
- - "~>"
|
|
154
|
+
- !ruby/object:Gem::Version
|
|
155
|
+
version: '3.23'
|
|
114
156
|
description: Jekyll plugin for jekyll site localization.
|
|
115
157
|
email:
|
|
116
158
|
- group_58921183_bot_c5520aeba366578e2e444dd181ff3e23@noreply.gitlab.com
|
|
@@ -132,12 +174,8 @@ files:
|
|
|
132
174
|
- lib/jekyll-l10n/extraction/html_string_extractor.rb
|
|
133
175
|
- lib/jekyll-l10n/extraction/logger.rb
|
|
134
176
|
- lib/jekyll-l10n/extraction/result_saver.rb
|
|
135
|
-
- lib/jekyll-l10n/jekyll/file_sync.rb
|
|
136
177
|
- lib/jekyll-l10n/jekyll/generator.rb
|
|
137
178
|
- lib/jekyll-l10n/jekyll/localized_page.rb
|
|
138
|
-
- lib/jekyll-l10n/jekyll/localized_page_mapper.rb
|
|
139
|
-
- lib/jekyll-l10n/jekyll/page_locator.rb
|
|
140
|
-
- lib/jekyll-l10n/jekyll/page_writer.rb
|
|
141
179
|
- lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb
|
|
142
180
|
- lib/jekyll-l10n/jekyll/post_write_processor.rb
|
|
143
181
|
- lib/jekyll-l10n/jekyll/regeneration_checker.rb
|
|
data/lib/jekyll-l10n/jekyll/file_sync.rb
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "../translation/translator"
|
|
4
|
-
require_relative "page_writer"
|
|
5
|
-
require_relative "localized_page_mapper"
|
|
6
|
-
require_relative "../utils/url_path_builder"
|
|
7
|
-
require_relative "../utils/logger_formatter"
|
|
8
|
-
|
|
9
|
-
module Jekyll
|
|
10
|
-
module L10n
|
|
11
|
-
# File Synchronization Manager - Syncs localized pages and applies translations
|
|
12
|
-
#
|
|
13
|
-
# This class manages the synchronization of generated HTML content to localized page variants
|
|
14
|
-
# and applies translations to each variant. It works in coordination with Jekyll's file
|
|
15
|
-
# writing system to ensure all locale-specific content is properly translated.
|
|
16
|
-
#
|
|
17
|
-
# The sync process:
|
|
18
|
-
# 1. Builds a map of localized pages grouped by original URL
|
|
19
|
-
# 2. For each original page, finds its localized variants
|
|
20
|
-
# 3. Reads the original HTML file content
|
|
21
|
-
# 4. Applies translations to each localized variant
|
|
22
|
-
# 5. Writes the translated HTML to disk
|
|
23
|
-
#
|
|
24
|
-
# This class is typically used during the post-write phase to handle final translation
|
|
25
|
-
# application after Jekyll has written all files to disk.
|
|
26
|
-
#
|
|
27
|
-
# @example Usage (typically internal)
|
|
28
|
-
# file_reader = SomeFileReader.new
|
|
29
|
-
# page_writer = PageWriter.new
|
|
30
|
-
# syncer = LocalizationFileSync.new(site, file_reader, page_writer)
|
|
31
|
-
# syncer.sync
|
|
32
|
-
#
|
|
33
|
-
class LocalizationFileSync
|
|
34
|
-
# Initialize the file synchronization manager
|
|
35
|
-
#
|
|
36
|
-
# @param site [Jekyll::Site] The Jekyll site object
|
|
37
|
-
# @param file_reader [Object] A file reader object with a `read(page)` method
|
|
38
|
-
# @param page_writer [PageWriter] A page writer object to write translated HTML
|
|
39
|
-
def initialize(site, file_reader, page_writer)
|
|
40
|
-
@site = site
|
|
41
|
-
@file_reader = file_reader
|
|
42
|
-
@page_writer = page_writer
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Synchronize and translate all localized pages
|
|
46
|
-
#
|
|
47
|
-
# Builds a map of localized pages, reads original HTML content, and applies
|
|
48
|
-
# translations to each locale variant. Logs the process for debugging.
|
|
49
|
-
#
|
|
50
|
-
# @return [void]
|
|
51
|
-
def sync
|
|
52
|
-
Jekyll.logger.info "Localization", "Starting file sync process"
|
|
53
|
-
localized_pages_by_url = LocalizedPageMapper.build_map(@site)
|
|
54
|
-
count = localized_pages_by_url.keys.length
|
|
55
|
-
Jekyll.logger.info "Localization",
|
|
56
|
-
"Found #{count} pages with localized versions"
|
|
57
|
-
sync_files_and_apply_translations(localized_pages_by_url)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
private
|
|
61
|
-
|
|
62
|
-
def sync_files_and_apply_translations(localized_pages_by_url)
|
|
63
|
-
@site.pages.each do |page|
|
|
64
|
-
with_locales = page.data["with_locales"]
|
|
65
|
-
is_localized = page.data["localized"]
|
|
66
|
-
log_page_check(page.url, with_locales, is_localized)
|
|
67
|
-
next unless page.data["with_locales"] == true && page.data["localized"] != true
|
|
68
|
-
|
|
69
|
-
LoggerFormatter.debug_if_enabled("FileSync",
|
|
70
|
-
"Processing page for translation: #{page.url}")
|
|
71
|
-
original_content = @file_reader.read(page)
|
|
72
|
-
next unless original_content
|
|
73
|
-
|
|
74
|
-
apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
def apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
|
|
79
|
-
return unless localized_pages_by_url[page.url]
|
|
80
|
-
|
|
81
|
-
localized_pages_by_url[page.url].each do |localized_page|
|
|
82
|
-
translate_and_write_localized_page(localized_page, original_content)
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def translate_and_write_localized_page(localized_page, original_content)
|
|
87
|
-
content_to_translate = if localized_page.output && !localized_page.output.empty?
|
|
88
|
-
localized_page.output
|
|
89
|
-
else
|
|
90
|
-
original_content
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
localized_page.instance_variable_set(:@output, content_to_translate)
|
|
94
|
-
|
|
95
|
-
locale = localized_page.data["locale"]
|
|
96
|
-
baseurl = @site.config["baseurl"]
|
|
97
|
-
|
|
98
|
-
translator = Translator.new(localized_page)
|
|
99
|
-
|
|
100
|
-
@page_writer.translate_and_write(localized_page, translator, locale, baseurl)
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def log_page_check(url, with_locales, is_localized)
|
|
104
|
-
LoggerFormatter.debug_if_enabled("FileSync",
|
|
105
|
-
"Checking page: #{url} (with_locales: #{with_locales}, " \
|
|
106
|
-
"localized: #{is_localized})")
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
end
|
|
110
|
-
end
|
|
data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "../utils/logger_formatter"
|
|
4
|
-
|
|
5
|
-
module Jekyll
|
|
6
|
-
module L10n
|
|
7
|
-
# Localized Page Mapper - Indexes generated localized pages by their original URL
|
|
8
|
-
#
|
|
9
|
-
# This module builds a mapping of original URLs to their localized page variants.
|
|
10
|
-
# It scans all pages in the Jekyll site, identifies those marked as localized,
|
|
11
|
-
# and groups them by their original_url for quick lookup during processing.
|
|
12
|
-
#
|
|
13
|
-
# The map structure is: { original_url => [localized_page_1, localized_page_2, ...] }
|
|
14
|
-
# This enables efficient matching of localized variants back to their source pages.
|
|
15
|
-
#
|
|
16
|
-
# @example Usage
|
|
17
|
-
# localized_map = LocalizedPageMapper.build_map(site)
|
|
18
|
-
# # => { "/about/" => [<LocalizedPage locale="es">, <LocalizedPage locale="fr">] }
|
|
19
|
-
#
|
|
20
|
-
module LocalizedPageMapper
|
|
21
|
-
extend self
|
|
22
|
-
|
|
23
|
-
# Build a mapping of original URLs to localized page variants
|
|
24
|
-
#
|
|
25
|
-
# Scans all pages in the Jekyll site, finds those marked with `localized: true`,
|
|
26
|
-
# and groups them by their original_url. Returns a hash with original URLs as keys
|
|
27
|
-
# and arrays of LocalizedPage objects as values.
|
|
28
|
-
#
|
|
29
|
-
# @param site [Jekyll::Site] The Jekyll site object with all generated pages
|
|
30
|
-
# @return [Hash<String, Array<LocalizedPage>>] Map of original URL to localized variants
|
|
31
|
-
# @example
|
|
32
|
-
# pages_map = LocalizedPageMapper.build_map(site)
|
|
33
|
-
# spanish_pages = pages_map["/about/"] # => [<LocalizedPage locale="es">]
|
|
34
|
-
def build_map(site)
|
|
35
|
-
localized_pages = {}
|
|
36
|
-
site.pages.each do |page|
|
|
37
|
-
next unless page.data["localized"] == true
|
|
38
|
-
|
|
39
|
-
original_url = page.data["original_url"]
|
|
40
|
-
LoggerFormatter.debug_if_enabled(
|
|
41
|
-
"LocalizedPageMapper",
|
|
42
|
-
"Found localized page: #{page.url} (original: #{original_url})"
|
|
43
|
-
)
|
|
44
|
-
localized_pages[original_url] ||= []
|
|
45
|
-
localized_pages[original_url] << page
|
|
46
|
-
end
|
|
47
|
-
localized_pages
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
end
|
|
data/lib/jekyll-l10n/jekyll/page_locator.rb
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Jekyll
|
|
4
|
-
module L10n
|
|
5
|
-
# Locates original (non-localized) pages by URL.
|
|
6
|
-
#
|
|
7
|
-
# OriginalPageLocator builds an index of original pages in the site and
|
|
8
|
-
# provides fast lookup by URL. This is used during translation to find the
|
|
9
|
-
# original page configuration when processing localized variants.
|
|
10
|
-
#
|
|
11
|
-
# Key responsibilities:
|
|
12
|
-
# * Index original pages by URL
|
|
13
|
-
# * Exclude localized page variants from index
|
|
14
|
-
# * Provide O(1) lookup by URL
|
|
15
|
-
# * Lazily build index on first use
|
|
16
|
-
#
|
|
17
|
-
# @example
|
|
18
|
-
# locator = OriginalPageLocator.new(site)
|
|
19
|
-
# original_page = locator.find_by_url('/docs/index.html')
|
|
20
|
-
class OriginalPageLocator
|
|
21
|
-
# Initialize a new OriginalPageLocator.
|
|
22
|
-
#
|
|
23
|
-
# @param site [Jekyll::Site] Jekyll site object
|
|
24
|
-
def initialize(site)
|
|
25
|
-
@site = site
|
|
26
|
-
@index = nil
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# Find an original page by URL.
|
|
30
|
-
#
|
|
31
|
-
# Builds index on first call, then uses cached index for subsequent lookups.
|
|
32
|
-
# Returns nil if page not found.
|
|
33
|
-
#
|
|
34
|
-
# @param url [String] Page URL (e.g., '/docs/index.html')
|
|
35
|
-
# @return [Jekyll::Page, nil] Original page if found, nil otherwise
|
|
36
|
-
def find_by_url(url)
|
|
37
|
-
build_index unless @index
|
|
38
|
-
@index[url]
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
private
|
|
42
|
-
|
|
43
|
-
# Build index of original pages by URL.
|
|
44
|
-
#
|
|
45
|
-
# Indexes all pages that are not marked as localized, allowing fast
|
|
46
|
-
# lookup by URL without iterating through site.pages each time.
|
|
47
|
-
#
|
|
48
|
-
# @return [void]
|
|
49
|
-
def build_index
|
|
50
|
-
@index = {}
|
|
51
|
-
@site.pages.each do |page|
|
|
52
|
-
next if page.data["localized"] == true
|
|
53
|
-
|
|
54
|
-
@index[page.url] = page
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
end
|
|
data/lib/jekyll-l10n/jekyll/page_writer.rb
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "../utils/url_path_builder"
|
|
4
|
-
require_relative "../translation/html_translator"
|
|
5
|
-
require_relative "../utils/file_operations"
|
|
6
|
-
require_relative "../utils/logger_formatter"
|
|
7
|
-
|
|
8
|
-
module Jekyll
|
|
9
|
-
module L10n
|
|
10
|
-
# Writes localized pages with metadata to disk.
|
|
11
|
-
#
|
|
12
|
-
# LocalizedPageWriter translates page output and updates locale metadata
|
|
13
|
-
# (html lang attribute) before writing localized pages to the build output
|
|
14
|
-
# directory. It ensures proper directory structure and updates locale information
|
|
15
|
-
# in the HTML tag.
|
|
16
|
-
#
|
|
17
|
-
# Key responsibilities:
|
|
18
|
-
# * Apply translations to localized page output
|
|
19
|
-
# * Update HTML lang attribute to target locale
|
|
20
|
-
# * Create necessary directory structure
|
|
21
|
-
# * Write localized HTML to disk with UTF-8 encoding
|
|
22
|
-
# * Clean up auto-inserted meta charset tags
|
|
23
|
-
# * Handle parse errors gracefully
|
|
24
|
-
#
|
|
25
|
-
# @example
|
|
26
|
-
# writer = LocalizedPageWriter.new('_site')
|
|
27
|
-
# writer.translate_and_write(page, translator, 'es', '/baseurl')
|
|
28
|
-
# # Localized page written to disk with translations and lang attribute updated
|
|
29
|
-
class LocalizedPageWriter
|
|
30
|
-
# Initialize a new LocalizedPageWriter.
|
|
31
|
-
#
|
|
32
|
-
# @param dest [String] Destination build directory
|
|
33
|
-
def initialize(dest)
|
|
34
|
-
@dest = dest
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
# Translate page content and write to disk.
|
|
38
|
-
#
|
|
39
|
-
# Applies translator, updates HTML lang attribute with locale, ensures
|
|
40
|
-
# output directory exists, and writes translated HTML to file.
|
|
41
|
-
#
|
|
42
|
-
# @param localized_page [Jekyll::Page] Localized page to write
|
|
43
|
-
# @param translator [Object] Translator object with translate method
|
|
44
|
-
# @param locale [String] Target locale code
|
|
45
|
-
# @param _baseurl [String] Base URL (passed for compatibility, not used)
|
|
46
|
-
# @return [void]
|
|
47
|
-
def translate_and_write(localized_page, translator, locale, _baseurl)
|
|
48
|
-
log_debug_info(localized_page, locale, "start")
|
|
49
|
-
translator.translate
|
|
50
|
-
|
|
51
|
-
log_debug_info(localized_page, locale, "after translate")
|
|
52
|
-
localized_page.output = fix_locale_metadata(localized_page.output, locale)
|
|
53
|
-
|
|
54
|
-
log_debug_info(localized_page, locale, "after fix_locale")
|
|
55
|
-
write_localized_page(localized_page)
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
private
|
|
59
|
-
|
|
60
|
-
def log_debug_info(localized_page, locale, phase)
|
|
61
|
-
LoggerFormatter.debug_if_enabled("PageWriter",
|
|
62
|
-
"#{phase}: URL=#{localized_page.url}, locale=#{locale}, " \
|
|
63
|
-
"output_size=#{localized_page.output&.length || 0}")
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
def write_localized_page(localized_page)
|
|
67
|
-
localized_file_path = UrlPathBuilder.url_to_file_path(localized_page.url)
|
|
68
|
-
localized_file = File.join(@dest, localized_file_path)
|
|
69
|
-
FileOperations.ensure_directory(localized_file)
|
|
70
|
-
|
|
71
|
-
LoggerFormatter.debug_if_enabled("PageWriter", "Writing to #{localized_file_path}")
|
|
72
|
-
FileOperations.write_utf8(localized_file, localized_page.output)
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def fix_locale_metadata(html, locale)
|
|
76
|
-
return html unless html && locale
|
|
77
|
-
|
|
78
|
-
doc = parse_html(html)
|
|
79
|
-
update_html_lang_attribute(doc, locale)
|
|
80
|
-
result = serialize_html(doc)
|
|
81
|
-
cleanup_auto_inserted_meta_tag(result)
|
|
82
|
-
rescue StandardError => e
|
|
83
|
-
Jekyll.logger.error "Localization",
|
|
84
|
-
"Failed to parse HTML for locale #{locale}: #{e.message}"
|
|
85
|
-
html
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def parse_html(html)
|
|
89
|
-
# CRITICAL: Nokogiri::HTML auto-inserts <meta http-equiv="Content-Type">
|
|
90
|
-
# We parse with HTML to access the <html> tag, then remove the auto-inserted
|
|
91
|
-
# meta tag using regex post-processing.
|
|
92
|
-
# See: spec/regression/nokogiri_meta_tag_spec.rb
|
|
93
|
-
Nokogiri::HTML(html)
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
def update_html_lang_attribute(doc, locale)
|
|
97
|
-
html_tag = doc.at("html")
|
|
98
|
-
|
|
99
|
-
if html_tag
|
|
100
|
-
html_tag["lang"] = locale
|
|
101
|
-
else
|
|
102
|
-
Jekyll.logger.warn("Localization",
|
|
103
|
-
"No <html> tag found for locale #{locale}, skipping lang attribute")
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def serialize_html(doc)
|
|
108
|
-
doc.to_html
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def cleanup_auto_inserted_meta_tag(result)
|
|
112
|
-
# Remove the auto-inserted meta tag by libxml2 during HTML serialization
|
|
113
|
-
# Matches: <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
|
114
|
-
# See: spec/regression/nokogiri_meta_tag_spec.rb
|
|
115
|
-
pattern = %r!<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n?!
|
|
116
|
-
result.gsub(pattern, "")
|
|
117
|
-
end
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
end
|