jekyll-l10n 1.1.1 → 1.2.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2a1a64737ee988c61bc5d4c37986c5e2553966bf73fc251ce807976117013236
4
- data.tar.gz: b88c9b6283535514aebd9a77e2d5fe4f01407f008e5de2b8d21e0b30cb76bc85
3
+ metadata.gz: d59b40394ccd291d468bfd045b6b53ffd71204d6b42575f232e7159316c14561
4
+ data.tar.gz: c3c0bc5a22a1ce203a7623be5217f8477b9d4b57f39918ca221a10f693bbd053
5
5
  SHA512:
6
- metadata.gz: 332001e7246402a4a4acc41dbc70761f6d26bc506fff76f116d3f971754b617cc39b5496ec69544edd0264777fc17030797eb9be29edf7a60fb751c86d38d1a2
7
- data.tar.gz: 69b96d33ceaf4bcbe8e2ab8073e7d6fe549e1cfd5e2b09cce41ea1f32def1df650c8c7347ff8b15c230c7c237b3073ff77b015c8881b291b2f3a6c50ed020fa1
6
+ metadata.gz: b022fa06243f0d8150be0f53bcbd9861751634fb1d3af4d5cd16ae36bf11cbfb19fd3e86e3af63cad783fea3d21f56338ad4f14cde2fb68165ffe6b9146e12ad
7
+ data.tar.gz: 74a6fa21f10fe87de0c8f891f9eedb77d36d0df2d50ccd23a794cc20d3a127c183ad6edd35b6284189beb7d0472928ee0bbea8a075fbdfd1abd016586354dbbc
@@ -114,8 +114,8 @@ module Jekyll
114
114
 
115
115
  # Default behavior when LibreTranslate API returns an error
116
116
  # If true, translation stops immediately. If false, continues with remaining entries.
117
- # @return [Boolean] true
118
- DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = true
117
+ # @return [Boolean] false
118
+ DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = false
119
119
 
120
120
  # Default interval for logging LibreTranslate translation progress
121
121
  # Progress is logged every N entries. Set to 0 to disable.
@@ -6,6 +6,7 @@ require_relative "logger"
6
6
  require_relative "html_string_extractor"
7
7
  require_relative "../utils/file_operations"
8
8
  require_relative "../utils/site_config_accessor"
9
+ require_relative "../utils/error_handler"
9
10
 
10
11
  module Jekyll
11
12
  module L10n
@@ -99,7 +100,9 @@ module Jekyll
99
100
  config = find_libretranslate_config
100
101
  return unless config
101
102
 
102
- @result_saver.translate_compendia(config)
103
+ ErrorHandler.handle_with_logging("machine translation") do
104
+ @result_saver.translate_compendia(config)
105
+ end
103
106
  end
104
107
 
105
108
  def process_file(file_path)
@@ -12,7 +12,8 @@ module Jekyll
12
12
  # Key responsibilities:
13
13
  # * Build paths for compendium PO files
14
14
  # * Build paths for page-specific PO files
15
- # * Handle file path normalization
15
+ # * Handle file path normalization and validation
16
+ # * Prevent path traversal attacks
16
17
  #
17
18
  # @example
18
19
  # compendium = PoPathBuilder.build('_site', '_locales', 'es', nil)
@@ -25,16 +26,35 @@ module Jekyll
25
26
  # For compendium (page_path nil): {source}/{locales_dir}/{locale}.po
26
27
  # For page-specific: {source}/{locales_dir}/{locale}/{page_path}.po
27
28
  #
29
+ # Validates that page_path doesn't escape the base directory using path
30
+ # traversal techniques (e.g., ../../../etc/passwd).
31
+ #
28
32
  # @param source [String] Site source directory
29
33
  # @param locales_dir [String] Locales directory name (e.g., '_locales')
30
34
  # @param locale [String] Locale code (e.g., 'es', 'fr')
31
35
  # @param page_path [String, nil] Page path for page-specific file, nil for compendium
32
36
  # @return [String] Full path to PO file
37
+ # @raise [Jekyll::L10n::Errors::PoFileError] if page_path attempts to escape base directory
33
38
  def self.build(source, locales_dir, locale, page_path)
34
39
  if page_path.nil?
35
40
  File.join(source, locales_dir, "#{locale}.po")
36
41
  else
37
- File.join(source, locales_dir, locale, "#{page_path}.po")
42
+ # Build the normal path
43
+ normal_path = File.join(source, locales_dir, locale, "#{page_path}.po")
44
+
45
+ # Validate against path traversal using expanded paths
46
+ base_path = File.expand_path(File.join(source, locales_dir))
47
+ expanded_path = File.expand_path(normal_path)
48
+
49
+ # Security: Validate path stays within base directory
50
+ valid_path = expanded_path.start_with?(base_path + File::SEPARATOR) ||
51
+ expanded_path == base_path
52
+ unless valid_path
53
+ raise Jekyll::L10n::Errors::PoFileError,
54
+ "Path traversal attempt detected in page_path: #{page_path}"
55
+ end
56
+
57
+ normal_path
38
58
  end
39
59
  end
40
60
  end
@@ -198,6 +198,18 @@ module Jekyll
198
198
  def make_api_request(text, target_locale)
199
199
  uri = URI("#{@config.libretranslate_api_url}/translate")
200
200
  Jekyll.logger.debug "LibreTranslator", "Requesting #{uri}"
201
+ request = build_request(uri, text, target_locale)
202
+ response = http_config.request(request)
203
+ Jekyll.logger.debug "LibreTranslator", "Response code: #{response.code}"
204
+ handle_api_response(response)
205
+ rescue Net::ReadTimeout, Net::OpenTimeout => e
206
+ raise TranslationError, "API timeout: #{e.message}"
207
+ rescue Errno::ECONNREFUSED, Errno::ETIMEDOUT, SocketError => e
208
+ Jekyll.logger.warn "LibreTranslator", "#{e.class.name}: #{e.message}"
209
+ nil
210
+ end
211
+
212
+ def build_request(uri, text, target_locale)
201
213
  request = Net::HTTP::Post.new(uri, api_headers)
202
214
  request.body = {
203
215
  :q => text,
@@ -205,11 +217,7 @@ module Jekyll
205
217
  :target => target_locale,
206
218
  :format => @config.libretranslate_format,
207
219
  }.to_json
208
- response = http_config.request(request)
209
- Jekyll.logger.debug "LibreTranslator", "Response code: #{response.code}"
210
- handle_api_response(response)
211
- rescue Net::ReadTimeout, Net::OpenTimeout => e
212
- raise TranslationError, "API timeout: #{e.message}"
220
+ request
213
221
  end
214
222
 
215
223
  def handle_api_response(response)
@@ -21,7 +21,7 @@ module Jekyll
21
21
  class SiteConfigAccessor
22
22
  # Extract localization configuration from site.
23
23
  #
24
- # Accesses the localization_gettext configuration which contains the
24
+ # Accesses the with_locales_data configuration which contains the
25
25
  # locales, extraction settings, and other localization options.
26
26
  #
27
27
  # @param site [Jekyll::Site, Hash] Jekyll site object or hash double
data/lib/jekyll-l10n.rb CHANGED
@@ -1,5 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Require runtime dependencies explicitly
4
+ # These are declared in gemspec and must be loaded before using their classes
5
+ require "jekyll"
6
+ require "liquid"
7
+ require "nokogiri"
8
+ require "gettext/po"
9
+
3
10
  # # jekyll-l10n: Complete Site Localization for Jekyll
4
11
  #
5
12
  # A comprehensive localization plugin for Jekyll that provides GNU Gettext-based translation
@@ -189,9 +196,6 @@ require_relative "jekyll-l10n/jekyll/regeneration_checker"
189
196
  require_relative "jekyll-l10n/jekyll/generator"
190
197
  require_relative "jekyll-l10n/jekyll/post_write_processor"
191
198
  require_relative "jekyll-l10n/jekyll/post_write_html_reprocessor"
192
- require_relative "jekyll-l10n/jekyll/file_sync"
193
- require_relative "jekyll-l10n/jekyll/page_locator"
194
- require_relative "jekyll-l10n/jekyll/page_writer"
195
199
  require_relative "jekyll-l10n/jekyll/url_filter"
196
200
  require_relative "jekyll-l10n/utils/text_normalizer"
197
201
  require_relative "jekyll-l10n/utils/translation_resolver"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-l10n
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ReleaseBot
@@ -111,6 +111,48 @@ dependencies:
111
111
  - - "~>"
112
112
  - !ruby/object:Gem::Version
113
113
  version: '3.13'
114
+ - !ruby/object:Gem::Dependency
115
+ name: simplecov
116
+ requirement: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - "~>"
119
+ - !ruby/object:Gem::Version
120
+ version: '0.22'
121
+ type: :development
122
+ prerelease: false
123
+ version_requirements: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - "~>"
126
+ - !ruby/object:Gem::Version
127
+ version: '0.22'
128
+ - !ruby/object:Gem::Dependency
129
+ name: simplecov-console
130
+ requirement: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - "~>"
133
+ - !ruby/object:Gem::Version
134
+ version: '0.9'
135
+ type: :development
136
+ prerelease: false
137
+ version_requirements: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - "~>"
140
+ - !ruby/object:Gem::Version
141
+ version: '0.9'
142
+ - !ruby/object:Gem::Dependency
143
+ name: webmock
144
+ requirement: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - "~>"
147
+ - !ruby/object:Gem::Version
148
+ version: '3.23'
149
+ type: :development
150
+ prerelease: false
151
+ version_requirements: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - "~>"
154
+ - !ruby/object:Gem::Version
155
+ version: '3.23'
114
156
  description: Jekyll plugin for jekyll site localization.
115
157
  email:
116
158
  - group_58921183_bot_c5520aeba366578e2e444dd181ff3e23@noreply.gitlab.com
@@ -132,12 +174,8 @@ files:
132
174
  - lib/jekyll-l10n/extraction/html_string_extractor.rb
133
175
  - lib/jekyll-l10n/extraction/logger.rb
134
176
  - lib/jekyll-l10n/extraction/result_saver.rb
135
- - lib/jekyll-l10n/jekyll/file_sync.rb
136
177
  - lib/jekyll-l10n/jekyll/generator.rb
137
178
  - lib/jekyll-l10n/jekyll/localized_page.rb
138
- - lib/jekyll-l10n/jekyll/localized_page_mapper.rb
139
- - lib/jekyll-l10n/jekyll/page_locator.rb
140
- - lib/jekyll-l10n/jekyll/page_writer.rb
141
179
  - lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb
142
180
  - lib/jekyll-l10n/jekyll/post_write_processor.rb
143
181
  - lib/jekyll-l10n/jekyll/regeneration_checker.rb
@@ -1,110 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "../translation/translator"
4
- require_relative "page_writer"
5
- require_relative "localized_page_mapper"
6
- require_relative "../utils/url_path_builder"
7
- require_relative "../utils/logger_formatter"
8
-
9
- module Jekyll
10
- module L10n
11
- # File Synchronization Manager - Syncs localized pages and applies translations
12
- #
13
- # This class manages the synchronization of generated HTML content to localized page variants
14
- # and applies translations to each variant. It works in coordination with Jekyll's file
15
- # writing system to ensure all locale-specific content is properly translated.
16
- #
17
- # The sync process:
18
- # 1. Builds a map of localized pages grouped by original URL
19
- # 2. For each original page, finds its localized variants
20
- # 3. Reads the original HTML file content
21
- # 4. Applies translations to each localized variant
22
- # 5. Writes the translated HTML to disk
23
- #
24
- # This class is typically used during the post-write phase to handle final translation
25
- # application after Jekyll has written all files to disk.
26
- #
27
- # @example Usage (typically internal)
28
- # file_reader = SomeFileReader.new
29
- # page_writer = PageWriter.new
30
- # syncer = LocalizationFileSync.new(site, file_reader, page_writer)
31
- # syncer.sync
32
- #
33
- class LocalizationFileSync
34
- # Initialize the file synchronization manager
35
- #
36
- # @param site [Jekyll::Site] The Jekyll site object
37
- # @param file_reader [Object] A file reader object with a `read(page)` method
38
- # @param page_writer [PageWriter] A page writer object to write translated HTML
39
- def initialize(site, file_reader, page_writer)
40
- @site = site
41
- @file_reader = file_reader
42
- @page_writer = page_writer
43
- end
44
-
45
- # Synchronize and translate all localized pages
46
- #
47
- # Builds a map of localized pages, reads original HTML content, and applies
48
- # translations to each locale variant. Logs the process for debugging.
49
- #
50
- # @return [void]
51
- def sync
52
- Jekyll.logger.info "Localization", "Starting file sync process"
53
- localized_pages_by_url = LocalizedPageMapper.build_map(@site)
54
- count = localized_pages_by_url.keys.length
55
- Jekyll.logger.info "Localization",
56
- "Found #{count} pages with localized versions"
57
- sync_files_and_apply_translations(localized_pages_by_url)
58
- end
59
-
60
- private
61
-
62
- def sync_files_and_apply_translations(localized_pages_by_url)
63
- @site.pages.each do |page|
64
- with_locales = page.data["with_locales"]
65
- is_localized = page.data["localized"]
66
- log_page_check(page.url, with_locales, is_localized)
67
- next unless page.data["with_locales"] == true && page.data["localized"] != true
68
-
69
- LoggerFormatter.debug_if_enabled("FileSync",
70
- "Processing page for translation: #{page.url}")
71
- original_content = @file_reader.read(page)
72
- next unless original_content
73
-
74
- apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
75
- end
76
- end
77
-
78
- def apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
79
- return unless localized_pages_by_url[page.url]
80
-
81
- localized_pages_by_url[page.url].each do |localized_page|
82
- translate_and_write_localized_page(localized_page, original_content)
83
- end
84
- end
85
-
86
- def translate_and_write_localized_page(localized_page, original_content)
87
- content_to_translate = if localized_page.output && !localized_page.output.empty?
88
- localized_page.output
89
- else
90
- original_content
91
- end
92
-
93
- localized_page.instance_variable_set(:@output, content_to_translate)
94
-
95
- locale = localized_page.data["locale"]
96
- baseurl = @site.config["baseurl"]
97
-
98
- translator = Translator.new(localized_page)
99
-
100
- @page_writer.translate_and_write(localized_page, translator, locale, baseurl)
101
- end
102
-
103
- def log_page_check(url, with_locales, is_localized)
104
- LoggerFormatter.debug_if_enabled("FileSync",
105
- "Checking page: #{url} (with_locales: #{with_locales}, " \
106
- "localized: #{is_localized})")
107
- end
108
- end
109
- end
110
- end
@@ -1,51 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "../utils/logger_formatter"
4
-
5
- module Jekyll
6
- module L10n
7
- # Localized Page Mapper - Indexes generated localized pages by their original URL
8
- #
9
- # This module builds a mapping of original URLs to their localized page variants.
10
- # It scans all pages in the Jekyll site, identifies those marked as localized,
11
- # and groups them by their original_url for quick lookup during processing.
12
- #
13
- # The map structure is: { original_url => [localized_page_1, localized_page_2, ...] }
14
- # This enables efficient matching of localized variants back to their source pages.
15
- #
16
- # @example Usage
17
- # localized_map = LocalizedPageMapper.build_map(site)
18
- # # => { "/about/" => [<LocalizedPage locale="es">, <LocalizedPage locale="fr">] }
19
- #
20
- module LocalizedPageMapper
21
- extend self
22
-
23
- # Build a mapping of original URLs to localized page variants
24
- #
25
- # Scans all pages in the Jekyll site, finds those marked with `localized: true`,
26
- # and groups them by their original_url. Returns a hash with original URLs as keys
27
- # and arrays of LocalizedPage objects as values.
28
- #
29
- # @param site [Jekyll::Site] The Jekyll site object with all generated pages
30
- # @return [Hash<String, Array<LocalizedPage>>] Map of original URL to localized variants
31
- # @example
32
- # pages_map = LocalizedPageMapper.build_map(site)
33
- # spanish_pages = pages_map["/about/"] # => [<LocalizedPage locale="es">]
34
- def build_map(site)
35
- localized_pages = {}
36
- site.pages.each do |page|
37
- next unless page.data["localized"] == true
38
-
39
- original_url = page.data["original_url"]
40
- LoggerFormatter.debug_if_enabled(
41
- "LocalizedPageMapper",
42
- "Found localized page: #{page.url} (original: #{original_url})"
43
- )
44
- localized_pages[original_url] ||= []
45
- localized_pages[original_url] << page
46
- end
47
- localized_pages
48
- end
49
- end
50
- end
51
- end
@@ -1,59 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Jekyll
4
- module L10n
5
- # Locates original (non-localized) pages by URL.
6
- #
7
- # OriginalPageLocator builds an index of original pages in the site and
8
- # provides fast lookup by URL. This is used during translation to find the
9
- # original page configuration when processing localized variants.
10
- #
11
- # Key responsibilities:
12
- # * Index original pages by URL
13
- # * Exclude localized page variants from index
14
- # * Provide O(1) lookup by URL
15
- # * Lazily build index on first use
16
- #
17
- # @example
18
- # locator = OriginalPageLocator.new(site)
19
- # original_page = locator.find_by_url('/docs/index.html')
20
- class OriginalPageLocator
21
- # Initialize a new OriginalPageLocator.
22
- #
23
- # @param site [Jekyll::Site] Jekyll site object
24
- def initialize(site)
25
- @site = site
26
- @index = nil
27
- end
28
-
29
- # Find an original page by URL.
30
- #
31
- # Builds index on first call, then uses cached index for subsequent lookups.
32
- # Returns nil if page not found.
33
- #
34
- # @param url [String] Page URL (e.g., '/docs/index.html')
35
- # @return [Jekyll::Page, nil] Original page if found, nil otherwise
36
- def find_by_url(url)
37
- build_index unless @index
38
- @index[url]
39
- end
40
-
41
- private
42
-
43
- # Build index of original pages by URL.
44
- #
45
- # Indexes all pages that are not marked as localized, allowing fast
46
- # lookup by URL without iterating through site.pages each time.
47
- #
48
- # @return [void]
49
- def build_index
50
- @index = {}
51
- @site.pages.each do |page|
52
- next if page.data["localized"] == true
53
-
54
- @index[page.url] = page
55
- end
56
- end
57
- end
58
- end
59
- end
@@ -1,120 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "../utils/url_path_builder"
4
- require_relative "../translation/html_translator"
5
- require_relative "../utils/file_operations"
6
- require_relative "../utils/logger_formatter"
7
-
8
- module Jekyll
9
- module L10n
10
- # Writes localized pages with metadata to disk.
11
- #
12
- # LocalizedPageWriter translates page output and updates locale metadata
13
- # (html lang attribute) before writing localized pages to the build output
14
- # directory. It ensures proper directory structure and updates locale information
15
- # in the HTML tag.
16
- #
17
- # Key responsibilities:
18
- # * Apply translations to localized page output
19
- # * Update HTML lang attribute to target locale
20
- # * Create necessary directory structure
21
- # * Write localized HTML to disk with UTF-8 encoding
22
- # * Clean up auto-inserted meta charset tags
23
- # * Handle parse errors gracefully
24
- #
25
- # @example
26
- # writer = LocalizedPageWriter.new('_site')
27
- # writer.translate_and_write(page, translator, 'es', '/baseurl')
28
- # # Localized page written to disk with translations and lang attribute updated
29
- class LocalizedPageWriter
30
- # Initialize a new LocalizedPageWriter.
31
- #
32
- # @param dest [String] Destination build directory
33
- def initialize(dest)
34
- @dest = dest
35
- end
36
-
37
- # Translate page content and write to disk.
38
- #
39
- # Applies translator, updates HTML lang attribute with locale, ensures
40
- # output directory exists, and writes translated HTML to file.
41
- #
42
- # @param localized_page [Jekyll::Page] Localized page to write
43
- # @param translator [Object] Translator object with translate method
44
- # @param locale [String] Target locale code
45
- # @param _baseurl [String] Base URL (passed for compatibility, not used)
46
- # @return [void]
47
- def translate_and_write(localized_page, translator, locale, _baseurl)
48
- log_debug_info(localized_page, locale, "start")
49
- translator.translate
50
-
51
- log_debug_info(localized_page, locale, "after translate")
52
- localized_page.output = fix_locale_metadata(localized_page.output, locale)
53
-
54
- log_debug_info(localized_page, locale, "after fix_locale")
55
- write_localized_page(localized_page)
56
- end
57
-
58
- private
59
-
60
- def log_debug_info(localized_page, locale, phase)
61
- LoggerFormatter.debug_if_enabled("PageWriter",
62
- "#{phase}: URL=#{localized_page.url}, locale=#{locale}, " \
63
- "output_size=#{localized_page.output&.length || 0}")
64
- end
65
-
66
- def write_localized_page(localized_page)
67
- localized_file_path = UrlPathBuilder.url_to_file_path(localized_page.url)
68
- localized_file = File.join(@dest, localized_file_path)
69
- FileOperations.ensure_directory(localized_file)
70
-
71
- LoggerFormatter.debug_if_enabled("PageWriter", "Writing to #{localized_file_path}")
72
- FileOperations.write_utf8(localized_file, localized_page.output)
73
- end
74
-
75
- def fix_locale_metadata(html, locale)
76
- return html unless html && locale
77
-
78
- doc = parse_html(html)
79
- update_html_lang_attribute(doc, locale)
80
- result = serialize_html(doc)
81
- cleanup_auto_inserted_meta_tag(result)
82
- rescue StandardError => e
83
- Jekyll.logger.error "Localization",
84
- "Failed to parse HTML for locale #{locale}: #{e.message}"
85
- html
86
- end
87
-
88
- def parse_html(html)
89
- # CRITICAL: Nokogiri::HTML auto-inserts <meta http-equiv="Content-Type">
90
- # We parse with HTML to access the <html> tag, then remove the auto-inserted
91
- # meta tag using regex post-processing.
92
- # See: spec/regression/nokogiri_meta_tag_spec.rb
93
- Nokogiri::HTML(html)
94
- end
95
-
96
- def update_html_lang_attribute(doc, locale)
97
- html_tag = doc.at("html")
98
-
99
- if html_tag
100
- html_tag["lang"] = locale
101
- else
102
- Jekyll.logger.warn("Localization",
103
- "No <html> tag found for locale #{locale}, skipping lang attribute")
104
- end
105
- end
106
-
107
- def serialize_html(doc)
108
- doc.to_html
109
- end
110
-
111
- def cleanup_auto_inserted_meta_tag(result)
112
- # Remove the auto-inserted meta tag by libxml2 during HTML serialization
113
- # Matches: <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
114
- # See: spec/regression/nokogiri_meta_tag_spec.rb
115
- pattern = %r!<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n?!
116
- result.gsub(pattern, "")
117
- end
118
- end
119
- end
120
- end