jekyll-l10n 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +94 -0
  4. data/lib/jekyll-l10n/constants.rb +136 -0
  5. data/lib/jekyll-l10n/errors.rb +60 -0
  6. data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
  7. data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
  8. data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
  9. data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
  10. data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
  11. data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
  12. data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
  13. data/lib/jekyll-l10n/extraction/logger.rb +48 -0
  14. data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
  15. data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
  16. data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
  17. data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
  18. data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
  19. data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
  20. data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
  21. data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
  22. data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
  23. data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
  24. data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
  25. data/lib/jekyll-l10n/po_file/loader.rb +64 -0
  26. data/lib/jekyll-l10n/po_file/manager.rb +160 -0
  27. data/lib/jekyll-l10n/po_file/merger.rb +80 -0
  28. data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
  29. data/lib/jekyll-l10n/po_file/reader.rb +518 -0
  30. data/lib/jekyll-l10n/po_file/writer.rb +232 -0
  31. data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
  32. data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
  33. data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
  34. data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
  35. data/lib/jekyll-l10n/translation/translator.rb +179 -0
  36. data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
  37. data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
  38. data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
  39. data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
  40. data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
  41. data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
  42. data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
  43. data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
  44. data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
  45. data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
  46. data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
  47. data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
  48. data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
  49. data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
  50. data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
  51. data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
  52. data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
  53. data/lib/jekyll-l10n/version.rb +10 -0
  54. data/lib/jekyll-l10n.rb +268 -0
  55. metadata +200 -0
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../translation/translator"
4
+ require_relative "page_writer"
5
+ require_relative "localized_page_mapper"
6
+ require_relative "../utils/url_path_builder"
7
+ require_relative "../utils/logger_formatter"
8
+
9
+ module Jekyll
10
+ module L10n
# File Synchronization Manager - syncs localized pages and applies translations.
#
# For every source page flagged `with_locales: true` (and not itself a localized
# variant) this class looks up the localized variants generated for it, makes sure
# each variant has HTML to work on, and hands the variant to the page writer, which
# translates it and writes it to disk.
#
# The sync process:
# 1. Builds a map of localized pages grouped by original URL
# 2. For each original page, finds its localized variants
# 3. Reads the original HTML content (only when variants exist)
# 4. Uses that HTML as a fallback for variants that have no output of their own
# 5. Delegates translation and writing to the page writer
#
# @example Usage (typically internal)
#   file_reader = SomeFileReader.new                 # anything responding to read(page)
#   page_writer = LocalizedPageWriter.new(dest_dir)  # responds to translate_and_write
#   syncer = LocalizationFileSync.new(site, file_reader, page_writer)
#   syncer.sync
#
class LocalizationFileSync
  # Initialize the file synchronization manager.
  #
  # @param site [Jekyll::Site] The Jekyll site object
  # @param file_reader [#read] Object whose `read(page)` returns the page's HTML
  #   (a falsy return value makes the page be skipped)
  # @param page_writer [#translate_and_write] Object invoked as
  #   `translate_and_write(localized_page, translator, locale, baseurl)`
  def initialize(site, file_reader, page_writer)
    @site = site
    @file_reader = file_reader
    @page_writer = page_writer
  end

  # Synchronize and translate all localized pages.
  #
  # Builds the original-URL => localized-pages map, logs how many original URLs
  # have localized versions, then processes every eligible source page.
  #
  # @return [void]
  def sync
    Jekyll.logger.info "Localization", "Starting file sync process"
    localized_pages_by_url = LocalizedPageMapper.build_map(@site)
    Jekyll.logger.info "Localization",
                       "Found #{localized_pages_by_url.size} pages with localized versions"
    sync_files_and_apply_translations(localized_pages_by_url)
  end

  private

  # Walk all site pages and process those that are localization sources.
  #
  # @param localized_pages_by_url [Hash{String => Array}] original URL => localized pages
  # @return [void]
  def sync_files_and_apply_translations(localized_pages_by_url)
    @site.pages.each do |page|
      with_locales = page.data["with_locales"]
      is_localized = page.data["localized"]
      log_page_check(page.url, with_locales, is_localized)
      # Strict comparisons: only a literal `true` flag counts.
      next unless with_locales == true && is_localized != true

      # Check for variants before reading, so pages without any localized
      # variant do not trigger a pointless file read.
      variants = localized_pages_by_url[page.url]
      next if variants.nil? || variants.empty?

      LoggerFormatter.debug_if_enabled("FileSync",
                                       "Processing page for translation: #{page.url}")
      original_content = @file_reader.read(page)
      next unless original_content

      apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
    end
  end

  # Translate and write every localized variant registered for +page+.
  #
  # @param page [Jekyll::Page] The source page
  # @param localized_pages_by_url [Hash{String => Array}] original URL => localized pages
  # @param original_content [String] HTML of the source page, used as fallback
  # @return [void]
  def apply_translations_to_localized_pages(page, localized_pages_by_url, original_content)
    # Array() turns a missing entry into an empty list.
    Array(localized_pages_by_url[page.url]).each do |localized_page|
      translate_and_write_localized_page(localized_page, original_content)
    end
  end

  # Ensure the variant has output, then delegate to the page writer.
  #
  # @param localized_page [Jekyll::Page] The localized variant
  # @param original_content [String] Fallback HTML when the variant has no output
  # @return [void]
  def translate_and_write_localized_page(localized_page, original_content)
    rendered = localized_page.output
    # Use the public setter (the writer relies on it too) instead of poking
    # at the page's instance variables.
    localized_page.output = original_content if rendered.nil? || rendered.empty?

    locale = localized_page.data["locale"]
    baseurl = @site.config["baseurl"]
    translator = Translator.new(localized_page)

    @page_writer.translate_and_write(localized_page, translator, locale, baseurl)
  end

  # Emit a debug line describing the localization flags of a page.
  #
  # @param url [String] Page URL
  # @param with_locales [Object] Raw `with_locales` front matter value
  # @param is_localized [Object] Raw `localized` data value
  # @return [void]
  def log_page_check(url, with_locales, is_localized)
    LoggerFormatter.debug_if_enabled("FileSync",
                                     "Checking page: #{url} (with_locales: #{with_locales}, " \
                                     "localized: #{is_localized})")
  end
end
109
+ end
110
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "regeneration_checker"
4
+
5
+ module Jekyll
6
+ module L10n
# Localized Page Generator - creates locale-prefixed copies of pages during a Jekyll build.
#
# Registered as a low-priority Jekyll generator. For every page marked
# `with_locales: true` it reads the locale list from the page's
# `with_locales_data.locales` front matter and appends one LocalizedPage per
# valid locale to `site.pages`, placed under a `/<locale>` directory prefix.
#
# Key responsibilities:
# - Identify pages marked for localization (with_locales: true)
# - Extract and validate the configured locales from page front matter
# - Create LocalizedPage instances for each locale variant
# - Ask RegenerationChecker whether each page/locale pair needs to be generated
#
# @example Usage in Jekyll site configuration
#   # _config.yml
#   plugins:
#     - jekyll-l10n
#
# @example Marking pages for localization
#   # src/page.md
#   ---
#   with_locales: true
#   with_locales_data:
#     locales: [es, fr, pt, de]
#   ---
#
class Generator < Jekyll::Generator
  priority :low

  # Generate localized pages for all marked pages in the Jekyll site.
  #
  # @param site [Jekyll::Site] The Jekyll site object containing pages to process
  # @return [void]
  # @see LocalizedPage for details on page structure
  def generate(site)
    Jekyll.logger.info "Localization", "Generating localized pages..."
    generate_localized_pages(site)
  end

  private

  # Append a LocalizedPage to site.pages for each eligible page/locale pair.
  #
  # @param site [Jekyll::Site]
  # @return [void]
  def generate_localized_pages(site)
    # Iterate over a snapshot: site.pages is appended to inside the loop.
    original_pages = site.pages.dup
    checker = Jekyll::L10n::RegenerationChecker.new(site)

    original_pages.each do |page|
      next unless should_localize_page?(page)

      # get_page_locales already returns only valid locale codes.
      get_page_locales(page).each do |locale|
        next unless checker.should_regenerate?(page, locale)

        site.pages << create_localized_page(site, page, locale)
      end
    end
  end

  # A page is localized when it is flagged `with_locales: true` and is not
  # itself a generated localized variant. Only a literal `true` counts.
  #
  # @param page [Jekyll::Page]
  # @return [Boolean]
  def should_localize_page?(page)
    page.data["with_locales"] == true && page.data["localized"] != true
  end

  # Read the valid, de-duplicated locale codes configured for a page.
  #
  # Array() tolerates a missing list as well as a single scalar locale
  # (`locales: es`), which would otherwise raise on `select`.
  #
  # @param page [Jekyll::Page]
  # @return [Array<String>] Valid locale codes in front matter order
  def get_page_locales(page)
    data = page.data["with_locales_data"]
    return [] unless data.is_a?(Hash)

    Array(data["locales"]).select { |code| valid_locale_code?(code) }.uniq
  end

  # Build the localized variant of +page+ under the `/<locale>` directory prefix.
  #
  # @param site [Jekyll::Site]
  # @param page [Jekyll::Page] Source page
  # @param locale [String] Validated locale code
  # @return [LocalizedPage]
  def create_localized_page(site, page, locale)
    new_dir = "/#{locale}#{page.dir}"
    LocalizedPage.new(:site => site, :base => site.source, :dir => new_dir, :page => page,
                      :locale => locale)
  end

  # Accept only `xx` or `xx_YY` locale codes (e.g. "es", "pt_BR").
  #
  # The pattern is anchored with \A and \z so the whole string must match.
  # `^`/`$` match at line boundaries, which would let a multi-line value such
  # as "es\n../x" through and into a directory path.
  #
  # @param locale [Object] Candidate value from front matter
  # @return [Boolean]
  def valid_locale_code?(locale)
    return false unless locale.is_a?(String)

    locale.match?(%r!\A[a-z]{2}(_[A-Z]{2})?\z!)
  end
end
105
+ end
106
+ end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module L10n
# Localized Page - a Jekyll page rendered for one specific locale.
#
# A LocalizedPage mirrors a source page: it copies the source's name, extension,
# paths, content, output and front matter data, and adds locale information.
# Its URL is the source page's URL with the locale prepended
# (e.g. /es/path/to/page/).
#
# Key responsibilities:
# - Keep a reference to the source page and its original URL
# - Expose `locale` / `lang` to Liquid templates
# - Produce locale-prefixed URLs and output paths
#
# @example Creating a localized page variant
#   page = LocalizedPage.new(site: site, base: site.source, dir: '/es',
#                            page: source_page, locale: 'es')
#   page.url    # => "/es" + source_page.url, with a trailing slash
#   page.locale # => "es"
#
class LocalizedPage < Jekyll::Page
  # @!attribute [rw] locale
  #   Locale code of this variant (e.g., 'es', 'pt_BR')
  #   @return [String]
  # @!attribute [rw] lang
  #   Same value as locale, exposed for Liquid templates
  #   @return [String]
  # @!attribute [rw] original_page
  #   The source page this variant was created from
  #   @return [Jekyll::Page]
  # @!attribute [rw] original_url
  #   URL of the source page, without the locale prefix
  #   @return [String]
  attr_accessor :locale, :lang, :original_page, :original_url

  # Build a localized variant of +page+.
  #
  # All state is copied directly from the source page (the superclass
  # initializer is not invoked). That includes the source's current `output`,
  # whatever it is at construction time.
  #
  # @param site [Jekyll::Site] (keyword) The Jekyll site object
  # @param base [String] (keyword) The base directory (typically site.source)
  # @param dir [String] (keyword) The directory path, already locale-prefixed
  #   (e.g., '/es/path')
  # @param page [Jekyll::Page] (keyword) The source page to localize
  # @param locale [String] (keyword) The locale code (e.g., 'es', 'pt_BR', 'zh_CN')
  # @return [LocalizedPage] A new localized page instance
  # @note All parameters are keyword arguments and must be passed by name
  def initialize(site:, base:, dir:, page:, locale:) # rubocop:disable Metrics/ParameterLists
    # Where the page lives
    @site = site
    @base = base
    @dir = dir

    # File identity taken from the source page
    @name = page.name
    @ext = page.ext
    @output_ext = page.output_ext
    @path = page.path
    @relative_path = page.relative_path

    # Locale bookkeeping
    @locale = locale
    @lang = locale
    @original_page = page
    @original_url = page.url

    # Body copied from the source page
    @content = page.content
    @output = page.output

    setup_localized_data(page, locale)
  end

  # URL placeholders with the locale prepended to the :path entry.
  #
  # Takes the superclass placeholders and, when a :path entry is present,
  # replaces it with "/<locale><path>".
  #
  # @return [Hash] The placeholder hash returned by the superclass, adjusted in place
  def url_placeholders
    super.tap do |placeholders|
      base_path = placeholders[:path]
      placeholders[:path] = "/#{@locale}#{base_path}" if base_path
    end
  end

  # The locale-prefixed URL of this page.
  #
  # Prepends "/<locale>" to the source page's URL and guarantees a trailing slash.
  #
  # @return [String] e.g. '/es/path/to/page/'
  def url
    prefixed = "/#{@locale}#{@original_url}"
    prefixed.end_with?("/") ? prefixed : "#{prefixed}/"
  end

  # File system path the rendered page is written to.
  #
  # The result is "<dest><url>index.html"; a slash is inserted before
  # "index.html" should the URL not already end with one.
  #
  # @param dest [String, nil] Destination directory; falls back to the site's
  #   "destination" config value when not given
  # @return [String] Path of the output HTML file
  def destination(dest = nil)
    root = dest || @site.config["destination"]
    page_url = url
    file_name = page_url.end_with?("/") ? "index.html" : "/index.html"
    "#{root}#{page_url}#{file_name}"
  end

  # Liquid representation of the page.
  #
  # Starts from the superclass representation and overrides "url" with the
  # locale-prefixed URL, adding "locale" and "lang".
  #
  # @return [Hash<String, Object>] Page data for Liquid templates
  def to_liquid
    # Resolve the URL before delegating, as the original implementation does.
    localized_url = url
    super.tap do |payload|
      payload["url"] = localized_url
      payload["locale"] = @locale
      payload["lang"] = @lang
    end
  end

  private

  # Copy the source page's data (shallow) and stamp the locale metadata on it.
  #
  # @param page [Jekyll::Page] Source page
  # @param locale [String] Locale code
  # @return [void]
  def setup_localized_data(page, locale)
    source_permalink = page.data["permalink"]
    overrides = {
      "locale"       => locale,
      "lang"         => locale,
      "localized"    => true,
      "original_url" => @original_url,
    }
    # Only recorded when the source page defines a permalink.
    overrides["original_permalink"] = source_permalink if source_permalink

    @data = page.data.dup
    overrides.each { |key, value| @data[key] = value }
  end
end
149
+ end
150
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../utils/logger_formatter"
4
+
5
+ module Jekyll
6
+ module L10n
# Localized Page Mapper - groups generated localized pages by their original URL.
#
# Walks `site.pages`, keeps the pages whose data has `localized` set to `true`,
# and collects them under the `original_url` value stored in their data.
#
# Resulting structure: { original_url => [localized_page_1, localized_page_2, ...] }
#
# @example Usage
#   localized_map = LocalizedPageMapper.build_map(site)
#   # => { "/about/" => [<LocalizedPage locale="es">, <LocalizedPage locale="fr">] }
#
module LocalizedPageMapper
  extend self

  # Build the original-URL => localized-pages map.
  #
  # Pages are visited in `site.pages` order, so both the hash keys and the
  # arrays preserve that order. A debug line is emitted for every localized
  # page that is found.
  #
  # @param site [Jekyll::Site] The Jekyll site object with all generated pages
  # @return [Hash<String, Array<LocalizedPage>>] Map of original URL to localized variants
  # @example
  #   pages_map = LocalizedPageMapper.build_map(site)
  #   spanish_pages = pages_map["/about/"] # => [<LocalizedPage locale="es">]
  def build_map(site)
    site.pages.each_with_object({}) do |candidate, by_original_url|
      # Only a literal `true` marks a page as a localized variant.
      next unless candidate.data["localized"] == true

      source_url = candidate.data["original_url"]
      LoggerFormatter.debug_if_enabled(
        "LocalizedPageMapper",
        "Found localized page: #{candidate.url} (original: #{source_url})"
      )
      (by_original_url[source_url] ||= []) << candidate
    end
  end
end
50
+ end
51
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module L10n
# Looks up original (non-localized) pages by URL.
#
# The locator keeps a URL => page hash covering every page in `site.pages`
# whose data does not have `localized` set to `true`. The hash is built the
# first time a lookup is made and reused for all later lookups.
#
# Key responsibilities:
# * Index original pages by URL
# * Leave localized page variants out of the index
# * Answer lookups with a single hash access
# * Build the index lazily, on first use
#
# @example
#   locator = OriginalPageLocator.new(site)
#   original_page = locator.find_by_url('/docs/index.html')
class OriginalPageLocator
  # Create a locator for +site+. No indexing happens here.
  #
  # @param site [Jekyll::Site] Jekyll site object
  def initialize(site)
    @site = site
    @index = nil
  end

  # Find an original page by URL.
  #
  # The first call builds the index; subsequent calls reuse it.
  #
  # @param url [String] Page URL (e.g., '/docs/index.html')
  # @return [Jekyll::Page, nil] Original page if found, nil otherwise
  def find_by_url(url)
    build_index if @index.nil?
    @index[url]
  end

  private

  # Populate the URL => page index with every non-localized page.
  #
  # When several pages share a URL, the one appearing last in `site.pages` wins.
  #
  # @return [void]
  def build_index
    @index = {}
    @site.pages.each do |candidate|
      @index[candidate.url] = candidate unless candidate.data["localized"] == true
    end
  end
end
58
+ end
59
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../utils/url_path_builder"
4
+ require_relative "../translation/html_translator"
5
+ require_relative "../utils/file_operations"
6
+ require_relative "../utils/logger_formatter"
7
+
8
+ module Jekyll
9
+ module L10n
# Writes localized pages with metadata to disk.
#
# LocalizedPageWriter runs the supplied translator, rewrites the `lang`
# attribute of the page's <html> tag to the target locale, and writes the
# resulting output below the destination directory.
#
# Key responsibilities:
# * Trigger translation of the localized page
# * Update the HTML lang attribute to the target locale
# * Ensure the target directory exists
# * Write the localized HTML to disk
# * Strip the Content-Type meta tag added during HTML serialization
# * Fall back to the untouched output when HTML processing fails
#
# @example
#   writer = LocalizedPageWriter.new('_site')
#   writer.translate_and_write(page, translator, 'es', '/baseurl')
class LocalizedPageWriter
  # Matches the Content-Type meta tag (plus an optional trailing newline) that
  # appears in the serialized document.
  # See: spec/regression/nokogiri_meta_tag_spec.rb
  AUTO_INSERTED_META_TAG =
    %r!<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n?!.freeze
  private_constant :AUTO_INSERTED_META_TAG

  # Initialize a new LocalizedPageWriter.
  #
  # @param dest [String] Destination build directory
  def initialize(dest)
    @dest = dest
  end

  # Translate page content and write it to disk.
  #
  # Calls `translator.translate`, updates the lang attribute in the page
  # output, stores the result back on the page, and writes it to the file
  # derived from the page URL.
  #
  # @param localized_page [Jekyll::Page] Localized page to write
  # @param translator [#translate] Translator object
  # @param locale [String] Target locale code
  # @param _baseurl [String] Base URL (passed for compatibility, not used)
  # @return [void]
  def translate_and_write(localized_page, translator, locale, _baseurl)
    log_debug_info(localized_page, locale, "start")
    translator.translate

    log_debug_info(localized_page, locale, "after translate")
    localized_page.output = fix_locale_metadata(localized_page.output, locale)

    log_debug_info(localized_page, locale, "after fix_locale")
    write_localized_page(localized_page)
  end

  private

  # Emit a debug line with the page URL, locale and current output size.
  def log_debug_info(localized_page, locale, phase)
    LoggerFormatter.debug_if_enabled("PageWriter",
                                     "#{phase}: URL=#{localized_page.url}, locale=#{locale}, " \
                                     "output_size=#{localized_page.output&.length || 0}")
  end

  # Write the page output to `<dest>/<path derived from the page URL>`.
  def write_localized_page(localized_page)
    localized_file_path = UrlPathBuilder.url_to_file_path(localized_page.url)
    localized_file = File.join(@dest, localized_file_path)
    FileOperations.ensure_directory(localized_file)

    LoggerFormatter.debug_if_enabled("PageWriter", "Writing to #{localized_file_path}")
    FileOperations.write_utf8(localized_file, localized_page.output)
  end

  # Set the <html lang> attribute of +html+ to +locale+.
  #
  # Returns +html+ unchanged when either argument is missing, when the
  # output is blank, or when HTML processing raises.
  #
  # @param html [String, nil] Page output
  # @param locale [String, nil] Target locale code
  # @return [String, nil] Updated HTML, or the input when nothing was done
  def fix_locale_metadata(html, locale)
    return html unless html && locale
    # Blank output has no <html> tag to annotate; running it through the
    # parser and serializer would replace it with parser-generated markup.
    return html if html.strip.empty?

    doc = parse_html(html)
    update_html_lang_attribute(doc, locale)
    cleanup_auto_inserted_meta_tag(serialize_html(doc))
  rescue StandardError => e
    # Best effort: keep the original output rather than failing the build.
    Jekyll.logger.error "Localization",
                        "Failed to parse HTML for locale #{locale}: #{e.message}"
    html
  end

  def parse_html(html)
    # CRITICAL: Nokogiri::HTML auto-inserts <meta http-equiv="Content-Type">
    # We parse with HTML to access the <html> tag, then remove the auto-inserted
    # meta tag using regex post-processing.
    # See: spec/regression/nokogiri_meta_tag_spec.rb
    Nokogiri::HTML(html)
  end

  # Set lang on the <html> element, or warn when the document has none.
  def update_html_lang_attribute(doc, locale)
    html_tag = doc.at("html")

    if html_tag
      html_tag["lang"] = locale
    else
      Jekyll.logger.warn("Localization",
                         "No <html> tag found for locale #{locale}, skipping lang attribute")
    end
  end

  def serialize_html(doc)
    doc.to_html
  end

  # Remove every occurrence of the auto-inserted Content-Type meta tag.
  #
  # @param result [String] Serialized HTML
  # @return [String] HTML without the meta tag
  def cleanup_auto_inserted_meta_tag(result)
    result.gsub(AUTO_INSERTED_META_TAG, "")
  end
end
119
+ end
120
+ end