jekyll-l10n 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +94 -0
  4. data/lib/jekyll-l10n/constants.rb +136 -0
  5. data/lib/jekyll-l10n/errors.rb +60 -0
  6. data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
  7. data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
  8. data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
  9. data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
  10. data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
  11. data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
  12. data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
  13. data/lib/jekyll-l10n/extraction/logger.rb +48 -0
  14. data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
  15. data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
  16. data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
  17. data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
  18. data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
  19. data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
  20. data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
  21. data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
  22. data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
  23. data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
  24. data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
  25. data/lib/jekyll-l10n/po_file/loader.rb +64 -0
  26. data/lib/jekyll-l10n/po_file/manager.rb +160 -0
  27. data/lib/jekyll-l10n/po_file/merger.rb +80 -0
  28. data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
  29. data/lib/jekyll-l10n/po_file/reader.rb +518 -0
  30. data/lib/jekyll-l10n/po_file/writer.rb +232 -0
  31. data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
  32. data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
  33. data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
  34. data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
  35. data/lib/jekyll-l10n/translation/translator.rb +179 -0
  36. data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
  37. data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
  38. data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
  39. data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
  40. data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
  41. data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
  42. data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
  43. data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
  44. data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
  45. data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
  46. data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
  47. data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
  48. data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
  49. data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
  50. data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
  51. data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
  52. data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
  53. data/lib/jekyll-l10n/version.rb +10 -0
  54. data/lib/jekyll-l10n.rb +268 -0
  55. metadata +200 -0
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "text_normalizer"
4
+ require_relative "html_elements"
5
+
6
+ module Jekyll
7
+ module L10n
8
+ # Resolves translations for text nodes with fallback to block-level translations.
9
+ #
10
+ # TranslationResolver looks up translations for normalized text, first trying
11
+ # direct text node matches, then falling back to block-level translations when
12
+ # text is part of a larger block element with its own translation. This enables
13
+ # translating entire paragraphs as single units instead of word-by-word.
14
+ #
15
+ # Key responsibilities:
16
+ # * Look up direct translation for text node
17
+ # * Fall back to block-level translation if available
18
+ # * Return appropriate translation or nil if none found
19
+ #
20
+ # @example
21
+ # translation = TranslationResolver.resolve(node, "Hello", translations)
22
+ # # Returns translated text if available, nil otherwise
23
class TranslationResolver
  # Tag names whose presence as a direct child disables block-level translation.
  PROTECTED_ELEMENTS = %w(script style pre).freeze

  # Resolve a translation for a text node.
  #
  # Looks up +text+ directly in +translations+; when that yields nothing
  # (nil/false), falls back to the translation of the enclosing block element.
  #
  # @param node [Nokogiri::XML::Node] Text node being translated
  # @param text [String] Normalized text content
  # @param translations [Hash] Translation hash mapping text to translations
  # @return [String, nil] Translated text if found, nil otherwise
  def self.resolve(node, text, translations)
    return nil unless node && text && translations

    translations[text] || try_block_level_translation(node, text, translations)
  end

  # Attempt block-level translation for a text node inside a larger block.
  #
  # The node's parent must be a content element (per
  # HtmlElements::CONTENT_ELEMENTS) and must not have a protected direct
  # child (script, style, pre). The parent's text, as produced by
  # BlockTextExtractor.extract, is then looked up in +translations+. Nothing
  # is returned when the block text equals +text+, because that lookup has
  # already been tried by the caller.
  #
  # @param node [Nokogiri::XML::Node, nil] Text node being translated
  # @param text [String] Normalized text of the node
  # @param translations [Hash] Translation hash mapping text to translations
  # @return [String, nil] Block-level translation if available, nil otherwise
  #
  # @example
  #   # For text "text" in "<p><script>...</script> text</p>"
  #   # Returns nil (protected element present, prevents block translation)
  #   TranslationResolver.try_block_level_translation(node, "text", translations)
  def self.try_block_level_translation(node, text, translations)
    # Safe navigation: a nil node simply has no block to fall back to.
    parent = node&.parent
    return nil unless content_element?(parent)

    # Blocks holding script/style/pre children are never translated as a unit.
    return nil if contains_protected_elements?(parent)

    block_text = BlockTextExtractor.extract(parent)
    return nil unless block_text && block_text != text

    translations[block_text]
  end

  # Tell whether +node+ is an element listed in HtmlElements::CONTENT_ELEMENTS.
  #
  # @param node [Nokogiri::XML::Node, nil] Node to check
  # @return [Boolean] false for nil and for non-element nodes
  def self.content_element?(node)
    return false unless node
    return false unless node.element?

    HtmlElements::CONTENT_ELEMENTS.include?(node.name)
  end

  # Check whether an element has a protected element among its direct children.
  #
  # Only direct children are inspected; deeper descendants are not visited.
  # Protected tags are script, style and pre.
  #
  # @param node [Nokogiri::XML::Node, nil] Element to check
  # @return [Boolean] true if a direct child is protected; false otherwise,
  #   including when +node+ is nil or not an element
  #
  # @example
  #   doc = Nokogiri::HTML('<p><script>alert("xss")</script> text</p>')
  #   para = doc.xpath('//p').first
  #   TranslationResolver.contains_protected_elements?(para)
  #   # => true
  #
  # @example
  #   doc = Nokogiri::HTML('<p><code>inline</code> text</p>')
  #   para = doc.xpath('//p').first
  #   TranslationResolver.contains_protected_elements?(para)
  #   # => false (code is allowed, only script/style/pre are protected)
  def self.contains_protected_elements?(node)
    # nil-tolerant, matching content_element?
    return false unless node&.element?

    node.children.any? { |child| child.element? && PROTECTED_ELEMENTS.include?(child.name) }
  end
end
114
+ end
115
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module L10n
5
+ # Builds file paths from URLs and vice versa.
6
+ #
7
+ # UrlPathBuilder provides conversions between Jekyll URLs and file system
8
+ # paths, handling root paths specially and normalizing separators. It supports
9
+ # both regular URLs and PO page paths used for translation files.
10
+ #
11
+ # Key responsibilities:
12
+ # * Normalize URLs (strip leading/trailing slashes)
13
+ # * Convert URLs to file paths
14
+ # * Convert URLs to PO page paths
15
+ # * Calculate relative paths from absolute paths
16
+ #
17
+ # @example
18
+ # file_path = UrlPathBuilder.url_to_file_path('/docs/page.html')
19
+ # # Returns 'docs/page.html/index.html'
20
module UrlPathBuilder
  extend self

  # Normalize a URL by removing one leading and one trailing slash.
  #
  # Anchored with \A / \z so only the very start and end of the string are
  # touched, even if the value contains a newline.
  #
  # @param url [String] URL to normalize
  # @return [String] Normalized URL ("" for "/" or "")
  def normalize_url(url)
    url.sub(%r!\A/!, "").sub(%r!/\z!, "")
  end

  # Convert a URL to a file system path.
  #
  # Appends "/index.html" to the normalized URL. Note that the root URL "/"
  # normalizes to "" and therefore yields "/index.html".
  #
  # @param url [String] Jekyll URL (e.g., '/docs/page.html')
  # @return [String] File path (e.g., 'docs/page.html/index.html')
  def url_to_file_path(url)
    "#{normalize_url(url)}/index.html"
  end

  # Convert a URL to a PO page path for translation files.
  #
  # Same as {#url_to_file_path}, except that an empty normalized URL (the
  # site root) is replaced by "index" first.
  #
  # @param url [String] Jekyll URL
  # @return [String] PO page path (e.g., 'docs/page.html/index.html',
  #   or 'index/index.html' for '/')
  def url_to_po_page_path(url)
    path = normalize_url(url)
    path = "index" if path.empty?
    "#{path}/index.html"
  end

  # Calculate the path of a file relative to a destination directory.
  #
  # Removes +dest+ only when it is a leading prefix of +file_path+ (a later
  # occurrence of the same text inside the path is left alone), then drops a
  # single leading slash.
  #
  # @param file_path [String] Absolute file path
  # @param dest [String] Destination directory to remove from path
  # @return [String] Relative path
  def relative_path(file_path, dest)
    file_path.delete_prefix(dest).sub(%r!\A/!, "")
  end
end
64
+ end
65
+ end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
+ module Jekyll
6
+ module L10n
7
+ # Transforms relative URLs in HTML to include locale prefixes.
8
+ #
9
+ # UrlTransformer modifies href attributes in links to prefix them with
10
+ # the target locale (e.g., /docs/page.html becomes /es/docs/page.html).
11
+ # It preserves external links, anchors, mailto, tel links, and links
12
+ # already containing locale prefixes. Skips English locale (default language).
13
+ #
14
+ # Key responsibilities:
15
+ # * Identify relative links to transform
16
+ # * Add locale prefix to href values
17
+ # * Preserve external links and special URLs
18
+ # * Skip already-localized URLs
19
+ # * Handle baseurl paths correctly
20
+ # * Remove auto-inserted meta tags
21
+ #
22
+ # @example
23
+ # html = '<a href="/docs/page.html">Link</a>'
24
+ # transformed = UrlTransformer.transform(html, 'es', '/baseurl')
25
+ # # Returns '<a href="/baseurl/es/docs/page.html">Link</a>'
26
class UrlTransformer
  class << self
    # Transform all root-relative link URLs in HTML to include a locale prefix.
    #
    # Parses the HTML with Nokogiri::HTML, rewrites the links via
    # {transform_document}, serializes the document, and strips the
    # Content-Type meta tag matched by the pattern below (the serializer is
    # expected to auto-insert it).
    #
    # @param html [String] HTML content with URLs to transform
    # @param locale [String] Target locale code (e.g., 'es', 'fr')
    # @param baseurl [String, nil] Base URL for site (e.g., '/baseurl')
    # @return [String] HTML with locale-prefixed URLs; +html+ itself when the
    #   locale is nil, empty or "en"
    def transform(html, locale, baseurl)
      return html if should_skip_transform?(locale)

      doc = Nokogiri::HTML(html)
      transform_document(doc, locale, baseurl)
      result = doc.to_html

      # Matches: <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
      pattern = %r!<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n?!
      result.gsub(pattern, "")
    end

    # Transform URLs in an already parsed HTML document, in place.
    #
    # Every <a href> is rewritten unless the href is not root-relative, is
    # already localized, or the link carries the "dropdown-item" class
    # (language switcher entries).
    #
    # @param doc [Nokogiri::HTML::Document] Parsed HTML document
    # @param locale [String] Target locale code
    # @param baseurl [String, nil] Base URL for site
    # @return [void]
    def transform_document(doc, locale, baseurl)
      return if should_skip_transform?(locale)

      doc.css("a[href]").each do |link|
        href = link["href"]
        next unless should_transform_href?(href, locale, baseurl)
        next if language_dropdown_link?(link)

        link["href"] = add_locale_prefix(href, locale, baseurl)
      end
    end

    private

    # No transformation for a missing locale or for "en" (the default language).
    def should_skip_transform?(locale)
      locale.nil? || locale.empty? || locale == "en"
    end

    # Only root-relative hrefs ("/path") that are not yet localized qualify.
    def should_transform_href?(href, locale, baseurl)
      return false if invalid_href?(href)
      return false if external_or_special_link?(href)
      return false if relative_path?(href)
      return false if already_localized?(href, locale, baseurl)

      true
    end

    def invalid_href?(href)
      href.nil? || href.empty?
    end

    # Fragments, protocol-relative URLs ("//host/...") and anything with a
    # URI scheme (http:, https:, mailto:, tel:, javascript:, ...) are left alone.
    def external_or_special_link?(href)
      href.start_with?("#", "//") || href.match?(%r!\A[a-zA-Z][a-zA-Z0-9+.-]*:!)
    end

    # Anything not starting with "/" is document-relative ("./x", "../x",
    # "x.html", "?q=1"); prefixing it would yield paths like "/esx.html".
    def relative_path?(href)
      !href.start_with?("/")
    end

    # An href counts as localized when, after removing the baseurl, it starts
    # with the target locale or with any locale-shaped first segment.
    def already_localized?(href, locale, baseurl = nil)
      path = strip_baseurl(href, baseurl)
      path.start_with?("/#{locale}/") || locale_prefix?(path)
    end

    def language_dropdown_link?(link)
      link_classes = link["class"] || ""
      link_classes.split.any? { |c| c == "dropdown-item" }
    end

    # True when the first path segment looks like a locale ("es", "pt_BR").
    # A query string or fragment directly after that segment is ignored.
    def locale_prefix?(href)
      first_part = href.sub(%r!\A/!, "")[%r!\A[^/?#]*!]
      first_part.match?(%r!\A[a-z]{2}(?:_[A-Z]{2})?\z!)
    end

    # Insert the locale after the baseurl (or at the root when there is none).
    # A trailing slash on the baseurl is ignored so no "//" is produced.
    def add_locale_prefix(href, locale, baseurl)
      base = baseurl.to_s.chomp("/")
      return "/#{locale}#{href}" if base.empty?

      "#{base}/#{locale}#{strip_baseurl(href, base)}"
    end

    # Remove a leading baseurl from href, but only on a path boundary, so
    # "/baseball" is not mistaken for something under "/base".
    def strip_baseurl(href, baseurl)
      base = baseurl.to_s.chomp("/")
      return href if base.empty? || !href.start_with?(base)

      rest = href[base.length..]
      rest.empty? || rest.start_with?("/", "?", "#") ? rest : href
    end
  end
end
140
+ end
141
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "url_path_builder"
4
+
5
+ module Jekyll
6
+ module L10n
7
+ # Generates file location references for extracted strings.
8
+ #
9
+ # XPathReferenceGenerator creates location references for extracted
10
+ # translatable strings in the format "file_path:line_number". These references
11
+ # appear as comments in PO files to help translators locate text in the
12
+ # original source. Note: Despite the module name, these are file location
13
+ # references, not XPath expressions.
14
+ #
15
+ # Key responsibilities:
16
+ # * Generate location references from DOM nodes
17
+ # * Include file path and line number
18
+ # * Format references for PO file comments
19
+ #
20
+ # @example
21
+ # ref = XPathReferenceGenerator.generate(node, 'docs/index.html', '_site')
22
+ # # Returns 'docs/index.html:42' (42 is line number in HTML)
23
module XPathReferenceGenerator
  extend self

  # Build the "relative_path:line_number" reference for an extracted string.
  #
  # The path part is +file_path+ made relative to +dest+ by
  # UrlPathBuilder.relative_path; the line part is the node's +line+.
  #
  # @param node [Nokogiri::XML::Node] DOM node where string was found
  # @param file_path [String] Path to HTML file being processed
  # @param dest [String] Destination directory (stripped to get relative path)
  # @param _attr_name [String, nil] Unused; accepted for API compatibility
  # @return [String] Reference in format "relative_path:line_number"
  def generate(node, file_path, dest, _attr_name = nil)
    location = UrlPathBuilder.relative_path(file_path, dest)

    [location, node.line].join(":")
  end
end
44
+ end
45
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
module Jekyll
  module L10n
    # The version of the jekyll-l10n gem.
    #
    # Must match the version the gem is released under, so bump it with
    # every release. This file ships in the 1.0.5 package.
    VERSION = "1.0.5"
  end
end
@@ -0,0 +1,268 @@
1
+ # frozen_string_literal: true
2
+
3
+ # # jekyll-l10n: Complete Site Localization for Jekyll
4
+ #
5
+ # A comprehensive localization plugin for Jekyll that provides GNU Gettext-based translation
6
+ # management. The plugin automatically creates locale-prefixed page variants and applies
7
+ # translations using PO (Portable Object) files, the standard format for open-source translations.
8
+ #
9
+ # ## Core Features
10
+ #
11
+ # - **Automatic Page Duplication**: Creates locale-specific page variants with proper URL prefixes
12
+ # - **HTML String Extraction**: Automatically finds translatable strings and attributes in
13
+ # generated HTML
14
+ # - **PO File Management**: Reads, writes, and merges GNU Gettext PO files with caching
15
+ # - **Flexible Translation Application**: Applies translations with configurable fallback modes
16
+ # - **LibreTranslate Integration**: Automatic translation via LibreTranslate API (optional)
17
+ # - **Compendium Support**: Shared translation files across multiple pages
18
+ # - **Incremental Builds**: Optimizes performance by skipping unchanged content
19
+ # - **Liquid Filters**: Template helpers for locale-aware URL generation
20
+ #
21
+ # ## Architecture Overview
22
+ #
23
+ # The plugin consists of five major modules, each handling a specific aspect of localization:
24
+ #
25
+ # ### 1. Jekyll Integration (`Jekyll::L10n::Jekyll`)
26
+ # Integrates localization into the Jekyll build pipeline through hooks and generators.
27
+ # - **Generator**: Creates localized page variants during generation phase
28
+ # - **LocalizedPage**: Represents a page in a specific locale
29
+ # - **PostWriteProcessor**: Extracts strings and reprocesses translations after build
30
+ # - **UrlFilter**: Liquid filters for locale-aware URL generation
31
+ #
32
+ # ### 2. HTML Extraction (`Jekyll::L10n::Extraction`)
33
+ # Extracts translatable content from generated HTML and maintains PO files.
34
+ # - **Extractor**: Main orchestrator for extraction workflow
35
+ # - **HtmlStringExtractor**: Finds translatable strings and attributes
36
+ # - **DomTextExtractor**: Extracts text nodes with file location references
37
+ # - **DomAttributeExtractor**: Extracts configurable HTML attributes
38
+ # - **ResultSaver**: Writes extracted strings to PO files
39
+ #
40
+ # ### 3. HTML Translation (`Jekyll::L10n::Translation`)
41
+ # Applies translations from PO files to HTML documents.
42
+ # - **Translator**: Main orchestrator for translation workflow
43
+ # - **HtmlTranslator**: Applies translations to HTML DOM nodes
44
+ # - **PageTranslationLoader**: Loads translations with compendium support
45
+ # - **BlockTextExtractor**: Context-aware text extraction
46
+ # - **LibreTranslator**: LibreTranslate API integration
47
+ #
48
+ # ### 4. PO File Management (`Jekyll::L10n::PoFile`)
49
+ # Manages GNU Gettext PO file operations with caching.
50
+ # - **Manager**: Orchestrates PO file operations and caching
51
+ # - **Reader**: Parses PO files into Ruby objects
52
+ # - **Writer**: Serializes PO entries back to file format
53
+ # - **Merger**: Merges translations from compendium files
54
+ # - **Loader**: Loads and validates PO files
55
+ #
56
+ # ### 5. Utilities (`Jekyll::L10n`)
57
+ # Common utilities supporting all other modules.
58
+ # - **TextNormalizer**: Normalizes whitespace for consistent matching
59
+ # - **TextValidator**: Validates extractable text
60
+ # - **TranslationResolver**: Selects best translation with fallback
61
+ # - **HtmlParser**: DOM parsing with safety guards
62
+ # - **PathBuilder**: Constructs locale-specific file paths
63
+ #
64
+ # ## Configuration
65
+ #
66
+ # Configuration can be specified in Jekyll `_config.yml` or in individual page front matter:
67
+ #
68
+ # ```yaml
69
+ # plugins:
70
+ # - jekyll-l10n
71
+ #
72
+ # localization_gettext:
73
+ # with_locales_data:
74
+ # locales: [es, fr, pt, de]
75
+ # extract_on_build: true
76
+ # translation:
77
+ # fallback: english
78
+ # libretranslate_enabled: true
79
+ # libretranslate_api_url: "https://api.libretranslate.de"
80
+ # logging:
81
+ # debug: false
82
+ # ```
83
+ #
84
+ # Page-level configuration overrides site-level configuration:
85
+ #
86
+ # ```markdown
87
+ # ---
88
+ # with_locales: true
89
+ # with_locales_data:
90
+ # locales: [es, fr]
91
+ # ---
92
+ # ```
93
+ #
94
+ # ## Usage Examples
95
+ #
96
+ # ### Enable localization for a page
97
+ #
98
+ # ```markdown
99
+ # ---
100
+ # title: About Us
101
+ # with_locales: true
102
+ # with_locales_data:
103
+ # locales: [es, fr, pt_BR]
104
+ # ---
105
+ # Content here...
106
+ # ```
107
+ #
108
+ # ### Use locale-aware URLs in templates
109
+ #
110
+ # ```liquid
111
+ # <!-- Link with current locale -->
112
+ # <a href="{{ '/about/' | locale_url }}">About</a>
113
+ #
114
+ # <!-- Switch to different locale -->
115
+ # <a href="{{ page.url | switch_locale_url: 'es' }}">Español</a>
116
+ #
117
+ # <!-- Build language switcher -->
118
+ # {% for locale in page.with_locales_data.locales %}
119
+ # <a href="{{ page.url | switch_locale_url: locale }}">
120
+ # {{ locale | upcase }}
121
+ # </a>
122
+ # {% endfor %}
123
+ # ```
124
+ #
125
+ # ## Build Pipeline
126
+ #
127
+ # 1. **Generate Phase**: Generator creates LocalizedPage instances for each locale
128
+ # 2. **Render Phase**: Jekyll renders all pages (including localized variants)
129
+ # 3. **Post-Render Phase**: Translator hook applies translations to localized pages
130
+ # 4. **Write Phase**: Jekyll writes HTML to disk
131
+ # 5. **Post-Write Phase**: PostWriteProcessor extracts new strings and updates translations
132
+ #
133
+ # ## Translation Workflow Timing
134
+ #
135
+ # The plugin applies translations in two phases:
136
+ #
137
+ # 1. **Initial Build (post_render)**: Localized pages are translated during Jekyll's
138
+ # render phase using existing PO files
139
+ # 2. **Post-Extraction (post_write)**: When new strings are extracted and PO files
140
+ # updated, localized HTML is immediately reprocessed with new translations
141
+ #
142
+ # This dual-phase approach ensures both incremental builds (using cached translations)
143
+ # and immediate translation updates (when new strings are found).
144
+ #
145
+ # ## File Locations
146
+ #
147
+ # - **Source pages**: `src/**/*.md` (marked with `with_locales: true`)
148
+ # - **Page-specific PO files**: `_locales/{locale}/{page_path}.po`
149
+ # - **Compendium PO files**: `_locales/{locale}.po` (shared translations for locale)
150
+ # - **Output**: `_site/[locale]/path/to/page/index.html`
151
+ #
152
+ # ## Terminology
153
+ #
154
+ # - **Source pages**: Pages marked with `with_locales: true` in front matter
155
+ # - **Localized pages**: Generated LocalizedPage instances (marked with `localized: true`)
156
+ # - **Compendium**: Locale-level shared translations at `_locales/{locale}.po`
157
+ # - **Page-specific PO files**: Per-page translations at `_locales/{locale}/{page_path}.po`
158
+ # - **String extraction**: Finding translatable text content and HTML attribute values
159
+ #
160
+ # @see Jekyll::L10n::Generator for page generation
161
+ # @see Jekyll::L10n::Translator for translation application
162
+ # @see Jekyll::L10n::Extractor for string extraction
163
+ # @see Jekyll::L10n::UrlFilter for liquid filters
164
+
165
+ require_relative "jekyll-l10n/version"
166
+ require_relative "jekyll-l10n/constants"
167
+ require_relative "jekyll-l10n/errors"
168
+ require_relative "jekyll-l10n/po_file/reader"
169
+ require_relative "jekyll-l10n/po_file/writer"
170
+ require_relative "jekyll-l10n/po_file/loader"
171
+ require_relative "jekyll-l10n/po_file/merger"
172
+ require_relative "jekyll-l10n/po_file/path_builder"
173
+ require_relative "jekyll-l10n/po_file/manager"
174
+ require_relative "jekyll-l10n/extraction/dom_text_extractor"
175
+ require_relative "jekyll-l10n/extraction/dom_attribute_extractor"
176
+ require_relative "jekyll-l10n/extraction/html_string_extractor"
177
+ require_relative "jekyll-l10n/extraction/config_loader"
178
+ require_relative "jekyll-l10n/extraction/result_saver"
179
+ require_relative "jekyll-l10n/extraction/compendium_translator"
180
+ require_relative "jekyll-l10n/extraction/compendium_merger"
181
+ require_relative "jekyll-l10n/extraction/logger"
182
+ require_relative "jekyll-l10n/extraction/extractor"
183
+ require_relative "jekyll-l10n/translation/html_translator"
184
+ require_relative "jekyll-l10n/translation/page_translation_loader"
185
+ require_relative "jekyll-l10n/translation/translator"
186
+ require_relative "jekyll-l10n/translation/block_text_extractor"
187
+ require_relative "jekyll-l10n/jekyll/localized_page"
188
+ require_relative "jekyll-l10n/jekyll/regeneration_checker"
189
+ require_relative "jekyll-l10n/jekyll/generator"
190
+ require_relative "jekyll-l10n/jekyll/post_write_processor"
191
+ require_relative "jekyll-l10n/jekyll/post_write_html_reprocessor"
192
+ require_relative "jekyll-l10n/jekyll/file_sync"
193
+ require_relative "jekyll-l10n/jekyll/page_locator"
194
+ require_relative "jekyll-l10n/jekyll/page_writer"
195
+ require_relative "jekyll-l10n/jekyll/url_filter"
196
+ require_relative "jekyll-l10n/utils/text_normalizer"
197
+ require_relative "jekyll-l10n/utils/translation_resolver"
198
+ require_relative "jekyll-l10n/utils/html_elements"
199
+ require_relative "jekyll-l10n/utils/html_parser"
200
+ require_relative "jekyll-l10n/utils/html_text_utils"
201
+ require_relative "jekyll-l10n/utils/debug_logger"
202
+ require_relative "jekyll-l10n/utils/url_path_builder"
203
+ require_relative "jekyll-l10n/utils/xpath_reference_generator"
204
+ require_relative "jekyll-l10n/utils/page_locales_config"
205
+ require_relative "jekyll-l10n/utils/text_validator"
206
+ require_relative "jekyll-l10n/utils/url_transformer"
207
+ require_relative "jekyll-l10n/utils/site_config_accessor"
208
+ require_relative "jekyll-l10n/utils/error_handler"
209
+ require_relative "jekyll-l10n/utils/logger_formatter"
210
+ require_relative "jekyll-l10n/utils/external_link_icon_preserver"
211
+
212
module Jekyll
  # Main plugin namespace
  module L10n
    # Entry point of the jekyll-l10n plugin.
    #
    # Mixes in the shared Constants, re-exports a few of them at module
    # level, and exposes a convenience wrapper around UrlTransformer.
    #
    # Key public APIs:
    # - {Jekyll::L10n::Generator} - Creates locale-prefixed page variants
    # - {Jekyll::L10n::Translator} - Applies translations to localized pages
    # - {Jekyll::L10n::Extractor} - Extracts translatable strings from HTML
    # - {Jekyll::L10n::UrlFilter} - Liquid filters for locale-aware URLs

    # Centralized constant definitions live in Constants.
    include Constants

    # Module-level shortcuts pointing at the values defined in Constants.
    MIN_TRANSLATABLE_LENGTH = Constants::MIN_TRANSLATABLE_LENGTH
    DEFAULT_LOCALES_DIR = Constants::DEFAULT_LOCALES_DIR
    DEFAULT_FALLBACK_MODE = Constants::DEFAULT_FALLBACK_MODE
    DEFAULT_TRANSLATABLE_ATTRIBUTES = Constants::DEFAULT_TRANSLATABLE_ATTRIBUTES

    class << self
      # Add a locale prefix to the URLs in an HTML string.
      #
      # Thin delegate to UrlTransformer.transform.
      #
      # @param html [String] HTML content to transform
      # @param locale [String] Locale code for URL prefix (e.g., 'es', 'fr')
      # @param baseurl [String] Base URL for relative URL transformation
      # @return [String] HTML with transformed URLs
      # @example
      #   html_with_locale_urls = Jekyll::L10n.transform(html, 'es', '/mysite')
      def transform(html, locale, baseurl)
        UrlTransformer.transform(html, locale, baseurl)
      end
    end
  end
end
256
+
257
# After a page is rendered: translate it, but only if it is flagged as a
# localized variant (data["localized"] is exactly true).
Jekyll::Hooks.register :pages, :post_render do |page|
  Jekyll::L10n::Translator.new(page).translate if page.data["localized"] == true
end

# After the site is written: run the post-write localization processing.
Jekyll::Hooks.register :site, :post_write do |site|
  processor = Jekyll::L10n::PostWriteProcessor.new(site)
  processor.process_localizations
end

# Make the UrlFilter methods available as Liquid filters.
Liquid::Template.register_filter(Jekyll::L10n::UrlFilter)