jekyll-l10n 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +94 -0
  4. data/lib/jekyll-l10n/constants.rb +136 -0
  5. data/lib/jekyll-l10n/errors.rb +60 -0
  6. data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
  7. data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
  8. data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
  9. data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
  10. data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
  11. data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
  12. data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
  13. data/lib/jekyll-l10n/extraction/logger.rb +48 -0
  14. data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
  15. data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
  16. data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
  17. data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
  18. data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
  19. data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
  20. data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
  21. data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
  22. data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
  23. data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
  24. data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
  25. data/lib/jekyll-l10n/po_file/loader.rb +64 -0
  26. data/lib/jekyll-l10n/po_file/manager.rb +160 -0
  27. data/lib/jekyll-l10n/po_file/merger.rb +80 -0
  28. data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
  29. data/lib/jekyll-l10n/po_file/reader.rb +518 -0
  30. data/lib/jekyll-l10n/po_file/writer.rb +232 -0
  31. data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
  32. data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
  33. data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
  34. data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
  35. data/lib/jekyll-l10n/translation/translator.rb +179 -0
  36. data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
  37. data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
  38. data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
  39. data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
  40. data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
  41. data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
  42. data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
  43. data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
  44. data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
  45. data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
  46. data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
  47. data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
  48. data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
  49. data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
  50. data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
  51. data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
  52. data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
  53. data/lib/jekyll-l10n/version.rb +10 -0
  54. data/lib/jekyll-l10n.rb +268 -0
  55. metadata +200 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b4ab158ff22fa743e5ec910aab7b546e3a56796b2e7c09ed31b4f5acc7e4b123
4
+ data.tar.gz: b6b3f4a8a4bc06892581b9017b8018cc5d6c2c0b44f8599e3015fa123b29470f
5
+ SHA512:
6
+ metadata.gz: 809000529424718152cc2c7d9c918d5cf780c9262388001fab8e62c4efeb6c122dce06dd40312b350d3d0fe1761116ea662d915ee03df37c29e1e5f069551630
7
+ data.tar.gz: 63282d1fad6f225a9e34fb39d56eade8a3fe5965e793993d5aded4a41b43bee55fad61b9d8c36ae3d5cd3d63ef964f771e7196b003d1fa632b3460e0fbba6d85
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Alain Reguera Delgado
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,94 @@
1
+ # jekyll-l10n
2
+
3
+ `jekyll-l10n` is a Jekyll plugin that streamlines the localization of static
4
+ websites using industry-standard GNU Gettext PO files. Creating multilingual
5
+ sites presents unique challenges: maintaining consistent translations across
6
+ multiple pages, managing translation workflows with non-technical translators,
7
+ and keeping translations synchronized as content evolves. This plugin addresses
8
+ these challenges by integrating the proven Gettext workflow directly into your
9
+ Jekyll build pipeline.
10
+
11
+ The plugin works by extracting translatable strings from your site's generated
12
+ HTML and organizing them into PO files that translators can edit using standard
13
+ translation tools. As your content changes, the plugin intelligently updates
14
+ these translation files, preserving existing translations while flagging
15
+ changed or new content. When you rebuild your site, translated strings are
16
+ automatically applied to localized versions of your pages, producing fully
17
+ translated HTML output with locale-prefixed URLs.
18
+
19
+ ![Jekyll site localization](docs/assets/img/jekyll-l10n.png)
20
+
21
+ Key features include automatic page duplication with locale-specific URL
22
+ prefixes, flexible fallback modes when translations are incomplete (display
23
+ original English, mark untranslated strings, or leave blank), and support for
24
+ compendium files to share common translations across your site. By leveraging
25
+ the standard Gettext format and workflow, `jekyll-l10n` enables collaboration
26
+ with professional translators and integrates with existing translation
27
+ management systems, while keeping your source content in Markdown where it
28
+ belongs.
29
+
30
+ ## Development Setup
31
+
32
+ To set up your development environment:
33
+
34
+ ```bash
35
+ bundle install # Install Ruby dependencies
36
+ pip install pre-commit # Install pre-commit framework
37
+ pre-commit install # Install pre-commit hooks
38
+ ```
39
+
40
+ This enables pre-commit hooks that automatically check code style and format on each commit.
41
+
42
+ ## Configuration
43
+
44
+ ### Incremental Builds (Performance Optimization)
45
+
46
+ By default, the plugin regenerates all localized pages on every build. For large
47
+ sites with many locales, this can impact build performance. You can enable
48
+ incremental build support to skip regenerating pages that haven't changed:
49
+
50
+ ```yaml
51
+ localization_gettext:
52
+   with_locales_data:
53
+     incremental: true # Enable incremental builds
54
+ ```
55
+
56
+ Or at the top level:
57
+
58
+ ```yaml
59
+ localization_gettext:
60
+   incremental: true
61
+ ```
62
+
63
+ When incremental mode is enabled, pages are only regenerated if:
64
+
65
+ - The source page content has been modified
66
+ - Any PO translation files have been updated
67
+ - The Jekyll configuration has changed
68
+
69
+ This significantly improves build times on subsequent builds when only
70
+ translations or a few pages have changed.
71
+
72
+ ## Machine Translation
73
+
74
+ For teams without translation resources, the plugin integrates with
75
+ LibreTranslate, a free and open-source machine translation service, to
76
+ automatically translate newly extracted strings during the extraction workflow.
77
+ You can enable this optional integration on a per-locale basis, allowing
78
+ LibreTranslate to generate initial translations for compendia files that serve
79
+ as a foundation for professional translators to refine. This significantly
80
+ reduces the initial translation effort and helps bootstrap localization for new
81
+ content, though professional review is still recommended for
82
+ publication-quality results.
83
+
84
+ ## License
85
+
86
+ MIT License - see LICENSE file for details.
87
+
88
+ ## Contributing
89
+
90
+ 1. Fork the repository
91
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
92
+ 3. Commit your changes (`git commit -m 'feat: Add amazing feature'`)
93
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
94
+ 5. Open a Pull Request
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
module Jekyll
  module L10n
    # Plugin Constants - Central definitions for magic values and configuration defaults
    #
    # This module consolidates the constant values used throughout the jekyll-l10n plugin,
    # eliminating magic numbers and string literals from the codebase. Constants are organized
    # into logical groups: text validation, fallback modes, formatting, debugging, and defaults.
    #
    # Regexp and Array constants are frozen explicitly; String constants are frozen through
    # the file-level `frozen_string_literal: true` magic comment.
    #
    # @see PageLocalesConfig for configuration-specific defaults
    module Constants
      # ## Text Validation Constants

      # Minimum length for translatable text strings (shorter strings not extracted)
      # @return [Integer] Always 3
      MIN_TRANSLATABLE_LENGTH = 3

      # Regular expression pattern for validating locale codes
      # Matches ISO 639-1 (2 letter language) with optional ISO 3166-1 (2 letter country)
      # Examples: 'en', 'es', 'fr', 'pt_BR', 'zh_CN'
      #
      # Anchored with \A and \z so the WHOLE string must be a locale code. The line
      # anchors ^ and $ would accept multi-line input such as "en\n../../etc" (or a
      # trailing newline), which is unsafe because locale codes are used to build
      # file system paths.
      # @return [Regexp]
      LOCALE_PATTERN = %r!\A[a-z]{2}(_[A-Z]{2})?\z!.freeze

      # ## Translation Fallback Modes

      # Fallback mode: use original English text if translation not found
      # @return [String] "english"
      FALLBACK_MODE_ENGLISH = "english"

      # Fallback mode: wrap untranslated text with markers (e.g., "[UNTRANSLATED: text]")
      # @return [String] "marker"
      FALLBACK_MODE_MARKER = "marker"

      # Fallback mode: leave text blank if no translation found
      # @return [String] "empty"
      FALLBACK_MODE_EMPTY = "empty"

      # ## Translation Markers

      # Marker used to indicate untranslated strings in marker fallback mode
      # @return [String] "[UNTRANSLATED]"
      UNTRANSLATED_MARKER = "[UNTRANSLATED]"

      # ## PO File Formatting (GNU Gettext Standard)

      # Line length threshold below which strings are rendered on a single line
      # @return [Integer] 80
      PO_SHORT_LINE_LENGTH = 80

      # Character chunk size for long strings (split across multiple lines)
      # @return [Integer] 70
      PO_LINE_LENGTH = 70

      # ## Debug Logger Text Length Thresholds

      # Minimum text length to trigger logging (shorter strings are not logged)
      # @return [Integer] 50
      LOG_THRESHOLD_SHORT = 50

      # Truncate length for key similarity logging (text[0..20])
      # @return [Integer] 20
      LOG_TRUNCATE_SHORT = 20

      # Truncate length for text node previews (text[0..40])
      # @return [Integer] 40
      LOG_TRUNCATE_MEDIUM = 40

      # Truncate length for translation log messages (text[0..60])
      # @return [Integer] 60
      LOG_TRUNCATE_LONG = 60

      # ## Debug Logger Context Extraction

      # Number of characters to show before/after difference in debug output
      # @return [Integer] 10
      BACKTRACE_CONTEXT_LENGTH = 10

      # ## Default Configuration Values

      # Default directory for storing PO translation files
      # @return [String] "_locales"
      DEFAULT_LOCALES_DIR = "_locales"

      # Default fallback mode when translation is not found
      # @return [String] "english"
      DEFAULT_FALLBACK_MODE = FALLBACK_MODE_ENGLISH

      # Default HTML attributes to extract from elements (can be overridden per-page)
      # @return [Array<String>] ["title", "alt", "aria-label", "placeholder", "aria-description"]
      DEFAULT_TRANSLATABLE_ATTRIBUTES = %w(title alt aria-label placeholder aria-description).freeze

      # ## LibreTranslate Integration Defaults

      # Default timeout (in seconds) for LibreTranslate API requests
      # @return [Integer] 300 seconds (5 minutes)
      DEFAULT_LIBRETRANSLATE_TIMEOUT = 300

      # Default batch size for LibreTranslate API calls
      # Controls how many strings are sent in a single request
      # @return [Integer] 50
      DEFAULT_LIBRETRANSLATE_BATCH_SIZE = 50

      # Default number of retry attempts for failed LibreTranslate requests
      # @return [Integer] 3
      DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS = 3

      # Default delay (in seconds) between LibreTranslate retry attempts
      # Exponential backoff is applied: actual_delay = base_delay * (2 ^ (attempt - 1))
      # @return [Integer] 2 seconds
      DEFAULT_LIBRETRANSLATE_RETRY_DELAY = 2

      # Default behavior when LibreTranslate API returns an error
      # If true, translation stops immediately. If false, continues with remaining entries.
      # @return [Boolean] true
      DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = true

      # Default interval for logging LibreTranslate translation progress
      # Progress is logged every N entries. Set to 0 to disable.
      # @return [Integer] 10 (log every 10 entries)
      DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL = 10

      # Default source locale for LibreTranslate API
      # The language of the original content being translated
      # @return [String] "en" (English)
      DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE = "en"

      # Default text format for LibreTranslate API requests
      # Either 'text' (plain text) or 'html' (preserves markup)
      # @return [String] "html"
      DEFAULT_LIBRETRANSLATE_FORMAT = "html"
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
module Jekyll
  module L10n
    # Exception hierarchy used by jekyll-l10n.
    #
    # Each stage of the localization pipeline has its own error class. Every
    # class descends from BaseError (itself a StandardError), so callers can
    # rescue one specific failure type or all plugin errors at once.
    #
    # @example
    #   begin
    #     # Some localization operation
    #   rescue Jekyll::L10n::Errors::ExtractionError => e
    #     Jekyll.logger.error "Extraction failed: #{e.message}"
    #   end
    module Errors
      # Root of the hierarchy; rescue this to catch any jekyll-l10n error.
      BaseError = Class.new(StandardError)

      # Failure while extracting translatable strings from HTML documents
      # (e.g. invalid HTML structure, file I/O errors, parse failures).
      ExtractionError = Class.new(BaseError)

      # Failure while applying PO file translations to HTML
      # (e.g. missing translation files, malformed HTML, resolver failures).
      TranslationError = Class.new(BaseError)

      # Failure while reading, writing, or parsing PO files
      # (e.g. invalid PO format, permission problems, corrupted data).
      PoFileError = Class.new(BaseError)

      # Invalid, missing, or conflicting configuration values; raised during
      # configuration validation.
      ConfigurationError = Class.new(BaseError)

      # Failure while communicating with the LibreTranslate API
      # (e.g. network errors, API unavailability, invalid API keys, rate limiting).
      LibreTranslateError = Class.new(BaseError)
    end
  end
end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../po_file/manager"
4
+ require_relative "../utils/page_locales_config"
5
+ require_relative "../utils/site_config_accessor"
6
+
7
require "fileutils" # FileUtils.rm_rf is used by CompendiumMerger#cleanup_locale_directory

module Jekyll
  module L10n
    # Merges page-specific PO files into compendium files.
    #
    # CompendiumMerger combines all page-specific translations for a locale into
    # a single compendium file. It preserves existing compendium translations,
    # merges in new strings from pages, updates references, and cleans up the
    # locale directory after merging.
    #
    # Key responsibilities:
    # * Load existing compendium translations
    # * Merge page-specific translations into compendium
    # * Preserve existing translations while adding new ones
    # * Update file location references for new entries
    # * Save merged compendium back to file
    # * Clean up locale-specific directory structure
    #
    # @example
    #   merger = CompendiumMerger.new(site)
    #   merger.merge_compendia(po_manager, config)
    #   # Page-specific PO files merged into _locales/{locale}.po, directories cleaned up
    #
    # @see Jekyll::L10n::CompendiumTranslator for automatic translation workflow
    class CompendiumMerger
      # Initialize a new CompendiumMerger.
      #
      # @param site [Jekyll::Site] Jekyll site object
      def initialize(site)
        @site = site
        with_locales_data = SiteConfigAccessor.extract_locales_data(@site)
        @site_config = PageLocalesConfig.new({ "with_locales_data" => with_locales_data })
      end

      # Merge page-specific PO files into compendia for all locales.
      #
      # For each configured locale, loads existing compendium, merges all
      # page-specific translations, and saves the combined result. Cleans up
      # locale subdirectories after merging.
      #
      # @param po_manager [PoFileManager] Manager for PO file operations
      # @param config [PageLocalesConfig] Localization configuration with locales list
      # @return [void]
      def merge_compendia(po_manager, config)
        config.locales.each do |locale|
          process_locale(locale, po_manager, config)
        end
      end

      private

      # Process a single locale: merge existing compendium with new page translations,
      # save the result through the PO manager, then remove the per-page PO directory.
      def process_locale(locale, po_manager, config)
        compendium_path = File.join(@site.source, config.locales_dir, "#{locale}.po")
        existing_compendium = load_existing_compendium(compendium_path)
        merged = po_manager.merge_po_files(locale)

        combined = build_combined_hash(existing_compendium)
        merge_into_combined(combined, merged)
        combined_entries = format_compendium_entries(combined)

        po_manager.save_compendium(locale, combined_entries)
        cleanup_locale_directory(locale, config)
      end

      # Load existing compendium translations or return empty hash if not found
      def load_existing_compendium(compendium_path)
        return {} unless File.exist?(compendium_path)

        PoFileReader.parse_with_references(compendium_path)
      end

      # Initialize combined hash from existing compendium entries
      def build_combined_hash(existing_compendium)
        existing_compendium.each_with_object({}) do |(msgid, data), combined|
          combined[msgid] = normalize_compendium_entry(data)
        end
      end

      # Normalize entry format to ensure consistent hash structure with :msgstr and :reference keys
      def normalize_compendium_entry(data)
        if data.is_a?(Hash)
          { :msgstr => data[:msgstr], :reference => data[:reference] }
        else
          { :msgstr => data, :reference => nil }
        end
      end

      # Merge newly found translations into combined hash, preserving existing translations.
      # Existing msgids keep their msgstr; unseen msgids are added with an empty msgstr.
      def merge_into_combined(combined, merged)
        merged.each do |msgid, entry|
          if combined[msgid]
            update_entry_reference(combined[msgid], entry)
          else
            combined[msgid] = create_new_entry(entry)
          end
        end
      end

      # Update reference for existing entry if it has none and a new reference is available
      def update_entry_reference(combined_entry, entry)
        return unless combined_entry[:reference].nil?
        return unless entry.is_a?(Hash) && entry[:reference]

        combined_entry[:reference] = entry[:reference]
      end

      # Create new entry for untranslated string with optional reference
      def create_new_entry(entry)
        { :msgstr => "", :reference => entry.is_a?(Hash) ? entry[:reference] : nil }
      end

      # Convert combined hash to array of entries suitable for PO file writing.
      # The :reference key is only emitted when a reference is present.
      def format_compendium_entries(combined)
        combined.map do |msgid, data|
          entry = { :msgid => msgid, :msgstr => data[:msgstr] }
          entry[:reference] = data[:reference] if data[:reference]
          entry
        end
      end

      # IMPORTANT: Remove locale-specific directory after compendium merge
      # This cleans up page-specific PO files that have been merged into the compendium
      #
      # The deletion is skipped unless the locale is a plain directory name: a blank
      # or "." locale would make the path resolve to the locales directory itself
      # (deleting every compendium), and ".." or a name containing a path separator
      # would escape it.
      def cleanup_locale_directory(locale, config)
        return unless plain_directory_name?(locale)

        locale_dir = File.join(@site.source, config.locales_dir, locale)
        FileUtils.rm_rf(locale_dir) if File.directory?(locale_dir)
      end

      # True when +locale+ names a single directory level (non-blank, not "." or "..",
      # no path separators), i.e. it is safe to join under the locales directory.
      def plain_directory_name?(locale)
        name = locale.to_s
        return false if name.strip.empty? || name == "." || name == ".."

        File.basename(name) == name
      end
    end
  end
end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../po_file/manager"
4
+ require_relative "../utils/page_locales_config"
5
+ require_relative "../utils/po_entry_converter"
6
+ require_relative "../utils/site_config_accessor"
7
+ require_relative "../utils/logger_formatter"
8
+ require_relative "../translation/libre_translator"
9
+
10
module Jekyll
  module L10n
    # Automatically translates compendium PO files using LibreTranslate.
    #
    # CompendiumTranslator loads compendium files for each locale, identifies
    # untranslated entries, sends them to the LibreTranslate API for translation,
    # and saves the translated results back to the PO files. It provides detailed
    # logging of the translation process.
    #
    # Key responsibilities:
    # * Load compendium PO files for each locale
    # * Convert between PO entry formats for API compatibility
    # * Trigger LibreTranslate translation for untranslated entries
    # * Save translated entries back to compendium files
    # * Log translation statistics and progress
    #
    # @example
    #   translator = CompendiumTranslator.new(site)
    #   translator.translate_compendia(config) if config.libretranslate_enabled?
    #   # Compendia updated with LibreTranslate translations
    class CompendiumTranslator
      # Initialize a new CompendiumTranslator.
      #
      # @param site [Jekyll::Site] Jekyll site object
      def initialize(site)
        @site = site
        with_locales_data = SiteConfigAccessor.extract_locales_data(@site)
        @site_config = PageLocalesConfig.new({ "with_locales_data" => with_locales_data })
      end

      # Translate compendia for all configured locales.
      #
      # Checks if LibreTranslate is enabled. If so, for each configured locale,
      # loads the compendium PO file, identifies untranslated entries, sends them
      # to LibreTranslate API, and saves the updated file.
      #
      # @param config [PageLocalesConfig] Localization configuration
      # @return [void]
      def translate_compendia(config)
        po_manager = PoFileManager.new(@site, config.locales_dir)
        translate_compendia_for_locale(po_manager, config)
      end

      private

      # Run the translation pass over every configured locale.
      # Does nothing beyond a debug log line when LibreTranslate is disabled.
      def translate_compendia_for_locale(po_manager, config)
        log_compendia_enabled_check(config.libretranslate_enabled?)
        return unless config.libretranslate_enabled?

        log_translation_start(config)
        translator = LibreTranslator.new(config)

        config.locales.each do |locale|
          process_single_locale(locale, config, translator, po_manager)
        end

        log_translation_complete(config)
      end

      # Translate and re-save the compendium of one locale.
      # Locales without a compendium file on disk are skipped.
      def process_single_locale(locale, config, translator, po_manager)
        compendium_path = File.join(@site.source, config.locales_dir, "#{locale}.po")
        LoggerFormatter.debug_if_enabled("CompendiumTranslator",
                                         "Processing compendium file: #{compendium_path}")
        return unless File.exist?(compendium_path)

        entries = PoFileReader.parse_with_references(compendium_path)
        po_entries = PoEntryConverter.hash_to_po_entry_array(entries)

        log_compendium_stats(locale, po_entries, compendium_path)
        translator.translate_compendium(po_entries, locale)

        log_translation_complete_for_locale(locale)
        translated_hashes = PoEntryConverter.po_entries_to_array_of_hashes(po_entries)

        po_manager.save_compendium(locale, translated_hashes)
        log_compendium_saved(locale, compendium_path)
      end

      # Debug-log whether LibreTranslate is enabled for this run
      def log_compendia_enabled_check(enabled)
        msg = "translate_compendia_for_locale called, libretranslate_enabled:"
        LoggerFormatter.debug_if_enabled("CompendiumTranslator", "#{msg} #{enabled}")
      end

      # Debug-log how many entries were loaded and how many still need a translation
      def log_compendium_stats(locale, po_entries, compendium_path)
        msg = "Locale #{locale}: Loaded #{po_entries.length} entries from compendium"
        LoggerFormatter.debug_if_enabled("CompendiumTranslator", "#{msg} #{compendium_path}")

        empty_count = count_empty_entries(po_entries)
        LoggerFormatter.debug_if_enabled(
          "CompendiumTranslator",
          "Locale #{locale}: #{empty_count} empty msgstr entries to translate"
        )
      end

      # Debug-log that translation finished for a locale and saving is about to start
      def log_translation_complete_for_locale(locale)
        msg = "Locale #{locale}: Translation complete, saving compendium"
        LoggerFormatter.debug_if_enabled("CompendiumTranslator", msg)
      end

      # Debug-log the path the compendium was saved to
      def log_compendium_saved(locale, compendium_path)
        msg = "Locale #{locale}: Saved compendium to"
        LoggerFormatter.debug_if_enabled("CompendiumTranslator", "#{msg} #{compendium_path}")
      end

      # Count entries that have a non-blank msgid but a nil/empty msgstr.
      #
      # Both fields are coerced with to_s so that an entry with a nil msgid is
      # treated as blank (not counted) instead of raising NoMethodError, matching
      # the nil tolerance already applied to msgstr.
      def count_empty_entries(po_entries)
        po_entries.count do |entry|
          entry.msgstr.to_s.empty? && !entry.msgid.to_s.strip.empty?
        end
      end

      # Info-log the list of locales about to be translated
      def log_translation_start(config)
        locales = config.locales.join(", ")
        Jekyll.logger.info "Localization",
                           "Starting LibreTranslate translation for locales: #{locales}"
      end

      # Info-log the list of locales that were translated
      def log_translation_complete(config)
        locales = config.locales.join(", ")
        Jekyll.logger.info "Localization",
                           "LibreTranslate translation complete for locales: #{locales}"
      end
    end
  end
end