jekyll-l10n 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/lib/jekyll-l10n/constants.rb +136 -0
- data/lib/jekyll-l10n/errors.rb +60 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
- data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
- data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
- data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
- data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
- data/lib/jekyll-l10n/extraction/logger.rb +48 -0
- data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
- data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
- data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
- data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
- data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
- data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
- data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
- data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
- data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
- data/lib/jekyll-l10n/po_file/loader.rb +64 -0
- data/lib/jekyll-l10n/po_file/manager.rb +160 -0
- data/lib/jekyll-l10n/po_file/merger.rb +80 -0
- data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
- data/lib/jekyll-l10n/po_file/reader.rb +518 -0
- data/lib/jekyll-l10n/po_file/writer.rb +232 -0
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
- data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
- data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
- data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
- data/lib/jekyll-l10n/translation/translator.rb +179 -0
- data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
- data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
- data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
- data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
- data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
- data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
- data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
- data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
- data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
- data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
- data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
- data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
- data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
- data/lib/jekyll-l10n/version.rb +10 -0
- data/lib/jekyll-l10n.rb +268 -0
- metadata +200 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: b4ab158ff22fa743e5ec910aab7b546e3a56796b2e7c09ed31b4f5acc7e4b123
|
|
4
|
+
data.tar.gz: b6b3f4a8a4bc06892581b9017b8018cc5d6c2c0b44f8599e3015fa123b29470f
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 809000529424718152cc2c7d9c918d5cf780c9262388001fab8e62c4efeb6c122dce06dd40312b350d3d0fe1761116ea662d915ee03df37c29e1e5f069551630
|
|
7
|
+
data.tar.gz: 63282d1fad6f225a9e34fb39d56eade8a3fe5965e793993d5aded4a41b43bee55fad61b9d8c36ae3d5cd3d63ef964f771e7196b003d1fa632b3460e0fbba6d85
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alain Reguera Delgado
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# jekyll-l10n
|
|
2
|
+
|
|
3
|
+
`jekyll-l10n` is a Jekyll plugin that streamlines the localization of static
|
|
4
|
+
websites using industry-standard GNU Gettext PO files. Creating multilingual
|
|
5
|
+
sites presents unique challenges: maintaining consistent translations across
|
|
6
|
+
multiple pages, managing translation workflows with non-technical translators,
|
|
7
|
+
and keeping translations synchronized as content evolves. This plugin addresses
|
|
8
|
+
these challenges by integrating the proven Gettext workflow directly into your
|
|
9
|
+
Jekyll build pipeline.
|
|
10
|
+
|
|
11
|
+
The plugin works by extracting translatable strings from your site's generated
|
|
12
|
+
HTML and organizing them into PO files that translators can edit using standard
|
|
13
|
+
translation tools. As your content changes, the plugin intelligently updates
|
|
14
|
+
these translation files, preserving existing translations while flagging
|
|
15
|
+
changed or new content. When you rebuild your site, translated strings are
|
|
16
|
+
automatically applied to localized versions of your pages, producing fully
|
|
17
|
+
translated HTML output with locale-prefixed URLs.
|
|
18
|
+
|
|
19
|
+

|
|
20
|
+
|
|
21
|
+
Key features include automatic page duplication with locale-specific URL
|
|
22
|
+
prefixes, flexible fallback modes when translations are incomplete (display
|
|
23
|
+
original English, mark untranslated strings, or leave blank), and support for
|
|
24
|
+
compendium files to share common translations across your site. By leveraging
|
|
25
|
+
the standard Gettext format and workflow, `jekyll-l10n` enables collaboration
|
|
26
|
+
with professional translators and integrates with existing translation
|
|
27
|
+
management systems, while keeping your source content in Markdown where it
|
|
28
|
+
belongs.
|
|
29
|
+
|
|
30
|
+
## Development Setup
|
|
31
|
+
|
|
32
|
+
To set up your development environment:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
bundle install # Install Ruby dependencies
|
|
36
|
+
pip install pre-commit # Install pre-commit framework
|
|
37
|
+
pre-commit install # Install pre-commit hooks
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
This enables pre-commit hooks that automatically check code style and format on each commit.
|
|
41
|
+
|
|
42
|
+
## Configuration
|
|
43
|
+
|
|
44
|
+
### Incremental Builds (Performance Optimization)
|
|
45
|
+
|
|
46
|
+
By default, the plugin regenerates all localized pages on every build. For large
|
|
47
|
+
sites with many locales, this can impact build performance. You can enable
|
|
48
|
+
incremental build support to skip regenerating pages that haven't changed:
|
|
49
|
+
|
|
50
|
+
```yaml
|
|
51
|
+
localization_gettext:
|
|
52
|
+
with_locales_data:
|
|
53
|
+
incremental: true # Enable incremental builds
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Or at the top level:
|
|
57
|
+
|
|
58
|
+
```yaml
|
|
59
|
+
localization_gettext:
|
|
60
|
+
incremental: true
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
When incremental mode is enabled, pages are only regenerated if:
|
|
64
|
+
|
|
65
|
+
- The source page content has been modified
|
|
66
|
+
- Any PO translation files have been updated
|
|
67
|
+
- The Jekyll configuration has changed
|
|
68
|
+
|
|
69
|
+
This significantly improves build times on subsequent builds when only
|
|
70
|
+
translations or a few pages have changed.
|
|
71
|
+
|
|
72
|
+
## Machine Translation
|
|
73
|
+
|
|
74
|
+
For teams without translation resources, the plugin integrates with
|
|
75
|
+
LibreTranslate, a free and open-source machine translation service, to
|
|
76
|
+
automatically translate newly extracted strings during the extraction workflow.
|
|
77
|
+
You can enable this optional integration on a per-locale basis, allowing
|
|
78
|
+
LibreTranslate to generate initial translations for compendium files that serve
|
|
79
|
+
as a foundation for professional translators to refine. This significantly
|
|
80
|
+
reduces the initial translation effort and helps bootstrap localization for new
|
|
81
|
+
content, though professional review is still recommended for
|
|
82
|
+
publication-quality results.
|
|
83
|
+
|
|
84
|
+
## License
|
|
85
|
+
|
|
86
|
+
MIT License - see LICENSE file for details.
|
|
87
|
+
|
|
88
|
+
## Contributing
|
|
89
|
+
|
|
90
|
+
1. Fork the repository
|
|
91
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
92
|
+
3. Commit your changes (`git commit -m 'feat: Add amazing feature'`)
|
|
93
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
94
|
+
5. Open a Pull Request
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
  module L10n
    # Plugin Constants - Central definitions for magic values and configuration defaults
    #
    # This module consolidates the constant values used throughout the jekyll-l10n plugin,
    # eliminating magic numbers and string literals from the codebase. Constants are organized
    # into logical groups: text validation, fallback modes, formatting, debugging, and defaults.
    #
    # @see PageLocalesConfig for configuration-specific defaults
    module Constants
      # ## Text Validation Constants

      # Minimum length for translatable text strings (shorter strings not extracted)
      # @return [Integer] Always 3
      MIN_TRANSLATABLE_LENGTH = 3

      # Regular expression pattern for validating locale codes
      # Matches ISO 639-1 (2 letter language) with optional ISO 3166-1 (2 letter country)
      # Examples: 'en', 'es', 'fr', 'pt_BR', 'zh_CN'
      #
      # The pattern is anchored with \A and \z so that the WHOLE string must be a
      # locale code. The line anchors ^ and $ would accept any multi-line string that
      # merely contains a valid locale on one of its lines (e.g. "en\n../../etc"),
      # as well as a locale followed by a trailing newline.
      # @return [Regexp]
      LOCALE_PATTERN = %r!\A[a-z]{2}(_[A-Z]{2})?\z!.freeze

      # ## Translation Fallback Modes

      # Fallback mode: use original English text if translation not found
      # @return [String] "english"
      FALLBACK_MODE_ENGLISH = "english"

      # Fallback mode: wrap untranslated text with markers (e.g., "[UNTRANSLATED: text]")
      # @return [String] "marker"
      FALLBACK_MODE_MARKER = "marker"

      # Fallback mode: leave text blank if no translation found
      # @return [String] "empty"
      FALLBACK_MODE_EMPTY = "empty"

      # ## Translation Markers

      # Marker used to indicate untranslated strings in marker fallback mode
      # @return [String] "[UNTRANSLATED]"
      UNTRANSLATED_MARKER = "[UNTRANSLATED]"

      # ## PO File Formatting (GNU Gettext Standard)

      # Line length threshold below which strings are rendered on a single line
      # @return [Integer] 80
      PO_SHORT_LINE_LENGTH = 80

      # Character chunk size for long strings (split across multiple lines)
      # @return [Integer] 70
      PO_LINE_LENGTH = 70

      # ## Debug Logger Text Length Thresholds

      # Minimum text length to trigger logging (shorter strings are not logged)
      # @return [Integer] 50
      LOG_THRESHOLD_SHORT = 50

      # Truncate length for key similarity logging (text[0..20])
      # @return [Integer] 20
      LOG_TRUNCATE_SHORT = 20

      # Truncate length for text node previews (text[0..40])
      # @return [Integer] 40
      LOG_TRUNCATE_MEDIUM = 40

      # Truncate length for translation log messages (text[0..60])
      # @return [Integer] 60
      LOG_TRUNCATE_LONG = 60

      # ## Debug Logger Context Extraction

      # Number of characters to show before/after difference in debug output
      # @return [Integer] 10
      BACKTRACE_CONTEXT_LENGTH = 10

      # ## Default Configuration Values

      # Default directory for storing PO translation files
      # @return [String] "_locales"
      DEFAULT_LOCALES_DIR = "_locales"

      # Default fallback mode when translation is not found
      # @return [String] "english"
      DEFAULT_FALLBACK_MODE = FALLBACK_MODE_ENGLISH

      # Default HTML attributes to extract from elements (can be overridden per-page)
      # @return [Array<String>] ["title", "alt", "aria-label", "placeholder", "aria-description"]
      DEFAULT_TRANSLATABLE_ATTRIBUTES = %w(title alt aria-label placeholder aria-description).freeze

      # ## LibreTranslate Integration Defaults

      # Default timeout (in seconds) for LibreTranslate API requests
      # @return [Integer] 300 seconds (5 minutes)
      DEFAULT_LIBRETRANSLATE_TIMEOUT = 300

      # Default batch size for LibreTranslate API calls
      # Controls how many strings are sent in a single request
      # @return [Integer] 50
      DEFAULT_LIBRETRANSLATE_BATCH_SIZE = 50

      # Default number of retry attempts for failed LibreTranslate requests
      # @return [Integer] 3
      DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS = 3

      # Default delay (in seconds) between LibreTranslate retry attempts
      # Exponential backoff is applied: actual_delay = base_delay * (2 ^ (attempt - 1))
      # @return [Integer] 2 seconds
      DEFAULT_LIBRETRANSLATE_RETRY_DELAY = 2

      # Default behavior when LibreTranslate API returns an error
      # If true, translation stops immediately. If false, continues with remaining entries.
      # @return [Boolean] true
      DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = true

      # Default interval for logging LibreTranslate translation progress
      # Progress is logged every N entries. Set to 0 to disable.
      # @return [Integer] 10 (log every 10 entries)
      DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL = 10

      # Default source locale for LibreTranslate API
      # The language of the original content being translated
      # @return [String] "en" (English)
      DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE = "en"

      # Default text format for LibreTranslate API requests
      # Either 'text' (plain text) or 'html' (preserves markup)
      # @return [String] "html"
      DEFAULT_LIBRETRANSLATE_FORMAT = "html"
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
  module L10n
    # Exception classes raised by jekyll-l10n.
    #
    # Every class defined here descends from {BaseError}, which itself descends
    # from StandardError. Callers can therefore rescue one precise failure type,
    # or rescue {BaseError} to handle any error originating from the plugin.
    #
    # @example Rescuing a specific failure
    #   begin
    #     # Some localization operation
    #   rescue Jekyll::L10n::Errors::ExtractionError => e
    #     Jekyll.logger.error "Extraction failed: #{e.message}"
    #   end
    module Errors
      # Root of the plugin's exception hierarchy; rescue this to catch any
      # jekyll-l10n error regardless of which stage produced it.
      class BaseError < StandardError
      end

      # Failure while extracting translatable strings from HTML documents
      # (for example invalid HTML structure, file I/O errors or parse failures).
      class ExtractionError < BaseError
      end

      # Failure while applying PO-file translations to HTML (for example
      # missing translation files, malformed HTML or resolver failures).
      class TranslationError < BaseError
      end

      # Failure while reading, writing or parsing a PO file (for example an
      # invalid file format, permission problems or corrupted data).
      class PoFileError < BaseError
      end

      # Invalid, missing or conflicting configuration values detected during
      # configuration validation.
      class ConfigurationError < BaseError
      end

      # Failure while talking to the LibreTranslate API (for example network
      # errors, service unavailability, invalid API keys or rate limiting).
      class LibreTranslateError < BaseError
      end
    end
  end
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../po_file/manager"
|
|
4
|
+
require_relative "../utils/page_locales_config"
|
|
5
|
+
require_relative "../utils/site_config_accessor"
|
|
6
|
+
|
|
7
|
+
module Jekyll
  module L10n
    # Folds the per-page PO files of every locale into that locale's compendium.
    #
    # For each locale the merger:
    #   1. reads the current compendium file (if one exists),
    #   2. asks the PO manager for the merged page-level entries,
    #   3. keeps every existing compendium translation, adds unseen msgids with
    #      an empty msgstr, and fills in a reference where the compendium had none,
    #   4. saves the result through the PO manager, and
    #   5. deletes the locale's sub-directory of page-specific PO files.
    #
    # @example
    #   merger = CompendiumMerger.new(site)
    #   merger.merge_compendia(po_manager, config)
    #   # Page-specific PO files merged into _locales/{locale}.po, directories cleaned up
    #
    # @see Jekyll::L10n::CompendiumTranslator for automatic translation workflow
    class CompendiumMerger
      # Build a merger bound to a Jekyll site.
      #
      # @param site [Jekyll::Site] Jekyll site object
      def initialize(site)
        @site = site
        locales_data = SiteConfigAccessor.extract_locales_data(@site)
        @site_config = PageLocalesConfig.new({ "with_locales_data" => locales_data })
      end

      # Run the merge for every locale listed in +config+.
      #
      # @param po_manager [PoFileManager] Manager for PO file operations
      # @param config [PageLocalesConfig] Localization configuration with locales list
      # @return [void]
      def merge_compendia(po_manager, config)
        config.locales.each { |locale| process_locale(locale, po_manager, config) }
      end

      private

      # Merge, save and clean up a single locale.
      def process_locale(locale, po_manager, config)
        path = File.join(@site.source, config.locales_dir, "#{locale}.po")
        previous = load_existing_compendium(path)
        page_entries = po_manager.merge_po_files(locale)

        combined = build_combined_hash(previous)
        merge_into_combined(combined, page_entries)

        po_manager.save_compendium(locale, format_compendium_entries(combined))
        cleanup_locale_directory(locale, config)
      end

      # Parse the compendium at +compendium_path+; an absent file yields an empty hash.
      def load_existing_compendium(compendium_path)
        return {} unless File.exist?(compendium_path)

        PoFileReader.parse_with_references(compendium_path)
      end

      # Copy the existing compendium into a fresh hash of normalized entries.
      def build_combined_hash(existing_compendium)
        existing_compendium.each_with_object({}) do |(msgid, data), result|
          result[msgid] = normalize_compendium_entry(data)
        end
      end

      # Coerce an entry into the { :msgstr, :reference } shape. Non-hash data is
      # treated as the msgstr itself, with no reference.
      def normalize_compendium_entry(data)
        return { :msgstr => data, :reference => nil } unless data.is_a?(Hash)

        { :msgstr => data[:msgstr], :reference => data[:reference] }
      end

      # Add page-level entries to +combined+ without overwriting existing translations.
      def merge_into_combined(combined, merged)
        merged.each do |msgid, entry|
          known = combined[msgid]
          if known
            update_entry_reference(known, entry)
          else
            combined[msgid] = create_new_entry(entry)
          end
        end
      end

      # Fill in a missing reference on an existing entry; an existing reference wins.
      def update_entry_reference(combined_entry, entry)
        return unless combined_entry[:reference].nil?
        return unless entry.is_a?(Hash) && entry[:reference]

        combined_entry[:reference] = entry[:reference]
      end

      # Build an untranslated entry (empty msgstr), carrying the reference when
      # the source entry is a hash that has one.
      def create_new_entry(entry)
        reference = entry[:reference] if entry.is_a?(Hash)
        { :msgstr => "", :reference => reference }
      end

      # Turn the combined hash into the array of entry hashes handed to
      # the PO manager; :reference is only included when present.
      def format_compendium_entries(combined)
        combined.map do |msgid, data|
          { :msgid => msgid, :msgstr => data[:msgstr] }.tap do |entry|
            entry[:reference] = data[:reference] if data[:reference]
          end
        end
      end

      # IMPORTANT: deletes the locale's sub-directory (and the page-specific PO
      # files in it) once their content has been merged into the compendium.
      def cleanup_locale_directory(locale, config)
        target = File.join(@site.source, config.locales_dir, locale)
        return unless File.directory?(target)

        FileUtils.rm_rf(target)
      end
    end
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../po_file/manager"
|
|
4
|
+
require_relative "../utils/page_locales_config"
|
|
5
|
+
require_relative "../utils/po_entry_converter"
|
|
6
|
+
require_relative "../utils/site_config_accessor"
|
|
7
|
+
require_relative "../utils/logger_formatter"
|
|
8
|
+
require_relative "../translation/libre_translator"
|
|
9
|
+
|
|
10
|
+
module Jekyll
  module L10n
    # Fills in compendium PO files by sending their entries to LibreTranslate.
    #
    # When LibreTranslate is enabled in the configuration, each locale's
    # compendium file is parsed, converted to PO entry objects, handed to a
    # LibreTranslator, converted back to hashes and saved through the PO
    # manager. Progress is reported through debug and info log messages.
    #
    # @example
    #   translator = CompendiumTranslator.new(site)
    #   translator.translate_compendia(config) if config.libretranslate_enabled?
    #   # Compendia updated with LibreTranslate translations
    class CompendiumTranslator
      # Build a translator bound to a Jekyll site.
      #
      # @param site [Jekyll::Site] Jekyll site object
      def initialize(site)
        @site = site
        locales_data = SiteConfigAccessor.extract_locales_data(@site)
        @site_config = PageLocalesConfig.new({ "with_locales_data" => locales_data })
      end

      # Translate the compendium of every configured locale.
      #
      # Does nothing beyond a debug log entry when LibreTranslate is disabled
      # in +config+.
      #
      # @param config [PageLocalesConfig] Localization configuration
      # @return [void]
      def translate_compendia(config)
        manager = PoFileManager.new(@site, config.locales_dir)
        translate_compendia_for_locale(manager, config)
      end

      private

      # Guard on the enabled flag, then run every locale through one shared translator.
      def translate_compendia_for_locale(po_manager, config)
        log_compendia_enabled_check(config.libretranslate_enabled?)
        return unless config.libretranslate_enabled?

        log_translation_start(config)
        translator = LibreTranslator.new(config)
        config.locales.each do |locale|
          process_single_locale(locale, config, translator, po_manager)
        end
        log_translation_complete(config)
      end

      # Load, translate and save the compendium of one locale; locales without
      # a compendium file are skipped.
      def process_single_locale(locale, config, translator, po_manager)
        path = File.join(@site.source, config.locales_dir, "#{locale}.po")
        LoggerFormatter.debug_if_enabled("CompendiumTranslator",
                                         "Processing compendium file: #{path}")
        return unless File.exist?(path)

        parsed = PoFileReader.parse_with_references(path)
        po_entries = PoEntryConverter.hash_to_po_entry_array(parsed)
        log_compendium_stats(locale, po_entries, path)

        # The same po_entries collection is converted back and saved afterwards.
        translator.translate_compendium(po_entries, locale)
        log_translation_complete_for_locale(locale)

        hashes = PoEntryConverter.po_entries_to_array_of_hashes(po_entries)
        po_manager.save_compendium(locale, hashes)
        log_compendium_saved(locale, path)
      end

      # Debug-log whether LibreTranslate is enabled.
      def log_compendia_enabled_check(enabled)
        LoggerFormatter.debug_if_enabled(
          "CompendiumTranslator",
          "translate_compendia_for_locale called, libretranslate_enabled: #{enabled}"
        )
      end

      # Debug-log how many entries were loaded and how many still need translating.
      def log_compendium_stats(locale, po_entries, compendium_path)
        loaded = "Locale #{locale}: Loaded #{po_entries.length} entries from compendium #{compendium_path}"
        LoggerFormatter.debug_if_enabled("CompendiumTranslator", loaded)

        pending = count_empty_entries(po_entries)
        LoggerFormatter.debug_if_enabled("CompendiumTranslator",
                                         "Locale #{locale}: #{pending} empty msgstr entries to translate")
      end

      # Debug-log that a locale finished translating and is about to be saved.
      def log_translation_complete_for_locale(locale)
        LoggerFormatter.debug_if_enabled("CompendiumTranslator",
                                         "Locale #{locale}: Translation complete, saving compendium")
      end

      # Debug-log where a locale's compendium was written.
      def log_compendium_saved(locale, compendium_path)
        LoggerFormatter.debug_if_enabled("CompendiumTranslator",
                                         "Locale #{locale}: Saved compendium to #{compendium_path}")
      end

      # Count entries whose msgstr is nil or empty and whose msgid is not blank.
      def count_empty_entries(po_entries)
        po_entries.count do |entry|
          translation = entry.msgstr
          next false unless translation.nil? || translation.empty?

          !entry.msgid.strip.empty?
        end
      end

      # Info-log the list of locales about to be translated.
      def log_translation_start(config)
        Jekyll.logger.info "Localization",
                           "Starting LibreTranslate translation for locales: #{config.locales.join(", ")}"
      end

      # Info-log the list of locales that were translated.
      def log_translation_complete(config)
        Jekyll.logger.info "Localization",
                           "LibreTranslate translation complete for locales: #{config.locales.join(", ")}"
      end
    end
  end
end
|