jekyll-l10n 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/lib/jekyll-l10n/constants.rb +136 -0
- data/lib/jekyll-l10n/errors.rb +60 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
- data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
- data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
- data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
- data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
- data/lib/jekyll-l10n/extraction/logger.rb +48 -0
- data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
- data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
- data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
- data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
- data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
- data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
- data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
- data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
- data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
- data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
- data/lib/jekyll-l10n/po_file/loader.rb +64 -0
- data/lib/jekyll-l10n/po_file/manager.rb +160 -0
- data/lib/jekyll-l10n/po_file/merger.rb +80 -0
- data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
- data/lib/jekyll-l10n/po_file/reader.rb +518 -0
- data/lib/jekyll-l10n/po_file/writer.rb +232 -0
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
- data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
- data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
- data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
- data/lib/jekyll-l10n/translation/translator.rb +179 -0
- data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
- data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
- data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
- data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
- data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
- data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
- data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
- data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
- data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
- data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
- data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
- data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
- data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
- data/lib/jekyll-l10n/version.rb +10 -0
- data/lib/jekyll-l10n.rb +268 -0
- metadata +200 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../constants"
|
|
4
|
+
|
|
5
|
+
module Jekyll
|
|
6
|
+
module L10n
|
|
7
|
+
# Configuration Parser - Extracts and validates localization settings from page front matter
|
|
8
|
+
#
|
|
9
|
+
# PageLocalesConfig parses the `with_locales_data` front matter field in Jekyll pages and
|
|
10
|
+
# provides reader methods with defaults for the localization configuration. On construction
# it validates the locale codes and, when LibreTranslate is enabled, the API URL, source
# locale, and format, raising clear errors for invalid configurations.
|
|
12
|
+
#
|
|
13
|
+
# Configuration can be specified in page front matter at multiple levels:
|
|
14
|
+
# - Translation settings (fallback modes, LibreTranslate API)
|
|
15
|
+
# - Extraction settings (which attributes to extract, directories)
|
|
16
|
+
# - Logging settings (debug output, statistics)
|
|
17
|
+
#
|
|
18
|
+
# Key responsibilities:
|
|
19
|
+
# - Parse `with_locales_data` from page front matter
|
|
20
|
+
# - Validate locale codes against ISO 639-1/2 format
|
|
21
|
+
# - Validate LibreTranslate configuration when enabled
|
|
22
|
+
# - Provide getter methods with sensible defaults
|
|
23
|
+
# - Raise detailed validation errors for invalid configurations
|
|
24
|
+
#
|
|
25
|
+
# @example Minimal configuration
|
|
26
|
+
# ---
|
|
27
|
+
# with_locales: true
|
|
28
|
+
# with_locales_data:
|
|
29
|
+
# locales: [es, fr, pt]
|
|
30
|
+
# ---
|
|
31
|
+
#
|
|
32
|
+
# @example Full configuration with LibreTranslate
|
|
33
|
+
# ---
|
|
34
|
+
# with_locales: true
|
|
35
|
+
# with_locales_data:
|
|
36
|
+
# locales: [es, fr, pt_BR]
|
|
37
|
+
# extract_on_build: true
|
|
38
|
+
# update_compendium: true
|
|
39
|
+
# extraction:
|
|
40
|
+
# translatable_attributes: [title, alt, aria-label]
|
|
41
|
+
# translation:
|
|
42
|
+
# fallback: english
|
|
43
|
+
# libretranslate_enabled: true
|
|
44
|
+
# libretranslate_api_url: "http://localhost:5000/translate"
|
|
45
|
+
# libretranslate_timeout: 300
|
|
46
|
+
# logging:
|
|
47
|
+
# debug: true
|
|
48
|
+
# ---
|
|
49
|
+
#
|
|
50
|
+
class PageLocalesConfig
  # Local aliases for the shared values in Constants, so existing references such as
  # PageLocalesConfig::DEFAULT_LOCALES_DIR keep resolving.
  LOCALE_PATTERN = Constants::LOCALE_PATTERN
  DEFAULT_LOCALES_DIR = Constants::DEFAULT_LOCALES_DIR
  DEFAULT_FALLBACK_MODE = Constants::DEFAULT_FALLBACK_MODE
  DEFAULT_TRANSLATABLE_ATTRIBUTES = Constants::DEFAULT_TRANSLATABLE_ATTRIBUTES
  DEFAULT_LIBRETRANSLATE_TIMEOUT = Constants::DEFAULT_LIBRETRANSLATE_TIMEOUT
  DEFAULT_LIBRETRANSLATE_BATCH_SIZE = Constants::DEFAULT_LIBRETRANSLATE_BATCH_SIZE
  DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS = Constants::DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS
  DEFAULT_LIBRETRANSLATE_RETRY_DELAY = Constants::DEFAULT_LIBRETRANSLATE_RETRY_DELAY
  DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = Constants::DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR
  DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL = Constants::DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL
  DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE = Constants::DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE
  DEFAULT_LIBRETRANSLATE_FORMAT = Constants::DEFAULT_LIBRETRANSLATE_FORMAT

  # @!attribute [r] data
  #   The page data object this config was built from (stored as given)
  #   @return [Hash]
  attr_reader :data

  # Build the configuration from page front matter and validate it.
  #
  # Reads `with_locales_data` (an empty hash when absent) and `path` (reported as
  # "unknown" when absent) from +page_data+, then runs the locale and
  # LibreTranslate validations.
  #
  # @param page_data [Hash] The Jekyll page data/front matter object
  # @raise [Jekyll::Errors::InvalidConfigurationError] If a locale code does not
  #   match LOCALE_PATTERN
  # @raise [Jekyll::Errors::InvalidConfigurationError] If LibreTranslate is enabled
  #   with a missing URL, an invalid source locale, or an unsupported format
  def initialize(page_data)
    @config = page_data["with_locales_data"] || {}
    @data = page_data
    @page_path = page_data["path"] || "unknown"

    validate_locales!
    validate_libretranslate!
  end

  # Locales configured in `with_locales_data.locales`.
  #
  # The value is always returned as an array: a scalar such as `locales: es` is
  # wrapped, so it is validated like any other entry instead of failing with a
  # NoMethodError. A missing (nil or false) value yields an empty array. After
  # construction every element has passed the LOCALE_PATTERN check.
  #
  # @return [Array<String>] Locale codes (e.g., ['es', 'fr', 'pt_BR'])
  def locales
    Array(@config["locales"] || [])
  end

  # Directory holding the PO files for this page.
  #
  # @return [String] `with_locales_data.locales_dir`, else DEFAULT_LOCALES_DIR
  def locales_dir
    @config["locales_dir"] || DEFAULT_LOCALES_DIR
  end

  # Whether string extraction runs during the Jekyll build.
  #
  # @return [Boolean] false only when `extract_on_build` is exactly false
  def extract_on_build?
    @config["extract_on_build"] != false
  end

  # Whether compendium files are updated during extraction.
  #
  # @return [Boolean] false only when `update_compendium` is exactly false
  def update_compendium?
    @config["update_compendium"] != false
  end

  # Whether extraction statistics are shown in logs.
  #
  # @return [Boolean] false only when `logging.show_statistics` is exactly false
  def show_statistics?
    @config.dig("logging", "show_statistics") != false
  end

  # Whether debug-level logging is enabled.
  #
  # @return [Boolean] true only when `logging.debug` is exactly true
  def debug_logging?
    @config.dig("logging", "debug") == true
  end

  # Whether trace-level logging is enabled.
  #
  # Trace logging is implied by debug logging.
  #
  # @return [Boolean] true when `logging.trace` is exactly true or debug_logging? is true
  def trace_logging?
    @config.dig("logging", "trace") == true || debug_logging?
  end

  # Fallback mode used when a translation is missing.
  #
  # The configured value is returned as-is; it is not validated here. The
  # documented modes are "english", "marker", and "empty".
  #
  # @return [String] `translation.fallback`, else DEFAULT_FALLBACK_MODE
  def fallback_mode
    @config.dig("translation", "fallback") || DEFAULT_FALLBACK_MODE
  end

  # HTML attributes whose values are extracted for translation.
  #
  # @return [Array<String>] `extraction.translatable_attributes`, else
  #   DEFAULT_TRANSLATABLE_ATTRIBUTES
  def translatable_attributes
    @config.dig("extraction", "translatable_attributes") || DEFAULT_TRANSLATABLE_ATTRIBUTES
  end

  # Whether localization is enabled for this page.
  #
  # @return [Boolean] true when at least one locale is configured
  def enabled?
    !locales.empty?
  end

  # Whether LibreTranslate automatic translation is enabled.
  #
  # When the `libretranslate_enabled` key is present it decides alone (enabled
  # only for an exact true). Otherwise a non-empty `libretranslate_api_url`
  # enables it, for backward compatibility.
  #
  # @return [Boolean] true if LibreTranslate is enabled
  def libretranslate_enabled?
    # Priority 1: Explicit flag (when set)
    if libretranslate_config.key?("libretranslate_enabled")
      return libretranslate_config["libretranslate_enabled"] == true
    end

    # Priority 2: Backward compatibility - URL presence (when flag not set)
    !libretranslate_api_url.nil? && !libretranslate_api_url.empty?
  end

  # Language of the original content sent to LibreTranslate.
  #
  # @return [String] `translation.libretranslate_source_locale`, else
  #   DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE
  def libretranslate_source_locale
    libretranslate_config["libretranslate_source_locale"] ||
      DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE
  end

  # Text format passed to LibreTranslate.
  #
  # Only 'text' and 'html' pass validation when LibreTranslate is enabled.
  #
  # @return [String] `translation.libretranslate_format`, else
  #   DEFAULT_LIBRETRANSLATE_FORMAT
  def libretranslate_format
    libretranslate_config["libretranslate_format"] || DEFAULT_LIBRETRANSLATE_FORMAT
  end

  # LibreTranslate API endpoint URL.
  #
  # @return [String, nil] `translation.libretranslate_api_url`, or nil if not configured
  def libretranslate_api_url
    libretranslate_config["libretranslate_api_url"]
  end

  # LibreTranslate API key, for instances that require one.
  #
  # @return [String, nil] `translation.libretranslate_api_key`, or nil if not configured
  def libretranslate_api_key
    libretranslate_config["libretranslate_api_key"]
  end

  # Timeout for LibreTranslate API requests.
  #
  # @return [Integer] `translation.libretranslate_timeout`, else
  #   DEFAULT_LIBRETRANSLATE_TIMEOUT
  def libretranslate_timeout
    libretranslate_config["libretranslate_timeout"] || DEFAULT_LIBRETRANSLATE_TIMEOUT
  end

  # Number of strings sent to LibreTranslate per request.
  #
  # @return [Integer] `translation.libretranslate_batch_size`, else
  #   DEFAULT_LIBRETRANSLATE_BATCH_SIZE
  def libretranslate_batch_size
    libretranslate_config["libretranslate_batch_size"] || DEFAULT_LIBRETRANSLATE_BATCH_SIZE
  end

  # Number of retry attempts for failed LibreTranslate requests.
  #
  # @return [Integer] `translation.libretranslate_retry_attempts`, else
  #   DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS
  def libretranslate_retry_attempts
    libretranslate_config["libretranslate_retry_attempts"] ||
      DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS
  end

  # Delay between LibreTranslate retry attempts.
  #
  # @return [Integer] `translation.libretranslate_retry_delay`, else
  #   DEFAULT_LIBRETRANSLATE_RETRY_DELAY
  def libretranslate_retry_delay
    libretranslate_config["libretranslate_retry_delay"] ||
      DEFAULT_LIBRETRANSLATE_RETRY_DELAY
  end

  # Whether a LibreTranslate error should stop the translation run.
  #
  # @return [Boolean] false only when `translation.libretranslate_stop_on_error`
  #   is exactly false
  def libretranslate_stop_on_error?
    libretranslate_config["libretranslate_stop_on_error"] != false
  end

  # Number of translated entries between progress log messages.
  #
  # @return [Integer] `translation.libretranslate_progress_interval`, else
  #   DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL
  def libretranslate_progress_interval
    libretranslate_config["libretranslate_progress_interval"] ||
      DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL
  end

  private

  # The `translation` section of the config; all libretranslate_* keys live there.
  def libretranslate_config
    @config["translation"] || {}
  end

  # Reject any configured locale that does not match LOCALE_PATTERN.
  # The error is logged before it is raised.
  def validate_locales!
    invalid = locales.grep_v(LOCALE_PATTERN)

    return if invalid.empty?

    error_msg = build_validation_error_message(invalid)

    Jekyll.logger.error("Localization Config", error_msg)

    raise Jekyll::Errors::InvalidConfigurationError, error_msg
  end

  # Multi-line explanation listing the offending codes and the usual mistakes.
  def build_validation_error_message(invalid_codes)
    <<~ERROR
      Invalid locale codes in #{@page_path}: #{invalid_codes.join(", ")}

      Expected ISO 639-1/2 format (e.g., 'es', 'fr', 'pt_BR')
      Valid pattern: ^[a-z]{2}(_[A-Z]{2})?$

      Common mistakes:
      - Use lowercase language code: 'es' not 'ES' or 'ESP'
      - Use underscore for country: 'pt_BR' not 'pt-BR'
      - Use ISO codes: 'es' not 'español'
      - Minimum 2 characters: 'es' not 'e'
      - Country code uppercase: 'pt_BR' not 'pt_br'
    ERROR
  end

  # Check the LibreTranslate settings; a no-op unless LibreTranslate is enabled.
  def validate_libretranslate!
    return unless libretranslate_enabled?

    # Require URL when enabled (reachable when the explicit flag is true but no URL is set)
    if libretranslate_api_url.nil? || libretranslate_api_url.empty?
      raise Jekyll::Errors::InvalidConfigurationError,
            "libretranslate_enabled is true but libretranslate_api_url is not configured"
    end

    # Validate source locale against the same pattern as page locales
    unless LOCALE_PATTERN.match?(libretranslate_source_locale)
      raise Jekyll::Errors::InvalidConfigurationError,
            "Invalid libretranslate_source_locale: #{libretranslate_source_locale}"
    end

    # Validate format
    unless %w(text html).include?(libretranslate_format)
      raise Jekyll::Errors::InvalidConfigurationError,
            "Invalid libretranslate_format: #{libretranslate_format} (must be 'text' or 'html')"
    end
  end
end
|
|
343
|
+
end
|
|
344
|
+
end
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "gettext/po"
|
|
4
|
+
|
|
5
|
+
module Jekyll
|
|
6
|
+
module L10n
|
|
7
|
+
# Converts between translation hash and GetText POEntry formats.
|
|
8
|
+
#
|
|
9
|
+
# PoEntryConverter handles bidirectional conversion between simple translation
|
|
10
|
+
# hashes and GetText::POEntry objects used by the ruby-gettext gem. It preserves
|
|
11
|
+
# msgstr values, reference comments, and fuzzy flags during conversion.
|
|
12
|
+
#
|
|
13
|
+
# Key responsibilities:
|
|
14
|
+
# * Convert translation hashes to POEntry objects
|
|
15
|
+
# * Convert POEntry objects back to hash format
|
|
16
|
+
# * Carry references in both directions; apply fuzzy flags when building POEntry objects
|
|
17
|
+
# * Handle both hash and array return formats
|
|
18
|
+
#
|
|
19
|
+
# @example
|
|
20
|
+
# hash = { "Hello" => { msgstr: "Hola", reference: "file.html:10" } }
|
|
21
|
+
# entries = PoEntryConverter.hash_to_po_entry_array(hash)
|
|
22
|
+
# # entries is array of GetText::POEntry objects
|
|
23
|
+
class PoEntryConverter
  # Convert a translation hash to a hash of POEntry objects keyed by msgid.
  #
  # Accepts the simple form { msgid => msgstr } and the metadata form
  # { msgid => { msgstr: "...", reference: "...", fuzzy: false } }; the two
  # forms may be mixed within one hash.
  #
  # @param hash [Hash, nil] Translation hash
  # @return [Hash] Hash of { msgid => POEntry }; empty for nil or empty input
  def self.hash_to_po_entries(hash)
    return {} if hash.nil? || hash.empty?

    hash.each_with_object({}) do |(msgid, data), entries|
      entries[msgid] = build_entry(msgid, data)
    end
  end

  # Convert a translation hash to an array of POEntry objects.
  #
  # Same input forms as hash_to_po_entries; entries keep the hash's
  # iteration order.
  #
  # @param hash [Hash, nil] Translation hash
  # @return [Array<GetText::POEntry>] Array of POEntry objects; empty for nil
  #   or empty input
  def self.hash_to_po_entry_array(hash)
    return [] if hash.nil? || hash.empty?

    hash.map { |msgid, data| build_entry(msgid, data) }
  end

  # Convert POEntry objects to a translation hash.
  #
  # msgstr is stringified, so a nil msgstr becomes "". The :reference key is
  # always present and holds the entry's extracted_comment (possibly nil).
  #
  # @param entries [Array<GetText::POEntry>, nil] Array of POEntry objects
  # @return [Hash] Hash of { msgid => { msgstr: "...", reference: "..." } }
  def self.po_entries_to_hash(entries)
    return {} if entries.nil? || entries.empty?

    entries.each_with_object({}) do |entry, hash|
      hash[entry.msgid] = {
        :msgstr    => entry.msgstr.to_s,
        :reference => entry.extracted_comment,
      }
    end
  end

  # Convert POEntry objects to an array of hashes.
  #
  # Unlike po_entries_to_hash, msgstr is passed through unchanged (it may be
  # nil) and :reference is only added when extracted_comment is non-empty.
  #
  # @param entries [Array<GetText::POEntry>, nil] Array of POEntry objects
  # @return [Array<Hash>] Array of { msgid: "...", msgstr: "...", reference: "..." }
  def self.po_entries_to_array_of_hashes(entries)
    return [] if entries.nil? || entries.empty?

    entries.map do |entry|
      hash = { :msgid => entry.msgid, :msgstr => entry.msgstr }
      if entry.extracted_comment && !entry.extracted_comment.empty?
        hash[:reference] = entry.extracted_comment
      end
      hash
    end
  end

  # Build one :normal POEntry for +msgid+.
  #
  # A Hash +data+ supplies :msgstr plus optional :reference and :fuzzy; any
  # other value is used directly as the msgstr. The reference is stored via
  # POEntry#add_comment.
  # NOTE(review): the reverse converters read extracted_comment - presumably the
  # slot add_comment writes to; confirm against the gettext gem.
  #
  # @param msgid [String] Source string
  # @param data [Hash, String, nil] Metadata hash or plain msgstr
  # @return [GetText::POEntry]
  def self.build_entry(msgid, data)
    entry = ::GetText::POEntry.new(:normal)
    entry.msgid = msgid

    if data.is_a?(Hash)
      entry.msgstr = data[:msgstr]
      entry.add_comment(data[:reference]) if data[:reference]
      entry.flag = "fuzzy" if data[:fuzzy]
    else
      entry.msgstr = data
    end

    entry
  end
  private_class_method :build_entry
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
|
|
4
|
+
module L10n
|
|
5
|
+
# Accesses Jekyll site configuration properties.
|
|
6
|
+
#
|
|
7
|
+
# SiteConfigAccessor provides a unified interface for accessing Jekyll site
|
|
8
|
+
# properties, handling both normal Jekyll site objects and test doubles (hashes).
|
|
9
|
+
# This enables easier testing and more flexible configuration handling.
|
|
10
|
+
#
|
|
11
|
+
# Key responsibilities:
|
|
12
|
+
# * Extract localization data from site configuration
|
|
13
|
+
# * Access site source directory
|
|
14
|
+
# * Access site destination directory
|
|
15
|
+
# * Handle both Jekyll site objects and hash-based doubles
|
|
16
|
+
#
|
|
17
|
+
# @example
|
|
18
|
+
# locales_data = SiteConfigAccessor.extract_locales_data(site)
|
|
19
|
+
# source = SiteConfigAccessor.source(site)
|
|
20
|
+
# dest = SiteConfigAccessor.dest(site)
|
|
21
|
+
class SiteConfigAccessor
  class << self
    # Read the site-wide localization settings.
    #
    # Looks only at the first entry of the site's `defaults` list and returns
    # its `values.with_locales_data` hash.
    #
    # @param site [Jekyll::Site, Hash] Jekyll site object or hash double
    # @return [Hash] The `with_locales_data` hash, or an empty hash if absent
    def extract_locales_data(site)
      settings = site.is_a?(Hash) ? site["config"] : site.config
      locales_data = settings.dig("defaults", 0, "values", "with_locales_data")
      locales_data || {}
    end

    # Read the site source directory.
    #
    # @param site [Jekyll::Site, Hash] Jekyll site object or hash double
    # @return [String] Path to site source directory
    def source(site)
      return site["source"] if site.is_a?(Hash)

      site.source
    end

    # Read the site destination directory.
    #
    # @param site [Jekyll::Site, Hash] Jekyll site object or hash double
    # @return [String] Path to site destination directory
    def dest(site)
      return site["dest"] if site.is_a?(Hash)

      site.dest
    end
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
|
|
4
|
+
module L10n
|
|
5
|
+
# Normalizes whitespace in text for consistent matching.
|
|
6
|
+
#
|
|
7
|
+
# TextNormalizer converts multiple whitespace characters (newlines, tabs,
|
|
8
|
+
# carriage returns) to single spaces and collapses consecutive spaces into
|
|
9
|
+
# a single space. Used during extraction and translation to ensure consistent
|
|
10
|
+
# text matching regardless of HTML formatting.
|
|
11
|
+
#
|
|
12
|
+
# Key responsibilities:
|
|
13
|
+
# * Replace newlines, tabs, carriage returns with spaces
|
|
14
|
+
# * Collapse consecutive spaces to single space
|
|
15
|
+
module TextNormalizer
  extend self

  # Normalize whitespace in text for consistent translation matching.
  #
  # === Why Normalization Is Critical ===
  # HTML rendering treats whitespace differently than source code:
  #   - Multiple spaces render as one space
  #   - Newlines become spaces (unless in <pre> tags)
  #   - Tabs become spaces
  #
  # Without normalization, matching fails:
  #   Source HTML:     "<p>Hello  world</p>" (two spaces)
  #   Rendered:        "Hello world" (one space)
  #   PO entry msgid:  "Hello world" (one space)
  #   Without normalization: "Hello  world" != "Hello world" (NO MATCH!)
  #   With normalization:    "Hello world" == "Hello world" (MATCH!)
  #
  # === Process ===
  # Every run of whitespace (spaces, tabs, newlines, carriage returns, form
  # feeds, vertical tabs) is replaced by a single space. One pass is enough:
  # `\s` already matches newlines, tabs, and carriage returns, so replacing
  # those first would not change the result. Leading and trailing whitespace
  # is collapsed to one space, not removed.
  #
  # @param text [String, nil] Text to normalize
  # @return [String, nil] Normalized text (a new string), or nil for nil input
  def normalize(text)
    return nil if text.nil?

    text.gsub(%r!\s+!, " ")
  end
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../constants"
|
|
4
|
+
|
|
5
|
+
module Jekyll
|
|
6
|
+
module L10n
|
|
7
|
+
# Validates text for extraction and translation.
|
|
8
|
+
#
|
|
9
|
+
# TextValidator checks if text meets minimum length requirements for
|
|
10
|
+
# extraction. Very short strings (< 3 characters) are typically skipped
|
|
11
|
+
# to focus on meaningful translatable content.
|
|
12
|
+
#
|
|
13
|
+
# Key responsibilities:
|
|
14
|
+
# * Check minimum text length requirement
|
|
15
|
+
# * Validate text is not nil
|
|
16
|
+
#
|
|
17
|
+
# @example
|
|
18
|
+
# TextValidator.valid?("Hello") # => true
|
|
19
|
+
# TextValidator.valid?("Hi") # => false (< 3 chars)
|
|
20
|
+
class TextValidator
  class << self
    # Decide whether text is long enough to be worth extracting.
    #
    # nil is never valid. Any other value is valid when its length is at
    # least Constants::MIN_TRANSLATABLE_LENGTH; the content itself is not
    # inspected.
    #
    # @param text [String, nil] Text to validate
    # @return [Boolean] True if text meets requirements
    def valid?(text)
      !text.nil? && text.length >= Jekyll::L10n::Constants::MIN_TRANSLATABLE_LENGTH
    end
  end
end
|
|
34
|
+
end
|
|
35
|
+
end
|