jekyll-l10n 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +94 -0
  4. data/lib/jekyll-l10n/constants.rb +136 -0
  5. data/lib/jekyll-l10n/errors.rb +60 -0
  6. data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
  7. data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
  8. data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
  9. data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
  10. data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
  11. data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
  12. data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
  13. data/lib/jekyll-l10n/extraction/logger.rb +48 -0
  14. data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
  15. data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
  16. data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
  17. data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
  18. data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
  19. data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
  20. data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
  21. data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
  22. data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
  23. data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
  24. data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
  25. data/lib/jekyll-l10n/po_file/loader.rb +64 -0
  26. data/lib/jekyll-l10n/po_file/manager.rb +160 -0
  27. data/lib/jekyll-l10n/po_file/merger.rb +80 -0
  28. data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
  29. data/lib/jekyll-l10n/po_file/reader.rb +518 -0
  30. data/lib/jekyll-l10n/po_file/writer.rb +232 -0
  31. data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
  32. data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
  33. data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
  34. data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
  35. data/lib/jekyll-l10n/translation/translator.rb +179 -0
  36. data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
  37. data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
  38. data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
  39. data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
  40. data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
  41. data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
  42. data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
  43. data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
  44. data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
  45. data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
  46. data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
  47. data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
  48. data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
  49. data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
  50. data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
  51. data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
  52. data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
  53. data/lib/jekyll-l10n/version.rb +10 -0
  54. data/lib/jekyll-l10n.rb +268 -0
  55. metadata +200 -0
@@ -0,0 +1,344 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../constants"
4
+
5
+ module Jekyll
6
+ module L10n
7
+ # Configuration Parser - Extracts and validates localization settings from page front matter
8
+ #
9
+ # PageLocalesConfig parses the `with_locales_data` front matter field in Jekyll pages and
10
+ # provides a type-safe interface to localization configuration. It validates all values
11
+ # against expected types and ranges, raising clear errors for invalid configurations.
12
+ #
13
+ # Configuration can be specified in page front matter at multiple levels:
14
+ # - Translation settings (fallback modes, LibreTranslate API)
15
+ # - Extraction settings (which attributes to extract, directories)
16
+ # - Logging settings (debug output, statistics)
17
+ #
18
+ # Key responsibilities:
19
+ # - Parse `with_locales_data` from page front matter
20
+ # - Validate locale codes (two-letter lowercase language, optional `_XX` uppercase country, e.g. es, pt_BR)
21
+ # - Validate LibreTranslate configuration when enabled
22
+ # - Provide getter methods with sensible defaults
23
+ # - Raise detailed validation errors for invalid configurations
24
+ #
25
+ # @example Minimal configuration
26
+ # ---
27
+ # with_locales: true
28
+ # with_locales_data:
29
+ # locales: [es, fr, pt]
30
+ # ---
31
+ #
32
+ # @example Full configuration with LibreTranslate
33
+ # ---
34
+ # with_locales: true
35
+ # with_locales_data:
36
+ # locales: [es, fr, pt_BR]
37
+ # extract_on_build: true
38
+ # update_compendium: true
39
+ # extraction:
40
+ # translatable_attributes: [title, alt, aria-label]
41
+ # translation:
42
+ # fallback: english
43
+ # libretranslate_enabled: true
44
+ # libretranslate_api_url: "http://localhost:5000/translate"
45
+ # libretranslate_timeout: 300
46
+ # logging:
47
+ # debug: true
48
+ # ---
49
+ #
50
+ class PageLocalesConfig
51
+ # Local aliases for values defined in the Constants module, so this class can use them unqualified
52
+ LOCALE_PATTERN = Constants::LOCALE_PATTERN
53
+ DEFAULT_LOCALES_DIR = Constants::DEFAULT_LOCALES_DIR
54
+ DEFAULT_FALLBACK_MODE = Constants::DEFAULT_FALLBACK_MODE
55
+ DEFAULT_TRANSLATABLE_ATTRIBUTES = Constants::DEFAULT_TRANSLATABLE_ATTRIBUTES
56
+ DEFAULT_LIBRETRANSLATE_TIMEOUT = Constants::DEFAULT_LIBRETRANSLATE_TIMEOUT
57
+ DEFAULT_LIBRETRANSLATE_BATCH_SIZE = Constants::DEFAULT_LIBRETRANSLATE_BATCH_SIZE
58
+ DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS = Constants::DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS
59
+ DEFAULT_LIBRETRANSLATE_RETRY_DELAY = Constants::DEFAULT_LIBRETRANSLATE_RETRY_DELAY
60
+ DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR = Constants::DEFAULT_LIBRETRANSLATE_STOP_ON_ERROR
61
+ DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL = Constants::DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL
62
+ DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE = Constants::DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE
63
+ DEFAULT_LIBRETRANSLATE_FORMAT = Constants::DEFAULT_LIBRETRANSLATE_FORMAT
64
+
65
+ # @!attribute [r] data
66
+ # The page data object handed to the constructor, stored as received
67
+ # @return [Hash]
68
+ attr_reader :data
69
+
70
# Build the configuration view for a single page.
#
# Keeps the page data as received, remembers the page path for error
# messages ("unknown" when the data has no "path" entry), and reads the
# `with_locales_data` section (an empty Hash when absent). Both validations
# run immediately, so an instance only exists for a configuration that
# passed them.
#
# @param page_data [Hash] The Jekyll page data/front matter object
# @raise [Jekyll::Errors::InvalidConfigurationError] If locale codes are invalid
# @raise [Jekyll::Errors::InvalidConfigurationError] If LibreTranslate config is invalid
def initialize(page_data)
  @data = page_data
  @page_path = page_data["path"] || "unknown"
  @config = page_data["with_locales_data"] || {}

  validate_locales!
  validate_libretranslate!
end
86
+
87
# Get the list of locales configured for this page.
#
# Reads `with_locales_data.locales`. A missing (or false) value yields an
# empty array. A scalar such as `locales: es` is wrapped into a one-element
# array so that callers relying on Array methods (`grep_v`, `empty?`) get a
# validation result instead of a NoMethodError. An Array value is returned
# as the same object, unchanged.
#
# The codes use an underscore before the country part (e.g. 'pt_BR'), which
# is the gettext convention rather than BCP 47's hyphenated form. Validation
# of the codes happens in the constructor, not here.
#
# @return [Array] Configured locale codes (e.g., ['es', 'fr', 'pt_BR'])
def locales
  Array(@config["locales"] || [])
end
96
+
97
# Directory in which the PO files for this page are stored.
#
# Uses `with_locales_data.locales_dir` when it is set to a truthy value and
# falls back to DEFAULT_LOCALES_DIR (aliased from Constants) otherwise.
#
# @return [String] The configured or default directory
def locales_dir
  configured_dir = @config["locales_dir"]
  configured_dir || DEFAULT_LOCALES_DIR
end
106
+
107
# Whether string extraction should run during the Jekyll build.
#
# Only an explicit `extract_on_build: false` turns extraction off; a missing
# key or any other value leaves it on.
#
# @return [Boolean] false if explicitly disabled, true otherwise
def extract_on_build?
  setting = @config["extract_on_build"]
  !(setting == false)
end
113
+
114
# Whether compendium files should be updated during extraction.
#
# Only an explicit `update_compendium: false` disables the update.
#
# @return [Boolean] false if explicitly disabled, true otherwise
def update_compendium?
  return false if @config["update_compendium"] == false

  true
end
121
+
122
# Whether extraction statistics should be shown in the logs.
#
# Reads `with_locales_data.logging.show_statistics`; only an explicit
# `false` hides the statistics.
#
# @return [Boolean] false if explicitly disabled, true otherwise
def show_statistics?
  statistics_flag = @config.dig("logging", "show_statistics")
  statistics_flag == false ? false : true
end
128
+
129
# Whether debug-level logging is enabled.
#
# Reads `with_locales_data.logging.debug`; only the literal `true` enables
# it (truthy strings such as "yes" do not).
#
# @return [Boolean] true if debug logging is configured, false otherwise
def debug_logging?
  debug_flag = @config.dig("logging", "debug")
  debug_flag == true
end
135
+
136
# Whether trace-level logging is enabled.
#
# Trace logging is on when `with_locales_data.logging.trace` is the literal
# `true`, and is also implied whenever debug_logging? is true.
#
# @return [Boolean] true if trace logging or debug logging is enabled
def trace_logging?
  return true if @config.dig("logging", "trace") == true

  debug_logging?
end
145
+
146
# Fallback mode used when a translation is missing from the PO files.
#
# Reads `with_locales_data.translation.fallback` and falls back to
# DEFAULT_FALLBACK_MODE. Documented modes are "english" (use original text),
# "marker" (wrap with markers) and "empty" (leave blank); this getter
# returns the configured value as-is and does not check it against that list.
#
# @return [String] The configured or default fallback mode
def fallback_mode
  configured_mode = @config.dig("translation", "fallback")
  configured_mode || DEFAULT_FALLBACK_MODE
end
156
+
157
# HTML attributes whose values are extracted for translation.
#
# Reads `with_locales_data.extraction.translatable_attributes` and falls
# back to DEFAULT_TRANSLATABLE_ATTRIBUTES (aliased from Constants) when the
# setting is absent.
#
# @return [Array<String>] List of attribute names to extract
def translatable_attributes
  configured_attributes = @config.dig("extraction", "translatable_attributes")
  configured_attributes || DEFAULT_TRANSLATABLE_ATTRIBUTES
end
167
+
168
# Whether localization is enabled for this page.
#
# A page counts as localized as soon as its locales list has at least one
# entry.
#
# @return [Boolean] true if locales list is not empty, false otherwise
def enabled?
  return false if locales.empty?

  true
end
176
+
177
# Whether LibreTranslate automatic translation is enabled.
#
# Resolution order:
# 1. If the `libretranslate_enabled` key is present at all, only the literal
#    `true` enables the feature (a present-but-nil or false value disables
#    it, even when a URL is configured).
# 2. Otherwise (backward compatibility) the feature is enabled when a
#    non-empty `libretranslate_api_url` is configured.
#
# @return [Boolean] true if LibreTranslate is enabled
def libretranslate_enabled?
  settings = libretranslate_config

  if settings.key?("libretranslate_enabled")
    settings["libretranslate_enabled"] == true
  else
    url = libretranslate_api_url
    !(url.nil? || url.empty?)
  end
end
192
+
193
# Source locale for LibreTranslate, i.e. the language of the original content.
#
# Reads `translation.libretranslate_source_locale` and falls back to
# DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE when it is not set.
#
# @return [String] Locale code of the source language (e.g., 'en', 'fr')
def libretranslate_source_locale
  configured_locale = libretranslate_config["libretranslate_source_locale"]
  configured_locale || DEFAULT_LIBRETRANSLATE_SOURCE_LOCALE
end
203
+
204
# Text format passed to the LibreTranslate API.
#
# Reads `translation.libretranslate_format` and falls back to
# DEFAULT_LIBRETRANSLATE_FORMAT. The accepted values ('text' or 'html') are
# enforced by validate_libretranslate!, not by this getter.
#
# @return [String] The configured or default format
def libretranslate_format
  configured_format = libretranslate_config["libretranslate_format"]
  configured_format || DEFAULT_LIBRETRANSLATE_FORMAT
end
213
+
214
# LibreTranslate API endpoint URL.
#
# Returns `translation.libretranslate_api_url` exactly as configured; there
# is no default.
#
# @return [String, nil] The API URL, or nil if not configured
def libretranslate_api_url
  settings = libretranslate_config
  settings["libretranslate_api_url"]
end
222
+
223
# LibreTranslate API key, for instances that require authentication.
#
# Returns `translation.libretranslate_api_key` exactly as configured; there
# is no default.
#
# @return [String, nil] The API key, or nil if not configured
def libretranslate_api_key
  settings = libretranslate_config
  settings["libretranslate_api_key"]
end
231
+
232
# Timeout (in seconds) for LibreTranslate API requests.
#
# Reads `translation.libretranslate_timeout` and falls back to
# DEFAULT_LIBRETRANSLATE_TIMEOUT. The configured value is returned as-is.
#
# @return [Integer] Timeout in seconds
def libretranslate_timeout
  configured_timeout = libretranslate_config["libretranslate_timeout"]
  configured_timeout || DEFAULT_LIBRETRANSLATE_TIMEOUT
end
238
+
239
# Number of strings sent to LibreTranslate in a single API request.
#
# Reads `translation.libretranslate_batch_size` and falls back to
# DEFAULT_LIBRETRANSLATE_BATCH_SIZE. Larger batches mean fewer requests but
# may hit size limits on the server.
#
# @return [Integer] Batch size
def libretranslate_batch_size
  configured_size = libretranslate_config["libretranslate_batch_size"]
  configured_size || DEFAULT_LIBRETRANSLATE_BATCH_SIZE
end
248
+
249
# Number of retry attempts for failed LibreTranslate requests.
#
# Reads `translation.libretranslate_retry_attempts` and falls back to
# DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS.
#
# @return [Integer] Number of retry attempts
def libretranslate_retry_attempts
  configured_attempts = libretranslate_config["libretranslate_retry_attempts"]
  configured_attempts || DEFAULT_LIBRETRANSLATE_RETRY_ATTEMPTS
end
256
+
257
# Delay (in seconds) between LibreTranslate retry attempts.
#
# Reads `translation.libretranslate_retry_delay` and falls back to
# DEFAULT_LIBRETRANSLATE_RETRY_DELAY.
#
# @return [Integer] Delay in seconds
def libretranslate_retry_delay
  configured_delay = libretranslate_config["libretranslate_retry_delay"]
  configured_delay || DEFAULT_LIBRETRANSLATE_RETRY_DELAY
end
264
+
265
# Whether a LibreTranslate error should halt the translation run.
#
# Only an explicit `libretranslate_stop_on_error: false` lets translation
# continue past API errors; a missing key or any other value means "stop".
#
# @return [Boolean] false if explicitly disabled, true otherwise
def libretranslate_stop_on_error?
  continue_on_error = libretranslate_config["libretranslate_stop_on_error"] == false
  !continue_on_error
end
275
+
276
# Interval, in translated entries, between LibreTranslate progress log lines.
#
# Reads `translation.libretranslate_progress_interval` and falls back to
# DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL. A configured 0 is returned as 0
# (0 is truthy in Ruby), which the original docs describe as "progress
# logging disabled".
#
# @return [Integer] Number of entries between progress logs
def libretranslate_progress_interval
  configured_interval = libretranslate_config["libretranslate_progress_interval"]
  configured_interval || DEFAULT_LIBRETRANSLATE_PROGRESS_INTERVAL
end
286
+
287
+ private
288
+
289
# The `with_locales_data.translation` section, or an empty Hash when it is
# absent. Despite the name, this is the whole translation section: the
# LibreTranslate keys live next to `fallback` in the same Hash.
#
# @return [Hash] The translation settings
def libretranslate_config
  translation_section = @config["translation"]
  translation_section || {}
end
292
+
293
# Reject the configuration when any configured locale fails LOCALE_PATTERN.
#
# Entries are tested with `grep_v`, i.e. `LOCALE_PATTERN === entry`, so
# non-String entries count as invalid as well. On failure the message is
# logged through Jekyll.logger under the "Localization Config" topic and
# then raised.
#
# @return [nil] When every locale is valid (or none is configured)
# @raise [Jekyll::Errors::InvalidConfigurationError] If at least one entry is invalid
def validate_locales!
  rejected = locales.grep_v(LOCALE_PATTERN)

  unless rejected.empty?
    message = build_validation_error_message(rejected)
    Jekyll.logger.error("Localization Config", message)
    raise Jekyll::Errors::InvalidConfigurationError, message
  end
end
304
+
305
# Build the multi-line message used when locale validation fails.
#
# The first line names @page_path and the rejected codes joined with ", ";
# the remaining lines are fixed guidance text.
#
# NOTE(review): the "Valid pattern" line is hard-coded text, not derived from
# LOCALE_PATTERN - confirm the two still agree whenever the constant changes.
#
# @param invalid_codes [Array] Entries that failed validation
# @return [String] The formatted error message
+ def build_validation_error_message(invalid_codes)
306
+ <<~ERROR
307
+ Invalid locale codes in #{@page_path}: #{invalid_codes.join(", ")}
308
+
309
+ Expected ISO 639-1/2 format (e.g., 'es', 'fr', 'pt_BR')
310
+ Valid pattern: ^[a-z]{2}(_[A-Z]{2})?$
311
+
312
+ Common mistakes:
313
+ - Use lowercase language code: 'es' not 'ES' or 'ESP'
314
+ - Use underscore for country: 'pt_BR' not 'pt-BR'
315
+ - Use ISO codes: 'es' not 'español'
316
+ - Minimum 2 characters: 'es' not 'e'
317
+ - Country code uppercase: 'pt_BR' not 'pt_br'
318
+ ERROR
319
+ end
320
+
321
# Validate the LibreTranslate settings; a no-op when the feature is disabled.
#
# Checks, in order: an API URL is present, the source locale matches
# LOCALE_PATTERN, and the format is 'text' or 'html'. Every message ends with
# the page path so the offending page can be found in a multi-page site
# (validate_locales! already reports it). The source locale is stringified
# before matching so that a non-String value (e.g. a YAML integer) is
# reported as an invalid configuration instead of surfacing as a TypeError
# from Regexp#match?.
#
# @return [nil] When LibreTranslate is disabled or the settings are valid
# @raise [Jekyll::Errors::InvalidConfigurationError] If any check fails
def validate_libretranslate!
  return unless libretranslate_enabled?

  # Reachable when the explicit flag is true but no URL was given.
  api_url = libretranslate_api_url
  if api_url.nil? || api_url.empty?
    raise Jekyll::Errors::InvalidConfigurationError,
          "libretranslate_enabled is true but libretranslate_api_url is not configured " \
          "(page: #{@page_path})"
  end

  source_locale = libretranslate_source_locale
  unless LOCALE_PATTERN.match?(source_locale.to_s)
    raise Jekyll::Errors::InvalidConfigurationError,
          "Invalid libretranslate_source_locale: #{source_locale} (page: #{@page_path})"
  end

  text_format = libretranslate_format
  unless %w(text html).include?(text_format)
    raise Jekyll::Errors::InvalidConfigurationError,
          "Invalid libretranslate_format: #{text_format} (must be 'text' or 'html') " \
          "(page: #{@page_path})"
  end
end
342
+ end
343
+ end
344
+ end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "gettext/po"
4
+
5
+ module Jekyll
6
+ module L10n
7
+ # Converts between translation hash and GetText POEntry formats.
8
+ #
9
+ # PoEntryConverter handles bidirectional conversion between simple translation
10
+ # hashes and GetText::POEntry objects used by the ruby-gettext gem. Hash -> POEntry carries msgstr,
11
+ # reference comments and the fuzzy flag; POEntry -> hash returns msgstr and the reference only.
12
+ #
13
+ # Key responsibilities:
14
+ # * Convert translation hashes to POEntry objects
15
+ # * Convert POEntry objects back to hash format
16
+ # * Preserve metadata (references, fuzzy flags) during conversion
17
+ # * Handle both hash and array return formats
18
+ #
19
+ # @example
20
+ # hash = { "Hello" => { msgstr: "Hola", reference: "file.html:10" } }
21
+ # entries = PoEntryConverter.hash_to_po_entry_array(hash)
22
+ # # entries is array of GetText::POEntry objects
23
class PoEntryConverter
  # Convert a translation hash to a hash of POEntry objects keyed by msgid.
  #
  # Each value may be a plain msgstr (`{ msgid => msgstr }`) or a metadata
  # hash (`{ msgid => { msgstr: "...", reference: "...", fuzzy: false } }`).
  #
  # @param hash [Hash, nil] Translation hash
  # @return [Hash] Hash of { msgid => POEntry }; empty for nil/empty input
  def self.hash_to_po_entries(hash)
    return {} if hash.nil? || hash.empty?

    hash.each_with_object({}) do |(msgid, value), entries|
      entries[msgid] = build_entry(msgid, value)
    end
  end

  # Convert a translation hash to an array of POEntry objects.
  #
  # Accepts the same value shapes as hash_to_po_entries and yields the
  # entries in the hash's iteration order.
  #
  # @param hash [Hash, nil] Translation hash
  # @return [Array<GetText::POEntry>] Array of POEntry objects; empty for nil/empty input
  def self.hash_to_po_entry_array(hash)
    return [] if hash.nil? || hash.empty?

    hash.map { |msgid, value| build_entry(msgid, value) }
  end

  # Convert POEntry objects to a translation hash keyed by msgid.
  #
  # msgstr is stringified (a nil msgstr becomes ""); :reference is the
  # entry's extracted comment and is always present, possibly nil. Flags
  # (e.g. fuzzy) are not carried over.
  #
  # @param entries [Array<GetText::POEntry>, nil] POEntry objects
  # @return [Hash] Hash of { msgid => { msgstr: "...", reference: "..." } }
  def self.po_entries_to_hash(entries)
    return {} if entries.nil? || entries.empty?

    entries.each_with_object({}) do |entry, translations|
      translations[entry.msgid] = {
        :msgstr    => entry.msgstr.to_s,
        :reference => entry.extracted_comment,
      }
    end
  end

  # Convert POEntry objects to an array of hashes.
  #
  # Unlike po_entries_to_hash, msgstr is passed through unchanged (it may be
  # nil) and :reference is only added when the entry has a non-empty
  # extracted comment.
  #
  # @param entries [Array<GetText::POEntry>, nil] POEntry objects
  # @return [Array<Hash>] Array of { msgid: "...", msgstr: "...", reference: "..." }
  def self.po_entries_to_array_of_hashes(entries)
    return [] if entries.nil? || entries.empty?

    entries.map do |entry|
      row = { :msgid => entry.msgid, :msgstr => entry.msgstr }
      reference = entry.extracted_comment
      row[:reference] = reference if reference && !reference.empty?
      row
    end
  end

  # Build one :normal POEntry for msgid from either a plain msgstr or a
  # metadata hash (:msgstr, optional :reference, optional :fuzzy). Shared by
  # both hash_to_* converters so the two cannot drift apart.
  #
  # @param msgid [String] Source string
  # @param value [String, Hash] msgstr or metadata hash
  # @return [GetText::POEntry] The populated entry
  def self.build_entry(msgid, value)
    entry = ::GetText::POEntry.new(:normal)
    entry.msgid = msgid

    if value.is_a?(Hash)
      entry.msgstr = value[:msgstr]
      entry.add_comment(value[:reference]) if value[:reference]
      entry.flag = "fuzzy" if value[:fuzzy]
    else
      entry.msgstr = value
    end

    entry
  end
  private_class_method :build_entry
end
110
+ end
111
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module L10n
5
+ # Accesses Jekyll site configuration properties.
6
+ #
7
+ # SiteConfigAccessor provides a unified interface for accessing Jekyll site
8
+ # properties, handling both normal Jekyll site objects and test doubles (hashes).
9
+ # This enables easier testing and more flexible configuration handling.
10
+ #
11
+ # Key responsibilities:
12
+ # * Extract localization data from site configuration
13
+ # * Access site source directory
14
+ # * Access site destination directory
15
+ # * Handle both Jekyll site objects and hash-based doubles
16
+ #
17
+ # @example
18
+ # locales_data = SiteConfigAccessor.extract_locales_data(site)
19
+ # source = SiteConfigAccessor.source(site)
20
+ # dest = SiteConfigAccessor.dest(site)
21
class SiteConfigAccessor
  # Extract the localization configuration from the site.
  #
  # Looks up `defaults[0].values.with_locales_data` in the site configuration,
  # i.e. only the first entry of the `defaults` list is consulted.
  #
  # @param site [Jekyll::Site, Hash] Jekyll site object or hash double
  # @return [Hash] Localization configuration hash or empty hash if not found
  def self.extract_locales_data(site)
    site_config = site.is_a?(Hash) ? site["config"] : site.config
    locales_data = site_config.dig("defaults", 0, "values", "with_locales_data")
    locales_data || {}
  end

  # Get the site source directory.
  #
  # @param site [Jekyll::Site, Hash] Jekyll site object or hash double
  # @return [String] Path to site source directory
  def self.source(site)
    return site["source"] if site.is_a?(Hash)

    site.source
  end

  # Get the site destination directory.
  #
  # @param site [Jekyll::Site, Hash] Jekyll site object or hash double
  # @return [String] Path to site destination directory
  def self.dest(site)
    return site["dest"] if site.is_a?(Hash)

    site.dest
  end
end
50
+ end
51
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module L10n
5
+ # Normalizes whitespace in text for consistent matching.
6
+ #
7
+ # TextNormalizer converts multiple whitespace characters (newlines, tabs,
8
+ # carriage returns) to single spaces and collapses consecutive spaces into
9
+ # a single space. Used during extraction and translation to ensure consistent
10
+ # text matching regardless of HTML formatting.
11
+ #
12
+ # Key responsibilities:
13
+ # * Replace newlines, tabs, carriage returns with spaces
14
+ # * Collapse consecutive spaces to single space
15
module TextNormalizer
  extend self

  # Normalize whitespace in text for consistent translation matching.
  #
  # === Why Normalization Is Critical ===
  # HTML rendering treats whitespace differently than source code:
  # - Multiple spaces render as one space
  # - Newlines become spaces (unless in <pre> tags)
  # - Tabs become spaces
  #
  # Without normalization, matching fails:
  #   Source HTML:    "<p>Hello  world</p>"  (two spaces)
  #   PO entry msgid: "Hello world"          (one space)
  #   Unnormalized:   "Hello  world" != "Hello world"  (NO MATCH)
  #   Normalized:     "Hello world"  == "Hello world"  (MATCH)
  #
  # === Process ===
  # Every run of whitespace (`\s+`: space, tab, newline, carriage return,
  # form feed, vertical tab) is replaced by a single space. One pass is
  # enough: `\s` already includes newlines, tabs and carriage returns, so a
  # separate replacement for those characters would not change the result.
  #
  # Leading and trailing whitespace is collapsed to one space, not stripped.
  # Ruby's `\s` is ASCII-only, so non-breaking spaces are left untouched.
  #
  # @param text [String, nil] Text to normalize
  # @return [String, nil] Normalized text, or nil when text is nil
  def normalize(text)
    return nil if text.nil?

    text.gsub(%r!\s+!, " ")
  end
end
46
+ end
47
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../constants"
4
+
5
+ module Jekyll
6
+ module L10n
7
+ # Validates text for extraction and translation.
8
+ #
9
+ # TextValidator checks if text meets minimum length requirements for
10
+ # extraction. Very short strings (< 3 characters) are typically skipped
11
+ # to focus on meaningful translatable content.
12
+ #
13
+ # Key responsibilities:
14
+ # * Check minimum text length requirement
15
+ # * Validate text is not nil
16
+ #
17
+ # @example
18
+ # TextValidator.valid?("Hello") # => true
19
+ # TextValidator.valid?("Hi") # => false (< 3 chars)
20
class TextValidator
  # Check if text is valid for extraction.
  #
  # Text is valid when it is non-nil and its length is at least
  # Jekyll::L10n::Constants::MIN_TRANSLATABLE_LENGTH. The length is measured
  # on the text as given (no stripping), so whitespace counts.
  #
  # @param text [String, nil] Text to validate
  # @return [Boolean] True if text meets requirements
  def self.valid?(text)
    !text.nil? && text.length >= Jekyll::L10n::Constants::MIN_TRANSLATABLE_LENGTH
  end
end
34
+ end
35
+ end