jekyll-l10n 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/lib/jekyll-l10n/constants.rb +23 -0
  3. data/lib/jekyll-l10n/extraction/compendium_merger.rb +57 -10
  4. data/lib/jekyll-l10n/extraction/compendium_translator.rb +1 -1
  5. data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +2 -1
  6. data/lib/jekyll-l10n/extraction/extractor.rb +1 -0
  7. data/lib/jekyll-l10n/extraction/logger.rb +3 -1
  8. data/lib/jekyll-l10n/extraction/result_saver.rb +13 -2
  9. data/lib/jekyll-l10n/jekyll/generator.rb +2 -1
  10. data/lib/jekyll-l10n/jekyll/url_filter.rb +5 -3
  11. data/lib/jekyll-l10n/po_file/fuzzy_matcher.rb +117 -0
  12. data/lib/jekyll-l10n/po_file/loader.rb +3 -2
  13. data/lib/jekyll-l10n/po_file/manager.rb +2 -1
  14. data/lib/jekyll-l10n/po_file/merger.rb +2 -1
  15. data/lib/jekyll-l10n/po_file/reader.rb +71 -21
  16. data/lib/jekyll-l10n/po_file/writer.rb +50 -13
  17. data/lib/jekyll-l10n/translation/block_text_extractor.rb +2 -8
  18. data/lib/jekyll-l10n/translation/html_translator.rb +88 -19
  19. data/lib/jekyll-l10n/translation/libre_translator.rb +10 -10
  20. data/lib/jekyll-l10n/translation/translator.rb +2 -1
  21. data/lib/jekyll-l10n/utils/error_handler.rb +12 -0
  22. data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +3 -2
  23. data/lib/jekyll-l10n/utils/html_elements.rb +4 -0
  24. data/lib/jekyll-l10n/utils/html_text_utils.rb +77 -4
  25. data/lib/jekyll-l10n/utils/page_locales_config.rb +14 -14
  26. data/lib/jekyll-l10n/utils/po_entry_converter.rb +22 -17
  27. data/lib/jekyll-l10n/utils/site_config_accessor.rb +3 -1
  28. data/lib/jekyll-l10n/utils/translation_resolver.rb +1 -4
  29. data/lib/jekyll-l10n/utils/url_transformer.rb +4 -5
  30. data/lib/jekyll-l10n.rb +1 -0
  31. metadata +4 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c8edcdac30ed6d89b587d914f1b8038ffd3b10a95a0e15cc43965dd915e39f2
4
- data.tar.gz: 513218e1e01c3814a2f7130f60a6b35463d0836659ee13d85ae3d7c9a0aaf6c5
3
+ metadata.gz: 21d081d43cd7503f98fb38d64526397eaf5ff3164e0eb3d6525aa8d88290b9d7
4
+ data.tar.gz: 382bb065f52fed1d762c2cd5110afc349de73a4780d564f813e8a7be4929373b
5
5
  SHA512:
6
- metadata.gz: 24586cc374d7ad819cbda298c98d9dc70438b60bc7afafcc6d695190bb62dc6294ddfd4bb02fb4f04c57c3231cea3fe12047dc9bdf3eb0de5390c6e4fb7a0970
7
- data.tar.gz: f5f5bb5755c8354794fd81090c74c0519f06fb15c216cc1b9ba748e5837dcef7b7a05c324d5f67feef8c544884e17a936d492c76132ac26cb7f0d6f0bafa22e6
6
+ metadata.gz: cab0e3382f953d8f88acc764be019ccdb02bf382e65a4f227f99206e7e6c4370e4a6b158da960cf7a762e28d84765f75917ee0f351cfcd2b434aecd8d8bb8969
7
+ data.tar.gz: f5dc58255fd89efe2014d63e9a916d9ff3ff04d20ebf67a0d863469fd082bee7d3b399586a1f32392f78b2e4f1e6cb7c26254986faacaf0b1208a6fe5ed0c3f6
@@ -24,6 +24,13 @@ module Jekyll
24
24
  # @return [Regexp]
25
25
  LOCALE_PATTERN = /^[a-z]{2}(_[A-Z]{2})?$/.freeze
26
26
 
27
+ # Regex fragment for matching locale codes in URL path segments.
28
+ # Accepts both underscore ('pt_BR') and hyphen ('zh-CN') subtag separators,
29
+ # unlike LOCALE_PATTERN which only accepts underscore (strict validation form).
30
+ # Compose into full path patterns rather than using standalone.
31
+ # @return [String]
32
+ LOCALE_CODE_SEGMENT = '[a-z]{2}(?:[_-][A-Z]{2})?'
33
+
27
34
  # ## Translation Fallback Modes
28
35
 
29
36
  # Fallback mode: use original English text if translation not found
@@ -44,6 +51,16 @@ module Jekyll
44
51
  # @return [String] "[UNTRANSLATED]"
45
52
  UNTRANSLATED_MARKER = '[UNTRANSLATED]'
46
53
 
54
+ # Minimum similarity score (0.0–1.0) for fuzzy matching old translations to
55
+ # changed msgids. Mirrors GNU msgmerge default. Set to 1.0 to disable.
56
+ # @return [Float] 0.6
57
+ DEFAULT_FUZZY_THRESHOLD = 0.6
58
+
59
+ # Maximum msgid character length above which fuzzy matching is skipped.
60
+ # Long strings are typically unique HTML fragments; Levenshtein cost is O(n²).
61
+ # @return [Integer] 400
62
+ MAX_FUZZY_MSGID_LENGTH = 400
63
+
47
64
  # ## PO File Formatting (GNU Gettext Standard)
48
65
 
49
66
  # Line length threshold below which strings are rendered on a single line
@@ -99,6 +116,12 @@ module Jekyll
99
116
  # @return [Array<String>] ["title", "alt", "aria-label", "placeholder", "aria-description"]
100
117
  DEFAULT_TRANSLATABLE_ATTRIBUTES = %w[title alt aria-label placeholder aria-description].freeze
101
118
 
119
+ # Structural/styling attributes restored from the source DOM at render time.
120
+ # These are never read from msgstr — always sourced from original HTML.
121
+ # Prevents MT-corrupted class strings from reaching the rendered page.
122
+ # @return [Array<String>]
123
+ STRUCTURAL_PASSTHROUGH_ATTRS = %w[class style id target rel tabindex aria-hidden].freeze
124
+
102
125
  # ## LibreTranslate Integration Defaults
103
126
 
104
127
  # Default LibreTranslate API endpoint URL
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'fileutils'
3
4
  require_relative '../po_file/manager'
5
+ require_relative '../po_file/fuzzy_matcher'
4
6
  require_relative '../utils/page_locales_config'
5
7
  require_relative '../utils/site_config_accessor'
6
8
 
@@ -84,10 +86,11 @@ module Jekyll
84
86
  end
85
87
  end
86
88
 
87
- # Load existing compendium translations or return empty hash if not found
89
+ # Load existing compendium translations or return empty hash if not found.
90
+ # Uses parse_for_merge to preserve fuzzy flags from previous runs.
88
91
  def load_existing_compendium(compendium_path)
89
92
  if File.exist?(compendium_path)
90
- PoFileReader.parse_with_references(compendium_path)
93
+ PoFileReader.parse_for_merge(compendium_path)
91
94
  else
92
95
  {}
93
96
  end
@@ -102,24 +105,54 @@ module Jekyll
102
105
  combined
103
106
  end
104
107
 
105
- # Normalize entry format to ensure consistent hash structure with :msgstr and :reference keys
108
+ # Normalize entry format to ensure consistent hash structure with :msgstr and :reference keys.
109
+ # Fuzzy metadata is preserved when present so it survives subsequent compendium reads.
106
110
  def normalize_compendium_entry(data)
107
111
  if data.is_a?(Hash)
108
- { msgstr: data[:msgstr], reference: data[:reference] }
112
+ entry = { msgstr: data[:msgstr], reference: data[:reference] }
113
+ entry[:fuzzy] = data[:fuzzy] if data[:fuzzy]
114
+ entry[:previous_msgid] = data[:previous_msgid] if data[:previous_msgid]
115
+ entry
109
116
  else
110
117
  { msgstr: data, reference: nil }
111
118
  end
112
119
  end
113
120
 
114
- # Merge newly found translations into combined hash, preserving existing translations
121
+ # Merge newly found translations into combined hash, preserving existing translations.
122
+ # Applies Levenshtein fuzzy matching for changed msgids: when a new msgid closely
123
+ # resembles an old compendium entry, the old entry is replaced by a fuzzy-marked new
124
+ # entry carrying the old translation as a hint. Old entries without a new fuzzy match
125
+ # stay in the combined hash (they may belong to pages not extracted in this build).
115
126
  def merge_into_combined(combined, merged)
127
+ new_msgids = merged.keys.to_set
128
+ fuzzy_candidates = compendium_fuzzy_candidates(combined, new_msgids)
129
+ matched_old_msgids = []
130
+
116
131
  merged.each do |msgid, entry|
117
132
  if combined[msgid]
118
133
  update_entry_reference(combined[msgid], entry)
119
134
  else
120
- combined[msgid] = create_new_entry(entry)
135
+ resolve_new_entry(combined, msgid, entry, fuzzy_candidates, matched_old_msgids)
121
136
  end
122
137
  end
138
+
139
+ matched_old_msgids.each { |old_msgid| combined.delete(old_msgid) }
140
+ end
141
+
142
+ def compendium_fuzzy_candidates(combined, new_msgids)
143
+ combined.reject do |msgid, entry|
144
+ new_msgids.include?(msgid) || PoFuzzyMatcher.msgstr_from_entry(entry).empty?
145
+ end
146
+ end
147
+
148
+ def resolve_new_entry(combined, msgid, entry, fuzzy_candidates, matched_old_msgids)
149
+ match = PoFuzzyMatcher.find_match(msgid, fuzzy_candidates)
150
+ if match
151
+ combined[msgid] = create_fuzzy_entry(entry, match)
152
+ matched_old_msgids << match[:msgid]
153
+ else
154
+ combined[msgid] = create_new_entry(entry)
155
+ end
123
156
  end
124
157
 
125
158
  # Update reference for existing entry if new reference is available
@@ -138,11 +171,24 @@ module Jekyll
138
171
  end
139
172
  end
140
173
 
141
- # Convert combined hash to array of entries suitable for PO file writing
174
+ # Create a fuzzy entry for a changed msgid, carrying the old translation as a hint
175
+ def create_fuzzy_entry(entry, match)
176
+ {
177
+ msgstr: match[:msgstr],
178
+ reference: entry.is_a?(Hash) ? entry[:reference] : nil,
179
+ fuzzy: true,
180
+ previous_msgid: match[:msgid]
181
+ }
182
+ end
183
+
184
+ # Convert combined hash to array of entries suitable for PO file writing.
185
+ # Fuzzy metadata is included so PoFileWriter can serialize #, fuzzy and #| msgid.
142
186
  def format_compendium_entries(combined)
143
187
  combined.map do |msgid, data|
144
188
  entry = { msgid: msgid, msgstr: data[:msgstr] }
145
- entry[:reference] = data[:reference] if data[:reference]
189
+ entry[:reference] = data[:reference] if data[:reference]
190
+ entry[:fuzzy] = data[:fuzzy] if data[:fuzzy]
191
+ entry[:previous_msgid] = data[:previous_msgid] if data[:previous_msgid]
146
192
  entry
147
193
  end
148
194
  end
@@ -156,8 +202,9 @@ module Jekyll
156
202
 
157
203
  private :process_locale, :compendium_unchanged?, :load_existing_compendium,
158
204
  :build_combined_hash, :normalize_compendium_entry, :merge_into_combined,
159
- :update_entry_reference, :create_new_entry, :format_compendium_entries,
160
- :cleanup_locale_directory
205
+ :compendium_fuzzy_candidates, :resolve_new_entry,
206
+ :update_entry_reference, :create_new_entry, :create_fuzzy_entry,
207
+ :format_compendium_entries, :cleanup_locale_directory
161
208
  end
162
209
  end
163
210
  end
@@ -72,7 +72,7 @@ module Jekyll
72
72
  "Processing compendium file: #{compendium_path}")
73
73
  return unless File.exist?(compendium_path)
74
74
 
75
- entries = PoFileReader.parse_with_references(compendium_path)
75
+ entries = PoFileReader.parse_for_merge(compendium_path)
76
76
  po_entries = PoEntryConverter.hash_to_po_entry_array(entries)
77
77
 
78
78
  log_compendium_stats(locale, po_entries, compendium_path)
@@ -52,11 +52,12 @@ module Jekyll
52
52
  end
53
53
 
54
54
  def extractable?(node)
55
- node.element? && HtmlTextUtils::CONTENT_ELEMENTS.include?(node.name)
55
+ HtmlTextUtils.extractable?(node)
56
56
  end
57
57
 
58
58
  def extract_block_text(node)
59
59
  return nil if only_contains_block_elements?(node)
60
+ return nil if HtmlTextUtils.layout_block_children?(node)
60
61
 
61
62
  text = HtmlTextUtils.extract_with_inline_tags(node)
62
63
  TextValidator.valid?(text) ? text : nil
@@ -75,6 +75,7 @@ module Jekyll
75
75
  Jekyll.logger.info 'Localization', 'Extracting translatable strings...'
76
76
  start_time = Time.now
77
77
  stats = process_all_html_files
78
+ @result_saver.finalize_compendia
78
79
  translate_all_compendia
79
80
  ExtractionLogger.log_summary(stats, Time.now - start_time)
80
81
  stats
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../utils/error_handler'
4
+
3
5
  module Jekyll
4
6
  module L10n
5
7
  # Logs extraction process errors and summary statistics.
@@ -22,7 +24,7 @@ module Jekyll
22
24
  # @param error [StandardError] The error that occurred
23
25
  # @return [void]
24
26
  def self.log_error(file_path, error)
25
- Jekyll.logger.error 'Localization', "Error extracting from #{file_path}: #{error.message}"
27
+ ErrorHandler.log_error("extracting from #{file_path}", error)
26
28
  end
27
29
 
28
30
  # Log extraction completion summary.
@@ -63,6 +63,19 @@ module Jekyll
63
63
  }
64
64
  end
65
65
 
66
+ # Merge all page-specific PO files into compendia after all pages are extracted.
67
+ #
68
+ # Called once per build (from Extractor.extract_site) rather than per page,
69
+ # reducing disk I/O and fuzzy-matching passes from O(pages) to O(1).
70
+ #
71
+ # @return [void]
72
+ def finalize_compendia
73
+ return unless @site_config.update_compendium?
74
+
75
+ po_manager = PoFileManager.new(@site, @site_config.locales_dir)
76
+ CompendiumMerger.new(@site).merge_compendia(po_manager, @site_config)
77
+ end
78
+
66
79
  # Translate compendia using LibreTranslate.
67
80
  #
68
81
  # If LibreTranslate is enabled in config, translates all empty entries in
@@ -84,8 +97,6 @@ module Jekyll
84
97
  po_files_created += 1 if po_manager.save_po_file(locale, entries, page_path: page_path)
85
98
  end
86
99
 
87
- CompendiumMerger.new(@site).merge_compendia(po_manager, config) if config.update_compendium?
88
-
89
100
  po_files_created
90
101
  end
91
102
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../constants'
3
4
  require_relative 'regeneration_checker'
4
5
 
5
6
  module Jekyll
@@ -119,7 +120,7 @@ module Jekyll
119
120
  def valid_locale_code?(locale)
120
121
  return false unless locale.is_a?(String)
121
122
 
122
- locale.match?(/^[a-z]{2}(_[A-Z]{2})?$/)
123
+ locale.match?(Constants::LOCALE_PATTERN)
123
124
  end
124
125
 
125
126
  # Check if any pages in the site are marked for localization
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'liquid'
4
+ require_relative '../constants'
5
+ require_relative '../utils/error_handler'
4
6
 
5
7
  module Jekyll
6
8
  module L10n
@@ -99,7 +101,7 @@ module Jekyll
99
101
  page&.data&.[]('locale')
100
102
  end
101
103
  rescue StandardError => e
102
- Jekyll.logger.warn 'Localization', "Error retrieving current locale: #{e.message}"
104
+ ErrorHandler.log_warning('retrieving current locale', e)
103
105
  nil
104
106
  end
105
107
 
@@ -124,7 +126,7 @@ module Jekyll
124
126
  end
125
127
 
126
128
  def already_localized?(url_str)
127
- %r!^/[a-z]{2}(?:[_-][A-Z]{2})?(?=/|\?)!.match?(url_str)
129
+ %r{^/#{Constants::LOCALE_CODE_SEGMENT}(?=/|\?)}o.match?(url_str)
128
130
  end
129
131
 
130
132
  def external_url?(url_str)
@@ -192,7 +194,7 @@ module Jekyll
192
194
 
193
195
  def strip_locale_from_url(url)
194
196
  # Strip leading locale prefix like /es/, /fr/, /pt_BR/, /zh-CN/
195
- url.sub(%r!^/([a-z]{2}(?:[_-][A-Z]{2})?)(?=/|$)!, '')
197
+ url.sub(%r{^/(#{Constants::LOCALE_CODE_SEGMENT})(?=/|$)}o, '')
196
198
  end
197
199
  end
198
200
  end
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../constants'
4
+
5
+ module Jekyll
6
+ module L10n
7
+ # Finds the closest matching old PO entry for a new msgid using normalized
8
+ # Levenshtein similarity. Mirrors GNU msgmerge fuzzy-matching behaviour.
9
+ #
10
+ # Key responsibilities:
11
+ # * Compute normalized edit-distance similarity between two strings
12
+ # * Select the best-scoring candidate from a pool of orphaned old entries
13
+ # * Return the matched old msgid and its msgstr for use as a fuzzy hint
14
class PoFuzzyMatcher
  # Default minimum similarity score accepted as a fuzzy match.
  THRESHOLD = Constants::DEFAULT_FUZZY_THRESHOLD

  # Extract msgstr from a PO entry that is either a plain String or a metadata Hash.
  #
  # @param entry [String, Hash] PO entry value
  # @return [String] the msgstr; nil values are coerced to ''
  def self.msgstr_from_entry(entry)
    entry.is_a?(Hash) ? entry[:msgstr].to_s : entry.to_s
  end

  # Find the best fuzzy match for new_msgid among candidates.
  #
  # Returns nil when there are no candidates, when new_msgid is nil, or when
  # new_msgid is longer than Constants::MAX_FUZZY_MSGID_LENGTH. Candidates are
  # pre-filtered to the length range in which a similarity >= threshold is
  # mathematically possible before the edit distance is computed.
  #
  # @param new_msgid [String, nil] the new source string to match
  # @param candidates [Hash] { old_msgid => entry } where entry is either a
  #   String msgstr or a Hash with :msgstr key
  # @param threshold [Float] minimum similarity score to accept (0.0–1.0)
  # @return [Hash, nil] { msgid: String, msgstr: String } or nil if no match
  def self.find_match(new_msgid, candidates, threshold: THRESHOLD)
    return nil if candidates.empty?
    return nil if new_msgid.nil? || new_msgid.length > Constants::MAX_FUZZY_MSGID_LENGTH

    len = new_msgid.length
    # Similarity can never exceed min_len / max_len, which bounds the
    # candidate lengths worth scoring on both sides.
    min_feas = (len * threshold).ceil
    max_feas = threshold.positive? ? (len / threshold).floor : Float::INFINITY

    best = best_candidate(new_msgid, candidates, min_feas, max_feas, threshold)
    return nil unless best

    { msgid: best[:msgid], msgstr: msgstr_from_entry(best[:entry]) }
  end

  # Scan candidates and keep the highest-scoring one that reaches threshold.
  #
  # @param new_msgid [String] the new source string
  # @param candidates [Hash] { old_msgid => entry }
  # @param min_feas [Integer] shortest candidate length worth scoring
  # @param max_feas [Numeric] longest candidate length worth scoring
  # @param threshold [Float] minimum similarity score to accept
  # @return [Hash, nil] { msgid: String, entry: String|Hash } or nil
  def self.best_candidate(new_msgid, candidates, min_feas, max_feas, threshold)
    best_msgid = best_entry = nil
    best_score = 0.0

    candidates.each do |old_msgid, entry|
      ol = old_msgid.length
      next if ol < min_feas || ol > max_feas

      # Forward the caller's threshold so the early exit inside similarity
      # agrees with the length pre-filter above instead of the class default.
      score = similarity(new_msgid, old_msgid, threshold: threshold)
      next unless score > best_score && score >= threshold

      best_score = score
      best_msgid = old_msgid
      best_entry = entry
      break if best_score >= 1.0 # a perfect score cannot be beaten
    end

    best_msgid ? { msgid: best_msgid, entry: best_entry } : nil
  end

  # Normalized Levenshtein similarity between two strings.
  #
  # Returns 0.0 immediately when the length ratio falls below threshold:
  # the edit distance is at least the length difference, so the similarity
  # cannot exceed min_len / max_len and the O(n²) computation is skipped.
  #
  # @param str_a [String]
  # @param str_b [String]
  # @param threshold [Float] length-ratio cut-off for the early exit;
  #   defaults to THRESHOLD so existing two-argument callers are unaffected
  # @return [Float] 0.0 (completely different, or below the cut-off) to 1.0 (identical)
  def self.similarity(str_a, str_b, threshold: THRESHOLD)
    return 1.0 if str_a == str_b
    return 0.0 if str_a.empty? || str_b.empty?

    max_len = [str_a.length, str_b.length].max
    min_len = [str_a.length, str_b.length].min
    return 0.0 if min_len.to_f / max_len < threshold

    dist = levenshtein(str_a, str_b)
    1.0 - (dist.to_f / max_len)
  end

  # Levenshtein edit distance computed with two reusable rows.
  #
  # @param str_a [String]
  # @param str_b [String]
  # @return [Integer] number of single-character insertions, deletions and
  #   substitutions needed to turn one string into the other
  def self.levenshtein(str_a, str_b)
    # Keep the shorter string as the row to minimise the dp array size.
    str_a, str_b = str_b, str_a if str_a.length > str_b.length

    a_chars = str_a.chars
    b_chars = str_b.chars
    m = a_chars.length
    n = b_chars.length

    # curr starts as the distances from the empty prefix of str_b.
    curr = Array.new(m + 1) { |i| i }
    prev = Array.new(m + 1, 0)

    n.times do |j|
      curr, prev = prev, curr # reuse the two rows instead of allocating
      curr[0] = j + 1
      m.times do |i|
        cost = a_chars[i] == b_chars[j] ? 0 : 1
        curr[i + 1] = [curr[i] + 1, prev[i + 1] + 1, prev[i] + cost].min
      end
    end
    curr[m]
  end
  private_class_method :levenshtein, :best_candidate
end
116
+ end
117
+ end
@@ -3,6 +3,7 @@
3
3
  require_relative 'reader'
4
4
  require_relative 'manager'
5
5
  require_relative 'path_builder'
6
+ require_relative '../utils/error_handler'
6
7
 
7
8
  module Jekyll
8
9
  module L10n
@@ -52,11 +53,11 @@ module Jekyll
52
53
  end
53
54
 
54
55
  def self.load_and_cache(cache_key, po_path)
55
- translations = PoFileReader.parse(po_path)
56
+ translations = PoFileReader.parse_for_translation(po_path)
56
57
  PoFileManager.cache[cache_key] = translations
57
58
  translations
58
59
  rescue StandardError => e
59
- Jekyll.logger.warn 'Localization', "Error loading PO file #{po_path}: #{e.message}"
60
+ ErrorHandler.log_warning("loading PO file #{po_path}", e)
60
61
  {}
61
62
  end
62
63
  end
@@ -7,6 +7,7 @@ require_relative 'path_builder'
7
7
  require_relative '../utils/site_config_accessor'
8
8
  require_relative '../utils/file_operations'
9
9
  require_relative '../utils/logger_formatter'
10
+ require_relative '../utils/error_handler'
10
11
 
11
12
  module Jekyll
12
13
  module L10n
@@ -113,7 +114,7 @@ module Jekyll
113
114
  prepare_and_write_po_file(po_path, entries, locale, page_path: page_path,
114
115
  skip_merge: skip_merge)
115
116
  rescue StandardError => e
116
- Jekyll.logger.error 'Localization', "Error saving PO file #{po_path}: #{e.message}"
117
+ ErrorHandler.log_error("saving PO file #{po_path}", e)
117
118
  false
118
119
  end
119
120
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'reader'
4
+ require_relative '../utils/error_handler'
4
5
 
5
6
  module Jekyll
6
7
  module L10n
@@ -73,7 +74,7 @@ module Jekyll
73
74
  merged[msgid] ||= entry
74
75
  end
75
76
  rescue StandardError => e
76
- Jekyll.logger.warn 'Localization', "Error merging PO file #{po_file}: #{e.message}"
77
+ ErrorHandler.log_warning("merging PO file #{po_file}", e)
77
78
  end
78
79
  end
79
80
  end
@@ -89,6 +89,20 @@ module Jekyll
89
89
  process_po_lines_instance(content, :merge)
90
90
  end
91
91
 
92
+ # Parse a PO file for translation injection, excluding fuzzy entries.
93
+ #
94
+ # Parses with full merge metadata (including fuzzy flags), then strips fuzzy
95
+ # entries and returns a simple msgid → msgstr hash. Fuzzy entries are treated
96
+ # as untranslated per the GNU Gettext standard: msgfmt skips them by default
97
+ # and they should fall through to the active fallback mode.
98
+ #
99
+ # @return [Hash] Simple translation hash { msgid => msgstr }, fuzzy entries omitted
100
+ def parse_for_translation
101
+ parse_for_merge
102
+ .reject { |_msgid, v| v.is_a?(Hash) && v[:fuzzy] }
103
+ .transform_values { |v| v.is_a?(Hash) ? v[:msgstr] : v }
104
+ end
105
+
92
106
  # Parse a PO file (class method, for backward compatibility).
93
107
  #
94
108
  # @param po_path [String] Path to PO file
@@ -113,6 +127,14 @@ module Jekyll
113
127
  new(po_path).parse_for_merge
114
128
  end
115
129
 
130
+ # Parse a PO file for translation injection, excluding fuzzy entries (class method).
131
+ #
132
+ # @param po_path [String] Path to PO file
133
+ # @return [Hash] Simple translation hash { msgid => msgstr }, fuzzy entries omitted
134
+ def self.parse_for_translation(po_path)
135
+ new(po_path).parse_for_translation
136
+ end
137
+
116
138
  # Backward compatibility wrapper
117
139
  def self.process_po_lines(content)
118
140
  process_po_lines_internal(content, false)
@@ -149,7 +171,8 @@ module Jekyll
149
171
  # with_mode: false (default, simple format), true (with reference), :merge (with both)
150
172
  # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize, Metrics/PerceivedComplexity
151
173
  def self.process_msgid_msgstr_pair(lines, start_idx, translations,
152
- reference: nil, fuzzy: nil, with_mode: false)
174
+ reference: nil, fuzzy: nil, previous_msgid: nil,
175
+ with_mode: false)
153
176
  # rubocop:enable Metrics/ParameterLists, Metrics/AbcSize, Metrics/PerceivedComplexity
154
177
  # Handle nil sentinel values (from NO_REFERENCE constant)
155
178
  reference = nil if reference == NO_REFERENCE
@@ -168,7 +191,8 @@ module Jekyll
168
191
  with_metadata = with_mode == true || with_mode == :merge || !reference.nil? || !fuzzy.nil?
169
192
  store_translation(
170
193
  translations, msgid_value, msgstr_value,
171
- reference: reference, fuzzy: fuzzy, with_metadata: with_metadata
194
+ reference: reference, fuzzy: fuzzy, previous_msgid: previous_msgid,
195
+ with_metadata: with_metadata
172
196
  )
173
197
  else
174
198
  i += 1
@@ -259,10 +283,10 @@ module Jekyll
259
283
  reference: reference, fuzzy: nil, with_mode: true
260
284
  )
261
285
  when :merge
262
- reference, fuzzy = extract_reference_and_fuzzy_before_msgid(lines, idx)
286
+ reference, fuzzy, previous_msgid = extract_reference_and_fuzzy_before_msgid(lines, idx)
263
287
  process_msgid_msgstr_pair(
264
288
  lines, idx, translations,
265
- reference: reference, fuzzy: fuzzy, with_mode: :merge
289
+ reference: reference, fuzzy: fuzzy, previous_msgid: previous_msgid, with_mode: :merge
266
290
  )
267
291
  else
268
292
  process_msgid_msgstr_pair(lines, idx, translations, reference: nil, fuzzy: nil,
@@ -270,29 +294,48 @@ module Jekyll
270
294
  end
271
295
  end
272
296
 
273
- # Unified metadata extraction: extracts reference and optionally fuzzy flag
297
+ # Unified metadata extraction: extracts reference, fuzzy flag, and previous msgid.
298
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
274
299
  def self.extract_metadata_before_msgid(lines, msgid_idx, include_fuzzy: false)
275
- reference = nil
276
- fuzzy = false
277
- comments_end = msgid_idx - 1
300
+ reference = nil
301
+ fuzzy = false
302
+ previous_msgid = nil
303
+ comments_end = msgid_idx - 1
278
304
 
279
305
  while comments_end >= 0
280
306
  comment_line = lines[comments_end].strip
281
307
  break unless comment_line.start_with?('#') || comment_line.empty?
282
308
 
283
- reference = extract_reference_from_line(comment_line) || reference
284
- fuzzy = true if include_fuzzy && fuzzy_line?(comment_line)
309
+ reference = extract_reference_from_line(comment_line) || reference
310
+ previous_msgid = extract_previous_msgid_from_line(comment_line) || previous_msgid if include_fuzzy
311
+ fuzzy = true if include_fuzzy && fuzzy_line?(comment_line)
285
312
 
286
313
  comments_end -= 1
287
314
  end
288
315
 
289
- include_fuzzy ? [reference, fuzzy] : reference
316
+ include_fuzzy ? [reference, fuzzy, previous_msgid] : reference
290
317
  end
318
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
291
319
 
292
320
  def self.extract_reference_from_line(comment_line)
293
321
  comment_line.sub(/^#:\s*/, '').strip if comment_line.start_with?('#:')
294
322
  end
295
323
 
324
# Parse a `#| msgid "..."` previous-msgid comment line.
#
# Only lines whose keyword is exactly `msgid` are accepted. Other `#|`
# lines (for example `#| msgid_plural "..."`, or a `#| "..."` continuation
# line that merely contains the text "msgid") return nil rather than being
# mistaken for the previous msgid.
#
# @param comment_line [String] a single PO comment line
# @return [String, nil] the previous msgid with surrounding quotes removed
#   and passed through unescape_string, the remaining text as-is when it is
#   not quoted, or nil when the line is not a `#| msgid` line
def self.extract_previous_msgid_from_line(comment_line)
  # The negative lookahead rejects longer keywords such as msgid_plural.
  match = comment_line.match(/\A#\|\s*msgid(?![A-Za-z_])\s*(.*)\z/m)
  return nil unless match

  raw = match[1].strip
  # Strip surrounding quotes and unescape
  if raw.start_with?('"') && raw.end_with?('"')
    unescape_string(raw[1...-1], '"')
  elsif raw.start_with?("'") && raw.end_with?("'")
    unescape_string(raw[1...-1], "'")
  else
    raw
  end
end
338
+
296
339
  def self.fuzzy_line?(comment_line)
297
340
  comment_line.start_with?('#,') && comment_line.include?('fuzzy')
298
341
  end
@@ -302,7 +345,7 @@ module Jekyll
302
345
  extract_metadata_before_msgid(lines, msgid_idx, include_fuzzy: false)
303
346
  end
304
347
 
305
- # Backward compatibility wrapper
348
+ # Backward compatibility wrapper — now returns [reference, fuzzy, previous_msgid]
306
349
  def self.extract_reference_and_fuzzy_before_msgid(lines, msgid_idx)
307
350
  extract_metadata_before_msgid(lines, msgid_idx, include_fuzzy: true)
308
351
  end
@@ -404,24 +447,29 @@ module Jekyll
404
447
  # - Backward compatibility: Supports legacy calling conventions
405
448
  # rubocop:disable Metrics/ParameterLists
406
449
  def self.store_translation(translations, msgid, msgstr, reference: nil, fuzzy: nil,
407
- with_metadata: false)
450
+ previous_msgid: nil, with_metadata: false)
408
451
  # rubocop:enable Metrics/ParameterLists
409
452
  return if msgid.nil? || msgstr.nil? || msgid.empty?
410
453
 
411
- translations[msgid] = build_translation_entry(msgstr, reference, fuzzy, with_metadata)
454
+ translations[msgid] = build_translation_entry(msgstr, reference, fuzzy,
455
+ previous_msgid, with_metadata: with_metadata)
412
456
  end
413
457
 
414
- def self.build_translation_entry(msgstr, reference, fuzzy, with_metadata)
458
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
459
+ def self.build_translation_entry(msgstr, reference, fuzzy, previous_msgid = nil,
460
+ with_metadata: false)
415
461
  # Simple format when no metadata requested and none provided
416
- return msgstr if !with_metadata && reference.nil? && fuzzy.nil?
462
+ return msgstr if !with_metadata && reference.nil? && fuzzy.nil? && previous_msgid.nil?
417
463
 
418
464
  # Build metadata hash based on what's provided
419
465
  entry = { msgstr: msgstr }
420
- entry[:reference] = reference unless reference.nil?
421
- entry[:fuzzy] = fuzzy unless fuzzy.nil?
422
- entry[:comment] = nil if !fuzzy.nil? || !reference.nil?
466
+ entry[:reference] = reference unless reference.nil?
467
+ entry[:fuzzy] = fuzzy unless fuzzy.nil?
468
+ entry[:previous_msgid] = previous_msgid unless previous_msgid.nil?
469
+ entry[:comment] = nil if !fuzzy.nil? || !reference.nil?
423
470
  entry
424
471
  end
472
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
425
473
 
426
474
  # Kept for backward compatibility with existing tests
427
475
  # Supports both positional and keyword argument calling styles
@@ -499,9 +547,11 @@ module Jekyll
499
547
  self.class.process_msgid_msgstr_pair(lines, idx, translations, reference: reference,
500
548
  fuzzy: nil, with_mode: true)
501
549
  when :merge
502
- reference, fuzzy = self.class.extract_reference_and_fuzzy_before_msgid(lines, idx)
550
+ reference, fuzzy, previous_msgid = self.class.extract_reference_and_fuzzy_before_msgid(lines, idx)
503
551
  self.class.process_msgid_msgstr_pair(lines, idx, translations, reference: reference,
504
- fuzzy: fuzzy, with_mode: :merge)
552
+ fuzzy: fuzzy,
553
+ previous_msgid: previous_msgid,
554
+ with_mode: :merge)
505
555
  else
506
556
  self.class.process_msgid_msgstr_pair(lines, idx, translations, reference: nil,
507
557
  fuzzy: nil, with_mode: false)