jekyll-l10n 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-l10n/constants.rb +23 -0
- data/lib/jekyll-l10n/extraction/compendium_merger.rb +57 -10
- data/lib/jekyll-l10n/extraction/compendium_translator.rb +1 -1
- data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +2 -1
- data/lib/jekyll-l10n/extraction/extractor.rb +1 -0
- data/lib/jekyll-l10n/extraction/logger.rb +3 -1
- data/lib/jekyll-l10n/extraction/result_saver.rb +13 -2
- data/lib/jekyll-l10n/instrumentation.rb +306 -0
- data/lib/jekyll-l10n/jekyll/generator.rb +2 -1
- data/lib/jekyll-l10n/jekyll/url_filter.rb +5 -3
- data/lib/jekyll-l10n/po_file/fuzzy_matcher.rb +117 -0
- data/lib/jekyll-l10n/po_file/loader.rb +3 -2
- data/lib/jekyll-l10n/po_file/manager.rb +2 -1
- data/lib/jekyll-l10n/po_file/merger.rb +2 -1
- data/lib/jekyll-l10n/po_file/reader.rb +73 -21
- data/lib/jekyll-l10n/po_file/writer.rb +50 -13
- data/lib/jekyll-l10n/translation/block_text_extractor.rb +2 -8
- data/lib/jekyll-l10n/translation/html_translator.rb +88 -19
- data/lib/jekyll-l10n/translation/libre_translator.rb +10 -10
- data/lib/jekyll-l10n/translation/translator.rb +2 -1
- data/lib/jekyll-l10n/utils/error_handler.rb +12 -0
- data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +3 -2
- data/lib/jekyll-l10n/utils/html_elements.rb +4 -0
- data/lib/jekyll-l10n/utils/html_text_utils.rb +77 -4
- data/lib/jekyll-l10n/utils/page_locales_config.rb +14 -14
- data/lib/jekyll-l10n/utils/po_entry_converter.rb +22 -17
- data/lib/jekyll-l10n/utils/site_config_accessor.rb +3 -1
- data/lib/jekyll-l10n/utils/translation_resolver.rb +1 -4
- data/lib/jekyll-l10n/utils/url_transformer.rb +4 -5
- data/lib/jekyll-l10n.rb +4 -0
- metadata +20 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a3b778612621ba8532ed88057a833cebfb615d4e7dfeb09befb9f58249896a5b
|
|
4
|
+
data.tar.gz: 399997d1222baaeeea243b1cc61638b169e797d72da8cc543c3949f8b63b5f4b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 122a6388d40c31898704cf3f91b96ef226d30fd9a8b706e7a68d0639bab07042d2707809ecd5076b2d84c79384824cca7964582bd4f931f4a26c95cedd4c567f
|
|
7
|
+
data.tar.gz: 6de61435ff7b179e62e1c7b3dbf705f6a8059a8b5b3f7158f9e2542145ddf3f391493b0a3e2dcff729c142d500658a0ce98815f514449aa27edfbcde4fe73566
|
|
@@ -24,6 +24,13 @@ module Jekyll
|
|
|
24
24
|
# @return [Regexp]
|
|
25
25
|
LOCALE_PATTERN = /^[a-z]{2}(_[A-Z]{2})?$/.freeze
|
|
26
26
|
|
|
27
|
+
# Regex fragment for matching locale codes in URL path segments.
|
|
28
|
+
# Accepts both underscore ('pt_BR') and hyphen ('zh-CN') subtag separators,
|
|
29
|
+
# unlike LOCALE_PATTERN which only accepts underscore (strict validation form).
|
|
30
|
+
# Compose into full path patterns rather than using standalone.
|
|
31
|
+
# @return [String]
|
|
32
|
+
LOCALE_CODE_SEGMENT = '[a-z]{2}(?:[_-][A-Z]{2})?'
|
|
33
|
+
|
|
27
34
|
# ## Translation Fallback Modes
|
|
28
35
|
|
|
29
36
|
# Fallback mode: use original English text if translation not found
|
|
@@ -44,6 +51,16 @@ module Jekyll
|
|
|
44
51
|
# @return [String] "[UNTRANSLATED]"
|
|
45
52
|
UNTRANSLATED_MARKER = '[UNTRANSLATED]'
|
|
46
53
|
|
|
54
|
+
# Minimum similarity score (0.0–1.0) for fuzzy matching old translations to
|
|
55
|
+
# changed msgids. Mirrors GNU msgmerge default. Set to 1.0 to disable.
|
|
56
|
+
# @return [Float] 0.6
|
|
57
|
+
DEFAULT_FUZZY_THRESHOLD = 0.6
|
|
58
|
+
|
|
59
|
+
# Maximum msgid character length above which fuzzy matching is skipped.
|
|
60
|
+
# Long strings are typically unique HTML fragments; Levenshtein cost is O(n²).
|
|
61
|
+
# @return [Integer] 400
|
|
62
|
+
MAX_FUZZY_MSGID_LENGTH = 400
|
|
63
|
+
|
|
47
64
|
# ## PO File Formatting (GNU Gettext Standard)
|
|
48
65
|
|
|
49
66
|
# Line length threshold below which strings are rendered on a single line
|
|
@@ -99,6 +116,12 @@ module Jekyll
|
|
|
99
116
|
# @return [Array<String>] ["title", "alt", "aria-label", "placeholder", "aria-description"]
|
|
100
117
|
DEFAULT_TRANSLATABLE_ATTRIBUTES = %w[title alt aria-label placeholder aria-description].freeze
|
|
101
118
|
|
|
119
|
+
# Structural/styling attributes restored from the source DOM at render time.
|
|
120
|
+
# These are never read from msgstr — always sourced from original HTML.
|
|
121
|
+
# Prevents MT-corrupted class strings from reaching the rendered page.
|
|
122
|
+
# @return [Array<String>]
|
|
123
|
+
STRUCTURAL_PASSTHROUGH_ATTRS = %w[class style id target rel tabindex aria-hidden].freeze
|
|
124
|
+
|
|
102
125
|
# ## LibreTranslate Integration Defaults
|
|
103
126
|
|
|
104
127
|
# Default LibreTranslate API endpoint URL
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'fileutils'
|
|
3
4
|
require_relative '../po_file/manager'
|
|
5
|
+
require_relative '../po_file/fuzzy_matcher'
|
|
4
6
|
require_relative '../utils/page_locales_config'
|
|
5
7
|
require_relative '../utils/site_config_accessor'
|
|
6
8
|
|
|
@@ -84,10 +86,11 @@ module Jekyll
|
|
|
84
86
|
end
|
|
85
87
|
end
|
|
86
88
|
|
|
87
|
-
# Load existing compendium translations or return empty hash if not found
|
|
89
|
+
# Load existing compendium translations or return empty hash if not found.
|
|
90
|
+
# Uses parse_for_merge to preserve fuzzy flags from previous runs.
|
|
88
91
|
def load_existing_compendium(compendium_path)
|
|
89
92
|
if File.exist?(compendium_path)
|
|
90
|
-
PoFileReader.
|
|
93
|
+
PoFileReader.parse_for_merge(compendium_path)
|
|
91
94
|
else
|
|
92
95
|
{}
|
|
93
96
|
end
|
|
@@ -102,24 +105,54 @@ module Jekyll
|
|
|
102
105
|
combined
|
|
103
106
|
end
|
|
104
107
|
|
|
105
|
-
# Normalize entry format to ensure consistent hash structure with :msgstr and :reference keys
|
|
108
|
+
# Normalize entry format to ensure consistent hash structure with :msgstr and :reference keys.
|
|
109
|
+
# Fuzzy metadata is preserved when present so it survives subsequent compendium reads.
|
|
106
110
|
def normalize_compendium_entry(data)
|
|
107
111
|
if data.is_a?(Hash)
|
|
108
|
-
{ msgstr: data[:msgstr], reference: data[:reference] }
|
|
112
|
+
entry = { msgstr: data[:msgstr], reference: data[:reference] }
|
|
113
|
+
entry[:fuzzy] = data[:fuzzy] if data[:fuzzy]
|
|
114
|
+
entry[:previous_msgid] = data[:previous_msgid] if data[:previous_msgid]
|
|
115
|
+
entry
|
|
109
116
|
else
|
|
110
117
|
{ msgstr: data, reference: nil }
|
|
111
118
|
end
|
|
112
119
|
end
|
|
113
120
|
|
|
114
|
-
# Merge newly found translations into combined hash, preserving existing translations
|
|
121
|
+
# Merge newly found translations into combined hash, preserving existing translations.
|
|
122
|
+
# Applies Levenshtein fuzzy matching for changed msgids: when a new msgid closely
|
|
123
|
+
# resembles an old compendium entry, the old entry is replaced by a fuzzy-marked new
|
|
124
|
+
# entry carrying the old translation as a hint. Old entries without a new fuzzy match
|
|
125
|
+
# stay in the combined hash (they may belong to pages not extracted in this build).
|
|
115
126
|
def merge_into_combined(combined, merged)
|
|
127
|
+
new_msgids = merged.keys.to_set
|
|
128
|
+
fuzzy_candidates = compendium_fuzzy_candidates(combined, new_msgids)
|
|
129
|
+
matched_old_msgids = []
|
|
130
|
+
|
|
116
131
|
merged.each do |msgid, entry|
|
|
117
132
|
if combined[msgid]
|
|
118
133
|
update_entry_reference(combined[msgid], entry)
|
|
119
134
|
else
|
|
120
|
-
combined
|
|
135
|
+
resolve_new_entry(combined, msgid, entry, fuzzy_candidates, matched_old_msgids)
|
|
121
136
|
end
|
|
122
137
|
end
|
|
138
|
+
|
|
139
|
+
matched_old_msgids.each { |old_msgid| combined.delete(old_msgid) }
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def compendium_fuzzy_candidates(combined, new_msgids)
|
|
143
|
+
combined.reject do |msgid, entry|
|
|
144
|
+
new_msgids.include?(msgid) || PoFuzzyMatcher.msgstr_from_entry(entry).empty?
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def resolve_new_entry(combined, msgid, entry, fuzzy_candidates, matched_old_msgids)
|
|
149
|
+
match = PoFuzzyMatcher.find_match(msgid, fuzzy_candidates)
|
|
150
|
+
if match
|
|
151
|
+
combined[msgid] = create_fuzzy_entry(entry, match)
|
|
152
|
+
matched_old_msgids << match[:msgid]
|
|
153
|
+
else
|
|
154
|
+
combined[msgid] = create_new_entry(entry)
|
|
155
|
+
end
|
|
123
156
|
end
|
|
124
157
|
|
|
125
158
|
# Update reference for existing entry if new reference is available
|
|
@@ -138,11 +171,24 @@ module Jekyll
|
|
|
138
171
|
end
|
|
139
172
|
end
|
|
140
173
|
|
|
141
|
-
#
|
|
174
|
+
# Create a fuzzy entry for a changed msgid, carrying the old translation as a hint
|
|
175
|
+
def create_fuzzy_entry(entry, match)
|
|
176
|
+
{
|
|
177
|
+
msgstr: match[:msgstr],
|
|
178
|
+
reference: entry.is_a?(Hash) ? entry[:reference] : nil,
|
|
179
|
+
fuzzy: true,
|
|
180
|
+
previous_msgid: match[:msgid]
|
|
181
|
+
}
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# Convert combined hash to array of entries suitable for PO file writing.
|
|
185
|
+
# Fuzzy metadata is included so PoFileWriter can serialize #, fuzzy and #| msgid.
|
|
142
186
|
def format_compendium_entries(combined)
|
|
143
187
|
combined.map do |msgid, data|
|
|
144
188
|
entry = { msgid: msgid, msgstr: data[:msgstr] }
|
|
145
|
-
entry[:reference]
|
|
189
|
+
entry[:reference] = data[:reference] if data[:reference]
|
|
190
|
+
entry[:fuzzy] = data[:fuzzy] if data[:fuzzy]
|
|
191
|
+
entry[:previous_msgid] = data[:previous_msgid] if data[:previous_msgid]
|
|
146
192
|
entry
|
|
147
193
|
end
|
|
148
194
|
end
|
|
@@ -156,8 +202,9 @@ module Jekyll
|
|
|
156
202
|
|
|
157
203
|
private :process_locale, :compendium_unchanged?, :load_existing_compendium,
|
|
158
204
|
:build_combined_hash, :normalize_compendium_entry, :merge_into_combined,
|
|
159
|
-
:
|
|
160
|
-
:
|
|
205
|
+
:compendium_fuzzy_candidates, :resolve_new_entry,
|
|
206
|
+
:update_entry_reference, :create_new_entry, :create_fuzzy_entry,
|
|
207
|
+
:format_compendium_entries, :cleanup_locale_directory
|
|
161
208
|
end
|
|
162
209
|
end
|
|
163
210
|
end
|
|
@@ -72,7 +72,7 @@ module Jekyll
|
|
|
72
72
|
"Processing compendium file: #{compendium_path}")
|
|
73
73
|
return unless File.exist?(compendium_path)
|
|
74
74
|
|
|
75
|
-
entries = PoFileReader.
|
|
75
|
+
entries = PoFileReader.parse_for_merge(compendium_path)
|
|
76
76
|
po_entries = PoEntryConverter.hash_to_po_entry_array(entries)
|
|
77
77
|
|
|
78
78
|
log_compendium_stats(locale, po_entries, compendium_path)
|
|
@@ -52,11 +52,12 @@ module Jekyll
|
|
|
52
52
|
end
|
|
53
53
|
|
|
54
54
|
def extractable?(node)
|
|
55
|
-
|
|
55
|
+
HtmlTextUtils.extractable?(node)
|
|
56
56
|
end
|
|
57
57
|
|
|
58
58
|
def extract_block_text(node)
|
|
59
59
|
return nil if only_contains_block_elements?(node)
|
|
60
|
+
return nil if HtmlTextUtils.layout_block_children?(node)
|
|
60
61
|
|
|
61
62
|
text = HtmlTextUtils.extract_with_inline_tags(node)
|
|
62
63
|
TextValidator.valid?(text) ? text : nil
|
|
@@ -75,6 +75,7 @@ module Jekyll
|
|
|
75
75
|
Jekyll.logger.info 'Localization', 'Extracting translatable strings...'
|
|
76
76
|
start_time = Time.now
|
|
77
77
|
stats = process_all_html_files
|
|
78
|
+
@result_saver.finalize_compendia
|
|
78
79
|
translate_all_compendia
|
|
79
80
|
ExtractionLogger.log_summary(stats, Time.now - start_time)
|
|
80
81
|
stats
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative '../utils/error_handler'
|
|
4
|
+
|
|
3
5
|
module Jekyll
|
|
4
6
|
module L10n
|
|
5
7
|
# Logs extraction process errors and summary statistics.
|
|
@@ -22,7 +24,7 @@ module Jekyll
|
|
|
22
24
|
# @param error [StandardError] The error that occurred
|
|
23
25
|
# @return [void]
|
|
24
26
|
def self.log_error(file_path, error)
|
|
25
|
-
|
|
27
|
+
ErrorHandler.log_error("extracting from #{file_path}", error)
|
|
26
28
|
end
|
|
27
29
|
|
|
28
30
|
# Log extraction completion summary.
|
|
@@ -63,6 +63,19 @@ module Jekyll
|
|
|
63
63
|
}
|
|
64
64
|
end
|
|
65
65
|
|
|
66
|
+
# Merge all page-specific PO files into compendia after all pages are extracted.
|
|
67
|
+
#
|
|
68
|
+
# Called once per build (from Extractor.extract_site) rather than per page,
|
|
69
|
+
# reducing disk I/O and fuzzy-matching passes from O(pages) to O(1).
|
|
70
|
+
#
|
|
71
|
+
# @return [void]
|
|
72
|
+
def finalize_compendia
|
|
73
|
+
return unless @site_config.update_compendium?
|
|
74
|
+
|
|
75
|
+
po_manager = PoFileManager.new(@site, @site_config.locales_dir)
|
|
76
|
+
CompendiumMerger.new(@site).merge_compendia(po_manager, @site_config)
|
|
77
|
+
end
|
|
78
|
+
|
|
66
79
|
# Translate compendia using LibreTranslate.
|
|
67
80
|
#
|
|
68
81
|
# If LibreTranslate is enabled in config, translates all empty entries in
|
|
@@ -84,8 +97,6 @@ module Jekyll
|
|
|
84
97
|
po_files_created += 1 if po_manager.save_po_file(locale, entries, page_path: page_path)
|
|
85
98
|
end
|
|
86
99
|
|
|
87
|
-
CompendiumMerger.new(@site).merge_compendia(po_manager, config) if config.update_compendium?
|
|
88
|
-
|
|
89
100
|
po_files_created
|
|
90
101
|
end
|
|
91
102
|
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Jekyll
|
|
4
|
+
module L10n
|
|
5
|
+
# OpenTelemetry instrumentation facade for jekyll-l10n.
|
|
6
|
+
#
|
|
7
|
+
# All tracing is configured centrally in TRACED_METHODS — no span code lives
|
|
8
|
+
# in business logic classes. To add a span: append one entry. To rename a
|
|
9
|
+
# method: update the one entry. When a method is removed from its class the
|
|
10
|
+
# stale entry raises NoMethodError in tests, signalling the entry to delete.
|
|
11
|
+
#
|
|
12
|
+
# Uses opentelemetry-api when it is loaded at runtime; falls back to a no-op tracer if absent.
|
|
13
|
+
# Users opt in to real tracing by adding opentelemetry-sdk and
|
|
14
|
+
# opentelemetry-exporter-otlp to their site Gemfile and exporting:
|
|
15
|
+
#
|
|
16
|
+
# OTEL_SERVICE_NAME=jekyll-l10n
|
|
17
|
+
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
|
18
|
+
module Instrumentation
|
|
19
|
+
TRACER_NAME = 'jekyll-l10n'
|
|
20
|
+
|
|
21
|
+
# Central instrumentation registry.
|
|
22
|
+
#
|
|
23
|
+
# Each row: [class_name, method_type, method_name, span_name, attribute_proc]
|
|
24
|
+
#
|
|
25
|
+
# method_type:
|
|
26
|
+
# :instance — public instance method
|
|
27
|
+
# :private — private instance method (visibility is preserved on the wrapper)
|
|
28
|
+
# :class — class / module method (prepended on the singleton class)
|
|
29
|
+
#
|
|
30
|
+
# attribute_proc: ->(span, this, args, result) or nil
|
|
31
|
+
# span — OTel span (or NoopSpan); call span.set_attribute after super returns
|
|
32
|
+
#   this   — receiver object (the instance, or the class/module itself for class methods)
|
|
33
|
+
# args — positional args array as passed to the method
|
|
34
|
+
# result — return value of the original method
|
|
35
|
+
# :nocov: — attribute procs are configuration data; coverage comes from integration tests
|
|
36
|
+
|
|
37
|
+
# Helpers for common attribute patterns used inside TRACED_METHODS procs.
|
|
38
|
+
def self.hash_val(hash, key)
|
|
39
|
+
hash.is_a?(Hash) ? hash[key].to_i : 0
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def self.hash_size(hash)
|
|
43
|
+
hash.is_a?(Hash) ? hash.size : 0
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def self.array_size(val)
|
|
47
|
+
val.is_a?(Array) ? val.size : 0
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
TRACED_METHODS = [
|
|
51
|
+
# ── Jekyll integration ────────────────────────────────────────────────
|
|
52
|
+
['Jekyll::L10n::Generator', :instance, :generate, 'l10n.generate',
|
|
53
|
+
lambda { |span, _this, args, _result|
|
|
54
|
+
span.set_attribute('l10n.site_page_count', args[0]&.pages&.size || 0)
|
|
55
|
+
}],
|
|
56
|
+
|
|
57
|
+
['Jekyll::L10n::PostWriteProcessor', :instance, :process_localizations,
|
|
58
|
+
'l10n.post_write', nil],
|
|
59
|
+
|
|
60
|
+
# translate is the post_render hook entry point — one span per localized page
|
|
61
|
+
['Jekyll::L10n::Translator', :instance, :translate, 'l10n.translate_render',
|
|
62
|
+
lambda { |span, this, _args, _result|
|
|
63
|
+
span.set_attribute('l10n.locale', this.page.data['locale'].to_s)
|
|
64
|
+
span.set_attribute('l10n.page_url', this.page.url.to_s)
|
|
65
|
+
}],
|
|
66
|
+
|
|
67
|
+
# ── Extraction pipeline ───────────────────────────────────────────────
|
|
68
|
+
['Jekyll::L10n::Extractor', :instance, :extract_site, 'l10n.extract_site',
|
|
69
|
+
lambda { |span, _this, _args, result|
|
|
70
|
+
span.set_attribute('l10n.file_count', Instrumentation.hash_val(result, :files_processed))
|
|
71
|
+
}],
|
|
72
|
+
|
|
73
|
+
# process_file is the per-page body called from the html_files loop
|
|
74
|
+
['Jekyll::L10n::Extractor', :private, :process_file, 'l10n.extract_page',
|
|
75
|
+
lambda { |span, _this, args, result|
|
|
76
|
+
span.set_attribute('l10n.page_path', args[0].to_s)
|
|
77
|
+
span.set_attribute('l10n.strings_extracted',
|
|
78
|
+
Instrumentation.hash_val(result, :strings_extracted))
|
|
79
|
+
}],
|
|
80
|
+
|
|
81
|
+
['Jekyll::L10n::HtmlStringExtractor', :instance, :extract, 'l10n.html_extract',
|
|
82
|
+
lambda { |span, _this, args, result|
|
|
83
|
+
span.set_attribute('l10n.html_size_bytes', args[0].bytesize)
|
|
84
|
+
span.set_attribute('l10n.extracted_count', Instrumentation.array_size(result))
|
|
85
|
+
}],
|
|
86
|
+
|
|
87
|
+
['Jekyll::L10n::ExtractionResultSaver', :instance, :save_results, 'l10n.po_file_write',
|
|
88
|
+
lambda { |span, _this, args, result|
|
|
89
|
+
span.set_attribute('l10n.page_path', args[2].to_s)
|
|
90
|
+
span.set_attribute('l10n.entry_count', Instrumentation.array_size(args[1]))
|
|
91
|
+
span.set_attribute('l10n.po_files_created',
|
|
92
|
+
Instrumentation.hash_val(result, :po_files_created))
|
|
93
|
+
}],
|
|
94
|
+
|
|
95
|
+
['Jekyll::L10n::CompendiumMerger', :instance, :merge_compendia,
|
|
96
|
+
'l10n.compendium_merge', nil],
|
|
97
|
+
|
|
98
|
+
['Jekyll::L10n::CompendiumTranslator', :instance, :translate_compendia,
|
|
99
|
+
'l10n.translate_compendia',
|
|
100
|
+
lambda { |span, _this, args, _result|
|
|
101
|
+
config = args[0]
|
|
102
|
+
span.set_attribute('l10n.locale_count', config.locales.size) if config.respond_to?(:locales)
|
|
103
|
+
}],
|
|
104
|
+
|
|
105
|
+
# ── Translation pipeline ──────────────────────────────────────────────
|
|
106
|
+
['Jekyll::L10n::PostWriteHtmlReprocessor', :instance, :reprocess_localized_pages,
|
|
107
|
+
'l10n.reprocess_localized_pages', nil],
|
|
108
|
+
|
|
109
|
+
# translate_html_file is the per-page body called from the localized_files loop
|
|
110
|
+
['Jekyll::L10n::PostWriteHtmlReprocessor', :private, :translate_html_file,
|
|
111
|
+
'l10n.translate_page',
|
|
112
|
+
lambda { |span, _this, args, _result|
|
|
113
|
+
span.set_attribute('l10n.page_path', args[0].to_s)
|
|
114
|
+
span.set_attribute('l10n.locale', args[1].to_s)
|
|
115
|
+
}],
|
|
116
|
+
|
|
117
|
+
['Jekyll::L10n::PageTranslationLoader', :class, :load, 'l10n.translation_load',
|
|
118
|
+
lambda { |span, _this, args, result|
|
|
119
|
+
span.set_attribute('l10n.locale', args[1].to_s)
|
|
120
|
+
span.set_attribute('l10n.page_path', args[2].to_s)
|
|
121
|
+
span.set_attribute('l10n.entry_count', Instrumentation.hash_size(result))
|
|
122
|
+
}],
|
|
123
|
+
|
|
124
|
+
['Jekyll::L10n::HtmlTranslator', :instance, :translate, 'l10n.dom_translate',
|
|
125
|
+
lambda { |span, this, args, _result|
|
|
126
|
+
span.set_attribute('l10n.locale', (args[2] || 'en').to_s)
|
|
127
|
+
span.set_attribute('l10n.fallback_mode', this.fallback_mode.to_s)
|
|
128
|
+
}],
|
|
129
|
+
|
|
130
|
+
['Jekyll::L10n::LibreTranslator', :private, :make_api_request,
|
|
131
|
+
'l10n.libretranslate_batch',
|
|
132
|
+
lambda { |span, _this, args, _result|
|
|
133
|
+
span.set_attribute('l10n.locale', args[1].to_s)
|
|
134
|
+
span.set_attribute('l10n.batch_size', args[0].is_a?(Array) ? args[0].size : 1)
|
|
135
|
+
}],
|
|
136
|
+
|
|
137
|
+
# ── Utilities ─────────────────────────────────────────────────────────
|
|
138
|
+
['Jekyll::L10n::HtmlParser', :class, :parse_document, 'l10n.html_parse',
|
|
139
|
+
lambda { |span, _this, args, _result|
|
|
140
|
+
span.set_attribute('l10n.html_size_bytes', args[0].bytesize)
|
|
141
|
+
}],
|
|
142
|
+
|
|
143
|
+
['Jekyll::L10n::UrlTransformer', :class, :transform_document, 'l10n.url_transform',
|
|
144
|
+
lambda { |span, _this, args, _result|
|
|
145
|
+
span.set_attribute('l10n.locale', args[1].to_s)
|
|
146
|
+
doc = args[0]
|
|
147
|
+
span.set_attribute('l10n.href_count',
|
|
148
|
+
doc.respond_to?(:css) ? doc.css('a[href]').size : 0)
|
|
149
|
+
}],
|
|
150
|
+
|
|
151
|
+
['Jekyll::L10n::ExternalLinkIconPreserver', :class, :preserve,
|
|
152
|
+
'l10n.icon_preserve', nil],
|
|
153
|
+
|
|
154
|
+
# ── PO file operations ────────────────────────────────────────────────
|
|
155
|
+
['Jekyll::L10n::PoFileReader', :instance, :parse_for_translation,
|
|
156
|
+
'l10n.po_file_read',
|
|
157
|
+
lambda { |span, this, _args, result|
|
|
158
|
+
span.set_attribute('l10n.file_path', this.po_path.to_s)
|
|
159
|
+
span.set_attribute('l10n.entry_count', Instrumentation.hash_size(result))
|
|
160
|
+
}],
|
|
161
|
+
|
|
162
|
+
['Jekyll::L10n::PoFileMerger', :class, :merge_for_locale, 'l10n.po_merge',
|
|
163
|
+
lambda { |span, _this, args, result|
|
|
164
|
+
span.set_attribute('l10n.locale', args[2].to_s)
|
|
165
|
+
span.set_attribute('l10n.merged_count', Instrumentation.hash_size(result))
|
|
166
|
+
}]
|
|
167
|
+
].freeze
|
|
168
|
+
# :nocov:
|
|
169
|
+
|
|
170
|
+
# Returns the active OTel tracer, or a no-op tracer if opentelemetry-api is absent.
|
|
171
|
+
def self.tracer
|
|
172
|
+
@tracer ||=
|
|
173
|
+
if defined?(OpenTelemetry)
|
|
174
|
+
# :nocov:
|
|
175
|
+
OpenTelemetry.tracer_provider.tracer(TRACER_NAME, Jekyll::L10n::VERSION)
|
|
176
|
+
# :nocov:
|
|
177
|
+
else
|
|
178
|
+
NoopTracer.new
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Wraps a block in an OTel span.
|
|
183
|
+
#
|
|
184
|
+
# @param span_name [String] Dot-separated span name (e.g. 'l10n.extract_page')
|
|
185
|
+
# @param attributes [Hash] Initial span attributes
|
|
186
|
+
# @yieldparam span [OpenTelemetry::Trace::Span, NoopSpan] Active span
|
|
187
|
+
# @return [Object] The return value of the block
|
|
188
|
+
def self.instrument(span_name, attributes: {}, &block)
|
|
189
|
+
tracer.in_span(span_name, attributes: attributes, &block)
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
# Resets the cached tracer and installation flag. Call in tests after changing OTel configuration.
|
|
193
|
+
def self.reset!
|
|
194
|
+
@tracer = nil
|
|
195
|
+
@installed = false
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
# Returns true when OTel is requested via standard environment variables.
|
|
199
|
+
#
|
|
200
|
+
# The install! call site guards on this so the prepend wrappers are only applied when
|
|
201
|
+
# a real exporter is configured. In CI and local tests (no OTel env vars)
|
|
202
|
+
# business logic classes are untouched, keeping allow_any_instance_of stubs
|
|
203
|
+
# and other RSpec mechanics fully functional.
|
|
204
|
+
def self.enabled?
|
|
205
|
+
ENV.key?('OTEL_EXPORTER_OTLP_ENDPOINT') || ENV.key?('OTEL_SERVICE_NAME')
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
# Installs wrappers on all classes listed in TRACED_METHODS using Module#prepend.
|
|
209
|
+
#
|
|
210
|
+
# Called once at plugin load time (end of jekyll-l10n.rb, after all requires),
|
|
211
|
+
# but only when enabled? returns true. Use OTEL_EXPORTER_OTLP_ENDPOINT or
|
|
212
|
+
# OTEL_SERVICE_NAME to activate tracing.
|
|
213
|
+
def self.install!
|
|
214
|
+
return if @installed
|
|
215
|
+
|
|
216
|
+
@installed = true
|
|
217
|
+
setup_sdk!
|
|
218
|
+
TRACED_METHODS.group_by { |e| e[0] }.each do |class_name, entries|
|
|
219
|
+
prepend_wrappers(resolve_class(class_name), entries)
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# Configures the OTel SDK from standard env vars if the SDK gem is available.
|
|
224
|
+
#
|
|
225
|
+
# Called from install! so that spans are exported to the configured backend
|
|
226
|
+
# without requiring sites to add a separate initializer. Silently no-ops when
|
|
227
|
+
# opentelemetry-sdk or opentelemetry-exporter-otlp is not installed.
|
|
228
|
+
#
|
|
229
|
+
# SimpleSpanProcessor is used instead of the SDK default (BatchSpanProcessor)
|
|
230
|
+
# because Jekyll is a short-lived CLI process. BatchSpanProcessor exports on a
|
|
231
|
+
# 5-second schedule; outer spans that close last (post_write, generate) are
|
|
232
|
+
# silently dropped when the process exits before the next flush. Simple exports
|
|
233
|
+
# each span synchronously the moment it closes, guaranteeing no span loss.
|
|
234
|
+
def self.setup_sdk!
|
|
235
|
+
# :nocov:
|
|
236
|
+
require 'opentelemetry/sdk'
|
|
237
|
+
require 'opentelemetry/exporter/otlp'
|
|
238
|
+
exporter = OpenTelemetry::Exporter::OTLP::Exporter.new
|
|
239
|
+
processor = OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(exporter)
|
|
240
|
+
OpenTelemetry::SDK.configure do |c|
|
|
241
|
+
c.add_span_processor(processor)
|
|
242
|
+
end
|
|
243
|
+
rescue LoadError
|
|
244
|
+
nil
|
|
245
|
+
# :nocov:
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
# Resolves a '::'-separated constant name (e.g. 'Jekyll::L10n::Generator') to a constant; returns nil on NameError.
|
|
249
|
+
def self.resolve_class(name)
|
|
250
|
+
name.split('::').reduce(Object) { |m, c| m.const_get(c) }
|
|
251
|
+
rescue NameError
|
|
252
|
+
nil
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
# Applies instance and class-method wrappers to klass; skips if klass is nil.
|
|
256
|
+
def self.prepend_wrappers(klass, entries)
|
|
257
|
+
return unless klass
|
|
258
|
+
|
|
259
|
+
class_entries, instance_entries = entries.partition { |e| e[1] == :class }
|
|
260
|
+
klass.prepend(build_wrapper_module(instance_entries)) unless instance_entries.empty?
|
|
261
|
+
klass.singleton_class.prepend(build_wrapper_module(class_entries)) unless class_entries.empty?
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
# Builds an anonymous module that wraps each listed method in a span.
|
|
265
|
+
def self.build_wrapper_module(entries)
|
|
266
|
+
Module.new do
|
|
267
|
+
entries.each do |_class_name, method_type, method_name, span_name, attr_proc|
|
|
268
|
+
define_method(method_name) do |*args, **kwargs, &blk|
|
|
269
|
+
Instrumentation.instrument(span_name) do |span|
|
|
270
|
+
result = super(*args, **kwargs, &blk)
|
|
271
|
+
attr_proc&.call(span, self, args, result)
|
|
272
|
+
result
|
|
273
|
+
rescue StandardError => e
|
|
274
|
+
span.record_exception(e)
|
|
275
|
+
raise
|
|
276
|
+
end
|
|
277
|
+
end
|
|
278
|
+
private method_name if method_type == :private
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
private_class_method :build_wrapper_module, :prepend_wrappers, :setup_sdk!
|
|
284
|
+
|
|
285
|
+
# No-op tracer used when opentelemetry-api is not loaded.
|
|
286
|
+
class NoopTracer
|
|
287
|
+
def in_span(_name, **_opts)
|
|
288
|
+
yield NoopSpan.new
|
|
289
|
+
end
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
# No-op span that silently accepts all attribute and event calls.
|
|
293
|
+
class NoopSpan
|
|
294
|
+
def set_attribute(*)
|
|
295
|
+
self
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def record_exception(*)
|
|
299
|
+
self
|
|
300
|
+
end
|
|
301
|
+
|
|
302
|
+
def status=(*); end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative '../constants'
|
|
3
4
|
require_relative 'regeneration_checker'
|
|
4
5
|
|
|
5
6
|
module Jekyll
|
|
@@ -119,7 +120,7 @@ module Jekyll
|
|
|
119
120
|
def valid_locale_code?(locale)
|
|
120
121
|
return false unless locale.is_a?(String)
|
|
121
122
|
|
|
122
|
-
locale.match?(
|
|
123
|
+
locale.match?(Constants::LOCALE_PATTERN)
|
|
123
124
|
end
|
|
124
125
|
|
|
125
126
|
# Check if any pages in the site are marked for localization
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'liquid'
|
|
4
|
+
require_relative '../constants'
|
|
5
|
+
require_relative '../utils/error_handler'
|
|
4
6
|
|
|
5
7
|
module Jekyll
|
|
6
8
|
module L10n
|
|
@@ -99,7 +101,7 @@ module Jekyll
|
|
|
99
101
|
page&.data&.[]('locale')
|
|
100
102
|
end
|
|
101
103
|
rescue StandardError => e
|
|
102
|
-
|
|
104
|
+
ErrorHandler.log_warning('retrieving current locale', e)
|
|
103
105
|
nil
|
|
104
106
|
end
|
|
105
107
|
|
|
@@ -124,7 +126,7 @@ module Jekyll
|
|
|
124
126
|
end
|
|
125
127
|
|
|
126
128
|
def already_localized?(url_str)
|
|
127
|
-
%r
|
|
129
|
+
%r{^/#{Constants::LOCALE_CODE_SEGMENT}(?=/|\?)}o.match?(url_str)
|
|
128
130
|
end
|
|
129
131
|
|
|
130
132
|
def external_url?(url_str)
|
|
@@ -192,7 +194,7 @@ module Jekyll
|
|
|
192
194
|
|
|
193
195
|
def strip_locale_from_url(url)
|
|
194
196
|
# Strip leading locale prefix like /es/, /fr/, /pt_BR/, /zh-CN/
|
|
195
|
-
url.sub(%r
|
|
197
|
+
url.sub(%r{^/(#{Constants::LOCALE_CODE_SEGMENT})(?=/|$)}o, '')
|
|
196
198
|
end
|
|
197
199
|
end
|
|
198
200
|
end
|