jekyll-l10n 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +94 -0
  4. data/lib/jekyll-l10n/constants.rb +136 -0
  5. data/lib/jekyll-l10n/errors.rb +60 -0
  6. data/lib/jekyll-l10n/extraction/compendium_merger.rb +142 -0
  7. data/lib/jekyll-l10n/extraction/compendium_translator.rb +138 -0
  8. data/lib/jekyll-l10n/extraction/config_loader.rb +114 -0
  9. data/lib/jekyll-l10n/extraction/dom_attribute_extractor.rb +69 -0
  10. data/lib/jekyll-l10n/extraction/dom_text_extractor.rb +89 -0
  11. data/lib/jekyll-l10n/extraction/extractor.rb +153 -0
  12. data/lib/jekyll-l10n/extraction/html_string_extractor.rb +103 -0
  13. data/lib/jekyll-l10n/extraction/logger.rb +48 -0
  14. data/lib/jekyll-l10n/extraction/result_saver.rb +95 -0
  15. data/lib/jekyll-l10n/jekyll/file_sync.rb +110 -0
  16. data/lib/jekyll-l10n/jekyll/generator.rb +106 -0
  17. data/lib/jekyll-l10n/jekyll/localized_page.rb +150 -0
  18. data/lib/jekyll-l10n/jekyll/localized_page_mapper.rb +51 -0
  19. data/lib/jekyll-l10n/jekyll/page_locator.rb +59 -0
  20. data/lib/jekyll-l10n/jekyll/page_writer.rb +120 -0
  21. data/lib/jekyll-l10n/jekyll/post_write_html_reprocessor.rb +118 -0
  22. data/lib/jekyll-l10n/jekyll/post_write_processor.rb +71 -0
  23. data/lib/jekyll-l10n/jekyll/regeneration_checker.rb +123 -0
  24. data/lib/jekyll-l10n/jekyll/url_filter.rb +199 -0
  25. data/lib/jekyll-l10n/po_file/loader.rb +64 -0
  26. data/lib/jekyll-l10n/po_file/manager.rb +160 -0
  27. data/lib/jekyll-l10n/po_file/merger.rb +80 -0
  28. data/lib/jekyll-l10n/po_file/path_builder.rb +42 -0
  29. data/lib/jekyll-l10n/po_file/reader.rb +518 -0
  30. data/lib/jekyll-l10n/po_file/writer.rb +232 -0
  31. data/lib/jekyll-l10n/translation/block_text_extractor.rb +56 -0
  32. data/lib/jekyll-l10n/translation/html_translator.rb +229 -0
  33. data/lib/jekyll-l10n/translation/libre_translator.rb +226 -0
  34. data/lib/jekyll-l10n/translation/page_translation_loader.rb +99 -0
  35. data/lib/jekyll-l10n/translation/translator.rb +179 -0
  36. data/lib/jekyll-l10n/utils/debug_logger.rb +153 -0
  37. data/lib/jekyll-l10n/utils/error_handler.rb +67 -0
  38. data/lib/jekyll-l10n/utils/external_link_icon_preserver.rb +122 -0
  39. data/lib/jekyll-l10n/utils/file_operations.rb +55 -0
  40. data/lib/jekyll-l10n/utils/html_elements.rb +34 -0
  41. data/lib/jekyll-l10n/utils/html_parser.rb +52 -0
  42. data/lib/jekyll-l10n/utils/html_text_utils.rb +131 -0
  43. data/lib/jekyll-l10n/utils/logger_formatter.rb +114 -0
  44. data/lib/jekyll-l10n/utils/page_locales_config.rb +344 -0
  45. data/lib/jekyll-l10n/utils/po_entry_converter.rb +111 -0
  46. data/lib/jekyll-l10n/utils/site_config_accessor.rb +51 -0
  47. data/lib/jekyll-l10n/utils/text_normalizer.rb +47 -0
  48. data/lib/jekyll-l10n/utils/text_validator.rb +35 -0
  49. data/lib/jekyll-l10n/utils/translation_resolver.rb +115 -0
  50. data/lib/jekyll-l10n/utils/url_path_builder.rb +65 -0
  51. data/lib/jekyll-l10n/utils/url_transformer.rb +141 -0
  52. data/lib/jekyll-l10n/utils/xpath_reference_generator.rb +45 -0
  53. data/lib/jekyll-l10n/version.rb +10 -0
  54. data/lib/jekyll-l10n.rb +268 -0
  55. metadata +200 -0
@@ -0,0 +1,114 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../utils/page_locales_config"
4
+
5
+ module Jekyll
6
+ module L10n
7
# Loads and validates extraction configuration for files during build.
#
# ExtractionConfigLoader matches generated HTML files against the pages of a
# Jekyll site, reports whether extraction is enabled for a file, wraps the
# matching page's front matter in PageLocalesConfig, and recognises files that
# live under a locale subdirectory of the destination so they can be skipped.
#
# @example
#   loader = ExtractionConfigLoader.new(site, '_site')
#   if loader.valid_for_extraction?(file_path)
#     config = loader.load_page_config(file_path)
#   end
class ExtractionConfigLoader
  # Initialize a new ExtractionConfigLoader.
  #
  # @param site [Jekyll::Site] Jekyll site object (only `pages` is used here)
  # @param dest [String] Destination build directory; a trailing "/" is tolerated
  def initialize(site, dest)
    @site = site
    @dest = dest
  end

  # Check if a file is valid for extraction.
  #
  # A file qualifies when it matches a page (see #find_page_config_for_file)
  # and the PageLocalesConfig built from that page's front matter answers
  # truthily to both `enabled?` and `extract_on_build?`.
  #
  # @param file_path [String] Path to file to check
  # @return [Boolean] false when no page matches, otherwise the combined
  #   result of `enabled?` and `extract_on_build?`
  def valid_for_extraction?(file_path)
    page_config = find_page_config_for_file(file_path)
    return false unless page_config

    config = PageLocalesConfig.new(page_config)
    config.enabled? && config.extract_on_build?
  end

  # Load extraction configuration for a file.
  #
  # Wraps the front matter of the matching page in PageLocalesConfig. When no
  # page matches, nil is handed to PageLocalesConfig.new.
  # NOTE(review): how PageLocalesConfig treats nil is not visible here - confirm.
  #
  # @param file_path [String] Path to file
  # @return [PageLocalesConfig] Localization configuration for the page
  def load_page_config(file_path)
    PageLocalesConfig.new(find_page_config_for_file(file_path))
  end

  # Find page configuration for a file.
  #
  # Only pages whose front matter sets "with_locales" to exactly true are
  # considered. A page matches when file_path ends with the page's destination
  # path, with a trailing "/" expanded to "/index.html". The first matching
  # page wins.
  #
  # @param file_path [String] Path to generated file
  # @return [Hash, nil] Page front matter data if found, nil otherwise
  def find_page_config_for_file(file_path)
    matched = @site.pages.find do |page|
      next false unless page.data["with_locales"] == true

      page_output = page.output_ext ? page.destination("") : page.destination("/")
      file_path.end_with?(page_output.sub(%r!/$!, "/index.html"))
    end

    matched ? matched.data : nil
  end

  # Check if a file is a localized page variant that should be skipped.
  #
  # The destination directory is removed from the front of file_path only
  # (never from the middle of the path), and a destination written with a
  # trailing "/" is handled, so the remainder always keeps its leading "/".
  # The file is skipped when that remainder starts with "/<locale>/" for any
  # locale listed under "with_locales_data" => "locales" on any site page.
  #
  # @param file_path [String] Path to file to check
  # @return [Boolean] True if file is in a locale subdirectory
  def skip_localized_page?(file_path)
    relative_path = path_relative_to_dest(file_path)
    configured_locales.any? { |locale| relative_path.start_with?("/#{locale}/") }
  end

  # Extract CSS selectors for element exclusion from configuration.
  #
  # Reads "with_locales_data" => "extraction" => "exclude_selectors" from
  # `config.data` and falls back to a built-in list when that value is absent.
  #
  # @param config [#data] Object exposing the page front matter Hash through
  #   `data` (Extractor passes the result of #load_page_config)
  # @return [Array<String>] CSS selectors for excluded elements
  def extract_exclude_selectors(config)
    config.data.dig("with_locales_data", "extraction", "exclude_selectors") ||
      ["script", "style", "code.language-plaintext", "pre code"]
  end

  private

  # Strip the destination directory from the start of file_path, keeping the
  # leading "/" of the remainder. Paths outside the destination are returned
  # unchanged.
  def path_relative_to_dest(file_path)
    base = @dest.to_s.chomp("/")
    return file_path if base.empty?

    # The lookahead keeps sibling directories such as "_site2" from matching.
    file_path.sub(%r!\A#{Regexp.escape(base)}(?=/)!, "")
  end

  # Unique locale codes declared by any page of the site.
  def configured_locales
    @site.pages.map { |page| page.data.dig("with_locales_data", "locales") || [] }.flatten.uniq
  end
end
113
+ end
114
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../utils/xpath_reference_generator"
4
+ require_relative "../utils/text_validator"
5
+
6
+ module Jekyll
7
+ module L10n
8
# Extracts HTML attribute values from elements for translation.
#
# For an element node, DomAttributeExtractor reads each requested attribute,
# trims it, keeps the values accepted by TextValidator, and pairs every kept
# value with a reference produced by XPathReferenceGenerator that includes the
# attribute name.
#
# @example
#   entries = DomAttributeExtractor.extract(node, 'docs/index.html', '_site',
#                                           ['title', 'alt', 'aria-label'])
#   # => [{ :msgid => "...", :msgstr => "", :reference => "..." }, ...]
module DomAttributeExtractor
  extend self

  # Build extraction entries for the translatable attributes of a node.
  #
  # @param node [Nokogiri::XML::Element] DOM node to inspect
  # @param file_path [String] Source file path (forwarded to the reference generator)
  # @param dest [String] Destination directory (forwarded to the reference generator)
  # @param translatable_attrs [Array<String>] Attribute names to read
  #   (e.g., ['title', 'alt', 'aria-label', 'placeholder', 'aria-description'])
  # @return [Array<Hash>] Empty when the node is not an element; otherwise one
  #   hash per kept value with:
  #   - :msgid [String] The trimmed attribute value
  #   - :msgstr [String] Empty string (to be filled by translator)
  #   - :reference [String] Reference generated for the node and attribute name
  def extract(node, file_path, dest, translatable_attrs)
    return [] unless node.element?

    extract_node_attributes(node, translatable_attrs).map do |text, name|
      location = XPathReferenceGenerator.generate(node, file_path, dest, name)
      { :msgid => text, :msgstr => "", :reference => location }
    end
  end

  # Collect the valid attribute values of a node.
  #
  # The result is keyed by the trimmed value, so two attributes carrying the
  # same text collapse into one pair and the attribute read last supplies the
  # name.
  #
  # @param node [Nokogiri::XML::Element] DOM element to read attributes from
  # @param translatable_attrs [Array<String>] Attribute names to read
  # @return [Hash{String => String}] Trimmed value mapped to attribute name
  def extract_node_attributes(node, translatable_attrs)
    translatable_attrs.each_with_object({}) do |name, found|
      raw = node[name]
      # Absent and empty attributes are skipped before trimming.
      next if raw.nil? || raw.empty?

      text = raw.strip
      found[text] = name if TextValidator.valid?(text)
    end
  end
end
68
+ end
69
+ end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../utils/text_normalizer"
4
+ require_relative "../utils/xpath_reference_generator"
5
+ require_relative "../utils/html_text_utils"
6
+ require_relative "../utils/html_elements"
7
+ require_relative "../utils/text_validator"
8
+
9
+ module Jekyll
10
+ module L10n
11
# Extracts text content from HTML elements for translation.
#
# DomTextExtractor handles element nodes whose tag name is listed in
# HtmlTextUtils::CONTENT_ELEMENTS. It obtains their text through
# HtmlTextUtils.extract_with_inline_tags, keeps it only when TextValidator
# accepts it, and attaches a reference from XPathReferenceGenerator. Elements
# whose children are exclusively block-level elements (plus blank text) are
# left for the walk over their children.
#
# @example
#   entry = DomTextExtractor.extract(node, 'docs/index.html', '_site')
#   # => { :msgid => "...", :msgstr => "", :reference => "..." } or nil
module DomTextExtractor
  extend self

  # Extract text content from an HTML element.
  #
  # @param node [Nokogiri::XML::Element] DOM node to extract from
  # @param file_path [String] Source file path (forwarded to the reference generator)
  # @param dest [String] Destination directory (forwarded to the reference generator)
  # @return [Hash, nil] Hash with :msgid, :msgstr (empty string) and :reference,
  #   or nil when the node is not extractable or yields no valid text
  def extract(node, file_path, dest)
    return nil unless extractable?(node)

    msgid = extract_block_text(node)
    return nil if msgid.nil?

    location = XPathReferenceGenerator.generate(node, file_path, dest)
    { :msgid => msgid, :msgstr => "", :reference => location }
  end

  # Whether the node is an element listed in HtmlTextUtils::CONTENT_ELEMENTS.
  #
  # @param node [Nokogiri::XML::Node] Node to test
  # @return [Boolean]
  def extractable?(node)
    node.element? && HtmlTextUtils::CONTENT_ELEMENTS.include?(node.name)
  end

  # Text of the node including inline tags, or nil when the node only wraps
  # block elements or the text is rejected by TextValidator.
  #
  # @param node [Nokogiri::XML::Element] Element to read
  # @return [String, nil]
  def extract_block_text(node)
    return nil if only_contains_block_elements?(node)

    candidate = HtmlTextUtils.extract_with_inline_tags(node)
    return candidate if TextValidator.valid?(candidate)

    nil
  end

  # True when the node has at least one block-level element child and no child
  # that is non-blank text or a non-block element.
  #
  # @param node [Nokogiri::XML::Element] Element whose children are inspected
  # @return [Boolean]
  def only_contains_block_elements?(node)
    inline_content = node.children.any? do |child|
      non_empty_text?(child) || non_block_element?(child)
    end
    return false if inline_content

    block_element_children?(node)
  end

  private

  # Text node with something other than whitespace in it.
  def non_empty_text?(child)
    child.text? && !child.content.strip.empty?
  end

  # Element whose tag is not listed in HtmlTextUtils::ALL_BLOCK_ELEMENTS.
  def non_block_element?(child)
    child.element? && !HtmlTextUtils::ALL_BLOCK_ELEMENTS.include?(child.name)
  end

  # At least one child is an element listed in HtmlTextUtils::ALL_BLOCK_ELEMENTS.
  def block_element_children?(node)
    node.children.any? do |child|
      child.element? && HtmlTextUtils::ALL_BLOCK_ELEMENTS.include?(child.name)
    end
  end
end
88
+ end
89
+ end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "config_loader"
4
+ require_relative "result_saver"
5
+ require_relative "logger"
6
+ require_relative "html_string_extractor"
7
+ require_relative "../utils/file_operations"
8
+ require_relative "../utils/site_config_accessor"
9
+
10
+ module Jekyll
11
+ module L10n
12
# String Extraction Orchestrator - finds translatable strings in generated HTML.
#
# The Extractor drives the extraction workflow visible in this class:
#   1. Lists every "*.html" file below the site's destination directory.
#   2. Skips files the config loader identifies as localized page copies.
#   3. For files that are valid for extraction, reads the HTML and collects
#      entries through HtmlStringExtractor.
#   4. Hands the entries to ExtractionResultSaver and sums the returned stats.
#   5. Asks the result saver to translate compendia when some page's
#      configuration reports `libretranslate_enabled?`.
#
# @example Usage
#   extractor = Extractor.new(site)
#   result = extractor.extract_site
#   # => { files_processed: 42, strings_extracted: 237, po_files_created: 3 }
#
# @see Jekyll::L10n::ExtractionResultSaver for PO file creation and updates
# @see Jekyll::L10n::HtmlStringExtractor for text and attribute extraction
class Extractor
  # @!attribute [r] site
  #   The Jekyll site object with generated pages
  #   @return [Jekyll::Site]
  attr_reader :site

  # Initialize the string extractor.
  #
  # Reads source and destination from SiteConfigAccessor and creates the
  # config loader and result saver used during extraction.
  #
  # @param site [Jekyll::Site] The Jekyll site object
  def initialize(site)
    @site = site
    @source = SiteConfigAccessor.source(@site)
    @dest = SiteConfigAccessor.dest(@site)
    @config_loader = ExtractionConfigLoader.new(@site, @dest)
    @result_saver = ExtractionResultSaver.new(@site)
  end

  # Extract all translatable strings from the generated site.
  #
  # Processes every HTML file, triggers compendium translation when enabled,
  # and logs a summary with the elapsed time. The duration is measured with
  # the monotonic clock so wall-clock adjustments cannot skew it.
  #
  # @return [Hash<Symbol, Integer>] Statistics hash with keys:
  #   - :files_processed - Number of HTML files processed
  #   - :strings_extracted - Total strings extracted
  #   - :po_files_created - Number of PO files created/updated
  # @example
  #   result = extractor.extract_site
  #   puts "Processed #{result[:files_processed]} files"
  def extract_site
    Jekyll.logger.info "Localization", "Extracting translatable strings..."
    started_at = monotonic_now
    stats = process_all_html_files
    translate_all_compendia
    ExtractionLogger.log_summary(stats, monotonic_now - started_at)
    stats
  end

  # Process every HTML file below the destination and sum the per-file stats.
  #
  # @return [Hash<Symbol, Integer>] Aggregated statistics (same keys as #default_stats)
  def process_all_html_files
    html_files = Dir.glob(File.join(@dest, "**", "*.html"))

    html_files.each_with_object(default_stats) do |file_path, totals|
      next if @config_loader.skip_localized_page?(file_path)

      file_stats = process_file(file_path)
      totals.each_key { |key| totals[key] += file_stats[key] }
    end
  end

  # Translate compendia when a page configuration enables LibreTranslate.
  #
  # @return [Object, nil] Result of the result saver, nil when not enabled
  def translate_all_compendia
    config = find_libretranslate_config
    return unless config

    @result_saver.translate_compendia(config)
  end

  # Extract and save the strings of a single generated file.
  #
  # Any StandardError raised along the way is logged through
  # ExtractionLogger.log_error and turned into zero stats, so one failing
  # file does not abort the whole run.
  #
  # @param file_path [String] Path of the generated HTML file
  # @return [Hash<Symbol, Integer>] Stats from the result saver, or zero stats
  #   when the file is not valid for extraction, yields no entries, or fails
  def process_file(file_path)
    return default_stats unless @config_loader.valid_for_extraction?(file_path)

    config = @config_loader.load_page_config(file_path)
    entries = extract_strings_from_file(file_path, config)
    return default_stats if entries.empty?

    page_path = construct_page_path(file_path)
    @result_saver.save_results(config, entries, page_path)
  rescue StandardError => e
    ExtractionLogger.log_error(file_path, e)
    default_stats
  end

  # Fresh statistics hash with every counter at zero.
  #
  # @return [Hash<Symbol, Integer>]
  def default_stats
    { :files_processed => 0, :strings_extracted => 0, :po_files_created => 0 }
  end

  # Read a generated file and extract its translatable entries.
  #
  # @param file_path [String] Path of the generated HTML file
  # @param config [PageLocalesConfig] Configuration returned by the config loader
  # @return [Array<Hash>] Extracted entries; empty when the file does not exist
  def extract_strings_from_file(file_path, config)
    return [] unless File.exist?(file_path)

    html = FileOperations.read_utf8(file_path)
    exclude_selectors = @config_loader.extract_exclude_selectors(config)

    extractor = HtmlStringExtractor.new(config.translatable_attributes, exclude_selectors)
    extractor.extract(html, @dest, file_path)
  end

  # Find the first page configuration that enables LibreTranslate.
  #
  # Only pages whose front matter sets "with_locales" to exactly true are
  # inspected; their configuration is loaded through the config loader using
  # the page's destination path.
  #
  # @return [PageLocalesConfig, nil] Matching configuration, or nil
  def find_libretranslate_config
    return nil unless @site.respond_to?(:pages)

    @site.pages.each do |page|
      next unless page.data["with_locales"] == true

      config = @config_loader.load_page_config(page.destination(""))
      return config if config.libretranslate_enabled?
    end

    nil
  end

  private

  # Path of a generated file relative to the destination directory.
  #
  # The destination is removed from the start of the path only, and a
  # destination written with a trailing "/" is handled as well.
  def construct_page_path(file_path)
    base = @dest.to_s.chomp("/")
    file_path.sub(%r!\A#{Regexp.escape(base)}/!, "")
  end

  # Current reading of the monotonic clock, in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
152
+ end
153
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "dom_text_extractor"
4
+ require_relative "dom_attribute_extractor"
5
+ require_relative "logger"
6
+
7
+ module Jekyll
8
+ module L10n
9
# Extracts translatable strings from HTML documents for localization.
#
# HtmlStringExtractor parses HTML with Nokogiri and walks the resulting tree
# depth-first. For each visited node it asks DomTextExtractor for a text entry
# and DomAttributeExtractor for attribute entries. Element nodes matching any
# exclude selector are skipped together with their whole subtree. The
# collected entries are deduplicated by :msgid, keeping the first occurrence.
#
# @example
#   extractor = HtmlStringExtractor.new(['title', 'alt'], ['script', 'style'])
#   entries = extractor.extract(html_content, '_site', 'docs/index.html')
#   # Returns array of hash entries with :msgid, :msgstr, :reference keys
class HtmlStringExtractor
  attr_reader :translatable_attrs, :exclude_selectors

  # Initialize a new HtmlStringExtractor.
  #
  # @param translatable_attrs [Array<String>] HTML attributes to extract
  #   (e.g., ['title', 'alt', 'aria-label', 'placeholder', 'aria-description'])
  # @param exclude_selectors [Array<String>] CSS selectors for elements to skip
  #   during extraction (e.g., ['script', 'style', '.no-translate'])
  def initialize(translatable_attrs, exclude_selectors)
    @translatable_attrs = translatable_attrs
    @exclude_selectors = exclude_selectors
  end

  # Extract translatable strings from HTML.
  #
  # @param html [String] HTML content to extract from
  # @param dest [String] Destination directory path (forwarded to the DOM extractors)
  # @param file_path [String] Path to source file (forwarded to the DOM extractors)
  # @return [Array<Hash>] Entries unique by :msgid, each containing:
  #   - :msgid [String] The text or attribute value to translate
  #   - :msgstr [String] Empty string (to be filled by translator)
  #   - :reference [String] File location reference for debugging
  def extract(html, dest, file_path)
    collected = []
    walk_dom(Nokogiri::HTML(html).root, file_path, collected, dest)
    collected.uniq { |entry| entry[:msgid] }
  end

  private

  # Visit a node: record its text entry, then its attribute entries, then
  # recurse into its children. Nil nodes and excluded elements end the walk
  # for that branch.
  def walk_dom(node, file_path, entries, dest)
    return if node.nil? || should_exclude_element?(node)

    text_entry = DomTextExtractor.extract(node, file_path, dest)
    entries << text_entry if text_entry

    entries.concat(DomAttributeExtractor.extract(node, file_path, dest, @translatable_attrs))

    node.children.each { |child| walk_dom(child, file_path, entries, dest) }
  end

  # Whether an element matches any exclude selector. Selector errors are
  # logged as warnings and treated as "not excluded" so extraction continues.
  def should_exclude_element?(node)
    return false unless node.element?

    @exclude_selectors.any? { |selector| node.matches?(selector) }
  rescue Nokogiri::CSS::SyntaxError => e
    Jekyll.logger.warn "Localization", "CSS selector syntax error: #{e.message}"
    false
  rescue StandardError => e
    Jekyll.logger.warn "Localization", "Selector matching error (continuing): #{e.message}"
    false
  end
end
102
+ end
103
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module L10n
5
# Logs extraction process errors and summary statistics.
#
# ExtractionLogger writes to Jekyll.logger under the "Localization" topic:
# one error line per failed file, and a five-line summary with file count,
# string count, PO file count and elapsed time.
#
# @see Jekyll::L10n::Extractor for extraction workflow context
class ExtractionLogger
  class << self
    # Log an extraction error at error level.
    #
    # @param file_path [String] Path to file where error occurred
    # @param error [StandardError] The error that occurred (its message is logged)
    # @return [void]
    def log_error(file_path, error)
      message = "Error extracting from #{file_path}: #{error.message}"
      Jekyll.logger.error("Localization", message)
    end

    # Log extraction completion summary at info level.
    #
    # @param stats [Hash] Statistics hash with keys:
    #   - :files_processed [Integer]
    #   - :strings_extracted [Integer]
    #   - :po_files_created [Integer]
    # @param elapsed [Float] Time elapsed in seconds (rounded to two decimals)
    # @return [void]
    def log_summary(stats, elapsed)
      report("Extraction complete:")
      report(" Files processed: #{stats[:files_processed]}")
      report(" Strings extracted: #{stats[:strings_extracted]}")
      report(" PO files created/updated: #{stats[:po_files_created]}")
      report(" Time: #{elapsed.round(2)}s")
    end

    private

    # Emit one info line under the "Localization" topic.
    def report(message)
      Jekyll.logger.info("Localization", message)
    end
  end
end
47
+ end
48
+ end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../po_file/manager"
4
+ require_relative "../utils/page_locales_config"
5
+ require_relative "../utils/site_config_accessor"
6
+ require_relative "../utils/logger_formatter"
7
+ require_relative "compendium_translator"
8
+ require_relative "compendium_merger"
9
+
10
+ module Jekyll
11
+ module L10n
12
# Saves extraction results to PO files and optionally translates compendia.
#
# ExtractionResultSaver writes the entries extracted from one page through
# PoFileManager, once per locale of the page configuration. It runs
# CompendiumMerger when the configuration asks for compendium updates, and
# delegates automatic translation to CompendiumTranslator.
#
# @example
#   saver = ExtractionResultSaver.new(site)
#   stats = saver.save_results(config, entries, 'docs/index.html')
#   saver.translate_compendia(config) if config.libretranslate_enabled?
class ExtractionResultSaver
  # Initialize a new ExtractionResultSaver.
  #
  # Also builds a site-level PageLocalesConfig from the locales data that
  # SiteConfigAccessor extracts from the site.
  #
  # @param site [Jekyll::Site] Jekyll site object
  def initialize(site)
    @site = site
    site_locales = SiteConfigAccessor.extract_locales_data(@site)
    @site_config = PageLocalesConfig.new({ "with_locales_data" => site_locales })
  end

  # Save extraction results to PO files.
  #
  # @param config [PageLocalesConfig] Localization configuration
  # @param entries [Array<Hash>] Array of extracted entries with :msgid, :msgstr, :reference
  # @param page_path [String] Page path for file organization (e.g., 'docs/index.html')
  # @return [Hash] Statistics hash with keys:
  #   - :files_processed [Integer] Always 1
  #   - :strings_extracted [Integer] Number of entries given
  #   - :po_files_created [Integer] Number of locales for which the PO manager
  #     reported a save
  def save_results(config, entries, page_path)
    LoggerFormatter.debug_if_enabled("ExtractionResultSaver", "Processing page: #{page_path}")

    manager = PoFileManager.new(@site, config.locales_dir)
    written = save_po_files(manager, config, entries, page_path)

    {
      :files_processed   => 1,
      :strings_extracted => entries.length,
      :po_files_created  => written,
    }
  end

  # Translate compendia by delegating to CompendiumTranslator.
  #
  # @param config [PageLocalesConfig] Localization configuration with LibreTranslate settings
  # @return [void]
  def translate_compendia(config)
    translator = CompendiumTranslator.new(@site)
    translator.translate_compendia(config)
  end

  private

  # Save one PO file per configured locale, then merge compendia when the
  # configuration requests it. Returns how many saves the manager reported
  # as truthy.
  def save_po_files(po_manager, config, entries, page_path)
    written = config.locales.count do |locale|
      po_manager.save_po_file(locale, entries, :page_path => page_path)
    end

    CompendiumMerger.new(@site).merge_compendia(po_manager, config) if config.update_compendium?

    written
  end
end
94
+ end
95
+ end