metanorma-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+
5
+ module Metanorma
6
+ module Tools
7
# Command-line entry point for metanorma-tools, built on Thor.
#
# Exposes figure extraction plus placeholders for planned comment-sheet
# and ISO-fetching commands.
class Cli < Thor
  # Tell Thor to exit with a non-zero status when a command fails.
  def self.exit_on_failure?
    true
  end

  map %w[--version -v] => :__version

  desc '--version, -v', 'Print the version'
  # Print the gem version to stdout.
  def __version
    puts Metanorma::Tools::VERSION
  end

  desc 'extract-images INPUT_XML', 'Extract embedded figures from Metanorma presentation XML files'
  long_desc <<~DESC
    Extract embedded figures from Metanorma presentation XML files with ISO DRG compliance features.

    This tool automatically extracts document metadata, generates ISO DRG compliant filenames,
    and supports both SVG and PNG figure formats with optional ZIP packaging.

    For detailed usage examples and options, see: docs/figure-extraction.adoc
  DESC
  option :output_dir, type: :string, aliases: '-o', desc: 'Output directory for extracted figures'
  option :prefix, type: :string, aliases: '-p', desc: 'Prefix for generated figure filenames'
  option :zip, type: :boolean, default: false, desc: 'Create a ZIP archive of extracted figures'
  option :verbose, type: :boolean, default: false, aliases: '-v', desc: 'Show detailed progress information'
  option :retain_original_filenames, type: :boolean, default: false, desc: 'For ISO documents, retain original filenames in generated names'

  # Extract every embedded figure of INPUT_XML using a FigureExtractor
  # configured from the command-line options.
  #
  # @param input_xml [String] path to the presentation XML file
  def extract_images(input_xml)
    extractor = FigureExtractor.new(options)
    extractor.extract(input_xml, options[:output_dir], options[:prefix])
  end

  # Placeholder for future comment management functionality
  # (will integrate metanorma/commenter gem functionality).
  # The description is registered together with the subcommand so that no
  # dangling `desc` is left behind when CommentCli is not loaded.
  if defined?(CommentCli)
    desc 'comment SUBCOMMAND', 'Manage ISO comment sheets (planned)'
    subcommand 'comment', CommentCli
  end

  # Placeholder for future ISO document fetching functionality
  desc 'fetch-iso DOCUMENT_ID', 'Fetch ISO documents from OBP (planned)'
  # Announce the planned feature and echo the requested document id.
  #
  # @param document_id [String] identifier supplied on the command line
  def fetch_iso(document_id)
    puts "ISO document fetching functionality is planned for future release."
    puts "This will fetch ISO documents from the OBP into Metanorma format."
    puts "Document ID: #{document_id}"
  end

  # Help command that shows the expanded purpose
  desc 'help [COMMAND]', 'Show help for metanorma-tools commands'
  # Print the project banner (only for the top-level help), then delegate
  # to Thor's standard help output.
  #
  # The banner deliberately does not end with a "Commands:" line: Thor's own
  # help prints that header itself, so emitting it here duplicated it.
  #
  # @param command [String, nil] command to describe, or nil for the overview
  def help(command = nil)
    if command.nil?
      puts <<~HELP
        Metanorma Tools - Standards Editing Lifecycle Support
        =====================================================

        Metanorma Tools supports the lifecycle of standards editing for various flavors
        to facilitate pre and post-compilation of Metanorma documents.

        Available tools:
        • Figure extraction - Extract embedded figures from Metanorma presentation XML files
        • Comment management - Manage ISO comment sheets (planned)
        • ISO document fetching - Fetch documents from OBP (planned)

        For detailed documentation, see the docs/ directory:
        • docs/figure-extraction.adoc - Figure extraction guide
        • docs/iso-drg-filename-guidance.adoc - ISO DRG compliance guidance

      HELP
    end
    super
  end
end
78
+ end
79
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+
5
+ module Metanorma
6
+ module Tools
7
+ module Commands
8
# Stand-alone Thor command wrapping figure extraction; `extract` is the
# default task, so the command name may be omitted on the command line.
class ExtractImages < Thor
  # Exit with a non-zero status when the command fails, consistent with
  # Metanorma::Tools::Cli. Defining this also avoids Thor's deprecation
  # warning about the exit status of failing commands.
  def self.exit_on_failure?
    true
  end

  desc 'extract INPUT_XML', 'Extract figures from Metanorma XML'
  option :output_dir, type: :string, aliases: '-o', desc: 'Output directory for extracted figures'
  option :prefix, type: :string, aliases: '-p', desc: 'Prefix for generated figure filenames'
  option :zip, type: :boolean, default: false, desc: 'Create a ZIP archive of extracted figures'
  option :verbose, type: :boolean, default: false, aliases: '-v', desc: 'Show detailed progress information'
  option :auto_prefix, type: :boolean, default: true, desc: 'Automatically generate prefix from document metadata'

  # Run the extraction for INPUT_XML with the parsed command-line options.
  #
  # @param input_xml [String] path to the Metanorma XML file
  def extract(input_xml)
    FigureExtractor.new(options).extract(input_xml, options[:output_dir], options[:prefix])
  end

  default_task :extract
end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Metanorma
4
+ module Tools
5
# Namespace that groups the individual Thor command classes.
module Commands; end
7
+ end
8
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lutaml/model'
4
+
5
+ module Metanorma
6
+ module Tools
7
# Bibliographic metadata of a Metanorma document, as collected from its
# <bibdata> element, plus helpers that derive a filename prefix and a
# human-readable label from it.
class DocumentMetadata < Lutaml::Model::Serializable
  attribute :title, :string
  attribute :docnumber, :string
  attribute :stage, :string
  attribute :substage, :string
  attribute :docidentifier, :string
  attribute :standard_number, :string
  attribute :part_number, :string
  attribute :edition, :string
  attribute :stage_code, :string
  attribute :stage_abbreviation, :string
  attribute :flavor, :string, default: -> { 'iso' }

  # Build the automatic filename prefix.
  #
  # ISO documents use the ISO DRG pattern
  # {StandardNumber}_{stageCode}_ed{editionNumber}; every other flavor gets
  # the same pattern preceded by the flavor name.
  #
  # Components that are missing (nil or empty) are left out instead of being
  # interpolated as empty strings, so incomplete metadata no longer yields
  # malformed prefixes such as "__ed". When nothing is known the result is an
  # empty string.
  #
  # @return [String]
  def auto_prefix
    components = [standard_number, stage_abbreviation&.downcase, edition_tag]
    components.unshift(flavor) unless flavor == 'iso'
    components.reject { |component| component.to_s.empty? }.join('_')
  end

  # Human-readable label: the document identifier and title when an
  # identifier is known, otherwise a description assembled from the number,
  # edition and stage.
  #
  # @return [String]
  def to_s
    return "#{docidentifier} - #{title}" if docidentifier

    "ISO #{standard_number} Edition #{edition} Stage #{stage_code} (#{stage_abbreviation})"
  end

  private

  # "ed<edition>" when an edition is known, nil otherwise.
  def edition_tag
    "ed#{edition}" unless edition.to_s.empty?
  end
end
39
+ end
40
+ end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'base64'
4
+ require 'fileutils'
5
+ require_relative 'iso_graphic_filename'
6
+
7
+ module Metanorma
8
+ module Tools
9
# A single figure taken from a document: either Base64 payload that came
# from a data URI (formats named :datauri_*) or inline SVG markup (:svg).
# Knows how to compute its size, build its output filename and write itself
# to disk.
class Figure
  # Supported formats: display name, file extension, and whether the payload
  # is written in binary mode.
  FORMATS = {
    datauri_png: { name: 'PNG', ext: 'png', binary: true },
    datauri_jpeg: { name: 'JPEG', ext: 'jpg', binary: true },
    datauri_gif: { name: 'GIF', ext: 'gif', binary: true },
    datauri_svg: { name: 'SVG', ext: 'svg', binary: false },
    datauri_webp: { name: 'WebP', ext: 'webp', binary: true },
    svg: { name: 'SVG', ext: 'svg', binary: false }
  }.freeze

  attr_reader :autonum, :content, :format, :original_filename, :file_size

  # @param autonum [String] figure number as shown in the document (e.g. "C.2 a")
  # @param content [String] Base64 text for :datauri_* formats, raw markup otherwise
  # @param format [Symbol] normally one of the FORMATS keys
  # @param original_filename [String, nil] source filename, when known
  def initialize(autonum, content, format, original_filename = nil)
    @autonum = autonum
    @content = content
    @format = format
    @original_filename = original_filename
    @file_size = calculate_size
  end

  # Write the figure into output_dir (created when missing).
  #
  # @param output_dir [String] destination directory
  # @param prefix [String] filename prefix for the simple naming scheme
  # @param document_metadata [#standard_number, #part_number, #edition, #stage_code, nil]
  #   when given, the filename is produced by IsoGraphicFilename instead
  # @param retain_original_filenames [Boolean] pass the original filename on
  #   to IsoGraphicFilename
  # @return [String] path of the written file
  def to_file(output_dir, prefix, document_metadata = nil, retain_original_filenames = false)
    FileUtils.mkdir_p(output_dir)

    filename = generate_filename(prefix, document_metadata, retain_original_filenames)
    filepath = File.join(output_dir, filename)

    write_content(filepath)
    puts " Saved: #{filename}"
    filepath
  end

  # Display name of the format; falls back to the upper-cased format symbol
  # for formats that are not listed in FORMATS.
  #
  # @return [String]
  def format_name
    FORMATS.dig(@format, :name) || @format.to_s.upcase
  end

  private

  # True when the content is Base64 text that originated from a data URI.
  def data_uri_format?
    @format.to_s.start_with?('datauri_')
  end

  # The bytes/text that end up in the output file.
  def payload
    data_uri_format? ? Base64.decode64(@content) : @content.to_s
  end

  # Size in bytes of what write_content writes, so the reported size always
  # matches the file on disk (formats outside FORMATS used to report 0).
  def calculate_size
    payload.bytesize
  end

  # Write the payload to filepath.
  #
  # Formats that are not listed in FORMATS are written verbatim in binary
  # mode (their filename gets the "unknown" extension). Previously such
  # figures were skipped silently while to_file still reported them as saved
  # and returned a path to a file that did not exist.
  def write_content(filepath)
    format_info = FORMATS[@format]

    if format_info.nil? || format_info[:binary]
      File.binwrite(filepath, payload)
    else
      File.open(filepath, 'w', encoding: 'utf-8') { |file| file.write(payload) }
    end
  end

  # Build the output filename: an ISO DRG name when document metadata is
  # available, otherwise "<prefix>fig<autonum without dots>[_<basename>].<ext>".
  def generate_filename(prefix, document_metadata = nil, retain_original_filenames = false)
    extension = FORMATS.dig(@format, :ext) || 'unknown'
    has_original = @original_filename && !@original_filename.empty?

    if document_metadata
      # Split a trailing subfigure letter off the number ("C.2 a" -> "C.2", "a").
      figure_number, subfigure = parse_figure_number(@autonum)

      return IsoGraphicFilename.new(
        standard_number: document_metadata.standard_number&.to_i,
        part_number: document_metadata.part_number&.to_i,
        edition_number: document_metadata.edition&.to_i,
        stage_code: document_metadata.stage_code,
        content_type: 'figure',
        figure_number: figure_number,
        subfigure: subfigure,
        file_extension: extension,
        # The original filename is only forwarded on explicit request.
        original_filename: (@original_filename if retain_original_filenames && has_original)
      ).generate_filename
    end

    # Fallback to simple prefix-based naming
    sanitized_autonum = @autonum.gsub('.', '')
    return "#{prefix}fig#{sanitized_autonum}.#{extension}" unless has_original

    basename = File.basename(@original_filename, File.extname(@original_filename))
    "#{prefix}fig#{sanitized_autonum}_#{basename}.#{extension}"
  end

  # Split an autonum such as "C.2 a" into ["C.2", "a"]; numbers without a
  # trailing lower-case subfigure letter ("A.1", "3") yield [autonum, nil].
  def parse_figure_number(autonum)
    match = autonum.match(/^(.+?)\s+([a-z])$/)
    match ? [match[1], match[2]] : [autonum, nil]
  end
end
123
+ end
124
+ end
@@ -0,0 +1,384 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'base64'
5
+ require 'fileutils'
6
+ require 'zip'
7
+ require 'tmpdir'
8
+
9
+ module Metanorma
10
+ module Tools
11
# Extracts the figures embedded in a Metanorma presentation XML file and
# stores them either as individual files in a directory or inside a ZIP
# archive, printing progress and a summary to stdout.
class FigureExtractor
  METANORMA_NS = { 'xmlns' => 'https://www.metanorma.org/ns/standoc' }.freeze

  # Data-URI media type => Figure format.
  MIMETYPE_FORMATS = {
    'image/png' => :datauri_png,
    'image/jpeg' => :datauri_jpeg,
    'image/jpg' => :datauri_jpeg,
    'image/gif' => :datauri_gif,
    'image/svg+xml' => :datauri_svg,
    'image/webp' => :datauri_webp,
    '' => :datauri_png # Default for empty mimetype
  }.freeze

  attr_reader :options

  # @param options [Hash] extraction options; keys may be strings or symbols.
  #   Options read by this class: :zip, :verbose, :auto_prefix and
  #   :retain_original_filenames.
  def initialize(options = {})
    # Convert string keys to symbols for consistency
    @options = { zip: false, verbose: false }.merge(options.transform_keys(&:to_sym))
  end

  # Extract all figures of input_xml.
  #
  # Terminates the process with status 1 when the input file is missing or
  # cannot be read. Returns early (after printing a notice) when the document
  # contains no figures.
  #
  # @param input_xml [String] path to the presentation XML file
  # @param output_dir [String, nil] destination; derived automatically when nil/empty
  # @param prefix [String, nil] filename prefix; derived from metadata when nil
  # @return [nil]
  def extract(input_xml, output_dir = nil, prefix = nil)
    validate_input(input_xml)

    doc = parse_xml(input_xml)
    metadata = extract_document_metadata(doc)
    prefix = determine_prefix(prefix, metadata)
    output_dir = determine_output_dir(output_dir, prefix)

    figures = find_figures(doc)
    return if figures.empty?

    figure_objects, format_counts, total_size = process_figures(figures)

    if options[:zip]
      extract_to_zip(figure_objects, output_dir, prefix, metadata)
    else
      extract_to_directory(figure_objects, output_dir, prefix, metadata)
    end

    print_summary(metadata, prefix, figure_objects.length, format_counts, total_size, output_dir)
  end

  private

  # Abort with status 1 unless the input file exists.
  def validate_input(input_xml)
    return if File.exist?(input_xml)

    puts "Error: Input file '#{input_xml}' does not exist."
    exit 1
  end

  # Parse the input file with Nokogiri; abort with status 1 on any error.
  def parse_xml(input_xml)
    puts "Reading XML file: #{input_xml}"
    File.open(input_xml) { |file| Nokogiri::XML(file) }
  rescue StandardError => e
    puts "Error processing file: #{e.message}"
    puts e.backtrace if options[:verbose]
    exit 1
  end

  # Choose the filename prefix: the explicit one, else the one derived from
  # the document metadata, else the literal "figure".
  def determine_prefix(prefix, metadata)
    if prefix.nil? && metadata
      prefix = metadata.auto_prefix
      puts "Auto-generated prefix: #{prefix}"
    end

    if prefix.nil? || prefix.empty?
      prefix = 'figure'
      puts "Using default prefix: #{prefix}"
    end

    prefix
  end

  # Choose the output directory when none was given: the current directory
  # in ZIP mode, otherwise a directory named after the prefix.
  def determine_output_dir(output_dir, prefix)
    return output_dir unless output_dir.nil? || output_dir.empty?

    if options[:zip]
      # For ZIP mode, use current directory
      puts "Using current directory for ZIP output: #{Dir.pwd}"
      Dir.pwd
    else
      # For directory mode, use auto-prefix as directory name
      puts "Using auto-generated output directory: #{prefix}"
      prefix
    end
  end

  # Write all figures to a temporary directory, then move them into
  # output_dir.
  #
  # @return [Array<String>] final file paths
  def extract_to_directory(figure_objects, output_dir, prefix, metadata = nil)
    # Always extract to temporary directory first, then move to destination
    Dir.mktmpdir('metanorma_figures_') do |temp_dir|
      puts "\nExtracting #{figure_objects.length} figures to temporary directory: #{temp_dir}"

      temp_files = write_figures(figure_objects, temp_dir, prefix, metadata)

      # Ensure output directory exists
      FileUtils.mkdir_p(output_dir)

      # Move files from temp to final destination
      puts "Moving files to final destination: #{output_dir}"
      temp_files.map do |temp_file|
        filename = File.basename(temp_file)
        final_path = File.join(output_dir, filename)
        FileUtils.mv(temp_file, final_path)
        puts " Moved: #{filename}"
        final_path
      end
    end
  end

  # Write all figures to a temporary directory and pack them into
  # "<prefix>.zip" inside output_dir (current directory when nil).
  #
  # @return [Array<String>] one-element array holding the archive path
  def extract_to_zip(figure_objects, output_dir, prefix, metadata = nil)
    zip_filename = "#{prefix}.zip"
    FileUtils.mkdir_p(output_dir) if output_dir
    zip_path = File.join(output_dir || Dir.pwd, zip_filename)

    Dir.mktmpdir('metanorma_figures_zip_') do |temp_dir|
      puts "\nExtracting #{figure_objects.length} figures to temporary directory for ZIP: #{temp_dir}"

      temp_files = write_figures(figure_objects, temp_dir, prefix, metadata)

      puts "Creating ZIP archive: #{zip_filename}"
      # Start from a fresh archive: reopening the archive of a previous run
      # would fail on already-existing entry names or keep stale entries.
      FileUtils.rm_f(zip_path)
      Zip::File.open(zip_path, Zip::File::CREATE) do |zipfile|
        temp_files.each do |temp_file|
          filename = File.basename(temp_file)
          zipfile.add(filename, temp_file)
          puts " Added to ZIP: #{filename}"
        end
      end

      puts "ZIP archive created: #{zip_path}"
      [zip_path]
    end
  end

  # Write every figure into dir and return the written paths.
  def write_figures(figure_objects, dir, prefix, metadata)
    retain_original_filenames = should_retain_original_filenames?(metadata)
    figure_objects.map do |figure_obj|
      figure_obj.to_file(dir, prefix, metadata, retain_original_filenames)
    end
  end

  # Locate all <figure> elements of the document.
  #
  # An empty result is reported and returned to the caller, which stops
  # there; the process is no longer terminated from inside this helper.
  def find_figures(doc)
    figures = doc.xpath('//xmlns:figure', METANORMA_NS)
    puts "Found #{figures.length} figures"
    puts 'No figures found in the document' if figures.empty?
    figures
  end

  # Turn figure elements into Figure objects, skipping unusable ones.
  #
  # @return [Array(Array<Figure>, Hash{String=>Integer}, Integer)]
  #   the figures, a count per format name, and the total size in bytes
  def process_figures(figures)
    figure_objects = []
    format_counts = Hash.new(0)
    total_size = 0

    figures.each_with_index do |figure_element, index|
      figure_obj = process_single_figure(figure_element, index)
      next unless figure_obj

      figure_objects << figure_obj
      format_counts[figure_obj.format_name] += 1
      total_size += figure_obj.file_size
    end

    [figure_objects, format_counts, total_size]
  end

  # Build a Figure for one <figure> element, or nil when it has no usable
  # autonum attribute or no <image> descendant.
  def process_single_figure(figure_element, index)
    autonum = figure_element['autonum']

    if autonum.nil? || autonum.strip.empty?
      puts "Warning: Skipping figure #{index + 1} - missing autonum" if options[:verbose]
      return nil
    end

    image_element = figure_element.xpath('.//xmlns:image', METANORMA_NS).first
    unless image_element
      puts "Warning: Skipping figure #{index + 1} (autonum: #{autonum}) - no image element" if options[:verbose]
      return nil
    end

    create_figure_from_image(image_element, autonum)
  end

  # Dispatch on the kind of image: data URI, inline SVG, or unsupported.
  def create_figure_from_image(image_element, autonum)
    src = image_element['src']
    filename = image_element['filename']
    mimetype = image_element['mimetype']

    if src&.start_with?('data:')
      create_data_uri_figure(src, filename, autonum)
    elsif mimetype == 'image/svg+xml' || filename&.end_with?('.svg')
      create_svg_figure(image_element, src, autonum)
    else
      log_unsupported_figure(autonum, mimetype, src)
      nil
    end
  end

  # Build a Figure from a data URI; nil when the URI is malformed.
  def create_data_uri_figure(src, filename, autonum)
    data_uri_info = parse_data_uri(src)
    unless data_uri_info
      puts "Warning: Skipping figure #{autonum} - malformed data URI" if options[:verbose]
      return nil
    end

    puts " Figure #{autonum}: Data URI #{data_uri_info[:format_name]}"
    Figure.new(autonum, data_uri_info[:content], data_uri_info[:format], filename)
  end

  # Build a Figure from the markup nested inside the <image> element; nil
  # when the element has no content.
  def create_svg_figure(image_element, src, autonum)
    svg_content = image_element.inner_html
    if svg_content.empty?
      puts "Warning: Skipping figure #{autonum} - empty SVG content" if options[:verbose]
      return nil
    end

    original_filename = src unless src&.start_with?('data:')
    puts " Figure #{autonum}: SVG#{original_filename ? " (#{File.basename(original_filename)})" : ''}"
    Figure.new(autonum, svg_content, :svg, original_filename)
  end

  # Explain (in verbose mode only) why a figure was skipped.
  def log_unsupported_figure(autonum, mimetype, src)
    return unless options[:verbose]

    if mimetype && src
      puts "Warning: Skipping figure #{autonum} - external file not supported: #{File.basename(src)}"
    else
      puts "Warning: Skipping figure #{autonum} - no valid source or mimetype found"
    end
  end

  # Print the end-of-run report.
  def print_summary(metadata, prefix, total_figures, format_counts, total_size, output_dir)
    puts "\n" + '=' * 60
    puts 'EXTRACTION SUMMARY'
    puts '=' * 60

    if metadata
      puts "Document: #{metadata}"
      puts "Auto-generated prefix: #{metadata.auto_prefix}" if options[:auto_prefix]
    end

    puts "File prefix used: #{prefix}"
    puts "Total figures extracted: #{total_figures}"

    format_counts.each { |format, count| puts "#{format} files: #{count}" }

    puts "Total size: #{format_bytes(total_size)}"
    puts "Output directory: #{output_dir}"
    puts "ZIP archive: #{options[:zip] ? 'Created' : 'Not requested'}"

    print_compliance_info(format_counts, metadata)
    puts '=' * 60
    puts "\nSuccessfully extracted #{total_figures} figures to #{output_dir}"
  end

  # Print the ISO DRG compliance checklist of the report.
  def print_compliance_info(format_counts, metadata)
    svg_count = format_counts['SVG'] || 0
    puts "\nISO DRG COMPLIANCE:"
    puts "✓ Revisable vector graphics (SVG): #{svg_count.positive? ? 'Yes' : 'No'}"
    puts '✓ Proper file naming convention: Yes'
    puts '✓ Language-neutral graphics: Yes (extracted from Metanorma)'
    puts "✓ Document metadata extraction: #{metadata ? 'Yes' : 'No'}"
  end

  # Collect the document metadata from the <bibdata> element.
  #
  # @return [DocumentMetadata, nil] nil when the document has no <bibdata>
  def extract_document_metadata(doc)
    # Extract flavor from root metanorma element
    metanorma_element = doc.xpath('/xmlns:metanorma', METANORMA_NS).first
    flavor = metanorma_element&.[]('flavor') || 'iso' # Default to 'iso' for compatibility

    bibdata = doc.xpath('//xmlns:bibdata', METANORMA_NS).first
    return nil unless bibdata

    # Each lookup tries the most specific element first.
    title = xpath_text(bibdata, './/xmlns:title[@type="main"][@language=""]') ||
            xpath_text(bibdata, './/xmlns:title[@type="main"]') ||
            xpath_text(bibdata, './/xmlns:title')

    docnumber = xpath_text(bibdata, './/xmlns:docnumber')

    docidentifier = xpath_text(bibdata, './/xmlns:docidentifier[@type="ISO"]') ||
                    xpath_text(bibdata, './/xmlns:docidentifier')

    stage_element = bibdata.xpath('.//xmlns:status/xmlns:stage[@language=""]', METANORMA_NS).first
    stage = stage_element&.text&.strip
    stage_abbreviation = stage_element&.[]('abbreviation')

    substage = xpath_text(bibdata, './/xmlns:status/xmlns:substage')

    edition = xpath_text(bibdata, './/xmlns:edition[@language=""]') ||
              xpath_text(bibdata, './/xmlns:edition')

    # Parse part number from docnumber (e.g., "17301-1" -> part_number: "1")
    standard_number = docnumber
    part_number = nil
    standard_number, part_number = docnumber.split('-', 2) if docnumber&.include?('-')

    DocumentMetadata.new(
      title: title,
      docnumber: docnumber,
      stage: stage,
      substage: substage,
      docidentifier: docidentifier,
      standard_number: standard_number,
      part_number: part_number,
      edition: edition,
      # The stage code is the lower-cased stage abbreviation.
      stage_code: stage_abbreviation&.downcase,
      stage_abbreviation: stage_abbreviation,
      flavor: flavor
    )
  end

  # Stripped text of the first node matching xpath, or nil when none matches.
  def xpath_text(element, xpath)
    element.xpath(xpath, METANORMA_NS).first&.text&.strip
  end

  # Split a data URI into format information and its payload.
  #
  # The media type is matched case-insensitively. A URI without a media type
  # (e.g. "data:,...") falls back to the default format instead of being
  # rejected, and unrecognised media types are treated as PNG.
  #
  # @param data_uri [String]
  # @return [Hash{Symbol=>Object}, nil] :format, :format_name and :content
  #   (the text after the first comma, undecoded), or nil when the input is
  #   not a data URI or has no comma separator
  def parse_data_uri(data_uri)
    return nil unless data_uri.start_with?('data:')

    header, data = data_uri.delete_prefix('data:').split(',', 2)
    return nil if data.nil?

    # `first` is nil for an empty header, hence the to_s.
    mimetype = header.split(';').first.to_s.strip.downcase
    format = MIMETYPE_FORMATS.fetch(mimetype, :datauri_png)

    {
      format: format,
      format_name: Figure::FORMATS.dig(format, :name) || 'PNG',
      content: data
    }
  rescue StandardError
    nil
  end

  # Only apply retain_original_filenames option for ISO documents
  def should_retain_original_filenames?(metadata)
    options[:retain_original_filenames] && metadata&.flavor&.downcase == 'iso'
  end

  # Format a byte count with a binary unit (B, KB, MB, GB), rounded to two
  # decimals.
  def format_bytes(bytes)
    units = %w[B KB MB GB]
    size = bytes.to_f
    unit_index = 0

    while size >= 1024 && unit_index < units.length - 1
      size /= 1024
      unit_index += 1
    end

    "#{size.round(2)} #{units[unit_index]}"
  end
end
383
+ end
384
+ end