metanorma-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/rake.yml +15 -0
- data/.github/workflows/release.yml +24 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.rubocop.yml +14 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/Gemfile +11 -0
- data/README.adoc +94 -0
- data/Rakefile +8 -0
- data/docs/figure-extraction.adoc +111 -0
- data/docs/iso-drg-filename-guidance.adoc +584 -0
- data/docs/workflows-iso.adoc +70 -0
- data/exe/metanorma-tools +6 -0
- data/lib/metanorma/tools/cli.rb +79 -0
- data/lib/metanorma/tools/commands/extract_images.rb +25 -0
- data/lib/metanorma/tools/commands.rb +8 -0
- data/lib/metanorma/tools/document_metadata.rb +40 -0
- data/lib/metanorma/tools/figure.rb +124 -0
- data/lib/metanorma/tools/figure_extractor.rb +384 -0
- data/lib/metanorma/tools/iso_graphic_filename.rb +149 -0
- data/lib/metanorma/tools/version.rb +7 -0
- data/lib/metanorma/tools.rb +18 -0
- data/metanorma-tools.gemspec +37 -0
- data/sig/metanorma/tools.rbs +6 -0
- data/spec/fixtures/document-en.dis.presentation.xml +3417 -0
- data/spec/metanorma/tools/cli_spec.rb +102 -0
- data/spec/metanorma/tools/document_metadata_spec.rb +308 -0
- data/spec/metanorma/tools/figure_extractor_spec.rb +265 -0
- data/spec/metanorma/tools/iso_graphic_filename_spec.rb +316 -0
- data/spec/metanorma/tools_spec.rb +15 -0
- data/spec/spec_helper.rb +16 -0
- metadata +148 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'tempfile'
|
5
|
+
|
6
|
+
# Specs for the metanorma-tools command-line front end.
#
# Examples that write files do so into a throwaway directory (+temp_dir+) that
# the +after+ hook deletes again. The fixture document is expected to yield
# six PNG figures.
RSpec.describe Metanorma::Tools::Cli do
  let(:fixture_path) { File.join(__dir__, '../../fixtures/document-en.dis.presentation.xml') }
  let(:temp_dir) { Dir.mktmpdir }

  after { FileUtils.rm_rf(temp_dir) }

  # Basenames of every PNG file located directly inside +dir+.
  #
  # @param dir [String] directory to inspect
  # @return [Array<String>] file names without their directory part
  def png_basenames(dir)
    Dir.glob(File.join(dir, '*.png')).map { |path| File.basename(path) }
  end

  describe '#extract_images' do
    it 'extracts images from fixture document' do
      cli = described_class.new
      cli.options = { output_dir: temp_dir, zip: false }
      cli.extract_images(fixture_path)

      # Verify files were created
      filenames = png_basenames(temp_dir)
      expect(filenames.length).to eq(6)

      # Check for expected filenames; the composed matcher prints the actual
      # list on failure instead of a bare "expected true, got false".
      %w[figA1 figC1 figC2].each do |fragment|
        expect(filenames).to include(a_string_including(fragment))
      end
    end

    it 'shows help when no arguments provided' do
      expect { described_class.start([]) }.to output(/Commands:/).to_stdout
    end

    it 'shows help for extract-images command' do
      expect { described_class.start(['help', 'extract-images']) }.to output(/Usage:/).to_stdout
    end

    it 'handles invalid input file' do
      cli = described_class.new
      cli.options = { output_dir: temp_dir }
      expect { cli.extract_images('non_existent.xml') }.to raise_error(SystemExit)
    end

    it 'creates output directory if it does not exist' do
      nested_output = File.join(temp_dir, 'nested', 'dir')

      cli = described_class.new
      cli.options = { output_dir: nested_output }
      cli.extract_images(fixture_path)

      expect(Dir.exist?(nested_output)).to be true
      expect(png_basenames(nested_output).length).to eq(6)
    end

    it 'retains original filenames when option is enabled for ISO documents' do
      cli = described_class.new
      cli.options = { output_dir: temp_dir, retain_original_filenames: true }
      cli.extract_images(fixture_path)

      filenames = png_basenames(temp_dir)

      # Should include original filename parts
      %w[_a1.png _b1.png _c2-a.png _c2-b.png _c2-c.png].each do |fragment|
        expect(filenames).to include(a_string_including(fragment))
      end
    end

    it 'does not retain original filenames when option is disabled' do
      cli = described_class.new
      cli.options = { output_dir: temp_dir, retain_original_filenames: false }
      cli.extract_images(fixture_path)

      filenames = png_basenames(temp_dir)

      # Should NOT include original filename parts
      %w[_a1.png _b1.png _c2-a.png].each do |fragment|
        expect(filenames).not_to include(a_string_including(fragment))
      end
    end
  end

  describe 'command line integration' do
    it 'can be executed via command line' do
      # Test the actual CLI execution using Open3 for cross-platform compatibility
      require 'open3'

      # Argument-list form: no shell is involved, so a checkout or temp path
      # containing spaces or shell metacharacters reaches the CLI unchanged.
      stdout, stderr, status = Open3.capture3(
        'bundle', 'exec', 'metanorma-tools', 'extract-images',
        fixture_path, '--output-dir', temp_dir
      )

      # Surface stderr so a failing run explains itself in the spec output.
      expect(status.success?).to be(true), "CLI exited unsuccessfully (#{status.inspect}): #{stderr}"
      expect(stdout).to include('Successfully extracted')
    end

    it 'shows version information' do
      expect { described_class.start(['--version']) }.to output(/#{Metanorma::Tools::VERSION}/).to_stdout
    end
  end
end
|
@@ -0,0 +1,308 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specification for Metanorma::Tools::DocumentMetadata covering attribute
# readers, the flavor default, #auto_prefix, #to_s and the hash conversions.
RSpec.describe Metanorma::Tools::DocumentMetadata do
  describe 'initialization and attributes' do
    it 'creates metadata with all attributes' do
      attributes = {
        title: 'Test Document Title',
        docnumber: '17301',
        stage: 'DRAFT International Standard',
        substage: '00',
        docidentifier: 'ISO/DIS 17301-1:2023',
        standard_number: '17301',
        part_number: '1',
        edition: '1',
        stage_code: 'dis',
        stage_abbreviation: 'DIS',
        flavor: 'iso'
      }
      metadata = described_class.new(**attributes)

      # Each constructor argument must be readable back unchanged.
      attributes.each do |reader, value|
        expect(metadata.public_send(reader)).to eq(value)
      end
    end

    it 'sets default flavor to iso' do
      metadata = described_class.new(title: 'Test Document', docnumber: '17301')

      expect(metadata.flavor).to eq('iso')
    end

    it 'allows custom flavor' do
      metadata = described_class.new(title: 'Test Document', docnumber: '17301', flavor: 'iec')

      expect(metadata.flavor).to eq('iec')
    end
  end

  describe 'auto_prefix generation' do
    context 'with ISO flavor' do
      it 'generates correct prefix for DIS stage' do
        prefix = described_class.new(
          standard_number: '17301', edition: '1', stage_abbreviation: 'DIS', flavor: 'iso'
        ).auto_prefix

        expect(prefix).to eq('17301_dis_ed1')
      end

      it 'generates correct prefix for PWI stage' do
        prefix = described_class.new(
          standard_number: '17301', edition: '1', stage_abbreviation: 'PWI', flavor: 'iso'
        ).auto_prefix

        expect(prefix).to eq('17301_pwi_ed1')
      end

      it 'generates correct prefix for FDIS stage' do
        prefix = described_class.new(
          standard_number: '17301', edition: '2', stage_abbreviation: 'FDIS', flavor: 'iso'
        ).auto_prefix

        expect(prefix).to eq('17301_fdis_ed2')
      end

      it 'handles nil stage_abbreviation gracefully' do
        prefix = described_class.new(
          standard_number: '17301', edition: '1', stage_abbreviation: nil, flavor: 'iso'
        ).auto_prefix

        # The stage segment is left empty, hence the doubled underscore.
        expect(prefix).to eq('17301__ed1')
      end
    end

    context 'with other flavors' do
      it 'generates correct prefix for IEC flavor' do
        prefix = described_class.new(
          standard_number: '62304', edition: '1', stage_abbreviation: 'DIS', flavor: 'iec'
        ).auto_prefix

        expect(prefix).to eq('iec_62304_dis_ed1')
      end

      it 'generates correct prefix for custom flavor' do
        prefix = described_class.new(
          standard_number: '12345', edition: '1', stage_abbreviation: 'DRAFT', flavor: 'custom'
        ).auto_prefix

        expect(prefix).to eq('custom_12345_draft_ed1')
      end
    end
  end

  describe 'string representation' do
    context 'with docidentifier and title' do
      it 'formats with docidentifier and title' do
        rendered = described_class.new(
          title: 'Céréales et légumineuses — Spécification et méthodes d\'essai — Riz (DIS)',
          docidentifier: 'ISO/DIS 17301-1:2023'
        ).to_s

        expect(rendered).to eq(
          'ISO/DIS 17301-1:2023 - Céréales et légumineuses — Spécification et méthodes d\'essai — Riz (DIS)'
        )
      end
    end

    context 'without docidentifier' do
      it 'formats with standard information' do
        rendered = described_class.new(
          standard_number: '17301', edition: '1', stage_code: 'dis', stage_abbreviation: 'DIS'
        ).to_s

        expect(rendered).to eq('ISO 17301 Edition 1 Stage dis (DIS)')
      end
    end

    context 'with minimal information' do
      it 'handles missing fields gracefully' do
        rendered = described_class.new(title: 'Test Document').to_s

        expect(rendered).to eq('ISO Edition Stage ()')
      end
    end
  end

  describe 'serialization' do
    it 'can be serialized to hash' do
      attributes = {
        title: 'Test Document',
        docnumber: '17301',
        stage: 'DRAFT International Standard',
        substage: '00',
        docidentifier: 'ISO/DIS 17301-1:2023',
        standard_number: '17301',
        part_number: '1',
        edition: '1',
        stage_code: 'dis',
        stage_abbreviation: 'DIS',
        flavor: 'iso'
      }

      hash = described_class.new(**attributes).to_hash

      # The serialized form is looked up by string key, one entry per attribute.
      attributes.each do |name, value|
        expect(hash[name.to_s]).to eq(value)
      end
    end

    it 'can be created from hash' do
      source = {
        'title' => 'Test Document',
        'docnumber' => '17301',
        'stage' => 'DRAFT International Standard',
        'substage' => '00',
        'docidentifier' => 'ISO/DIS 17301-1:2023',
        'standard_number' => '17301',
        'part_number' => '1',
        'edition' => '1',
        'stage_code' => 'dis',
        'stage_abbreviation' => 'DIS',
        'flavor' => 'iso'
      }

      metadata = described_class.from_hash(source)

      expect(metadata.title).to eq('Test Document')
      expect(metadata.docnumber).to eq('17301')
      expect(metadata.auto_prefix).to eq('17301_dis_ed1')
    end
  end

  describe 'real-world examples' do
    it 'handles fixture document metadata correctly' do
      metadata = described_class.new(
        title: 'Céréales et légumineuses — Spécification et méthodes d\'essai — Riz (DIS)',
        docnumber: '17301',
        stage: 'DRAFT International Standard',
        substage: '00',
        docidentifier: 'ISO/DIS 17301-1:2023',
        standard_number: '17301',
        part_number: '1',
        edition: '1',
        stage_code: 'dis',
        stage_abbreviation: 'DIS'
      )

      expect(metadata.auto_prefix).to eq('17301_dis_ed1')
      expect(metadata.to_s).to include('ISO/DIS 17301-1:2023')
      expect(metadata.to_s).to include('Céréales et légumineuses')
    end

    it 'handles multi-part standards' do
      metadata = described_class.new(
        title: 'Information technology — Security techniques — Part 3: Guidelines',
        docnumber: '27001',
        standard_number: '27001',
        part_number: '3',
        edition: '2',
        stage_code: 'fdis',
        stage_abbreviation: 'FDIS'
      )

      expect(metadata.auto_prefix).to eq('27001_fdis_ed2')
      expect(metadata.part_number).to eq('3')
    end

    it 'handles different document stages' do
      # Every abbreviation under test is simply its stage code in upper case.
      %w[pwi nwip wd cd dis fdis is].each do |code|
        metadata = described_class.new(
          standard_number: '12345',
          edition: '1',
          stage_code: code,
          stage_abbreviation: code.upcase
        )

        expect(metadata.auto_prefix).to eq("12345_#{code}_ed1")
      end
    end
  end

  describe 'edge cases' do
    it 'handles empty strings' do
      blank = ''
      metadata = described_class.new(
        title: blank,
        docnumber: blank,
        standard_number: blank,
        edition: blank,
        stage_abbreviation: blank
      )

      expect(metadata.auto_prefix).to eq('__ed')
      expect(metadata.to_s).to be_a(String)
    end

    it 'handles special characters in title' do
      rendered = described_class.new(
        title: 'Test — Document with "special" characters & symbols',
        docidentifier: 'ISO/DIS 12345:2023'
      ).to_s

      expect(rendered).to include('Test — Document with "special" characters & symbols')
    end

    it 'handles very long titles' do
      long_title = 'A' * 500
      rendered = described_class.new(title: long_title, docidentifier: 'ISO/DIS 12345:2023').to_s

      expect(rendered).to include(long_title)
    end
  end
end
|
@@ -0,0 +1,265 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'tempfile'
|
5
|
+
require 'zip'
|
6
|
+
|
7
|
+
# Specs for Metanorma::Tools::FigureExtractor and the small value classes it
# works with (Figure, DocumentMetadata, IsoGraphicFilename).
#
# Extraction examples run against the DIS presentation-XML fixture, which is
# expected to yield six PNG figures, and write into a temporary directory
# that the +after+ hook removes.
RSpec.describe Metanorma::Tools::FigureExtractor do
  let(:fixture_path) { File.join(__dir__, '../../fixtures/document-en.dis.presentation.xml') }
  let(:extractor) { described_class.new }
  let(:temp_dir) { Dir.mktmpdir }

  # Whole-string pattern for the generated figure file names. \A/\z are used
  # instead of ^/$ so a name with an embedded newline cannot match per-line.
  let(:iso_filename_pattern) { /\A17301_dis_ed3fig.+\.png\z/ }

  after { FileUtils.rm_rf(temp_dir) if Dir.exist?(temp_dir) }

  describe '#extract' do
    context 'with the fixture document containing figures in annexes' do
      it 'extracts all figures from the document' do
        # Capture output to verify extraction
        output = capture_stdout do
          extractor.extract(fixture_path, temp_dir)
        end

        expect(output).to include('Found 6 figures')
        expect(output).to include('Successfully extracted 6 figures')

        # Verify files were created
        png_files = Dir.glob(File.join(temp_dir, '*.png'))
        expect(png_files.length).to eq(6)
      end

      it 'extracts document metadata correctly' do
        output = capture_stdout do
          extractor.extract(fixture_path, temp_dir)
        end

        expect(output).to include('ISO/DIS 17301-1:2023')
        expect(output).to include('Auto-generated prefix: 17301_dis_ed3')
        expect(output).to include('Document metadata extraction: Yes')
      end

      describe 'figure extraction details' do
        before do
          capture_stdout { extractor.extract(fixture_path, temp_dir) }
        end

        it 'extracts all 6 figures with correct data' do
          png_files = Dir.glob(File.join(temp_dir, '*.png'))
          expect(png_files.length).to eq(6)

          # Verify all files have content
          png_files.each do |file|
            expect(File.size(file)).to be > 1000 # PNG files should be reasonably sized
          end
        end

        it 'generates correct ISO graphic filenames for all figures' do
          png_files = Dir.glob(File.join(temp_dir, '*.png')).map { |f| File.basename(f) }

          # All files should follow the ISO DRG pattern
          png_files.each do |filename|
            expect(filename).to match(iso_filename_pattern)
          end

          # Check for specific expected patterns
          %w[figA1 figC1 figC2].each do |fragment|
            expect(png_files).to include(a_string_including(fragment))
          end
        end
      end

      describe 'image format and data validation' do
        before do
          capture_stdout { extractor.extract(fixture_path, temp_dir) }
        end

        it 'correctly processes all images as PNG with valid data' do
          png_files = Dir.glob(File.join(temp_dir, '*.png'))

          # The 8-byte PNG file signature, as a binary (ASCII-8BIT) string so
          # it compares equal to bytes read in 'rb' mode.
          expected_signature = "\x89PNG\r\n\x1A\n".b

          png_files.each do |file|
            # Verify PNG file signature
            File.open(file, 'rb') do |f|
              expect(f.read(8)).to eq(expected_signature)
            end
          end
        end
      end

      describe 'archive creation' do
        let(:zip_extractor) { described_class.new(zip: true) }

        it 'creates a zip archive with all figures' do
          output = capture_stdout do
            zip_extractor.extract(fixture_path, temp_dir)
          end

          expect(output).to include('ZIP archive created')

          zip_files = Dir.glob(File.join(temp_dir, '*.zip'))
          expect(zip_files.length).to eq(1)

          Zip::File.open(zip_files.first) do |zip_file|
            expect(zip_file.entries.length).to eq(6)

            zip_file.entries.each do |entry|
              expect(entry.name).to match(iso_filename_pattern)
              expect(entry.size).to be > 0
            end
          end
        end

        it 'creates correctly named files in archive' do
          capture_stdout do
            zip_extractor.extract(fixture_path, temp_dir)
          end

          zip_files = Dir.glob(File.join(temp_dir, '*.zip'))

          Zip::File.open(zip_files.first) do |zip_file|
            filenames = zip_file.entries.map(&:name).sort

            # Verify we have the expected number of files
            expect(filenames.length).to eq(6)

            # All should be PNG files with proper naming
            filenames.each do |filename|
              expect(filename).to match(iso_filename_pattern)
            end
          end
        end
      end
    end

    context 'with error handling' do
      it 'exits for non-existent file' do
        expect do
          capture_stdout { extractor.extract('non_existent.xml') }
        end.to raise_error(SystemExit)
      end

      it 'handles malformed XML gracefully' do
        malformed_xml = Tempfile.new(['malformed', '.xml'])
        malformed_xml.write('not valid xml content')
        malformed_xml.close

        expect do
          capture_stdout { extractor.extract(malformed_xml.path) }
        end.to raise_error(SystemExit)
      ensure
        # Remove the temp file even when the expectation above fails.
        malformed_xml&.unlink
      end
    end

    context 'with CLI integration' do
      it 'works with the CLI extract-images command' do
        require 'open3'

        # Test that the CLI can process the fixture file. The argument-list
        # form bypasses the shell, so paths with spaces or metacharacters are
        # passed intact; capture2e merges stderr into the captured output
        # (what `2>&1` did in the shell form).
        output, status = Open3.capture2e(
          'bundle', 'exec', 'metanorma-tools', 'extract-images',
          fixture_path, '--output-dir', temp_dir
        )
        expect(status.success?).to be(true), "CLI exited unsuccessfully (#{status.inspect}):\n#{output}"

        # Verify files were created
        png_files = Dir.glob(File.join(temp_dir, '*.png'))
        expect(png_files.length).to eq(6)
      end
    end
  end

  describe 'individual component testing' do
    describe Metanorma::Tools::Figure do
      it 'creates figure with valid data' do
        figure = described_class.new(
          'A.1',
          'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==',
          :datauri_png,
          'test-figure.png'
        )

        expect(figure.autonum).to eq('A.1')
        expect(figure.format).to eq(:datauri_png)
        expect(figure.content).not_to be_empty
        expect(figure.original_filename).to eq('test-figure.png')
      end
    end

    describe Metanorma::Tools::DocumentMetadata do
      it 'creates metadata with valid data' do
        metadata = described_class.new(
          title: 'Test Document',
          docnumber: '12345',
          stage: 'DRAFT',
          substage: '00',
          docidentifier: 'ISO/DIS 12345:2023',
          standard_number: '12345',
          part_number: '1',
          edition: '1',
          stage_code: 'dis',
          stage_abbreviation: 'DIS'
        )

        expect(metadata.title).to eq('Test Document')
        expect(metadata.docnumber).to eq('12345')
        expect(metadata.stage).to eq('DRAFT')
        expect(metadata.auto_prefix).to eq('12345_dis_ed1')
      end
    end

    describe Metanorma::Tools::IsoGraphicFilename do
      it 'generates correct filename format' do
        filename = described_class.new(
          standard_number: 17301,
          part_number: 1,
          stage_code: 'dis',
          edition_number: 3,
          content_type: 'figure',
          figure_number: 'A1',
          original_filename: 'figureA-1.png',
          file_extension: 'png'
        )

        expect(filename.to_s).to eq('17301-1_dis_ed3figA1_figureA-1.png')
      end

      it 'handles UUID-based figure IDs' do
        filename = described_class.new(
          standard_number: 17301,
          part_number: 1,
          stage_code: 'dis',
          edition_number: 3,
          content_type: 'text',
          text_number: 1,
          original_filename: '_85f711f6-478d-a680-b5b9-3bc85332dfd1.png',
          file_extension: 'png'
        )

        expect(filename.to_s).to eq('17301-1_dis_ed3figText1__85f711f6-478d-a680-b5b9-3bc85332dfd1.png')
      end

      it 'handles subfigure naming correctly' do
        filename = described_class.new(
          standard_number: 17301,
          part_number: 1,
          stage_code: 'dis',
          edition_number: 3,
          content_type: 'figure',
          figure_number: 'C2',
          subfigure: 'a',
          original_filename: 'figureC-2-a.png',
          file_extension: 'png'
        )

        expect(filename.to_s).to eq('17301-1_dis_ed3figC2a_figureC-2-a.png')
      end
    end
  end

  private

  # Runs the given block with $stdout temporarily replaced by an in-memory
  # buffer and returns everything written to it.
  #
  # The original $stdout is restored in +ensure+, so it is put back even when
  # the block raises (the error-handling examples expect SystemExit).
  #
  # @yield the code whose standard output should be captured
  # @return [String] the text written to $stdout while the block ran
  def capture_stdout
    original_stdout = $stdout
    # Keep our own reference so the result does not depend on what $stdout
    # points to once the block has finished.
    buffer = StringIO.new
    $stdout = buffer
    yield
    buffer.string
  ensure
    $stdout = original_stdout
  end
end
|