metanorma-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/rake.yml +15 -0
- data/.github/workflows/release.yml +24 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.rubocop.yml +14 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/Gemfile +11 -0
- data/README.adoc +94 -0
- data/Rakefile +8 -0
- data/docs/figure-extraction.adoc +111 -0
- data/docs/iso-drg-filename-guidance.adoc +584 -0
- data/docs/workflows-iso.adoc +70 -0
- data/exe/metanorma-tools +6 -0
- data/lib/metanorma/tools/cli.rb +79 -0
- data/lib/metanorma/tools/commands/extract_images.rb +25 -0
- data/lib/metanorma/tools/commands.rb +8 -0
- data/lib/metanorma/tools/document_metadata.rb +40 -0
- data/lib/metanorma/tools/figure.rb +124 -0
- data/lib/metanorma/tools/figure_extractor.rb +384 -0
- data/lib/metanorma/tools/iso_graphic_filename.rb +149 -0
- data/lib/metanorma/tools/version.rb +7 -0
- data/lib/metanorma/tools.rb +18 -0
- data/metanorma-tools.gemspec +37 -0
- data/sig/metanorma/tools.rbs +6 -0
- data/spec/fixtures/document-en.dis.presentation.xml +3417 -0
- data/spec/metanorma/tools/cli_spec.rb +102 -0
- data/spec/metanorma/tools/document_metadata_spec.rb +308 -0
- data/spec/metanorma/tools/figure_extractor_spec.rb +265 -0
- data/spec/metanorma/tools/iso_graphic_filename_spec.rb +316 -0
- data/spec/metanorma/tools_spec.rb +15 -0
- data/spec/spec_helper.rb +16 -0
- metadata +148 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'lutaml/model'
|
4
|
+
|
5
|
+
module Metanorma
|
6
|
+
module Tools
|
7
|
+
class IsoGraphicFilename < Lutaml::Model::Serializable
|
8
|
+
# Values accepted for +stage_code+ (handed to that attribute's +values:+ option).
VALID_STAGE_CODES = %w[pwi np awi wd cd dis fdis prf].freeze
# Values accepted for +supplement_type+.
VALID_SUPPLEMENT_TYPES = %w[amd cor].freeze
# Values accepted for +content_type+; each one selects a branch when the
# content portion of the filename is built.
VALID_CONTENT_TYPES = %w[figure table key text special_layout].freeze
# Single-letter values accepted for +language_code+.
VALID_LANGUAGE_CODES = %w[e f r s a d].freeze

# -- Document identification --------------------------------------------
attribute :standard_number, :integer
attribute :part_number, :integer
attribute :edition_number, :integer
attribute :stage_code, :string, values: VALID_STAGE_CODES
attribute :supplement_type, :string, values: VALID_SUPPLEMENT_TYPES
attribute :supplement_number, :integer

# -- Content identification ---------------------------------------------
attribute :content_type, :string, values: VALID_CONTENT_TYPES
attribute :figure_number, :string
attribute :subfigure, :string
attribute :table_number, :string
attribute :key_number, :integer
attribute :text_number, :integer

# -- Trailing filename parts --------------------------------------------
attribute :language_code, :string, values: VALID_LANGUAGE_CODES
attribute :file_extension, :string
attribute :original_filename, :string

# Every attribute is serialised under a key of the same name, in
# declaration order.
key_value do
  %w[
    standard_number part_number edition_number stage_code
    supplement_type supplement_number content_type figure_number
    subfigure table_number key_number text_number
    language_code file_extension original_filename
  ].each { |field| map field, to: field.to_sym }
end
|
46
|
+
|
47
|
+
# Assembles the complete filename: document portion, content portion,
# language portion and original-filename portion, followed by the file
# extension.
#
# The portions are concatenated without any separator (each optional
# portion carries its own leading "_"); portions that are nil are skipped.
# A leading "." on +file_extension+ is ignored so that values such as
# ".svg" do not yield "name..svg", and when the extension is nil or empty
# the result has no dangling "." at the end.
#
# @return [String] the generated filename
# @raise [ArgumentError] when +content_type+ is not one of the handled
#   values (raised while building the content portion)
def generate_filename
  stem = [
    build_document_portion,
    build_content_portion,
    build_language_portion,
    build_original_filename_portion
  ].compact.join

  extension = file_extension.to_s.delete_prefix('.')
  extension.empty? ? stem : "#{stem}.#{extension}"
end
|
58
|
+
|
59
|
+
# String form of this object: exactly what #generate_filename returns.
#
# @return [String]
def to_s
  generate_filename
end
|
62
|
+
|
63
|
+
# Debug representation listing every attribute that is currently set.
#
# Attributes whose value is nil are left out. +original_filename+ is
# included because it contributes to the generated filename just like the
# other attributes.
#
# @return [String] "#<IsoGraphicFilename {...}>"
def inspect
  attrs = {
    standard_number: standard_number,
    part_number: part_number,
    edition_number: edition_number,
    stage_code: stage_code,
    supplement_type: supplement_type,
    supplement_number: supplement_number,
    content_type: content_type,
    figure_number: figure_number,
    subfigure: subfigure,
    table_number: table_number,
    key_number: key_number,
    text_number: text_number,
    language_code: language_code,
    file_extension: file_extension,
    original_filename: original_filename
  }.compact
  "#<IsoGraphicFilename #{attrs}>"
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
# Builds the leading, document-identifying part of the filename.
#
# Layout produced:
#   no supplement:   [SL]{standard}[-{part}][_{stage}]_ed{edition}
#   with supplement: [SL]{standard}[-{part}]_ed{edition}{supplement_type}{supplement_number}[_{stage}]
#
# "SL" is prepended only when +content_type+ is 'special_layout'. Note the
# stage code sits before the edition for a plain standard and after the
# supplement for an amendment/corrigendum.
#
# @return [String]
def build_document_portion
  segments = []
  segments << 'SL' if content_type == 'special_layout'
  segments << standard_number.to_s
  segments << "-#{part_number}" if part_number

  edition = "_ed#{edition_number}"
  stage = stage_code ? "_#{stage_code}" : ''

  if supplement_type
    segments << edition << "#{supplement_type}#{supplement_number}" << stage
  else
    segments << stage << edition
  end

  segments.join
end
|
109
|
+
|
110
|
+
# Builds the part of the filename that identifies the graphic itself,
# chosen by +content_type+:
#
#   figure                -> fig{figure}[{subfigure}]
#   table, special_layout -> figTab{table, or figure when no table number}
#   key                   -> fig{figure}_key{key_number}
#   text                  -> figText{text_number}
#
# Figure and table numbers go through normalize_figure_number first.
#
# @return [String]
# @raise [ArgumentError] for any other +content_type+
def build_content_portion
  case content_type
  when 'figure'
    label = "fig#{normalize_figure_number(figure_number)}"
    subfigure ? label + subfigure : label
  when 'table', 'special_layout'
    "figTab#{normalize_figure_number(table_number || figure_number)}"
  when 'key'
    "fig#{normalize_figure_number(figure_number)}_key#{key_number}"
  when 'text'
    "figText#{text_number}"
  else
    raise ArgumentError, "Unknown content_type: #{content_type}"
  end
end
|
128
|
+
|
129
|
+
# Language suffix of the filename.
#
# @return [String, nil] "_{language_code}", or nil when no language code
#   is set
def build_language_portion
  "_#{language_code}" if language_code
end
|
132
|
+
|
133
|
+
# Suffix carrying the original file's name.
#
# Only the base name is kept: File.basename drops any directory part and
# the '.*' suffix pattern drops the final extension.
#
# @return [String, nil] "_{basename}", or nil when +original_filename+ is
#   unset or empty
def build_original_filename_portion
  source_name = original_filename
  return if !source_name || source_name.empty?

  "_#{File.basename(source_name, '.*')}"
end
|
140
|
+
|
141
|
+
# Strips the dots out of a figure/table number so it can be embedded in a
# filename, e.g. "A.2" becomes "A2" while "3" stays "3".
#
# @param figure_num [#to_s, nil] the number as given
# @return [String] the number without dots; '' when +figure_num+ is nil
#   or false
def normalize_figure_number(figure_num)
  figure_num ? figure_num.to_s.delete('.') : ''
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'lutaml/model'
|
4
|
+
|
5
|
+
require_relative "tools/version"
|
6
|
+
require_relative "tools/iso_graphic_filename"
|
7
|
+
require_relative "tools/document_metadata"
|
8
|
+
require_relative "tools/figure"
|
9
|
+
require_relative "tools/figure_extractor"
|
10
|
+
require_relative "tools/commands"
|
11
|
+
require_relative "tools/commands/extract_images"
|
12
|
+
require_relative "tools/cli"
|
13
|
+
|
14
|
+
module Metanorma
  module Tools
    # Error class declared in the gem's own namespace; a direct subclass of
    # StandardError with no added behaviour.
    class Error < StandardError
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'lib/metanorma/tools/version'
|
4
|
+
|
5
|
+
# Paths of every file tracked by git, listed from the directory that holds
# this gemspec. `-z` makes git separate entries with NUL bytes, which is
# what the split relies on.
all_files_in_git = Dir.chdir(File.expand_path(__dir__)) do
  `git ls-files -z`.split("\x0")
end

Gem::Specification.new do |spec|
  spec.name = 'metanorma-tools'
  spec.version = Metanorma::Tools::VERSION
  spec.authors = ['Ribose']
  spec.email = ['open.source@ribose.com']

  spec.summary = 'Miscellaneous tools to work with Metanorma output.'
  spec.homepage = 'https://github.com/metanorma/metanorma-tools'
  spec.license = 'BSD-2-Clause'
  spec.required_ruby_version = Gem::Requirement.new('>= 2.6.0')

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = spec.homepage
  spec.metadata['bug_tracker_uri'] = "#{spec.homepage}/issues"

  # Specify which files should be added to the gem when it is released:
  # everything tracked by git except the test/, features/ and bin/
  # directories and any path starting with a dot (.github/, .gitignore,
  # .rspec, .rubocop.yml, ...). The dot alternative must sit outside the
  # "dir/" group: git never lists paths as "./...", so requiring a slash
  # right after the dot would exclude nothing.
  spec.files = all_files_in_git
               .reject { |f| f.match?(%r{\A(?:(?:test|features|bin)/|\.)}) }

  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_dependency 'lutaml-model', '~> 0.7'
  spec.add_dependency 'nokogiri'
  spec.add_dependency 'rubyzip', '~> 2.0'
  spec.add_dependency 'thor'
  spec.add_dependency 'base64'
end
|