stepmod-utils 0.3.2 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/stepmod-extract-terms +46 -226
- data/lib/stepmod/utils/concept.rb +11 -5
- data/lib/stepmod/utils/converters/def.rb +14 -6
- data/lib/stepmod/utils/converters/example.rb +13 -2
- data/lib/stepmod/utils/converters/ext_description.rb +3 -1
- data/lib/stepmod/utils/converters/figure.rb +20 -0
- data/lib/stepmod/utils/converters/module_ref.rb +7 -3
- data/lib/stepmod/utils/converters/note.rb +13 -2
- data/lib/stepmod/utils/converters/stepmod_ext_description.rb +26 -4
- data/lib/stepmod/utils/converters/text.rb +1 -1
- data/lib/stepmod/utils/converters/uof.rb +10 -5
- data/lib/stepmod/utils/smrl_resource_converter.rb +1 -1
- data/lib/stepmod/utils/stepmod_definition_converter.rb +2 -1
- data/lib/stepmod/utils/terms_extractor.rb +326 -0
- data/lib/stepmod/utils/version.rb +1 -1
- data/stepmod-utils.gemspec +1 -1
- metadata +14 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 887612180cfe004fb244d41e7c553858ed441d5e4c60c8d27d06fb7ec3219cd2
|
4
|
+
data.tar.gz: f7f25caa5652593b6eafcb792206614f2b053e56a0dd05801018e2217533b636
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41057b14b1c6ab326dff8e402be3a74737e9479212a76577a9ef81738da9f542ab6cbd892723949b004a2a6ff6058525aed5de47b1fbe3859ac139a724028608
|
7
|
+
data.tar.gz: 519d9daa5451c17f60e6e2e764405908bf7e3c9f6b7279dcef9ee2bb554c63a0535b1e63c3aaed178549413a2253e24a3687c86d1ad60c8fa569c65aed756647
|
data/exe/stepmod-extract-terms
CHANGED
@@ -17,38 +17,16 @@ class Gem::Specification
|
|
17
17
|
end
|
18
18
|
|
19
19
|
require 'bundler/setup'
|
20
|
-
require 'stepmod/utils/
|
21
|
-
require 'stepmod/utils/bibdata'
|
22
|
-
require 'stepmod/utils/concept'
|
23
|
-
require 'ptools'
|
24
|
-
|
25
|
-
ReverseAdoc.config.unknown_tags = :bypass
|
26
|
-
|
27
|
-
# TODO: we may want a command line option to override this in the future
|
28
|
-
ACCEPTED_STAGES = %w(IS DIS FDIS TS)
|
29
|
-
|
30
|
-
general_concepts = []
|
31
|
-
resource_concepts = []
|
32
|
-
module_concepts = []
|
33
|
-
parsed_bibliography = []
|
34
|
-
encountered_terms = {}
|
20
|
+
require 'stepmod/utils/terms_extractor'
|
35
21
|
|
36
22
|
stepmod_dir = ARGV.first || Dir.pwd
|
37
23
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
when 41,42,43,44,45,46,47,51
|
45
|
-
true
|
46
|
-
when [56..112]
|
47
|
-
true
|
48
|
-
else
|
49
|
-
false
|
50
|
-
end
|
51
|
-
end
|
24
|
+
general_concepts,
|
25
|
+
resource_concepts,
|
26
|
+
parsed_bibliography,
|
27
|
+
part_concepts,
|
28
|
+
part_resources,
|
29
|
+
part_modules = Stepmod::Utils::TermsExtractor.call(stepmod_dir)
|
52
30
|
|
53
31
|
def part_to_title(bibdata)
|
54
32
|
case bibdata.part.to_i
|
@@ -73,227 +51,69 @@ def part_to_title(bibdata)
|
|
73
51
|
end
|
74
52
|
end
|
75
53
|
|
76
|
-
|
77
|
-
|
78
|
-
# If we are using the stepmod CVS repository, provide the revision number per file
|
79
|
-
has_cvs = File.which("cvs")
|
80
|
-
cvs_mode = has_cvs && Dir.exists?(stepmod_path.join('CVS'))
|
81
|
-
|
82
|
-
log "INFO: STEPmod directory set to #{stepmod_dir}."
|
83
|
-
|
84
|
-
if cvs_mode
|
85
|
-
log "INFO: STEPmod directory is a CVS repository and will detect revisions."
|
86
|
-
log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
|
87
|
-
else
|
88
|
-
log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
|
54
|
+
def log message
|
55
|
+
puts "[stepmod-utils] #{message}"
|
89
56
|
end
|
90
57
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
candidate_paths = Dir["#{stepmod_dir}/**/#{t}"]
|
101
|
-
acc << candidate_paths
|
102
|
-
|
103
|
-
end.flatten.sort.uniq
|
104
|
-
|
105
|
-
max_encountered_refs_indexes = {}
|
106
|
-
|
107
|
-
files.each do |file_path|
|
108
|
-
file_path = Pathname.new(file_path).realpath
|
109
|
-
fpath = file_path.relative_path_from(stepmod_path)
|
110
|
-
|
111
|
-
log "INFO: Processing XML file #{fpath}"
|
112
|
-
current_document = Nokogiri::XML(File.read(file_path)).root
|
113
|
-
|
114
|
-
bibdata = nil
|
115
|
-
begin
|
116
|
-
bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
|
117
|
-
rescue
|
118
|
-
log "WARNING: Unknown file #{fpath}, skipped"
|
119
|
-
next
|
120
|
-
end
|
121
|
-
|
122
|
-
unless ACCEPTED_STAGES.include? bibdata.doctype
|
123
|
-
log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
|
124
|
-
next
|
125
|
-
end
|
126
|
-
|
127
|
-
if bibdata.part.to_s.empty?
|
128
|
-
log "FATAL: missing `part` attribute: #{fpath}"
|
129
|
-
log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
|
130
|
-
next
|
131
|
-
end
|
132
|
-
|
133
|
-
revision_string = "\n// CVS: revision not detected"
|
134
|
-
if cvs_mode
|
135
|
-
# Run `cvs status` to find out version
|
136
|
-
|
137
|
-
log "INFO: Detecting CVS revision..."
|
138
|
-
Dir.chdir(stepmod_path) do
|
139
|
-
status = `cvs status #{fpath}`
|
140
|
-
|
141
|
-
unless status.empty?
|
142
|
-
working_rev = status.split(/\n/).grep(/Working revision:/).first.match(/revision:\s+(.+)$/)[1]
|
143
|
-
repo_rev = status.split(/\n/).grep(/Repository revision:/).first.match(/revision:\t(.+)\t/)[1]
|
144
|
-
log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
|
145
|
-
revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
|
146
|
-
"// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
# read definitions
|
152
|
-
part_concepts = []
|
153
|
-
current_document.xpath('//definition').each do |definition|
|
154
|
-
index = max_encountered_refs_indexes[bibdata.anchor] || 1
|
155
|
-
term_id = definition['id']
|
156
|
-
unless term_id.nil?
|
157
|
-
if encountered_terms[term_id]
|
158
|
-
log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
|
159
|
-
end
|
160
|
-
encountered_terms[term_id] = true
|
161
|
-
end
|
162
|
-
|
163
|
-
# Assume that definition is located in clause 3 of the ISO document
|
164
|
-
# in order. We really don't have a good reference here.
|
165
|
-
ref_clause = "3.#{index}"
|
166
|
-
|
167
|
-
concept = Stepmod::Utils::Concept.parse(
|
168
|
-
definition,
|
169
|
-
reference_anchor: bibdata.anchor,
|
170
|
-
reference_clause: ref_clause,
|
171
|
-
file_path: fpath + revision_string
|
172
|
-
)
|
173
|
-
next unless concept
|
174
|
-
|
175
|
-
unless term_special_category(bibdata)
|
176
|
-
# log "INFO: this part is generic"
|
177
|
-
general_concepts << concept
|
178
|
-
else
|
179
|
-
# log "INFO: this part is special"
|
180
|
-
part_concepts << concept
|
181
|
-
end
|
182
|
-
|
183
|
-
max_encountered_refs_indexes[bibdata.anchor] = index + 1
|
184
|
-
parsed_bibliography << bibdata
|
185
|
-
end
|
186
|
-
|
187
|
-
part_modules = []
|
188
|
-
current_document.xpath('//arm/uof').each do |uof_node|
|
189
|
-
concept = Stepmod::Utils::Concept.parse(
|
190
|
-
uof_node,
|
191
|
-
reference_anchor: bibdata.anchor,
|
192
|
-
reference_clause: nil,
|
193
|
-
file_path: fpath + revision_string
|
194
|
-
)
|
195
|
-
# puts concept.inspect
|
196
|
-
|
197
|
-
next unless concept
|
198
|
-
|
199
|
-
unless term_special_category(bibdata)
|
200
|
-
# log "INFO: this part is generic"
|
201
|
-
module_concepts << concept
|
202
|
-
else
|
203
|
-
# log "INFO: this part is special"
|
204
|
-
part_modules << concept
|
205
|
-
end
|
206
|
-
|
207
|
-
parsed_bibliography << bibdata
|
208
|
-
end
|
58
|
+
part_concepts.each do |(bibdata, current_part_concepts)|
|
59
|
+
fn = "03x-stepmod-#{bibdata.part}.adoc"
|
60
|
+
File.open(fn, 'w') { |file|
|
61
|
+
file.puts("== #{part_to_title(bibdata)}\n\n")
|
62
|
+
file.puts(current_part_concepts.map(&:to_mn_adoc).join("\n"))
|
63
|
+
}
|
64
|
+
log "INFO: written to: #{fn}"
|
65
|
+
end
|
209
66
|
|
210
|
-
part_resources = []
|
211
|
-
# Assumption: every schema is only linked by a single resource_docs document.
|
212
|
-
current_document.xpath('//schema').each do |schema_node|
|
213
|
-
schema_name = schema_node['name']
|
214
67
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
68
|
+
part_resources.each do |(bibdata, current_part_resources)|
|
69
|
+
fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"
|
70
|
+
File.open(fn, 'w') { |file|
|
71
|
+
file.puts("== #{part_to_title(bibdata)}\n\n")
|
72
|
+
file.puts(current_part_resources.map(&:to_mn_adoc).join("\n"))
|
73
|
+
}
|
74
|
+
log "INFO: written to: #{fn}"
|
75
|
+
end
|
219
76
|
|
220
|
-
concept = Stepmod::Utils::Concept.parse(
|
221
|
-
ext_description,
|
222
|
-
reference_anchor: bibdata.anchor,
|
223
|
-
reference_clause: nil,
|
224
|
-
file_path: fpath + revision_string
|
225
|
-
)
|
226
|
-
next unless concept
|
227
77
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
78
|
+
part_modules.each do |(bibdata, part_modules_arm, part_modules_mim)|
|
79
|
+
fn = "05x-stepmod-entities-modules-#{bibdata.part}.adoc"
|
80
|
+
File.open(fn, 'w') { |file|
|
81
|
+
file.puts("")
|
82
|
+
unless part_modules_arm.empty?
|
83
|
+
schema_name = part_modules_arm.first.first
|
84
|
+
concepts = part_modules_arm.first.last
|
235
85
|
|
236
|
-
|
237
|
-
|
86
|
+
# puts "SCHEMA NAME ARM: #{schema_name}"
|
87
|
+
file.puts("== #{schema_name}\n\n")
|
88
|
+
file.puts(concepts.map(&:to_mn_adoc).join("\n"))
|
238
89
|
end
|
239
|
-
end
|
240
90
|
|
241
|
-
|
91
|
+
file.puts("")
|
242
92
|
|
243
|
-
|
244
|
-
|
245
|
-
next
|
246
|
-
elsif part_concepts.length < 3
|
247
|
-
log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{part_concepts.length} terms."
|
93
|
+
unless part_modules_mim.empty?
|
94
|
+
schema_name = part_modules_mim.first.first
|
248
95
|
|
249
|
-
|
250
|
-
|
96
|
+
# puts "SCHEMA NAME MIM: #{schema_name}"
|
97
|
+
concepts = part_modules_mim.first.last
|
98
|
+
file.puts("== #{schema_name}\n\n")
|
99
|
+
file.puts(concepts.map(&:to_mn_adoc).join("\n"))
|
251
100
|
end
|
252
|
-
else
|
253
|
-
fn = "03x-stepmod-#{bibdata.part}.adoc"
|
254
|
-
File.open(fn, 'w') { |file|
|
255
|
-
file.puts("== #{part_to_title(bibdata)}\n\n")
|
256
|
-
file.puts(part_concepts.map(&:to_mn_adoc).join("\n"))
|
257
|
-
}
|
258
|
-
log "INFO: written to: #{fn}"
|
259
|
-
end
|
260
|
-
|
261
|
-
unless part_resources.empty?
|
262
|
-
fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"
|
263
|
-
File.open(fn, 'w') { |file|
|
264
|
-
file.puts("== #{part_to_title(bibdata)}\n\n")
|
265
|
-
file.puts(part_resources.map(&:to_mn_adoc).join("\n"))
|
266
|
-
}
|
267
|
-
log "INFO: written to: #{fn}"
|
268
|
-
end
|
269
|
-
|
270
|
-
unless part_modules.empty?
|
271
|
-
fn = "04x-stepmod-entities-modules-#{bibdata.part}.adoc"
|
272
|
-
File.open(fn, 'w') { |file|
|
273
|
-
file.puts("== #{part_to_title(bibdata)}\n\n")
|
274
|
-
file.puts(part_modules.map(&:to_mn_adoc).join("\n"))
|
275
|
-
}
|
276
|
-
log "INFO: written to: #{fn}"
|
277
|
-
end
|
278
101
|
|
102
|
+
}
|
103
|
+
log "INFO: written to: #{fn}"
|
279
104
|
end
|
280
105
|
|
281
106
|
File.open('031-stepmod-general.adoc', 'w') { |file|
|
282
107
|
file.puts(general_concepts.map(&:to_mn_adoc).join("\n"))
|
283
108
|
}
|
109
|
+
log "INFO: written to: 031-stepmod-general.adoc"
|
284
110
|
|
285
111
|
File.open('041-stepmod-entities-resources.adoc', 'w') { |file|
|
286
112
|
file.puts(resource_concepts.map(&:to_mn_adoc).join("\n"))
|
287
113
|
}
|
288
|
-
|
289
|
-
File.open('051-stepmod-entities-modules.adoc', 'w') { |file|
|
290
|
-
file.puts(module_concepts.map(&:to_mn_adoc).join("\n"))
|
291
|
-
}
|
292
|
-
|
293
|
-
log "INFO: written to: 031-stepmod-general.adoc"
|
114
|
+
log "INFO: written to: 041-stepmod-entities-resources.adoc"
|
294
115
|
|
295
116
|
File.open('991-generated-bibliography.adoc', 'w') { |file|
|
296
117
|
file.puts(parsed_bibliography.map(&:to_mn_adoc).sort.uniq.join("\n"))
|
297
118
|
}
|
298
|
-
|
299
119
|
log "INFO: written to: 991-generated-bibliography.adoc"
|
@@ -21,14 +21,20 @@ module Stepmod
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def self.parse(definition_xml, reference_anchor:, reference_clause:, file_path:)
|
24
|
-
converted_definition = Stepmod::Utils::StepmodDefinitionConverter
|
25
|
-
|
24
|
+
converted_definition = Stepmod::Utils::StepmodDefinitionConverter.convert(
|
25
|
+
definition_xml,
|
26
|
+
{
|
27
|
+
# We don't want examples and notes
|
28
|
+
no_notes_examples: true,
|
29
|
+
reference_anchor: reference_anchor
|
30
|
+
}
|
31
|
+
)
|
26
32
|
|
27
33
|
return nil if converted_definition.nil? || converted_definition.strip.empty?
|
28
34
|
|
29
|
-
if definition_xml.name == '
|
35
|
+
if definition_xml.name == 'ext_description'
|
30
36
|
converted_definition = <<~TEXT
|
31
|
-
#{converted_definition
|
37
|
+
#{converted_definition}
|
32
38
|
|
33
39
|
NOTE: This term is incompletely defined in this document.
|
34
40
|
Reference <<#{reference_anchor}>> for the complete definition.
|
@@ -44,7 +50,7 @@ module Stepmod
|
|
44
50
|
|
45
51
|
def to_mn_adoc
|
46
52
|
<<~TEXT
|
47
|
-
// STEPmod path
|
53
|
+
// STEPmod path:#{!file_path.empty? ? " #{file_path}" : ""}
|
48
54
|
#{converted_definition}
|
49
55
|
|
50
56
|
[.source]
|
@@ -12,13 +12,15 @@ module Stepmod
|
|
12
12
|
|
13
13
|
def treat_children(node, state)
|
14
14
|
converted = node.children.each_with_object({}) do |child, res|
|
15
|
-
content = treat(child, state).strip
|
16
|
-
next if content.empty?
|
17
15
|
|
18
|
-
|
19
|
-
|
16
|
+
content = treat(child, state)
|
17
|
+
next if content.strip.empty?
|
18
|
+
|
19
|
+
res[child] = content
|
20
|
+
end
|
20
21
|
previous = nil
|
21
22
|
result = ''
|
23
|
+
|
22
24
|
converted.each.with_index do |(child, content), i|
|
23
25
|
if block_tag?(child, previous)
|
24
26
|
result += "\n\n"
|
@@ -30,6 +32,11 @@ module Stepmod
|
|
30
32
|
result += content
|
31
33
|
previous = child
|
32
34
|
end
|
35
|
+
|
36
|
+
# Remove double newlines for every line
|
37
|
+
result = result.gsub(/\n\n+/, "\n\n")
|
38
|
+
result = result.squeeze(' ')
|
39
|
+
|
33
40
|
result.strip
|
34
41
|
end
|
35
42
|
|
@@ -50,8 +57,9 @@ module Stepmod
|
|
50
57
|
return unless can_transform_to_alt?(first_child_tag)
|
51
58
|
|
52
59
|
result = Stepmod::Utils::Converters::Synonym
|
53
|
-
|
54
|
-
|
60
|
+
.new
|
61
|
+
.convert(first_child_tag)
|
62
|
+
|
55
63
|
first_child_tag.remove
|
56
64
|
"#{result}\n\n"
|
57
65
|
end
|
@@ -5,10 +5,21 @@ module Stepmod
|
|
5
5
|
module Converters
|
6
6
|
class Example < ReverseAdoc::Converters::Base
|
7
7
|
def convert(node, state = {})
|
8
|
-
|
8
|
+
|
9
|
+
# If we want to skip this node
|
10
|
+
return '' if state[:no_notes_examples]
|
11
|
+
|
12
|
+
<<~TEMPLATE
|
13
|
+
|
14
|
+
[example]
|
15
|
+
====
|
16
|
+
#{treat_children(node, state).strip}
|
17
|
+
====
|
18
|
+
|
19
|
+
TEMPLATE
|
9
20
|
end
|
10
21
|
end
|
11
22
|
ReverseAdoc::Converters.register :example, Example.new
|
12
23
|
end
|
13
24
|
end
|
14
|
-
end
|
25
|
+
end
|
@@ -4,9 +4,11 @@ module Stepmod
|
|
4
4
|
class ExtDescription < ReverseAdoc::Converters::Base
|
5
5
|
def convert(node, state = {})
|
6
6
|
state = state.merge(schema_name: node['linkend'])
|
7
|
+
child_text = treat_children(node, state).strip
|
8
|
+
|
7
9
|
<<~TEMPLATE
|
8
10
|
(*"#{node['linkend']}"
|
9
|
-
#{
|
11
|
+
#{child_text}
|
10
12
|
*)
|
11
13
|
TEMPLATE
|
12
14
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'reverse_adoc/converters/figure'
|
3
|
+
|
4
|
+
module Stepmod
|
5
|
+
module Utils
|
6
|
+
module Converters
|
7
|
+
class Figure < ReverseAdoc::Converters::Figure
|
8
|
+
def convert(node, state = {})
|
9
|
+
# If we want to skip this node
|
10
|
+
return '' if state[:no_notes_examples]
|
11
|
+
|
12
|
+
super
|
13
|
+
end
|
14
|
+
end
|
15
|
+
# This replaces the converter
|
16
|
+
ReverseAdoc::Converters.register :figure, Figure.new
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
@@ -13,10 +13,14 @@ module Stepmod
|
|
13
13
|
# We take the text value of the element and convert to this:
|
14
14
|
|
15
15
|
# term:[individual products]
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
ref = node.text.strip
|
18
|
+
if !ref.empty?
|
19
|
+
" term:[#{normalized_ref(ref)}] "
|
20
|
+
elsif
|
21
|
+
ref = node['linkend'].split(':').first
|
22
|
+
" *#{ref}*"
|
18
23
|
end
|
19
|
-
" term:[#{normalized_ref(ref)}] "
|
20
24
|
end
|
21
25
|
|
22
26
|
private
|
@@ -5,10 +5,21 @@ module Stepmod
|
|
5
5
|
module Converters
|
6
6
|
class Note < ReverseAdoc::Converters::Base
|
7
7
|
def convert(node, state = {})
|
8
|
-
|
8
|
+
|
9
|
+
# If we want to skip this node
|
10
|
+
return '' if state[:no_notes_examples]
|
11
|
+
|
12
|
+
<<~TEMPLATE
|
13
|
+
|
14
|
+
[NOTE]
|
15
|
+
--
|
16
|
+
#{treat_children(node, state).strip}
|
17
|
+
--
|
18
|
+
|
19
|
+
TEMPLATE
|
9
20
|
end
|
10
21
|
end
|
11
22
|
ReverseAdoc::Converters.register :note, Note.new
|
12
23
|
end
|
13
24
|
end
|
14
|
-
end
|
25
|
+
end
|
@@ -6,13 +6,35 @@ module Stepmod
|
|
6
6
|
state = state.merge(schema_name: node['linkend'])
|
7
7
|
linkend = node['linkend'].split('.')
|
8
8
|
|
9
|
-
# We
|
10
|
-
|
9
|
+
# We only want ENTITY entries, not their attributes
|
10
|
+
# https://github.com/metanorma/iso-10303-2/issues/36#issuecomment-841300092
|
11
|
+
return nil if linkend.length != 2
|
12
|
+
|
13
|
+
child_text = treat_children(node, state).strip
|
14
|
+
return nil if child_text.empty?
|
15
|
+
|
16
|
+
# Only taking the first paragraph of the definition
|
17
|
+
child_text = child_text.split("\n").first
|
18
|
+
|
19
|
+
# # Only taking the first sentence
|
20
|
+
# if child_text.contains?(".")
|
21
|
+
# child_text = child_text.split(".").first
|
22
|
+
# end
|
23
|
+
|
24
|
+
domain = case linkend.first
|
25
|
+
when /_mim$/, /_arm$/
|
26
|
+
"STEP module"
|
27
|
+
# when /_schema$/
|
28
|
+
else
|
29
|
+
"STEP resource"
|
30
|
+
end
|
11
31
|
|
12
32
|
<<~TEMPLATE
|
13
|
-
=== #{
|
33
|
+
=== #{linkend.last}
|
34
|
+
|
35
|
+
#{domain ? "domain:[" + domain + "]" : ""}
|
14
36
|
|
15
|
-
|
37
|
+
#{child_text}
|
16
38
|
TEMPLATE
|
17
39
|
end
|
18
40
|
end
|
@@ -30,8 +30,8 @@ module Stepmod
|
|
30
30
|
def treat_text(node)
|
31
31
|
text = node.text
|
32
32
|
text = preserve_nbsp(text)
|
33
|
-
# text = remove_border_newlines(text)
|
34
33
|
text = remove_inner_newlines(text)
|
34
|
+
text = remove_border_newlines(text)
|
35
35
|
|
36
36
|
text = preserve_keychars_within_backticks(text)
|
37
37
|
text = preserve_tags(text)
|
@@ -7,15 +7,20 @@ module Stepmod
|
|
7
7
|
module Converters
|
8
8
|
class Uof < ReverseAdoc::Converters::Base
|
9
9
|
def convert(node, state = {})
|
10
|
-
<<~TEXT
|
11
|
-
=== #{node['name'].strip}
|
12
10
|
|
13
|
-
<
|
14
|
-
|
11
|
+
# WARNING: <uof> tag content is deprecated
|
12
|
+
return ""
|
13
|
+
|
14
|
+
#
|
15
|
+
# <<~TEXT
|
16
|
+
# === #{node['name'].strip}
|
17
|
+
|
18
|
+
# <STEP module> #{treat_children(node, state).strip}
|
19
|
+
# TEXT
|
15
20
|
end
|
16
21
|
end
|
17
22
|
|
18
23
|
ReverseAdoc::Converters.register :uof, Uof.new
|
19
24
|
end
|
20
25
|
end
|
21
|
-
end
|
26
|
+
end
|
@@ -16,6 +16,7 @@ require 'stepmod/utils/converters/stepmod_ext_description'
|
|
16
16
|
require 'stepmod/utils/converters/term'
|
17
17
|
require 'stepmod/utils/converters/synonym'
|
18
18
|
require 'stepmod/utils/converters/uof'
|
19
|
+
require 'stepmod/utils/converters/figure'
|
19
20
|
|
20
21
|
require 'reverse_adoc/converters/a'
|
21
22
|
require 'reverse_adoc/converters/blockquote'
|
@@ -52,7 +53,7 @@ module Stepmod
|
|
52
53
|
return '' unless root
|
53
54
|
|
54
55
|
ReverseAdoc.config.with(options) do
|
55
|
-
result = ReverseAdoc::Converters.lookup(root.name).convert(root)
|
56
|
+
result = ReverseAdoc::Converters.lookup(root.name).convert(root, options)
|
56
57
|
return '' unless result
|
57
58
|
ReverseAdoc.cleaner.tidy(result.dup)
|
58
59
|
end
|
@@ -0,0 +1,326 @@
|
|
1
|
+
require 'stepmod/utils/stepmod_definition_converter'
|
2
|
+
require 'stepmod/utils/bibdata'
|
3
|
+
require 'stepmod/utils/concept'
|
4
|
+
|
5
|
+
ReverseAdoc.config.unknown_tags = :bypass
|
6
|
+
|
7
|
+
module Stepmod
|
8
|
+
module Utils
|
9
|
+
class TermsExtractor
|
10
|
+
# TODO: we may want a command line option to override this in the future
|
11
|
+
ACCEPTED_STAGES = %w(IS DIS FDIS TS)
|
12
|
+
|
13
|
+
attr_reader :stepmod_path,
|
14
|
+
:stepmod_dir,
|
15
|
+
:general_concepts,
|
16
|
+
:resource_concepts,
|
17
|
+
:parsed_bibliography,
|
18
|
+
:encountered_terms,
|
19
|
+
:cvs_mode,
|
20
|
+
:part_concepts,
|
21
|
+
:part_resources,
|
22
|
+
:part_modules,
|
23
|
+
:stdout
|
24
|
+
|
25
|
+
def self.call(stepmod_dir, stdout = STDOUT)
|
26
|
+
new(stepmod_dir, stdout).call
|
27
|
+
end
|
28
|
+
|
29
|
+
def initialize(stepmod_dir, stdout)
|
30
|
+
@stdout = stdout
|
31
|
+
@stepmod_dir = stepmod_dir
|
32
|
+
@stepmod_path = Pathname.new(stepmod_dir).realpath
|
33
|
+
@general_concepts = []
|
34
|
+
@resource_concepts = []
|
35
|
+
@parsed_bibliography = []
|
36
|
+
@part_concepts = []
|
37
|
+
@part_resources = []
|
38
|
+
@part_modules = []
|
39
|
+
@encountered_terms = {}
|
40
|
+
end
|
41
|
+
|
42
|
+
def log message
|
43
|
+
stdout.puts "[stepmod-utils] #{message}"
|
44
|
+
end
|
45
|
+
|
46
|
+
def term_special_category(bibdata)
|
47
|
+
case bibdata.part.to_i
|
48
|
+
when 41,42,43,44,45,46,47,51
|
49
|
+
true
|
50
|
+
when [56..112]
|
51
|
+
true
|
52
|
+
else
|
53
|
+
false
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def call
|
58
|
+
# If we are using the stepmod CVS repository, provide the revision number per file
|
59
|
+
@cvs_mode = if Dir.exists?(stepmod_path.join('CVS'))
|
60
|
+
require 'ptools'
|
61
|
+
# ptools provides File.which
|
62
|
+
File.which("cvs")
|
63
|
+
end
|
64
|
+
|
65
|
+
log "INFO: STEPmod directory set to #{stepmod_dir}."
|
66
|
+
|
67
|
+
if cvs_mode
|
68
|
+
log "INFO: STEPmod directory is a CVS repository and will detect revisions."
|
69
|
+
log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
|
70
|
+
else
|
71
|
+
log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
|
72
|
+
end
|
73
|
+
|
74
|
+
log "INFO: Detecting paths..."
|
75
|
+
|
76
|
+
repo_index = Nokogiri::XML(File.read(stepmod_path.join('repository_index.xml'))).root
|
77
|
+
|
78
|
+
files = []
|
79
|
+
|
80
|
+
# add module paths
|
81
|
+
repo_index.xpath('//module').each do |x|
|
82
|
+
path = Pathname.new("#{stepmod_dir}/modules/#{x['name']}/module.xml")
|
83
|
+
files << path if File.exists? path
|
84
|
+
end
|
85
|
+
|
86
|
+
# add resource_docs paths
|
87
|
+
repo_index.xpath('//resource_doc').each do |x|
|
88
|
+
path = Pathname.new("#{stepmod_dir}/resource_docs/#{x['name']}/resource.xml")
|
89
|
+
files << path if File.exists? path
|
90
|
+
end
|
91
|
+
|
92
|
+
# add business_object_models paths
|
93
|
+
repo_index.xpath('//business_object_model').each do |x|
|
94
|
+
path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/business_object_model.xml")
|
95
|
+
files << path if File.exists? path
|
96
|
+
end
|
97
|
+
|
98
|
+
# add application_protocols paths
|
99
|
+
repo_index.xpath('//application_protocol').each do |x|
|
100
|
+
path = Pathname.new("#{stepmod_dir}/application_protocols/#{x['name']}/application_protocol.xml")
|
101
|
+
files << path if File.exists? path
|
102
|
+
end
|
103
|
+
|
104
|
+
files.sort!.uniq!
|
105
|
+
process_term_files(files)
|
106
|
+
|
107
|
+
[
|
108
|
+
general_concepts,
|
109
|
+
resource_concepts,
|
110
|
+
parsed_bibliography,
|
111
|
+
part_concepts,
|
112
|
+
part_resources,
|
113
|
+
part_modules
|
114
|
+
]
|
115
|
+
end
|
116
|
+
|
117
|
+
private
|
118
|
+
|
119
|
+
def process_term_files(files)
|
120
|
+
parsed_schema_names = {}
|
121
|
+
files.each do |file_path|
|
122
|
+
file_path = file_path.realpath
|
123
|
+
fpath = file_path.relative_path_from(stepmod_path)
|
124
|
+
|
125
|
+
log "INFO: Processing XML file #{fpath}"
|
126
|
+
current_document = Nokogiri::XML(File.read(file_path)).root
|
127
|
+
|
128
|
+
bibdata = nil
|
129
|
+
begin
|
130
|
+
bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
|
131
|
+
rescue
|
132
|
+
log "WARNING: Unknown file #{fpath}, skipped"
|
133
|
+
next
|
134
|
+
end
|
135
|
+
|
136
|
+
unless ACCEPTED_STAGES.include? bibdata.doctype
|
137
|
+
log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
|
138
|
+
next
|
139
|
+
end
|
140
|
+
|
141
|
+
if bibdata.part.to_s.empty?
|
142
|
+
log "FATAL: missing `part` attribute: #{fpath}"
|
143
|
+
log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
|
144
|
+
next
|
145
|
+
end
|
146
|
+
|
147
|
+
revision_string = "\n// CVS: revision not detected"
|
148
|
+
if cvs_mode
|
149
|
+
# Run `cvs status` to find out version
|
150
|
+
|
151
|
+
log "INFO: Detecting CVS revision..."
|
152
|
+
Dir.chdir(stepmod_path) do
|
153
|
+
status = `cvs status #{fpath}`
|
154
|
+
|
155
|
+
unless status.empty?
|
156
|
+
working_rev = status.split(/\n/).grep(/Working revision:/).first.match(/revision:\s+(.+)$/)[1]
|
157
|
+
repo_rev = status.split(/\n/).grep(/Repository revision:/).first.match(/revision:\t(.+)\t/)[1]
|
158
|
+
log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
|
159
|
+
revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
|
160
|
+
"// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# read definitions
|
166
|
+
current_part_concepts = []
|
167
|
+
definition_index = 0
|
168
|
+
current_document.xpath('//definition').each do |definition|
|
169
|
+
definition_index += 1
|
170
|
+
term_id = definition['id']
|
171
|
+
unless term_id.nil?
|
172
|
+
if encountered_terms[term_id]
|
173
|
+
log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
|
174
|
+
end
|
175
|
+
encountered_terms[term_id] = true
|
176
|
+
end
|
177
|
+
|
178
|
+
# Assume that definition is located in clause 3 of the ISO document
|
179
|
+
# in order. We really don't have a good reference here.
|
180
|
+
ref_clause = "3.#{definition_index}"
|
181
|
+
|
182
|
+
concept = Stepmod::Utils::Concept.parse(
|
183
|
+
definition,
|
184
|
+
reference_anchor: bibdata.anchor,
|
185
|
+
reference_clause: ref_clause,
|
186
|
+
file_path: fpath + revision_string
|
187
|
+
)
|
188
|
+
next unless concept
|
189
|
+
|
190
|
+
unless term_special_category(bibdata)
|
191
|
+
# log "INFO: this part is generic"
|
192
|
+
general_concepts << concept
|
193
|
+
else
|
194
|
+
# log "INFO: this part is special"
|
195
|
+
current_part_concepts << concept
|
196
|
+
end
|
197
|
+
|
198
|
+
parsed_bibliography << bibdata
|
199
|
+
end
|
200
|
+
|
201
|
+
current_part_resources = []
|
202
|
+
current_part_modules_arm = {}
|
203
|
+
current_part_modules_mim = {}
|
204
|
+
|
205
|
+
log "INFO: FILE PATH IS #{file_path}"
|
206
|
+
case file_path.to_s
|
207
|
+
when /resource.xml$/
|
208
|
+
log "INFO: Processing resource.xml for #{file_path}"
|
209
|
+
# Assumption: every schema is only linked by a single resource_docs document.
|
210
|
+
current_document.xpath('//schema').each do |schema_node|
|
211
|
+
schema_name = schema_node['name']
|
212
|
+
if parsed_schema_names[schema_name]
|
213
|
+
log "ERROR: We have encountered this schema before: #{schema_name} from path #{parsed_schema_names[schema_name]}, now at #{file_path}"
|
214
|
+
next
|
215
|
+
else
|
216
|
+
parsed_schema_names[schema_name] = file_path
|
217
|
+
end
|
218
|
+
|
219
|
+
Dir["#{stepmod_path}/resources/#{schema_name}/descriptions.xml"].each do |description_xml_path|
|
220
|
+
log "INFO: Processing resources schema #{description_xml_path}"
|
221
|
+
description_document = Nokogiri::XML(File.read(description_xml_path)).root
|
222
|
+
description_document.xpath('//ext_description').each do |ext_description|
|
223
|
+
|
224
|
+
# log "INFO: Processing linkend[#{ext_description['linkend']}]"
|
225
|
+
|
226
|
+
concept = Stepmod::Utils::Concept.parse(
|
227
|
+
ext_description,
|
228
|
+
reference_anchor: bibdata.anchor,
|
229
|
+
reference_clause: nil,
|
230
|
+
file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
|
231
|
+
)
|
232
|
+
next unless concept
|
233
|
+
|
234
|
+
unless term_special_category(bibdata)
|
235
|
+
# log "INFO: this part is generic"
|
236
|
+
resource_concepts << concept
|
237
|
+
else
|
238
|
+
# log "INFO: this part is special"
|
239
|
+
current_part_resources << concept
|
240
|
+
end
|
241
|
+
|
242
|
+
parsed_bibliography << bibdata
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
when /module.xml$/
|
248
|
+
log "INFO: Processing module.xml for #{file_path}"
|
249
|
+
# Assumption: every schema is only linked by a single module document.
|
250
|
+
# puts current_document.xpath('//module').length
|
251
|
+
schema_name = current_document.xpath('//module').first['name']
|
252
|
+
if parsed_schema_names[schema_name]
|
253
|
+
log "ERROR: We have encountered this schema before: #{schema_name} from path #{parsed_schema_names[schema_name]}, now at #{file_path}"
|
254
|
+
next
|
255
|
+
else
|
256
|
+
parsed_schema_names[schema_name] = file_path
|
257
|
+
end
|
258
|
+
|
259
|
+
description_xml_path = "#{stepmod_path}/modules/#{schema_name}/arm_descriptions.xml"
|
260
|
+
log "INFO: Processing modules schema #{description_xml_path}"
|
261
|
+
|
262
|
+
if File.exists?(description_xml_path)
|
263
|
+
description_document = Nokogiri::XML(File.read(description_xml_path)).root
|
264
|
+
description_document.xpath('//ext_description').each do |ext_description|
|
265
|
+
|
266
|
+
linkend_schema = ext_description['linkend'].split('.').first
|
267
|
+
concept = Stepmod::Utils::Concept.parse(
|
268
|
+
ext_description,
|
269
|
+
reference_anchor: bibdata.anchor,
|
270
|
+
reference_clause: nil,
|
271
|
+
file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
|
272
|
+
)
|
273
|
+
next unless concept
|
274
|
+
|
275
|
+
current_part_modules_arm[linkend_schema] ||= []
|
276
|
+
current_part_modules_arm[linkend_schema] << concept
|
277
|
+
# puts part_modules_arm.inspect
|
278
|
+
parsed_bibliography << bibdata
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
description_xml_path = "#{stepmod_path}/modules/#{schema_name}/mim_descriptions.xml"
|
283
|
+
log "INFO: Processing modules schema #{description_xml_path}"
|
284
|
+
|
285
|
+
if File.exists?(description_xml_path)
|
286
|
+
description_document = Nokogiri::XML(File.read(description_xml_path)).root
|
287
|
+
description_document.xpath('//ext_description').each do |ext_description|
|
288
|
+
|
289
|
+
linkend_schema = ext_description['linkend'].split('.').first
|
290
|
+
|
291
|
+
concept = Stepmod::Utils::Concept.parse(
|
292
|
+
ext_description,
|
293
|
+
reference_anchor: bibdata.anchor,
|
294
|
+
reference_clause: nil,
|
295
|
+
file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
|
296
|
+
)
|
297
|
+
next unless concept
|
298
|
+
|
299
|
+
current_part_modules_mim[linkend_schema] ||= []
|
300
|
+
current_part_modules_mim[linkend_schema] << concept
|
301
|
+
|
302
|
+
parsed_bibliography << bibdata
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
end
|
307
|
+
|
308
|
+
log "INFO: Completed processing XML file #{fpath}"
|
309
|
+
if current_part_concepts.empty?
|
310
|
+
log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it contains no concepts."
|
311
|
+
elsif current_part_concepts.length < 3
|
312
|
+
log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{current_part_concepts.length} terms."
|
313
|
+
|
314
|
+
current_part_concepts.each do |x|
|
315
|
+
general_concepts << x
|
316
|
+
end
|
317
|
+
else
|
318
|
+
part_concepts << [bibdata, current_part_concepts] unless current_part_concepts.empty?
|
319
|
+
end
|
320
|
+
part_resources << [bibdata, current_part_resources] unless current_part_resources.empty?
|
321
|
+
part_modules << [bibdata, current_part_modules_arm, current_part_modules_mim] if current_part_modules_arm.size + current_part_modules_mim.size > 0
|
322
|
+
end
|
323
|
+
end
|
324
|
+
end
|
325
|
+
end
|
326
|
+
end
|
data/stepmod-utils.gemspec
CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
|
32
32
|
spec.add_runtime_dependency "thor", ">= 0.20.3"
|
33
33
|
spec.add_runtime_dependency "reverse_adoc", ">= 0.2.9"
|
34
|
-
spec.add_runtime_dependency "ptools", '~> 1.3'
|
35
34
|
spec.add_runtime_dependency "concurrent-ruby"
|
35
|
+
spec.add_runtime_dependency "ptools"
|
36
36
|
spec.add_development_dependency "byebug", "~> 11.1"
|
37
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stepmod-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-05-
|
11
|
+
date: 2021-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -39,21 +39,21 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.2.9
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: concurrent-ruby
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: ptools
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
@@ -139,6 +139,7 @@ files:
|
|
139
139
|
- lib/stepmod/utils/converters/express_ref_express_description.rb
|
140
140
|
- lib/stepmod/utils/converters/ext_description.rb
|
141
141
|
- lib/stepmod/utils/converters/ext_descriptions.rb
|
142
|
+
- lib/stepmod/utils/converters/figure.rb
|
142
143
|
- lib/stepmod/utils/converters/fund_cons.rb
|
143
144
|
- lib/stepmod/utils/converters/head.rb
|
144
145
|
- lib/stepmod/utils/converters/hr.rb
|
@@ -168,6 +169,7 @@ files:
|
|
168
169
|
- lib/stepmod/utils/smrl_resource_converter.rb
|
169
170
|
- lib/stepmod/utils/stepmod_definition_converter.rb
|
170
171
|
- lib/stepmod/utils/stepmod_file_annotator.rb
|
172
|
+
- lib/stepmod/utils/terms_extractor.rb
|
171
173
|
- lib/stepmod/utils/version.rb
|
172
174
|
- migrating_from_cvs.adoc
|
173
175
|
- resource_example.xml
|
@@ -176,7 +178,7 @@ homepage: https://github.com/metanorma/stepmod-utils
|
|
176
178
|
licenses:
|
177
179
|
- BSD-2-Clause
|
178
180
|
metadata: {}
|
179
|
-
post_install_message:
|
181
|
+
post_install_message:
|
180
182
|
rdoc_options: []
|
181
183
|
require_paths:
|
182
184
|
- lib
|
@@ -191,8 +193,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
191
193
|
- !ruby/object:Gem::Version
|
192
194
|
version: '0'
|
193
195
|
requirements: []
|
194
|
-
rubygems_version: 3.0.3
|
195
|
-
signing_key:
|
196
|
+
rubygems_version: 3.0.3.1
|
197
|
+
signing_key:
|
196
198
|
specification_version: 4
|
197
199
|
summary: Stepmod-utils is a toolkit that works on STEPmod data.
|
198
200
|
test_files: []
|