stepmod-utils 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/stepmod-extract-terms +46 -226
- data/lib/stepmod/utils/concept.rb +11 -5
- data/lib/stepmod/utils/converters/def.rb +14 -6
- data/lib/stepmod/utils/converters/example.rb +13 -2
- data/lib/stepmod/utils/converters/ext_description.rb +3 -1
- data/lib/stepmod/utils/converters/figure.rb +20 -0
- data/lib/stepmod/utils/converters/module_ref.rb +7 -3
- data/lib/stepmod/utils/converters/note.rb +13 -2
- data/lib/stepmod/utils/converters/stepmod_ext_description.rb +26 -4
- data/lib/stepmod/utils/converters/text.rb +1 -1
- data/lib/stepmod/utils/converters/uof.rb +10 -5
- data/lib/stepmod/utils/smrl_resource_converter.rb +1 -1
- data/lib/stepmod/utils/stepmod_definition_converter.rb +2 -1
- data/lib/stepmod/utils/terms_extractor.rb +326 -0
- data/lib/stepmod/utils/version.rb +1 -1
- data/stepmod-utils.gemspec +1 -1
- metadata +14 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 887612180cfe004fb244d41e7c553858ed441d5e4c60c8d27d06fb7ec3219cd2
|
4
|
+
data.tar.gz: f7f25caa5652593b6eafcb792206614f2b053e56a0dd05801018e2217533b636
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41057b14b1c6ab326dff8e402be3a74737e9479212a76577a9ef81738da9f542ab6cbd892723949b004a2a6ff6058525aed5de47b1fbe3859ac139a724028608
|
7
|
+
data.tar.gz: 519d9daa5451c17f60e6e2e764405908bf7e3c9f6b7279dcef9ee2bb554c63a0535b1e63c3aaed178549413a2253e24a3687c86d1ad60c8fa569c65aed756647
|
data/exe/stepmod-extract-terms
CHANGED
@@ -17,38 +17,16 @@ class Gem::Specification
|
|
17
17
|
end
|
18
18
|
|
19
19
|
require 'bundler/setup'
|
20
|
-
require 'stepmod/utils/
|
21
|
-
require 'stepmod/utils/bibdata'
|
22
|
-
require 'stepmod/utils/concept'
|
23
|
-
require 'ptools'
|
24
|
-
|
25
|
-
ReverseAdoc.config.unknown_tags = :bypass
|
26
|
-
|
27
|
-
# TODO: we may want a command line option to override this in the future
|
28
|
-
ACCEPTED_STAGES = %w(IS DIS FDIS TS)
|
29
|
-
|
30
|
-
general_concepts = []
|
31
|
-
resource_concepts = []
|
32
|
-
module_concepts = []
|
33
|
-
parsed_bibliography = []
|
34
|
-
encountered_terms = {}
|
20
|
+
require 'stepmod/utils/terms_extractor'
|
35
21
|
|
36
22
|
stepmod_dir = ARGV.first || Dir.pwd
|
37
23
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
when 41,42,43,44,45,46,47,51
|
45
|
-
true
|
46
|
-
when [56..112]
|
47
|
-
true
|
48
|
-
else
|
49
|
-
false
|
50
|
-
end
|
51
|
-
end
|
24
|
+
general_concepts,
|
25
|
+
resource_concepts,
|
26
|
+
parsed_bibliography,
|
27
|
+
part_concepts,
|
28
|
+
part_resources,
|
29
|
+
part_modules = Stepmod::Utils::TermsExtractor.call(stepmod_dir)
|
52
30
|
|
53
31
|
def part_to_title(bibdata)
|
54
32
|
case bibdata.part.to_i
|
@@ -73,227 +51,69 @@ def part_to_title(bibdata)
|
|
73
51
|
end
|
74
52
|
end
|
75
53
|
|
76
|
-
|
77
|
-
|
78
|
-
# If we are using the stepmod CVS repository, provide the revision number per file
|
79
|
-
has_cvs = File.which("cvs")
|
80
|
-
cvs_mode = has_cvs && Dir.exists?(stepmod_path.join('CVS'))
|
81
|
-
|
82
|
-
log "INFO: STEPmod directory set to #{stepmod_dir}."
|
83
|
-
|
84
|
-
if cvs_mode
|
85
|
-
log "INFO: STEPmod directory is a CVS repository and will detect revisions."
|
86
|
-
log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
|
87
|
-
else
|
88
|
-
log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
|
54
|
+
# Prints one progress/diagnostic line on standard output, tagged with the
# tool name so it can be told apart from other output.
#
# @param message [#to_s] text placed after the "[stepmod-utils] " prefix
# @return [nil]
def log(message)
  $stdout.puts("[stepmod-utils] #{message}")
end
|
90
57
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
candidate_paths = Dir["#{stepmod_dir}/**/#{t}"]
|
101
|
-
acc << candidate_paths
|
102
|
-
|
103
|
-
end.flatten.sort.uniq
|
104
|
-
|
105
|
-
max_encountered_refs_indexes = {}
|
106
|
-
|
107
|
-
files.each do |file_path|
|
108
|
-
file_path = Pathname.new(file_path).realpath
|
109
|
-
fpath = file_path.relative_path_from(stepmod_path)
|
110
|
-
|
111
|
-
log "INFO: Processing XML file #{fpath}"
|
112
|
-
current_document = Nokogiri::XML(File.read(file_path)).root
|
113
|
-
|
114
|
-
bibdata = nil
|
115
|
-
begin
|
116
|
-
bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
|
117
|
-
rescue
|
118
|
-
log "WARNING: Unknown file #{fpath}, skipped"
|
119
|
-
next
|
120
|
-
end
|
121
|
-
|
122
|
-
unless ACCEPTED_STAGES.include? bibdata.doctype
|
123
|
-
log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
|
124
|
-
next
|
125
|
-
end
|
126
|
-
|
127
|
-
if bibdata.part.to_s.empty?
|
128
|
-
log "FATAL: missing `part` attribute: #{fpath}"
|
129
|
-
log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
|
130
|
-
next
|
131
|
-
end
|
132
|
-
|
133
|
-
revision_string = "\n// CVS: revision not detected"
|
134
|
-
if cvs_mode
|
135
|
-
# Run `cvs status` to find out version
|
136
|
-
|
137
|
-
log "INFO: Detecting CVS revision..."
|
138
|
-
Dir.chdir(stepmod_path) do
|
139
|
-
status = `cvs status #{fpath}`
|
140
|
-
|
141
|
-
unless status.empty?
|
142
|
-
working_rev = status.split(/\n/).grep(/Working revision:/).first.match(/revision:\s+(.+)$/)[1]
|
143
|
-
repo_rev = status.split(/\n/).grep(/Repository revision:/).first.match(/revision:\t(.+)\t/)[1]
|
144
|
-
log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
|
145
|
-
revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
|
146
|
-
"// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
# read definitions
|
152
|
-
part_concepts = []
|
153
|
-
current_document.xpath('//definition').each do |definition|
|
154
|
-
index = max_encountered_refs_indexes[bibdata.anchor] || 1
|
155
|
-
term_id = definition['id']
|
156
|
-
unless term_id.nil?
|
157
|
-
if encountered_terms[term_id]
|
158
|
-
log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
|
159
|
-
end
|
160
|
-
encountered_terms[term_id] = true
|
161
|
-
end
|
162
|
-
|
163
|
-
# Assume that definition is located in clause 3 of the ISO document
|
164
|
-
# in order. We really don't have a good reference here.
|
165
|
-
ref_clause = "3.#{index}"
|
166
|
-
|
167
|
-
concept = Stepmod::Utils::Concept.parse(
|
168
|
-
definition,
|
169
|
-
reference_anchor: bibdata.anchor,
|
170
|
-
reference_clause: ref_clause,
|
171
|
-
file_path: fpath + revision_string
|
172
|
-
)
|
173
|
-
next unless concept
|
174
|
-
|
175
|
-
unless term_special_category(bibdata)
|
176
|
-
# log "INFO: this part is generic"
|
177
|
-
general_concepts << concept
|
178
|
-
else
|
179
|
-
# log "INFO: this part is special"
|
180
|
-
part_concepts << concept
|
181
|
-
end
|
182
|
-
|
183
|
-
max_encountered_refs_indexes[bibdata.anchor] = index + 1
|
184
|
-
parsed_bibliography << bibdata
|
185
|
-
end
|
186
|
-
|
187
|
-
part_modules = []
|
188
|
-
current_document.xpath('//arm/uof').each do |uof_node|
|
189
|
-
concept = Stepmod::Utils::Concept.parse(
|
190
|
-
uof_node,
|
191
|
-
reference_anchor: bibdata.anchor,
|
192
|
-
reference_clause: nil,
|
193
|
-
file_path: fpath + revision_string
|
194
|
-
)
|
195
|
-
# puts concept.inspect
|
196
|
-
|
197
|
-
next unless concept
|
198
|
-
|
199
|
-
unless term_special_category(bibdata)
|
200
|
-
# log "INFO: this part is generic"
|
201
|
-
module_concepts << concept
|
202
|
-
else
|
203
|
-
# log "INFO: this part is special"
|
204
|
-
part_modules << concept
|
205
|
-
end
|
206
|
-
|
207
|
-
parsed_bibliography << bibdata
|
208
|
-
end
|
58
|
+
# One "03x" AsciiDoc file per part: a level-2 heading with the part title,
# followed by every concept collected for that part.
part_concepts.each do |bibdata, concepts_for_part|
  fn = "03x-stepmod-#{bibdata.part}.adoc"

  File.open(fn, 'w') do |file|
    heading = "== #{part_to_title(bibdata)}\n\n"
    file.puts(heading)

    rendered = concepts_for_part.map(&:to_mn_adoc)
    file.puts(rendered.join("\n"))
  end

  log "INFO: written to: #{fn}"
end
|
209
66
|
|
210
|
-
part_resources = []
|
211
|
-
# Assumption: every schema is only linked by a single resource_docs document.
|
212
|
-
current_document.xpath('//schema').each do |schema_node|
|
213
|
-
schema_name = schema_node['name']
|
214
67
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
68
|
+
# One "04x" AsciiDoc file per part: a level-2 heading with the part title,
# followed by every resource concept collected for that part.
part_resources.each do |bibdata, resources_for_part|
  fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"

  File.open(fn, 'w') do |file|
    heading = "== #{part_to_title(bibdata)}\n\n"
    file.puts(heading)

    rendered = resources_for_part.map(&:to_mn_adoc)
    file.puts(rendered.join("\n"))
  end

  log "INFO: written to: #{fn}"
end
|
219
76
|
|
220
|
-
concept = Stepmod::Utils::Concept.parse(
|
221
|
-
ext_description,
|
222
|
-
reference_anchor: bibdata.anchor,
|
223
|
-
reference_clause: nil,
|
224
|
-
file_path: fpath + revision_string
|
225
|
-
)
|
226
|
-
next unless concept
|
227
77
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
78
|
+
part_modules.each do |(bibdata, part_modules_arm, part_modules_mim)|
|
79
|
+
fn = "05x-stepmod-entities-modules-#{bibdata.part}.adoc"
|
80
|
+
File.open(fn, 'w') { |file|
|
81
|
+
file.puts("")
|
82
|
+
unless part_modules_arm.empty?
|
83
|
+
schema_name = part_modules_arm.first.first
|
84
|
+
concepts = part_modules_arm.first.last
|
235
85
|
|
236
|
-
|
237
|
-
|
86
|
+
# puts "SCHEMA NAME ARM: #{schema_name}"
|
87
|
+
file.puts("== #{schema_name}\n\n")
|
88
|
+
file.puts(concepts.map(&:to_mn_adoc).join("\n"))
|
238
89
|
end
|
239
|
-
end
|
240
90
|
|
241
|
-
|
91
|
+
file.puts("")
|
242
92
|
|
243
|
-
|
244
|
-
|
245
|
-
next
|
246
|
-
elsif part_concepts.length < 3
|
247
|
-
log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{part_concepts.length} terms."
|
93
|
+
unless part_modules_mim.empty?
|
94
|
+
schema_name = part_modules_mim.first.first
|
248
95
|
|
249
|
-
|
250
|
-
|
96
|
+
# puts "SCHEMA NAME MIM: #{schema_name}"
|
97
|
+
concepts = part_modules_mim.first.last
|
98
|
+
file.puts("== #{schema_name}\n\n")
|
99
|
+
file.puts(concepts.map(&:to_mn_adoc).join("\n"))
|
251
100
|
end
|
252
|
-
else
|
253
|
-
fn = "03x-stepmod-#{bibdata.part}.adoc"
|
254
|
-
File.open(fn, 'w') { |file|
|
255
|
-
file.puts("== #{part_to_title(bibdata)}\n\n")
|
256
|
-
file.puts(part_concepts.map(&:to_mn_adoc).join("\n"))
|
257
|
-
}
|
258
|
-
log "INFO: written to: #{fn}"
|
259
|
-
end
|
260
|
-
|
261
|
-
unless part_resources.empty?
|
262
|
-
fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"
|
263
|
-
File.open(fn, 'w') { |file|
|
264
|
-
file.puts("== #{part_to_title(bibdata)}\n\n")
|
265
|
-
file.puts(part_resources.map(&:to_mn_adoc).join("\n"))
|
266
|
-
}
|
267
|
-
log "INFO: written to: #{fn}"
|
268
|
-
end
|
269
|
-
|
270
|
-
unless part_modules.empty?
|
271
|
-
fn = "04x-stepmod-entities-modules-#{bibdata.part}.adoc"
|
272
|
-
File.open(fn, 'w') { |file|
|
273
|
-
file.puts("== #{part_to_title(bibdata)}\n\n")
|
274
|
-
file.puts(part_modules.map(&:to_mn_adoc).join("\n"))
|
275
|
-
}
|
276
|
-
log "INFO: written to: #{fn}"
|
277
|
-
end
|
278
101
|
|
102
|
+
}
|
103
|
+
log "INFO: written to: #{fn}"
|
279
104
|
end
|
280
105
|
|
281
106
|
File.open('031-stepmod-general.adoc', 'w') { |file|
|
282
107
|
file.puts(general_concepts.map(&:to_mn_adoc).join("\n"))
|
283
108
|
}
|
109
|
+
log "INFO: written to: 031-stepmod-general.adoc"
|
284
110
|
|
285
111
|
File.open('041-stepmod-entities-resources.adoc', 'w') { |file|
|
286
112
|
file.puts(resource_concepts.map(&:to_mn_adoc).join("\n"))
|
287
113
|
}
|
288
|
-
|
289
|
-
File.open('051-stepmod-entities-modules.adoc', 'w') { |file|
|
290
|
-
file.puts(module_concepts.map(&:to_mn_adoc).join("\n"))
|
291
|
-
}
|
292
|
-
|
293
|
-
log "INFO: written to: 031-stepmod-general.adoc"
|
114
|
+
log "INFO: written to: 041-stepmod-entities-resources.adoc"
|
294
115
|
|
295
116
|
File.open('991-generated-bibliography.adoc', 'w') { |file|
|
296
117
|
file.puts(parsed_bibliography.map(&:to_mn_adoc).sort.uniq.join("\n"))
|
297
118
|
}
|
298
|
-
|
299
119
|
log "INFO: written to: 991-generated-bibliography.adoc"
|
@@ -21,14 +21,20 @@ module Stepmod
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def self.parse(definition_xml, reference_anchor:, reference_clause:, file_path:)
|
24
|
-
converted_definition = Stepmod::Utils::StepmodDefinitionConverter
|
25
|
-
|
24
|
+
converted_definition = Stepmod::Utils::StepmodDefinitionConverter.convert(
|
25
|
+
definition_xml,
|
26
|
+
{
|
27
|
+
# We don't want examples and notes
|
28
|
+
no_notes_examples: true,
|
29
|
+
reference_anchor: reference_anchor
|
30
|
+
}
|
31
|
+
)
|
26
32
|
|
27
33
|
return nil if converted_definition.nil? || converted_definition.strip.empty?
|
28
34
|
|
29
|
-
if definition_xml.name == '
|
35
|
+
if definition_xml.name == 'ext_description'
|
30
36
|
converted_definition = <<~TEXT
|
31
|
-
#{converted_definition
|
37
|
+
#{converted_definition}
|
32
38
|
|
33
39
|
NOTE: This term is incompletely defined in this document.
|
34
40
|
Reference <<#{reference_anchor}>> for the complete definition.
|
@@ -44,7 +50,7 @@ module Stepmod
|
|
44
50
|
|
45
51
|
def to_mn_adoc
|
46
52
|
<<~TEXT
|
47
|
-
// STEPmod path
|
53
|
+
// STEPmod path:#{!file_path.empty? ? " #{file_path}" : ""}
|
48
54
|
#{converted_definition}
|
49
55
|
|
50
56
|
[.source]
|
@@ -12,13 +12,15 @@ module Stepmod
|
|
12
12
|
|
13
13
|
def treat_children(node, state)
|
14
14
|
converted = node.children.each_with_object({}) do |child, res|
|
15
|
-
content = treat(child, state).strip
|
16
|
-
next if content.empty?
|
17
15
|
|
18
|
-
|
19
|
-
|
16
|
+
content = treat(child, state)
|
17
|
+
next if content.strip.empty?
|
18
|
+
|
19
|
+
res[child] = content
|
20
|
+
end
|
20
21
|
previous = nil
|
21
22
|
result = ''
|
23
|
+
|
22
24
|
converted.each.with_index do |(child, content), i|
|
23
25
|
if block_tag?(child, previous)
|
24
26
|
result += "\n\n"
|
@@ -30,6 +32,11 @@ module Stepmod
|
|
30
32
|
result += content
|
31
33
|
previous = child
|
32
34
|
end
|
35
|
+
|
36
|
+
# Remove double newlines for every line
|
37
|
+
result = result.gsub(/\n\n+/, "\n\n")
|
38
|
+
result = result.squeeze(' ')
|
39
|
+
|
33
40
|
result.strip
|
34
41
|
end
|
35
42
|
|
@@ -50,8 +57,9 @@ module Stepmod
|
|
50
57
|
return unless can_transform_to_alt?(first_child_tag)
|
51
58
|
|
52
59
|
result = Stepmod::Utils::Converters::Synonym
|
53
|
-
|
54
|
-
|
60
|
+
.new
|
61
|
+
.convert(first_child_tag)
|
62
|
+
|
55
63
|
first_child_tag.remove
|
56
64
|
"#{result}\n\n"
|
57
65
|
end
|
@@ -5,10 +5,21 @@ module Stepmod
|
|
5
5
|
module Converters
|
6
6
|
class Example < ReverseAdoc::Converters::Base
|
7
7
|
def convert(node, state = {})
|
8
|
-
|
8
|
+
|
9
|
+
# If we want to skip this node
|
10
|
+
return '' if state[:no_notes_examples]
|
11
|
+
|
12
|
+
<<~TEMPLATE
|
13
|
+
|
14
|
+
[example]
|
15
|
+
====
|
16
|
+
#{treat_children(node, state).strip}
|
17
|
+
====
|
18
|
+
|
19
|
+
TEMPLATE
|
9
20
|
end
|
10
21
|
end
|
11
22
|
ReverseAdoc::Converters.register :example, Example.new
|
12
23
|
end
|
13
24
|
end
|
14
|
-
end
|
25
|
+
end
|
@@ -4,9 +4,11 @@ module Stepmod
|
|
4
4
|
class ExtDescription < ReverseAdoc::Converters::Base
|
5
5
|
def convert(node, state = {})
|
6
6
|
state = state.merge(schema_name: node['linkend'])
|
7
|
+
child_text = treat_children(node, state).strip
|
8
|
+
|
7
9
|
<<~TEMPLATE
|
8
10
|
(*"#{node['linkend']}"
|
9
|
-
#{
|
11
|
+
#{child_text}
|
10
12
|
*)
|
11
13
|
TEMPLATE
|
12
14
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'reverse_adoc/converters/figure'
|
3
|
+
|
4
|
+
module Stepmod
|
5
|
+
module Utils
|
6
|
+
module Converters
|
7
|
+
# Figure converter that can be silenced through the conversion state.
class Figure < ReverseAdoc::Converters::Figure
  # @param node the element being converted (passed through to the parent)
  # @param state [Hash] conversion state; a truthy :no_notes_examples entry
  #   suppresses the figure entirely
  # @return [String] '' when suppressed, otherwise the parent converter's result
  def convert(node, state = {})
    if state[:no_notes_examples]
      ''
    else
      super
    end
  end
end
|
15
|
+
# This replaces the converter
|
16
|
+
ReverseAdoc::Converters.register :figure, Figure.new
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
@@ -13,10 +13,14 @@ module Stepmod
|
|
13
13
|
# We take the text value of the element and convert to this:
|
14
14
|
|
15
15
|
# term:[individual products]
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
ref = node.text.strip
|
18
|
+
if !ref.empty?
|
19
|
+
" term:[#{normalized_ref(ref)}] "
|
20
|
+
elsif
|
21
|
+
ref = node['linkend'].split(':').first
|
22
|
+
" *#{ref}*"
|
18
23
|
end
|
19
|
-
" term:[#{normalized_ref(ref)}] "
|
20
24
|
end
|
21
25
|
|
22
26
|
private
|
@@ -5,10 +5,21 @@ module Stepmod
|
|
5
5
|
module Converters
|
6
6
|
class Note < ReverseAdoc::Converters::Base
|
7
7
|
def convert(node, state = {})
|
8
|
-
|
8
|
+
|
9
|
+
# If we want to skip this node
|
10
|
+
return '' if state[:no_notes_examples]
|
11
|
+
|
12
|
+
<<~TEMPLATE
|
13
|
+
|
14
|
+
[NOTE]
|
15
|
+
--
|
16
|
+
#{treat_children(node, state).strip}
|
17
|
+
--
|
18
|
+
|
19
|
+
TEMPLATE
|
9
20
|
end
|
10
21
|
end
|
11
22
|
ReverseAdoc::Converters.register :note, Note.new
|
12
23
|
end
|
13
24
|
end
|
14
|
-
end
|
25
|
+
end
|
@@ -6,13 +6,35 @@ module Stepmod
|
|
6
6
|
state = state.merge(schema_name: node['linkend'])
|
7
7
|
linkend = node['linkend'].split('.')
|
8
8
|
|
9
|
-
# We
|
10
|
-
|
9
|
+
# We only want ENTITY entries, not their attributes
|
10
|
+
# https://github.com/metanorma/iso-10303-2/issues/36#issuecomment-841300092
|
11
|
+
return nil if linkend.length != 2
|
12
|
+
|
13
|
+
child_text = treat_children(node, state).strip
|
14
|
+
return nil if child_text.empty?
|
15
|
+
|
16
|
+
# Only taking the first paragraph of the definition
|
17
|
+
child_text = child_text.split("\n").first
|
18
|
+
|
19
|
+
# # Only taking the first sentence
|
20
|
+
# if child_text.contains?(".")
|
21
|
+
# child_text = child_text.split(".").first
|
22
|
+
# end
|
23
|
+
|
24
|
+
domain = case linkend.first
|
25
|
+
when /_mim$/, /_arm$/
|
26
|
+
"STEP module"
|
27
|
+
# when /_schema$/
|
28
|
+
else
|
29
|
+
"STEP resource"
|
30
|
+
end
|
11
31
|
|
12
32
|
<<~TEMPLATE
|
13
|
-
=== #{
|
33
|
+
=== #{linkend.last}
|
34
|
+
|
35
|
+
#{domain ? "domain:[" + domain + "]" : ""}
|
14
36
|
|
15
|
-
|
37
|
+
#{child_text}
|
16
38
|
TEMPLATE
|
17
39
|
end
|
18
40
|
end
|
@@ -30,8 +30,8 @@ module Stepmod
|
|
30
30
|
def treat_text(node)
|
31
31
|
text = node.text
|
32
32
|
text = preserve_nbsp(text)
|
33
|
-
# text = remove_border_newlines(text)
|
34
33
|
text = remove_inner_newlines(text)
|
34
|
+
text = remove_border_newlines(text)
|
35
35
|
|
36
36
|
text = preserve_keychars_within_backticks(text)
|
37
37
|
text = preserve_tags(text)
|
@@ -7,15 +7,20 @@ module Stepmod
|
|
7
7
|
module Converters
|
8
8
|
class Uof < ReverseAdoc::Converters::Base
|
9
9
|
def convert(node, state = {})
|
10
|
-
<<~TEXT
|
11
|
-
=== #{node['name'].strip}
|
12
10
|
|
13
|
-
<
|
14
|
-
|
11
|
+
# WARNING: <uof> tag content is deprecated
|
12
|
+
return ""
|
13
|
+
|
14
|
+
#
|
15
|
+
# <<~TEXT
|
16
|
+
# === #{node['name'].strip}
|
17
|
+
|
18
|
+
# <STEP module> #{treat_children(node, state).strip}
|
19
|
+
# TEXT
|
15
20
|
end
|
16
21
|
end
|
17
22
|
|
18
23
|
ReverseAdoc::Converters.register :uof, Uof.new
|
19
24
|
end
|
20
25
|
end
|
21
|
-
end
|
26
|
+
end
|
@@ -16,6 +16,7 @@ require 'stepmod/utils/converters/stepmod_ext_description'
|
|
16
16
|
require 'stepmod/utils/converters/term'
|
17
17
|
require 'stepmod/utils/converters/synonym'
|
18
18
|
require 'stepmod/utils/converters/uof'
|
19
|
+
require 'stepmod/utils/converters/figure'
|
19
20
|
|
20
21
|
require 'reverse_adoc/converters/a'
|
21
22
|
require 'reverse_adoc/converters/blockquote'
|
@@ -52,7 +53,7 @@ module Stepmod
|
|
52
53
|
return '' unless root
|
53
54
|
|
54
55
|
ReverseAdoc.config.with(options) do
|
55
|
-
result = ReverseAdoc::Converters.lookup(root.name).convert(root)
|
56
|
+
result = ReverseAdoc::Converters.lookup(root.name).convert(root, options)
|
56
57
|
return '' unless result
|
57
58
|
ReverseAdoc.cleaner.tidy(result.dup)
|
58
59
|
end
|
@@ -0,0 +1,326 @@
|
|
1
|
+
require 'stepmod/utils/stepmod_definition_converter'
|
2
|
+
require 'stepmod/utils/bibdata'
|
3
|
+
require 'stepmod/utils/concept'
|
4
|
+
|
5
|
+
ReverseAdoc.config.unknown_tags = :bypass
|
6
|
+
|
7
|
+
module Stepmod
|
8
|
+
module Utils
|
9
|
+
class TermsExtractor
|
10
|
+
# TODO: we may want a command line option to override this in the future
|
11
|
+
ACCEPTED_STAGES = %w(IS DIS FDIS TS)
|
12
|
+
|
13
|
+
attr_reader :stepmod_path,
|
14
|
+
:stepmod_dir,
|
15
|
+
:general_concepts,
|
16
|
+
:resource_concepts,
|
17
|
+
:parsed_bibliography,
|
18
|
+
:encountered_terms,
|
19
|
+
:cvs_mode,
|
20
|
+
:part_concepts,
|
21
|
+
:part_resources,
|
22
|
+
:part_modules,
|
23
|
+
:stdout
|
24
|
+
|
25
|
+
# Convenience entry point: builds an extractor for +stepmod_dir+ and runs it.
#
# @param stepmod_dir [String] directory handed to the constructor
# @param stdout [#puts] stream handed to the constructor (defaults to STDOUT)
# @return whatever the instance-level #call returns
def self.call(stepmod_dir, stdout = STDOUT)
  extractor = new(stepmod_dir, stdout)
  extractor.call
end
|
28
|
+
|
29
|
+
# Prepares an extractor rooted at +stepmod_dir+ with empty result collections.
#
# @param stepmod_dir [String] path of the STEPmod checkout; it is resolved
#   with Pathname#realpath, so it has to exist
# @param stdout [#puts] stream that #log writes to
def initialize(stepmod_dir, stdout)
  @stdout = stdout
  @stepmod_dir = stepmod_dir
  @stepmod_path = Pathname.new(stepmod_dir).realpath

  # Result accumulators exposed through the attr_readers.
  @general_concepts, @resource_concepts, @parsed_bibliography = [], [], []
  @part_concepts, @part_resources, @part_modules = [], [], []

  # Term ids seen so far (id => true), used for duplicate reporting.
  @encountered_terms = {}
end
|
41
|
+
|
42
|
+
# Emits one progress/diagnostic line, tagged with the tool name, on the
# stream returned by the `stdout` reader.
#
# @param message [#to_s] text placed after the "[stepmod-utils] " prefix
def log(message)
  line = "[stepmod-utils] #{message}"
  stdout.puts(line)
end
|
45
|
+
|
46
|
+
# Tells whether concepts from the given document are collected per part
# (true) or pooled with the general concepts (false).
#
# The original second branch was written `when [56..112]`, i.e. an Array
# wrapping a Range; `Array === Integer` is never true, so parts 56-112 were
# silently treated as general. A bare Range is what `case` needs.
#
# @param bibdata [#part] bibliographic record; `part` is coerced with #to_i,
#   so nil, "" and non-numeric strings count as part 0
# @return [Boolean] true for parts 41-47, 51 and 56-112 (inclusive)
def term_special_category(bibdata)
  case bibdata.part.to_i
  when 41..47, 51, 56..112
    true
  else
    false
  end
end
|
56
|
+
|
57
|
+
# Scans the STEPmod checkout and extracts every concept it can find.
#
# Reads repository_index.xml from the checkout root, collects the document
# files it lists (modules, resource docs, business object models and
# application protocols) that actually exist on disk, and hands them to
# process_term_files.
#
# Uses Dir.exist? / Pathname#exist? instead of the `exists?` aliases, which
# were removed in Ruby 3.2.
#
# @return [Array] six elements, in this order: general_concepts,
#   resource_concepts, parsed_bibliography, part_concepts, part_resources,
#   part_modules
# @raise [SystemCallError] if repository_index.xml cannot be read
def call
  # If we are using the stepmod CVS repository, provide the revision number
  # per file. cvs_mode is whatever File.which("cvs") returns when a CVS
  # directory is present, and nil otherwise.
  @cvs_mode =
    if Dir.exist?(stepmod_path.join('CVS'))
      require 'ptools' # loaded lazily; ptools provides File.which
      File.which("cvs")
    end

  log "INFO: STEPmod directory set to #{stepmod_dir}."

  if cvs_mode
    log "INFO: STEPmod directory is a CVS repository and will detect revisions."
    log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
  else
    log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
  end

  log "INFO: Detecting paths..."

  repo_index = Nokogiri::XML(File.read(stepmod_path.join('repository_index.xml'))).root

  # [index element, directory under the checkout, document file name]
  indexed_documents = [
    ['module', 'modules', 'module.xml'],
    ['resource_doc', 'resource_docs', 'resource.xml'],
    ['business_object_model', 'business_object_models', 'business_object_model.xml'],
    ['application_protocol', 'application_protocols', 'application_protocol.xml']
  ]

  files = indexed_documents.flat_map do |element, directory, file_name|
    candidates = repo_index.xpath("//#{element}").map do |entry|
      Pathname.new("#{stepmod_dir}/#{directory}/#{entry['name']}/#{file_name}")
    end
    # Index entries without a file on disk are ignored.
    candidates.select(&:exist?)
  end

  process_term_files(files.sort.uniq)

  [
    general_concepts,
    resource_concepts,
    parsed_bibliography,
    part_concepts,
    part_resources,
    part_modules
  ]
end
|
116
|
+
|
117
|
+
private
|
118
|
+
|
119
|
+
# Parses every STEPmod document in +files+ and fills the result collections
# (general_concepts, resource_concepts, parsed_bibliography, part_concepts,
# part_resources, part_modules).
#
# Documents are skipped when Bibdata cannot be built from them, when their
# doctype is not in ACCEPTED_STAGES, or when they carry no part number.
#
# @param files [Array<Pathname>] document paths located under stepmod_path
# @return [Array<Pathname>] +files+ (the return value of #each)
def process_term_files(files)
  # schema name => path of the document that first referenced it
  parsed_schema_names = {}

  files.each do |file_path|
    file_path = file_path.realpath
    fpath = file_path.relative_path_from(stepmod_path)

    log "INFO: Processing XML file #{fpath}"
    current_document = Nokogiri::XML(File.read(file_path)).root

    bibdata = nil
    begin
      bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
    rescue StandardError
      log "WARNING: Unknown file #{fpath}, skipped"
      next
    end

    unless ACCEPTED_STAGES.include?(bibdata.doctype)
      log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
      next
    end

    if bibdata.part.to_s.empty?
      log "FATAL: missing `part` attribute: #{fpath}"
      log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
      next
    end

    revision_string = cvs_revision_string(fpath)

    # NOTE(review): fpath is a Pathname, so `+` here is Pathname#+ (path
    # joining), not string concatenation -- confirm the resulting value is
    # what Concept expects for file_path.
    source_path = fpath + revision_string

    current_part_concepts = collect_definition_concepts(current_document, bibdata, fpath, source_path)

    current_part_resources = []
    current_part_modules_arm = {}
    current_part_modules_mim = {}

    log "INFO: FILE PATH IS #{file_path}"
    case file_path.to_s
    when /resource.xml$/
      log "INFO: Processing resource.xml for #{file_path}"
      current_part_resources = collect_resource_concepts(current_document, bibdata, file_path, parsed_schema_names)

    when /module.xml$/
      log "INFO: Processing module.xml for #{file_path}"
      # Assumption: every schema is only linked by a single module document.
      schema_name = current_document.xpath('//module').first['name']

      # A repeated module schema abandons the rest of this document,
      # including the per-part bookkeeping below.
      next unless register_schema(parsed_schema_names, schema_name, file_path)

      current_part_modules_arm = collect_module_concepts(schema_name, 'arm_descriptions.xml', bibdata)
      current_part_modules_mim = collect_module_concepts(schema_name, 'mim_descriptions.xml', bibdata)
    end

    log "INFO: Completed processing XML file #{fpath}"
    if current_part_concepts.empty?
      log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it contains no concepts."
    elsif current_part_concepts.length < 3
      log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{current_part_concepts.length} terms."

      # Too few terms for a section of their own: pool them with the general ones.
      general_concepts.concat(current_part_concepts)
    else
      part_concepts << [bibdata, current_part_concepts]
    end

    part_resources << [bibdata, current_part_resources] unless current_part_resources.empty?

    unless current_part_modules_arm.empty? && current_part_modules_mim.empty?
      part_modules << [bibdata, current_part_modules_arm, current_part_modules_mim]
    end
  end
end

# Builds the "// CVS ..." trailer that accompanies a document's path.
#
# The command is run without a shell (array form of IO.popen), so unusual
# characters in the path cannot be misinterpreted. Output that lacks the
# expected revision lines yields the "not detected" trailer instead of an
# exception.
#
# @param fpath [Pathname] document path relative to stepmod_path
# @return [String] trailer starting with a newline
def cvs_revision_string(fpath)
  not_detected = "\n// CVS: revision not detected"
  return not_detected unless cvs_mode

  # Run `cvs status` to find out version
  log "INFO: Detecting CVS revision..."
  status = Dir.chdir(stepmod_path) do
    IO.popen(['cvs', 'status', fpath.to_s], &:read)
  end
  return not_detected if status.empty?

  status_lines = status.split(/\n/)
  revision_from = lambda do |label, pattern|
    line = status_lines.grep(label).first
    line && line[pattern, 1]
  end

  working_rev = revision_from.call(/Working revision:/, /revision:\s+(.+)$/)
  repo_rev = revision_from.call(/Repository revision:/, /revision:\t(.+)\t/)
  unless working_rev && repo_rev
    log "WARNING: Could not parse `cvs status` output for #{fpath}"
    return not_detected
  end

  log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
  "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" \
    "// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
end

# Parses every <definition> of +document+. Concepts of general parts go
# straight into general_concepts; the others are returned to the caller.
#
# @return [Array] concepts belonging to a special-category part
def collect_definition_concepts(document, bibdata, fpath, source_path)
  concepts_for_part = []

  document.xpath('//definition').each_with_index do |definition, position|
    term_id = definition['id']
    unless term_id.nil?
      log "FATAL: Duplicated term with id: #{term_id}, #{fpath}" if encountered_terms[term_id]
      encountered_terms[term_id] = true
    end

    # Assume that definition is located in clause 3 of the ISO document
    # in order. We really don't have a good reference here.
    ref_clause = "3.#{position + 1}"

    concept = Stepmod::Utils::Concept.parse(
      definition,
      reference_anchor: bibdata.anchor,
      reference_clause: ref_clause,
      file_path: source_path
    )
    next unless concept

    if term_special_category(bibdata)
      concepts_for_part << concept
    else
      general_concepts << concept
    end

    parsed_bibliography << bibdata
  end

  concepts_for_part
end

# Records +schema_name+ as handled by +file_path+.
#
# @return [Boolean] false (after logging an error) when the schema had
#   already been registered, true when it was registered just now
def register_schema(parsed_schema_names, schema_name, file_path)
  if parsed_schema_names[schema_name]
    log "ERROR: We have encountered this schema before: #{schema_name} from path #{parsed_schema_names[schema_name]}, now at #{file_path}"
    return false
  end

  parsed_schema_names[schema_name] = file_path
  true
end

# Parses the descriptions.xml of every <schema> referenced by a resource
# document. Concepts of general parts go into resource_concepts; the others
# are returned to the caller.
#
# @return [Array] resource concepts belonging to a special-category part
def collect_resource_concepts(document, bibdata, file_path, parsed_schema_names)
  resources_for_part = []

  # Assumption: every schema is only linked by a single resource_docs document.
  document.xpath('//schema').each do |schema_node|
    schema_name = schema_node['name']
    next unless register_schema(parsed_schema_names, schema_name, file_path)

    Dir["#{stepmod_path}/resources/#{schema_name}/descriptions.xml"].each do |description_xml_path|
      log "INFO: Processing resources schema #{description_xml_path}"
      description_document = Nokogiri::XML(File.read(description_xml_path)).root
      relative_path = Pathname.new(description_xml_path).relative_path_from(stepmod_path)

      description_document.xpath('//ext_description').each do |ext_description|
        concept = Stepmod::Utils::Concept.parse(
          ext_description,
          reference_anchor: bibdata.anchor,
          reference_clause: nil,
          file_path: relative_path
        )
        next unless concept

        if term_special_category(bibdata)
          resources_for_part << concept
        else
          resource_concepts << concept
        end

        parsed_bibliography << bibdata
      end
    end
  end

  resources_for_part
end

# Parses one descriptions file of a module (ARM or MIM) and groups the
# resulting concepts by the schema prefix of each entry's `linkend`.
#
# @param schema_name [String] module name, used as directory name
# @param descriptions_file [String] 'arm_descriptions.xml' or 'mim_descriptions.xml'
# @return [Hash{String => Array}] empty when the file does not exist
def collect_module_concepts(schema_name, descriptions_file, bibdata)
  concepts_by_schema = {}

  description_xml_path = "#{stepmod_path}/modules/#{schema_name}/#{descriptions_file}"
  log "INFO: Processing modules schema #{description_xml_path}"
  return concepts_by_schema unless File.exist?(description_xml_path)

  description_document = Nokogiri::XML(File.read(description_xml_path)).root
  relative_path = Pathname.new(description_xml_path).relative_path_from(stepmod_path)

  description_document.xpath('//ext_description').each do |ext_description|
    linkend_schema = ext_description['linkend'].split('.').first

    concept = Stepmod::Utils::Concept.parse(
      ext_description,
      reference_anchor: bibdata.anchor,
      reference_clause: nil,
      file_path: relative_path
    )
    next unless concept

    (concepts_by_schema[linkend_schema] ||= []) << concept
    parsed_bibliography << bibdata
  end

  concepts_by_schema
end
|
324
|
+
end
|
325
|
+
end
|
326
|
+
end
|
data/stepmod-utils.gemspec
CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
|
32
32
|
spec.add_runtime_dependency "thor", ">= 0.20.3"
|
33
33
|
spec.add_runtime_dependency "reverse_adoc", ">= 0.2.9"
|
34
|
-
spec.add_runtime_dependency "ptools", '~> 1.3'
|
35
34
|
spec.add_runtime_dependency "concurrent-ruby"
|
35
|
+
spec.add_runtime_dependency "ptools"
|
36
36
|
spec.add_development_dependency "byebug", "~> 11.1"
|
37
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stepmod-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-05-
|
11
|
+
date: 2021-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -39,21 +39,21 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.2.9
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: concurrent-ruby
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: ptools
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
@@ -139,6 +139,7 @@ files:
|
|
139
139
|
- lib/stepmod/utils/converters/express_ref_express_description.rb
|
140
140
|
- lib/stepmod/utils/converters/ext_description.rb
|
141
141
|
- lib/stepmod/utils/converters/ext_descriptions.rb
|
142
|
+
- lib/stepmod/utils/converters/figure.rb
|
142
143
|
- lib/stepmod/utils/converters/fund_cons.rb
|
143
144
|
- lib/stepmod/utils/converters/head.rb
|
144
145
|
- lib/stepmod/utils/converters/hr.rb
|
@@ -168,6 +169,7 @@ files:
|
|
168
169
|
- lib/stepmod/utils/smrl_resource_converter.rb
|
169
170
|
- lib/stepmod/utils/stepmod_definition_converter.rb
|
170
171
|
- lib/stepmod/utils/stepmod_file_annotator.rb
|
172
|
+
- lib/stepmod/utils/terms_extractor.rb
|
171
173
|
- lib/stepmod/utils/version.rb
|
172
174
|
- migrating_from_cvs.adoc
|
173
175
|
- resource_example.xml
|
@@ -176,7 +178,7 @@ homepage: https://github.com/metanorma/stepmod-utils
|
|
176
178
|
licenses:
|
177
179
|
- BSD-2-Clause
|
178
180
|
metadata: {}
|
179
|
-
post_install_message:
|
181
|
+
post_install_message:
|
180
182
|
rdoc_options: []
|
181
183
|
require_paths:
|
182
184
|
- lib
|
@@ -191,8 +193,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
191
193
|
- !ruby/object:Gem::Version
|
192
194
|
version: '0'
|
193
195
|
requirements: []
|
194
|
-
rubygems_version: 3.0.3
|
195
|
-
signing_key:
|
196
|
+
rubygems_version: 3.0.3.1
|
197
|
+
signing_key:
|
196
198
|
specification_version: 4
|
197
199
|
summary: Stepmod-utils is a toolkit that works on STEPmod data.
|
198
200
|
test_files: []
|