stepmod-utils 0.3.23 → 0.3.25
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test-concept-generation.yml +38 -0
- data/.gitignore +3 -0
- data/.rubocop.yml +3 -0
- data/exe/stepmod-annotate-all +27 -14
- data/exe/stepmod-extract-changes +57 -0
- data/exe/stepmod-extract-concepts +165 -0
- data/lib/stepmod/utils/change.rb +74 -0
- data/lib/stepmod/utils/change_collection.rb +50 -0
- data/lib/stepmod/utils/change_edition.rb +60 -0
- data/lib/stepmod/utils/change_edition_collection.rb +38 -0
- data/lib/stepmod/utils/changes_extractor.rb +194 -0
- data/lib/stepmod/utils/concept.rb +40 -10
- data/lib/stepmod/utils/converters/description.rb +22 -0
- data/lib/stepmod/utils/express_bibdata.rb +111 -0
- data/lib/stepmod/utils/smrl_description_converter.rb +1 -0
- data/lib/stepmod/utils/stepmod_file_annotator.rb +215 -14
- data/lib/stepmod/utils/term.rb +18 -5
- data/lib/stepmod/utils/terms_extractor.rb +253 -292
- data/lib/stepmod/utils/version.rb +1 -1
- data/stepmod-utils.gemspec +2 -1
- metadata +31 -11
- data/exe/stepmod-build-resource-docs-cache +0 -20
- data/exe/stepmod-extract-terms +0 -237
- data/exe/stepmod-find-express-files +0 -24
@@ -5,38 +5,69 @@ require "stepmod/utils/converters/express_note"
|
|
5
5
|
require "stepmod/utils/converters/express_example"
|
6
6
|
require "stepmod/utils/converters/express_figure"
|
7
7
|
require "stepmod/utils/converters/express_table"
|
8
|
+
require "expressir"
|
9
|
+
require "expressir/express/parser"
|
10
|
+
require "pubid-iso"
|
8
11
|
|
9
12
|
module Stepmod
|
10
13
|
module Utils
|
11
14
|
class StepmodFileAnnotator
|
12
|
-
attr_reader :express_file, :
|
15
|
+
attr_reader :express_file, :resource_docs_cache, :stepmod_dir
|
13
16
|
|
14
17
|
# @param express_file [String] path to the exp file needed to annotate
|
15
|
-
# @param
|
16
|
-
def initialize(express_file:,
|
18
|
+
# @param stepmod_dir [String, nil] directory containing data/resource_docs; defaults to the current working directory
|
19
|
+
def initialize(express_file:, stepmod_dir: nil)
|
17
20
|
@express_file = express_file
|
18
|
-
@
|
21
|
+
@resource_docs_cache = resource_docs_schemas(stepmod_dir)
|
19
22
|
@stepmod_dir = stepmod_dir || Dir.pwd
|
23
|
+
@added_bibdata = {}
|
24
|
+
|
25
|
+
@schema_name = Expressir::Express::Parser.from_file(express_file)
|
26
|
+
.schemas
|
27
|
+
.first
|
28
|
+
.id
|
29
|
+
end
|
30
|
+
|
31
|
+
def resource_docs_schemas(stepmod_dir)
|
32
|
+
filepath = File.join(stepmod_dir, "data/resource_docs/*/resource.xml")
|
33
|
+
|
34
|
+
schemas = {}
|
35
|
+
Dir.glob(filepath).each do |resource_docs_file|
|
36
|
+
match = resource_docs_file.match("data/resource_docs/([^/]+)/resource.xml")
|
37
|
+
resource_docs_dir = match.captures[0]
|
38
|
+
|
39
|
+
resource_docs = Nokogiri::XML(File.read(resource_docs_file)).root
|
40
|
+
resource_docs.xpath("schema").each do |schema|
|
41
|
+
schemas[schema["name"]] = resource_docs_dir
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
schemas
|
20
46
|
end
|
21
47
|
|
22
48
|
def call
|
23
49
|
match = File.basename(express_file).match('^(arm|mim|bom)\.exp$')
|
24
50
|
descriptions_base = match ? "#{match.captures[0]}_descriptions.xml" : "descriptions.xml"
|
51
|
+
|
25
52
|
descriptions_file = File.join(File.dirname(express_file),
|
26
53
|
descriptions_base)
|
54
|
+
|
27
55
|
output_express = File.read(express_file)
|
28
|
-
|
56
|
+
converted_description = ""
|
57
|
+
base_linked = ""
|
29
58
|
|
30
|
-
if File.
|
59
|
+
if File.exist?(descriptions_file)
|
31
60
|
descriptions = Nokogiri::XML(File.read(descriptions_file)).root
|
32
61
|
added_resource_descriptions = {}
|
62
|
+
|
33
63
|
descriptions.xpath("ext_description").each do |description|
|
34
64
|
# Add base resource from linked path if exists, eg "language_schema.language.wr:WR1" -> "language_schema"
|
35
65
|
base_linked = description["linkend"].to_s.split(".").first
|
66
|
+
|
36
67
|
if added_resource_descriptions[base_linked].nil?
|
37
68
|
base_reource_doc_dir = resource_docs_cache[description["linkend"].to_s.split(".").first]
|
38
69
|
if base_reource_doc_dir
|
39
|
-
|
70
|
+
converted_description << convert_from_resource_file(
|
40
71
|
base_reource_doc_dir, stepmod_dir, base_linked, descriptions_file
|
41
72
|
)
|
42
73
|
end
|
@@ -47,7 +78,7 @@ module Stepmod
|
|
47
78
|
# when a schema description is available from resource.xml and also descriptions.xml, the description from resource.xml is only used.
|
48
79
|
# https://github.com/metanorma/annotated-express/issues/32#issuecomment-792609078
|
49
80
|
if description.text.strip.length.positive? && resource_docs_dir.nil?
|
50
|
-
|
81
|
+
converted_description << convert_from_description_text(
|
51
82
|
descriptions_file, description
|
52
83
|
)
|
53
84
|
end
|
@@ -60,7 +91,28 @@ module Stepmod
|
|
60
91
|
end
|
61
92
|
end
|
62
93
|
|
63
|
-
|
94
|
+
bib_file_name = extract_bib_file_name(match, resource_docs_cache[@schema_name || ""])
|
95
|
+
bib_file = if match
|
96
|
+
File.join(File.dirname(express_file), bib_file_name)
|
97
|
+
else
|
98
|
+
resource_docs_file_path(stepmod_dir, bib_file_name)
|
99
|
+
end
|
100
|
+
|
101
|
+
output_express << if bib_file && File.exist?(bib_file)
|
102
|
+
prepend_bibdata(
|
103
|
+
converted_description || "",
|
104
|
+
# bib_file will not be present for resources
|
105
|
+
# that are not in resource_docs cache.
|
106
|
+
# e.g hierarchy_schema
|
107
|
+
bib_file,
|
108
|
+
@schema_name,
|
109
|
+
match,
|
110
|
+
)
|
111
|
+
else
|
112
|
+
converted_description
|
113
|
+
end
|
114
|
+
|
115
|
+
sanitize(output_express)
|
64
116
|
rescue StandardError => e
|
65
117
|
puts "[ERROR]!!! #{e.message}"
|
66
118
|
puts e.backtrace
|
@@ -68,6 +120,10 @@ module Stepmod
|
|
68
120
|
|
69
121
|
private
|
70
122
|
|
123
|
+
def sanitize(file_content)
|
124
|
+
file_content.gsub("(*)", "(`*`)")
|
125
|
+
end
|
126
|
+
|
71
127
|
def convert_from_description_text(descriptions_file, description)
|
72
128
|
Dir.chdir(File.dirname(descriptions_file)) do
|
73
129
|
wrapper = "<ext_descriptions>#{description}</ext_descriptions>"
|
@@ -119,10 +175,146 @@ module Stepmod
|
|
119
175
|
end
|
120
176
|
end
|
121
177
|
|
178
|
+
def prepend_bibdata(description, bibdata_file, schema_and_entity, match)
|
179
|
+
bib = Nokogiri::XML(File.read(bibdata_file)).root
|
180
|
+
bibdata = extract_bib_data(match, bib, schema_and_entity)
|
181
|
+
|
182
|
+
return description.to_s if @added_bibdata[schema_and_entity]
|
183
|
+
|
184
|
+
published_in = <<~PUBLISHED_IN
|
185
|
+
|
186
|
+
(*"#{schema_and_entity}.__published_in"
|
187
|
+
#{bibdata[:identifier]}
|
188
|
+
*)
|
189
|
+
PUBLISHED_IN
|
190
|
+
|
191
|
+
identifier = <<~IDENTIFIER if bibdata[:number]
|
192
|
+
(*"#{schema_and_entity}.__identifier"
|
193
|
+
ISO/TC 184/SC 4/WG 12 N#{bibdata[:number]}
|
194
|
+
*)
|
195
|
+
IDENTIFIER
|
196
|
+
|
197
|
+
supersedes = <<~SUPERSEDES if bibdata[:supersedes_concept]
|
198
|
+
(*"#{schema_and_entity}.__supersedes"
|
199
|
+
ISO/TC 184/SC 4/WG 12 N#{bibdata[:supersedes_concept]}
|
200
|
+
*)
|
201
|
+
SUPERSEDES
|
202
|
+
|
203
|
+
status = <<~STATUS if bibdata[:status]
|
204
|
+
(*"#{schema_and_entity}.__status"
|
205
|
+
#{bibdata[:status]}
|
206
|
+
*)
|
207
|
+
STATUS
|
208
|
+
|
209
|
+
title = <<~TITLE if bibdata[:title]
|
210
|
+
(*"#{schema_and_entity}.__title"
|
211
|
+
#{bibdata[:title]}
|
212
|
+
*)
|
213
|
+
TITLE
|
214
|
+
|
215
|
+
document = <<~DOCUMENT if bibdata_file
|
216
|
+
(*"#{schema_and_entity}.__schema_file"
|
217
|
+
#{Pathname(bibdata_file).relative_path_from(@stepmod_dir)}
|
218
|
+
*)
|
219
|
+
DOCUMENT
|
220
|
+
|
221
|
+
@added_bibdata[schema_and_entity] = true
|
222
|
+
|
223
|
+
[
|
224
|
+
published_in,
|
225
|
+
identifier,
|
226
|
+
supersedes,
|
227
|
+
status,
|
228
|
+
title,
|
229
|
+
description,
|
230
|
+
document,
|
231
|
+
].compact.join("\n")
|
232
|
+
end
|
233
|
+
|
234
|
+
def module?(match)
|
235
|
+
match && %w[arm mim].include?(match.captures[0])
|
236
|
+
end
|
237
|
+
|
238
|
+
def bom?(match)
|
239
|
+
match && %w[bom].include?(match.captures[0])
|
240
|
+
end
|
241
|
+
|
242
|
+
def extract_bib_file_name(match, default_file_name = "")
|
243
|
+
return default_file_name || "" unless match
|
244
|
+
|
245
|
+
if %w[arm mim].include?(match.captures[0])
|
246
|
+
"module.xml"
|
247
|
+
else
|
248
|
+
"business_object_model.xml"
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
def extract_bib_data(match, bib, schema_and_entity)
|
253
|
+
return resource_bib_data(bib, schema_and_entity) unless match
|
254
|
+
|
255
|
+
if module?(match)
|
256
|
+
module_bib_data(bib, match.captures[0])
|
257
|
+
elsif bom?(match)
|
258
|
+
bom_bib_data(bib)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def identifier(bib)
|
263
|
+
part = bib.attributes["part"].value
|
264
|
+
year = bib.attributes["publication.year"].value
|
265
|
+
|
266
|
+
# year="tbd" in data/modules/geometric_tolerance/module.xml and
|
267
|
+
# probably in some other places as well
|
268
|
+
year = "" if year == "tbd"
|
269
|
+
edition = bib.attributes["version"].value
|
270
|
+
|
271
|
+
pubid = Pubid::Iso::Identifier.new(
|
272
|
+
publisher: "ISO",
|
273
|
+
number: 10303,
|
274
|
+
)
|
275
|
+
|
276
|
+
pubid.part = part if part && !part.empty?
|
277
|
+
pubid.year = year.split("-").first if year && !year.empty?
|
278
|
+
pubid.edition = edition if edition && !edition.empty?
|
279
|
+
|
280
|
+
pubid.to_s(with_edition: true)
|
281
|
+
end
|
282
|
+
|
283
|
+
def resource_bib_data(bib, schema_and_entity)
|
284
|
+
schema = bib.xpath("schema[@name='#{schema_and_entity}']").first
|
285
|
+
|
286
|
+
{
|
287
|
+
identifier: identifier(bib),
|
288
|
+
number: schema.attributes["number"],
|
289
|
+
supersedes_concept: schema.attributes["number.supersedes"],
|
290
|
+
status: bib.attributes["status"],
|
291
|
+
title: bib.attributes["title"] || bib.attributes["name"],
|
292
|
+
}
|
293
|
+
end
|
294
|
+
|
295
|
+
def module_bib_data(bib, type)
|
296
|
+
{
|
297
|
+
identifier: identifier(bib),
|
298
|
+
number: bib.attributes["wg.number.#{type}"],
|
299
|
+
supersedes_concept: bib.attributes["wg.number.#{type}.supersedes"],
|
300
|
+
status: bib.attributes["status"],
|
301
|
+
title: bib.attributes["title"] || bib.attributes["name"],
|
302
|
+
}
|
303
|
+
end
|
304
|
+
|
305
|
+
def bom_bib_data(bib)
|
306
|
+
{
|
307
|
+
identifier: identifier(bib),
|
308
|
+
number: bib.attributes["wg.number.bom.exp"],
|
309
|
+
supersedes_concept: bib.attributes["wg.number.bom.supersedes"],
|
310
|
+
status: bib.attributes["status"],
|
311
|
+
title: bib.attributes["title"] || bib.attributes["name"],
|
312
|
+
}
|
313
|
+
end
|
314
|
+
|
122
315
|
def convert_from_resource_file(resource_docs_dir, stepmod_dir, linked, descriptions_file)
|
123
|
-
resource_docs_file =
|
124
|
-
|
125
|
-
# puts(resource_docs_file)
|
316
|
+
resource_docs_file = resource_docs_file_path(stepmod_dir, resource_docs_dir)
|
317
|
+
|
126
318
|
resource_docs = Nokogiri::XML(File.read(resource_docs_file)).root
|
127
319
|
schema = resource_docs.xpath("schema[@name='#{linked}']")
|
128
320
|
|
@@ -133,11 +325,20 @@ module Stepmod
|
|
133
325
|
wrapper,
|
134
326
|
{
|
135
327
|
no_notes_examples: false,
|
136
|
-
schema_and_entity: linked
|
137
|
-
}
|
328
|
+
schema_and_entity: linked,
|
329
|
+
},
|
138
330
|
)
|
139
331
|
end
|
140
332
|
end
|
333
|
+
|
334
|
+
def resource_docs_file_path(stepmod_dir, resource_docs_dir)
|
335
|
+
File.join(
|
336
|
+
stepmod_dir,
|
337
|
+
"data/resource_docs",
|
338
|
+
resource_docs_dir,
|
339
|
+
"resource.xml",
|
340
|
+
)
|
341
|
+
end
|
141
342
|
end
|
142
343
|
end
|
143
344
|
end
|
data/lib/stepmod/utils/term.rb
CHANGED
@@ -7,9 +7,9 @@ module Stepmod
|
|
7
7
|
attr_accessor :acronym
|
8
8
|
|
9
9
|
def to_mn_adoc
|
10
|
-
mn_adoc = ["=== #{definition}"]
|
10
|
+
mn_adoc = ["=== #{definition.map(&:content).join}"]
|
11
11
|
mn_adoc << "\nalt:[#{acronym}]" if acronym
|
12
|
-
mn_adoc << "\n\n#{designations.join(", ")}" if designations&.any?
|
12
|
+
mn_adoc << "\n\n#{designations.map(&:designation).join(", ")}" if designations&.any?
|
13
13
|
|
14
14
|
mn_adoc.join
|
15
15
|
end
|
@@ -18,15 +18,28 @@ module Stepmod
|
|
18
18
|
def from_h(hash)
|
19
19
|
_, definition, acronym = treat_acronym(hash["definition"])
|
20
20
|
|
21
|
-
hash["definition"] = definition
|
21
|
+
hash["definition"] = [definition]
|
22
|
+
|
22
23
|
hash["acronym"] = acronym.gsub(/\(|\)/, "") if acronym
|
23
|
-
hash["
|
24
|
+
add_designations(hash, hash["synonyms"]) if hash["synonyms"]
|
24
25
|
|
25
|
-
|
26
|
+
new(hash.reject { |k, _| k == "synonyms" })
|
26
27
|
end
|
27
28
|
|
28
29
|
private
|
29
30
|
|
31
|
+
def add_designations(hash, synonyms)
|
32
|
+
hash["designations"] ||= []
|
33
|
+
hash["designations"] << designation_hash(synonyms) if synonyms
|
34
|
+
end
|
35
|
+
|
36
|
+
def designation_hash(value, type = "expression")
|
37
|
+
{
|
38
|
+
"designation" => value,
|
39
|
+
"type" => type,
|
40
|
+
}
|
41
|
+
end
|
42
|
+
|
30
43
|
def treat_acronym(term_def)
|
31
44
|
return [nil, term_def.strip, nil] unless term_def.match?(/.+\(.+?\)$/)
|
32
45
|
|