stepmod-utils 0.3.23 → 0.3.25
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test-concept-generation.yml +38 -0
- data/.gitignore +3 -0
- data/.rubocop.yml +3 -0
- data/exe/stepmod-annotate-all +27 -14
- data/exe/stepmod-extract-changes +57 -0
- data/exe/stepmod-extract-concepts +165 -0
- data/lib/stepmod/utils/change.rb +74 -0
- data/lib/stepmod/utils/change_collection.rb +50 -0
- data/lib/stepmod/utils/change_edition.rb +60 -0
- data/lib/stepmod/utils/change_edition_collection.rb +38 -0
- data/lib/stepmod/utils/changes_extractor.rb +194 -0
- data/lib/stepmod/utils/concept.rb +40 -10
- data/lib/stepmod/utils/converters/description.rb +22 -0
- data/lib/stepmod/utils/express_bibdata.rb +111 -0
- data/lib/stepmod/utils/smrl_description_converter.rb +1 -0
- data/lib/stepmod/utils/stepmod_file_annotator.rb +215 -14
- data/lib/stepmod/utils/term.rb +18 -5
- data/lib/stepmod/utils/terms_extractor.rb +253 -292
- data/lib/stepmod/utils/version.rb +1 -1
- data/stepmod-utils.gemspec +2 -1
- metadata +31 -11
- data/exe/stepmod-build-resource-docs-cache +0 -20
- data/exe/stepmod-extract-terms +0 -237
- data/exe/stepmod-find-express-files +0 -24
@@ -1,11 +1,12 @@
|
|
1
1
|
require "stepmod/utils/stepmod_definition_converter"
|
2
|
-
require "stepmod/utils/
|
2
|
+
require "stepmod/utils/express_bibdata"
|
3
3
|
require "stepmod/utils/concept"
|
4
4
|
require "glossarist"
|
5
5
|
require "securerandom"
|
6
6
|
require "expressir"
|
7
7
|
require "expressir/express/parser"
|
8
8
|
require "indefinite_article"
|
9
|
+
require "pubid-iso"
|
9
10
|
|
10
11
|
ReverseAdoc.config.unknown_tags = :bypass
|
11
12
|
|
@@ -15,6 +16,7 @@ module Stepmod
|
|
15
16
|
# TODO: we may want a command line option to override this in the future
|
16
17
|
ACCEPTED_STAGES = %w(IS DIS FDIS TS).freeze
|
17
18
|
WITHDRAWN_STATUS = "withdrawn".freeze
|
19
|
+
REDUNDENT_NOTE_REGEX = /^An? .*? is a type of \{\{[^}]*\}\}\s*?\.?$/.freeze
|
18
20
|
|
19
21
|
attr_reader :stepmod_path,
|
20
22
|
:stepmod_dir,
|
@@ -38,13 +40,15 @@ module Stepmod
|
|
38
40
|
@stepmod_dir = stepmod_dir
|
39
41
|
@stepmod_path = Pathname.new(stepmod_dir).realpath
|
40
42
|
@index_path = Pathname.new(index_path).to_s
|
41
|
-
@general_concepts = Glossarist::
|
42
|
-
@resource_concepts = Glossarist::
|
43
|
+
@general_concepts = Glossarist::ManagedConceptCollection.new
|
44
|
+
@resource_concepts = Glossarist::ManagedConceptCollection.new
|
43
45
|
@parsed_bibliography = []
|
46
|
+
@added_bibdata = {}
|
44
47
|
@part_concepts = []
|
45
|
-
@part_resources =
|
46
|
-
@part_modules =
|
48
|
+
@part_resources = {}
|
49
|
+
@part_modules = {}
|
47
50
|
@encountered_terms = {}
|
51
|
+
@sequence = 0
|
48
52
|
end
|
49
53
|
|
50
54
|
def log(message)
|
@@ -80,313 +84,284 @@ module Stepmod
|
|
80
84
|
repo_index.xpath("//module").each do |x|
|
81
85
|
next if x['status'] == WITHDRAWN_STATUS
|
82
86
|
|
83
|
-
|
84
|
-
|
87
|
+
arm_path = Pathname.new("#{stepmod_dir}/modules/#{x['name']}/arm_annotated.exp")
|
88
|
+
mim_path = Pathname.new("#{stepmod_dir}/modules/#{x['name']}/mim_annotated.exp")
|
89
|
+
|
90
|
+
files << arm_path if File.exist? arm_path
|
91
|
+
files << mim_path if File.exist? mim_path
|
85
92
|
end
|
86
93
|
|
87
|
-
#
|
88
|
-
|
89
|
-
|
94
|
+
# Should ignore these because the `<resource_docs>` elements do not provide any EXPRESS schemas
|
95
|
+
# # add resource_docs paths
|
96
|
+
# repo_index.xpath("//resource_doc").each do |x|
|
97
|
+
# next if x['status'] == WITHDRAWN_STATUS
|
90
98
|
|
91
|
-
|
92
|
-
|
93
|
-
end
|
99
|
+
# path = Pathname.new("#{stepmod_dir}/resource_docs/#{x['name']}/resource.xml")
|
100
|
+
# files << path if File.exists? path
|
101
|
+
# end
|
94
102
|
|
95
|
-
# add
|
96
|
-
repo_index.xpath("//
|
103
|
+
# add resource paths
|
104
|
+
repo_index.xpath("//resource").each do |x|
|
97
105
|
next if x['status'] == WITHDRAWN_STATUS
|
98
106
|
|
99
|
-
path = Pathname.new("#{stepmod_dir}/
|
100
|
-
files << path if File.
|
107
|
+
path = Pathname.new("#{stepmod_dir}/resources/#{x['name']}/#{x['name']}_annotated.exp")
|
108
|
+
files << path if File.exist? path
|
101
109
|
end
|
102
110
|
|
103
|
-
#
|
104
|
-
|
105
|
-
|
111
|
+
# Should ignore these because we are skiping Clause 3 terms
|
112
|
+
# add business_object_models paths
|
113
|
+
# repo_index.xpath("//business_object_model").each do |x|
|
114
|
+
# next if x['status'] == WITHDRAWN_STATUS
|
115
|
+
|
116
|
+
# annotated_path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom_annotated.exp")
|
117
|
+
# path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom.exp")
|
118
|
+
# files << if File.exists?(annotated_path)
|
119
|
+
# annotated_path
|
120
|
+
# elsif File.exists?(path)
|
121
|
+
# path
|
122
|
+
# end
|
123
|
+
# end
|
106
124
|
|
107
|
-
|
108
|
-
|
109
|
-
|
125
|
+
# Should ignore these because there are no EXPRESS schemas here (they are implemented inside modules
|
126
|
+
# # add application_protocols paths
|
127
|
+
# repo_index.xpath("//application_protocol").each do |x|
|
128
|
+
# next if x['status'] == WITHDRAWN_STATUS
|
110
129
|
|
111
|
-
|
130
|
+
# path = Pathname.new("#{stepmod_dir}/application_protocols/#{x['name']}/application_protocol.xml")
|
131
|
+
# files << path if File.exists? path
|
132
|
+
# end
|
133
|
+
|
134
|
+
files.compact.sort!.uniq!
|
112
135
|
process_term_files(files)
|
113
136
|
|
114
137
|
[
|
115
|
-
general_concepts,
|
138
|
+
general_concepts, # Should be empty because skiping all Clause 3 terms
|
116
139
|
resource_concepts,
|
117
140
|
parsed_bibliography,
|
118
|
-
part_concepts,
|
119
|
-
part_resources,
|
120
|
-
part_modules,
|
141
|
+
part_concepts, # Should be empty because skiping all Clause 3 terms
|
142
|
+
part_resources.values.compact,
|
143
|
+
part_modules.values.compact,
|
121
144
|
]
|
122
145
|
end
|
123
146
|
|
124
147
|
private
|
125
148
|
|
126
149
|
def process_term_files(files)
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
150
|
+
repo = Expressir::Express::Parser.from_files(files)
|
151
|
+
|
152
|
+
repo.schemas.each do |schema|
|
153
|
+
parsed_schema_names = {}
|
154
|
+
|
155
|
+
schema_name = schema.id
|
156
|
+
file_path = schema.file
|
157
|
+
type = extract_file_type(file_path)
|
131
158
|
|
132
|
-
|
133
|
-
|
159
|
+
if parsed_schema_names[schema_name]
|
160
|
+
log <<~ERROR.gsub("\n", " ")
|
161
|
+
ERROR: We have encountered this schema before: #{schema_name} from
|
162
|
+
path #{parsed_schema_names[schema_name]}, now at #{schema.file}
|
163
|
+
ERROR
|
164
|
+
|
165
|
+
next
|
166
|
+
else
|
167
|
+
parsed_schema_names[schema_name] = file_path
|
168
|
+
end
|
169
|
+
|
170
|
+
log "INFO: Processing schema: #{schema.id}"
|
134
171
|
|
135
|
-
bibdata = nil
|
136
172
|
begin
|
137
|
-
bibdata = Stepmod::Utils::
|
138
|
-
rescue
|
139
|
-
log
|
173
|
+
bibdata = Stepmod::Utils::ExpressBibdata.new(schema: schema)
|
174
|
+
rescue => e
|
175
|
+
log e
|
176
|
+
log "ERROR: while processing bibdata for `#{schema_name}`"
|
177
|
+
|
140
178
|
next
|
141
179
|
end
|
142
180
|
|
143
181
|
unless ACCEPTED_STAGES.include? bibdata.doctype
|
144
|
-
log "INFO: skipped #{bibdata.
|
145
|
-
|
182
|
+
log "INFO: skipped #{bibdata.doctype} as it is not " \
|
183
|
+
"one of (#{ACCEPTED_STAGES.join(', ')})."
|
146
184
|
next
|
147
185
|
end
|
148
186
|
|
149
187
|
if bibdata.part.to_s.empty?
|
150
|
-
log "FATAL: missing `part` attribute: #{
|
151
|
-
log "INFO:
|
188
|
+
log "FATAL: missing `part` attribute: #{file_path}"
|
189
|
+
log "INFO: skipped #{schema.id} as it is missing `part` attribute."
|
152
190
|
next
|
153
191
|
end
|
154
192
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
end
|
163
|
-
encountered_terms[term_id] = true
|
164
|
-
end
|
165
|
-
|
166
|
-
# Assume that definition is located in clause 3 of the ISO document
|
167
|
-
# in order. We really don't have a good reference here.
|
168
|
-
ref_clause = "3.#{definition_index}"
|
169
|
-
|
170
|
-
concept = Stepmod::Utils::Concept.parse(
|
171
|
-
definition,
|
172
|
-
reference_anchor: bibdata.anchor,
|
173
|
-
reference_clause: ref_clause,
|
174
|
-
file_path: fpath,
|
193
|
+
case type
|
194
|
+
when "module_arm"
|
195
|
+
arm_concepts = parse_annotated_module(
|
196
|
+
schema: schema,
|
197
|
+
bibdata: bibdata,
|
198
|
+
# See: metanorma/iso-10303-2#90
|
199
|
+
domain_prefix: "application module",
|
175
200
|
)
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
# log "INFO: this part is generic"
|
183
|
-
find_or_initialize_concept(general_concepts, concept)
|
184
|
-
end
|
185
|
-
|
186
|
-
parsed_bibliography << bibdata
|
187
|
-
end
|
188
|
-
|
189
|
-
current_part_resources = Glossarist::Collection.new
|
190
|
-
current_part_modules_arm = {}
|
191
|
-
current_part_modules_mim = {}
|
192
|
-
|
193
|
-
# log "INFO: FILE PATH IS #{file_path}"
|
194
|
-
case file_path.to_s
|
195
|
-
when /resource.xml$/
|
196
|
-
log "INFO: Processing resource.xml for #{fpath}"
|
197
|
-
|
198
|
-
current_document.xpath("//schema").each do |schema_node|
|
199
|
-
schema_name = schema_node["name"]
|
200
|
-
if parsed_schema_names[schema_name]
|
201
|
-
log "ERROR: We have encountered this schema before: \
|
202
|
-
#{schema_name} from path \
|
203
|
-
#{parsed_schema_names[schema_name]}, now at #{file_path}"
|
204
|
-
next
|
205
|
-
else
|
206
|
-
parsed_schema_names[schema_name] = file_path
|
207
|
-
end
|
208
|
-
|
209
|
-
exp_annotated_path =
|
210
|
-
"#{stepmod_path}/resources/#{schema_name}/#{schema_name}_annotated.exp"
|
211
|
-
|
212
|
-
log "INFO: Processing resources schema #{exp_annotated_path}"
|
213
|
-
|
214
|
-
if File.exists?(exp_annotated_path)
|
215
|
-
repo = Expressir::Express::Parser.from_file(exp_annotated_path)
|
216
|
-
schema = repo.schemas.first
|
217
|
-
|
218
|
-
schema.entities.each do |entity|
|
219
|
-
old_definition = entity.remarks.first
|
220
|
-
|
221
|
-
domain = "resource: #{schema.id}"
|
222
|
-
entity_definition = generate_entity_definition(entity, domain, old_definition)
|
223
|
-
|
224
|
-
reference_anchor = bibdata.anchor
|
225
|
-
reference_clause = nil
|
226
|
-
|
227
|
-
concept = Stepmod::Utils::Concept.new(
|
228
|
-
designations: [entity.id],
|
229
|
-
definition: old_definition,
|
230
|
-
converted_definition: entity_definition,
|
231
|
-
id: "#{reference_anchor}.#{reference_clause}",
|
232
|
-
reference_anchor: reference_anchor,
|
233
|
-
reference_clause: reference_clause,
|
234
|
-
file_path: Pathname.new(exp_annotated_path)
|
235
|
-
.relative_path_from(stepmod_path),
|
236
|
-
language_code: "en",
|
237
|
-
)
|
238
|
-
|
239
|
-
next unless concept
|
240
|
-
|
241
|
-
if term_special_category(bibdata)
|
242
|
-
# log "INFO: this part is special"
|
243
|
-
find_or_initialize_concept(current_part_resources, concept)
|
244
|
-
else
|
245
|
-
# log "INFO: this part is generic"
|
246
|
-
find_or_initialize_concept(resource_concepts, concept)
|
247
|
-
end
|
248
|
-
|
249
|
-
parsed_bibliography << bibdata
|
250
|
-
end
|
251
|
-
end
|
252
|
-
end
|
253
|
-
|
254
|
-
when /module.xml$/
|
255
|
-
log "INFO: Processing module.xml for #{fpath}"
|
256
|
-
# Assumption: every schema is only linked by a single module document.
|
257
|
-
# puts current_document.xpath('//module').length
|
258
|
-
schema_name = current_document.xpath("//module").first["name"]
|
259
|
-
if parsed_schema_names[schema_name]
|
260
|
-
log "ERROR: We have encountered this schema before: \
|
261
|
-
#{schema_name} from path #{parsed_schema_names[schema_name]}, \
|
262
|
-
now at #{file_path}"
|
263
|
-
next
|
264
|
-
else
|
265
|
-
parsed_schema_names[schema_name] = file_path
|
266
|
-
end
|
267
|
-
|
268
|
-
arm_schema, arm_concepts = parse_annotated_module(
|
269
|
-
type: :arm,
|
270
|
-
stepmod_path: stepmod_path,
|
271
|
-
path: "modules/#{schema_name}/arm_annotated.exp",
|
272
|
-
bibdata: bibdata
|
201
|
+
when "module_mim"
|
202
|
+
mim_concepts = parse_annotated_module(
|
203
|
+
schema: schema,
|
204
|
+
bibdata: bibdata,
|
205
|
+
# See: metanorma/iso-10303-2#90
|
206
|
+
domain_prefix: "application object",
|
273
207
|
)
|
274
|
-
|
275
|
-
|
276
|
-
type: :mim,
|
277
|
-
stepmod_path: stepmod_path,
|
278
|
-
path: "modules/#{schema_name}/mim_annotated.exp",
|
279
|
-
bibdata: bibdata
|
280
|
-
)
|
281
|
-
|
282
|
-
if arm_concepts.to_a.size > 0
|
283
|
-
current_part_modules_arm[arm_schema] = arm_concepts
|
284
|
-
end
|
285
|
-
|
286
|
-
if mim_concepts.to_a.size > 0
|
287
|
-
current_part_modules_mim[mim_schema] = mim_concepts
|
288
|
-
end
|
289
|
-
end
|
290
|
-
|
291
|
-
log "INFO: Completed processing XML file #{fpath}"
|
292
|
-
if current_part_concepts.to_a.empty?
|
293
|
-
log "INFO: Skipping #{fpath} (#{bibdata.docid}) " \
|
294
|
-
"because it contains no concepts."
|
295
|
-
elsif current_part_concepts.to_a.length < 3
|
296
|
-
log "INFO: Skipping #{fpath} (#{bibdata.docid}) " \
|
297
|
-
"because it only has #{current_part_concepts.to_a.length} terms."
|
298
|
-
|
299
|
-
current_part_concepts.to_a.each do |x|
|
300
|
-
general_concepts.store(x)
|
301
|
-
end
|
302
|
-
else
|
303
|
-
unless current_part_concepts.to_a.empty?
|
304
|
-
part_concepts << [bibdata,
|
305
|
-
current_part_concepts]
|
306
|
-
end
|
307
|
-
end
|
308
|
-
|
309
|
-
unless current_part_resources.to_a.empty?
|
310
|
-
part_resources << [bibdata,
|
311
|
-
current_part_resources]
|
208
|
+
when "resource"
|
209
|
+
parse_annotated_resource(schema: schema, bibdata: bibdata)
|
312
210
|
end
|
211
|
+
end
|
212
|
+
end
|
313
213
|
|
314
|
-
|
315
|
-
|
214
|
+
def extract_file_type(filename)
|
215
|
+
match = filename.match(/(arm|mim|bom)_annotated\.exp$/)
|
216
|
+
return "resource" unless match
|
316
217
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
end
|
218
|
+
{
|
219
|
+
"arm" => "module_arm",
|
220
|
+
"mim" => "module_mim",
|
221
|
+
"bom" => "business_object_model",
|
222
|
+
}[match.captures[0]] || "resource"
|
323
223
|
end
|
324
224
|
|
325
|
-
def parse_annotated_module(
|
326
|
-
log "INFO: parse_annotated_module:
|
225
|
+
def parse_annotated_module(schema:, bibdata:, domain_prefix:)
|
226
|
+
log "INFO: parse_annotated_module: " \
|
227
|
+
"Processing modules schema #{schema.file}"
|
327
228
|
|
328
|
-
|
229
|
+
collection = Glossarist::ManagedConceptCollection.new
|
329
230
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
231
|
+
schema.entities.each do |entity|
|
232
|
+
@sequence += 1
|
233
|
+
document = entity.find("__schema_file")&.remarks&.first
|
234
|
+
|
235
|
+
concept = generate_concept_from_entity(
|
236
|
+
entity: entity,
|
237
|
+
domain: "#{domain_prefix}: #{schema.id}",
|
238
|
+
schema: {
|
239
|
+
"name" => schema.id,
|
240
|
+
"type" => "module",
|
241
|
+
"path" => extract_file_path(entity.parent.file),
|
242
|
+
},
|
243
|
+
document: {
|
244
|
+
"type" => "module",
|
245
|
+
"module" => document && document.split("/")[-2],
|
246
|
+
"path" => document,
|
247
|
+
},
|
248
|
+
bibdata: bibdata,
|
249
|
+
)
|
334
250
|
|
335
|
-
|
251
|
+
next unless concept
|
336
252
|
|
337
|
-
|
338
|
-
log "ERROR: parse_annotated_module: failed to parse EXPRESS file at #{path}."
|
339
|
-
return
|
253
|
+
find_or_initialize_concept(collection, concept)
|
340
254
|
end
|
341
255
|
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
when :arm
|
347
|
-
"application object"
|
256
|
+
if collection.to_a.size.positive?
|
257
|
+
part_index = domain_prefix == "application module" ? 1 : 2
|
258
|
+
part_modules[bibdata.part] ||= [bibdata, {}, {}]
|
259
|
+
part_modules[bibdata.part][part_index][schema.id] = collection
|
348
260
|
end
|
349
261
|
|
350
|
-
if
|
351
|
-
|
352
|
-
|
353
|
-
"#{repo.schemas.map(&:id).join(", ")} (not supposed to happen!!)"
|
354
|
-
)
|
262
|
+
if collection && !@added_bibdata[bibdata.part]
|
263
|
+
parsed_bibliography << bibdata
|
264
|
+
@added_bibdata[bibdata.part] = true
|
355
265
|
end
|
356
266
|
|
357
|
-
|
358
|
-
|
359
|
-
|
267
|
+
collection
|
268
|
+
end
|
269
|
+
|
270
|
+
def parse_annotated_resource(schema:, bibdata:)
|
271
|
+
log "INFO: parse_annotated_resource: " \
|
272
|
+
"Processing resources schema #{schema.file}"
|
360
273
|
|
361
274
|
schema.entities.each do |entity|
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
275
|
+
@sequence += 1
|
276
|
+
log "INFO: Processing entity: #{entity.id}"
|
277
|
+
|
278
|
+
document = entity.find("__schema_file")&.remarks&.first
|
279
|
+
|
280
|
+
concept = generate_concept_from_entity(
|
281
|
+
entity: entity,
|
282
|
+
domain: "resource: #{schema.id}",
|
283
|
+
schema: {
|
284
|
+
"name" => schema.id,
|
285
|
+
"type" => "resource",
|
286
|
+
"path" => extract_file_path(entity.parent.file),
|
287
|
+
},
|
288
|
+
document: {
|
289
|
+
"type" => "resource",
|
290
|
+
"resource" => document && document.split("/")[-2],
|
291
|
+
"path" => document,
|
292
|
+
},
|
293
|
+
bibdata: bibdata,
|
375
294
|
)
|
376
295
|
|
377
296
|
next unless concept
|
378
|
-
|
297
|
+
|
298
|
+
if term_special_category(bibdata)
|
299
|
+
part_resources[bibdata.part] ||= [
|
300
|
+
bibdata,
|
301
|
+
Glossarist::ManagedConceptCollection.new,
|
302
|
+
]
|
303
|
+
# log "INFO: this part is special"
|
304
|
+
find_or_initialize_concept(part_resources[bibdata.part][1], concept)
|
305
|
+
else
|
306
|
+
# log "INFO: this part is generic"
|
307
|
+
find_or_initialize_concept(resource_concepts, concept)
|
308
|
+
end
|
309
|
+
|
310
|
+
unless @added_bibdata[bibdata.part]
|
311
|
+
parsed_bibliography << bibdata
|
312
|
+
@added_bibdata[bibdata.part] = true
|
313
|
+
end
|
379
314
|
end
|
315
|
+
end
|
380
316
|
|
381
|
-
|
317
|
+
# rubocop:disable Metrics/MethodLength
|
318
|
+
def generate_concept_from_entity(entity:, schema:, domain:, bibdata:, document:)
|
319
|
+
old_definition = trim_definition(entity.remarks.first)
|
320
|
+
definition = generate_entity_definition(entity, domain)
|
321
|
+
|
322
|
+
notes = [old_definition].reject { |note| redundant_note?(note) }
|
323
|
+
|
324
|
+
Stepmod::Utils::Concept.new(
|
325
|
+
designations: [
|
326
|
+
{
|
327
|
+
"type" => "expression",
|
328
|
+
"normative_status" => "preferred",
|
329
|
+
"designation" => entity.id,
|
330
|
+
},
|
331
|
+
],
|
332
|
+
domain: domain,
|
333
|
+
definition: [definition.strip],
|
334
|
+
id: "#{bibdata.part}-#{@sequence}",
|
335
|
+
sources: [
|
336
|
+
{
|
337
|
+
"type" => "authoritative",
|
338
|
+
"ref" => bibdata.docid,
|
339
|
+
"link" => "https://www.iso.org/standard/32858.html",
|
340
|
+
},
|
341
|
+
],
|
342
|
+
notes: notes,
|
343
|
+
language_code: "en",
|
344
|
+
part: bibdata.part,
|
345
|
+
schema: schema,
|
346
|
+
document: document,
|
347
|
+
)
|
348
|
+
end
|
349
|
+
# rubocop:enable Metrics/MethodLength
|
350
|
+
|
351
|
+
def extract_file_path(file_path)
|
352
|
+
Pathname
|
353
|
+
.new(file_path)
|
354
|
+
.realpath
|
355
|
+
.relative_path_from(stepmod_path)
|
356
|
+
.to_s
|
382
357
|
end
|
383
358
|
|
384
359
|
def find_or_initialize_concept(collection, localized_concept)
|
385
|
-
concept = collection
|
386
|
-
.store(Glossarist::Concept.new(id: SecureRandom.uuid))
|
360
|
+
concept = collection.fetch_or_initialize(localized_concept.id)
|
387
361
|
concept.add_l10n(localized_concept)
|
388
362
|
end
|
389
363
|
|
364
|
+
# rubocop:disable Metrics/MethodLength
|
390
365
|
def combine_paragraphs(full_paragraph, next_paragraph)
|
391
366
|
# If full_paragraph already contains a period, extract that.
|
392
367
|
if m = full_paragraph.match(/\A(?<inner_first>[^\n]*?\.)\s/)
|
@@ -421,6 +396,8 @@ module Stepmod
|
|
421
396
|
end
|
422
397
|
|
423
398
|
def trim_definition(definition)
|
399
|
+
return nil if definition.nil? || definition.empty?
|
400
|
+
|
424
401
|
# Unless the first paragraph ends with "between" and is followed by a
|
425
402
|
# list, don't split
|
426
403
|
paragraphs = definition.split("\n\n")
|
@@ -429,25 +406,26 @@ module Stepmod
|
|
429
406
|
|
430
407
|
first_paragraph = paragraphs.first
|
431
408
|
|
432
|
-
if paragraphs.length > 1
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
409
|
+
combined = if paragraphs.length > 1
|
410
|
+
paragraphs[1..-1].inject(first_paragraph) do |acc, p|
|
411
|
+
combine_paragraphs(acc, p)
|
412
|
+
end
|
413
|
+
else
|
414
|
+
combine_paragraphs(first_paragraph, "")
|
415
|
+
end
|
439
416
|
|
440
417
|
# puts "combined--------- #{combined}"
|
441
418
|
|
442
419
|
# Remove comments until end of line
|
443
|
-
combined = combined
|
420
|
+
combined = "#{combined}\n"
|
444
421
|
combined.gsub!(/\n\/\/.*?\n/, "\n")
|
445
422
|
combined.strip!
|
446
423
|
|
447
424
|
express_reference_to_mention(combined)
|
448
425
|
|
449
426
|
# combined
|
450
|
-
# # TODO: If the definition contains a list immediately after
|
427
|
+
# # TODO: If the definition contains a list immediately after
|
428
|
+
# # the first paragraph, don't split
|
451
429
|
# return definition if definition =~ /\n\* /
|
452
430
|
|
453
431
|
# unless (
|
@@ -462,6 +440,7 @@ module Stepmod
|
|
462
440
|
# first_paragraph
|
463
441
|
# end
|
464
442
|
end
|
443
|
+
# rubocop:enable Metrics/MethodLength
|
465
444
|
|
466
445
|
# Replace `<<express:{schema}.{entity},{render}>>` with {{entity,render}}
|
467
446
|
def express_reference_to_mention(description)
|
@@ -486,52 +465,30 @@ module Stepmod
|
|
486
465
|
# end
|
487
466
|
|
488
467
|
# rubocop:disable Layout/LineLength
|
489
|
-
def generate_entity_definition(entity, domain
|
468
|
+
def generate_entity_definition(entity, domain)
|
490
469
|
return "" if entity.nil?
|
491
470
|
|
492
471
|
# See: metanorma/iso-10303-2#90
|
493
472
|
entity_type = if domain_type = domain.match(/\A(application object):/)
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
"#{entity_type} "
|
501
|
-
|
473
|
+
"{{#{domain_type[1]}}}"
|
474
|
+
else
|
475
|
+
"{{entity data type}}"
|
476
|
+
end
|
477
|
+
|
478
|
+
if entity.subtype_of.size.zero?
|
479
|
+
"#{entity_type} " \
|
480
|
+
"that represents the " \
|
481
|
+
"#{entity_name_to_text(entity.id)} {{entity}}"
|
502
482
|
else
|
503
483
|
entity_subtypes = entity.subtype_of.map do |e|
|
504
484
|
"{{#{e.id}}}"
|
505
485
|
end
|
506
|
-
"#{entity_type} that is a type of " +
|
507
|
-
"#{entity_subtypes.join(' and ')} " +
|
508
|
-
"that represents the " + entity_name_to_text(entity.id) + " {{entity}}"
|
509
|
-
end
|
510
|
-
|
511
|
-
definition = <<~DEFINITION
|
512
|
-
=== #{entity.id}
|
513
|
-
domain:[#{domain}]
|
514
|
-
|
515
|
-
#{entity_text}
|
516
|
-
|
517
|
-
DEFINITION
|
518
|
-
|
519
|
-
# If there is a definition, we add it as the first NOTE
|
520
|
-
unless old_definition.nil? || old_definition.blank?
|
521
|
-
old_definition = trim_definition(old_definition)
|
522
486
|
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
#{
|
527
|
-
--
|
528
|
-
OLD_DEFINITION
|
487
|
+
"#{entity_type} that is a type of " \
|
488
|
+
"#{entity_subtypes.join(' and ')} " \
|
489
|
+
"that represents the " \
|
490
|
+
"#{entity_name_to_text(entity.id)} {{entity}}"
|
529
491
|
end
|
530
|
-
|
531
|
-
# We no longer add Notes and Examples to the extracted terms
|
532
|
-
# definition + format_remark_items(entity.remark_items)
|
533
|
-
|
534
|
-
definition
|
535
492
|
end
|
536
493
|
|
537
494
|
def format_remark_items(remark_items)
|
@@ -558,6 +515,10 @@ module Stepmod
|
|
558
515
|
REMARK
|
559
516
|
end.join
|
560
517
|
end
|
518
|
+
|
519
|
+
def redundant_note?(note)
|
520
|
+
note && note.match?(REDUNDENT_NOTE_REGEX) && !note.include?("\n")
|
521
|
+
end
|
561
522
|
end
|
562
523
|
end
|
563
524
|
end
|