suma 0.1.15 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52836adb9e003f9d760a6c015207379be88b8821855d64388a5f30d09474de7e
4
- data.tar.gz: d7427de3e6dc21291671122bd07bc23c739e4f59f031e3b02988f89254767d00
3
+ metadata.gz: 5ecace379e8cbe19d7f3d6144ce5862be6529c02b418352c79b382f0be184e67
4
+ data.tar.gz: c1ce481cd14a29ce112782a088c8b11722c7d091e17df3ca771339c0446b1396
5
5
  SHA512:
6
- metadata.gz: c0de4abdc501a6dba9b716e7ad9a15f66985bec98b2978c9f1b25ed0f069d02d8a172f8b78c442e8f90e32098ad1622a3ba16a890c2ed57d896c4c2b427c5c3e
7
- data.tar.gz: ed0f32387285db507797e7b34962a20e4dc6174e7389ffa2488afb8724668c11d1e0089e9e2b62a958875e0e2bb9d7f4976d5d0ef887cd83604acb4939d25b7a
6
+ metadata.gz: 4c84af1274e4b93530d213648de1a32cd580203ca8da2123f2071a6457c007b87ed343fbe8326ac652cd706c650603c959aea0ebb54d2f5109a6e92098e5ccd7
7
+ data.tar.gz: 4b1ee08b731c160b772a89c9dd5af33c7720fcc388bf8b8e9cf06554a4c07c8f052cf78f953f4841a48543ef255626e666b9f4c6bd0a4e225a956684a82954bc
data/.gitignore CHANGED
@@ -9,3 +9,7 @@
9
9
 
10
10
  # rspec failure tracking
11
11
  .rspec_status
12
+ .rubocop_todo.yml
13
+ .rubocop-https---*-yml
14
+ .ruby-version
15
+ Gemfile.lock
data/.rubocop.yml CHANGED
@@ -8,7 +8,7 @@ plugins:
8
8
  - rubocop-rspec
9
9
 
10
10
  AllCops:
11
- TargetRubyVersion: 3.0
11
+ TargetRubyVersion: 3.1
12
12
  NewCops: enable
13
13
  Exclude:
14
14
  - 'vendor/**/*'
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-07-05 22:42:28 UTC using RuboCop version 1.77.0.
3
+ # on 2025-07-17 08:10:53 UTC using RuboCop version 1.78.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -13,7 +13,30 @@ Gemspec/DuplicatedAssignment:
13
13
  Exclude:
14
14
  - 'suma.gemspec'
15
15
 
16
- # Offense count: 57
16
+ # Offense count: 1
17
+ # Configuration parameters: Severity, Include.
18
+ # Include: **/*.gemspec
19
+ Gemspec/RequiredRubyVersion:
20
+ Exclude:
21
+ - 'suma.gemspec'
22
+
23
+ # Offense count: 17
24
+ # This cop supports safe autocorrection (--autocorrect).
25
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
26
+ # SupportedStyles: with_first_argument, with_fixed_indentation
27
+ Layout/ArgumentAlignment:
28
+ Exclude:
29
+ - 'lib/suma/cli/validate_ascii.rb'
30
+
31
+ # Offense count: 1
32
+ # This cop supports safe autocorrection (--autocorrect).
33
+ # Configuration parameters: EnforcedStyle.
34
+ # SupportedStyles: normal, indented_internal_methods
35
+ Layout/IndentationConsistency:
36
+ Exclude:
37
+ - 'lib/suma/glossarist_extensions.rb'
38
+
39
+ # Offense count: 90
17
40
  # This cop supports safe autocorrection (--autocorrect).
18
41
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
19
42
  # URISchemes: http, https
@@ -21,10 +44,10 @@ Layout/LineLength:
21
44
  Exclude:
22
45
  - 'lib/suma/cli.rb'
23
46
  - 'lib/suma/cli/build.rb'
24
- - 'lib/suma/cli/extract_terms.rb'
25
47
  - 'lib/suma/cli/validate.rb'
26
48
  - 'lib/suma/cli/validate_ascii.rb'
27
49
  - 'lib/suma/cli/validate_links.rb'
50
+ - 'lib/suma/glossarist_extensions.rb'
28
51
  - 'lib/suma/processor.rb'
29
52
  - 'lib/suma/schema_attachment.rb'
30
53
  - 'lib/suma/schema_collection.rb'
@@ -34,6 +57,13 @@ Layout/LineLength:
34
57
  - 'spec/suma/cli/validate_ascii_spec.rb'
35
58
  - 'suma.gemspec'
36
59
 
60
+ # Offense count: 22
61
+ # This cop supports safe autocorrection (--autocorrect).
62
+ # Configuration parameters: AllowInHeredoc.
63
+ Layout/TrailingWhitespace:
64
+ Exclude:
65
+ - 'lib/suma/cli/validate_ascii.rb'
66
+
37
67
  # Offense count: 1
38
68
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
39
69
  Lint/DuplicateBranch:
@@ -46,33 +76,48 @@ Lint/DuplicateMethods:
46
76
  - 'lib/suma/cli/validate_ascii.rb'
47
77
  - 'lib/suma/express_schema.rb'
48
78
 
49
- # Offense count: 20
79
+ # Offense count: 2
80
+ # This cop supports unsafe autocorrection (--autocorrect-all).
81
+ Lint/NonAtomicFileOperation:
82
+ Exclude:
83
+ - 'lib/suma/glossarist_extensions.rb'
84
+
85
+ # Offense count: 1
86
+ # This cop supports safe autocorrection (--autocorrect).
87
+ # Configuration parameters: AutoCorrect, IgnoreEmptyBlocks, AllowUnusedKeywordArguments.
88
+ Lint/UnusedBlockArgument:
89
+ Exclude:
90
+ - 'lib/suma/glossarist_extensions.rb'
91
+
92
+ # Offense count: 23
50
93
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
51
94
  Metrics/AbcSize:
52
95
  Exclude:
53
96
  - 'lib/suma/cli/extract_terms.rb'
54
97
  - 'lib/suma/cli/validate_ascii.rb'
55
98
  - 'lib/suma/cli/validate_links.rb'
99
+ - 'lib/suma/glossarist_extensions.rb'
56
100
  - 'lib/suma/schema_attachment.rb'
57
101
  - 'lib/suma/schema_document.rb'
58
102
  - 'lib/suma/thor_ext.rb'
59
103
 
60
- # Offense count: 1
104
+ # Offense count: 3
61
105
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
62
106
  # AllowedMethods: refine
63
107
  Metrics/BlockLength:
64
108
  Max: 64
65
109
 
66
- # Offense count: 8
110
+ # Offense count: 9
67
111
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
68
112
  Metrics/CyclomaticComplexity:
69
113
  Exclude:
70
114
  - 'lib/suma/cli/extract_terms.rb'
71
115
  - 'lib/suma/cli/validate_ascii.rb'
72
116
  - 'lib/suma/cli/validate_links.rb'
117
+ - 'lib/suma/glossarist_extensions.rb'
73
118
  - 'lib/suma/thor_ext.rb'
74
119
 
75
- # Offense count: 30
120
+ # Offense count: 36
76
121
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
77
122
  Metrics/MethodLength:
78
123
  Max: 107
@@ -82,13 +127,14 @@ Metrics/MethodLength:
82
127
  Metrics/ParameterLists:
83
128
  Max: 6
84
129
 
85
- # Offense count: 5
130
+ # Offense count: 6
86
131
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
87
132
  Metrics/PerceivedComplexity:
88
133
  Exclude:
89
134
  - 'lib/suma/cli/extract_terms.rb'
90
135
  - 'lib/suma/cli/validate_ascii.rb'
91
136
  - 'lib/suma/cli/validate_links.rb'
137
+ - 'lib/suma/glossarist_extensions.rb'
92
138
 
93
139
  # Offense count: 5
94
140
  # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
@@ -114,13 +160,14 @@ RSpec/ExampleLength:
114
160
  RSpec/MultipleExpectations:
115
161
  Max: 12
116
162
 
117
- # Offense count: 1
163
+ # Offense count: 2
118
164
  # This cop supports safe autocorrection (--autocorrect).
119
165
  # Configuration parameters: AutoCorrect, EnforcedStyle, AllowComments.
120
166
  # SupportedStyles: empty, nil, both
121
167
  Style/EmptyElse:
122
168
  Exclude:
123
169
  - 'lib/suma/cli/validate_links.rb'
170
+ - 'lib/suma/glossarist_extensions.rb'
124
171
 
125
172
  # Offense count: 4
126
173
  # This cop supports safe autocorrection (--autocorrect).
@@ -129,8 +176,37 @@ Style/EmptyElse:
129
176
  Style/FormatStringToken:
130
177
  EnforcedStyle: unannotated
131
178
 
179
+ # Offense count: 2
180
+ # This cop supports unsafe autocorrection (--autocorrect-all).
181
+ # Configuration parameters: AllowedReceivers.
182
+ # AllowedReceivers: Thread.current
183
+ Style/HashEachMethods:
184
+ Exclude:
185
+ - 'lib/suma/glossarist_extensions.rb'
186
+
187
+ # Offense count: 2
188
+ # This cop supports unsafe autocorrection (--autocorrect-all).
189
+ Style/IdenticalConditionalBranches:
190
+ Exclude:
191
+ - 'lib/suma/glossarist_extensions.rb'
192
+
193
+ # Offense count: 3
194
+ # This cop supports unsafe autocorrection (--autocorrect-all).
195
+ # Configuration parameters: ConvertCodeThatCanStartToReturnNil, AllowedMethods, MaxChainLength.
196
+ # AllowedMethods: present?, blank?, presence, try, try!
197
+ Style/SafeNavigation:
198
+ Exclude:
199
+ - 'lib/suma/glossarist_extensions.rb'
200
+
132
201
  # Offense count: 1
133
202
  # Configuration parameters: Max.
134
203
  Style/SafeNavigationChainLength:
135
204
  Exclude:
136
205
  - 'lib/suma/cli/extract_terms.rb'
206
+
207
+ # Offense count: 4
208
+ # This cop supports unsafe autocorrection (--autocorrect-all).
209
+ # Configuration parameters: Mode.
210
+ Style/StringConcatenation:
211
+ Exclude:
212
+ - 'lib/suma/cli/validate_ascii.rb'
data/README.adoc CHANGED
@@ -291,8 +291,9 @@ Replacement: AsciiMath: xx
291
291
 
292
292
  === Generate schemas command
293
293
 
294
- The `suma generate_schemas` command generates a schema manifest file containing
295
- all schemas defined in the Metanorma manifest file.
294
+ The `suma generate_schemas` command generates an EXPRESS schema manifest file
295
+ containing all schemas of documents referenced in the Metanorma manifest file,
296
+ recursively.
296
297
 
297
298
  [source,sh]
298
299
  ----
@@ -331,6 +332,10 @@ out which schemas the document includes.
331
332
 
332
333
  === Extract terms command
333
334
 
335
+ The "extract terms" command is implemented for ISO 10303-2, and could also be
336
+ used for other EXPRESS schema collections that require term extraction for
337
+ glossary or dictionary applications.
338
+
334
339
  The `suma extract_terms` command extracts terms from EXPRESS schemas and
335
340
  generates a Glossarist v2 dataset in the output directory. This command processes
336
341
  various types of STEP schemas and creates standardized terminology datasets
@@ -357,36 +362,35 @@ Options:
357
362
 
358
363
  The command supports extraction from the following EXPRESS schema types:
359
364
 
360
- * **ARM (Application Reference Model)** - Application module schemas ending with `_arm`
361
- * **MIM (Module Implementation Model)** - Application module schemas ending with `_mim`
365
+ * **ARM (application reference model)** - application module schemas ending with `_arm`
366
+ * **MIM (module implementation model)** - application module schemas ending with `_mim`
362
367
  * **Resource schemas** - General resource schemas
363
- * **BOM (Business Object Model)** - Business object model schemas ending with `_bom`
368
+ * **BOM (business object model)** - business object model schemas ending with `_bom`
369
+
370
+ NOTE: The "Long Form" EXPRESS schemas (ending with `_lf`) should be excluded
371
+ from the manifest file as they do not contain definitions of `ENTITY` objects.
364
372
 
365
- ==== Extracted terms
366
373
 
367
- The command extracts the following types of terms from EXPRESS schemas:
374
+ ==== Extracted concepts
368
375
 
369
- * **Entities** - EXPRESS entity definitions with their attributes and relationships
370
- * **Types** - EXPRESS type definitions including enumerations and select types
371
- * **Functions** - EXPRESS function definitions
372
- * **Procedures** - EXPRESS procedure definitions
373
- * **Constants** - EXPRESS constant definitions
376
+ The command extracts `ENTITY` objects from EXPRESS schemas, with the following
377
+ information:
374
378
 
375
- Each extracted term includes:
379
+ . a unique identifier in form of `{schema_name}.{entity_name}` (where `entity_name` is a slug
380
+ derived from the `ENTITY` name);
381
+ . a generated definition depending on the schema type and entity type;
382
+ . a "Note to entry" from its first coherent textual element described in Annotated EXPRESS;
383
+ . source information including: schema name and schema version;
384
+ . domain in the pattern of: `{domain_type}: {schema_name}`, where `domain_type` is one of
385
+ `application module`, `resource`, or `business object model`.
376
386
 
377
- * Unique identifier based on schema and term name
378
- * Term definition and description
379
- * Source schema information
380
- * Appropriate domain classification (application module, resource, or business object model)
381
387
 
382
388
  ==== Output format
383
389
 
384
390
  The command generates a Glossarist v2 compliant dataset with:
385
391
 
386
- * `concept/` directory containing concept definition files
387
- * `localized_concept/` directory containing localized concept files
388
- * YAML format following Glossarist v2 schema specifications
389
- * Proper cross-references and citations to source schemas
392
+ * `concept/` directory containing concept definition files in YAML
393
+ * `localized_concept/` directory containing localized concept files in YAML
390
394
 
391
395
  .To extract terms from a schema manifest file
392
396
  [example]
@@ -394,7 +398,8 @@ The command generates a Glossarist v2 compliant dataset with:
394
398
  [source,sh]
395
399
  ----
396
400
  $ bundle exec suma extract_terms schemas-smrl-all.yml glossarist_output
397
- # => generates glossarist_output/concept/*.yaml and glossarist_output/localized_concept/*.yaml
401
+ # => generates glossarist_output/concept/*.yaml and
402
+ # glossarist_output/localized_concept/*.yaml
398
403
  ----
399
404
  ====
400
405
 
@@ -408,7 +413,6 @@ $ bundle exec suma extract_terms schemas-activity-modules.yml terms_output
408
413
  ----
409
414
  ====
410
415
 
411
- The generated dataset is meant to be used for ISO 10303-2.
412
416
 
413
417
 
414
418
  == Usage: Ruby
@@ -12,6 +12,17 @@ module Suma
12
12
  # ExtractTerms command using Expressir to extract terms into the
13
13
  # Glossarist v2 format
14
14
  class ExtractTerms < Thor
15
+ # Matches patterns like "A thing is a type of {{entity}}." or
16
+ # "An object is a type of a {{entity}}"
17
+ REDUNDANT_NOTE_REGEX =
18
+ %r{
19
+ ^An? # Starts with "A" or "An"
20
+ \s.*?\sis\sa\stype\sof # Text followed by "is a type of"
21
+ (\sa|\san)? # Optional " a" or " an"
22
+ \s\{\{[^\}]*\}\} # Text in double curly braces
23
+ \s*?\.?$ # Optional whitespace and period at the end
24
+ }x
25
+
15
26
  desc "extract_terms SCHEMA_MANIFEST_FILE GLOSSARIST_OUTPUT_PATH",
16
27
  "Extract terms from SCHEMA_MANIFEST_FILE into " \
17
28
  "Glossarist v2 format"
@@ -129,11 +140,12 @@ module Suma
129
140
  data.sources = [source_ref] if source_ref
130
141
 
131
142
  # Only assign optional fields if they have content
132
- notes = get_entity_notes(entity, schema_domain)
143
+ notes = get_entity_notes(entity, schema_domain, data.definition)
133
144
  data.notes = notes if notes && !notes.empty?
134
145
 
135
- examples = get_entity_examples(entity, schema_domain)
136
- data.examples = examples if examples && !examples.empty?
146
+ # examples = get_entity_examples(entity, schema_domain)
147
+ # data.examples = examples if examples && !examples.empty?
148
+ data.examples = []
137
149
  end
138
150
 
139
151
  Glossarist::LocalizedConcept.new.tap do |concept|
@@ -245,9 +257,20 @@ module Suma
245
257
  [Glossarist::DetailedDefinition.new(content: definition)]
246
258
  end
247
259
 
248
- def get_entity_notes(entity, schema_domain)
260
+ def get_entity_notes(entity, schema_domain, definitions)
261
+ puts "Extracting notes for entity: #{entity.id}"
249
262
  notes = []
250
263
 
264
+ notes = add_entity_notes(entity, schema_domain, notes)
265
+ # notes = add_other_notes(entity, schema_domain, notes)
266
+ notes = only_keep_first_sentence(notes)
267
+ notes = remove_see_content(notes)
268
+ notes = remove_redundant_note(notes)
269
+ notes = remove_invalid_references(notes)
270
+ compare_with_definitions(notes, definitions)
271
+ end
272
+
273
+ def add_entity_notes(entity, schema_domain, notes)
251
274
  # Add trimmed definition from entity description as first note
252
275
  if entity.remarks && !entity.remarks.empty?
253
276
  trimmed_def = trim_definition(entity.remarks)
@@ -258,7 +281,11 @@ module Suma
258
281
  end
259
282
  end
260
283
 
261
- # Add other notes
284
+ notes.compact
285
+ end
286
+
287
+ def add_other_notes(entity, schema_domain, notes)
288
+ # Add other notes from entity remarks
262
289
  other_notes = [
263
290
  entity.remark_items&.select do |ri|
264
291
  ri.id == "__note"
@@ -274,6 +301,67 @@ module Suma
274
301
  notes
275
302
  end
276
303
 
304
+ # https://github.com/metanorma/iso-10303/issues/621
305
+ # 1. First sentence in first paragraph of the entity description
306
+ # (in EXPRESS remark) becomes NOTE 1 in ISO 10303-2 of the entity.
307
+ def only_keep_first_sentence(notes)
308
+ notes.each do |note|
309
+ # Split by period and take the first sentence
310
+ # Avoid splitting by pattern like "abc.def"
311
+ if note&.content
312
+ new_content = note.content
313
+ .split(".\n").first.strip
314
+ .split(". ").first.strip
315
+ note.content = if new_content.end_with?(".")
316
+ new_content
317
+ else
318
+ "#{new_content}."
319
+ end
320
+ end
321
+ end
322
+ end
323
+
324
+ # https://github.com/metanorma/iso-10303/issues/621
325
+ # 2. If this first sentence matches the 7-word magic sentence
326
+ # (2-3 forms of that), it is discarded so there will not be a NOTE 1.
327
+ def compare_with_definitions(notes, definitions)
328
+ if notes&.first&.content == definitions&.first&.content
329
+ # Discarding first note as it matches the definition
330
+ return []
331
+ end
332
+
333
+ notes
334
+ end
335
+
336
+ # https://github.com/metanorma/iso-10303/issues/621
337
+ # 3. No reference to any types or attribute or figures allowed in first
338
+ # sentence. Entity references “{{…}}” are allowed.
339
+ def remove_invalid_references(notes)
340
+ notes.reject do |note|
341
+ note.content.include?("image::") ||
342
+ note.content.match?(/<<(.*?){1,999}>>/)
343
+ end
344
+ end
345
+
346
+ # https://github.com/metanorma/iso-10303/issues/621
347
+ # 4. Entity notes and examples in EXPRESS remarks are NOT represented in
348
+ # part 2.
349
+ def remove_redundant_note(notes)
350
+ notes.reject do |note|
351
+ note.content.match?(REDUNDANT_NOTE_REGEX) &&
352
+ !note.content.include?("\n")
353
+ end
354
+ end
355
+
356
+ # https://github.com/metanorma/iso-10303/issues/621
357
+ # 5. If the sentence contains “\s+(see …)”, the contents including the
358
+ # parentheses are removed.
359
+ def remove_see_content(notes)
360
+ notes.each do |note|
361
+ note.content = note.content.gsub(/\s+\(see(.*?){1,999}\)/, "")
362
+ end
363
+ end
364
+
277
365
  def get_entity_examples(entity, schema_domain)
278
366
  examples = entity.remark_items&.select do |ri|
279
367
  ri.id == "__example"
@@ -382,13 +470,18 @@ module Suma
382
470
  end
383
471
  # rubocop:enable Metrics/MethodLength
384
472
 
385
- # Replace `<<express:{schema}.{entity},{render}>>` with {{entity,render}}
473
+ # Replace `<<express:{schema}.{entity}>>` with {{entity}}
474
+ # and `<<express:{schema}.{entity},{render}>>` with {{entity,render}}
386
475
  def express_reference_to_mention(description)
387
476
  # TODO: Use Expressir to check whether the "entity" is really an
388
477
  # EXPRESS ENTITY. If not, skip the mention.
389
- description.gsub(/<<express:([^,]+),([^>]+)>>/) do |_match|
390
- "{{#{Regexp.last_match[1].split('.').last},#{Regexp.last_match[2]}}}"
391
- end
478
+ description
479
+ .gsub(/<<express:([^,]+)>>/) do |_match|
480
+ "{{#{Regexp.last_match[1].split('.').last}}}"
481
+ end.gsub(/<<express:([^,]+),([^>]+)>>/) do |_match|
482
+ "{{#{Regexp.last_match[1].split('.').last}," \
483
+ "#{Regexp.last_match[2]}}}"
484
+ end
392
485
  end
393
486
 
394
487
  def entity_name_to_text(entity_id)
@@ -64,8 +64,6 @@ module Suma
64
64
  def load_yaml(file_path)
65
65
  YAML.safe_load(
66
66
  File.read(file_path, encoding: "UTF-8"),
67
- permitted_classes: [Date, Time, Symbol],
68
- permitted_symbols: [],
69
67
  aliases: true,
70
68
  )
71
69
  end
data/lib/suma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Suma
4
- VERSION = "0.1.15"
4
+ VERSION = "0.1.17"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.15
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-07-07 00:00:00.000000000 Z
11
+ date: 2025-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: expressir