suma 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52836adb9e003f9d760a6c015207379be88b8821855d64388a5f30d09474de7e
4
- data.tar.gz: d7427de3e6dc21291671122bd07bc23c739e4f59f031e3b02988f89254767d00
3
+ metadata.gz: 955161f5bfcd18f46e1fcafafdc05bea5a7310eaa8899b03552e7d22f6f41795
4
+ data.tar.gz: ff06abeaf766ce76d814079ef718c6deb45e1dd0cae5d89e9d03d203e3a5635d
5
5
  SHA512:
6
- metadata.gz: c0de4abdc501a6dba9b716e7ad9a15f66985bec98b2978c9f1b25ed0f069d02d8a172f8b78c442e8f90e32098ad1622a3ba16a890c2ed57d896c4c2b427c5c3e
7
- data.tar.gz: ed0f32387285db507797e7b34962a20e4dc6174e7389ffa2488afb8724668c11d1e0089e9e2b62a958875e0e2bb9d7f4976d5d0ef887cd83604acb4939d25b7a
6
+ metadata.gz: 2568587bc47fe2eb892fc7e9f8f4981aceaa06eef4b27ad54cb427e9082cc23571a2c26f8ae9380dfcd52fbb40f38a77ad0d2282e7ada54d8744e3e21edcd301
7
+ data.tar.gz: 4b8505d33b3eef150aeb76f23548f5983740d1940488a0ee1c7d6e515cd249e5fa74fbdadb72543a91104ca9ce609eb0f374dd01c033db8eee2bb7585525b09f
data/.gitignore CHANGED
@@ -9,3 +9,7 @@
9
9
 
10
10
  # rspec failure tracking
11
11
  .rspec_status
12
+ .rubocop_todo.yml
13
+ .rubocop-https---*-yml
14
+ .ruby-version
15
+ Gemfile.lock
@@ -12,6 +12,17 @@ module Suma
12
12
  # ExtractTerms command using Expressir to extract terms into the
13
13
  # Glossarist v2 format
14
14
  class ExtractTerms < Thor
15
+ # Matches patterns like "A thing is a type of {{entity}}." or
16
+ # "An object is a type of a {{entity}}"
17
+ REDUNDANT_NOTE_REGEX =
18
+ %r{
19
+ ^An? # Starts with "A" or "An"
20
+ \s.*?\sis\sa\stype\sof # Text followed by "is a type of"
21
+ (\sa|\san)? # Optional " a" or " an"
22
+ \s\{\{[^\}]*\}\} # Text in double curly braces
23
+ \s*?\.?$ # Optional whitespace and period at the end
24
+ }x
25
+
15
26
  desc "extract_terms SCHEMA_MANIFEST_FILE GLOSSARIST_OUTPUT_PATH",
16
27
  "Extract terms from SCHEMA_MANIFEST_FILE into " \
17
28
  "Glossarist v2 format"
@@ -129,11 +140,12 @@ module Suma
129
140
  data.sources = [source_ref] if source_ref
130
141
 
131
142
  # Only assign optional fields if they have content
132
- notes = get_entity_notes(entity, schema_domain)
143
+ notes = get_entity_notes(entity, schema_domain, data.definition)
133
144
  data.notes = notes if notes && !notes.empty?
134
145
 
135
- examples = get_entity_examples(entity, schema_domain)
136
- data.examples = examples if examples && !examples.empty?
146
+ # examples = get_entity_examples(entity, schema_domain)
147
+ # data.examples = examples if examples && !examples.empty?
148
+ data.examples = []
137
149
  end
138
150
 
139
151
  Glossarist::LocalizedConcept.new.tap do |concept|
@@ -245,9 +257,20 @@ module Suma
245
257
  [Glossarist::DetailedDefinition.new(content: definition)]
246
258
  end
247
259
 
248
- def get_entity_notes(entity, schema_domain)
260
+ def get_entity_notes(entity, schema_domain, definitions)
261
+ puts "Extracting notes for entity: #{entity.id}"
249
262
  notes = []
250
263
 
264
+ notes = add_entity_notes(entity, schema_domain, notes)
265
+ # notes = add_other_notes(entity, schema_domain, notes)
266
+ notes = only_keep_first_sentence(notes)
267
+ notes = remove_see_content(notes)
268
+ notes = remove_redundant_note(notes)
269
+ notes = remove_invalid_references(notes)
270
+ compare_with_definitions(notes, definitions)
271
+ end
272
+
273
+ def add_entity_notes(entity, schema_domain, notes)
251
274
  # Add trimmed definition from entity description as first note
252
275
  if entity.remarks && !entity.remarks.empty?
253
276
  trimmed_def = trim_definition(entity.remarks)
@@ -258,7 +281,11 @@ module Suma
258
281
  end
259
282
  end
260
283
 
261
- # Add other notes
284
+ notes.compact
285
+ end
286
+
287
+ def add_other_notes(entity, schema_domain, notes)
288
+ # Add other notes from entity remarks
262
289
  other_notes = [
263
290
  entity.remark_items&.select do |ri|
264
291
  ri.id == "__note"
@@ -274,6 +301,67 @@ module Suma
274
301
  notes
275
302
  end
276
303
 
304
+ # https://github.com/metanorma/iso-10303/issues/621
305
+ # 1. First sentence in first paragraph of the entity description
306
+ # (in EXPRESS remark) becomes NOTE 1 in ISO 10303-2 of the entity.
307
+ def only_keep_first_sentence(notes)
308
+ notes.each do |note|
309
+ # Split by period and take the first sentence
310
+ # Avoid splitting by pattern like "abc.def"
311
+ if note&.content
312
+ new_content = note.content
313
+ .split(".\n").first.strip
314
+ .split(". ").first.strip
315
+ note.content = if new_content.end_with?(".")
316
+ new_content
317
+ else
318
+ "#{new_content}."
319
+ end
320
+ end
321
+ end
322
+ end
323
+
324
+ # https://github.com/metanorma/iso-10303/issues/621
325
+ # 2. If this first sentence matches the 7-word magic sentence
326
+ # (2-3 forms of that), it is discarded so there will not be a NOTE 1.
327
+ def compare_with_definitions(notes, definitions)
328
+ if notes&.first&.content == definitions&.first&.content
329
+ # Discarding first note as it matches the definition
330
+ return []
331
+ end
332
+
333
+ notes
334
+ end
335
+
336
+ # https://github.com/metanorma/iso-10303/issues/621
337
+ # 3. No reference to any types or attribute or figures allowed in first
338
+ # sentence. Entity references “{{…}}” are allowed.
339
+ def remove_invalid_references(notes)
340
+ notes.reject do |note|
341
+ note.content.include?("image::") ||
342
+ note.content.match?(/<<(.*?){1,999}>>/)
343
+ end
344
+ end
345
+
346
+ # https://github.com/metanorma/iso-10303/issues/621
347
+ # 4. Entity notes and examples in EXPRESS remarks are NOT represented in
348
+ # part 2.
349
+ def remove_redundant_note(notes)
350
+ notes.reject do |note|
351
+ note.content.match?(REDUNDANT_NOTE_REGEX) &&
352
+ !note.content.include?("\n")
353
+ end
354
+ end
355
+
356
+ # https://github.com/metanorma/iso-10303/issues/621
357
+ # 5. If the sentence contains “\s+(see …)”, the contents including the
358
+ # parentheses are removed.
359
+ def remove_see_content(notes)
360
+ notes.each do |note|
361
+ note.content = note.content.gsub(/\s+\(see(.*?){1,999}\)/, "")
362
+ end
363
+ end
364
+
277
365
  def get_entity_examples(entity, schema_domain)
278
366
  examples = entity.remark_items&.select do |ri|
279
367
  ri.id == "__example"
@@ -382,13 +470,18 @@ module Suma
382
470
  end
383
471
  # rubocop:enable Metrics/MethodLength
384
472
 
385
- # Replace `<<express:{schema}.{entity},{render}>>` with {{entity,render}}
473
+ # Replace `<<express:{schema}.{entity}>>` with {{entity}}
474
+ # and `<<express:{schema}.{entity},{render}>>` with {{entity,render}}
386
475
  def express_reference_to_mention(description)
387
476
  # TODO: Use Expressir to check whether the "entity" is really an
388
477
  # EXPRESS ENTITY. If not, skip the mention.
389
- description.gsub(/<<express:([^,]+),([^>]+)>>/) do |_match|
390
- "{{#{Regexp.last_match[1].split('.').last},#{Regexp.last_match[2]}}}"
391
- end
478
+ description
479
+ .gsub(/<<express:([^,]+)>>/) do |_match|
480
+ "{{#{Regexp.last_match[1].split('.').last}}}"
481
+ end.gsub(/<<express:([^,]+),([^>]+)>>/) do |_match|
482
+ "{{#{Regexp.last_match[1].split('.').last}," \
483
+ "#{Regexp.last_match[2]}}}"
484
+ end
392
485
  end
393
486
 
394
487
  def entity_name_to_text(entity_id)
data/lib/suma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Suma
4
- VERSION = "0.1.15"
4
+ VERSION = "0.1.16"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.15
4
+ version: 0.1.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-07-07 00:00:00.000000000 Z
11
+ date: 2025-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: expressir