suma 0.1.18 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3fa571b528df159a9d83210a7f91f03cdb57cfa497dcaa312ad831392b4ab3ae
4
- data.tar.gz: e536b9101471873b9ba3df2f422ec13221db3941e240c8731bcba1e79e361431
3
+ metadata.gz: 930231a5ca9c90a49c8e52b4a0881e2f93fd87508506872c02e693b9c0b8b25e
4
+ data.tar.gz: 13202f05c7673f950e8d4de87a7088a044db5e0b393cf768889c3c0483baf174
5
5
  SHA512:
6
- metadata.gz: 0a759070a8972bba5f92705f9abc5bc7e23038d1c92873b35457709dfeed1e3c5a03acfd426feca37760fe849e5d173faf742e84231d97544b13f38e91cba192
7
- data.tar.gz: cca4eeff40165f757f60311ee9db9a33802cfb1733c5de09aba15f63dce4aca1d1d38c74af1701b8ce689fac41ee994fd6b6de90f231c6a3ce1b76a026b1fce1
6
+ metadata.gz: 63c387978764d7bf16fb2b8ea129038dbbf771a360415ddf09254ab14dcf33cdae44166fbdb60439e52026925c7755e4b631985ac0b41d9b2cc43311e3e2374c
7
+ data.tar.gz: 8632ad3a285fa3a970805c7491812f07ed9501268f6cf3ce12fcb2be22b48d30a494e47bfbb40de1f4fe3f4c0e4822f5c2e3978637605f3afac7b2eb28edb8b9
data/.gitignore CHANGED
@@ -9,7 +9,6 @@
9
9
 
10
10
  # rspec failure tracking
11
11
  .rspec_status
12
- .rubocop_todo.yml
13
12
  .rubocop-https---*-yml
14
13
  .ruby-version
15
14
  Gemfile.lock
data/.rubocop.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  inherit_from:
2
- - https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
3
2
  - .rubocop_todo.yml
3
+ - https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
4
4
 
5
5
  plugins:
6
6
  - rubocop-performance
@@ -11,4 +11,9 @@ AllCops:
11
11
  TargetRubyVersion: 3.1
12
12
  NewCops: enable
13
13
  Exclude:
14
+ - 'lib/suma/cli/reformat.rb'
15
+ - 'lib/suma/cli/generate_schemas.rb'
16
+ - 'lib/suma/cli/validate_ascii.rb'
17
+ - 'lib/suma/cli/validate_links.rb'
18
+ - 'lib/suma/cli/extract_terms.rb'
14
19
  - 'vendor/**/*'
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-07-17 08:10:53 UTC using RuboCop version 1.78.0.
3
+ # on 2025-07-18 05:24:45 UTC using RuboCop version 1.78.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -20,23 +20,7 @@ Gemspec/RequiredRubyVersion:
20
20
  Exclude:
21
21
  - 'suma.gemspec'
22
22
 
23
- # Offense count: 17
24
- # This cop supports safe autocorrection (--autocorrect).
25
- # Configuration parameters: EnforcedStyle, IndentationWidth.
26
- # SupportedStyles: with_first_argument, with_fixed_indentation
27
- Layout/ArgumentAlignment:
28
- Exclude:
29
- - 'lib/suma/cli/validate_ascii.rb'
30
-
31
- # Offense count: 1
32
- # This cop supports safe autocorrection (--autocorrect).
33
- # Configuration parameters: EnforcedStyle.
34
- # SupportedStyles: normal, indented_internal_methods
35
- Layout/IndentationConsistency:
36
- Exclude:
37
- - 'lib/suma/glossarist_extensions.rb'
38
-
39
- # Offense count: 90
23
+ # Offense count: 28
40
24
  # This cop supports safe autocorrection (--autocorrect).
41
25
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
42
26
  # URISchemes: http, https
@@ -45,9 +29,6 @@ Layout/LineLength:
45
29
  - 'lib/suma/cli.rb'
46
30
  - 'lib/suma/cli/build.rb'
47
31
  - 'lib/suma/cli/validate.rb'
48
- - 'lib/suma/cli/validate_ascii.rb'
49
- - 'lib/suma/cli/validate_links.rb'
50
- - 'lib/suma/glossarist_extensions.rb'
51
32
  - 'lib/suma/processor.rb'
52
33
  - 'lib/suma/schema_attachment.rb'
53
34
  - 'lib/suma/schema_collection.rb'
@@ -57,92 +38,31 @@ Layout/LineLength:
57
38
  - 'spec/suma/cli/validate_ascii_spec.rb'
58
39
  - 'suma.gemspec'
59
40
 
60
- # Offense count: 22
61
- # This cop supports safe autocorrection (--autocorrect).
62
- # Configuration parameters: AllowInHeredoc.
63
- Layout/TrailingWhitespace:
64
- Exclude:
65
- - 'lib/suma/cli/validate_ascii.rb'
66
-
67
41
  # Offense count: 1
68
- # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
69
- Lint/DuplicateBranch:
70
- Exclude:
71
- - 'lib/suma/cli/extract_terms.rb'
72
-
73
- # Offense count: 2
74
42
  Lint/DuplicateMethods:
75
43
  Exclude:
76
- - 'lib/suma/cli/validate_ascii.rb'
77
44
  - 'lib/suma/express_schema.rb'
78
45
 
79
- # Offense count: 2
80
- # This cop supports unsafe autocorrection (--autocorrect-all).
81
- Lint/NonAtomicFileOperation:
82
- Exclude:
83
- - 'lib/suma/glossarist_extensions.rb'
84
-
85
- # Offense count: 1
86
- # This cop supports safe autocorrection (--autocorrect).
87
- # Configuration parameters: AutoCorrect, IgnoreEmptyBlocks, AllowUnusedKeywordArguments.
88
- Lint/UnusedBlockArgument:
89
- Exclude:
90
- - 'lib/suma/glossarist_extensions.rb'
91
-
92
- # Offense count: 23
46
+ # Offense count: 4
93
47
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
94
48
  Metrics/AbcSize:
95
49
  Exclude:
96
- - 'lib/suma/cli/extract_terms.rb'
97
- - 'lib/suma/cli/validate_ascii.rb'
98
- - 'lib/suma/cli/validate_links.rb'
99
- - 'lib/suma/glossarist_extensions.rb'
100
50
  - 'lib/suma/schema_attachment.rb'
101
51
  - 'lib/suma/schema_document.rb'
102
52
  - 'lib/suma/thor_ext.rb'
103
53
 
104
- # Offense count: 3
105
- # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
106
- # AllowedMethods: refine
107
- Metrics/BlockLength:
108
- Max: 64
109
-
110
- # Offense count: 9
54
+ # Offense count: 1
111
55
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
112
56
  Metrics/CyclomaticComplexity:
113
57
  Exclude:
114
- - 'lib/suma/cli/extract_terms.rb'
115
- - 'lib/suma/cli/validate_ascii.rb'
116
- - 'lib/suma/cli/validate_links.rb'
117
- - 'lib/suma/glossarist_extensions.rb'
118
58
  - 'lib/suma/thor_ext.rb'
119
59
 
120
- # Offense count: 36
121
- # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
122
- Metrics/MethodLength:
123
- Max: 107
124
-
125
60
  # Offense count: 4
126
- # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
127
- Metrics/ParameterLists:
128
- Max: 6
129
-
130
- # Offense count: 6
131
- # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
132
- Metrics/PerceivedComplexity:
133
- Exclude:
134
- - 'lib/suma/cli/extract_terms.rb'
135
- - 'lib/suma/cli/validate_ascii.rb'
136
- - 'lib/suma/cli/validate_links.rb'
137
- - 'lib/suma/glossarist_extensions.rb'
138
-
139
- # Offense count: 5
140
61
  # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
141
62
  # SupportedStyles: snake_case, normalcase, non_integer
142
63
  # AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
143
64
  Naming/VariableNumber:
144
65
  Exclude:
145
- - 'lib/suma/cli/validate_ascii.rb'
146
66
  - 'spec/suma/cli/validate_ascii_spec.rb'
147
67
 
148
68
  # Offense count: 1
@@ -159,54 +79,3 @@ RSpec/ExampleLength:
159
79
  # Offense count: 6
160
80
  RSpec/MultipleExpectations:
161
81
  Max: 12
162
-
163
- # Offense count: 2
164
- # This cop supports safe autocorrection (--autocorrect).
165
- # Configuration parameters: AutoCorrect, EnforcedStyle, AllowComments.
166
- # SupportedStyles: empty, nil, both
167
- Style/EmptyElse:
168
- Exclude:
169
- - 'lib/suma/cli/validate_links.rb'
170
- - 'lib/suma/glossarist_extensions.rb'
171
-
172
- # Offense count: 4
173
- # This cop supports safe autocorrection (--autocorrect).
174
- # Configuration parameters: MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
175
- # SupportedStyles: annotated, template, unannotated
176
- Style/FormatStringToken:
177
- EnforcedStyle: unannotated
178
-
179
- # Offense count: 2
180
- # This cop supports unsafe autocorrection (--autocorrect-all).
181
- # Configuration parameters: AllowedReceivers.
182
- # AllowedReceivers: Thread.current
183
- Style/HashEachMethods:
184
- Exclude:
185
- - 'lib/suma/glossarist_extensions.rb'
186
-
187
- # Offense count: 2
188
- # This cop supports unsafe autocorrection (--autocorrect-all).
189
- Style/IdenticalConditionalBranches:
190
- Exclude:
191
- - 'lib/suma/glossarist_extensions.rb'
192
-
193
- # Offense count: 3
194
- # This cop supports unsafe autocorrection (--autocorrect-all).
195
- # Configuration parameters: ConvertCodeThatCanStartToReturnNil, AllowedMethods, MaxChainLength.
196
- # AllowedMethods: present?, blank?, presence, try, try!
197
- Style/SafeNavigation:
198
- Exclude:
199
- - 'lib/suma/glossarist_extensions.rb'
200
-
201
- # Offense count: 1
202
- # Configuration parameters: Max.
203
- Style/SafeNavigationChainLength:
204
- Exclude:
205
- - 'lib/suma/cli/extract_terms.rb'
206
-
207
- # Offense count: 4
208
- # This cop supports unsafe autocorrection (--autocorrect-all).
209
- # Configuration parameters: Mode.
210
- Style/StringConcatenation:
211
- Exclude:
212
- - 'lib/suma/cli/validate_ascii.rb'
@@ -308,7 +308,14 @@ module Suma
308
308
  # (in EXPRESS remark) becomes NOTE 1 in ISO 10303-2 of the entity.
309
309
  def only_keep_first_sentence(notes)
310
310
  notes.each do |note|
311
- # Split by period and take the first sentence
311
+ # Skip truncation only for content that starts with a paragraph ending in ":"
312
+ # followed by a list (complete list structures that should be preserved)
313
+ if note&.content && should_preserve_complete_structure?(note.content)
314
+ # For complete list structures, keep the content as-is
315
+ next
316
+ end
317
+
318
+ # Split by period and take the first sentence for all other content
312
319
  # Avoid splitting by pattern like "abc.def"
313
320
  if note&.content
314
321
  new_content = note.content
@@ -323,6 +330,30 @@ module Suma
323
330
  end
324
331
  end
325
332
 
333
# Decides whether a note's content is a complete "introduction + list"
# structure that must be kept whole instead of being cut down to its
# first sentence.
#
# The content qualifies only when all of the following hold:
# * its first line (after stripping) ends with ":",
# * that first line contains no period at all,
# * at least one more line follows, and
# * `starts_with_list?` accepts the stripped remainder.
#
# content - the note text (String, may be nil)
#
# Returns false for nil/empty content or when the first line does not
# qualify; otherwise returns whatever `starts_with_list?` returns.
def should_preserve_complete_structure?(content)
  return false if content.nil? || content.empty?

  intro, *rest = content.split("\n")
  intro = intro&.strip

  # Needs an introductory line ending in ":" plus something after it.
  return false unless intro&.end_with?(":") && !rest.empty?

  # Any period in the introduction means it holds more than one sentence,
  # so the caller should extract the first sentence instead.
  return false if intro.include?(".")

  starts_with_list?(rest.join("\n").strip)
end
356
+
326
357
  # https://github.com/metanorma/iso-10303/issues/621
327
358
  # 2. If this first sentence matches the 7-word magic sentence
328
359
  # (2-3 forms of that), it is discarded so there will not be a NOTE 1.
@@ -399,11 +430,112 @@ module Suma
399
430
  schema_id.end_with?("_bom")
400
431
  end
401
432
 
433
# Reports whether any line of +content+ begins with a list marker.
#
# A marker is "*", "-" or "+" (bulleted) or digits followed by "."
# (numbered); it may be preceded by whitespace and must be followed by
# at least one whitespace character.
#
# content - the text to inspect (String, may be nil)
#
# Returns true or false; nil and empty content yield false.
def contains_list?(content)
  return false if content.nil? || content.empty?

  # "^" matches at the start of every line, so one scan covers the
  # bulleted and the numbered form anywhere in the text.
  content.match?(/^\s*(?:[*+-]|\d+\.)\s+/)
end
439
+
440
# Reports whether +content+ itself begins with a list marker.
#
# A marker is "*", "-" or "+" (bulleted) or digits followed by "."
# (numbered); it may be preceded by whitespace and must be followed by
# at least one whitespace character.
#
# content - the text to inspect (String, may be nil)
#
# Returns true or false; nil and empty content yield false.
def starts_with_list?(content)
  return false if content.nil? || content.empty?

  # Anchor with \A, not ^: in Ruby "^" matches at the start of *every*
  # line, which made this check succeed for a marker on any line and
  # behave exactly like `contains_list?`.
  content.match?(/\A\s*(?:[*+-]|\d+\.)\s+/)
end
446
+
447
# Reports whether +content+ looks like it continues a preceding list.
#
# It is treated as a continuation when any of these holds:
# * the text begins with "which", "where" or "that" (prefix match),
# * some line consists solely of "+" or "--" (optionally followed by
#   whitespace), or
# * some line begins with two or more whitespace characters.
#
# content - the paragraph text (String, may be nil)
#
# Returns true or false; nil and empty content yield false.
def is_list_continuation?(content)
  return false if content.nil? || content.empty?

  # Logical continuation of the previous sentence.
  return true if content.start_with?("which", "where", "that")

  # AsciiDoc continuation markers on a line of their own, or indentation.
  content.match?(/^(?:\+|--)\s*$/) || content.match?(/^\s{2,}/)
end
456
+
457
# Gathers the paragraph at +start_index+ together with the following
# paragraphs that still belong to the same list, joined by blank lines.
#
# Collection rules for each following paragraph, in order:
# * a paragraph with a line made only of "--", or one ending in "--",
#   toggles the "inside continuation block" state and is always kept;
# * while inside a continuation block every paragraph is kept;
# * otherwise the paragraph is kept only if `starts_with_list?` or
#   `is_list_continuation?` accepts it (and it opens a block when it
#   holds an unmatched "--"); the first paragraph that fails this test
#   ends the collection.
#
# paragraphs  - Array of paragraph Strings
# start_index - index of the first paragraph of the list
#
# Returns the combined String, or `paragraphs[start_index]` unchanged
# when +start_index+ is not below the array length.
def extract_complete_list(paragraphs, start_index)
  return paragraphs[start_index] if start_index >= paragraphs.length

  head = paragraphs[start_index]
  collected = [head]

  # A "--" without a matching second "--" means the first paragraph has
  # already opened a continuation block.
  inside_block = head.include?("--") && !head.match?(/--.*--/m)

  ((start_index + 1)...paragraphs.length).each do |idx|
    para = paragraphs[idx]

    if para.match?(/^--\s*$/) || para.end_with?("--")
      # Block delimiter: entering or leaving a continuation block.
      inside_block = !inside_block
    elsif !inside_block
      # Outside a block only list items and continuations belong.
      break unless starts_with_list?(para) || is_list_continuation?(para)

      inside_block = true if para.include?("--") && !para.match?(/--.*--/m)
    end

    collected << para
  end

  collected.join("\n\n")
end
502
+
503
# Reports whether the list structure ends after +current_para+.
#
# The list ends when there is no next paragraph, or when both hold:
# 1. +current_para+ does not end with a "+" continuation marker
#    (trailing whitespace allowed), and
# 2. +next_para+ is neither a list item (`starts_with_list?`) nor a
#    list continuation (`is_list_continuation?`).
#
# current_para - the paragraph just processed (String)
# next_para    - the paragraph that follows (String or nil)
#
# Returns true or false.
def ends_list_structure?(current_para, next_para)
  return true if next_para.nil?

  # Anchor with \z, not $: in Ruby "$" matches at the end of *every*
  # line, so a "+" closing an interior line was mistaken for a marker
  # at the end of the paragraph.
  return false if current_para.match?(/\+\s*\z/)

  !(starts_with_list?(next_para) || is_list_continuation?(next_para))
end
513
+
514
# Extracts the first sentence of +paragraph+.
#
# The text is cut at the first ".\n" and then at the first ". ", so a
# period that is not followed by whitespace (as in "abc.def") does not
# end the sentence. The result always ends with a single trailing period.
#
# paragraph - the paragraph text (String, may be nil)
#
# Returns the first sentence as a String. Returns "" when there is no
# sentence text at all (nil, empty, whitespace-only or ".\n"-only input);
# these inputs previously raised NoMethodError because `split(...).first`
# is nil for them.
def apply_first_sentence_logic(paragraph)
  first_chunk = paragraph.to_s.split(".\n").first
  return "" if first_chunk.nil?

  sentence = first_chunk.strip.split(". ").first
  return "" if sentence.nil?

  sentence = sentence.strip
  sentence.end_with?(".") ? sentence : "#{sentence}."
end
528
+
402
529
  # rubocop:disable Metrics/MethodLength
403
530
  def combine_paragraphs(full_paragraph, next_paragraph)
531
+ # Check if we're dealing with a list structure
532
+ if contains_list?(full_paragraph) || starts_with_list?(next_paragraph)
533
+ return combine_list_content(full_paragraph, next_paragraph)
534
+ end
535
+
536
+ # For regular paragraphs, apply the original first-sentence logic
404
537
  # If full_paragraph already contains a period, extract that.
405
538
  if m = full_paragraph.match(/\A(?<inner_first>[^\n]*?\.)\s/)
406
- # puts "CONDITION 1"
407
539
  if m[:inner_first]
408
540
  return m[:inner_first]
409
541
  else
@@ -413,24 +545,26 @@ module Suma
413
545
 
414
546
  # If full_paragraph ends with a period, this is the last.
415
547
  if /\.\s*\Z/.match?(full_paragraph)
416
- # puts "CONDITION 2"
417
548
  return full_paragraph
418
549
  end
419
550
 
420
- # If next_paragraph is a list
421
- if next_paragraph.start_with?("*")
422
- # puts "CONDITION 3"
551
+ # If next_paragraph is a continuation of a paragraph
552
+ if next_paragraph&.start_with?("which", "where", "that")
423
553
  return "#{full_paragraph}\n\n#{next_paragraph}"
424
554
  end
425
555
 
426
- # If next_paragraph is a continuation of a list
427
- if next_paragraph.start_with?("which", "that")
428
- # puts "CONDITION 4"
429
- return "#{full_paragraph}\n\n#{next_paragraph}"
556
+ full_paragraph
557
+ end
558
+
559
+ def combine_list_content(full_paragraph, next_paragraph)
560
+ combined = full_paragraph.dup
561
+
562
+ # If we have a next paragraph, add it
563
+ unless next_paragraph.nil? || next_paragraph.empty?
564
+ combined += "\n\n#{next_paragraph}"
430
565
  end
431
566
 
432
- # puts "CONDITION 5"
433
- full_paragraph
567
+ combined
434
568
  end
435
569
 
436
570
  def trim_definition(definition)
@@ -445,23 +579,22 @@ module Suma
445
579
 
446
580
  return nil if definition_str.empty?
447
581
 
448
- # Unless the first paragraph ends with "between" and is followed by a
449
- # list, don't split
450
582
  paragraphs = definition_str.split("\n\n")
451
-
452
- # puts paragraphs.inspect
453
-
454
583
  first_paragraph = paragraphs.first
455
584
 
456
- combined = if paragraphs.length > 1
457
- paragraphs[1..].inject(first_paragraph) do |acc, p|
458
- combine_paragraphs(acc, p)
459
- end
460
- else
461
- combine_paragraphs(first_paragraph, "")
462
- end
463
-
464
- # puts "combined--------- #{combined}"
585
+ # If we only have one paragraph, apply the original logic
586
+ if paragraphs.length == 1
587
+ combined = apply_first_sentence_logic(first_paragraph)
588
+ elsif first_paragraph.end_with?(":") && paragraphs.length > 1 && starts_with_list?(paragraphs[1])
589
+ # Case 1: First paragraph ends with ":" and leads into a list
590
+ # Extract the complete list structure (this is an introductory paragraph)
591
+ complete_list = extract_complete_list(paragraphs, 1)
592
+ combined = "#{first_paragraph}\n\n#{complete_list}"
593
+ else
594
+ # Case 2: For all other cases (including sentences followed by lists)
595
+ # Extract only the first sentence from the first paragraph
596
+ combined = apply_first_sentence_logic(first_paragraph)
597
+ end
465
598
 
466
599
  # Remove comments until end of line
467
600
  combined = "#{combined}\n"
data/lib/suma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Suma
4
- VERSION = "0.1.18"
4
+ VERSION = "0.1.20"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.18
4
+ version: 0.1.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-07-17 00:00:00.000000000 Z
11
+ date: 2025-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: expressir