suma 0.1.17 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +25 -69
- data/lib/suma/cli/extract_terms.rb +164 -29
- data/lib/suma/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c1d7370c2e26551a0f6c13b354b1cf7c78180bc941a194421948918f35e69bd
|
4
|
+
data.tar.gz: 80aa2f22309620d3516c2aa65943c5287a9f09c40c797d24f27d34eea3821997
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38a4ea2e865b003787c466a6c49e86b5acee535a2dd218f0f384988392b0c8b8a67f2ef2e28c068a41c1ba97f72417f8b0f11620c41122e86f0c56c78f143caf
|
7
|
+
data.tar.gz: b61f64d0a8cec12a54ef82ed1ff6c1dab8b45200e155588d78bbb80d80f3468236d48e46c21af83beb911c4983afdadc91e8c35f5bbd92dcad2ad2e70d005299
|
data/.rubocop_todo.yml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2025-07-
|
3
|
+
# on 2025-07-18 02:29:17 UTC using RuboCop version 1.78.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
@@ -20,23 +20,7 @@ Gemspec/RequiredRubyVersion:
|
|
20
20
|
Exclude:
|
21
21
|
- 'suma.gemspec'
|
22
22
|
|
23
|
-
# Offense count:
|
24
|
-
# This cop supports safe autocorrection (--autocorrect).
|
25
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
26
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
27
|
-
Layout/ArgumentAlignment:
|
28
|
-
Exclude:
|
29
|
-
- 'lib/suma/cli/validate_ascii.rb'
|
30
|
-
|
31
|
-
# Offense count: 1
|
32
|
-
# This cop supports safe autocorrection (--autocorrect).
|
33
|
-
# Configuration parameters: EnforcedStyle.
|
34
|
-
# SupportedStyles: normal, indented_internal_methods
|
35
|
-
Layout/IndentationConsistency:
|
36
|
-
Exclude:
|
37
|
-
- 'lib/suma/glossarist_extensions.rb'
|
38
|
-
|
39
|
-
# Offense count: 90
|
23
|
+
# Offense count: 70
|
40
24
|
# This cop supports safe autocorrection (--autocorrect).
|
41
25
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
|
42
26
|
# URISchemes: http, https
|
@@ -44,6 +28,7 @@ Layout/LineLength:
|
|
44
28
|
Exclude:
|
45
29
|
- 'lib/suma/cli.rb'
|
46
30
|
- 'lib/suma/cli/build.rb'
|
31
|
+
- 'lib/suma/cli/extract_terms.rb'
|
47
32
|
- 'lib/suma/cli/validate.rb'
|
48
33
|
- 'lib/suma/cli/validate_ascii.rb'
|
49
34
|
- 'lib/suma/cli/validate_links.rb'
|
@@ -57,14 +42,7 @@ Layout/LineLength:
|
|
57
42
|
- 'spec/suma/cli/validate_ascii_spec.rb'
|
58
43
|
- 'suma.gemspec'
|
59
44
|
|
60
|
-
# Offense count:
|
61
|
-
# This cop supports safe autocorrection (--autocorrect).
|
62
|
-
# Configuration parameters: AllowInHeredoc.
|
63
|
-
Layout/TrailingWhitespace:
|
64
|
-
Exclude:
|
65
|
-
- 'lib/suma/cli/validate_ascii.rb'
|
66
|
-
|
67
|
-
# Offense count: 1
|
45
|
+
# Offense count: 2
|
68
46
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
69
47
|
Lint/DuplicateBranch:
|
70
48
|
Exclude:
|
@@ -76,20 +54,7 @@ Lint/DuplicateMethods:
|
|
76
54
|
- 'lib/suma/cli/validate_ascii.rb'
|
77
55
|
- 'lib/suma/express_schema.rb'
|
78
56
|
|
79
|
-
# Offense count:
|
80
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
81
|
-
Lint/NonAtomicFileOperation:
|
82
|
-
Exclude:
|
83
|
-
- 'lib/suma/glossarist_extensions.rb'
|
84
|
-
|
85
|
-
# Offense count: 1
|
86
|
-
# This cop supports safe autocorrection (--autocorrect).
|
87
|
-
# Configuration parameters: AutoCorrect, IgnoreEmptyBlocks, AllowUnusedKeywordArguments.
|
88
|
-
Lint/UnusedBlockArgument:
|
89
|
-
Exclude:
|
90
|
-
- 'lib/suma/glossarist_extensions.rb'
|
91
|
-
|
92
|
-
# Offense count: 23
|
57
|
+
# Offense count: 25
|
93
58
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
94
59
|
Metrics/AbcSize:
|
95
60
|
Exclude:
|
@@ -107,7 +72,7 @@ Metrics/AbcSize:
|
|
107
72
|
Metrics/BlockLength:
|
108
73
|
Max: 64
|
109
74
|
|
110
|
-
# Offense count:
|
75
|
+
# Offense count: 12
|
111
76
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
112
77
|
Metrics/CyclomaticComplexity:
|
113
78
|
Exclude:
|
@@ -117,7 +82,7 @@ Metrics/CyclomaticComplexity:
|
|
117
82
|
- 'lib/suma/glossarist_extensions.rb'
|
118
83
|
- 'lib/suma/thor_ext.rb'
|
119
84
|
|
120
|
-
# Offense count:
|
85
|
+
# Offense count: 37
|
121
86
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
122
87
|
Metrics/MethodLength:
|
123
88
|
Max: 107
|
@@ -127,7 +92,7 @@ Metrics/MethodLength:
|
|
127
92
|
Metrics/ParameterLists:
|
128
93
|
Max: 6
|
129
94
|
|
130
|
-
# Offense count:
|
95
|
+
# Offense count: 9
|
131
96
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
132
97
|
Metrics/PerceivedComplexity:
|
133
98
|
Exclude:
|
@@ -136,6 +101,17 @@ Metrics/PerceivedComplexity:
|
|
136
101
|
- 'lib/suma/cli/validate_links.rb'
|
137
102
|
- 'lib/suma/glossarist_extensions.rb'
|
138
103
|
|
104
|
+
# Offense count: 1
|
105
|
+
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
|
106
|
+
# NamePrefix: is_, has_, have_, does_
|
107
|
+
# ForbiddenPrefixes: is_, has_, have_, does_
|
108
|
+
# AllowedMethods: is_a?
|
109
|
+
# MethodDefinitionMacros: define_method, define_singleton_method
|
110
|
+
Naming/PredicatePrefix:
|
111
|
+
Exclude:
|
112
|
+
- 'spec/**/*'
|
113
|
+
- 'lib/suma/cli/extract_terms.rb'
|
114
|
+
|
139
115
|
# Offense count: 5
|
140
116
|
# Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
|
141
117
|
# SupportedStyles: snake_case, normalcase, non_integer
|
@@ -176,37 +152,17 @@ Style/EmptyElse:
|
|
176
152
|
Style/FormatStringToken:
|
177
153
|
EnforcedStyle: unannotated
|
178
154
|
|
179
|
-
# Offense count:
|
180
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
181
|
-
# Configuration parameters: AllowedReceivers.
|
182
|
-
# AllowedReceivers: Thread.current
|
183
|
-
Style/HashEachMethods:
|
184
|
-
Exclude:
|
185
|
-
- 'lib/suma/glossarist_extensions.rb'
|
186
|
-
|
187
|
-
# Offense count: 2
|
188
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
189
|
-
Style/IdenticalConditionalBranches:
|
190
|
-
Exclude:
|
191
|
-
- 'lib/suma/glossarist_extensions.rb'
|
192
|
-
|
193
|
-
# Offense count: 3
|
155
|
+
# Offense count: 1
|
194
156
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
195
|
-
# Configuration parameters:
|
196
|
-
#
|
197
|
-
Style/
|
157
|
+
# Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns.
|
158
|
+
# SupportedStyles: predicate, comparison
|
159
|
+
Style/NumericPredicate:
|
198
160
|
Exclude:
|
199
|
-
- '
|
161
|
+
- 'spec/**/*'
|
162
|
+
- 'lib/suma/cli/extract_terms.rb'
|
200
163
|
|
201
164
|
# Offense count: 1
|
202
165
|
# Configuration parameters: Max.
|
203
166
|
Style/SafeNavigationChainLength:
|
204
167
|
Exclude:
|
205
168
|
- 'lib/suma/cli/extract_terms.rb'
|
206
|
-
|
207
|
-
# Offense count: 4
|
208
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
209
|
-
# Configuration parameters: Mode.
|
210
|
-
Style/StringConcatenation:
|
211
|
-
Exclude:
|
212
|
-
- 'lib/suma/cli/validate_ascii.rb'
|
data/lib/suma/cli/extract_terms.rb
CHANGED
@@ -103,6 +103,10 @@ module Suma
|
|
103
103
|
language_code: language_code,
|
104
104
|
source_ref: source_ref,
|
105
105
|
)
|
106
|
+
localized_concept_id = get_localized_concept_identifier(
|
107
|
+
schema, entity, language_code
|
108
|
+
)
|
109
|
+
localized_concept.uuid = localized_concept_id
|
106
110
|
|
107
111
|
managed_data = Glossarist::ManagedConceptData.new.tap do |data|
|
108
112
|
data.id = get_entity_identifier(schema, entity)
|
@@ -111,9 +115,7 @@ module Suma
|
|
111
115
|
data.localizations[language_code] = localized_concept
|
112
116
|
# uuid is automatically set from the serialization of the object
|
113
117
|
data.localized_concepts = {
|
114
|
-
language_code => get_localized_concept_identifier(
|
115
|
-
schema, entity, language_code
|
116
|
-
),
|
118
|
+
language_code => localized_concept_id,
|
117
119
|
}
|
118
120
|
end
|
119
121
|
|
@@ -306,7 +308,14 @@ module Suma
|
|
306
308
|
# (in EXPRESS remark) becomes NOTE 1 in ISO 10303-2 of the entity.
|
307
309
|
def only_keep_first_sentence(notes)
|
308
310
|
notes.each do |note|
|
309
|
-
#
|
311
|
+
# Skip truncation only for content that starts with a paragraph ending in ":"
|
312
|
+
# followed by a list (complete list structures that should be preserved)
|
313
|
+
if note&.content && should_preserve_complete_structure?(note.content)
|
314
|
+
# For complete list structures, keep the content as-is
|
315
|
+
next
|
316
|
+
end
|
317
|
+
|
318
|
+
# Split by period and take the first sentence for all other content
|
310
319
|
# Avoid splitting by pattern like "abc.def"
|
311
320
|
if note&.content
|
312
321
|
new_content = note.content
|
@@ -321,6 +330,30 @@ module Suma
|
|
321
330
|
end
|
322
331
|
end
|
323
332
|
|
333
|
+
def should_preserve_complete_structure?(content)
|
334
|
+
return false if content.nil? || content.empty?
|
335
|
+
|
336
|
+
# Check if content starts with a single introductory sentence ending in ":"
|
337
|
+
# followed by a list. This indicates a complete list structure that should be preserved.
|
338
|
+
lines = content.split("\n")
|
339
|
+
first_paragraph = lines.first&.strip
|
340
|
+
|
341
|
+
# Look for pattern: Single sentence ending with ":" (introductory pattern)
|
342
|
+
if first_paragraph&.end_with?(":") && lines.length > 1
|
343
|
+
# Check if the first paragraph contains multiple sentences (periods before the colon)
|
344
|
+
# If it does, this is NOT an introductory paragraph - extract first sentence only
|
345
|
+
if first_paragraph.count(".").positive?
|
346
|
+
return false
|
347
|
+
end
|
348
|
+
|
349
|
+
# Check if there's a list after the colon
|
350
|
+
remaining_content = lines[1..].join("\n")
|
351
|
+
return starts_with_list?(remaining_content.strip)
|
352
|
+
end
|
353
|
+
|
354
|
+
false
|
355
|
+
end
|
356
|
+
|
324
357
|
# https://github.com/metanorma/iso-10303/issues/621
|
325
358
|
# 2. If this first sentence matches the 7-word magic sentence
|
326
359
|
# (2-3 forms of that), it is discarded so there will not be a NOTE 1.
|
@@ -397,11 +430,112 @@ module Suma
|
|
397
430
|
schema_id.end_with?("_bom")
|
398
431
|
end
|
399
432
|
|
433
|
+
def contains_list?(content)
|
434
|
+
return false if content.nil? || content.empty?
|
435
|
+
|
436
|
+
# Check if content contains list markers
|
437
|
+
content.match?(/^\s*[\*\-\+]\s+/m) || content.match?(/^\s*\d+\.\s+/m)
|
438
|
+
end
|
439
|
+
|
440
|
+
def starts_with_list?(content)
|
441
|
+
return false if content.nil? || content.empty?
|
442
|
+
|
443
|
+
# Check if content starts with list markers
|
444
|
+
content.match?(/^\s*[\*\-\+]\s+/) || content.match?(/^\s*\d+\.\s+/)
|
445
|
+
end
|
446
|
+
|
447
|
+
def is_list_continuation?(content)
|
448
|
+
return false if content.nil? || content.empty?
|
449
|
+
|
450
|
+
# Check for AsciiDoc list continuation patterns
|
451
|
+
content.match?(/^\+\s*$/) ||
|
452
|
+
content.match?(/^--\s*$/) ||
|
453
|
+
content.match?(/^\s{2,}/) || # Indented content (continuation)
|
454
|
+
content.start_with?("which", "that") # Logical continuation
|
455
|
+
end
|
456
|
+
|
457
|
+
def extract_complete_list(paragraphs, start_index)
|
458
|
+
return paragraphs[start_index] if start_index >= paragraphs.length
|
459
|
+
|
460
|
+
combined = paragraphs[start_index].dup
|
461
|
+
current_index = start_index + 1
|
462
|
+
|
463
|
+
# Check if the first paragraph already contains an opening continuation block
|
464
|
+
in_continuation_block = combined.include?("--") && !combined.match?(/--.*--/m)
|
465
|
+
|
466
|
+
# Continue collecting paragraphs while we're in a list context
|
467
|
+
while current_index < paragraphs.length
|
468
|
+
next_para = paragraphs[current_index]
|
469
|
+
|
470
|
+
# Check if we're entering or exiting a continuation block
|
471
|
+
if next_para.match?(/^--\s*$/) || next_para.end_with?("--")
|
472
|
+
in_continuation_block = !in_continuation_block
|
473
|
+
combined += "\n\n#{next_para}"
|
474
|
+
current_index += 1
|
475
|
+
next
|
476
|
+
end
|
477
|
+
|
478
|
+
# If we're in a continuation block, include all content until we hit the closing --
|
479
|
+
if in_continuation_block
|
480
|
+
combined += "\n\n#{next_para}"
|
481
|
+
current_index += 1
|
482
|
+
next
|
483
|
+
end
|
484
|
+
|
485
|
+
# Check if this is a list item or list continuation
|
486
|
+
if starts_with_list?(next_para) || is_list_continuation?(next_para)
|
487
|
+
combined += "\n\n#{next_para}"
|
488
|
+
current_index += 1
|
489
|
+
|
490
|
+
# Check if this paragraph contains an opening continuation block
|
491
|
+
if next_para.include?("--") && !next_para.match?(/--.*--/m)
|
492
|
+
in_continuation_block = true
|
493
|
+
end
|
494
|
+
else
|
495
|
+
# This paragraph is not part of the list structure
|
496
|
+
break
|
497
|
+
end
|
498
|
+
end
|
499
|
+
|
500
|
+
combined
|
501
|
+
end
|
502
|
+
|
503
|
+
def ends_list_structure?(current_para, next_para)
|
504
|
+
return true if next_para.nil?
|
505
|
+
|
506
|
+
# List ends if:
|
507
|
+
# 1. Current paragraph doesn't end with continuation markers
|
508
|
+
# 2. Next paragraph starts a new section (not list or continuation)
|
509
|
+
!current_para.match?(/\+\s*$/) &&
|
510
|
+
!starts_with_list?(next_para) &&
|
511
|
+
!is_list_continuation?(next_para)
|
512
|
+
end
|
513
|
+
|
514
|
+
def apply_first_sentence_logic(paragraph)
|
515
|
+
# Apply the original first-sentence extraction logic
|
516
|
+
# Split by period and take the first sentence
|
517
|
+
# Avoid splitting by pattern like "abc.def"
|
518
|
+
new_content = paragraph
|
519
|
+
.split(".\n").first.strip
|
520
|
+
.split(". ").first.strip
|
521
|
+
|
522
|
+
if new_content.end_with?(".")
|
523
|
+
new_content
|
524
|
+
else
|
525
|
+
"#{new_content}."
|
526
|
+
end
|
527
|
+
end
|
528
|
+
|
400
529
|
# rubocop:disable Metrics/MethodLength
|
401
530
|
def combine_paragraphs(full_paragraph, next_paragraph)
|
531
|
+
# Check if we're dealing with a list structure
|
532
|
+
if contains_list?(full_paragraph) || starts_with_list?(next_paragraph)
|
533
|
+
return combine_list_content(full_paragraph, next_paragraph)
|
534
|
+
end
|
535
|
+
|
536
|
+
# For regular paragraphs, apply the original first-sentence logic
|
402
537
|
# If full_paragraph already contains a period, extract that.
|
403
538
|
if m = full_paragraph.match(/\A(?<inner_first>[^\n]*?\.)\s/)
|
404
|
-
# puts "CONDITION 1"
|
405
539
|
if m[:inner_first]
|
406
540
|
return m[:inner_first]
|
407
541
|
else
|
@@ -411,24 +545,26 @@ module Suma
|
|
411
545
|
|
412
546
|
# If full_paragraph ends with a period, this is the last.
|
413
547
|
if /\.\s*\Z/.match?(full_paragraph)
|
414
|
-
# puts "CONDITION 2"
|
415
548
|
return full_paragraph
|
416
549
|
end
|
417
550
|
|
418
|
-
# If next_paragraph is a
|
419
|
-
if next_paragraph
|
420
|
-
# puts "CONDITION 3"
|
551
|
+
# If next_paragraph is a continuation of a paragraph
|
552
|
+
if next_paragraph&.start_with?("which", "that")
|
421
553
|
return "#{full_paragraph}\n\n#{next_paragraph}"
|
422
554
|
end
|
423
555
|
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
556
|
+
full_paragraph
|
557
|
+
end
|
558
|
+
|
559
|
+
def combine_list_content(full_paragraph, next_paragraph)
|
560
|
+
combined = full_paragraph.dup
|
561
|
+
|
562
|
+
# If we have a next paragraph, add it
|
563
|
+
unless next_paragraph.nil? || next_paragraph.empty?
|
564
|
+
combined += "\n\n#{next_paragraph}"
|
428
565
|
end
|
429
566
|
|
430
|
-
|
431
|
-
full_paragraph
|
567
|
+
combined
|
432
568
|
end
|
433
569
|
|
434
570
|
def trim_definition(definition)
|
@@ -443,23 +579,22 @@ module Suma
|
|
443
579
|
|
444
580
|
return nil if definition_str.empty?
|
445
581
|
|
446
|
-
# Unless the first paragraph ends with "between" and is followed by a
|
447
|
-
# list, don't split
|
448
582
|
paragraphs = definition_str.split("\n\n")
|
449
|
-
|
450
|
-
# puts paragraphs.inspect
|
451
|
-
|
452
583
|
first_paragraph = paragraphs.first
|
453
584
|
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
585
|
+
# If we only have one paragraph, apply the original logic
|
586
|
+
if paragraphs.length == 1
|
587
|
+
combined = apply_first_sentence_logic(first_paragraph)
|
588
|
+
elsif first_paragraph.end_with?(":") && paragraphs.length > 1 && starts_with_list?(paragraphs[1])
|
589
|
+
# Case 1: First paragraph ends with ":" and leads into a list
|
590
|
+
# Extract the complete list structure (this is an introductory paragraph)
|
591
|
+
complete_list = extract_complete_list(paragraphs, 1)
|
592
|
+
combined = "#{first_paragraph}\n\n#{complete_list}"
|
593
|
+
else
|
594
|
+
# Case 2: For all other cases (including sentences followed by lists)
|
595
|
+
# Extract only the first sentence from the first paragraph
|
596
|
+
combined = apply_first_sentence_logic(first_paragraph)
|
597
|
+
end
|
463
598
|
|
464
599
|
# Remove comments until end of line
|
465
600
|
combined = "#{combined}\n"
|
data/lib/suma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: suma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.17
|
4
|
+
version: 0.1.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-07-
|
11
|
+
date: 2025-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: expressir
|