RubyGems - suma - Versions diffs - 0.1.18 → 0.1.20 - Mend

suma 0.1.18 → 0.1.20

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It reflects the package contents as they appear in the public registry and is provided for informational purposes only.

Files changed (7)

checksums.yaml +4 -4
data/.gitignore +0 -1
data/.rubocop.yml +6 -1
data/.rubocop_todo.yml +4 -135
data/lib/suma/cli/extract_terms.rb +159 -26
data/lib/suma/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3fa571b528df159a9d83210a7f91f03cdb57cfa497dcaa312ad831392b4ab3ae
-  data.tar.gz: e536b9101471873b9ba3df2f422ec13221db3941e240c8731bcba1e79e361431
+  metadata.gz: 930231a5ca9c90a49c8e52b4a0881e2f93fd87508506872c02e693b9c0b8b25e
+  data.tar.gz: 13202f05c7673f950e8d4de87a7088a044db5e0b393cf768889c3c0483baf174
 SHA512:
-  metadata.gz: 0a759070a8972bba5f92705f9abc5bc7e23038d1c92873b35457709dfeed1e3c5a03acfd426feca37760fe849e5d173faf742e84231d97544b13f38e91cba192
-  data.tar.gz: cca4eeff40165f757f60311ee9db9a33802cfb1733c5de09aba15f63dce4aca1d1d38c74af1701b8ce689fac41ee994fd6b6de90f231c6a3ce1b76a026b1fce1
+  metadata.gz: 63c387978764d7bf16fb2b8ea129038dbbf771a360415ddf09254ab14dcf33cdae44166fbdb60439e52026925c7755e4b631985ac0b41d9b2cc43311e3e2374c
+  data.tar.gz: 8632ad3a285fa3a970805c7491812f07ed9501268f6cf3ce12fcb2be22b48d30a494e47bfbb40de1f4fe3f4c0e4822f5c2e3978637605f3afac7b2eb28edb8b9

data/.gitignore CHANGED Viewed

@@ -9,7 +9,6 @@
 # rspec failure tracking
 .rspec_status
-.rubocop_todo.yml
 .rubocop-https---*-yml
 .ruby-version
 Gemfile.lock

data/.rubocop.yml CHANGED Viewed

@@ -1,6 +1,6 @@
 inherit_from:
-  - https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
   - .rubocop_todo.yml
+  - https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
 plugins:
 - rubocop-performance
@@ -11,4 +11,9 @@ AllCops:
   TargetRubyVersion: 3.1
   NewCops: enable
   Exclude:
+    - 'lib/suma/cli/reformat.rb'
+    - 'lib/suma/cli/generate_schemas.rb'
+    - 'lib/suma/cli/validate_ascii.rb'
+    - 'lib/suma/cli/validate_links.rb'
+    - 'lib/suma/cli/extract_terms.rb'
     - 'vendor/**/*'

data/.rubocop_todo.yml CHANGED Viewed

@@ -1,6 +1,6 @@
 # This configuration was generated by
 # `rubocop --auto-gen-config`
-# on 2025-07-17 08:10:53 UTC using RuboCop version 1.78.0.
+# on 2025-07-18 05:24:45 UTC using RuboCop version 1.78.0.
 # The point is for the user to remove these configuration records
 # one by one as the offenses are removed from the code base.
 # Note that changes in the inspected code, or installation of new
@@ -20,23 +20,7 @@ Gemspec/RequiredRubyVersion:
   Exclude:
     - 'suma.gemspec'
-# Offense count: 17
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: EnforcedStyle, IndentationWidth.
-# SupportedStyles: with_first_argument, with_fixed_indentation
-Layout/ArgumentAlignment:
-  Exclude:
-    - 'lib/suma/cli/validate_ascii.rb'
-# Offense count: 1
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: EnforcedStyle.
-# SupportedStyles: normal, indented_internal_methods
-Layout/IndentationConsistency:
-  Exclude:
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 90
+# Offense count: 28
 # This cop supports safe autocorrection (--autocorrect).
 # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
 # URISchemes: http, https
@@ -45,9 +29,6 @@ Layout/LineLength:
     - 'lib/suma/cli.rb'
     - 'lib/suma/cli/build.rb'
     - 'lib/suma/cli/validate.rb'
-    - 'lib/suma/cli/validate_ascii.rb'
-    - 'lib/suma/cli/validate_links.rb'
-    - 'lib/suma/glossarist_extensions.rb'
     - 'lib/suma/processor.rb'
     - 'lib/suma/schema_attachment.rb'
     - 'lib/suma/schema_collection.rb'
@@ -57,92 +38,31 @@ Layout/LineLength:
     - 'spec/suma/cli/validate_ascii_spec.rb'
     - 'suma.gemspec'
-# Offense count: 22
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: AllowInHeredoc.
-Layout/TrailingWhitespace:
-  Exclude:
-    - 'lib/suma/cli/validate_ascii.rb'
 # Offense count: 1
-# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
-Lint/DuplicateBranch:
-  Exclude:
-    - 'lib/suma/cli/extract_terms.rb'
-# Offense count: 2
 Lint/DuplicateMethods:
   Exclude:
-    - 'lib/suma/cli/validate_ascii.rb'
     - 'lib/suma/express_schema.rb'
-# Offense count: 2
-# This cop supports unsafe autocorrection (--autocorrect-all).
-Lint/NonAtomicFileOperation:
-  Exclude:
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 1
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: AutoCorrect, IgnoreEmptyBlocks, AllowUnusedKeywordArguments.
-Lint/UnusedBlockArgument:
-  Exclude:
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 23
+# Offense count: 4
 # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
 Metrics/AbcSize:
   Exclude:
-    - 'lib/suma/cli/extract_terms.rb'
-    - 'lib/suma/cli/validate_ascii.rb'
-    - 'lib/suma/cli/validate_links.rb'
-    - 'lib/suma/glossarist_extensions.rb'
     - 'lib/suma/schema_attachment.rb'
     - 'lib/suma/schema_document.rb'
     - 'lib/suma/thor_ext.rb'
-# Offense count: 3
-# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
-# AllowedMethods: refine
-Metrics/BlockLength:
-  Max: 64
-# Offense count: 9
+# Offense count: 1
 # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
 Metrics/CyclomaticComplexity:
   Exclude:
-    - 'lib/suma/cli/extract_terms.rb'
-    - 'lib/suma/cli/validate_ascii.rb'
-    - 'lib/suma/cli/validate_links.rb'
-    - 'lib/suma/glossarist_extensions.rb'
     - 'lib/suma/thor_ext.rb'
-# Offense count: 36
-# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
-Metrics/MethodLength:
-  Max: 107
 # Offense count: 4
-# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
-Metrics/ParameterLists:
-  Max: 6
-# Offense count: 6
-# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
-Metrics/PerceivedComplexity:
-  Exclude:
-    - 'lib/suma/cli/extract_terms.rb'
-    - 'lib/suma/cli/validate_ascii.rb'
-    - 'lib/suma/cli/validate_links.rb'
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 5
 # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
 # SupportedStyles: snake_case, normalcase, non_integer
 # AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
 Naming/VariableNumber:
   Exclude:
-    - 'lib/suma/cli/validate_ascii.rb'
     - 'spec/suma/cli/validate_ascii_spec.rb'
 # Offense count: 1
@@ -159,54 +79,3 @@ RSpec/ExampleLength:
 # Offense count: 6
 RSpec/MultipleExpectations:
   Max: 12
-# Offense count: 2
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: AutoCorrect, EnforcedStyle, AllowComments.
-# SupportedStyles: empty, nil, both
-Style/EmptyElse:
-  Exclude:
-    - 'lib/suma/cli/validate_links.rb'
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 4
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
-# SupportedStyles: annotated, template, unannotated
-Style/FormatStringToken:
-  EnforcedStyle: unannotated
-# Offense count: 2
-# This cop supports unsafe autocorrection (--autocorrect-all).
-# Configuration parameters: AllowedReceivers.
-# AllowedReceivers: Thread.current
-Style/HashEachMethods:
-  Exclude:
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 2
-# This cop supports unsafe autocorrection (--autocorrect-all).
-Style/IdenticalConditionalBranches:
-  Exclude:
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 3
-# This cop supports unsafe autocorrection (--autocorrect-all).
-# Configuration parameters: ConvertCodeThatCanStartToReturnNil, AllowedMethods, MaxChainLength.
-# AllowedMethods: present?, blank?, presence, try, try!
-Style/SafeNavigation:
-  Exclude:
-    - 'lib/suma/glossarist_extensions.rb'
-# Offense count: 1
-# Configuration parameters: Max.
-Style/SafeNavigationChainLength:
-  Exclude:
-    - 'lib/suma/cli/extract_terms.rb'
-# Offense count: 4
-# This cop supports unsafe autocorrection (--autocorrect-all).
-# Configuration parameters: Mode.
-Style/StringConcatenation:
-  Exclude:
-    - 'lib/suma/cli/validate_ascii.rb'

data/lib/suma/cli/extract_terms.rb CHANGED Viewed

@@ -308,7 +308,14 @@ module Suma
       # (in EXPRESS remark) becomes NOTE 1 in ISO 10303-2 of the entity.
       def only_keep_first_sentence(notes)
         notes.each do |note|
-          # Split by period and take the first sentence
+          # Skip truncation only for content that starts with a paragraph ending in ":"
+          # followed by a list (complete list structures that should be preserved)
+          if note&.content && should_preserve_complete_structure?(note.content)
+            # For complete list structures, keep the content as-is
+            next
+          end
+          # Split by period and take the first sentence for all other content
           # Avoid splitting by pattern like "abc.def"
           if note&.content
             new_content = note.content
@@ -323,6 +330,30 @@ module Suma
         end
       end
+      def should_preserve_complete_structure?(content)
+        return false if content.nil? || content.empty?
+        # Check if content starts with a single introductory sentence ending in ":"
+        # followed by a list. This indicates a complete list structure that should be preserved.
+        lines = content.split("\n")
+        first_paragraph = lines.first&.strip
+        # Look for pattern: Single sentence ending with ":" (introductory pattern)
+        if first_paragraph&.end_with?(":") && lines.length > 1
+          # Check if the first paragraph contains multiple sentences (periods before the colon)
+          # If it does, this is NOT an introductory paragraph - extract first sentence only
+          if first_paragraph.count(".").positive?
+            return false
+          end
+          # Check if there's a list after the colon
+          remaining_content = lines[1..].join("\n")
+          return starts_with_list?(remaining_content.strip)
+        end
+        false
+      end
       # https://github.com/metanorma/iso-10303/issues/621
       # 2. If this first sentence matches the 7-word magic sentence
       # (2-3 forms of that), it is discarded so there will not be a NOTE 1.
@@ -399,11 +430,112 @@ module Suma
         schema_id.end_with?("_bom")
       end
+      def contains_list?(content)
+        return false if content.nil? || content.empty?
+        # Check if content contains list markers
+        content.match?(/^\s*[\*\-\+]\s+/m) || content.match?(/^\s*\d+\.\s+/m)
+      end
+      def starts_with_list?(content)
+        return false if content.nil? || content.empty?
+        # Check if content starts with list markers
+        content.match?(/^\s*[\*\-\+]\s+/) || content.match?(/^\s*\d+\.\s+/)
+      end
+      def is_list_continuation?(content)
+        return false if content.nil? || content.empty?
+        # Check for AsciiDoc list continuation patterns
+        content.match?(/^\+\s*$/) ||
+          content.match?(/^--\s*$/) ||
+          content.match?(/^\s{2,}/) || # Indented content (continuation)
+          content.start_with?("which", "where", "that") # Logical continuation
+      end
+      def extract_complete_list(paragraphs, start_index)
+        return paragraphs[start_index] if start_index >= paragraphs.length
+        combined = paragraphs[start_index].dup
+        current_index = start_index + 1
+        # Check if the first paragraph already contains an opening continuation block
+        in_continuation_block = combined.include?("--") && !combined.match?(/--.*--/m)
+        # Continue collecting paragraphs while we're in a list context
+        while current_index < paragraphs.length
+          next_para = paragraphs[current_index]
+          # Check if we're entering or exiting a continuation block
+          if next_para.match?(/^--\s*$/) || next_para.end_with?("--")
+            in_continuation_block = !in_continuation_block
+            combined += "\n\n#{next_para}"
+            current_index += 1
+            next
+          end
+          # If we're in a continuation block, include all content until we hit the closing --
+          if in_continuation_block
+            combined += "\n\n#{next_para}"
+            current_index += 1
+            next
+          end
+          # Check if this is a list item or list continuation
+          if starts_with_list?(next_para) || is_list_continuation?(next_para)
+            combined += "\n\n#{next_para}"
+            current_index += 1
+            # Check if this paragraph contains an opening continuation block
+            if next_para.include?("--") && !next_para.match?(/--.*--/m)
+              in_continuation_block = true
+            end
+          else
+            # This paragraph is not part of the list structure
+            break
+          end
+        end
+        combined
+      end
+      def ends_list_structure?(current_para, next_para)
+        return true if next_para.nil?
+        # List ends if:
+        # 1. Current paragraph doesn't end with continuation markers
+        # 2. Next paragraph starts a new section (not list or continuation)
+        !current_para.match?(/\+\s*$/) &&
+          !starts_with_list?(next_para) &&
+          !is_list_continuation?(next_para)
+      end
+      def apply_first_sentence_logic(paragraph)
+        # Apply the original first-sentence extraction logic
+        # Split by period and take the first sentence
+        # Avoid splitting by pattern like "abc.def"
+        new_content = paragraph
+          .split(".\n").first.strip
+          .split(". ").first.strip
+        if new_content.end_with?(".")
+          new_content
+        else
+          "#{new_content}."
+        end
+      end
       # rubocop:disable Metrics/MethodLength
       def combine_paragraphs(full_paragraph, next_paragraph)
+        # Check if we're dealing with a list structure
+        if contains_list?(full_paragraph) || starts_with_list?(next_paragraph)
+          return combine_list_content(full_paragraph, next_paragraph)
+        end
+        # For regular paragraphs, apply the original first-sentence logic
         # If full_paragraph already contains a period, extract that.
         if m = full_paragraph.match(/\A(?<inner_first>[^\n]*?\.)\s/)
-          # puts "CONDITION 1"
           if m[:inner_first]
             return m[:inner_first]
           else
@@ -413,24 +545,26 @@ module Suma
         # If full_paragraph ends with a period, this is the last.
         if /\.\s*\Z/.match?(full_paragraph)
-          # puts "CONDITION 2"
           return full_paragraph
         end
-        # If next_paragraph is a list
-        if next_paragraph.start_with?("*")
-          # puts "CONDITION 3"
+        # If next_paragraph is a continuation of a paragraph
+        if next_paragraph&.start_with?("which", "where", "that")
           return "#{full_paragraph}\n\n#{next_paragraph}"
         end
-        # If next_paragraph is a continuation of a list
-        if next_paragraph.start_with?("which", "that")
-          # puts "CONDITION 4"
-          return "#{full_paragraph}\n\n#{next_paragraph}"
+        full_paragraph
+      end
+      def combine_list_content(full_paragraph, next_paragraph)
+        combined = full_paragraph.dup
+        # If we have a next paragraph, add it
+        unless next_paragraph.nil? || next_paragraph.empty?
+          combined += "\n\n#{next_paragraph}"
         end
-        # puts "CONDITION 5"
-        full_paragraph
+        combined
       end
       def trim_definition(definition)
@@ -445,23 +579,22 @@ module Suma
         return nil if definition_str.empty?
-        # Unless the first paragraph ends with "between" and is followed by a
-        # list, don't split
         paragraphs = definition_str.split("\n\n")
-        # puts paragraphs.inspect
         first_paragraph = paragraphs.first
-        combined = if paragraphs.length > 1
-                     paragraphs[1..].inject(first_paragraph) do |acc, p|
-                       combine_paragraphs(acc, p)
-                     end
-                   else
-                     combine_paragraphs(first_paragraph, "")
-                   end
-        # puts "combined--------- #{combined}"
+        # If we only have one paragraph, apply the original logic
+        if paragraphs.length == 1
+          combined = apply_first_sentence_logic(first_paragraph)
+        elsif first_paragraph.end_with?(":") && paragraphs.length > 1 && starts_with_list?(paragraphs[1])
+          # Case 1: First paragraph ends with ":" and leads into a list
+          # Extract the complete list structure (this is an introductory paragraph)
+          complete_list = extract_complete_list(paragraphs, 1)
+          combined = "#{first_paragraph}\n\n#{complete_list}"
+        else
+          # Case 2: For all other cases (including sentences followed by lists)
+          # Extract only the first sentence from the first paragraph
+          combined = apply_first_sentence_logic(first_paragraph)
+        end
         # Remove comments until end of line
         combined = "#{combined}\n"

data/lib/suma/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Suma
-  VERSION = "0.1.18"
+  VERSION = "0.1.20"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: suma
 version: !ruby/object:Gem::Version
-  version: 0.1.18
+  version: 0.1.20
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-07-17 00:00:00.000000000 Z
+date: 2025-07-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: expressir