RubyGems - stepmod-utils - Versions diffs - 0.3.2 → 0.3.4 - Mend

stepmod-utils 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/exe/stepmod-extract-terms +46 -226
data/lib/stepmod/utils/concept.rb +11 -5
data/lib/stepmod/utils/converters/def.rb +14 -6
data/lib/stepmod/utils/converters/example.rb +13 -2
data/lib/stepmod/utils/converters/ext_description.rb +3 -1
data/lib/stepmod/utils/converters/figure.rb +20 -0
data/lib/stepmod/utils/converters/module_ref.rb +7 -3
data/lib/stepmod/utils/converters/note.rb +13 -2
data/lib/stepmod/utils/converters/stepmod_ext_description.rb +26 -4
data/lib/stepmod/utils/converters/text.rb +1 -1
data/lib/stepmod/utils/converters/uof.rb +10 -5
data/lib/stepmod/utils/smrl_resource_converter.rb +1 -1
data/lib/stepmod/utils/stepmod_definition_converter.rb +2 -1
data/lib/stepmod/utils/terms_extractor.rb +326 -0
data/lib/stepmod/utils/version.rb +1 -1
data/stepmod-utils.gemspec +1 -1
metadata +14 -12

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: fcb84433f081424b5227cdb8c9c6b7d7342d31089e176f8f333817c4533c721d
-  data.tar.gz: 82fee2a54cc8d818587c2c9b08e955d1328497f05362307f528b968f1a576f61
+  metadata.gz: 887612180cfe004fb244d41e7c553858ed441d5e4c60c8d27d06fb7ec3219cd2
+  data.tar.gz: f7f25caa5652593b6eafcb792206614f2b053e56a0dd05801018e2217533b636
 SHA512:
-  metadata.gz: 4840fc050b58a4eda03c0c7106d7af524c7d3f863d5a40907facf6e6393ddf26d60a00852478df1ccadc2568b20616139251bb5fd4f4fcdb37296a8bfc5a94cb
-  data.tar.gz: f3a7b2d85581e1f5db195bef23b28422a82dd8588b66c66df2147700ccebc6b15c96b3b2b696cfb0435e2a6cdedf02579b542bd1af77efd6c1afd608a61593ca
+  metadata.gz: 41057b14b1c6ab326dff8e402be3a74737e9479212a76577a9ef81738da9f542ab6cbd892723949b004a2a6ff6058525aed5de47b1fbe3859ac139a724028608
+  data.tar.gz: 519d9daa5451c17f60e6e2e764405908bf7e3c9f6b7279dcef9ee2bb554c63a0535b1e63c3aaed178549413a2253e24a3687c86d1ad60c8fa569c65aed756647

data/exe/stepmod-extract-terms CHANGED

@@ -17,38 +17,16 @@ class Gem::Specification
 end
 require 'bundler/setup'
-require 'stepmod/utils/stepmod_definition_converter'
-require 'stepmod/utils/bibdata'
-require 'stepmod/utils/concept'
-require 'ptools'
-ReverseAdoc.config.unknown_tags = :bypass
-# TODO: we may want a command line option to override this in the future
-ACCEPTED_STAGES = %w(IS DIS FDIS TS)
-general_concepts = []
-resource_concepts = []
-module_concepts = []
-parsed_bibliography = []
-encountered_terms = {}
+require 'stepmod/utils/terms_extractor'
 stepmod_dir = ARGV.first || Dir.pwd
-def log message
-  puts "[stepmod-utils] #{message}"
-end
-def term_special_category(bibdata)
-  case bibdata.part.to_i
-  when 41,42,43,44,45,46,47,51
-    true
-  when [56..112]
-    true
-  else
-    false
-  end
-end
+general_concepts,
+resource_concepts,
+parsed_bibliography,
+part_concepts,
+part_resources,
+part_modules = Stepmod::Utils::TermsExtractor.call(stepmod_dir)
 def part_to_title(bibdata)
   case bibdata.part.to_i
@@ -73,227 +51,69 @@ def part_to_title(bibdata)
   end
 end
-stepmod_path = Pathname.new(stepmod_dir).realpath
-# If we are using the stepmod CVS repository, provide the revision number per file
-has_cvs = File.which("cvs")
-cvs_mode = has_cvs && Dir.exists?(stepmod_path.join('CVS'))
-log "INFO: STEPmod directory set to #{stepmod_dir}."
-if cvs_mode
-  log "INFO: STEPmod directory is a CVS repository and will detect revisions."
-  log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
-else
-  log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
+def log message
+  puts "[stepmod-utils] #{message}"
 end
-log "INFO: Detecting paths..."
-files = %w(
-  resource.xml
-  application_protocol.xml
-  business_object_model.xml
-  module.xml
-  ).inject([]) do |acc, t|
-    candidate_paths = Dir["#{stepmod_dir}/**/#{t}"]
-    acc << candidate_paths
-end.flatten.sort.uniq
-max_encountered_refs_indexes = {}
-files.each do |file_path|
-  file_path = Pathname.new(file_path).realpath
-  fpath = file_path.relative_path_from(stepmod_path)
-  log "INFO: Processing XML file #{fpath}"
-  current_document = Nokogiri::XML(File.read(file_path)).root
-  bibdata = nil
-  begin
-    bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
-  rescue
-    log "WARNING: Unknown file #{fpath}, skipped"
-    next
-  end
-  unless ACCEPTED_STAGES.include? bibdata.doctype
-    log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
-    next
-  end
-  if bibdata.part.to_s.empty?
-    log "FATAL: missing `part` attribute: #{fpath}"
-    log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
-    next
-  end
-  revision_string = "\n// CVS: revision not detected"
-  if cvs_mode
-    # Run `cvs status` to find out version
-    log "INFO: Detecting CVS revision..."
-    Dir.chdir(stepmod_path) do
-      status = `cvs status #{fpath}`
-      unless status.empty?
-        working_rev = status.split(/\n/).grep(/Working revision:/).first.match(/revision:\s+(.+)$/)[1]
-        repo_rev = status.split(/\n/).grep(/Repository revision:/).first.match(/revision:\t(.+)\t/)[1]
-        log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
-        revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
-          "// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
-      end
-    end
-  end
-  # read definitions
-  part_concepts = []
-  current_document.xpath('//definition').each do |definition|
-    index = max_encountered_refs_indexes[bibdata.anchor] || 1
-    term_id = definition['id']
-    unless term_id.nil?
-      if encountered_terms[term_id]
-        log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
-      end
-      encountered_terms[term_id] = true
-    end
-    # Assume that definition is located in clause 3 of the ISO document
-    # in order. We really don't have a good reference here.
-    ref_clause = "3.#{index}"
-    concept = Stepmod::Utils::Concept.parse(
-      definition,
-      reference_anchor: bibdata.anchor,
-      reference_clause: ref_clause,
-      file_path: fpath + revision_string
-    )
-    next unless concept
-    unless term_special_category(bibdata)
-      # log "INFO: this part is generic"
-      general_concepts << concept
-    else
-      # log "INFO: this part is special"
-      part_concepts << concept
-    end
-    max_encountered_refs_indexes[bibdata.anchor] = index + 1
-    parsed_bibliography << bibdata
-  end
-  part_modules = []
-  current_document.xpath('//arm/uof').each do |uof_node|
-    concept = Stepmod::Utils::Concept.parse(
-      uof_node,
-      reference_anchor: bibdata.anchor,
-      reference_clause: nil,
-      file_path: fpath + revision_string
-    )
-    # puts concept.inspect
-    next unless concept
-    unless term_special_category(bibdata)
-      # log "INFO: this part is generic"
-      module_concepts << concept
-    else
-      # log "INFO: this part is special"
-      part_modules << concept
-    end
-    parsed_bibliography << bibdata
-  end
+part_concepts.each do |(bibdata, current_part_concepts)|
+  fn = "03x-stepmod-#{bibdata.part}.adoc"
+  File.open(fn, 'w') { |file|
+    file.puts("== #{part_to_title(bibdata)}\n\n")
+    file.puts(current_part_concepts.map(&:to_mn_adoc).join("\n"))
+  }
+  log "INFO: written to: #{fn}"
+end
-  part_resources = []
-  # Assumption: every schema is only linked by a single resource_docs document.
-  current_document.xpath('//schema').each do |schema_node|
-    schema_name = schema_node['name']
-    Dir["#{stepmod_path}/*/#{schema_name}/descriptions.xml"].each do |description_xml_path|
-      log "INFO: Processing resources schema #{description_xml_path}"
-      description_document = Nokogiri::XML(File.read(description_xml_path)).root
-      description_document.xpath('//ext_description').each do |ext_description|
+part_resources.each do |(bibdata, current_part_resources)|
+  fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"
+  File.open(fn, 'w') { |file|
+    file.puts("== #{part_to_title(bibdata)}\n\n")
+    file.puts(current_part_resources.map(&:to_mn_adoc).join("\n"))
+  }
+  log "INFO: written to: #{fn}"
+end
-        concept = Stepmod::Utils::Concept.parse(
-          ext_description,
-          reference_anchor: bibdata.anchor,
-          reference_clause: nil,
-          file_path: fpath + revision_string
-        )
-        next unless concept
-        unless term_special_category(bibdata)
-          # log "INFO: this part is generic"
-          resource_concepts << concept
-        else
-          # log "INFO: this part is special"
-          part_resources << concept
-        end
+part_modules.each do |(bibdata, part_modules_arm, part_modules_mim)|
+  fn = "05x-stepmod-entities-modules-#{bibdata.part}.adoc"
+  File.open(fn, 'w') { |file|
+    file.puts("")
+    unless part_modules_arm.empty?
+      schema_name = part_modules_arm.first.first
+      concepts = part_modules_arm.first.last
-        parsed_bibliography << bibdata
-      end
+      # puts "SCHEMA NAME ARM: #{schema_name}"
+      file.puts("== #{schema_name}\n\n")
+      file.puts(concepts.map(&:to_mn_adoc).join("\n"))
     end
-  end
-  log "INFO: Completed processing XML file #{fpath}"
+    file.puts("")
-  if part_concepts.empty?
-    log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it contains no concepts."
-    next
-  elsif part_concepts.length < 3
-    log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{part_concepts.length} terms."
+    unless part_modules_mim.empty?
+      schema_name = part_modules_mim.first.first
-    part_concepts.each do |x|
-      general_concepts << x
+      # puts "SCHEMA NAME MIM: #{schema_name}"
+      concepts = part_modules_mim.first.last
+      file.puts("== #{schema_name}\n\n")
+      file.puts(concepts.map(&:to_mn_adoc).join("\n"))
     end
-  else
-    fn = "03x-stepmod-#{bibdata.part}.adoc"
-    File.open(fn, 'w') { |file|
-      file.puts("== #{part_to_title(bibdata)}\n\n")
-      file.puts(part_concepts.map(&:to_mn_adoc).join("\n"))
-    }
-    log "INFO: written to: #{fn}"
-  end
-  unless part_resources.empty?
-    fn = "04x-stepmod-entities-resources-#{bibdata.part}.adoc"
-    File.open(fn, 'w') { |file|
-      file.puts("== #{part_to_title(bibdata)}\n\n")
-      file.puts(part_resources.map(&:to_mn_adoc).join("\n"))
-    }
-    log "INFO: written to: #{fn}"
-  end
-  unless part_modules.empty?
-    fn = "04x-stepmod-entities-modules-#{bibdata.part}.adoc"
-    File.open(fn, 'w') { |file|
-      file.puts("== #{part_to_title(bibdata)}\n\n")
-      file.puts(part_modules.map(&:to_mn_adoc).join("\n"))
-    }
-    log "INFO: written to: #{fn}"
-  end
+  }
+  log "INFO: written to: #{fn}"
 end
 File.open('031-stepmod-general.adoc', 'w') { |file|
   file.puts(general_concepts.map(&:to_mn_adoc).join("\n"))
 }
+log "INFO: written to: 031-stepmod-general.adoc"
 File.open('041-stepmod-entities-resources.adoc', 'w') { |file|
   file.puts(resource_concepts.map(&:to_mn_adoc).join("\n"))
 }
-File.open('051-stepmod-entities-modules.adoc', 'w') { |file|
-  file.puts(module_concepts.map(&:to_mn_adoc).join("\n"))
-}
-log "INFO: written to: 031-stepmod-general.adoc"
+log "INFO: written to: 041-stepmod-entities-resources.adoc"
 File.open('991-generated-bibliography.adoc', 'w') { |file|
   file.puts(parsed_bibliography.map(&:to_mn_adoc).sort.uniq.join("\n"))
 }
 log "INFO: written to: 991-generated-bibliography.adoc"

data/lib/stepmod/utils/concept.rb CHANGED

@@ -21,14 +21,20 @@ module Stepmod
       end
       def self.parse(definition_xml, reference_anchor:, reference_clause:, file_path:)
-        converted_definition = Stepmod::Utils::StepmodDefinitionConverter
-                                  .convert(definition_xml, { reference_anchor: reference_anchor })
+        converted_definition = Stepmod::Utils::StepmodDefinitionConverter.convert(
+          definition_xml,
+          {
+            # We don't want examples and notes
+            no_notes_examples: true,
+            reference_anchor: reference_anchor
+          }
+        )
         return nil if converted_definition.nil? || converted_definition.strip.empty?
-        if definition_xml.name == 'uof' || definition_xml.name == 'ext_description'
+        if definition_xml.name == 'ext_description'
           converted_definition = <<~TEXT
-            #{converted_definition.split("\n")[0..3].join("\n")}
+            #{converted_definition}
             NOTE: This term is incompletely defined in this document.
             Reference <<#{reference_anchor}>> for the complete definition.
@@ -44,7 +50,7 @@ module Stepmod
       def to_mn_adoc
         <<~TEXT
-          // STEPmod path: #{file_path}
+          // STEPmod path:#{!file_path.empty? ? " #{file_path}" : ""}
           #{converted_definition}
           [.source]

data/lib/stepmod/utils/converters/def.rb CHANGED

@@ -12,13 +12,15 @@ module Stepmod
         def treat_children(node, state)
           converted = node.children.each_with_object({}) do |child, res|
-                        content = treat(child, state).strip
-                        next if content.empty?
-                        res[child] = content
-                      end
+            content = treat(child, state)
+            next if content.strip.empty?
+            res[child] = content
+          end
           previous = nil
           result = ''
           converted.each.with_index do |(child, content), i|
             if block_tag?(child, previous)
               result += "\n\n"
@@ -30,6 +32,11 @@ module Stepmod
             result += content
             previous = child
           end
+          # Remove double newlines for every line
+          result = result.gsub(/\n\n+/, "\n\n")
+          result = result.squeeze(' ')
           result.strip
         end
@@ -50,8 +57,9 @@ module Stepmod
           return unless can_transform_to_alt?(first_child_tag)
           result = Stepmod::Utils::Converters::Synonym
-                    .new
-                    .convert(first_child_tag)
+            .new
+            .convert(first_child_tag)
           first_child_tag.remove
           "#{result}\n\n"
         end

data/lib/stepmod/utils/converters/example.rb CHANGED

@@ -5,10 +5,21 @@ module Stepmod
     module Converters
       class Example < ReverseAdoc::Converters::Base
         def convert(node, state = {})
-          "\n\n[example]\n====\n#{treat_children(node, state).strip}\n====\n\n"
+          # If we want to skip this node
+          return '' if state[:no_notes_examples]
+          <<~TEMPLATE
+          [example]
+          ====
+          #{treat_children(node, state).strip}
+          ====
+          TEMPLATE
         end
       end
       ReverseAdoc::Converters.register :example, Example.new
     end
   end
-end
+end

data/lib/stepmod/utils/converters/ext_description.rb CHANGED

@@ -4,9 +4,11 @@ module Stepmod
       class ExtDescription < ReverseAdoc::Converters::Base
         def convert(node, state = {})
           state = state.merge(schema_name: node['linkend'])
+          child_text = treat_children(node, state).strip
           <<~TEMPLATE
             (*"#{node['linkend']}"
-            #{treat_children(node, state).strip}
+            #{child_text}
             *)
           TEMPLATE
         end

data/lib/stepmod/utils/converters/figure.rb ADDED

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+require 'reverse_adoc/converters/figure'
+module Stepmod
+  module Utils
+    module Converters
+      class Figure < ReverseAdoc::Converters::Figure
+        def convert(node, state = {})
+          # If we want to skip this node
+          return '' if state[:no_notes_examples]
+          super
+        end
+      end
+      # This replaces the converter
+      ReverseAdoc::Converters.register :figure, Figure.new
+    end
+  end
+end

data/lib/stepmod/utils/converters/module_ref.rb CHANGED

@@ -13,10 +13,14 @@ module Stepmod
           # We take the text value of the element and convert to this:
           # term:[individual products]
-          if node['linkend'].split(':').length > 1
-            ref = node.text
+          ref = node.text.strip
+          if !ref.empty?
+            " term:[#{normalized_ref(ref)}] "
+          elsif
+            ref = node['linkend'].split(':').first
+            " *#{ref}*"
           end
-          " term:[#{normalized_ref(ref)}] "
         end
         private

data/lib/stepmod/utils/converters/note.rb CHANGED

@@ -5,10 +5,21 @@ module Stepmod
     module Converters
       class Note < ReverseAdoc::Converters::Base
         def convert(node, state = {})
-          "\n\n[NOTE]\n--\n#{treat_children(node, state).strip}\n--\n\n"
+          # If we want to skip this node
+          return '' if state[:no_notes_examples]
+          <<~TEMPLATE
+          [NOTE]
+          --
+          #{treat_children(node, state).strip}
+          --
+          TEMPLATE
         end
       end
       ReverseAdoc::Converters.register :note, Note.new
     end
   end
-end
+end

data/lib/stepmod/utils/converters/stepmod_ext_description.rb CHANGED

@@ -6,13 +6,35 @@ module Stepmod
           state = state.merge(schema_name: node['linkend'])
           linkend = node['linkend'].split('.')
-          # We ignore all the WHERE and IP rules because those are not terms
-          return nil if linkend.last =~ /^wr/
+          # We only want ENTITY entries, not their attributes
+          # https://github.com/metanorma/iso-10303-2/issues/36#issuecomment-841300092
+          return nil if linkend.length != 2
+          child_text = treat_children(node, state).strip
+          return nil if child_text.empty?
+          # Only taking the first paragraph of the definition
+          child_text = child_text.split("\n").first
+          # # Only taking the first sentence
+          # if child_text.contains?(".")
+          #   child_text = child_text.split(".").first
+          # end
+          domain =  case linkend.first
+                    when /_mim$/, /_arm$/
+                      "STEP module"
+                    # when /_schema$/
+                    else
+                      "STEP resource"
+                    end
           <<~TEMPLATE
-            === #{node['linkend'].split('.').last}
+            === #{linkend.last}
+            #{domain ? "domain:[" + domain + "]" : ""}
-            <STEP resource> #{treat_children(node, state).strip}
+            #{child_text}
           TEMPLATE
         end
       end

data/lib/stepmod/utils/converters/text.rb CHANGED

@@ -30,8 +30,8 @@ module Stepmod
         def treat_text(node)
           text = node.text
           text = preserve_nbsp(text)
-          # text = remove_border_newlines(text)
           text = remove_inner_newlines(text)
+          text = remove_border_newlines(text)
           text = preserve_keychars_within_backticks(text)
           text = preserve_tags(text)

data/lib/stepmod/utils/converters/uof.rb CHANGED

@@ -7,15 +7,20 @@ module Stepmod
     module Converters
       class Uof < ReverseAdoc::Converters::Base
         def convert(node, state = {})
-          <<~TEXT
-          === #{node['name'].strip}
-          <STEP module> #{treat_children(node, state).strip}
-          TEXT
+          # WARNING: <uof> tag content is deprecated
+          return ""
+          #
+          # <<~TEXT
+          # === #{node['name'].strip}
+          # <STEP module> #{treat_children(node, state).strip}
+          # TEXT
         end
       end
       ReverseAdoc::Converters.register :uof, Uof.new
     end
   end
-end
+end

data/lib/stepmod/utils/smrl_resource_converter.rb CHANGED

@@ -64,4 +64,4 @@ module Stepmod
       end
     end
   end
-end
+end

data/lib/stepmod/utils/stepmod_definition_converter.rb CHANGED

@@ -16,6 +16,7 @@ require 'stepmod/utils/converters/stepmod_ext_description'
 require 'stepmod/utils/converters/term'
 require 'stepmod/utils/converters/synonym'
 require 'stepmod/utils/converters/uof'
+require 'stepmod/utils/converters/figure'
 require 'reverse_adoc/converters/a'
 require 'reverse_adoc/converters/blockquote'
@@ -52,7 +53,7 @@ module Stepmod
         return '' unless root
         ReverseAdoc.config.with(options) do
-          result = ReverseAdoc::Converters.lookup(root.name).convert(root)
+          result = ReverseAdoc::Converters.lookup(root.name).convert(root, options)
           return '' unless result
           ReverseAdoc.cleaner.tidy(result.dup)
         end

data/lib/stepmod/utils/terms_extractor.rb ADDED

@@ -0,0 +1,326 @@
+require 'stepmod/utils/stepmod_definition_converter'
+require 'stepmod/utils/bibdata'
+require 'stepmod/utils/concept'
+ReverseAdoc.config.unknown_tags = :bypass
+module Stepmod
+  module Utils
+    class TermsExtractor
+      # TODO: we may want a command line option to override this in the future
+      ACCEPTED_STAGES = %w(IS DIS FDIS TS)
+      attr_reader :stepmod_path,
+        :stepmod_dir,
+        :general_concepts,
+        :resource_concepts,
+        :parsed_bibliography,
+        :encountered_terms,
+        :cvs_mode,
+        :part_concepts,
+        :part_resources,
+        :part_modules,
+        :stdout
+      def self.call(stepmod_dir, stdout = STDOUT)
+        new(stepmod_dir, stdout).call
+      end
+      def initialize(stepmod_dir, stdout)
+        @stdout = stdout
+        @stepmod_dir = stepmod_dir
+        @stepmod_path = Pathname.new(stepmod_dir).realpath
+        @general_concepts = []
+        @resource_concepts = []
+        @parsed_bibliography = []
+        @part_concepts = []
+        @part_resources = []
+        @part_modules = []
+        @encountered_terms = {}
+      end
+      def log message
+        stdout.puts "[stepmod-utils] #{message}"
+      end
+      def term_special_category(bibdata)
+        case bibdata.part.to_i
+        when 41,42,43,44,45,46,47,51
+          true
+        when [56..112]
+          true
+        else
+          false
+        end
+      end
+      def call
+        # If we are using the stepmod CVS repository, provide the revision number per file
+        @cvs_mode = if Dir.exists?(stepmod_path.join('CVS'))
+          require 'ptools'
+          # ptools provides File.which
+          File.which("cvs")
+        end
+        log "INFO: STEPmod directory set to #{stepmod_dir}."
+        if cvs_mode
+          log "INFO: STEPmod directory is a CVS repository and will detect revisions."
+          log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
+        else
+          log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
+        end
+        log "INFO: Detecting paths..."
+        repo_index = Nokogiri::XML(File.read(stepmod_path.join('repository_index.xml'))).root
+        files = []
+        # add module paths
+        repo_index.xpath('//module').each do |x|
+          path = Pathname.new("#{stepmod_dir}/modules/#{x['name']}/module.xml")
+          files << path if File.exists? path
+        end
+        # add resource_docs paths
+        repo_index.xpath('//resource_doc').each do |x|
+          path = Pathname.new("#{stepmod_dir}/resource_docs/#{x['name']}/resource.xml")
+          files << path if File.exists? path
+        end
+        # add business_object_models paths
+        repo_index.xpath('//business_object_model').each do |x|
+          path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/business_object_model.xml")
+          files << path if File.exists? path
+        end
+        # add application_protocols paths
+        repo_index.xpath('//application_protocol').each do |x|
+          path = Pathname.new("#{stepmod_dir}/application_protocols/#{x['name']}/application_protocol.xml")
+          files << path if File.exists? path
+        end
+        files.sort!.uniq!
+        process_term_files(files)
+        [
+          general_concepts,
+          resource_concepts,
+          parsed_bibliography,
+          part_concepts,
+          part_resources,
+          part_modules
+        ]
+      end
+      private
+      def process_term_files(files)
+        parsed_schema_names = {}
+        files.each do |file_path|
+          file_path = file_path.realpath
+          fpath = file_path.relative_path_from(stepmod_path)
+          log "INFO: Processing XML file #{fpath}"
+          current_document = Nokogiri::XML(File.read(file_path)).root
+          bibdata = nil
+          begin
+            bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
+          rescue
+            log "WARNING: Unknown file #{fpath}, skipped"
+            next
+          end
+          unless ACCEPTED_STAGES.include? bibdata.doctype
+            log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
+            next
+          end
+          if bibdata.part.to_s.empty?
+            log "FATAL: missing `part` attribute: #{fpath}"
+            log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
+            next
+          end
+          revision_string = "\n// CVS: revision not detected"
+          if cvs_mode
+            # Run `cvs status` to find out version
+            log "INFO: Detecting CVS revision..."
+            Dir.chdir(stepmod_path) do
+              status = `cvs status #{fpath}`
+              unless status.empty?
+                working_rev = status.split(/\n/).grep(/Working revision:/).first.match(/revision:\s+(.+)$/)[1]
+                repo_rev = status.split(/\n/).grep(/Repository revision:/).first.match(/revision:\t(.+)\t/)[1]
+                log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
+                revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
+                  "// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
+              end
+            end
+          end
+          # read definitions
+          current_part_concepts = []
+          definition_index = 0
+          current_document.xpath('//definition').each do |definition|
+            definition_index += 1
+            term_id = definition['id']
+            unless term_id.nil?
+              if encountered_terms[term_id]
+                log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
+              end
+              encountered_terms[term_id] = true
+            end
+            # Assume that definition is located in clause 3 of the ISO document
+            # in order. We really don't have a good reference here.
+            ref_clause = "3.#{definition_index}"
+            concept = Stepmod::Utils::Concept.parse(
+              definition,
+              reference_anchor: bibdata.anchor,
+              reference_clause: ref_clause,
+              file_path: fpath + revision_string
+            )
+            next unless concept
+            unless term_special_category(bibdata)
+              # log "INFO: this part is generic"
+              general_concepts << concept
+            else
+              # log "INFO: this part is special"
+              current_part_concepts << concept
+            end
+            parsed_bibliography << bibdata
+          end
+          current_part_resources = []
+          current_part_modules_arm = {}
+          current_part_modules_mim = {}
+          log "INFO: FILE PATH IS #{file_path}"
+          case file_path.to_s
+          when /resource.xml$/
+            log "INFO: Processing resource.xml for #{file_path}"
+            # Assumption: every schema is only linked by a single resource_docs document.
+            current_document.xpath('//schema').each do |schema_node|
+              schema_name = schema_node['name']
+              if parsed_schema_names[schema_name]
+                log "ERROR: We have encountered this schema before: #{schema_name} from path #{parsed_schema_names[schema_name]}, now at #{file_path}"
+                next
+              else
+                parsed_schema_names[schema_name] = file_path
+              end
+              Dir["#{stepmod_path}/resources/#{schema_name}/descriptions.xml"].each do |description_xml_path|
+                log "INFO: Processing resources schema #{description_xml_path}"
+                description_document = Nokogiri::XML(File.read(description_xml_path)).root
+                description_document.xpath('//ext_description').each do |ext_description|
+                  # log "INFO: Processing linkend[#{ext_description['linkend']}]"
+                  concept = Stepmod::Utils::Concept.parse(
+                    ext_description,
+                    reference_anchor: bibdata.anchor,
+                    reference_clause: nil,
+                    file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
+                  )
+                  next unless concept
+                  unless term_special_category(bibdata)
+                    # log "INFO: this part is generic"
+                    resource_concepts << concept
+                  else
+                    # log "INFO: this part is special"
+                    current_part_resources << concept
+                  end
+                  parsed_bibliography << bibdata
+                end
+              end
+            end
+          when /module.xml$/
+            log "INFO: Processing module.xml for #{file_path}"
+            # Assumption: every schema is only linked by a single module document.
+            # puts current_document.xpath('//module').length
+            schema_name = current_document.xpath('//module').first['name']
+            if parsed_schema_names[schema_name]
+              log "ERROR: We have encountered this schema before: #{schema_name} from path #{parsed_schema_names[schema_name]}, now at #{file_path}"
+              next
+            else
+              parsed_schema_names[schema_name] = file_path
+            end
+            description_xml_path = "#{stepmod_path}/modules/#{schema_name}/arm_descriptions.xml"
+            log "INFO: Processing modules schema #{description_xml_path}"
+            if File.exists?(description_xml_path)
+              description_document = Nokogiri::XML(File.read(description_xml_path)).root
+              description_document.xpath('//ext_description').each do |ext_description|
+                linkend_schema = ext_description['linkend'].split('.').first
+                concept = Stepmod::Utils::Concept.parse(
+                  ext_description,
+                  reference_anchor: bibdata.anchor,
+                  reference_clause: nil,
+                  file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
+                )
+                next unless concept
+                current_part_modules_arm[linkend_schema] ||= []
+                current_part_modules_arm[linkend_schema] << concept
+                # puts part_modules_arm.inspect
+                parsed_bibliography << bibdata
+              end
+            end
+            description_xml_path = "#{stepmod_path}/modules/#{schema_name}/mim_descriptions.xml"
+            log "INFO: Processing modules schema #{description_xml_path}"
+            if File.exists?(description_xml_path)
+              description_document = Nokogiri::XML(File.read(description_xml_path)).root
+              description_document.xpath('//ext_description').each do |ext_description|
+                linkend_schema = ext_description['linkend'].split('.').first
+                concept = Stepmod::Utils::Concept.parse(
+                  ext_description,
+                  reference_anchor: bibdata.anchor,
+                  reference_clause: nil,
+                  file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
+                )
+                next unless concept
+                current_part_modules_mim[linkend_schema] ||= []
+                current_part_modules_mim[linkend_schema] << concept
+                parsed_bibliography << bibdata
+              end
+            end
+          end
+          log "INFO: Completed processing XML file #{fpath}"
+          if current_part_concepts.empty?
+            log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it contains no concepts."
+          elsif current_part_concepts.length < 3
+            log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{current_part_concepts.length} terms."
+            current_part_concepts.each do |x|
+              general_concepts << x
+            end
+          else
+            part_concepts << [bibdata, current_part_concepts] unless current_part_concepts.empty?
+          end
+          part_resources << [bibdata, current_part_resources] unless current_part_resources.empty?
+          part_modules << [bibdata, current_part_modules_arm, current_part_modules_mim] if current_part_modules_arm.size + current_part_modules_mim.size > 0
+        end
+      end
+    end
+  end
+end

data/lib/stepmod/utils/version.rb CHANGED

@@ -1,5 +1,5 @@
 module Stepmod
   module Utils
-    VERSION = "0.3.2"
+    VERSION = "0.3.4"
   end
 end

data/stepmod-utils.gemspec CHANGED Viewed

@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency "thor", ">= 0.20.3"
   spec.add_runtime_dependency "reverse_adoc", ">= 0.2.9"
-  spec.add_runtime_dependency "ptools", '~> 1.3'
   spec.add_runtime_dependency "concurrent-ruby"
+  spec.add_runtime_dependency "ptools"
   spec.add_development_dependency "byebug", "~> 11.1"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: stepmod-utils
 version: !ruby/object:Gem::Version
-  version: 0.3.2
+  version: 0.3.4
 platform: ruby
 authors:
 - Ribose Inc.
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-05-14 00:00:00.000000000 Z
+date: 2021-05-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: thor
@@ -39,21 +39,21 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 0.2.9
 - !ruby/object:Gem::Dependency
-  name: ptools
+  name: concurrent-ruby
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.3'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.3'
+        version: '0'
 - !ruby/object:Gem::Dependency
-  name: concurrent-ruby
+  name: ptools
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -139,6 +139,7 @@ files:
 - lib/stepmod/utils/converters/express_ref_express_description.rb
 - lib/stepmod/utils/converters/ext_description.rb
 - lib/stepmod/utils/converters/ext_descriptions.rb
+- lib/stepmod/utils/converters/figure.rb
 - lib/stepmod/utils/converters/fund_cons.rb
 - lib/stepmod/utils/converters/head.rb
 - lib/stepmod/utils/converters/hr.rb
@@ -168,6 +169,7 @@ files:
 - lib/stepmod/utils/smrl_resource_converter.rb
 - lib/stepmod/utils/stepmod_definition_converter.rb
 - lib/stepmod/utils/stepmod_file_annotator.rb
+- lib/stepmod/utils/terms_extractor.rb
 - lib/stepmod/utils/version.rb
 - migrating_from_cvs.adoc
 - resource_example.xml
@@ -176,7 +178,7 @@ homepage: https://github.com/metanorma/stepmod-utils
 licenses:
 - BSD-2-Clause
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -191,8 +193,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.3
-signing_key:
+rubygems_version: 3.0.3.1
+signing_key:
 specification_version: 4
 summary: Stepmod-utils is a toolkit that works on STEPmod data.
 test_files: []