RubyGems - tc211-termbase - Versions diffs - 0.1.1 → 0.1.2 - Mend

tc211-termbase 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +5 -5
data/.gitignore +3 -0
data/Gemfile.lock +59 -7
data/README.adoc +10 -4
data/db/iso/iso_1087_1_2000,_3.4.16,_modified_/342/200/224_the_note_1_to_entry_has_been_added..xml +72 -0
data/db/iso/iso_1087_1_2000,_3.4.9.xml +72 -0
data/db/iso/iso_19101_1_2014,_4.1.1.xml +62 -0
data/db/iso/iso_19101_1_2014,_4.1.2.xml +62 -0
data/db/iso/iso_19105.xml +96 -0
data/db/iso/iso_19105_2000.xml +55 -0
data/db/iso/iso_19116.xml +97 -0
data/db/iso/iso_19116_2004.xml +56 -0
data/db/iso/iso_19117_2012,_4.1.xml +60 -0
data/db/iso/iso_3534_1.xml +112 -0
data/db/iso/iso_3534_1_1993.xml +71 -0
data/db/iso/iso_iec_19501.xml +105 -0
data/db/iso/iso_iec_19501_2005_(adapted_from.xml +60 -0
data/db/iso/iso_iec_2382_17_1999.xml +77 -0
data/db/version +1 -0
data/lib/tc211/termbase/relaton_db.rb +21 -0
data/lib/tc211/termbase/terms_section.rb +149 -126
data/lib/tc211/termbase/version.rb +1 -1
data/tc211-termbase.gemspec +4 -1
data/vcr_cassettes/terms.yml +491 -0
metadata +65 -5

data/db/version ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.4.1

data/lib/tc211/termbase/relaton_db.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require "singleton"
+require "relaton"
+module Tc211
+  module Termbase
+    # Relaton cache singleton.
+    class RelatonDb
+      include Singleton
+      def initialize
+        @db = Relaton::Db.new "db", nil
+      end
+      # @param code [String] reference
+      # @return [RelatonIso::IsoBibliographicItem]
+      def fetch(code)
+        @db.fetch code
+      end
+    end
+  end
+end

data/lib/tc211/termbase/terms_section.rb CHANGED Viewed

@@ -1,151 +1,174 @@
 require_relative "sheet_section"
 require_relative "term"
+require_relative "relaton_db"
 module Tc211::Termbase
-class TermsSection < SheetSection
-  attr_accessor :structure
-  attr_accessor :header_row
-  TERM_HEADER_ROW_MATCH = {
-    "A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
-    "B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
-    "C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
-    "D" => ["Country_Code"],
-    # ... We don't need to match all the cells
-  }
-  TERM_BODY_COLUMN_MAP = {
-    "Term_ID" => "id",
-    "Term" => "term",
-    "Term .OPERATING LANGUAGE." => "term",
-    # In the English sheet, column is named "Term Abbreviation"
-    "Term Abbreviation" => "abbrev",
-    # In other sheets, column named "Term_Abbreviation"
-    "Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
-    "Country code" => "country-code",
-    "Definition" => "definition",
-    "Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
-    "Term in English" => nil,
-    "Entry Status" => "entry-status",
-    ## Must be one of 'notValid' 'valid' 'superseded' 'retired'
-    "Term Clasification" => "classification",
-    ## Must be one of the following 'preferred' 'admitted' 'deprecated'
-    "Review Indicator" => "review-indicator",
-    ## Must be one of the following <empty field> 'Under Review in Source Document'",
-    "Authoritative Source" => "authoritative-source",
-    "Similarity to Authoritative Source" => "authoritative-source-similarity",
-    ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
-    "Lineage Source" => "lineage-source",
-    "Similarity to Lineage Source" => "lineage-source-similarity",
-    ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
-    "Term Synonyms" => "synonyms",
-    "Date Accepted" => "date-accepted", # yyyy-mm-dd,
-    "Date Amended" => "date-amended",   # yyyy-mm-dd,
-    "Review Date" => "review-date",     # yyyy-mm-dd,
-    "Review Status" => "review-status", ## Must be one of 'pending' 'tentative' 'final'",
-    "Review Type" => "review-type",     ## Must be one of 'supersession', 'retirement'",
-    "Review Decision" => "review-decision", ## Must be one of 'withdrawn', 'accepted' 'notAccepted'",
-    "Review Decision Date" => "review-decision-date", # yyyy-mm-dd
-    "Review Decision Event" => "review-decision-event",
-    "Review Decision Notes" => "review-decision-notes",
-    "Example_1" => "example-1",
-    "Note_1" => "note-1",
-    "Example_2" => "example-2",
-    "Note_2" => "note-2",
-    "Example_3" => "example-3",
-    "Note_3" => "note-3",
-    "Example_4" => "example-4",
-    "Note_4" => "note-4",
-    "Example_5" => "example-5",
-    "Note_5" => "note-5",
-    "Example_6" => "example-6",
-    "Note_6" => "note-6",
-    "Example_7" => "example-7",
-    "Note_7" => "note-7",
-    "Example_8" => "example-8",
-    "Note_8" => "note-8",
-    "Glossary Release" => "release"
-    ## Must be one of the following codes 'release1' = 1 'release1_retired' = -1 'release2' = 2 'release2_retired' = -2 etc "
-  }
-  def initialize(rows, options={})
-    super
-    raise StandardError.new("Does not match TermsSection header!") unless self.class.match_header(@rows[0])
-    @mapping_rows = @rows[0..1]
-    @header_row = @rows[2]
-    @body_rows = @rows[3..-1]
-    @language_code = options.delete(:language_code)
-    self
-  end
+  class TermsSection < SheetSection
+    attr_accessor :structure
+    attr_accessor :header_row
+    TERM_HEADER_ROW_MATCH = {
+      "A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
+      "B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
+      "C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
+      "D" => ["Country_Code"],
+      # ... We don't need to match all the cells
+    }
+    TERM_BODY_COLUMN_MAP = {
+      "Term_ID" => "id",
+      "Term" => "term",
+      "Term .OPERATING LANGUAGE." => "term",
+      # In the English sheet, column is named "Term Abbreviation"
+      "Term Abbreviation" => "abbrev",
+      # In other sheets, column named "Term_Abbreviation"
+      "Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
+      "Country code" => "country-code",
+      "Definition" => "definition",
+      "Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
+      "Term in English" => nil,
+      "Entry Status" => "entry-status",
+      ## Must be one of 'notValid' 'valid' 'superseded' 'retired'
+      "Term Clasification" => "classification",
+      ## Must be one of the following 'preferred' 'admitted' 'deprecated'
+      "Review Indicator" => "review-indicator",
+      ## Must be one of the following <empty field> 'Under Review in Source Document'",
+      "Authoritative Source" => "authoritative-source",
+      "Similarity to Authoritative Source" => "authoritative-source-similarity",
+      ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
+      "Lineage Source" => "lineage-source",
+      "Similarity to Lineage Source" => "lineage-source-similarity",
+      ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
+      "Term Synonyms" => "synonyms",
+      "Date Accepted" => "date-accepted", # yyyy-mm-dd,
+      "Date Amended" => "date-amended",   # yyyy-mm-dd,
+      "Review Date" => "review-date",     # yyyy-mm-dd,
+      "Review Status" => "review-status", ## Must be one of 'pending' 'tentative' 'final'",
+      "Review Type" => "review-type",     ## Must be one of 'supersession', 'retirement'",
+      "Review Decision" => "review-decision", ## Must be one of 'withdrawn', 'accepted' 'notAccepted'",
+      "Review Decision Date" => "review-decision-date", # yyyy-mm-dd
+      "Review Decision Event" => "review-decision-event",
+      "Review Decision Notes" => "review-decision-notes",
+      "Example_1" => "example-1",
+      "Note_1" => "note-1",
+      "Example_2" => "example-2",
+      "Note_2" => "note-2",
+      "Example_3" => "example-3",
+      "Note_3" => "note-3",
+      "Example_4" => "example-4",
+      "Note_4" => "note-4",
+      "Example_5" => "example-5",
+      "Note_5" => "note-5",
+      "Example_6" => "example-6",
+      "Note_6" => "note-6",
+      "Example_7" => "example-7",
+      "Note_7" => "note-7",
+      "Example_8" => "example-8",
+      "Note_8" => "note-8",
+      "Glossary Release" => "release"
+      ## Must be one of the following codes 'release1' = 1 'release1_retired' = -1 'release2' = 2 'release2_retired' = -2 etc "
+    }
+    def initialize(rows, options={})
+      super
+      raise StandardError.new("Does not match TermsSection header!") unless self.class.match_header(@rows[0])
+      @mapping_rows = @rows[0..1]
+      @header_row = @rows[2]
+      @body_rows = @rows[3..-1]
+      @language_code = options.delete(:language_code)
+      self
+    end
+    def structure
+      @structure ||= @header_row.inject({}) do |acc, (key, value)|
+        # puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
-  def structure
-    @structure ||= @header_row.inject({}) do |acc, (key, value)|
-      # puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
+        # convert whitespace to a single space
+        cleaned_value = value.gsub(/\s+/, ' ')
-      # convert whitespace to a single space
-      cleaned_value = value.gsub(/\s+/, ' ')
+        matches = TERM_BODY_COLUMN_MAP.map do |key, value|
+          # puts "key #{key}, value #{value}"
+          if cleaned_value[Regexp.new("^#{key}")]
+            [key, value]
+          end
+        end.compact
-      matches = TERM_BODY_COLUMN_MAP.map do |key, value|
-        # puts "key #{key}, value #{value}"
-        if cleaned_value[Regexp.new("^#{key}")]
-          [key, value]
+        discard, longest_match_key = matches.max_by do |(a, b)|
+          a.length
         end
-      end.compact
-      discard, longest_match_key = matches.max_by do |(a, b)|
-        a.length
-      end
+        # Here we need to skip "Term in English"
+        if key && longest_match_key
+          acc.merge!({ key => longest_match_key })
+        else
+          acc
+        end
-      # Here we need to skip "Term in English"
-      if key && longest_match_key
-        acc.merge!({ key => longest_match_key })
-      else
-        acc
       end
     end
-  end
-  def self.match_header(row)
-    # puts "row #{row}"
-    row.inject(true) do |acc, (key, value)|
-      # puts "#{key}, #{value}"
-      if TERM_HEADER_ROW_MATCH[key]
-        acc && TERM_HEADER_ROW_MATCH[key].include?(value)
-      else
-        acc
+    def self.match_header(row)
+      # puts "row #{row}"
+      row.inject(true) do |acc, (key, value)|
+        # puts "#{key}, #{value}"
+        if TERM_HEADER_ROW_MATCH[key]
+          acc && TERM_HEADER_ROW_MATCH[key].include?(value)
+        else
+          acc
+        end
       end
     end
-  end
-  def parse_row(row)
-    return nil if row.empty?
-    attributes = {}
+    def parse_row(row)
+      return nil if row.empty?
-    structure.each_pair do |key, value|
-      # puts "#{key}, #{value}, #{row[key]}"
-      attribute_key = value
-      attribute_value = row[key]
-      next if attribute_value.nil?
-      attributes[attribute_key] = attribute_value
-    end
+      attributes = {}
-    attributes
-  end
+      structure.each_pair do |key, value|
+        # puts "#{key}, #{value}, #{row[key]}"
+        attribute_key = value
+        next if row[key].nil?
+        attribute_value = fetch_attribute row[key], attribute_key
+        attributes[attribute_key] = attribute_value
+      end
-  def terms
-    @terms ||= @body_rows.map do |row|
-      Term.new(parse_row(row).merge("language_code" => @language_code))
+      attributes
     end
-  end
-  def to_hash
-    {
-      "terms" => terms.map(&:to_hash)
-    }
-  end
+    def terms
+      @terms ||= @body_rows.map do |row|
+        Term.new(parse_row(row).merge("language_code" => @language_code))
+      end
+    end
-end
+    def to_hash
+      {
+        "terms" => terms.map(&:to_hash)
+      }
+    end
+    private
+    # @param value [String]
+    # @param key [String]
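+    # @return [Hash, String] Hash with "ref" (plus "link" when an item is found) for "authoritative-source"; otherwise value unchanged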
+    def fetch_attribute(value, key)
+      case key
+      when "authoritative-source"
+        begin
+          src = { "ref" => value }
+          item = RelatonDb.instance.fetch value
+          src["link"] = item.url if item
+          src
+        rescue RelatonBib::RequestError => e
+          warn e.message
+          src
+        end
+      else
+        value
+      end
+    end
+  end
 end

data/lib/tc211/termbase/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Tc211
   module Termbase
-    VERSION = "0.1.1"
+    VERSION = "0.1.2"
   end
 end

data/tc211-termbase.gemspec CHANGED Viewed

@@ -24,8 +24,11 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency "iso-639"
   spec.add_runtime_dependency "creek"
+  spec.add_runtime_dependency "relaton", "~>0.4.0"
-  spec.add_development_dependency "bundler", "~> 1.17"
+  spec.add_development_dependency "bundler", "~> 2.0.1"
+  spec.add_development_dependency "debase"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec", "~> 3.0"
+  spec.add_development_dependency "ruby-debug-ide"
 end