RubyGems - tc211-termbase - Versions diffs - 0.1.13 → 0.2.3 - Mend

tc211-termbase 0.1.13 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/.github/workflows/rake.yml +13 -0
data/.github/workflows/release.yml +23 -0
data/.gitignore +3 -1
data/exe/tc211-termbase-xlsx2yaml +2 -5
data/lib/tc211/termbase/concept.rb +88 -39
data/lib/tc211/termbase/concept_collection.rb +29 -21
data/lib/tc211/termbase/information_sheet.rb +11 -15
data/lib/tc211/termbase/metadata_section.rb +108 -112
data/lib/tc211/termbase/sheet_section.rb +18 -19
data/lib/tc211/termbase/term.rb +348 -216
data/lib/tc211/termbase/term_workbook.rb +35 -36
data/lib/tc211/termbase/terminology_sheet.rb +84 -74
data/lib/tc211/termbase/terms_section.rb +9 -8
data/lib/tc211/termbase/version.rb +1 -1
data/lib/tc211/termbase.rb +1 -0
data/tc211-termbase.gemspec +6 -6
metadata +16 -29
data/Gemfile.lock +0 -123

data/lib/tc211/termbase/term.rb CHANGED

@@ -1,245 +1,377 @@
 module Tc211::Termbase
+  class Term
+    INPUT_ATTRIBS = %i(
+      id
+      term
+      abbrev
+      synonyms
+      alt
+      definition
+      country_code
+      language_code
+      notes
+      examples
+      entry_status
+      classification
+      review_indicator
+      authoritative_source
+      authoritative_source_similarity
+      lineage_source
+      lineage_source_similarity
+      date_accepted
+      date_amended
+      review_date
+      review_status
+      review_type
+      review_decision
+      review_decision_date
+      review_decision_event
+      review_decision_notes
+      release
+    ).freeze
-class Term
-  ATTRIBS = %i(
-    id term abbrev synonyms alt definition
-    country_code
-    language_code
-    notes examples
-    entry_status
-    classification
-    review_indicator
-    authoritative_source
-    authoritative_source_similarity
-    lineage_source
-    lineage_source_similarity
-    date_accepted
-    date_amended
-    review_date
-    review_status
-    review_type
-    review_decision
-    review_decision_date
-    review_decision_event
-    review_decision_notes
-    release
-  )
-  attr_accessor *ATTRIBS
-  def initialize(options={})
-    @examples = []
-    @notes = []
-    # puts "options #{options.inspect}"
-    options.each_pair do |k, v|
-      v = v.strip if v.is_a?(String)
-      next unless v
-      case k
-      when /^example/
-        add_example(v)
-      when /^note/
-        add_note(v)
-      else
-        # puts"Key #{k}"
-        key = k.gsub("-", "_")
-        self.send("#{key}=", v)
+    OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt abbrev synonyms classification) + %i(terms)
+    attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
+    def initialize(options = {})
+      @examples = []
+      @notes = []
+      @definition = []
+      # puts "options #{options.inspect}"
+      options.each_pair do |k, v|
+        v = v.strip if v.is_a?(String)
+        next unless v
+        case k
+        when /^example/
+          add_example(v)
+        when /^note/
+          add_note(v)
+        else
+          # puts"Key #{k}"
+          key = k.gsub("-", "_")
+          send("#{key}=", v)
+        end
       end
+      self
     end
-    self
-  end
-  STRIP_PUNCTUATION = [
-    "：",
-    ":",
-    ".",
-    "–",
-    "\-"
-  ]
-  # WARNING
-  # Always put the longer Regexp match in front!
-  EXAMPLE_PREFIXES = {
-    # TODO: fix this, we should not have "EXAMPLES"
-    eng: ["EXAMPLES", "EXAMPLE"],
-    ara: "مثال",
-    chi: "示例",
-    dan: "EKSEMPEL",
-    dut: ["VOORBEELD", "VOORBEELDEN"],
-    fin: "ESIM",
-    fre: "Exemple",
-    # ger: "",
-    jpn: "例",
-    kor: "보기",
-    pol: "PRZYKŁAD",
-    may: "Contoh",
-    rus: "Пример",
-    spa: "Ejemplo",
-    swe: "Exempel"
-  }
-  # WARNING
-  # Always put the longer Regexp match in front!
-  NOTE_PREFIXES = {
-    eng: ["Note \\d to entry", "NOTE"],
-    ara: "ملاحظة",
-    chi: "注",
-    dan: "Note",
-    dut: "OPMERKING",
-    fin: "HUOM\\.?",  # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1." (numeral added by the method)
-    fre: "A noter",
-    # ger: "",
-    jpn: "備考",
-    kor: "비고",
-    pol: "UWAGA",
-    may: "catatan",
-    rus: "нота",
-    spa: "Nota",
-    swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."]
-  }
-  # To match Chinese and Japanese numerals
-  ALL_FULL_HALF_WIDTH_NUMBERS = "[0-9０-９]"
-  def add_example(example)
-    c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
-    @examples << c unless c.empty?
-  end
+    STRIP_PUNCTUATION = [
+      "：",
+      ":",
+      ".",
+      "–",
+      "\-",
+    ].freeze
-  def add_note(note)
-    c = clean_prefixed_string(note, NOTE_PREFIXES)
-    @notes << c unless c.empty?
-  end
+    # WARNING
+    # Always put the longer Regexp match in front!
+    EXAMPLE_PREFIXES = {
+      # TODO: fix this, we should not have "EXAMPLES"
+      eng: ["EXAMPLES", "EXAMPLE"],
+      ara: "مثال",
+      chi: "示例",
+      dan: "EKSEMPEL",
+      dut: ["VOORBEELD", "VOORBEELDEN"],
+      fin: "ESIM",
+      fre: "Exemple",
+      # ger: "",
+      jpn: "例",
+      kor: "보기",
+      pol: "PRZYKŁAD",
+      may: "Contoh",
+      rus: "Пример",
+      spa: "Ejemplo",
+      swe: "Exempel",
+    }.freeze
-  def clean_prefixed_string(string, criterion_map)
-    carry = string.strip
-    criterion_map.values.flatten.each do |mat|
-      # puts "example string: #{carry}, mat: #{mat}"
-      # puts "note string: #{carry}, mat: #{mat}"
-      # if @id == 318 and mat == "Nota" and string == "NOTA 1 Una operación tiene un nombre y una lista de parámetros."
-      #   require "pry"
-      #   binding.pry
-      # end
-      # Arabic notes/examples sometimes use parantheses around numbers
-      carry = carry.sub(
-        Regexp.new(
-          "^#{mat}\s*[#{STRIP_PUNCTUATION.join('')}]?" +
-          "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*"+
-          "[#{STRIP_PUNCTUATION.join('')}]?\s*",
-          Regexp::IGNORECASE
-        ),
-      '')
-    end
-    carry
-  end
+    # WARNING
+    # Always put the longer Regexp match in front!
+    NOTE_PREFIXES = {
+      eng: ["Note \\d to entry", "NOTE"],
+      ara: "ملاحظة",
+      chi: "注",
+      dan: "Note",
+      dut: "OPMERKING",
+      # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
+      # (numeral added by the method)
+      fin: "HUOM\\.?",
+      fre: "A noter",
+      # ger: "",
+      jpn: "備考",
+      kor: "비고",
+      pol: "UWAGA",
+      may: "catatan",
+      rus: "нота",
+      spa: "Nota",
+      swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
+    }.freeze
+    # To match Chinese and Japanese numerals
+    ALL_FULL_HALF_WIDTH_NUMBERS = "[0-9０-９]".freeze
-  # The termid should ALWAYS be an integer.
-  # https://github.com/riboseinc/tc211-termbase/issues/1
-  def id=(newid)
-    @id = Integer(newid)
-  end
+    SOURCE_STATUSES = {
+      1 => "identical",
+      2 => "restyle",
+      3 => "context_added",
+      4 => "generalisation",
+      5 => "specialisation",
+      6 => "unspecified",
+    }.freeze
-  def to_hash
-    ATTRIBS.inject({}) do |acc, attrib|
-      value = self.send(attrib)
-      unless value.nil?
-        acc.merge(attrib.to_s => value)
-      else
-        acc
+    def add_example(example)
+      c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
+      @examples << c unless c.empty?
+    end
+    def add_note(note)
+      c = clean_prefixed_string(note, NOTE_PREFIXES)
+      @notes << c unless c.empty?
+    end
+    def clean_prefixed_string(string, criterion_map)
+      carry = string.to_s.strip
+      criterion_map.values.flatten.each do |mat|
+        # Arabic notes/examples sometimes use parantheses around numbers
+        carry = carry.sub(carry_regex(mat), "")
       end
+      carry
     end
-  end
-  # entry-status
-  ## Must be one of notValid valid superseded retired
-  def entry_status=(value)
-    case value
-    when "有效的", "käytössä", "действующий", "válido"
-      value = "valid"
-    when "korvattu", "reemplazado"
-      value = "superseded"
-    when "информация отсутствует" # "information absent"!?
-      value = "retired"
-    when %w(notValid valid superseded retired)
-      # do nothing
-    end
-    @entry_status = value
-  end
+    def carry_regex(mat)
+      Regexp.new(
+        [
+          "^#{mat}\s*[#{STRIP_PUNCTUATION.join}]?",
+          "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*",
+          "[#{STRIP_PUNCTUATION.join}]?\s*",
+        ].join,
+      )
+    end
-  # classification
-  ## Must be one of the following: preferred admitted deprecated
-  def classification=(value)
-    case value
-    when ""
-      value = "admitted"
-    when "认可的", "допустимый", "admitido"
-      value = "admitted"
-    when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
-      value = "preferred"
-    when %w(preferred admitted deprecated)
-      # do nothing
-    end
-    @classification = value
-  end
+    # The termid should ALWAYS be an integer.
+    # https://github.com/riboseinc/tc211-termbase/issues/1
+    def id=(newid)
+      @id = Integer(newid)
+    end
-  # review-indicator
-  ## Must be one of the following <empty field> Under Review in Source Document",
-  def review_indicator=(value)
-    unless ["", "Under Review in Source Document"].include?(value)
-      value = ""
+    def definition=(definition)
+      @definition << definition
     end
-    @review_indicator = value
-  end
-  # authoritative-source-similarity
-  #     ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
-  def authoritative_source_similarity=(value)
-    unless (1..6).include?(value)
-      value = 6
+    def to_hash
+      OUTPUT_ATTRIBS.inject({}) do |acc, attrib|
+        value = send(attrib)
+        if value.nil?
+          acc
+        else
+          acc.merge(attrib.to_s => value)
+        end
+      end
     end
-    @authoritative_source_similarity = value
-  end
-  # lineage-source-similarity
-  #     ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
-  def authoritative_source_similarity=(value)
-    unless (1..6).include?(value)
-      value = 6
+    # entry-status
+    ## Must be one of notValid valid superseded retired
+    def entry_status=(value)
+      case value
+      when "有效的", "käytössä", "действующий", "válido"
+        value = "valid"
+      when "korvattu", "reemplazado"
+        value = "superseded"
+      when "информация отсутствует" # "information absent"!?
+        value = "retired"
+      when %w(notValid valid superseded retired) # do nothing
+      end
+      @entry_status = value
     end
-    @authoritative_source_similarity
-  end
-  def review_status=(value) ## Must be one of pending tentative final
-    unless ["", "pending", "tentative", "final"].include?(value)
-      value = ""
+    # classification
+    ## Must be one of the following: preferred admitted deprecated
+    def classification=(value)
+      case value
+      when "", "认可的", "допустимый", "admitido", "adminitido"
+        value = "admitted"
+      when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
+        value = "preferred"
+      when %w(preferred admitted deprecated)
+        # do nothing
+      end
+      @classification = value
     end
-    @review_status = value
-  end
-  def review_type=(value)     ## Must be one of supersession, retirement
-    unless ["", "supersession", "retirement"].include?(value)
-      value = ""
+    # review-indicator
+    #   Must be one of the following
+    #     <empty field>
+    #     Under Review in Source Document
+    def review_indicator=(value)
+      unless ["", "Under Review in Source Document"].include?(value)
+        value = ""
+      end
+      @review_indicator = value
     end
-    @review_type = value
-  end
-  def review_decision=(value) ## Must be one of withdrawn, accepted notAccepted
-    unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
-      value = ""
+    # authoritative-source-similarity
+    #   Must be one of the following codes:
+    #     identical = 1
+    #     restyled = 2
+    #     context added = 3
+    #     generalisation = 4
+    #     specialisation = 5
+    #     unspecified = 6
+    def authoritative_source_similarity=(value)
+      unless SOURCE_STATUSES.key?(value)
+        value = 6
+      end
+      @authoritative_source_similarity = value
     end
-    @review_decision = value
-  end
-  def retired?
-    release >= 0
-  end
+    # lineage-source-similarity
+    #   Must be one of the following codes:
+    #     identical = 1
+    #     restyled = 2
+    #     context added = 3
+    #     generalisation = 4
+    #     specialisation = 5
+    #     unspecified = 6
+    def lineage_source_similarity=(value)
+      unless SOURCE_STATUSES.key?(value)
+        value = 6
+      end
+      @lineage_source_similarity = value
+    end
-end
+    ## value Must be one of pending tentative final
+    def review_status=(value)
+      unless ["", "pending", "tentative", "final"].include?(value)
+        value = ""
+      end
+      @review_status = value
+    end
-end
+    ## value Must be one of supersession, retirement
+    def review_type=(value)
+      unless ["", "supersession", "retirement"].include?(value)
+        value = ""
+      end
+      @review_type = value
+    end
+    ## value Must be one of withdrawn, accepted notAccepted
+    def review_decision=(value)
+      unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
+        value = ""
+      end
+      @review_decision = value
+    end
+    def retired?
+      release >= 0
+    end
+    def terms
+      [
+        primary_term_hash,
+        alt_term_hash,
+        abbreviation_term_hash,
+        synonyms_term_hash,
+      ].compact
+    end
+    def primary_term_hash
+      return unless term
+      {
+        "type" => "expression",
+        "designation" => term,
+        "normative_status" => classification,
+      }
+    end
+    def alt_term_hash
+      return unless alt
+      {
+        "type" => "expression",
+        "designation" => alt,
+        "normative_status" => classification,
+      }
+    end
+    def abbreviation_term_hash
+      return unless abbrev
+      {
+        "type" => "abbreviation",
+        "designation" => abbrev,
+      }
+    end
+    def synonyms_term_hash
+      return unless synonyms
+      {
+        "type" => "expression",
+        "designation" => synonyms,
+      }
+    end
+    def sources_hash
+      [
+        authoritative_source_hash,
+        lineage_source_hash,
+      ].compact
+    end
+    def authoritative_source_hash
+      return unless authoritative_source
+      {
+        origin: {
+          link: authoritative_source["link"],
+          ref: authoritative_source["ref"],
+          clause: authoritative_source["clause"],
+        },
+        type: "authoritative",
+        status: SOURCE_STATUSES[authoritative_source_similarity],
+      }
+    end
+    def lineage_source_hash
+      return unless lineage_source
+      {
+        origin: {
+          ref: lineage_source,
+        },
+        type: "lineage",
+        status: SOURCE_STATUSES[lineage_source_similarity],
+      }
+    end
+    def to_localized_concept_hash
+      localized_concept_hash = to_hash
+      %w[
+        review_status
+        review_decision
+        review_decision_notes
+        review_indicator
+        authoritative_source
+        authoritative_source_similarity
+        lineage_source
+        lineage_source_similarity
+        country_code
+      ].each do |key|
+        localized_concept_hash.delete(key)
+      end
+      localized_concept_hash["id"] = localized_concept_hash["id"].to_s
+      localized_concept_hash["sources"] = sources_hash
+      localized_concept_hash
+    end
+  end
+end

data/lib/tc211/termbase/term_workbook.rb CHANGED

@@ -6,48 +6,47 @@ require_relative "information_sheet"
 require_relative "terminology_sheet"
 module Tc211::Termbase
+  class TermWorkbook
+    attr_accessor :workbook, :glossary_info, :languages, :filename
+    SPECIAL_SHEETS = [
+      "Glossary Information",
+      "Character Encoding Spreadsheet",
+    ].freeze
+    def initialize(filepath)
+      @filename = filepath
+      @workbook = Creek::Book.new(filepath)
+      @glossary_info = InformationSheet.new(
+        find_sheet_by_name("Glossary Information"),
+      )
+      @languages = languages_supported
+      self
+    end
-class TermWorkbook
-  attr_accessor :workbook
-  attr_accessor :glossary_info
-  attr_accessor :languages
-  attr_accessor :filename
-  SPECIAL_SHEETS = [
-    "Glossary Information",
-    "Character Encoding Spreadsheet"
-  ]
-  def initialize(filepath)
-    @filename = filepath
-    @workbook = Creek::Book.new(filepath)
-    @glossary_info = InformationSheet.new(find_sheet_by_name("Glossary Information"))
-    @languages = languages_supported
-    self
-  end
-  def languages_supported
-    @workbook.sheets.map(&:name).reject! do |name|
-      SPECIAL_SHEETS.include?(name)
+    def languages_supported
+      @workbook.sheets.map(&:name).reject! do |name|
+        SPECIAL_SHEETS.include?(name)
+      end
     end
-  end
-  def language_sheet(lang)
-    raise unless @languages.include?(lang)
-    TerminologySheet.new(find_sheet_by_name(lang))
-  end
+    def language_sheet(lang)
+      raise unless @languages.include?(lang)
-  def find_sheet_by_name(sheet_name)
-    @workbook.sheets.detect do |sheet|
-      sheet.name == sheet_name
+      TerminologySheet.new(find_sheet_by_name(lang))
+    end
+    def find_sheet_by_name(sheet_name)
+      @workbook.sheets.detect do |sheet|
+        sheet.name == sheet_name
+      end
     end
-  end
-  def write_glossary_info
-    glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
-    File.open(glossary_info_fn,"w") do |file|
-      file.write(glossary_info.to_yaml)
+    def write_glossary_info
+      glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
+      File.open(glossary_info_fn, "w") do |file|
+        file.write(glossary_info.to_yaml)
+      end
     end
   end
 end
-end