RubyGems - tc211-termbase - Versions diffs - 0.2.1 → 0.2.3 - Mend

tc211-termbase 0.2.1 → 0.2.3

Files changed (19) hide show

checksums.yaml +4 -4
data/.github/workflows/rake.yml +13 -0
data/.github/workflows/release.yml +23 -0
data/.gitignore +3 -1
data/exe/tc211-termbase-xlsx2yaml +2 -5
data/lib/tc211/termbase/concept.rb +88 -39
data/lib/tc211/termbase/concept_collection.rb +29 -21
data/lib/tc211/termbase/information_sheet.rb +11 -15
data/lib/tc211/termbase/metadata_section.rb +108 -112
data/lib/tc211/termbase/sheet_section.rb +18 -19
data/lib/tc211/termbase/term.rb +341 -230
data/lib/tc211/termbase/term_workbook.rb +35 -36
data/lib/tc211/termbase/terminology_sheet.rb +84 -74
data/lib/tc211/termbase/terms_section.rb +9 -8
data/lib/tc211/termbase/version.rb +1 -1
data/lib/tc211/termbase.rb +1 -0
data/tc211-termbase.gemspec +6 -6
metadata +20 -39
data/Gemfile.lock +0 -122

data/lib/tc211/termbase/term.rb CHANGED Viewed

@@ -1,266 +1,377 @@
 module Tc211::Termbase
+  class Term
+    INPUT_ATTRIBS = %i(
+      id
+      term
+      abbrev
+      synonyms
+      alt
+      definition
+      country_code
+      language_code
+      notes
+      examples
+      entry_status
+      classification
+      review_indicator
+      authoritative_source
+      authoritative_source_similarity
+      lineage_source
+      lineage_source_similarity
+      date_accepted
+      date_amended
+      review_date
+      review_status
+      review_type
+      review_decision
+      review_decision_date
+      review_decision_event
+      review_decision_notes
+      release
+    ).freeze
+    OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt abbrev synonyms classification) + %i(terms)
+    attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
+    def initialize(options = {})
+      @examples = []
+      @notes = []
+      @definition = []
+      # puts "options #{options.inspect}"
+      options.each_pair do |k, v|
+        v = v.strip if v.is_a?(String)
+        next unless v
+        case k
+        when /^example/
+          add_example(v)
+        when /^note/
+          add_note(v)
+        else
+          # puts"Key #{k}"
+          key = k.gsub("-", "_")
+          send("#{key}=", v)
+        end
+      end
+      self
+    end
+    STRIP_PUNCTUATION = [
+      "：",
+      ":",
+      ".",
+      "–",
+      "\-",
+    ].freeze
+    # WARNING
+    # Always put the longer Regexp match in front!
+    EXAMPLE_PREFIXES = {
+      # TODO: fix this, we should not have "EXAMPLES"
+      eng: ["EXAMPLES", "EXAMPLE"],
+      ara: "مثال",
+      chi: "示例",
+      dan: "EKSEMPEL",
+      dut: ["VOORBEELD", "VOORBEELDEN"],
+      fin: "ESIM",
+      fre: "Exemple",
+      # ger: "",
+      jpn: "例",
+      kor: "보기",
+      pol: "PRZYKŁAD",
+      may: "Contoh",
+      rus: "Пример",
+      spa: "Ejemplo",
+      swe: "Exempel",
+    }.freeze
+    # WARNING
+    # Always put the longer Regexp match in front!
+    NOTE_PREFIXES = {
+      eng: ["Note \\d to entry", "NOTE"],
+      ara: "ملاحظة",
+      chi: "注",
+      dan: "Note",
+      dut: "OPMERKING",
+      # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
+      # (numeral added by the method)
+      fin: "HUOM\\.?",
+      fre: "A noter",
+      # ger: "",
+      jpn: "備考",
+      kor: "비고",
+      pol: "UWAGA",
+      may: "catatan",
+      rus: "нота",
+      spa: "Nota",
+      swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
+    }.freeze
+    # To match Chinese and Japanese numerals
+    ALL_FULL_HALF_WIDTH_NUMBERS = "[0-9０-９]".freeze
+    SOURCE_STATUSES = {
+      1 => "identical",
+      2 => "restyle",
+      3 => "context_added",
+      4 => "generalisation",
+      5 => "specialisation",
+      6 => "unspecified",
+    }.freeze
+    def add_example(example)
+      c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
+      @examples << c unless c.empty?
+    end
-class Term
-  INPUT_ATTRIBS = %i(
-    id term abbrev synonyms alt definition
-    country_code
-    language_code
-    notes examples
-    entry_status
-    classification
-    review_indicator
-    authoritative_source
-    authoritative_source_similarity
-    lineage_source
-    lineage_source_similarity
-    date_accepted
-    date_amended
-    review_date
-    review_status
-    review_type
-    review_decision
-    review_decision_date
-    review_decision_event
-    review_decision_notes
-    release
-  )
-  OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt classification) + %i(terms)
-  attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
-  def initialize(options={})
-    @examples = []
-    @notes = []
-    # puts "options #{options.inspect}"
-    options.each_pair do |k, v|
-      v = v.strip if v.is_a?(String)
-      next unless v
-      case k
-      when /^example/
-        add_example(v)
-      when /^note/
-        add_note(v)
-      else
-        # puts"Key #{k}"
-        key = k.gsub("-", "_")
-        self.send("#{key}=", v)
+    def add_note(note)
+      c = clean_prefixed_string(note, NOTE_PREFIXES)
+      @notes << c unless c.empty?
+    end
+    def clean_prefixed_string(string, criterion_map)
+      carry = string.to_s.strip
+      criterion_map.values.flatten.each do |mat|
+        # Arabic notes/examples sometimes use parentheses around numbers
+        carry = carry.sub(carry_regex(mat), "")
       end
+      carry
     end
-    self
-  end
-  STRIP_PUNCTUATION = [
-    "：",
-    ":",
-    ".",
-    "–",
-    "\-"
-  ]
-  # WARNING
-  # Always put the longer Regexp match in front!
-  EXAMPLE_PREFIXES = {
-    # TODO: fix this, we should not have "EXAMPLES"
-    eng: ["EXAMPLES", "EXAMPLE"],
-    ara: "مثال",
-    chi: "示例",
-    dan: "EKSEMPEL",
-    dut: ["VOORBEELD", "VOORBEELDEN"],
-    fin: "ESIM",
-    fre: "Exemple",
-    # ger: "",
-    jpn: "例",
-    kor: "보기",
-    pol: "PRZYKŁAD",
-    may: "Contoh",
-    rus: "Пример",
-    spa: "Ejemplo",
-    swe: "Exempel"
-  }
-  # WARNING
-  # Always put the longer Regexp match in front!
-  NOTE_PREFIXES = {
-    eng: ["Note \\d to entry", "NOTE"],
-    ara: "ملاحظة",
-    chi: "注",
-    dan: "Note",
-    dut: "OPMERKING",
-    fin: "HUOM\\.?",  # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1." (numeral added by the method)
-    fre: "A noter",
-    # ger: "",
-    jpn: "備考",
-    kor: "비고",
-    pol: "UWAGA",
-    may: "catatan",
-    rus: "нота",
-    spa: "Nota",
-    swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."]
-  }
-  # To match Chinese and Japanese numerals
-  ALL_FULL_HALF_WIDTH_NUMBERS = "[0-9０-９]"
-  def add_example(example)
-    c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
-    @examples << c unless c.empty?
-  end
+    def carry_regex(mat)
+      Regexp.new(
+        [
+          "^#{mat}\s*[#{STRIP_PUNCTUATION.join}]?",
+          "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*",
+          "[#{STRIP_PUNCTUATION.join}]?\s*",
+        ].join,
+      )
+    end
-  def add_note(note)
-    c = clean_prefixed_string(note, NOTE_PREFIXES)
-    @notes << c unless c.empty?
-  end
+    # The termid should ALWAYS be an integer.
+    # https://github.com/riboseinc/tc211-termbase/issues/1
+    def id=(newid)
+      @id = Integer(newid)
+    end
-  def clean_prefixed_string(string, criterion_map)
-    carry = string.strip
-    criterion_map.values.flatten.each do |mat|
-      # puts "example string: #{carry}, mat: #{mat}"
-      # puts "note string: #{carry}, mat: #{mat}"
-      # if @id == 318 and mat == "Nota" and string == "NOTA 1 Una operación tiene un nombre y una lista de parámetros."
-      #   require "pry"
-      #   binding.pry
-      # end
-      # Arabic notes/examples sometimes use parentheses around numbers
-      carry = carry.sub(
-        Regexp.new(
-          "^#{mat}\s*[#{STRIP_PUNCTUATION.join('')}]?" +
-          "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*"+
-          "[#{STRIP_PUNCTUATION.join('')}]?\s*",
-          Regexp::IGNORECASE
-        ),
-      '')
+    def definition=(definition)
+      @definition << definition
     end
-    carry
-  end
+    def to_hash
+      OUTPUT_ATTRIBS.inject({}) do |acc, attrib|
+        value = send(attrib)
+        if value.nil?
+          acc
+        else
+          acc.merge(attrib.to_s => value)
+        end
+      end
+    end
+    # entry-status
+    ## Must be one of notValid valid superseded retired
+    def entry_status=(value)
+      case value
+      when "有效的", "käytössä", "действующий", "válido"
+        value = "valid"
+      when "korvattu", "reemplazado"
+        value = "superseded"
+      when "информация отсутствует" # "information absent"!?
+        value = "retired"
+      when %w(notValid valid superseded retired) # do nothing
+      end
+      @entry_status = value
+    end
-  # The termid should ALWAYS be an integer.
-  # https://github.com/riboseinc/tc211-termbase/issues/1
-  def id=(newid)
-    @id = Integer(newid)
-  end
+    # classification
+    ## Must be one of the following: preferred admitted deprecated
+    def classification=(value)
+      case value
+      when "", "认可的", "допустимый", "admitido", "adminitido"
+        value = "admitted"
+      when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
+        value = "preferred"
+      when %w(preferred admitted deprecated)
+        # do nothing
+      end
+      @classification = value
+    end
-  def to_hash
-    OUTPUT_ATTRIBS.inject({}) do |acc, attrib|
-      value = self.send(attrib)
-      unless value.nil?
-        acc.merge(attrib.to_s => value)
-      else
-        acc
+    # review-indicator
+    #   Must be one of the following
+    #     <empty field>
+    #     Under Review in Source Document
+    def review_indicator=(value)
+      unless ["", "Under Review in Source Document"].include?(value)
+        value = ""
       end
+      @review_indicator = value
     end
-  end
-  # entry-status
-  ## Must be one of notValid valid superseded retired
-  def entry_status=(value)
-    case value
-    when "有效的", "käytössä", "действующий", "válido"
-      value = "valid"
-    when "korvattu", "reemplazado"
-      value = "superseded"
-    when "информация отсутствует" # "information absent"!?
-      value = "retired"
-    when %w(notValid valid superseded retired)
-      # do nothing
+    # authoritative-source-similarity
+    #   Must be one of the following codes:
+    #     identical = 1
+    #     restyled = 2
+    #     context added = 3
+    #     generalisation = 4
+    #     specialisation = 5
+    #     unspecified = 6
+    def authoritative_source_similarity=(value)
+      unless SOURCE_STATUSES.key?(value)
+        value = 6
+      end
+      @authoritative_source_similarity = value
     end
-    @entry_status = value
-  end
-  # classification
-  ## Must be one of the following: preferred admitted deprecated
-  def classification=(value)
-    case value
-    when ""
-      value = "admitted"
-    when "认可的", "допустимый", "admitido"
-      value = "admitted"
-    when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
-      value = "preferred"
-    when %w(preferred admitted deprecated)
-      # do nothing
+    # lineage-source-similarity
+    #   Must be one of the following codes:
+    #     identical = 1
+    #     restyled = 2
+    #     context added = 3
+    #     generalisation = 4
+    #     specialisation = 5
+    #     unspecified = 6
+    def lineage_source_similarity=(value)
+      unless SOURCE_STATUSES.key?(value)
+        value = 6
+      end
+      @lineage_source_similarity = value
     end
-    @classification = value
-  end
-  # review-indicator
-  ## Must be one of the following <empty field> Under Review in Source Document",
-  def review_indicator=(value)
-    unless ["", "Under Review in Source Document"].include?(value)
-      value = ""
+    ## value Must be one of pending tentative final
+    def review_status=(value)
+      unless ["", "pending", "tentative", "final"].include?(value)
+        value = ""
+      end
+      @review_status = value
+    end
+    ## value Must be one of supersession, retirement
+    def review_type=(value)
+      unless ["", "supersession", "retirement"].include?(value)
+        value = ""
+      end
+      @review_type = value
     end
-    @review_indicator = value
-  end
-  # authoritative-source-similarity
-  #     ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
-  def authoritative_source_similarity=(value)
-    unless (1..6).include?(value)
-      value = 6
+    ## value Must be one of withdrawn, accepted notAccepted
+    def review_decision=(value)
+      unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
+        value = ""
+      end
+      @review_decision = value
     end
-    @authoritative_source_similarity = value
-  end
-  # lineage-source-similarity
-  #     ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
-  def authoritative_source_similarity=(value)
-    unless (1..6).include?(value)
-      value = 6
+    def retired?
+      release >= 0
     end
-    @authoritative_source_similarity
-  end
-  def review_status=(value) ## Must be one of pending tentative final
-    unless ["", "pending", "tentative", "final"].include?(value)
-      value = ""
+    def terms
+      [
+        primary_term_hash,
+        alt_term_hash,
+        abbreviation_term_hash,
+        synonyms_term_hash,
+      ].compact
     end
-    @review_status = value
-  end
-  def review_type=(value)     ## Must be one of supersession, retirement
-    unless ["", "supersession", "retirement"].include?(value)
-      value = ""
+    def primary_term_hash
+      return unless term
+      {
+        "type" => "expression",
+        "designation" => term,
+        "normative_status" => classification,
+      }
     end
-    @review_type = value
-  end
-  def review_decision=(value) ## Must be one of withdrawn, accepted notAccepted
-    unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
-      value = ""
+    def alt_term_hash
+      return unless alt
+      {
+        "type" => "expression",
+        "designation" => alt,
+        "normative_status" => classification,
+      }
     end
-    @review_decision = value
-  end
-  def retired?
-    release >= 0
-  end
+    def abbreviation_term_hash
+      return unless abbrev
-  def terms
-    [primary_term_hash, alt_term_hash].compact
-  end
+      {
+        "type" => "abbreviation",
+        "designation" => abbrev,
+      }
+    end
-  def primary_term_hash
-    {
-      "type" => "expression",
-      "designation" => term,
-      "normative_status" => classification,
-    } if term
-  end
+    def synonyms_term_hash
+      return unless synonyms
-  def alt_term_hash
-    {
-      "type" => "expression",
-      "designation" => alt,
-      "normative_status" => classification,
-    } if alt
+      {
+        "type" => "expression",
+        "designation" => synonyms,
+      }
+    end
+    def sources_hash
+      [
+        authoritative_source_hash,
+        lineage_source_hash,
+      ].compact
+    end
+    def authoritative_source_hash
+      return unless authoritative_source
+      {
+        origin: {
+          link: authoritative_source["link"],
+          ref: authoritative_source["ref"],
+          clause: authoritative_source["clause"],
+        },
+        type: "authoritative",
+        status: SOURCE_STATUSES[authoritative_source_similarity],
+      }
+    end
+    def lineage_source_hash
+      return unless lineage_source
+      {
+        origin: {
+          ref: lineage_source,
+        },
+        type: "lineage",
+        status: SOURCE_STATUSES[lineage_source_similarity],
+      }
+    end
+    def to_localized_concept_hash
+      localized_concept_hash = to_hash
+      %w[
+        review_status
+        review_decision
+        review_decision_notes
+        review_indicator
+        authoritative_source
+        authoritative_source_similarity
+        lineage_source
+        lineage_source_similarity
+        country_code
+      ].each do |key|
+        localized_concept_hash.delete(key)
+      end
+      localized_concept_hash["id"] = localized_concept_hash["id"].to_s
+      localized_concept_hash["sources"] = sources_hash
+      localized_concept_hash
+    end
   end
 end
-end

data/lib/tc211/termbase/term_workbook.rb CHANGED Viewed

@@ -6,48 +6,47 @@ require_relative "information_sheet"
 require_relative "terminology_sheet"
 module Tc211::Termbase
+  class TermWorkbook
+    attr_accessor :workbook, :glossary_info, :languages, :filename
+    SPECIAL_SHEETS = [
+      "Glossary Information",
+      "Character Encoding Spreadsheet",
+    ].freeze
+    def initialize(filepath)
+      @filename = filepath
+      @workbook = Creek::Book.new(filepath)
+      @glossary_info = InformationSheet.new(
+        find_sheet_by_name("Glossary Information"),
+      )
+      @languages = languages_supported
+      self
+    end
-class TermWorkbook
-  attr_accessor :workbook
-  attr_accessor :glossary_info
-  attr_accessor :languages
-  attr_accessor :filename
-  SPECIAL_SHEETS = [
-    "Glossary Information",
-    "Character Encoding Spreadsheet"
-  ]
-  def initialize(filepath)
-    @filename = filepath
-    @workbook = Creek::Book.new(filepath)
-    @glossary_info = InformationSheet.new(find_sheet_by_name("Glossary Information"))
-    @languages = languages_supported
-    self
-  end
-  def languages_supported
-    @workbook.sheets.map(&:name).reject! do |name|
-      SPECIAL_SHEETS.include?(name)
+    def languages_supported
+      @workbook.sheets.map(&:name).reject! do |name|
+        SPECIAL_SHEETS.include?(name)
+      end
     end
-  end
-  def language_sheet(lang)
-    raise unless @languages.include?(lang)
-    TerminologySheet.new(find_sheet_by_name(lang))
-  end
+    def language_sheet(lang)
+      raise unless @languages.include?(lang)
-  def find_sheet_by_name(sheet_name)
-    @workbook.sheets.detect do |sheet|
-      sheet.name == sheet_name
+      TerminologySheet.new(find_sheet_by_name(lang))
+    end
+    def find_sheet_by_name(sheet_name)
+      @workbook.sheets.detect do |sheet|
+        sheet.name == sheet_name
+      end
     end
-  end
-  def write_glossary_info
-    glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
-    File.open(glossary_info_fn,"w") do |file|
-      file.write(glossary_info.to_yaml)
+    def write_glossary_info
+      glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
+      File.open(glossary_info_fn, "w") do |file|
+        file.write(glossary_info.to_yaml)
+      end
     end
   end
 end
-end