RubyGems - relaton-itu - Versions diffs - 1.20.1 → 1.20.3 - Mend

relaton-itu 1.20.1 → 1.20.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/Gemfile +4 -0
data/lib/relaton_itu/hit_collection.rb +3 -0
data/lib/relaton_itu/radio_regulations_parser.rb +66 -0
data/lib/relaton_itu/recommendation_parser.rb +191 -0
data/lib/relaton_itu/scrapper.rb +127 -282
data/lib/relaton_itu/version.rb +1 -1
data/lib/relaton_itu/xml_parser.rb +2 -4
data/lib/relaton_itu.rb +1 -0
data/relaton_itu.gemspec +1 -0
metadata +19 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5f782e5e67be5f75336a106dd6e2366aabf9085391d20f0e0e0424d7965e2b7f
-  data.tar.gz: 3491eda42b19b30bdbc7568f8a8e436c6d64bfcef80298ff6ca57c50e93737b8
+  metadata.gz: f70f91da524595847c16b4a8cb46dc7379a254822a84b03034e9b4965560ff4a
+  data.tar.gz: eb69591d51a38a1ad4c99f5914f2fd7be27abaad712947ea04a603939fe65196
 SHA512:
-  metadata.gz: 48987e9207d2a28778de5442e53ac2cba35481a219791aba02f119ed2048ec3018effffbea90d825cb3702c4e3c04285e649264542a6686163d046e52a528957
-  data.tar.gz: 2ece9eb73b4dbdb478d2c54d635c76cd8b11f54a59357aed2a8227a7fb69a5cb5d5cf7eaefcb4b0fa6a907fb93b95afcf80e0b950e44d533fb86456cc7db9bda
+  metadata.gz: 0d7300e181010d2bdd2829a0a888a0db59bd3c61448e09d840ab76f3a5c24b17700c5ea6ae4598e1aebfba82f173c3a619ad9e37e9581e26d636db601d178d99
+  data.tar.gz: b82909697c98ac580e0eaf13e1c0f0652365fb08f2951b8a86ffc7c972959703c916596b5f210624e6ec7945900502b63ac52f7ae516f4be0420fa79065ead8f

data/Gemfile CHANGED Viewed

@@ -13,3 +13,7 @@ gem "ruby-jing"
 gem "simplecov"
 gem "vcr"
 gem "webmock"
+group :development do
+  gem 'pry'
+end

data/lib/relaton_itu/hit_collection.rb CHANGED Viewed

@@ -45,6 +45,9 @@ module RelatonItu
       data = { json: params.to_json }
       resp = agent.post url, data
       @array = hits JSON.parse(resp.body)
+    rescue Mechanize::ResponseCodeError, SocketError, Timeout::Error, Errno::ECONNRESET,
+            EOFError, Net::ProtocolError, OpenSSL::SSL::SSLError => e
+      raise RelatonBib::RequestError, "Could not access #{url}: #{e.message}"
     end
     def request_document # rubocop:todo Metrics/MethodLength, Metrics/AbcSize

data/lib/relaton_itu/radio_regulations_parser.rb ADDED Viewed

@@ -0,0 +1,66 @@
+module RelatonItu
+  class RadioRegulationsParser
+    include Relaton::Core::ArrayWrapper
+    ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
+    def initialize(hit)
+      @hit = hit
+    end
+    def doc # memoized page fetched from doc_url via the hit collection's agent
+      @doc ||= hit.hit_collection.agent.get doc_url
+    rescue Mechanize::ResponseCodeError, SocketError, Timeout::Error, Errno::ECONNRESET,
+            EOFError, Net::ProtocolError, OpenSSL::SSL::SSLError => e
+      raise RelatonBib::RequestError, "Could not access #{doc_url}: #{e.message}" # `url` is not defined in this class
+    end
+    def doc_url
+      CGI.unescape(hit.hit[:url]).split("dest=").last
+    end
+    def fetch_edition = nil
+    def fetch_status = nil
+    def fetch_workgroup = nil
+    def fetch_abstract = []
+    def fetch_relations = []
+    def fetch_titles
+      title = doc.at("//title")&.text&.strip
+      return [] if title.nil? || title.empty?
+      RelatonBib::TypedTitleString.from_string title, "en", "Latn"
+    end
+    def fetch_dates
+      array(doc_date).map { |on| { type: "published", on: on } }
+    end
+    def doc_date
+      return @doc_date if defined? @doc_date
+      date_str = doc.at("//td[@class='title']/text()")&.text&.slice(/(?<=Year:\s)(?:\d{1,2}\.\w+\.)?\d{4}/)
+      @doc_date = date_str ? roman_to_arabic(date_str) : nil
+    end
+    def fetch_link
+      [RelatonBib::TypedUri.new(type: "src", content: doc_url)]
+    end
+    private
+    attr_reader :hit
+    # Convert roman month number in string date to arabic number
+    # @param date [String]
+    # @return [String]
+    def roman_to_arabic(date)
+      %r{(?<rmonth>[IVX]+)} =~ date
+      if ROMAN_MONTHS.index(rmonth)
+        month = ROMAN_MONTHS.index(rmonth) + 1
+        Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
+      else date
+      end
+    end
+  end
+end

data/lib/relaton_itu/recommendation_parser.rb ADDED Viewed

@@ -0,0 +1,191 @@
+module RelatonItu
+  # Parse ITU Recommendations fetched as JSON from the ITU web API to Relaton format.
+  class RecommendationParser
+    include Relaton::Core::ArrayWrapper
+    RECHDR = "https://www.itu.int/mws/api/recommendations/getRecHdrDetail?idrec=%{idrec}&lang=en".freeze
+    RECEDITIONS = "https://www.itu.int/mws/api/recommendations/getRecEditions?idrec=%{idrec}&lang=en".freeze
+    RECSUPPLEMENTS = "https://www.itu.int/mws/api/recommendations/getRecSupplements?idrec=%{idrec}&lang=en".freeze
+    IMPLGUIDES = "https://www.itu.int/mws/api/recommendations/getImplGuides?idrec=%{idrec}&lang=en".freeze
+    def initialize(hit, idrec, imp)
+      @hit = hit
+      @idrec = idrec
+      @imp = imp
+    end
+    def doc
+      @doc ||= begin
+        url = (imp ? IMPLGUIDES : RECHDR ) % { idrec: idrec }
+        resp = get_data url
+        imp ? resp.first : resp
+      end
+    end
+    # @return [String, nil] "Version" of this document's own edition entry; nil when no edition matches idrec
+    def fetch_edition
+      self_edition&.dig("Version")
+    end
+    # Fetch titles.
+    # @return [RelatonBib::TypedTitleStringCollection]
+    def fetch_titles
+      title = imp ? doc["imp_title_e"] : doc["rec_title"]
+      return [] if title.nil? || title.empty?
+      RelatonBib::TypedTitleString.from_string title, "en", "Latn"
+    end
+    # Fetch status.
+    # @return [RelatonBib::DocumentStatus, NilClass]
+    def fetch_status
+      inforce = imp ? imp_status : doc["status"]
+      return if inforce.nil? || inforce.empty?
+      status = inforce == "In force" ? "Published" : "Withdrawal"
+      RelatonBib::DocumentStatus.new(stage: status)
+    end
+    # Fetch dates
+    # @return [Array<Hash>]
+    def fetch_dates
+      array(doc_date).map { |on| { type: "published", on: on } }
+    end
+    # Fetch workgroup.
+    # @return [RelatonItu::EditorialGroup, NilClass]
+    def fetch_workgroup
+      group = itugroup(doc["sg"])
+      EditorialGroup.new(
+        bureau: hit.hit[:code].match(/(?<=-)./).to_s, group: group
+      )
+    end
+    # Fetch abstracts.
+    # @return [Array<Hash>]
+    def fetch_abstract
+      array(doc["summary"]).map do |content|
+        { content: content, language: "en", script: "Latn" }
+      end
+    end
+    # Fetch links.
+    # @return [Array<Hash>]
+    def fetch_link
+      link = imp ? doc["imp_dms_link"] : doc["handle_id"]
+      links = [{ type: "src", content: link }]
+      links << typed_link("pdf", doc["handle_id_pdf_link"]) if doc["handle_id_pdf_link"]
+      imp_word_link { |wlink| links << typed_link("word", wlink) }
+      links
+    end
+    def doc_date
+      return @doc_date if defined? @doc_date
+      date = imp ? doc["imp_approval_date"] : doc["approval_date"]
+      @doc_date = Date.parse(date).to_s rescue date
+    end
+    # Fetch relations.
+    # @return [Array<Hash>]
+    def fetch_relations
+      relations = []
+      editions.each do |ed|
+        next if ed["idrec"] == idrec
+        relations << create_relation("hasEdition", ed["title"], ed["rec_name"])
+      end
+      supplements.each { |supp| relations << create_relation("complementOf", supp["title_text"], supp["rec_name"]) }
+      relations
+    end
+    private
+    attr_reader :hit, :idrec, :imp
+    # Get data.
+    # @param url [String]
+    # @return [Hash, Array, nil] JSON-decoded response body
+    def get_data(url)
+      JSON.parse request_document(url).body
+    end
+    def request_document(url)
+      hit.hit_collection.agent.get url
+    rescue Mechanize::ResponseCodeError, SocketError, Timeout::Error, Errno::ECONNRESET,
+            EOFError, Net::ProtocolError, OpenSSL::SSL::SSLError => e
+      raise RelatonBib::RequestError, "Could not access #{url}: #{e.message}"
+    end
+    def editions
+      @editions ||= begin
+        url = RECEDITIONS % { idrec: idrec }
+        get_data(url) || []
+      end
+    end
+    def self_edition
+      @self_edition ||= editions.find { |ed| ed["idrec"] == idrec }
+    end
+    def imp_status # "status" of this document's own edition entry; nil when no edition matches idrec
+      self_edition&.dig("status")
+    end
+    # @param name [String]
+    # @return [RelatonItu::ItuGroup, nil] nil when name is nil or empty
+    def itugroup(name) # rubocop:disable Metrics/MethodLength
+      return if name.nil? || name.empty?
+      if name.include? "Study Group"
+        type = "study-group"
+        acronym = "SG"
+      elsif name.include? "Telecommunication Standardization Advisory Group"
+        type = "tsag"
+        acronym = "TSAG"
+      else
+        type = "work-group"
+        acronym = "WG"
+      end
+      ItuGroup.new name: name, type: type, acronym: acronym
+    end
+    def imp_word_link
+      return unless doc["imp_dms_link"]
+      @doc_page ||= request_document(doc["imp_dms_link"])
+      wrd_elm = @doc_page.at("//font[contains(.,'Word')]/../..")
+      yield wrd_elm[:href] if block_given? && wrd_elm
+    end
+    def create_relation(type, title_text, id) # builds a { type:, bibitem: } relation hash
+      title = []
+      if title_text && !title_text.empty? # test the text, not the freshly created (always empty) array
+        title << RelatonBib::TypedTitleString.new(content: title_text, language: "en", script: "Latn")
+      else
+        fref = RelatonBib::FormattedRef.new(content: id, language: "en", script: "Latn")
+      end
+      did = RelatonBib::DocumentIdentifier.new(id: id, type: "ITU", primary: true)
+      item = ItuBibliographicItem.new(title: title, formattedref: fref, docid: [did])
+      { type: type, bibitem: item } # use the caller's relation type rather than a hard-coded "hasEdition"
+    end
+    def supplements
+      @supplements ||= begin
+        if imp
+          []
+        else
+          url = RECSUPPLEMENTS % { idrec: idrec }
+          get_data(url) || []
+        end
+      end
+    end
+    # @param type [String]
+    # @param url [String]
+    def typed_link(type, url)
+      { type: type, content: url }
+    end
+  end
+end

data/lib/relaton_itu/scrapper.rb CHANGED Viewed

@@ -2,11 +2,13 @@
 require "nokogiri"
 require "net/http"
+require_relative "recommendation_parser"
+require_relative "radio_regulations_parser"
 module RelatonItu
   # Scrapper.
-  module Scrapper
-    ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
+  class Scrapper
+    attr_reader :hit, :imp
     TYPES = {
       "ISO" => "international-standard",
@@ -23,302 +25,145 @@ module RelatonItu
       "Guide" => "guide",
     }.freeze
-    class << self
-      # Parse page.
-      # @param hit [RelatonItu::Hit]
-      # @return [Hash]
-      def parse_page(hit, imp: false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
-        doc = get_page hit
-        return unless doc.code == "200"
-        if imp
-          a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
-          return unless a
-          doc = get_page hit, a[:href].to_s
-        end
-        # Fetch edition.
-        edition = doc.at("//table/tr/td[contains(@style,'color: white')]/span[contains(@id, 'Label8')]/b")&.text
-        docid = fetch_docid(doc, hit)
-        ItuBibliographicItem.new(
-          id: fetch_id(docid),
-          fetched: Date.today.to_s,
-          type: "standard",
-          docid: docid,
-          edition: edition,
-          language: ["en"],
-          script: ["Latn"],
-          title: fetch_titles(doc),
-          doctype: DocumentType.new(type: hit.hit[:type]),
-          docstatus: fetch_status(doc),
-          ics: [], # fetch_ics(doc),
-          date: fetch_dates(doc),
-          contributor: fetch_contributors(hit.hit[:code]),
-          editorialgroup: fetch_workgroup(hit.hit[:code], doc),
-          abstract: fetch_abstract(doc, hit),
-          copyright: fetch_copyright(hit.hit[:code], doc),
-          link: fetch_link(doc),
-          relation: fetch_relations(doc),
-          place: ["Geneva"],
-        )
-      end
-      private
-      def fetch_id(docid)
-        docid.find(&:primary).id.gsub(/[.\s()\/-]/, "")
-      end
-      # Fetch abstracts.
-      # @param doc [Mechanize::Page]
-      # @param hit [RelatonItu::Hit]
-      # @return [Array<Hash>]
-      def fetch_abstract(doc, hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
-        abstract_url = doc.at '//table/tr/td[contains(@style,"color: white")]/span[contains(@id, "lbl_dms")]/div'
-        if abstract_url
-          url = abstract_url[:onclick].match(/https?[^']+/).to_s
-          rsp = hit.hit_collection.agent.get url
-          d = Nokogiri::HTML rsp.body.encode(undef: :replace, replace: "")
-          d.css("p.MsoNormal").text.gsub("\r\n", "").squeeze(" ").gsub("\u00a0", "")
-        elsif a = doc.at('//table/tr/td/span[contains(@class, "observation")]/text()')
-          a.text.strip
-        end => content
-        return [] unless content
-        [{
-          content: content,
-          language: "en",
-          script: "Latn",
-        }]
-      rescue Mechanize::ResponseCodeError => e
-        Util.error "HTTP Service Unavailable: #{e.message}"
-        []
-      end
-      # Get page.
-      # @param hit [RelatonItu::Hit]
-      # @param url [String, nil]
-      # @return [Array<String, Nokogiri::HTML::Document>]
-      def get_page(hit, url = nil)
-        uri = url || hit.hit[:url]
-        hit.hit_collection.agent.get uri
-      rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
-             EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
-             Net::ProtocolError, OpenSSL::SSL::SSLError
-        raise RelatonBib::RequestError, "Could not access #{uri}"
-      end
-      # Fetch docid.
-      # @param doc [Mechanize::Page]
-      # @param hit [RelatonItu::Hit]
-      # @return [Hash]
-      def fetch_docid(doc, hit)
-        docids = hit.hit[:code].to_s.split(" | ").map { |c| createdocid(c) }
-        docids += parse_id(doc).map { |c| createdocid c.text } if docids.empty?
-        docids << createdocid(title) unless docids.any?
-        docids
-      end
-      def parse_id(doc)
-        doc.xpath(
-          "//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
-          "//td[.='Identical standard:']/following-sibling::td",
-          "//div/table[1]/tr[4]/td/strong",
-        )
-      end
+    def initialize(hit, imp: false)
+      @hit = hit
+      @imp = imp
+    end
-      # @param text [String]
-      # @return [RelatonBib::DocumentIdentifier]
-      def createdocid(text) # rubocop:disable Metrics/MethodLength
-        # %r{
-        #   ^(?<code>(?:(?:ITU-\w|ISO/IEC)\s)?[^(:]*)
-        #   (?:\s\(V(?<version>\d+)\))?
-        #   (?:\s\((?:(?<_month>\d{2})/)?(?<_year>\d{4})\))?
-        #   (?::[^(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
-        #   (?:\s(?<corr>(?:Amd|Cor)\.\s?\d+))?
-        #   # (\s\(((?<_cormonth>\d{2})\/)?(?<_coryear>\d{4})\))?
-        # }x =~ text.squeeze(" ")
-        # corr&.sub!(/\.\s?/, " ")
-        # id = [code.sub(/[[:space:]]$/, ""), corr].compact.join " "
-        # id += " (V#{version})" if version
-        # id += " - #{buldate}" if buldate
-        # type = id.match(%r{^\w+}).to_s
-        # type = "ITU" if type == "G"
-        if text.match?(/^(?:ISO|ETSI)/)
-          type = "ISO"
-          text.match(/[^(]+/).to_s.strip.squeeze(" ")
-        else
-          pubid = Pubid.parse(text)
-          type = pubid.prefix # == "G" ? "ITU" : pubid.prefix
-          pubid.to_s
-        end => id
-        RelatonBib::DocumentIdentifier.new(type: type, id: id, primary: true)
-      end
+    def self.parse_page(hit, imp: false)
+      new(hit, imp: imp).parse_page
+    end
-      # Fetch status.
-      # @param doc [Mechanize::Page]
-      # @return [RelatonBib::DocumentStatus, NilClass]
-      def fetch_status(doc)
-        s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
-                   "//p[contains(.,'Status :')]")
-        return unless s
+    # Parse page.
+    # @return [RelatonItu::ItuBibliographicItem, nil]
+    def parse_page # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+      return unless parser.doc
+      ItuBibliographicItem.new(
+        id: fetch_id,
+        fetched: Date.today.to_s,
+        type: "standard",
+        docid: docid,
+        edition: parser.fetch_edition,
+        language: ["en"],
+        script: ["Latn"],
+        title: parser.fetch_titles,
+        doctype: DocumentType.new(type: hit.hit[:type]),
+        docstatus: parser.fetch_status,
+        ics: [], # fetch_ics(doc),
+        date: parser.fetch_dates,
+        contributor: fetch_contributors,
+        editorialgroup: parser.fetch_workgroup,
+        abstract: parser.fetch_abstract,
+        copyright: fetch_copyright,
+        link: parser.fetch_link,
+        relation: parser.fetch_relations,
+        place: ["Geneva"],
+      )
+    end
-        status = s.text.include?("In force") ? "Published" : "Withdrawal"
-        RelatonBib::DocumentStatus.new(stage: status)
-      end
+    private
-      # Fetch workgroup.
-      # @param code [String]
-      # @param doc [Mechanize::Page]
-      # @return [RelatonItu::EditorialGroup, NilClass]
-      def fetch_workgroup(code, doc)
-        wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
-        # return unless wg
+    def idrec
+      return @idrec if defined? @idrec
-        group = wg && itugroup(wg.text)
-        EditorialGroup.new(
-          bureau: code.match(/(?<=-)./).to_s, group: group
-        )
-      end
+      @idrec = CGI.unescape(hit.hit[:url]).split("/").last.slice(/^\d+(?=-)/)&.to_i
+    end
-      # @param name [String]
-      # @return [RelatonItu::ItuGroup]
-      def itugroup(name) # rubocop:disable Metrics/MethodLength
-        if name.include? "Study Group"
-          type = "study-group"
-          acronym = "SG"
-        elsif name.include? "Telecommunication Standardization Advisory Group"
-          type = "tsag"
-          acronym = "TSAG"
+    def parser
+      @parser ||= begin
+        if idrec
+          RecommendationParser.new hit, idrec, imp
         else
-          type = "work-group"
-          acronym = "WG"
-        end
-        ItuGroup.new name: name, type: type, acronym: acronym
-      end
-      # Fetch relations.
-      # @param doc [Mechanize::Page]
-      # @return [Array<Hash>]
-      def fetch_relations(doc)
-        doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]')
-          .map do |r|
-          ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
-          fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en",
-                                              script: "Latn")
-          did = RelatonBib::DocumentIdentifier.new(id: ref.text, type: "ITU")
-          bibitem = ItuBibliographicItem.new(formattedref: fref, docid: [did],
-                                             type: "standard")
-          { type: "complementOf", bibitem: bibitem }
-        end
-      end
-      # Fetch titles.
-      # @param doc [Mechanize::Page]
-      # @return [RelatonBib::TypedTitleStringCollection]
-      def fetch_titles(doc)
-        t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
-        return [] unless t
-        RelatonBib::TypedTitleString.from_string t.text, "en", "Latn"
-      end
-      # Fetch dates
-      # @param doc [Mechanize::Page]
-      # @return [Array<Hash>]
-      def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
-        dates = []
-        date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]",
-                      "//p[contains(.,'Approved in')]")
-        pdate = date&.text&.match(/\d{4}-\d{2}-\d{2}/).to_s || ob_date(doc)
-        if pdate && !pdate&.empty?
-          dates << { type: "published", on: pdate }
-        elsif pdate = ob_date(doc)
-          dates << { type: "published", on: pdate }
-        end
-        dates
-      end
-      # Scrape Operational Bulletin date.
-      # @param doc [Mechanize::Page]
-      # @return [String]
-      def ob_date(doc)
-        pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
-        return unless pdate
-        roman_to_arabic pdate.text.match(%r{(?<=Year: )(\d{2}.\w+.)?\d{4}}).to_s
-      end
-      # Convert roman month number in string date to arabic number
-      # @param date [String]
-      # @return [String]
-      def roman_to_arabic(date)
-        %r{(?<rmonth>[IVX]+)} =~ date
-        if ROMAN_MONTHS.index(rmonth)
-          month = ROMAN_MONTHS.index(rmonth) + 1
-          Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
-        else date
+          RadioRegulationsParser.new hit
         end
       end
+    end
-      # Fetch contributors
-      # @param doc [Mechanize::Page]
-      # @return [Array<Hash>]
-      def fetch_contributors(code)
-        return [] unless code
+    def fetch_id
+      docid.find(&:primary).id.gsub(/[.\s()\/-]/, "")
+    end
-        abbrev = code.sub(/-\w\s.*/, "")
-        case abbrev
-        when "ITU"
-          name = "International Telecommunication Union"
-          url = "www.itu.int"
-        end
-        [{ entity: { name: name, url: url, abbreviation: abbrev },
-           role: [type: "publisher"] }]
+    # Fetch docid: identifiers built from the hit code, falling back to the parsed document's "rec_name".
+    # @return [Array<RelatonBib::DocumentIdentifier>]
+    def docid
+      @docid ||= begin
+        docids = hit.hit[:code].to_s.split(" | ").map { |c| createdocid(c) }
+        docids << createdocid(parser.doc["rec_name"]) if docids.empty? # Scrapper defines no `doc`; it lives on the parser
+        docids
       end
+    end
-      # Fetch links.
-      # @param doc [Mechanize::Page]
-      # @return [Array<Hash>]
-      def fetch_link(doc)
-        links = [{ type: "src", content: doc.uri.to_s }]
-        obp_elm = doc.at(
-          '//a[@title="Persistent link to download the PDF file"]',
-          "//font[contains(.,'PDF')]/../..",
-        )
-        links << typed_link("obp", obp_elm) if obp_elm
-        wrd_elm = doc.at("//font[contains(.,'Word')]/../..")
-        links << typed_link("word", wrd_elm) if wrd_elm
-        links
-      end
+    # @param text [String]
+    # @return [RelatonBib::DocumentIdentifier]
+    def createdocid(text) # rubocop:disable Metrics/MethodLength
+      # %r{
+      #   ^(?<code>(?:(?:ITU-\w|ISO/IEC)\s)?[^(:]*)
+      #   (?:\s\(V(?<version>\d+)\))?
+      #   (?:\s\((?:(?<_month>\d{2})/)?(?<_year>\d{4})\))?
+      #   (?::[^(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
+      #   (?:\s(?<corr>(?:Amd|Cor)\.\s?\d+))?
+      #   # (\s\(((?<_cormonth>\d{2})\/)?(?<_coryear>\d{4})\))?
+      # }x =~ text.squeeze(" ")
+      # corr&.sub!(/\.\s?/, " ")
+      # id = [code.sub(/[[:space:]]$/, ""), corr].compact.join " "
+      # id += " (V#{version})" if version
+      # id += " - #{buldate}" if buldate
+      # type = id.match(%r{^\w+}).to_s
+      # type = "ITU" if type == "G"
+      if text.match?(/^(?:ISO|ETSI)/)
+        type = "ISO"
+        text.match(/[^(]+/).to_s.strip.squeeze(" ")
+      else
+        pubid = Pubid.parse(text)
+        type = pubid.prefix # == "G" ? "ITU" : pubid.prefix
+        pubid.to_s
+      end => id
+      RelatonBib::DocumentIdentifier.new(type: type, id: id, primary: true)
+    end
-      # @param type [String]
-      # @param elm [Nokogiri::XML::Element]
-      def typed_link(type, elm)
-        {
-          type: type,
-          content: URI.join(HitCollection::DOMAIN, elm[:href].strip).to_s,
-        }
-      end
+    # def fetch_data(url)
+    #   resp = hit.hit_collection.agent.get url
+    #   JSON.parse(resp.body)
+    # rescue Mechanize::ResponseCodeError => e
+    #   Util.error "HTTP Service Unavailable: #{e.message}"
+    #   nil
+    # end
+    # Scrape Operational Bulletin date.
+    # @param doc [Mechanize::Page]
+    # @return [String]
+    # def ob_date(doc)
+    #   pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
+    #   return unless pdate
+    #   roman_to_arabic pdate.text.match(%r{(?<=Year: )(\d{2}.\w+.)?\d{4}}).to_s
+    # end
+    # Fetch contributors
+    # @return [Array<Hash>]
+    def fetch_contributors
+      return [] unless hit.hit[:code]
+      abbrev = hit.hit[:code].sub(/-\w\s.*/, "")
+      case abbrev
+      when "ITU"
+        name = "International Telecommunication Union"
+        url = "www.itu.int"
+      end
+      [{ entity: { name: name, url: url, abbreviation: abbrev }, role: [type: "publisher"] }]
+    end
-      # Fetch copyright.
-      # @param code [String]
-      # @param doc [Mechanize::Page]
-      # @return [Array<Hash>]
-      def fetch_copyright(code, doc)
-        abbreviation = code.match(/^[^-]+/).to_s
-        case abbreviation
-        when "ITU"
-          name = "International Telecommunication Union"
-          url = "www.itu.int"
-        end
-        fdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
-        from = fdate&.text || ob_date(doc)
-        [{ owner: [{ name: name, abbreviation: abbreviation, url: url }],
-           from: from }]
+    # Fetch copyright. Owner is derived from the hit code prefix; `from` is the parser's document date.
+    # @return [Array<Hash>]
+    def fetch_copyright
+      abbreviation = hit.hit[:code].to_s.match(/^[^-]+/).to_s # to_s: tolerate a missing code, as docid does
+      case abbreviation
+      when "ITU"
+        name = "International Telecommunication Union"
+        url = "www.itu.int"
       end
+      [{ owner: [{ name: name, abbreviation: abbreviation, url: url }], from: parser.doc_date }]
     end
   end
 end

data/lib/relaton_itu/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module RelatonItu
-  VERSION = "1.20.1".freeze
+  VERSION = "1.20.3".freeze
 end

data/lib/relaton_itu/xml_parser.rb CHANGED Viewed

@@ -14,8 +14,7 @@ module RelatonItu
       # @param ext [Nokogiri::XML::Element]
       # @return [RelatonItu::EditorialGroup]
       def fetch_editorialgroup(ext)
-        eg = ext.at("./editorialgroup")
-        return unless eg
+        return unless ext && (eg = ext.at "editorialgroup")
         EditorialGroup.new(
           bureau: eg.at("bureau")&.text,
@@ -51,8 +50,7 @@ module RelatonItu
       # @param ext [Nokogiri::XML::Element]
       # @return [RelatonItu::StructuredIdentifier]
       def fetch_structuredidentifier(ext)
-        sid = ext.at "./structuredidentifier"
-        return unless sid
+        return unless ext && (sid = ext.at "./structuredidentifier")
         br = sid.at("bureau").text
         dn = sid.at("docnumber").text

data/lib/relaton_itu.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 require "mechanize"
 require "parslet"
 require "digest/md5"
+require "relaton/core"
 require "relaton/index"
 require "relaton_bib"
 require "relaton_itu/version"

data/relaton_itu.gemspec CHANGED Viewed

@@ -28,5 +28,6 @@ Gem::Specification.new do |spec|
   spec.add_dependency "mechanize", "~> 2.10"
   spec.add_dependency "parslet", "~> 2.0.0"
   spec.add_dependency "relaton-bib", "~> 1.20.0"
+  spec.add_dependency "relaton-core", "~> 0.0.6"
   spec.add_dependency "relaton-index", "~> 0.2.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: relaton-itu
 version: !ruby/object:Gem::Version
-  version: 1.20.1
+  version: 1.20.3
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-01-16 00:00:00.000000000 Z
+date: 2026-01-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -52,6 +52,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.20.0
+- !ruby/object:Gem::Dependency
+  name: relaton-core
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.0.6
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.0.6
 - !ruby/object:Gem::Dependency
   name: relaton-index
   requirement: !ruby/object:Gem::Requirement
@@ -104,6 +118,8 @@ files:
 - lib/relaton_itu/itu_group.rb
 - lib/relaton_itu/processor.rb
 - lib/relaton_itu/pubid.rb
+- lib/relaton_itu/radio_regulations_parser.rb
+- lib/relaton_itu/recommendation_parser.rb
 - lib/relaton_itu/scrapper.rb
 - lib/relaton_itu/structured_identifier.rb
 - lib/relaton_itu/util.rb
@@ -129,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.27
+rubygems_version: 3.5.22
 signing_key:
 specification_version: 4
 summary: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem