relaton-itu 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/grammars/biblio.rng +89 -32
- data/grammars/isodoc.rng +450 -4
- data/lib/relaton_itu.rb +0 -5
- data/lib/relaton_itu/editorial_group.rb +6 -4
- data/lib/relaton_itu/hash_converter.rb +10 -1
- data/lib/relaton_itu/hit.rb +2 -2
- data/lib/relaton_itu/hit_collection.rb +38 -12
- data/lib/relaton_itu/itu_bibliographic_item.rb +6 -5
- data/lib/relaton_itu/itu_bibliography.rb +24 -21
- data/lib/relaton_itu/scrapper.rb +54 -103
- data/lib/relaton_itu/structured_identifier.rb +41 -0
- data/lib/relaton_itu/version.rb +1 -1
- data/lib/relaton_itu/xml_parser.rb +18 -13
- data/relaton-itu.gemspec +2 -2
- metadata +6 -6
- data/grammars/isostandard.rng +0 -522
    
        data/lib/relaton_itu.rb
    CHANGED
    
    | @@ -2,11 +2,6 @@ require "relaton_itu/version" | |
| 2 2 | 
             
            require "relaton_itu/itu_bibliography"
         | 
| 3 3 | 
             
            require "digest/md5"
         | 
| 4 4 |  | 
| 5 | 
            -
            # if defined? Relaton
         | 
| 6 | 
            -
            #   require_relative "relaton/processor"
         | 
| 7 | 
            -
            #   Relaton::Registry.instance.register(Relaton::RelatonItu::Processor)
         | 
| 8 | 
            -
            # end
         | 
| 9 | 
            -
             | 
| 10 5 | 
             
            module RelatonItu
         | 
| 11 6 | 
             
              class Error < StandardError; end
         | 
| 12 7 |  | 
| @@ -16,8 +16,9 @@ module RelatonItu | |
| 16 16 | 
             
                # @param subgroup [Hash, RelatonItu::ItuGroup, NilClass]
         | 
| 17 17 | 
             
                # @param workgroup [Hash, RelatonItu::ItuGroup, NilClass]
         | 
| 18 18 | 
             
                def initialize(bureau:, group:, subgroup: nil, workgroup: nil)
         | 
| 19 | 
            -
                   | 
| 20 | 
            -
             | 
| 19 | 
            +
                  unless BUREAUS.include? bureau
         | 
| 20 | 
            +
                    warn "[relaton-itu] WARNING: invalid bureau: #{bureau}"
         | 
| 21 | 
            +
                  end
         | 
| 21 22 | 
             
                  @bureau = bureau
         | 
| 22 23 | 
             
                  @group = group.is_a?(Hash) ? ItuGroup.new(group) : group
         | 
| 23 24 | 
             
                  @subgroup = subgroup.is_a?(Hash) ? ItuGroup.new(subgroup) : subgroup
         | 
| @@ -28,7 +29,7 @@ module RelatonItu | |
| 28 29 | 
             
                def to_xml(builder)
         | 
| 29 30 | 
             
                  builder.editorialgroup do
         | 
| 30 31 | 
             
                    builder.bureau bureau
         | 
| 31 | 
            -
                    builder.group { |b| group.to_xml b }
         | 
| 32 | 
            +
                    builder.group { |b| group.to_xml b } if group
         | 
| 32 33 | 
             
                    builder.subgroup { |b| group.to_xml b } if subgroup
         | 
| 33 34 | 
             
                    builder.workgroup { |b| group.to_xml b } if workgroup
         | 
| 34 35 | 
             
                  end
         | 
| @@ -36,7 +37,8 @@ module RelatonItu | |
| 36 37 |  | 
| 37 38 | 
             
                # @return [Hash]
         | 
| 38 39 | 
             
                def to_hash
         | 
| 39 | 
            -
                  hash = { "bureau" => bureau | 
| 40 | 
            +
                  hash = { "bureau" => bureau }
         | 
| 41 | 
            +
                  hash["group"] = group.to_hash if group
         | 
| 40 42 | 
             
                  hash["subgroup"] = subgroup.to_hash if subgroup
         | 
| 41 43 | 
             
                  hash["workgroup"] = workgroup.to_hash if workgroup
         | 
| 42 44 | 
             
                  hash
         | 
| @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            module RelatonItu
         | 
| 2 | 
            -
              class HashConverter <  | 
| 2 | 
            +
              class HashConverter < RelatonBib::HashConverter
         | 
| 3 3 | 
             
                class << self
         | 
| 4 4 | 
             
                  private
         | 
| 5 5 |  | 
| @@ -9,6 +9,15 @@ module RelatonItu | |
| 9 9 |  | 
| 10 10 | 
             
                    ret[:editorialgroup] = EditorialGroup.new eg
         | 
| 11 11 | 
             
                  end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                  # @param ret [Hash]
         | 
| 14 | 
            +
                  def structuredidentifier_hash_to_bib(ret)
         | 
| 15 | 
            +
                    return unless ret[:structuredidentifier]
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                    ret[:structuredidentifier] = StructuredIdentifier.new(
         | 
| 18 | 
            +
                      ret[:structuredidentifier],
         | 
| 19 | 
            +
                    )
         | 
| 20 | 
            +
                  end
         | 
| 12 21 | 
             
                end
         | 
| 13 22 | 
             
              end
         | 
| 14 23 | 
             
            end
         | 
    
        data/lib/relaton_itu/hit.rb
    CHANGED
    
    | @@ -4,9 +4,9 @@ module RelatonItu | |
| 4 4 | 
             
              # Hit.
         | 
| 5 5 | 
             
              class Hit < RelatonBib::Hit
         | 
| 6 6 | 
             
                # Parse page.
         | 
| 7 | 
            -
                # @return [ | 
| 7 | 
            +
                # @return [RelatonItu::ItuBibliographicItem]
         | 
| 8 8 | 
             
                def fetch
         | 
| 9 | 
            -
                  @fetch ||= Scrapper.parse_page  | 
| 9 | 
            +
                  @fetch ||= Scrapper.parse_page hit, hit_collection.gi_imp
         | 
| 10 10 | 
             
                end
         | 
| 11 11 | 
             
              end
         | 
| 12 12 | 
             
            end
         | 
| @@ -7,16 +7,39 @@ require "net/http" | |
| 7 7 | 
             
            module RelatonItu
         | 
| 8 8 | 
             
              # Page of hit collection.
         | 
| 9 9 | 
             
              class HitCollection < RelatonBib::HitCollection
         | 
| 10 | 
            -
                DOMAIN = "https://www.itu.int" | 
| 10 | 
            +
                DOMAIN = "https://www.itu.int"
         | 
| 11 11 |  | 
| 12 | 
            -
                # @ | 
| 12 | 
            +
                # @return [TrueClass, FalseClass]
         | 
| 13 | 
            +
                attr_reader :gi_imp
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                # @param ref [String]
         | 
| 13 16 | 
             
                # @param year [String]
         | 
| 14 | 
            -
                def initialize( | 
| 15 | 
            -
                   | 
| 16 | 
            -
                   | 
| 17 | 
            -
                   | 
| 18 | 
            -
                   | 
| 19 | 
            -
             | 
| 17 | 
            +
                def initialize(ref, year = nil)
         | 
| 18 | 
            +
                  text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
         | 
| 19 | 
            +
                  super text, year
         | 
| 20 | 
            +
                  @gi_imp = /\.Imp\d/.match?(ref)
         | 
| 21 | 
            +
                  uri = URI "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
         | 
| 22 | 
            +
                  data = { json: params.to_json }
         | 
| 23 | 
            +
                  resp = Net::HTTP.post(uri, data.to_json,
         | 
| 24 | 
            +
                                        "Content-Type" => "application/json")
         | 
| 25 | 
            +
                  @array = hits JSON.parse(resp.body)
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                private
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                # @return [String]
         | 
| 31 | 
            +
                def group
         | 
| 32 | 
            +
                  @group ||= if %r{(OB|Operational Bulletin) No} =~ text then "Publications"
         | 
| 33 | 
            +
                             else "Recommendations"
         | 
| 34 | 
            +
                             end
         | 
| 35 | 
            +
                end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                # rubocop:disable Metrics/MethodLength
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                # @return [Hash]
         | 
| 40 | 
            +
                def params
         | 
| 41 | 
            +
                  {
         | 
| 42 | 
            +
                    "Input" => text,
         | 
| 20 43 | 
             
                    "Start" => 0,
         | 
| 21 44 | 
             
                    "Rows" => 10,
         | 
| 22 45 | 
             
                    "SortBy" => "RELEVANCE",
         | 
| @@ -61,10 +84,13 @@ module RelatonItu | |
| 61 84 | 
             
                    "IP" => "",
         | 
| 62 85 | 
             
                    "SearchType" => "All",
         | 
| 63 86 | 
             
                  }
         | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
                # rubocop:enable Metrics/MethodLength
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                # @param data [Hash]
         | 
| 91 | 
            +
                # @return [Array<RelatonItu::Hit>]
         | 
| 92 | 
            +
                def hits(data)
         | 
| 93 | 
            +
                  data["results"].map do |h|
         | 
| 68 94 | 
             
                    code  = h["Media"]["Name"]
         | 
| 69 95 | 
             
                    title = h["Title"]
         | 
| 70 96 | 
             
                    url   = h["Redirection"]
         | 
| @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            module RelatonItu
         | 
| 2 | 
            -
              class ItuBibliographicItem <  | 
| 2 | 
            +
              class ItuBibliographicItem < RelatonBib::BibliographicItem
         | 
| 3 3 | 
             
                TYPES = %w[
         | 
| 4 4 | 
             
                  recommendation recommendation-supplement recommendation-amendment
         | 
| 5 5 | 
             
                  recommendation-corrigendum recommendation-errata recommendation-annex
         | 
| @@ -7,13 +7,14 @@ module RelatonItu | |
| 7 7 | 
             
                  joint-itu-iso-iec
         | 
| 8 8 | 
             
                ].freeze
         | 
| 9 9 |  | 
| 10 | 
            +
                # @params structuredidentifier [RelatonItu::StructuredIdentifier]
         | 
| 10 11 | 
             
                def initialize(**args)
         | 
| 11 | 
            -
                  @doctype = args.delete :doctype
         | 
| 12 | 
            -
                  if doctype && !TYPES.include?(doctype)
         | 
| 13 | 
            -
                     | 
| 12 | 
            +
                  # @doctype = args.delete :doctype
         | 
| 13 | 
            +
                  if args[:doctype] && !TYPES.include?(args[:doctype])
         | 
| 14 | 
            +
                    warn "[relaton-itu] WARNING: invalid doctype: #{args[:doctype]}"
         | 
| 14 15 | 
             
                  end
         | 
| 15 | 
            -
             | 
| 16 16 | 
             
                  super
         | 
| 17 | 
            +
                  # @doctype = args[:doctype]
         | 
| 17 18 | 
             
                end
         | 
| 18 19 | 
             
              end
         | 
| 19 20 | 
             
            end
         | 
| @@ -1,8 +1,9 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 | 
            -
            require " | 
| 3 | 
            +
            require "relaton_bib"
         | 
| 4 4 | 
             
            require "relaton_itu/itu_bibliographic_item"
         | 
| 5 5 | 
             
            require "relaton_itu/editorial_group"
         | 
| 6 | 
            +
            require "relaton_itu/structured_identifier"
         | 
| 6 7 | 
             
            require "relaton_itu/itu_group"
         | 
| 7 8 | 
             
            require "relaton_itu/scrapper"
         | 
| 8 9 | 
             
            require "relaton_itu/hit_collection"
         | 
| @@ -19,9 +20,9 @@ module RelatonItu | |
| 19 20 | 
             
                  # @return [RelatonItu::HitCollection]
         | 
| 20 21 | 
             
                  def search(text, year = nil)
         | 
| 21 22 | 
             
                    HitCollection.new text, year
         | 
| 22 | 
            -
                  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, | 
| 23 | 
            -
                         Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, | 
| 24 | 
            -
                         OpenSSL::SSL::SSLError
         | 
| 23 | 
            +
                  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
         | 
| 24 | 
            +
                         EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
         | 
| 25 | 
            +
                         Net::ProtocolError, OpenSSL::SSL::SSLError
         | 
| 25 26 | 
             
                    raise RelatonBib::RequestError, "Could not access http://www.itu.int"
         | 
| 26 27 | 
             
                  end
         | 
| 27 28 |  | 
| @@ -66,17 +67,17 @@ module RelatonItu | |
| 66 67 | 
             
                    nil
         | 
| 67 68 | 
             
                  end
         | 
| 68 69 |  | 
| 69 | 
            -
                  def fetch_pages(hits, threads)
         | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 | 
            -
                  end
         | 
| 70 | 
            +
                  # def fetch_pages(hits, threads)
         | 
| 71 | 
            +
                  #   workers = RelatonBib::WorkersPool.new threads
         | 
| 72 | 
            +
                  #   workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
         | 
| 73 | 
            +
                  #   hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
         | 
| 74 | 
            +
                  #   workers.end
         | 
| 75 | 
            +
                  #   workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
         | 
| 76 | 
            +
                  # end
         | 
| 76 77 |  | 
| 77 78 | 
             
                  def search_filter(code)
         | 
| 78 | 
            -
                    docidrx = %r{\w | 
| 79 | 
            -
                    c = code.match(docidrx).to_s
         | 
| 79 | 
            +
                    docidrx = %r{\w+\.\d+|\w\sSuppl\.\s\d+} # %r{^ITU-T\s[^\s]+}
         | 
| 80 | 
            +
                    c = code.sub(/Imp\s?/, "").match(docidrx).to_s
         | 
| 80 81 | 
             
                    warn "[relaton-itu] (\"#{code}\") fetching..."
         | 
| 81 82 | 
             
                    result = search(code)
         | 
| 82 83 | 
             
                    result.select do |i|
         | 
| @@ -93,16 +94,18 @@ module RelatonItu | |
| 93 94 | 
             
                  # If no match, returns any years which caused mismatch, for error reporting
         | 
| 94 95 | 
             
                  def isobib_results_filter(result, year)
         | 
| 95 96 | 
             
                    missed_years = []
         | 
| 96 | 
            -
                    result.each_slice(3) do |s| # ISO website only allows 3 connections
         | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 97 | 
            +
                    # result.each_slice(3) do |s| # ISO website only allows 3 connections
         | 
| 98 | 
            +
                    #   fetch_pages(s, 3).each do |r|
         | 
| 99 | 
            +
                    result.each do |r|
         | 
| 100 | 
            +
                      return { ret: r.fetch } if !year
         | 
| 99 101 |  | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            +
                      /\(\d{2}\/(?<pyear>\d{4})\)/ =~ r.hit[:code]
         | 
| 103 | 
            +
                      # r.date.select { |d| d.type == "published" }.each do |d|
         | 
| 104 | 
            +
                      return { ret: r.fetch } if year == pyear
         | 
| 102 105 |  | 
| 103 | 
            -
             | 
| 104 | 
            -
             | 
| 105 | 
            -
                      end
         | 
| 106 | 
            +
                      missed_years << pyear
         | 
| 107 | 
            +
                      # end
         | 
| 108 | 
            +
                      # end
         | 
| 106 109 | 
             
                    end
         | 
| 107 110 | 
             
                    { years: missed_years }
         | 
| 108 111 | 
             
                  end
         | 
    
        data/lib/relaton_itu/scrapper.rb
    CHANGED
    
    | @@ -3,16 +3,9 @@ | |
| 3 3 | 
             
            require "nokogiri"
         | 
| 4 4 | 
             
            require "net/http"
         | 
| 5 5 |  | 
| 6 | 
            -
            # Capybara.request_driver :poltergeist do |app|
         | 
| 7 | 
            -
            #   Capybara::Poltergeist::Driver.new app, js_errors: false
         | 
| 8 | 
            -
            # end
         | 
| 9 | 
            -
            # Capybara.default_driver = :poltergeist
         | 
| 10 | 
            -
             | 
| 11 6 | 
             
            module RelatonItu
         | 
| 12 7 | 
             
              # Scrapper.
         | 
| 13 | 
            -
              # rubocop:disable Metrics/ModuleLength
         | 
| 14 8 | 
             
              module Scrapper
         | 
| 15 | 
            -
                DOMAIN = "https://www.itu.int"
         | 
| 16 9 | 
             
                ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
         | 
| 17 10 |  | 
| 18 11 | 
             
                TYPES = {
         | 
| @@ -31,30 +24,26 @@ module RelatonItu | |
| 31 24 | 
             
                }.freeze
         | 
| 32 25 |  | 
| 33 26 | 
             
                class << self
         | 
| 34 | 
            -
                  #  | 
| 35 | 
            -
                  # @return [Array<Hash>]
         | 
| 36 | 
            -
                  # def get(text)
         | 
| 37 | 
            -
                  #   iso_workers = WorkersPool.new 4
         | 
| 38 | 
            -
                  #   iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
         | 
| 39 | 
            -
                  #   algolia_workers = start_algolia_search(text, iso_workers)
         | 
| 40 | 
            -
                  #   iso_docs = iso_workers.result
         | 
| 41 | 
            -
                  #   algolia_workers.end
         | 
| 42 | 
            -
                  #   algolia_workers.result
         | 
| 43 | 
            -
                  #   iso_docs
         | 
| 44 | 
            -
                  # end
         | 
| 27 | 
            +
                  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
         | 
| 45 28 |  | 
| 46 29 | 
             
                  # Parse page.
         | 
| 47 | 
            -
                  # @param  | 
| 30 | 
            +
                  # @param hit_data [Hash]
         | 
| 48 31 | 
             
                  # @return [Hash]
         | 
| 49 | 
            -
                   | 
| 50 | 
            -
                  def parse_page(hit_data)
         | 
| 32 | 
            +
                  def parse_page(hit_data, imp = false)
         | 
| 51 33 | 
             
                    url, doc = get_page hit_data[:url]
         | 
| 34 | 
            +
                    if imp
         | 
| 35 | 
            +
                      a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
         | 
| 36 | 
            +
                      return unless a
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                      url, doc = get_page URI.join(url, a[:href]).to_s
         | 
| 39 | 
            +
                    end
         | 
| 52 40 |  | 
| 53 41 | 
             
                    # Fetch edition.
         | 
| 54 42 | 
             
                    edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
         | 
| 55 43 |  | 
| 56 44 | 
             
                    ItuBibliographicItem.new(
         | 
| 57 45 | 
             
                      fetched: Date.today.to_s,
         | 
| 46 | 
            +
                      type: "standard",
         | 
| 58 47 | 
             
                      docid: fetch_docid(doc),
         | 
| 59 48 | 
             
                      edition: edition,
         | 
| 60 49 | 
             
                      language: ["en"],
         | 
| @@ -73,7 +62,7 @@ module RelatonItu | |
| 73 62 | 
             
                      place: ["Geneva"],
         | 
| 74 63 | 
             
                    )
         | 
| 75 64 | 
             
                  end
         | 
| 76 | 
            -
                  # rubocop:enable Metrics/AbcSize | 
| 65 | 
            +
                  # rubocop:enable Metrics/AbcSize
         | 
| 77 66 |  | 
| 78 67 | 
             
                  private
         | 
| 79 68 |  | 
| @@ -96,37 +85,23 @@ module RelatonItu | |
| 96 85 | 
             
                    }]
         | 
| 97 86 | 
             
                  end
         | 
| 98 87 |  | 
| 99 | 
            -
                  # Get langs.
         | 
| 100 | 
            -
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 101 | 
            -
                  # @return [Array<Hash>]
         | 
| 102 | 
            -
                  # def langs(doc)
         | 
| 103 | 
            -
                  #   lgs = [{ lang: 'en' }]
         | 
| 104 | 
            -
                  #   doc.css('ul#lang-switcher ul li a').each do |lang_link|
         | 
| 105 | 
            -
                  #     lang_path = lang_link.attr('href')
         | 
| 106 | 
            -
                  #     lang = lang_path.match(%r{^\/(fr)\/})
         | 
| 107 | 
            -
                  #     lgs << { lang: lang[1], path: lang_path } if lang
         | 
| 108 | 
            -
                  #   end
         | 
| 109 | 
            -
                  #   lgs
         | 
| 110 | 
            -
                  # end
         | 
| 111 | 
            -
             | 
| 112 | 
            -
                  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
         | 
| 113 88 | 
             
                  # Get page.
         | 
| 114 89 | 
             
                  # @param path [String] page's path
         | 
| 115 | 
            -
                  # @return [Array<Nokogiri::HTML::Document | 
| 90 | 
            +
                  # @return [Array<String, Nokogiri::HTML::Document>]
         | 
| 116 91 | 
             
                  def get_page(url)
         | 
| 117 92 | 
             
                    uri = URI url
         | 
| 118 | 
            -
                    resp = Net::HTTP.get_response(uri) | 
| 93 | 
            +
                    resp = Net::HTTP.get_response(uri)
         | 
| 119 94 | 
             
                    until resp.code == "200"
         | 
| 120 95 | 
             
                      uri = URI resp["location"] if resp.code =~ /^30/
         | 
| 121 | 
            -
                      resp = Net::HTTP.get_response(uri) | 
| 96 | 
            +
                      resp = Net::HTTP.get_response(uri)
         | 
| 122 97 | 
             
                    end
         | 
| 123 98 | 
             
                    [uri.to_s, Nokogiri::HTML(resp.body)]
         | 
| 124 | 
            -
                  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, | 
| 125 | 
            -
                         Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, | 
| 126 | 
            -
                         OpenSSL::SSL::SSLError
         | 
| 99 | 
            +
                  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
         | 
| 100 | 
            +
                         EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
         | 
| 101 | 
            +
                         Net::ProtocolError, OpenSSL::SSL::SSLError
         | 
| 127 102 | 
             
                    raise RelatonBib::RequestError, "Could not access #{url}"
         | 
| 128 103 | 
             
                  end
         | 
| 129 | 
            -
                  # rubocop:enable Metrics/ | 
| 104 | 
            +
                  # rubocop:enable Metrics/MethodLength
         | 
| 130 105 |  | 
| 131 106 | 
             
                  # Fetch docid.
         | 
| 132 107 | 
             
                  # @param doc [Nokogiri::HTML::Document]
         | 
| @@ -135,9 +110,11 @@ module RelatonItu | |
| 135 110 | 
             
                    doc.xpath(
         | 
| 136 111 | 
             
                      "//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
         | 
| 137 112 | 
             
                      "//td[.='Identical standard:']/following-sibling::td",
         | 
| 113 | 
            +
                      "//div/table[1]/tr[4]/td/strong",
         | 
| 138 114 | 
             
                    ).map do |code|
         | 
| 139 | 
            -
                      id = code.text.match(%r{^.*?(?= \()}).to_s.squeeze(" ")
         | 
| 115 | 
            +
                      id = code.text.match(%r{^.*?(?= \()|\w\.Imp\s?\d+}).to_s.squeeze(" ")
         | 
| 140 116 | 
             
                      type = id.match(%r{^\w+}).to_s
         | 
| 117 | 
            +
                      type = "ITU" if type == "G"
         | 
| 141 118 | 
             
                      RelatonBib::DocumentIdentifier.new(type: type, id: id)
         | 
| 142 119 | 
             
                    end
         | 
| 143 120 | 
             
                  end
         | 
| @@ -146,10 +123,11 @@ module RelatonItu | |
| 146 123 | 
             
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 147 124 | 
             
                  # @return [RelatonBib::DocumentStatus, NilClass]
         | 
| 148 125 | 
             
                  def fetch_status(doc)
         | 
| 149 | 
            -
                    s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]" | 
| 126 | 
            +
                    s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
         | 
| 127 | 
            +
                               "//p[contains(.,'Status :')]")
         | 
| 150 128 | 
             
                    return unless s
         | 
| 151 129 |  | 
| 152 | 
            -
                    status = s.text | 
| 130 | 
            +
                    status = s.text.include?("In force") ? "Published" : "Withdrawal"
         | 
| 153 131 | 
             
                    RelatonBib::DocumentStatus.new(stage: status)
         | 
| 154 132 | 
             
                  end
         | 
| 155 133 |  | 
| @@ -191,55 +169,22 @@ module RelatonItu | |
| 191 169 | 
             
                  # @return [Array<Hash>]
         | 
| 192 170 | 
             
                  def fetch_relations(doc)
         | 
| 193 171 | 
             
                    doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
         | 
| 194 | 
            -
                      # r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
         | 
| 195 172 | 
             
                      ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
         | 
| 196 | 
            -
                      # url = DOMAIN + ref[:href].sub(/^\./, "/ITU-T/recommendations")
         | 
| 197 173 | 
             
                      fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
         | 
| 198 | 
            -
                      bibitem =  | 
| 174 | 
            +
                      bibitem = ItuBibliographicItem.new(formattedref: fref, type: "standard")
         | 
| 199 175 | 
             
                      { type: "complements", bibitem: bibitem }
         | 
| 200 176 | 
             
                    end
         | 
| 201 177 | 
             
                  end
         | 
| 202 178 | 
             
                  # rubocop:enable Metrics/MethodLength
         | 
| 203 179 |  | 
| 204 | 
            -
                  # Fetch type.
         | 
| 205 | 
            -
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 206 | 
            -
                  # @return [String]
         | 
| 207 | 
            -
                  # def fetch_type(_doc)
         | 
| 208 | 
            -
                  #   "recommendation"
         | 
| 209 | 
            -
                  # end
         | 
| 210 | 
            -
             | 
| 211 180 | 
             
                  # Fetch titles.
         | 
| 212 181 | 
             
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 213 182 | 
             
                  # @return [Array<Hash>]
         | 
| 214 183 | 
             
                  def fetch_titles(doc)
         | 
| 215 | 
            -
                     | 
| 216 | 
            -
                    # t = hit_data[:title] if t.empty?
         | 
| 217 | 
            -
                    t = doc.at("//td[@class='title']")
         | 
| 184 | 
            +
                    t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
         | 
| 218 185 | 
             
                    return [] unless t
         | 
| 219 | 
            -
             | 
| 220 | 
            -
                     | 
| 221 | 
            -
                    when 0
         | 
| 222 | 
            -
                      intro, main, part = nil, "", nil
         | 
| 223 | 
            -
                    when 1
         | 
| 224 | 
            -
                      intro, main, part = nil, titles[0], nil
         | 
| 225 | 
            -
                    when 2
         | 
| 226 | 
            -
                      if /^(Part|Partie) \d+:/ =~ titles[1]
         | 
| 227 | 
            -
                        intro, main, part = nil, titles[0], titles[1]
         | 
| 228 | 
            -
                      else
         | 
| 229 | 
            -
                        intro, main, part = titles[0], titles[1], nil
         | 
| 230 | 
            -
                      end
         | 
| 231 | 
            -
                    when 3
         | 
| 232 | 
            -
                      intro, main, part = titles[0], titles[1], titles[2]
         | 
| 233 | 
            -
                    else
         | 
| 234 | 
            -
                      intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
         | 
| 235 | 
            -
                    end
         | 
| 236 | 
            -
                    [{
         | 
| 237 | 
            -
                      title_intro: intro,
         | 
| 238 | 
            -
                      title_main: main,
         | 
| 239 | 
            -
                      title_part: part,
         | 
| 240 | 
            -
                      language: "en",
         | 
| 241 | 
            -
                      script: "Latn",
         | 
| 242 | 
            -
                    }]
         | 
| 186 | 
            +
             | 
| 187 | 
            +
                    RelatonBib::TypedTitleString.from_string t.text, "en", "Latn"
         | 
| 243 188 | 
             
                  end
         | 
| 244 189 |  | 
| 245 190 | 
             
                  # Fetch dates
         | 
| @@ -247,10 +192,11 @@ module RelatonItu | |
| 247 192 | 
             
                  # @return [Array<Hash>]
         | 
| 248 193 | 
             
                  def fetch_dates(doc)
         | 
| 249 194 | 
             
                    dates = []
         | 
| 250 | 
            -
                     | 
| 251 | 
            -
             | 
| 252 | 
            -
                     | 
| 253 | 
            -
             | 
| 195 | 
            +
                    date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]",
         | 
| 196 | 
            +
                                  "//p[contains(.,'Approved in')]")
         | 
| 197 | 
            +
                    pdate = date&.text&.match(/\d{4}-\d{2}-\d{2}/).to_s || ob_date(doc)
         | 
| 198 | 
            +
                    if pdate && !pdate&.empty?
         | 
| 199 | 
            +
                      dates << { type: "published", on: pdate }
         | 
| 254 200 | 
             
                    end
         | 
| 255 201 | 
             
                    dates
         | 
| 256 202 | 
             
                  end
         | 
| @@ -278,40 +224,45 @@ module RelatonItu | |
| 278 224 | 
             
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 279 225 | 
             
                  # @return [Array<Hash>]
         | 
| 280 226 | 
             
                  def fetch_contributors(code)
         | 
| 227 | 
            +
                    return [] unless code
         | 
| 228 | 
            +
             | 
| 281 229 | 
             
                    abbrev = code.sub(/-\w\s.*/, "")
         | 
| 282 230 | 
             
                    case abbrev
         | 
| 283 231 | 
             
                    when "ITU"
         | 
| 284 232 | 
             
                      name = "International Telecommunication Union"
         | 
| 285 233 | 
             
                      url = "www.itu.int"
         | 
| 286 234 | 
             
                    end
         | 
| 287 | 
            -
                    [{ entity: { name: name, url: url, abbreviation: abbrev }, | 
| 235 | 
            +
                    [{ entity: { name: name, url: url, abbreviation: abbrev },
         | 
| 236 | 
            +
                       role: [type: "publisher"] }]
         | 
| 288 237 | 
             
                  end
         | 
| 289 238 |  | 
| 290 | 
            -
                  # Fetch ICS.
         | 
| 291 | 
            -
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 292 | 
            -
                  # @return [Array<Hash>]
         | 
| 293 | 
            -
                  # def fetch_ics(doc)
         | 
| 294 | 
            -
                  #   doc.xpath('//th[contains(text(), "ICS")]/following-sibling::td/a').map do |i|
         | 
| 295 | 
            -
                  #     code = i.text.match(/[\d\.]+/).to_s.split '.'
         | 
| 296 | 
            -
                  #     { field: code[0], group: code[1], subgroup: code[2] }
         | 
| 297 | 
            -
                  #   end
         | 
| 298 | 
            -
                  # end
         | 
| 299 | 
            -
             | 
| 300 239 | 
             
                  # Fetch links.
         | 
| 301 240 | 
             
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 302 241 | 
             
                  # @param url [String]
         | 
| 303 242 | 
             
                  # @return [Array<Hash>]
         | 
| 304 243 | 
             
                  def fetch_link(doc, url)
         | 
| 305 244 | 
             
                    links = [{ type: "src", content: url }]
         | 
| 306 | 
            -
                     | 
| 307 | 
            -
             | 
| 245 | 
            +
                    obp_elm = doc.at(
         | 
| 246 | 
            +
                      '//a[@title="Persistent link to download the PDF file"]',
         | 
| 247 | 
            +
                      "//font[contains(.,'PDF')]/../..",
         | 
| 248 | 
            +
                    )
         | 
| 249 | 
            +
                    links << typed_link("obp", obp_elm) if obp_elm
         | 
| 250 | 
            +
                    wrd_elm = doc.at("//font[contains(.,'Word')]/../..")
         | 
| 251 | 
            +
                    links << typed_link("word", wrd_elm) if wrd_elm
         | 
| 308 252 | 
             
                    links
         | 
| 309 253 | 
             
                  end
         | 
| 310 254 |  | 
| 255 | 
            +
                  def typed_link(type, elm)
         | 
| 256 | 
            +
                    {
         | 
| 257 | 
            +
                      type: type,
         | 
| 258 | 
            +
                      content: URI.join(HitCollection::DOMAIN + elm[:href].strip).to_s,
         | 
| 259 | 
            +
                    }
         | 
| 260 | 
            +
                  end
         | 
| 261 | 
            +
             | 
| 311 262 | 
             
                  # Fetch copyright.
         | 
| 312 263 | 
             
                  # @param code [String]
         | 
| 313 264 | 
             
                  # @param doc [Nokogiri::HTML::Document]
         | 
| 314 | 
            -
                  # @return [Hash]
         | 
| 265 | 
            +
                  # @return [Array<Hash>]
         | 
| 315 266 | 
             
                  def fetch_copyright(code, doc)
         | 
| 316 267 | 
             
                    abbreviation = code.match(/^[^-]+/).to_s
         | 
| 317 268 | 
             
                    case abbreviation
         | 
| @@ -321,9 +272,9 @@ module RelatonItu | |
| 321 272 | 
             
                    end
         | 
| 322 273 | 
             
                    fdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
         | 
| 323 274 | 
             
                    from = fdate&.text || ob_date(doc)
         | 
| 324 | 
            -
                    { owner: { name: name, abbreviation: abbreviation, url: url }, | 
| 275 | 
            +
                    [{ owner: [{ name: name, abbreviation: abbreviation, url: url }],
         | 
| 276 | 
            +
                       from: from }]
         | 
| 325 277 | 
             
                  end
         | 
| 326 278 | 
             
                end
         | 
| 327 279 | 
             
              end
         | 
| 328 | 
            -
              # rubocop:enable Metrics/ModuleLength
         | 
| 329 280 | 
             
            end
         |