relaton-itu 1.7.3 → 1.7.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 971645ffb5b0111df7d841c47f6ac1a64904cc85274218f2f6ff622dbba3b576
4
- data.tar.gz: 68bc7e0bbb844896c7bf30c6d3e5747a9d6f0e68aa9384f9e761f2881f3be2bc
3
+ metadata.gz: 249fe896ec8a77979ca15d6a42da98ad2ac2620cfe8dc0f468cd14277c5a35b0
4
+ data.tar.gz: 62415ed835abc49cf00d3048b52556b3f718a4ad0dc531ec2c20572b95305210
5
5
  SHA512:
6
- metadata.gz: 83a9d8ee974bfe44cad5715db0199dfcf1eec9709d2a1d778d08cdc21379a69063bb6d6f0ee818b962f825c684d680bb4dda5cbba9b0ca48b24dc8dde4876cdd
7
- data.tar.gz: 96a7cf63dd9ff84a115959d7766544e8d8f5f1328fa976737875b7808152939f1168e6a1644e0eb5e4af5cd22f6077b232cf45968e6945bc1a86d2a7721f8e19
6
+ metadata.gz: 65a5bcf91f851cc4ec3139fad83b0c83f143b1bafefe8c9638e34072f7d82f76b77517fedf153fcdf7b63ee903fcd372c64e378edaeb231835f477f004f7e94a
7
+ data.tar.gz: 61cdc7df34b24f5d3f3e56b967e9e2b34337bc164b32691747853660cbb6c9e337a6eee058162f97e218da356f4028d7c107b4be4ed7dc10f99253e6985fccd9
@@ -0,0 +1,46 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
+ name: rake
4
+
5
+ on:
6
+ push:
7
+ branches: [ master, main ]
8
+ tags: [ v* ]
9
+ pull_request:
10
+
11
+ jobs:
12
+ rake:
13
+ name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
+ runs-on: ${{ matrix.os }}
15
+ continue-on-error: ${{ matrix.experimental }}
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ ruby: [ '2.7', '2.6', '2.5', '2.4' ]
20
+ os: [ ubuntu-latest, windows-latest, macos-latest ]
21
+ experimental: [ false ]
22
+ include:
23
+ - ruby: '3.0'
24
+ os: 'ubuntu-latest'
25
+ experimental: true
26
+ - ruby: '3.0'
27
+ os: 'windows-latest'
28
+ experimental: true
29
+ - ruby: '3.0'
30
+ os: 'macos-latest'
31
+ experimental: true
32
+ steps:
33
+ - uses: actions/checkout@v2
34
+ with:
35
+ submodules: true
36
+
37
+ # https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
38
+ - if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
39
+ run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
40
+
41
+ - uses: ruby/setup-ruby@v1
42
+ with:
43
+ ruby-version: ${{ matrix.ruby }}
44
+ bundler-cache: true
45
+
46
+ - run: bundle exec rake
data/README.adoc CHANGED
@@ -97,6 +97,29 @@ RelatonItu::ItuBibliography.get "ITU-T G.989.2/Amd 1"
97
97
  ...
98
98
  ----
99
99
 
100
+ === Get ITU-R documents
101
+
102
+ [source,ruby]
103
+ ----
104
+ RelatonItu::ItuBibliography.get "ITU-R REC-BO.600-1"
105
+ [relaton-itu] ("ITU-R REC-BO.600-1") fetching...
106
+ [relaton-itu] ("ITU-R REC-BO.600-1") found R-REC-BO.600-1
107
+ => #<RelatonItu::ItuBibliographicItem:0x007f84e9930498
108
+ ...
109
+ ----
110
+
111
+ === Get ITU-R Radio Regulations (RR)
112
+
113
+ [source,ruby]
114
+ ----
115
+ RelatonItu::ItuBibliography.get 'ITU-R RR (2020)'
116
+ [relaton-itu] ("ITU-R RR") fetching...
117
+ [relaton-itu] WARNING: invalid doctype: publication
118
+ [relaton-itu] ("ITU-R RR") found ITU-R RR
119
+ => #<RelatonItu::ItuBibliographicItem:0x007fb4b55eb660
120
+ ...
121
+ ----
122
+
100
123
  === Create bibliographic item from YAML
101
124
  [source,ruby]
102
125
  ----
@@ -104,11 +127,7 @@ hash = YAML.load_file 'spec/examples/itu_bib_item.yml'
104
127
  => {"id"=>"ITU-T L.163 (11/2018)",
105
128
  ...
106
129
 
107
- bib_hash = RelatonItu::HashConverter.hash_to_bib hash
108
- => {:id=>"ITU-T L.163 (11/2018)",
109
- ...
110
-
111
- RelatonItu::ItuBibliographicItem.new bib_hash
130
+ RelatonItu::ItuBibliographicItem.from_hash hash
112
131
  => #<RelatonItu::ItuBibliographicItem:0x007fd88ac02aa0
113
132
  ...
114
133
  ----
data/bin/rspec ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rspec' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rspec-core", "rspec")
data/lib/relaton_itu.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require "mechanize"
1
2
  require "relaton_itu/version"
2
3
  require "relaton_itu/itu_bibliography"
3
4
  require "digest/md5"
@@ -20,7 +20,7 @@ module RelatonItu
20
20
  warn "[relaton-itu] WARNING: invalid bureau: #{bureau}"
21
21
  end
22
22
  @bureau = bureau
23
- @group = group.is_a?(Hash) ? ItuGroup.new(group) : group
23
+ @group = group.is_a?(Hash) ? ItuGroup.new(**group) : group
24
24
  @subgroup = subgroup.is_a?(Hash) ? ItuGroup.new(subgroup) : subgroup
25
25
  @workgroup = workgroup.is_a?(Hash) ? ItuGroup.new(workgroup) : workgroup
26
26
  end
@@ -3,11 +3,20 @@ module RelatonItu
3
3
  class << self
4
4
  private
5
5
 
6
+ #
7
+ # Overrides superclass's method
8
+ #
9
+ # @param item [Hash]
10
+ # @return [RelatonItu::ItuBibliographicItem]
11
+ def bib_item(item)
12
+ ItuBibliographicItem.new(**item)
13
+ end
14
+
6
15
  def editorialgroup_hash_to_bib(ret)
7
16
  eg = ret[:editorialgroup]
8
17
  return unless eg
9
18
 
10
- ret[:editorialgroup] = EditorialGroup.new eg
19
+ ret[:editorialgroup] = EditorialGroup.new **eg
11
20
  end
12
21
 
13
22
  # @param ret [Hash]
@@ -15,7 +24,7 @@ module RelatonItu
15
24
  return unless ret[:structuredidentifier]
16
25
 
17
26
  ret[:structuredidentifier] = StructuredIdentifier.new(
18
- ret[:structuredidentifier]
27
+ **ret[:structuredidentifier]
19
28
  )
20
29
  end
21
30
  end
@@ -3,10 +3,12 @@
3
3
  module RelatonItu
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
+ attr_writer :fetch
7
+
6
8
  # Parse page.
7
9
  # @return [RelatonItu::ItuBibliographicItem]
8
10
  def fetch
9
- @fetch ||= Scrapper.parse_page hit, hit_collection.gi_imp
11
+ @fetch ||= Scrapper.parse_page self, hit_collection.gi_imp
10
12
  end
11
13
  end
12
14
  end
@@ -12,25 +12,48 @@ module RelatonItu
12
12
  # @return [TrueClass, FalseClass]
13
13
  attr_reader :gi_imp
14
14
 
15
+ # @return [Mechanize]
16
+ attr_reader :agent
17
+
15
18
  # @param ref [String]
16
19
  # @param year [String]
17
- def initialize(ref, year = nil)
20
+ def initialize(ref, year = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
18
21
  text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
19
22
  super text, year
23
+ @agent = Mechanize.new
24
+ agent.user_agent_alias = "Mac Safari"
20
25
  @gi_imp = /\.Imp\d/.match?(ref)
21
- uri = URI "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
22
- data = { json: params.to_json }
23
- resp = Net::HTTP.post(uri, data.to_json,
24
- "Content-Type" => "application/json")
25
- @array = hits JSON.parse(resp.body)
26
+ if ref.match? /^(ITU-T|ITU-R\sRR)/
27
+ url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
28
+ data = { json: params.to_json }
29
+ resp = agent.post url, data.to_json, "Content-Type" => "application/json"
30
+ @array = hits JSON.parse(resp.body)
31
+ elsif ref.match? /^ITU-R/
32
+ rf = ref.sub(/^ITU-R\s/, "").upcase
33
+ url = "https://raw.githubusercontent.com/relaton/relaton-data-itu-r/master/data/#{rf}.yaml"
34
+ resp = Net::HTTP.get_response(URI(url))
35
+ if resp.code == "404"
36
+ @array = []
37
+ return
38
+ end
39
+
40
+ hash = YAML.safe_load resp.body
41
+ item_hash = HashConverter.hash_to_bib(hash)
42
+ item = ItuBibliographicItem.new **item_hash
43
+ hit = Hit.new({ url: url }, self)
44
+ hit.fetch = item
45
+ @array = [hit]
46
+ end
26
47
  end
27
48
 
28
49
  private
29
50
 
30
51
  # @return [String]
31
52
  def group
32
- @group ||= if %r{OB|Operational Bulletin}.match? text then "Publications"
33
- else "Recommendations"
53
+ @group ||= case text
54
+ when %r{OB|Operational Bulletin}, %r{^ITU-R\sRR}
55
+ "Publications"
56
+ when %r{^ITU-T} then "Recommendations"
34
57
  end
35
58
  end
36
59
 
@@ -46,7 +69,7 @@ module RelatonItu
46
69
  "ExactPhrase" => false,
47
70
  "CollectionName" => "General",
48
71
  "CollectionGroup" => group,
49
- "Sector" => "t",
72
+ "Sector" => text.match(/(?<=^ITU-)\w/).to_s.downcase,
50
73
  "Criterias" => [{
51
74
  "Name" => "Search in",
52
75
  "Criterias" => [
@@ -93,7 +116,7 @@ module RelatonItu
93
116
  code = h["Media"]["Name"]
94
117
  title = h["Title"]
95
118
  url = h["Redirection"]
96
- type = group.downcase[0...-1]
119
+ type = h["Collection"]["Group"].downcase[0...-1]
97
120
  Hit.new({ code: code, title: title, url: url, type: type }, self)
98
121
  end
99
122
  end
@@ -14,5 +14,12 @@ module RelatonItu
14
14
  end
15
15
  super
16
16
  end
17
+
18
+ # @param hash [Hash]
19
+ # @return [RelatonItu::ItuBibliographicItem]
20
+ def self.from_hash(hash)
21
+ item_hash = ::RelatonItu::HashConverter.hash_to_bib(hash)
22
+ new **item_hash
23
+ end
17
24
  end
18
25
  end
@@ -19,20 +19,27 @@ module RelatonItu
19
19
  # @param text [String]
20
20
  # @return [RelatonItu::HitCollection]
21
21
  def search(text, year = nil)
22
+ # code = text.sub(/(?<=ITU-T\s\w)\.(\w+\.)(?=\d+)/, ' \1')
23
+ if text =~ /(ITU-T\s\w)\.(Suppl\.|Annex)\s?(\w?\d+)/
24
+ correct_ref = "#{$~[1]} #{$~[2]} #{$~[3]}"
25
+ warn "[relaton-itu] WARNING: Incorrect reference #{text}"
26
+ warn "[relaton-itu] the reference should be #{correct_ref}"
27
+ end
22
28
  HitCollection.new text, year
23
29
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
24
30
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
25
- Net::ProtocolError, OpenSSL::SSL::SSLError
26
- raise RelatonBib::RequestError, "Could not access http://www.itu.int"
31
+ Net::ProtocolError, URI::InvalidURIError => e
32
+ raise RelatonBib::RequestError, e.message
27
33
  end
28
34
 
29
35
  # @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
30
36
  # @param year [String] the year the standard was published (optional)
31
- # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required
37
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
38
+ # reference is required
32
39
  # @return [String] Relaton XML serialisation of reference
33
40
  def get(code, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
34
41
  if year.nil?
35
- /^(?<code1>[^\s]+\s[^\s]+)\s\(\d{2}\/(?<year1>\d+)\)$/ =~ code
42
+ /^(?<code1>[^\s]+\s[^\s]+)\s\((\d{2}\/)?(?<year1>\d+)\)$/ =~ code
36
43
  unless code1.nil?
37
44
  code = code1
38
45
  year = year1
@@ -54,11 +61,13 @@ module RelatonItu
54
61
  id = year ? "#{code}:#{year}" : code
55
62
  warn "[relaton-itu] WARNING: no match found online for #{id}. "\
56
63
  "The code must be exactly like it is on the standards website."
57
- warn "[relaton-itu] (There was no match for #{year}, though there were matches "\
58
- "found for #{missed_years.join(', ')}.)" unless missed_years.empty?
59
- if /\d-\d/ =~ code
60
- warn "[relaton-itu] The provided document part may not exist, or the document "\
61
- "may no longer be published in parts."
64
+ unless missed_years.empty?
65
+ warn "[relaton-itu] (There was no match for #{year}, though there "\
66
+ "were matches found for #{missed_years.join(', ')}.)"
67
+ end
68
+ if /\d-\d/.match? code
69
+ warn "[relaton-itu] The provided document part may not exist, or "\
70
+ "the document may no longer be published in parts."
62
71
  else
63
72
  warn "[relaton-itu] If you wanted to cite all document parts for the reference, "\
64
73
  "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
@@ -69,10 +78,11 @@ module RelatonItu
69
78
 
70
79
  def search_filter(code, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
71
80
  %r{
72
- ^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+)
81
+ ^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+(?:\/\w[\.\d]+)?)
82
+ (\s\(?(?<ver1>v\d+)\)?)?
73
83
  (\s\(((?<month1>\d{2})\/)?(?<year1>\d{4})\))?
74
84
  (\s-\s(?<buldate1>\d{2}\.\w{1,4}\.\d{4}))?
75
- (\/(?<corr1>(Amd|Cor)\s?\d+))?
85
+ (\s(?<corr1>(Amd|Cor|Amendment|Corrigendum)\.?\s?\d+))?
76
86
  (\s\(((?<cormonth1>\d{2})\/)?(?<coryear1>\d{4})\))?
77
87
  }x =~ code
78
88
  year ||= year1
@@ -81,21 +91,27 @@ module RelatonItu
81
91
  warn "[relaton-itu] (\"#{code}\") fetching..."
82
92
  result = search(code)
83
93
  code1.sub! /(?<=\.)Imp(?=\d)/, "" if result.gi_imp
94
+ if corr1
95
+ corr1.sub!(/[\.\s]+/, " ").sub!("Amendment", "Amd")
96
+ corr1.sub!("Corrigendum", "Corr")
97
+ end
84
98
  result.select do |i|
99
+ next true unless i.hit[:code]
100
+
85
101
  %r{
86
102
  ^(?<pref2>ITU)?(-(?<type2>\w))?\s?(?<code2>[\S]+)
103
+ (\s\(?(?<ver2>v\d+)\)?)?
87
104
  (\s\(((?<month2>\d{2})\/)?(?<year2>\d{4})\))?
88
105
  (\s(?<corr2>(Amd|Cor)\.\s?\d+))?
89
106
  (\s\(((?<cormonth2>\d{2})\/)?(?<coryear2>\d{4})\))?
90
107
  }x =~ i.hit[:code]
91
108
  /:[^\(]+\((?<buldate2>\d{2}\.\w{1,4}\.\d{4})\)/ =~ i.hit[:title]
92
109
  corr2&.sub! /\.\s?/, " "
93
- pref1 == pref2 && (!type1 || type1 == type2) && code1 == code2 &&
110
+ pref1 == pref2 && (!type1 || type1 == type2) && code2.include?(code1) &&
94
111
  (!year || year == year2) && (!month1 || month1 == month2) &&
95
112
  corr1 == corr2 && (!coryear1 || coryear1 == coryear2) &&
96
- buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2)
97
- # i.hit[:code] &&
98
- # i.hit[:code].match(docidrx).to_s == c
113
+ buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2) &&
114
+ (!ver1 || ver1 == ver2)
99
115
  end
100
116
  end
101
117
 
@@ -108,10 +124,11 @@ module RelatonItu
108
124
  def isobib_results_filter(result, year)
109
125
  missed_years = []
110
126
  result.each do |r|
111
- return { ret: r.fetch } if !year
112
-
113
- /\(\d{2}\/(?<pyear>\d{4})\)/ =~ r.hit[:code]
114
- return { ret: r.fetch } if year == pyear
127
+ /\((\d{2}\/)?(?<pyear>\d{4})\)/ =~ r.hit[:code]
128
+ if !year || year == pyear
129
+ ret = r.fetch
130
+ return { ret: ret } if ret
131
+ end
115
132
 
116
133
  missed_years << pyear
117
134
  end
@@ -63,7 +63,7 @@ module RelatonItu
63
63
  @type = type
64
64
  @name = name
65
65
  @acronym = acronym
66
- @period = period.is_a?(Hash) ? Period.new(period) : period
66
+ @period = period.is_a?(Hash) ? Period.new(**period) : period
67
67
  end
68
68
 
69
69
  # @param builder [Nokogiri::XML::Builder]
@@ -26,8 +26,7 @@ module RelatonItu
26
26
  # @param hash [Hash]
27
27
  # @return [RelatonItu::ItuBibliographicItem]
28
28
  def hash_to_bib(hash)
29
- item_hash = ::RelatonItu::HashConverter.hash_to_bib(hash)
30
- ::RelatonItu::ItuBibliographicItem.new item_hash
29
+ ::RelatonItu::ItuBibliographicItem.from_hash hash
31
30
  end
32
31
 
33
32
  # Returns hash of XML grammar
@@ -24,18 +24,18 @@ module RelatonItu
24
24
  }.freeze
25
25
 
26
26
  class << self
27
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
28
-
29
27
  # Parse page.
30
- # @param hit_data [Hash]
28
+ # @param hit [RelatonItu::Hit]
31
29
  # @return [Hash]
32
- def parse_page(hit_data, imp = false)
33
- url, doc = get_page hit_data[:url]
30
+ def parse_page(hit, imp = false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
31
+ doc = get_page hit
32
+ return unless doc.code == "200"
33
+
34
34
  if imp
35
35
  a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
36
36
  return unless a
37
37
 
38
- url, doc = get_page URI.join(url, a[:href]).to_s
38
+ doc = get_page hit, a[:href].to_s
39
39
  end
40
40
 
41
41
  # Fetch edition.
@@ -44,67 +44,65 @@ module RelatonItu
44
44
  ItuBibliographicItem.new(
45
45
  fetched: Date.today.to_s,
46
46
  type: "standard",
47
- docid: fetch_docid(doc, hit_data[:title]),
47
+ docid: fetch_docid(doc, hit.hit[:title]),
48
48
  edition: edition,
49
49
  language: ["en"],
50
50
  script: ["Latn"],
51
51
  title: fetch_titles(doc),
52
- doctype: hit_data[:type],
52
+ doctype: hit.hit[:type],
53
53
  docstatus: fetch_status(doc),
54
54
  ics: [], # fetch_ics(doc),
55
55
  date: fetch_dates(doc),
56
- contributor: fetch_contributors(hit_data[:code]),
57
- editorialgroup: fetch_workgroup(hit_data[:code], doc),
58
- abstract: fetch_abstract(doc),
59
- copyright: fetch_copyright(hit_data[:code], doc),
60
- link: fetch_link(doc, url),
56
+ contributor: fetch_contributors(hit.hit[:code]),
57
+ editorialgroup: fetch_workgroup(hit.hit[:code], doc),
58
+ abstract: fetch_abstract(doc, hit),
59
+ copyright: fetch_copyright(hit.hit[:code], doc),
60
+ link: fetch_link(doc),
61
61
  relation: fetch_relations(doc),
62
62
  place: ["Geneva"]
63
63
  )
64
64
  end
65
- # rubocop:enable Metrics/AbcSize
66
65
 
67
66
  private
68
67
 
69
68
  # Fetch abstracts.
70
- # @param doc [Nokigiri::HTML::Document]
71
- # @return [Array<Array>]
72
- def fetch_abstract(doc)
73
- abstract_url = doc.at('//table/tr/td/span[contains(@id, "lbl_dms")]/div')
74
- return [] unless abstract_url
75
-
76
- url = abstract_url[:onclick].match(/https?[^']+/).to_s
77
- d = Nokogiri::HTML Net::HTTP.get(URI(url)).encode(undef: :replace, replace: "")
78
- abstract_content = d.css("p.MsoNormal").text.gsub(/\r\n/, "")
79
- .squeeze(" ").gsub(/\u00a0/, "")
69
+ # @param doc [Mechanize::Page]
70
+ # @param hit [RelatonItu::Hit]
71
+ # @return [Array<Hash>]
72
+ def fetch_abstract(doc, hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
73
+ abstract_url = doc.at '//table/tr/td/span[contains(@id, "lbl_dms")]/div'
74
+ content = if abstract_url
75
+ url = abstract_url[:onclick].match(/https?[^']+/).to_s
76
+ rsp = hit.hit_collection.agent.get url
77
+ d = Nokogiri::HTML rsp.body.encode(undef: :replace, replace: "")
78
+ d.css("p.MsoNormal").text.gsub(/\r\n/, "").squeeze(" ").gsub(/\u00a0/, "")
79
+ elsif a = doc.at('//table/tr/td/span[contains(@class, "observation")]/text()')
80
+ a.text.strip
81
+ end
82
+ return [] unless content
80
83
 
81
84
  [{
82
- content: abstract_content,
85
+ content: content,
83
86
  language: "en",
84
87
  script: "Latn",
85
88
  }]
86
89
  end
87
90
 
88
91
  # Get page.
89
- # @param path [String] page's path
92
+ # @param hit [RelatonItu::Hit]
93
+ # @param url [String, nil]
90
94
  # @return [Array<String, Nokogiri::HTML::Document>]
91
- def get_page(url)
92
- uri = URI url
93
- resp = Net::HTTP.get_response(uri)
94
- until resp.code == "200"
95
- uri = URI resp["location"] if resp.code.match? /^30/
96
- resp = Net::HTTP.get_response(uri)
97
- end
98
- [uri.to_s, Nokogiri::HTML(resp.body)]
95
+ def get_page(hit, url = nil)
96
+ uri = url || hit.hit[:url]
97
+ hit.hit_collection.agent.get uri
99
98
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
100
99
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
101
100
  Net::ProtocolError, OpenSSL::SSL::SSLError
102
- raise RelatonBib::RequestError, "Could not access #{url}"
101
+ raise RelatonBib::RequestError, "Could not access #{uri}"
103
102
  end
104
- # rubocop:enable Metrics/MethodLength
105
103
 
106
104
  # Fetch docid.
107
- # @param doc [Nokogiri::HTML::Document]
105
+ # @param doc [Mechanize::Page]
108
106
  # @param title [String]
109
107
  # @return [Hash]
110
108
  def fetch_docid(doc, title)
@@ -117,16 +115,18 @@ module RelatonItu
117
115
  docids
118
116
  end
119
117
 
120
- def createdocid(text)
118
+ # @param text [String]
119
+ # @return [RelatonBib::DocumentIdentifier]
120
+ def createdocid(text) # rubocop:disable Metrics/MethodLength
121
121
  %r{
122
122
  ^(?<code>((ITU-\w|ISO\/IEC)\s)?[^\(:]+)
123
- (\(((?<month>\d{2})\/)?(?<year>\d{4})\))?
123
+ (\(((?<_month>\d{2})\/)?(?<_year>\d{4})\))?
124
124
  (:[^\(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
125
125
  (\s(?<corr>(Amd|Cor)\.\s?\d+))?
126
- # (\s\(((?<cormonth>\d{2})\/)?(?<coryear>\d{4})\))?
126
+ # (\s\(((?<_cormonth>\d{2})\/)?(?<_coryear>\d{4})\))?
127
127
  }x =~ text.squeeze(" ")
128
128
  corr&.sub! /\.\s?/, " "
129
- id = [code.sub(/[[:space:]]$/, ""), corr].compact.join "/"
129
+ id = [code.sub(/[[:space:]]$/, ""), corr].compact.join " "
130
130
  id += " - #{buldate}" if buldate
131
131
  type = id.match(%r{^\w+}).to_s
132
132
  type = "ITU" if type == "G"
@@ -134,7 +134,7 @@ module RelatonItu
134
134
  end
135
135
 
136
136
  # Fetch status.
137
- # @param doc [Nokogiri::HTML::Document]
137
+ # @param doc [Mechanize::Page]
138
138
  # @return [RelatonBib::DocumentStatus, NilClass]
139
139
  def fetch_status(doc)
140
140
  s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
@@ -147,7 +147,7 @@ module RelatonItu
147
147
 
148
148
  # Fetch workgroup.
149
149
  # @param code [String]
150
- # @param doc [Nokogiri::HTML::Document]
150
+ # @param doc [Mechanize::Page]
151
151
  # @return [RelatonItu::EditorialGroup, NilClass]
152
152
  def fetch_workgroup(code, doc)
153
153
  wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
@@ -155,8 +155,7 @@ module RelatonItu
155
155
 
156
156
  group = wg && itugroup(wg.text)
157
157
  EditorialGroup.new(
158
- bureau: code.match(/(?<=-)./).to_s,
159
- group: group
158
+ bureau: code.match(/(?<=-)./).to_s, group: group
160
159
  )
161
160
  end
162
161
 
@@ -176,24 +175,24 @@ module RelatonItu
176
175
  ItuGroup.new name: name, type: type, acronym: acronym
177
176
  end
178
177
 
179
- # rubocop:disable Metrics/MethodLength
180
-
181
178
  # Fetch relations.
182
- # @param doc [Nokogiri::HTML::Document]
179
+ # @param doc [Mechanize::Page]
183
180
  # @return [Array<Hash>]
184
181
  def fetch_relations(doc)
185
- doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
182
+ doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]')
183
+ .map do |r|
186
184
  ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
187
- fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
188
- bibitem = ItuBibliographicItem.new(formattedref: fref, type: "standard")
185
+ fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en",
186
+ script: "Latn")
187
+ bibitem = ItuBibliographicItem.new(formattedref: fref,
188
+ type: "standard")
189
189
  { type: "complements", bibitem: bibitem }
190
190
  end
191
191
  end
192
- # rubocop:enable Metrics/MethodLength
193
192
 
194
193
  # Fetch titles.
195
- # @param doc [Nokogiri::HTML::Document]
196
- # @return [Array<Hash>]
194
+ # @param doc [Mechanize::Page]
195
+ # @return [RelatonBib::TypedTitleStringCollection]
197
196
  def fetch_titles(doc)
198
197
  t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
199
198
  return [] unless t
@@ -202,27 +201,29 @@ module RelatonItu
202
201
  end
203
202
 
204
203
  # Fetch dates
205
- # @param doc [Nokogiri::HTML::Document]
204
+ # @param doc [Mechanize::Page]
206
205
  # @return [Array<Hash>]
207
- def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity
206
+ def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
208
207
  dates = []
209
208
  date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]",
210
209
  "//p[contains(.,'Approved in')]")
211
210
  pdate = date&.text&.match(/\d{4}-\d{2}-\d{2}/).to_s || ob_date(doc)
212
211
  if pdate && !pdate&.empty?
213
212
  dates << { type: "published", on: pdate }
213
+ elsif pdate = ob_date(doc)
214
+ dates << { type: "published", on: pdate }
214
215
  end
215
216
  dates
216
217
  end
217
218
 
218
219
  # Scrape Operational Bulletin date.
219
- # @param doc [Nokogiri::HTML::Document]
220
+ # @param doc [Mechanize::Page]
220
221
  # @return [String]
221
222
  def ob_date(doc)
222
223
  pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
223
224
  return unless pdate
224
225
 
225
- roman_to_arabic pdate.text.match(%r{(?<=Year: )\d{2}.\w+.\d{4}}).to_s
226
+ roman_to_arabic pdate.text.match(%r{(?<=Year: )(\d{2}.\w+.)?\d{4}}).to_s
226
227
  end
227
228
 
228
229
  # Convert roman month number in string date to arabic number
@@ -230,12 +231,15 @@ module RelatonItu
230
231
  # @return [String]
231
232
  def roman_to_arabic(date)
232
233
  %r{(?<rmonth>[IVX]+)} =~ date
233
- month = ROMAN_MONTHS.index(rmonth) + 1
234
- Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
234
+ if ROMAN_MONTHS.index(rmonth)
235
+ month = ROMAN_MONTHS.index(rmonth) + 1
236
+ Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
237
+ else date
238
+ end
235
239
  end
236
240
 
237
241
  # Fetch contributors
238
- # @param doc [Nokogiri::HTML::Document]
242
+ # @param doc [Mechanize::Page]
239
243
  # @return [Array<Hash>]
240
244
  def fetch_contributors(code)
241
245
  return [] unless code
@@ -251,11 +255,10 @@ module RelatonItu
251
255
  end
252
256
 
253
257
  # Fetch links.
254
- # @param doc [Nokogiri::HTML::Document]
255
- # @param url [String]
258
+ # @param doc [Mechanize::Page]
256
259
  # @return [Array<Hash>]
257
- def fetch_link(doc, url)
258
- links = [{ type: "src", content: url }]
260
+ def fetch_link(doc)
261
+ links = [{ type: "src", content: doc.uri.to_s }]
259
262
  obp_elm = doc.at(
260
263
  '//a[@title="Persistent link to download the PDF file"]',
261
264
  "//font[contains(.,'PDF')]/../.."
@@ -266,6 +269,8 @@ module RelatonItu
266
269
  links
267
270
  end
268
271
 
272
+ # @param type [String]
273
+ # @param elm [Nokogiri::XML::Element]
269
274
  def typed_link(type, elm)
270
275
  {
271
276
  type: type,
@@ -275,7 +280,7 @@ module RelatonItu
275
280
 
276
281
  # Fetch copyright.
277
282
  # @param code [String]
278
- # @param doc [Nokogiri::HTML::Document]
283
+ # @param doc [Mechanize::Page]
279
284
  # @return [Array<Hash>]
280
285
  def fetch_copyright(code, doc)
281
286
  abbreviation = code.match(/^[^-]+/).to_s
@@ -1,3 +1,3 @@
1
1
  module RelatonItu
2
- VERSION = "1.7.3".freeze
2
+ VERSION = "1.7.8".freeze
3
3
  end
@@ -8,7 +8,7 @@ module RelatonItu
8
8
  # @param item_hash [Hash]
9
9
  # @return [RelatonItu::ItuBibliographicItem]
10
10
  def bib_item(item_hash)
11
- ItuBibliographicItem.new item_hash
11
+ ItuBibliographicItem.new **item_hash
12
12
  end
13
13
 
14
14
  # @param ext [Nokogiri::XML::Element]
data/relaton-itu.gemspec CHANGED
@@ -26,16 +26,17 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ["lib"]
27
27
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
28
28
 
29
- spec.add_development_dependency "debase"
29
+ # spec.add_development_dependency "debase"
30
30
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
31
31
  spec.add_development_dependency "pry-byebug"
32
- spec.add_development_dependency "rake", "~> 10.0"
32
+ spec.add_development_dependency "rake", "~> 13.0"
33
33
  spec.add_development_dependency "rspec", "~> 3.0"
34
- spec.add_development_dependency "ruby-debug-ide"
34
+ # spec.add_development_dependency "ruby-debug-ide"
35
35
  spec.add_development_dependency "ruby-jing"
36
36
  spec.add_development_dependency "simplecov"
37
37
  spec.add_development_dependency "vcr", "~> 5.0.0"
38
38
  spec.add_development_dependency "webmock"
39
39
 
40
+ spec.add_dependency "mechanize"
40
41
  spec.add_dependency "relaton-bib", "~> 1.7.0"
41
42
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-itu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.3
4
+ version: 1.7.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-01-04 00:00:00.000000000 Z
11
+ date: 2021-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: debase
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: equivalent-xml
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -58,14 +44,14 @@ dependencies:
58
44
  requirements:
59
45
  - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '10.0'
47
+ version: '13.0'
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '10.0'
54
+ version: '13.0'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: rspec
71
57
  requirement: !ruby/object:Gem::Requirement
@@ -80,20 +66,6 @@ dependencies:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
68
  version: '3.0'
83
- - !ruby/object:Gem::Dependency
84
- name: ruby-debug-ide
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
69
  - !ruby/object:Gem::Dependency
98
70
  name: ruby-jing
99
71
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +122,20 @@ dependencies:
150
122
  - - ">="
151
123
  - !ruby/object:Gem::Version
152
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: mechanize
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
153
139
  - !ruby/object:Gem::Dependency
154
140
  name: relaton-bib
155
141
  requirement: !ruby/object:Gem::Requirement
@@ -172,9 +158,7 @@ executables: []
172
158
  extensions: []
173
159
  extra_rdoc_files: []
174
160
  files:
175
- - ".github/workflows/macos.yml"
176
- - ".github/workflows/ubuntu.yml"
177
- - ".github/workflows/windows.yml"
161
+ - ".github/workflows/rake.yml"
178
162
  - ".gitignore"
179
163
  - ".rspec"
180
164
  - ".rubocop.yml"
@@ -183,6 +167,7 @@ files:
183
167
  - README.adoc
184
168
  - Rakefile
185
169
  - bin/console
170
+ - bin/rspec
186
171
  - bin/setup
187
172
  - grammars/basicdoc.rng
188
173
  - grammars/biblio.rng
@@ -1,34 +0,0 @@
1
- # Auto-generated by Cimas: Do not edit it manually!
2
- # See https://github.com/metanorma/cimas
3
- name: macos
4
-
5
- on:
6
- push:
7
- branches: [ master ]
8
- pull_request:
9
- branches: [ '**' ]
10
-
11
- jobs:
12
- test-macos:
13
- name: Test on Ruby ${{ matrix.ruby }} macOS
14
- runs-on: macos-latest
15
- strategy:
16
- fail-fast: false
17
- matrix:
18
- ruby: [ '2.6', '2.5', '2.4' ]
19
- steps:
20
- - uses: actions/checkout@master
21
- - name: Use Ruby
22
- uses: actions/setup-ruby@v1
23
- with:
24
- ruby-version: ${{ matrix.ruby }}
25
- architecture: 'x64'
26
- - name: Update gems
27
- run: |
28
- sudo gem install bundler --force
29
- ruby -v | grep 2.5 && bundle config set build.debase --with-cflags="-Wno-error=implicit-function-declaration"
30
- ruby -v | grep 2.5 && bundle config set build.ruby-debug-ide --with-cflags="-Wno-error=implicit-function-declaration"
31
- bundle install --jobs 4 --retry 3
32
- - name: Run specs
33
- run: |
34
- bundle exec rake
@@ -1,33 +0,0 @@
1
- # Auto-generated by Cimas: Do not edit it manually!
2
- # See https://github.com/metanorma/cimas
3
- name: ubuntu
4
-
5
- on:
6
- push:
7
- branches: [ master ]
8
- pull_request:
9
- branches: [ '**' ]
10
-
11
- jobs:
12
- test-linux:
13
- name: Test on Ruby ${{ matrix.ruby }} Ubuntu
14
- runs-on: ubuntu-latest
15
- strategy:
16
- fail-fast: false
17
- matrix:
18
- ruby: [ '2.6', '2.5', '2.4' ]
19
- steps:
20
- - uses: actions/checkout@master
21
- - name: Use Ruby
22
- uses: actions/setup-ruby@v1
23
- with:
24
- ruby-version: ${{ matrix.ruby }}
25
- architecture: 'x64'
26
- - name: Update gems
27
- run: |
28
- gem install bundler
29
- bundle install --jobs 4 --retry 3
30
- - name: Run specs
31
- run: |
32
- unset JAVA_TOOL_OPTIONS
33
- bundle exec rake
@@ -1,35 +0,0 @@
1
- # Auto-generated by Cimas: Do not edit it manually!
2
- # See https://github.com/metanorma/cimas
3
- name: windows
4
-
5
- on:
6
- push:
7
- branches: [ master ]
8
- pull_request:
9
- branches: [ '**' ]
10
-
11
- jobs:
12
- test-windows:
13
- name: Test on Ruby ${{ matrix.ruby }} Windows
14
- runs-on: windows-latest
15
- strategy:
16
- fail-fast: false
17
- matrix:
18
- ruby: [ '2.6', '2.5', '2.4' ]
19
- steps:
20
- - uses: actions/checkout@master
21
- - name: Use Ruby
22
- uses: actions/setup-ruby@v1
23
- with:
24
- ruby-version: ${{ matrix.ruby }}
25
- architecture: 'x64'
26
- - name: Update gems
27
- shell: pwsh
28
- run: |
29
- gem install bundler
30
- bundle config --local path vendor/bundle
31
- bundle update
32
- bundle install --jobs 4 --retry 3
33
- - name: Run specs
34
- run: |
35
- bundle exec rake