relaton-itu 1.7.3 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 971645ffb5b0111df7d841c47f6ac1a64904cc85274218f2f6ff622dbba3b576
4
- data.tar.gz: 68bc7e0bbb844896c7bf30c6d3e5747a9d6f0e68aa9384f9e761f2881f3be2bc
3
+ metadata.gz: 249fe896ec8a77979ca15d6a42da98ad2ac2620cfe8dc0f468cd14277c5a35b0
4
+ data.tar.gz: 62415ed835abc49cf00d3048b52556b3f718a4ad0dc531ec2c20572b95305210
5
5
  SHA512:
6
- metadata.gz: 83a9d8ee974bfe44cad5715db0199dfcf1eec9709d2a1d778d08cdc21379a69063bb6d6f0ee818b962f825c684d680bb4dda5cbba9b0ca48b24dc8dde4876cdd
7
- data.tar.gz: 96a7cf63dd9ff84a115959d7766544e8d8f5f1328fa976737875b7808152939f1168e6a1644e0eb5e4af5cd22f6077b232cf45968e6945bc1a86d2a7721f8e19
6
+ metadata.gz: 65a5bcf91f851cc4ec3139fad83b0c83f143b1bafefe8c9638e34072f7d82f76b77517fedf153fcdf7b63ee903fcd372c64e378edaeb231835f477f004f7e94a
7
+ data.tar.gz: 61cdc7df34b24f5d3f3e56b967e9e2b34337bc164b32691747853660cbb6c9e337a6eee058162f97e218da356f4028d7c107b4be4ed7dc10f99253e6985fccd9
@@ -0,0 +1,46 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
+ name: rake
4
+
5
+ on:
6
+ push:
7
+ branches: [ master, main ]
8
+ tags: [ v* ]
9
+ pull_request:
10
+
11
+ jobs:
12
+ rake:
13
+ name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
+ runs-on: ${{ matrix.os }}
15
+ continue-on-error: ${{ matrix.experimental }}
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ ruby: [ '2.7', '2.6', '2.5', '2.4' ]
20
+ os: [ ubuntu-latest, windows-latest, macos-latest ]
21
+ experimental: [ false ]
22
+ include:
23
+ - ruby: '3.0'
24
+ os: 'ubuntu-latest'
25
+ experimental: true
26
+ - ruby: '3.0'
27
+ os: 'windows-latest'
28
+ experimental: true
29
+ - ruby: '3.0'
30
+ os: 'macos-latest'
31
+ experimental: true
32
+ steps:
33
+ - uses: actions/checkout@v2
34
+ with:
35
+ submodules: true
36
+
37
+ # https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
38
+ - if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
39
+ run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
40
+
41
+ - uses: ruby/setup-ruby@v1
42
+ with:
43
+ ruby-version: ${{ matrix.ruby }}
44
+ bundler-cache: true
45
+
46
+ - run: bundle exec rake
data/README.adoc CHANGED
@@ -97,6 +97,29 @@ RelatonItu::ItuBibliography.get "ITU-T G.989.2/Amd 1"
97
97
  ...
98
98
  ----
99
99
 
100
+ === Get ITU-R documents
101
+
102
+ [source,ruby]
103
+ ----
104
+ RelatonItu::ItuBibliography.get "ITU-R REC-BO.600-1"
105
+ [relaton-itu] ("ITU-R REC-BO.600-1") fetching...
106
+ [relaton-itu] ("ITU-R REC-BO.600-1") found R-REC-BO.600-1
107
+ => #<RelatonItu::ItuBibliographicItem:0x007f84e9930498
108
+ ...
109
+ ----
110
+
111
+ === Get ITU-R Radio Regulations (RR)
112
+
113
+ [source,ruby]
114
+ ----
115
+ RelatonItu::ItuBibliography.get 'ITU-R RR (2020)'
116
+ [relaton-itu] ("ITU-R RR") fetching...
117
+ [relaton-itu] WARNING: invalid doctype: publication
118
+ [relaton-itu] ("ITU-R RR") found ITU-R RR
119
+ => #<RelatonItu::ItuBibliographicItem:0x007fb4b55eb660
120
+ ...
121
+ ----
122
+
100
123
  === Create bibliographic item from YAML
101
124
  [source,ruby]
102
125
  ----
@@ -104,11 +127,7 @@ hash = YAML.load_file 'spec/examples/itu_bib_item.yml'
104
127
  => {"id"=>"ITU-T L.163 (11/2018)",
105
128
  ...
106
129
 
107
- bib_hash = RelatonItu::HashConverter.hash_to_bib hash
108
- => {:id=>"ITU-T L.163 (11/2018)",
109
- ...
110
-
111
- RelatonItu::ItuBibliographicItem.new bib_hash
130
+ RelatonItu::ItuBibliographicItem.from_hash hash
112
131
  => #<RelatonItu::ItuBibliographicItem:0x007fd88ac02aa0
113
132
  ...
114
133
  ----
data/bin/rspec ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rspec' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rspec-core", "rspec")
data/lib/relaton_itu.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require "mechanize"
1
2
  require "relaton_itu/version"
2
3
  require "relaton_itu/itu_bibliography"
3
4
  require "digest/md5"
@@ -20,7 +20,7 @@ module RelatonItu
20
20
  warn "[relaton-itu] WARNING: invalid bureau: #{bureau}"
21
21
  end
22
22
  @bureau = bureau
23
- @group = group.is_a?(Hash) ? ItuGroup.new(group) : group
23
+ @group = group.is_a?(Hash) ? ItuGroup.new(**group) : group
24
24
  @subgroup = subgroup.is_a?(Hash) ? ItuGroup.new(subgroup) : subgroup
25
25
  @workgroup = workgroup.is_a?(Hash) ? ItuGroup.new(workgroup) : workgroup
26
26
  end
@@ -3,11 +3,20 @@ module RelatonItu
3
3
  class << self
4
4
  private
5
5
 
6
+ #
7
+ # Overrides superclass's method
8
+ #
9
+ # @param item [Hash]
10
+ # @return [RelatonItu::ItuBibliographicItem]
11
+ def bib_item(item)
12
+ ItuBibliographicItem.new(**item)
13
+ end
14
+
6
15
  def editorialgroup_hash_to_bib(ret)
7
16
  eg = ret[:editorialgroup]
8
17
  return unless eg
9
18
 
10
- ret[:editorialgroup] = EditorialGroup.new eg
19
+ ret[:editorialgroup] = EditorialGroup.new **eg
11
20
  end
12
21
 
13
22
  # @param ret [Hash]
@@ -15,7 +24,7 @@ module RelatonItu
15
24
  return unless ret[:structuredidentifier]
16
25
 
17
26
  ret[:structuredidentifier] = StructuredIdentifier.new(
18
- ret[:structuredidentifier]
27
+ **ret[:structuredidentifier]
19
28
  )
20
29
  end
21
30
  end
@@ -3,10 +3,12 @@
3
3
  module RelatonItu
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
+ attr_writer :fetch
7
+
6
8
  # Parse page.
7
9
  # @return [RelatonItu::ItuBibliographicItem]
8
10
  def fetch
9
- @fetch ||= Scrapper.parse_page hit, hit_collection.gi_imp
11
+ @fetch ||= Scrapper.parse_page self, hit_collection.gi_imp
10
12
  end
11
13
  end
12
14
  end
@@ -12,25 +12,48 @@ module RelatonItu
12
12
  # @return [TrueClass, FalseClass]
13
13
  attr_reader :gi_imp
14
14
 
15
+ # @return [Mechanize]
16
+ attr_reader :agent
17
+
15
18
  # @param ref [String]
16
19
  # @param year [String]
17
- def initialize(ref, year = nil)
20
+ def initialize(ref, year = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
18
21
  text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
19
22
  super text, year
23
+ @agent = Mechanize.new
24
+ agent.user_agent_alias = "Mac Safari"
20
25
  @gi_imp = /\.Imp\d/.match?(ref)
21
- uri = URI "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
22
- data = { json: params.to_json }
23
- resp = Net::HTTP.post(uri, data.to_json,
24
- "Content-Type" => "application/json")
25
- @array = hits JSON.parse(resp.body)
26
+ if ref.match? /^(ITU-T|ITU-R\sRR)/
27
+ url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
28
+ data = { json: params.to_json }
29
+ resp = agent.post url, data.to_json, "Content-Type" => "application/json"
30
+ @array = hits JSON.parse(resp.body)
31
+ elsif ref.match? /^ITU-R/
32
+ rf = ref.sub(/^ITU-R\s/, "").upcase
33
+ url = "https://raw.githubusercontent.com/relaton/relaton-data-itu-r/master/data/#{rf}.yaml"
34
+ resp = Net::HTTP.get_response(URI(url))
35
+ if resp.code == "404"
36
+ @array = []
37
+ return
38
+ end
39
+
40
+ hash = YAML.safe_load resp.body
41
+ item_hash = HashConverter.hash_to_bib(hash)
42
+ item = ItuBibliographicItem.new **item_hash
43
+ hit = Hit.new({ url: url }, self)
44
+ hit.fetch = item
45
+ @array = [hit]
46
+ end
26
47
  end
27
48
 
28
49
  private
29
50
 
30
51
  # @return [String]
31
52
  def group
32
- @group ||= if %r{OB|Operational Bulletin}.match? text then "Publications"
33
- else "Recommendations"
53
+ @group ||= case text
54
+ when %r{OB|Operational Bulletin}, %r{^ITU-R\sRR}
55
+ "Publications"
56
+ when %r{^ITU-T} then "Recommendations"
34
57
  end
35
58
  end
36
59
 
@@ -46,7 +69,7 @@ module RelatonItu
46
69
  "ExactPhrase" => false,
47
70
  "CollectionName" => "General",
48
71
  "CollectionGroup" => group,
49
- "Sector" => "t",
72
+ "Sector" => text.match(/(?<=^ITU-)\w/).to_s.downcase,
50
73
  "Criterias" => [{
51
74
  "Name" => "Search in",
52
75
  "Criterias" => [
@@ -93,7 +116,7 @@ module RelatonItu
93
116
  code = h["Media"]["Name"]
94
117
  title = h["Title"]
95
118
  url = h["Redirection"]
96
- type = group.downcase[0...-1]
119
+ type = h["Collection"]["Group"].downcase[0...-1]
97
120
  Hit.new({ code: code, title: title, url: url, type: type }, self)
98
121
  end
99
122
  end
@@ -14,5 +14,12 @@ module RelatonItu
14
14
  end
15
15
  super
16
16
  end
17
+
18
+ # @param hash [Hash]
19
+ # @return [RelatonItu::ItuBibliographicItem]
20
+ def self.from_hash(hash)
21
+ item_hash = ::RelatonItu::HashConverter.hash_to_bib(hash)
22
+ new **item_hash
23
+ end
17
24
  end
18
25
  end
@@ -19,20 +19,27 @@ module RelatonItu
19
19
  # @param text [String]
20
20
  # @return [RelatonItu::HitCollection]
21
21
  def search(text, year = nil)
22
+ # code = text.sub(/(?<=ITU-T\s\w)\.(\w+\.)(?=\d+)/, ' \1')
23
+ if text =~ /(ITU-T\s\w)\.(Suppl\.|Annex)\s?(\w?\d+)/
24
+ correct_ref = "#{$~[1]} #{$~[2]} #{$~[3]}"
25
+ warn "[relaton-itu] WARNING: Incorrect reference #{text}"
26
+ warn "[relaton-itu] the reference should be #{correct_ref}"
27
+ end
22
28
  HitCollection.new text, year
23
29
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
24
30
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
25
- Net::ProtocolError, OpenSSL::SSL::SSLError
26
- raise RelatonBib::RequestError, "Could not access http://www.itu.int"
31
+ Net::ProtocolError, URI::InvalidURIError => e
32
+ raise RelatonBib::RequestError, e.message
27
33
  end
28
34
 
29
35
  # @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
30
36
  # @param year [String] the year the standard was published (optional)
31
- # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required
37
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
38
+ # reference is required
32
39
  # @return [String] Relaton XML serialisation of reference
33
40
  def get(code, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
34
41
  if year.nil?
35
- /^(?<code1>[^\s]+\s[^\s]+)\s\(\d{2}\/(?<year1>\d+)\)$/ =~ code
42
+ /^(?<code1>[^\s]+\s[^\s]+)\s\((\d{2}\/)?(?<year1>\d+)\)$/ =~ code
36
43
  unless code1.nil?
37
44
  code = code1
38
45
  year = year1
@@ -54,11 +61,13 @@ module RelatonItu
54
61
  id = year ? "#{code}:#{year}" : code
55
62
  warn "[relaton-itu] WARNING: no match found online for #{id}. "\
56
63
  "The code must be exactly like it is on the standards website."
57
- warn "[relaton-itu] (There was no match for #{year}, though there were matches "\
58
- "found for #{missed_years.join(', ')}.)" unless missed_years.empty?
59
- if /\d-\d/ =~ code
60
- warn "[relaton-itu] The provided document part may not exist, or the document "\
61
- "may no longer be published in parts."
64
+ unless missed_years.empty?
65
+ warn "[relaton-itu] (There was no match for #{year}, though there "\
66
+ "were matches found for #{missed_years.join(', ')}.)"
67
+ end
68
+ if /\d-\d/.match? code
69
+ warn "[relaton-itu] The provided document part may not exist, or "\
70
+ "the document may no longer be published in parts."
62
71
  else
63
72
  warn "[relaton-itu] If you wanted to cite all document parts for the reference, "\
64
73
  "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
@@ -69,10 +78,11 @@ module RelatonItu
69
78
 
70
79
  def search_filter(code, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
71
80
  %r{
72
- ^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+)
81
+ ^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+(?:\/\w[\.\d]+)?)
82
+ (\s\(?(?<ver1>v\d+)\)?)?
73
83
  (\s\(((?<month1>\d{2})\/)?(?<year1>\d{4})\))?
74
84
  (\s-\s(?<buldate1>\d{2}\.\w{1,4}\.\d{4}))?
75
- (\/(?<corr1>(Amd|Cor)\s?\d+))?
85
+ (\s(?<corr1>(Amd|Cor|Amendment|Corrigendum)\.?\s?\d+))?
76
86
  (\s\(((?<cormonth1>\d{2})\/)?(?<coryear1>\d{4})\))?
77
87
  }x =~ code
78
88
  year ||= year1
@@ -81,21 +91,27 @@ module RelatonItu
81
91
  warn "[relaton-itu] (\"#{code}\") fetching..."
82
92
  result = search(code)
83
93
  code1.sub! /(?<=\.)Imp(?=\d)/, "" if result.gi_imp
94
+ if corr1
95
+ corr1.sub!(/[\.\s]+/, " ").sub!("Amendment", "Amd")
96
+ corr1.sub!("Corrigendum", "Corr")
97
+ end
84
98
  result.select do |i|
99
+ next true unless i.hit[:code]
100
+
85
101
  %r{
86
102
  ^(?<pref2>ITU)?(-(?<type2>\w))?\s?(?<code2>[\S]+)
103
+ (\s\(?(?<ver2>v\d+)\)?)?
87
104
  (\s\(((?<month2>\d{2})\/)?(?<year2>\d{4})\))?
88
105
  (\s(?<corr2>(Amd|Cor)\.\s?\d+))?
89
106
  (\s\(((?<cormonth2>\d{2})\/)?(?<coryear2>\d{4})\))?
90
107
  }x =~ i.hit[:code]
91
108
  /:[^\(]+\((?<buldate2>\d{2}\.\w{1,4}\.\d{4})\)/ =~ i.hit[:title]
92
109
  corr2&.sub! /\.\s?/, " "
93
- pref1 == pref2 && (!type1 || type1 == type2) && code1 == code2 &&
110
+ pref1 == pref2 && (!type1 || type1 == type2) && code2.include?(code1) &&
94
111
  (!year || year == year2) && (!month1 || month1 == month2) &&
95
112
  corr1 == corr2 && (!coryear1 || coryear1 == coryear2) &&
96
- buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2)
97
- # i.hit[:code] &&
98
- # i.hit[:code].match(docidrx).to_s == c
113
+ buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2) &&
114
+ (!ver1 || ver1 == ver2)
99
115
  end
100
116
  end
101
117
 
@@ -108,10 +124,11 @@ module RelatonItu
108
124
  def isobib_results_filter(result, year)
109
125
  missed_years = []
110
126
  result.each do |r|
111
- return { ret: r.fetch } if !year
112
-
113
- /\(\d{2}\/(?<pyear>\d{4})\)/ =~ r.hit[:code]
114
- return { ret: r.fetch } if year == pyear
127
+ /\((\d{2}\/)?(?<pyear>\d{4})\)/ =~ r.hit[:code]
128
+ if !year || year == pyear
129
+ ret = r.fetch
130
+ return { ret: ret } if ret
131
+ end
115
132
 
116
133
  missed_years << pyear
117
134
  end
@@ -63,7 +63,7 @@ module RelatonItu
63
63
  @type = type
64
64
  @name = name
65
65
  @acronym = acronym
66
- @period = period.is_a?(Hash) ? Period.new(period) : period
66
+ @period = period.is_a?(Hash) ? Period.new(**period) : period
67
67
  end
68
68
 
69
69
  # @param builder [Nokogiri::XML::Builder]
@@ -26,8 +26,7 @@ module RelatonItu
26
26
  # @param hash [Hash]
27
27
  # @return [RelatonItu::ItuBibliographicItem]
28
28
  def hash_to_bib(hash)
29
- item_hash = ::RelatonItu::HashConverter.hash_to_bib(hash)
30
- ::RelatonItu::ItuBibliographicItem.new item_hash
29
+ ::RelatonItu::ItuBibliographicItem.from_hash hash
31
30
  end
32
31
 
33
32
  # Returns hash of XML grammar
@@ -24,18 +24,18 @@ module RelatonItu
24
24
  }.freeze
25
25
 
26
26
  class << self
27
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
28
-
29
27
  # Parse page.
30
- # @param hit_data [Hash]
28
+ # @param hit [RelatonItu::Hit]
31
29
  # @return [Hash]
32
- def parse_page(hit_data, imp = false)
33
- url, doc = get_page hit_data[:url]
30
+ def parse_page(hit, imp = false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
31
+ doc = get_page hit
32
+ return unless doc.code == "200"
33
+
34
34
  if imp
35
35
  a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
36
36
  return unless a
37
37
 
38
- url, doc = get_page URI.join(url, a[:href]).to_s
38
+ doc = get_page hit, a[:href].to_s
39
39
  end
40
40
 
41
41
  # Fetch edition.
@@ -44,67 +44,65 @@ module RelatonItu
44
44
  ItuBibliographicItem.new(
45
45
  fetched: Date.today.to_s,
46
46
  type: "standard",
47
- docid: fetch_docid(doc, hit_data[:title]),
47
+ docid: fetch_docid(doc, hit.hit[:title]),
48
48
  edition: edition,
49
49
  language: ["en"],
50
50
  script: ["Latn"],
51
51
  title: fetch_titles(doc),
52
- doctype: hit_data[:type],
52
+ doctype: hit.hit[:type],
53
53
  docstatus: fetch_status(doc),
54
54
  ics: [], # fetch_ics(doc),
55
55
  date: fetch_dates(doc),
56
- contributor: fetch_contributors(hit_data[:code]),
57
- editorialgroup: fetch_workgroup(hit_data[:code], doc),
58
- abstract: fetch_abstract(doc),
59
- copyright: fetch_copyright(hit_data[:code], doc),
60
- link: fetch_link(doc, url),
56
+ contributor: fetch_contributors(hit.hit[:code]),
57
+ editorialgroup: fetch_workgroup(hit.hit[:code], doc),
58
+ abstract: fetch_abstract(doc, hit),
59
+ copyright: fetch_copyright(hit.hit[:code], doc),
60
+ link: fetch_link(doc),
61
61
  relation: fetch_relations(doc),
62
62
  place: ["Geneva"]
63
63
  )
64
64
  end
65
- # rubocop:enable Metrics/AbcSize
66
65
 
67
66
  private
68
67
 
69
68
  # Fetch abstracts.
70
- # @param doc [Nokigiri::HTML::Document]
71
- # @return [Array<Array>]
72
- def fetch_abstract(doc)
73
- abstract_url = doc.at('//table/tr/td/span[contains(@id, "lbl_dms")]/div')
74
- return [] unless abstract_url
75
-
76
- url = abstract_url[:onclick].match(/https?[^']+/).to_s
77
- d = Nokogiri::HTML Net::HTTP.get(URI(url)).encode(undef: :replace, replace: "")
78
- abstract_content = d.css("p.MsoNormal").text.gsub(/\r\n/, "")
79
- .squeeze(" ").gsub(/\u00a0/, "")
69
+ # @param doc [Mechanize::Page]
70
+ # @param hit [RelatonItu::Hit]
71
+ # @return [Array<Hash>]
72
+ def fetch_abstract(doc, hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
73
+ abstract_url = doc.at '//table/tr/td/span[contains(@id, "lbl_dms")]/div'
74
+ content = if abstract_url
75
+ url = abstract_url[:onclick].match(/https?[^']+/).to_s
76
+ rsp = hit.hit_collection.agent.get url
77
+ d = Nokogiri::HTML rsp.body.encode(undef: :replace, replace: "")
78
+ d.css("p.MsoNormal").text.gsub(/\r\n/, "").squeeze(" ").gsub(/\u00a0/, "")
79
+ elsif a = doc.at('//table/tr/td/span[contains(@class, "observation")]/text()')
80
+ a.text.strip
81
+ end
82
+ return [] unless content
80
83
 
81
84
  [{
82
- content: abstract_content,
85
+ content: content,
83
86
  language: "en",
84
87
  script: "Latn",
85
88
  }]
86
89
  end
87
90
 
88
91
  # Get page.
89
- # @param path [String] page's path
92
+ # @param hit [RelatonItu::Hit]
93
+ # @param url [String, nil]
90
94
  # @return [Mechanize::Page]
91
- def get_page(url)
92
- uri = URI url
93
- resp = Net::HTTP.get_response(uri)
94
- until resp.code == "200"
95
- uri = URI resp["location"] if resp.code.match? /^30/
96
- resp = Net::HTTP.get_response(uri)
97
- end
98
- [uri.to_s, Nokogiri::HTML(resp.body)]
95
+ def get_page(hit, url = nil)
96
+ uri = url || hit.hit[:url]
97
+ hit.hit_collection.agent.get uri
99
98
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
100
99
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
101
100
  Net::ProtocolError, OpenSSL::SSL::SSLError
102
- raise RelatonBib::RequestError, "Could not access #{url}"
101
+ raise RelatonBib::RequestError, "Could not access #{uri}"
103
102
  end
104
- # rubocop:enable Metrics/MethodLength
105
103
 
106
104
  # Fetch docid.
107
- # @param doc [Nokogiri::HTML::Document]
105
+ # @param doc [Mechanize::Page]
108
106
  # @param title [String]
109
107
  # @return [Hash]
110
108
  def fetch_docid(doc, title)
@@ -117,16 +115,18 @@ module RelatonItu
117
115
  docids
118
116
  end
119
117
 
120
- def createdocid(text)
118
+ # @param text [String]
119
+ # @return [RelatonBib::DocumentIdentifier]
120
+ def createdocid(text) # rubocop:disable Metrics/MethodLength
121
121
  %r{
122
122
  ^(?<code>((ITU-\w|ISO\/IEC)\s)?[^\(:]+)
123
- (\(((?<month>\d{2})\/)?(?<year>\d{4})\))?
123
+ (\(((?<_month>\d{2})\/)?(?<_year>\d{4})\))?
124
124
  (:[^\(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
125
125
  (\s(?<corr>(Amd|Cor)\.\s?\d+))?
126
- # (\s\(((?<cormonth>\d{2})\/)?(?<coryear>\d{4})\))?
126
+ # (\s\(((?<_cormonth>\d{2})\/)?(?<_coryear>\d{4})\))?
127
127
  }x =~ text.squeeze(" ")
128
128
  corr&.sub! /\.\s?/, " "
129
- id = [code.sub(/[[:space:]]$/, ""), corr].compact.join "/"
129
+ id = [code.sub(/[[:space:]]$/, ""), corr].compact.join " "
130
130
  id += " - #{buldate}" if buldate
131
131
  type = id.match(%r{^\w+}).to_s
132
132
  type = "ITU" if type == "G"
@@ -134,7 +134,7 @@ module RelatonItu
134
134
  end
135
135
 
136
136
  # Fetch status.
137
- # @param doc [Nokogiri::HTML::Document]
137
+ # @param doc [Mechanize::Page]
138
138
  # @return [RelatonBib::DocumentStatus, NilClass]
139
139
  def fetch_status(doc)
140
140
  s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
@@ -147,7 +147,7 @@ module RelatonItu
147
147
 
148
148
  # Fetch workgroup.
149
149
  # @param code [String]
150
- # @param doc [Nokogiri::HTML::Document]
150
+ # @param doc [Mechanize::Page]
151
151
  # @return [RelatonItu::EditorialGroup, NilClass]
152
152
  def fetch_workgroup(code, doc)
153
153
  wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
@@ -155,8 +155,7 @@ module RelatonItu
155
155
 
156
156
  group = wg && itugroup(wg.text)
157
157
  EditorialGroup.new(
158
- bureau: code.match(/(?<=-)./).to_s,
159
- group: group
158
+ bureau: code.match(/(?<=-)./).to_s, group: group
160
159
  )
161
160
  end
162
161
 
@@ -176,24 +175,24 @@ module RelatonItu
176
175
  ItuGroup.new name: name, type: type, acronym: acronym
177
176
  end
178
177
 
179
- # rubocop:disable Metrics/MethodLength
180
-
181
178
  # Fetch relations.
182
- # @param doc [Nokogiri::HTML::Document]
179
+ # @param doc [Mechanize::Page]
183
180
  # @return [Array<Hash>]
184
181
  def fetch_relations(doc)
185
- doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
182
+ doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]')
183
+ .map do |r|
186
184
  ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
187
- fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
188
- bibitem = ItuBibliographicItem.new(formattedref: fref, type: "standard")
185
+ fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en",
186
+ script: "Latn")
187
+ bibitem = ItuBibliographicItem.new(formattedref: fref,
188
+ type: "standard")
189
189
  { type: "complements", bibitem: bibitem }
190
190
  end
191
191
  end
192
- # rubocop:enable Metrics/MethodLength
193
192
 
194
193
  # Fetch titles.
195
- # @param doc [Nokogiri::HTML::Document]
196
- # @return [Array<Hash>]
194
+ # @param doc [Mechanize::Page]
195
+ # @return [RelatonBib::TypedTitleStringCollection]
197
196
  def fetch_titles(doc)
198
197
  t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
199
198
  return [] unless t
@@ -202,27 +201,29 @@ module RelatonItu
202
201
  end
203
202
 
204
203
  # Fetch dates
205
- # @param doc [Nokogiri::HTML::Document]
204
+ # @param doc [Mechanize::Page]
206
205
  # @return [Array<Hash>]
207
- def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity
206
+ def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
208
207
  dates = []
209
208
  date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]",
210
209
  "//p[contains(.,'Approved in')]")
211
210
  pdate = date&.text&.match(/\d{4}-\d{2}-\d{2}/).to_s || ob_date(doc)
212
211
  if pdate && !pdate&.empty?
213
212
  dates << { type: "published", on: pdate }
213
+ elsif pdate = ob_date(doc)
214
+ dates << { type: "published", on: pdate }
214
215
  end
215
216
  dates
216
217
  end
217
218
 
218
219
  # Scrape Operational Bulletin date.
219
- # @param doc [Nokogiri::HTML::Document]
220
+ # @param doc [Mechanize::Page]
220
221
  # @return [String]
221
222
  def ob_date(doc)
222
223
  pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
223
224
  return unless pdate
224
225
 
225
- roman_to_arabic pdate.text.match(%r{(?<=Year: )\d{2}.\w+.\d{4}}).to_s
226
+ roman_to_arabic pdate.text.match(%r{(?<=Year: )(\d{2}.\w+.)?\d{4}}).to_s
226
227
  end
227
228
 
228
229
  # Convert roman month number in string date to arabic number
@@ -230,12 +231,15 @@ module RelatonItu
230
231
  # @return [String]
231
232
  def roman_to_arabic(date)
232
233
  %r{(?<rmonth>[IVX]+)} =~ date
233
- month = ROMAN_MONTHS.index(rmonth) + 1
234
- Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
234
+ if ROMAN_MONTHS.index(rmonth)
235
+ month = ROMAN_MONTHS.index(rmonth) + 1
236
+ Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
237
+ else date
238
+ end
235
239
  end
236
240
 
237
241
  # Fetch contributors
238
- # @param doc [Nokogiri::HTML::Document]
242
+ # @param code [String]
239
243
  # @return [Array<Hash>]
240
244
  def fetch_contributors(code)
241
245
  return [] unless code
@@ -251,11 +255,10 @@ module RelatonItu
251
255
  end
252
256
 
253
257
  # Fetch links.
254
- # @param doc [Nokogiri::HTML::Document]
255
- # @param url [String]
258
+ # @param doc [Mechanize::Page]
256
259
  # @return [Array<Hash>]
257
- def fetch_link(doc, url)
258
- links = [{ type: "src", content: url }]
260
+ def fetch_link(doc)
261
+ links = [{ type: "src", content: doc.uri.to_s }]
259
262
  obp_elm = doc.at(
260
263
  '//a[@title="Persistent link to download the PDF file"]',
261
264
  "//font[contains(.,'PDF')]/../.."
@@ -266,6 +269,8 @@ module RelatonItu
266
269
  links
267
270
  end
268
271
 
272
+ # @param type [String]
273
+ # @param elm [Nokogiri::XML::Element]
269
274
  def typed_link(type, elm)
270
275
  {
271
276
  type: type,
@@ -275,7 +280,7 @@ module RelatonItu
275
280
 
276
281
  # Fetch copyright.
277
282
  # @param code [String]
278
- # @param doc [Nokogiri::HTML::Document]
283
+ # @param doc [Mechanize::Page]
279
284
  # @return [Array<Hash>]
280
285
  def fetch_copyright(code, doc)
281
286
  abbreviation = code.match(/^[^-]+/).to_s
@@ -1,3 +1,3 @@
1
1
  module RelatonItu
2
- VERSION = "1.7.3".freeze
2
+ VERSION = "1.7.8".freeze
3
3
  end
@@ -8,7 +8,7 @@ module RelatonItu
8
8
  # @param item_hash [Hash]
9
9
  # @return [RelatonItu::ItuBibliographicItem]
10
10
  def bib_item(item_hash)
11
- ItuBibliographicItem.new item_hash
11
+ ItuBibliographicItem.new **item_hash
12
12
  end
13
13
 
14
14
  # @param ext [Nokogiri::XML::Element]
data/relaton-itu.gemspec CHANGED
@@ -26,16 +26,17 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ["lib"]
27
27
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
28
28
 
29
- spec.add_development_dependency "debase"
29
+ # spec.add_development_dependency "debase"
30
30
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
31
31
  spec.add_development_dependency "pry-byebug"
32
- spec.add_development_dependency "rake", "~> 10.0"
32
+ spec.add_development_dependency "rake", "~> 13.0"
33
33
  spec.add_development_dependency "rspec", "~> 3.0"
34
- spec.add_development_dependency "ruby-debug-ide"
34
+ # spec.add_development_dependency "ruby-debug-ide"
35
35
  spec.add_development_dependency "ruby-jing"
36
36
  spec.add_development_dependency "simplecov"
37
37
  spec.add_development_dependency "vcr", "~> 5.0.0"
38
38
  spec.add_development_dependency "webmock"
39
39
 
40
+ spec.add_dependency "mechanize"
40
41
  spec.add_dependency "relaton-bib", "~> 1.7.0"
41
42
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-itu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.3
4
+ version: 1.7.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-01-04 00:00:00.000000000 Z
11
+ date: 2021-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: debase
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: equivalent-xml
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -58,14 +44,14 @@ dependencies:
58
44
  requirements:
59
45
  - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '10.0'
47
+ version: '13.0'
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '10.0'
54
+ version: '13.0'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: rspec
71
57
  requirement: !ruby/object:Gem::Requirement
@@ -80,20 +66,6 @@ dependencies:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
68
  version: '3.0'
83
- - !ruby/object:Gem::Dependency
84
- name: ruby-debug-ide
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
69
  - !ruby/object:Gem::Dependency
98
70
  name: ruby-jing
99
71
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +122,20 @@ dependencies:
150
122
  - - ">="
151
123
  - !ruby/object:Gem::Version
152
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: mechanize
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
153
139
  - !ruby/object:Gem::Dependency
154
140
  name: relaton-bib
155
141
  requirement: !ruby/object:Gem::Requirement
@@ -172,9 +158,7 @@ executables: []
172
158
  extensions: []
173
159
  extra_rdoc_files: []
174
160
  files:
175
- - ".github/workflows/macos.yml"
176
- - ".github/workflows/ubuntu.yml"
177
- - ".github/workflows/windows.yml"
161
+ - ".github/workflows/rake.yml"
178
162
  - ".gitignore"
179
163
  - ".rspec"
180
164
  - ".rubocop.yml"
@@ -183,6 +167,7 @@ files:
183
167
  - README.adoc
184
168
  - Rakefile
185
169
  - bin/console
170
+ - bin/rspec
186
171
  - bin/setup
187
172
  - grammars/basicdoc.rng
188
173
  - grammars/biblio.rng
@@ -1,34 +0,0 @@
1
- # Auto-generated by Cimas: Do not edit it manually!
2
- # See https://github.com/metanorma/cimas
3
- name: macos
4
-
5
- on:
6
- push:
7
- branches: [ master ]
8
- pull_request:
9
- branches: [ '**' ]
10
-
11
- jobs:
12
- test-macos:
13
- name: Test on Ruby ${{ matrix.ruby }} macOS
14
- runs-on: macos-latest
15
- strategy:
16
- fail-fast: false
17
- matrix:
18
- ruby: [ '2.6', '2.5', '2.4' ]
19
- steps:
20
- - uses: actions/checkout@master
21
- - name: Use Ruby
22
- uses: actions/setup-ruby@v1
23
- with:
24
- ruby-version: ${{ matrix.ruby }}
25
- architecture: 'x64'
26
- - name: Update gems
27
- run: |
28
- sudo gem install bundler --force
29
- ruby -v | grep 2.5 && bundle config set build.debase --with-cflags="-Wno-error=implicit-function-declaration"
30
- ruby -v | grep 2.5 && bundle config set build.ruby-debug-ide --with-cflags="-Wno-error=implicit-function-declaration"
31
- bundle install --jobs 4 --retry 3
32
- - name: Run specs
33
- run: |
34
- bundle exec rake
@@ -1,33 +0,0 @@
1
- # Auto-generated by Cimas: Do not edit it manually!
2
- # See https://github.com/metanorma/cimas
3
- name: ubuntu
4
-
5
- on:
6
- push:
7
- branches: [ master ]
8
- pull_request:
9
- branches: [ '**' ]
10
-
11
- jobs:
12
- test-linux:
13
- name: Test on Ruby ${{ matrix.ruby }} Ubuntu
14
- runs-on: ubuntu-latest
15
- strategy:
16
- fail-fast: false
17
- matrix:
18
- ruby: [ '2.6', '2.5', '2.4' ]
19
- steps:
20
- - uses: actions/checkout@master
21
- - name: Use Ruby
22
- uses: actions/setup-ruby@v1
23
- with:
24
- ruby-version: ${{ matrix.ruby }}
25
- architecture: 'x64'
26
- - name: Update gems
27
- run: |
28
- gem install bundler
29
- bundle install --jobs 4 --retry 3
30
- - name: Run specs
31
- run: |
32
- unset JAVA_TOOL_OPTIONS
33
- bundle exec rake
@@ -1,35 +0,0 @@
1
- # Auto-generated by Cimas: Do not edit it manually!
2
- # See https://github.com/metanorma/cimas
3
- name: windows
4
-
5
- on:
6
- push:
7
- branches: [ master ]
8
- pull_request:
9
- branches: [ '**' ]
10
-
11
- jobs:
12
- test-windows:
13
- name: Test on Ruby ${{ matrix.ruby }} Windows
14
- runs-on: windows-latest
15
- strategy:
16
- fail-fast: false
17
- matrix:
18
- ruby: [ '2.6', '2.5', '2.4' ]
19
- steps:
20
- - uses: actions/checkout@master
21
- - name: Use Ruby
22
- uses: actions/setup-ruby@v1
23
- with:
24
- ruby-version: ${{ matrix.ruby }}
25
- architecture: 'x64'
26
- - name: Update gems
27
- shell: pwsh
28
- run: |
29
- gem install bundler
30
- bundle config --local path vendor/bundle
31
- bundle update
32
- bundle install --jobs 4 --retry 3
33
- - name: Run specs
34
- run: |
35
- bundle exec rake