relaton-itu 1.7.7 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c9e267804ed407243d999f68830546b55db550bd511fa9146ab87e2db146c1d
4
- data.tar.gz: c52daf864de7aeacd2651ca1a5442c0cadd8982e43ff671ae8a730a8a9559096
3
+ metadata.gz: 249fe896ec8a77979ca15d6a42da98ad2ac2620cfe8dc0f468cd14277c5a35b0
4
+ data.tar.gz: 62415ed835abc49cf00d3048b52556b3f718a4ad0dc531ec2c20572b95305210
5
5
  SHA512:
6
- metadata.gz: e412c86f5a31146d27ae6b05feef03c817e2004581efde5e3051a66b37fe061c1e984e417ba92e94ca15032bab17c3e6f70a2062842922c4b300dfc4224cac61
7
- data.tar.gz: e6718282c40d4119d1b7f33bc0f553c611b87d8f46abe57ddf7eb8569f781d22fa54ea135808d7b895d2b9b48b267eafac4d8d25a43df841b35d21d5041dcaea
6
+ metadata.gz: 65a5bcf91f851cc4ec3139fad83b0c83f143b1bafefe8c9638e34072f7d82f76b77517fedf153fcdf7b63ee903fcd372c64e378edaeb231835f477f004f7e94a
7
+ data.tar.gz: 61cdc7df34b24f5d3f3e56b967e9e2b34337bc164b32691747853660cbb6c9e337a6eee058162f97e218da356f4028d7c107b4be4ed7dc10f99253e6985fccd9
data/lib/relaton_itu.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require "mechanize"
1
2
  require "relaton_itu/version"
2
3
  require "relaton_itu/itu_bibliography"
3
4
  require "digest/md5"
@@ -3,6 +3,15 @@ module RelatonItu
3
3
  class << self
4
4
  private
5
5
 
6
+ #
7
+ # Overrides superclass's method
8
+ #
9
+ # @param item [Hash]
10
+ # @return [RelatonItu::ItuBibliographicItem]
11
+ def bib_item(item)
12
+ ItuBibliographicItem.new(**item)
13
+ end
14
+
6
15
  def editorialgroup_hash_to_bib(ret)
7
16
  eg = ret[:editorialgroup]
8
17
  return unless eg
@@ -8,7 +8,7 @@ module RelatonItu
8
8
  # Parse page.
9
9
  # @return [RelatonItu::ItuBibliographicItem]
10
10
  def fetch
11
- @fetch ||= Scrapper.parse_page hit, hit_collection.gi_imp
11
+ @fetch ||= Scrapper.parse_page self, hit_collection.gi_imp
12
12
  end
13
13
  end
14
14
  end
@@ -12,17 +12,21 @@ module RelatonItu
12
12
  # @return [TrueClass, FalseClass]
13
13
  attr_reader :gi_imp
14
14
 
15
+ # @return [Mechanize]
16
+ attr_reader :agent
17
+
15
18
  # @param ref [String]
16
19
  # @param year [String]
17
20
  def initialize(ref, year = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
18
21
  text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
19
22
  super text, year
23
+ @agent = Mechanize.new
24
+ agent.user_agent_alias = "Mac Safari"
20
25
  @gi_imp = /\.Imp\d/.match?(ref)
21
26
  if ref.match? /^(ITU-T|ITU-R\sRR)/
22
- uri = URI "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
27
+ url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
23
28
  data = { json: params.to_json }
24
- resp = Net::HTTP.post(uri, data.to_json,
25
- "Content-Type" => "application/json")
29
+ resp = agent.post url, data.to_json, "Content-Type" => "application/json"
26
30
  @array = hits JSON.parse(resp.body)
27
31
  elsif ref.match? /^ITU-R/
28
32
  rf = ref.sub(/^ITU-R\s/, "").upcase
@@ -47,7 +51,8 @@ module RelatonItu
47
51
  # @return [String]
48
52
  def group
49
53
  @group ||= case text
50
- when %r{OB|Operational Bulletin}, %r{^ITU-R\sRR} then "Publications"
54
+ when %r{OB|Operational Bulletin}, %r{^ITU-R\sRR}
55
+ "Publications"
51
56
  when %r{^ITU-T} then "Recommendations"
52
57
  end
53
58
  end
@@ -28,13 +28,14 @@ module RelatonItu
28
28
  HitCollection.new text, year
29
29
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
30
30
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
31
- Net::ProtocolError, OpenSSL::SSL::SSLError
32
- raise RelatonBib::RequestError, "Could not access http://www.itu.int"
31
+ Net::ProtocolError, URI::InvalidURIError => e
32
+ raise RelatonBib::RequestError, e.message
33
33
  end
34
34
 
35
35
  # @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
36
36
  # @param year [String] the year the standard was published (optional)
37
- # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required
37
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
38
+ # reference is required
38
39
  # @return [String] Relaton XML serialisation of reference
39
40
  def get(code, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
40
41
  if year.nil?
@@ -61,12 +62,12 @@ module RelatonItu
61
62
  warn "[relaton-itu] WARNING: no match found online for #{id}. "\
62
63
  "The code must be exactly like it is on the standards website."
63
64
  unless missed_years.empty?
64
- warn "[relaton-itu] (There was no match for #{year}, though there were matches "\
65
- "found for #{missed_years.join(', ')}.)"
65
+ warn "[relaton-itu] (There was no match for #{year}, though there "\
66
+ "were matches found for #{missed_years.join(', ')}.)"
66
67
  end
67
68
  if /\d-\d/.match? code
68
- warn "[relaton-itu] The provided document part may not exist, or the document "\
69
- "may no longer be published in parts."
69
+ warn "[relaton-itu] The provided document part may not exist, or "\
70
+ "the document may no longer be published in parts."
70
71
  else
71
72
  warn "[relaton-itu] If you wanted to cite all document parts for the reference, "\
72
73
  "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
@@ -123,7 +124,8 @@ module RelatonItu
123
124
  def isobib_results_filter(result, year)
124
125
  missed_years = []
125
126
  result.each do |r|
126
- if !year || /\((\d{2}\/)?(?<pyear>\d{4})\)/ =~ r.hit[:code]
127
+ /\((\d{2}\/)?(?<pyear>\d{4})\)/ =~ r.hit[:code]
128
+ if !year || year == pyear
127
129
  ret = r.fetch
128
130
  return { ret: ret } if ret
129
131
  end
@@ -24,20 +24,18 @@ module RelatonItu
24
24
  }.freeze
25
25
 
26
26
  class << self
27
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
28
-
29
27
  # Parse page.
30
- # @param hit_data [Hash]
28
+ # @param hit [RelatonItu::Hit]
31
29
  # @return [RelatonItu::ItuBibliographicItem, NilClass]
32
- def parse_page(hit_data, imp = false)
33
- url, doc = get_page hit_data[:url]
34
- return unless doc
30
+ def parse_page(hit, imp = false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
31
+ doc = get_page hit
32
+ return unless doc.code == "200"
35
33
 
36
34
  if imp
37
35
  a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
38
36
  return unless a
39
37
 
40
- url, doc = get_page URI.join(url, a[:href]).to_s
38
+ doc = get_page hit, a[:href].to_s
41
39
  end
42
40
 
43
41
  # Fetch edition.
@@ -46,36 +44,37 @@ module RelatonItu
46
44
  ItuBibliographicItem.new(
47
45
  fetched: Date.today.to_s,
48
46
  type: "standard",
49
- docid: fetch_docid(doc, hit_data[:title]),
47
+ docid: fetch_docid(doc, hit.hit[:title]),
50
48
  edition: edition,
51
49
  language: ["en"],
52
50
  script: ["Latn"],
53
51
  title: fetch_titles(doc),
54
- doctype: hit_data[:type],
52
+ doctype: hit.hit[:type],
55
53
  docstatus: fetch_status(doc),
56
54
  ics: [], # fetch_ics(doc),
57
55
  date: fetch_dates(doc),
58
- contributor: fetch_contributors(hit_data[:code]),
59
- editorialgroup: fetch_workgroup(hit_data[:code], doc),
60
- abstract: fetch_abstract(doc),
61
- copyright: fetch_copyright(hit_data[:code], doc),
62
- link: fetch_link(doc, url),
56
+ contributor: fetch_contributors(hit.hit[:code]),
57
+ editorialgroup: fetch_workgroup(hit.hit[:code], doc),
58
+ abstract: fetch_abstract(doc, hit),
59
+ copyright: fetch_copyright(hit.hit[:code], doc),
60
+ link: fetch_link(doc),
63
61
  relation: fetch_relations(doc),
64
62
  place: ["Geneva"]
65
63
  )
66
64
  end
67
- # rubocop:enable Metrics/AbcSize
68
65
 
69
66
  private
70
67
 
71
68
  # Fetch abstracts.
72
- # @param doc [Nokigiri::HTML::Document]
73
- # @return [Array<Array>]
74
- def fetch_abstract(doc) # rubocop:disable Metrics/AbcSize
75
- abstract_url = doc.at('//table/tr/td/span[contains(@id, "lbl_dms")]/div')
69
+ # @param doc [Mechanize::Page]
70
+ # @param hit [RelatonItu::Hit]
71
+ # @return [Array<Hash>]
72
+ def fetch_abstract(doc, hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
73
+ abstract_url = doc.at '//table/tr/td/span[contains(@id, "lbl_dms")]/div'
76
74
  content = if abstract_url
77
75
  url = abstract_url[:onclick].match(/https?[^']+/).to_s
78
- d = Nokogiri::HTML Net::HTTP.get(URI(url)).encode(undef: :replace, replace: "")
76
+ rsp = hit.hit_collection.agent.get url
77
+ d = Nokogiri::HTML rsp.body.encode(undef: :replace, replace: "")
79
78
  d.css("p.MsoNormal").text.gsub(/\r\n/, "").squeeze(" ").gsub(/\u00a0/, "")
80
79
  elsif a = doc.at('//table/tr/td/span[contains(@class, "observation")]/text()')
81
80
  a.text.strip
@@ -90,27 +89,20 @@ module RelatonItu
90
89
  end
91
90
 
92
91
  # Get page.
93
- # @param path [String] page's path
92
+ # @param hit [RelatonItu::Hit]
93
+ # @param url [String, nil]
94
94
  # @return [Mechanize::Page]
95
- def get_page(url)
96
- uri = URI url
97
- resp = Net::HTTP.get_response(uri)
98
- until resp.code == "200"
99
- return if resp["location"] == "/en/publications/pages/notfound.aspx"
100
-
101
- uri = URI resp["location"] if resp.code.match? /^30/
102
- resp = Net::HTTP.get_response(uri)
103
- end
104
- [uri.to_s, Nokogiri::HTML(resp.body)]
95
+ def get_page(hit, url = nil)
96
+ uri = url || hit.hit[:url]
97
+ hit.hit_collection.agent.get uri
105
98
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
106
99
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
107
100
  Net::ProtocolError, OpenSSL::SSL::SSLError
108
- raise RelatonBib::RequestError, "Could not access #{url}"
101
+ raise RelatonBib::RequestError, "Could not access #{uri}"
109
102
  end
110
- # rubocop:enable Metrics/MethodLength
111
103
 
112
104
  # Fetch docid.
113
- # @param doc [Nokogiri::HTML::Document]
105
+ # @param doc [Mechanize::Page]
114
106
  # @param title [String]
115
107
  # @return [Hash]
116
108
  def fetch_docid(doc, title)
@@ -123,6 +115,8 @@ module RelatonItu
123
115
  docids
124
116
  end
125
117
 
118
+ # @param text [String]
119
+ # @return [RelatonBib::DocumentIdentifier]
126
120
  def createdocid(text) # rubocop:disable Metrics/MethodLength
127
121
  %r{
128
122
  ^(?<code>((ITU-\w|ISO\/IEC)\s)?[^\(:]+)
@@ -140,7 +134,7 @@ module RelatonItu
140
134
  end
141
135
 
142
136
  # Fetch status.
143
- # @param doc [Nokogiri::HTML::Document]
137
+ # @param doc [Mechanize::Page]
144
138
  # @return [RelatonBib::DocumentStatus, NilClass]
145
139
  def fetch_status(doc)
146
140
  s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
@@ -153,7 +147,7 @@ module RelatonItu
153
147
 
154
148
  # Fetch workgroup.
155
149
  # @param code [String]
156
- # @param doc [Nokogiri::HTML::Document]
150
+ # @param doc [Mechanize::Page]
157
151
  # @return [RelatonItu::EditorialGroup, NilClass]
158
152
  def fetch_workgroup(code, doc)
159
153
  wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
@@ -161,8 +155,7 @@ module RelatonItu
161
155
 
162
156
  group = wg && itugroup(wg.text)
163
157
  EditorialGroup.new(
164
- bureau: code.match(/(?<=-)./).to_s,
165
- group: group
158
+ bureau: code.match(/(?<=-)./).to_s, group: group
166
159
  )
167
160
  end
168
161
 
@@ -182,24 +175,24 @@ module RelatonItu
182
175
  ItuGroup.new name: name, type: type, acronym: acronym
183
176
  end
184
177
 
185
- # rubocop:disable Metrics/MethodLength
186
-
187
178
  # Fetch relations.
188
- # @param doc [Nokogiri::HTML::Document]
179
+ # @param doc [Mechanize::Page]
189
180
  # @return [Array<Hash>]
190
181
  def fetch_relations(doc)
191
- doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
182
+ doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]')
183
+ .map do |r|
192
184
  ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
193
- fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
194
- bibitem = ItuBibliographicItem.new(formattedref: fref, type: "standard")
185
+ fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en",
186
+ script: "Latn")
187
+ bibitem = ItuBibliographicItem.new(formattedref: fref,
188
+ type: "standard")
195
189
  { type: "complements", bibitem: bibitem }
196
190
  end
197
191
  end
198
- # rubocop:enable Metrics/MethodLength
199
192
 
200
193
  # Fetch titles.
201
- # @param doc [Nokogiri::HTML::Document]
202
- # @return [Array<Hash>]
194
+ # @param doc [Mechanize::Page]
195
+ # @return [RelatonBib::TypedTitleStringCollection]
203
196
  def fetch_titles(doc)
204
197
  t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
205
198
  return [] unless t
@@ -208,7 +201,7 @@ module RelatonItu
208
201
  end
209
202
 
210
203
  # Fetch dates
211
- # @param doc [Nokogiri::HTML::Document]
204
+ # @param doc [Mechanize::Page]
212
205
  # @return [Array<Hash>]
213
206
  def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
214
207
  dates = []
@@ -224,7 +217,7 @@ module RelatonItu
224
217
  end
225
218
 
226
219
  # Scrape Operational Bulletin date.
227
- # @param doc [Nokogiri::HTML::Document]
220
+ # @param doc [Mechanize::Page]
228
221
  # @return [String]
229
222
  def ob_date(doc)
230
223
  pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
@@ -246,7 +239,7 @@ module RelatonItu
246
239
  end
247
240
 
248
241
  # Fetch contributors
249
- # @param doc [Nokogiri::HTML::Document]
242
+ # @param code [String]
250
243
  # @return [Array<Hash>]
251
244
  def fetch_contributors(code)
252
245
  return [] unless code
@@ -262,11 +255,10 @@ module RelatonItu
262
255
  end
263
256
 
264
257
  # Fetch links.
265
- # @param doc [Nokogiri::HTML::Document]
266
- # @param url [String]
258
+ # @param doc [Mechanize::Page]
267
259
  # @return [Array<Hash>]
268
- def fetch_link(doc, url)
269
- links = [{ type: "src", content: url }]
260
+ def fetch_link(doc)
261
+ links = [{ type: "src", content: doc.uri.to_s }]
270
262
  obp_elm = doc.at(
271
263
  '//a[@title="Persistent link to download the PDF file"]',
272
264
  "//font[contains(.,'PDF')]/../.."
@@ -277,6 +269,8 @@ module RelatonItu
277
269
  links
278
270
  end
279
271
 
272
+ # @param type [String]
273
+ # @param elm [Nokogiri::XML::Element]
280
274
  def typed_link(type, elm)
281
275
  {
282
276
  type: type,
@@ -286,7 +280,7 @@ module RelatonItu
286
280
 
287
281
  # Fetch copyright.
288
282
  # @param code [String]
289
- # @param doc [Nokogiri::HTML::Document]
283
+ # @param doc [Mechanize::Page]
290
284
  # @return [Array<Hash>]
291
285
  def fetch_copyright(code, doc)
292
286
  abbreviation = code.match(/^[^-]+/).to_s
@@ -1,3 +1,3 @@
1
1
  module RelatonItu
2
- VERSION = "1.7.7".freeze
2
+ VERSION = "1.7.8".freeze
3
3
  end
data/relaton-itu.gemspec CHANGED
@@ -37,5 +37,6 @@ Gem::Specification.new do |spec|
37
37
  spec.add_development_dependency "vcr", "~> 5.0.0"
38
38
  spec.add_development_dependency "webmock"
39
39
 
40
+ spec.add_dependency "mechanize"
40
41
  spec.add_dependency "relaton-bib", "~> 1.7.0"
41
42
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-itu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.7
4
+ version: 1.7.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-14 00:00:00.000000000 Z
11
+ date: 2021-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: mechanize
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: relaton-bib
127
141
  requirement: !ruby/object:Gem::Requirement