relaton-itu 1.7.7 → 1.7.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c9e267804ed407243d999f68830546b55db550bd511fa9146ab87e2db146c1d
4
- data.tar.gz: c52daf864de7aeacd2651ca1a5442c0cadd8982e43ff671ae8a730a8a9559096
3
+ metadata.gz: 249fe896ec8a77979ca15d6a42da98ad2ac2620cfe8dc0f468cd14277c5a35b0
4
+ data.tar.gz: 62415ed835abc49cf00d3048b52556b3f718a4ad0dc531ec2c20572b95305210
5
5
  SHA512:
6
- metadata.gz: e412c86f5a31146d27ae6b05feef03c817e2004581efde5e3051a66b37fe061c1e984e417ba92e94ca15032bab17c3e6f70a2062842922c4b300dfc4224cac61
7
- data.tar.gz: e6718282c40d4119d1b7f33bc0f553c611b87d8f46abe57ddf7eb8569f781d22fa54ea135808d7b895d2b9b48b267eafac4d8d25a43df841b35d21d5041dcaea
6
+ metadata.gz: 65a5bcf91f851cc4ec3139fad83b0c83f143b1bafefe8c9638e34072f7d82f76b77517fedf153fcdf7b63ee903fcd372c64e378edaeb231835f477f004f7e94a
7
+ data.tar.gz: 61cdc7df34b24f5d3f3e56b967e9e2b34337bc164b32691747853660cbb6c9e337a6eee058162f97e218da356f4028d7c107b4be4ed7dc10f99253e6985fccd9
data/lib/relaton_itu.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require "mechanize"
1
2
  require "relaton_itu/version"
2
3
  require "relaton_itu/itu_bibliography"
3
4
  require "digest/md5"
@@ -3,6 +3,15 @@ module RelatonItu
3
3
  class << self
4
4
  private
5
5
 
6
+ #
7
+ # Overrides superclass's method
8
+ #
9
+ # @param item [Hash]
10
+ # @return [RelatonItu::ItuBibliographicItem]
11
+ def bib_item(item)
12
+ ItuBibliographicItem.new(**item)
13
+ end
14
+
6
15
  def editorialgroup_hash_to_bib(ret)
7
16
  eg = ret[:editorialgroup]
8
17
  return unless eg
@@ -8,7 +8,7 @@ module RelatonItu
8
8
  # Parse page.
9
9
  # @return [RelatonItu::ItuBibliographicItem]
10
10
  def fetch
11
- @fetch ||= Scrapper.parse_page hit, hit_collection.gi_imp
11
+ @fetch ||= Scrapper.parse_page self, hit_collection.gi_imp
12
12
  end
13
13
  end
14
14
  end
@@ -12,17 +12,21 @@ module RelatonItu
12
12
  # @return [TrueClass, FalseClass]
13
13
  attr_reader :gi_imp
14
14
 
15
+ # @return [Mechanize]
16
+ attr_reader :agent
17
+
15
18
  # @param ref [String]
16
19
  # @param year [String]
17
20
  def initialize(ref, year = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
18
21
  text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
19
22
  super text, year
23
+ @agent = Mechanize.new
24
+ agent.user_agent_alias = "Mac Safari"
20
25
  @gi_imp = /\.Imp\d/.match?(ref)
21
26
  if ref.match? /^(ITU-T|ITU-R\sRR)/
22
- uri = URI "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
27
+ url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
23
28
  data = { json: params.to_json }
24
- resp = Net::HTTP.post(uri, data.to_json,
25
- "Content-Type" => "application/json")
29
+ resp = agent.post url, data.to_json, "Content-Type" => "application/json"
26
30
  @array = hits JSON.parse(resp.body)
27
31
  elsif ref.match? /^ITU-R/
28
32
  rf = ref.sub(/^ITU-R\s/, "").upcase
@@ -47,7 +51,8 @@ module RelatonItu
47
51
  # @return [String]
48
52
  def group
49
53
  @group ||= case text
50
- when %r{OB|Operational Bulletin}, %r{^ITU-R\sRR} then "Publications"
54
+ when %r{OB|Operational Bulletin}, %r{^ITU-R\sRR}
55
+ "Publications"
51
56
  when %r{^ITU-T} then "Recommendations"
52
57
  end
53
58
  end
@@ -28,13 +28,14 @@ module RelatonItu
28
28
  HitCollection.new text, year
29
29
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
30
30
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
31
- Net::ProtocolError, OpenSSL::SSL::SSLError
32
- raise RelatonBib::RequestError, "Could not access http://www.itu.int"
31
+ Net::ProtocolError, URI::InvalidURIError => e
32
+ raise RelatonBib::RequestError, e.message
33
33
  end
34
34
 
35
35
  # @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
36
36
  # @param year [String] the year the standard was published (optional)
37
- # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required
37
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
38
+ # reference is required
38
39
  # @return [String] Relaton XML serialisation of reference
39
40
  def get(code, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
40
41
  if year.nil?
@@ -61,12 +62,12 @@ module RelatonItu
61
62
  warn "[relaton-itu] WARNING: no match found online for #{id}. "\
62
63
  "The code must be exactly like it is on the standards website."
63
64
  unless missed_years.empty?
64
- warn "[relaton-itu] (There was no match for #{year}, though there were matches "\
65
- "found for #{missed_years.join(', ')}.)"
65
+ warn "[relaton-itu] (There was no match for #{year}, though there "\
66
+ "were matches found for #{missed_years.join(', ')}.)"
66
67
  end
67
68
  if /\d-\d/.match? code
68
- warn "[relaton-itu] The provided document part may not exist, or the document "\
69
- "may no longer be published in parts."
69
+ warn "[relaton-itu] The provided document part may not exist, or "\
70
+ "the document may no longer be published in parts."
70
71
  else
71
72
  warn "[relaton-itu] If you wanted to cite all document parts for the reference, "\
72
73
  "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
@@ -123,7 +124,8 @@ module RelatonItu
123
124
  def isobib_results_filter(result, year)
124
125
  missed_years = []
125
126
  result.each do |r|
126
- if !year || /\((\d{2}\/)?(?<pyear>\d{4})\)/ =~ r.hit[:code]
127
+ /\((\d{2}\/)?(?<pyear>\d{4})\)/ =~ r.hit[:code]
128
+ if !year || year == pyear
127
129
  ret = r.fetch
128
130
  return { ret: ret } if ret
129
131
  end
@@ -24,20 +24,18 @@ module RelatonItu
24
24
  }.freeze
25
25
 
26
26
  class << self
27
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
28
-
29
27
  # Parse page.
30
- # @param hit_data [Hash]
28
+ # @param hit [RelatonItu::Hit]
31
29
  # @return [RelatonItu::ItuBibliographicItem, NilClass]
32
- def parse_page(hit_data, imp = false)
33
- url, doc = get_page hit_data[:url]
34
- return unless doc
30
+ def parse_page(hit, imp = false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
31
+ doc = get_page hit
32
+ return unless doc.code == "200"
35
33
 
36
34
  if imp
37
35
  a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
38
36
  return unless a
39
37
 
40
- url, doc = get_page URI.join(url, a[:href]).to_s
38
+ doc = get_page hit, a[:href].to_s
41
39
  end
42
40
 
43
41
  # Fetch edition.
@@ -46,36 +44,37 @@ module RelatonItu
46
44
  ItuBibliographicItem.new(
47
45
  fetched: Date.today.to_s,
48
46
  type: "standard",
49
- docid: fetch_docid(doc, hit_data[:title]),
47
+ docid: fetch_docid(doc, hit.hit[:title]),
50
48
  edition: edition,
51
49
  language: ["en"],
52
50
  script: ["Latn"],
53
51
  title: fetch_titles(doc),
54
- doctype: hit_data[:type],
52
+ doctype: hit.hit[:type],
55
53
  docstatus: fetch_status(doc),
56
54
  ics: [], # fetch_ics(doc),
57
55
  date: fetch_dates(doc),
58
- contributor: fetch_contributors(hit_data[:code]),
59
- editorialgroup: fetch_workgroup(hit_data[:code], doc),
60
- abstract: fetch_abstract(doc),
61
- copyright: fetch_copyright(hit_data[:code], doc),
62
- link: fetch_link(doc, url),
56
+ contributor: fetch_contributors(hit.hit[:code]),
57
+ editorialgroup: fetch_workgroup(hit.hit[:code], doc),
58
+ abstract: fetch_abstract(doc, hit),
59
+ copyright: fetch_copyright(hit.hit[:code], doc),
60
+ link: fetch_link(doc),
63
61
  relation: fetch_relations(doc),
64
62
  place: ["Geneva"]
65
63
  )
66
64
  end
67
- # rubocop:enable Metrics/AbcSize
68
65
 
69
66
  private
70
67
 
71
68
  # Fetch abstracts.
72
- # @param doc [Nokigiri::HTML::Document]
73
- # @return [Array<Array>]
74
- def fetch_abstract(doc) # rubocop:disable Metrics/AbcSize
75
- abstract_url = doc.at('//table/tr/td/span[contains(@id, "lbl_dms")]/div')
69
+ # @param doc [Mechanize::Page]
70
+ # @param hit [RelatonItu::Hit]
71
+ # @return [Array<Hash>]
72
+ def fetch_abstract(doc, hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
73
+ abstract_url = doc.at '//table/tr/td/span[contains(@id, "lbl_dms")]/div'
76
74
  content = if abstract_url
77
75
  url = abstract_url[:onclick].match(/https?[^']+/).to_s
78
- d = Nokogiri::HTML Net::HTTP.get(URI(url)).encode(undef: :replace, replace: "")
76
+ rsp = hit.hit_collection.agent.get url
77
+ d = Nokogiri::HTML rsp.body.encode(undef: :replace, replace: "")
79
78
  d.css("p.MsoNormal").text.gsub(/\r\n/, "").squeeze(" ").gsub(/\u00a0/, "")
80
79
  elsif a = doc.at('//table/tr/td/span[contains(@class, "observation")]/text()')
81
80
  a.text.strip
@@ -90,27 +89,20 @@ module RelatonItu
90
89
  end
91
90
 
92
91
  # Get page.
93
- # @param path [String] page's path
92
+ # @param hit [RelatonItu::Hit]
93
+ # @param url [String, nil]
94
94
  # @return [Mechanize::Page]
95
- def get_page(url)
96
- uri = URI url
97
- resp = Net::HTTP.get_response(uri)
98
- until resp.code == "200"
99
- return if resp["location"] == "/en/publications/pages/notfound.aspx"
100
-
101
- uri = URI resp["location"] if resp.code.match? /^30/
102
- resp = Net::HTTP.get_response(uri)
103
- end
104
- [uri.to_s, Nokogiri::HTML(resp.body)]
95
+ def get_page(hit, url = nil)
96
+ uri = url || hit.hit[:url]
97
+ hit.hit_collection.agent.get uri
105
98
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
106
99
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
107
100
  Net::ProtocolError, OpenSSL::SSL::SSLError
108
- raise RelatonBib::RequestError, "Could not access #{url}"
101
+ raise RelatonBib::RequestError, "Could not access #{uri}"
109
102
  end
110
- # rubocop:enable Metrics/MethodLength
111
103
 
112
104
  # Fetch docid.
113
- # @param doc [Nokogiri::HTML::Document]
105
+ # @param doc [Mechanize::Page]
114
106
  # @param title [String]
115
107
  # @return [Hash]
116
108
  def fetch_docid(doc, title)
@@ -123,6 +115,8 @@ module RelatonItu
123
115
  docids
124
116
  end
125
117
 
118
+ # @param text [String]
119
+ # @return [RelatonBib::DocumentIdentifier]
126
120
  def createdocid(text) # rubocop:disable Metrics/MethodLength
127
121
  %r{
128
122
  ^(?<code>((ITU-\w|ISO\/IEC)\s)?[^\(:]+)
@@ -140,7 +134,7 @@ module RelatonItu
140
134
  end
141
135
 
142
136
  # Fetch status.
143
- # @param doc [Nokogiri::HTML::Document]
137
+ # @param doc [Mechanize::Page]
144
138
  # @return [RelatonBib::DocumentStatus, NilClass]
145
139
  def fetch_status(doc)
146
140
  s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
@@ -153,7 +147,7 @@ module RelatonItu
153
147
 
154
148
  # Fetch workgroup.
155
149
  # @param code [String]
156
- # @param doc [Nokogiri::HTML::Document]
150
+ # @param doc [Mechanize::Page]
157
151
  # @return [RelatonItu::EditorialGroup, NilClass]
158
152
  def fetch_workgroup(code, doc)
159
153
  wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
@@ -161,8 +155,7 @@ module RelatonItu
161
155
 
162
156
  group = wg && itugroup(wg.text)
163
157
  EditorialGroup.new(
164
- bureau: code.match(/(?<=-)./).to_s,
165
- group: group
158
+ bureau: code.match(/(?<=-)./).to_s, group: group
166
159
  )
167
160
  end
168
161
 
@@ -182,24 +175,24 @@ module RelatonItu
182
175
  ItuGroup.new name: name, type: type, acronym: acronym
183
176
  end
184
177
 
185
- # rubocop:disable Metrics/MethodLength
186
-
187
178
  # Fetch relations.
188
- # @param doc [Nokogiri::HTML::Document]
179
+ # @param doc [Mechanize::Page]
189
180
  # @return [Array<Hash>]
190
181
  def fetch_relations(doc)
191
- doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
182
+ doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]')
183
+ .map do |r|
192
184
  ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
193
- fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
194
- bibitem = ItuBibliographicItem.new(formattedref: fref, type: "standard")
185
+ fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en",
186
+ script: "Latn")
187
+ bibitem = ItuBibliographicItem.new(formattedref: fref,
188
+ type: "standard")
195
189
  { type: "complements", bibitem: bibitem }
196
190
  end
197
191
  end
198
- # rubocop:enable Metrics/MethodLength
199
192
 
200
193
  # Fetch titles.
201
- # @param doc [Nokogiri::HTML::Document]
202
- # @return [Array<Hash>]
194
+ # @param doc [Mechanize::Page]
195
+ # @return [RelatonBib::TypedTitleStringCollection]
203
196
  def fetch_titles(doc)
204
197
  t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
205
198
  return [] unless t
@@ -208,7 +201,7 @@ module RelatonItu
208
201
  end
209
202
 
210
203
  # Fetch dates
211
- # @param doc [Nokogiri::HTML::Document]
204
+ # @param doc [Mechanize::Page]
212
205
  # @return [Array<Hash>]
213
206
  def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
214
207
  dates = []
@@ -224,7 +217,7 @@ module RelatonItu
224
217
  end
225
218
 
226
219
  # Scrape Operational Bulletin date.
227
- # @param doc [Nokogiri::HTML::Document]
220
+ # @param doc [Mechanize::Page]
228
221
  # @return [String]
229
222
  def ob_date(doc)
230
223
  pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
@@ -246,7 +239,7 @@ module RelatonItu
246
239
  end
247
240
 
248
241
  # Fetch contributors
249
- # @param doc [Nokogiri::HTML::Document]
242
+ # @param code [String]
250
243
  # @return [Array<Hash>]
251
244
  def fetch_contributors(code)
252
245
  return [] unless code
@@ -262,11 +255,10 @@ module RelatonItu
262
255
  end
263
256
 
264
257
  # Fetch links.
265
- # @param doc [Nokogiri::HTML::Document]
266
- # @param url [String]
258
+ # @param doc [Mechanize::Page]
267
259
  # @return [Array<Hash>]
268
- def fetch_link(doc, url)
269
- links = [{ type: "src", content: url }]
260
+ def fetch_link(doc)
261
+ links = [{ type: "src", content: doc.uri.to_s }]
270
262
  obp_elm = doc.at(
271
263
  '//a[@title="Persistent link to download the PDF file"]',
272
264
  "//font[contains(.,'PDF')]/../.."
@@ -277,6 +269,8 @@ module RelatonItu
277
269
  links
278
270
  end
279
271
 
272
+ # @param type [String]
273
+ # @param elm [Nokogiri::XML::Element]
280
274
  def typed_link(type, elm)
281
275
  {
282
276
  type: type,
@@ -286,7 +280,7 @@ module RelatonItu
286
280
 
287
281
  # Fetch copyright.
288
282
  # @param code [String]
289
- # @param doc [Nokogiri::HTML::Document]
283
+ # @param doc [Mechanize::Page]
290
284
  # @return [Array<Hash>]
291
285
  def fetch_copyright(code, doc)
292
286
  abbreviation = code.match(/^[^-]+/).to_s
@@ -1,3 +1,3 @@
1
1
  module RelatonItu
2
- VERSION = "1.7.7".freeze
2
+ VERSION = "1.7.8".freeze
3
3
  end
data/relaton-itu.gemspec CHANGED
@@ -37,5 +37,6 @@ Gem::Specification.new do |spec|
37
37
  spec.add_development_dependency "vcr", "~> 5.0.0"
38
38
  spec.add_development_dependency "webmock"
39
39
 
40
+ spec.add_dependency "mechanize"
40
41
  spec.add_dependency "relaton-bib", "~> 1.7.0"
41
42
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-itu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.7
4
+ version: 1.7.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-14 00:00:00.000000000 Z
11
+ date: 2021-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: mechanize
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: relaton-bib
127
141
  requirement: !ruby/object:Gem::Requirement