relaton-itu 0.3.5 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: fb722b2271d57d169b7f89c4ff87b0e16b3a9a25
4
- data.tar.gz: 2755f77b6f20cb3f1f688ec1894132814f9401f0
2
+ SHA256:
3
+ metadata.gz: 4ca4655d0b046848fd1ddd492e4e0d3e7ae53dd28bf491bc4cc049d2eccb5810
4
+ data.tar.gz: 40c7fbd808c557a460fbfed3811c4b87d51ed2dfb937a38a77b07c7c323ae554
5
5
  SHA512:
6
- metadata.gz: 8df40643d1c03fbe71338a898fa19aa0b398ff32cfccd4c0065e864c5dc5e60fefb521fe16c319d1c173711a83850236d86e7ada4b1fbf654be32a994b598995
7
- data.tar.gz: 8a41125691555e4b69775e302717f0b8f95fe675e465757b25bfc9f3ec7d7949fdd589e78252045658c21d74db812b883f25743d24ca64e911f1acd1c445ffda
6
+ metadata.gz: '08234d67ae2e8ec6e5461e80446e183c3482390a850b33e114fac0328b609409eb137eeb58cbf216e5025f4a4d97ea3fcad6d4e4cca8550400ac92ed916fee38'
7
+ data.tar.gz: 2c24f8c0b1fe4f4c25b94c128c51c939dc4980b9c5e38d0b48575c8705abb33d99b3b77ca1bc5222903048ab4b8f3ee6dfdd75c6fa0cdd6e25cf110ed7c4be0c
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- relaton-itu (0.3.5)
4
+ relaton-itu (0.3.6)
5
5
  relaton-iso-bib (~> 0.3.0)
6
6
 
7
7
  GEM
@@ -35,10 +35,10 @@ GEM
35
35
  pry (~> 0.10)
36
36
  public_suffix (4.0.1)
37
37
  rake (10.5.0)
38
- relaton-bib (0.3.9)
38
+ relaton-bib (0.3.11)
39
39
  addressable
40
40
  nokogiri
41
- relaton-iso-bib (0.3.9)
41
+ relaton-iso-bib (0.3.11)
42
42
  isoics (~> 0.1.6)
43
43
  relaton-bib (~> 0.3.0)
44
44
  ruby_deep_clone (~> 0.8.0)
@@ -48,18 +48,18 @@ module RelatonItu
48
48
  # @return [Hash]
49
49
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
50
50
  def parse_page(hit_data)
51
- doc = get_page hit_data[:url]
51
+ url, doc = get_page hit_data[:url]
52
52
 
53
53
  # Fetch edition.
54
54
  edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
55
55
 
56
56
  ItuBibliographicItem.new(
57
57
  fetched: Date.today.to_s,
58
- docid: fetch_docid(hit_data[:code]),
58
+ docid: fetch_docid(doc),
59
59
  edition: edition,
60
60
  language: ["en"],
61
61
  script: ["Latn"],
62
- title: fetch_titles(hit_data),
62
+ title: fetch_titles(doc),
63
63
  doctype: hit_data[:type],
64
64
  docstatus: fetch_status(doc),
65
65
  ics: [], # fetch_ics(doc),
@@ -68,7 +68,7 @@ module RelatonItu
68
68
  editorialgroup: fetch_workgroup(doc),
69
69
  abstract: fetch_abstract(doc),
70
70
  copyright: fetch_copyright(hit_data[:code], doc),
71
- link: fetch_link(doc, hit_data[:url]),
71
+ link: fetch_link(doc, url),
72
72
  relation: fetch_relations(doc),
73
73
  )
74
74
  end
@@ -119,7 +119,7 @@ module RelatonItu
119
119
  uri = URI resp["location"]
120
120
  resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
121
121
  end
122
- Nokogiri::HTML(resp.body)
122
+ [uri.to_s, Nokogiri::HTML(resp.body)]
123
123
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
124
124
  Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
125
125
  OpenSSL::SSL::SSLError
@@ -130,12 +130,15 @@ module RelatonItu
130
130
  # Fetch docid.
131
131
  # @param doc [Nokogiri::HTML::Document]
132
132
  # @return [Hash]
133
- def fetch_docid(code)
134
- # m = code.match(/(?<=\s)(?<project>[^\s]+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)/)
135
- # project_number: m[:project],
136
- # part_number: m[:part],
137
- # subpart_number: m[:subpart],
138
- [RelatonBib::DocumentIdentifier.new(type: "ITU", id: code)]
133
+ def fetch_docid(doc)
134
+ doc.xpath(
135
+ "//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
136
+ "//td[.='Identical standard:']/following-sibling::td",
137
+ ).map do |code|
138
+ id = code.text.match(%r{^.*?(?= \()}).to_s.squeeze(" ")
139
+ type = id.match(%r{^\w+}).to_s
140
+ RelatonBib::DocumentIdentifier.new(type: type, id: id)
141
+ end
139
142
  end
140
143
 
141
144
  # Fetch status.
@@ -186,16 +189,12 @@ module RelatonItu
186
189
  # @return [Array<Hash>]
187
190
  def fetch_relations(doc)
188
191
  doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
189
- r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
190
- type = case r_type
191
- when "in force" then "published"
192
- else r_type
193
- end
192
+ # r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
194
193
  ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
195
194
  # url = DOMAIN + ref[:href].sub(/^\./, "/ITU-T/recommendations")
196
195
  fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
197
196
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(formattedref: fref)
198
- { type: type, bibitem: bibitem }
197
+ { type: "complements", bibitem: bibitem }
199
198
  end
200
199
  end
201
200
  # rubocop:enable Metrics/MethodLength
@@ -208,10 +207,14 @@ module RelatonItu
208
207
  # end
209
208
 
210
209
  # Fetch titles.
211
- # @param hit_data [Hash]
210
+ # @param doc [Nokogiri::HTML::Document]
212
211
  # @return [Array<Hash>]
213
- def fetch_titles(hit_data)
214
- titles = hit_data[:title].split " - "
212
+ def fetch_titles(doc)
213
+ # t = hit_data[:title].match(%r{(?<=\(\d{2}\/\d{4}\): ).*}).to_s
214
+ # t = hit_data[:title] if t.empty?
215
+ t = doc.at("//td[@class='title']")
216
+ return [] unless t
217
+ titles = t.text.split " - "
215
218
  case titles.size
216
219
  when 0
217
220
  intro, main, part = nil, "", nil
@@ -298,8 +301,8 @@ module RelatonItu
298
301
  # @return [Array<Hash>]
299
302
  def fetch_link(doc, url)
300
303
  links = [{ type: "src", content: url }]
301
- obp_elms = doc.at('//table/tr/td/span[contains(@id, "Label4")]/a')
302
- links << { type: "obp", content: DOMAIN + obp_elms[:href] } if obp_elms
304
+ obp_elms = doc.at('//a[@title="Persistent link to download the PDF file"]')
305
+ links << { type: "obp", content: DOMAIN + obp_elms[:href].strip } if obp_elms
303
306
  links
304
307
  end
305
308
 
@@ -1,3 +1,3 @@
1
1
  module RelatonItu
2
- VERSION = "0.3.5".freeze
2
+ VERSION = "0.3.6".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-itu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-14 00:00:00.000000000 Z
11
+ date: 2019-10-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -216,8 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
216
216
  - !ruby/object:Gem::Version
217
217
  version: '0'
218
218
  requirements: []
219
- rubyforge_project:
220
- rubygems_version: 2.6.12
219
+ rubygems_version: 3.0.6
221
220
  signing_key:
222
221
  specification_version: 4
223
222
  summary: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem