relaton-itu 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: fb722b2271d57d169b7f89c4ff87b0e16b3a9a25
4
- data.tar.gz: 2755f77b6f20cb3f1f688ec1894132814f9401f0
2
+ SHA256:
3
+ metadata.gz: 4ca4655d0b046848fd1ddd492e4e0d3e7ae53dd28bf491bc4cc049d2eccb5810
4
+ data.tar.gz: 40c7fbd808c557a460fbfed3811c4b87d51ed2dfb937a38a77b07c7c323ae554
5
5
  SHA512:
6
- metadata.gz: 8df40643d1c03fbe71338a898fa19aa0b398ff32cfccd4c0065e864c5dc5e60fefb521fe16c319d1c173711a83850236d86e7ada4b1fbf654be32a994b598995
7
- data.tar.gz: 8a41125691555e4b69775e302717f0b8f95fe675e465757b25bfc9f3ec7d7949fdd589e78252045658c21d74db812b883f25743d24ca64e911f1acd1c445ffda
6
+ metadata.gz: '08234d67ae2e8ec6e5461e80446e183c3482390a850b33e114fac0328b609409eb137eeb58cbf216e5025f4a4d97ea3fcad6d4e4cca8550400ac92ed916fee38'
7
+ data.tar.gz: 2c24f8c0b1fe4f4c25b94c128c51c939dc4980b9c5e38d0b48575c8705abb33d99b3b77ca1bc5222903048ab4b8f3ee6dfdd75c6fa0cdd6e25cf110ed7c4be0c
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- relaton-itu (0.3.5)
4
+ relaton-itu (0.3.6)
5
5
  relaton-iso-bib (~> 0.3.0)
6
6
 
7
7
  GEM
@@ -35,10 +35,10 @@ GEM
35
35
  pry (~> 0.10)
36
36
  public_suffix (4.0.1)
37
37
  rake (10.5.0)
38
- relaton-bib (0.3.9)
38
+ relaton-bib (0.3.11)
39
39
  addressable
40
40
  nokogiri
41
- relaton-iso-bib (0.3.9)
41
+ relaton-iso-bib (0.3.11)
42
42
  isoics (~> 0.1.6)
43
43
  relaton-bib (~> 0.3.0)
44
44
  ruby_deep_clone (~> 0.8.0)
@@ -48,18 +48,18 @@ module RelatonItu
48
48
  # @return [RelatonItu::ItuBibliographicItem]
49
49
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
50
50
  def parse_page(hit_data)
51
- doc = get_page hit_data[:url]
51
+ url, doc = get_page hit_data[:url]
52
52
 
53
53
  # Fetch edition.
54
54
  edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
55
55
 
56
56
  ItuBibliographicItem.new(
57
57
  fetched: Date.today.to_s,
58
- docid: fetch_docid(hit_data[:code]),
58
+ docid: fetch_docid(doc),
59
59
  edition: edition,
60
60
  language: ["en"],
61
61
  script: ["Latn"],
62
- title: fetch_titles(hit_data),
62
+ title: fetch_titles(doc),
63
63
  doctype: hit_data[:type],
64
64
  docstatus: fetch_status(doc),
65
65
  ics: [], # fetch_ics(doc),
@@ -68,7 +68,7 @@ module RelatonItu
68
68
  editorialgroup: fetch_workgroup(doc),
69
69
  abstract: fetch_abstract(doc),
70
70
  copyright: fetch_copyright(hit_data[:code], doc),
71
- link: fetch_link(doc, hit_data[:url]),
71
+ link: fetch_link(doc, url),
72
72
  relation: fetch_relations(doc),
73
73
  )
74
74
  end
@@ -119,7 +119,7 @@ module RelatonItu
119
119
  uri = URI resp["location"]
120
120
  resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
121
121
  end
122
- Nokogiri::HTML(resp.body)
122
+ [uri.to_s, Nokogiri::HTML(resp.body)]
123
123
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
124
124
  Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
125
125
  OpenSSL::SSL::SSLError
@@ -130,12 +130,15 @@ module RelatonItu
130
130
  # Fetch docid.
131
131
  # @param doc [Nokogiri::HTML::Document]
132
132
  # @return [Array<RelatonBib::DocumentIdentifier>]
133
- def fetch_docid(code)
134
- # m = code.match(/(?<=\s)(?<project>[^\s]+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)/)
135
- # project_number: m[:project],
136
- # part_number: m[:part],
137
- # subpart_number: m[:subpart],
138
- [RelatonBib::DocumentIdentifier.new(type: "ITU", id: code)]
133
+ def fetch_docid(doc)
134
+ doc.xpath(
135
+ "//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
136
+ "//td[.='Identical standard:']/following-sibling::td",
137
+ ).map do |code|
138
+ id = code.text.match(%r{^.*?(?= \()}).to_s.squeeze(" ")
139
+ type = id.match(%r{^\w+}).to_s
140
+ RelatonBib::DocumentIdentifier.new(type: type, id: id)
141
+ end
139
142
  end
140
143
 
141
144
  # Fetch status.
@@ -186,16 +189,12 @@ module RelatonItu
186
189
  # @return [Array<Hash>]
187
190
  def fetch_relations(doc)
188
191
  doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
189
- r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
190
- type = case r_type
191
- when "in force" then "published"
192
- else r_type
193
- end
192
+ # r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
194
193
  ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
195
194
  # url = DOMAIN + ref[:href].sub(/^\./, "/ITU-T/recommendations")
196
195
  fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
197
196
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(formattedref: fref)
198
- { type: type, bibitem: bibitem }
197
+ { type: "complements", bibitem: bibitem }
199
198
  end
200
199
  end
201
200
  # rubocop:enable Metrics/MethodLength
@@ -208,10 +207,14 @@ module RelatonItu
208
207
  # end
209
208
 
210
209
  # Fetch titles.
211
- # @param hit_data [Hash]
210
+ # @param doc [Nokogiri::HTML::Document]
212
211
  # @return [Array<Hash>]
213
- def fetch_titles(hit_data)
214
- titles = hit_data[:title].split " - "
212
+ def fetch_titles(doc)
213
+ # t = hit_data[:title].match(%r{(?<=\(\d{2}\/\d{4}\): ).*}).to_s
214
+ # t = hit_data[:title] if t.empty?
215
+ t = doc.at("//td[@class='title']")
216
+ return [] unless t
217
+ titles = t.text.split " - "
215
218
  case titles.size
216
219
  when 0
217
220
  intro, main, part = nil, "", nil
@@ -298,8 +301,8 @@ module RelatonItu
298
301
  # @return [Array<Hash>]
299
302
  def fetch_link(doc, url)
300
303
  links = [{ type: "src", content: url }]
301
- obp_elms = doc.at('//table/tr/td/span[contains(@id, "Label4")]/a')
302
- links << { type: "obp", content: DOMAIN + obp_elms[:href] } if obp_elms
304
+ obp_elms = doc.at('//a[@title="Persistent link to download the PDF file"]')
305
+ links << { type: "obp", content: DOMAIN + obp_elms[:href].strip } if obp_elms
303
306
  links
304
307
  end
305
308
 
@@ -1,3 +1,3 @@
1
1
  module RelatonItu
2
- VERSION = "0.3.5".freeze
2
+ VERSION = "0.3.6".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-itu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-14 00:00:00.000000000 Z
11
+ date: 2019-10-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -216,8 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
216
216
  - !ruby/object:Gem::Version
217
217
  version: '0'
218
218
  requirements: []
219
- rubyforge_project:
220
- rubygems_version: 2.6.12
219
+ rubygems_version: 3.0.6
221
220
  signing_key:
222
221
  specification_version: 4
223
222
  summary: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem