relaton-gb 1.18.1 → 1.18.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a44c8b9396c30bc3aa644a45da432d5327bff2954af18af1f22fbe416b694b38
4
- data.tar.gz: c9a157c81de458c6d76c8980a1f68cadb222b3c1a1ef1e8e8ae6de4e005cd739
3
+ metadata.gz: c4e2a76bd47e6b265cce01a1ba065fdc4490735855e9e2bb67a1c96255806a06
4
+ data.tar.gz: 827223b0f3d44cd5df21894f60161aadb6efb149bda9f01748ed8e93bb437eb9
5
5
  SHA512:
6
- metadata.gz: 322268f34b944fdccc2b0500fd599a9866e28ee84123b8990d34a032d6321909a509991c75c781dbd91afc50eb9b72e29c3b6c7c0cbf9bbd413ee4e07dbea35f
7
- data.tar.gz: ac2e1c9efc5df285e1f11f792e62b45241f615ed579017d1365738cda504143c53925ff3d4baee89389198554d8ce99e9a6a27d5d50f89e3b36b5dbfac2b15a8
6
+ metadata.gz: 61885e81838c7f922eafc1847d32db3eed5e82a7c6514858c1f05b298a7a1bc7eee2aafe43bddde644684187e5ef65e279000c0df6c25eb75bba8f97ffae95c5
7
+ data.tar.gz: d04bdc20de49043e9238f357535e2752d7be37dc552f70f7c66272f37af084f6e506bc5d7f13b2abbaeee0928d61652fa63287a669025932c314b38b642b3a6f
@@ -10,15 +10,15 @@ module RelatonGb
10
10
  # National standard scrapper.
11
11
  module GbScrapper
12
12
  extend Scrapper
13
+ SEARCH_URL = "https://openstd.samr.gov.cn/bzgk/gb/std_list"
14
+ DOC_URL = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno="
13
15
 
14
16
  class << self
15
17
  # @param text [String] code of standard to search for
16
18
  # @return [RelatonGb::HitCollection]
17
19
  def scrape_page(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
18
- host = "https://openstd.samr.gov.cn/bzgk/gb/std_list"
19
- search_html = OpenURI.open_uri("#{host}?p.p2=#{text}", ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
20
- result = Nokogiri::HTML search_html
21
- hits = result.xpath(
20
+ doc = agent.get("#{SEARCH_URL}?p.p2=#{CGI.escape(text)}")
21
+ hits = doc.xpath(
22
22
  "//table[contains(@class, 'result_list')]/tbody[2]/tr",
23
23
  ).map do |h|
24
24
  ref = h.at "./td[2]/a"
@@ -27,18 +27,22 @@ module RelatonGb
27
27
  Hit.new pid: pid, docref: ref.text, scrapper: self, release_date: rdate
28
28
  end
29
29
  HitCollection.new hits.sort_by(&:release_date).reverse
30
- rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
31
- raise RelatonBib::RequestError, "Cannot access #{host}"
30
+ rescue Mechanize::Error => e
31
+ raise RelatonBib::RequestError, e.message
32
+ end
33
+
34
+ def agent
35
+ @agent ||= Mechanize.new
32
36
  end
33
37
 
34
38
  # @param hit [RelatonGb::Hit] standard's page id
35
39
  # @return [RelatonGb::GbBibliographicItem]
36
40
  def scrape_doc(hit)
37
- src = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno=#{hit.pid}"
38
- doc = Nokogiri::HTML OpenURI.open_uri(src, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
41
+ src = DOC_URL + hit.pid
42
+ doc = agent.get src
39
43
  GbBibliographicItem.new(**scrapped_data(doc, src, hit))
40
- rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
41
- raise RelatonBib::RequestError, "Cannot access #{src}"
44
+ rescue Mechanize::Error => e
45
+ raise RelatonBib::RequestError, e.message
42
46
  end
43
47
 
44
48
  # @param doc [Nokogiri::HTML]
@@ -99,7 +99,9 @@ module RelatonGb
99
99
  # @param status [String, NilClass]
100
100
  # @return [RelatonBib::DocumentStatus]
101
101
  def get_status(doc, status = nil)
102
- status ||= doc.at("//td[contains(., '标准状态')]/span")&.text
102
+ status ||= doc.at("//td[contains(., '标准状态')]/span")&.text&.strip
103
+ return unless STAGES[status]
104
+
103
105
  RelatonBib::DocumentStatus.new stage: STAGES[status]
104
106
  end
105
107
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonGb
4
- VERSION = "1.18.1"
4
+ VERSION = "1.18.2"
5
5
  end
data/lib/relaton_gb.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "digest/md5"
2
+ require "mechanize"
2
3
  require "relaton_iso_bib"
3
4
  require "cnccs"
4
5
  require "relaton_gb/version"
data/relaton_gb.gemspec CHANGED
@@ -26,5 +26,6 @@ Gem::Specification.new do |spec|
26
26
 
27
27
  spec.add_dependency "cnccs", "~> 0.1.1"
28
28
  spec.add_dependency "gb-agencies", "~> 0.0.1"
29
+ spec.add_dependency "mechanize", "~> 2.7"
29
30
  spec.add_dependency "relaton-iso-bib", "~> 1.18.0"
30
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-gb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.1
4
+ version: 1.18.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-16 00:00:00.000000000 Z
11
+ date: 2024-06-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cnccs
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.0.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: mechanize
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.7'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.7'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: relaton-iso-bib
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -118,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
132
  - !ruby/object:Gem::Version
119
133
  version: '0'
120
134
  requirements: []
121
- rubygems_version: 3.3.26
135
+ rubygems_version: 3.3.27
122
136
  signing_key:
123
137
  specification_version: 4
124
138
  summary: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using the