relaton-gb 1.18.1 → 1.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a44c8b9396c30bc3aa644a45da432d5327bff2954af18af1f22fbe416b694b38
4
- data.tar.gz: c9a157c81de458c6d76c8980a1f68cadb222b3c1a1ef1e8e8ae6de4e005cd739
3
+ metadata.gz: c4e2a76bd47e6b265cce01a1ba065fdc4490735855e9e2bb67a1c96255806a06
4
+ data.tar.gz: 827223b0f3d44cd5df21894f60161aadb6efb149bda9f01748ed8e93bb437eb9
5
5
  SHA512:
6
- metadata.gz: 322268f34b944fdccc2b0500fd599a9866e28ee84123b8990d34a032d6321909a509991c75c781dbd91afc50eb9b72e29c3b6c7c0cbf9bbd413ee4e07dbea35f
7
- data.tar.gz: ac2e1c9efc5df285e1f11f792e62b45241f615ed579017d1365738cda504143c53925ff3d4baee89389198554d8ce99e9a6a27d5d50f89e3b36b5dbfac2b15a8
6
+ metadata.gz: 61885e81838c7f922eafc1847d32db3eed5e82a7c6514858c1f05b298a7a1bc7eee2aafe43bddde644684187e5ef65e279000c0df6c25eb75bba8f97ffae95c5
7
+ data.tar.gz: d04bdc20de49043e9238f357535e2752d7be37dc552f70f7c66272f37af084f6e506bc5d7f13b2abbaeee0928d61652fa63287a669025932c314b38b642b3a6f
@@ -10,15 +10,15 @@ module RelatonGb
10
10
  # National standard scrapper.
11
11
  module GbScrapper
12
12
  extend Scrapper
13
+ SEARCH_URL = "https://openstd.samr.gov.cn/bzgk/gb/std_list"
14
+ DOC_URL = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno="
13
15
 
14
16
  class << self
15
17
  # @param text [String] code of standard to search for
16
18
  # @return [RelatonGb::HitCollection]
17
19
  def scrape_page(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
18
- host = "https://openstd.samr.gov.cn/bzgk/gb/std_list"
19
- search_html = OpenURI.open_uri("#{host}?p.p2=#{text}", ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
20
- result = Nokogiri::HTML search_html
21
- hits = result.xpath(
20
+ doc = agent.get("#{SEARCH_URL}?p.p2=#{CGI.escape(text)}")
21
+ hits = doc.xpath(
22
22
  "//table[contains(@class, 'result_list')]/tbody[2]/tr",
23
23
  ).map do |h|
24
24
  ref = h.at "./td[2]/a"
@@ -27,18 +27,22 @@ module RelatonGb
27
27
  Hit.new pid: pid, docref: ref.text, scrapper: self, release_date: rdate
28
28
  end
29
29
  HitCollection.new hits.sort_by(&:release_date).reverse
30
- rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
31
- raise RelatonBib::RequestError, "Cannot access #{host}"
30
+ rescue Mechanize::Error => e
31
+ raise RelatonBib::RequestError, e.message
32
+ end
33
+
34
+ def agent
35
+ @agent ||= Mechanize.new
32
36
  end
33
37
 
34
38
  # @param hit [RelatonGb::Hit] standard's page id
35
39
  # @return [RelatonGb::GbBibliographicItem]
36
40
  def scrape_doc(hit)
37
- src = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno=#{hit.pid}"
38
- doc = Nokogiri::HTML OpenURI.open_uri(src, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
41
+ src = DOC_URL + hit.pid
42
+ doc = agent.get src
39
43
  GbBibliographicItem.new(**scrapped_data(doc, src, hit))
40
- rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
41
- raise RelatonBib::RequestError, "Cannot access #{src}"
44
+ rescue Mechanize::Error => e
45
+ raise RelatonBib::RequestError, e.message
42
46
  end
43
47
 
44
48
  # @param doc [Nokogiri::HTML]
@@ -99,7 +99,9 @@ module RelatonGb
99
99
  # @param status [String, NilClass]
100
100
  # @return [RelatonBib::DocumentStatus]
101
101
  def get_status(doc, status = nil)
102
- status ||= doc.at("//td[contains(., '标准状态')]/span")&.text
102
+ status ||= doc.at("//td[contains(., '标准状态')]/span")&.text&.strip
103
+ return unless STAGES[status]
104
+
103
105
  RelatonBib::DocumentStatus.new stage: STAGES[status]
104
106
  end
105
107
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonGb
4
- VERSION = "1.18.1"
4
+ VERSION = "1.18.2"
5
5
  end
data/lib/relaton_gb.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "digest/md5"
2
+ require "mechanize"
2
3
  require "relaton_iso_bib"
3
4
  require "cnccs"
4
5
  require "relaton_gb/version"
data/relaton_gb.gemspec CHANGED
@@ -26,5 +26,6 @@ Gem::Specification.new do |spec|
26
26
 
27
27
  spec.add_dependency "cnccs", "~> 0.1.1"
28
28
  spec.add_dependency "gb-agencies", "~> 0.0.1"
29
+ spec.add_dependency "mechanize", "~> 2.7"
29
30
  spec.add_dependency "relaton-iso-bib", "~> 1.18.0"
30
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-gb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.1
4
+ version: 1.18.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-16 00:00:00.000000000 Z
11
+ date: 2024-06-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cnccs
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.0.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: mechanize
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.7'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.7'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: relaton-iso-bib
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -118,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
132
  - !ruby/object:Gem::Version
119
133
  version: '0'
120
134
  requirements: []
121
- rubygems_version: 3.3.26
135
+ rubygems_version: 3.3.27
122
136
  signing_key:
123
137
  specification_version: 4
124
138
  summary: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using the