relaton-gb 1.18.1 → 1.18.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_gb/gb_scrapper.rb +14 -10
- data/lib/relaton_gb/scrapper.rb +3 -1
- data/lib/relaton_gb/version.rb +1 -1
- data/lib/relaton_gb.rb +1 -0
- data/relaton_gb.gemspec +1 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4e2a76bd47e6b265cce01a1ba065fdc4490735855e9e2bb67a1c96255806a06
|
4
|
+
data.tar.gz: 827223b0f3d44cd5df21894f60161aadb6efb149bda9f01748ed8e93bb437eb9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61885e81838c7f922eafc1847d32db3eed5e82a7c6514858c1f05b298a7a1bc7eee2aafe43bddde644684187e5ef65e279000c0df6c25eb75bba8f97ffae95c5
|
7
|
+
data.tar.gz: d04bdc20de49043e9238f357535e2752d7be37dc552f70f7c66272f37af084f6e506bc5d7f13b2abbaeee0928d61652fa63287a669025932c314b38b642b3a6f
|
@@ -10,15 +10,15 @@ module RelatonGb
|
|
10
10
|
# National standard scrapper.
|
11
11
|
module GbScrapper
|
12
12
|
extend Scrapper
|
13
|
+
SEARCH_URL = "https://openstd.samr.gov.cn/bzgk/gb/std_list"
|
14
|
+
DOC_URL = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno="
|
13
15
|
|
14
16
|
class << self
|
15
17
|
# @param text [Strin] code of standard for serarch
|
16
18
|
# @return [RelatonGb::HitCollection]
|
17
19
|
def scrape_page(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
18
|
-
|
19
|
-
|
20
|
-
result = Nokogiri::HTML search_html
|
21
|
-
hits = result.xpath(
|
20
|
+
doc = agent.get("#{SEARCH_URL}?p.p2=#{CGI.escape(text)}")
|
21
|
+
hits = doc.xpath(
|
22
22
|
"//table[contains(@class, 'result_list')]/tbody[2]/tr",
|
23
23
|
).map do |h|
|
24
24
|
ref = h.at "./td[2]/a"
|
@@ -27,18 +27,22 @@ module RelatonGb
|
|
27
27
|
Hit.new pid: pid, docref: ref.text, scrapper: self, release_date: rdate
|
28
28
|
end
|
29
29
|
HitCollection.new hits.sort_by(&:release_date).reverse
|
30
|
-
rescue
|
31
|
-
raise RelatonBib::RequestError,
|
30
|
+
rescue Mechanize::Error => e
|
31
|
+
raise RelatonBib::RequestError, e.message
|
32
|
+
end
|
33
|
+
|
34
|
+
def agent
|
35
|
+
@agent ||= Mechanize.new
|
32
36
|
end
|
33
37
|
|
34
38
|
# @param hit [RelatonGb::Hit] standard's page id
|
35
39
|
# @return [RelatonGb::GbBibliographicItem]
|
36
40
|
def scrape_doc(hit)
|
37
|
-
src =
|
38
|
-
doc =
|
41
|
+
src = DOC_URL + hit.pid
|
42
|
+
doc = agent.get src
|
39
43
|
GbBibliographicItem.new(**scrapped_data(doc, src, hit))
|
40
|
-
rescue
|
41
|
-
raise RelatonBib::RequestError,
|
44
|
+
rescue Mechanize::Error => e
|
45
|
+
raise RelatonBib::RequestError, e.message
|
42
46
|
end
|
43
47
|
|
44
48
|
# @param doc [Nokogiri::HTML]
|
data/lib/relaton_gb/scrapper.rb
CHANGED
@@ -99,7 +99,9 @@ module RelatonGb
|
|
99
99
|
# @param status [String, NilClass]
|
100
100
|
# @return [RelatonBib::DocumentStatus]
|
101
101
|
def get_status(doc, status = nil)
|
102
|
-
status ||= doc.at("//td[contains(., '标准状态')]/span")&.text
|
102
|
+
status ||= doc.at("//td[contains(., '标准状态')]/span")&.text&.strip
|
103
|
+
return unless STAGES[status]
|
104
|
+
|
103
105
|
RelatonBib::DocumentStatus.new stage: STAGES[status]
|
104
106
|
end
|
105
107
|
|
data/lib/relaton_gb/version.rb
CHANGED
data/lib/relaton_gb.rb
CHANGED
data/relaton_gb.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-gb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.18.1
|
4
|
+
version: 1.18.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cnccs
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.0.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: mechanize
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.7'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.7'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: relaton-iso-bib
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -118,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
132
|
- !ruby/object:Gem::Version
|
119
133
|
version: '0'
|
120
134
|
requirements: []
|
121
|
-
rubygems_version: 3.3.
|
135
|
+
rubygems_version: 3.3.27
|
122
136
|
signing_key:
|
123
137
|
specification_version: 4
|
124
138
|
summary: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using the
|