relaton-gb 1.18.1 → 1.18.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_gb/gb_scrapper.rb +14 -10
- data/lib/relaton_gb/scrapper.rb +3 -1
- data/lib/relaton_gb/version.rb +1 -1
- data/lib/relaton_gb.rb +1 -0
- data/relaton_gb.gemspec +1 -0
- metadata +17 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c4e2a76bd47e6b265cce01a1ba065fdc4490735855e9e2bb67a1c96255806a06
|
|
4
|
+
data.tar.gz: 827223b0f3d44cd5df21894f60161aadb6efb149bda9f01748ed8e93bb437eb9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 61885e81838c7f922eafc1847d32db3eed5e82a7c6514858c1f05b298a7a1bc7eee2aafe43bddde644684187e5ef65e279000c0df6c25eb75bba8f97ffae95c5
|
|
7
|
+
data.tar.gz: d04bdc20de49043e9238f357535e2752d7be37dc552f70f7c66272f37af084f6e506bc5d7f13b2abbaeee0928d61652fa63287a669025932c314b38b642b3a6f
|
|
@@ -10,15 +10,15 @@ module RelatonGb
|
|
|
10
10
|
# National standard scrapper.
|
|
11
11
|
module GbScrapper
|
|
12
12
|
extend Scrapper
|
|
13
|
+
SEARCH_URL = "https://openstd.samr.gov.cn/bzgk/gb/std_list"
|
|
14
|
+
DOC_URL = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno="
|
|
13
15
|
|
|
14
16
|
class << self
|
|
15
17
|
# @param text [String] code of standard for search
|
|
16
18
|
# @return [RelatonGb::HitCollection]
|
|
17
19
|
def scrape_page(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
result = Nokogiri::HTML search_html
|
|
21
|
-
hits = result.xpath(
|
|
20
|
+
doc = agent.get("#{SEARCH_URL}?p.p2=#{CGI.escape(text)}")
|
|
21
|
+
hits = doc.xpath(
|
|
22
22
|
"//table[contains(@class, 'result_list')]/tbody[2]/tr",
|
|
23
23
|
).map do |h|
|
|
24
24
|
ref = h.at "./td[2]/a"
|
|
@@ -27,18 +27,22 @@ module RelatonGb
|
|
|
27
27
|
Hit.new pid: pid, docref: ref.text, scrapper: self, release_date: rdate
|
|
28
28
|
end
|
|
29
29
|
HitCollection.new hits.sort_by(&:release_date).reverse
|
|
30
|
-
rescue
|
|
31
|
-
raise RelatonBib::RequestError,
|
|
30
|
+
rescue Mechanize::Error => e
|
|
31
|
+
raise RelatonBib::RequestError, e.message
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def agent
|
|
35
|
+
@agent ||= Mechanize.new
|
|
32
36
|
end
|
|
33
37
|
|
|
34
38
|
# @param hit [RelatonGb::Hit] standard's page id
|
|
35
39
|
# @return [RelatonGb::GbBibliographicItem]
|
|
36
40
|
def scrape_doc(hit)
|
|
37
|
-
src =
|
|
38
|
-
doc =
|
|
41
|
+
src = DOC_URL + hit.pid
|
|
42
|
+
doc = agent.get src
|
|
39
43
|
GbBibliographicItem.new(**scrapped_data(doc, src, hit))
|
|
40
|
-
rescue
|
|
41
|
-
raise RelatonBib::RequestError,
|
|
44
|
+
rescue Mechanize::Error => e
|
|
45
|
+
raise RelatonBib::RequestError, e.message
|
|
42
46
|
end
|
|
43
47
|
|
|
44
48
|
# @param doc [Nokogiri::HTML]
|
data/lib/relaton_gb/scrapper.rb
CHANGED
|
@@ -99,7 +99,9 @@ module RelatonGb
|
|
|
99
99
|
# @param status [String, NilClass]
|
|
100
100
|
# @return [RelatonBib::DocumentStatus]
|
|
101
101
|
def get_status(doc, status = nil)
|
|
102
|
-
status ||= doc.at("//td[contains(., '标准状态')]/span")&.text
|
|
102
|
+
status ||= doc.at("//td[contains(., '标准状态')]/span")&.text&.strip
|
|
103
|
+
return unless STAGES[status]
|
|
104
|
+
|
|
103
105
|
RelatonBib::DocumentStatus.new stage: STAGES[status]
|
|
104
106
|
end
|
|
105
107
|
|
data/lib/relaton_gb/version.rb
CHANGED
data/lib/relaton_gb.rb
CHANGED
data/relaton_gb.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-gb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.18.1
|
|
4
|
+
version: 1.18.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-
|
|
11
|
+
date: 2024-06-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: cnccs
|
|
@@ -38,6 +38,20 @@ dependencies:
|
|
|
38
38
|
- - "~>"
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: 0.0.1
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: mechanize
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '2.7'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '2.7'
|
|
41
55
|
- !ruby/object:Gem::Dependency
|
|
42
56
|
name: relaton-iso-bib
|
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -118,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
118
132
|
- !ruby/object:Gem::Version
|
|
119
133
|
version: '0'
|
|
120
134
|
requirements: []
|
|
121
|
-
rubygems_version: 3.3.
|
|
135
|
+
rubygems_version: 3.3.27
|
|
122
136
|
signing_key:
|
|
123
137
|
specification_version: 4
|
|
124
138
|
summary: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using the
|