relaton-gb 1.16.1 → 1.16.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +49 -15
- data/lib/relaton_gb/gb_bibliographic_item.rb +2 -0
- data/lib/relaton_gb/gb_bibliography.rb +10 -8
- data/lib/relaton_gb/gb_scrapper.rb +3 -3
- data/lib/relaton_gb/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fbe2626219dda70b56d1c186f9220eb521eb124a89d328f7f7743a0ca971e87b
|
4
|
+
data.tar.gz: a57f4fb37c2edd1451d2b29b3896d636648be8f0486dfc0dba385759cd9f73ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c68e56282a8ddf5e0da3bdac5ce9e8a26bb5af07eb227750a0060b5c48a075c991e418f878283557327b8bbc332f9da1926763ad08e0cc57d74ee2c4b4260f21
|
7
|
+
data.tar.gz: e263b6a66f9054ec3872fca035a0a19877acc8bc2655e7b7f400fd7d14ccdac84e14ff9be25d8506d2976bebf5a05ddc7ce719fe6b76017ff221f64aee72c888
|
data/README.adoc
CHANGED
@@ -39,14 +39,26 @@ Or install it yourself as:
|
|
39
39
|
|
40
40
|
== Usage
|
41
41
|
|
42
|
-
===
|
42
|
+
=== Configuration
|
43
|
+
|
44
|
+
Configuration is optional. The only available option is `logger`, which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with the `Logger::WARN` level. To change the logger level, use a `RelatonGb.configure` block.
|
43
45
|
|
44
46
|
[source,ruby]
|
45
47
|
----
|
46
48
|
require 'relaton_gb'
|
47
49
|
=> true
|
48
50
|
|
51
|
+
RelatonGb.configure do |config|
|
52
|
+
config.logger.level = Logger::DEBUG
|
53
|
+
end
|
54
|
+
----
|
55
|
+
|
56
|
+
=== Search document
|
57
|
+
|
58
|
+
[source,ruby]
|
59
|
+
----
|
49
60
|
hit_collection = RelatonGb::GbBibliography.search "GB/T 20223-2006"
|
61
|
+
[relaton-gb] (GB/T 20223-2006) Fetching from openstd.samr.gov.cn ...
|
50
62
|
=> <RelatonGb::HitCollection:0x007fc8d8a26d10 @ref= @fetched=false>
|
51
63
|
|
52
64
|
hit_collection.first
|
@@ -64,7 +76,7 @@ hit_collection.first.fetch.committee
|
|
64
76
|
=> #<RelatonGb::GbTechnicalCommittee:0x007f975713c450 @name="中国纤维检验局", @type="technical">
|
65
77
|
|
66
78
|
hit_collection.first.fetch.gbtype
|
67
|
-
=> #<RelatonGb::GbStandardType:
|
79
|
+
=> #<RelatonGb::GbStandardType:0x0000000112957dc0 @mandate="recommended", @prefix="GB_national", @scope="national", @topic="other">
|
68
80
|
|
69
81
|
hit_collection.first.fetch.ccs
|
70
82
|
=> [#<Cnccs::Ccs:0x007f975713c090
|
@@ -77,21 +89,43 @@ hit_collection.first.fetch.ccs
|
|
77
89
|
@subgroupcode="32">]
|
78
90
|
|
79
91
|
hit_collection.first.fetch.title
|
80
|
-
=>
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
+
=> #<RelatonBib::TypedTitleStringCollection:0x0000000112a1ac08
|
93
|
+
@array=
|
94
|
+
[#<RelatonBib::TypedTitleString:0x0000000112a1f438
|
95
|
+
@title=#<RelatonBib::FormattedString:0x0000000112cd6050 @content="棉短绒", @format="text/plain", @language=["zh"], @script=["Hans"]>,
|
96
|
+
@type="title-main">,
|
97
|
+
#<RelatonBib::TypedTitleString:0x0000000112a1eb78
|
98
|
+
@title=#<RelatonBib::FormattedString:0x0000000112cd6000 @content="棉短绒", @format="text/plain", @language=["zh"], @script=["Hans"]>,
|
99
|
+
@type="main">,
|
100
|
+
#<RelatonBib::TypedTitleString:0x0000000112a1c198
|
101
|
+
@title=#<RelatonBib::FormattedString:0x0000000112cd5ce0 @content="Cotton linter", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
102
|
+
@type="title-main">,
|
103
|
+
#<RelatonBib::TypedTitleString:0x0000000112a1bba8
|
104
|
+
@title=#<RelatonBib::FormattedString:0x0000000112cd5c40 @content="Cotton linter", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
105
|
+
@type="main">]>
|
92
106
|
|
93
107
|
hit_collection.first.fetch.date
|
94
|
-
=> [#<RelatonBib::BibliographicDate:
|
108
|
+
=> [#<RelatonBib::BibliographicDate:0x000000011307f468 @from=nil, @on="2006-03-10", @to=nil, @type="published">]
|
109
|
+
----
|
110
|
+
|
111
|
+
=== Get document by identifier
|
112
|
+
|
113
|
+
[source,ruby]
|
114
|
+
----
|
115
|
+
RelatonGb::GbBibliography.get "GB/T 20223-2006"
|
116
|
+
[relaton-gb] (GB/T 20223-2006) Fetching from openstd.samr.gov.cn ...
|
117
|
+
[relaton-gb] (GB/T 20223-2006) Found: `GB/T 20223-2006`
|
118
|
+
=> <RelatonGb::GbBibliographicItem:0x000000000011f8>
|
119
|
+
----
|
120
|
+
|
121
|
+
=== Get document by identifier and year
|
122
|
+
|
123
|
+
[source,ruby]
|
124
|
+
----
|
125
|
+
RelatonGb::GbBibliography.get "GB/T 20223", "2006"
|
126
|
+
[relaton-gb] (GB/T 20223-2006) Fetching from openstd.samr.gov.cn ...
|
127
|
+
[relaton-gb] (GB/T 20223-2006) Found: `GB/T 20223-2006`
|
128
|
+
=> <RelatonGb::GbBibliographicItem:0x00000000001248>
|
95
129
|
----
|
96
130
|
|
97
131
|
=== Create bibliographic item from YAML
|
@@ -9,6 +9,8 @@ require "relaton_gb/ccs"
|
|
9
9
|
module RelatonGb
|
10
10
|
# GB bibliographic item class.
|
11
11
|
class GbBibliographicItem < RelatonIsoBib::IsoBibliographicItem
|
12
|
+
DOCTYPES = %w[standard reccomendation].freeze
|
13
|
+
|
12
14
|
# @return [RelatonGb::GbTechnicalCommittee]
|
13
15
|
attr_reader :committee
|
14
16
|
|
@@ -15,10 +15,11 @@ module RelatonGb
|
|
15
15
|
# @return [RelatonGb::HitCollection]
|
16
16
|
def search(text)
|
17
17
|
case text
|
18
|
-
|
18
|
+
when /^(GB|GJ|GS)/
|
19
19
|
# Scrape national standards.
|
20
|
-
#
|
21
|
-
|
20
|
+
Util.warn "(#{text}) Fetching from openstd.samr.gov.cn ..."
|
21
|
+
require "relaton_gb/gb_scrapper"
|
22
|
+
GbScrapper.scrape_page text
|
22
23
|
# when /^ZB/
|
23
24
|
# Scrape professional.
|
24
25
|
# when /^DB/
|
@@ -27,6 +28,7 @@ module RelatonGb
|
|
27
28
|
# Enterprise standard
|
28
29
|
when %r{^T/[^\s]{2,6}\s}
|
29
30
|
# Scrape social standard.
|
31
|
+
Util.warn "(#{text}) Fetching from www.ttbz.org.cn ..."
|
30
32
|
require "relaton_gb/t_scrapper"
|
31
33
|
TScrapper.scrape_page text
|
32
34
|
else
|
@@ -64,9 +66,9 @@ module RelatonGb
|
|
64
66
|
private
|
65
67
|
|
66
68
|
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
67
|
-
id = year ? "#{code}:#{year}" : code
|
68
|
-
Util.warn "WARNING:
|
69
|
-
|
69
|
+
# id = year ? "#{code}:#{year}" : code
|
70
|
+
# Util.warn "WARNING: No match found on the GB website for `#{id}`. " \
|
71
|
+
# "The code must be exactly like it is on the website."
|
70
72
|
unless missed_years.empty?
|
71
73
|
Util.warn "(There was no match for `#{year}`, though there " \
|
72
74
|
"were matches found for `#{missed_years.join('`, `')}`.)"
|
@@ -89,9 +91,10 @@ module RelatonGb
|
|
89
91
|
result = search_filter(searchcode) || return
|
90
92
|
ret = results_filter(result, year)
|
91
93
|
if ret[:ret]
|
92
|
-
Util.warn "(#{
|
94
|
+
Util.warn "(#{searchcode}) Found: `#{ret[:ret].docidentifier.first.id}`"
|
93
95
|
ret[:ret]
|
94
96
|
else
|
97
|
+
Util.warn "(#{searchcode}) Not found."
|
95
98
|
fetch_ref_err(code, year, ret[:years])
|
96
99
|
end
|
97
100
|
end
|
@@ -99,7 +102,6 @@ module RelatonGb
|
|
99
102
|
def search_filter(code)
|
100
103
|
# search filter needs to incorporate year
|
101
104
|
docidrx = %r{^[^\s]+\s[\d.-]+}
|
102
|
-
Util.warn "(#{code}) fetching..."
|
103
105
|
result = search(code)
|
104
106
|
result.select do |hit|
|
105
107
|
hit.docref && hit.docref.match(docidrx).to_s.include?(code)
|
@@ -15,8 +15,8 @@ module RelatonGb
|
|
15
15
|
# @param text [String] code of standard for search
|
16
16
|
# @return [RelatonGb::HitCollection]
|
17
17
|
def scrape_page(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
18
|
-
host = "
|
19
|
-
search_html = OpenURI.open_uri("#{host}?p.p2=#{text}")
|
18
|
+
host = "https://openstd.samr.gov.cn/bzgk/gb/std_list"
|
19
|
+
search_html = OpenURI.open_uri("#{host}?p.p2=#{text}", ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
|
20
20
|
result = Nokogiri::HTML search_html
|
21
21
|
hits = result.xpath(
|
22
22
|
"//table[contains(@class, 'result_list')]/tbody[2]/tr",
|
@@ -35,7 +35,7 @@ module RelatonGb
|
|
35
35
|
# @return [RelatonGb::GbBibliographicItem]
|
36
36
|
def scrape_doc(hit)
|
37
37
|
src = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno=#{hit.pid}"
|
38
|
-
doc = Nokogiri::HTML OpenURI.open_uri(src)
|
38
|
+
doc = Nokogiri::HTML OpenURI.open_uri(src, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
|
39
39
|
GbBibliographicItem.new(**scrapped_data(doc, src, hit))
|
40
40
|
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
|
41
41
|
raise RelatonBib::RequestError, "Cannot access #{src}"
|
data/lib/relaton_gb/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-gb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.16.
|
4
|
+
version: 1.16.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cnccs
|