relaton-gb 1.8.0 → 1.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +3 -1
- data/README.adoc +10 -0
- data/grammars/basicdoc.rng +26 -7
- data/grammars/biblio.rng +8 -5
- data/grammars/gbstandard.rng +4 -1
- data/grammars/isodoc.rng +686 -96
- data/grammars/isostandard.rng +42 -26
- data/grammars/reqt.rng +34 -5
- data/lib/relaton_gb/gb_bibliography.rb +7 -6
- data/lib/relaton_gb/gb_scrapper.rb +8 -9
- data/lib/relaton_gb/scrapper.rb +1 -1
- data/lib/relaton_gb/sec_scrapper.rb +3 -3
- data/lib/relaton_gb/t_scrapper.rb +2 -2
- data/lib/relaton_gb/version.rb +1 -1
- data/relaton_gb.gemspec +3 -5
- metadata +9 -9
data/grammars/isostandard.rng
CHANGED
@@ -38,6 +38,9 @@
|
|
38
38
|
</define>
|
39
39
|
<define name="BibDataExtensionType">
|
40
40
|
<ref name="doctype"/>
|
41
|
+
<optional>
|
42
|
+
<ref name="docsubtype"/>
|
43
|
+
</optional>
|
41
44
|
<optional>
|
42
45
|
<ref name="horizontal"/>
|
43
46
|
</optional>
|
@@ -98,7 +101,11 @@
|
|
98
101
|
<ref name="definitions"/>
|
99
102
|
</optional>
|
100
103
|
<oneOrMore>
|
101
|
-
<
|
104
|
+
<choice>
|
105
|
+
<ref name="clause"/>
|
106
|
+
<ref name="term-clause"/>
|
107
|
+
<ref name="terms"/>
|
108
|
+
</choice>
|
102
109
|
</oneOrMore>
|
103
110
|
</element>
|
104
111
|
</define>
|
@@ -133,22 +140,17 @@
|
|
133
140
|
<optional>
|
134
141
|
<ref name="section-title"/>
|
135
142
|
</optional>
|
136
|
-
<
|
143
|
+
<choice>
|
137
144
|
<choice>
|
138
|
-
<
|
139
|
-
<
|
140
|
-
|
141
|
-
</oneOrMore>
|
142
|
-
<zeroOrMore>
|
143
|
-
<ref name="note"/>
|
144
|
-
</zeroOrMore>
|
145
|
-
</group>
|
145
|
+
<oneOrMore>
|
146
|
+
<ref name="BasicBlock"/>
|
147
|
+
</oneOrMore>
|
146
148
|
<ref name="amend"/>
|
147
149
|
</choice>
|
148
150
|
<oneOrMore>
|
149
151
|
<ref name="clause-subsection"/>
|
150
152
|
</oneOrMore>
|
151
|
-
</
|
153
|
+
</choice>
|
152
154
|
</define>
|
153
155
|
<define name="term">
|
154
156
|
<element name="term">
|
@@ -157,6 +159,20 @@
|
|
157
159
|
<data type="ID"/>
|
158
160
|
</attribute>
|
159
161
|
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="language"/>
|
164
|
+
</optional>
|
165
|
+
<optional>
|
166
|
+
<attribute name="script"/>
|
167
|
+
</optional>
|
168
|
+
<optional>
|
169
|
+
<attribute name="tag"/>
|
170
|
+
</optional>
|
171
|
+
<optional>
|
172
|
+
<attribute name="multilingual-rendering">
|
173
|
+
<ref name="MultilingualRenderingType"/>
|
174
|
+
</attribute>
|
175
|
+
</optional>
|
160
176
|
<ref name="preferred"/>
|
161
177
|
<zeroOrMore>
|
162
178
|
<ref name="admitted"/>
|
@@ -167,7 +183,7 @@
|
|
167
183
|
<optional>
|
168
184
|
<ref name="termdomain"/>
|
169
185
|
</optional>
|
170
|
-
<ref name="
|
186
|
+
<ref name="termdefinition"/>
|
171
187
|
<zeroOrMore>
|
172
188
|
<ref name="termnote"/>
|
173
189
|
</zeroOrMore>
|
@@ -177,6 +193,9 @@
|
|
177
193
|
<zeroOrMore>
|
178
194
|
<ref name="termsource"/>
|
179
195
|
</zeroOrMore>
|
196
|
+
<zeroOrMore>
|
197
|
+
<ref name="term"/>
|
198
|
+
</zeroOrMore>
|
180
199
|
</element>
|
181
200
|
</define>
|
182
201
|
<define name="annex">
|
@@ -215,9 +234,6 @@
|
|
215
234
|
-->
|
216
235
|
<ref name="BasicBlock"/>
|
217
236
|
</zeroOrMore>
|
218
|
-
<zeroOrMore>
|
219
|
-
<ref name="note"/>
|
220
|
-
</zeroOrMore>
|
221
237
|
<zeroOrMore>
|
222
238
|
<ref name="clause-hanging-paragraph-with-footnote"/>
|
223
239
|
</zeroOrMore>
|
@@ -259,6 +275,14 @@
|
|
259
275
|
<value>directive</value>
|
260
276
|
</choice>
|
261
277
|
</define>
|
278
|
+
<define name="DocumentSubtype">
|
279
|
+
<choice>
|
280
|
+
<value>specification</value>
|
281
|
+
<value>method-of-test</value>
|
282
|
+
<value>vocabulary</value>
|
283
|
+
<value>code-of-practice</value>
|
284
|
+
</choice>
|
285
|
+
</define>
|
262
286
|
<define name="structuredidentifier">
|
263
287
|
<element name="structuredidentifier">
|
264
288
|
<optional>
|
@@ -333,14 +357,9 @@
|
|
333
357
|
<ref name="section-title"/>
|
334
358
|
</optional>
|
335
359
|
<choice>
|
336
|
-
<
|
337
|
-
<
|
338
|
-
|
339
|
-
</zeroOrMore>
|
340
|
-
<zeroOrMore>
|
341
|
-
<ref name="note"/>
|
342
|
-
</zeroOrMore>
|
343
|
-
</group>
|
360
|
+
<zeroOrMore>
|
361
|
+
<ref name="BasicBlock"/>
|
362
|
+
</zeroOrMore>
|
344
363
|
<oneOrMore>
|
345
364
|
<ref name="content-subsection"/>
|
346
365
|
</oneOrMore>
|
@@ -457,9 +476,6 @@
|
|
457
476
|
<!-- allow hanging paragraphs in annexes: they introduce lists -->
|
458
477
|
<ref name="BasicBlock"/>
|
459
478
|
</zeroOrMore>
|
460
|
-
<zeroOrMore>
|
461
|
-
<ref name="note"/>
|
462
|
-
</zeroOrMore>
|
463
479
|
<zeroOrMore>
|
464
480
|
<ref name="clause-hanging-paragraph-with-footnote"/>
|
465
481
|
</zeroOrMore>
|
data/grammars/reqt.rng
CHANGED
@@ -58,15 +58,23 @@
|
|
58
58
|
<optional>
|
59
59
|
<attribute name="type"/>
|
60
60
|
</optional>
|
61
|
+
<optional>
|
62
|
+
<attribute name="tag"/>
|
63
|
+
</optional>
|
64
|
+
<optional>
|
65
|
+
<attribute name="multilingual-rendering">
|
66
|
+
<ref name="MultilingualRenderingType"/>
|
67
|
+
</attribute>
|
68
|
+
</optional>
|
61
69
|
<optional>
|
62
70
|
<ref name="reqtitle"/>
|
63
71
|
</optional>
|
64
72
|
<optional>
|
65
73
|
<ref name="label"/>
|
66
74
|
</optional>
|
67
|
-
<
|
75
|
+
<zeroOrMore>
|
68
76
|
<ref name="subject"/>
|
69
|
-
</
|
77
|
+
</zeroOrMore>
|
70
78
|
<zeroOrMore>
|
71
79
|
<ref name="reqinherit"/>
|
72
80
|
</zeroOrMore>
|
@@ -80,6 +88,7 @@
|
|
80
88
|
<ref name="verification"/>
|
81
89
|
<ref name="import"/>
|
82
90
|
<ref name="description"/>
|
91
|
+
<ref name="component"/>
|
83
92
|
</choice>
|
84
93
|
</zeroOrMore>
|
85
94
|
<optional>
|
@@ -100,17 +109,23 @@
|
|
100
109
|
</define>
|
101
110
|
<define name="label">
|
102
111
|
<element name="label">
|
103
|
-
<
|
112
|
+
<oneOrMore>
|
113
|
+
<ref name="TextElement"/>
|
114
|
+
</oneOrMore>
|
104
115
|
</element>
|
105
116
|
</define>
|
106
117
|
<define name="subject">
|
107
118
|
<element name="subject">
|
108
|
-
<
|
119
|
+
<oneOrMore>
|
120
|
+
<ref name="TextElement"/>
|
121
|
+
</oneOrMore>
|
109
122
|
</element>
|
110
123
|
</define>
|
111
124
|
<define name="reqinherit">
|
112
125
|
<element name="inherit">
|
113
|
-
<
|
126
|
+
<oneOrMore>
|
127
|
+
<ref name="TextElement"/>
|
128
|
+
</oneOrMore>
|
114
129
|
</element>
|
115
130
|
</define>
|
116
131
|
<define name="measurementtarget">
|
@@ -138,6 +153,12 @@
|
|
138
153
|
<ref name="RequirementSubpart"/>
|
139
154
|
</element>
|
140
155
|
</define>
|
156
|
+
<define name="component">
|
157
|
+
<element name="component">
|
158
|
+
<attribute name="class"/>
|
159
|
+
<ref name="RequirementSubpart"/>
|
160
|
+
</element>
|
161
|
+
</define>
|
141
162
|
<define name="reqt_references">
|
142
163
|
<element name="references">
|
143
164
|
<oneOrMore>
|
@@ -164,6 +185,14 @@
|
|
164
185
|
<data type="boolean"/>
|
165
186
|
</attribute>
|
166
187
|
</optional>
|
188
|
+
<optional>
|
189
|
+
<attribute name="tag"/>
|
190
|
+
</optional>
|
191
|
+
<optional>
|
192
|
+
<attribute name="multilingual-rendering">
|
193
|
+
<ref name="MultilingualRenderingType"/>
|
194
|
+
</attribute>
|
195
|
+
</optional>
|
167
196
|
<oneOrMore>
|
168
197
|
<ref name="BasicBlock"/>
|
169
198
|
</oneOrMore>
|
@@ -14,17 +14,18 @@ module RelatonGb
|
|
14
14
|
# @param text [Strin] code of standard for search
|
15
15
|
# @return [RelatonGb::HitCollection]
|
16
16
|
def search(text)
|
17
|
-
|
17
|
+
case text
|
18
|
+
when /^(GB|GJ|GS)/
|
18
19
|
# Scrape national standards.
|
19
20
|
require "relaton_gb/gb_scrapper"
|
20
21
|
GbScrapper.scrape_page text
|
21
|
-
|
22
|
+
when /^ZB/
|
22
23
|
# Scrape proffesional.
|
23
|
-
|
24
|
+
when /^DB/
|
24
25
|
# Scrape local standard.
|
25
|
-
|
26
|
+
when %r{^Q/}
|
26
27
|
# Enterprise standard
|
27
|
-
|
28
|
+
when %r{^T/[^\s]{3,6}\s}
|
28
29
|
# Scrape social standard.
|
29
30
|
require "relaton_gb/t_scrapper"
|
30
31
|
TScrapper.scrape_page text
|
@@ -96,7 +97,7 @@ module RelatonGb
|
|
96
97
|
|
97
98
|
def search_filter(code)
|
98
99
|
# search filter needs to incorporate year
|
99
|
-
docidrx = %r{^[^\s]+\s[\d
|
100
|
+
docidrx = %r{^[^\s]+\s[\d.-]+}
|
100
101
|
warn "[relaton-gb] (\"#{code}\") fetching..."
|
101
102
|
result = search(code)
|
102
103
|
result.select do |hit|
|
@@ -14,10 +14,9 @@ module RelatonGb
|
|
14
14
|
class << self
|
15
15
|
# @param text [Strin] code of standard for serarch
|
16
16
|
# @return [RelatonGb::HitCollection]
|
17
|
-
def scrape_page(text)
|
18
|
-
|
19
|
-
|
20
|
-
)
|
17
|
+
def scrape_page(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
18
|
+
host = "http://openstd.samr.gov.cn/bzgk/gb/std_list"
|
19
|
+
search_html = OpenURI.open_uri("#{host}?p.p2=#{text}")
|
21
20
|
result = Nokogiri::HTML search_html
|
22
21
|
hits = result.xpath(
|
23
22
|
"//table[contains(@class, 'result_list')]/tbody[2]/tr",
|
@@ -28,17 +27,17 @@ module RelatonGb
|
|
28
27
|
Hit.new pid: pid, docref: ref.text, scrapper: self, release_date: rdate
|
29
28
|
end
|
30
29
|
HitCollection.new hits.sort_by(&:release_date).reverse
|
31
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
32
|
-
raise RelatonBib::RequestError, "Cannot access
|
30
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
|
31
|
+
raise RelatonBib::RequestError, "Cannot access #{host}"
|
33
32
|
end
|
34
33
|
|
35
34
|
# @param hit [RelatonGb::Hit] standard's page id
|
36
35
|
# @return [RelatonGb::GbBibliographicItem]
|
37
36
|
def scrape_doc(hit)
|
38
|
-
src = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno
|
37
|
+
src = "http://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno=#{hit.pid}"
|
39
38
|
doc = Nokogiri::HTML OpenURI.open_uri(src)
|
40
|
-
GbBibliographicItem.new
|
41
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
39
|
+
GbBibliographicItem.new(**scrapped_data(doc, src, hit))
|
40
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
|
42
41
|
raise RelatonBib::RequestError, "Cannot access #{src}"
|
43
42
|
end
|
44
43
|
|
data/lib/relaton_gb/scrapper.rb
CHANGED
@@ -43,7 +43,7 @@ module RelatonGb
|
|
43
43
|
# @param docref [String]
|
44
44
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
45
45
|
def get_docid(docref)
|
46
|
-
[RelatonBib::DocumentIdentifier.new(id: docref, type: "Chinese Standard")]
|
46
|
+
[RelatonBib::DocumentIdentifier.new(id: docref, type: "Chinese Standard", primary: true)]
|
47
47
|
end
|
48
48
|
|
49
49
|
# @param docref [String]
|
@@ -32,7 +32,7 @@ module RelatonGb
|
|
32
32
|
HitCollection.new hits
|
33
33
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
34
34
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
35
|
-
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
35
|
+
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT, Net::OpenTimeout
|
36
36
|
raise RelatonBib::RequestError, "Cannot access #{uri}"
|
37
37
|
end
|
38
38
|
|
@@ -42,10 +42,10 @@ module RelatonGb
|
|
42
42
|
src = "http://hbba.sacinfo.org.cn/stdDetail/#{hit.pid}"
|
43
43
|
page_uri = URI src
|
44
44
|
doc = Nokogiri::HTML Net::HTTP.get(page_uri)
|
45
|
-
GbBibliographicItem.new
|
45
|
+
GbBibliographicItem.new(**scrapped_data(doc, src, hit))
|
46
46
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
47
47
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
48
|
-
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
48
|
+
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT, Net::OpenTimeout
|
49
49
|
raise RelatonBib::RequestError, "Cannot access #{src}"
|
50
50
|
end
|
51
51
|
|
@@ -33,7 +33,7 @@ module RelatonGb
|
|
33
33
|
Hit.new pid: pid, docref: docref, status: status, scrapper: self
|
34
34
|
end
|
35
35
|
HitCollection.new hits
|
36
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
36
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
|
37
37
|
raise RelatonBib::RequestError, "Cannot access http://www.ttbz.org.cn/Home/Standard"
|
38
38
|
end
|
39
39
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
@@ -44,7 +44,7 @@ module RelatonGb
|
|
44
44
|
src = "http://www.ttbz.org.cn#{hit.pid}"
|
45
45
|
doc = Nokogiri::HTML OpenURI.open_uri(src), nil, Encoding::UTF_8.to_s
|
46
46
|
GbBibliographicItem.new **scrapped_data(doc, src, hit)
|
47
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
47
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError, Net::OpenTimeout
|
48
48
|
raise RelatonBib::RequestError, "Cannot access #{src}"
|
49
49
|
end
|
50
50
|
|
data/lib/relaton_gb/version.rb
CHANGED
data/relaton_gb.gemspec
CHANGED
@@ -22,14 +22,12 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.bindir = "exe"
|
23
23
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
24
24
|
spec.require_paths = ["lib"]
|
25
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
25
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
26
26
|
|
27
|
-
# spec.add_development_dependency "debase"
|
28
27
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
29
28
|
spec.add_development_dependency "pry-byebug"
|
30
|
-
spec.add_development_dependency "rake", "~>
|
29
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
31
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
32
|
-
# spec.add_development_dependency "ruby-debug-ide"
|
33
31
|
spec.add_development_dependency "ruby-jing"
|
34
32
|
spec.add_development_dependency "simplecov"
|
35
33
|
spec.add_development_dependency "vcr"
|
@@ -37,5 +35,5 @@ Gem::Specification.new do |spec|
|
|
37
35
|
|
38
36
|
spec.add_dependency "cnccs", "~> 0.1.1"
|
39
37
|
spec.add_dependency "gb-agencies", "~> 0.0.1"
|
40
|
-
spec.add_dependency "relaton-iso-bib", "
|
38
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.10.1"
|
41
39
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-gb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.10.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '13.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '13.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rspec
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -154,16 +154,16 @@ dependencies:
|
|
154
154
|
name: relaton-iso-bib
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
|
-
- - "
|
157
|
+
- - "~>"
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 1.
|
159
|
+
version: 1.10.1
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
|
-
- - "
|
164
|
+
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 1.
|
166
|
+
version: 1.10.1
|
167
167
|
description: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using
|
168
168
|
the BibliographicItem model.'
|
169
169
|
email:
|
@@ -220,7 +220,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
220
220
|
requirements:
|
221
221
|
- - ">="
|
222
222
|
- !ruby/object:Gem::Version
|
223
|
-
version: 2.
|
223
|
+
version: 2.5.0
|
224
224
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
225
225
|
requirements:
|
226
226
|
- - ">="
|