relaton-gb 1.0.2 → 1.5.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ubuntu.yml +1 -0
- data/.rubocop.yml +2 -2
- data/README.adoc +4 -22
- data/grammars/biblio.rng +36 -6
- data/grammars/isodoc.rng +574 -22
- data/grammars/isostandard.rng +13 -2
- data/lib/relaton_gb.rb +1 -1
- data/lib/relaton_gb/ccs.rb +12 -0
- data/lib/relaton_gb/gb_bibliographic_item.rb +21 -39
- data/lib/relaton_gb/gb_scrapper.rb +1 -1
- data/lib/relaton_gb/hash_converter.rb +9 -0
- data/lib/relaton_gb/hit.rb +1 -1
- data/lib/relaton_gb/hit_collection.rb +3 -2
- data/lib/relaton_gb/scrapper.rb +7 -18
- data/lib/relaton_gb/sec_scrapper.rb +5 -3
- data/lib/relaton_gb/t_scrapper.rb +11 -16
- data/lib/relaton_gb/version.rb +1 -1
- data/lib/relaton_gb/xml_parser.rb +7 -15
- data/relaton_gb.gemspec +1 -1
- metadata +9 -9
data/grammars/isostandard.rng
CHANGED
@@ -91,6 +91,12 @@
|
|
91
91
|
</define>
|
92
92
|
<define name="sections">
|
93
93
|
<element name="sections">
|
94
|
+
<zeroOrMore>
|
95
|
+
<choice>
|
96
|
+
<ref name="note"/>
|
97
|
+
<ref name="admonition"/>
|
98
|
+
</choice>
|
99
|
+
</zeroOrMore>
|
94
100
|
<ref name="clause"/>
|
95
101
|
<optional>
|
96
102
|
<choice>
|
@@ -247,7 +253,7 @@
|
|
247
253
|
<define name="preface">
|
248
254
|
<element name="preface">
|
249
255
|
<optional>
|
250
|
-
<ref name="
|
256
|
+
<ref name="abstract"/>
|
251
257
|
</optional>
|
252
258
|
<ref name="foreword"/>
|
253
259
|
<optional>
|
@@ -263,6 +269,8 @@
|
|
263
269
|
<value>publicly-available-specification</value>
|
264
270
|
<value>international-workshop-agreement</value>
|
265
271
|
<value>guide</value>
|
272
|
+
<value>amendment</value>
|
273
|
+
<value>technical-corrigendum</value>
|
266
274
|
</choice>
|
267
275
|
</define>
|
268
276
|
<define name="structuredidentifier">
|
@@ -354,6 +362,9 @@
|
|
354
362
|
<data type="boolean"/>
|
355
363
|
</attribute>
|
356
364
|
</optional>
|
365
|
+
<optional>
|
366
|
+
<attribute name="number"/>
|
367
|
+
</optional>
|
357
368
|
<optional>
|
358
369
|
<attribute name="subsequence"/>
|
359
370
|
</optional>
|
@@ -510,7 +521,7 @@
|
|
510
521
|
</attribute>
|
511
522
|
</optional>
|
512
523
|
<oneOrMore>
|
513
|
-
<ref name="
|
524
|
+
<ref name="BasicBlock"/>
|
514
525
|
</oneOrMore>
|
515
526
|
</element>
|
516
527
|
</define>
|
data/lib/relaton_gb.rb
CHANGED
data/lib/relaton_gb/ccs.rb
CHANGED
@@ -4,5 +4,17 @@ module Cnccs
|
|
4
4
|
def to_hash
|
5
5
|
{ "code" => code }
|
6
6
|
end
|
7
|
+
|
8
|
+
# @param prefix [String]
|
9
|
+
# @param count [Integer]
|
10
|
+
# @return [String]
|
11
|
+
def to_aciibib(prefix = "", count = 1)
|
12
|
+
pref = prefix.empty? ? prefix : prefix + "."
|
13
|
+
pref += "ccs"
|
14
|
+
out = count > 1 ? "#{pref}::\n" : ""
|
15
|
+
out += "#{pref}.code:: #{code}\n" if code
|
16
|
+
out += "#{pref}.description:: #{description}\n" if description
|
17
|
+
out
|
18
|
+
end
|
7
19
|
end
|
8
20
|
end
|
@@ -31,26 +31,25 @@ module RelatonGb
|
|
31
31
|
|
32
32
|
def initialize(**args)
|
33
33
|
super
|
34
|
-
|
34
|
+
@committee = GbTechnicalCommittee.new args[:committee] if args[:committee]
|
35
35
|
@ccs = args[:ccs].map { |c| c.is_a?(Cnccs::Ccs) ? c : Cnccs.fetch(c) }
|
36
36
|
@gbtype = GbStandardType.new args[:gbtype]
|
37
|
-
@gbplannumber = args[:gbplannumber] ||
|
37
|
+
@gbplannumber = args[:gbplannumber] ||
|
38
|
+
structuredidentifier&.project_number
|
38
39
|
end
|
39
40
|
|
40
|
-
# @param
|
41
|
-
# @
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
end.doc.root.to_xml
|
49
|
-
end
|
41
|
+
# @param opts [Hash]
|
42
|
+
# @option opts [Nokogiri::XML::Builder] :builder XML builder
|
43
|
+
# @option opts [Boolean] :bibdata
|
44
|
+
# @option opts [Symbol, NilClass] :date_format (:short), :full
|
45
|
+
# @option opts [String, Symbol] :lang language
|
46
|
+
# @return [String] XML
|
47
|
+
def to_xml(**opts)
|
48
|
+
super(**opts) { |xml| render_gbxml(xml) }
|
50
49
|
end
|
51
50
|
|
52
51
|
# @return [Hash]
|
53
|
-
def to_hash
|
52
|
+
def to_hash # rubocop:disable Metrics/AbcSize
|
54
53
|
hash = super
|
55
54
|
hash["ccs"] = single_element_array(ccs) if ccs&.any?
|
56
55
|
hash["committee"] = committee.to_hash if committee
|
@@ -59,11 +58,17 @@ module RelatonGb
|
|
59
58
|
hash
|
60
59
|
end
|
61
60
|
|
61
|
+
# @param prefix [String]
|
62
|
+
# @return [String]
|
63
|
+
def to_asciibib(prefix = "")
|
64
|
+
out = super
|
65
|
+
ccs.each { |c| out += c.to_aciibib prefix, ccs.size }
|
66
|
+
out
|
67
|
+
end
|
68
|
+
|
62
69
|
# @return [String]
|
63
70
|
def inspect
|
64
|
-
"<#{self.class}:#{format('
|
65
|
-
# "@fullIdentifier=\"#{@fetch&.shortref}\" "\
|
66
|
-
# "@title=\"#{title}\">"
|
71
|
+
"<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)}>"
|
67
72
|
end
|
68
73
|
|
69
74
|
# @return [String]
|
@@ -76,34 +81,11 @@ module RelatonGb
|
|
76
81
|
|
77
82
|
id ||= @docidentifier.reject { |i| i.type == "DOI" }[0]
|
78
83
|
idstr = id.id
|
79
|
-
# if id.part_number&.size&.positive?
|
80
|
-
# idstr = idstr + "-#{id.part_number}"
|
81
|
-
# end
|
82
84
|
idstr.gsub(/\s/, "").strip
|
83
85
|
end
|
84
86
|
|
85
87
|
private
|
86
88
|
|
87
|
-
# Overraides IsoBibliographicItem method.
|
88
|
-
# @param language [Array<String>]
|
89
|
-
# @raise ArgumentError
|
90
|
-
def check_language(language)
|
91
|
-
language.each do |lang|
|
92
|
-
unless %w[en zh].include? lang
|
93
|
-
raise ArgumentError, "invalid language: #{lang}"
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
# Overraides IsoBibliographicItem method.
|
99
|
-
# @param script [Array<String>]
|
100
|
-
# @raise ArgumentError
|
101
|
-
def check_script(script)
|
102
|
-
script.each do |scr|
|
103
|
-
raise ArgumentError, "invalid script: #{scr}" unless %w[Latn Hans].include? scr
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
89
|
# @param builder [Nokogiri::XML::Builder]
|
108
90
|
def render_gbxml(builder)
|
109
91
|
gbtype.to_xml builder
|
@@ -16,7 +16,7 @@ module RelatonGb
|
|
16
16
|
# @return [RelatonGb::HitCollection]
|
17
17
|
def scrape_page(text)
|
18
18
|
search_html = OpenURI.open_uri(
|
19
|
-
"http://openstd.samr.gov.cn/bzgk/gb/std_list?p.p2=" + text
|
19
|
+
"http://openstd.samr.gov.cn/bzgk/gb/std_list?p.p2=" + text,
|
20
20
|
)
|
21
21
|
result = Nokogiri::HTML search_html
|
22
22
|
hits = result.xpath(
|
@@ -17,6 +17,15 @@ module RelatonGb
|
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
+
#
|
21
|
+
# Overrides superclass's method
|
22
|
+
#
|
23
|
+
# @param item [Hash]
|
24
|
+
# @return [RelatonGb::GbBibliographicItem]
|
25
|
+
def bib_item(item)
|
26
|
+
GbBibliographicItem.new(item)
|
27
|
+
end
|
28
|
+
|
20
29
|
def ccs_hash_to_bib(ret)
|
21
30
|
ret[:ccs] = array(ret[:ccs]).map do |ccs|
|
22
31
|
ccs[:code] ? Cnccs.fetch(ccs[:code]) : Cnccs.fetch(ccs)
|
data/lib/relaton_gb/hit.rb
CHANGED
@@ -38,7 +38,7 @@ module RelatonGb
|
|
38
38
|
|
39
39
|
# @return [String]
|
40
40
|
def inspect
|
41
|
-
"<#{self.class}:#{format('
|
41
|
+
"<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)} "\
|
42
42
|
"@fullIdentifier=\"#{@fetch&.shortref}\" "\
|
43
43
|
"@docref=\"#{docref}\">"
|
44
44
|
end
|
@@ -5,10 +5,11 @@ module RelatonGb
|
|
5
5
|
class HitCollection < RelatonBib::HitCollection
|
6
6
|
# @param hits [Array<Hash>]
|
7
7
|
# @param hit_pages [Integer]
|
8
|
-
# @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper,
|
8
|
+
# @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper,
|
9
|
+
# RelatonGb::TScrapper]
|
9
10
|
def initialize(hits = [])
|
10
11
|
@array = hits
|
11
|
-
@fetched
|
12
|
+
@fetched = false
|
12
13
|
end
|
13
14
|
end
|
14
15
|
end
|
data/lib/relaton_gb/scrapper.rb
CHANGED
@@ -78,19 +78,14 @@ module RelatonGb
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# @param doc [Nokogiri::HTML::Document]
|
81
|
-
# @return [Array<
|
82
|
-
# * :title_intro [String]
|
83
|
-
# * :title_main [String]
|
84
|
-
# * :language [String]
|
85
|
-
# * :script [String]
|
81
|
+
# @return [Array<RelatonBib::TypedTitleString>]
|
86
82
|
def get_titles(doc)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
titles
|
83
|
+
tzh = doc.at("//td[contains(text(), '中文标准名称')]/b").text
|
84
|
+
titles = RelatonBib::TypedTitleString.from_string tzh, "zh", "Hans"
|
85
|
+
ten = doc.at("//td[contains(text(), '英文标准名称')]").text.match(/[\w\s]+/).to_s
|
86
|
+
return titles if ten.empty?
|
87
|
+
|
88
|
+
titles + RelatonBib::TypedTitleString.from_string(ten, "en", "Latn")
|
94
89
|
end
|
95
90
|
|
96
91
|
def get_type
|
@@ -123,12 +118,6 @@ module RelatonGb
|
|
123
118
|
mandate: get_mandate(ref), topic: "other" }
|
124
119
|
end
|
125
120
|
|
126
|
-
# @param doc [Nokogiri::HTML::Document]
|
127
|
-
# @return [String]
|
128
|
-
# def get_ref(doc)
|
129
|
-
# doc.xpath('//dt[text()="标准号"]/following-sibling::dd[1]').text
|
130
|
-
# end
|
131
|
-
|
132
121
|
# @param doc [Nokogiri::HTML::Document]
|
133
122
|
# @return [Array<String>]
|
134
123
|
def get_ccs(doc)
|
@@ -58,13 +58,15 @@ module RelatonGb
|
|
58
58
|
# * :language [String]
|
59
59
|
# * :script [String]
|
60
60
|
def get_titles(doc)
|
61
|
-
titles = [{ title_main: doc.at("//h4").text.delete("\r\n\t"),
|
62
|
-
|
61
|
+
# titles = [{ title_main: doc.at("//h4").text.delete("\r\n\t"),
|
62
|
+
# title_intro: nil, language: "zh", script: "Hans" }]
|
63
|
+
tzh = doc.at("//h4").text.delete("\r\n\t")
|
64
|
+
RelatonBib::TypedTitleString.from_string tzh, "zh", "Hans"
|
63
65
|
# title_main = doc.at("//td[contains(text(), '英文标准名称')]").text.match(/[\w\s]+/).to_s
|
64
66
|
# unless title_main.empty?
|
65
67
|
# titles << { title_main: title_main, title_intro: nil, language: "en", script: "Latn" }
|
66
68
|
# end
|
67
|
-
titles
|
69
|
+
# titles
|
68
70
|
end
|
69
71
|
|
70
72
|
# @param _doc [Nokogiri::HTML::Document]
|
@@ -21,7 +21,7 @@ module RelatonGb
|
|
21
21
|
def scrape_page(text)
|
22
22
|
search_html = OpenURI.open_uri(
|
23
23
|
"http://www.ttbz.org.cn/Home/Standard?searchType=2&key=" +
|
24
|
-
CGI.escape(text.tr("-", [8212].pack("U")))
|
24
|
+
CGI.escape(text.tr("-", [8212].pack("U")))
|
25
25
|
).read
|
26
26
|
header = Nokogiri::HTML search_html
|
27
27
|
xpath = '//table[contains(@class, "standard_list_table")]/tr/td/a'
|
@@ -84,26 +84,21 @@ module RelatonGb
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def get_titles(doc)
|
87
|
-
|
88
|
-
titles =
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
end
|
96
|
-
titles
|
87
|
+
xpz = '//td[contains(.,"中文标题")]/following-sibling::td[1]'
|
88
|
+
titles = RelatonBib::TypedTitleString.from_string doc.at(xpz)
|
89
|
+
.text, "zh", "Hans"
|
90
|
+
xpe = '//td[contains(.,"英文标题")]/following-sibling::td[1]'
|
91
|
+
ten = doc.xpath(xpe).text
|
92
|
+
return titles if ten.empty?
|
93
|
+
|
94
|
+
titles + RelatonBib::TypedTitleString.from_string(ten, "en", "Latn")
|
97
95
|
end
|
98
96
|
|
99
97
|
def gbtype
|
100
|
-
{ scope: "social-group", prefix: "T", mandate: "mandatory",
|
98
|
+
{ scope: "social-group", prefix: "T", mandate: "mandatory",
|
99
|
+
topic: "other" }
|
101
100
|
end
|
102
101
|
|
103
|
-
# def get_group_code(ref)
|
104
|
-
# ref.match(%r{(?<=\/)[^\s]})
|
105
|
-
# end
|
106
|
-
|
107
102
|
def get_ccs(doc)
|
108
103
|
[doc.xpath('//td[contains(.,"中国标准分类号")]/following-sibling::td[1]')
|
109
104
|
.text.gsub(/[\r\n]/, "").strip.match(/^[^\s]+/).to_s]
|
data/lib/relaton_gb/version.rb
CHANGED
@@ -3,18 +3,15 @@ require "nokogiri"
|
|
3
3
|
module RelatonGb
|
4
4
|
class XMLParser < RelatonIsoBib::XMLParser
|
5
5
|
class << self
|
6
|
-
def from_xml(xml)
|
7
|
-
doc = Nokogiri::XML(xml)
|
8
|
-
gbitem = doc.at "/bibitem|/bibdata"
|
9
|
-
if gbitem
|
10
|
-
GbBibliographicItem.new item_data(gbitem)
|
11
|
-
else
|
12
|
-
warn "[relato-gb] can't find bibitem or bibdata element in the XML"
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
6
|
private
|
17
7
|
|
8
|
+
# override RelatonBib::BibliographicItem.bib_item method
|
9
|
+
# @param item_hash [Hash]
|
10
|
+
# @return [RelatonGb::GbBibliographicItem]
|
11
|
+
def bib_item(item_hash)
|
12
|
+
GbBibliographicItem.new item_hash
|
13
|
+
end
|
14
|
+
|
18
15
|
def item_data(gbitem)
|
19
16
|
data = super
|
20
17
|
data[:committee] = fetch_committee gbitem
|
@@ -24,11 +21,6 @@ module RelatonGb
|
|
24
21
|
data
|
25
22
|
end
|
26
23
|
|
27
|
-
# Overrade get_id from RelatonIsoBib::XMLParser
|
28
|
-
# def get_id(did)
|
29
|
-
# did.text.match(/^(?<project>.*?\d+)(?<hyphen>-)?(?(<hyphen>)(?<year>\d*))/)
|
30
|
-
# end
|
31
|
-
|
32
24
|
def fetch_committee(doc)
|
33
25
|
committee = doc.at "./ext/gbcommittee"
|
34
26
|
return nil unless committee
|
data/relaton_gb.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-gb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.5.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -184,14 +184,14 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - ">="
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
187
|
+
version: 1.5.pre
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - ">="
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
194
|
+
version: 1.5.pre
|
195
195
|
description: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using
|
196
196
|
the BibliographicItem model.'
|
197
197
|
email:
|
@@ -241,7 +241,7 @@ homepage: https://github.com/metanorma/relaton_gb
|
|
241
241
|
licenses:
|
242
242
|
- BSD-2-Clause
|
243
243
|
metadata: {}
|
244
|
-
post_install_message:
|
244
|
+
post_install_message:
|
245
245
|
rdoc_options: []
|
246
246
|
require_paths:
|
247
247
|
- lib
|
@@ -252,12 +252,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
252
252
|
version: 2.4.0
|
253
253
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
254
254
|
requirements:
|
255
|
-
- - "
|
255
|
+
- - ">"
|
256
256
|
- !ruby/object:Gem::Version
|
257
|
-
version:
|
257
|
+
version: 1.3.1
|
258
258
|
requirements: []
|
259
259
|
rubygems_version: 3.0.6
|
260
|
-
signing_key:
|
260
|
+
signing_key:
|
261
261
|
specification_version: 4
|
262
262
|
summary: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using the
|
263
263
|
BibliographicItem model.'
|