relaton-gb 0.12.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/grammars/basicdoc.rng +12 -55
- data/grammars/biblio.rng +145 -50
- data/grammars/gbstandard.rng +4 -1
- data/grammars/isodoc.rng +1045 -55
- data/grammars/isostandard.rng +148 -472
- data/lib/relaton_gb.rb +1 -1
- data/lib/relaton_gb/gb_bibliographic_item.rb +2 -26
- data/lib/relaton_gb/gb_scrapper.rb +1 -1
- data/lib/relaton_gb/hash_converter.rb +9 -0
- data/lib/relaton_gb/hit.rb +1 -1
- data/lib/relaton_gb/hit_collection.rb +3 -2
- data/lib/relaton_gb/scrapper.rb +23 -24
- data/lib/relaton_gb/sec_scrapper.rb +5 -3
- data/lib/relaton_gb/t_scrapper.rb +7 -10
- data/lib/relaton_gb/version.rb +1 -1
- data/lib/relaton_gb/xml_parser.rb +7 -15
- data/relaton_gb.gemspec +1 -1
- metadata +6 -6
data/lib/relaton_gb.rb
CHANGED
@@ -35,6 +35,7 @@ module RelatonGb
|
|
35
35
|
@ccs = args[:ccs].map { |c| c.is_a?(Cnccs::Ccs) ? c : Cnccs.fetch(c) }
|
36
36
|
@gbtype = GbStandardType.new args[:gbtype]
|
37
37
|
@gbplannumber = args[:gbplannumber] || structuredidentifier&.project_number
|
38
|
+
# @doctype = args[:doctype]
|
38
39
|
end
|
39
40
|
|
40
41
|
# @param builder [Nokogiri::XML::Builder]
|
@@ -61,9 +62,7 @@ module RelatonGb
|
|
61
62
|
|
62
63
|
# @return [String]
|
63
64
|
def inspect
|
64
|
-
"<#{self.class}:#{format('
|
65
|
-
# "@fullIdentifier=\"#{@fetch&.shortref}\" "\
|
66
|
-
# "@title=\"#{title}\">"
|
65
|
+
"<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)}>"
|
67
66
|
end
|
68
67
|
|
69
68
|
# @return [String]
|
@@ -76,34 +75,11 @@ module RelatonGb
|
|
76
75
|
|
77
76
|
id ||= @docidentifier.reject { |i| i.type == "DOI" }[0]
|
78
77
|
idstr = id.id
|
79
|
-
# if id.part_number&.size&.positive?
|
80
|
-
# idstr = idstr + "-#{id.part_number}"
|
81
|
-
# end
|
82
78
|
idstr.gsub(/\s/, "").strip
|
83
79
|
end
|
84
80
|
|
85
81
|
private
|
86
82
|
|
87
|
-
# Overraides IsoBibliographicItem method.
|
88
|
-
# @param language [Array<String>]
|
89
|
-
# @raise ArgumentError
|
90
|
-
def check_language(language)
|
91
|
-
language.each do |lang|
|
92
|
-
unless %w[en zh].include? lang
|
93
|
-
raise ArgumentError, "invalid language: #{lang}"
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
# Overraides IsoBibliographicItem method.
|
99
|
-
# @param script [Array<String>]
|
100
|
-
# @raise ArgumentError
|
101
|
-
def check_script(script)
|
102
|
-
script.each do |scr|
|
103
|
-
raise ArgumentError, "invalid script: #{scr}" unless %w[Latn Hans].include? scr
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
83
|
# @param builder [Nokogiri::XML::Builder]
|
108
84
|
def render_gbxml(builder)
|
109
85
|
gbtype.to_xml builder
|
@@ -16,7 +16,7 @@ module RelatonGb
|
|
16
16
|
# @return [RelatonGb::HitCollection]
|
17
17
|
def scrape_page(text)
|
18
18
|
search_html = OpenURI.open_uri(
|
19
|
-
"http://openstd.samr.gov.cn/bzgk/gb/std_list?p.p2=" + text
|
19
|
+
"http://openstd.samr.gov.cn/bzgk/gb/std_list?p.p2=" + text,
|
20
20
|
)
|
21
21
|
result = Nokogiri::HTML search_html
|
22
22
|
hits = result.xpath(
|
@@ -17,6 +17,15 @@ module RelatonGb
|
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
+
#
|
21
|
+
# Overrides superclass's method
|
22
|
+
#
|
23
|
+
# @param item [Hash]
|
24
|
+
# @return [RelatonGb::GbBibliographicItem]
|
25
|
+
def bib_item(item)
|
26
|
+
GbBibliographicItem.new(item)
|
27
|
+
end
|
28
|
+
|
20
29
|
def ccs_hash_to_bib(ret)
|
21
30
|
ret[:ccs] = array(ret[:ccs]).map do |ccs|
|
22
31
|
ccs[:code] ? Cnccs.fetch(ccs[:code]) : Cnccs.fetch(ccs)
|
data/lib/relaton_gb/hit.rb
CHANGED
@@ -38,7 +38,7 @@ module RelatonGb
|
|
38
38
|
|
39
39
|
# @return [String]
|
40
40
|
def inspect
|
41
|
-
"<#{self.class}:#{format('
|
41
|
+
"<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)} "\
|
42
42
|
"@fullIdentifier=\"#{@fetch&.shortref}\" "\
|
43
43
|
"@docref=\"#{docref}\">"
|
44
44
|
end
|
@@ -5,10 +5,11 @@ module RelatonGb
|
|
5
5
|
class HitCollection < RelatonBib::HitCollection
|
6
6
|
# @param hits [Array<Hash>]
|
7
7
|
# @param hit_pages [Integer]
|
8
|
-
# @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper,
|
8
|
+
# @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper,
|
9
|
+
# RelatonGb::TScrapper]
|
9
10
|
def initialize(hits = [])
|
10
11
|
@array = hits
|
11
|
-
@fetched
|
12
|
+
@fetched = false
|
12
13
|
end
|
13
14
|
end
|
14
15
|
end
|
data/lib/relaton_gb/scrapper.rb
CHANGED
@@ -55,32 +55,37 @@ module RelatonGb
|
|
55
55
|
# @param docref [Strings]
|
56
56
|
# @return [Array<Hash>]
|
57
57
|
def get_contributors(doc, docref)
|
58
|
-
gb_en = GbAgencies::Agencies.new("en", {}, "")
|
59
|
-
gb_zh = GbAgencies::Agencies.new("zh", {}, "")
|
60
58
|
name = docref.match(/^[^\s]+/).to_s
|
61
59
|
name.sub!(%r{/[TZ]$}, "") unless name =~ /^GB/
|
62
60
|
gbtype = get_gbtype(doc, docref)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
61
|
+
orgs = %w[en zh].map { |l| org(l, name, gbtype) }.compact
|
62
|
+
return [] unless orgs.any?
|
63
|
+
|
64
|
+
entity = RelatonBib::Organization.new name: orgs
|
67
65
|
[{ entity: entity, role: [type: "publisher"] }]
|
68
66
|
end
|
69
67
|
|
68
|
+
# @param lang [String]
|
69
|
+
# @param name [String]
|
70
|
+
# @param gbtype [Hash]
|
71
|
+
# @return [Hash]
|
72
|
+
def org(lang, name, gbtype)
|
73
|
+
ag = GbAgencies::Agencies.new(lang, {}, "")
|
74
|
+
content = ag.standard_agency1(gbtype[:scope], name, gbtype[:mandate])
|
75
|
+
return unless content
|
76
|
+
|
77
|
+
{ language: lang, content: content }
|
78
|
+
end
|
79
|
+
|
70
80
|
# @param doc [Nokogiri::HTML::Document]
|
71
|
-
# @return [Array<
|
72
|
-
# * :title_intro [String]
|
73
|
-
# * :title_main [String]
|
74
|
-
# * :language [String]
|
75
|
-
# * :script [String]
|
81
|
+
# @return [Array<RelatonBib::TypedTitleString>]
|
76
82
|
def get_titles(doc)
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
titles
|
83
|
+
tzh = doc.at("//td[contains(text(), '中文标准名称')]/b").text
|
84
|
+
titles = RelatonBib::TypedTitleString.from_string tzh, "zh", "Hans"
|
85
|
+
ten = doc.at("//td[contains(text(), '英文标准名称')]").text.match(/[\w\s]+/).to_s
|
86
|
+
return titles if ten.empty?
|
87
|
+
|
88
|
+
titles + RelatonBib::TypedTitleString.from_string(ten, "en", "Latn")
|
84
89
|
end
|
85
90
|
|
86
91
|
def get_type
|
@@ -113,12 +118,6 @@ module RelatonGb
|
|
113
118
|
mandate: get_mandate(ref), topic: "other" }
|
114
119
|
end
|
115
120
|
|
116
|
-
# @param doc [Nokogiri::HTML::Document]
|
117
|
-
# @return [String]
|
118
|
-
# def get_ref(doc)
|
119
|
-
# doc.xpath('//dt[text()="标准号"]/following-sibling::dd[1]').text
|
120
|
-
# end
|
121
|
-
|
122
121
|
# @param doc [Nokogiri::HTML::Document]
|
123
122
|
# @return [Array<String>]
|
124
123
|
def get_ccs(doc)
|
@@ -58,13 +58,15 @@ module RelatonGb
|
|
58
58
|
# * :language [String]
|
59
59
|
# * :script [String]
|
60
60
|
def get_titles(doc)
|
61
|
-
titles = [{ title_main: doc.at("//h4").text.delete("\r\n\t"),
|
62
|
-
|
61
|
+
# titles = [{ title_main: doc.at("//h4").text.delete("\r\n\t"),
|
62
|
+
# title_intro: nil, language: "zh", script: "Hans" }]
|
63
|
+
tzh = doc.at("//h4").text.delete("\r\n\t")
|
64
|
+
RelatonBib::TypedTitleString.from_string tzh, "zh", "Hans"
|
63
65
|
# title_main = doc.at("//td[contains(text(), '英文标准名称')]").text.match(/[\w\s]+/).to_s
|
64
66
|
# unless title_main.empty?
|
65
67
|
# titles << { title_main: title_main, title_intro: nil, language: "en", script: "Latn" }
|
66
68
|
# end
|
67
|
-
titles
|
69
|
+
# titles
|
68
70
|
end
|
69
71
|
|
70
72
|
# @param _doc [Nokogiri::HTML::Document]
|
@@ -84,16 +84,13 @@ module RelatonGb
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def get_titles(doc)
|
87
|
-
|
88
|
-
titles =
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
script: "Latn" }
|
95
|
-
end
|
96
|
-
titles
|
87
|
+
xpz = '//td[contains(.,"中文标题")]/following-sibling::td[1]'
|
88
|
+
titles = RelatonBib::TypedTitleString.from_string doc.at(xpz).text, "zh", "Hans"
|
89
|
+
xpe = '//td[contains(.,"英文标题")]/following-sibling::td[1]'
|
90
|
+
ten = doc.xpath(xpe).text
|
91
|
+
return titles if ten.empty?
|
92
|
+
|
93
|
+
titles + RelatonBib::TypedTitleString.from_string(ten, "en", "Latn")
|
97
94
|
end
|
98
95
|
|
99
96
|
def gbtype
|
data/lib/relaton_gb/version.rb
CHANGED
@@ -3,18 +3,15 @@ require "nokogiri"
|
|
3
3
|
module RelatonGb
|
4
4
|
class XMLParser < RelatonIsoBib::XMLParser
|
5
5
|
class << self
|
6
|
-
def from_xml(xml)
|
7
|
-
doc = Nokogiri::XML(xml)
|
8
|
-
gbitem = doc.at "/bibitem|/bibdata"
|
9
|
-
if gbitem
|
10
|
-
GbBibliographicItem.new item_data(gbitem)
|
11
|
-
else
|
12
|
-
warn "[relato-gb] can't find bibitem or bibdata element in the XML"
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
6
|
private
|
17
7
|
|
8
|
+
# override RelatonBib::BibliographicItem.bib_item method
|
9
|
+
# @param item_hash [Hash]
|
10
|
+
# @return [RelatonGb::GbBibliographicItem]
|
11
|
+
def bib_item(item_hash)
|
12
|
+
GbBibliographicItem.new item_hash
|
13
|
+
end
|
14
|
+
|
18
15
|
def item_data(gbitem)
|
19
16
|
data = super
|
20
17
|
data[:committee] = fetch_committee gbitem
|
@@ -24,11 +21,6 @@ module RelatonGb
|
|
24
21
|
data
|
25
22
|
end
|
26
23
|
|
27
|
-
# Overrade get_id from RelatonIsoBib::XMLParser
|
28
|
-
# def get_id(did)
|
29
|
-
# did.text.match(/^(?<project>.*?\d+)(?<hyphen>-)?(?(<hyphen>)(?<year>\d*))/)
|
30
|
-
# end
|
31
|
-
|
32
24
|
def fetch_committee(doc)
|
33
25
|
committee = doc.at "./ext/gbcommittee"
|
34
26
|
return nil unless committee
|
data/relaton_gb.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-gb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -182,16 +182,16 @@ dependencies:
|
|
182
182
|
name: relaton-iso-bib
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
|
-
- - "
|
185
|
+
- - ">="
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version:
|
187
|
+
version: 1.2.0
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
|
-
- - "
|
192
|
+
- - ">="
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version:
|
194
|
+
version: 1.2.0
|
195
195
|
description: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using
|
196
196
|
the BibliographicItem model.'
|
197
197
|
email:
|