relaton-gb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.hound.yml +3 -0
- data/.rspec +3 -0
- data/.rubocop.yml +10 -0
- data/.travis.yml +17 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +84 -0
- data/LICENSE.txt +25 -0
- data/README.adoc +202 -0
- data/Rakefile +6 -0
- data/appveyor.yml +35 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/relaton/processor.rb +25 -0
- data/lib/relaton_gb.rb +7 -0
- data/lib/relaton_gb/gb_bibliographic_item.rb +111 -0
- data/lib/relaton_gb/gb_bibliography.rb +137 -0
- data/lib/relaton_gb/gb_scrapper.rb +51 -0
- data/lib/relaton_gb/gb_standard_type.rb +30 -0
- data/lib/relaton_gb/gb_technical_committee.rb +23 -0
- data/lib/relaton_gb/hit.rb +60 -0
- data/lib/relaton_gb/hit_collection.rb +45 -0
- data/lib/relaton_gb/scrapper.rb +197 -0
- data/lib/relaton_gb/sec_scrapper.rb +57 -0
- data/lib/relaton_gb/t_scrapper.rb +121 -0
- data/lib/relaton_gb/version.rb +5 -0
- data/lib/relaton_gb/xml_parser.rb +49 -0
- data/lib/relaton_gb/yaml/prefixes.yaml +197 -0
- data/relaton_gb.gemspec +39 -0
- metadata +229 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RelatonGb
|
4
|
+
# Page of hit collection
|
5
|
+
class HitCollection < Array
  # Nothing in this class sets the flag to true: the constructor initialises
  # it to false and the only method that flipped it (+fetch+) is commented out.
  #
  # @return [Boolean]
  attr_reader :fetched

  # @return [Integer, nil] the value given to the constructor
  attr_reader :hit_pages

  # @return [RelatonGb::GbScrapper, RelatonGb::SecScrapper, RelatonGb::TScrapper, nil]
  #   the scrapper given to the constructor, nil when none was given
  attr_reader :scrapper

  # Builds a collection containing a copy of the references in +hits+.
  #
  # @param hits [Array] hits to store in the collection
  # @param hit_pages [Integer, nil]
  # @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper, RelatonGb::TScrapper, nil]
  #   optional; earlier callers that pass one or two arguments keep working
  def initialize(hits = [], hit_pages = nil, scrapper = nil)
    concat hits
    @fetched = false
    @hit_pages = hit_pages
    @scrapper = scrapper
  end

  # @return [RelatonGb::HitCollection]
  # def fetch
  #   workers = RelatonBib::WorkersPool.new 4
  #   workers.worker(&:fetch)
  #   each do |hit|
  #     workers << hit
  #   end
  #   workers.end
  #   workers.result
  #   @fetched = true
  #   self
  # end

  # @return [String] same text as #inspect
  def to_s
    inspect
  end

  # Short representation that shows the class, a hexadecimal number derived
  # from the object id, and the fetched flag instead of dumping every hit.
  #
  # @return [String]
  def inspect
    "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
  end
end
|
45
|
+
end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "yaml"
|
5
|
+
require "gb_agencies"
|
6
|
+
|
7
|
+
module RelatonGb
|
8
|
+
# Common scrapping methods.
|
9
|
+
module Scrapper
  # XPath of the <dd> element that follows the "标准号" <dt>; shared default
  # for every method that reads the document identifier.
  DOCID_XPATH = '//dt[text()="标准号"]/following-sibling::dd[1]'
  private_constant :DOCID_XPATH

  # NOTE: this assigns an instance variable on the Scrapper module itself.
  # #prefix runs on whatever object extends this module, so its cache lives
  # in that object's own @prefixes.
  @prefixes = nil

  # rubocop:disable Metrics/MethodLength
  # Collects every attribute of a scrapped page into one hash.
  # Relies on a +get_committee+ method supplied by the extending object.
  #
  # @param doc [Nokogiri::HTML::Document]
  # @param src [String] url of scrapped page
  # @return [Hash]
  def scrapped_data(doc, src:)
    {
      committee: get_committee(doc),
      docid: get_docid(doc),
      titles: get_titles(doc),
      contributors: get_contributors(doc),
      type: get_type(doc),
      docstatus: get_status(doc),
      gbtype: get_gbtype(doc),
      ccs: get_ccs(doc),
      ics: get_ics(doc),
      link: [{ type: "src", content: src }],
      dates: get_dates(doc),
      language: ["zh"],
      script: ["Hans"],
      structuredidentifier: fetch_structuredidentifier(doc),
    }
  end
  # rubocop:enable Metrics/MethodLength

  # @param doc [Nokogiri::HTML::Document]
  # @param xpt [String]
  # @return [Array<RelatonBib::DocumentIdentifier>] empty when the node is absent
  def get_docid(doc, xpt = DOCID_XPATH)
    item_ref = doc.at xpt
    return [] unless item_ref

    [RelatonBib::DocumentIdentifier.new(id: item_ref.text, type: "Chinese Standard")]
  end

  # Splits the identifier text into project and part numbers. "?" stands in
  # for every value that cannot be determined: all of them when the node is
  # absent, the two numbers when the text contains no digits to split on.
  #
  # @param doc [Nokogiri::HTML::Document]
  # @param xpt [String]
  # @return [RelatonIsoBib::StructuredIdentifier]
  def fetch_structuredidentifier(doc, xpt = DOCID_XPATH)
    id = doc.at(xpt)&.text
    # m is nil both for a missing node and for text without any digits.
    m = id&.match(/^([^–—.-]*\d+)\.?((?<=\.)\d+|)/)
    # prefix = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
    RelatonIsoBib::StructuredIdentifier.new(
      project_number: m ? m[1] : "?", part_number: m ? m[2] : "?",
      prefix: nil, id: id || "?", type: "Chinese Standard"
    )
  end

  # Builds the publisher entry from the leading token of the identifier.
  #
  # @param doc [Nokogiri::HTML::Document]
  # @param xpt [String]
  # @return [Array<Hash>] one hash with :entity and :roles keys
  def get_contributors(doc, xpt = DOCID_XPATH)
    agencies = {
      "en" => GbAgencies::Agencies.new("en", {}, ""),
      "zh" => GbAgencies::Agencies.new("zh", {}, ""),
    }
    name = doc.xpath(xpt).text[/^[^\s]+/].to_s
    # The trailing /T or /Z is dropped for every prefix except GB ones.
    name = name.sub(%r{/[TZ]$}, "") unless name.start_with?("GB")
    gbtype = get_gbtype(doc)
    names = agencies.map do |lang, agency|
      { language: lang, content: agency.standard_agency1(gbtype[:scope], name, gbtype[:mandate]) }
    end
    entity = RelatonBib::Organization.new name: names
    [{ entity: entity, roles: ["publisher"] }]
  end

  # @param doc [Nokogiri::HTML::Document]
  # @return [Array<Hash>]
  #   * :title_intro [String]
  #   * :title_main [String]
  #   * :language [String]
  #   * :script [String]
  def get_titles(doc)
    titles = [{ title_main: doc.css("div.page-header h4").text, title_intro: nil,
                language: "zh", script: "Hans" }]
    title_main = doc.css("div.page-header h5").text
    unless title_main.empty?
      titles << { title_main: title_main, title_intro: nil, language: "en", script: "Latn" }
    end
    titles
  end

  # @param _doc [Nokogiri::HTML::Document] unused
  # @return [String]
  def get_type(_doc)
    "international-standard"
  end

  # Maps the status label of the page to a stage name. The stage is nil when
  # the label is absent or is not one of the three known values.
  #
  # @param doc [Nokogiri::HTML::Document]
  # @param xpt [String]
  # @return [RelatonBib::DocumentStatus]
  def get_status(doc, xpt = ".s-status.label:nth-child(3)")
    stage = case doc.at(xpt)&.text&.gsub(/\s/, "")
            when "即将实施" then "published"
            when "现行" then "activated"
            when "废止" then "obsoleted"
            end
    RelatonBib::DocumentStatus.new stage: stage
  end

  private

  # @param doc [Nokogiri::HTML::Document]
  # @return [Hash]
  #   * :scope [String]
  #   * :prefix [String] nil when the prefix is not listed in prefixes.yaml
  #   * :mandate [String]
  def get_gbtype(doc)
    ref = get_ref(doc)
    prefix_data = get_prefix(ref) || {}
    { scope: get_scope(doc), prefix: prefix_data["prefix"],
      mandate: get_mandate(ref) }
  end

  # @param doc [Nokogiri::HTML::Document]
  # @return [String] text of the identifier node(s), "" when there are none
  def get_ref(doc)
    doc.xpath(DOCID_XPATH).text
  end

  # @param doc [Nokogiri::HTML::Document]
  # @return [Array<String>]
  def get_ccs(doc)
    [doc&.xpath('//dt[text()="中国标准分类号"]/following-sibling::dd[1]')&.text]
  end

  # @param doc [Nokogiri::HTML::Document]
  # @return [Array<Hash>] empty when the page carries no ICS code
  #   * :field [String]
  #   * :group [String] right-padded with "0" to three characters, nil when absent
  #   * :subgroup [String] nil when absent
  def get_ics(doc)
    ics = doc.xpath('//dt[(.="国际标准分类号")]/following-sibling::dd[1]/span')
    return [] if ics.empty?

    field, group, subgroup = ics.text.split "."
    # An empty span yields no field at all.
    return [] unless field

    [{ field: field, group: group&.ljust(3, "0"), subgroup: subgroup }]
  end

  # @param doc [Nokogiri::HTML::Document]
  # @return [String, nil] "national", "sector", or nil when the label is
  #   absent or is neither of the two known values
  def get_scope(doc)
    scope = doc.at(".s-status.label-info")&.text
    if scope == "国家标准"
      "national"
    elsif scope =~ /^行业标准/
      "sector"
    end
  end

  # @param ref [String]
  # @return [Hash{String=>String}, nil] nil when the prefix is not listed
  def get_prefix(ref)
    pref = ref.match(/^[^\s]+/).to_s.split("/").first
    prefix pref
  end

  # Looks the prefix up in yaml/prefixes.yaml, which is loaded once per
  # extending object and then cached in @prefixes.
  #
  # @param pref [String]
  # @return [Hash{String=>String}, nil]
  def prefix(pref)
    file_path = File.join(__dir__, "yaml/prefixes.yaml")
    @prefixes ||= YAML.load_file(file_path)
    @prefixes[pref]
  end

  # Derives the mandate from the token that follows the first "/" in +ref+.
  #
  # @param ref [String]
  # @return [String]
  def get_mandate(ref)
    case ref.match(%r{(?<=\/)[^\s]+}).to_s
    when "T" then "recommended"
    when "Z" then "guidelines"
    else "mandatory"
    end
  end

  # @param doc [Nokogiri::HTML::Document]
  # @return [Array<Hash>]
  #   * :type [String] type of date
  #   * :on [String] date
  def get_dates(doc)
    date = doc.xpath('//dt[.="发布日期"]/following-sibling::dd[1]').text
    [{ type: "published", on: date }]
  end
end
|
197
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "net/http"
|
5
|
+
require "json"
|
6
|
+
require "nokogiri"
|
7
|
+
require "relaton_gb/scrapper"
|
8
|
+
require "relaton_gb/gb_bibliographic_item"
|
9
|
+
require "relaton_gb/hit_collection"
|
10
|
+
require "relaton_gb/hit"
|
11
|
+
|
12
|
+
module RelatonGb
|
13
|
+
# Sector standard scrapper
|
14
|
+
module SecScrapper
  extend Scrapper

  # Errors reported as "Cannot access ..." instead of being raised.
  # SocketError matches what TScrapper rescues; JSON::ParserError covers a
  # response body that is not JSON.
  NETWORK_ERRORS = [
    Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
    Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
    SocketError, JSON::ParserError
  ].freeze
  private_constant :NETWORK_ERRORS

  class << self
    # Searches the sector standards site for +text+.
    #
    # @param text [String] code of standard for search
    # @return [RelatonGb::HitCollection, nil] nil (after a warning) when the
    #   request or the response parsing fails with one of NETWORK_ERRORS
    def scrape_page(text)
      # Standard codes contain spaces and slashes, which URI() rejects or
      # misreads unless they are escaped.
      query = URI.encode_www_form_component(text)
      uri = URI "http://www.std.gov.cn/hb/search/hbPage?searchText=#{query}"
      res = JSON.parse Net::HTTP.get(uri)
      hits = res["rows"].map do |r|
        Hit.new pid: r["id"], title: r["STD_CODE"], scrapper: self
      end
      HitCollection.new hits
    rescue *NETWORK_ERRORS
      warn "Cannot access #{uri}"
    end

    # Downloads and parses the page of a single standard.
    #
    # @param pid [String] standard's page id
    # @return [RelatonGb::GbBibliographicItem, nil] nil (after a warning)
    #   when the request fails with one of NETWORK_ERRORS
    def scrape_doc(pid)
      src = "http://www.std.gov.cn/hb/search/stdHBDetailed?id=#{pid}"
      page_uri = URI src
      doc = Nokogiri::HTML Net::HTTP.get(page_uri)
      GbBibliographicItem.new scrapped_data(doc, src: src)
    rescue *NETWORK_ERRORS
      warn "Cannot access #{src}"
    end

    private

    # @param doc [Nokogiri::HTML::Document]
    # @return [Hash]
    #   * :type [String]
    #   * :name [String] nil when the prefix lookup returns nothing
    def get_committee(doc)
      ref = get_ref(doc)
      prefix_data = get_prefix(ref) || {}
      { type: "technical", name: prefix_data["administration"] }
    end
  end
end
|
57
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "open-uri"
|
5
|
+
require "nokogiri"
|
6
|
+
require "relaton_gb/scrapper"
|
7
|
+
require "relaton_gb/gb_bibliographic_item"
|
8
|
+
require "relaton_gb/hit_collection"
|
9
|
+
require "relaton_gb/hit"
|
10
|
+
|
11
|
+
module RelatonGb
|
12
|
+
# Social standard scrapper.
|
13
|
+
module TScrapper
  extend Scrapper

  class << self
    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    # Searches www.ttbz.org.cn for +text+. Hyphens in +text+ are replaced by
    # em dashes (U+2014) before the query is sent.
    #
    # @param text [String]
    # @return [RelatonGb::HitCollection, nil] nil (after a warning) on
    #   OpenURI::HTTPError or SocketError
    def scrape_page(text)
      search_html = OpenURI.open_uri(
        "http://www.ttbz.org.cn/Home/Standard?searchType=2&key=" +
        CGI.escape(text.tr("-", [8212].pack("U"))),
      )
      header = Nokogiri::HTML search_html
      xpath = '//table[contains(@class, "standard_list_table")]/tr/td/a'
      t_xpath = "../preceding-sibling::td[3]"
      hits = header.xpath(xpath).map do |h|
        # The first alternative is the byte sequence of an em dash read as
        # single-byte characters; the second is a correctly decoded em dash.
        # NOTE(review): which one appears presumably depends on how the page
        # encoding is detected - confirm against a live response.
        title = h.at(t_xpath)&.text.to_s.gsub(/â\u0080\u0094|—/, "-")
        Hit.new pid: h[:href].sub(%r{\/$}, ""), title: title, scrapper: self
      end
      HitCollection.new hits
    rescue OpenURI::HTTPError, SocketError
      warn "Cannot access http://www.ttbz.org.cn/Home/Standard"
    end
    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

    # Downloads and parses the page of a single standard.
    #
    # @param pid [String] standard's page path
    # @return [RelatonGb::GbBibliographicItem, nil] nil (after a warning) on
    #   OpenURI::HTTPError or SocketError
    def scrape_doc(pid)
      src = "http://www.ttbz.org.cn#{pid}"
      doc = Nokogiri::HTML OpenURI.open_uri(src), nil, Encoding::UTF_8.to_s
      GbBibliographicItem.new scrapped_data(doc, src: src)
    rescue OpenURI::HTTPError, SocketError
      warn "Cannot access #{src}"
    end

    private

    # rubocop:disable Metrics/MethodLength
    # Collects every attribute of a scrapped page into one hash.
    #
    # @param doc [Nokogiri::HTML::Document]
    # @param src [String] url of scrapped page
    # @return [Hash]
    def scrapped_data(doc, src:)
      docid_xpt = '//td[contains(.,"标准编号")]/following-sibling::td[1]'
      status_xpt = '//td[contains(.,"标准状态")]/following-sibling::td[1]/span'
      {
        committee: get_committee(doc),
        docid: get_docid(doc, docid_xpt),
        titles: get_titles(doc),
        type: "international-standard",
        docstatus: get_status(doc, status_xpt),
        gbtype: gbtype,
        ccs: get_ccs(doc),
        ics: get_ics(doc),
        link: [{ type: "src", content: src }],
        dates: get_dates(doc),
        language: ["zh"],
        script: ["Hans"],
        # Same node as :docid; the default XPath of the helper looks for a
        # <dt> element, while this page is read through <td> cells.
        structuredidentifier: fetch_structuredidentifier(doc, docid_xpt),
      }
    end
    # rubocop:enable Metrics/MethodLength

    # @param doc [Nokogiri::HTML::Document]
    # @return [Hash]
    #   * :name [String]
    #   * :type [String]
    def get_committee(doc)
      {
        name: doc.xpath('//td[.="团体名称"]/following-sibling::td[1]').text,
        type: "technical",
      }
    end

    # @param doc [Nokogiri::HTML::Document]
    # @return [Array<Hash>] the Chinese title, plus the English one when the
    #   page has it
    def get_titles(doc)
      xpath = '//td[contains(.,"中文标题")]/following-sibling::td[1]'
      titles = [{ title_main: doc.xpath(xpath).text,
                  title_intro: nil, language: "zh", script: "Hans" }]
      xpath = '//td[contains(.,"英文标题")]/following-sibling::td[1]'
      title_main = doc.xpath(xpath).text
      unless title_main.empty?
        titles << { title_main: title_main, title_intro: nil, language: "en",
                    script: "Latn" }
      end
      titles
    end

    # @return [Hash] fixed scope, prefix and mandate used for every document
    def gbtype
      { scope: "social-group", prefix: "T", mandate: "mandatory" }
    end

    # def get_group_code(ref)
    #   ref.match(%r{(?<=\/)[^\s]})
    # end

    # @param doc [Nokogiri::HTML::Document]
    # @return [Array<String>] first whitespace-delimited token of the cell,
    #   "" when there is none
    def get_ccs(doc)
      [doc.xpath('//td[contains(.,"中国标准分类号")]/following-sibling::td[1]')
        .text.gsub(/[\r\n]/, "").strip.match(/^[^\s]+/).to_s]
    end

    # @param doc [Nokogiri::HTML::Document]
    # @return [Array<Hash>] empty when the page carries no ICS code
    #   * :field [String]
    #   * :group [String] right-padded with "0" to three characters, nil when absent
    #   * :subgroup [String] nil when absent
    def get_ics(doc)
      xpath = '//td[contains(.,"国际标准分类号")]/following-sibling::td[1]/span'
      ics = doc.xpath(xpath).text[/^[^\s]+/].to_s
      field, group, subgroup = ics.split "."
      # An empty code yields no field at all.
      return [] unless field

      [{ field: field, group: group&.ljust(3, "0"), subgroup: subgroup }]
    end

    # @param doc [Nokogiri::HTML::Document]
    # @return [Array<Hash>] empty when no date can be recognised
    #   * :type [String] type of date
    #   * :on [String] date as "yyyy-mm-dd"
    def get_dates(doc)
      d = doc.xpath('//td[contains(.,"发布日期")]/following-sibling::td[1]/span')
        .text.match(/(?<y>\d{4})[^\d]+(?<m>\d{2})[^\d]+(?<d>\d{2})/)
      return [] unless d

      [{ type: "published", on: "#{d[:y]}-#{d[:m]}-#{d[:d]}" }]
    end
  end
end
|
121
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module RelatonGb
|
4
|
+
class XMLParser < RelatonIsoBib::XMLParser
  class << self
    # Builds a bibliographic item from XML whose root element is either
    # +bibitem+ or +bibdata+.
    #
    # @param xml [String]
    # @return [RelatonGb::GbBibliographicItem, nil] nil (after a warning)
    #   when neither root element is found
    def from_xml(xml)
      doc = Nokogiri::XML(xml)
      gbitem = doc.at "/bibitem|/bibdata"
      unless gbitem
        warn "[relaton-gb] cannot find a bibitem or bibdata root element in the XML"
        return
      end

      GbBibliographicItem.new item_data(gbitem)
    end

    private

    # Extends the hash built by the parent parser with the GB-specific keys.
    #
    # @param gbitem [Nokogiri::XML::Element]
    # @return [Hash]
    def item_data(gbitem)
      data = super
      data[:committee] = fetch_committee gbitem
      data[:gbtype] = fetch_gbtype gbitem
      data[:ccs] = fetch_ccs gbitem
      data[:plannumber] = gbitem.at("./plannumber")&.text
      data
    end

    # Override get_id from RelatonIsoBib::XMLParser
    # def get_id(did)
    #   did.text.match(/^(?<project>.*?\d+)(?<hyphen>-)?(?(<hyphen>)(?<year>\d*))/)
    # end

    # @param doc [Nokogiri::XML::Element]
    # @return [Hash, nil] :type and :name of the committee, nil when the
    #   element is absent
    def fetch_committee(doc)
      committee = doc.at "./ext/gbcommittee"
      return nil unless committee

      { type: committee[:type], name: committee.text }
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<String>]
    def fetch_ccs(doc)
      doc.xpath("./ext/ccs/code").map(&:text)
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Hash] :scope, :prefix and :mandate, each nil when its element
    #   is absent
    def fetch_gbtype(doc)
      gbtype = doc.at "./ext/gbtype"
      {
        scope: gbtype&.at("gbscope")&.text,
        prefix: gbtype&.at("gbprefix")&.text,
        mandate: gbtype&.at("gbmandate")&.text,
      }
    end
  end
end
|
49
|
+
end
|