relaton-gb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.hound.yml +3 -0
- data/.rspec +3 -0
- data/.rubocop.yml +10 -0
- data/.travis.yml +17 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +84 -0
- data/LICENSE.txt +25 -0
- data/README.adoc +202 -0
- data/Rakefile +6 -0
- data/appveyor.yml +35 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/relaton/processor.rb +25 -0
- data/lib/relaton_gb.rb +7 -0
- data/lib/relaton_gb/gb_bibliographic_item.rb +111 -0
- data/lib/relaton_gb/gb_bibliography.rb +137 -0
- data/lib/relaton_gb/gb_scrapper.rb +51 -0
- data/lib/relaton_gb/gb_standard_type.rb +30 -0
- data/lib/relaton_gb/gb_technical_committee.rb +23 -0
- data/lib/relaton_gb/hit.rb +60 -0
- data/lib/relaton_gb/hit_collection.rb +45 -0
- data/lib/relaton_gb/scrapper.rb +197 -0
- data/lib/relaton_gb/sec_scrapper.rb +57 -0
- data/lib/relaton_gb/t_scrapper.rb +121 -0
- data/lib/relaton_gb/version.rb +5 -0
- data/lib/relaton_gb/xml_parser.rb +49 -0
- data/lib/relaton_gb/yaml/prefixes.yaml +197 -0
- data/relaton_gb.gemspec +39 -0
- metadata +229 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RelatonGb
|
4
|
+
# Page of hit collection
|
5
|
+
class HitCollection < Array
  # @return [TrueClass, FalseClass] fetch flag; initialized to false and
  #   not changed anywhere in this class
  attr_reader :fetched

  # @return [Integer, nil] the hit_pages value given to the constructor
  attr_reader :hit_pages

  # @return [RelatonGb::GbScrapper, RelatonGb::SecScrapper, RelatonGb::TScrapper, nil]
  #   the scrapper given to the constructor, nil when none was given
  attr_reader :scrapper

  # @param hits [Array<RelatonGb::Hit>] initial elements of the collection
  # @param hit_pages [Integer, nil]
  # @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper, RelatonGb::TScrapper, nil]
  #   optional; earlier the parameter was documented and exposed through
  #   the reader but could never be set
  def initialize(hits = [], hit_pages = nil, scrapper = nil)
    concat hits
    @fetched = false
    @hit_pages = hit_pages
    @scrapper = scrapper
  end

  # Disabled implementation, kept for reference.
  # @return [RelatonGb::HitCollection]
  # def fetch
  #   workers = RelatonBib::WorkersPool.new 4
  #   workers.worker(&:fetch)
  #   each do |hit|
  #     workers << hit
  #   end
  #   workers.end
  #   workers.result
  #   @fetched = true
  #   self
  # end

  # @return [String] same text as #inspect
  def to_s
    inspect
  end

  # @return [String] class name, a hex number derived from object_id and
  #   the fetch flag
  def inspect
    "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
  end
end
|
45
|
+
end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "yaml"
|
5
|
+
require "gb_agencies"
|
6
|
+
|
7
|
+
module RelatonGb
|
8
|
+
# Common scrapping methods.
|
9
|
+
module Scrapper
|
10
|
+
@prefixes = nil
|
11
|
+
|
12
|
+
# rubocop:disable Metrics/MethodLength
|
13
|
+
# @param doc [Nokogiri::HTML::Document]
|
14
|
+
# @param src [String] url of scrapped page
|
15
|
+
# @return [Hash]
|
16
|
+
def scrapped_data(doc, src:)
|
17
|
+
{
|
18
|
+
committee: get_committee(doc),
|
19
|
+
docid: get_docid(doc),
|
20
|
+
titles: get_titles(doc),
|
21
|
+
contributors: get_contributors(doc),
|
22
|
+
type: get_type(doc),
|
23
|
+
docstatus: get_status(doc),
|
24
|
+
gbtype: get_gbtype(doc),
|
25
|
+
ccs: get_ccs(doc),
|
26
|
+
ics: get_ics(doc),
|
27
|
+
link: [{ type: "src", content: src }],
|
28
|
+
dates: get_dates(doc),
|
29
|
+
language: ["zh"],
|
30
|
+
script: ["Hans"],
|
31
|
+
structuredidentifier: fetch_structuredidentifier(doc),
|
32
|
+
}
|
33
|
+
end
|
34
|
+
# rubocop:enable Metrics/MethodLength
|
35
|
+
|
36
|
+
# @param doc [Nokogiri::HTML::Document]
|
37
|
+
# @param xpt [String]
|
38
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
39
|
+
def get_docid(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
|
40
|
+
item_ref = doc.at xpt
|
41
|
+
return [] unless item_ref
|
42
|
+
|
43
|
+
[RelatonBib::DocumentIdentifier.new(id: item_ref.text, type: "Chinese Standard")]
|
44
|
+
end
|
45
|
+
|
46
|
+
# @param doc [Nokogiri::HTML::Document]
|
47
|
+
# @param xpt [String]
|
48
|
+
# @return [RelatonIsoBib::StructuredIdentifier]
|
49
|
+
def fetch_structuredidentifier(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
  # Text of the reference node; nil when nothing on the page matches xpt.
  ref = doc.at(xpt)&.text
  # Group 1 is the text up to and including the document number, group 2
  # the part number that follows a dot (empty string when there is none).
  # The match is nil when the text does not fit the pattern; the "?"
  # placeholders are used then instead of raising NoMethodError on nil.
  m = ref&.match(/^([^–—.-]*\d+)\.?((?<=\.)\d+|)/)
  RelatonIsoBib::StructuredIdentifier.new(
    project_number: m ? m[1] : "?", part_number: m ? m[2] : "?",
    prefix: nil, id: ref || "?", type: "Chinese Standard"
  )
end
|
65
|
+
|
66
|
+
def get_contributors(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
|
67
|
+
gb_en = GbAgencies::Agencies.new("en", {}, "")
|
68
|
+
gb_zh = GbAgencies::Agencies.new("zh", {}, "")
|
69
|
+
name = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
|
70
|
+
name.sub!(%r{/[TZ]$}, "") unless name =~ /^GB/
|
71
|
+
gbtype = get_gbtype(doc)
|
72
|
+
entity = RelatonBib::Organization.new name: [
|
73
|
+
{ language: "en", content: gb_en.standard_agency1(gbtype[:scope], name, gbtype[:mandate]) },
|
74
|
+
{ language: "zh", content: gb_zh.standard_agency1(gbtype[:scope], name, gbtype[:mandate]) },
|
75
|
+
]
|
76
|
+
[{ entity: entity, roles: ["publisher"] }]
|
77
|
+
end
|
78
|
+
|
79
|
+
# @param doc [Nokogiri::HTML::Document]
|
80
|
+
# @return [Array<Hash>]
|
81
|
+
# * :title_intro [String]
|
82
|
+
# * :title_main [String]
|
83
|
+
# * :language [String]
|
84
|
+
# * :script [String]
|
85
|
+
def get_titles(doc)
|
86
|
+
titles = [{ title_main: doc.css("div.page-header h4").text, title_intro: nil,
|
87
|
+
language: "zh", script: "Hans" }]
|
88
|
+
title_main = doc.css("div.page-header h5").text
|
89
|
+
unless title_main.empty?
|
90
|
+
titles << { title_main: title_main, title_intro: nil, language: "en", script: "Latn" }
|
91
|
+
end
|
92
|
+
titles
|
93
|
+
end
|
94
|
+
|
95
|
+
def get_type(_doc)
|
96
|
+
"international-standard"
|
97
|
+
end
|
98
|
+
|
99
|
+
# @param doc [Nokogiri::HTML::Document]
|
100
|
+
# @param xpt [String]
|
101
|
+
# @return [RelatonBib::DocumentStatus]
|
102
|
+
def get_status(doc, xpt = ".s-status.label:nth-child(3)")
  # A page without a node matching xpt is treated like a page with an
  # unrecognized label (nil stage) instead of raising NoMethodError.
  node = doc.at(xpt)
  label = node ? node.text.gsub(/\s/, "") : ""
  stage = case label
          when "即将实施" then "published"
          when "现行" then "activated"
          when "废止" then "obsoleted"
          end
  RelatonBib::DocumentStatus.new stage: stage
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
# @param doc [Nokogiri::HTML::Document]
|
117
|
+
# @return [Hash]
|
118
|
+
# * :scope [String]
|
119
|
+
# * :prefix [String]
|
120
|
+
# * :mandate [String]
|
121
|
+
def get_gbtype(doc)
  ref = get_ref(doc)
  # get_prefix returns nil when the prefix lookup finds no entry; report a
  # nil :prefix in that case instead of raising NoMethodError.
  { scope: get_scope(doc), prefix: get_prefix(ref)&.fetch("prefix", nil),
    mandate: get_mandate(ref) }
end
|
126
|
+
|
127
|
+
# @param doc [Nokogiri::HTML::Document]
|
128
|
+
# @return [String]
|
129
|
+
def get_ref(doc)
|
130
|
+
doc.xpath('//dt[text()="标准号"]/following-sibling::dd[1]').text
|
131
|
+
end
|
132
|
+
|
133
|
+
# @param doc [Nokogiri::HTML::Document]
|
134
|
+
# @return [Array<String>]
|
135
|
+
def get_ccs(doc)
|
136
|
+
[doc&.xpath('//dt[text()="中国标准分类号"]/following-sibling::dd[1]')&.text]
|
137
|
+
end
|
138
|
+
|
139
|
+
# @param doc [Nokogiri::HTML::Document]
|
140
|
+
# @return [Array<Hash>]
|
141
|
+
# * :field [String]
|
142
|
+
# * :group [String]
|
143
|
+
# * :subgroup [String]
|
144
|
+
def get_ics(doc)
  ics = doc.xpath('//dt[(.="国际标准分类号")]/following-sibling::dd[1]/span')
  return [] if ics.empty?

  field, group, subgroup = ics.text.split "."
  # An empty node text splits into nothing; there is no code to report.
  return [] unless field

  # A code without a "." has no group part, so pad the group only when
  # it is present instead of calling ljust on nil.
  [{ field: field, group: group&.ljust(3, "0"), subgroup: subgroup }]
end
|
151
|
+
|
152
|
+
# @param doc [Nokogiri::HTML::Document]
|
153
|
+
# @return [String]
|
154
|
+
def get_scope(doc)
  # doc.at returns nil when the label node is missing; the safe navigation
  # turns that into a nil result (same as an unrecognized label) instead
  # of raising NoMethodError.
  case doc.at(".s-status.label-info")&.text
  when "国家标准" then "national"
  when /^行业标准/ then "sector"
  end
end
|
162
|
+
|
163
|
+
# @param ref [String]
|
164
|
+
# @return [String]
|
165
|
+
def get_prefix(ref)
|
166
|
+
pref = ref.match(/^[^\s]+/).to_s.split("/").first
|
167
|
+
prefix pref
|
168
|
+
end
|
169
|
+
|
170
|
+
# @param pref [String]
|
171
|
+
# @return [Hash{String=>String}]
|
172
|
+
def prefix(pref)
|
173
|
+
file_path = File.join(__dir__, "yaml/prefixes.yaml")
|
174
|
+
@prefixes ||= YAML.load_file(file_path)
|
175
|
+
@prefixes[pref]
|
176
|
+
end
|
177
|
+
|
178
|
+
# @param ref [String]
|
179
|
+
# @return [String]
|
180
|
+
def get_mandate(ref)
|
181
|
+
case ref.match(%r{(?<=\/)[^\s]+}).to_s
|
182
|
+
when "T" then "recommended"
|
183
|
+
when "Z" then "guidelines"
|
184
|
+
else "mandatory"
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
# @param doc [Nokogiri::HTML::Document]
|
189
|
+
# @return [Array<Hash>]
|
190
|
+
# * :type [String] type of date
|
191
|
+
# * :on [String] date
|
192
|
+
def get_dates(doc)
|
193
|
+
date = doc.xpath('//dt[.="发布日期"]/following-sibling::dd[1]').text
|
194
|
+
[{ type: "published", on: date }]
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "net/http"
|
5
|
+
require "json"
|
6
|
+
require "nokogiri"
|
7
|
+
require "relaton_gb/scrapper"
|
8
|
+
require "relaton_gb/gb_bibliographic_item"
|
9
|
+
require "relaton_gb/hit_collection"
|
10
|
+
require "relaton_gb/hit"
|
11
|
+
|
12
|
+
module RelatonGb
|
13
|
+
# Sector standard scrapper
|
14
|
+
module SecScrapper
|
15
|
+
extend Scrapper
|
16
|
+
|
17
|
+
class << self
|
18
|
+
# @param text [String] code of standard for search
|
19
|
+
# @return [RelatonGb::HitCollection]
|
20
|
+
def scrape_page(text)
  # The search text may contain spaces, which URI() rejects with
  # URI::InvalidURIError unless the query value is form-encoded first.
  query = URI.encode_www_form_component(text)
  uri = URI "http://www.std.gov.cn/hb/search/hbPage?searchText=#{query}"
  res = JSON.parse Net::HTTP.get(uri)
  hits = res["rows"].map do |r|
    Hit.new pid: r["id"], title: r["STD_CODE"], scrapper: self
  end
  HitCollection.new hits
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
       Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
  # SocketError is rescued as well, matching TScrapper. The method returns
  # nil on failure because warn returns nil.
  warn "Cannot access #{uri}"
end
|
31
|
+
|
32
|
+
# @param pid [String] standard's page id
|
33
|
+
# @return [RelatonGb::GbBibliographicItem]
|
34
|
+
def scrape_doc(pid)
|
35
|
+
src = "http://www.std.gov.cn/hb/search/stdHBDetailed?id=#{pid}"
|
36
|
+
page_uri = URI src
|
37
|
+
doc = Nokogiri::HTML Net::HTTP.get(page_uri)
|
38
|
+
GbBibliographicItem.new scrapped_data(doc, src: src)
|
39
|
+
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
40
|
+
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
|
41
|
+
warn "Cannot access #{src}"
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
# @param doc [Nokogiri::HTML::Document]
|
47
|
+
# @return [Hash]
|
48
|
+
# * :type [String]
|
49
|
+
# * :name [String]
|
50
|
+
def get_committee(doc)
  ref = get_ref(doc)
  # get_prefix returns nil when the prefix lookup finds no entry; the
  # committee name is nil in that case instead of raising NoMethodError.
  name = get_prefix(ref)&.fetch("administration", nil)
  { type: "technical", name: name }
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "open-uri"
|
5
|
+
require "nokogiri"
|
6
|
+
require "relaton_gb/scrapper"
|
7
|
+
require "relaton_gb/gb_bibliographic_item"
|
8
|
+
require "relaton_gb/hit_collection"
|
9
|
+
require "relaton_gb/hit"
|
10
|
+
|
11
|
+
module RelatonGb
|
12
|
+
# Social standard scrapper.
|
13
|
+
module TScrapper
|
14
|
+
extend Scrapper
|
15
|
+
|
16
|
+
class << self
|
17
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
18
|
+
# @param text [String]
|
19
|
+
# @return [RelatonGb::HitCollection]
|
20
|
+
def scrape_page(text)
|
21
|
+
search_html = OpenURI.open_uri(
|
22
|
+
"http://www.ttbz.org.cn/Home/Standard?searchType=2&key=" +
|
23
|
+
CGI.escape(text.tr("-", [8212].pack("U"))),
|
24
|
+
)
|
25
|
+
header = Nokogiri::HTML search_html
|
26
|
+
xpath = '//table[contains(@class, "standard_list_table")]/tr/td/a'
|
27
|
+
t_xpath = "../preceding-sibling::td[3]"
|
28
|
+
hits = header.xpath(xpath).map do |h|
|
29
|
+
title = h.at(t_xpath).text.gsub(/â\u0080\u0094/, "-")
|
30
|
+
Hit.new pid: h[:href].sub(%r{\/$}, ""), title: title, scrapper: self
|
31
|
+
end
|
32
|
+
HitCollection.new hits
|
33
|
+
rescue OpenURI::HTTPError, SocketError
|
34
|
+
warn "Cannot access http://www.ttbz.org.cn/Home/Standard"
|
35
|
+
end
|
36
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
37
|
+
|
38
|
+
# @param pid [String] standard's page path
|
39
|
+
# @return [RelatonGb::GbBibliographicItem]
|
40
|
+
def scrape_doc(pid)
|
41
|
+
src = "http://www.ttbz.org.cn#{pid}"
|
42
|
+
doc = Nokogiri::HTML OpenURI.open_uri(src), nil, Encoding::UTF_8.to_s
|
43
|
+
GbBibliographicItem.new scrapped_data(doc, src: src)
|
44
|
+
rescue OpenURI::HTTPError, SocketError
|
45
|
+
warn "Cannot access #{src}"
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
# rubocop:disable Metrics/MethodLength
|
51
|
+
# @param doc [Nokogiri::HTML::Document]
|
52
|
+
# @return [Hash]
|
53
|
+
def scrapped_data(doc, src:)
|
54
|
+
docid_xpt = '//td[contains(.,"标准编号")]/following-sibling::td[1]'
|
55
|
+
status_xpt = '//td[contains(.,"标准状态")]/following-sibling::td[1]/span'
|
56
|
+
{
|
57
|
+
committee: get_committee(doc),
|
58
|
+
docid: get_docid(doc, docid_xpt),
|
59
|
+
titles: get_titles(doc),
|
60
|
+
type: "international-standard",
|
61
|
+
docstatus: get_status(doc, status_xpt),
|
62
|
+
gbtype: gbtype,
|
63
|
+
ccs: get_ccs(doc),
|
64
|
+
ics: get_ics(doc),
|
65
|
+
link: [{ type: "src", content: src }],
|
66
|
+
dates: get_dates(doc),
|
67
|
+
language: ["zh"],
|
68
|
+
script: ["Hans"],
|
69
|
+
structuredidentifier: fetch_structuredidentifier(doc),
|
70
|
+
}
|
71
|
+
end
|
72
|
+
# rubocop:enable Metrics/MethodLength
|
73
|
+
|
74
|
+
def get_committee(doc)
|
75
|
+
{
|
76
|
+
name: doc.xpath('//td[.="团体名称"]/following-sibling::td[1]').text,
|
77
|
+
type: "technical",
|
78
|
+
}
|
79
|
+
end
|
80
|
+
|
81
|
+
def get_titles(doc)
|
82
|
+
xpath = '//td[contains(.,"中文标题")]/following-sibling::td[1]'
|
83
|
+
titles = [{ title_main: doc.xpath(xpath).text,
|
84
|
+
title_intro: nil, language: "zh", script: "Hans" }]
|
85
|
+
xpath = '//td[contains(.,"英文标题")]/following-sibling::td[1]'
|
86
|
+
title_main = doc.xpath(xpath).text
|
87
|
+
unless title_main.empty?
|
88
|
+
titles << { title_main: title_main, title_intro: nil, language: "en",
|
89
|
+
script: "Latn" }
|
90
|
+
end
|
91
|
+
titles
|
92
|
+
end
|
93
|
+
|
94
|
+
def gbtype
|
95
|
+
{ scope: "social-group", prefix: "T", mandate: "mandatory" }
|
96
|
+
end
|
97
|
+
|
98
|
+
# def get_group_code(ref)
|
99
|
+
# ref.match(%r{(?<=\/)[^\s]})
|
100
|
+
# end
|
101
|
+
|
102
|
+
def get_ccs(doc)
|
103
|
+
[doc.xpath('//td[contains(.,"中国标准分类号")]/following-sibling::td[1]')
|
104
|
+
.text.gsub(/[\r\n]/, "").strip.match(/^[^\s]+/).to_s]
|
105
|
+
end
|
106
|
+
|
107
|
+
def get_ics(doc)
  xpath = '//td[contains(.,"国际标准分类号")]/following-sibling::td[1]/span'
  # First run of non-whitespace characters in the cell; "" when the cell
  # is missing or blank.
  ics = doc.xpath(xpath).text.match(/^[^\s]+/).to_s
  # No code on the page: report no ICS entries instead of calling ljust
  # on a nil group.
  return [] if ics.empty?

  field, group, subgroup = ics.split "."
  # A code without a "." has no group part; pad only when it is present.
  [{ field: field, group: group&.ljust(3, "0"), subgroup: subgroup }]
end
|
113
|
+
|
114
|
+
def get_dates(doc)
  d = doc.xpath('//td[contains(.,"发布日期")]/following-sibling::td[1]/span')
    .text.match(/(?<y>\d{4})[^\d]+(?<m>\d{2})[^\d]+(?<d>\d{2})/)
  # The match is nil when the cell is missing or holds no year/month/day
  # triple; report no dates instead of indexing into nil.
  return [] unless d

  [{ type: "published", on: "#{d[:y]}-#{d[:m]}-#{d[:d]}" }]
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module RelatonGb
|
4
|
+
class XMLParser < RelatonIsoBib::XMLParser
|
5
|
+
class << self
|
6
|
+
def from_xml(xml)
|
7
|
+
doc = Nokogiri::XML(xml)
|
8
|
+
gbitem = doc.at "/bibitem|/bibdata"
|
9
|
+
GbBibliographicItem.new item_data(gbitem)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def item_data(gbitem)
|
15
|
+
data = super
|
16
|
+
data[:committee] = fetch_committee gbitem
|
17
|
+
data[:gbtype] = fetch_gbtype gbitem
|
18
|
+
data[:ccs] = fetch_ccs gbitem
|
19
|
+
data[:plannumber] = gbitem.at("./plannumber")&.text
|
20
|
+
data
|
21
|
+
end
|
22
|
+
|
23
|
+
# Override get_id from RelatonIsoBib::XMLParser
|
24
|
+
# def get_id(did)
|
25
|
+
# did.text.match(/^(?<project>.*?\d+)(?<hyphen>-)?(?(<hyphen>)(?<year>\d*))/)
|
26
|
+
# end
|
27
|
+
|
28
|
+
def fetch_committee(doc)
|
29
|
+
committee = doc.at "./ext/gbcommittee"
|
30
|
+
return nil unless committee
|
31
|
+
|
32
|
+
{ type: committee[:type], name: committee.text }
|
33
|
+
end
|
34
|
+
|
35
|
+
def fetch_ccs(doc)
  # Collect the text of every ./ext/ccs/code node. The block argument is
  # parenthesized so Ruby does not warn about an ambiguous `&`.
  doc.xpath("./ext/ccs/code").map(&:text)
end
|
38
|
+
|
39
|
+
def fetch_gbtype(doc)
|
40
|
+
gbtype = doc.at "./ext/gbtype"
|
41
|
+
{
|
42
|
+
scope: gbtype&.at("gbscope")&.text,
|
43
|
+
prefix: gbtype&.at("gbprefix")&.text,
|
44
|
+
mandate: gbtype&.at("gbmandate")&.text,
|
45
|
+
}
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|