relaton-un 0.2.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/grammars/biblio.rng +145 -50
- data/grammars/isodoc.rng +481 -5
- data/grammars/un.rng +115 -1
- data/lib/relaton_un.rb +2 -0
- data/lib/relaton_un/editorialgroup.rb +25 -0
- data/lib/relaton_un/hash_converter.rb +28 -0
- data/lib/relaton_un/hit.rb +109 -15
- data/lib/relaton_un/hit_collection.rb +124 -47
- data/lib/relaton_un/processor.rb +1 -1
- data/lib/relaton_un/session.rb +65 -0
- data/lib/relaton_un/un_bibliographic_item.rb +40 -0
- data/lib/relaton_un/un_bibliography.rb +8 -5
- data/lib/relaton_un/version.rb +1 -1
- data/lib/relaton_un/xml_parser.rb +46 -9
- data/relaton_un.gemspec +13 -11
- metadata +7 -4
data/grammars/un.rng
CHANGED
@@ -107,7 +107,9 @@
|
|
107
107
|
<zeroOrMore>
|
108
108
|
<ref name="submissionlanguage"/>
|
109
109
|
</zeroOrMore>
|
110
|
-
<
|
110
|
+
<optional>
|
111
|
+
<ref name="editorialgroup"/>
|
112
|
+
</optional>
|
111
113
|
<zeroOrMore>
|
112
114
|
<ref name="ics"/>
|
113
115
|
</zeroOrMore>
|
@@ -117,6 +119,9 @@
|
|
117
119
|
<optional>
|
118
120
|
<ref name="session"/>
|
119
121
|
</optional>
|
122
|
+
<optional>
|
123
|
+
<ref name="job_number"/>
|
124
|
+
</optional>
|
120
125
|
</define>
|
121
126
|
<define name="preface">
|
122
127
|
<element name="preface">
|
@@ -136,6 +141,109 @@
|
|
136
141
|
<ref name="Basic-Section"/>
|
137
142
|
</element>
|
138
143
|
</define>
|
144
|
+
<define name="Clause-Section">
|
145
|
+
<optional>
|
146
|
+
<attribute name="id">
|
147
|
+
<data type="ID"/>
|
148
|
+
</attribute>
|
149
|
+
</optional>
|
150
|
+
<optional>
|
151
|
+
<attribute name="language"/>
|
152
|
+
</optional>
|
153
|
+
<optional>
|
154
|
+
<attribute name="script"/>
|
155
|
+
</optional>
|
156
|
+
<optional>
|
157
|
+
<attribute name="inline-header">
|
158
|
+
<data type="boolean"/>
|
159
|
+
</attribute>
|
160
|
+
</optional>
|
161
|
+
<optional>
|
162
|
+
<attribute name="obligation">
|
163
|
+
<choice>
|
164
|
+
<value>normative</value>
|
165
|
+
<value>informative</value>
|
166
|
+
</choice>
|
167
|
+
</attribute>
|
168
|
+
</optional>
|
169
|
+
<optional>
|
170
|
+
<attribute name="unnumbered">
|
171
|
+
<data type="boolean"/>
|
172
|
+
</attribute>
|
173
|
+
</optional>
|
174
|
+
<optional>
|
175
|
+
<ref name="section-title"/>
|
176
|
+
</optional>
|
177
|
+
<group>
|
178
|
+
<group>
|
179
|
+
<zeroOrMore>
|
180
|
+
<ref name="BasicBlock"/>
|
181
|
+
</zeroOrMore>
|
182
|
+
<zeroOrMore>
|
183
|
+
<ref name="note"/>
|
184
|
+
</zeroOrMore>
|
185
|
+
</group>
|
186
|
+
<zeroOrMore>
|
187
|
+
<choice>
|
188
|
+
<ref name="clause-subsection"/>
|
189
|
+
<ref name="terms"/>
|
190
|
+
<ref name="definitions"/>
|
191
|
+
</choice>
|
192
|
+
</zeroOrMore>
|
193
|
+
</group>
|
194
|
+
</define>
|
195
|
+
<define name="Annex-Section">
|
196
|
+
<optional>
|
197
|
+
<attribute name="id">
|
198
|
+
<data type="ID"/>
|
199
|
+
</attribute>
|
200
|
+
</optional>
|
201
|
+
<optional>
|
202
|
+
<attribute name="language"/>
|
203
|
+
</optional>
|
204
|
+
<optional>
|
205
|
+
<attribute name="script"/>
|
206
|
+
</optional>
|
207
|
+
<optional>
|
208
|
+
<attribute name="inline-header">
|
209
|
+
<data type="boolean"/>
|
210
|
+
</attribute>
|
211
|
+
</optional>
|
212
|
+
<optional>
|
213
|
+
<attribute name="obligation">
|
214
|
+
<choice>
|
215
|
+
<value>normative</value>
|
216
|
+
<value>informative</value>
|
217
|
+
</choice>
|
218
|
+
</attribute>
|
219
|
+
</optional>
|
220
|
+
<optional>
|
221
|
+
<attribute name="unnumbered">
|
222
|
+
<data type="boolean"/>
|
223
|
+
</attribute>
|
224
|
+
</optional>
|
225
|
+
<optional>
|
226
|
+
<ref name="section-title"/>
|
227
|
+
</optional>
|
228
|
+
<group>
|
229
|
+
<group>
|
230
|
+
<zeroOrMore>
|
231
|
+
<ref name="BasicBlock"/>
|
232
|
+
</zeroOrMore>
|
233
|
+
<zeroOrMore>
|
234
|
+
<ref name="note"/>
|
235
|
+
</zeroOrMore>
|
236
|
+
</group>
|
237
|
+
<zeroOrMore>
|
238
|
+
<choice>
|
239
|
+
<ref name="annex-subsection"/>
|
240
|
+
<ref name="terms"/>
|
241
|
+
<ref name="definitions"/>
|
242
|
+
<ref name="references"/>
|
243
|
+
</choice>
|
244
|
+
</zeroOrMore>
|
245
|
+
</group>
|
246
|
+
</define>
|
139
247
|
</include>
|
140
248
|
<define name="session">
|
141
249
|
<element name="session">
|
@@ -221,9 +329,15 @@
|
|
221
329
|
<value>general</value>
|
222
330
|
<value>limited</value>
|
223
331
|
<value>restricted</value>
|
332
|
+
<value>provisional</value>
|
224
333
|
</choice>
|
225
334
|
</element>
|
226
335
|
</define>
|
336
|
+
<define name="job_number">
|
337
|
+
<element name="job_number">
|
338
|
+
<text/>
|
339
|
+
</element>
|
340
|
+
</define>
|
227
341
|
<define name="un-standard">
|
228
342
|
<element name="un-standard">
|
229
343
|
<ref name="bibdata"/>
|
data/lib/relaton_un.rb
CHANGED
@@ -0,0 +1,25 @@
|
|
1
|
+
module RelatonUn
|
2
|
+
class EditorialGroup
|
3
|
+
include RelatonBib
|
4
|
+
|
5
|
+
# @return [Array<String>]
|
6
|
+
attr_reader :committee
|
7
|
+
|
8
|
+
# @param committee [Array<String>]
|
9
|
+
def initialize(committee)
|
10
|
+
@committee = committee
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param builder [Nokogiri::XML::Builder]
|
14
|
+
def to_xml(builder)
|
15
|
+
builder.editorialgroup do |b|
|
16
|
+
committee.each { |c| b.committee c }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [Array<Hash>, Hash]
|
21
|
+
def to_hash
|
22
|
+
single_element_array(committee.map { |c| { "committee" => c } })
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -1,5 +1,33 @@
|
|
1
1
|
module RelatonUn
|
2
2
|
class HashConverter < RelatonBib::HashConverter
|
3
|
+
class << self
|
4
|
+
# @override RelatonIsoBib::HashConverter.hash_to_bib
|
5
|
+
# @param args [Hash]
|
6
|
+
# @param nested [TrueClass, FalseClass]
|
7
|
+
# @return [Hash]
|
8
|
+
def hash_to_bib(args, nested = false)
|
9
|
+
ret = super
|
10
|
+
return if ret.nil?
|
3
11
|
|
12
|
+
session_hash_to_bib ret
|
13
|
+
ret
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
# @param ret [Hash]
|
19
|
+
def session_hash_to_bib(ret)
|
20
|
+
ret[:session] = Session.new(ret[:session]) if ret[:session]
|
21
|
+
end
|
22
|
+
|
23
|
+
# @param ret [Hash]
|
24
|
+
def editorialgroup_hash_to_bib(ret)
|
25
|
+
eg = ret[:editorialgroup]
|
26
|
+
return unless eg
|
27
|
+
|
28
|
+
committee = eg.map { |e| e[:committee] }
|
29
|
+
ret[:editorialgroup] = EditorialGroup.new array(committee)
|
30
|
+
end
|
31
|
+
end
|
4
32
|
end
|
5
33
|
end
|
data/lib/relaton_un/hit.rb
CHANGED
@@ -3,6 +3,51 @@
|
|
3
3
|
module RelatonUn
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
# rubocop:disable Layout/LineLength
|
7
|
+
|
8
|
+
# There is distribution PRO (A/47/PV.102/CORR.1, A/47/PV.54)
|
9
|
+
BODY = {
|
10
|
+
"A" => "General Assembly",
|
11
|
+
"E" => "Economic and Social Council",
|
12
|
+
"S" => "Security Council",
|
13
|
+
"T" => "Trusteeship Council",
|
14
|
+
"ACC" => "Administrative Committee on Coordination",
|
15
|
+
"AT" => "United Nations Administrative Tribunal",
|
16
|
+
"CAT" => "Committee against Torture",
|
17
|
+
"CCPR" => "Human Rights Committee",
|
18
|
+
"CD" => "Conference on Disarmament",
|
19
|
+
"CEDAW" => "Committee on the Elimination of All Forms of Discrimination against Women",
|
20
|
+
"CERD" => "Committee on the Elimination of Racial Discrimination",
|
21
|
+
"CRC" => "Committee on the Rights of the Child",
|
22
|
+
"DC" => "Disarmament Commission",
|
23
|
+
"DP" => "United Nations Development Programme",
|
24
|
+
"HS" => "United Nations Centre for Human Settlements (HABITAT)",
|
25
|
+
"TD" => "United Nations Conference on Trade and Development",
|
26
|
+
"UNEP" => "United Nations Environment Programme",
|
27
|
+
"TRADE" => "Committee on Trade",
|
28
|
+
"CEFACT" => "Centre for Trade Facilitation and Electronic Business",
|
29
|
+
"C.1" => "Disarmament and International Security Committee",
|
30
|
+
"C.2" => "Economic and Financial Committee",
|
31
|
+
"C.3" => "Social, Humanitarian & Cultural Issues",
|
32
|
+
"C.4" => "Special Political and Decolonization Committee",
|
33
|
+
"C.5" => "Administrative and Budgetary Committee",
|
34
|
+
"C.6" => "Sixth Committee (Legal)",
|
35
|
+
"PC" => "Preparatory Committee",
|
36
|
+
"AEC" => "Atomic Energy Commission",
|
37
|
+
"AGRI" => "Committee on Agriculture",
|
38
|
+
"AMCEN" => "African Ministerial Conference on the Environment",
|
39
|
+
"AMCOW" => "African Ministers’ Council on Water",
|
40
|
+
"ECA" => "Economic Commission for Africa",
|
41
|
+
"ESCAP" => "Economic and Social Commission for Asia and Pacific",
|
42
|
+
"ECE" => "Economic Commission for Europe",
|
43
|
+
"ECWA" => "Economic Commission for Western Asia",
|
44
|
+
"UNFF" => "United Nations Forum on Forests",
|
45
|
+
"ENERGY" => "Committee on Sustainable Energy",
|
46
|
+
"FAO" => "Food and Agriculture Organization",
|
47
|
+
"UNCTAD" => "United Nations Conference on Trade and Development",
|
48
|
+
}.freeze
|
49
|
+
# rubocop:enable Layout/LineLength
|
50
|
+
|
6
51
|
# Parse page.
|
7
52
|
# @return [RelatonUn::UnBibliographicItem]
|
8
53
|
def fetch
|
@@ -11,48 +56,97 @@ module RelatonUn
|
|
11
56
|
|
12
57
|
private
|
13
58
|
|
59
|
+
# rubocop:disable Metrics/MethodLength
|
60
|
+
|
61
|
+
# @return [RelatonUn::UnBibliographicItem]
|
14
62
|
def un_bib_item
|
15
63
|
UnBibliographicItem.new(
|
16
64
|
type: "standard",
|
17
65
|
fetched: Date.today.to_s,
|
18
|
-
docid:
|
66
|
+
docid: fetch_docid,
|
19
67
|
docnumber: hit[:ref],
|
20
68
|
language: ["en"],
|
21
69
|
script: ["Latn"],
|
22
|
-
title:
|
23
|
-
date:
|
24
|
-
link:
|
25
|
-
keyword:
|
70
|
+
title: fetch_title,
|
71
|
+
date: fetch_date,
|
72
|
+
link: fetch_link,
|
73
|
+
keyword: fetch_keyword,
|
74
|
+
session: fetch_session,
|
75
|
+
distribution: fetch_distribution,
|
76
|
+
editorialgroup: fetch_editorialgroup,
|
77
|
+
classification: fetch_classification,
|
26
78
|
)
|
27
79
|
end
|
80
|
+
# rubocop:enable Metrics/MethodLength
|
28
81
|
|
29
82
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
30
|
-
def
|
31
|
-
|
83
|
+
def fetch_docid
|
84
|
+
hit[:symbol].map do |s|
|
85
|
+
RelatonBib::DocumentIdentifier.new(id: s, type: "UN")
|
86
|
+
end
|
32
87
|
end
|
33
88
|
|
34
89
|
# @return [Array<RelatonBib::TypedTitleString>]
|
35
|
-
def
|
36
|
-
fs = RelatonBib::FormattedString.new(
|
37
|
-
[
|
90
|
+
def fetch_title
|
91
|
+
# fs = RelatonBib::FormattedString.new(
|
92
|
+
# content: hit[:title], language: "en", script: "Latn",
|
93
|
+
# )
|
94
|
+
# [RelatonBib::TypedTitleString.new(type: "main", title: fs)]
|
95
|
+
# [{ title_main: hit[:title], language: "en", script: "Latn" }]
|
96
|
+
RelatonBib::TypedTitleString.from_string hit[:title], "en", "Latn"
|
38
97
|
end
|
39
98
|
|
40
99
|
# @return [Array<RelatonBib::BibliographicDate>]
|
41
|
-
def
|
100
|
+
def fetch_date
|
42
101
|
d = []
|
43
|
-
d <<
|
44
|
-
d <<
|
102
|
+
d << bibdate("published", hit[:date_pub]) if hit[:date_pub]
|
103
|
+
d << bibdate("issued", hit[:date_rel]) if hit[:date_rel]
|
45
104
|
d
|
46
105
|
end
|
47
106
|
|
107
|
+
# @param type [String]
|
108
|
+
# @param on [String]
|
109
|
+
# @return [RelatonBib::BibliographicDate]
|
110
|
+
def bibdate(type, on)
|
111
|
+
RelatonBib::BibliographicDate.new type: type, on: on
|
112
|
+
end
|
113
|
+
|
48
114
|
# @return [Array<RelatonBib::TypedUri>]
|
49
|
-
def
|
115
|
+
def fetch_link
|
50
116
|
hit[:link].map { |l| RelatonBib::TypedUri.new l }
|
51
117
|
end
|
52
118
|
|
53
119
|
# @return [Array<String>]
|
54
|
-
def
|
120
|
+
def fetch_keyword
|
55
121
|
hit[:keyword].split(", ")
|
56
122
|
end
|
123
|
+
|
124
|
+
# @return [RelatonUn::Session]
|
125
|
+
def fetch_session
|
126
|
+
Session.new(session_number: hit[:session], agenda_id: hit[:agenda])
|
127
|
+
end
|
128
|
+
|
129
|
+
# @return [String]
|
130
|
+
def fetch_distribution
|
131
|
+
UnBibliographicItem::DISTRIBUTIONS[hit[:distribution]]
|
132
|
+
end
|
133
|
+
|
134
|
+
# @return [RelatonUn::EditorialGroup, NilClass]
|
135
|
+
def fetch_editorialgroup
|
136
|
+
tc = hit[:ref].match(/^[\S]+/).to_s.split(/\/|-/).reduce([]) do |m, v|
|
137
|
+
if BODY[v] then m << BODY[v]
|
138
|
+
elsif v =~ /(AC|C|CN|CONF|GC|SC|Sub|WG).\d+|PC/ then m << v
|
139
|
+
else m
|
140
|
+
end
|
141
|
+
end.uniq
|
142
|
+
return unless tc.any?
|
143
|
+
|
144
|
+
RelatonUn::EditorialGroup.new tc
|
145
|
+
end
|
146
|
+
|
147
|
+
# @return [Array<RelatonBib::Classification>]
|
148
|
+
def fetch_classification
|
149
|
+
[RelatonBib::Classification.new(type: "area", value: "UNDOC")]
|
150
|
+
end
|
57
151
|
end
|
58
152
|
end
|
@@ -6,7 +6,8 @@ require "http-cookie"
|
|
6
6
|
module RelatonUn
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
-
AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3)
|
9
|
+
AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) "\
|
10
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
|
10
11
|
DOMAIN = "https://documents.un.org"
|
11
12
|
BOUNDARY = "----WebKitFormBoundary6hkaBvITDck8dHCn"
|
12
13
|
|
@@ -17,52 +18,10 @@ module RelatonUn
|
|
17
18
|
@jar = HTTP::CookieJar.new
|
18
19
|
@http = Net::HTTP.new @uri.host, @uri.port
|
19
20
|
@http.use_ssl = true
|
21
|
+
@http.read_timeout = 120
|
20
22
|
if (form_resp = get_page)
|
21
|
-
|
22
|
-
|
23
|
-
"//input[@type!='radio']",
|
24
|
-
"//input[@type='radio'][@checked]",
|
25
|
-
"//select[@name!='view:_id1:_id2:cbLang']",
|
26
|
-
"//textarea"
|
27
|
-
).reduce([]) do |m, i|
|
28
|
-
v = case i[:name]
|
29
|
-
when "view:_id1:_id2:txtSymbol" then text
|
30
|
-
when "view:_id1:_id2:cbType" then "FP"
|
31
|
-
when "view:_id1:_id2:cbSort" then "R"
|
32
|
-
when "$$xspsubmitid" then "view:_id1:_id2:_id130"
|
33
|
-
when "$$xspsubmitscroll" then "0|167"
|
34
|
-
else i[:value]
|
35
|
-
end
|
36
|
-
m << %{--#{BOUNDARY}}
|
37
|
-
m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
|
38
|
-
end
|
39
|
-
form_data << %{--#{BOUNDARY}--\r\n}
|
40
|
-
req = Net::HTTP::Post.new form.at("//form")[:action]
|
41
|
-
set_headers req
|
42
|
-
req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
|
43
|
-
req.body = form_data.join("\r\n")
|
44
|
-
resp = @http.request req
|
45
|
-
page_resp = get_page URI.parse(resp["location"]).request_uri
|
46
|
-
doc = Nokogiri::HTML page_resp.body
|
47
|
-
@array = doc.css("div.viewHover").map do |item|
|
48
|
-
ref = item.at("div/div/a")&.text&.sub "\u00A0", ""
|
49
|
-
title = item.at("div/div/span")&.text
|
50
|
-
keyword = item.at("div[3]/div[5]/span")&.text
|
51
|
-
date_pub = item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
|
52
|
-
en = item.at("//span[.='ENGLISH']/../..")
|
53
|
-
date_rel = en.at("./following-sibling::span[contains(@id, 'cfRelDateE')]").text
|
54
|
-
link = en.xpath("//a[contains(@title, 'Open')]").map do |l|
|
55
|
-
{ content: l[:href], type: l[:title].match(/PDF|Word/).to_s.downcase }
|
56
|
-
end
|
57
|
-
Hit.new({
|
58
|
-
ref: ref,
|
59
|
-
title: title,
|
60
|
-
keyword: keyword,
|
61
|
-
date_pub: date_pub,
|
62
|
-
date_rel: date_rel,
|
63
|
-
link: link
|
64
|
-
}, self)
|
65
|
-
end
|
23
|
+
doc = Nokogiri::HTML page_resp(form_resp, text).body
|
24
|
+
@array = doc.css("div.viewHover").map { |item| hit item }
|
66
25
|
end
|
67
26
|
end
|
68
27
|
|
@@ -84,9 +43,125 @@ module RelatonUn
|
|
84
43
|
get_page request_uri, deep + 1
|
85
44
|
end
|
86
45
|
|
46
|
+
# rubocop:disable Metrics/MethodLength
|
47
|
+
|
48
|
+
# @param form [Nokogiri::HTML::Document]
|
49
|
+
# @param text [String]
|
50
|
+
# @return [Array<String>]
|
51
|
+
def form_data(form, text)
|
52
|
+
fd = form.xpath(
|
53
|
+
"//input[@type!='radio']",
|
54
|
+
"//input[@type='radio'][@checked]",
|
55
|
+
"//select[@name!='view:_id1:_id2:cbLang']",
|
56
|
+
"//textarea",
|
57
|
+
).reduce([]) do |m, i|
|
58
|
+
v = case i[:name]
|
59
|
+
when "view:_id1:_id2:txtSymbol" then text
|
60
|
+
when "view:_id1:_id2:cbType" then "FP"
|
61
|
+
when "view:_id1:_id2:cbSort" then "R"
|
62
|
+
when "$$xspsubmitid" then "view:_id1:_id2:_id130"
|
63
|
+
when "$$xspsubmitscroll" then "0|167"
|
64
|
+
else i[:value]
|
65
|
+
end
|
66
|
+
m << %{--#{BOUNDARY}}
|
67
|
+
m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
|
68
|
+
end
|
69
|
+
fd << %{--#{BOUNDARY}--\r\n}
|
70
|
+
end
|
71
|
+
# rubocop:enable Metrics/MethodLength
|
72
|
+
|
73
|
+
# @param form_resp [Net::HTTPOK]
|
74
|
+
# @param text [String]
|
75
|
+
# @return [Net::HTTPOK]
|
76
|
+
def page_resp(form_resp, text)
|
77
|
+
form = Nokogiri::HTML form_resp.body
|
78
|
+
req = Net::HTTP::Post.new form.at("//form")[:action]
|
79
|
+
set_headers req
|
80
|
+
req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
|
81
|
+
req.body = form_data(form, text).join("\r\n")
|
82
|
+
resp = @http.request req
|
83
|
+
get_page URI.parse(resp["location"]).request_uri
|
84
|
+
end
|
85
|
+
|
86
|
+
# @param item [Nokogiri::XML::Element]
|
87
|
+
# @return [RelatonUn::Hit]
|
88
|
+
def hit(item)
|
89
|
+
Hit.new(hit_data(item), self)
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param item [Nokogiri::XML::Element]
|
93
|
+
# @return [Hash]
|
94
|
+
def hit_data(item)
|
95
|
+
en = item.at("//span[.='ENGLISH']/../..")
|
96
|
+
{
|
97
|
+
ref: item.at("div/div/a")&.text&.sub("\u00A0", ""),
|
98
|
+
symbol: symbol(item),
|
99
|
+
title: item.at("div/div/span")&.text,
|
100
|
+
keyword: item.at("div[3]/div[5]/span")&.text,
|
101
|
+
date_pub: date_pub(item),
|
102
|
+
date_rel: date_rel(en),
|
103
|
+
link: link(en),
|
104
|
+
session: session(item),
|
105
|
+
agenda: agenda(item),
|
106
|
+
distribution: distribution(item)
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
110
|
+
# @param item [Nokogiri::XML::Element]
|
111
|
+
# @return [String]
|
112
|
+
def symbol(item)
|
113
|
+
item.xpath("div/div[not(contains(@class, 'hidden'))]/"\
|
114
|
+
"label[contains(.,'Symbol')]/following-sibling::span[1]").map &:text
|
115
|
+
end
|
116
|
+
|
117
|
+
# @param item [Nokogiri::XML::Element]
|
118
|
+
# @return [String]
|
119
|
+
def date_pub(item)
|
120
|
+
item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
|
121
|
+
end
|
122
|
+
|
123
|
+
# @param item [Nokogiri::XML::Element]
|
124
|
+
# @return [String]
|
125
|
+
def date_rel(item)
|
126
|
+
item.at("./following-sibling::span[contains(@id, 'cfRelDateE')]")&.text
|
127
|
+
end
|
128
|
+
|
129
|
+
# @param item [Nokogiri::XML::Element]
|
130
|
+
# @return [Array<Hash>]
|
131
|
+
def link(item)
|
132
|
+
item.xpath("//a[contains(@title, 'Open')]").map do |l|
|
133
|
+
{
|
134
|
+
content: l[:href],
|
135
|
+
type: l[:title].match(/PDF|Word/).to_s.downcase,
|
136
|
+
}
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# @param item [Nokogiri::XML::Element]
|
141
|
+
# @return [String]
|
142
|
+
def session(item)
|
143
|
+
item.at("//label[.='Session / Year:']/following-sibling::span")&.text
|
144
|
+
end
|
145
|
+
|
146
|
+
# @param item [Nokogiri::XML::Element]
|
147
|
+
# @return [String]
|
148
|
+
def agenda(item)
|
149
|
+
item.at("//label[.='Agenda Item(s):']/following-sibling::span")&.text
|
150
|
+
end
|
151
|
+
|
152
|
+
# @param item [Nokogiri::XML::Element]
|
153
|
+
# @return [String]
|
154
|
+
def distribution(item)
|
155
|
+
item.at("//label[.='Distribution:']/following-sibling::span")&.text
|
156
|
+
end
|
157
|
+
|
158
|
+
# rubocop:disable Metrics/MethodLength
|
159
|
+
|
160
|
+
# @param req [Net::HTTP::Get, Net::HTTP::Post]
|
87
161
|
def set_headers(req)
|
88
162
|
set_cookie req
|
89
|
-
req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,
|
163
|
+
req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,"\
|
164
|
+
"image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
|
90
165
|
req["Accept-Encoding"] = "gzip, deflate, br"
|
91
166
|
req["Cache-Control"] = "max-age=0"
|
92
167
|
req["Connection"] = "keep-alive"
|
@@ -98,7 +173,9 @@ module RelatonUn
|
|
98
173
|
req["Upgrade-Insecure-Requests"] = "1"
|
99
174
|
req["User-Agent"] = AGENT
|
100
175
|
end
|
176
|
+
# rubocop:enable Metrics/MethodLength
|
101
177
|
|
178
|
+
# @param req [Net::HTTP::Get, Net::HTTP::Post]
|
102
179
|
def set_cookie(req)
|
103
180
|
req["Cookie"] = HTTP::Cookie.cookie_value @jar.cookies(@uri)
|
104
181
|
end
|