relaton-un 0.2.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/grammars/biblio.rng +145 -50
- data/grammars/isodoc.rng +481 -5
- data/grammars/un.rng +115 -1
- data/lib/relaton_un.rb +2 -0
- data/lib/relaton_un/editorialgroup.rb +25 -0
- data/lib/relaton_un/hash_converter.rb +28 -0
- data/lib/relaton_un/hit.rb +109 -15
- data/lib/relaton_un/hit_collection.rb +124 -47
- data/lib/relaton_un/processor.rb +1 -1
- data/lib/relaton_un/session.rb +65 -0
- data/lib/relaton_un/un_bibliographic_item.rb +40 -0
- data/lib/relaton_un/un_bibliography.rb +8 -5
- data/lib/relaton_un/version.rb +1 -1
- data/lib/relaton_un/xml_parser.rb +46 -9
- data/relaton_un.gemspec +13 -11
- metadata +7 -4
data/grammars/un.rng
CHANGED
@@ -107,7 +107,9 @@
|
|
107
107
|
<zeroOrMore>
|
108
108
|
<ref name="submissionlanguage"/>
|
109
109
|
</zeroOrMore>
|
110
|
-
<
|
110
|
+
<optional>
|
111
|
+
<ref name="editorialgroup"/>
|
112
|
+
</optional>
|
111
113
|
<zeroOrMore>
|
112
114
|
<ref name="ics"/>
|
113
115
|
</zeroOrMore>
|
@@ -117,6 +119,9 @@
|
|
117
119
|
<optional>
|
118
120
|
<ref name="session"/>
|
119
121
|
</optional>
|
122
|
+
<optional>
|
123
|
+
<ref name="job_number"/>
|
124
|
+
</optional>
|
120
125
|
</define>
|
121
126
|
<define name="preface">
|
122
127
|
<element name="preface">
|
@@ -136,6 +141,109 @@
|
|
136
141
|
<ref name="Basic-Section"/>
|
137
142
|
</element>
|
138
143
|
</define>
|
144
|
+
<define name="Clause-Section">
|
145
|
+
<optional>
|
146
|
+
<attribute name="id">
|
147
|
+
<data type="ID"/>
|
148
|
+
</attribute>
|
149
|
+
</optional>
|
150
|
+
<optional>
|
151
|
+
<attribute name="language"/>
|
152
|
+
</optional>
|
153
|
+
<optional>
|
154
|
+
<attribute name="script"/>
|
155
|
+
</optional>
|
156
|
+
<optional>
|
157
|
+
<attribute name="inline-header">
|
158
|
+
<data type="boolean"/>
|
159
|
+
</attribute>
|
160
|
+
</optional>
|
161
|
+
<optional>
|
162
|
+
<attribute name="obligation">
|
163
|
+
<choice>
|
164
|
+
<value>normative</value>
|
165
|
+
<value>informative</value>
|
166
|
+
</choice>
|
167
|
+
</attribute>
|
168
|
+
</optional>
|
169
|
+
<optional>
|
170
|
+
<attribute name="unnumbered">
|
171
|
+
<data type="boolean"/>
|
172
|
+
</attribute>
|
173
|
+
</optional>
|
174
|
+
<optional>
|
175
|
+
<ref name="section-title"/>
|
176
|
+
</optional>
|
177
|
+
<group>
|
178
|
+
<group>
|
179
|
+
<zeroOrMore>
|
180
|
+
<ref name="BasicBlock"/>
|
181
|
+
</zeroOrMore>
|
182
|
+
<zeroOrMore>
|
183
|
+
<ref name="note"/>
|
184
|
+
</zeroOrMore>
|
185
|
+
</group>
|
186
|
+
<zeroOrMore>
|
187
|
+
<choice>
|
188
|
+
<ref name="clause-subsection"/>
|
189
|
+
<ref name="terms"/>
|
190
|
+
<ref name="definitions"/>
|
191
|
+
</choice>
|
192
|
+
</zeroOrMore>
|
193
|
+
</group>
|
194
|
+
</define>
|
195
|
+
<define name="Annex-Section">
|
196
|
+
<optional>
|
197
|
+
<attribute name="id">
|
198
|
+
<data type="ID"/>
|
199
|
+
</attribute>
|
200
|
+
</optional>
|
201
|
+
<optional>
|
202
|
+
<attribute name="language"/>
|
203
|
+
</optional>
|
204
|
+
<optional>
|
205
|
+
<attribute name="script"/>
|
206
|
+
</optional>
|
207
|
+
<optional>
|
208
|
+
<attribute name="inline-header">
|
209
|
+
<data type="boolean"/>
|
210
|
+
</attribute>
|
211
|
+
</optional>
|
212
|
+
<optional>
|
213
|
+
<attribute name="obligation">
|
214
|
+
<choice>
|
215
|
+
<value>normative</value>
|
216
|
+
<value>informative</value>
|
217
|
+
</choice>
|
218
|
+
</attribute>
|
219
|
+
</optional>
|
220
|
+
<optional>
|
221
|
+
<attribute name="unnumbered">
|
222
|
+
<data type="boolean"/>
|
223
|
+
</attribute>
|
224
|
+
</optional>
|
225
|
+
<optional>
|
226
|
+
<ref name="section-title"/>
|
227
|
+
</optional>
|
228
|
+
<group>
|
229
|
+
<group>
|
230
|
+
<zeroOrMore>
|
231
|
+
<ref name="BasicBlock"/>
|
232
|
+
</zeroOrMore>
|
233
|
+
<zeroOrMore>
|
234
|
+
<ref name="note"/>
|
235
|
+
</zeroOrMore>
|
236
|
+
</group>
|
237
|
+
<zeroOrMore>
|
238
|
+
<choice>
|
239
|
+
<ref name="annex-subsection"/>
|
240
|
+
<ref name="terms"/>
|
241
|
+
<ref name="definitions"/>
|
242
|
+
<ref name="references"/>
|
243
|
+
</choice>
|
244
|
+
</zeroOrMore>
|
245
|
+
</group>
|
246
|
+
</define>
|
139
247
|
</include>
|
140
248
|
<define name="session">
|
141
249
|
<element name="session">
|
@@ -221,9 +329,15 @@
|
|
221
329
|
<value>general</value>
|
222
330
|
<value>limited</value>
|
223
331
|
<value>restricted</value>
|
332
|
+
<value>provisional</value>
|
224
333
|
</choice>
|
225
334
|
</element>
|
226
335
|
</define>
|
336
|
+
<define name="job_number">
|
337
|
+
<element name="job_number">
|
338
|
+
<text/>
|
339
|
+
</element>
|
340
|
+
</define>
|
227
341
|
<define name="un-standard">
|
228
342
|
<element name="un-standard">
|
229
343
|
<ref name="bibdata"/>
|
data/lib/relaton_un.rb
CHANGED
data/lib/relaton_un/editorialgroup.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module RelatonUn
|
2
|
+
class EditorialGroup
|
3
|
+
include RelatonBib
|
4
|
+
|
5
|
+
# @return [Array<String>]
|
6
|
+
attr_reader :committee
|
7
|
+
|
8
|
+
# @param committee [Array<String>]
|
9
|
+
def initialize(committee)
|
10
|
+
@committee = committee
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param builder [Nokogiri::XML::Builder]
|
14
|
+
def to_xml(builder)
|
15
|
+
builder.editorialgroup do |b|
|
16
|
+
committee.each { |c| b.committee c }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [Array<Hash>, Hash]
|
21
|
+
def to_hash
|
22
|
+
single_element_array(committee.map { |c| { "committee" => c } })
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/relaton_un/hash_converter.rb
CHANGED
@@ -1,5 +1,33 @@
|
|
1
1
|
module RelatonUn
|
2
2
|
class HashConverter < RelatonBib::HashConverter
|
3
|
+
class << self
|
4
|
+
# @override RelatonIsoBib::HashConverter.hash_to_bib
|
5
|
+
# @param args [Hash]
|
6
|
+
# @param nested [TrueClass, FalseClass]
|
7
|
+
# @return [Hash]
|
8
|
+
def hash_to_bib(args, nested = false)
|
9
|
+
ret = super
|
10
|
+
return if ret.nil?
|
3
11
|
|
12
|
+
session_hash_to_bib ret
|
13
|
+
ret
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
# @param ret [Hash]
|
19
|
+
def session_hash_to_bib(ret)
|
20
|
+
ret[:session] = Session.new(ret[:session]) if ret[:session]
|
21
|
+
end
|
22
|
+
|
23
|
+
# @param ret [Hash]
|
24
|
+
def editorialgroup_hash_to_bib(ret)
|
25
|
+
eg = ret[:editorialgroup]
|
26
|
+
return unless eg
|
27
|
+
|
28
|
+
committee = eg.map { |e| e[:committee] }
|
29
|
+
ret[:editorialgroup] = EditorialGroup.new array(committee)
|
30
|
+
end
|
31
|
+
end
|
4
32
|
end
|
5
33
|
end
|
data/lib/relaton_un/hit.rb
CHANGED
@@ -3,6 +3,51 @@
|
|
3
3
|
module RelatonUn
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
# rubocop:disable Layout/LineLength
|
7
|
+
|
8
|
+
# There is distribution PRO (A/47/PV.102/CORR.1, A/47/PV.54)
|
9
|
+
BODY = {
|
10
|
+
"A" => "General Assembly",
|
11
|
+
"E" => "Economic and Social Council",
|
12
|
+
"S" => "Security Council",
|
13
|
+
"T" => "Trusteeship Council",
|
14
|
+
"ACC" => "Administrative Committee on Coordination",
|
15
|
+
"AT" => "United Nations Administrative Tribunal",
|
16
|
+
"CAT" => "Committee against Torture",
|
17
|
+
"CCPR" => "Human Rights Committee",
|
18
|
+
"CD" => "Conference on Disarmament",
|
19
|
+
"CEDAW" => "Committee on the Elimination of All Forms of Discrimination against Women",
|
20
|
+
"CERD" => "Committee on the Elimination of Racial Discrimination",
|
21
|
+
"CRC" => "Committee on the Rights of the Child",
|
22
|
+
"DC" => "Disarmament Commission",
|
23
|
+
"DP" => "United Nations Development Programme",
|
24
|
+
"HS" => "United Nations Centre for Human Settlements (HABITAT)",
|
25
|
+
"TD" => "United Nations Conference on Trade and Development",
|
26
|
+
"UNEP" => "United Nations Environment Programme",
|
27
|
+
"TRADE" => "Committee on Trade",
|
28
|
+
"CEFACT" => "Centre for Trade Facilitation and Electronic Business",
|
29
|
+
"C.1" => "Disarmament and International Security Committee",
|
30
|
+
"C.2" => "Economic and Financial Committee",
|
31
|
+
"C.3" => "Social, Humanitarian & Cultural Issues",
|
32
|
+
"C.4" => "Special Political and Decolonization Committee",
|
33
|
+
"C.5" => "Administrative and Budgetary Committee",
|
34
|
+
"C.6" => "Sixth Committee (Legal)",
|
35
|
+
"PC" => "Preparatory Committee",
|
36
|
+
"AEC" => "Atomic Energy Commission",
|
37
|
+
"AGRI" => "Committee on Agriculture",
|
38
|
+
"AMCEN" => "African Ministerial Conference on the Environment",
|
39
|
+
"AMCOW" => "African Ministers’ Council on Water",
|
40
|
+
"ECA" => "Economic Commission for Africa",
|
41
|
+
"ESCAP" => "Economic and Social Commission for Asia and Pacific",
|
42
|
+
"ECE" => "Economic Commission for Europe",
|
43
|
+
"ECWA" => "Economic Commission for Western Asia",
|
44
|
+
"UNFF" => "United Nations Forum on Forests",
|
45
|
+
"ENERGY" => "Committee on Sustainable Energy",
|
46
|
+
"FAO" => "Food and Agriculture Organization",
|
47
|
+
"UNCTAD" => "United Nations Conference on Trade and Development",
|
48
|
+
}.freeze
|
49
|
+
# rubocop:enable Layout/LineLength
|
50
|
+
|
6
51
|
# Parse page.
|
7
52
|
# @return [RelatonUn::UnBibliographicItem]
|
8
53
|
def fetch
|
@@ -11,48 +56,97 @@ module RelatonUn
|
|
11
56
|
|
12
57
|
private
|
13
58
|
|
59
|
+
# rubocop:disable Metrics/MethodLength
|
60
|
+
|
61
|
+
# @return [RelatonUn::UnBibliographicItem]
|
14
62
|
def un_bib_item
|
15
63
|
UnBibliographicItem.new(
|
16
64
|
type: "standard",
|
17
65
|
fetched: Date.today.to_s,
|
18
|
-
docid:
|
66
|
+
docid: fetch_docid,
|
19
67
|
docnumber: hit[:ref],
|
20
68
|
language: ["en"],
|
21
69
|
script: ["Latn"],
|
22
|
-
title:
|
23
|
-
date:
|
24
|
-
link:
|
25
|
-
keyword:
|
70
|
+
title: fetch_title,
|
71
|
+
date: fetch_date,
|
72
|
+
link: fetch_link,
|
73
|
+
keyword: fetch_keyword,
|
74
|
+
session: fetch_session,
|
75
|
+
distribution: fetch_distribution,
|
76
|
+
editorialgroup: fetch_editorialgroup,
|
77
|
+
classification: fetch_classification,
|
26
78
|
)
|
27
79
|
end
|
80
|
+
# rubocop:enable Metrics/MethodLength
|
28
81
|
|
29
82
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
30
|
-
def
|
31
|
-
|
83
|
+
def fetch_docid
|
84
|
+
hit[:symbol].map do |s|
|
85
|
+
RelatonBib::DocumentIdentifier.new(id: s, type: "UN")
|
86
|
+
end
|
32
87
|
end
|
33
88
|
|
34
89
|
# @return [Array<RelatonBib::TypedTitleString>]
|
35
|
-
def
|
36
|
-
fs = RelatonBib::FormattedString.new(
|
37
|
-
[
|
90
|
+
def fetch_title
|
91
|
+
# fs = RelatonBib::FormattedString.new(
|
92
|
+
# content: hit[:title], language: "en", script: "Latn",
|
93
|
+
# )
|
94
|
+
# [RelatonBib::TypedTitleString.new(type: "main", title: fs)]
|
95
|
+
# [{ title_main: hit[:title], language: "en", script: "Latn" }]
|
96
|
+
RelatonBib::TypedTitleString.from_string hit[:title], "en", "Latn"
|
38
97
|
end
|
39
98
|
|
40
99
|
# @return [Array<RelatonBib::BibliographicDate>]
|
41
|
-
def
|
100
|
+
def fetch_date
|
42
101
|
d = []
|
43
|
-
d <<
|
44
|
-
d <<
|
102
|
+
d << bibdate("published", hit[:date_pub]) if hit[:date_pub]
|
103
|
+
d << bibdate("issued", hit[:date_rel]) if hit[:date_rel]
|
45
104
|
d
|
46
105
|
end
|
47
106
|
|
107
|
+
# @param type [String]
|
108
|
+
# @param on [String]
|
109
|
+
# @return [RelatonBib::BibliographicDate]
|
110
|
+
def bibdate(type, on)
|
111
|
+
RelatonBib::BibliographicDate.new type: type, on: on
|
112
|
+
end
|
113
|
+
|
48
114
|
# @return [Array<RelatonBib::TypedUri>]
|
49
|
-
def
|
115
|
+
def fetch_link
|
50
116
|
hit[:link].map { |l| RelatonBib::TypedUri.new l }
|
51
117
|
end
|
52
118
|
|
53
119
|
# @return [Array<String>]
|
54
|
-
def
|
120
|
+
def fetch_keyword
|
55
121
|
hit[:keyword].split(", ")
|
56
122
|
end
|
123
|
+
|
124
|
+
# @return [RelatonUn::Session]
|
125
|
+
def fetch_session
|
126
|
+
Session.new(session_number: hit[:session], agenda_id: hit[:agenda])
|
127
|
+
end
|
128
|
+
|
129
|
+
# @return [String]
|
130
|
+
def fetch_distribution
|
131
|
+
UnBibliographicItem::DISTRIBUTIONS[hit[:distribution]]
|
132
|
+
end
|
133
|
+
|
134
|
+
# @return [RelatonUn::EditorialGroup, NilClass]
|
135
|
+
def fetch_editorialgroup
|
136
|
+
tc = hit[:ref].match(/^[\S]+/).to_s.split(/\/|-/).reduce([]) do |m, v|
|
137
|
+
if BODY[v] then m << BODY[v]
|
138
|
+
elsif v =~ /(AC|C|CN|CONF|GC|SC|Sub|WG).\d+|PC/ then m << v
|
139
|
+
else m
|
140
|
+
end
|
141
|
+
end.uniq
|
142
|
+
return unless tc.any?
|
143
|
+
|
144
|
+
RelatonUn::EditorialGroup.new tc
|
145
|
+
end
|
146
|
+
|
147
|
+
# @return [Array<RelatonBib::Classification>]
|
148
|
+
def fetch_classification
|
149
|
+
[RelatonBib::Classification.new(type: "area", value: "UNDOC")]
|
150
|
+
end
|
57
151
|
end
|
58
152
|
end
|
data/lib/relaton_un/hit_collection.rb
CHANGED
@@ -6,7 +6,8 @@ require "http-cookie"
|
|
6
6
|
module RelatonUn
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
-
AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3)
|
9
|
+
AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) "\
|
10
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
|
10
11
|
DOMAIN = "https://documents.un.org"
|
11
12
|
BOUNDARY = "----WebKitFormBoundary6hkaBvITDck8dHCn"
|
12
13
|
|
@@ -17,52 +18,10 @@ module RelatonUn
|
|
17
18
|
@jar = HTTP::CookieJar.new
|
18
19
|
@http = Net::HTTP.new @uri.host, @uri.port
|
19
20
|
@http.use_ssl = true
|
21
|
+
@http.read_timeout = 120
|
20
22
|
if (form_resp = get_page)
|
21
|
-
|
22
|
-
|
23
|
-
"//input[@type!='radio']",
|
24
|
-
"//input[@type='radio'][@checked]",
|
25
|
-
"//select[@name!='view:_id1:_id2:cbLang']",
|
26
|
-
"//textarea"
|
27
|
-
).reduce([]) do |m, i|
|
28
|
-
v = case i[:name]
|
29
|
-
when "view:_id1:_id2:txtSymbol" then text
|
30
|
-
when "view:_id1:_id2:cbType" then "FP"
|
31
|
-
when "view:_id1:_id2:cbSort" then "R"
|
32
|
-
when "$$xspsubmitid" then "view:_id1:_id2:_id130"
|
33
|
-
when "$$xspsubmitscroll" then "0|167"
|
34
|
-
else i[:value]
|
35
|
-
end
|
36
|
-
m << %{--#{BOUNDARY}}
|
37
|
-
m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
|
38
|
-
end
|
39
|
-
form_data << %{--#{BOUNDARY}--\r\n}
|
40
|
-
req = Net::HTTP::Post.new form.at("//form")[:action]
|
41
|
-
set_headers req
|
42
|
-
req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
|
43
|
-
req.body = form_data.join("\r\n")
|
44
|
-
resp = @http.request req
|
45
|
-
page_resp = get_page URI.parse(resp["location"]).request_uri
|
46
|
-
doc = Nokogiri::HTML page_resp.body
|
47
|
-
@array = doc.css("div.viewHover").map do |item|
|
48
|
-
ref = item.at("div/div/a")&.text&.sub "\u00A0", ""
|
49
|
-
title = item.at("div/div/span")&.text
|
50
|
-
keyword = item.at("div[3]/div[5]/span")&.text
|
51
|
-
date_pub = item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
|
52
|
-
en = item.at("//span[.='ENGLISH']/../..")
|
53
|
-
date_rel = en.at("./following-sibling::span[contains(@id, 'cfRelDateE')]").text
|
54
|
-
link = en.xpath("//a[contains(@title, 'Open')]").map do |l|
|
55
|
-
{ content: l[:href], type: l[:title].match(/PDF|Word/).to_s.downcase }
|
56
|
-
end
|
57
|
-
Hit.new({
|
58
|
-
ref: ref,
|
59
|
-
title: title,
|
60
|
-
keyword: keyword,
|
61
|
-
date_pub: date_pub,
|
62
|
-
date_rel: date_rel,
|
63
|
-
link: link
|
64
|
-
}, self)
|
65
|
-
end
|
23
|
+
doc = Nokogiri::HTML page_resp(form_resp, text).body
|
24
|
+
@array = doc.css("div.viewHover").map { |item| hit item }
|
66
25
|
end
|
67
26
|
end
|
68
27
|
|
@@ -84,9 +43,125 @@ module RelatonUn
|
|
84
43
|
get_page request_uri, deep + 1
|
85
44
|
end
|
86
45
|
|
46
|
+
# rubocop:disable Metrics/MethodLength
|
47
|
+
|
48
|
+
# @param form [Nokogiri::HTML::Document]
|
49
|
+
# @param text [String]
|
50
|
+
# @return [Array<String>]
|
51
|
+
def form_data(form, text)
|
52
|
+
fd = form.xpath(
|
53
|
+
"//input[@type!='radio']",
|
54
|
+
"//input[@type='radio'][@checked]",
|
55
|
+
"//select[@name!='view:_id1:_id2:cbLang']",
|
56
|
+
"//textarea",
|
57
|
+
).reduce([]) do |m, i|
|
58
|
+
v = case i[:name]
|
59
|
+
when "view:_id1:_id2:txtSymbol" then text
|
60
|
+
when "view:_id1:_id2:cbType" then "FP"
|
61
|
+
when "view:_id1:_id2:cbSort" then "R"
|
62
|
+
when "$$xspsubmitid" then "view:_id1:_id2:_id130"
|
63
|
+
when "$$xspsubmitscroll" then "0|167"
|
64
|
+
else i[:value]
|
65
|
+
end
|
66
|
+
m << %{--#{BOUNDARY}}
|
67
|
+
m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
|
68
|
+
end
|
69
|
+
fd << %{--#{BOUNDARY}--\r\n}
|
70
|
+
end
|
71
|
+
# rubocop:enable Metrics/MethodLength
|
72
|
+
|
73
|
+
# @param form_resp [Net::HTTPOK]
|
74
|
+
# @param text [String]
|
75
|
+
# @return [Net::HTTPOK]
|
76
|
+
def page_resp(form_resp, text)
|
77
|
+
form = Nokogiri::HTML form_resp.body
|
78
|
+
req = Net::HTTP::Post.new form.at("//form")[:action]
|
79
|
+
set_headers req
|
80
|
+
req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
|
81
|
+
req.body = form_data(form, text).join("\r\n")
|
82
|
+
resp = @http.request req
|
83
|
+
get_page URI.parse(resp["location"]).request_uri
|
84
|
+
end
|
85
|
+
|
86
|
+
# @param item [Nokogiri::XML::Element]
|
87
|
+
# @return [RelatonUn::Hit]
|
88
|
+
def hit(item)
|
89
|
+
Hit.new(hit_data(item), self)
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param item [Nokogiri::XML::Element]
|
93
|
+
# @return [Hash]
|
94
|
+
def hit_data(item)
|
95
|
+
en = item.at("//span[.='ENGLISH']/../..")
|
96
|
+
{
|
97
|
+
ref: item.at("div/div/a")&.text&.sub("\u00A0", ""),
|
98
|
+
symbol: symbol(item),
|
99
|
+
title: item.at("div/div/span")&.text,
|
100
|
+
keyword: item.at("div[3]/div[5]/span")&.text,
|
101
|
+
date_pub: date_pub(item),
|
102
|
+
date_rel: date_rel(en),
|
103
|
+
link: link(en),
|
104
|
+
session: session(item),
|
105
|
+
agenda: agenda(item),
|
106
|
+
distribution: distribution(item)
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
110
|
+
# @param item [Nokogiri::XML::Element]
|
111
|
+
# @return [String]
|
112
|
+
def symbol(item)
|
113
|
+
item.xpath("div/div[not(contains(@class, 'hidden'))]/"\
|
114
|
+
"label[contains(.,'Symbol')]/following-sibling::span[1]").map &:text
|
115
|
+
end
|
116
|
+
|
117
|
+
# @param item [Nokogiri::XML::Element]
|
118
|
+
# @return [String]
|
119
|
+
def date_pub(item)
|
120
|
+
item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
|
121
|
+
end
|
122
|
+
|
123
|
+
# @param item [Nokogiri::XML::Element]
|
124
|
+
# @return [String]
|
125
|
+
def date_rel(item)
|
126
|
+
item.at("./following-sibling::span[contains(@id, 'cfRelDateE')]")&.text
|
127
|
+
end
|
128
|
+
|
129
|
+
# @param item [Nokogiri::XML::Element]
|
130
|
+
# @return [Array<Hash>]
|
131
|
+
def link(item)
|
132
|
+
item.xpath("//a[contains(@title, 'Open')]").map do |l|
|
133
|
+
{
|
134
|
+
content: l[:href],
|
135
|
+
type: l[:title].match(/PDF|Word/).to_s.downcase,
|
136
|
+
}
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# @param item [Nokogiri::XML::Element]
|
141
|
+
# @return [String]
|
142
|
+
def session(item)
|
143
|
+
item.at("//label[.='Session / Year:']/following-sibling::span")&.text
|
144
|
+
end
|
145
|
+
|
146
|
+
# @param item [Nokogiri::XML::Element]
|
147
|
+
# @return [String]
|
148
|
+
def agenda(item)
|
149
|
+
item.at("//label[.='Agenda Item(s):']/following-sibling::span")&.text
|
150
|
+
end
|
151
|
+
|
152
|
+
# @param item [Nokogiri::XML::Element]
|
153
|
+
# @return [String]
|
154
|
+
def distribution(item)
|
155
|
+
item.at("//label[.='Distribution:']/following-sibling::span")&.text
|
156
|
+
end
|
157
|
+
|
158
|
+
# rubocop:disable Metrics/MethodLength
|
159
|
+
|
160
|
+
# @param req [Net::HTTP::Get, Net::HTTP::Post]
|
87
161
|
def set_headers(req)
|
88
162
|
set_cookie req
|
89
|
-
req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,
|
163
|
+
req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,"\
|
164
|
+
"image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
|
90
165
|
req["Accept-Encoding"] = "gzip, deflate, br"
|
91
166
|
req["Cache-Control"] = "max-age=0"
|
92
167
|
req["Connection"] = "keep-alive"
|
@@ -98,7 +173,9 @@ module RelatonUn
|
|
98
173
|
req["Upgrade-Insecure-Requests"] = "1"
|
99
174
|
req["User-Agent"] = AGENT
|
100
175
|
end
|
176
|
+
# rubocop:enable Metrics/MethodLength
|
101
177
|
|
178
|
+
# @param req [Net::HTTP::Get, Net::HTTP::Post]
|
102
179
|
def set_cookie(req)
|
103
180
|
req["Cookie"] = HTTP::Cookie.cookie_value @jar.cookies(@uri)
|
104
181
|
end
|