relaton-un 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/grammars/un.rng +3 -1
- data/lib/relaton_un/editorialgroup.rb +25 -0
- data/lib/relaton_un/hash_converter.rb +29 -1
- data/lib/relaton_un/hit.rb +102 -15
- data/lib/relaton_un/hit_collection.rb +124 -47
- data/lib/relaton_un/session.rb +65 -0
- data/lib/relaton_un/un_bibliographic_item.rb +37 -1
- data/lib/relaton_un/un_bibliography.rb +8 -5
- data/lib/relaton_un/version.rb +1 -1
- data/lib/relaton_un/xml_parser.rb +50 -5
- data/lib/relaton_un.rb +3 -1
- data/relaton_un.gemspec +13 -11
- metadata +10 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0fcd871844a8cafc3b613fc6e77ccd219717122b95bfdfafca8362b46f63490
|
4
|
+
data.tar.gz: 952d86d121183c5a32e822a2f7bb3ae651851f87999dfe21ccf0bb0b302fbf5f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03c2ccfe9ce6cce797f2a5c905b74976793c8245ed36b813dfaf55e3989e1cdb41426a6d52a357c71cffa42d67427d78ae6c4d3418fdf471b176276557ff2579
|
7
|
+
data.tar.gz: 9f2c4b1db54d38a5e48407a33d01aab3d2e98c8d6eec700262f33f00505d7a2339378a5511ba600cad13a01ee710c5e4ba1e6ef703282644f4febe46b660fad0
|
data/grammars/un.rng
CHANGED
@@ -0,0 +1,25 @@
|
|
1
|
+
module RelatonUn
  # Holds the committee names of a UN document and serialises them to XML
  # and to a Hash.
  class EditorialGroup
    include RelatonBib

    # @return [Array<String>] committee names
    attr_reader :committee

    # @param committee [Array<String>] committee names
    def initialize(committee)
      @committee = committee
    end

    # Adds an `editorialgroup` element holding one `committee` child per
    # committee name.
    #
    # @param builder [Nokogiri::XML::Builder]
    def to_xml(builder)
      builder.editorialgroup do |xml|
        committee.each do |name|
          xml.committee name
        end
      end
    end

    # Maps every committee name to a `{ "committee" => name }` Hash and hands
    # the list to `single_element_array` (not defined in this file;
    # presumably supplied by the RelatonBib mixin).
    #
    # @return [Array<Hash>, Hash]
    def to_hash
      entries = committee.map { |name| { "committee" => name } }
      single_element_array(entries)
    end
  end
end
|
@@ -1,5 +1,33 @@
|
|
1
1
|
module RelatonUn
|
2
|
-
class HashConverter <
|
2
|
+
class HashConverter < RelatonIsoBib::HashConverter
|
3
|
+
class << self
|
4
|
+
# @override RelatonIsoBib::HashConverter.hash_to_bib
|
5
|
+
# @param args [Hash]
|
6
|
+
# @param nested [TrueClass, FalseClass]
|
7
|
+
# @return [Hash]
|
8
|
+
def hash_to_bib(args, nested = false)
  # Delegate the generic conversion to the parent class, then wrap the
  # :session entry; a nil result from the parent is passed through untouched.
  bib = super
  session_hash_to_bib(bib) unless bib.nil?
  bib
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
# @param ret [Hash]
|
19
|
+
def session_hash_to_bib(ret)
  session = ret[:session]
  return unless session

  # Session#initialize is declared with `**args` only, so the Hash has to be
  # splatted: since Ruby 3.0 a positional Hash is no longer converted to
  # keyword arguments and would raise ArgumentError.
  ret[:session] = Session.new(**session)
end
|
22
|
+
|
23
|
+
# @param ret [Hash]
|
24
|
+
def editorialgroup_hash_to_bib(ret)
  eg = ret[:editorialgroup]
  return unless eg

  # EditorialGroup#to_hash is documented as returning either an Array of
  # Hashes or a single bare Hash. Calling #map on a bare Hash would iterate
  # its key/value pairs and break on `e[:committee]`, so wrap it first.
  groups = eg.is_a?(Hash) ? [eg] : eg
  committee = groups.map { |e| e[:committee] }
  ret[:editorialgroup] = EditorialGroup.new array(committee)
end
|
31
|
+
end
|
4
32
|
end
|
5
33
|
end
|
data/lib/relaton_un/hit.rb
CHANGED
@@ -3,6 +3,51 @@
|
|
3
3
|
module RelatonUn
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
# rubocop:disable Layout/LineLength
|
7
|
+
|
8
|
+
# There is distribution PRO (A/47/PV.102/CORR.1, A/47/PV.54)
|
9
|
+
BODY = {
|
10
|
+
"A" => "General Assembly",
|
11
|
+
"E" => "Economic and Social Council",
|
12
|
+
"S" => "Security Council",
|
13
|
+
"T" => "Trusteeship Council",
|
14
|
+
"ACC" => "Administrative Committee on Coordination",
|
15
|
+
"AT" => "United Nations Administrative Tribunal",
|
16
|
+
"CAT" => "Committee against Torture",
|
17
|
+
"CCPR" => "Human Rights Committee",
|
18
|
+
"CD" => "Conference on Disarmament",
|
19
|
+
"CEDAW" => "Committee on the Elimination of All Forms of Discrimination against Women",
|
20
|
+
"CERD" => "Committee on the Elimination of Racial Discrimination",
|
21
|
+
"CRC" => "Committee on the Rights of the Child",
|
22
|
+
"DC" => "Disarmament Commission",
|
23
|
+
"DP" => "United Nations Development Programme",
|
24
|
+
"HS" => "United Nations Centre for Human Settlements (HABITAT)",
|
25
|
+
"TD" => "United Nations Conference on Trade and Development",
|
26
|
+
"UNEP" => "United Nations Environment Programme",
|
27
|
+
"TRADE" => "Committee on Trade",
|
28
|
+
"CEFACT" => "Centre for Trade Facilitation and Electronic Business",
|
29
|
+
"C.1" => "Disarmament and International Security Committee",
|
30
|
+
"C.2" => "Economic and Financial Committee",
|
31
|
+
"C.3" => "Social, Humanitarian & Cultural Issues",
|
32
|
+
"C.4" => "Special Political and Decolonization Committee",
|
33
|
+
"C.5" => "Administrative and Budgetary Committee",
|
34
|
+
"C.6" => "Sixth Committee (Legal)",
|
35
|
+
"PC" => "Preparatory Committee",
|
36
|
+
"AEC" => "Atomic Energy Commission",
|
37
|
+
"AGRI" => "Committee on Agriculture",
|
38
|
+
"AMCEN" => "African Ministerial Conference on the Environment",
|
39
|
+
"AMCOW" => "African Ministers’ Council on Water",
|
40
|
+
"ECA" => "Economic Commission for Africa",
|
41
|
+
"ESCAP" => "Economic and Social Commission for Asia and Pacific",
|
42
|
+
"ECE" => "Economic Commission for Europe",
|
43
|
+
"ECWA" => "Economic Commission for Western Asia",
|
44
|
+
"UNFF" => "United Nations Forum on Forests",
|
45
|
+
"ENERGY" => "Committee on Sustainable Energy",
|
46
|
+
"FAO" => "Food and Agriculture Organization",
|
47
|
+
"UNCTAD" => "United Nations Conference on Trade and Development",
|
48
|
+
}.freeze
|
49
|
+
# rubocop:enable Layout/LineLength
|
50
|
+
|
6
51
|
# Parse page.
|
7
52
|
# @return [RelatonUn::UnBibliographicItem]
|
8
53
|
def fetch
|
@@ -11,48 +56,90 @@ module RelatonUn
|
|
11
56
|
|
12
57
|
private
|
13
58
|
|
59
|
+
# rubocop:disable Metrics/MethodLength
|
60
|
+
|
61
|
+
# @return [RelatonUn::UnBibliographicItem]
|
14
62
|
def un_bib_item
  # Collect the attributes first (same evaluation order as the keyword list),
  # then build the item in one call.
  attrs = {
    type: "standard",
    fetched: Date.today.to_s,
    docid: fetch_docid,
    docnumber: hit[:ref],
    language: ["en"],
    script: ["Latn"],
    title: fetch_title,
    date: fetch_date,
    link: fetch_link,
    keyword: fetch_keyword,
    session: fetch_session,
    distribution: fetch_distribution,
    editorialgroup: fetch_editorialgroup,
  }
  UnBibliographicItem.new(**attrs)
end
|
79
|
+
# rubocop:enable Metrics/MethodLength
|
28
80
|
|
29
81
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
30
|
-
def
|
31
|
-
|
82
|
+
def fetch_docid
  # One "UN"-typed identifier per symbol scraped for this hit.
  symbols = hit[:symbol]
  symbols.map { |sym| RelatonBib::DocumentIdentifier.new(id: sym, type: "UN") }
end
|
33
87
|
|
34
88
|
# @return [Array<Hash>]
|
35
|
-
def
|
36
|
-
fs = RelatonBib::FormattedString.new(
|
37
|
-
[
|
89
|
+
def fetch_title
  # The title is passed on as a plain attribute Hash rather than as a
  # RelatonBib::TypedTitleString instance.
  main_title = { title_main: hit[:title], language: "en", script: "Latn" }
  [main_title]
end
|
39
96
|
|
40
97
|
# @return [Array<RelatonBib::BibliographicDate>]
|
41
|
-
def
|
98
|
+
def fetch_date
  # Date type => key of the scraped hit that holds the value; a date is only
  # emitted when the hit actually carries it.
  sources = { "published" => :date_pub, "issued" => :date_rel }
  sources.each_with_object([]) do |(type, key), dates|
    on = hit[key]
    dates << bibdate(type, on) if on
  end
end
|
47
104
|
|
105
|
+
# @param type [String]
|
106
|
+
# @param on [String]
|
107
|
+
# @return [RelatonBib::BibliographicDate]
|
108
|
+
def bibdate(type, on)
  attrs = { type: type, on: on }
  RelatonBib::BibliographicDate.new(**attrs)
end
|
111
|
+
|
48
112
|
# @return [Array<RelatonBib::TypedUri>]
|
49
|
-
def
|
113
|
+
def fetch_link
  # Each scraped link is a Hash with :content and :type keys. Splat it into
  # keyword arguments explicitly: since Ruby 3.0 a positional Hash is no
  # longer converted to keywords implicitly.
  hit[:link].map { |l| RelatonBib::TypedUri.new(**l) }
end
|
52
116
|
|
53
117
|
# @return [Array<String>]
|
54
|
-
def
|
118
|
+
def fetch_keyword
  # The keyword cell is scraped with `&.text` and is therefore nil when the
  # page has no such cell; treat that as "no keywords" instead of raising.
  hit[:keyword].to_s.split(", ")
end
|
121
|
+
|
122
|
+
# @return [RelatonUn::Session]
|
123
|
+
def fetch_session
  number = hit[:session]
  agenda = hit[:agenda]
  Session.new(session_number: number, agenda_id: agenda)
end
|
126
|
+
|
127
|
+
# @return [String]
|
128
|
+
def fetch_distribution
  # Translate the scraped distribution code into its long form; an unknown
  # or missing code yields nil.
  code = hit[:distribution]
  UnBibliographicItem::DISTRIBUTIONS[code]
end
|
131
|
+
|
132
|
+
# @return [RelatonUn::EditorialGroup, NilClass]
|
133
|
+
def fetch_editorialgroup
  # Only the first whitespace-free token of the reference is inspected; it is
  # cut into parts at "/" and "-".
  symbol = hit[:ref].match(/^[\S]+/).to_s
  committees = symbol.split(/\/|-/).each_with_object([]) do |part, acc|
    if BODY[part]
      # Known body code: use its full name.
      acc << BODY[part]
    elsif part =~ /(AC|C|CN|CONF|GC|SC|Sub|WG).\d+|PC/
      # Looks like a subsidiary-organ code: keep the raw part.
      acc << part
    end
  end.uniq
  return unless committees.any?

  RelatonUn::EditorialGroup.new committees
end
|
57
144
|
end
|
58
145
|
end
|
@@ -6,7 +6,8 @@ require "http-cookie"
|
|
6
6
|
module RelatonUn
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
-
AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3)
|
9
|
+
AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) "\
|
10
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
|
10
11
|
DOMAIN = "https://documents.un.org"
|
11
12
|
BOUNDARY = "----WebKitFormBoundary6hkaBvITDck8dHCn"
|
12
13
|
|
@@ -17,52 +18,10 @@ module RelatonUn
|
|
17
18
|
@jar = HTTP::CookieJar.new
|
18
19
|
@http = Net::HTTP.new @uri.host, @uri.port
|
19
20
|
@http.use_ssl = true
|
21
|
+
@http.read_timeout = 120
|
20
22
|
if (form_resp = get_page)
|
21
|
-
|
22
|
-
|
23
|
-
"//input[@type!='radio']",
|
24
|
-
"//input[@type='radio'][@checked]",
|
25
|
-
"//select[@name!='view:_id1:_id2:cbLang']",
|
26
|
-
"//textarea"
|
27
|
-
).reduce([]) do |m, i|
|
28
|
-
v = case i[:name]
|
29
|
-
when "view:_id1:_id2:txtSymbol" then text
|
30
|
-
when "view:_id1:_id2:cbType" then "FP"
|
31
|
-
when "view:_id1:_id2:cbSort" then "R"
|
32
|
-
when "$$xspsubmitid" then "view:_id1:_id2:_id130"
|
33
|
-
when "$$xspsubmitscroll" then "0|167"
|
34
|
-
else i[:value]
|
35
|
-
end
|
36
|
-
m << %{--#{BOUNDARY}}
|
37
|
-
m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
|
38
|
-
end
|
39
|
-
form_data << %{--#{BOUNDARY}--\r\n}
|
40
|
-
req = Net::HTTP::Post.new form.at("//form")[:action]
|
41
|
-
set_headers req
|
42
|
-
req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
|
43
|
-
req.body = form_data.join("\r\n")
|
44
|
-
resp = @http.request req
|
45
|
-
page_resp = get_page URI.parse(resp["location"]).request_uri
|
46
|
-
doc = Nokogiri::HTML page_resp.body
|
47
|
-
@array = doc.css("div.viewHover").map do |item|
|
48
|
-
ref = item.at("div/div/a")&.text&.sub "\u00A0", ""
|
49
|
-
title = item.at("div/div/span")&.text
|
50
|
-
keyword = item.at("div[3]/div[5]/span")&.text
|
51
|
-
date_pub = item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
|
52
|
-
en = item.at("//span[.='ENGLISH']/../..")
|
53
|
-
date_rel = en.at("./following-sibling::span[contains(@id, 'cfRelDateE')]").text
|
54
|
-
link = en.xpath("//a[contains(@title, 'Open')]").map do |l|
|
55
|
-
{ content: l[:href], type: l[:title].match(/PDF|Word/).to_s.downcase }
|
56
|
-
end
|
57
|
-
Hit.new({
|
58
|
-
ref: ref,
|
59
|
-
title: title,
|
60
|
-
keyword: keyword,
|
61
|
-
date_pub: date_pub,
|
62
|
-
date_rel: date_rel,
|
63
|
-
link: link
|
64
|
-
}, self)
|
65
|
-
end
|
23
|
+
doc = Nokogiri::HTML page_resp(form_resp, text).body
|
24
|
+
@array = doc.css("div.viewHover").map { |item| hit item }
|
66
25
|
end
|
67
26
|
end
|
68
27
|
|
@@ -84,9 +43,125 @@ module RelatonUn
|
|
84
43
|
get_page request_uri, deep + 1
|
85
44
|
end
|
86
45
|
|
46
|
+
# rubocop:disable Metrics/MethodLength
|
47
|
+
|
48
|
+
# @param form [Nokogiri::HTML::Document]
|
49
|
+
# @param text [String]
|
50
|
+
# @return [Array<String>]
|
51
|
+
def form_data(form, text)
  # Fields whose submitted value is forced; every other field is sent with
  # the value found in the form.
  overrides = {
    "view:_id1:_id2:txtSymbol" => text,
    "view:_id1:_id2:cbType" => "FP",
    "view:_id1:_id2:cbSort" => "R",
    "$$xspsubmitid" => "view:_id1:_id2:_id130",
    "$$xspsubmitscroll" => "0|167",
  }
  fields = form.xpath(
    "//input[@type!='radio']",
    "//input[@type='radio'][@checked]",
    "//select[@name!='view:_id1:_id2:cbLang']",
    "//textarea",
  )
  # Two entries per field: the boundary line and the part itself.
  parts = fields.flat_map do |field|
    name = field[:name]
    value = overrides.fetch(name) { field[:value] }
    [
      %{--#{BOUNDARY}},
      %{Content-Disposition: form-data; name="#{name}"\r\n\r\n#{value}},
    ]
  end
  # Closing boundary.
  parts << %{--#{BOUNDARY}--\r\n}
end
|
71
|
+
# rubocop:enable Metrics/MethodLength
|
72
|
+
|
73
|
+
# @param form_resp [Net::HTTPOK]
|
74
|
+
# @param text [String]
|
75
|
+
# @return [Net::HTTPOK]
|
76
|
+
def page_resp(form_resp, text)
  form = Nokogiri::HTML(form_resp.body)
  action = form.at("//form")[:action]

  # Post the filled-in search form back to the form's own action URL.
  post = Net::HTTP::Post.new(action)
  set_headers(post)
  post["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
  post.body = form_data(form, text).join("\r\n")

  # Fetch the page named by the "location" header of the POST response.
  location = @http.request(post)["location"]
  get_page(URI.parse(location).request_uri)
end
|
85
|
+
|
86
|
+
# @param item [Nokogiri::XML::Element]
|
87
|
+
# @return [RelatonUn::Hit]
|
88
|
+
def hit(item)
  data = hit_data(item)
  Hit.new(data, self)
end
|
91
|
+
|
92
|
+
# @param item [Nokogiri::XML::Element]
|
93
|
+
# @return [Hash]
|
94
|
+
def hit_data(item)
  # NOTE(review): an XPath that starts with "//" is evaluated from the
  # document root, not from `item` - confirm this is intended for result
  # pages that list more than one hit.
  english = item.at("//span[.='ENGLISH']/../..")

  data = {}
  data[:ref] = item.at("div/div/a")&.text&.sub("\u00A0", "")
  data[:symbol] = symbol(item)
  data[:title] = item.at("div/div/span")&.text
  data[:keyword] = item.at("div[3]/div[5]/span")&.text
  data[:date_pub] = date_pub(item)
  data[:date_rel] = date_rel(english)
  data[:link] = link(english)
  data[:session] = session(item)
  data[:agenda] = agenda(item)
  data[:distribution] = distribution(item)
  data
end
|
109
|
+
|
110
|
+
# @param item [Nokogiri::XML::Element]
|
111
|
+
# @return [Array<String>]
|
112
|
+
def symbol(item)
  spans = item.xpath("div/div[not(contains(@class, 'hidden'))]/"\
                     "label[contains(.,'Symbol')]/following-sibling::span[1]")
  spans.map(&:text)
end

# @param item [Nokogiri::XML::Element]
# @return [String, NilClass] text next to the "Publication Date: " label
def date_pub(item)
  labelled_text(item, "Publication Date: ")
end

# @param item [Nokogiri::XML::Element]
# @return [String, NilClass] text of the following sibling span whose id
#   contains "cfRelDateE"
def date_rel(item)
  node = item.at("./following-sibling::span[contains(@id, 'cfRelDateE')]")
  node&.text
end

# @param item [Nokogiri::XML::Element]
# @return [Array<Hash>] one `{ content:, type: }` Hash per anchor whose
#   title contains "Open"; type is "pdf", "word" or "" (taken from the title)
def link(item)
  item.xpath("//a[contains(@title, 'Open')]").map do |anchor|
    kind = anchor[:title].match(/PDF|Word/).to_s.downcase
    { content: anchor[:href], type: kind }
  end
end

# @param item [Nokogiri::XML::Element]
# @return [String, NilClass] text next to the "Session / Year:" label
def session(item)
  labelled_text(item, "Session / Year:")
end

# @param item [Nokogiri::XML::Element]
# @return [String, NilClass] text next to the "Agenda Item(s):" label
def agenda(item)
  labelled_text(item, "Agenda Item(s):")
end

# @param item [Nokogiri::XML::Element]
# @return [String, NilClass] text next to the "Distribution:" label
def distribution(item)
  labelled_text(item, "Distribution:")
end

# Text of the first span that follows the label with exactly the given text.
#
# @param item [Nokogiri::XML::Element]
# @param label [String] exact label text, including any trailing space
# @return [String, NilClass]
def labelled_text(item, label)
  item.at("//label[.='#{label}']/following-sibling::span")&.text
end
|
157
|
+
|
158
|
+
# rubocop:disable Metrics/MethodLength
|
159
|
+
|
160
|
+
# @param req [Net::HTTP::Get, Net::HTTP::Post]
|
87
161
|
def set_headers(req)
|
88
162
|
set_cookie req
|
89
|
-
req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,
|
163
|
+
req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,"\
|
164
|
+
"image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
|
90
165
|
req["Accept-Encoding"] = "gzip, deflate, br"
|
91
166
|
req["Cache-Control"] = "max-age=0"
|
92
167
|
req["Connection"] = "keep-alive"
|
@@ -98,7 +173,9 @@ module RelatonUn
|
|
98
173
|
req["Upgrade-Insecure-Requests"] = "1"
|
99
174
|
req["User-Agent"] = AGENT
|
100
175
|
end
|
176
|
+
# rubocop:enable Metrics/MethodLength
|
101
177
|
|
178
|
+
# @param req [Net::HTTP::Get, Net::HTTP::Post]
|
102
179
|
def set_cookie(req)
  # Send back whatever the cookie jar holds for the target URI.
  cookies = @jar.cookies(@uri)
  req["Cookie"] = HTTP::Cookie.cookie_value(cookies)
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module RelatonUn
  # Session data of a UN document (number, date, agenda items, ...) that can
  # be serialised to XML and to a Hash.
  class Session
    include RelatonBib

    # @return [String, NilClass]
    attr_reader :session_number, :collaboration, :agenda_id, :item_footnote

    # @return [Date, NilClass]
    attr_reader :session_date

    # @return [Array<String>]
    attr_reader :item_number, :item_name, :subitem_name

    # @param session_number [String]
    # @param session_date [String, Date] a string understood by Date.parse,
    #   or any object responding to #to_date
    # @param item_number [Array<String>]
    # @param item_name [Array<String>]
    # @param subitem_name [Array<String>]
    # @param collaboration [String]
    # @param agenda_id [String]
    # @param item_footnote [String]
    def initialize(**args)
      @session_number = args[:session_number]
      date = args[:session_date]
      if date
        # Date.parse accepts strings only; date-like objects are converted
        # with #to_date instead of raising TypeError.
        @session_date = date.respond_to?(:to_date) ? date.to_date : Date.parse(date)
      end
      @item_number = args.fetch(:item_number, [])
      @item_name = args.fetch(:item_name, [])
      @subitem_name = args.fetch(:subitem_name, [])
      @collaboration = args[:collaboration]
      @agenda_id = args[:agenda_id]
      @item_footnote = args[:item_footnote]
    end

    # rubocop:disable Metrics/AbcSize

    # Adds a `session` element; only the values that are present are written.
    #
    # @param builder [Nokogiri::XML::Builder]
    def to_xml(builder)
      builder.session do |b|
        b.number session_number if session_number
        # Element names containing "-" are not valid Ruby method names, hence
        # the explicit `send`.
        b.send "session-date", session_date.to_s if session_date
        item_number.each { |n| b.send "item-number", n }
        item_name.each { |n| b.send "item-name", n }
        subitem_name.each { |n| b.send "subitem-name", n }
        b.collaboration collaboration if collaboration
        b.send "agenda-id", agenda_id if agenda_id
        b.send "item-footnote", item_footnote if item_footnote
      end
    end

    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

    # Hash form with string keys; absent values and empty lists are left out.
    # List values go through `single_element_array` (not defined in this
    # file; presumably supplied by the RelatonBib mixin).
    #
    # @return [Hash]
    def to_hash
      hash = {}
      hash["session_number"] = session_number if session_number
      hash["session_date"] = session_date.to_s if session_date
      hash["item_number"] = single_element_array(item_number) if item_number.any?
      hash["item_name"] = single_element_array(item_name) if item_name.any?
      hash["subitem_name"] = single_element_array(subitem_name) if subitem_name.any?
      hash["collaboration"] = collaboration if collaboration
      hash["agenda_id"] = agenda_id if agenda_id
      hash["item_footnote"] = item_footnote if item_footnote
      hash
    end
    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  end
end
|
@@ -1,8 +1,44 @@
|
|
1
1
|
module RelatonUn
|
2
|
-
class UnBibliographicItem <
|
2
|
+
class UnBibliographicItem < RelatonIsoBib::IsoBibliographicItem
|
3
3
|
TYPES = %w[
|
4
4
|
recommendation plenary addendum communication corrigendum reissue agenda
|
5
5
|
budgetary sec-gen-notes expert-report resolution
|
6
6
|
].freeze
|
7
|
+
|
8
|
+
DISTRIBUTIONS = { "GEN" => "general", "LTD" => "limited",
|
9
|
+
"DER" => "restricted" }.freeze
|
10
|
+
|
11
|
+
# @return [RelatonUn::Session, NilClass]
|
12
|
+
attr_reader :session
|
13
|
+
|
14
|
+
# @return [String, NilClass]
|
15
|
+
attr_reader :distribution
|
16
|
+
|
17
|
+
# @param session [RelatonUn::Session, NilClass]
|
18
|
+
# @param distribution [String]
|
19
|
+
def initialize(**args)
  # Pull out the UN-specific attributes; everything else goes to the parent.
  @distribution = args.delete(:distribution)
  @session = args.delete(:session)
  if @distribution && !DISTRIBUTIONS.value?(@distribution)
    # An unknown distribution is only reported; the value is kept as given.
    warn "[relaton-un] WARNING: invalid distribution: #{@distribution}"
  end
  super(**args)
end
|
27
|
+
|
28
|
+
# @param builder [Nokogiri::XML::Builder]
|
29
|
+
# @param bibdata [TrueClass, FalseClass, NilClass]
|
30
|
+
def to_xml(builder = nil, **opts)
  # The block receives the builder from the parent implementation and adds
  # the UN-specific elements to it.
  super(builder, **opts) do |xml|
    xml.distribution(distribution) if distribution
    session.to_xml(xml) if session
  end
end
|
36
|
+
|
37
|
+
# @return [Hash]
|
38
|
+
def to_hash
  hash = super
  # Keep the distribution in the Hash form as well; #initialize accepts it
  # back as the :distribution argument, so it survives a round trip.
  hash["distribution"] = distribution if distribution
  hash["session"] = session.to_hash if session
  hash
end
|
7
43
|
end
|
8
44
|
end
|
@@ -9,9 +9,11 @@ module RelatonUn
|
|
9
9
|
def search(text)
  HitCollection.new text
rescue SocketError, Errno::EINVAL, Errno::ECONNRESET, EOFError,
       Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
       Net::ProtocolError, Net::ReadTimeout, Net::OpenTimeout,
       OpenSSL::SSL::SSLError, Errno::ETIMEDOUT => e
  # Every network-level failure, including a timeout while opening the
  # connection (Net::OpenTimeout), is reported as one RequestError.
  raise RelatonBib::RequestError,
        "Could not access #{HitCollection::DOMAIN}: #{e.message}"
end
|
16
18
|
|
17
19
|
# @param ref [String] document reference
|
@@ -23,7 +25,8 @@ module RelatonUn
|
|
23
25
|
/^(UN\s)?(?<code>.*)/ =~ ref
|
24
26
|
result = isobib_search_filter(code)
|
25
27
|
if result
|
26
|
-
warn "[relaton-un] (\"#{ref}\")
|
28
|
+
warn "[relaton-un] (\"#{ref}\") "\
|
29
|
+
"found #{result.fetch.docidentifier[0].id}"
|
27
30
|
result.fetch
|
28
31
|
end
|
29
32
|
end
|
@@ -36,7 +39,7 @@ module RelatonUn
|
|
36
39
|
# @return [RelatonUn::Hit, NilClass]
|
37
40
|
def isobib_search_filter(code)
  # Keep the hits listing exactly this symbol and return the first of them
  # (nil when there is none).
  matching = search(code).select { |i| i.hit[:symbol].include? code }
  matching.first
end
|
41
44
|
end
|
42
45
|
end
|
data/lib/relaton_un/version.rb
CHANGED
@@ -1,16 +1,61 @@
|
|
1
1
|
module RelatonUn
|
2
|
-
class XMLParser <
|
2
|
+
class XMLParser < RelatonIsoBib::XMLParser
|
3
3
|
class << self
|
4
|
+
# @param xml [String]
|
5
|
+
# @return [RelatonUn::UnBibliographicItem, NilClass]
|
4
6
|
def from_xml(xml)
  doc = Nokogiri::XML xml
  doc.remove_namespaces!
  item = doc.at("/bibitem|/bibdata")
  unless item
    warn "[relaton-un] WARNING: can't find bibitem or bibdata element in"\
      " the XML"
    return
  end

  # UnBibliographicItem#initialize is declared with `**args` only, so the
  # attribute Hash has to be splatted: since Ruby 3.0 a positional Hash is no
  # longer converted to keyword arguments.
  UnBibliographicItem.new(**item_data(item))
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# @param item [Nokogiri::XML::Element]
|
21
|
+
# @return [Hash]
|
22
|
+
def item_data(item)
  # Start from the attributes collected by the parent parser and add the
  # UN-specific ones to the same Hash.
  attrs = super
  attrs.update(
    session: fetch_session(item),
    distribution: item.at("distribution")&.text,
  )
end
|
28
|
+
|
29
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
30
|
+
|
31
|
+
# @param item [Nokogiri::XML::Element]
|
32
|
+
# @return [RelatonUn::Session]
|
33
|
+
def fetch_session(item)
  node = item.at "./ext/session"
  return unless node

  # Single-valued children give their text or nil; repeatable children give
  # the list of their texts.
  text_of = ->(tag) { node.at(tag)&.text }
  texts_of = ->(tag) { node.xpath(tag).map(&:text) }

  RelatonUn::Session.new(
    session_number: text_of.call("number"),
    session_date: text_of.call("session-date"),
    item_number: texts_of.call("item-number"),
    item_name: texts_of.call("item-name"),
    subitem_name: texts_of.call("subitem-name"),
    collaboration: text_of.call("collaboration"),
    agenda_id: text_of.call("agenda-id"),
    item_footnote: text_of.call("item-footnote"),
  )
end
|
48
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
49
|
+
|
50
|
+
# @param ext [Nokogiri::XML::Element]
|
51
|
+
# @return [RelatonUn::EditorialGroup]
|
52
|
+
def fetch_editorialgroup(ext)
  group = ext.at("./editorialgroup")
  return unless group

  # One committee name per `committee` child element.
  names = group.xpath("committee").map(&:text)
  EditorialGroup.new names
end
|
14
59
|
end
|
15
60
|
end
|
16
61
|
end
|
data/lib/relaton_un.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require "
|
1
|
+
require "relaton_iso_bib"
|
2
2
|
require "relaton_un/version"
|
3
3
|
require "relaton_un/un_bibliographic_item"
|
4
4
|
require "relaton_un/un_bibliography"
|
@@ -6,6 +6,8 @@ require "relaton_un/hit_collection"
|
|
6
6
|
require "relaton_un/hit"
|
7
7
|
require "relaton_un/hash_converter"
|
8
8
|
require "relaton_un/xml_parser"
|
9
|
+
require "relaton_un/session"
|
10
|
+
require "relaton_un/editorialgroup"
|
9
11
|
|
10
12
|
module RelatonUn
|
11
13
|
class Error < StandardError; end
|
data/relaton_un.gemspec
CHANGED
@@ -1,31 +1,32 @@
|
|
1
1
|
lib = File.expand_path("lib", __dir__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
-
require
|
3
|
+
require "relaton_un/version"
|
4
4
|
|
5
|
+
# rubocop:disable Metrics/BlockLength
|
5
6
|
Gem::Specification.new do |spec|
|
6
7
|
spec.name = "relaton-un"
|
7
8
|
spec.version = RelatonUn::VERSION
|
8
9
|
spec.authors = ["Ribose Inc."]
|
9
10
|
spec.email = ["open.source@ribose.com"]
|
10
11
|
|
11
|
-
spec.summary = "RelatonIso: retrieve CC Standards for bibliographic
|
12
|
-
"using the IsoBibliographicItem model"
|
13
|
-
spec.description = "RelatonIso: retrieve CC Standards for bibliographic
|
14
|
-
"using the IsoBibliographicItem model"
|
12
|
+
spec.summary = "RelatonIso: retrieve CC Standards for bibliographic "\
|
13
|
+
"use using the IsoBibliographicItem model"
|
14
|
+
spec.description = "RelatonIso: retrieve CC Standards for bibliographic "\
|
15
|
+
"use using the IsoBibliographicItem model"
|
15
16
|
spec.homepage = "https://github.com/relaton/relaton-un"
|
16
17
|
spec.license = "BSD-2-Clause"
|
17
18
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
|
18
19
|
|
19
|
-
# spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
20
|
-
|
21
20
|
spec.metadata["homepage_uri"] = spec.homepage
|
22
|
-
|
21
|
+
spec.metadata["source_code_uri"] = spec.homepage
|
23
22
|
# spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
|
24
23
|
|
25
24
|
# Specify which files should be added to the gem when it is released.
|
26
25
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
27
|
-
spec.files
|
28
|
-
`git ls-files -z`.split("\x0").reject
|
26
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
27
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
28
|
+
f.match(%r{^(test|spec|features)/})
|
29
|
+
end
|
29
30
|
end
|
30
31
|
spec.bindir = "exe"
|
31
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
@@ -42,6 +43,7 @@ Gem::Specification.new do |spec|
|
|
42
43
|
|
43
44
|
spec.add_dependency "faraday"
|
44
45
|
spec.add_dependency "http-cookie"
|
45
|
-
spec.add_dependency "relaton-bib", "
|
46
|
+
spec.add_dependency "relaton-iso-bib", ">= 0.9.2"
|
46
47
|
spec.add_dependency "unf_ext", ">= 0.0.7.7"
|
47
48
|
end
|
49
|
+
# rubocop:enable Metrics/BlockLength
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-un
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -151,19 +151,19 @@ dependencies:
|
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
|
-
name: relaton-bib
|
154
|
+
name: relaton-iso-bib
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
|
-
- - "
|
157
|
+
- - ">="
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 0.9.
|
159
|
+
version: 0.9.2
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
|
-
- - "
|
164
|
+
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 0.9.
|
166
|
+
version: 0.9.2
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: unf_ext
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -205,10 +205,12 @@ files:
|
|
205
205
|
- grammars/reqt.rng
|
206
206
|
- grammars/un.rng
|
207
207
|
- lib/relaton_un.rb
|
208
|
+
- lib/relaton_un/editorialgroup.rb
|
208
209
|
- lib/relaton_un/hash_converter.rb
|
209
210
|
- lib/relaton_un/hit.rb
|
210
211
|
- lib/relaton_un/hit_collection.rb
|
211
212
|
- lib/relaton_un/processor.rb
|
213
|
+
- lib/relaton_un/session.rb
|
212
214
|
- lib/relaton_un/un_bibliographic_item.rb
|
213
215
|
- lib/relaton_un/un_bibliography.rb
|
214
216
|
- lib/relaton_un/version.rb
|
@@ -220,6 +222,7 @@ licenses:
|
|
220
222
|
- BSD-2-Clause
|
221
223
|
metadata:
|
222
224
|
homepage_uri: https://github.com/relaton/relaton-un
|
225
|
+
source_code_uri: https://github.com/relaton/relaton-un
|
223
226
|
post_install_message:
|
224
227
|
rdoc_options: []
|
225
228
|
require_paths:
|