relaton-un 0.1.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
- require "relaton_bib"
1
+ require "relaton_iso_bib"
2
2
  require "relaton_un/version"
3
3
  require "relaton_un/un_bibliographic_item"
4
4
  require "relaton_un/un_bibliography"
@@ -6,6 +6,8 @@ require "relaton_un/hit_collection"
6
6
  require "relaton_un/hit"
7
7
  require "relaton_un/hash_converter"
8
8
  require "relaton_un/xml_parser"
9
+ require "relaton_un/session"
10
+ require "relaton_un/editorialgroup"
9
11
 
10
12
  module RelatonUn
11
13
  class Error < StandardError; end
@@ -0,0 +1,25 @@
1
+ module RelatonUn
2
+ class EditorialGroup
3
+ include RelatonBib
4
+
5
+ # @return [Array<String>]
6
+ attr_reader :committee
7
+
8
+ # @param committee [Array<String>]
9
+ def initialize(committee)
10
+ @committee = committee
11
+ end
12
+
13
+ # @param builder [Nokogiri::XML::Builder]
14
+ def to_xml(builder)
15
+ builder.editorialgroup do |b|
16
+ committee.each { |c| b.committee c }
17
+ end
18
+ end
19
+
20
+ # @return [Array<Hash>, Hash]
21
+ def to_hash
22
+ single_element_array(committee.map { |c| { "committee" => c } })
23
+ end
24
+ end
25
+ end
@@ -1,5 +1,33 @@
1
1
  module RelatonUn
2
- class HashConverter < RelatonBib::HashConverter
2
+ class HashConverter < RelatonIsoBib::HashConverter
3
+ class << self
4
+ # @override RelatonIsoBib::HashConverter.hash_to_bib
5
+ # @param args [Hash]
6
+ # @param nested [TrueClass, FalseClass]
7
+ # @return [Hash]
8
+ def hash_to_bib(args, nested = false)
9
+ ret = super
10
+ return if ret.nil?
3
11
 
12
+ session_hash_to_bib ret
13
+ ret
14
+ end
15
+
16
+ private
17
+
18
+ # @param ret [Hash]
19
+ def session_hash_to_bib(ret)
20
+ ret[:session] = Session.new(ret[:session]) if ret[:session]
21
+ end
22
+
23
+ # @param ret [Hash]
24
+ def editorialgroup_hash_to_bib(ret)
25
+ eg = ret[:editorialgroup]
26
+ return unless eg
27
+
28
+ committee = eg.map { |e| e[:committee] }
29
+ ret[:editorialgroup] = EditorialGroup.new array(committee)
30
+ end
31
+ end
4
32
  end
5
33
  end
@@ -3,6 +3,51 @@
3
3
  module RelatonUn
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
+ # rubocop:disable Layout/LineLength
7
+
8
+ # There is distribution PRO (A/47/PV.102/CORR.1, A/47/PV.54)
9
+ BODY = {
10
+ "A" => "General Assembly",
11
+ "E" => "Economic and Social Council",
12
+ "S" => "Security Council",
13
+ "T" => "Trusteeship Council",
14
+ "ACC" => "Administrative Committee on Coordination",
15
+ "AT" => "United Nations Administrative Tribunal",
16
+ "CAT" => "Committee against Torture",
17
+ "CCPR" => "Human Rights Committee",
18
+ "CD" => "Conference on Disarmament",
19
+ "CEDAW" => "Committee on the Elimination of All Forms of Discrimination against Women",
20
+ "CERD" => "Committee on the Elimination of Racial Discrimination",
21
+ "CRC" => "Committee on the Rights of the Child",
22
+ "DC" => "Disarmament Commission",
23
+ "DP" => "United Nations Development Programme",
24
+ "HS" => "United Nations Centre for Human Settlements (HABITAT)",
25
+ "TD" => "United Nations Conference on Trade and Development",
26
+ "UNEP" => "United Nations Environment Programme",
27
+ "TRADE" => "Committee on Trade",
28
+ "CEFACT" => "Centre for Trade Facilitation and Electronic Business",
29
+ "C.1" => "Disarmament and International Security Committee",
30
+ "C.2" => "Economic and Financial Committee",
31
+ "C.3" => "Social, Humanitarian & Cultural Issues",
32
+ "C.4" => "Special Political and Decolonization Committee",
33
+ "C.5" => "Administrative and Budgetary Committee",
34
+ "C.6" => "Sixth Committee (Legal)",
35
+ "PC" => "Preparatory Committee",
36
+ "AEC" => "Atomic Energy Commission",
37
+ "AGRI" => "Committee on Agriculture",
38
+ "AMCEN" => "African Ministerial Conference on the Environment",
39
+ "AMCOW" => "African Ministers’ Council on Water",
40
+ "ECA" => "Economic Commission for Africa",
41
+ "ESCAP" => "Economic and Social Commission for Asia and Pacific",
42
+ "ECE" => "Economic Commission for Europe",
43
+ "ECWA" => "Economic Commission for Western Asia",
44
+ "UNFF" => "United Nations Forum on Forests",
45
+ "ENERGY" => "Committee on Sustainable Energy",
46
+ "FAO" => "Food and Agriculture Organization",
47
+ "UNCTAD" => "United Nations Conference on Trade and Development",
48
+ }.freeze
49
+ # rubocop:enable Layout/LineLength
50
+
6
51
  # Parse page.
7
52
  # @return [RelatonUn::UnBibliographicItem]
8
53
  def fetch
@@ -11,48 +56,96 @@ module RelatonUn
11
56
 
12
57
  private
13
58
 
59
+ # rubocop:disable Metrics/MethodLength
60
+
61
+ # @return [RelatonUn::UnBibliographicItem]
14
62
  def un_bib_item
15
63
  UnBibliographicItem.new(
16
64
  type: "standard",
17
65
  fetched: Date.today.to_s,
18
- docid: docid,
66
+ docid: fetch_docid,
19
67
  docnumber: hit[:ref],
20
68
  language: ["en"],
21
69
  script: ["Latn"],
22
- title: title,
23
- date: date,
24
- link: link,
25
- keyword: keyword
70
+ title: fetch_title,
71
+ date: fetch_date,
72
+ link: fetch_link,
73
+ keyword: fetch_keyword,
74
+ session: fetch_session,
75
+ distribution: fetch_distribution,
76
+ editorialgroup: fetch_editorialgroup,
77
+ classification: fetch_classification,
26
78
  )
27
79
  end
80
+ # rubocop:enable Metrics/MethodLength
28
81
 
29
82
  # @return [Array<RelatonBib::DocumentIdentifier>]
30
- def docid
31
- [RelatonBib::DocumentIdentifier.new(id: hit[:ref], type: "UN")]
83
+ def fetch_docid
84
+ hit[:symbol].map do |s|
85
+ RelatonBib::DocumentIdentifier.new(id: s, type: "UN")
86
+ end
32
87
  end
33
88
 
34
89
  # @return [Array<RelatonBib::TypedTitleString>]
35
- def title
36
- fs = RelatonBib::FormattedString.new(content: hit[:title], language: "en", script: "Latn")
37
- [RelatonBib::TypedTitleString.new(type: "main", title: fs)]
90
+ def fetch_title
91
+ # fs = RelatonBib::FormattedString.new(
92
+ # content: hit[:title], language: "en", script: "Latn",
93
+ # )
94
+ # [RelatonBib::TypedTitleString.new(type: "main", title: fs)]
95
+ [{ title_main: hit[:title], language: "en", script: "Latn" }]
38
96
  end
39
97
 
40
98
  # @return [Array<RelatonBib::BibliographicDate>]
41
- def date
99
+ def fetch_date
42
100
  d = []
43
- d << RelatonBib::BibliographicDate.new(type: "published", on: hit[:date_pub]) if hit[:date_pub]
44
- d << RelatonBib::BibliographicDate.new(type: "issued", on: hit[:date_rel]) if hit[:date_rel]
101
+ d << bibdate("published", hit[:date_pub]) if hit[:date_pub]
102
+ d << bibdate("issued", hit[:date_rel]) if hit[:date_rel]
45
103
  d
46
104
  end
47
105
 
106
+ # @param type [String]
107
+ # @param on [String]
108
+ # @return [RelatonBib::BibliographicDate]
109
+ def bibdate(type, on)
110
+ RelatonBib::BibliographicDate.new type: type, on: on
111
+ end
112
+
48
113
  # @return [Array<RelatonBib::TypedUri>]
49
- def link
114
+ def fetch_link
50
115
  hit[:link].map { |l| RelatonBib::TypedUri.new l }
51
116
  end
52
117
 
53
118
  # @return [Array<String>]
54
- def keyword
119
+ def fetch_keyword
55
120
  hit[:keyword].split(", ")
56
121
  end
122
+
123
+ # @return [RelatonUn::Session]
124
+ def fetch_session
125
+ Session.new(session_number: hit[:session], agenda_id: hit[:agenda])
126
+ end
127
+
128
+ # @return [String]
129
+ def fetch_distribution
130
+ UnBibliographicItem::DISTRIBUTIONS[hit[:distribution]]
131
+ end
132
+
133
+ # @return [RelatonUn::EditorialGroup, NilClass]
134
+ def fetch_editorialgroup
135
+ tc = hit[:ref].match(/^[\S]+/).to_s.split(/\/|-/).reduce([]) do |m, v|
136
+ if BODY[v] then m << BODY[v]
137
+ elsif v =~ /(AC|C|CN|CONF|GC|SC|Sub|WG).\d+|PC/ then m << v
138
+ else m
139
+ end
140
+ end.uniq
141
+ return unless tc.any?
142
+
143
+ RelatonUn::EditorialGroup.new tc
144
+ end
145
+
146
+ # @return [Array<RelatonBib::Classification>]
147
+ def fetch_classification
148
+ [RelatonBib::Classification.new(type: "area", value: "UNDOC")]
149
+ end
57
150
  end
58
151
  end
@@ -6,7 +6,8 @@ require "http-cookie"
6
6
  module RelatonUn
7
7
  # Page of hit collection.
8
8
  class HitCollection < RelatonBib::HitCollection
9
- AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
9
+ AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) "\
10
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
10
11
  DOMAIN = "https://documents.un.org"
11
12
  BOUNDARY = "----WebKitFormBoundary6hkaBvITDck8dHCn"
12
13
 
@@ -17,52 +18,10 @@ module RelatonUn
17
18
  @jar = HTTP::CookieJar.new
18
19
  @http = Net::HTTP.new @uri.host, @uri.port
19
20
  @http.use_ssl = true
21
+ @http.read_timeout = 120
20
22
  if (form_resp = get_page)
21
- form = Nokogiri::HTML form_resp.body
22
- form_data = form.xpath(
23
- "//input[@type!='radio']",
24
- "//input[@type='radio'][@checked]",
25
- "//select[@name!='view:_id1:_id2:cbLang']",
26
- "//textarea"
27
- ).reduce([]) do |m, i|
28
- v = case i[:name]
29
- when "view:_id1:_id2:txtSymbol" then text
30
- when "view:_id1:_id2:cbType" then "FP"
31
- when "view:_id1:_id2:cbSort" then "R"
32
- when "$$xspsubmitid" then "view:_id1:_id2:_id130"
33
- when "$$xspsubmitscroll" then "0|167"
34
- else i[:value]
35
- end
36
- m << %{--#{BOUNDARY}}
37
- m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
38
- end
39
- form_data << %{--#{BOUNDARY}--\r\n}
40
- req = Net::HTTP::Post.new form.at("//form")[:action]
41
- set_headers req
42
- req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
43
- req.body = form_data.join("\r\n")
44
- resp = @http.request req
45
- page_resp = get_page URI.parse(resp["location"]).request_uri
46
- doc = Nokogiri::HTML page_resp.body
47
- @array = doc.css("div.viewHover").map do |item|
48
- ref = item.at("div/div/a")&.text&.sub "\u00A0", ""
49
- title = item.at("div/div/span")&.text
50
- keyword = item.at("div[3]/div[5]/span")&.text
51
- date_pub = item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
52
- en = item.at("//span[.='ENGLISH']/../..")
53
- date_rel = en.at("./following-sibling::span[contains(@id, 'cfRelDateE')]").text
54
- link = en.xpath("//a[contains(@title, 'Open')]").map do |l|
55
- { content: l[:href], type: l[:title].match(/PDF|Word/).to_s.downcase }
56
- end
57
- Hit.new({
58
- ref: ref,
59
- title: title,
60
- keyword: keyword,
61
- date_pub: date_pub,
62
- date_rel: date_rel,
63
- link: link
64
- }, self)
65
- end
23
+ doc = Nokogiri::HTML page_resp(form_resp, text).body
24
+ @array = doc.css("div.viewHover").map { |item| hit item }
66
25
  end
67
26
  end
68
27
 
@@ -84,9 +43,125 @@ module RelatonUn
84
43
  get_page request_uri, deep + 1
85
44
  end
86
45
 
46
+ # rubocop:disable Metrics/MethodLength
47
+
48
+ # @param form [Nokogiri::HTML::Document]
49
+ # @param text [String]
50
+ # @return [Array<String>]
51
+ def form_data(form, text)
52
+ fd = form.xpath(
53
+ "//input[@type!='radio']",
54
+ "//input[@type='radio'][@checked]",
55
+ "//select[@name!='view:_id1:_id2:cbLang']",
56
+ "//textarea",
57
+ ).reduce([]) do |m, i|
58
+ v = case i[:name]
59
+ when "view:_id1:_id2:txtSymbol" then text
60
+ when "view:_id1:_id2:cbType" then "FP"
61
+ when "view:_id1:_id2:cbSort" then "R"
62
+ when "$$xspsubmitid" then "view:_id1:_id2:_id130"
63
+ when "$$xspsubmitscroll" then "0|167"
64
+ else i[:value]
65
+ end
66
+ m << %{--#{BOUNDARY}}
67
+ m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
68
+ end
69
+ fd << %{--#{BOUNDARY}--\r\n}
70
+ end
71
+ # rubocop:enable Metrics/MethodLength
72
+
73
+ # @param form_resp [Net::HTTPOK]
74
+ # @param text [String]
75
+ # @return [Net::HTTPOK]
76
+ def page_resp(form_resp, text)
77
+ form = Nokogiri::HTML form_resp.body
78
+ req = Net::HTTP::Post.new form.at("//form")[:action]
79
+ set_headers req
80
+ req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
81
+ req.body = form_data(form, text).join("\r\n")
82
+ resp = @http.request req
83
+ get_page URI.parse(resp["location"]).request_uri
84
+ end
85
+
86
+ # @param item [Nokogiri::XML::Element]
87
+ # @return [RelatonUn::Hit]
88
+ def hit(item)
89
+ Hit.new(hit_data(item), self)
90
+ end
91
+
92
+ # @param item [Nokogiri::XML::Element]
93
+ # @return [Hash]
94
+ def hit_data(item)
95
+ en = item.at("//span[.='ENGLISH']/../..")
96
+ {
97
+ ref: item.at("div/div/a")&.text&.sub("\u00A0", ""),
98
+ symbol: symbol(item),
99
+ title: item.at("div/div/span")&.text,
100
+ keyword: item.at("div[3]/div[5]/span")&.text,
101
+ date_pub: date_pub(item),
102
+ date_rel: date_rel(en),
103
+ link: link(en),
104
+ session: session(item),
105
+ agenda: agenda(item),
106
+ distribution: distribution(item)
107
+ }
108
+ end
109
+
110
+ # @param item [Nokogiri::XML::Element]
111
+ # @return [String]
112
+ def symbol(item)
113
+ item.xpath("div/div[not(contains(@class, 'hidden'))]/"\
114
+ "label[contains(.,'Symbol')]/following-sibling::span[1]").map &:text
115
+ end
116
+
117
+ # @param item [Nokogiri::XML::Element]
118
+ # @return [String]
119
+ def date_pub(item)
120
+ item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
121
+ end
122
+
123
+ # @param item [Nokogiri::XML::Element]
124
+ # @return [String]
125
+ def date_rel(item)
126
+ item.at("./following-sibling::span[contains(@id, 'cfRelDateE')]")&.text
127
+ end
128
+
129
+ # @param item [Nokogiri::XML::Element]
130
+ # @return [Array<Hash>]
131
+ def link(item)
132
+ item.xpath("//a[contains(@title, 'Open')]").map do |l|
133
+ {
134
+ content: l[:href],
135
+ type: l[:title].match(/PDF|Word/).to_s.downcase,
136
+ }
137
+ end
138
+ end
139
+
140
+ # @param item [Nokogiri::XML::Element]
141
+ # @return [String]
142
+ def session(item)
143
+ item.at("//label[.='Session / Year:']/following-sibling::span")&.text
144
+ end
145
+
146
+ # @param item [Nokogiri::XML::Element]
147
+ # @return [String]
148
+ def agenda(item)
149
+ item.at("//label[.='Agenda Item(s):']/following-sibling::span")&.text
150
+ end
151
+
152
+ # @param item [Nokogiri::XML::Element]
153
+ # @return [String]
154
+ def distribution(item)
155
+ item.at("//label[.='Distribution:']/following-sibling::span")&.text
156
+ end
157
+
158
+ # rubocop:disable Metrics/MethodLength
159
+
160
+ # @param req [Net::HTTP::Get, Net::HTTP::Post]
87
161
  def set_headers(req)
88
162
  set_cookie req
89
- req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
163
+ req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,"\
164
+ "image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
90
165
  req["Accept-Encoding"] = "gzip, deflate, br"
91
166
  req["Cache-Control"] = "max-age=0"
92
167
  req["Connection"] = "keep-alive"
@@ -98,7 +173,9 @@ module RelatonUn
98
173
  req["Upgrade-Insecure-Requests"] = "1"
99
174
  req["User-Agent"] = AGENT
100
175
  end
176
+ # rubocop:enable Metrics/MethodLength
101
177
 
178
+ # @param req [Net::HTTP::Get, Net::HTTP::Post]
102
179
  def set_cookie(req)
103
180
  req["Cookie"] = HTTP::Cookie.cookie_value @jar.cookies(@uri)
104
181
  end
@@ -0,0 +1,65 @@
1
+ module RelatonUn
2
+ class Session
3
+ include RelatonBib
4
+
5
+ # @return [String, NilClass]
6
+ attr_reader :session_number, :collaboration, :agenda_id, :item_footnote
7
+
8
+ # @return [Date, NilClass]
9
+ attr_reader :session_date
10
+
11
+ # @return [Array<String>]
12
+ attr_reader :item_number, :item_name, :subitem_name
13
+
14
+ # @param session_number [String]
15
+ # @param session_date [String]
16
+ # @param item_number [Array<String>]
17
+ # @pqrqm item_name [Array<String>]
18
+ # @pqrqm subitem_name [Array<String>]
19
+ # @param collaboration [String]
20
+ # @param agenda_id [String]
21
+ # @param item_footnote [String]
22
+ def initialize(**args)
23
+ @session_number = args[:session_number]
24
+ @session_date = Date.parse args[:session_date] if args[:session_date]
25
+ @item_number = args.fetch(:item_number, [])
26
+ @item_name = args.fetch(:item_name, [])
27
+ @subitem_name = args.fetch(:subitem_name, [])
28
+ @collaboration = args[:collaboration]
29
+ @agenda_id = args[:agenda_id]
30
+ @item_footnote = args[:item_footnote]
31
+ end
32
+
33
+ # rubocop:disable Metrics/AbcSize
34
+
35
+ # @param [Nokogiri::XML::Builder]
36
+ def to_xml(builder)
37
+ builder.session do |b|
38
+ b.number session_number if session_number
39
+ b.send "session-date", session_date.to_s if session_date
40
+ item_number.each { |n| b.send "item-number", n }
41
+ item_name.each { |n| b.send "item-name", n }
42
+ subitem_name.each { |n| b.send "subitem-name", n }
43
+ b.collaboration collaboration if collaboration
44
+ b.send "agenda-id", agenda_id if agenda_id
45
+ b.send "item-footnote", item_footnote if item_footnote
46
+ end
47
+ end
48
+
49
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
50
+ # @return [Hash]
51
+ def to_hash
52
+ hash = {}
53
+ hash["session_number"] = session_number if session_number
54
+ hash["session_date"] = session_date.to_s if session_date
55
+ hash["item_number"] = single_element_array(item_number) if item_number.any?
56
+ hash["item_name"] = single_element_array(item_name) if item_name.any?
57
+ hash["subitem_name"] = single_element_array(subitem_name) if subitem_name.any?
58
+ hash["collaboration"] = collaboration if collaboration
59
+ hash["agenda_id"] = agenda_id if agenda_id
60
+ hash["item_footnote"] = item_footnote if item_footnote
61
+ hash
62
+ end
63
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
64
+ end
65
+ end