relaton-un 0.1.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- require "relaton_bib"
1
+ require "relaton_iso_bib"
2
2
  require "relaton_un/version"
3
3
  require "relaton_un/un_bibliographic_item"
4
4
  require "relaton_un/un_bibliography"
@@ -6,6 +6,8 @@ require "relaton_un/hit_collection"
6
6
  require "relaton_un/hit"
7
7
  require "relaton_un/hash_converter"
8
8
  require "relaton_un/xml_parser"
9
+ require "relaton_un/session"
10
+ require "relaton_un/editorialgroup"
9
11
 
10
12
  module RelatonUn
11
13
  class Error < StandardError; end
@@ -0,0 +1,25 @@
1
module RelatonUn
  # Editorial group of a UN document: the committees/bodies attached to it.
  class EditorialGroup
    # Presumably mixed in for the single_element_array helper used in
    # #to_hash -- confirm against relaton-bib.
    include RelatonBib

    # @return [Array<String>] committee names
    attr_reader :committee

    # @param committee [Array<String>] committee names
    def initialize(committee)
      @committee = committee
    end

    # Emits an <editorialgroup> element holding one <committee> child per
    # entry of #committee.
    #
    # @param builder [Nokogiri::XML::Builder]
    def to_xml(builder)
      builder.editorialgroup do |xml|
        committee.each do |name|
          xml.committee name
        end
      end
    end

    # Wraps every committee name in a { "committee" => name } hash and hands
    # the list to single_element_array.
    #
    # @return [Array<Hash>, Hash]
    def to_hash
      entries = committee.map { |name| { "committee" => name } }
      single_element_array(entries)
    end
  end
end
@@ -1,5 +1,33 @@
1
1
  module RelatonUn
2
- class HashConverter < RelatonBib::HashConverter
2
+ class HashConverter < RelatonIsoBib::HashConverter
3
+ class << self
4
+ # @override RelatonIsoBib::HashConverter.hash_to_bib
5
+ # @param args [Hash]
6
+ # @param nested [TrueClass, FalseClass]
7
+ # @return [Hash]
8
+ def hash_to_bib(args, nested = false)
9
+ ret = super
10
+ return if ret.nil?
3
11
 
12
+ session_hash_to_bib ret
13
+ ret
14
+ end
15
+
16
+ private
17
+
18
+ # @param ret [Hash]
19
+ def session_hash_to_bib(ret)
20
+ ret[:session] = Session.new(ret[:session]) if ret[:session]
21
+ end
22
+
23
+ # @param ret [Hash]
24
+ def editorialgroup_hash_to_bib(ret)
25
+ eg = ret[:editorialgroup]
26
+ return unless eg
27
+
28
+ committee = eg.map { |e| e[:committee] }
29
+ ret[:editorialgroup] = EditorialGroup.new array(committee)
30
+ end
31
+ end
4
32
  end
5
33
  end
@@ -3,6 +3,51 @@
3
3
  module RelatonUn
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
# rubocop:disable Layout/LineLength

# Lookup table from a component of a UN document symbol to the name of the
# body it stands for; fetch_editorialgroup consults it for each part of the
# hit's reference.
#
# Original author's note: there is distribution PRO
# (A/47/PV.102/CORR.1, A/47/PV.54).
BODY = {
  # principal organs
  "A" => "General Assembly",
  "E" => "Economic and Social Council",
  "S" => "Security Council",
  "T" => "Trusteeship Council",

  # committees, programmes and other bodies
  "ACC" => "Administrative Committee on Coordination",
  "AT" => "United Nations Administrative Tribunal",
  "CAT" => "Committee against Torture",
  "CCPR" => "Human Rights Committee",
  "CD" => "Conference on Disarmament",
  "CEDAW" => "Committee on the Elimination of All Forms of Discrimination against Women",
  "CERD" => "Committee on the Elimination of Racial Discrimination",
  "CRC" => "Committee on the Rights of the Child",
  "DC" => "Disarmament Commission",
  "DP" => "United Nations Development Programme",
  "HS" => "United Nations Centre for Human Settlements (HABITAT)",
  "TD" => "United Nations Conference on Trade and Development",
  "UNEP" => "United Nations Environment Programme",
  "TRADE" => "Committee on Trade",
  "CEFACT" => "Centre for Trade Facilitation and Electronic Business",

  # numbered "C.n" committees
  "C.1" => "Disarmament and International Security Committee",
  "C.2" => "Economic and Financial Committee",
  "C.3" => "Social, Humanitarian & Cultural Issues",
  "C.4" => "Special Political and Decolonization Committee",
  "C.5" => "Administrative and Budgetary Committee",
  "C.6" => "Sixth Committee (Legal)",

  # further committees, commissions and agencies
  "PC" => "Preparatory Committee",
  "AEC" => "Atomic Energy Commission",
  "AGRI" => "Committee on Agriculture",
  "AMCEN" => "African Ministerial Conference on the Environment",
  "AMCOW" => "African Ministers’ Council on Water",
  "ECA" => "Economic Commission for Africa",
  "ESCAP" => "Economic and Social Commission for Asia and Pacific",
  "ECE" => "Economic Commission for Europe",
  "ECWA" => "Economic Commission for Western Asia",
  "UNFF" => "United Nations Forum on Forests",
  "ENERGY" => "Committee on Sustainable Energy",
  "FAO" => "Food and Agriculture Organization",
  "UNCTAD" => "United Nations Conference on Trade and Development",
}.freeze
# rubocop:enable Layout/LineLength
50
+
6
51
  # Parse page.
7
52
  # @return [RelatonUn::UnBibliographicItem]
8
53
  def fetch
@@ -11,48 +56,96 @@ module RelatonUn
11
56
 
12
57
  private
13
58
 
59
# rubocop:disable Metrics/MethodLength

# Assembles the bibliographic item for this hit: fixed attributes (type,
# language, script, today's fetch date) plus the values produced by the
# fetch_* helpers.
#
# @return [RelatonUn::UnBibliographicItem]
def un_bib_item
  attrs = {
    type: "standard",
    fetched: Date.today.to_s,
    docid: fetch_docid,
    docnumber: hit[:ref],
    language: ["en"],
    script: ["Latn"],
    title: fetch_title,
    date: fetch_date,
    link: fetch_link,
    keyword: fetch_keyword,
    session: fetch_session,
    distribution: fetch_distribution,
    editorialgroup: fetch_editorialgroup,
    classification: fetch_classification,
  }
  UnBibliographicItem.new(**attrs)
end
# rubocop:enable Metrics/MethodLength
28
81
 
29
82
  # @return [Array<RelatonBib::DocumentIdentifier>]
30
- def docid
31
- [RelatonBib::DocumentIdentifier.new(id: hit[:ref], type: "UN")]
83
+ def fetch_docid
84
+ hit[:symbol].map do |s|
85
+ RelatonBib::DocumentIdentifier.new(id: s, type: "UN")
86
+ end
32
87
  end
33
88
 
34
89
  # @return [Array<RelatonBib::TypedTitleString>]
35
- def title
36
- fs = RelatonBib::FormattedString.new(content: hit[:title], language: "en", script: "Latn")
37
- [RelatonBib::TypedTitleString.new(type: "main", title: fs)]
90
+ def fetch_title
91
+ # fs = RelatonBib::FormattedString.new(
92
+ # content: hit[:title], language: "en", script: "Latn",
93
+ # )
94
+ # [RelatonBib::TypedTitleString.new(type: "main", title: fs)]
95
+ [{ title_main: hit[:title], language: "en", script: "Latn" }]
38
96
  end
39
97
 
40
98
  # @return [Array<RelatonBib::BibliographicDate>]
41
- def date
99
+ def fetch_date
42
100
  d = []
43
- d << RelatonBib::BibliographicDate.new(type: "published", on: hit[:date_pub]) if hit[:date_pub]
44
- d << RelatonBib::BibliographicDate.new(type: "issued", on: hit[:date_rel]) if hit[:date_rel]
101
+ d << bibdate("published", hit[:date_pub]) if hit[:date_pub]
102
+ d << bibdate("issued", hit[:date_rel]) if hit[:date_rel]
45
103
  d
46
104
  end
47
105
 
106
# Wraps a date value in a RelatonBib::BibliographicDate of the given type.
#
# @param type [String] date type, e.g. "published" or "issued"
# @param on [String] the date value
# @return [RelatonBib::BibliographicDate]
def bibdate(type, on)
  RelatonBib::BibliographicDate.new(type: type, on: on)
end
112
+
48
113
  # @return [Array<RelatonBib::TypedUri>]
49
- def link
114
+ def fetch_link
50
115
  hit[:link].map { |l| RelatonBib::TypedUri.new l }
51
116
  end
52
117
 
53
118
  # @return [Array<String>]
54
- def keyword
119
+ def fetch_keyword
55
120
  hit[:keyword].split(", ")
56
121
  end
122
+
123
# Builds the session object from the hit's :session and :agenda values.
#
# @return [RelatonUn::Session]
def fetch_session
  attrs = { session_number: hit[:session], agenda_id: hit[:agenda] }
  Session.new(**attrs)
end
+
128
# Looks the hit's distribution value up in
# UnBibliographicItem::DISTRIBUTIONS.
#
# @return [String, NilClass] the mapped value; nil when the table has no
#   entry for the hit's value
def fetch_distribution
  code = hit[:distribution]
  UnBibliographicItem::DISTRIBUTIONS[code]
end
+
133
# Derives the editorial group from the hit's reference.
#
# The leading non-whitespace run of the reference is split on "/" and "-".
# Parts found in BODY contribute the body's name; parts matching the
# committee pattern contribute themselves; everything else is dropped.
# A missing (nil) reference yields nil instead of raising.
#
# @return [RelatonUn::EditorialGroup, NilClass] nil when nothing matched
def fetch_editorialgroup
  symbol = hit[:ref].to_s[/^[\S]+/].to_s
  committees = symbol.split(/\/|-/).each_with_object([]) do |part, found|
    # NOTE(review): the pattern is unanchored and its "." is unescaped, so it
    # matches anywhere inside a part; kept as-is to preserve current results.
    if BODY[part] then found << BODY[part]
    elsif /(AC|C|CN|CONF|GC|SC|Sub|WG).\d+|PC/.match?(part) then found << part
    end
  end.uniq
  return if committees.empty?

  RelatonUn::EditorialGroup.new committees
end
145
+
146
# Every hit gets the same single classification: type "area", value "UNDOC".
#
# @return [Array<RelatonBib::Classification>]
def fetch_classification
  undoc = RelatonBib::Classification.new(type: "area", value: "UNDOC")
  [undoc]
end
57
150
  end
58
151
  end
@@ -6,7 +6,8 @@ require "http-cookie"
6
6
  module RelatonUn
7
7
  # Page of hit collection.
8
8
  class HitCollection < RelatonBib::HitCollection
9
- AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
9
+ AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) "\
10
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
10
11
  DOMAIN = "https://documents.un.org"
11
12
  BOUNDARY = "----WebKitFormBoundary6hkaBvITDck8dHCn"
12
13
 
@@ -17,52 +18,10 @@ module RelatonUn
17
18
  @jar = HTTP::CookieJar.new
18
19
  @http = Net::HTTP.new @uri.host, @uri.port
19
20
  @http.use_ssl = true
21
+ @http.read_timeout = 120
20
22
  if (form_resp = get_page)
21
- form = Nokogiri::HTML form_resp.body
22
- form_data = form.xpath(
23
- "//input[@type!='radio']",
24
- "//input[@type='radio'][@checked]",
25
- "//select[@name!='view:_id1:_id2:cbLang']",
26
- "//textarea"
27
- ).reduce([]) do |m, i|
28
- v = case i[:name]
29
- when "view:_id1:_id2:txtSymbol" then text
30
- when "view:_id1:_id2:cbType" then "FP"
31
- when "view:_id1:_id2:cbSort" then "R"
32
- when "$$xspsubmitid" then "view:_id1:_id2:_id130"
33
- when "$$xspsubmitscroll" then "0|167"
34
- else i[:value]
35
- end
36
- m << %{--#{BOUNDARY}}
37
- m << %{Content-Disposition: form-data; name="#{i[:name]}"\r\n\r\n#{v}}
38
- end
39
- form_data << %{--#{BOUNDARY}--\r\n}
40
- req = Net::HTTP::Post.new form.at("//form")[:action]
41
- set_headers req
42
- req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
43
- req.body = form_data.join("\r\n")
44
- resp = @http.request req
45
- page_resp = get_page URI.parse(resp["location"]).request_uri
46
- doc = Nokogiri::HTML page_resp.body
47
- @array = doc.css("div.viewHover").map do |item|
48
- ref = item.at("div/div/a")&.text&.sub "\u00A0", ""
49
- title = item.at("div/div/span")&.text
50
- keyword = item.at("div[3]/div[5]/span")&.text
51
- date_pub = item.at("//label[.='Publication Date: ']/following-sibling::span")&.text
52
- en = item.at("//span[.='ENGLISH']/../..")
53
- date_rel = en.at("./following-sibling::span[contains(@id, 'cfRelDateE')]").text
54
- link = en.xpath("//a[contains(@title, 'Open')]").map do |l|
55
- { content: l[:href], type: l[:title].match(/PDF|Word/).to_s.downcase }
56
- end
57
- Hit.new({
58
- ref: ref,
59
- title: title,
60
- keyword: keyword,
61
- date_pub: date_pub,
62
- date_rel: date_rel,
63
- link: link
64
- }, self)
65
- end
23
+ doc = Nokogiri::HTML page_resp(form_resp, text).body
24
+ @array = doc.css("div.viewHover").map { |item| hit item }
66
25
  end
67
26
  end
68
27
 
@@ -84,9 +43,125 @@ module RelatonUn
84
43
  get_page request_uri, deep + 1
85
44
  end
86
45
 
46
# rubocop:disable Metrics/MethodLength

# Builds the parts of the multipart search request from the fields of the
# search form. A handful of fields get fixed values (the symbol field gets
# +text+); every other field keeps the value found in the form.
#
# @param form [Nokogiri::HTML::Document] parsed search form page
# @param text [String] document symbol to search for
# @return [Array<String>] boundary and content-disposition lines, ending
#   with the closing boundary
def form_data(form, text)
  overrides = {
    "view:_id1:_id2:txtSymbol" => text,
    "view:_id1:_id2:cbType" => "FP",
    "view:_id1:_id2:cbSort" => "R",
    "$$xspsubmitid" => "view:_id1:_id2:_id130",
    "$$xspsubmitscroll" => "0|167",
  }
  fields = form.xpath(
    "//input[@type!='radio']",
    "//input[@type='radio'][@checked]",
    "//select[@name!='view:_id1:_id2:cbLang']",
    "//textarea",
  )
  parts = fields.flat_map do |field|
    name = field[:name]
    value = overrides.fetch(name) { field[:value] }
    [
      %{--#{BOUNDARY}},
      %{Content-Disposition: form-data; name="#{name}"\r\n\r\n#{value}},
    ]
  end
  parts << %{--#{BOUNDARY}--\r\n}
end
# rubocop:enable Metrics/MethodLength
72
+
73
# Submits the search form and fetches the page the server redirects to.
#
# @param form_resp [Net::HTTPOK] response carrying the search form
# @param text [String] document symbol to search for
# @return [Net::HTTPOK] whatever get_page returns for the redirect target
def page_resp(form_resp, text)
  form = Nokogiri::HTML(form_resp.body)
  post = Net::HTTP::Post.new(form.at("//form")[:action]).tap do |req|
    set_headers req
    # NOTE(review): media-type parameters are normally separated by ";";
    # the "," is kept because it is what is currently sent to the server.
    req["Content-Type"] = "multipart/form-data, boundary=#{BOUNDARY}"
    req.body = form_data(form, text).join("\r\n")
  end
  # NOTE(review): assumes the POST is answered with a redirect; a response
  # without a "location" header makes URI.parse raise.
  redirect = @http.request(post)["location"]
  get_page URI.parse(redirect).request_uri
end
85
+
86
# Wraps the data scraped from one result element in a Hit that belongs to
# this collection.
#
# @param item [Nokogiri::XML::Element] one search result element
# @return [RelatonUn::Hit]
def hit(item)
  data = hit_data(item)
  Hit.new(data, self)
end
91
+
92
# Scrapes the attributes of one search result.
#
# The English-language row is looked up relative to +item+ (".//"): an
# absolute "//" path is evaluated from the document root and would resolve
# to the first English row of the page for every result. When the result has
# no English row, the release date is nil and the link list is empty.
# NOTE(review): assumes the 'ENGLISH' span is a descendant of the result
# element -- confirm against the live markup.
#
# @param item [Nokogiri::XML::Element] one search result element
# @return [Hash] keys :ref, :symbol, :title, :keyword, :date_pub, :date_rel,
#   :link, :session, :agenda, :distribution
def hit_data(item)
  en = item.at(".//span[.='ENGLISH']/../..")
  {
    # the reference text carries a non-breaking space; strip the first one
    ref: item.at("div/div/a")&.text&.sub("\u00A0", ""),
    symbol: symbol(item),
    title: item.at("div/div/span")&.text,
    keyword: item.at("div[3]/div[5]/span")&.text,
    date_pub: date_pub(item),
    date_rel: en && date_rel(en),
    link: en ? link(en) : [],
    session: session(item),
    agenda: agenda(item),
    distribution: distribution(item),
  }
end
109
+
110
# Collects the text of the span following each "Symbol" label found in the
# non-hidden blocks of a result.
#
# @param item [Nokogiri::XML::Element] one search result element
# @return [Array<String>]
def symbol(item)
  spans = item.xpath(
    "div/div[not(contains(@class, 'hidden'))]/"\
    "label[contains(.,'Symbol')]/following-sibling::span[1]",
  )
  spans.map(&:text)
end
116
+
117
# Reads the publication date of one result.
#
# The label is searched relative to +item+ (".//"); an absolute "//" path
# is evaluated from the document root and returns the first match of the
# whole page for every result.
#
# @param item [Nokogiri::XML::Element] one search result element
# @return [String, NilClass] nil when the result has no such label
def date_pub(item)
  item.at(".//label[.='Publication Date: ']/following-sibling::span")&.text
end
122
+
123
# Reads the release date from the sibling span of the English-language row
# whose id contains 'cfRelDateE'.
#
# @param item [Nokogiri::XML::Element, NilClass] English-language row of a
#   result; nil when the result has none
# @return [String, NilClass] nil when the row or the span is missing
def date_rel(item)
  return unless item

  item.at("./following-sibling::span[contains(@id, 'cfRelDateE')]")&.text
end
128
+
129
# Collects the download links (anchors whose title contains 'Open').
# The link type is "pdf" or "word" depending on the anchor title, or an
# empty string when the title names neither.
#
# NOTE(review): the "//a" path is absolute, so it matches anchors of the
# whole document rather than only those of +item+; left unchanged because
# the position of the anchors relative to the row is not visible here.
#
# @param item [Nokogiri::XML::Element, NilClass] English-language row of a
#   result; nil when the result has none
# @return [Array<Hash>] hashes with :content (href) and :type
def link(item)
  return [] unless item

  item.xpath("//a[contains(@title, 'Open')]").map do |anchor|
    {
      content: anchor[:href],
      type: anchor[:title][/PDF|Word/].to_s.downcase,
    }
  end
end
139
+
140
# Reads the "Session / Year" value of one result.
#
# The label is searched relative to +item+ (".//"); an absolute "//" path
# is evaluated from the document root and returns the first match of the
# whole page for every result.
#
# @param item [Nokogiri::XML::Element] one search result element
# @return [String, NilClass] nil when the result has no such label
def session(item)
  item.at(".//label[.='Session / Year:']/following-sibling::span")&.text
end
145
+
146
# Reads the "Agenda Item(s)" value of one result.
#
# The label is searched relative to +item+ (".//"); an absolute "//" path
# is evaluated from the document root and returns the first match of the
# whole page for every result.
#
# @param item [Nokogiri::XML::Element] one search result element
# @return [String, NilClass] nil when the result has no such label
def agenda(item)
  item.at(".//label[.='Agenda Item(s):']/following-sibling::span")&.text
end
151
+
152
# Reads the "Distribution" value of one result.
#
# The label is searched relative to +item+ (".//"); an absolute "//" path
# is evaluated from the document root and returns the first match of the
# whole page for every result.
#
# @param item [Nokogiri::XML::Element] one search result element
# @return [String, NilClass] nil when the result has no such label
def distribution(item)
  item.at(".//label[.='Distribution:']/following-sibling::span")&.text
end
157
+
158
+ # rubocop:disable Metrics/MethodLength
159
+
160
+ # @param req [Net::HTTP::Get, Net::HTTP::Post]
87
161
  def set_headers(req)
88
162
  set_cookie req
89
- req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
163
+ req["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,"\
164
+ "image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
90
165
  req["Accept-Encoding"] = "gzip, deflate, br"
91
166
  req["Cache-Control"] = "max-age=0"
92
167
  req["Connection"] = "keep-alive"
@@ -98,7 +173,9 @@ module RelatonUn
98
173
  req["Upgrade-Insecure-Requests"] = "1"
99
174
  req["User-Agent"] = AGENT
100
175
  end
176
+ # rubocop:enable Metrics/MethodLength
101
177
 
178
# Sets the request's "Cookie" header from the cookies the jar holds for the
# collection's URI.
#
# @param req [Net::HTTP::Get, Net::HTTP::Post]
def set_cookie(req)
  cookies = @jar.cookies(@uri)
  req["Cookie"] = HTTP::Cookie.cookie_value(cookies)
end
@@ -0,0 +1,65 @@
1
module RelatonUn
  # Session metadata of a UN document (session number and date, agenda
  # items, collaboration, agenda id, item footnote).
  class Session
    # Presumably mixed in for the single_element_array helper used in
    # #to_hash -- confirm against relaton-bib.
    include RelatonBib

    # @return [String, NilClass]
    attr_reader :session_number, :collaboration, :agenda_id, :item_footnote

    # @return [Date, NilClass]
    attr_reader :session_date

    # @return [Array<String>]
    attr_reader :item_number, :item_name, :subitem_name

    # All arguments are optional keywords.
    #
    # The three list attributes accept an Array, a single String or nil:
    # #to_hash collapses one-element lists, so data read back from a hash may
    # carry a bare String where a list is expected.
    #
    # @param session_number [String]
    # @param session_date [String, Date]
    # @param item_number [Array<String>, String]
    # @param item_name [Array<String>, String]
    # @param subitem_name [Array<String>, String]
    # @param collaboration [String]
    # @param agenda_id [String]
    # @param item_footnote [String]
    def initialize(**args)
      @session_number = args[:session_number]
      @session_date = parse_date(args[:session_date])
      @item_number = Array(args[:item_number])
      @item_name = Array(args[:item_name])
      @subitem_name = Array(args[:subitem_name])
      @collaboration = args[:collaboration]
      @agenda_id = args[:agenda_id]
      @item_footnote = args[:item_footnote]
    end

    # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

    # Emits a <session> element; child elements are written only for the
    # attributes that are set.
    #
    # @param builder [Nokogiri::XML::Builder]
    def to_xml(builder)
      builder.session do |b|
        b.number session_number if session_number
        # hyphenated element names are not valid method names, hence `send`
        b.send "session-date", session_date.to_s if session_date
        item_number.each { |n| b.send "item-number", n }
        item_name.each { |n| b.send "item-name", n }
        subitem_name.each { |n| b.send "subitem-name", n }
        b.collaboration collaboration if collaboration
        b.send "agenda-id", agenda_id if agenda_id
        b.send "item-footnote", item_footnote if item_footnote
      end
    end

    # Hash form with string keys; unset attributes and empty lists are left
    # out, and lists are passed through single_element_array.
    #
    # @return [Hash]
    def to_hash
      hash = {}
      hash["session_number"] = session_number if session_number
      hash["session_date"] = session_date.to_s if session_date
      hash["item_number"] = single_element_array(item_number) if item_number.any?
      hash["item_name"] = single_element_array(item_name) if item_name.any?
      hash["subitem_name"] = single_element_array(subitem_name) if subitem_name.any?
      hash["collaboration"] = collaboration if collaboration
      hash["agenda_id"] = agenda_id if agenda_id
      hash["item_footnote"] = item_footnote if item_footnote
      hash
    end
    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

    private

    # Accepts a ready-made Date as well as a date string; Date.parse itself
    # rejects Date arguments.
    #
    # @param value [String, Date, NilClass]
    # @return [Date, NilClass]
    def parse_date(value)
      return unless value

      value.is_a?(Date) ? value : Date.parse(value.to_s)
    end
  end
end