relaton-w3c 1.11.3 → 1.11.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4c96856baa51c84d8397b62be6dee1b8225e7ab854cd70176187e68fc269a23c
4
- data.tar.gz: 54d5cc018e0de6ecca6d402ccbe41947047190d08e922c8f46b7cb20164cea22
3
+ metadata.gz: 2c318668fd5a7ef93b5275ac02cfadc9b91832eddeccf5996bc5ea65fc5272b1
4
+ data.tar.gz: 431ee27aec817b6d352e5410e2f4bf63710ecfcff47813e03d7e3302a7b42ecd
5
5
  SHA512:
6
- metadata.gz: 070eb14907a49f99b7c0f45841d83250c1c051cd9a16b9cddfc83f6d874274f401ac95c9554f6994c6702e557ef8a200e576ee5fe1d0b7c7f7583afb53074794
7
- data.tar.gz: cab9e48e248b889c5d15e3449a93dc5cb100b7680a87f025ebfd090a1afba36e77730d9e0bc85a10bc9208b9ea2d813e444e0a28b2ac77d412650395479468f3
6
+ metadata.gz: 4254d592bcc1469a7a8773d641e192d07be6d4b0ce247e2b93be490717fc20dc4457718604a5353cf5b0cef6231f2920b03288b776f186df5a8645c5f453d5ed
7
+ data.tar.gz: 0c5b07d1efb4f1df3c1505c0b8d2558e7c9bd12d80d1fe7ea483e313f4462a1a1ae8010587f18772dc0288ab0e9573724a9c671d9985427adce5307354f20ee9
@@ -67,59 +67,6 @@ module RelatonW3c
67
67
  @index.sort!.save
68
68
  end
69
69
 
70
- #
71
- # Create index file
72
- #
73
- # def create_index
74
- # index_file = "index-w3c.yaml"
75
- # index_yaml = @index.sort do |a, b|
76
- # compare_index_items a, b
77
- # end.to_yaml
78
- # File.write index_file, index_yaml, encoding: "UTF-8"
79
- # end
80
-
81
- #
82
- # Compare index items
83
- #
84
- # @param [Hash] aid first item
85
- # @param [Hash] bid second item
86
- #
87
- # @return [Integer] comparison result
88
- #
89
- # def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
90
- # ret = aid[:code] <=> bid[:code]
91
- # ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
92
- # ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
93
- # # ret = aid[:type] <=> bid[:type] if ret.zero?
94
- # ret
95
- # end
96
-
97
- #
98
- # Weight of stage
99
- #
100
- # @param [String, nil] stage stage
101
- #
102
- # @return [Integer] weight
103
- #
104
- # def stage_weight(stage)
105
- # return DataParser::STAGES.size if stage.nil?
106
-
107
- # DataParser::STAGES.keys.index(stage)
108
- # end
109
-
110
- #
111
- # Weight of date
112
- #
113
- # @param [String] date date
114
- #
115
- # @return [String] weight
116
- #
117
- # def date_weight(date)
118
- # return "99999999" if date.nil?
119
-
120
- # date
121
- # end
122
-
123
70
  #
124
71
  # Query RDF source for documents
125
72
  #
@@ -13,25 +13,6 @@ module RelatonW3c
13
13
  @index = index
14
14
  end
15
15
 
16
- #
17
- # Create index from a GitHub repository
18
- #
19
- # @return [RelatonW3c::DataIndex] data index
20
- #
21
- def self.create_from_repo # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
22
- resp = Zip::InputStream.new URI("#{W3cBibliography::SOURCE}index-w3c.zip").open
23
- zip = resp.get_next_entry
24
-
25
- # Newer versions of Psych use the `permitted_classes:` parameter
26
- index = if YAML.method(:safe_load).parameters.collect(&:last).index(:permitted_classes)
27
- YAML.safe_load(zip.get_input_stream.read, permitted_classes: [Symbol])
28
- else
29
- YAML.safe_load(zip.get_input_stream.read, [Symbol])
30
- end
31
-
32
- DataIndex.new index: index
33
- end
34
-
35
16
  #
36
17
  # Add document to index
37
18
  #
@@ -39,7 +20,9 @@ module RelatonW3c
39
20
  # @param [String] file path to document file
40
21
  #
41
22
  def add(docnumber, file)
42
- @index << docnumber_to_parts(docnumber, file)
23
+ dnparts = self.class.docnumber_to_parts docnumber
24
+ dnparts[:file] = file
25
+ @index << dnparts
43
26
  end
44
27
 
45
28
  #
@@ -67,11 +50,14 @@ module RelatonW3c
67
50
  # @return [String] document's filename
68
51
  #
69
52
  def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
70
- dparts = docnumber_to_parts(ref)
53
+ dparts = self.class.docnumber_to_parts(ref)
54
+ return if dparts[:code].nil?
55
+
71
56
  @index.detect do |parts|
72
57
  parts[:code].match?(/^#{Regexp.escape dparts[:code]}/i) &&
73
58
  (dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage])) &&
74
- (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type])) &&
59
+ (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type]) ||
60
+ (parts[:type].nil? && dparts[:type] == "TR")) &&
75
61
  (dparts[:date].nil? || dparts[:date] == parts[:date]) &&
76
62
  (dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff]))
77
63
  end&.fetch(:file)
@@ -119,28 +105,47 @@ module RelatonW3c
119
105
  date
120
106
  end
121
107
 
122
- #
123
- # Parse document number to parts
124
- #
125
- # @param [String] docnumber document number
126
- # @param [String, nil] file path to document file
127
- #
128
- # @return [Hash{Symbol=>String}] document parts
129
- #
130
- def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
131
- %r{
132
- ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
133
- (?<code>\w+(?:[+-][\w.]+)*?)
134
- (?:-(?<date>\d{8}|\d{6}))?
135
- (?:/(?<suff>\w+))?$
136
- }xi =~ docnumber
137
- entry = { code: code }
138
- entry[:file] = file if file
139
- entry[:stage] = stage if stage
140
- entry[:type] = type if type
141
- entry[:date] = date if date
142
- entry[:suff] = suff if suff
143
- entry
108
+ class << self
109
+ #
110
+ # Create index from a GitHub repository
111
+ #
112
+ # @return [RelatonW3c::DataIndex] data index
113
+ #
114
+ def create_from_repo # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
115
+ resp = Zip::InputStream.new URI("#{W3cBibliography::SOURCE}index-w3c.zip").open
116
+ zip = resp.get_next_entry
117
+
118
+ # Newer versions of Psych use the `permitted_classes:` parameter
119
+ index = if YAML.method(:safe_load).parameters.collect(&:last).index(:permitted_classes)
120
+ YAML.safe_load(zip.get_input_stream.read, permitted_classes: [Symbol])
121
+ else
122
+ YAML.safe_load(zip.get_input_stream.read, [Symbol])
123
+ end
124
+
125
+ DataIndex.new index: index
126
+ end
127
+
128
+ #
129
+ # Parse document number to parts
130
+ #
131
+ # @param [String] docnumber document number
132
+ #
133
+ # @return [Hash{Symbol=>String}] document parts
134
+ #
135
+ def docnumber_to_parts(docnumber) # rubocop:disable Metrics/MethodLength
136
+ %r{
137
+ ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE|TR))-)?
138
+ (?<code>\w+(?:[+-][\w.]+)*?)
139
+ (?:-(?<date>\d{8}|\d{6}))?
140
+ (?:/(?<suff>\w+))?$
141
+ }xi =~ docnumber
142
+ entry = { code: code }
143
+ entry[:stage] = stage if stage
144
+ entry[:type] = type if type
145
+ entry[:date] = date if date
146
+ entry[:suff] = suff if suff
147
+ entry
148
+ end
144
149
  end
145
150
  end
146
151
  end
@@ -169,7 +169,7 @@ module RelatonW3c
169
169
  #
170
170
  def type
171
171
  # there are many types, we need to find the right one
172
- @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
172
+ @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) } || "technicalReport"
173
173
  end
174
174
 
175
175
  #
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.11.3".freeze
2
+ VERSION = "1.11.4".freeze
3
3
  end
@@ -9,10 +9,10 @@ module RelatonW3c
9
9
 
10
10
  class << self
11
11
  # @param text [String]
12
- # @return [RelatonW3c::HitCollection]
13
- def search(text) # rubocop:disable Metrics/MethodLength
12
+ # @return [RelatonW3c::W3cBibliographicItem]
13
+ def search(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
14
14
  ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
15
- file = DataIndex.create_from_repo.search(ref)
15
+ file = DataIndex.create_from_repo.search ref.gsub(" ", "-").squeeze("-")
16
16
  return unless file
17
17
 
18
18
  url = "#{SOURCE}#{file}"
data/lib/relaton_w3c.rb CHANGED
@@ -2,9 +2,6 @@ require "relaton_bib"
2
2
  require "relaton_w3c/version"
3
3
  require "relaton_w3c/w3c_bibliography"
4
4
  require "relaton_w3c/w3c_bibliographic_item"
5
- # require "relaton_w3c/hit_collection"
6
- # require "relaton_w3c/hit"
7
- # require "relaton_w3c/scrapper"
8
5
  require "relaton_w3c/xml_parser"
9
6
  require "relaton_w3c/bibxml_parser"
10
7
  require "relaton_w3c/hash_converter"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.3
4
+ version: 1.11.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-20 00:00:00.000000000 Z
11
+ date: 2022-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -247,10 +247,7 @@ files:
247
247
  - lib/relaton_w3c/data_index.rb
248
248
  - lib/relaton_w3c/data_parser.rb
249
249
  - lib/relaton_w3c/hash_converter.rb
250
- - lib/relaton_w3c/hit.rb
251
- - lib/relaton_w3c/hit_collection.rb
252
250
  - lib/relaton_w3c/processor.rb
253
- - lib/relaton_w3c/scrapper.rb
254
251
  - lib/relaton_w3c/version.rb
255
252
  - lib/relaton_w3c/w3c_bibliographic_item.rb
256
253
  - lib/relaton_w3c/w3c_bibliography.rb
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RelatonW3c
4
- # Hit.
5
- class Hit < RelatonBib::Hit
6
- #
7
- # Parse page.
8
- #
9
- # @param lang [String, NilClass]
10
- # @return [RelatonW3c::W3cBibliographicItem]
11
- def fetch(_lang = nil)
12
- @fetch ||= Scrapper.parse_page hit
13
- end
14
- end
15
- end
@@ -1,172 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "fileutils"
4
- require "yaml"
5
-
6
- module RelatonW3c
7
- # Page of hit collection.
8
- class HitCollection < RelatonBib::HitCollection
9
- TYPES = {
10
- "CR" => "Candidate Recommendation",
11
- "NOTE" => "Group Note",
12
- "PER" => "Proposed Edited Recommendation",
13
- "PR" => "Proposed Recommendation",
14
- "REC" => "Recommendation",
15
- "RET" => "Retired",
16
- "WD" => "Working Draft",
17
- }.freeze
18
- DOMAIN = "https://www.w3.org"
19
- DATADIR = File.expand_path(".relaton/w3c", Dir.home).freeze
20
- DATAFILE = File.expand_path("bibliography.yml", DATADIR).freeze
21
-
22
- # @param ref [String] reference to search
23
- def initialize(ref)
24
- %r{
25
- ^(?:W3C\s)?
26
- (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
- Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
- Recommendation|Retired|Working\sDraft))? # type
29
- \s?
30
- (?<title_date>.+) # title_date
31
- }x =~ ref
32
- super
33
- @array = from_yaml title_date, type
34
- end
35
-
36
- private
37
-
38
- #
39
- # Fetch data from yaml
40
- #
41
- # @param title_date [String]
42
- # @param type [String]
43
- # @return [Array<Hash>]
44
- def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
- /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
- title ||= title_date
47
- result = data.select do |hit|
48
- (hit["title"].casecmp?(title) ||
49
- hit["link"].split("/").last.match?(/-#{title}-/)) &&
50
- type_date_filter(hit, type, date)
51
- end
52
- if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
- end
55
- result.map { |h| Hit.new(h, self) }
56
- end
57
-
58
- # @param hit [Hash]
59
- # @param type [String]
60
- # @param date [String]
61
- # @return [TrueClass, FalseClass]
62
- def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
63
- if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
64
- history = get_history hit, type, date
65
- return false unless history.any?
66
-
67
- hit["type"] = short_type type
68
- hit["datepub"] = history.first.at("td").text
69
- hit["link"] = history.first.at("a")[:href]
70
- end
71
- true
72
- end
73
-
74
- # @param hit [Hash]
75
- # @param type [String]
76
- # @param date [String]
77
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
78
- def get_history(hit, type, date)
79
- resp = Net::HTTP.get URI.parse(HitCollection::DOMAIN + hit["history"])
80
- history_doc = Nokogiri::HTML resp
81
- history = history_doc.xpath(
82
- "//table//a[contains(.,'#{long_type(type)}')]/../..",
83
- )
84
- return filter_history_by_date(history, history_doc, type, date) if date
85
-
86
- history
87
- end
88
-
89
- # @param history [Nokogiri::XML::NodeSet]
90
- # @param history_doc [Nokogiri::HTML::NodeSet]
91
- # @param type [String]
92
- # @param date [String]
93
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
94
- def filter_history_by_date(history, history_doc, type, date)
95
- if type
96
- history.select do |h|
97
- h.at("td[@class='table_datecol']").text == date
98
- end
99
- else
100
- history_doc.xpath(
101
- "//table//td[@class='table_datecol'][.='#{date}']/..",
102
- )
103
- end
104
- end
105
-
106
- #
107
- # Convert long type name to short
108
- #
109
- # @param type [String]
110
- # @return [String]
111
- def short_type(type)
112
- tp = TYPES.select { |_, v| v == type }.keys
113
- tp.first || type
114
- end
115
-
116
- #
117
- # Convert short type name to long
118
- #
119
- # @param [String]
120
- # @return [String]
121
- def long_type(type)
122
- TYPES[type] || type
123
- end
124
-
125
- #
126
- # Fetches YAML data
127
- #
128
- # @return [Hash]
129
- def data
130
- FileUtils.mkdir_p DATADIR
131
- ctime = File.ctime DATAFILE if File.exist? DATAFILE
132
- fetch_data if !ctime || ctime.to_date < Date.today
133
- @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
134
- end
135
-
136
- #
137
- # fetch data from server and save it to file.
138
- #
139
- def fetch_data
140
- resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
- # return if there aren't any changes since last fetching
142
- return unless resp.code == "200"
143
-
144
- doc = Nokogiri::HTML resp.body
145
- @data = doc.xpath("//ul[@id='container']/li").map do |h_el|
146
- link = h_el.at("h2/a")
147
- pubdetails = h_el.at("p[@class='pubdetails']")
148
- fetch_hit h_el, link, pubdetails
149
- end
150
- File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
151
- end
152
-
153
- # @param h_el [Nokogiri::XML::Element]
154
- # @param link [Nokogiri::XML::Element]
155
- # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
- datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
- editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
- keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
160
- {
161
- "title" => link.text.gsub("\u00a0", " "),
162
- "link" => link[:href],
163
- "type" => h_el.at("div").text.upcase,
164
- "workgroup" => h_el.xpath("p[@class='deliverer']").map(&:text),
165
- "datepub" => datepub,
166
- "history" => pubdetails.at("a[text()='History']")[:href],
167
- "editor" => editor,
168
- "keyword" => keyword,
169
- }
170
- end
171
- end
172
- end
@@ -1,218 +0,0 @@
1
- module RelatonW3c
2
- class Scrapper
3
- DOCTYPES = {
4
- "CR" => "candidateRecommendation",
5
- "NOTE" => "groupNote",
6
- "PER" => "proposedEditedRecommendation",
7
- "PR" => "proposedRecommendation",
8
- "REC" => "recommendation",
9
- "RET" => "retired",
10
- "WD" => "workingDraft",
11
- }.freeze
12
-
13
- class << self
14
- # @param hit [Hash]
15
- # @return [RelatonW3c::W3cBibliographicItem]
16
- def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
17
- resp = Net::HTTP.get_response URI.parse(hit["link"])
18
- doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
19
- W3cBibliographicItem.new(
20
- type: "standard",
21
- docid: fetch_docid(hit),
22
- fetched: Date.today.to_s,
23
- language: ["en"],
24
- script: ["Latn"],
25
- title: fetch_title(hit, doc),
26
- abstract: fetch_abstract(doc),
27
- link: fetch_link(hit),
28
- date: fetch_date(hit, doc),
29
- doctype: fetch_doctype(hit, doc),
30
- contributor: fetch_contributor(hit, doc),
31
- relation: fetch_relation(doc),
32
- keyword: hit["keyword"],
33
- )
34
- end
35
-
36
- private
37
-
38
- # @param hit [Hash]
39
- # @return [Array<RelatonBib::DocumentIdentifier>]
40
- def fetch_docid(hit)
41
- id = hit["link"].split("/").last
42
- [RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)]
43
- end
44
-
45
- # @param hit [Hash]
46
- # @param doc [Nokogiri::HTML::Document]
47
- # @return [Array<RelatonBib::TypedTitleString>]
48
- def fetch_title(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
49
- titles = []
50
- if doc
51
- title = doc.at("//*[contains(@id, 'title')]")&.text
52
- if title && !title.empty?
53
- titles << { content: title.gsub(/\n/, " "), type: "main" }
54
- end
55
- subtitle = doc.at(
56
- "//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
57
- )&.text
58
- titles << { content: subtitle, tipe: "subtitle" } if subtitle
59
- end
60
- if titles.empty? && hit["title"]
61
- titles << { content: hit["title"], type: "main" }
62
- end
63
- titles.map do |t|
64
- title = RelatonBib::FormattedString.new(
65
- content: t[:content], language: "en", script: "Latn",
66
- )
67
- RelatonBib::TypedTitleString.new(type: t[:type], title: title)
68
- end
69
- end
70
-
71
- # @param doc [Nokogiri::HTML::Document, NilClass]
72
- # @return [Array<RelatonBib::FormattedString>]
73
- def fetch_abstract(doc)
74
- return [] unless doc
75
-
76
- content = doc.at("//h2[.='Abstract']/following-sibling::p",
77
- "//div[@class='abstract']/p").text
78
- [RelatonBib::FormattedString.new(content: content, language: "en",
79
- script: "Latn")]
80
- end
81
-
82
- # @param hit [Hash]
83
- # @return [Array<RelatonBib::TypedUri>]
84
- def fetch_link(hit)
85
- [RelatonBib::TypedUri.new(type: "src", content: hit["link"])]
86
- end
87
-
88
- # @param hit [Hash]
89
- # @param doc [Nokogiri::HTML::Document, NilClass]
90
- # @return [Array<RelatonBib::BibliographicDate>]
91
- def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
92
- on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
93
- on ||= fetch_date1(doc) || fetch_date2(doc)
94
- [RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
95
- end
96
-
97
- # @param doc [Nokogiri::HTML::Document, NilClass]
98
- # @return [String]
99
- def fetch_date1(doc)
100
- d = doc&.at("//h2[@property='dc:issued']")&.attr(:content)
101
- d&.match(/\d{4}-\d{2}-\d{2}/)&.to_s
102
- end
103
-
104
- # @param doc [Nokogiri::HTML::Document, NilClass]
105
- # @return [String]
106
- def fetch_date2(doc)
107
- d = doc&.at("//h2[contains(@id, 'w3c-recommendation')]")
108
- return unless d
109
-
110
- Date.parse(d.attr(:id.match(/\d{2}-\w+-\d{4}/).to_s)).to_s
111
- end
112
-
113
- # @param hit [Hash]
114
- # @param doc [Nokogiri::HTML::Document, NilClass]
115
- # @return [String]
116
- def fetch_doctype(hit, doc)
117
- if hit["type"]
118
- DOCTYPES[hit["type"]]
119
- elsif doc
120
- type = HitCollection::TYPES.detect do |_k, v|
121
- doc.at("//h2[contains(., '#{v}')]/time[@datetime]")
122
- end
123
- DOCTYPES[type&.first]
124
- end
125
- end
126
-
127
- # @param hit [Hash]
128
- # @param doc [Nokogiri::HTML::Document, NilClass]
129
- # @return [Array<RelatonBib::ContributionInfo>]
130
- def fetch_contributor(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
131
- if doc
132
- editors = find_contribs(doc, "Editors").reduce([]) do |mem, ed|
133
- c = parse_contrib ed, "editor"
134
- mem << c if c
135
- mem
136
- end
137
- contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, ath|
138
- ed = mem.detect { |e| e[:id] && e[:id] == ath["data-editor-id"] }
139
- if ed
140
- ed[:role] << { type: "author" }
141
- else
142
- mem << parse_contrib(ath, "author")
143
- end
144
- mem
145
- end
146
- contribs.map { |c| contrib_info(**c) }
147
- else
148
- hit["editor"].map do |ed|
149
- contrib_info name: ed, role: [{ type: "editor" }]
150
- end
151
- end
152
- end
153
-
154
- # @param doc [Nokogiri::HTML::Document]
155
- # @param type [String]
156
- # @return [Array<Nokogiri::XML::Element>]
157
- def find_contribs(doc, type)
158
- doc.xpath("//dt[contains(.,'#{type}')]/following-sibling::dd"\
159
- "[preceding-sibling::dt[1][contains(.,'#{type}')]]")
160
- end
161
-
162
- # @param element [Nokogiri::XML::Element]
163
- # @param type [String]
164
- # @return [Hash]
165
- def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
166
- p = element.at("a")
167
- return unless p
168
-
169
- contrib = {
170
- name: p.text,
171
- url: p[:href],
172
- role: [{ type: type }],
173
- id: element["data-editor-id"],
174
- }
175
- org = element.at("a[2]")
176
- contrib[:org] = { name: org.text, url: org[:href] } if org
177
- contrib
178
- end
179
-
180
- # @param name [String]
181
- # @param url [String, NilClass]
182
- # @param role [Array<Hash>]
183
- # @param org [Hash]
184
- # @return [RelatonBib::ContributionInfo]
185
- def contrib_info(**args)
186
- completename = RelatonBib::LocalizedString.new(args[:name])
187
- name = RelatonBib::FullName.new completename: completename
188
- af = []
189
- if args[:org]
190
- org = RelatonBib::Organization.new(**args[:org])
191
- af << RelatonBib::Affiliation.new(organization: org)
192
- end
193
- en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
194
- RelatonBib::ContributionInfo.new entity: en, role: args[:role]
195
- end
196
-
197
- # @param doc [Nokogiri::HTML::Document]
198
- # @return [Array<RelatonBib::DocumentRelation>]
199
- def fetch_relation(doc)
200
- return [] unless doc && (link = recommendation_link(doc))
201
-
202
- hit = { "link" => link }
203
- item = parse_page hit
204
- [RelatonBib::DocumentRelation.new(type: "obsoletedBy", bibitem: item)]
205
- end
206
-
207
- # @param doc [Nokogiri::HTML::Document]
208
- # @return [String, NilClass]
209
- def recommendation_link(doc)
210
- recom = doc.at("//dt[.='Latest Recommendation:']",
211
- "//dt[.='Previous Recommendation:']")
212
- return unless recom
213
-
214
- recom.at("./following-sibling::dd/a")[:href]
215
- end
216
- end
217
- end
218
- end