relaton-w3c 1.11.3 → 1.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4c96856baa51c84d8397b62be6dee1b8225e7ab854cd70176187e68fc269a23c
4
- data.tar.gz: 54d5cc018e0de6ecca6d402ccbe41947047190d08e922c8f46b7cb20164cea22
3
+ metadata.gz: 2c318668fd5a7ef93b5275ac02cfadc9b91832eddeccf5996bc5ea65fc5272b1
4
+ data.tar.gz: 431ee27aec817b6d352e5410e2f4bf63710ecfcff47813e03d7e3302a7b42ecd
5
5
  SHA512:
6
- metadata.gz: 070eb14907a49f99b7c0f45841d83250c1c051cd9a16b9cddfc83f6d874274f401ac95c9554f6994c6702e557ef8a200e576ee5fe1d0b7c7f7583afb53074794
7
- data.tar.gz: cab9e48e248b889c5d15e3449a93dc5cb100b7680a87f025ebfd090a1afba36e77730d9e0bc85a10bc9208b9ea2d813e444e0a28b2ac77d412650395479468f3
6
+ metadata.gz: 4254d592bcc1469a7a8773d641e192d07be6d4b0ce247e2b93be490717fc20dc4457718604a5353cf5b0cef6231f2920b03288b776f186df5a8645c5f453d5ed
7
+ data.tar.gz: 0c5b07d1efb4f1df3c1505c0b8d2558e7c9bd12d80d1fe7ea483e313f4462a1a1ae8010587f18772dc0288ab0e9573724a9c671d9985427adce5307354f20ee9
@@ -67,59 +67,6 @@ module RelatonW3c
67
67
  @index.sort!.save
68
68
  end
69
69
 
70
- #
71
- # Create index file
72
- #
73
- # def create_index
74
- # index_file = "index-w3c.yaml"
75
- # index_yaml = @index.sort do |a, b|
76
- # compare_index_items a, b
77
- # end.to_yaml
78
- # File.write index_file, index_yaml, encoding: "UTF-8"
79
- # end
80
-
81
- #
82
- # Compare index items
83
- #
84
- # @param [Hash] aid first item
85
- # @param [Hash] bid second item
86
- #
87
- # @return [Integer] comparison result
88
- #
89
- # def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
90
- # ret = aid[:code] <=> bid[:code]
91
- # ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
92
- # ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
93
- # # ret = aid[:type] <=> bid[:type] if ret.zero?
94
- # ret
95
- # end
96
-
97
- #
98
- # Weight of stage
99
- #
100
- # @param [String, nil] stage stage
101
- #
102
- # @return [Integer] weight
103
- #
104
- # def stage_weight(stage)
105
- # return DataParser::STAGES.size if stage.nil?
106
-
107
- # DataParser::STAGES.keys.index(stage)
108
- # end
109
-
110
- #
111
- # Weight of date
112
- #
113
- # @param [String] date date
114
- #
115
- # @return [String] weight
116
- #
117
- # def date_weight(date)
118
- # return "99999999" if date.nil?
119
-
120
- # date
121
- # end
122
-
123
70
  #
124
71
  # Query RDF source for documents
125
72
  #
@@ -13,25 +13,6 @@ module RelatonW3c
13
13
  @index = index
14
14
  end
15
15
 
16
- #
17
- # Create index from a GitHub repository
18
- #
19
- # @return [RelatonW3c::DataIndex] data index
20
- #
21
- def self.create_from_repo # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
22
- resp = Zip::InputStream.new URI("#{W3cBibliography::SOURCE}index-w3c.zip").open
23
- zip = resp.get_next_entry
24
-
25
- # Newer versions of Psych uses the `permitted_classes:` parameter
26
- index = if YAML.method(:safe_load).parameters.collect(&:last).index(:permitted_classes)
27
- YAML.safe_load(zip.get_input_stream.read, permitted_classes: [Symbol])
28
- else
29
- YAML.safe_load(zip.get_input_stream.read, [Symbol])
30
- end
31
-
32
- DataIndex.new index: index
33
- end
34
-
35
16
  #
36
17
  # Add document to index
37
18
  #
@@ -39,7 +20,9 @@ module RelatonW3c
39
20
  # @param [String] file path to document file
40
21
  #
41
22
  def add(docnumber, file)
42
- @index << docnumber_to_parts(docnumber, file)
23
+ dnparts = self.class.docnumber_to_parts docnumber
24
+ dnparts[:file] = file
25
+ @index << dnparts
43
26
  end
44
27
 
45
28
  #
@@ -67,11 +50,14 @@ module RelatonW3c
67
50
  # @return [String] document's filename
68
51
  #
69
52
  def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
70
- dparts = docnumber_to_parts(ref)
53
+ dparts = self.class.docnumber_to_parts(ref)
54
+ return if dparts[:code].nil?
55
+
71
56
  @index.detect do |parts|
72
57
  parts[:code].match?(/^#{Regexp.escape dparts[:code]}/i) &&
73
58
  (dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage])) &&
74
- (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type])) &&
59
+ (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type]) ||
60
+ (parts[:type].nil? && dparts[:type] == "TR")) &&
75
61
  (dparts[:date].nil? || dparts[:date] == parts[:date]) &&
76
62
  (dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff]))
77
63
  end&.fetch(:file)
@@ -119,28 +105,47 @@ module RelatonW3c
119
105
  date
120
106
  end
121
107
 
122
- #
123
- # Parse document number to parts
124
- #
125
- # @param [String] docnumber document number
126
- # @param [String, nil] file path to document file
127
- #
128
- # @return [Hash{Symbol=>String}] document parts
129
- #
130
- def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
131
- %r{
132
- ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
133
- (?<code>\w+(?:[+-][\w.]+)*?)
134
- (?:-(?<date>\d{8}|\d{6}))?
135
- (?:/(?<suff>\w+))?$
136
- }xi =~ docnumber
137
- entry = { code: code }
138
- entry[:file] = file if file
139
- entry[:stage] = stage if stage
140
- entry[:type] = type if type
141
- entry[:date] = date if date
142
- entry[:suff] = suff if suff
143
- entry
108
+ class << self
109
+ #
110
+ # Create index from a GitHub repository
111
+ #
112
+ # @return [RelatonW3c::DataIndex] data index
113
+ #
114
+ def create_from_repo # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
115
+ resp = Zip::InputStream.new URI("#{W3cBibliography::SOURCE}index-w3c.zip").open
116
+ zip = resp.get_next_entry
117
+
118
+ # Newer versions of Psych use the `permitted_classes:` parameter
119
+ index = if YAML.method(:safe_load).parameters.collect(&:last).index(:permitted_classes)
120
+ YAML.safe_load(zip.get_input_stream.read, permitted_classes: [Symbol])
121
+ else
122
+ YAML.safe_load(zip.get_input_stream.read, [Symbol])
123
+ end
124
+
125
+ DataIndex.new index: index
126
+ end
127
+
128
+ #
129
+ # Parse document number to parts
130
+ #
131
+ # @param [String] docnumber document number
132
+ #
133
+ # @return [Hash{Symbol=>String}] document parts
134
+ #
135
+ def docnumber_to_parts(docnumber) # rubocop:disable Metrics/MethodLength
136
+ %r{
137
+ ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE|TR))-)?
138
+ (?<code>\w+(?:[+-][\w.]+)*?)
139
+ (?:-(?<date>\d{8}|\d{6}))?
140
+ (?:/(?<suff>\w+))?$
141
+ }xi =~ docnumber
142
+ entry = { code: code }
143
+ entry[:stage] = stage if stage
144
+ entry[:type] = type if type
145
+ entry[:date] = date if date
146
+ entry[:suff] = suff if suff
147
+ entry
148
+ end
144
149
  end
145
150
  end
146
151
  end
@@ -169,7 +169,7 @@ module RelatonW3c
169
169
  #
170
170
  def type
171
171
  # there are many types, we need to find the right one
172
- @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
172
+ @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) } || "technicalReport"
173
173
  end
174
174
 
175
175
  #
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.11.3".freeze
2
+ VERSION = "1.11.4".freeze
3
3
  end
@@ -9,10 +9,10 @@ module RelatonW3c
9
9
 
10
10
  class << self
11
11
  # @param text [String]
12
- # @return [RelatonW3c::HitCollection]
13
- def search(text) # rubocop:disable Metrics/MethodLength
12
+ # @return [RelatonW3c::W3cBibliographicItem]
13
+ def search(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
14
14
  ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
15
- file = DataIndex.create_from_repo.search(ref)
15
+ file = DataIndex.create_from_repo.search ref.gsub(" ", "-").squeeze("-")
16
16
  return unless file
17
17
 
18
18
  url = "#{SOURCE}#{file}"
data/lib/relaton_w3c.rb CHANGED
@@ -2,9 +2,6 @@ require "relaton_bib"
2
2
  require "relaton_w3c/version"
3
3
  require "relaton_w3c/w3c_bibliography"
4
4
  require "relaton_w3c/w3c_bibliographic_item"
5
- # require "relaton_w3c/hit_collection"
6
- # require "relaton_w3c/hit"
7
- # require "relaton_w3c/scrapper"
8
5
  require "relaton_w3c/xml_parser"
9
6
  require "relaton_w3c/bibxml_parser"
10
7
  require "relaton_w3c/hash_converter"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.3
4
+ version: 1.11.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-20 00:00:00.000000000 Z
11
+ date: 2022-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -247,10 +247,7 @@ files:
247
247
  - lib/relaton_w3c/data_index.rb
248
248
  - lib/relaton_w3c/data_parser.rb
249
249
  - lib/relaton_w3c/hash_converter.rb
250
- - lib/relaton_w3c/hit.rb
251
- - lib/relaton_w3c/hit_collection.rb
252
250
  - lib/relaton_w3c/processor.rb
253
- - lib/relaton_w3c/scrapper.rb
254
251
  - lib/relaton_w3c/version.rb
255
252
  - lib/relaton_w3c/w3c_bibliographic_item.rb
256
253
  - lib/relaton_w3c/w3c_bibliography.rb
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RelatonW3c
4
- # Hit.
5
- class Hit < RelatonBib::Hit
6
- #
7
- # Parse page.
8
- #
9
- # @param lang [String, NilClass]
10
- # @return [RelatonW3c::W3cBibliographicItem]
11
- def fetch(_lang = nil)
12
- @fetch ||= Scrapper.parse_page hit
13
- end
14
- end
15
- end
@@ -1,172 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "fileutils"
4
- require "yaml"
5
-
6
- module RelatonW3c
7
- # Page of hit collection.
8
- class HitCollection < RelatonBib::HitCollection
9
- TYPES = {
10
- "CR" => "Candidate Recommendation",
11
- "NOTE" => "Group Note",
12
- "PER" => "Proposed Edited Recommendation",
13
- "PR" => "Proposed Recommendation",
14
- "REC" => "Recommendation",
15
- "RET" => "Retired",
16
- "WD" => "Working Draft",
17
- }.freeze
18
- DOMAIN = "https://www.w3.org"
19
- DATADIR = File.expand_path(".relaton/w3c", Dir.home).freeze
20
- DATAFILE = File.expand_path("bibliography.yml", DATADIR).freeze
21
-
22
- # @param ref [String] reference to search
23
- def initialize(ref)
24
- %r{
25
- ^(?:W3C\s)?
26
- (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
- Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
- Recommendation|Retired|Working\sDraft))? # type
29
- \s?
30
- (?<title_date>.+) # title_date
31
- }x =~ ref
32
- super
33
- @array = from_yaml title_date, type
34
- end
35
-
36
- private
37
-
38
- #
39
- # Fetch data form yaml
40
- #
41
- # @param title_date [String]
42
- # @param type [String]
43
- # @return [Array<Hash>]
44
- def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
- /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
- title ||= title_date
47
- result = data.select do |hit|
48
- (hit["title"].casecmp?(title) ||
49
- hit["link"].split("/").last.match?(/-#{title}-/)) &&
50
- type_date_filter(hit, type, date)
51
- end
52
- if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
- end
55
- result.map { |h| Hit.new(h, self) }
56
- end
57
-
58
- # @param hit [Hash]
59
- # @param type [String]
60
- # @param date [String]
61
- # @return [TrueClass, FalseClass]
62
- def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
63
- if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
64
- history = get_history hit, type, date
65
- return false unless history.any?
66
-
67
- hit["type"] = short_type type
68
- hit["datepub"] = history.first.at("td").text
69
- hit["link"] = history.first.at("a")[:href]
70
- end
71
- true
72
- end
73
-
74
- # @param hit [Hash]
75
- # @param type [String]
76
- # @param date [String]
77
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
78
- def get_history(hit, type, date)
79
- resp = Net::HTTP.get URI.parse(HitCollection::DOMAIN + hit["history"])
80
- history_doc = Nokogiri::HTML resp
81
- history = history_doc.xpath(
82
- "//table//a[contains(.,'#{long_type(type)}')]/../..",
83
- )
84
- return filter_history_by_date(history, history_doc, type, date) if date
85
-
86
- history
87
- end
88
-
89
- # @param history [Nokogiri::XML::NodeSet]
90
- # @param history_doc [Nokogiri::HTML::NodeSet]
91
- # @param type [String]
92
- # @param date [String]
93
- # @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
94
- def filter_history_by_date(history, history_doc, type, date)
95
- if type
96
- history.select do |h|
97
- h.at("td[@class='table_datecol']").text == date
98
- end
99
- else
100
- history_doc.xpath(
101
- "//table//td[@class='table_datecol'][.='#{date}']/..",
102
- )
103
- end
104
- end
105
-
106
- #
107
- # Convetr long type name to short
108
- #
109
- # @param type [String]
110
- # @return [String]
111
- def short_type(type)
112
- tp = TYPES.select { |_, v| v == type }.keys
113
- tp.first || type
114
- end
115
-
116
- #
117
- # Convert shot type name to long
118
- #
119
- # @param [String]
120
- # @return [String]
121
- def long_type(type)
122
- TYPES[type] || type
123
- end
124
-
125
- #
126
- # Fetches YAML data
127
- #
128
- # @return [Hash]
129
- def data
130
- FileUtils.mkdir_p DATADIR
131
- ctime = File.ctime DATAFILE if File.exist? DATAFILE
132
- fetch_data if !ctime || ctime.to_date < Date.today
133
- @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
134
- end
135
-
136
- #
137
- # fetch data form server and save it to file.
138
- #
139
- def fetch_data
140
- resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
- # return if there aren't any changes since last fetching
142
- return unless resp.code == "200"
143
-
144
- doc = Nokogiri::HTML resp.body
145
- @data = doc.xpath("//ul[@id='container']/li").map do |h_el|
146
- link = h_el.at("h2/a")
147
- pubdetails = h_el.at("p[@class='pubdetails']")
148
- fetch_hit h_el, link, pubdetails
149
- end
150
- File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
151
- end
152
-
153
- # @param h_el [Nokogiri::XML::Element]
154
- # @param link [Nokogiri::XML::Element]
155
- # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
- datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
- editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
- keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
160
- {
161
- "title" => link.text.gsub("\u00a0", " "),
162
- "link" => link[:href],
163
- "type" => h_el.at("div").text.upcase,
164
- "workgroup" => h_el.xpath("p[@class='deliverer']").map(&:text),
165
- "datepub" => datepub,
166
- "history" => pubdetails.at("a[text()='History']")[:href],
167
- "editor" => editor,
168
- "keyword" => keyword,
169
- }
170
- end
171
- end
172
- end
@@ -1,218 +0,0 @@
1
- module RelatonW3c
2
- class Scrapper
3
- DOCTYPES = {
4
- "CR" => "candidateRecommendation",
5
- "NOTE" => "groupNote",
6
- "PER" => "proposedEditedRecommendation",
7
- "PR" => "proposedRecommendation",
8
- "REC" => "recommendation",
9
- "RET" => "retired",
10
- "WD" => "workingDraft",
11
- }.freeze
12
-
13
- class << self
14
- # @param hit [Hash]
15
- # @return [RelatonW3c::W3cBibliographicItem]
16
- def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
17
- resp = Net::HTTP.get_response URI.parse(hit["link"])
18
- doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
19
- W3cBibliographicItem.new(
20
- type: "standard",
21
- docid: fetch_docid(hit),
22
- fetched: Date.today.to_s,
23
- language: ["en"],
24
- script: ["Latn"],
25
- title: fetch_title(hit, doc),
26
- abstract: fetch_abstract(doc),
27
- link: fetch_link(hit),
28
- date: fetch_date(hit, doc),
29
- doctype: fetch_doctype(hit, doc),
30
- contributor: fetch_contributor(hit, doc),
31
- relation: fetch_relation(doc),
32
- keyword: hit["keyword"],
33
- )
34
- end
35
-
36
- private
37
-
38
- # @param hit [Hash]
39
- # @return [Array<RelatonBib::DocumentIdentifier>]
40
- def fetch_docid(hit)
41
- id = hit["link"].split("/").last
42
- [RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)]
43
- end
44
-
45
- # @param hit [Hash]
46
- # @param doc [Nokogiri::HTML::Document]
47
- # @return [Array<RelatonBib::TypedTitleString>]
48
- def fetch_title(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
49
- titles = []
50
- if doc
51
- title = doc.at("//*[contains(@id, 'title')]")&.text
52
- if title && !title.empty?
53
- titles << { content: title.gsub(/\n/, " "), type: "main" }
54
- end
55
- subtitle = doc.at(
56
- "//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
57
- )&.text
58
- titles << { content: subtitle, tipe: "subtitle" } if subtitle
59
- end
60
- if titles.empty? && hit["title"]
61
- titles << { content: hit["title"], type: "main" }
62
- end
63
- titles.map do |t|
64
- title = RelatonBib::FormattedString.new(
65
- content: t[:content], language: "en", script: "Latn",
66
- )
67
- RelatonBib::TypedTitleString.new(type: t[:type], title: title)
68
- end
69
- end
70
-
71
- # @param doc [Nokogiri::HTML::Document, NilClass]
72
- # @return [Array<RelatonBib::FormattedString>]
73
- def fetch_abstract(doc)
74
- return [] unless doc
75
-
76
- content = doc.at("//h2[.='Abstract']/following-sibling::p",
77
- "//div[@class='abstract']/p").text
78
- [RelatonBib::FormattedString.new(content: content, language: "en",
79
- script: "Latn")]
80
- end
81
-
82
- # @param hit [Hash]
83
- # @return [Array<RelatonBib::TypedUri>]
84
- def fetch_link(hit)
85
- [RelatonBib::TypedUri.new(type: "src", content: hit["link"])]
86
- end
87
-
88
- # @param hit [Hash]
89
- # @param doc [Nokogiri::HTML::Document, NilClass]
90
- # @return [Array<RelatonBib::BibliographicDate>]
91
- def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
92
- on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
93
- on ||= fetch_date1(doc) || fetch_date2(doc)
94
- [RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
95
- end
96
-
97
- # @param doc [Nokogiri::HTML::Document, NilClass]
98
- # @return [String]
99
- def fetch_date1(doc)
100
- d = doc&.at("//h2[@property='dc:issued']")&.attr(:content)
101
- d&.match(/\d{4}-\d{2}-\d{2}/)&.to_s
102
- end
103
-
104
- # @param doc [Nokogiri::HTML::Document, NilClass]
105
- # @return [String]
106
- def fetch_date2(doc)
107
- d = doc&.at("//h2[contains(@id, 'w3c-recommendation')]")
108
- return unless d
109
-
110
- Date.parse(d.attr(:id.match(/\d{2}-\w+-\d{4}/).to_s)).to_s
111
- end
112
-
113
- # @param hit [Hash]
114
- # @param doc [Nokogiri::HTML::Document, NilClass]
115
- # @return [String]
116
- def fetch_doctype(hit, doc)
117
- if hit["type"]
118
- DOCTYPES[hit["type"]]
119
- elsif doc
120
- type = HitCollection::TYPES.detect do |_k, v|
121
- doc.at("//h2[contains(., '#{v}')]/time[@datetime]")
122
- end
123
- DOCTYPES[type&.first]
124
- end
125
- end
126
-
127
- # @param hit [Hash]
128
- # @param doc [Nokogiri::HTML::Document, NilClass]
129
- # @return [Array<RelatonBib::ContributionInfo>]
130
- def fetch_contributor(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
131
- if doc
132
- editors = find_contribs(doc, "Editors").reduce([]) do |mem, ed|
133
- c = parse_contrib ed, "editor"
134
- mem << c if c
135
- mem
136
- end
137
- contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, ath|
138
- ed = mem.detect { |e| e[:id] && e[:id] == ath["data-editor-id"] }
139
- if ed
140
- ed[:role] << { type: "author" }
141
- else
142
- mem << parse_contrib(ath, "author")
143
- end
144
- mem
145
- end
146
- contribs.map { |c| contrib_info(**c) }
147
- else
148
- hit["editor"].map do |ed|
149
- contrib_info name: ed, role: [{ type: "editor" }]
150
- end
151
- end
152
- end
153
-
154
- # @param doc [Nokogiri::NTML::Document]
155
- # @param type [String]
156
- # @return [Array<Nokogiri::XML::Element]
157
- def find_contribs(doc, type)
158
- doc.xpath("//dt[contains(.,'#{type}')]/following-sibling::dd"\
159
- "[preceding-sibling::dt[1][contains(.,'#{type}')]]")
160
- end
161
-
162
- # @param element [Nokogiri::XML::Element]
163
- # @param type [String]
164
- # @return [Hash]
165
- def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
166
- p = element.at("a")
167
- return unless p
168
-
169
- contrib = {
170
- name: p.text,
171
- url: p[:href],
172
- role: [{ type: type }],
173
- id: element["data-editor-id"],
174
- }
175
- org = element.at("a[2]")
176
- contrib[:org] = { name: org.text, url: org[:href] } if org
177
- contrib
178
- end
179
-
180
- # @param name [String]
181
- # @param url [String, NilClass]
182
- # @param role [Array<Hash>]
183
- # @parma org [Hash]
184
- # @return [RelatonBib::ContributionInfo]
185
- def contrib_info(**args)
186
- completename = RelatonBib::LocalizedString.new(args[:name])
187
- name = RelatonBib::FullName.new completename: completename
188
- af = []
189
- if args[:org]
190
- org = RelatonBib::Organization.new(**args[:org])
191
- af << RelatonBib::Affiliation.new(organization: org)
192
- end
193
- en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
194
- RelatonBib::ContributionInfo.new entity: en, role: args[:role]
195
- end
196
-
197
- # @param doc [Nokogiri::HTML::Document]
198
- # @return [Array<RelatonBib::DocumentRelation>]
199
- def fetch_relation(doc)
200
- return [] unless doc && (link = recommendation_link(doc))
201
-
202
- hit = { "link" => link }
203
- item = parse_page hit
204
- [RelatonBib::DocumentRelation.new(type: "obsoletedBy", bibitem: item)]
205
- end
206
-
207
- # @param doc [Nokogiri::HTML::Document]
208
- # @return [String, NilClass]
209
- def recommendation_link(doc)
210
- recom = doc.at("//dt[.='Latest Recommendation:']",
211
- "//dt[.='Previous Recommendation:']")
212
- return unless recom
213
-
214
- recom.at("./following-sibling::dd/a")[:href]
215
- end
216
- end
217
- end
218
- end