relaton-w3c 1.11.2 → 1.11.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -5
- data/.rubocop.yml +1 -1
- data/data/reference.W3C.DSig-label.xml +32 -32
- data/data/reference.W3C.P3P-rdfschema.xml +26 -26
- data/data/reference.W3C.PICS-labels.xml +43 -43
- data/data/reference.W3C.PICS-rules.xml +38 -38
- data/data/reference.W3C.PICS-services.xml +37 -37
- data/data/reference.W3C.daml-oil-reference.xml +39 -39
- data/data/reference.W3C.soap11.xml +56 -56
- data/data/reference.W3C.soap12-part1.xml +38 -38
- data/data/reference.W3C.soap12-part2.xml +38 -38
- data/data/reference.W3C.xkms.xml +50 -50
- data/data/reference.W3C.xml-c14n.xml +15 -15
- data/data/reference.W3C.xmldsig-core.xml +26 -26
- data/data/reference.W3C.xmlenc-core.xml +20 -20
- data/data/reference.W3C.xpath.xml +22 -22
- data/lib/relaton_w3c/data_fetcher.rb +0 -53
- data/lib/relaton_w3c/data_index.rb +52 -44
- data/lib/relaton_w3c/data_parser.rb +1 -1
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +3 -3
- data/lib/relaton_w3c.rb +0 -3
- data/relaton_w3c.gemspec +4 -4
- metadata +22 -11
- data/lib/relaton_w3c/hit.rb +0 -15
- data/lib/relaton_w3c/hit_collection.rb +0 -172
- data/lib/relaton_w3c/scrapper.rb +0 -218
@@ -9,10 +9,10 @@ module RelatonW3c
|
|
9
9
|
|
10
10
|
class << self
|
11
11
|
# @param text [String]
|
12
|
-
# @return [RelatonW3c::
|
13
|
-
def search(text) # rubocop:disable Metrics/MethodLength
|
12
|
+
# @return [RelatonW3c::W3cBibliographicItem]
|
13
|
+
def search(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
14
14
|
ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
|
15
|
-
file = DataIndex.create_from_repo.search(
|
15
|
+
file = DataIndex.create_from_repo.search ref.gsub(" ", "-").squeeze("-")
|
16
16
|
return unless file
|
17
17
|
|
18
18
|
url = "#{SOURCE}#{file}"
|
data/lib/relaton_w3c.rb
CHANGED
@@ -2,9 +2,6 @@ require "relaton_bib"
|
|
2
2
|
require "relaton_w3c/version"
|
3
3
|
require "relaton_w3c/w3c_bibliography"
|
4
4
|
require "relaton_w3c/w3c_bibliographic_item"
|
5
|
-
# require "relaton_w3c/hit_collection"
|
6
|
-
# require "relaton_w3c/hit"
|
7
|
-
# require "relaton_w3c/scrapper"
|
8
5
|
require "relaton_w3c/xml_parser"
|
9
6
|
require "relaton_w3c/bibxml_parser"
|
10
7
|
require "relaton_w3c/hash_converter"
|
data/relaton_w3c.gemspec
CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
"using the IsoBibliographicItem model"
|
15
15
|
spec.homepage = "https://github.com/relaton/relaton-wc3"
|
16
16
|
spec.license = "BSD-2-Clause"
|
17
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
17
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.6.0")
|
18
18
|
|
19
19
|
# spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
20
20
|
|
@@ -39,10 +39,10 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_dependency "linkeddata", "~> 3.1.0"
|
41
41
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
42
|
-
|
43
|
-
spec.add_dependency "rdf", "~>
|
44
|
-
spec.add_dependency "rdf-normalize", "~> 0.4.0"
|
42
|
+
spec.add_dependency "rdf", "~> 3.2.0"
|
43
|
+
spec.add_dependency "rdf-normalize", "~> 0.5.0"
|
45
44
|
spec.add_dependency "relaton-bib", "~> 1.11.0"
|
45
|
+
spec.add_dependency "rubyzip", "~> 2.3.0"
|
46
46
|
spec.add_dependency "shex", "~> 0.6.0"
|
47
47
|
spec.add_dependency "sparql", "~> 3.1.0"
|
48
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.2
|
4
|
+
version: 1.11.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -114,28 +114,28 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 3.
|
117
|
+
version: 3.2.0
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 3.
|
124
|
+
version: 3.2.0
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: rdf-normalize
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.4.0
|
131
|
+
version: 0.5.0
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0.4.0
|
138
|
+
version: 0.5.0
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: relaton-bib
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 1.11.0
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubyzip
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 2.3.0
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 2.3.0
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: shex
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -233,10 +247,7 @@ files:
|
|
233
247
|
- lib/relaton_w3c/data_index.rb
|
234
248
|
- lib/relaton_w3c/data_parser.rb
|
235
249
|
- lib/relaton_w3c/hash_converter.rb
|
236
|
-
- lib/relaton_w3c/hit.rb
|
237
|
-
- lib/relaton_w3c/hit_collection.rb
|
238
250
|
- lib/relaton_w3c/processor.rb
|
239
|
-
- lib/relaton_w3c/scrapper.rb
|
240
251
|
- lib/relaton_w3c/version.rb
|
241
252
|
- lib/relaton_w3c/w3c_bibliographic_item.rb
|
242
253
|
- lib/relaton_w3c/w3c_bibliography.rb
|
@@ -256,14 +267,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
256
267
|
requirements:
|
257
268
|
- - ">="
|
258
269
|
- !ruby/object:Gem::Version
|
259
|
-
version: 2.
|
270
|
+
version: 2.6.0
|
260
271
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
261
272
|
requirements:
|
262
273
|
- - ">="
|
263
274
|
- !ruby/object:Gem::Version
|
264
275
|
version: '0'
|
265
276
|
requirements: []
|
266
|
-
rubygems_version: 3.3
|
277
|
+
rubygems_version: 3.2.3
|
267
278
|
signing_key:
|
268
279
|
specification_version: 4
|
269
280
|
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
data/lib/relaton_w3c/hit.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module RelatonW3c
|
4
|
-
# Hit.
|
5
|
-
class Hit < RelatonBib::Hit
|
6
|
-
#
|
7
|
-
# Parse page.
|
8
|
-
#
|
9
|
-
# @param lang [String, NilClass]
|
10
|
-
# @return [RelatonW3c::W3cBibliographicItem]
|
11
|
-
def fetch(_lang = nil)
|
12
|
-
@fetch ||= Scrapper.parse_page hit
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
@@ -1,172 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "fileutils"
|
4
|
-
require "yaml"
|
5
|
-
|
6
|
-
module RelatonW3c
|
7
|
-
# Page of hit collection.
|
8
|
-
class HitCollection < RelatonBib::HitCollection
|
9
|
-
TYPES = {
|
10
|
-
"CR" => "Candidate Recommendation",
|
11
|
-
"NOTE" => "Group Note",
|
12
|
-
"PER" => "Proposed Edited Recommendation",
|
13
|
-
"PR" => "Proposed Recommendation",
|
14
|
-
"REC" => "Recommendation",
|
15
|
-
"RET" => "Retired",
|
16
|
-
"WD" => "Working Draft",
|
17
|
-
}.freeze
|
18
|
-
DOMAIN = "https://www.w3.org"
|
19
|
-
DATADIR = File.expand_path(".relaton/w3c", Dir.home).freeze
|
20
|
-
DATAFILE = File.expand_path("bibliography.yml", DATADIR).freeze
|
21
|
-
|
22
|
-
# @param ref [String] reference to search
|
23
|
-
def initialize(ref)
|
24
|
-
%r{
|
25
|
-
^(?:W3C\s)?
|
26
|
-
(?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
|
27
|
-
Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
|
28
|
-
Recommendation|Retired|Working\sDraft))? # type
|
29
|
-
\s?
|
30
|
-
(?<title_date>.+) # title_date
|
31
|
-
}x =~ ref
|
32
|
-
super
|
33
|
-
@array = from_yaml title_date, type
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
#
|
39
|
-
# Fetch data form yaml
|
40
|
-
#
|
41
|
-
# @param title_date [String]
|
42
|
-
# @param type [String]
|
43
|
-
# @return [Array<Hash>]
|
44
|
-
def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
45
|
-
/(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
|
46
|
-
title ||= title_date
|
47
|
-
result = data.select do |hit|
|
48
|
-
(hit["title"].casecmp?(title) ||
|
49
|
-
hit["link"].split("/").last.match?(/-#{title}-/)) &&
|
50
|
-
type_date_filter(hit, type, date)
|
51
|
-
end
|
52
|
-
if result.empty?
|
53
|
-
result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
|
54
|
-
end
|
55
|
-
result.map { |h| Hit.new(h, self) }
|
56
|
-
end
|
57
|
-
|
58
|
-
# @param hit [Hash]
|
59
|
-
# @param type [String]
|
60
|
-
# @param date [String]
|
61
|
-
# @return [TrueClass, FalseClass]
|
62
|
-
def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
|
63
|
-
if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
|
64
|
-
history = get_history hit, type, date
|
65
|
-
return false unless history.any?
|
66
|
-
|
67
|
-
hit["type"] = short_type type
|
68
|
-
hit["datepub"] = history.first.at("td").text
|
69
|
-
hit["link"] = history.first.at("a")[:href]
|
70
|
-
end
|
71
|
-
true
|
72
|
-
end
|
73
|
-
|
74
|
-
# @param hit [Hash]
|
75
|
-
# @param type [String]
|
76
|
-
# @param date [String]
|
77
|
-
# @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
|
78
|
-
def get_history(hit, type, date)
|
79
|
-
resp = Net::HTTP.get URI.parse(HitCollection::DOMAIN + hit["history"])
|
80
|
-
history_doc = Nokogiri::HTML resp
|
81
|
-
history = history_doc.xpath(
|
82
|
-
"//table//a[contains(.,'#{long_type(type)}')]/../..",
|
83
|
-
)
|
84
|
-
return filter_history_by_date(history, history_doc, type, date) if date
|
85
|
-
|
86
|
-
history
|
87
|
-
end
|
88
|
-
|
89
|
-
# @param history [Nokogiri::XML::NodeSet]
|
90
|
-
# @param history_doc [Nokogiri::HTML::NodeSet]
|
91
|
-
# @param type [String]
|
92
|
-
# @param date [String]
|
93
|
-
# @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
|
94
|
-
def filter_history_by_date(history, history_doc, type, date)
|
95
|
-
if type
|
96
|
-
history.select do |h|
|
97
|
-
h.at("td[@class='table_datecol']").text == date
|
98
|
-
end
|
99
|
-
else
|
100
|
-
history_doc.xpath(
|
101
|
-
"//table//td[@class='table_datecol'][.='#{date}']/..",
|
102
|
-
)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
#
|
107
|
-
# Convetr long type name to short
|
108
|
-
#
|
109
|
-
# @param type [String]
|
110
|
-
# @return [String]
|
111
|
-
def short_type(type)
|
112
|
-
tp = TYPES.select { |_, v| v == type }.keys
|
113
|
-
tp.first || type
|
114
|
-
end
|
115
|
-
|
116
|
-
#
|
117
|
-
# Convert shot type name to long
|
118
|
-
#
|
119
|
-
# @param [String]
|
120
|
-
# @return [String]
|
121
|
-
def long_type(type)
|
122
|
-
TYPES[type] || type
|
123
|
-
end
|
124
|
-
|
125
|
-
#
|
126
|
-
# Fetches YAML data
|
127
|
-
#
|
128
|
-
# @return [Hash]
|
129
|
-
def data
|
130
|
-
FileUtils.mkdir_p DATADIR
|
131
|
-
ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
132
|
-
fetch_data if !ctime || ctime.to_date < Date.today
|
133
|
-
@data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
|
134
|
-
end
|
135
|
-
|
136
|
-
#
|
137
|
-
# fetch data form server and save it to file.
|
138
|
-
#
|
139
|
-
def fetch_data
|
140
|
-
resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
|
141
|
-
# return if there aren't any changes since last fetching
|
142
|
-
return unless resp.code == "200"
|
143
|
-
|
144
|
-
doc = Nokogiri::HTML resp.body
|
145
|
-
@data = doc.xpath("//ul[@id='container']/li").map do |h_el|
|
146
|
-
link = h_el.at("h2/a")
|
147
|
-
pubdetails = h_el.at("p[@class='pubdetails']")
|
148
|
-
fetch_hit h_el, link, pubdetails
|
149
|
-
end
|
150
|
-
File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
|
151
|
-
end
|
152
|
-
|
153
|
-
# @param h_el [Nokogiri::XML::Element]
|
154
|
-
# @param link [Nokogiri::XML::Element]
|
155
|
-
# @param pubdetails [Nokogiri::XML::Element]
|
156
|
-
def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
157
|
-
datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
|
158
|
-
editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
|
159
|
-
keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
|
160
|
-
{
|
161
|
-
"title" => link.text.gsub("\u00a0", " "),
|
162
|
-
"link" => link[:href],
|
163
|
-
"type" => h_el.at("div").text.upcase,
|
164
|
-
"workgroup" => h_el.xpath("p[@class='deliverer']").map(&:text),
|
165
|
-
"datepub" => datepub,
|
166
|
-
"history" => pubdetails.at("a[text()='History']")[:href],
|
167
|
-
"editor" => editor,
|
168
|
-
"keyword" => keyword,
|
169
|
-
}
|
170
|
-
end
|
171
|
-
end
|
172
|
-
end
|
data/lib/relaton_w3c/scrapper.rb
DELETED
@@ -1,218 +0,0 @@
|
|
1
|
-
module RelatonW3c
|
2
|
-
class Scrapper
|
3
|
-
DOCTYPES = {
|
4
|
-
"CR" => "candidateRecommendation",
|
5
|
-
"NOTE" => "groupNote",
|
6
|
-
"PER" => "proposedEditedRecommendation",
|
7
|
-
"PR" => "proposedRecommendation",
|
8
|
-
"REC" => "recommendation",
|
9
|
-
"RET" => "retired",
|
10
|
-
"WD" => "workingDraft",
|
11
|
-
}.freeze
|
12
|
-
|
13
|
-
class << self
|
14
|
-
# @param hit [Hash]
|
15
|
-
# @return [RelatonW3c::W3cBibliographicItem]
|
16
|
-
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
17
|
-
resp = Net::HTTP.get_response URI.parse(hit["link"])
|
18
|
-
doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
|
19
|
-
W3cBibliographicItem.new(
|
20
|
-
type: "standard",
|
21
|
-
docid: fetch_docid(hit),
|
22
|
-
fetched: Date.today.to_s,
|
23
|
-
language: ["en"],
|
24
|
-
script: ["Latn"],
|
25
|
-
title: fetch_title(hit, doc),
|
26
|
-
abstract: fetch_abstract(doc),
|
27
|
-
link: fetch_link(hit),
|
28
|
-
date: fetch_date(hit, doc),
|
29
|
-
doctype: fetch_doctype(hit, doc),
|
30
|
-
contributor: fetch_contributor(hit, doc),
|
31
|
-
relation: fetch_relation(doc),
|
32
|
-
keyword: hit["keyword"],
|
33
|
-
)
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# @param hit [Hash]
|
39
|
-
# @return [Array<RelatonBib::DocumentIdentifier>]
|
40
|
-
def fetch_docid(hit)
|
41
|
-
id = hit["link"].split("/").last
|
42
|
-
[RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)]
|
43
|
-
end
|
44
|
-
|
45
|
-
# @param hit [Hash]
|
46
|
-
# @param doc [Nokogiri::HTML::Document]
|
47
|
-
# @return [Array<RelatonBib::TypedTitleString>]
|
48
|
-
def fetch_title(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
49
|
-
titles = []
|
50
|
-
if doc
|
51
|
-
title = doc.at("//*[contains(@id, 'title')]")&.text
|
52
|
-
if title && !title.empty?
|
53
|
-
titles << { content: title.gsub(/\n/, " "), type: "main" }
|
54
|
-
end
|
55
|
-
subtitle = doc.at(
|
56
|
-
"//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
|
57
|
-
)&.text
|
58
|
-
titles << { content: subtitle, tipe: "subtitle" } if subtitle
|
59
|
-
end
|
60
|
-
if titles.empty? && hit["title"]
|
61
|
-
titles << { content: hit["title"], type: "main" }
|
62
|
-
end
|
63
|
-
titles.map do |t|
|
64
|
-
title = RelatonBib::FormattedString.new(
|
65
|
-
content: t[:content], language: "en", script: "Latn",
|
66
|
-
)
|
67
|
-
RelatonBib::TypedTitleString.new(type: t[:type], title: title)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
72
|
-
# @return [Array<RelatonBib::FormattedString>]
|
73
|
-
def fetch_abstract(doc)
|
74
|
-
return [] unless doc
|
75
|
-
|
76
|
-
content = doc.at("//h2[.='Abstract']/following-sibling::p",
|
77
|
-
"//div[@class='abstract']/p").text
|
78
|
-
[RelatonBib::FormattedString.new(content: content, language: "en",
|
79
|
-
script: "Latn")]
|
80
|
-
end
|
81
|
-
|
82
|
-
# @param hit [Hash]
|
83
|
-
# @return [Array<RelatonBib::TypedUri>]
|
84
|
-
def fetch_link(hit)
|
85
|
-
[RelatonBib::TypedUri.new(type: "src", content: hit["link"])]
|
86
|
-
end
|
87
|
-
|
88
|
-
# @param hit [Hash]
|
89
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
90
|
-
# @return [Array<RelatonBib::BibliographicDate>]
|
91
|
-
def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
|
92
|
-
on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
|
93
|
-
on ||= fetch_date1(doc) || fetch_date2(doc)
|
94
|
-
[RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
|
95
|
-
end
|
96
|
-
|
97
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
98
|
-
# @return [String]
|
99
|
-
def fetch_date1(doc)
|
100
|
-
d = doc&.at("//h2[@property='dc:issued']")&.attr(:content)
|
101
|
-
d&.match(/\d{4}-\d{2}-\d{2}/)&.to_s
|
102
|
-
end
|
103
|
-
|
104
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
105
|
-
# @return [String]
|
106
|
-
def fetch_date2(doc)
|
107
|
-
d = doc&.at("//h2[contains(@id, 'w3c-recommendation')]")
|
108
|
-
return unless d
|
109
|
-
|
110
|
-
Date.parse(d.attr(:id.match(/\d{2}-\w+-\d{4}/).to_s)).to_s
|
111
|
-
end
|
112
|
-
|
113
|
-
# @param hit [Hash]
|
114
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
115
|
-
# @return [String]
|
116
|
-
def fetch_doctype(hit, doc)
|
117
|
-
if hit["type"]
|
118
|
-
DOCTYPES[hit["type"]]
|
119
|
-
elsif doc
|
120
|
-
type = HitCollection::TYPES.detect do |_k, v|
|
121
|
-
doc.at("//h2[contains(., '#{v}')]/time[@datetime]")
|
122
|
-
end
|
123
|
-
DOCTYPES[type&.first]
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
# @param hit [Hash]
|
128
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
129
|
-
# @return [Array<RelatonBib::ContributionInfo>]
|
130
|
-
def fetch_contributor(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
131
|
-
if doc
|
132
|
-
editors = find_contribs(doc, "Editors").reduce([]) do |mem, ed|
|
133
|
-
c = parse_contrib ed, "editor"
|
134
|
-
mem << c if c
|
135
|
-
mem
|
136
|
-
end
|
137
|
-
contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, ath|
|
138
|
-
ed = mem.detect { |e| e[:id] && e[:id] == ath["data-editor-id"] }
|
139
|
-
if ed
|
140
|
-
ed[:role] << { type: "author" }
|
141
|
-
else
|
142
|
-
mem << parse_contrib(ath, "author")
|
143
|
-
end
|
144
|
-
mem
|
145
|
-
end
|
146
|
-
contribs.map { |c| contrib_info(**c) }
|
147
|
-
else
|
148
|
-
hit["editor"].map do |ed|
|
149
|
-
contrib_info name: ed, role: [{ type: "editor" }]
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
# @param doc [Nokogiri::NTML::Document]
|
155
|
-
# @param type [String]
|
156
|
-
# @return [Array<Nokogiri::XML::Element]
|
157
|
-
def find_contribs(doc, type)
|
158
|
-
doc.xpath("//dt[contains(.,'#{type}')]/following-sibling::dd"\
|
159
|
-
"[preceding-sibling::dt[1][contains(.,'#{type}')]]")
|
160
|
-
end
|
161
|
-
|
162
|
-
# @param element [Nokogiri::XML::Element]
|
163
|
-
# @param type [String]
|
164
|
-
# @return [Hash]
|
165
|
-
def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
|
166
|
-
p = element.at("a")
|
167
|
-
return unless p
|
168
|
-
|
169
|
-
contrib = {
|
170
|
-
name: p.text,
|
171
|
-
url: p[:href],
|
172
|
-
role: [{ type: type }],
|
173
|
-
id: element["data-editor-id"],
|
174
|
-
}
|
175
|
-
org = element.at("a[2]")
|
176
|
-
contrib[:org] = { name: org.text, url: org[:href] } if org
|
177
|
-
contrib
|
178
|
-
end
|
179
|
-
|
180
|
-
# @param name [String]
|
181
|
-
# @param url [String, NilClass]
|
182
|
-
# @param role [Array<Hash>]
|
183
|
-
# @parma org [Hash]
|
184
|
-
# @return [RelatonBib::ContributionInfo]
|
185
|
-
def contrib_info(**args)
|
186
|
-
completename = RelatonBib::LocalizedString.new(args[:name])
|
187
|
-
name = RelatonBib::FullName.new completename: completename
|
188
|
-
af = []
|
189
|
-
if args[:org]
|
190
|
-
org = RelatonBib::Organization.new(**args[:org])
|
191
|
-
af << RelatonBib::Affiliation.new(organization: org)
|
192
|
-
end
|
193
|
-
en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
|
194
|
-
RelatonBib::ContributionInfo.new entity: en, role: args[:role]
|
195
|
-
end
|
196
|
-
|
197
|
-
# @param doc [Nokogiri::HTML::Document]
|
198
|
-
# @return [Array<RelatonBib::DocumentRelation>]
|
199
|
-
def fetch_relation(doc)
|
200
|
-
return [] unless doc && (link = recommendation_link(doc))
|
201
|
-
|
202
|
-
hit = { "link" => link }
|
203
|
-
item = parse_page hit
|
204
|
-
[RelatonBib::DocumentRelation.new(type: "obsoletedBy", bibitem: item)]
|
205
|
-
end
|
206
|
-
|
207
|
-
# @param doc [Nokogiri::HTML::Document]
|
208
|
-
# @return [String, NilClass]
|
209
|
-
def recommendation_link(doc)
|
210
|
-
recom = doc.at("//dt[.='Latest Recommendation:']",
|
211
|
-
"//dt[.='Previous Recommendation:']")
|
212
|
-
return unless recom
|
213
|
-
|
214
|
-
recom.at("./following-sibling::dd/a")[:href]
|
215
|
-
end
|
216
|
-
end
|
217
|
-
end
|
218
|
-
end
|