relaton-w3c 1.11.2 → 1.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -5
- data/.rubocop.yml +1 -1
- data/data/reference.W3C.DSig-label.xml +32 -32
- data/data/reference.W3C.P3P-rdfschema.xml +26 -26
- data/data/reference.W3C.PICS-labels.xml +43 -43
- data/data/reference.W3C.PICS-rules.xml +38 -38
- data/data/reference.W3C.PICS-services.xml +37 -37
- data/data/reference.W3C.daml-oil-reference.xml +39 -39
- data/data/reference.W3C.soap11.xml +56 -56
- data/data/reference.W3C.soap12-part1.xml +38 -38
- data/data/reference.W3C.soap12-part2.xml +38 -38
- data/data/reference.W3C.xkms.xml +50 -50
- data/data/reference.W3C.xml-c14n.xml +15 -15
- data/data/reference.W3C.xmldsig-core.xml +26 -26
- data/data/reference.W3C.xmlenc-core.xml +20 -20
- data/data/reference.W3C.xpath.xml +22 -22
- data/lib/relaton_w3c/data_fetcher.rb +0 -53
- data/lib/relaton_w3c/data_index.rb +52 -44
- data/lib/relaton_w3c/data_parser.rb +1 -1
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +3 -3
- data/lib/relaton_w3c.rb +0 -3
- data/relaton_w3c.gemspec +4 -4
- metadata +22 -11
- data/lib/relaton_w3c/hit.rb +0 -15
- data/lib/relaton_w3c/hit_collection.rb +0 -172
- data/lib/relaton_w3c/scrapper.rb +0 -218
data/lib/relaton_w3c/w3c_bibliography.rb
CHANGED
@@ -9,10 +9,10 @@ module RelatonW3c
|
|
9
9
|
|
10
10
|
class << self
|
11
11
|
# @param text [String]
|
12
|
-
# @return [RelatonW3c::
|
13
|
-
def search(text) # rubocop:disable Metrics/MethodLength
|
12
|
+
# @return [RelatonW3c::W3cBibliographicItem]
|
13
|
+
def search(text) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
14
14
|
ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
|
15
|
-
file = DataIndex.create_from_repo.search(
|
15
|
+
file = DataIndex.create_from_repo.search ref.gsub(" ", "-").squeeze("-")
|
16
16
|
return unless file
|
17
17
|
|
18
18
|
url = "#{SOURCE}#{file}"
|
data/lib/relaton_w3c.rb
CHANGED
@@ -2,9 +2,6 @@ require "relaton_bib"
|
|
2
2
|
require "relaton_w3c/version"
|
3
3
|
require "relaton_w3c/w3c_bibliography"
|
4
4
|
require "relaton_w3c/w3c_bibliographic_item"
|
5
|
-
# require "relaton_w3c/hit_collection"
|
6
|
-
# require "relaton_w3c/hit"
|
7
|
-
# require "relaton_w3c/scrapper"
|
8
5
|
require "relaton_w3c/xml_parser"
|
9
6
|
require "relaton_w3c/bibxml_parser"
|
10
7
|
require "relaton_w3c/hash_converter"
|
data/relaton_w3c.gemspec
CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
"using the IsoBibliographicItem model"
|
15
15
|
spec.homepage = "https://github.com/relaton/relaton-wc3"
|
16
16
|
spec.license = "BSD-2-Clause"
|
17
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
17
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.6.0")
|
18
18
|
|
19
19
|
# spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
20
20
|
|
@@ -39,10 +39,10 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_dependency "linkeddata", "~> 3.1.0"
|
41
41
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
42
|
-
|
43
|
-
spec.add_dependency "rdf", "~>
|
44
|
-
spec.add_dependency "rdf-normalize", "~> 0.4.0"
|
42
|
+
spec.add_dependency "rdf", "~> 3.2.0"
|
43
|
+
spec.add_dependency "rdf-normalize", "~> 0.5.0"
|
45
44
|
spec.add_dependency "relaton-bib", "~> 1.11.0"
|
45
|
+
spec.add_dependency "rubyzip", "~> 2.3.0"
|
46
46
|
spec.add_dependency "shex", "~> 0.6.0"
|
47
47
|
spec.add_dependency "sparql", "~> 3.1.0"
|
48
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.2
|
4
|
+
version: 1.11.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -114,28 +114,28 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 3.
|
117
|
+
version: 3.2.0
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 3.
|
124
|
+
version: 3.2.0
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: rdf-normalize
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.4.0
|
131
|
+
version: 0.5.0
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0.4.0
|
138
|
+
version: 0.5.0
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: relaton-bib
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 1.11.0
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubyzip
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 2.3.0
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 2.3.0
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: shex
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -233,10 +247,7 @@ files:
|
|
233
247
|
- lib/relaton_w3c/data_index.rb
|
234
248
|
- lib/relaton_w3c/data_parser.rb
|
235
249
|
- lib/relaton_w3c/hash_converter.rb
|
236
|
-
- lib/relaton_w3c/hit.rb
|
237
|
-
- lib/relaton_w3c/hit_collection.rb
|
238
250
|
- lib/relaton_w3c/processor.rb
|
239
|
-
- lib/relaton_w3c/scrapper.rb
|
240
251
|
- lib/relaton_w3c/version.rb
|
241
252
|
- lib/relaton_w3c/w3c_bibliographic_item.rb
|
242
253
|
- lib/relaton_w3c/w3c_bibliography.rb
|
@@ -256,14 +267,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
256
267
|
requirements:
|
257
268
|
- - ">="
|
258
269
|
- !ruby/object:Gem::Version
|
259
|
-
version: 2.
|
270
|
+
version: 2.6.0
|
260
271
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
261
272
|
requirements:
|
262
273
|
- - ">="
|
263
274
|
- !ruby/object:Gem::Version
|
264
275
|
version: '0'
|
265
276
|
requirements: []
|
266
|
-
rubygems_version: 3.3
|
277
|
+
rubygems_version: 3.2.3
|
267
278
|
signing_key:
|
268
279
|
specification_version: 4
|
269
280
|
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
data/lib/relaton_w3c/hit.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module RelatonW3c
|
4
|
-
# Hit.
|
5
|
-
class Hit < RelatonBib::Hit
|
6
|
-
#
|
7
|
-
# Parse page.
|
8
|
-
#
|
9
|
-
# @param lang [String, NilClass]
|
10
|
-
# @return [RelatonW3c::W3cBibliographicItem]
|
11
|
-
def fetch(_lang = nil)
|
12
|
-
@fetch ||= Scrapper.parse_page hit
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
data/lib/relaton_w3c/hit_collection.rb
DELETED
@@ -1,172 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "fileutils"
|
4
|
-
require "yaml"
|
5
|
-
|
6
|
-
module RelatonW3c
|
7
|
-
# Page of hit collection.
|
8
|
-
class HitCollection < RelatonBib::HitCollection
|
9
|
-
TYPES = {
|
10
|
-
"CR" => "Candidate Recommendation",
|
11
|
-
"NOTE" => "Group Note",
|
12
|
-
"PER" => "Proposed Edited Recommendation",
|
13
|
-
"PR" => "Proposed Recommendation",
|
14
|
-
"REC" => "Recommendation",
|
15
|
-
"RET" => "Retired",
|
16
|
-
"WD" => "Working Draft",
|
17
|
-
}.freeze
|
18
|
-
DOMAIN = "https://www.w3.org"
|
19
|
-
DATADIR = File.expand_path(".relaton/w3c", Dir.home).freeze
|
20
|
-
DATAFILE = File.expand_path("bibliography.yml", DATADIR).freeze
|
21
|
-
|
22
|
-
# @param ref [String] reference to search
|
23
|
-
def initialize(ref)
|
24
|
-
%r{
|
25
|
-
^(?:W3C\s)?
|
26
|
-
(?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
|
27
|
-
Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
|
28
|
-
Recommendation|Retired|Working\sDraft))? # type
|
29
|
-
\s?
|
30
|
-
(?<title_date>.+) # title_date
|
31
|
-
}x =~ ref
|
32
|
-
super
|
33
|
-
@array = from_yaml title_date, type
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
#
|
39
|
-
# Fetch data form yaml
|
40
|
-
#
|
41
|
-
# @param title_date [String]
|
42
|
-
# @param type [String]
|
43
|
-
# @return [Array<Hash>]
|
44
|
-
def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
45
|
-
/(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
|
46
|
-
title ||= title_date
|
47
|
-
result = data.select do |hit|
|
48
|
-
(hit["title"].casecmp?(title) ||
|
49
|
-
hit["link"].split("/").last.match?(/-#{title}-/)) &&
|
50
|
-
type_date_filter(hit, type, date)
|
51
|
-
end
|
52
|
-
if result.empty?
|
53
|
-
result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
|
54
|
-
end
|
55
|
-
result.map { |h| Hit.new(h, self) }
|
56
|
-
end
|
57
|
-
|
58
|
-
# @param hit [Hash]
|
59
|
-
# @param type [String]
|
60
|
-
# @param date [String]
|
61
|
-
# @return [TrueClass, FalseClass]
|
62
|
-
def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
|
63
|
-
if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
|
64
|
-
history = get_history hit, type, date
|
65
|
-
return false unless history.any?
|
66
|
-
|
67
|
-
hit["type"] = short_type type
|
68
|
-
hit["datepub"] = history.first.at("td").text
|
69
|
-
hit["link"] = history.first.at("a")[:href]
|
70
|
-
end
|
71
|
-
true
|
72
|
-
end
|
73
|
-
|
74
|
-
# @param hit [Hash]
|
75
|
-
# @param type [String]
|
76
|
-
# @param date [String]
|
77
|
-
# @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
|
78
|
-
def get_history(hit, type, date)
|
79
|
-
resp = Net::HTTP.get URI.parse(HitCollection::DOMAIN + hit["history"])
|
80
|
-
history_doc = Nokogiri::HTML resp
|
81
|
-
history = history_doc.xpath(
|
82
|
-
"//table//a[contains(.,'#{long_type(type)}')]/../..",
|
83
|
-
)
|
84
|
-
return filter_history_by_date(history, history_doc, type, date) if date
|
85
|
-
|
86
|
-
history
|
87
|
-
end
|
88
|
-
|
89
|
-
# @param history [Nokogiri::XML::NodeSet]
|
90
|
-
# @param history_doc [Nokogiri::HTML::NodeSet]
|
91
|
-
# @param type [String]
|
92
|
-
# @param date [String]
|
93
|
-
# @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
|
94
|
-
def filter_history_by_date(history, history_doc, type, date)
|
95
|
-
if type
|
96
|
-
history.select do |h|
|
97
|
-
h.at("td[@class='table_datecol']").text == date
|
98
|
-
end
|
99
|
-
else
|
100
|
-
history_doc.xpath(
|
101
|
-
"//table//td[@class='table_datecol'][.='#{date}']/..",
|
102
|
-
)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
#
|
107
|
-
# Convetr long type name to short
|
108
|
-
#
|
109
|
-
# @param type [String]
|
110
|
-
# @return [String]
|
111
|
-
def short_type(type)
|
112
|
-
tp = TYPES.select { |_, v| v == type }.keys
|
113
|
-
tp.first || type
|
114
|
-
end
|
115
|
-
|
116
|
-
#
|
117
|
-
# Convert shot type name to long
|
118
|
-
#
|
119
|
-
# @param [String]
|
120
|
-
# @return [String]
|
121
|
-
def long_type(type)
|
122
|
-
TYPES[type] || type
|
123
|
-
end
|
124
|
-
|
125
|
-
#
|
126
|
-
# Fetches YAML data
|
127
|
-
#
|
128
|
-
# @return [Hash]
|
129
|
-
def data
|
130
|
-
FileUtils.mkdir_p DATADIR
|
131
|
-
ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
132
|
-
fetch_data if !ctime || ctime.to_date < Date.today
|
133
|
-
@data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
|
134
|
-
end
|
135
|
-
|
136
|
-
#
|
137
|
-
# fetch data form server and save it to file.
|
138
|
-
#
|
139
|
-
def fetch_data
|
140
|
-
resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
|
141
|
-
# return if there aren't any changes since last fetching
|
142
|
-
return unless resp.code == "200"
|
143
|
-
|
144
|
-
doc = Nokogiri::HTML resp.body
|
145
|
-
@data = doc.xpath("//ul[@id='container']/li").map do |h_el|
|
146
|
-
link = h_el.at("h2/a")
|
147
|
-
pubdetails = h_el.at("p[@class='pubdetails']")
|
148
|
-
fetch_hit h_el, link, pubdetails
|
149
|
-
end
|
150
|
-
File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
|
151
|
-
end
|
152
|
-
|
153
|
-
# @param h_el [Nokogiri::XML::Element]
|
154
|
-
# @param link [Nokogiri::XML::Element]
|
155
|
-
# @param pubdetails [Nokogiri::XML::Element]
|
156
|
-
def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
157
|
-
datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
|
158
|
-
editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
|
159
|
-
keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
|
160
|
-
{
|
161
|
-
"title" => link.text.gsub("\u00a0", " "),
|
162
|
-
"link" => link[:href],
|
163
|
-
"type" => h_el.at("div").text.upcase,
|
164
|
-
"workgroup" => h_el.xpath("p[@class='deliverer']").map(&:text),
|
165
|
-
"datepub" => datepub,
|
166
|
-
"history" => pubdetails.at("a[text()='History']")[:href],
|
167
|
-
"editor" => editor,
|
168
|
-
"keyword" => keyword,
|
169
|
-
}
|
170
|
-
end
|
171
|
-
end
|
172
|
-
end
|
data/lib/relaton_w3c/scrapper.rb
DELETED
@@ -1,218 +0,0 @@
|
|
1
|
-
module RelatonW3c
|
2
|
-
class Scrapper
|
3
|
-
DOCTYPES = {
|
4
|
-
"CR" => "candidateRecommendation",
|
5
|
-
"NOTE" => "groupNote",
|
6
|
-
"PER" => "proposedEditedRecommendation",
|
7
|
-
"PR" => "proposedRecommendation",
|
8
|
-
"REC" => "recommendation",
|
9
|
-
"RET" => "retired",
|
10
|
-
"WD" => "workingDraft",
|
11
|
-
}.freeze
|
12
|
-
|
13
|
-
class << self
|
14
|
-
# @param hit [Hash]
|
15
|
-
# @return [RelatonW3c::W3cBibliographicItem]
|
16
|
-
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
17
|
-
resp = Net::HTTP.get_response URI.parse(hit["link"])
|
18
|
-
doc = resp.code == "200" ? Nokogiri::HTML(resp.body) : nil
|
19
|
-
W3cBibliographicItem.new(
|
20
|
-
type: "standard",
|
21
|
-
docid: fetch_docid(hit),
|
22
|
-
fetched: Date.today.to_s,
|
23
|
-
language: ["en"],
|
24
|
-
script: ["Latn"],
|
25
|
-
title: fetch_title(hit, doc),
|
26
|
-
abstract: fetch_abstract(doc),
|
27
|
-
link: fetch_link(hit),
|
28
|
-
date: fetch_date(hit, doc),
|
29
|
-
doctype: fetch_doctype(hit, doc),
|
30
|
-
contributor: fetch_contributor(hit, doc),
|
31
|
-
relation: fetch_relation(doc),
|
32
|
-
keyword: hit["keyword"],
|
33
|
-
)
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# @param hit [Hash]
|
39
|
-
# @return [Array<RelatonBib::DocumentIdentifier>]
|
40
|
-
def fetch_docid(hit)
|
41
|
-
id = hit["link"].split("/").last
|
42
|
-
[RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)]
|
43
|
-
end
|
44
|
-
|
45
|
-
# @param hit [Hash]
|
46
|
-
# @param doc [Nokogiri::HTML::Document]
|
47
|
-
# @return [Array<RelatonBib::TypedTitleString>]
|
48
|
-
def fetch_title(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
49
|
-
titles = []
|
50
|
-
if doc
|
51
|
-
title = doc.at("//*[contains(@id, 'title')]")&.text
|
52
|
-
if title && !title.empty?
|
53
|
-
titles << { content: title.gsub(/\n/, " "), type: "main" }
|
54
|
-
end
|
55
|
-
subtitle = doc.at(
|
56
|
-
"//h2[@id='subtitle']|//p[contains(@class, 'subline')]",
|
57
|
-
)&.text
|
58
|
-
titles << { content: subtitle, tipe: "subtitle" } if subtitle
|
59
|
-
end
|
60
|
-
if titles.empty? && hit["title"]
|
61
|
-
titles << { content: hit["title"], type: "main" }
|
62
|
-
end
|
63
|
-
titles.map do |t|
|
64
|
-
title = RelatonBib::FormattedString.new(
|
65
|
-
content: t[:content], language: "en", script: "Latn",
|
66
|
-
)
|
67
|
-
RelatonBib::TypedTitleString.new(type: t[:type], title: title)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
72
|
-
# @return [Array<RelatonBib::FormattedString>]
|
73
|
-
def fetch_abstract(doc)
|
74
|
-
return [] unless doc
|
75
|
-
|
76
|
-
content = doc.at("//h2[.='Abstract']/following-sibling::p",
|
77
|
-
"//div[@class='abstract']/p").text
|
78
|
-
[RelatonBib::FormattedString.new(content: content, language: "en",
|
79
|
-
script: "Latn")]
|
80
|
-
end
|
81
|
-
|
82
|
-
# @param hit [Hash]
|
83
|
-
# @return [Array<RelatonBib::TypedUri>]
|
84
|
-
def fetch_link(hit)
|
85
|
-
[RelatonBib::TypedUri.new(type: "src", content: hit["link"])]
|
86
|
-
end
|
87
|
-
|
88
|
-
# @param hit [Hash]
|
89
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
90
|
-
# @return [Array<RelatonBib::BibliographicDate>]
|
91
|
-
def fetch_date(hit, doc) # rubocop:disable Metrics/CyclomaticComplexity
|
92
|
-
on = hit["datepub"] || doc&.at("//h2/time[@datetime]")&.attr(:datetime)
|
93
|
-
on ||= fetch_date1(doc) || fetch_date2(doc)
|
94
|
-
[RelatonBib::BibliographicDate.new(type: "published", on: on)] if on
|
95
|
-
end
|
96
|
-
|
97
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
98
|
-
# @return [String]
|
99
|
-
def fetch_date1(doc)
|
100
|
-
d = doc&.at("//h2[@property='dc:issued']")&.attr(:content)
|
101
|
-
d&.match(/\d{4}-\d{2}-\d{2}/)&.to_s
|
102
|
-
end
|
103
|
-
|
104
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
105
|
-
# @return [String]
|
106
|
-
def fetch_date2(doc)
|
107
|
-
d = doc&.at("//h2[contains(@id, 'w3c-recommendation')]")
|
108
|
-
return unless d
|
109
|
-
|
110
|
-
Date.parse(d.attr(:id.match(/\d{2}-\w+-\d{4}/).to_s)).to_s
|
111
|
-
end
|
112
|
-
|
113
|
-
# @param hit [Hash]
|
114
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
115
|
-
# @return [String]
|
116
|
-
def fetch_doctype(hit, doc)
|
117
|
-
if hit["type"]
|
118
|
-
DOCTYPES[hit["type"]]
|
119
|
-
elsif doc
|
120
|
-
type = HitCollection::TYPES.detect do |_k, v|
|
121
|
-
doc.at("//h2[contains(., '#{v}')]/time[@datetime]")
|
122
|
-
end
|
123
|
-
DOCTYPES[type&.first]
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
# @param hit [Hash]
|
128
|
-
# @param doc [Nokogiri::HTML::Document, NilClass]
|
129
|
-
# @return [Array<RelatonBib::ContributionInfo>]
|
130
|
-
def fetch_contributor(hit, doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
131
|
-
if doc
|
132
|
-
editors = find_contribs(doc, "Editors").reduce([]) do |mem, ed|
|
133
|
-
c = parse_contrib ed, "editor"
|
134
|
-
mem << c if c
|
135
|
-
mem
|
136
|
-
end
|
137
|
-
contribs = find_contribs(doc, "Authors").reduce(editors) do |mem, ath|
|
138
|
-
ed = mem.detect { |e| e[:id] && e[:id] == ath["data-editor-id"] }
|
139
|
-
if ed
|
140
|
-
ed[:role] << { type: "author" }
|
141
|
-
else
|
142
|
-
mem << parse_contrib(ath, "author")
|
143
|
-
end
|
144
|
-
mem
|
145
|
-
end
|
146
|
-
contribs.map { |c| contrib_info(**c) }
|
147
|
-
else
|
148
|
-
hit["editor"].map do |ed|
|
149
|
-
contrib_info name: ed, role: [{ type: "editor" }]
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
# @param doc [Nokogiri::NTML::Document]
|
155
|
-
# @param type [String]
|
156
|
-
# @return [Array<Nokogiri::XML::Element]
|
157
|
-
def find_contribs(doc, type)
|
158
|
-
doc.xpath("//dt[contains(.,'#{type}')]/following-sibling::dd"\
|
159
|
-
"[preceding-sibling::dt[1][contains(.,'#{type}')]]")
|
160
|
-
end
|
161
|
-
|
162
|
-
# @param element [Nokogiri::XML::Element]
|
163
|
-
# @param type [String]
|
164
|
-
# @return [Hash]
|
165
|
-
def parse_contrib(element, type) # rubocop:disable Metrics/MethodLength
|
166
|
-
p = element.at("a")
|
167
|
-
return unless p
|
168
|
-
|
169
|
-
contrib = {
|
170
|
-
name: p.text,
|
171
|
-
url: p[:href],
|
172
|
-
role: [{ type: type }],
|
173
|
-
id: element["data-editor-id"],
|
174
|
-
}
|
175
|
-
org = element.at("a[2]")
|
176
|
-
contrib[:org] = { name: org.text, url: org[:href] } if org
|
177
|
-
contrib
|
178
|
-
end
|
179
|
-
|
180
|
-
# @param name [String]
|
181
|
-
# @param url [String, NilClass]
|
182
|
-
# @param role [Array<Hash>]
|
183
|
-
# @parma org [Hash]
|
184
|
-
# @return [RelatonBib::ContributionInfo]
|
185
|
-
def contrib_info(**args)
|
186
|
-
completename = RelatonBib::LocalizedString.new(args[:name])
|
187
|
-
name = RelatonBib::FullName.new completename: completename
|
188
|
-
af = []
|
189
|
-
if args[:org]
|
190
|
-
org = RelatonBib::Organization.new(**args[:org])
|
191
|
-
af << RelatonBib::Affiliation.new(organization: org)
|
192
|
-
end
|
193
|
-
en = RelatonBib::Person.new name: name, url: args[:url], affiliation: af
|
194
|
-
RelatonBib::ContributionInfo.new entity: en, role: args[:role]
|
195
|
-
end
|
196
|
-
|
197
|
-
# @param doc [Nokogiri::HTML::Document]
|
198
|
-
# @return [Array<RelatonBib::DocumentRelation>]
|
199
|
-
def fetch_relation(doc)
|
200
|
-
return [] unless doc && (link = recommendation_link(doc))
|
201
|
-
|
202
|
-
hit = { "link" => link }
|
203
|
-
item = parse_page hit
|
204
|
-
[RelatonBib::DocumentRelation.new(type: "obsoletedBy", bibitem: item)]
|
205
|
-
end
|
206
|
-
|
207
|
-
# @param doc [Nokogiri::HTML::Document]
|
208
|
-
# @return [String, NilClass]
|
209
|
-
def recommendation_link(doc)
|
210
|
-
recom = doc.at("//dt[.='Latest Recommendation:']",
|
211
|
-
"//dt[.='Previous Recommendation:']")
|
212
|
-
return unless recom
|
213
|
-
|
214
|
-
recom.at("./following-sibling::dd/a")[:href]
|
215
|
-
end
|
216
|
-
end
|
217
|
-
end
|
218
|
-
end
|