linkheaders-processor 0.1.18 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +18 -18
- data/lib/linkheaders/link.rb +1 -1
- data/lib/linkheaders/processor/version.rb +1 -1
- data/lib/linkheaders/processor.rb +25 -15
- data/spec/linkheader/parser_spec.rb +38 -11
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 65747de845763341178717385337c65ed5d78a7df57bccecee17f392d8330b12
|
|
4
|
+
data.tar.gz: 1e2c5ae203200e40e8fc238b211f63491bc2ba4756d3bd807ccb916a7c9b270f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a92597e6f649e5abdc524862c051a0a2d442c753976ea5cea70ce0ce4b1c30261d0f0f3e9e3e577ad55c396cad7a780c806f068012ef26a3ae91a7cc8fbb109e
|
|
7
|
+
data.tar.gz: bdd96b086e950c0b427ec8d15ed151723639d72b81d9f3a15adfca22bf7326c51ee9711cf0aa451a3e1d472cb14d8c46d6ced944627058d5e87fa695dbcde6f6
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
linkheaders-processor (0.1.
|
|
4
|
+
linkheaders-processor (0.1.19)
|
|
5
5
|
json (~> 2.0)
|
|
6
6
|
json-ld (~> 3.2)
|
|
7
7
|
json-ld-preloaded (~> 3.2)
|
|
@@ -13,8 +13,8 @@ PATH
|
|
|
13
13
|
GEM
|
|
14
14
|
remote: https://rubygems.org/
|
|
15
15
|
specs:
|
|
16
|
-
addressable (2.8.
|
|
17
|
-
public_suffix (>= 2.0.2, <
|
|
16
|
+
addressable (2.8.5)
|
|
17
|
+
public_suffix (>= 2.0.2, < 6.0)
|
|
18
18
|
ast (2.4.2)
|
|
19
19
|
diff-lcs (1.5.0)
|
|
20
20
|
domain_name (0.5.20190701)
|
|
@@ -39,7 +39,7 @@ GEM
|
|
|
39
39
|
faraday-encoding (0.0.5)
|
|
40
40
|
faraday
|
|
41
41
|
faraday-excon (1.1.0)
|
|
42
|
-
faraday-http-cache (2.
|
|
42
|
+
faraday-http-cache (2.5.0)
|
|
43
43
|
faraday (>= 0.8)
|
|
44
44
|
faraday-httpclient (1.0.1)
|
|
45
45
|
faraday-multipart (1.0.4)
|
|
@@ -51,21 +51,21 @@ GEM
|
|
|
51
51
|
faraday-retry (1.0.3)
|
|
52
52
|
faraday_middleware (1.2.0)
|
|
53
53
|
faraday (~> 1.0)
|
|
54
|
-
fastimage (2.2.
|
|
54
|
+
fastimage (2.2.7)
|
|
55
55
|
htmlentities (4.3.4)
|
|
56
56
|
http-accept (1.7.0)
|
|
57
57
|
http-cookie (1.0.5)
|
|
58
58
|
domain_name (~> 0.5)
|
|
59
59
|
json (2.6.2)
|
|
60
|
-
json-canonicalization (0.3.
|
|
61
|
-
json-ld (3.2.
|
|
60
|
+
json-canonicalization (0.3.2)
|
|
61
|
+
json-ld (3.2.5)
|
|
62
62
|
htmlentities (~> 4.3)
|
|
63
|
-
json-canonicalization (~> 0.3)
|
|
63
|
+
json-canonicalization (~> 0.3, >= 0.3.2)
|
|
64
64
|
link_header (~> 0.0, >= 0.0.8)
|
|
65
65
|
multi_json (~> 1.15)
|
|
66
|
-
rack (
|
|
67
|
-
rdf (~> 3.2, >= 3.2.
|
|
68
|
-
json-ld-preloaded (3.2.
|
|
66
|
+
rack (>= 2.2, < 4)
|
|
67
|
+
rdf (~> 3.2, >= 3.2.10)
|
|
68
|
+
json-ld-preloaded (3.2.2)
|
|
69
69
|
json-ld (~> 3.2)
|
|
70
70
|
rdf (~> 3.2)
|
|
71
71
|
link_header (0.0.8)
|
|
@@ -79,24 +79,24 @@ GEM
|
|
|
79
79
|
fastimage (~> 2.2)
|
|
80
80
|
nesty (~> 1.0)
|
|
81
81
|
nokogiri (~> 1.11)
|
|
82
|
-
mime-types (3.
|
|
82
|
+
mime-types (3.5.1)
|
|
83
83
|
mime-types-data (~> 3.2015)
|
|
84
|
-
mime-types-data (3.
|
|
84
|
+
mime-types-data (3.2023.0808)
|
|
85
85
|
multi_json (1.15.0)
|
|
86
86
|
multipart-post (2.2.3)
|
|
87
87
|
nesty (1.0.2)
|
|
88
88
|
netrc (0.11.0)
|
|
89
|
-
nokogiri (1.
|
|
89
|
+
nokogiri (1.15.4-x86_64-linux)
|
|
90
90
|
racc (~> 1.4)
|
|
91
91
|
parallel (1.22.1)
|
|
92
92
|
parser (3.1.2.0)
|
|
93
93
|
ast (~> 2.4.1)
|
|
94
|
-
public_suffix (
|
|
95
|
-
racc (1.
|
|
96
|
-
rack (
|
|
94
|
+
public_suffix (5.0.3)
|
|
95
|
+
racc (1.7.1)
|
|
96
|
+
rack (3.0.8)
|
|
97
97
|
rainbow (3.1.1)
|
|
98
98
|
rake (13.0.6)
|
|
99
|
-
rdf (3.2.
|
|
99
|
+
rdf (3.2.11)
|
|
100
100
|
link_header (~> 0.0, >= 0.0.8)
|
|
101
101
|
regexp_parser (2.5.0)
|
|
102
102
|
rest-client (2.1.0)
|
data/lib/linkheaders/link.rb
CHANGED
|
@@ -127,7 +127,7 @@ module LinkHeaders
|
|
|
127
127
|
if l.relation != link.relation
|
|
128
128
|
@warnings |= ['WARN: Found identical hrefs with different relation types. This may be suspicious. Both have been retained']
|
|
129
129
|
else
|
|
130
|
-
@warnings |= [
|
|
130
|
+
@warnings |= ["WARN: found apparent duplicate #{l.relation} #{l.href} EQUALS#{link.href}. Ignoring and returning known link #{l.relation} #{l.href}"]
|
|
131
131
|
link = l
|
|
132
132
|
end
|
|
133
133
|
end
|
|
@@ -59,14 +59,14 @@ module LinkHeaders
|
|
|
59
59
|
return [[], []]
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
warn "HTTPlinks #{newlinks.inspect}"
|
|
62
|
+
_newlinks = parse_http_link_headers(head)
|
|
63
|
+
# warn "HTTPlinks #{newlinks.inspect}"
|
|
64
64
|
|
|
65
|
-
['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
|
|
65
|
+
['text/html', 'text/xhtml+xml', 'application/xhtml+xml'].each do |format|
|
|
66
66
|
if head[:content_type] and head[:content_type].match(format)
|
|
67
67
|
warn "found #{format} content - parsing"
|
|
68
|
-
|
|
69
|
-
warn "htmllinks #{htmllinks.inspect}"
|
|
68
|
+
_htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
|
|
69
|
+
# warn "htmllinks #{htmllinks.inspect}"
|
|
70
70
|
end
|
|
71
71
|
end
|
|
72
72
|
end
|
|
@@ -99,10 +99,12 @@ module LinkHeaders
|
|
|
99
99
|
# warn "link is: #{part}"
|
|
100
100
|
|
|
101
101
|
section = part.split(';') # ["<https://example.one.com>", "rel='preconnect'"]
|
|
102
|
-
|
|
102
|
+
warn section
|
|
103
103
|
next unless section[0]
|
|
104
104
|
|
|
105
105
|
href = section[0][/<(.*)>/, 1]
|
|
106
|
+
next unless href # this is mandatory!
|
|
107
|
+
|
|
106
108
|
next unless section[1]
|
|
107
109
|
|
|
108
110
|
sections = {}
|
|
@@ -123,10 +125,11 @@ module LinkHeaders
|
|
|
123
125
|
relation = sections['rel']
|
|
124
126
|
sections.delete('rel')
|
|
125
127
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
|
126
|
-
warn "RELATIONS #{relations}"
|
|
128
|
+
# warn "HEADERS RELATIONS #{relations}"
|
|
127
129
|
|
|
128
130
|
relations.each do |rel|
|
|
129
131
|
next unless rel.match?(/\w/)
|
|
132
|
+
puts "LICENCE is #{href}\n\n" if rel == "license"
|
|
130
133
|
newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
|
|
131
134
|
end
|
|
132
135
|
end
|
|
@@ -143,7 +146,6 @@ module LinkHeaders
|
|
|
143
146
|
# an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
|
|
144
147
|
newlinks = Array.new
|
|
145
148
|
m.head_links.each do |l|
|
|
146
|
-
warn "HTML head link is: #{l.inspect}"
|
|
147
149
|
next unless l[:href] and l[:rel] # required
|
|
148
150
|
|
|
149
151
|
anchor = l[:anchor] || default_anchor
|
|
@@ -154,7 +156,7 @@ module LinkHeaders
|
|
|
154
156
|
l.delete(:href)
|
|
155
157
|
|
|
156
158
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
|
157
|
-
warn "RELATIONS #{relations}"
|
|
159
|
+
# warn "BODY RELATIONS #{relations}"
|
|
158
160
|
|
|
159
161
|
relations.each do |rel|
|
|
160
162
|
next unless rel.match?(/\w/)
|
|
@@ -189,9 +191,10 @@ module LinkHeaders
|
|
|
189
191
|
|
|
190
192
|
def processJSONLinkset(href:)
|
|
191
193
|
_headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
|
|
192
|
-
# warn "Linkset body #{linkset.inspect}"
|
|
194
|
+
# warn "Linkset body #{linkset.inspect}\n\nLinkset headers #{_headers}\n\n"
|
|
193
195
|
newlinks = Array.new
|
|
194
196
|
return nil unless linkset
|
|
197
|
+
# warn "linkset #{linkset}"
|
|
195
198
|
|
|
196
199
|
# linkset = '{ "linkset":
|
|
197
200
|
# [
|
|
@@ -208,20 +211,28 @@ module LinkHeaders
|
|
|
208
211
|
# }'
|
|
209
212
|
|
|
210
213
|
linkset = JSON.parse(linkset)
|
|
214
|
+
# warn "linkset #{linkset}"
|
|
215
|
+
if linkset['data'] and linkset['data']['linkset']
|
|
216
|
+
linkset['linkset'] = linkset['data']['linkset']
|
|
217
|
+
end
|
|
218
|
+
return nil unless linkset['linkset'].first
|
|
211
219
|
linkset['linkset'].each do |ls|
|
|
212
220
|
# warn ls.inspect, "\n"
|
|
213
221
|
anchor = ls['anchor'] || @default_anchor
|
|
214
|
-
ls.delete('anchor') if ls['anchor'] # we need to delete since all others have a list as a value
|
|
222
|
+
ls.delete('anchor') if ls['anchor'] # we need to delete since almost all others have a list as a value
|
|
215
223
|
attrhash = {}
|
|
216
224
|
# warn ls.keys, "\n"
|
|
217
225
|
|
|
218
|
-
ls.each_key do |relation| #
|
|
219
|
-
|
|
226
|
+
ls.each_key do |relation| # relation = e.g. "item", "described-by". "cite"
|
|
227
|
+
href = ""
|
|
228
|
+
# warn relation
|
|
220
229
|
# warn ls[reltype], "\n"
|
|
230
|
+
ls[relation] = [ls[relation]] unless ls[relation].is_a? Array # force it to be a list, if it isn't
|
|
221
231
|
ls[relation].each do |attrs| # attr = e.g. {"href": "http://example.com/foo1", "type": "text/html"}
|
|
232
|
+
# warn "ATTR: #{attrs}"
|
|
222
233
|
next unless attrs['href'] # this is a required attribute of a linkset relation
|
|
223
|
-
|
|
224
234
|
href = attrs['href']
|
|
235
|
+
attrs.delete("href")
|
|
225
236
|
# now go through the other attributes of that relation
|
|
226
237
|
attrs.each do |attr, val| # attr = e.g. "type"; val = "text/html"
|
|
227
238
|
attrhash[attr.to_sym] = val
|
|
@@ -229,7 +240,6 @@ module LinkHeaders
|
|
|
229
240
|
end
|
|
230
241
|
|
|
231
242
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
|
232
|
-
|
|
233
243
|
relations.each do |rel|
|
|
234
244
|
next unless rel.match?(/\w/)
|
|
235
245
|
newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
|
|
@@ -12,26 +12,53 @@ factory = p.factory # LinkHeaders::LinkFactory
|
|
|
12
12
|
|
|
13
13
|
RSpec.describe LinkHeaders::Processor do
|
|
14
14
|
|
|
15
|
-
it 'has a version number' do
|
|
15
|
+
it 'Benchmark: has a version number' do
|
|
16
16
|
expect(LinkHeaders::Processor::VERSION).not_to be nil
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
it "should find
|
|
20
|
-
expect(factory.all_links.length).to eq
|
|
19
|
+
it "Benchmark: should find 8 links in total" do
|
|
20
|
+
expect(factory.all_links.length).to eq 8
|
|
21
21
|
end
|
|
22
|
-
it "should find find href on all links" do
|
|
23
|
-
expect(factory.all_links.select{|l| l.href}.length).to eq
|
|
22
|
+
it "Benchmark: should find find href on all links" do
|
|
23
|
+
expect(factory.all_links.select{|l| l.href}.length).to eq 8
|
|
24
24
|
end
|
|
25
|
-
it "should find find
|
|
26
|
-
expect(factory.all_links.select{|l| l.anchor}.length).to eq
|
|
25
|
+
it "Benchmark: should find find anchor on all links" do
|
|
26
|
+
expect(factory.all_links.select{|l| l.anchor}.length).to eq 8
|
|
27
27
|
end
|
|
28
|
-
it "should find 5 links in mixed HTTP and HTML headers" do
|
|
29
|
-
expect(factory.all_links.select{|l| l.relation}.length).to eq
|
|
28
|
+
it "Benchmark: should find 5 links in mixed HTTP and HTML headers" do
|
|
29
|
+
expect(factory.all_links.select{|l| l.relation}.length).to eq 8
|
|
30
30
|
end
|
|
31
|
-
it "should find one citeas in mixed HTTP and HTML headers" do
|
|
31
|
+
it "Benchmark: should find one citeas in mixed HTTP and HTML headers" do
|
|
32
32
|
expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
|
|
33
33
|
end
|
|
34
|
-
it "should find described-by in mixed HTTP and HTML headers" do
|
|
34
|
+
it "Benchmark: should find described-by in mixed HTTP and HTML headers" do
|
|
35
35
|
expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
|
|
36
36
|
end
|
|
37
|
+
|
|
38
|
+
url2 = "https://doi.org/10.7910/DVN/Z2JD58"
|
|
39
|
+
p2 = LinkHeaders::Processor.new(default_anchor: url2)
|
|
40
|
+
r2 = RestClient.get(url2)
|
|
41
|
+
p2.extract_and_parse(response: r2)
|
|
42
|
+
factory2 = p2.factory # LinkHeaders::LinkFactory
|
|
43
|
+
|
|
44
|
+
it "Dataverse: should find 29 links in total" do
|
|
45
|
+
expect(factory2.all_links.length).to eq 28
|
|
46
|
+
end
|
|
47
|
+
it "Dataverse: should find find href on all links" do
|
|
48
|
+
expect(factory2.all_links.select{|l| l.href}.length).to eq 28
|
|
49
|
+
end
|
|
50
|
+
it "Dataverse: should find find anchor on all links" do
|
|
51
|
+
expect(factory2.all_links.select{|l| l.anchor}.length).to eq 28
|
|
52
|
+
end
|
|
53
|
+
it "Dataverse: should find one citeas" do
|
|
54
|
+
expect(factory2.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
|
|
55
|
+
end
|
|
56
|
+
it "Dataverse: should find 2 described-by" do
|
|
57
|
+
expect(factory2.all_links.select{|l| l.relation == 'describedby'}.length).to eq 2
|
|
58
|
+
end
|
|
59
|
+
it "Dataverse: should find 1 license" do
|
|
60
|
+
expect(factory2.all_links.select{|l| l.relation == 'license'}.length).to eq 1
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
|
|
37
64
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: linkheaders-processor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.19
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Mark Wilkinson
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-09-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|
|
@@ -168,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
168
168
|
- !ruby/object:Gem::Version
|
|
169
169
|
version: '0'
|
|
170
170
|
requirements: []
|
|
171
|
-
rubygems_version: 3.
|
|
171
|
+
rubygems_version: 3.3.23
|
|
172
172
|
signing_key:
|
|
173
173
|
specification_version: 4
|
|
174
174
|
summary: A parser/processor for Link Headers and Linksets in both JSON and Text formats.
|