linkheaders-processor 0.1.18 → 0.1.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +25 -23
- data/README.md +1 -1
- data/lib/linkheaders/link.rb +1 -1
- data/lib/linkheaders/processor/version.rb +1 -1
- data/lib/linkheaders/processor.rb +25 -15
- data/spec/linkheader/parser_spec.rb +38 -11
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d24a4ec87d13cc6f5fb9ba02ffa92f898782ee520ce4e4df19c424fc1bf52133
|
4
|
+
data.tar.gz: 5f95faf4fc44d7334df1cd2311c0d59aa3cd079fa0d5de92ffa793b1b280e69f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95e6929649e7f9e04b91de7e01f261204846325d65d54059ea4219b634723b29b64138bcfaf487959be2588a66dbf6a29d73dc836123331fc7e7f5b0335f32fc
|
7
|
+
data.tar.gz: dce8353665fa1110dbeacc2efe4e98614425e17ac431a936fc3b96cbf7b0af1d8346c25997adea42753b4efa6de341a2a2d9eb74bb774a3a564c9b50093c6a21
|
data/Gemfile.lock
CHANGED
@@ -1,25 +1,26 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
linkheaders-processor (0.1.
|
4
|
+
linkheaders-processor (0.1.20)
|
5
5
|
json (~> 2.0)
|
6
6
|
json-ld (~> 3.2)
|
7
7
|
json-ld-preloaded (~> 3.2)
|
8
8
|
link_header (~> 0.0.8)
|
9
9
|
metainspector (~> 5.11.2)
|
10
10
|
rest-client (~> 2.1)
|
11
|
-
securerandom
|
11
|
+
securerandom
|
12
12
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
-
addressable (2.8.
|
17
|
-
public_suffix (>= 2.0.2, <
|
16
|
+
addressable (2.8.5)
|
17
|
+
public_suffix (>= 2.0.2, < 6.0)
|
18
18
|
ast (2.4.2)
|
19
|
+
bcp47_spec (0.2.1)
|
19
20
|
diff-lcs (1.5.0)
|
20
21
|
domain_name (0.5.20190701)
|
21
22
|
unf (>= 0.0.5, < 1.0.0)
|
22
|
-
faraday (1.10.
|
23
|
+
faraday (1.10.3)
|
23
24
|
faraday-em_http (~> 1.0)
|
24
25
|
faraday-em_synchrony (~> 1.0)
|
25
26
|
faraday-excon (~> 1.1)
|
@@ -39,7 +40,7 @@ GEM
|
|
39
40
|
faraday-encoding (0.0.5)
|
40
41
|
faraday
|
41
42
|
faraday-excon (1.1.0)
|
42
|
-
faraday-http-cache (2.
|
43
|
+
faraday-http-cache (2.5.0)
|
43
44
|
faraday (>= 0.8)
|
44
45
|
faraday-httpclient (1.0.1)
|
45
46
|
faraday-multipart (1.0.4)
|
@@ -51,23 +52,23 @@ GEM
|
|
51
52
|
faraday-retry (1.0.3)
|
52
53
|
faraday_middleware (1.2.0)
|
53
54
|
faraday (~> 1.0)
|
54
|
-
fastimage (2.2.
|
55
|
+
fastimage (2.2.7)
|
55
56
|
htmlentities (4.3.4)
|
56
57
|
http-accept (1.7.0)
|
57
58
|
http-cookie (1.0.5)
|
58
59
|
domain_name (~> 0.5)
|
59
60
|
json (2.6.2)
|
60
|
-
json-canonicalization (0.3.
|
61
|
-
json-ld (3.
|
61
|
+
json-canonicalization (0.3.2)
|
62
|
+
json-ld (3.3.0)
|
62
63
|
htmlentities (~> 4.3)
|
63
|
-
json-canonicalization (~> 0.3)
|
64
|
+
json-canonicalization (~> 0.3, >= 0.3.2)
|
64
65
|
link_header (~> 0.0, >= 0.0.8)
|
65
66
|
multi_json (~> 1.15)
|
66
|
-
rack (
|
67
|
-
rdf (~> 3.
|
68
|
-
json-ld-preloaded (3.
|
69
|
-
json-ld (~> 3.
|
70
|
-
rdf (~> 3.
|
67
|
+
rack (>= 2.2, < 4)
|
68
|
+
rdf (~> 3.3)
|
69
|
+
json-ld-preloaded (3.3.0)
|
70
|
+
json-ld (~> 3.3)
|
71
|
+
rdf (~> 3.3)
|
71
72
|
link_header (0.0.8)
|
72
73
|
metainspector (5.11.2)
|
73
74
|
addressable (~> 2.7)
|
@@ -79,24 +80,25 @@ GEM
|
|
79
80
|
fastimage (~> 2.2)
|
80
81
|
nesty (~> 1.0)
|
81
82
|
nokogiri (~> 1.11)
|
82
|
-
mime-types (3.
|
83
|
+
mime-types (3.5.1)
|
83
84
|
mime-types-data (~> 3.2015)
|
84
|
-
mime-types-data (3.
|
85
|
+
mime-types-data (3.2023.0808)
|
85
86
|
multi_json (1.15.0)
|
86
|
-
multipart-post (2.
|
87
|
+
multipart-post (2.3.0)
|
87
88
|
nesty (1.0.2)
|
88
89
|
netrc (0.11.0)
|
89
|
-
nokogiri (1.
|
90
|
+
nokogiri (1.15.4-x86_64-linux)
|
90
91
|
racc (~> 1.4)
|
91
92
|
parallel (1.22.1)
|
92
93
|
parser (3.1.2.0)
|
93
94
|
ast (~> 2.4.1)
|
94
|
-
public_suffix (
|
95
|
-
racc (1.
|
96
|
-
rack (
|
95
|
+
public_suffix (5.0.3)
|
96
|
+
racc (1.7.1)
|
97
|
+
rack (3.0.8)
|
97
98
|
rainbow (3.1.1)
|
98
99
|
rake (13.0.6)
|
99
|
-
rdf (3.
|
100
|
+
rdf (3.3.1)
|
101
|
+
bcp47_spec (~> 0.2)
|
100
102
|
link_header (~> 0.0, >= 0.0.8)
|
101
103
|
regexp_parser (2.5.0)
|
102
104
|
rest-client (2.1.0)
|
data/README.md
CHANGED
data/lib/linkheaders/link.rb
CHANGED
@@ -127,7 +127,7 @@ module LinkHeaders
|
|
127
127
|
if l.relation != link.relation
|
128
128
|
@warnings |= ['WARN: Found identical hrefs with different relation types. This may be suspicious. Both have been retained']
|
129
129
|
else
|
130
|
-
@warnings |= [
|
130
|
+
@warnings |= ["WARN: found apparent duplicate #{l.relation} #{l.href} EQUALS#{link.href}. Ignoring and returning known link #{l.relation} #{l.href}"]
|
131
131
|
link = l
|
132
132
|
end
|
133
133
|
end
|
@@ -59,14 +59,14 @@ module LinkHeaders
|
|
59
59
|
return [[], []]
|
60
60
|
end
|
61
61
|
|
62
|
-
|
63
|
-
warn "HTTPlinks #{newlinks.inspect}"
|
62
|
+
_newlinks = parse_http_link_headers(head)
|
63
|
+
# warn "HTTPlinks #{newlinks.inspect}"
|
64
64
|
|
65
|
-
['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
|
65
|
+
['text/html', 'text/xhtml+xml', 'application/xhtml+xml'].each do |format|
|
66
66
|
if head[:content_type] and head[:content_type].match(format)
|
67
67
|
warn "found #{format} content - parsing"
|
68
|
-
|
69
|
-
warn "htmllinks #{htmllinks.inspect}"
|
68
|
+
_htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
|
69
|
+
# warn "htmllinks #{htmllinks.inspect}"
|
70
70
|
end
|
71
71
|
end
|
72
72
|
end
|
@@ -99,10 +99,12 @@ module LinkHeaders
|
|
99
99
|
# warn "link is: #{part}"
|
100
100
|
|
101
101
|
section = part.split(';') # ["<https://example.one.com>", "rel='preconnect'"]
|
102
|
-
|
102
|
+
warn section
|
103
103
|
next unless section[0]
|
104
104
|
|
105
105
|
href = section[0][/<(.*)>/, 1]
|
106
|
+
next unless href # this is mandatory!
|
107
|
+
|
106
108
|
next unless section[1]
|
107
109
|
|
108
110
|
sections = {}
|
@@ -123,10 +125,11 @@ module LinkHeaders
|
|
123
125
|
relation = sections['rel']
|
124
126
|
sections.delete('rel')
|
125
127
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
126
|
-
warn "RELATIONS #{relations}"
|
128
|
+
# warn "HEADERS RELATIONS #{relations}"
|
127
129
|
|
128
130
|
relations.each do |rel|
|
129
131
|
next unless rel.match?(/\w/)
|
132
|
+
puts "LICENCE is #{href}\n\n" if rel == "license"
|
130
133
|
newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
|
131
134
|
end
|
132
135
|
end
|
@@ -143,7 +146,6 @@ module LinkHeaders
|
|
143
146
|
# an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
|
144
147
|
newlinks = Array.new
|
145
148
|
m.head_links.each do |l|
|
146
|
-
warn "HTML head link is: #{l.inspect}"
|
147
149
|
next unless l[:href] and l[:rel] # required
|
148
150
|
|
149
151
|
anchor = l[:anchor] || default_anchor
|
@@ -154,7 +156,7 @@ module LinkHeaders
|
|
154
156
|
l.delete(:href)
|
155
157
|
|
156
158
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
157
|
-
warn "RELATIONS #{relations}"
|
159
|
+
# warn "BODY RELATIONS #{relations}"
|
158
160
|
|
159
161
|
relations.each do |rel|
|
160
162
|
next unless rel.match?(/\w/)
|
@@ -189,9 +191,10 @@ module LinkHeaders
|
|
189
191
|
|
190
192
|
def processJSONLinkset(href:)
|
191
193
|
_headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
|
192
|
-
# warn "Linkset body #{linkset.inspect}"
|
194
|
+
# warn "Linkset body #{linkset.inspect}\n\nLinkset headers #{_headers}\n\n"
|
193
195
|
newlinks = Array.new
|
194
196
|
return nil unless linkset
|
197
|
+
# warn "linkset #{linkset}"
|
195
198
|
|
196
199
|
# linkset = '{ "linkset":
|
197
200
|
# [
|
@@ -208,20 +211,28 @@ module LinkHeaders
|
|
208
211
|
# }'
|
209
212
|
|
210
213
|
linkset = JSON.parse(linkset)
|
214
|
+
# warn "linkset #{linkset}"
|
215
|
+
if linkset['data'] and linkset['data']['linkset']
|
216
|
+
linkset['linkset'] = linkset['data']['linkset']
|
217
|
+
end
|
218
|
+
return nil unless linkset['linkset'].first
|
211
219
|
linkset['linkset'].each do |ls|
|
212
220
|
# warn ls.inspect, "\n"
|
213
221
|
anchor = ls['anchor'] || @default_anchor
|
214
|
-
ls.delete('anchor') if ls['anchor'] # we need to delete since all others have a list as a value
|
222
|
+
ls.delete('anchor') if ls['anchor'] # we need to delete since almost all others have a list as a value
|
215
223
|
attrhash = {}
|
216
224
|
# warn ls.keys, "\n"
|
217
225
|
|
218
|
-
ls.each_key do |relation| #
|
219
|
-
|
226
|
+
ls.each_key do |relation| # relation = e.g. "item", "described-by". "cite"
|
227
|
+
href = ""
|
228
|
+
# warn relation
|
220
229
|
# warn ls[reltype], "\n"
|
230
|
+
ls[relation] = [ls[relation]] unless ls[relation].is_a? Array # force it to be a list, if it isn't
|
221
231
|
ls[relation].each do |attrs| # attr = e.g. {"href": "http://example.com/foo1", "type": "text/html"}
|
232
|
+
# warn "ATTR: #{attrs}"
|
222
233
|
next unless attrs['href'] # this is a required attribute of a linkset relation
|
223
|
-
|
224
234
|
href = attrs['href']
|
235
|
+
attrs.delete("href")
|
225
236
|
# now go through the other attributes of that relation
|
226
237
|
attrs.each do |attr, val| # attr = e.g. "type"; val = "text/html"
|
227
238
|
attrhash[attr.to_sym] = val
|
@@ -229,7 +240,6 @@ module LinkHeaders
|
|
229
240
|
end
|
230
241
|
|
231
242
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
232
|
-
|
233
243
|
relations.each do |rel|
|
234
244
|
next unless rel.match?(/\w/)
|
235
245
|
newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
|
@@ -12,26 +12,53 @@ factory = p.factory # LinkHeaders::LinkFactory
|
|
12
12
|
|
13
13
|
RSpec.describe LinkHeaders::Processor do
|
14
14
|
|
15
|
-
it 'has a version number' do
|
15
|
+
it 'Benchmark: has a version number' do
|
16
16
|
expect(LinkHeaders::Processor::VERSION).not_to be nil
|
17
17
|
end
|
18
18
|
|
19
|
-
it "should find
|
20
|
-
expect(factory.all_links.length).to eq
|
19
|
+
it "Benchmark: should find 8 links in total" do
|
20
|
+
expect(factory.all_links.length).to eq 8
|
21
21
|
end
|
22
|
-
it "should find find href on all links" do
|
23
|
-
expect(factory.all_links.select{|l| l.href}.length).to eq
|
22
|
+
it "Benchmark: should find find href on all links" do
|
23
|
+
expect(factory.all_links.select{|l| l.href}.length).to eq 8
|
24
24
|
end
|
25
|
-
it "should find find
|
26
|
-
expect(factory.all_links.select{|l| l.anchor}.length).to eq
|
25
|
+
it "Benchmark: should find find anchor on all links" do
|
26
|
+
expect(factory.all_links.select{|l| l.anchor}.length).to eq 8
|
27
27
|
end
|
28
|
-
it "should find 5 links in mixed HTTP and HTML headers" do
|
29
|
-
expect(factory.all_links.select{|l| l.relation}.length).to eq
|
28
|
+
it "Benchmark: should find 5 links in mixed HTTP and HTML headers" do
|
29
|
+
expect(factory.all_links.select{|l| l.relation}.length).to eq 8
|
30
30
|
end
|
31
|
-
it "should find one citeas in mixed HTTP and HTML headers" do
|
31
|
+
it "Benchmark: should find one citeas in mixed HTTP and HTML headers" do
|
32
32
|
expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
|
33
33
|
end
|
34
|
-
it "should find described-by in mixed HTTP and HTML headers" do
|
34
|
+
it "Benchmark: should find described-by in mixed HTTP and HTML headers" do
|
35
35
|
expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
|
36
36
|
end
|
37
|
+
|
38
|
+
url2 = "https://doi.org/10.7910/DVN/Z2JD58"
|
39
|
+
p2 = LinkHeaders::Processor.new(default_anchor: url2)
|
40
|
+
r2 = RestClient.get(url2)
|
41
|
+
p2.extract_and_parse(response: r2)
|
42
|
+
factory2 = p2.factory # LinkHeaders::LinkFactory
|
43
|
+
|
44
|
+
it "Dataverse: should find 29 links in total" do
|
45
|
+
expect(factory2.all_links.length).to eq 28
|
46
|
+
end
|
47
|
+
it "Dataverse: should find find href on all links" do
|
48
|
+
expect(factory2.all_links.select{|l| l.href}.length).to eq 28
|
49
|
+
end
|
50
|
+
it "Dataverse: should find find anchor on all links" do
|
51
|
+
expect(factory2.all_links.select{|l| l.anchor}.length).to eq 28
|
52
|
+
end
|
53
|
+
it "Dataverse: should find one citeas" do
|
54
|
+
expect(factory2.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
|
55
|
+
end
|
56
|
+
it "Dataverse: should find 2 described-by" do
|
57
|
+
expect(factory2.all_links.select{|l| l.relation == 'describedby'}.length).to eq 2
|
58
|
+
end
|
59
|
+
it "Dataverse: should find 1 license" do
|
60
|
+
expect(factory2.all_links.select{|l| l.relation == 'license'}.length).to eq 1
|
61
|
+
end
|
62
|
+
|
63
|
+
|
37
64
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkheaders-processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.18
|
4
|
+
version: 0.1.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -84,16 +84,16 @@ dependencies:
|
|
84
84
|
name: securerandom
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0
|
89
|
+
version: '0'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0
|
96
|
+
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: metainspector
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -153,6 +153,7 @@ metadata:
|
|
153
153
|
homepage_uri: https://github.com/markwilkinson/linkheader-processor
|
154
154
|
source_code_uri: https://github.com/markwilkinson/linkheader-processor
|
155
155
|
changelog_uri: https://github.com/markwilkinson/linkheader-processor/blob/master/CHANGELOG.md
|
156
|
+
documentation_uri: https://www.rubydoc.info/gems/linkheaders-processor/
|
156
157
|
post_install_message:
|
157
158
|
rdoc_options: []
|
158
159
|
require_paths:
|
@@ -168,7 +169,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
168
169
|
- !ruby/object:Gem::Version
|
169
170
|
version: '0'
|
170
171
|
requirements: []
|
171
|
-
rubygems_version: 3.
|
172
|
+
rubygems_version: 3.3.23
|
172
173
|
signing_key:
|
173
174
|
specification_version: 4
|
174
175
|
summary: A parser/processor for Link Headers and Linksets in both JSON and Text formats.
|