linkheaders-processor 0.1.18 → 0.1.19

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8eb95bf3880ef8dba373d47230b512d5209bcde19581c1064c2cb703bab3abf
4
- data.tar.gz: 3ac9096ab4487e30f5e8a78a18cd982f47e232f4dc7e8b9d9573b73ee96b63ad
3
+ metadata.gz: 65747de845763341178717385337c65ed5d78a7df57bccecee17f392d8330b12
4
+ data.tar.gz: 1e2c5ae203200e40e8fc238b211f63491bc2ba4756d3bd807ccb916a7c9b270f
5
5
  SHA512:
6
- metadata.gz: 6de8bcfd72fb78d76483fe9473ece432ccfc6de1cd68eb1ceda5509ec7324e9c010f60a8c996c335c4164cc920a23b046c8ea1a0f9b6edd08f55acfe17e7caca
7
- data.tar.gz: 557b9ff9c6f9da8a7f28d3f01e9079f14caf205fe14b017bcc0aa2902b80a89cd32207e98e4ec2642e9d08aae112c22c6835e0821fb7acc1282a3e620771f0cf
6
+ metadata.gz: a92597e6f649e5abdc524862c051a0a2d442c753976ea5cea70ce0ce4b1c30261d0f0f3e9e3e577ad55c396cad7a780c806f068012ef26a3ae91a7cc8fbb109e
7
+ data.tar.gz: bdd96b086e950c0b427ec8d15ed151723639d72b81d9f3a15adfca22bf7326c51ee9711cf0aa451a3e1d472cb14d8c46d6ced944627058d5e87fa695dbcde6f6
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- linkheaders-processor (0.1.18)
4
+ linkheaders-processor (0.1.19)
5
5
  json (~> 2.0)
6
6
  json-ld (~> 3.2)
7
7
  json-ld-preloaded (~> 3.2)
@@ -13,8 +13,8 @@ PATH
13
13
  GEM
14
14
  remote: https://rubygems.org/
15
15
  specs:
16
- addressable (2.8.0)
17
- public_suffix (>= 2.0.2, < 5.0)
16
+ addressable (2.8.5)
17
+ public_suffix (>= 2.0.2, < 6.0)
18
18
  ast (2.4.2)
19
19
  diff-lcs (1.5.0)
20
20
  domain_name (0.5.20190701)
@@ -39,7 +39,7 @@ GEM
39
39
  faraday-encoding (0.0.5)
40
40
  faraday
41
41
  faraday-excon (1.1.0)
42
- faraday-http-cache (2.4.1)
42
+ faraday-http-cache (2.5.0)
43
43
  faraday (>= 0.8)
44
44
  faraday-httpclient (1.0.1)
45
45
  faraday-multipart (1.0.4)
@@ -51,21 +51,21 @@ GEM
51
51
  faraday-retry (1.0.3)
52
52
  faraday_middleware (1.2.0)
53
53
  faraday (~> 1.0)
54
- fastimage (2.2.6)
54
+ fastimage (2.2.7)
55
55
  htmlentities (4.3.4)
56
56
  http-accept (1.7.0)
57
57
  http-cookie (1.0.5)
58
58
  domain_name (~> 0.5)
59
59
  json (2.6.2)
60
- json-canonicalization (0.3.0)
61
- json-ld (3.2.3)
60
+ json-canonicalization (0.3.2)
61
+ json-ld (3.2.5)
62
62
  htmlentities (~> 4.3)
63
- json-canonicalization (~> 0.3)
63
+ json-canonicalization (~> 0.3, >= 0.3.2)
64
64
  link_header (~> 0.0, >= 0.0.8)
65
65
  multi_json (~> 1.15)
66
- rack (~> 2.2)
67
- rdf (~> 3.2, >= 3.2.9)
68
- json-ld-preloaded (3.2.0)
66
+ rack (>= 2.2, < 4)
67
+ rdf (~> 3.2, >= 3.2.10)
68
+ json-ld-preloaded (3.2.2)
69
69
  json-ld (~> 3.2)
70
70
  rdf (~> 3.2)
71
71
  link_header (0.0.8)
@@ -79,24 +79,24 @@ GEM
79
79
  fastimage (~> 2.2)
80
80
  nesty (~> 1.0)
81
81
  nokogiri (~> 1.11)
82
- mime-types (3.4.1)
82
+ mime-types (3.5.1)
83
83
  mime-types-data (~> 3.2015)
84
- mime-types-data (3.2022.0105)
84
+ mime-types-data (3.2023.0808)
85
85
  multi_json (1.15.0)
86
86
  multipart-post (2.2.3)
87
87
  nesty (1.0.2)
88
88
  netrc (0.11.0)
89
- nokogiri (1.13.8-x86_64-linux)
89
+ nokogiri (1.15.4-x86_64-linux)
90
90
  racc (~> 1.4)
91
91
  parallel (1.22.1)
92
92
  parser (3.1.2.0)
93
93
  ast (~> 2.4.1)
94
- public_suffix (4.0.7)
95
- racc (1.6.0)
96
- rack (2.2.4)
94
+ public_suffix (5.0.3)
95
+ racc (1.7.1)
96
+ rack (3.0.8)
97
97
  rainbow (3.1.1)
98
98
  rake (13.0.6)
99
- rdf (3.2.9)
99
+ rdf (3.2.11)
100
100
  link_header (~> 0.0, >= 0.0.8)
101
101
  regexp_parser (2.5.0)
102
102
  rest-client (2.1.0)
@@ -127,7 +127,7 @@ module LinkHeaders
127
127
  if l.relation != link.relation
128
128
  @warnings |= ['WARN: Found identical hrefs with different relation types. This may be suspicious. Both have been retained']
129
129
  else
130
- @warnings |= ['WARN: found apparent duplicate. Ignoring and returning known link']
130
+ @warnings |= ["WARN: found apparent duplicate #{l.relation} #{l.href} EQUALS#{link.href}. Ignoring and returning known link #{l.relation} #{l.href}"]
131
131
  link = l
132
132
  end
133
133
  end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module LinkHeaders
5
5
  class Processor
6
- VERSION = "0.1.18"
6
+ VERSION = "0.1.19"
7
7
  end
8
8
  end
@@ -59,14 +59,14 @@ module LinkHeaders
59
59
  return [[], []]
60
60
  end
61
61
 
62
- newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
63
- warn "HTTPlinks #{newlinks.inspect}"
62
+ _newlinks = parse_http_link_headers(head)
63
+ # warn "HTTPlinks #{newlinks.inspect}"
64
64
 
65
- ['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
65
+ ['text/html', 'text/xhtml+xml', 'application/xhtml+xml'].each do |format|
66
66
  if head[:content_type] and head[:content_type].match(format)
67
67
  warn "found #{format} content - parsing"
68
- htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
69
- warn "htmllinks #{htmllinks.inspect}"
68
+ _htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
69
+ # warn "htmllinks #{htmllinks.inspect}"
70
70
  end
71
71
  end
72
72
  end
@@ -99,10 +99,12 @@ module LinkHeaders
99
99
  # warn "link is: #{part}"
100
100
 
101
101
  section = part.split(';') # ["<https://example.one.com>", "rel='preconnect'"]
102
- # warn section
102
+ warn section
103
103
  next unless section[0]
104
104
 
105
105
  href = section[0][/<(.*)>/, 1]
106
+ next unless href # this is mandatory!
107
+
106
108
  next unless section[1]
107
109
 
108
110
  sections = {}
@@ -123,10 +125,11 @@ module LinkHeaders
123
125
  relation = sections['rel']
124
126
  sections.delete('rel')
125
127
  relations = relation.split(/\s+/) # handle the multiple relation case
126
- warn "RELATIONS #{relations}"
128
+ # warn "HEADERS RELATIONS #{relations}"
127
129
 
128
130
  relations.each do |rel|
129
131
  next unless rel.match?(/\w/)
132
+ puts "LICENCE is #{href}\n\n" if rel == "license"
130
133
  newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
131
134
  end
132
135
  end
@@ -143,7 +146,6 @@ module LinkHeaders
143
146
  # an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
144
147
  newlinks = Array.new
145
148
  m.head_links.each do |l|
146
- warn "HTML head link is: #{l.inspect}"
147
149
  next unless l[:href] and l[:rel] # required
148
150
 
149
151
  anchor = l[:anchor] || default_anchor
@@ -154,7 +156,7 @@ module LinkHeaders
154
156
  l.delete(:href)
155
157
 
156
158
  relations = relation.split(/\s+/) # handle the multiple relation case
157
- warn "RELATIONS #{relations}"
159
+ # warn "BODY RELATIONS #{relations}"
158
160
 
159
161
  relations.each do |rel|
160
162
  next unless rel.match?(/\w/)
@@ -189,9 +191,10 @@ module LinkHeaders
189
191
 
190
192
  def processJSONLinkset(href:)
191
193
  _headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
192
- # warn "Linkset body #{linkset.inspect}"
194
+ # warn "Linkset body #{linkset.inspect}\n\nLinkset headers #{_headers}\n\n"
193
195
  newlinks = Array.new
194
196
  return nil unless linkset
197
+ # warn "linkset #{linkset}"
195
198
 
196
199
  # linkset = '{ "linkset":
197
200
  # [
@@ -208,20 +211,28 @@ module LinkHeaders
208
211
  # }'
209
212
 
210
213
  linkset = JSON.parse(linkset)
214
+ # warn "linkset #{linkset}"
215
+ if linkset['data'] and linkset['data']['linkset']
216
+ linkset['linkset'] = linkset['data']['linkset']
217
+ end
218
+ return nil unless linkset['linkset'].first
211
219
  linkset['linkset'].each do |ls|
212
220
  # warn ls.inspect, "\n"
213
221
  anchor = ls['anchor'] || @default_anchor
214
- ls.delete('anchor') if ls['anchor'] # we need to delete since all others have a list as a value
222
+ ls.delete('anchor') if ls['anchor'] # we need to delete since almost all others have a list as a value
215
223
  attrhash = {}
216
224
  # warn ls.keys, "\n"
217
225
 
218
- ls.each_key do |relation| # key = e.g. "item", "described-by". "cite"
219
- # warn reltype, "\n"
226
+ ls.each_key do |relation| # relation = e.g. "item", "described-by". "cite"
227
+ href = ""
228
+ # warn relation
220
229
  # warn ls[reltype], "\n"
230
+ ls[relation] = [ls[relation]] unless ls[relation].is_a? Array # force it to be a list, if it isn't
221
231
  ls[relation].each do |attrs| # attr = e.g. {"href": "http://example.com/foo1", "type": "text/html"}
232
+ # warn "ATTR: #{attrs}"
222
233
  next unless attrs['href'] # this is a required attribute of a linkset relation
223
-
224
234
  href = attrs['href']
235
+ attrs.delete("href")
225
236
  # now go through the other attributes of that relation
226
237
  attrs.each do |attr, val| # attr = e.g. "type"; val = "text/html"
227
238
  attrhash[attr.to_sym] = val
@@ -229,7 +240,6 @@ module LinkHeaders
229
240
  end
230
241
 
231
242
  relations = relation.split(/\s+/) # handle the multiple relation case
232
-
233
243
  relations.each do |rel|
234
244
  next unless rel.match?(/\w/)
235
245
  newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
@@ -12,26 +12,53 @@ factory = p.factory # LinkHeaders::LinkFactory
12
12
 
13
13
  RSpec.describe LinkHeaders::Processor do
14
14
 
15
- it 'has a version number' do
15
+ it 'Benchmark: has a version number' do
16
16
  expect(LinkHeaders::Processor::VERSION).not_to be nil
17
17
  end
18
18
 
19
- it "should find PURL citeas which has described-by and cite-as in mixed HTTP and HTML headers" do
20
- expect(factory.all_links.length).to eq 5
19
+ it "Benchmark: should find 8 links in total" do
20
+ expect(factory.all_links.length).to eq 8
21
21
  end
22
- it "should find find href on all links" do
23
- expect(factory.all_links.select{|l| l.href}.length).to eq 5
22
+ it "Benchmark: should find find href on all links" do
23
+ expect(factory.all_links.select{|l| l.href}.length).to eq 8
24
24
  end
25
- it "should find find href on all links" do
26
- expect(factory.all_links.select{|l| l.anchor}.length).to eq 5
25
+ it "Benchmark: should find find anchor on all links" do
26
+ expect(factory.all_links.select{|l| l.anchor}.length).to eq 8
27
27
  end
28
- it "should find 5 links in mixed HTTP and HTML headers" do
29
- expect(factory.all_links.select{|l| l.relation}.length).to eq 5
28
+ it "Benchmark: should find 5 links in mixed HTTP and HTML headers" do
29
+ expect(factory.all_links.select{|l| l.relation}.length).to eq 8
30
30
  end
31
- it "should find one citeas in mixed HTTP and HTML headers" do
31
+ it "Benchmark: should find one citeas in mixed HTTP and HTML headers" do
32
32
  expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
33
33
  end
34
- it "should find described-by in mixed HTTP and HTML headers" do
34
+ it "Benchmark: should find described-by in mixed HTTP and HTML headers" do
35
35
  expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
36
36
  end
37
+
38
+ url2 = "https://doi.org/10.7910/DVN/Z2JD58"
39
+ p2 = LinkHeaders::Processor.new(default_anchor: url2)
40
+ r2 = RestClient.get(url2)
41
+ p2.extract_and_parse(response: r2)
42
+ factory2 = p2.factory # LinkHeaders::LinkFactory
43
+
44
+ it "Dataverse: should find 29 links in total" do
45
+ expect(factory2.all_links.length).to eq 28
46
+ end
47
+ it "Dataverse: should find find href on all links" do
48
+ expect(factory2.all_links.select{|l| l.href}.length).to eq 28
49
+ end
50
+ it "Dataverse: should find find anchor on all links" do
51
+ expect(factory2.all_links.select{|l| l.anchor}.length).to eq 28
52
+ end
53
+ it "Dataverse: should find one citeas" do
54
+ expect(factory2.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
55
+ end
56
+ it "Dataverse: should find 2 described-by" do
57
+ expect(factory2.all_links.select{|l| l.relation == 'describedby'}.length).to eq 2
58
+ end
59
+ it "Dataverse: should find 1 license" do
60
+ expect(factory2.all_links.select{|l| l.relation == 'license'}.length).to eq 1
61
+ end
62
+
63
+
37
64
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkheaders-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.18
4
+ version: 0.1.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-17 00:00:00.000000000 Z
11
+ date: 2023-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -168,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
168
  - !ruby/object:Gem::Version
169
169
  version: '0'
170
170
  requirements: []
171
- rubygems_version: 3.2.28
171
+ rubygems_version: 3.3.23
172
172
  signing_key:
173
173
  specification_version: 4
174
174
  summary: A parser/processor for Link Headers and Linksets in both JSON and Text formats.