linkheaders-processor 0.1.8 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: af390c80d1304df2d885e4bb19ad6be8a95e695b7b88ac2a59762e8f11d17dff
4
- data.tar.gz: f3e90daa90734be50afb722f6023ecb6594c778776c987c0a4bf9b42e4d3aeaa
3
+ metadata.gz: e88dd164547a9a21ce0f1a3ffa85f2af4190ea6a588da445387cdfa2dca7e25d
4
+ data.tar.gz: f800677c8d4cb18e274defb5dbda5d2f58431cab40899ae15cfb2f866fcf8644
5
5
  SHA512:
6
- metadata.gz: 100903ef954dc3b40aaea1f97b285bb5dce59703a968905c9a9a7933416c1e4b847de32d0c03de5c646b3d0ae8d6d9a73ae003c315ee335aac10a956cfc38bfb
7
- data.tar.gz: 71b0b8b7ad489ee6f3db7787fa6de0da3b3bbc40c2f16c53f16586be88233f3c88693c848624bd592d1a923ea9d90e535eaf6841547cade3e96664bdde6cdba4
6
+ metadata.gz: ad4b8814c9ace9def1edd94e53e890c8d534a40d7cad4d55f7dbd0426e1310d29277a1849fd63ebada06fa3c9812a9d189c86ef2635ffe846fe52ff5f4864e2e
7
+ data.tar.gz: 99573e84fa6eb0412a5223cb188f333c37a8dbb3c877fcf5ea403a71bfc18d57d477920c6000b280fbb8cc920548ddd11ea95dcdb0df4f0a896e30a68fe84d9b
data/Gemfile CHANGED
@@ -7,6 +7,4 @@ gemspec
7
7
 
8
8
  gem "rake", "~> 13.0"
9
9
 
10
- gem "rspec", "~> 3.0"
11
-
12
10
  gem "rubocop", "~> 1.21"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- linkheaders-processor (0.1.8)
4
+ linkheaders-processor (0.1.13)
5
5
  json (~> 2.0)
6
6
  json-ld (~> 3.2)
7
7
  json-ld-preloaded (~> 3.2)
@@ -143,7 +143,7 @@ PLATFORMS
143
143
  DEPENDENCIES
144
144
  linkheaders-processor!
145
145
  rake (~> 13.0)
146
- rspec (~> 3.0)
146
+ rspec (~> 3.11)
147
147
  rubocop (~> 1.21)
148
148
 
149
149
  BUNDLED WITH
data/README.md CHANGED
@@ -2,24 +2,24 @@
2
2
 
3
3
  A gem to extract Link Headers from Web responses.
4
4
 
5
- This module handles HTTP Link Headers, HTML Link Headers, and auto-follows links to LinkSets in both JSON and Text format, and processes them also.
5
+ This module handles HTTP Link Headers, HTML Link Headers, and auto-follows links to LinkSets in both JSON and Text format, processing those as well. It also handles some unusual cases, such as multiple relation types in a single link, or 204 and 410 responses, where there is no message body.
6
6
 
7
7
  ## Installation
8
8
 
9
9
  Install the gem and add to the application's Gemfile by executing:
10
10
 
11
- $ bundle add linkheader-processor
11
+ $ bundle add linkheaders-processor
12
12
 
13
13
  If bundler is not being used to manage dependencies, install the gem by executing:
14
14
 
15
- $ gem install linkheader-processor
15
+ $ gem install linkheaders-processor
16
16
 
17
17
  ## Usage
18
18
 
19
19
 
20
20
  ```
21
21
 
22
- require 'linkheader/processor'
22
+ require 'linkheaders/processor'
23
23
  require 'rest-client'
24
24
 
25
25
  # url1 has http link headers, and a reference to a linkset in json format
@@ -28,27 +28,33 @@ If bundler is not being used to manage dependencies, install the gem by executin
28
28
  # url2 has http link headers, with a reference to a linkset in legacy text format
29
29
  url2 = "https://s11.no/2022/a2a-fair-metrics/28-http-linkset-txt-only/"
30
30
 
31
- p = LinkHeader::Parser.new(default_anchor: url1)
31
+ p = LinkHeaders::Processor.new(default_anchor: url1)
32
32
  r = RestClient.get(url1)
33
33
 
34
34
  p.extract_and_parse(response: r)
35
- factory = p.factory # LinkHeader::LinkFactory
35
+ factory = p.factory # LinkHeaders::LinkFactory
36
36
 
37
37
  factory.all_links.each do |l|
38
38
  puts l.href
39
39
  puts l.relation
40
40
  puts l.responsepart
41
41
 
42
+ # Additional properties are added as other instance methods
43
+ # you can access them as follows:
44
+
42
45
  puts l.linkmethods # returns list of instance methods beyond href and relation, that are attributes of the link
43
46
  l.linkmethods.each do |method|
44
47
  puts "#{method}=" + l.send(method)
45
48
  end
49
+ # or
50
+ puts l.type if l.respond_to? 'type'
46
51
  puts
52
+
47
53
  end
48
54
 
49
55
 
50
56
 
51
- p = LinkHeader::Parser.new(default_anchor: url2)
57
+ p = LinkHeaders::Processor.new(default_anchor: url2)
52
58
  r = RestClient.get(url2)
53
59
 
54
60
  p.extract_and_parse(response: r)
@@ -5,7 +5,7 @@ module LinkHeaders
5
5
  attr_accessor :default_anchor
6
6
  # @return [Array] An array of strings containing any warnings that were encountered when creating the link (e.g. duplicate cite-as but non-identical URLs)
7
7
  attr_accessor :warnings
8
- @@all_links = Array.new
8
+ attr_accessor :all_links
9
9
 
10
10
  #
11
11
  # Create the LinkFactory Object
@@ -15,8 +15,10 @@ module LinkHeaders
15
15
  def initialize(default_anchor: 'https://example.org/')
16
16
  @default_anchor = default_anchor
17
17
  @warnings = Array.new
18
+ @all_links = Array.new
18
19
  end
19
20
 
21
+
20
22
  #
21
23
  # Create a new LinkHeaders::Link object
22
24
  #
@@ -30,9 +32,13 @@ module LinkHeaders
30
32
  #
31
33
  def new_link(responsepart:, href:, relation:, anchor: @default_anchor, **kwargs)
32
34
  # warn "creating new link with kw #{kwargs}"
33
- link = LinkHeader::Link.new(responsepart: responsepart, factory: self, href: href, anchor: anchor, relation: relation, **kwargs)
35
+ if relation.split(/\s/).length > 1
36
+ @warnings |= ['WARN: the link relation contains spaces. This is allowed by the standard to indicate multiple relations for the same link, but this MUST be processed before creating a LinkHeaders::Link object!']
37
+ end
38
+
39
+ link = LinkHeaders::Link.new(responsepart: responsepart, factory: self, href: href, anchor: anchor, relation: relation, **kwargs)
34
40
  link = sanitycheck(link) # this will add warnings if the link already exists and has a conflict. returns the original of a duplicate
35
- @@all_links |= [link]
41
+ self.all_links |= [link]
36
42
  return link
37
43
  end
38
44
 
@@ -42,7 +48,7 @@ module LinkHeaders
42
48
  # @return [Array] Array of all LinkHeaders::Link objects created by the factory so far
43
49
  #
44
50
  def all_links
45
- @@all_links
51
+ @all_links
46
52
  end
47
53
 
48
54
  #
@@ -106,19 +112,21 @@ module LinkHeaders
106
112
  end
107
113
 
108
114
  def sanitycheck(link)
109
- flag = true
115
+ if link.relation == "describedby" and !(link.respond_to? 'type')
116
+ @warnings |= ['WARN: A describedby link should include a "type" attribute, to know the MIME type of the addressed description']
117
+ end
118
+
110
119
  self.all_links.each do |l|
111
120
  if l.relation == "cite-as" and link.relation == "cite-as"
112
121
  if l.href != link.href
113
- @warnings << 'WARN: Found conflicting cite-as relations. This should never happen'
122
+ @warnings |= ['WARN: Found conflicting cite-as relations. This should never happen']
114
123
  end
115
124
  end
116
125
  if l.href == link.href
117
126
  if l.relation != link.relation
118
- @warnings << 'WARN: Found identical hrefs with different relation types. This may be suspicious. Both have been retained'
119
- end
120
- if l.relation = link.relation
121
- @warnings << 'WARN: found apparent duplicate. Ignoring and returning known link'
127
+ @warnings |= ['WARN: Found identical hrefs with different relation types. This may be suspicious. Both have been retained']
128
+ else
129
+ @warnings |= ['WARN: found apparent duplicate. Ignoring and returning known link']
122
130
  link = l
123
131
  end
124
132
  end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module LinkHeaders
5
5
  class Processor
6
- VERSION = "0.1.8"
6
+ VERSION = "0.1.13"
7
7
  end
8
8
  end
@@ -17,7 +17,7 @@ module LinkHeaders
17
17
  #
18
18
  # Works for both HTML and HTTP links, and handles references to Linksets of either JSON or Text types
19
19
  #
20
- class Parser
20
+ class Processor
21
21
  # @return [<Type>] <description>
22
22
  attr_accessor :default_anchor, :factory
23
23
 
@@ -28,7 +28,7 @@ module LinkHeaders
28
28
  #
29
29
  def initialize(default_anchor: 'https://default.anchor.org/')
30
30
  @default_anchor = default_anchor
31
- @factory = LinkHeader::LinkFactory.new(default_anchor: @default_anchor)
31
+ @factory = LinkHeaders::LinkFactory.new(default_anchor: @default_anchor)
32
32
  end
33
33
 
34
34
  #
@@ -60,10 +60,14 @@ module LinkHeaders
60
60
  return [[], []]
61
61
  end
62
62
 
63
- parse_http_link_headers(head) # pass guid to check against anchors in linksets
63
+ newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
64
+ warn "HTTPlinks #{newlinks.inspect}"
65
+
64
66
  HTML_FORMATS['html'].each do |format|
65
67
  if head[:content_type] and head[:content_type].match(format)
68
+ warn "found #{format} content - parsing"
66
69
  htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
70
+ warn "htmllinks #{htmllinks.inspect}"
67
71
  end
68
72
  end
69
73
  end
@@ -75,7 +79,7 @@ module LinkHeaders
75
79
  #
76
80
  #
77
81
  def parse_http_link_headers(headers)
78
-
82
+ newlinks = Array.new
79
83
  # Link: <https://example.one.com>; rel="preconnect", <https://example.two.com>; rel="preconnect", <https://example.three.com>; rel="preconnect"
80
84
  links = headers[:link]
81
85
  return [] unless links
@@ -85,11 +89,13 @@ module LinkHeaders
85
89
  # warn parts
86
90
 
87
91
  # Parse each part into a named link
88
- split_http_link_headers(parts) # creates links from the split headers and adds to factory.all_links
89
- check_for_linkset(responsepart: :header) # all links are held in the Linkset::LinkFactory object (factory variable here). This scans the links for a linkset link to follow
92
+ newlinks << split_http_link_headers_and_process(parts) # creates links from the split headers and adds to factory.all_links
93
+ newlinks << check_for_linkset(responsepart: :header) # all links are held in the Linkset::LinkFactory object (factory variable here). This scans the links for a linkset link to follow
94
+ newlinks
90
95
  end
91
96
 
92
- def split_http_link_headers(parts)
97
+ def split_http_link_headers_and_process(parts)
98
+ newlinks = Array.new
93
99
  parts.each do |part, _index|
94
100
  # warn "link is: #{part}"
95
101
 
@@ -117,9 +123,15 @@ module LinkHeaders
117
123
  sections.delete('anchor')
118
124
  relation = sections['rel']
119
125
  sections.delete('rel')
126
+ relations = relation.split(/\s+/) # handle the multiple relation case
127
+ $stderr.puts "RELATIONS #{relations}"
120
128
 
121
- factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: relation, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
129
+ relations.each do |rel|
130
+ next unless rel.match?(/\w/)
131
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
132
+ end
122
133
  end
134
+ newlinks
123
135
  end
124
136
 
125
137
  #
@@ -130,9 +142,9 @@ module LinkHeaders
130
142
  def parse_html_link_headers(body)
131
143
  m = MetaInspector.new('http://example.org', document: body)
132
144
  # an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
133
-
145
+ newlinks = Array.new
134
146
  m.head_links.each do |l|
135
- # warn "link is: #{l}"
147
+ warn "HTML head link is: #{l.inspect}"
136
148
  next unless l[:href] and l[:rel] # required
137
149
 
138
150
  anchor = l[:anchor] || default_anchor
@@ -140,14 +152,23 @@ module LinkHeaders
140
152
  relation = l[:rel]
141
153
  l.delete(:rel)
142
154
  href = l[:href]
143
- l.delete(:href)
144
- factory.new_link(responsepart: :body, anchor: anchor, href: href, relation: relation, **l)
155
+ l.delete(:href)
156
+
157
+ relations = relation.split(/\s+/) # handle the multiple relation case
158
+ $stderr.puts "RELATIONS #{relations}"
159
+
160
+ relations.each do |rel|
161
+ next unless rel.match?(/\w/)
162
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **l) # parsed['https://example.one.com'][:rel] = "preconnect"
163
+ end
145
164
  end
146
- check_for_linkset(responsepart: :body)
165
+ newlinks << check_for_linkset(responsepart: :body)
166
+ newlinks
147
167
  end
148
168
 
149
169
  def check_for_linkset(responsepart:)
150
- # warn "looking for a linkset"
170
+ warn "looking for a linkset"
171
+ newlinks = Array.new
151
172
  factory.linksets.each do |linkset|
152
173
  # warn "found #{linkset.methods- Object.new.methods}"
153
174
  # warn "inspect #{linkset.inspect}"
@@ -156,20 +177,21 @@ module LinkHeaders
156
177
  case linkset.type
157
178
  when 'application/linkset+json'
158
179
  # warn "found a json linkset"
159
- processJSONLinkset(href: linkset.href)
180
+ newlinks << processJSONLinkset(href: linkset.href)
160
181
  when 'application/linkset'
161
182
  # warn "found a text linkset"
162
- processTextLinkset(href:linkset.href)
183
+ newlinks << processTextLinkset(href:linkset.href)
163
184
  else
164
185
  warn "the linkset #{linkset} was not typed as 'application/linkset+json' or 'application/linkset', and it should be! (found #{linkset.type}) Ignoring..."
165
186
  end
166
187
  end
188
+ newlinks
167
189
  end
168
190
 
169
191
  def processJSONLinkset(href:)
170
192
  _headers, linkset = fetch(href, { 'Accept' => 'application/linkset+json' })
171
193
  # warn "Linkset body #{linkset.inspect}"
172
-
194
+ newlinks = Array.new
173
195
  return nil unless linkset
174
196
 
175
197
  # linkset = '{ "linkset":
@@ -194,10 +216,10 @@ module LinkHeaders
194
216
  attrhash = {}
195
217
  # warn ls.keys, "\n"
196
218
 
197
- ls.each_key do |reltype| # key = e.g. "item", "described-by". "cite"
219
+ ls.each_key do |relation| # key = e.g. "item", "described-by". "cite"
198
220
  # warn reltype, "\n"
199
221
  # warn ls[reltype], "\n"
200
- ls[reltype].each do |attrs| # attr = e.g. {"href": "http://example.com/foo1", "type": "text/html"}
222
+ ls[relation].each do |attrs| # attr = e.g. {"href": "http://example.com/foo1", "type": "text/html"}
201
223
  next unless attrs['href'] # this is a required attribute of a linkset relation
202
224
 
203
225
  href = attrs['href']
@@ -206,12 +228,20 @@ module LinkHeaders
206
228
  attrhash[attr.to_sym] = val
207
229
  end
208
230
  end
209
- factory.new_link(responsepart: :linkset, href: href, relation: reltype, anchor: anchor, **attrhash)
231
+
232
+ relations = relation.split(/\s+/) # handle the multiple relation case
233
+
234
+ relations.each do |rel|
235
+ next unless rel.match?(/\w/)
236
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
237
+ end
210
238
  end
211
239
  end
240
+ newlinks
212
241
  end
213
242
 
214
243
  def processTextLinkset(href:)
244
+ newlinks = Array.new
215
245
  headers, linkset = fetch(href, { 'Accept' => 'application/linkset' })
216
246
  # warn "linkset body #{linkset.inspect}"
217
247
  return {} unless linkset
@@ -237,14 +267,19 @@ module LinkHeaders
237
267
  end
238
268
  warn "No link relation type... this is bad! Skipping" unless attrhash[:rel]
239
269
  next unless attrhash[:rel]
240
- reltype = attrhash[:rel]
270
+ relation = attrhash[:rel]
241
271
  attrhash.delete(:rel)
242
272
  anchor = attrhash[:anchor] || @default_anchor
243
273
  attrhash.delete(:anchor)
244
274
 
245
- factory.new_link(responsepart: :linkset, href: href, relation: reltype, anchor: anchor, **attrhash)
246
- # warn "created #{[href, reltype, anchor, **attrhash]}"
275
+ relations = relation.split(/\s+/) # handle the multiple relation case
276
+ #$stderr.puts "RELATIONS #{relations}"
277
+ relations.each do |rel|
278
+ next unless rel.match?(/\w/)
279
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
280
+ end
247
281
  end
282
+ newlinks
248
283
  end
249
284
  end
250
285
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkheaders-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-27 00:00:00.000000000 Z
11
+ date: 2022-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '3.11'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '3.11'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rest-client
29
29
  requirement: !ruby/object:Gem::Requirement