linkheaders-processor 0.1.8 → 0.1.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: af390c80d1304df2d885e4bb19ad6be8a95e695b7b88ac2a59762e8f11d17dff
4
- data.tar.gz: f3e90daa90734be50afb722f6023ecb6594c778776c987c0a4bf9b42e4d3aeaa
3
+ metadata.gz: e88dd164547a9a21ce0f1a3ffa85f2af4190ea6a588da445387cdfa2dca7e25d
4
+ data.tar.gz: f800677c8d4cb18e274defb5dbda5d2f58431cab40899ae15cfb2f866fcf8644
5
5
  SHA512:
6
- metadata.gz: 100903ef954dc3b40aaea1f97b285bb5dce59703a968905c9a9a7933416c1e4b847de32d0c03de5c646b3d0ae8d6d9a73ae003c315ee335aac10a956cfc38bfb
7
- data.tar.gz: 71b0b8b7ad489ee6f3db7787fa6de0da3b3bbc40c2f16c53f16586be88233f3c88693c848624bd592d1a923ea9d90e535eaf6841547cade3e96664bdde6cdba4
6
+ metadata.gz: ad4b8814c9ace9def1edd94e53e890c8d534a40d7cad4d55f7dbd0426e1310d29277a1849fd63ebada06fa3c9812a9d189c86ef2635ffe846fe52ff5f4864e2e
7
+ data.tar.gz: 99573e84fa6eb0412a5223cb188f333c37a8dbb3c877fcf5ea403a71bfc18d57d477920c6000b280fbb8cc920548ddd11ea95dcdb0df4f0a896e30a68fe84d9b
data/Gemfile CHANGED
@@ -7,6 +7,4 @@ gemspec
7
7
 
8
8
  gem "rake", "~> 13.0"
9
9
 
10
- gem "rspec", "~> 3.0"
11
-
12
10
  gem "rubocop", "~> 1.21"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- linkheaders-processor (0.1.8)
4
+ linkheaders-processor (0.1.13)
5
5
  json (~> 2.0)
6
6
  json-ld (~> 3.2)
7
7
  json-ld-preloaded (~> 3.2)
@@ -143,7 +143,7 @@ PLATFORMS
143
143
  DEPENDENCIES
144
144
  linkheaders-processor!
145
145
  rake (~> 13.0)
146
- rspec (~> 3.0)
146
+ rspec (~> 3.11)
147
147
  rubocop (~> 1.21)
148
148
 
149
149
  BUNDLED WITH
data/README.md CHANGED
@@ -2,24 +2,24 @@
2
2
 
3
3
  A gem to extract Link Headers from Web responses.
4
4
 
5
- This module handles HTTP Link Headers, HTML Link Headers, and auto-follows links to LinkSets in both JSON and Text format, and processes them also.
5
+ This module handles HTTP Link Headers, HTML Link Headers, and auto-follows links to LinkSets in both JSON and Text format, and processes them also. It also handles some unusual cases, such as multiple relation types in a single link, or 204 or 410 responses, where there is no message body.
6
6
 
7
7
  ## Installation
8
8
 
9
9
  Install the gem and add to the application's Gemfile by executing:
10
10
 
11
- $ bundle add linkheader-processor
11
+ $ bundle add linkheaders-processor
12
12
 
13
13
  If bundler is not being used to manage dependencies, install the gem by executing:
14
14
 
15
- $ gem install linkheader-processor
15
+ $ gem install linkheaders-processor
16
16
 
17
17
  ## Usage
18
18
 
19
19
 
20
20
  ```
21
21
 
22
- require 'linkheader/processor'
22
+ require 'linkheaders/processor'
23
23
  require 'rest-client'
24
24
 
25
25
  # url1 has http link headers, and a reference to a linkset in json format
@@ -28,27 +28,33 @@ If bundler is not being used to manage dependencies, install the gem by executin
28
28
  # url2 has http link headers, with a reference to a linkset in legacy text format
29
29
  url2 = "https://s11.no/2022/a2a-fair-metrics/28-http-linkset-txt-only/"
30
30
 
31
- p = LinkHeader::Parser.new(default_anchor: url1)
31
+ p = LinkHeaders::Processor.new(default_anchor: url1)
32
32
  r = RestClient.get(url1)
33
33
 
34
34
  p.extract_and_parse(response: r)
35
- factory = p.factory # LinkHeader::LinkFactory
35
+ factory = p.factory # LinkHeaders::LinkFactory
36
36
 
37
37
  factory.all_links.each do |l|
38
38
  puts l.href
39
39
  puts l.relation
40
40
  puts l.responsepart
41
41
 
42
+ # Additional properties are added as other instance methods
43
+ # you can access them as follows:
44
+
42
45
  puts l.linkmethods # returns list of instance methods beyond href and relation, that are attributes of the link
43
46
  l.linkmethods.each do |method|
44
47
  puts "#{method}=" + l.send(method)
45
48
  end
49
+ # or
50
+ puts l.type if l.respond_to? 'type'
46
51
  puts
52
+
47
53
  end
48
54
 
49
55
 
50
56
 
51
- p = LinkHeader::Parser.new(default_anchor: url2)
57
+ p = LinkHeaders::Processor.new(default_anchor: url2)
52
58
  r = RestClient.get(url2)
53
59
 
54
60
  p.extract_and_parse(response: r)
@@ -5,7 +5,7 @@ module LinkHeaders
5
5
  attr_accessor :default_anchor
6
6
  # @return [Array] An array of strings containing any warnings that were encountered when creating the link (e.g. duplicate cite-as but non-identical URLs)
7
7
  attr_accessor :warnings
8
- @@all_links = Array.new
8
+ attr_accessor :all_links
9
9
 
10
10
  #
11
11
  # Create the LinkFactory Object
@@ -15,8 +15,10 @@ module LinkHeaders
15
15
  def initialize(default_anchor: 'https://example.org/')
16
16
  @default_anchor = default_anchor
17
17
  @warnings = Array.new
18
+ @all_links = Array.new
18
19
  end
19
20
 
21
+
20
22
  #
21
23
  # Create a new LinkHeaders::Link object
22
24
  #
@@ -30,9 +32,13 @@ module LinkHeaders
30
32
  #
31
33
  def new_link(responsepart:, href:, relation:, anchor: @default_anchor, **kwargs)
32
34
  # warn "creating new link with kw #{kwargs}"
33
- link = LinkHeader::Link.new(responsepart: responsepart, factory: self, href: href, anchor: anchor, relation: relation, **kwargs)
35
+ if relation.split(/\s/).length > 1
36
+ @warnings |= ['WARN: the link relation contains spaces. This is allowed by the standard to indicate multiple relations for the same link, but this MUST be processed before creating a LinkHeaders::Link object!']
37
+ end
38
+
39
+ link = LinkHeaders::Link.new(responsepart: responsepart, factory: self, href: href, anchor: anchor, relation: relation, **kwargs)
34
40
  link = sanitycheck(link) # this will add warnings if the link already exists and has a conflict. returns the original of a duplicate
35
- @@all_links |= [link]
41
+ self.all_links |= [link]
36
42
  return link
37
43
  end
38
44
 
@@ -42,7 +48,7 @@ module LinkHeaders
42
48
  # @return [Array] Array of all LinkHeaders::Link objects created by the factory so far
43
49
  #
44
50
  def all_links
45
- @@all_links
51
+ @all_links
46
52
  end
47
53
 
48
54
  #
@@ -106,19 +112,21 @@ module LinkHeaders
106
112
  end
107
113
 
108
114
  def sanitycheck(link)
109
- flag = true
115
+ if link.relation == "describedby" and !(link.respond_to? 'type')
116
+ @warnings |= ['WARN: A describedby link should include a "type" attribute, to know the MIME type of the addressed description']
117
+ end
118
+
110
119
  self.all_links.each do |l|
111
120
  if l.relation == "cite-as" and link.relation == "cite-as"
112
121
  if l.href != link.href
113
- @warnings << 'WARN: Found conflicting cite-as relations. This should never happen'
122
+ @warnings |= ['WARN: Found conflicting cite-as relations. This should never happen']
114
123
  end
115
124
  end
116
125
  if l.href == link.href
117
126
  if l.relation != link.relation
118
- @warnings << 'WARN: Found identical hrefs with different relation types. This may be suspicious. Both have been retained'
119
- end
120
- if l.relation = link.relation
121
- @warnings << 'WARN: found apparent duplicate. Ignoring and returning known link'
127
+ @warnings |= ['WARN: Found identical hrefs with different relation types. This may be suspicious. Both have been retained']
128
+ else
129
+ @warnings |= ['WARN: found apparent duplicate. Ignoring and returning known link']
122
130
  link = l
123
131
  end
124
132
  end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module LinkHeaders
5
5
  class Processor
6
- VERSION = "0.1.8"
6
+ VERSION = "0.1.13"
7
7
  end
8
8
  end
@@ -17,7 +17,7 @@ module LinkHeaders
17
17
  #
18
18
  # Works for both HTML and HTTP links, and handles references to Linksets of either JSON or Text types
19
19
  #
20
- class Parser
20
+ class Processor
21
21
  # @return [<Type>] <description>
22
22
  attr_accessor :default_anchor, :factory
23
23
 
@@ -28,7 +28,7 @@ module LinkHeaders
28
28
  #
29
29
  def initialize(default_anchor: 'https://default.anchor.org/')
30
30
  @default_anchor = default_anchor
31
- @factory = LinkHeader::LinkFactory.new(default_anchor: @default_anchor)
31
+ @factory = LinkHeaders::LinkFactory.new(default_anchor: @default_anchor)
32
32
  end
33
33
 
34
34
  #
@@ -60,10 +60,14 @@ module LinkHeaders
60
60
  return [[], []]
61
61
  end
62
62
 
63
- parse_http_link_headers(head) # pass guid to check against anchors in linksets
63
+ newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
64
+ warn "HTTPlinks #{newlinks.inspect}"
65
+
64
66
  HTML_FORMATS['html'].each do |format|
65
67
  if head[:content_type] and head[:content_type].match(format)
68
+ warn "found #{format} content - parsing"
66
69
  htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
70
+ warn "htmllinks #{htmllinks.inspect}"
67
71
  end
68
72
  end
69
73
  end
@@ -75,7 +79,7 @@ module LinkHeaders
75
79
  #
76
80
  #
77
81
  def parse_http_link_headers(headers)
78
-
82
+ newlinks = Array.new
79
83
  # Link: <https://example.one.com>; rel="preconnect", <https://example.two.com>; rel="preconnect", <https://example.three.com>; rel="preconnect"
80
84
  links = headers[:link]
81
85
  return [] unless links
@@ -85,11 +89,13 @@ module LinkHeaders
85
89
  # warn parts
86
90
 
87
91
  # Parse each part into a named link
88
- split_http_link_headers(parts) # creates links from the split headers and adds to factory.all_links
89
- check_for_linkset(responsepart: :header) # all links are held in the Linkset::LinkFactory object (factory variable here). This scans the links for a linkset link to follow
92
+ newlinks << split_http_link_headers_and_process(parts) # creates links from the split headers and adds to factory.all_links
93
+ newlinks << check_for_linkset(responsepart: :header) # all links are held in the Linkset::LinkFactory object (factory variable here). This scans the links for a linkset link to follow
94
+ newlinks
90
95
  end
91
96
 
92
- def split_http_link_headers(parts)
97
+ def split_http_link_headers_and_process(parts)
98
+ newlinks = Array.new
93
99
  parts.each do |part, _index|
94
100
  # warn "link is: #{part}"
95
101
 
@@ -117,9 +123,15 @@ module LinkHeaders
117
123
  sections.delete('anchor')
118
124
  relation = sections['rel']
119
125
  sections.delete('rel')
126
+ relations = relation.split(/\s+/) # handle the multiple relation case
127
+ $stderr.puts "RELATIONS #{relations}"
120
128
 
121
- factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: relation, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
129
+ relations.each do |rel|
130
+ next unless rel.match?(/\w/)
131
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
132
+ end
122
133
  end
134
+ newlinks
123
135
  end
124
136
 
125
137
  #
@@ -130,9 +142,9 @@ module LinkHeaders
130
142
  def parse_html_link_headers(body)
131
143
  m = MetaInspector.new('http://example.org', document: body)
132
144
  # an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
133
-
145
+ newlinks = Array.new
134
146
  m.head_links.each do |l|
135
- # warn "link is: #{l}"
147
+ warn "HTML head link is: #{l.inspect}"
136
148
  next unless l[:href] and l[:rel] # required
137
149
 
138
150
  anchor = l[:anchor] || default_anchor
@@ -140,14 +152,23 @@ module LinkHeaders
140
152
  relation = l[:rel]
141
153
  l.delete(:rel)
142
154
  href = l[:href]
143
- l.delete(:href)
144
- factory.new_link(responsepart: :body, anchor: anchor, href: href, relation: relation, **l)
155
+ l.delete(:href)
156
+
157
+ relations = relation.split(/\s+/) # handle the multiple relation case
158
+ $stderr.puts "RELATIONS #{relations}"
159
+
160
+ relations.each do |rel|
161
+ next unless rel.match?(/\w/)
162
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **l) # parsed['https://example.one.com'][:rel] = "preconnect"
163
+ end
145
164
  end
146
- check_for_linkset(responsepart: :body)
165
+ newlinks << check_for_linkset(responsepart: :body)
166
+ newlinks
147
167
  end
148
168
 
149
169
  def check_for_linkset(responsepart:)
150
- # warn "looking for a linkset"
170
+ warn "looking for a linkset"
171
+ newlinks = Array.new
151
172
  factory.linksets.each do |linkset|
152
173
  # warn "found #{linkset.methods- Object.new.methods}"
153
174
  # warn "inspect #{linkset.inspect}"
@@ -156,20 +177,21 @@ module LinkHeaders
156
177
  case linkset.type
157
178
  when 'application/linkset+json'
158
179
  # warn "found a json linkset"
159
- processJSONLinkset(href: linkset.href)
180
+ newlinks << processJSONLinkset(href: linkset.href)
160
181
  when 'application/linkset'
161
182
  # warn "found a text linkset"
162
- processTextLinkset(href:linkset.href)
183
+ newlinks << processTextLinkset(href:linkset.href)
163
184
  else
164
185
  warn "the linkset #{linkset} was not typed as 'application/linkset+json' or 'application/linkset', and it should be! (found #{linkset.type}) Ignoring..."
165
186
  end
166
187
  end
188
+ newlinks
167
189
  end
168
190
 
169
191
  def processJSONLinkset(href:)
170
192
  _headers, linkset = fetch(href, { 'Accept' => 'application/linkset+json' })
171
193
  # warn "Linkset body #{linkset.inspect}"
172
-
194
+ newlinks = Array.new
173
195
  return nil unless linkset
174
196
 
175
197
  # linkset = '{ "linkset":
@@ -194,10 +216,10 @@ module LinkHeaders
194
216
  attrhash = {}
195
217
  # warn ls.keys, "\n"
196
218
 
197
- ls.each_key do |reltype| # key = e.g. "item", "described-by". "cite"
219
+ ls.each_key do |relation| # key = e.g. "item", "described-by". "cite"
198
220
  # warn reltype, "\n"
199
221
  # warn ls[reltype], "\n"
200
- ls[reltype].each do |attrs| # attr = e.g. {"href": "http://example.com/foo1", "type": "text/html"}
222
+ ls[relation].each do |attrs| # attr = e.g. {"href": "http://example.com/foo1", "type": "text/html"}
201
223
  next unless attrs['href'] # this is a required attribute of a linkset relation
202
224
 
203
225
  href = attrs['href']
@@ -206,12 +228,20 @@ module LinkHeaders
206
228
  attrhash[attr.to_sym] = val
207
229
  end
208
230
  end
209
- factory.new_link(responsepart: :linkset, href: href, relation: reltype, anchor: anchor, **attrhash)
231
+
232
+ relations = relation.split(/\s+/) # handle the multiple relation case
233
+
234
+ relations.each do |rel|
235
+ next unless rel.match?(/\w/)
236
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
237
+ end
210
238
  end
211
239
  end
240
+ newlinks
212
241
  end
213
242
 
214
243
  def processTextLinkset(href:)
244
+ newlinks = Array.new
215
245
  headers, linkset = fetch(href, { 'Accept' => 'application/linkset' })
216
246
  # warn "linkset body #{linkset.inspect}"
217
247
  return {} unless linkset
@@ -237,14 +267,19 @@ module LinkHeaders
237
267
  end
238
268
  warn "No link relation type... this is bad! Skipping" unless attrhash[:rel]
239
269
  next unless attrhash[:rel]
240
- reltype = attrhash[:rel]
270
+ relation = attrhash[:rel]
241
271
  attrhash.delete(:rel)
242
272
  anchor = attrhash[:anchor] || @default_anchor
243
273
  attrhash.delete(:anchor)
244
274
 
245
- factory.new_link(responsepart: :linkset, href: href, relation: reltype, anchor: anchor, **attrhash)
246
- # warn "created #{[href, reltype, anchor, **attrhash]}"
275
+ relations = relation.split(/\s+/) # handle the multiple relation case
276
+ #$stderr.puts "RELATIONS #{relations}"
277
+ relations.each do |rel|
278
+ next unless rel.match?(/\w/)
279
+ newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
280
+ end
247
281
  end
282
+ newlinks
248
283
  end
249
284
  end
250
285
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkheaders-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-27 00:00:00.000000000 Z
11
+ date: 2022-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '3.11'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '3.11'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rest-client
29
29
  requirement: !ruby/object:Gem::Requirement