linkheaders-processor 0.1.13 → 0.1.17

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e88dd164547a9a21ce0f1a3ffa85f2af4190ea6a588da445387cdfa2dca7e25d
4
- data.tar.gz: f800677c8d4cb18e274defb5dbda5d2f58431cab40899ae15cfb2f866fcf8644
3
+ metadata.gz: b10f24c0498058f393a2142465f0ff2e559dc2a8721ea8cf6a90178c8ff21789
4
+ data.tar.gz: 5cd760d37f6e82f63cb8271375bec72364264173010fda86e1ae32f1a424642d
5
5
  SHA512:
6
- metadata.gz: ad4b8814c9ace9def1edd94e53e890c8d534a40d7cad4d55f7dbd0426e1310d29277a1849fd63ebada06fa3c9812a9d189c86ef2635ffe846fe52ff5f4864e2e
7
- data.tar.gz: 99573e84fa6eb0412a5223cb188f333c37a8dbb3c877fcf5ea403a71bfc18d57d477920c6000b280fbb8cc920548ddd11ea95dcdb0df4f0a896e30a68fe84d9b
6
+ metadata.gz: e696571f2c9da932ff461af46740824de0728a6accd327ab6e109ab84530d313868d120cff5a62f670655efe6531069d7b6a358f1dcfcb94560cdd647614ff35
7
+ data.tar.gz: cf056d618d352bbcfaa43e59317fbc6f739186ac3e7ae26534e8f12721dec0d7ab2e8edfa07a2434e881a37d1cd18bf30e84f6cfac402fcc74ecd8c86170fa73
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- linkheaders-processor (0.1.13)
4
+ linkheaders-processor (0.1.17)
5
5
  json (~> 2.0)
6
6
  json-ld (~> 3.2)
7
7
  json-ld-preloaded (~> 3.2)
8
+ link_header (~> 0.0.8)
8
9
  metainspector (~> 5.11.2)
9
10
  rest-client (~> 2.1)
10
11
  securerandom (~> 0.1.0)
@@ -18,7 +19,7 @@ GEM
18
19
  diff-lcs (1.5.0)
19
20
  domain_name (0.5.20190701)
20
21
  unf (>= 0.0.5, < 1.0.0)
21
- faraday (1.10.0)
22
+ faraday (1.10.1)
22
23
  faraday-em_http (~> 1.0)
23
24
  faraday-em_synchrony (~> 1.0)
24
25
  faraday-excon (~> 1.1)
@@ -57,13 +58,13 @@ GEM
57
58
  domain_name (~> 0.5)
58
59
  json (2.6.2)
59
60
  json-canonicalization (0.3.0)
60
- json-ld (3.2.1)
61
+ json-ld (3.2.3)
61
62
  htmlentities (~> 4.3)
62
63
  json-canonicalization (~> 0.3)
63
64
  link_header (~> 0.0, >= 0.0.8)
64
65
  multi_json (~> 1.15)
65
66
  rack (~> 2.2)
66
- rdf (~> 3.2)
67
+ rdf (~> 3.2, >= 3.2.9)
67
68
  json-ld-preloaded (3.2.0)
68
69
  json-ld (~> 3.2)
69
70
  rdf (~> 3.2)
@@ -95,7 +96,7 @@ GEM
95
96
  rack (2.2.4)
96
97
  rainbow (3.1.1)
97
98
  rake (13.0.6)
98
- rdf (3.2.8)
99
+ rdf (3.2.9)
99
100
  link_header (~> 0.0, >= 0.0.8)
100
101
  regexp_parser (2.5.0)
101
102
  rest-client (2.1.0)
@@ -117,7 +118,7 @@ GEM
117
118
  diff-lcs (>= 1.2.0, < 2.0)
118
119
  rspec-support (~> 3.11.0)
119
120
  rspec-support (3.11.0)
120
- rubocop (1.32.0)
121
+ rubocop (1.33.0)
121
122
  json (~> 2.3)
122
123
  parallel (~> 1.10)
123
124
  parser (>= 3.1.0.0)
@@ -1,3 +1,4 @@
1
+
1
2
  module LinkHeaders
2
3
  class LinkFactory
3
4
 
@@ -191,5 +192,24 @@ module LinkHeaders
191
192
  self.send("#{k}=", v)
192
193
  end
193
194
  end
195
+
196
+ #
197
+ # Create an HTML version of the link
198
+ # @return [String] HTML version of the Link object
199
+ #
200
+ def to_html
201
+ methods = self.linkmethods
202
+ href = self.href
203
+ rel = self.relation
204
+ anchor = self.anchor
205
+ properties = []
206
+ methods.each do |method|
207
+ value = self.send(method)
208
+ properties << [method, value]
209
+ end
210
+ properties << ["rel", rel]
211
+ properties << ["anchor", anchor]
212
+ LinkHeader::Link.new(href, properties).to_html
213
+ end
194
214
  end
195
215
  end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module LinkHeaders
5
5
  class Processor
6
- VERSION = "0.1.13"
6
+ VERSION = "0.1.17"
7
7
  end
8
8
  end
@@ -1,10 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'processor/version'
4
- require_relative 'constants'
5
4
  require_relative 'link'
6
5
  require_relative 'web_utils'
7
-
6
+ require 'link_header'
8
7
  require 'json'
9
8
  require 'rest-client'
10
9
  require 'securerandom'
@@ -63,10 +62,10 @@ module LinkHeaders
63
62
  newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
64
63
  warn "HTTPlinks #{newlinks.inspect}"
65
64
 
66
- HTML_FORMATS['html'].each do |format|
65
+ ['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
67
66
  if head[:content_type] and head[:content_type].match(format)
68
67
  warn "found #{format} content - parsing"
69
- htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
68
+ htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
70
69
  warn "htmllinks #{htmllinks.inspect}"
71
70
  end
72
71
  end
@@ -124,7 +123,7 @@ module LinkHeaders
124
123
  relation = sections['rel']
125
124
  sections.delete('rel')
126
125
  relations = relation.split(/\s+/) # handle the multiple relation case
127
- $stderr.puts "RELATIONS #{relations}"
126
+ warn "RELATIONS #{relations}"
128
127
 
129
128
  relations.each do |rel|
130
129
  next unless rel.match?(/\w/)
@@ -139,8 +138,8 @@ module LinkHeaders
139
138
  #
140
139
  # @param [String] body The HTML of the page containing HTML Link headers
141
140
  #
142
- def parse_html_link_headers(body)
143
- m = MetaInspector.new('http://example.org', document: body)
141
+ def parse_html_link_headers(body:, anchor: '')
142
+ m = MetaInspector.new(anchor, document: body)
144
143
  # an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
145
144
  newlinks = Array.new
146
145
  m.head_links.each do |l|
@@ -155,7 +154,7 @@ module LinkHeaders
155
154
  l.delete(:href)
156
155
 
157
156
  relations = relation.split(/\s+/) # handle the multiple relation case
158
- $stderr.puts "RELATIONS #{relations}"
157
+ warn "RELATIONS #{relations}"
159
158
 
160
159
  relations.each do |rel|
161
160
  next unless rel.match?(/\w/)
@@ -189,7 +188,7 @@ module LinkHeaders
189
188
  end
190
189
 
191
190
  def processJSONLinkset(href:)
192
- _headers, linkset = fetch(href, { 'Accept' => 'application/linkset+json' })
191
+ _headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
193
192
  # warn "Linkset body #{linkset.inspect}"
194
193
  newlinks = Array.new
195
194
  return nil unless linkset
@@ -242,7 +241,7 @@ module LinkHeaders
242
241
 
243
242
  def processTextLinkset(href:)
244
243
  newlinks = Array.new
245
- headers, linkset = fetch(href, { 'Accept' => 'application/linkset' })
244
+ headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset' })
246
245
  # warn "linkset body #{linkset.inspect}"
247
246
  return {} unless linkset
248
247
 
@@ -1,4 +1,4 @@
1
- def fetch(url, headers = ACCEPT_ALL_HEADER) # we will try to retrieve turtle whenever possible
1
+ def lhfetch(url, headers = {accept: "*/*"})
2
2
  # warn "In fetch routine now. "
3
3
 
4
4
  # warn "executing call over the Web to #{url.to_s}"
@@ -1,7 +1,37 @@
1
1
  # frozen_string_literal: true
2
+ require_relative '../../lib/linkheaders/processor'
3
+ require 'rest-client'
4
+
5
+
6
+ url1 = "https://w3id.org/a2a-fair-metrics/22-http-html-citeas-describedby-mixed/"
7
+ p = LinkHeaders::Processor.new(default_anchor: url1)
8
+ r = RestClient.get(url1)
9
+ p.extract_and_parse(response: r)
10
+ factory = p.factory # LinkHeaders::LinkFactory
11
+
12
+
13
+ RSpec.describe LinkHeaders::Processor do
2
14
 
3
- RSpec.describe LinkHeader::Parser do
4
15
  it 'has a version number' do
5
- expect(LinkHeader::Parser::VERSION).not_to be nil
16
+ expect(LinkHeaders::Processor::VERSION).not_to be nil
17
+ end
18
+
19
+ it "should find PURL citeas which has described-by and cite-as in mixed HTTP and HTML headers" do
20
+ expect(factory.all_links.length).to eq 5
21
+ end
22
+ it "should find find href on all links" do
23
+ expect(factory.all_links.select{|l| l.href}.length).to eq 5
24
+ end
25
+ it "should find find href on all links" do
26
+ expect(factory.all_links.select{|l| l.anchor}.length).to eq 5
27
+ end
28
+ it "should find 5 links in mixed HTTP and HTML headers" do
29
+ expect(factory.all_links.select{|l| l.relation}.length).to eq 5
30
+ end
31
+ it "should find one citeas in mixed HTTP and HTML headers" do
32
+ expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
33
+ end
34
+ it "should find described-by in mixed HTTP and HTML headers" do
35
+ expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
6
36
  end
7
37
  end
data/spec/spec_helper.rb CHANGED
@@ -7,7 +7,7 @@ RSpec.configure do |config|
7
7
  config.example_status_persistence_file_path = ".rspec_status"
8
8
 
9
9
  # Disable RSpec exposing methods globally on `Module` and `main`
10
- config.disable_monkey_patching!
10
+ # config.disable_monkey_patching!
11
11
 
12
12
  config.expect_with :rspec do |c|
13
13
  c.syntax = :expect
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkheaders-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-02 00:00:00.000000000 Z
11
+ date: 2022-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: 5.11.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: link_header
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.0.8
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.0.8
111
125
  description: A parser/processor for Link Headers and Linksets in both JSON and Text
112
126
  formats.
113
127
  email:
@@ -123,7 +137,6 @@ files:
123
137
  - README.md
124
138
  - Rakefile
125
139
  - launch.json
126
- - lib/linkheaders/constants.rb
127
140
  - lib/linkheaders/link.rb
128
141
  - lib/linkheaders/processor.rb
129
142
  - lib/linkheaders/processor/version.rb
@@ -1,28 +0,0 @@
1
- ACCEPT_ALL_HEADER = {'Accept' => 'text/turtle, application/ld+json, application/rdf+xml, text/xhtml+xml, application/n3, application/rdf+n3, application/turtle, application/x-turtle, text/n3, text/turtle, text/rdf+n3, text/rdf+turtle, application/n-triples' }
2
-
3
- TEXT_FORMATS = {
4
- 'text' => ['text/plain',],
5
- }
6
-
7
- RDF_FORMATS = {
8
- 'jsonld' => ['application/ld+json', 'application/vnd.schemaorg.ld+json'], # NEW FOR DATACITE
9
- 'turtle' => ['text/turtle','application/n3','application/rdf+n3',
10
- 'application/turtle', 'application/x-turtle','text/n3','text/turtle',
11
- 'text/rdf+n3', 'text/rdf+turtle'],
12
- #'rdfa' => ['text/xhtml+xml', 'application/xhtml+xml'],
13
- 'rdfxml' => ['application/rdf+xml'],
14
- 'triples' => ['application/n-triples','application/n-quads', 'application/trig']
15
- }
16
-
17
- XML_FORMATS = {
18
- 'xml' => ['text/xhtml','text/xml',]
19
- }
20
-
21
- HTML_FORMATS = {
22
- 'html' => ['text/html','text/xhtml+xml', 'application/xhtml+xml']
23
- }
24
-
25
- JSON_FORMATS = {
26
- 'json' => ['application/json',]
27
- }
28
-