linkheaders-processor 0.1.13 → 0.1.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e88dd164547a9a21ce0f1a3ffa85f2af4190ea6a588da445387cdfa2dca7e25d
4
- data.tar.gz: f800677c8d4cb18e274defb5dbda5d2f58431cab40899ae15cfb2f866fcf8644
3
+ metadata.gz: b10f24c0498058f393a2142465f0ff2e559dc2a8721ea8cf6a90178c8ff21789
4
+ data.tar.gz: 5cd760d37f6e82f63cb8271375bec72364264173010fda86e1ae32f1a424642d
5
5
  SHA512:
6
- metadata.gz: ad4b8814c9ace9def1edd94e53e890c8d534a40d7cad4d55f7dbd0426e1310d29277a1849fd63ebada06fa3c9812a9d189c86ef2635ffe846fe52ff5f4864e2e
7
- data.tar.gz: 99573e84fa6eb0412a5223cb188f333c37a8dbb3c877fcf5ea403a71bfc18d57d477920c6000b280fbb8cc920548ddd11ea95dcdb0df4f0a896e30a68fe84d9b
6
+ metadata.gz: e696571f2c9da932ff461af46740824de0728a6accd327ab6e109ab84530d313868d120cff5a62f670655efe6531069d7b6a358f1dcfcb94560cdd647614ff35
7
+ data.tar.gz: cf056d618d352bbcfaa43e59317fbc6f739186ac3e7ae26534e8f12721dec0d7ab2e8edfa07a2434e881a37d1cd18bf30e84f6cfac402fcc74ecd8c86170fa73
data/Gemfile.lock CHANGED
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- linkheaders-processor (0.1.13)
4
+ linkheaders-processor (0.1.17)
5
5
  json (~> 2.0)
6
6
  json-ld (~> 3.2)
7
7
  json-ld-preloaded (~> 3.2)
8
+ link_header (~> 0.0.8)
8
9
  metainspector (~> 5.11.2)
9
10
  rest-client (~> 2.1)
10
11
  securerandom (~> 0.1.0)
@@ -18,7 +19,7 @@ GEM
18
19
  diff-lcs (1.5.0)
19
20
  domain_name (0.5.20190701)
20
21
  unf (>= 0.0.5, < 1.0.0)
21
- faraday (1.10.0)
22
+ faraday (1.10.1)
22
23
  faraday-em_http (~> 1.0)
23
24
  faraday-em_synchrony (~> 1.0)
24
25
  faraday-excon (~> 1.1)
@@ -57,13 +58,13 @@ GEM
57
58
  domain_name (~> 0.5)
58
59
  json (2.6.2)
59
60
  json-canonicalization (0.3.0)
60
- json-ld (3.2.1)
61
+ json-ld (3.2.3)
61
62
  htmlentities (~> 4.3)
62
63
  json-canonicalization (~> 0.3)
63
64
  link_header (~> 0.0, >= 0.0.8)
64
65
  multi_json (~> 1.15)
65
66
  rack (~> 2.2)
66
- rdf (~> 3.2)
67
+ rdf (~> 3.2, >= 3.2.9)
67
68
  json-ld-preloaded (3.2.0)
68
69
  json-ld (~> 3.2)
69
70
  rdf (~> 3.2)
@@ -95,7 +96,7 @@ GEM
95
96
  rack (2.2.4)
96
97
  rainbow (3.1.1)
97
98
  rake (13.0.6)
98
- rdf (3.2.8)
99
+ rdf (3.2.9)
99
100
  link_header (~> 0.0, >= 0.0.8)
100
101
  regexp_parser (2.5.0)
101
102
  rest-client (2.1.0)
@@ -117,7 +118,7 @@ GEM
117
118
  diff-lcs (>= 1.2.0, < 2.0)
118
119
  rspec-support (~> 3.11.0)
119
120
  rspec-support (3.11.0)
120
- rubocop (1.32.0)
121
+ rubocop (1.33.0)
121
122
  json (~> 2.3)
122
123
  parallel (~> 1.10)
123
124
  parser (>= 3.1.0.0)
@@ -1,3 +1,4 @@
1
+
1
2
  module LinkHeaders
2
3
  class LinkFactory
3
4
 
@@ -191,5 +192,24 @@ module LinkHeaders
191
192
  self.send("#{k}=", v)
192
193
  end
193
194
  end
195
+
196
+ #
197
+ # Create an HTML version of the link
198
+ # @return [String] HTML version of the Link object
199
+ #
200
+ def to_html
201
+ methods = self.linkmethods
202
+ href = self.href
203
+ rel = self.relation
204
+ anchor = self.anchor
205
+ properties = []
206
+ methods.each do |method|
207
+ value = self.send(method)
208
+ properties << [method, value]
209
+ end
210
+ properties << ["rel", rel]
211
+ properties << ["anchor", anchor]
212
+ LinkHeader::Link.new(href, properties).to_html
213
+ end
194
214
  end
195
215
  end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module LinkHeaders
5
5
  class Processor
6
- VERSION = "0.1.13"
6
+ VERSION = "0.1.17"
7
7
  end
8
8
  end
@@ -1,10 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'processor/version'
4
- require_relative 'constants'
5
4
  require_relative 'link'
6
5
  require_relative 'web_utils'
7
-
6
+ require 'link_header'
8
7
  require 'json'
9
8
  require 'rest-client'
10
9
  require 'securerandom'
@@ -63,10 +62,10 @@ module LinkHeaders
63
62
  newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
64
63
  warn "HTTPlinks #{newlinks.inspect}"
65
64
 
66
- HTML_FORMATS['html'].each do |format|
65
+ ['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
67
66
  if head[:content_type] and head[:content_type].match(format)
68
67
  warn "found #{format} content - parsing"
69
- htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
68
+ htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
70
69
  warn "htmllinks #{htmllinks.inspect}"
71
70
  end
72
71
  end
@@ -124,7 +123,7 @@ module LinkHeaders
124
123
  relation = sections['rel']
125
124
  sections.delete('rel')
126
125
  relations = relation.split(/\s+/) # handle the multiple relation case
127
- $stderr.puts "RELATIONS #{relations}"
126
+ warn "RELATIONS #{relations}"
128
127
 
129
128
  relations.each do |rel|
130
129
  next unless rel.match?(/\w/)
@@ -139,8 +138,8 @@ module LinkHeaders
139
138
  #
140
139
  # @param [String] body The HTML of the page containing HTML Link headers
141
140
  #
142
- def parse_html_link_headers(body)
143
- m = MetaInspector.new('http://example.org', document: body)
141
+ def parse_html_link_headers(body:, anchor: '')
142
+ m = MetaInspector.new(anchor, document: body)
144
143
  # an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
145
144
  newlinks = Array.new
146
145
  m.head_links.each do |l|
@@ -155,7 +154,7 @@ module LinkHeaders
155
154
  l.delete(:href)
156
155
 
157
156
  relations = relation.split(/\s+/) # handle the multiple relation case
158
- $stderr.puts "RELATIONS #{relations}"
157
+ warn "RELATIONS #{relations}"
159
158
 
160
159
  relations.each do |rel|
161
160
  next unless rel.match?(/\w/)
@@ -189,7 +188,7 @@ module LinkHeaders
189
188
  end
190
189
 
191
190
  def processJSONLinkset(href:)
192
- _headers, linkset = fetch(href, { 'Accept' => 'application/linkset+json' })
191
+ _headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
193
192
  # warn "Linkset body #{linkset.inspect}"
194
193
  newlinks = Array.new
195
194
  return nil unless linkset
@@ -242,7 +241,7 @@ module LinkHeaders
242
241
 
243
242
  def processTextLinkset(href:)
244
243
  newlinks = Array.new
245
- headers, linkset = fetch(href, { 'Accept' => 'application/linkset' })
244
+ headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset' })
246
245
  # warn "linkset body #{linkset.inspect}"
247
246
  return {} unless linkset
248
247
 
@@ -1,4 +1,4 @@
1
- def fetch(url, headers = ACCEPT_ALL_HEADER) # we will try to retrieve turtle whenever possible
1
+ def lhfetch(url, headers = {accept: "*/*"})
2
2
  # warn "In fetch routine now. "
3
3
 
4
4
  # warn "executing call over the Web to #{url.to_s}"
@@ -1,7 +1,37 @@
1
1
  # frozen_string_literal: true
2
+ require_relative '../../lib/linkheaders/processor'
3
+ require 'rest-client'
4
+
5
+
6
+ url1 = "https://w3id.org/a2a-fair-metrics/22-http-html-citeas-describedby-mixed/"
7
+ p = LinkHeaders::Processor.new(default_anchor: url1)
8
+ r = RestClient.get(url1)
9
+ p.extract_and_parse(response: r)
10
+ factory = p.factory # LinkHeaders::LinkFactory
11
+
12
+
13
+ RSpec.describe LinkHeaders::Processor do
2
14
 
3
- RSpec.describe LinkHeader::Parser do
4
15
  it 'has a version number' do
5
- expect(LinkHeader::Parser::VERSION).not_to be nil
16
+ expect(LinkHeaders::Processor::VERSION).not_to be nil
17
+ end
18
+
19
+ it "should find PURL citeas which has described-by and cite-as in mixed HTTP and HTML headers" do
20
+ expect(factory.all_links.length).to eq 5
21
+ end
22
+ it "should find find href on all links" do
23
+ expect(factory.all_links.select{|l| l.href}.length).to eq 5
24
+ end
25
+ it "should find find href on all links" do
26
+ expect(factory.all_links.select{|l| l.anchor}.length).to eq 5
27
+ end
28
+ it "should find 5 links in mixed HTTP and HTML headers" do
29
+ expect(factory.all_links.select{|l| l.relation}.length).to eq 5
30
+ end
31
+ it "should find one citeas in mixed HTTP and HTML headers" do
32
+ expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
33
+ end
34
+ it "should find described-by in mixed HTTP and HTML headers" do
35
+ expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
6
36
  end
7
37
  end
data/spec/spec_helper.rb CHANGED
@@ -7,7 +7,7 @@ RSpec.configure do |config|
7
7
  config.example_status_persistence_file_path = ".rspec_status"
8
8
 
9
9
  # Disable RSpec exposing methods globally on `Module` and `main`
10
- config.disable_monkey_patching!
10
+ # config.disable_monkey_patching!
11
11
 
12
12
  config.expect_with :rspec do |c|
13
13
  c.syntax = :expect
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkheaders-processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-02 00:00:00.000000000 Z
11
+ date: 2022-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: 5.11.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: link_header
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.0.8
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.0.8
111
125
  description: A parser/processor for Link Headers and Linksets in both JSON and Text
112
126
  formats.
113
127
  email:
@@ -123,7 +137,6 @@ files:
123
137
  - README.md
124
138
  - Rakefile
125
139
  - launch.json
126
- - lib/linkheaders/constants.rb
127
140
  - lib/linkheaders/link.rb
128
141
  - lib/linkheaders/processor.rb
129
142
  - lib/linkheaders/processor/version.rb
@@ -1,28 +0,0 @@
1
- ACCEPT_ALL_HEADER = {'Accept' => 'text/turtle, application/ld+json, application/rdf+xml, text/xhtml+xml, application/n3, application/rdf+n3, application/turtle, application/x-turtle, text/n3, text/turtle, text/rdf+n3, text/rdf+turtle, application/n-triples' }
2
-
3
- TEXT_FORMATS = {
4
- 'text' => ['text/plain',],
5
- }
6
-
7
- RDF_FORMATS = {
8
- 'jsonld' => ['application/ld+json', 'application/vnd.schemaorg.ld+json'], # NEW FOR DATACITE
9
- 'turtle' => ['text/turtle','application/n3','application/rdf+n3',
10
- 'application/turtle', 'application/x-turtle','text/n3','text/turtle',
11
- 'text/rdf+n3', 'text/rdf+turtle'],
12
- #'rdfa' => ['text/xhtml+xml', 'application/xhtml+xml'],
13
- 'rdfxml' => ['application/rdf+xml'],
14
- 'triples' => ['application/n-triples','application/n-quads', 'application/trig']
15
- }
16
-
17
- XML_FORMATS = {
18
- 'xml' => ['text/xhtml','text/xml',]
19
- }
20
-
21
- HTML_FORMATS = {
22
- 'html' => ['text/html','text/xhtml+xml', 'application/xhtml+xml']
23
- }
24
-
25
- JSON_FORMATS = {
26
- 'json' => ['application/json',]
27
- }
28
-