linkheaders-processor 0.1.13 → 0.1.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -6
- data/lib/linkheaders/link.rb +20 -0
- data/lib/linkheaders/processor/version.rb +1 -1
- data/lib/linkheaders/processor.rb +9 -10
- data/lib/linkheaders/web_utils.rb +1 -1
- data/spec/linkheader/parser_spec.rb +32 -2
- data/spec/spec_helper.rb +1 -1
- metadata +16 -3
- data/lib/linkheaders/constants.rb +0 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b10f24c0498058f393a2142465f0ff2e559dc2a8721ea8cf6a90178c8ff21789
|
4
|
+
data.tar.gz: 5cd760d37f6e82f63cb8271375bec72364264173010fda86e1ae32f1a424642d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e696571f2c9da932ff461af46740824de0728a6accd327ab6e109ab84530d313868d120cff5a62f670655efe6531069d7b6a358f1dcfcb94560cdd647614ff35
|
7
|
+
data.tar.gz: cf056d618d352bbcfaa43e59317fbc6f739186ac3e7ae26534e8f12721dec0d7ab2e8edfa07a2434e881a37d1cd18bf30e84f6cfac402fcc74ecd8c86170fa73
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
linkheaders-processor (0.1.13)
|
4
|
+
linkheaders-processor (0.1.17)
|
5
5
|
json (~> 2.0)
|
6
6
|
json-ld (~> 3.2)
|
7
7
|
json-ld-preloaded (~> 3.2)
|
8
|
+
link_header (~> 0.0.8)
|
8
9
|
metainspector (~> 5.11.2)
|
9
10
|
rest-client (~> 2.1)
|
10
11
|
securerandom (~> 0.1.0)
|
@@ -18,7 +19,7 @@ GEM
|
|
18
19
|
diff-lcs (1.5.0)
|
19
20
|
domain_name (0.5.20190701)
|
20
21
|
unf (>= 0.0.5, < 1.0.0)
|
21
|
-
faraday (1.10.
|
22
|
+
faraday (1.10.1)
|
22
23
|
faraday-em_http (~> 1.0)
|
23
24
|
faraday-em_synchrony (~> 1.0)
|
24
25
|
faraday-excon (~> 1.1)
|
@@ -57,13 +58,13 @@ GEM
|
|
57
58
|
domain_name (~> 0.5)
|
58
59
|
json (2.6.2)
|
59
60
|
json-canonicalization (0.3.0)
|
60
|
-
json-ld (3.2.
|
61
|
+
json-ld (3.2.3)
|
61
62
|
htmlentities (~> 4.3)
|
62
63
|
json-canonicalization (~> 0.3)
|
63
64
|
link_header (~> 0.0, >= 0.0.8)
|
64
65
|
multi_json (~> 1.15)
|
65
66
|
rack (~> 2.2)
|
66
|
-
rdf (~> 3.2)
|
67
|
+
rdf (~> 3.2, >= 3.2.9)
|
67
68
|
json-ld-preloaded (3.2.0)
|
68
69
|
json-ld (~> 3.2)
|
69
70
|
rdf (~> 3.2)
|
@@ -95,7 +96,7 @@ GEM
|
|
95
96
|
rack (2.2.4)
|
96
97
|
rainbow (3.1.1)
|
97
98
|
rake (13.0.6)
|
98
|
-
rdf (3.2.
|
99
|
+
rdf (3.2.9)
|
99
100
|
link_header (~> 0.0, >= 0.0.8)
|
100
101
|
regexp_parser (2.5.0)
|
101
102
|
rest-client (2.1.0)
|
@@ -117,7 +118,7 @@ GEM
|
|
117
118
|
diff-lcs (>= 1.2.0, < 2.0)
|
118
119
|
rspec-support (~> 3.11.0)
|
119
120
|
rspec-support (3.11.0)
|
120
|
-
rubocop (1.
|
121
|
+
rubocop (1.33.0)
|
121
122
|
json (~> 2.3)
|
122
123
|
parallel (~> 1.10)
|
123
124
|
parser (>= 3.1.0.0)
|
data/lib/linkheaders/link.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
|
1
2
|
module LinkHeaders
|
2
3
|
class LinkFactory
|
3
4
|
|
@@ -191,5 +192,24 @@ module LinkHeaders
|
|
191
192
|
self.send("#{k}=", v)
|
192
193
|
end
|
193
194
|
end
|
195
|
+
|
196
|
+
#
|
197
|
+
# Create an HTML version of the link
|
198
|
+
# @return [String] HTML version of the Link object
|
199
|
+
#
|
200
|
+
def to_html
|
201
|
+
methods = self.linkmethods
|
202
|
+
href = self.href
|
203
|
+
rel = self.relation
|
204
|
+
anchor = self.anchor
|
205
|
+
properties = []
|
206
|
+
methods.each do |method|
|
207
|
+
value = self.send(method)
|
208
|
+
properties << [method, value]
|
209
|
+
end
|
210
|
+
properties << ["rel", rel]
|
211
|
+
properties << ["anchor", anchor]
|
212
|
+
LinkHeader::Link.new(href, properties).to_html
|
213
|
+
end
|
194
214
|
end
|
195
215
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'processor/version'
|
4
|
-
require_relative 'constants'
|
5
4
|
require_relative 'link'
|
6
5
|
require_relative 'web_utils'
|
7
|
-
|
6
|
+
require 'link_header'
|
8
7
|
require 'json'
|
9
8
|
require 'rest-client'
|
10
9
|
require 'securerandom'
|
@@ -63,10 +62,10 @@ module LinkHeaders
|
|
63
62
|
newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
|
64
63
|
warn "HTTPlinks #{newlinks.inspect}"
|
65
64
|
|
66
|
-
|
65
|
+
['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
|
67
66
|
if head[:content_type] and head[:content_type].match(format)
|
68
67
|
warn "found #{format} content - parsing"
|
69
|
-
htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
|
68
|
+
htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
|
70
69
|
warn "htmllinks #{htmllinks.inspect}"
|
71
70
|
end
|
72
71
|
end
|
@@ -124,7 +123,7 @@ module LinkHeaders
|
|
124
123
|
relation = sections['rel']
|
125
124
|
sections.delete('rel')
|
126
125
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
127
|
-
|
126
|
+
warn "RELATIONS #{relations}"
|
128
127
|
|
129
128
|
relations.each do |rel|
|
130
129
|
next unless rel.match?(/\w/)
|
@@ -139,8 +138,8 @@ module LinkHeaders
|
|
139
138
|
#
|
140
139
|
# @param [String] body The HTML of the page containing HTML Link headers
|
141
140
|
#
|
142
|
-
def parse_html_link_headers(body)
|
143
|
-
m = MetaInspector.new(
|
141
|
+
def parse_html_link_headers(body:, anchor: '')
|
142
|
+
m = MetaInspector.new(anchor, document: body)
|
144
143
|
# an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
|
145
144
|
newlinks = Array.new
|
146
145
|
m.head_links.each do |l|
|
@@ -155,7 +154,7 @@ module LinkHeaders
|
|
155
154
|
l.delete(:href)
|
156
155
|
|
157
156
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
158
|
-
|
157
|
+
warn "RELATIONS #{relations}"
|
159
158
|
|
160
159
|
relations.each do |rel|
|
161
160
|
next unless rel.match?(/\w/)
|
@@ -189,7 +188,7 @@ module LinkHeaders
|
|
189
188
|
end
|
190
189
|
|
191
190
|
def processJSONLinkset(href:)
|
192
|
-
_headers, linkset =
|
191
|
+
_headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
|
193
192
|
# warn "Linkset body #{linkset.inspect}"
|
194
193
|
newlinks = Array.new
|
195
194
|
return nil unless linkset
|
@@ -242,7 +241,7 @@ module LinkHeaders
|
|
242
241
|
|
243
242
|
def processTextLinkset(href:)
|
244
243
|
newlinks = Array.new
|
245
|
-
headers, linkset =
|
244
|
+
headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset' })
|
246
245
|
# warn "linkset body #{linkset.inspect}"
|
247
246
|
return {} unless linkset
|
248
247
|
|
@@ -1,7 +1,37 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require_relative '../../lib/linkheaders/processor'
|
3
|
+
require 'rest-client'
|
4
|
+
|
5
|
+
|
6
|
+
url1 = "https://w3id.org/a2a-fair-metrics/22-http-html-citeas-describedby-mixed/"
|
7
|
+
p = LinkHeaders::Processor.new(default_anchor: url1)
|
8
|
+
r = RestClient.get(url1)
|
9
|
+
p.extract_and_parse(response: r)
|
10
|
+
factory = p.factory # LinkHeaders::LinkFactory
|
11
|
+
|
12
|
+
|
13
|
+
RSpec.describe LinkHeaders::Processor do
|
2
14
|
|
3
|
-
RSpec.describe LinkHeader::Parser do
|
4
15
|
it 'has a version number' do
|
5
|
-
expect(
|
16
|
+
expect(LinkHeaders::Processor::VERSION).not_to be nil
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should find PURL citeas which has described-by and cite-as in mixed HTTP and HTML headers" do
|
20
|
+
expect(factory.all_links.length).to eq 5
|
21
|
+
end
|
22
|
+
it "should find find href on all links" do
|
23
|
+
expect(factory.all_links.select{|l| l.href}.length).to eq 5
|
24
|
+
end
|
25
|
+
it "should find find href on all links" do
|
26
|
+
expect(factory.all_links.select{|l| l.anchor}.length).to eq 5
|
27
|
+
end
|
28
|
+
it "should find 5 links in mixed HTTP and HTML headers" do
|
29
|
+
expect(factory.all_links.select{|l| l.relation}.length).to eq 5
|
30
|
+
end
|
31
|
+
it "should find one citeas in mixed HTTP and HTML headers" do
|
32
|
+
expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
|
33
|
+
end
|
34
|
+
it "should find described-by in mixed HTTP and HTML headers" do
|
35
|
+
expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
|
6
36
|
end
|
7
37
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -7,7 +7,7 @@ RSpec.configure do |config|
|
|
7
7
|
config.example_status_persistence_file_path = ".rspec_status"
|
8
8
|
|
9
9
|
# Disable RSpec exposing methods globally on `Module` and `main`
|
10
|
-
config.disable_monkey_patching!
|
10
|
+
# config.disable_monkey_patching!
|
11
11
|
|
12
12
|
config.expect_with :rspec do |c|
|
13
13
|
c.syntax = :expect
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkheaders-processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.1.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 5.11.2
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: link_header
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.0.8
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.0.8
|
111
125
|
description: A parser/processor for Link Headers and Linksets in both JSON and Text
|
112
126
|
formats.
|
113
127
|
email:
|
@@ -123,7 +137,6 @@ files:
|
|
123
137
|
- README.md
|
124
138
|
- Rakefile
|
125
139
|
- launch.json
|
126
|
-
- lib/linkheaders/constants.rb
|
127
140
|
- lib/linkheaders/link.rb
|
128
141
|
- lib/linkheaders/processor.rb
|
129
142
|
- lib/linkheaders/processor/version.rb
|
@@ -1,28 +0,0 @@
|
|
1
|
-
ACCEPT_ALL_HEADER = {'Accept' => 'text/turtle, application/ld+json, application/rdf+xml, text/xhtml+xml, application/n3, application/rdf+n3, application/turtle, application/x-turtle, text/n3, text/turtle, text/rdf+n3, text/rdf+turtle, application/n-triples' }
|
2
|
-
|
3
|
-
TEXT_FORMATS = {
|
4
|
-
'text' => ['text/plain',],
|
5
|
-
}
|
6
|
-
|
7
|
-
RDF_FORMATS = {
|
8
|
-
'jsonld' => ['application/ld+json', 'application/vnd.schemaorg.ld+json'], # NEW FOR DATACITE
|
9
|
-
'turtle' => ['text/turtle','application/n3','application/rdf+n3',
|
10
|
-
'application/turtle', 'application/x-turtle','text/n3','text/turtle',
|
11
|
-
'text/rdf+n3', 'text/rdf+turtle'],
|
12
|
-
#'rdfa' => ['text/xhtml+xml', 'application/xhtml+xml'],
|
13
|
-
'rdfxml' => ['application/rdf+xml'],
|
14
|
-
'triples' => ['application/n-triples','application/n-quads', 'application/trig']
|
15
|
-
}
|
16
|
-
|
17
|
-
XML_FORMATS = {
|
18
|
-
'xml' => ['text/xhtml','text/xml',]
|
19
|
-
}
|
20
|
-
|
21
|
-
HTML_FORMATS = {
|
22
|
-
'html' => ['text/html','text/xhtml+xml', 'application/xhtml+xml']
|
23
|
-
}
|
24
|
-
|
25
|
-
JSON_FORMATS = {
|
26
|
-
'json' => ['application/json',]
|
27
|
-
}
|
28
|
-
|