linkheaders-processor 0.1.13 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -6
- data/lib/linkheaders/link.rb +20 -0
- data/lib/linkheaders/processor/version.rb +1 -1
- data/lib/linkheaders/processor.rb +9 -10
- data/lib/linkheaders/web_utils.rb +1 -1
- data/spec/linkheader/parser_spec.rb +32 -2
- data/spec/spec_helper.rb +1 -1
- metadata +16 -3
- data/lib/linkheaders/constants.rb +0 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b10f24c0498058f393a2142465f0ff2e559dc2a8721ea8cf6a90178c8ff21789
|
4
|
+
data.tar.gz: 5cd760d37f6e82f63cb8271375bec72364264173010fda86e1ae32f1a424642d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e696571f2c9da932ff461af46740824de0728a6accd327ab6e109ab84530d313868d120cff5a62f670655efe6531069d7b6a358f1dcfcb94560cdd647614ff35
|
7
|
+
data.tar.gz: cf056d618d352bbcfaa43e59317fbc6f739186ac3e7ae26534e8f12721dec0d7ab2e8edfa07a2434e881a37d1cd18bf30e84f6cfac402fcc74ecd8c86170fa73
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
linkheaders-processor (0.1.13)
|
4
|
+
linkheaders-processor (0.1.17)
|
5
5
|
json (~> 2.0)
|
6
6
|
json-ld (~> 3.2)
|
7
7
|
json-ld-preloaded (~> 3.2)
|
8
|
+
link_header (~> 0.0.8)
|
8
9
|
metainspector (~> 5.11.2)
|
9
10
|
rest-client (~> 2.1)
|
10
11
|
securerandom (~> 0.1.0)
|
@@ -18,7 +19,7 @@ GEM
|
|
18
19
|
diff-lcs (1.5.0)
|
19
20
|
domain_name (0.5.20190701)
|
20
21
|
unf (>= 0.0.5, < 1.0.0)
|
21
|
-
faraday (1.10.
|
22
|
+
faraday (1.10.1)
|
22
23
|
faraday-em_http (~> 1.0)
|
23
24
|
faraday-em_synchrony (~> 1.0)
|
24
25
|
faraday-excon (~> 1.1)
|
@@ -57,13 +58,13 @@ GEM
|
|
57
58
|
domain_name (~> 0.5)
|
58
59
|
json (2.6.2)
|
59
60
|
json-canonicalization (0.3.0)
|
60
|
-
json-ld (3.2.
|
61
|
+
json-ld (3.2.3)
|
61
62
|
htmlentities (~> 4.3)
|
62
63
|
json-canonicalization (~> 0.3)
|
63
64
|
link_header (~> 0.0, >= 0.0.8)
|
64
65
|
multi_json (~> 1.15)
|
65
66
|
rack (~> 2.2)
|
66
|
-
rdf (~> 3.2)
|
67
|
+
rdf (~> 3.2, >= 3.2.9)
|
67
68
|
json-ld-preloaded (3.2.0)
|
68
69
|
json-ld (~> 3.2)
|
69
70
|
rdf (~> 3.2)
|
@@ -95,7 +96,7 @@ GEM
|
|
95
96
|
rack (2.2.4)
|
96
97
|
rainbow (3.1.1)
|
97
98
|
rake (13.0.6)
|
98
|
-
rdf (3.2.
|
99
|
+
rdf (3.2.9)
|
99
100
|
link_header (~> 0.0, >= 0.0.8)
|
100
101
|
regexp_parser (2.5.0)
|
101
102
|
rest-client (2.1.0)
|
@@ -117,7 +118,7 @@ GEM
|
|
117
118
|
diff-lcs (>= 1.2.0, < 2.0)
|
118
119
|
rspec-support (~> 3.11.0)
|
119
120
|
rspec-support (3.11.0)
|
120
|
-
rubocop (1.
|
121
|
+
rubocop (1.33.0)
|
121
122
|
json (~> 2.3)
|
122
123
|
parallel (~> 1.10)
|
123
124
|
parser (>= 3.1.0.0)
|
data/lib/linkheaders/link.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
|
1
2
|
module LinkHeaders
|
2
3
|
class LinkFactory
|
3
4
|
|
@@ -191,5 +192,24 @@ module LinkHeaders
|
|
191
192
|
self.send("#{k}=", v)
|
192
193
|
end
|
193
194
|
end
|
195
|
+
|
196
|
+
#
|
197
|
+
# Create an HTML version of the link
|
198
|
+
# @return [String] HTML version of the Link object
|
199
|
+
#
|
200
|
+
def to_html
|
201
|
+
methods = self.linkmethods
|
202
|
+
href = self.href
|
203
|
+
rel = self.relation
|
204
|
+
anchor = self.anchor
|
205
|
+
properties = []
|
206
|
+
methods.each do |method|
|
207
|
+
value = self.send(method)
|
208
|
+
properties << [method, value]
|
209
|
+
end
|
210
|
+
properties << ["rel", rel]
|
211
|
+
properties << ["anchor", anchor]
|
212
|
+
LinkHeader::Link.new(href, properties).to_html
|
213
|
+
end
|
194
214
|
end
|
195
215
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'processor/version'
|
4
|
-
require_relative 'constants'
|
5
4
|
require_relative 'link'
|
6
5
|
require_relative 'web_utils'
|
7
|
-
|
6
|
+
require 'link_header'
|
8
7
|
require 'json'
|
9
8
|
require 'rest-client'
|
10
9
|
require 'securerandom'
|
@@ -63,10 +62,10 @@ module LinkHeaders
|
|
63
62
|
newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
|
64
63
|
warn "HTTPlinks #{newlinks.inspect}"
|
65
64
|
|
66
|
-
|
65
|
+
['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
|
67
66
|
if head[:content_type] and head[:content_type].match(format)
|
68
67
|
warn "found #{format} content - parsing"
|
69
|
-
htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
|
68
|
+
htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
|
70
69
|
warn "htmllinks #{htmllinks.inspect}"
|
71
70
|
end
|
72
71
|
end
|
@@ -124,7 +123,7 @@ module LinkHeaders
|
|
124
123
|
relation = sections['rel']
|
125
124
|
sections.delete('rel')
|
126
125
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
127
|
-
|
126
|
+
warn "RELATIONS #{relations}"
|
128
127
|
|
129
128
|
relations.each do |rel|
|
130
129
|
next unless rel.match?(/\w/)
|
@@ -139,8 +138,8 @@ module LinkHeaders
|
|
139
138
|
#
|
140
139
|
# @param [String] body The HTML of the page containing HTML Link headers
|
141
140
|
#
|
142
|
-
def parse_html_link_headers(body)
|
143
|
-
m = MetaInspector.new(
|
141
|
+
def parse_html_link_headers(body:, anchor: '')
|
142
|
+
m = MetaInspector.new(anchor, document: body)
|
144
143
|
# an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
|
145
144
|
newlinks = Array.new
|
146
145
|
m.head_links.each do |l|
|
@@ -155,7 +154,7 @@ module LinkHeaders
|
|
155
154
|
l.delete(:href)
|
156
155
|
|
157
156
|
relations = relation.split(/\s+/) # handle the multiple relation case
|
158
|
-
|
157
|
+
warn "RELATIONS #{relations}"
|
159
158
|
|
160
159
|
relations.each do |rel|
|
161
160
|
next unless rel.match?(/\w/)
|
@@ -189,7 +188,7 @@ module LinkHeaders
|
|
189
188
|
end
|
190
189
|
|
191
190
|
def processJSONLinkset(href:)
|
192
|
-
_headers, linkset =
|
191
|
+
_headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
|
193
192
|
# warn "Linkset body #{linkset.inspect}"
|
194
193
|
newlinks = Array.new
|
195
194
|
return nil unless linkset
|
@@ -242,7 +241,7 @@ module LinkHeaders
|
|
242
241
|
|
243
242
|
def processTextLinkset(href:)
|
244
243
|
newlinks = Array.new
|
245
|
-
headers, linkset =
|
244
|
+
headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset' })
|
246
245
|
# warn "linkset body #{linkset.inspect}"
|
247
246
|
return {} unless linkset
|
248
247
|
|
@@ -1,7 +1,37 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require_relative '../../lib/linkheaders/processor'
|
3
|
+
require 'rest-client'
|
4
|
+
|
5
|
+
|
6
|
+
url1 = "https://w3id.org/a2a-fair-metrics/22-http-html-citeas-describedby-mixed/"
|
7
|
+
p = LinkHeaders::Processor.new(default_anchor: url1)
|
8
|
+
r = RestClient.get(url1)
|
9
|
+
p.extract_and_parse(response: r)
|
10
|
+
factory = p.factory # LinkHeaders::LinkFactory
|
11
|
+
|
12
|
+
|
13
|
+
RSpec.describe LinkHeaders::Processor do
|
2
14
|
|
3
|
-
RSpec.describe LinkHeader::Parser do
|
4
15
|
it 'has a version number' do
|
5
|
-
expect(
|
16
|
+
expect(LinkHeaders::Processor::VERSION).not_to be nil
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should find PURL citeas which has described-by and cite-as in mixed HTTP and HTML headers" do
|
20
|
+
expect(factory.all_links.length).to eq 5
|
21
|
+
end
|
22
|
+
it "should find find href on all links" do
|
23
|
+
expect(factory.all_links.select{|l| l.href}.length).to eq 5
|
24
|
+
end
|
25
|
+
it "should find find href on all links" do
|
26
|
+
expect(factory.all_links.select{|l| l.anchor}.length).to eq 5
|
27
|
+
end
|
28
|
+
it "should find 5 links in mixed HTTP and HTML headers" do
|
29
|
+
expect(factory.all_links.select{|l| l.relation}.length).to eq 5
|
30
|
+
end
|
31
|
+
it "should find one citeas in mixed HTTP and HTML headers" do
|
32
|
+
expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
|
33
|
+
end
|
34
|
+
it "should find described-by in mixed HTTP and HTML headers" do
|
35
|
+
expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
|
6
36
|
end
|
7
37
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -7,7 +7,7 @@ RSpec.configure do |config|
|
|
7
7
|
config.example_status_persistence_file_path = ".rspec_status"
|
8
8
|
|
9
9
|
# Disable RSpec exposing methods globally on `Module` and `main`
|
10
|
-
config.disable_monkey_patching!
|
10
|
+
# config.disable_monkey_patching!
|
11
11
|
|
12
12
|
config.expect_with :rspec do |c|
|
13
13
|
c.syntax = :expect
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkheaders-processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.1.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 5.11.2
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: link_header
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.0.8
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.0.8
|
111
125
|
description: A parser/processor for Link Headers and Linksets in both JSON and Text
|
112
126
|
formats.
|
113
127
|
email:
|
@@ -123,7 +137,6 @@ files:
|
|
123
137
|
- README.md
|
124
138
|
- Rakefile
|
125
139
|
- launch.json
|
126
|
-
- lib/linkheaders/constants.rb
|
127
140
|
- lib/linkheaders/link.rb
|
128
141
|
- lib/linkheaders/processor.rb
|
129
142
|
- lib/linkheaders/processor/version.rb
|
@@ -1,28 +0,0 @@
|
|
1
|
-
ACCEPT_ALL_HEADER = {'Accept' => 'text/turtle, application/ld+json, application/rdf+xml, text/xhtml+xml, application/n3, application/rdf+n3, application/turtle, application/x-turtle, text/n3, text/turtle, text/rdf+n3, text/rdf+turtle, application/n-triples' }
|
2
|
-
|
3
|
-
TEXT_FORMATS = {
|
4
|
-
'text' => ['text/plain',],
|
5
|
-
}
|
6
|
-
|
7
|
-
RDF_FORMATS = {
|
8
|
-
'jsonld' => ['application/ld+json', 'application/vnd.schemaorg.ld+json'], # NEW FOR DATACITE
|
9
|
-
'turtle' => ['text/turtle','application/n3','application/rdf+n3',
|
10
|
-
'application/turtle', 'application/x-turtle','text/n3','text/turtle',
|
11
|
-
'text/rdf+n3', 'text/rdf+turtle'],
|
12
|
-
#'rdfa' => ['text/xhtml+xml', 'application/xhtml+xml'],
|
13
|
-
'rdfxml' => ['application/rdf+xml'],
|
14
|
-
'triples' => ['application/n-triples','application/n-quads', 'application/trig']
|
15
|
-
}
|
16
|
-
|
17
|
-
XML_FORMATS = {
|
18
|
-
'xml' => ['text/xhtml','text/xml',]
|
19
|
-
}
|
20
|
-
|
21
|
-
HTML_FORMATS = {
|
22
|
-
'html' => ['text/html','text/xhtml+xml', 'application/xhtml+xml']
|
23
|
-
}
|
24
|
-
|
25
|
-
JSON_FORMATS = {
|
26
|
-
'json' => ['application/json',]
|
27
|
-
}
|
28
|
-
|