RubyGems - linkheaders-processor - Versions diffs - 0.1.8 → 0.1.13 - Mend

linkheaders-processor 0.1.8 → 0.1.13

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/Gemfile +0 -2
data/Gemfile.lock +2 -2
data/README.md +13 -7
data/lib/linkheaders/link.rb +18 -10
data/lib/linkheaders/processor/version.rb +1 -1
data/lib/linkheaders/processor.rb +58 -23
metadata +6 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: af390c80d1304df2d885e4bb19ad6be8a95e695b7b88ac2a59762e8f11d17dff
-  data.tar.gz: f3e90daa90734be50afb722f6023ecb6594c778776c987c0a4bf9b42e4d3aeaa
+  metadata.gz: e88dd164547a9a21ce0f1a3ffa85f2af4190ea6a588da445387cdfa2dca7e25d
+  data.tar.gz: f800677c8d4cb18e274defb5dbda5d2f58431cab40899ae15cfb2f866fcf8644
 SHA512:
-  metadata.gz: 100903ef954dc3b40aaea1f97b285bb5dce59703a968905c9a9a7933416c1e4b847de32d0c03de5c646b3d0ae8d6d9a73ae003c315ee335aac10a956cfc38bfb
-  data.tar.gz: 71b0b8b7ad489ee6f3db7787fa6de0da3b3bbc40c2f16c53f16586be88233f3c88693c848624bd592d1a923ea9d90e535eaf6841547cade3e96664bdde6cdba4
+  metadata.gz: ad4b8814c9ace9def1edd94e53e890c8d534a40d7cad4d55f7dbd0426e1310d29277a1849fd63ebada06fa3c9812a9d189c86ef2635ffe846fe52ff5f4864e2e
+  data.tar.gz: 99573e84fa6eb0412a5223cb188f333c37a8dbb3c877fcf5ea403a71bfc18d57d477920c6000b280fbb8cc920548ddd11ea95dcdb0df4f0a896e30a68fe84d9b

data/Gemfile CHANGED Viewed

@@ -7,6 +7,4 @@ gemspec
 gem "rake", "~> 13.0"
-gem "rspec", "~> 3.0"
 gem "rubocop", "~> 1.21"

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    linkheaders-processor (0.1.8)
+    linkheaders-processor (0.1.13)
       json (~> 2.0)
       json-ld (~> 3.2)
       json-ld-preloaded (~> 3.2)
@@ -143,7 +143,7 @@ PLATFORMS
 DEPENDENCIES
   linkheaders-processor!
   rake (~> 13.0)
-  rspec (~> 3.0)
+  rspec (~> 3.11)
   rubocop (~> 1.21)
 BUNDLED WITH

data/README.md CHANGED Viewed

@@ -2,24 +2,24 @@
 A gem to extract Link Headers from Web responses.
-This module handles HTTP Link Headers, HTML Link Headers, and auto-follows links to LinkSets in both JSON and Text format, and processes them also.
+This module handles HTTP Link Headers, HTML Link Headers, and auto-follows links to LinkSets in both JSON and Text format, and processes them also.  It also handles some unusual cases, such as having multiple relation types in a single link, or when dealing with 204 or 410 response where there is no message body.
 ## Installation
 Install the gem and add to the application's Gemfile by executing:
-    $ bundle add linkheader-processor
+    $ bundle add linkheaders-processor
 If bundler is not being used to manage dependencies, install the gem by executing:
-    $ gem install linkheader-processor
+    $ gem install linkheaders-processor
 ## Usage
 ```
-    require 'linkheader/processor'
+    require 'linkheaders/processor'
     require 'rest-client'
     # url1 has http link headers, and a reference to a linkset in json format
@@ -28,27 +28,33 @@ If bundler is not being used to manage dependencies, install the gem by executin
     # url2 has http link headers, with a reference to a linkset in legacy text format
     url2 = "https://s11.no/2022/a2a-fair-metrics/28-http-linkset-txt-only/"
-    p = LinkHeader::Parser.new(default_anchor: url1)
+    p = LinkHeaders::Processor.new(default_anchor: url1)
     r = RestClient.get(url1)
     p.extract_and_parse(response: r)
-    factory = p.factory  # LinkHeader::LinkFactory
+    factory = p.factory  # LinkHeaders::LinkFactory
     factory.all_links.each do |l|
         puts l.href
         puts l.relation
         puts l.responsepart
+        # Additional properties are added as other instance methods
+        # you can access them as follows:
         puts l.linkmethods  # returns list of instance methods beyond href and relation, that are attributes of the link
         l.linkmethods.each do |method|
             puts "#{method}=" + l.send(method)
         end
+        # or
+        puts l.type if l.respond_to? 'type'
         puts
     end
-    p = LinkHeader::Parser.new(default_anchor: url2)
+    p = LinkHeaders::Processor.new(default_anchor: url2)
     r = RestClient.get(url2)
     p.extract_and_parse(response: r)

data/lib/linkheaders/link.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module LinkHeaders
     attr_accessor :default_anchor
     # @return [Array] An array of strings containing any warnings that were encountered when creating the link (e.g. duplicate cite-as but non-identical URLs)
     attr_accessor :warnings
-    @@all_links = Array.new
+    attr_accessor :all_links
     #
     # Create the LinkFacgtory Object
@@ -15,8 +15,10 @@ module LinkHeaders
     def initialize(default_anchor: 'https://example.org/')
       @default_anchor = default_anchor
       @warnings = Array.new
+      @all_links = Array.new
     end
     #
     # Create a new LinkHeader::Link object
     #
@@ -30,9 +32,13 @@ module LinkHeaders
     #
     def new_link(responsepart:, href:, relation:, anchor: @default_anchor, **kwargs)
       # warn "creating new link with kw #{kwargs}"
-      link = LinkHeader::Link.new(responsepart: responsepart, factory: self, href: href, anchor: anchor, relation: relation, **kwargs)
+      if relation.split(/\s/).length > 1
+        @warnings |= ['WARN: the link relation contains spaces.  This is allowed by the standard to indicate multiple relations for the same link, but this MUST be processed before creating a LinkHeaders::Link object!']
+      end
+      link = LinkHeaders::Link.new(responsepart: responsepart, factory: self, href: href, anchor: anchor, relation: relation, **kwargs)
       link = sanitycheck(link)  # this will add warnings if the link already exists and has a conflict.  returns the original of a duplicate
-      @@all_links |= [link]
+      self.all_links |= [link]
       return link
     end
@@ -42,7 +48,7 @@ module LinkHeaders
     # @return [Array] Array of all LinkHeader::Link objects created by the factory so far
     #
     def all_links
-      @@all_links
+      @all_links
     end
     #
@@ -106,19 +112,21 @@ module LinkHeaders
     end
     def sanitycheck(link)
-      flag = true
+      if link.relation == "describedby" and !(link.respond_to? 'type')
+        @warnings |= ['WARN: A describedby link should include a "type" attribute, to know the MIME type of the addressed description']
+      end
       self.all_links.each do |l|
         if l.relation == "cite-as" and link.relation == "cite-as"
           if l.href != link.href
-            @warnings << 'WARN: Found conflicting cite-as relations.  This should never happen'
+            @warnings |= ['WARN: Found conflicting cite-as relations.  This should never happen']
           end
         end
         if l.href == link.href
           if l.relation != link.relation
-            @warnings << 'WARN: Found identical hrefs with different relation types.  This may be suspicious. Both have been retained'
-          end
-          if l.relation = link.relation
-            @warnings << 'WARN: found apparent duplicate. Ignoring and returning known link'
+            @warnings |= ['WARN: Found identical hrefs with different relation types.  This may be suspicious. Both have been retained']
+          else
+            @warnings |= ['WARN: found apparent duplicate. Ignoring and returning known link']
             link = l
           end
         end

data/lib/linkheaders/processor/version.rb CHANGED Viewed

@@ -3,6 +3,6 @@
 module LinkHeaders
   class Processor
-    VERSION = "0.1.8"
+    VERSION = "0.1.13"
   end
 end

data/lib/linkheaders/processor.rb CHANGED Viewed

@@ -17,7 +17,7 @@ module LinkHeaders
   #
   # Works for both HTML and HTTP links, and handles references to Linksets of either JSON or Text types
   #
-  class Parser
+  class Processor
     # @return [<Type>] <description>
     attr_accessor :default_anchor, :factory
@@ -28,7 +28,7 @@ module LinkHeaders
     #
     def initialize(default_anchor: 'https://default.anchor.org/')
       @default_anchor = default_anchor
-      @factory = LinkHeader::LinkFactory.new(default_anchor: @default_anchor)
+      @factory = LinkHeaders::LinkFactory.new(default_anchor: @default_anchor)
     end
     #
@@ -60,10 +60,14 @@ module LinkHeaders
         return [[], []]
       end
-      parse_http_link_headers(head) # pass guid to check against anchors in linksets
+      newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
+      warn "HTTPlinks #{newlinks.inspect}"
       HTML_FORMATS['html'].each do |format|
         if head[:content_type] and head[:content_type].match(format)
+          warn "found #{format} content - parsing"
           htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
+          warn "htmllinks #{htmllinks.inspect}"
         end
       end
     end
@@ -75,7 +79,7 @@ module LinkHeaders
     #
     #
     def parse_http_link_headers(headers)
+      newlinks = Array.new
       # Link: <https://example.one.com>; rel="preconnect", <https://example.two.com>; rel="preconnect",  <https://example.three.com>; rel="preconnect"
       links = headers[:link]
       return [] unless links
@@ -85,11 +89,13 @@ module LinkHeaders
       # warn parts
       # Parse each part into a named link
-      split_http_link_headers(parts) # creates links from the split headers and adds to factory.all_links
-      check_for_linkset(responsepart: :header)  # all links are held in the Linkset::LinkFactory object (factory variable here).  This scans the links for a linkset link to follow
+      newlinks << split_http_link_headers_and_process(parts) # creates links from the split headers and adds to factory.all_links
+      newlinks << check_for_linkset(responsepart: :header)  # all links are held in the Linkset::LinkFactory object (factory variable here).  This scans the links for a linkset link to follow
+      newlinks
     end
-    def split_http_link_headers(parts)
+    def split_http_link_headers_and_process(parts)
+      newlinks = Array.new
       parts.each do |part, _index|
         # warn "link is:  #{part}"
@@ -117,9 +123,15 @@ module LinkHeaders
         sections.delete('anchor')
         relation = sections['rel']
         sections.delete('rel')
+        relations = relation.split(/\s+/)  # handle the multiple relation case
+        $stderr.puts "RELATIONS #{relations}"
-        factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: relation, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
+        relations.each do |rel|
+          next unless rel.match?(/\w/)
+          newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **sections) # parsed['https://example.one.com'][:rel] = "preconnect"
+        end
       end
+      newlinks
     end
     #
@@ -130,9 +142,9 @@ module LinkHeaders
     def parse_html_link_headers(body)
       m = MetaInspector.new('http://example.org', document: body)
       # an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
+      newlinks = Array.new
       m.head_links.each do |l|
-        # warn "link is:  #{l}"
+        warn "HTML head link is:  #{l.inspect}"
         next unless l[:href] and l[:rel] # required
         anchor = l[:anchor] || default_anchor
@@ -140,14 +152,23 @@ module LinkHeaders
         relation = l[:rel]
         l.delete(:rel)
         href = l[:href]
-        l.delete(:href)
-        factory.new_link(responsepart: :body, anchor: anchor, href: href, relation: relation, **l)
+        l.delete(:href)
+        relations = relation.split(/\s+/)  # handle the multiple relation case
+        $stderr.puts "RELATIONS #{relations}"
+        relations.each do |rel|
+          next unless rel.match?(/\w/)
+          newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **l) # parsed['https://example.one.com'][:rel] = "preconnect"
+        end
       end
-      check_for_linkset(responsepart: :body)
+      newlinks << check_for_linkset(responsepart: :body)
+      newlinks
     end
     def check_for_linkset(responsepart:)
-      # warn "looking for a linkset"
+      warn "looking for a linkset"
+      newlinks = Array.new
       factory.linksets.each do |linkset|
         # warn "found #{linkset.methods- Object.new.methods}"
         # warn "inspect #{linkset.inspect}"
@@ -156,20 +177,21 @@ module LinkHeaders
         case linkset.type
         when 'application/linkset+json'
           # warn "found a json linkset"
-          processJSONLinkset(href: linkset.href)
+          newlinks << processJSONLinkset(href: linkset.href)
         when 'application/linkset'
           # warn "found a text linkset"
-          processTextLinkset(href:linkset.href)
+          newlinks << processTextLinkset(href:linkset.href)
         else
           warn "the linkset #{linkset} was not typed as 'application/linkset+json' or 'application/linkset', and it should be! (found #{linkset.type}) Ignoring..."
         end
       end
+      newlinks
     end
     def processJSONLinkset(href:)
       _headers, linkset = fetch(href, { 'Accept' => 'application/linkset+json' })
       # warn "Linkset body #{linkset.inspect}"
+      newlinks = Array.new
       return nil unless linkset
       # linkset = '{ "linkset":
@@ -194,10 +216,10 @@ module LinkHeaders
         attrhash = {}
         # warn ls.keys, "\n"
-        ls.each_key do |reltype| # key =  e.g. "item", "described-by". "cite"
+        ls.each_key do |relation| # key =  e.g. "item", "described-by". "cite"
           # warn reltype, "\n"
           # warn ls[reltype], "\n"
-          ls[reltype].each do |attrs|  # attr = e.g.  {"href": "http://example.com/foo1", "type": "text/html"}
+          ls[relation].each do |attrs|  # attr = e.g.  {"href": "http://example.com/foo1", "type": "text/html"}
             next unless attrs['href']  # this is a required attribute of a  linkset relation
             href = attrs['href']
@@ -206,12 +228,20 @@ module LinkHeaders
               attrhash[attr.to_sym] = val
             end
           end
-          factory.new_link(responsepart: :linkset, href: href, relation: reltype, anchor: anchor, **attrhash)
+          relations = relation.split(/\s+/)  # handle the multiple relation case
+          relations.each do |rel|
+            next unless rel.match?(/\w/)
+            newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
+          end
         end
       end
+      newlinks
     end
     def processTextLinkset(href:)
+      newlinks = Array.new
       headers, linkset = fetch(href, { 'Accept' => 'application/linkset' })
       # warn "linkset body #{linkset.inspect}"
       return {} unless linkset
@@ -237,14 +267,19 @@ module LinkHeaders
         end
         warn "No link relation type... this is bad!  Skipping" unless attrhash[:rel]
         next unless attrhash[:rel]
-        reltype = attrhash[:rel]
+        relation = attrhash[:rel]
         attrhash.delete(:rel)
         anchor = attrhash[:anchor] || @default_anchor
         attrhash.delete(:anchor)
-        factory.new_link(responsepart: :linkset, href: href, relation: reltype, anchor: anchor, **attrhash)
-        # warn "created #{[href, reltype, anchor, **attrhash]}"
+        relations = relation.split(/\s+/)  # handle the multiple relation case
+        #$stderr.puts "RELATIONS #{relations}"
+        relations.each do |rel|
+          next unless rel.match?(/\w/)
+          newlinks << factory.new_link(responsepart: :header, anchor: anchor, href: href, relation: rel, **attrhash) # parsed['https://example.one.com'][:rel] = "preconnect"
+        end
       end
+      newlinks
     end
   end
 end

metadata CHANGED Viewed

@@ -1,29 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: linkheaders-processor
 version: !ruby/object:Gem::Version
-  version: 0.1.8
+  version: 0.1.13
 platform: ruby
 authors:
 - Mark Wilkinson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-07-27 00:00:00.000000000 Z
+date: 2022-08-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '3.11'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '3.11'
 - !ruby/object:Gem::Dependency
   name: rest-client
   requirement: !ruby/object:Gem::Requirement