linkheaders-processor 0.1.13 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -6
- data/lib/linkheaders/link.rb +20 -0
- data/lib/linkheaders/processor/version.rb +1 -1
- data/lib/linkheaders/processor.rb +9 -10
- data/lib/linkheaders/web_utils.rb +1 -1
- data/spec/linkheader/parser_spec.rb +32 -2
- data/spec/spec_helper.rb +1 -1
- metadata +16 -3
- data/lib/linkheaders/constants.rb +0 -28
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b10f24c0498058f393a2142465f0ff2e559dc2a8721ea8cf6a90178c8ff21789
         | 
| 4 | 
            +
              data.tar.gz: 5cd760d37f6e82f63cb8271375bec72364264173010fda86e1ae32f1a424642d
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: e696571f2c9da932ff461af46740824de0728a6accd327ab6e109ab84530d313868d120cff5a62f670655efe6531069d7b6a358f1dcfcb94560cdd647614ff35
         | 
| 7 | 
            +
              data.tar.gz: cf056d618d352bbcfaa43e59317fbc6f739186ac3e7ae26534e8f12721dec0d7ab2e8edfa07a2434e881a37d1cd18bf30e84f6cfac402fcc74ecd8c86170fa73
         | 
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,10 +1,11 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                linkheaders-processor (0.1. | 
| 4 | 
            +
                linkheaders-processor (0.1.17)
         | 
| 5 5 | 
             
                  json (~> 2.0)
         | 
| 6 6 | 
             
                  json-ld (~> 3.2)
         | 
| 7 7 | 
             
                  json-ld-preloaded (~> 3.2)
         | 
| 8 | 
            +
                  link_header (~> 0.0.8)
         | 
| 8 9 | 
             
                  metainspector (~> 5.11.2)
         | 
| 9 10 | 
             
                  rest-client (~> 2.1)
         | 
| 10 11 | 
             
                  securerandom (~> 0.1.0)
         | 
| @@ -18,7 +19,7 @@ GEM | |
| 18 19 | 
             
                diff-lcs (1.5.0)
         | 
| 19 20 | 
             
                domain_name (0.5.20190701)
         | 
| 20 21 | 
             
                  unf (>= 0.0.5, < 1.0.0)
         | 
| 21 | 
            -
                faraday (1.10. | 
| 22 | 
            +
                faraday (1.10.1)
         | 
| 22 23 | 
             
                  faraday-em_http (~> 1.0)
         | 
| 23 24 | 
             
                  faraday-em_synchrony (~> 1.0)
         | 
| 24 25 | 
             
                  faraday-excon (~> 1.1)
         | 
| @@ -57,13 +58,13 @@ GEM | |
| 57 58 | 
             
                  domain_name (~> 0.5)
         | 
| 58 59 | 
             
                json (2.6.2)
         | 
| 59 60 | 
             
                json-canonicalization (0.3.0)
         | 
| 60 | 
            -
                json-ld (3.2. | 
| 61 | 
            +
                json-ld (3.2.3)
         | 
| 61 62 | 
             
                  htmlentities (~> 4.3)
         | 
| 62 63 | 
             
                  json-canonicalization (~> 0.3)
         | 
| 63 64 | 
             
                  link_header (~> 0.0, >= 0.0.8)
         | 
| 64 65 | 
             
                  multi_json (~> 1.15)
         | 
| 65 66 | 
             
                  rack (~> 2.2)
         | 
| 66 | 
            -
                  rdf (~> 3.2)
         | 
| 67 | 
            +
                  rdf (~> 3.2, >= 3.2.9)
         | 
| 67 68 | 
             
                json-ld-preloaded (3.2.0)
         | 
| 68 69 | 
             
                  json-ld (~> 3.2)
         | 
| 69 70 | 
             
                  rdf (~> 3.2)
         | 
| @@ -95,7 +96,7 @@ GEM | |
| 95 96 | 
             
                rack (2.2.4)
         | 
| 96 97 | 
             
                rainbow (3.1.1)
         | 
| 97 98 | 
             
                rake (13.0.6)
         | 
| 98 | 
            -
                rdf (3.2. | 
| 99 | 
            +
                rdf (3.2.9)
         | 
| 99 100 | 
             
                  link_header (~> 0.0, >= 0.0.8)
         | 
| 100 101 | 
             
                regexp_parser (2.5.0)
         | 
| 101 102 | 
             
                rest-client (2.1.0)
         | 
| @@ -117,7 +118,7 @@ GEM | |
| 117 118 | 
             
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 118 119 | 
             
                  rspec-support (~> 3.11.0)
         | 
| 119 120 | 
             
                rspec-support (3.11.0)
         | 
| 120 | 
            -
                rubocop (1. | 
| 121 | 
            +
                rubocop (1.33.0)
         | 
| 121 122 | 
             
                  json (~> 2.3)
         | 
| 122 123 | 
             
                  parallel (~> 1.10)
         | 
| 123 124 | 
             
                  parser (>= 3.1.0.0)
         | 
    
        data/lib/linkheaders/link.rb
    CHANGED
    
    | @@ -1,3 +1,4 @@ | |
| 1 | 
            +
             | 
| 1 2 | 
             
            module LinkHeaders
         | 
| 2 3 | 
             
              class LinkFactory
         | 
| 3 4 |  | 
| @@ -191,5 +192,24 @@ module LinkHeaders | |
| 191 192 | 
             
                    self.send("#{k}=", v)
         | 
| 192 193 | 
             
                  end
         | 
| 193 194 | 
             
                end
         | 
| 195 | 
            +
             | 
| 196 | 
            +
                #
         | 
| 197 | 
            +
                # Create an HTML version of the link
         | 
| 198 | 
            +
                # @return [String]  HTML version of the Link object
         | 
| 199 | 
            +
                #
         | 
| 200 | 
            +
                def to_html
         | 
| 201 | 
            +
                  methods = self.linkmethods
         | 
| 202 | 
            +
                  href = self.href
         | 
| 203 | 
            +
                  rel = self.relation
         | 
| 204 | 
            +
                  anchor = self.anchor
         | 
| 205 | 
            +
                  properties = []
         | 
| 206 | 
            +
                  methods.each do |method|
         | 
| 207 | 
            +
                    value = self.send(method)
         | 
| 208 | 
            +
                    properties << [method, value]
         | 
| 209 | 
            +
                  end
         | 
| 210 | 
            +
                  properties << ["rel", rel]
         | 
| 211 | 
            +
                  properties << ["anchor", anchor]
         | 
| 212 | 
            +
                  LinkHeader::Link.new(href, properties).to_html
         | 
| 213 | 
            +
                end
         | 
| 194 214 | 
             
              end
         | 
| 195 215 | 
             
            end
         | 
    
        data/lib/linkheaders/processor.rb
    CHANGED
    
    | @@ -1,10 +1,9 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 3 | 
             
            require_relative 'processor/version'
         | 
| 4 | 
            -
            require_relative 'constants'
         | 
| 5 4 | 
             
            require_relative 'link'
         | 
| 6 5 | 
             
            require_relative 'web_utils'
         | 
| 7 | 
            -
             | 
| 6 | 
            +
            require 'link_header'
         | 
| 8 7 | 
             
            require 'json'
         | 
| 9 8 | 
             
            require 'rest-client'
         | 
| 10 9 | 
             
            require 'securerandom'
         | 
| @@ -63,10 +62,10 @@ module LinkHeaders | |
| 63 62 | 
             
                  newlinks = parse_http_link_headers(head) # pass guid to check against anchors in linksets
         | 
| 64 63 | 
             
                  warn "HTTPlinks #{newlinks.inspect}"
         | 
| 65 64 |  | 
| 66 | 
            -
                   | 
| 65 | 
            +
                  ['text/html','text/xhtml+xml', 'application/xhtml+xml'].each do |format|
         | 
| 67 66 | 
             
                    if head[:content_type] and head[:content_type].match(format)
         | 
| 68 67 | 
             
                      warn "found #{format} content - parsing"
         | 
| 69 | 
            -
                      htmllinks = parse_html_link_headers(body) # pass html body to find HTML link headers
         | 
| 68 | 
            +
                      htmllinks = parse_html_link_headers(body: body, anchor: default_anchor) # pass html body to find HTML link headers
         | 
| 70 69 | 
             
                      warn "htmllinks #{htmllinks.inspect}"
         | 
| 71 70 | 
             
                    end
         | 
| 72 71 | 
             
                  end
         | 
| @@ -124,7 +123,7 @@ module LinkHeaders | |
| 124 123 | 
             
                    relation = sections['rel']
         | 
| 125 124 | 
             
                    sections.delete('rel')
         | 
| 126 125 | 
             
                    relations = relation.split(/\s+/)  # handle the multiple relation case
         | 
| 127 | 
            -
                     | 
| 126 | 
            +
                    warn "RELATIONS #{relations}"
         | 
| 128 127 |  | 
| 129 128 | 
             
                    relations.each do |rel|
         | 
| 130 129 | 
             
                      next unless rel.match?(/\w/)
         | 
| @@ -139,8 +138,8 @@ module LinkHeaders | |
| 139 138 | 
             
                #
         | 
| 140 139 | 
             
                # @param [String] body The HTML of the page containing HTML Link headers
         | 
| 141 140 | 
             
                #
         | 
| 142 | 
            -
                def parse_html_link_headers(body)
         | 
| 143 | 
            -
                  m = MetaInspector.new( | 
| 141 | 
            +
                def parse_html_link_headers(body:, anchor: '')
         | 
| 142 | 
            +
                  m = MetaInspector.new(anchor, document: body)
         | 
| 144 143 | 
             
                  # an array of elements that look like this: [{:rel=>"alternate", :type=>"application/ld+json", :href=>"http://scidata.vitk.lv/dataset/303.jsonld"}]
         | 
| 145 144 | 
             
                  newlinks = Array.new
         | 
| 146 145 | 
             
                  m.head_links.each do |l|
         | 
| @@ -155,7 +154,7 @@ module LinkHeaders | |
| 155 154 | 
             
                    l.delete(:href)        
         | 
| 156 155 |  | 
| 157 156 | 
             
                    relations = relation.split(/\s+/)  # handle the multiple relation case
         | 
| 158 | 
            -
                     | 
| 157 | 
            +
                    warn "RELATIONS #{relations}"
         | 
| 159 158 |  | 
| 160 159 | 
             
                    relations.each do |rel|
         | 
| 161 160 | 
             
                      next unless rel.match?(/\w/)
         | 
| @@ -189,7 +188,7 @@ module LinkHeaders | |
| 189 188 | 
             
                end
         | 
| 190 189 |  | 
| 191 190 | 
             
                def processJSONLinkset(href:)
         | 
| 192 | 
            -
                  _headers, linkset =  | 
| 191 | 
            +
                  _headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset+json' })
         | 
| 193 192 | 
             
                  # warn "Linkset body #{linkset.inspect}"
         | 
| 194 193 | 
             
                  newlinks = Array.new
         | 
| 195 194 | 
             
                  return nil unless linkset
         | 
| @@ -242,7 +241,7 @@ module LinkHeaders | |
| 242 241 |  | 
| 243 242 | 
             
                def processTextLinkset(href:)
         | 
| 244 243 | 
             
                  newlinks = Array.new
         | 
| 245 | 
            -
                  headers, linkset =  | 
| 244 | 
            +
                  headers, linkset = lhfetch(href, { 'Accept' => 'application/linkset' })
         | 
| 246 245 | 
             
                  # warn "linkset body #{linkset.inspect}"
         | 
| 247 246 | 
             
                  return {} unless linkset
         | 
| 248 247 |  | 
    
        data/spec/linkheader/parser_spec.rb
    CHANGED
    
    | @@ -1,7 +1,37 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 | 
            +
            require_relative '../../lib/linkheaders/processor'
         | 
| 3 | 
            +
            require 'rest-client'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
             | 
| 6 | 
            +
            url1 = "https://w3id.org/a2a-fair-metrics/22-http-html-citeas-describedby-mixed/"
         | 
| 7 | 
            +
            p = LinkHeaders::Processor.new(default_anchor: url1)
         | 
| 8 | 
            +
            r = RestClient.get(url1)
         | 
| 9 | 
            +
            p.extract_and_parse(response: r)
         | 
| 10 | 
            +
            factory = p.factory  # LinkHeaders::LinkFactory
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            RSpec.describe LinkHeaders::Processor do
         | 
| 2 14 |  | 
| 3 | 
            -
            RSpec.describe LinkHeader::Parser do
         | 
| 4 15 | 
             
              it 'has a version number' do
         | 
| 5 | 
            -
                expect( | 
| 16 | 
            +
                expect(LinkHeaders::Processor::VERSION).not_to be nil
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
              it "should find PURL citeas which has described-by and cite-as in mixed HTTP and HTML headers" do
         | 
| 20 | 
            +
                expect(factory.all_links.length).to eq 5
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
              it "should find find href on all links" do
         | 
| 23 | 
            +
                expect(factory.all_links.select{|l| l.href}.length).to eq 5
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
              it "should find find href on all links" do
         | 
| 26 | 
            +
                expect(factory.all_links.select{|l| l.anchor}.length).to eq 5
         | 
| 27 | 
            +
              end
         | 
| 28 | 
            +
              it "should find 5 links in mixed HTTP and HTML headers" do
         | 
| 29 | 
            +
                expect(factory.all_links.select{|l| l.relation}.length).to eq 5
         | 
| 30 | 
            +
              end
         | 
| 31 | 
            +
              it "should find one citeas in mixed HTTP and HTML headers" do
         | 
| 32 | 
            +
                expect(factory.all_links.select{|l| l.relation == 'cite-as'}.length).to eq 1
         | 
| 33 | 
            +
              end
         | 
| 34 | 
            +
              it "should find described-by in mixed HTTP and HTML headers" do
         | 
| 35 | 
            +
                expect(factory.all_links.select{|l| l.relation == 'describedby'}.length).to eq 1
         | 
| 6 36 | 
             
              end
         | 
| 7 37 | 
             
            end
         | 
    
        data/spec/spec_helper.rb
    CHANGED
    
    | @@ -7,7 +7,7 @@ RSpec.configure do |config| | |
| 7 7 | 
             
              config.example_status_persistence_file_path = ".rspec_status"
         | 
| 8 8 |  | 
| 9 9 | 
             
              # Disable RSpec exposing methods globally on `Module` and `main`
         | 
| 10 | 
            -
              config.disable_monkey_patching!
         | 
| 10 | 
            +
              # config.disable_monkey_patching!
         | 
| 11 11 |  | 
| 12 12 | 
             
              config.expect_with :rspec do |c|
         | 
| 13 13 | 
             
                c.syntax = :expect
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: linkheaders-processor
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.13
         | 
| 4 | 
            +
              version: 0.1.17
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Mark Wilkinson
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2022-08- | 
| 11 | 
            +
            date: 2022-08-12 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rspec
         | 
| @@ -108,6 +108,20 @@ dependencies: | |
| 108 108 | 
             
                - - "~>"
         | 
| 109 109 | 
             
                  - !ruby/object:Gem::Version
         | 
| 110 110 | 
             
                    version: 5.11.2
         | 
| 111 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 112 | 
            +
              name: link_header
         | 
| 113 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 114 | 
            +
                requirements:
         | 
| 115 | 
            +
                - - "~>"
         | 
| 116 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 117 | 
            +
                    version: 0.0.8
         | 
| 118 | 
            +
              type: :runtime
         | 
| 119 | 
            +
              prerelease: false
         | 
| 120 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 121 | 
            +
                requirements:
         | 
| 122 | 
            +
                - - "~>"
         | 
| 123 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 124 | 
            +
                    version: 0.0.8
         | 
| 111 125 | 
             
            description: A parser/processor for Link Headers and Linksets in both JSON and Text
         | 
| 112 126 | 
             
              formats.
         | 
| 113 127 | 
             
            email:
         | 
| @@ -123,7 +137,6 @@ files: | |
| 123 137 | 
             
            - README.md
         | 
| 124 138 | 
             
            - Rakefile
         | 
| 125 139 | 
             
            - launch.json
         | 
| 126 | 
            -
            - lib/linkheaders/constants.rb
         | 
| 127 140 | 
             
            - lib/linkheaders/link.rb
         | 
| 128 141 | 
             
            - lib/linkheaders/processor.rb
         | 
| 129 142 | 
             
            - lib/linkheaders/processor/version.rb
         | 
| @@ -1,28 +0,0 @@ | |
| 1 | 
            -
            ACCEPT_ALL_HEADER = {'Accept' => 'text/turtle, application/ld+json, application/rdf+xml, text/xhtml+xml, application/n3, application/rdf+n3, application/turtle, application/x-turtle, text/n3, text/turtle, text/rdf+n3, text/rdf+turtle, application/n-triples' }
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            TEXT_FORMATS = {
         | 
| 4 | 
            -
                'text' => ['text/plain',],
         | 
| 5 | 
            -
            }
         | 
| 6 | 
            -
             | 
| 7 | 
            -
            RDF_FORMATS = {
         | 
| 8 | 
            -
              'jsonld'  => ['application/ld+json', 'application/vnd.schemaorg.ld+json'],  # NEW FOR DATACITE
         | 
| 9 | 
            -
              'turtle'  => ['text/turtle','application/n3','application/rdf+n3',
         | 
| 10 | 
            -
                           'application/turtle', 'application/x-turtle','text/n3','text/turtle',
         | 
| 11 | 
            -
                           'text/rdf+n3', 'text/rdf+turtle'],
         | 
| 12 | 
            -
              #'rdfa'    => ['text/xhtml+xml', 'application/xhtml+xml'],
         | 
| 13 | 
            -
              'rdfxml'  => ['application/rdf+xml'],
         | 
| 14 | 
            -
              'triples' => ['application/n-triples','application/n-quads', 'application/trig']
         | 
| 15 | 
            -
            }
         | 
| 16 | 
            -
             | 
| 17 | 
            -
            XML_FORMATS = {
         | 
| 18 | 
            -
              'xml' => ['text/xhtml','text/xml',]
         | 
| 19 | 
            -
            }
         | 
| 20 | 
            -
             | 
| 21 | 
            -
            HTML_FORMATS = {
         | 
| 22 | 
            -
              'html' => ['text/html','text/xhtml+xml', 'application/xhtml+xml']
         | 
| 23 | 
            -
            }
         | 
| 24 | 
            -
             | 
| 25 | 
            -
            JSON_FORMATS = {
         | 
| 26 | 
            -
                        'json' => ['application/json',]
         | 
| 27 | 
            -
            }
         | 
| 28 | 
            -
             |