fsp_harvester 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e285f00da696d7e39d80df794be9524af6e63ea01deb4e73f6c30b3694c016ff
4
- data.tar.gz: fb81b5c1c0fac3bb22e078663025855e5accdb355db1811a4687fb1bca54bc61
3
+ metadata.gz: 9e0ffd5048e360ce8e8cced890a586664af797065d2c8d6312927d694835e84b
4
+ data.tar.gz: 840269a8b28da70bed8c5e46674ff3730cbee66f624064cab84f98d5b3a2ff00
5
5
  SHA512:
6
- metadata.gz: 194132eb78246291a3cb96566ca6a283841a0427afcd6a6abb79c590dbc2c54108e3e8cfef9e4802a77008f1a4c9c94ea7862987e81ce1b4b97cd1fdaf25ca23
7
- data.tar.gz: 9765647726c2bfcd7e790ba11929d257610672bc92d8d11756824432e90db4c05036b2cfcede1a55da95f1e74b9e87fd078c78284c356897a5bdc0a17593a3a1
6
+ metadata.gz: 4c01cc88a8f57e024c7aeed89a8251d97b130bca987dc14d914e87fa87ea744d3de7ab11ca340b0456f295edafdd872d4f63d0f0ef23dbe9c3cc8ebc97a64ae5
7
+ data.tar.gz: 2c274758ec874bb1c25ebd5286ecbc2b7e91205430a94cf3ada9c7350511fe362532f2c6d213a6fd6657ccdf7184df9c0eaf9c2461c0d25dc87da00b2aded390
data/.rspec_status CHANGED
@@ -42,14 +42,15 @@ example_id | status | run_time |
42
42
  ./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
43
43
  ./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
44
44
  ./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
45
- ./spec/item_spec.rb[1:1:1] | passed | 2.94 seconds |
46
- ./spec/item_spec.rb[1:1:2] | passed | 3 seconds |
47
- ./spec/item_spec.rb[1:1:3] | passed | 1.35 seconds |
48
- ./spec/item_spec.rb[1:1:4] | passed | 1.83 seconds |
49
- ./spec/item_spec.rb[1:1:5] | passed | 2.26 seconds |
50
- ./spec/item_spec.rb[1:1:6] | passed | 2.17 seconds |
51
- ./spec/item_spec.rb[1:1:7] | passed | 2.8 seconds |
52
- ./spec/item_spec.rb[1:1:8] | passed | 0.52869 seconds |
45
+ ./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
46
+ ./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
47
+ ./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
48
+ ./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
49
+ ./spec/item_spec.rb[1:1:4] | passed | 1.76 seconds |
50
+ ./spec/item_spec.rb[1:1:5] | passed | 2.43 seconds |
51
+ ./spec/item_spec.rb[1:1:6] | passed | 2.23 seconds |
52
+ ./spec/item_spec.rb[1:1:7] | passed | 2.94 seconds |
53
+ ./spec/item_spec.rb[1:1:8] | passed | 0.52517 seconds |
53
54
  ./spec/type_spec.rb[1:1:1] | passed | 1.35 seconds |
54
55
  ./spec/type_spec.rb[1:1:2] | passed | 1.32 seconds |
55
56
  ./spec/type_spec.rb[1:1:3] | passed | 1.54 seconds |
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fsp_harvester (0.1.14)
4
+ fsp_harvester (0.1.15)
5
5
  json (~> 2.0)
6
6
  linkeddata (~> 3.2)
7
7
  linkheaders-processor (~> 0.1.17)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module FspHarvester
4
- VERSION = "0.1.14"
4
+ VERSION = "0.1.15"
5
5
  end
@@ -0,0 +1,48 @@
1
module HarvesterTools
  # Base error class for the harvester tools namespace.
  class Error < StandardError
  end

  # "Brute force" metadata harvesting: resolve a GUID to a URL and then fetch
  # that URL several times with different Accept headers, extracting whatever
  # metadata each response body yields.
  class BruteForce
    # Entry point for brute-force harvesting of a single identifier.
    #
    # @param guid [String] the identifier to resolve; converted to a URL via
    #   HarvesterTools::Utils.convertToURL
    # @param metadata [HarvesterTools::MetadataObject] accumulator that is
    #   mutated in place by the fetch/extract steps
    # @return [HarvesterTools::MetadataObject, false] the populated metadata
    #   object, or false when the GUID could not be typed/converted
    def self.begin_brute_force(guid:, metadata: HarvesterTools::MetadataObject.new)
      type, url = HarvesterTools::Utils.convertToURL(guid: guid)
      return false unless type

      do_content_negotiation(url: url, metadata: metadata)
      metadata
    end

    # Fetches +url+ under several Accept headers and extracts metadata from
    # every response that comes back.
    #
    # NOTE(review): ACCEPT_ALL_HEADER and ACCEPT_STAR_HEADER are defined
    # outside this file; their exact contents are not visible here.
    #
    # @param url [String] the URL to negotiate against
    # @param metadata [HarvesterTools::MetadataObject] accumulator, mutated in place
    # @return [Object, nil] result of the last extraction attempted, or nil
    #   (callers in this file ignore the return value)
    def self.do_content_negotiation(url:, metadata:)
      # Pass 1: content negotiation directly on the resolved URL.
      negotiated = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      if negotiated
        HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: negotiated, metadata: metadata)
      end

      # Pass 2: fetch the landing page itself.
      landing = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_STAR_HEADER)
      return unless landing

      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: landing, metadata: metadata)

      # Pass 3: content negotiation on the landing page's final (post-redirect) URL.
      renegotiated = resolve_url_brute(url: landing.request.url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      return unless renegotiated

      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: renegotiated, metadata: metadata)
    end

    # Performs a single fetch and records the outcome on +metadata+.
    #
    # @param url [String] the URL to fetch
    # @param method [Symbol] HTTP method handed to the fetcher
    # @param nolinkheaders [Boolean] accepted for interface compatibility;
    #   not used by this method
    # @param headers [Hash] request headers handed to the fetcher
    # @param metadata [HarvesterTools::MetadataObject] accumulator, mutated in place
    # @return [Object, false] the response object on success; false when the
    #   fetch produced no response
    def self.resolve_url_brute(url:, method: :get, nolinkheaders: true, headers:, metadata:)
      metadata.guidtype = 'uri' if metadata.guidtype.nil?
      warn "\n\n BRUTE FETCHING #{url} #{headers}\n\n"
      response = HarvesterTools::WebUtils.fspfetch(url: url, headers: headers, method: method, meta: metadata)
      warn "\n\n head #{response.headers.inspect}\n\n" if response

      unless response
        metadata.add_warning(['001', url, headers])
        metadata.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{headers}.\n"
        metadata.full_response << [url, "No response"]
        # Must be an explicit return: a bare `false` here would fall through
        # to `response.body` below and raise NoMethodError on nil/false.
        return false
      end

      metadata.comments << "INFO: following redirection using this header led to the following URL: #{metadata.all_uris.last}. Using the output from this URL for the next few tests..."
      metadata.full_response << [url, response.body]
      response
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fsp_harvester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.14
4
+ version: 0.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Wilkinson
@@ -190,6 +190,7 @@ files:
190
190
  - lib/fsp_harvester.rb
191
191
  - lib/fsp_harvester/version.rb
192
192
  - lib/harvester.rb
193
+ - lib/harvester_brute.rb
193
194
  - lib/harvester_utils.rb
194
195
  - lib/metadata_harvester.rb
195
196
  - lib/metadata_object.rb