fsp_harvester 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +9 -8
- data/Gemfile.lock +1 -1
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/harvester_brute.rb +48 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e0ffd5048e360ce8e8cced890a586664af797065d2c8d6312927d694835e84b
|
4
|
+
data.tar.gz: 840269a8b28da70bed8c5e46674ff3730cbee66f624064cab84f98d5b3a2ff00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c01cc88a8f57e024c7aeed89a8251d97b130bca987dc14d914e87fa87ea744d3de7ab11ca340b0456f295edafdd872d4f63d0f0ef23dbe9c3cc8ebc97a64ae5
|
7
|
+
data.tar.gz: 2c274758ec874bb1c25ebd5286ecbc2b7e91205430a94cf3ada9c7350511fe362532f2c6d213a6fd6657ccdf7184df9c0eaf9c2461c0d25dc87da00b2aded390
|
data/.rspec_status
CHANGED
@@ -42,14 +42,15 @@ example_id | status | run_time |
|
|
42
42
|
./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
|
43
43
|
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
|
44
44
|
./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
|
45
|
-
./spec/
|
46
|
-
./spec/item_spec.rb[1:1:
|
47
|
-
./spec/item_spec.rb[1:1:
|
48
|
-
./spec/item_spec.rb[1:1:
|
49
|
-
./spec/item_spec.rb[1:1:
|
50
|
-
./spec/item_spec.rb[1:1:
|
51
|
-
./spec/item_spec.rb[1:1:
|
52
|
-
./spec/item_spec.rb[1:1:
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
|
46
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
|
47
|
+
./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
|
48
|
+
./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
|
49
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.76 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.43 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.23 seconds |
|
52
|
+
./spec/item_spec.rb[1:1:7] | passed | 2.94 seconds |
|
53
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.52517 seconds |
|
53
54
|
./spec/type_spec.rb[1:1:1] | passed | 1.35 seconds |
|
54
55
|
./spec/type_spec.rb[1:1:2] | passed | 1.32 seconds |
|
55
56
|
./spec/type_spec.rb[1:1:3] | passed | 1.54 seconds |
|
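data/Gemfile.lock
CHANGED
[hunk for data/Gemfile.lock (+1 -1) is missing from this extract]
data/lib/fsp_harvester/version.rb
CHANGED
[hunk for data/lib/fsp_harvester/version.rb (+1 -1) is missing from this extract]
data/lib/harvester_brute.rb
ADDED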
@@ -0,0 +1,48 @@
|
|
1
|
+
module HarvesterTools
  # Namespace-level error class.
  class Error < StandardError
  end

  # "Brute force" harvesting: resolve a GUID to a URL, then fetch that URL with
  # two different Accept headers (and re-fetch the final landing page),
  # passing every successful response to the metadata extractor.
  class BruteForce
    # Entry point for a brute-force harvest of one identifier.
    #
    # @param guid the identifier, converted via HarvesterTools::Utils.convertToURL
    # @param metadata object that accumulates results; a fresh
    #   HarvesterTools::MetadataObject is created when none is supplied
    # @return [false] when the GUID type cannot be determined
    # @return the metadata object otherwise
    def self.begin_brute_force(guid:, metadata: HarvesterTools::MetadataObject.new)
      type, url = HarvesterTools::Utils.convertToURL(guid: guid)
      return false unless type

      do_content_negotiation(url: url, metadata: metadata)
      metadata
    end

    # Fetches +url+ up to three times, extracting metadata from each response
    # that was obtained:
    # 1. the URL itself with ACCEPT_ALL_HEADER;
    # 2. the URL itself with ACCEPT_STAR_HEADER;
    # 3. only if (2) succeeded, the URL that (2) ended on, with ACCEPT_ALL_HEADER.
    #
    # @param url the URL to resolve
    # @param metadata object that accumulates results
    def self.do_content_negotiation(url:, metadata:)
      response = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      if response
        HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)
      end

      response = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_STAR_HEADER)
      if response
        # extract from the landing page
        HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)
        # now do content negotiation on the landing page
        response = resolve_url_brute(url: response.request.url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
        if response
          # extract from the content-negotiated response for the landing page
          HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)
        end
      end
    end

    # Fetches +url+ with the given headers and records the outcome on +metadata+.
    #
    # The metadata object is also stored in the class-level @meta instance
    # variable, and its guidtype is set to 'uri' when it is nil.
    #
    # @param url the URL to fetch
    # @param method HTTP method passed through to HarvesterTools::WebUtils.fspfetch
    # @param nolinkheaders accepted but not used by this method
    # @param headers request headers passed through to the fetcher
    # @param metadata object that accumulates comments, warnings and responses
    # @return [false] when the fetch produced no response (warning '001' is recorded)
    # @return the response object otherwise
    def self.resolve_url_brute(url:, method: :get, nolinkheaders: true, headers:, metadata:)
      @meta = metadata
      @meta.guidtype = 'uri' if @meta.guidtype.nil?
      warn "\n\n BRUTE FETCHING #{url} #{headers}\n\n"
      response = HarvesterTools::WebUtils.fspfetch(url: url, headers: headers, method: method, meta: @meta)
      warn "\n\n head #{response.headers.inspect}\n\n" if response

      unless response
        @meta.add_warning(['001', url, headers])
        @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{headers}.\n"
        @meta.full_response << [url, "No response"]
        # Must be an explicit return: a bare `false` here would be discarded and
        # the code below would call #body on the missing response.
        return false
      end

      @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
      @meta.full_response << [url, response.body]
      response
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.14
|
4
|
+
version: 0.1.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
@@ -190,6 +190,7 @@ files:
|
|
190
190
|
- lib/fsp_harvester.rb
|
191
191
|
- lib/fsp_harvester/version.rb
|
192
192
|
- lib/harvester.rb
|
193
|
+
- lib/harvester_brute.rb
|
193
194
|
- lib/harvester_utils.rb
|
194
195
|
- lib/metadata_harvester.rb
|
195
196
|
- lib/metadata_object.rb
|