fsp_harvester 0.1.13 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +9 -8
- data/Gemfile.lock +1 -1
- data/lib/config.conf +1 -1
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/harvester_brute.rb +48 -0
- data/lib/web_utils.rb +4 -4
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aec11fd57963ffb176ddb88338b9e262027c9a7d39364089ae130fb4b628bf5b
|
4
|
+
data.tar.gz: f8733a00de5c6c24a622235c18ba0dae208f5bac52d50607480e51fd563678c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c484e41aa0305f34d0bf7f82cad60b9b02106ffe80b9371c99e77b199eef9ce52818222368b8b3a3ff73d94dba89b8d7fb815d29c95ca335772946e1e9762849
|
7
|
+
data.tar.gz: '09dfdcc12b9176bc88c31a196893ae9ede6c35c2fd59271ca8fa5b1c29f0807ee82c8416ca5f8b7ff75c6caab71648b6d0e15d0976784cb7d34ff8686332be37'
|
data/.rspec_status
CHANGED
@@ -42,14 +42,15 @@ example_id | status | run_time |
|
|
42
42
|
./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
|
43
43
|
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
|
44
44
|
./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
|
45
|
-
./spec/
|
46
|
-
./spec/item_spec.rb[1:1:
|
47
|
-
./spec/item_spec.rb[1:1:
|
48
|
-
./spec/item_spec.rb[1:1:
|
49
|
-
./spec/item_spec.rb[1:1:
|
50
|
-
./spec/item_spec.rb[1:1:
|
51
|
-
./spec/item_spec.rb[1:1:
|
52
|
-
./spec/item_spec.rb[1:1:
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
|
46
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
|
47
|
+
./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
|
48
|
+
./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
|
49
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.76 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.43 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.23 seconds |
|
52
|
+
./spec/item_spec.rb[1:1:7] | passed | 2.94 seconds |
|
53
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.52517 seconds |
|
53
54
|
./spec/type_spec.rb[1:1:1] | passed | 1.35 seconds |
|
54
55
|
./spec/type_spec.rb[1:1:2] | passed | 1.32 seconds |
|
55
56
|
./spec/type_spec.rb[1:1:3] | passed | 1.54 seconds |
|
data/Gemfile.lock
CHANGED
data/lib/config.conf
CHANGED
data/lib/fsp_harvester/version.rb
CHANGED
data/lib/harvester_brute.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
module HarvesterTools
  # Generic error class for this namespace.
  class Error < StandardError
  end

  # "Brute force" harvesting: resolves a GUID to a URL and then fetches that
  # URL several times with different Accept headers, passing every successful
  # response to HarvesterTools::MetadataHarvester.extract_metadata_from_body.
  #
  # ACCEPT_ALL_HEADER and ACCEPT_STAR_HEADER are constants defined outside this
  # file.
  class BruteForce
    # Entry point for a brute-force harvest.
    #
    # @param guid the identifier handed to HarvesterTools::Utils.convertToURL
    # @param metadata the object that collects results, warnings and comments;
    #   a fresh HarvesterTools::MetadataObject is created when omitted
    # @return [false] when convertToURL yields no type for the guid
    # @return the (mutated) +metadata+ object otherwise
    def self.begin_brute_force(guid:, metadata: HarvesterTools::MetadataObject.new)
      type, url = HarvesterTools::Utils.convertToURL(guid: guid)
      return false unless type

      do_content_negotiation(url: url, metadata: metadata)
      metadata
    end

    # Fetches +url+ up to three times and extracts metadata from each
    # successful response:
    #
    # 1. +url+ with ACCEPT_ALL_HEADER
    # 2. +url+ with ACCEPT_STAR_HEADER (the landing page)
    # 3. the final URL of step 2 (+response.request.url+) with
    #    ACCEPT_ALL_HEADER, i.e. content negotiation on the landing page
    #
    # Step 3 only runs when step 2 produced a response.
    #
    # @param url the URL to resolve
    # @param metadata the object that accumulates the harvest results
    # @return the result of the last extraction performed in steps 2/3, or nil
    #   when step 2 or step 3 produced no response
    def self.do_content_negotiation(url:, metadata:)
      response = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata) if response

      response = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_STAR_HEADER)
      return unless response

      # extract from landing page
      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)

      # now do content negotiation on the landing page
      response = resolve_url_brute(url: response.request.url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      return unless response

      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata)
    end

    # Fetches +url+ through HarvesterTools::WebUtils.fspfetch and records the
    # outcome on +metadata+.
    #
    # On failure (falsy fetch result) a '001' warning, a WARN comment and a
    # [url, "No response"] entry are recorded and +false+ is returned.
    # On success an INFO comment and a [url, body] entry are recorded and the
    # response is returned.
    #
    # @param url the URL to fetch
    # @param method the HTTP method forwarded to fspfetch (default :get)
    # @param nolinkheaders accepted for interface compatibility; not used here
    # @param headers the request headers forwarded to fspfetch
    # @param metadata the object that accumulates warnings/comments/responses;
    #   its guidtype is set to 'uri' when it is nil
    # @return [false] when the fetch produced no response
    # @return the response object returned by fspfetch otherwise
    def self.resolve_url_brute(url:, method: :get, nolinkheaders: true, headers:, metadata:)
      # The metadata object is kept in a class-level instance variable, as in
      # the original implementation.
      @meta = metadata
      @meta.guidtype = 'uri' if @meta.guidtype.nil?
      warn "\n\n BRUTE FETCHING #{url} #{headers}\n\n"
      response = HarvesterTools::WebUtils.fspfetch(url: url, headers: headers, method: method, meta: @meta)

      unless response
        @meta.add_warning(['001', url, headers])
        @meta.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{headers}.\n"
        @meta.full_response << [url, "No response"]
        # Must return here: falling through would call #body on a falsy
        # response and raise NoMethodError.
        return false
      end

      warn "\n\n head #{response.headers.inspect}\n\n"
      @meta.comments << "INFO: following redirection using this header led to the following URL: #{@meta.all_uris.last}. Using the output from this URL for the next few tests..."
      @meta.full_response << [url, response.body]
      response
    end
  end
end
|
data/lib/web_utils.rb
CHANGED
@@ -18,13 +18,13 @@ module HarvesterTools
|
|
18
18
|
warn "final URL #{response.request.url}"
|
19
19
|
warn "Response code #{response.code}"
|
20
20
|
if response.code == 203
|
21
|
-
meta.
|
21
|
+
meta.add_warning(["002", url, headers])
|
22
22
|
meta.comments << "WARN: Response is non-authoritative (HTTP response code: #{response.code}). Headers may have been manipulated encountered when trying to resolve #{url}\n"
|
23
23
|
end
|
24
24
|
response
|
25
25
|
rescue RestClient::ExceptionWithResponse => e
|
26
26
|
warn "EXCEPTION WITH RESPONSE! #{e.response.code} with response #{e.response}\nfailed response headers: #{e.response.headers}"
|
27
|
-
meta.
|
27
|
+
meta.add_warning(["003", url, headers])
|
28
28
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
29
29
|
if (e.response.code == 500 or e.response.code == 404)
|
30
30
|
return false
|
@@ -34,13 +34,13 @@ module HarvesterTools
|
|
34
34
|
# now we are returning the headers and body that were returned
|
35
35
|
rescue RestClient::Exception => e
|
36
36
|
warn "EXCEPTION WITH NO RESPONSE! #{e}"
|
37
|
-
meta.
|
37
|
+
meta.add_warning(["003", url, headers])
|
38
38
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
39
39
|
false
|
40
40
|
# now we are returning 'False', and we will check that with an \"if\" statement in our main code
|
41
41
|
rescue Exception => e
|
42
42
|
warn "EXCEPTION UNKNOWN! #{e}"
|
43
|
-
meta.
|
43
|
+
meta.add_warning(["003", url, headers])
|
44
44
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
45
45
|
false
|
46
46
|
# now we are returning 'False', and we will check that with an \"if\" statement in our main code
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
@@ -190,6 +190,7 @@ files:
|
|
190
190
|
- lib/fsp_harvester.rb
|
191
191
|
- lib/fsp_harvester/version.rb
|
192
192
|
- lib/harvester.rb
|
193
|
+
- lib/harvester_brute.rb
|
193
194
|
- lib/harvester_utils.rb
|
194
195
|
- lib/metadata_harvester.rb
|
195
196
|
- lib/metadata_object.rb
|