fsp_harvester 0.1.13 → 0.1.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec_status +9 -8
- data/Gemfile.lock +1 -1
- data/lib/config.conf +1 -1
- data/lib/fsp_harvester/version.rb +1 -1
- data/lib/harvester_brute.rb +48 -0
- data/lib/web_utils.rb +4 -4
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aec11fd57963ffb176ddb88338b9e262027c9a7d39364089ae130fb4b628bf5b
|
4
|
+
data.tar.gz: f8733a00de5c6c24a622235c18ba0dae208f5bac52d50607480e51fd563678c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c484e41aa0305f34d0bf7f82cad60b9b02106ffe80b9371c99e77b199eef9ce52818222368b8b3a3ff73d94dba89b8d7fb815d29c95ca335772946e1e9762849
|
7
|
+
data.tar.gz: '09dfdcc12b9176bc88c31a196893ae9ede6c35c2fd59271ca8fa5b1c29f0807ee82c8416ca5f8b7ff75c6caab71648b6d0e15d0976784cb7d34ff8686332be37'
|
data/.rspec_status
CHANGED
@@ -42,14 +42,15 @@ example_id | status | run_time |
|
|
42
42
|
./spec/describedby_spec.rb[1:1:15] | passed | 2.28 seconds |
|
43
43
|
./spec/fsp_harvester_spec.rb[1:1] | passed | 0.00058 seconds |
|
44
44
|
./spec/fsp_harvester_spec.rb[1:2] | failed | 2.92 seconds |
|
45
|
-
./spec/
|
46
|
-
./spec/item_spec.rb[1:1:
|
47
|
-
./spec/item_spec.rb[1:1:
|
48
|
-
./spec/item_spec.rb[1:1:
|
49
|
-
./spec/item_spec.rb[1:1:
|
50
|
-
./spec/item_spec.rb[1:1:
|
51
|
-
./spec/item_spec.rb[1:1:
|
52
|
-
./spec/item_spec.rb[1:1:
|
45
|
+
./spec/fsp_harvester_spec.rb[1:3] | passed | 6.87 seconds |
|
46
|
+
./spec/item_spec.rb[1:1:1] | passed | 3.19 seconds |
|
47
|
+
./spec/item_spec.rb[1:1:2] | passed | 2.81 seconds |
|
48
|
+
./spec/item_spec.rb[1:1:3] | passed | 1.27 seconds |
|
49
|
+
./spec/item_spec.rb[1:1:4] | passed | 1.76 seconds |
|
50
|
+
./spec/item_spec.rb[1:1:5] | passed | 2.43 seconds |
|
51
|
+
./spec/item_spec.rb[1:1:6] | passed | 2.23 seconds |
|
52
|
+
./spec/item_spec.rb[1:1:7] | passed | 2.94 seconds |
|
53
|
+
./spec/item_spec.rb[1:1:8] | passed | 0.52517 seconds |
|
53
54
|
./spec/type_spec.rb[1:1:1] | passed | 1.35 seconds |
|
54
55
|
./spec/type_spec.rb[1:1:2] | passed | 1.32 seconds |
|
55
56
|
./spec/type_spec.rb[1:1:3] | passed | 1.54 seconds |
|
data/Gemfile.lock
CHANGED
data/lib/config.conf
CHANGED
data/lib/fsp_harvester/version.rb
CHANGED
data/lib/harvester_brute.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
module HarvesterTools
  # Namespace-level error class.
  class Error < StandardError
  end

  # "Brute force" harvesting: resolve a GUID to a URL, fetch it with
  # different Accept headers, and hand every body that comes back to
  # HarvesterTools::MetadataHarvester for extraction.
  class BruteForce
    # Entry point for the brute-force harvest.
    #
    # @param guid the identifier to resolve (passed to Utils.convertToURL)
    # @param metadata the metadata object that accumulates results, comments
    #   and warnings; a fresh HarvesterTools::MetadataObject by default
    # @return [false, Object] false when the GUID cannot be converted to a
    #   URL (no type detected), otherwise the populated metadata object
    def self.begin_brute_force(guid:, metadata: HarvesterTools::MetadataObject.new)
      type, url = HarvesterTools::Utils.convertToURL(guid: guid)
      return false unless type

      do_content_negotiation(url: url, metadata: metadata)
      metadata
    end

    # Runs up to three fetches and extracts metadata from each successful one:
    #   1. the GUID URL with ACCEPT_ALL_HEADER,
    #   2. the GUID URL with ACCEPT_STAR_HEADER (the landing page),
    #   3. the landing page's final URL with ACCEPT_ALL_HEADER.
    # Step 3 only happens when step 2 produced a response.
    # NOTE(review): ACCEPT_ALL_HEADER / ACCEPT_STAR_HEADER are defined outside
    # this file; their exact contents are not visible here.
    #
    # @param url the URL to negotiate against
    # @param metadata the metadata object to populate
    def self.do_content_negotiation(url:, metadata:)
      response = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: response, metadata: metadata) if response

      landing = resolve_url_brute(url: url, metadata: metadata, headers: ACCEPT_STAR_HEADER)
      return unless landing

      # extract from the landing page itself
      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: landing, metadata: metadata)

      # now do content negotiation on the landing page's final URL
      negotiated = resolve_url_brute(url: landing.request.url, metadata: metadata, headers: ACCEPT_ALL_HEADER)
      return unless negotiated

      # extract from the content-negotiated representation of the landing page
      HarvesterTools::MetadataHarvester.extract_metadata_from_body(response: negotiated, metadata: metadata)
    end

    # Fetches +url+ through HarvesterTools::WebUtils.fspfetch and records the
    # outcome on +metadata+ (comments, full_response and, on failure, warning 001).
    #
    # @param url the URL to fetch
    # @param method the HTTP method forwarded to fspfetch (default :get)
    # @param nolinkheaders accepted for interface compatibility; not used here
    # @param headers the request headers forwarded to fspfetch
    # @param metadata the metadata object to record into; its guidtype is set
    #   to 'uri' when it is nil
    # @return [false, Object] false when fspfetch yields no response, otherwise
    #   the response object
    def self.resolve_url_brute(url:, method: :get, nolinkheaders: true, headers:, metadata:)
      # Work on the argument directly rather than a class-level @meta, so no
      # state is shared between calls.
      metadata.guidtype = 'uri' if metadata.guidtype.nil?
      warn "\n\n BRUTE FETCHING #{url} #{headers}\n\n"
      response = HarvesterTools::WebUtils.fspfetch(url: url, headers: headers, method: method, meta: metadata)

      unless response
        metadata.add_warning(['001', url, headers])
        metadata.comments << "WARN: Unable to resolve #{url} using HTTP Accept header #{headers}.\n"
        metadata.full_response << [url, "No response"]
        # Must return explicitly: a bare `false` here would fall through to
        # the success path below and call #body on a falsy response.
        return false
      end

      warn "\n\n head #{response.headers.inspect}\n\n"
      metadata.comments << "INFO: following redirection using this header led to the following URL: #{metadata.all_uris.last}. Using the output from this URL for the next few tests..."
      metadata.full_response << [url, response.body]
      response
    end
  end
end
|
data/lib/web_utils.rb
CHANGED
@@ -18,13 +18,13 @@ module HarvesterTools
|
|
18
18
|
warn "final URL #{response.request.url}"
|
19
19
|
warn "Response code #{response.code}"
|
20
20
|
if response.code == 203
|
21
|
-
meta.
|
21
|
+
meta.add_warning(["002", url, headers])
|
22
22
|
meta.comments << "WARN: Response is non-authoritative (HTTP response code: #{response.code}). Headers may have been manipulated encountered when trying to resolve #{url}\n"
|
23
23
|
end
|
24
24
|
response
|
25
25
|
rescue RestClient::ExceptionWithResponse => e
|
26
26
|
warn "EXCEPTION WITH RESPONSE! #{e.response.code} with response #{e.response}\nfailed response headers: #{e.response.headers}"
|
27
|
-
meta.
|
27
|
+
meta.add_warning(["003", url, headers])
|
28
28
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
29
29
|
if (e.response.code == 500 or e.response.code == 404)
|
30
30
|
return false
|
@@ -34,13 +34,13 @@ module HarvesterTools
|
|
34
34
|
# now we are returning the headers and body that were returned
|
35
35
|
rescue RestClient::Exception => e
|
36
36
|
warn "EXCEPTION WITH NO RESPONSE! #{e}"
|
37
|
-
meta.
|
37
|
+
meta.add_warning(["003", url, headers])
|
38
38
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
39
39
|
false
|
40
40
|
# now we are returning 'False', and we will check that with an \"if\" statement in our main code
|
41
41
|
rescue Exception => e
|
42
42
|
warn "EXCEPTION UNKNOWN! #{e}"
|
43
|
-
meta.
|
43
|
+
meta.add_warning(["003", url, headers])
|
44
44
|
meta.comments << "WARN: HTTP error #{e} encountered when trying to resolve #{url}\n"
|
45
45
|
false
|
46
46
|
# now we are returning 'False', and we will check that with an \"if\" statement in our main code
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fsp_harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.1.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Wilkinson
|
@@ -190,6 +190,7 @@ files:
|
|
190
190
|
- lib/fsp_harvester.rb
|
191
191
|
- lib/fsp_harvester/version.rb
|
192
192
|
- lib/harvester.rb
|
193
|
+
- lib/harvester_brute.rb
|
193
194
|
- lib/harvester_utils.rb
|
194
195
|
- lib/metadata_harvester.rb
|
195
196
|
- lib/metadata_object.rb
|