spn2 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +20 -4
- data/lib/curlable.rb +1 -1
- data/lib/spn2/version.rb +1 -1
- data/lib/spn2.rb +82 -36
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 744fc63b21023c96ff40a72561d80f8e277be132b369c10b7196f273178177d8
|
4
|
+
data.tar.gz: 26c923d7ac606c16c0f56e98cff3497878771899a7fc23947c57d864316de4a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca70afc978bd3766ebdc4234575ea1ca5e19202757c353bd917b6b822293764fbde204ad3e6753188eaab0c2123abf2b71ae8fee6c827df41fb836662f90f788
|
7
|
+
data.tar.gz: 23a245ea4c896b034da511fa7dab95d81fda3cd8cec778c1e0be27eddf4f49e46452efb7e10feeb59af0b7ad02ee95fc352f2cbc34c8e79f245c5a8efd283b52
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -21,20 +21,30 @@ For the Spn2 namespace do:
|
|
21
21
|
```rb
|
22
22
|
require 'spn2'
|
23
23
|
```
|
24
|
-
|
25
24
|
### Authentication
|
26
25
|
|
27
26
|
The API requires authentication, so you will need an account at [archive.org](https://archive.org). There are two methods of authentication: cookies and API key. Presently only the latter is implemented. API keys may be generated at https://archive.org/account/s3.php. Ensure your access key and secret key are set in environment variables SPN2_ACCESS_KEY and SPN2_SECRET_KEY respectively.
|
28
27
|
|
28
|
+
```rb
|
29
|
+
> Spn2.access_key
|
30
|
+
=> <your access key>
|
31
|
+
> Spn2.secret_key
|
32
|
+
=> <your secret key>
|
33
|
+
```
|
29
34
|
### Save a page
|
30
35
|
|
31
|
-
Save a url in the Wayback Machine. This method returns the job_id in a hash.
|
36
|
+
Save (capture) a url in the Wayback Machine. This method returns the job_id in a hash.
|
32
37
|
```rb
|
33
38
|
> Spn2.save(url: 'example.com') # returns a job_id
|
34
39
|
|
35
|
-
=> {job_id: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14'}
|
40
|
+
=> {job_id: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14'} # json may include "url" and "message" keys too
|
36
41
|
```
|
42
|
+
Various options are available, as detailed in the [specification](https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA/edit) in the section "Capture request". These may be passed like so:
|
43
|
+
```rb
|
44
|
+
> Spn2.save(url: 'example.com', opts: { capture_all: 1, capture_outlinks: 1 })
|
37
45
|
|
46
|
+
=> {url: 'example.com', job_id: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14'}
|
47
|
+
```
|
38
48
|
### View the status of a job
|
39
49
|
|
40
50
|
Use the job_id.
|
@@ -53,9 +63,15 @@ Use the job_id.
|
|
53
63
|
The status of Wayback Machine itself is available.
|
54
64
|
```rb
|
55
65
|
> Spn2.system_status
|
56
|
-
=> {"status"=>"ok"}
|
66
|
+
=> {"status"=>"ok"} # if not "ok" captures may be delayed
|
57
67
|
```
|
68
|
+
### Error handling
|
58
69
|
|
70
|
+
To facilitate graceful error handling, a full list of all error classes is provided by:
|
71
|
+
```rb
|
72
|
+
> Spn2.error_classes
|
73
|
+
=> [Spn2::Spn2Error, Spn2::Spn2ErrorBadAuth,.. ..]
|
74
|
+
```
|
59
75
|
## Development
|
60
76
|
|
61
77
|
~~After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.~~
|
data/lib/curlable.rb
CHANGED
@@ -12,7 +12,7 @@ module Curlable
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def post(url:, headers: {}, params: {})
|
15
|
-
Curl::Easy.http_post("#{url}?#{Curl.postalize(params)}", params) do |http|
|
15
|
+
Curl::Easy.http_post("#{url}?#{Curl.postalize(params)}", Curl.postalize(params)) do |http|
|
16
16
|
http.follow_location = true
|
17
17
|
headers.each { |k, v| http.headers[k] = v }
|
18
18
|
end.body_str
|
data/lib/spn2/version.rb
CHANGED
data/lib/spn2.rb
CHANGED
@@ -5,57 +5,103 @@ require 'nokogiri'
|
|
5
5
|
|
6
6
|
require_relative 'curlable'
|
7
7
|
|
8
|
-
#
|
8
|
+
# Design decision to not use a class as only 'state' is in 2 env vars
|
9
9
|
module Spn2
|
10
|
-
extend Curlable # for
|
10
|
+
extend Curlable # for system_status
|
11
11
|
include Curlable
|
12
12
|
|
13
|
-
|
13
|
+
ERROR_CODES = [502].freeze
|
14
14
|
|
15
|
-
|
15
|
+
class Spn2Error < StandardError; end
|
16
|
+
class Spn2ErrorBadAuth < Spn2Error; end
|
17
|
+
class Spn2ErrorBadAuth < Spn2Error; end
|
18
|
+
class Spn2ErrorBadResponse < Spn2Error; end
|
19
|
+
class Spn2ErrorInvalidOption < Spn2Error; end
|
20
|
+
class Spn2ErrorUnknownResponseCode < Spn2Error; end
|
21
|
+
ERROR_CODES.each { |i| Spn2.const_set("Spn2Error#{i}", Class.new(Spn2Error)) }
|
22
|
+
|
23
|
+
BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'
|
24
|
+
ESSENTIAL_STATUS_KEYS = %w[job_id resources status].freeze
|
25
|
+
JOB_ID_REGEXP = /^(spn2-([a-f]|\d){40})$/
|
16
26
|
WEB_ARCHIVE = 'https://web.archive.org'
|
17
27
|
|
18
|
-
|
19
|
-
|
20
|
-
|
28
|
+
BINARY_OPTS = %w[capture_all capture_outlinks capture_screenshot delay_wb_availabilty force_get skip_first_archive
|
29
|
+
outlinks_availability email_result].freeze
|
30
|
+
OTHER_OPTS = %w[if_not_archived_within js_behavior_timeout capture_cookie target_username target_password].freeze
|
21
31
|
|
22
|
-
|
23
|
-
|
24
|
-
|
32
|
+
class << self
|
33
|
+
def error_classes
|
34
|
+
Spn2.constants.map { |e| Spn2.const_get(e) }.select { |e| e.is_a?(Class) && e < Exception }
|
35
|
+
end
|
25
36
|
|
26
|
-
|
27
|
-
|
28
|
-
|
37
|
+
def access_key
|
38
|
+
ENV.fetch('SPN2_ACCESS_KEY', nil)
|
39
|
+
end
|
29
40
|
|
30
|
-
|
31
|
-
|
32
|
-
|
41
|
+
def secret_key
|
42
|
+
ENV.fetch('SPN2_SECRET_KEY', nil)
|
43
|
+
end
|
33
44
|
|
34
|
-
|
35
|
-
|
36
|
-
|
45
|
+
def system_status
|
46
|
+
json get(url: "#{WEB_ARCHIVE}/save/status/system") # no auth
|
47
|
+
end
|
37
48
|
|
38
|
-
|
39
|
-
|
40
|
-
end
|
49
|
+
def save(url:, opts: {})
|
50
|
+
raise Spn2ErrorInvalidOption, "One or more invalid options: #{opts}" unless options_valid?(opts)
|
41
51
|
|
42
|
-
|
43
|
-
|
44
|
-
end
|
52
|
+
hash = json(auth_post(url: "#{WEB_ARCHIVE}/save/#{url}", params: { url: url }.merge(opts)))
|
53
|
+
raise Spn2ErrorBadAuth, hash.inspect if hash['message']&.== BAD_AUTH_MSG
|
45
54
|
|
46
|
-
|
47
|
-
{ 'Accept' => 'application/json' }
|
48
|
-
end
|
55
|
+
raise Spn2ErrorBadResponse, "Bad response: #{hash.inspect}" unless hash['job_id']
|
49
56
|
|
50
|
-
|
51
|
-
|
52
|
-
|
57
|
+
hash
|
58
|
+
end
|
59
|
+
alias capture save
|
53
60
|
|
54
|
-
|
55
|
-
|
56
|
-
|
61
|
+
def status(job_id:)
|
62
|
+
hash = json(auth_get(url: "#{WEB_ARCHIVE}/save/status/#{job_id}"))
|
63
|
+
raise Spn2ErrorBadAuth, hash.inspect if hash['message']&.== BAD_AUTH_MSG
|
64
|
+
|
65
|
+
raise Spn2ErrorBadResponse, "Bad response: #{hash.inspect}" unless (ESSENTIAL_STATUS_KEYS - hash.keys).empty?
|
66
|
+
|
67
|
+
hash
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def auth_get(url:)
|
73
|
+
get(url: url, headers: accept_header.merge(auth_header))
|
74
|
+
end
|
75
|
+
|
76
|
+
def auth_post(url:, params: {})
|
77
|
+
post(url: url, headers: accept_header.merge(auth_header), params: params)
|
78
|
+
end
|
79
|
+
|
80
|
+
def accept_header
|
81
|
+
{ Accept: 'application/json' }
|
82
|
+
end
|
83
|
+
|
84
|
+
def auth_header
|
85
|
+
{ Authorization: "LOW #{Spn2.access_key}:#{Spn2.secret_key}" }
|
86
|
+
end
|
87
|
+
|
88
|
+
def json(html_string)
|
89
|
+
JSON.parse(doc = Nokogiri::HTML(html_string))
|
90
|
+
rescue JSON::ParserError # an html response
|
91
|
+
raise Spn2ErrorBadResponse, "No title in: #{html_string}" unless (title = doc.title)
|
92
|
+
|
93
|
+
parse_error_code_from_page_title(title)
|
94
|
+
end
|
95
|
+
|
96
|
+
def parse_error_code_from_page_title(string)
|
97
|
+
code = string.to_i
|
98
|
+
raise Spn2.const_get("Spn2Error#{code}") if ERROR_CODES.include? code
|
99
|
+
|
100
|
+
raise Spn2ErrorUnknownResponseCode, string
|
101
|
+
end
|
57
102
|
|
58
|
-
|
59
|
-
|
103
|
+
def options_valid?(opts)
|
104
|
+
opts.keys.all? { |k| (BINARY_OPTS + OTHER_OPTS).include? k.to_s }
|
105
|
+
end
|
60
106
|
end
|
61
107
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spn2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MatzFan
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: curb
|
@@ -177,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
177
|
- !ruby/object:Gem::Version
|
178
178
|
version: '0'
|
179
179
|
requirements: []
|
180
|
-
rubygems_version: 3.3.
|
180
|
+
rubygems_version: 3.3.17
|
181
181
|
signing_key:
|
182
182
|
specification_version: 4
|
183
183
|
summary: Gem for the Save Page Now API of the Wayback Machine
|