spn2 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +4 -4
- data/lib/spn2/version.rb +1 -1
- data/lib/spn2.rb +34 -41
- data/lib/spn2_errors.rb +20 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d73702950316690d62ce910059788093af0ba31b094c1208eb7c7ef72cbaa06e
|
4
|
+
data.tar.gz: 87ba2a61a9f76d86d46043da74418b2a67f6ac584dac2c3b50b2aa7baa8c7aad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ad8a32dc48bd5dbaf5b24428f6a3522c4af9ce3a5a2297df39fb4dd9b36a627565a844cf841ca50e3c3e0200b569c88d73bd27717468ed8b8e2f5444114d316
|
7
|
+
data.tar.gz: b08014dddc62d499a30e5bc8ba1ce20c96a5f01ad8d0d465759897904f297339a9b79ecff1fdd56561c194072e24ba83542b5d0e39f6d6ab8e57a3b55bae8f6d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -59,7 +59,7 @@ The key "status_ext" contains an explanatory message - see the API [specificatio
|
|
59
59
|
|
60
60
|
Use the job_id.
|
61
61
|
```rb
|
62
|
-
> Spn2.
|
62
|
+
> Spn2.status(job_ids: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14')
|
63
63
|
|
64
64
|
=> {"counters"=>{"outlinks"=>1, "embeds"=>2}, "job_id"=>"spn2-9c17e047f58f9220a7008d4f18152fee4d111d14",
|
65
65
|
"original_url"=>"http://example.com/", "resources"=>["http://example.com/", "http://example.com/favicon.ico"],
|
@@ -76,16 +76,16 @@ $ echo "http://example.com/"|tr -d "\n"|shasum
|
|
76
76
|
9c17e047f58f9220a7008d4f18152fee4d111d14 -
|
77
77
|
```
|
78
78
|
|
79
|
-
The status of
|
79
|
+
The status of an array of job_ids can be obtained with:
|
80
80
|
```rb
|
81
|
-
> Spn2.
|
81
|
+
> Spn2.status(job_ids: ['spn2-9c17e047f58f9220a7008d4f18152fee4d111d14', 'spn2-...'])
|
82
82
|
|
83
83
|
=> [.. # an array of status hashes
|
84
84
|
```
|
85
85
|
|
86
86
|
Finally, the status of any outlinks captured by using the save option `capture_outlinks: 1` is available by supplying the parent job_id to:
|
87
87
|
```rb
|
88
|
-
> Spn2.
|
88
|
+
> Spn2.status(job_ids: 'spn2-cce034d987e1d72d8cbf1770bcf99024fe20dddf', outlinks: true)
|
89
89
|
|
90
90
|
=> [.. # an array of outlink job status hashes
|
91
91
|
```
|
data/lib/spn2/version.rb
CHANGED
data/lib/spn2.rb
CHANGED
@@ -5,25 +5,12 @@ require 'json'
|
|
5
5
|
require 'nokogiri'
|
6
6
|
|
7
7
|
require_relative 'curlable'
|
8
|
+
require_relative 'spn2_errors'
|
8
9
|
|
9
10
|
# Design decision to not use a class as only 'state' is in 2 env vars
|
10
11
|
module Spn2
|
11
12
|
extend Curlable
|
12
13
|
|
13
|
-
BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'
|
14
|
-
ERROR_CODES = [502].freeze
|
15
|
-
|
16
|
-
class Spn2Error < StandardError; end
|
17
|
-
class Spn2ErrorBadAuth < Spn2Error; end
|
18
|
-
class Spn2ErrorFailedCapture < Spn2Error; end
|
19
|
-
class Spn2ErrorInvalidOption < Spn2Error; end
|
20
|
-
class Spn2ErrorMissingKeys < Spn2Error; end
|
21
|
-
class Spn2ErrorNoOutlinks < Spn2Error; end
|
22
|
-
class Spn2ErrorTooManyRequests < Spn2Error; end
|
23
|
-
class Spn2ErrorUnknownResponse < Spn2Error; end
|
24
|
-
class Spn2ErrorUnknownResponseCode < Spn2Error; end
|
25
|
-
ERROR_CODES.each { |i| Spn2.const_set("Spn2Error#{i}", Class.new(Spn2Error)) }
|
26
|
-
|
27
14
|
ESSENTIAL_STATUS_KEYS = %w[job_id resources status].freeze
|
28
15
|
# Matches a complete job id: the literal 'spn2-' followed by exactly 40 characters, each a-f or a digit.
# Anchored with \A and \z so the entire string must match; ^ and $ anchor per line only, which would
# accept a multi-line string containing one valid line, or a job id with a trailing newline.
JOB_ID_REGEXP = /\A(spn2-([a-f]|\d){40})\z/
|
29
16
|
WEB_ARCHIVE = 'https://web.archive.org'
|
@@ -65,33 +52,30 @@ module Spn2
|
|
65
52
|
end
|
66
53
|
alias capture save
|
67
54
|
|
68
|
-
def
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
raise Spn2ErrorMissingKeys, json.inspect unless (ESSENTIAL_STATUS_KEYS - json.keys).empty?
|
55
|
+
def status(job_ids:, outlinks: false)
|
56
|
+
params = status_params(job_ids: job_ids, outlinks: outlinks)
|
57
|
+
json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: params))
|
58
|
+
return json if json.is_a? Array # must be valid response
|
73
59
|
|
60
|
+
handle_status_errors(job_ids: job_ids, json: json, outlinks: outlinks)
|
74
61
|
json
|
75
62
|
end
|
76
|
-
alias status status_job_id
|
77
63
|
|
78
|
-
|
79
|
-
json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_ids: job_ids }))
|
80
|
-
raise Spn2Error, json.inspect unless json.is_a? Array
|
81
|
-
|
82
|
-
json
|
83
|
-
end
|
84
|
-
alias statuses status_job_ids
|
64
|
+
private
|
85
65
|
|
86
|
-
def
|
87
|
-
|
88
|
-
|
66
|
+
def status_params(job_ids:, outlinks:)
|
67
|
+
return { job_ids: job_ids.join(',') } if job_ids.is_a?(Array)
|
68
|
+
return { job_id_outlinks: job_ids } if outlinks
|
89
69
|
|
90
|
-
|
70
|
+
{ job_id: job_ids } # single job_id
|
91
71
|
end
|
92
|
-
alias status_outlinks status_job_id_outlinks
|
93
72
|
|
94
|
-
|
73
|
+
def handle_status_errors(job_ids:, json:, outlinks:)
|
74
|
+
raise Spn2ErrorBadAuth, json.inspect if json['message']&.== BAD_AUTH_MSG
|
75
|
+
raise Spn2ErrorNoOutlinks, json.inspect if outlinks
|
76
|
+
raise Spn2ErrorMissingKeys, json.inspect unless (ESSENTIAL_STATUS_KEYS - json.keys).empty?
|
77
|
+
raise Spn2Error, json.inspect if job_ids.is_a?(Array)
|
78
|
+
end
|
95
79
|
|
96
80
|
def auth_get(url:)
|
97
81
|
get(url: url, headers: accept_header.merge(auth_header))
|
@@ -115,25 +99,34 @@ module Spn2
|
|
115
99
|
|
116
100
|
def json(html_string)
|
117
101
|
JSON.parse(doc = doc(html_string))
|
118
|
-
rescue JSON::ParserError # an html response
|
102
|
+
rescue JSON::ParserError # an html response
|
119
103
|
parse_error_code_from_page_title(doc.title) if doc.title
|
120
|
-
parse_error_from_page_body(html_string)
|
104
|
+
parse_error_from_page_body(html_string)
|
121
105
|
end
|
122
106
|
|
123
107
|
def parse_error_code_from_page_title(title_string)
|
124
|
-
|
125
|
-
raise
|
126
|
-
|
127
|
-
raise Spn2ErrorUnknownResponseCode
|
108
|
+
raise_code_response_error_if_code_in_string(title_string)
|
109
|
+
raise Spn2ErrorUnknownResponseCode, title_string # code found but doesn't match any known error classes
|
128
110
|
end
|
129
111
|
|
130
112
|
def parse_error_from_page_body(html_string)
|
131
|
-
|
132
|
-
|
113
|
+
h1_tag_text = h1_tag_text(html_string)
|
114
|
+
raise_code_response_error_if_code_in_string h1_tag_text
|
115
|
+
raise Spn2ErrorTooManyRequests if h1_tag_text == TOO_MANY_REQUESTS
|
133
116
|
|
134
117
|
raise Spn2ErrorUnknownResponse, html_string # fall through
|
135
118
|
end
|
136
119
|
|
120
|
+
def h1_tag_text(html_string)
|
121
|
+
doc(html_string).xpath('//h1')&.text || ''
|
122
|
+
end
|
123
|
+
|
124
|
+
def raise_code_response_error_if_code_in_string(string)
|
125
|
+
return unless ERROR_CODES.include? code = string.to_i
|
126
|
+
|
127
|
+
raise Spn2.const_get("Spn2Error#{code}")
|
128
|
+
end
|
129
|
+
|
137
130
|
def options_valid?(opts)
|
138
131
|
opts.keys.all? { |k| (BINARY_OPTS + OTHER_OPTS).include? k.to_s }
|
139
132
|
end
|
data/lib/spn2_errors.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# namespace
|
4
|
+
module Spn2
|
5
|
+
BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'
|
6
|
+
ERROR_CODES = [400, 502].freeze
|
7
|
+
TOO_MANY_REQUESTS = 'Too Many Requests'
|
8
|
+
|
9
|
+
class Spn2Error < StandardError; end
|
10
|
+
class Spn2ErrorBadAuth < Spn2Error; end
|
11
|
+
class Spn2ErrorBadParams < Spn2Error; end
|
12
|
+
class Spn2ErrorFailedCapture < Spn2Error; end
|
13
|
+
class Spn2ErrorInvalidOption < Spn2Error; end
|
14
|
+
class Spn2ErrorMissingKeys < Spn2Error; end
|
15
|
+
class Spn2ErrorNoOutlinks < Spn2Error; end
|
16
|
+
class Spn2ErrorTooManyRequests < Spn2Error; end
|
17
|
+
class Spn2ErrorUnknownResponse < Spn2Error; end
|
18
|
+
class Spn2ErrorUnknownResponseCode < Spn2Error; end
|
19
|
+
ERROR_CODES.each { |i| const_set("Spn2Error#{i}", Class.new(Spn2Error)) }
|
20
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spn2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MatzFan
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: curb
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- lib/curlable.rb
|
167
167
|
- lib/spn2.rb
|
168
168
|
- lib/spn2/version.rb
|
169
|
+
- lib/spn2_errors.rb
|
169
170
|
- sig/spn2.rbs
|
170
171
|
homepage: https://gitlab.com/matzfan/spn2
|
171
172
|
licenses:
|