spn2 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +4 -4
- data/lib/spn2/version.rb +1 -1
- data/lib/spn2.rb +34 -41
- data/lib/spn2_errors.rb +20 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d73702950316690d62ce910059788093af0ba31b094c1208eb7c7ef72cbaa06e
|
4
|
+
data.tar.gz: 87ba2a61a9f76d86d46043da74418b2a67f6ac584dac2c3b50b2aa7baa8c7aad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ad8a32dc48bd5dbaf5b24428f6a3522c4af9ce3a5a2297df39fb4dd9b36a627565a844cf841ca50e3c3e0200b569c88d73bd27717468ed8b8e2f5444114d316
|
7
|
+
data.tar.gz: b08014dddc62d499a30e5bc8ba1ce20c96a5f01ad8d0d465759897904f297339a9b79ecff1fdd56561c194072e24ba83542b5d0e39f6d6ab8e57a3b55bae8f6d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -59,7 +59,7 @@ The key "status_ext" contains an explanatory message - see the API [specificatio
|
|
59
59
|
|
60
60
|
Use the job_id.
|
61
61
|
```rb
|
62
|
-
> Spn2.
|
62
|
+
> Spn2.status(job_ids: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14')
|
63
63
|
|
64
64
|
=> {"counters"=>{"outlinks"=>1, "embeds"=>2}, "job_id"=>"spn2-9c17e047f58f9220a7008d4f18152fee4d111d14",
|
65
65
|
"original_url"=>"http://example.com/", "resources"=>["http://example.com/", "http://example.com/favicon.ico"],
|
@@ -76,16 +76,16 @@ $ echo "http://example.com/"|tr -d "\n"|shasum
|
|
76
76
|
9c17e047f58f9220a7008d4f18152fee4d111d14 -
|
77
77
|
```
|
78
78
|
|
79
|
-
The status of
|
79
|
+
The status of an array of job_ids can be obtained with:
|
80
80
|
```rb
|
81
|
-
> Spn2.
|
81
|
+
> Spn2.status(job_ids: ['spn2-9c17e047f58f9220a7008d4f18152fee4d111d14', 'spn2-...'])
|
82
82
|
|
83
83
|
=> [.. # an array of status hashes
|
84
84
|
```
|
85
85
|
|
86
86
|
Finally, the status of any outlinks captured by using the save option `capture_outlinks: 1` is available by supplying the parent job_id to:
|
87
87
|
```rb
|
88
|
-
> Spn2.
|
88
|
+
> Spn2.status(job_ids: 'spn2-cce034d987e1d72d8cbf1770bcf99024fe20dddf', outlinks: true)
|
89
89
|
|
90
90
|
=> [.. # an array of outlink job status hashes
|
91
91
|
```
|
data/lib/spn2/version.rb
CHANGED
data/lib/spn2.rb
CHANGED
@@ -5,25 +5,12 @@ require 'json'
|
|
5
5
|
require 'nokogiri'
|
6
6
|
|
7
7
|
require_relative 'curlable'
|
8
|
+
require_relative 'spn2_errors'
|
8
9
|
|
9
10
|
# Design decision not to use a class, as the only 'state' is in 2 env vars
|
10
11
|
module Spn2
|
11
12
|
extend Curlable
|
12
13
|
|
13
|
-
BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'
|
14
|
-
ERROR_CODES = [502].freeze
|
15
|
-
|
16
|
-
class Spn2Error < StandardError; end
|
17
|
-
class Spn2ErrorBadAuth < Spn2Error; end
|
18
|
-
class Spn2ErrorFailedCapture < Spn2Error; end
|
19
|
-
class Spn2ErrorInvalidOption < Spn2Error; end
|
20
|
-
class Spn2ErrorMissingKeys < Spn2Error; end
|
21
|
-
class Spn2ErrorNoOutlinks < Spn2Error; end
|
22
|
-
class Spn2ErrorTooManyRequests < Spn2Error; end
|
23
|
-
class Spn2ErrorUnknownResponse < Spn2Error; end
|
24
|
-
class Spn2ErrorUnknownResponseCode < Spn2Error; end
|
25
|
-
ERROR_CODES.each { |i| Spn2.const_set("Spn2Error#{i}", Class.new(Spn2Error)) }
|
26
|
-
|
27
14
|
ESSENTIAL_STATUS_KEYS = %w[job_id resources status].freeze
|
28
15
|
JOB_ID_REGEXP = /^(spn2-([a-f]|\d){40})$/
|
29
16
|
WEB_ARCHIVE = 'https://web.archive.org'
|
@@ -65,33 +52,30 @@ module Spn2
|
|
65
52
|
end
|
66
53
|
alias capture save
|
67
54
|
|
68
|
-
def
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
raise Spn2ErrorMissingKeys, json.inspect unless (ESSENTIAL_STATUS_KEYS - json.keys).empty?
|
55
|
+
def status(job_ids:, outlinks: false)
|
56
|
+
params = status_params(job_ids: job_ids, outlinks: outlinks)
|
57
|
+
json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: params))
|
58
|
+
return json if json.is_a? Array # must be valid response
|
73
59
|
|
60
|
+
handle_status_errors(job_ids: job_ids, json: json, outlinks: outlinks)
|
74
61
|
json
|
75
62
|
end
|
76
|
-
alias status status_job_id
|
77
63
|
|
78
|
-
|
79
|
-
json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_ids: job_ids }))
|
80
|
-
raise Spn2Error, json.inspect unless json.is_a? Array
|
81
|
-
|
82
|
-
json
|
83
|
-
end
|
84
|
-
alias statuses status_job_ids
|
64
|
+
private
|
85
65
|
|
86
|
-
def
|
87
|
-
|
88
|
-
|
66
|
+
def status_params(job_ids:, outlinks:)
|
67
|
+
return { job_ids: job_ids.join(',') } if job_ids.is_a?(Array)
|
68
|
+
return { job_id_outlinks: job_ids } if outlinks
|
89
69
|
|
90
|
-
|
70
|
+
{ job_id: job_ids } # single job_id
|
91
71
|
end
|
92
|
-
alias status_outlinks status_job_id_outlinks
|
93
72
|
|
94
|
-
|
73
|
+
def handle_status_errors(job_ids:, json:, outlinks:)
|
74
|
+
raise Spn2ErrorBadAuth, json.inspect if json['message']&.== BAD_AUTH_MSG
|
75
|
+
raise Spn2ErrorNoOutlinks, json.inspect if outlinks
|
76
|
+
raise Spn2ErrorMissingKeys, json.inspect unless (ESSENTIAL_STATUS_KEYS - json.keys).empty?
|
77
|
+
raise Spn2Error, json.inspect if job_ids.is_a?(Array)
|
78
|
+
end
|
95
79
|
|
96
80
|
def auth_get(url:)
|
97
81
|
get(url: url, headers: accept_header.merge(auth_header))
|
@@ -115,25 +99,34 @@ module Spn2
|
|
115
99
|
|
116
100
|
def json(html_string)
|
117
101
|
JSON.parse(doc = doc(html_string))
|
118
|
-
rescue JSON::ParserError # an html response
|
102
|
+
rescue JSON::ParserError # an html response
|
119
103
|
parse_error_code_from_page_title(doc.title) if doc.title
|
120
|
-
parse_error_from_page_body(html_string)
|
104
|
+
parse_error_from_page_body(html_string)
|
121
105
|
end
|
122
106
|
|
123
107
|
def parse_error_code_from_page_title(title_string)
|
124
|
-
|
125
|
-
raise
|
126
|
-
|
127
|
-
raise Spn2ErrorUnknownResponseCode
|
108
|
+
raise_code_response_error_if_code_in_string(title_string)
|
109
|
+
raise Spn2ErrorUnknownResponseCode, title_string # code found but doesn't match any known error classes
|
128
110
|
end
|
129
111
|
|
130
112
|
def parse_error_from_page_body(html_string)
|
131
|
-
|
132
|
-
|
113
|
+
h1_tag_text = h1_tag_text(html_string)
|
114
|
+
raise_code_response_error_if_code_in_string h1_tag_text
|
115
|
+
raise Spn2ErrorTooManyRequests if h1_tag_text == TOO_MANY_REQUESTS
|
133
116
|
|
134
117
|
raise Spn2ErrorUnknownResponse, html_string # fall through
|
135
118
|
end
|
136
119
|
|
120
|
+
def h1_tag_text(html_string)
|
121
|
+
doc(html_string).xpath('//h1')&.text || ''
|
122
|
+
end
|
123
|
+
|
124
|
+
def raise_code_response_error_if_code_in_string(string)
|
125
|
+
return unless ERROR_CODES.include? code = string.to_i
|
126
|
+
|
127
|
+
raise Spn2.const_get("Spn2Error#{code}")
|
128
|
+
end
|
129
|
+
|
137
130
|
def options_valid?(opts)
|
138
131
|
opts.keys.all? { |k| (BINARY_OPTS + OTHER_OPTS).include? k.to_s }
|
139
132
|
end
|
data/lib/spn2_errors.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# namespace
|
4
|
+
module Spn2
|
5
|
+
BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'
|
6
|
+
ERROR_CODES = [400, 502].freeze
|
7
|
+
TOO_MANY_REQUESTS = 'Too Many Requests'
|
8
|
+
|
9
|
+
class Spn2Error < StandardError; end
|
10
|
+
class Spn2ErrorBadAuth < Spn2Error; end
|
11
|
+
class Spn2ErrorBadParams < Spn2Error; end
|
12
|
+
class Spn2ErrorFailedCapture < Spn2Error; end
|
13
|
+
class Spn2ErrorInvalidOption < Spn2Error; end
|
14
|
+
class Spn2ErrorMissingKeys < Spn2Error; end
|
15
|
+
class Spn2ErrorNoOutlinks < Spn2Error; end
|
16
|
+
class Spn2ErrorTooManyRequests < Spn2Error; end
|
17
|
+
class Spn2ErrorUnknownResponse < Spn2Error; end
|
18
|
+
class Spn2ErrorUnknownResponseCode < Spn2Error; end
|
19
|
+
ERROR_CODES.each { |i| const_set("Spn2Error#{i}", Class.new(Spn2Error)) }
|
20
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spn2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MatzFan
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: curb
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- lib/curlable.rb
|
167
167
|
- lib/spn2.rb
|
168
168
|
- lib/spn2/version.rb
|
169
|
+
- lib/spn2_errors.rb
|
169
170
|
- sig/spn2.rbs
|
170
171
|
homepage: https://gitlab.com/matzfan/spn2
|
171
172
|
licenses:
|