spn2 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e968c75da93882e48ac210e17bb599497971ee1f065035758279529b23c53f1b
4
- data.tar.gz: 9fa1b6f6125d9347d2418254b8e8838572fd8a4efcf0c6bbabc258f3784c097b
3
+ metadata.gz: d73702950316690d62ce910059788093af0ba31b094c1208eb7c7ef72cbaa06e
4
+ data.tar.gz: 87ba2a61a9f76d86d46043da74418b2a67f6ac584dac2c3b50b2aa7baa8c7aad
5
5
  SHA512:
6
- metadata.gz: bacfeda95f8a40e132496cb69078e8229767de20c99615db7da4bb1d5f35a9a4c7e3e688a1fe3d31d6dd9e5f026d0d6fc8e328aae90915de55e6810db675732f
7
- data.tar.gz: e62fe104f074cb9ab5e4a85b89050e8dc0f198613dd9ff62fd56d3c645428e080699e14d57b8b6a655b68ed3be00704ff67ba5e9a58508083fe0f961cdc1ac55
6
+ metadata.gz: 0ad8a32dc48bd5dbaf5b24428f6a3522c4af9ce3a5a2297df39fb4dd9b36a627565a844cf841ca50e3c3e0200b569c88d73bd27717468ed8b8e2f5444114d316
7
+ data.tar.gz: b08014dddc62d499a30e5bc8ba1ce20c96a5f01ad8d0d465759897904f297339a9b79ecff1fdd56561c194072e24ba83542b5d0e39f6d6ab8e57a3b55bae8f6d
data/CHANGELOG.md CHANGED
@@ -11,3 +11,7 @@
11
11
 
12
12
  - Add user_status
13
13
  - Add status calls for multiple job_ids and outlinks
14
+
15
+ ## [0.2.0] - 2022-07-03
16
+
17
+ - Breaking change: Single method 'status' with kwarg :job_ids now used for status of job(s)
data/README.md CHANGED
@@ -59,7 +59,7 @@ The key "status_ext" contains an explanatory message - see the API [specificatio
59
59
 
60
60
  Use the job_id.
61
61
  ```rb
62
- > Spn2.status_job_id(job_id: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14')
62
+ > Spn2.status(job_ids: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14')
63
63
 
64
64
  => {"counters"=>{"outlinks"=>1, "embeds"=>2}, "job_id"=>"spn2-9c17e047f58f9220a7008d4f18152fee4d111d14",
65
65
  "original_url"=>"http://example.com/", "resources"=>["http://example.com/", "http://example.com/favicon.ico"],
@@ -76,16 +76,16 @@ $ echo "http://example.com/"|tr -d "\n"|shasum
76
76
  9c17e047f58f9220a7008d4f18152fee4d111d14 -
77
77
  ```
78
78
 
79
- The status of a comma-separated list of job_id's can be obtained with:
79
+ The status of an array of job_id's can be obtained with:
80
80
  ```rb
81
- > Spn2.status_job_ids(job_ids: 'spn2-9c17e047f58f9220a7008d4f18152fee4d111d14,spn2-...')
81
+ > Spn2.status(job_ids: ['spn2-9c17e047f58f9220a7008d4f18152fee4d111d14', 'spn2-...'])
82
82
 
83
83
  => [.. # an array of status hashes
84
84
  ```
85
85
 
86
86
  Finally, the status of any outlinks captured by using the save option `capture_outlinks: 1` is available by supplying the parent job_id to:
87
87
  ```rb
88
- > Spn2.status_job_id_outlinks(job_id: 'spn2-cce034d987e1d72d8cbf1770bcf99024fe20dddf')
88
+ > Spn2.status(job_ids: 'spn2-cce034d987e1d72d8cbf1770bcf99024fe20dddf', outlinks: true)
89
89
 
90
90
  => [.. # an array of outlink job status hashes
91
91
  ```
data/lib/spn2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spn2
4
- VERSION = '0.1.2'
4
+ VERSION = '0.2.0'
5
5
  end
data/lib/spn2.rb CHANGED
@@ -5,25 +5,12 @@ require 'json'
5
5
  require 'nokogiri'
6
6
 
7
7
  require_relative 'curlable'
8
+ require_relative 'spn2_errors'
8
9
 
9
10
  # Design decision to not use a class as only 'state' is in 2 env vars
10
11
  module Spn2
11
12
  extend Curlable
12
13
 
13
- BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'
14
- ERROR_CODES = [502].freeze
15
-
16
- class Spn2Error < StandardError; end
17
- class Spn2ErrorBadAuth < Spn2Error; end
18
- class Spn2ErrorFailedCapture < Spn2Error; end
19
- class Spn2ErrorInvalidOption < Spn2Error; end
20
- class Spn2ErrorMissingKeys < Spn2Error; end
21
- class Spn2ErrorNoOutlinks < Spn2Error; end
22
- class Spn2ErrorTooManyRequests < Spn2Error; end
23
- class Spn2ErrorUnknownResponse < Spn2Error; end
24
- class Spn2ErrorUnknownResponseCode < Spn2Error; end
25
- ERROR_CODES.each { |i| Spn2.const_set("Spn2Error#{i}", Class.new(Spn2Error)) }
26
-
27
14
  ESSENTIAL_STATUS_KEYS = %w[job_id resources status].freeze
28
15
  JOB_ID_REGEXP = /^(spn2-([a-f]|\d){40})$/
29
16
  WEB_ARCHIVE = 'https://web.archive.org'
@@ -65,33 +52,30 @@ module Spn2
65
52
  end
66
53
  alias capture save
67
54
 
68
- def status_job_id(job_id:)
69
- json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_id: job_id }))
70
- raise Spn2ErrorBadAuth, json.inspect if json['message']&.== BAD_AUTH_MSG
71
-
72
- raise Spn2ErrorMissingKeys, json.inspect unless (ESSENTIAL_STATUS_KEYS - json.keys).empty?
55
+ def status(job_ids:, outlinks: false)
56
+ params = status_params(job_ids: job_ids, outlinks: outlinks)
57
+ json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: params))
58
+ return json if json.is_a? Array # must be valid response
73
59
 
60
+ handle_status_errors(job_ids: job_ids, json: json, outlinks: outlinks)
74
61
  json
75
62
  end
76
- alias status status_job_id
77
63
 
78
- def status_job_ids(job_ids:)
79
- json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_ids: job_ids }))
80
- raise Spn2Error, json.inspect unless json.is_a? Array
81
-
82
- json
83
- end
84
- alias statuses status_job_ids
64
+ private
85
65
 
86
- def status_job_id_outlinks(job_id:)
87
- json = json(auth_post(url: "#{WEB_ARCHIVE}/save/status", params: { job_id_outlinks: job_id }))
88
- raise Spn2ErrorNoOutlinks, json.inspect unless json.is_a? Array
66
+ def status_params(job_ids:, outlinks:)
67
+ return { job_ids: job_ids.join(',') } if job_ids.is_a?(Array)
68
+ return { job_id_outlinks: job_ids } if outlinks
89
69
 
90
- json
70
+ { job_id: job_ids } # single job_id
91
71
  end
92
- alias status_outlinks status_job_id_outlinks
93
72
 
94
- private
73
+ def handle_status_errors(job_ids:, json:, outlinks:)
74
+ raise Spn2ErrorBadAuth, json.inspect if json['message']&.== BAD_AUTH_MSG
75
+ raise Spn2ErrorNoOutlinks, json.inspect if outlinks
76
+ raise Spn2ErrorMissingKeys, json.inspect unless (ESSENTIAL_STATUS_KEYS - json.keys).empty?
77
+ raise Spn2Error, json.inspect if job_ids.is_a?(Array)
78
+ end
95
79
 
96
80
  def auth_get(url:)
97
81
  get(url: url, headers: accept_header.merge(auth_header))
@@ -115,25 +99,34 @@ module Spn2
115
99
 
116
100
  def json(html_string)
117
101
  JSON.parse(doc = doc(html_string))
118
- rescue JSON::ParserError # an html response & therefore an error
102
+ rescue JSON::ParserError # an html response
119
103
  parse_error_code_from_page_title(doc.title) if doc.title
120
- parse_error_from_page_body(html_string) # if no title parse body
104
+ parse_error_from_page_body(html_string)
121
105
  end
122
106
 
123
107
  def parse_error_code_from_page_title(title_string)
124
- code = title_string.to_i
125
- raise Spn2.const_get("Spn2Error#{code}") if ERROR_CODES.include? code
126
-
127
- raise Spn2ErrorUnknownResponseCode
108
+ raise_code_response_error_if_code_in_string(title_string)
109
+ raise Spn2ErrorUnknownResponseCode, title_string # code found but doesn't match any known error classes
128
110
  end
129
111
 
130
112
  def parse_error_from_page_body(html_string)
131
- h1 = doc(html_string).xpath('//h1')
132
- raise Spn2ErrorTooManyRequests if !h1.empty? && h1.text == 'Too Many Requests'
113
+ h1_tag_text = h1_tag_text(html_string)
114
+ raise_code_response_error_if_code_in_string h1_tag_text
115
+ raise Spn2ErrorTooManyRequests if h1_tag_text == TOO_MANY_REQUESTS
133
116
 
134
117
  raise Spn2ErrorUnknownResponse, html_string # fall through
135
118
  end
136
119
 
120
+ def h1_tag_text(html_string)
121
+ doc(html_string).xpath('//h1')&.text || ''
122
+ end
123
+
124
+ def raise_code_response_error_if_code_in_string(string)
125
+ return unless ERROR_CODES.include? code = string.to_i
126
+
127
+ raise Spn2.const_get("Spn2Error#{code}")
128
+ end
129
+
137
130
  def options_valid?(opts)
138
131
  opts.keys.all? { |k| (BINARY_OPTS + OTHER_OPTS).include? k.to_s }
139
132
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # namespace
4
+ module Spn2
5
+ BAD_AUTH_MSG = 'You need to be logged in to use Save Page Now.'
6
+ ERROR_CODES = [400, 502].freeze
7
+ TOO_MANY_REQUESTS = 'Too Many Requests'
8
+
9
+ class Spn2Error < StandardError; end
10
+ class Spn2ErrorBadAuth < Spn2Error; end
11
+ class Spn2ErrorBadParams < Spn2Error; end
12
+ class Spn2ErrorFailedCapture < Spn2Error; end
13
+ class Spn2ErrorInvalidOption < Spn2Error; end
14
+ class Spn2ErrorMissingKeys < Spn2Error; end
15
+ class Spn2ErrorNoOutlinks < Spn2Error; end
16
+ class Spn2ErrorTooManyRequests < Spn2Error; end
17
+ class Spn2ErrorUnknownResponse < Spn2Error; end
18
+ class Spn2ErrorUnknownResponseCode < Spn2Error; end
19
+ ERROR_CODES.each { |i| const_set("Spn2Error#{i}", Class.new(Spn2Error)) }
20
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spn2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - MatzFan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-07-02 00:00:00.000000000 Z
11
+ date: 2022-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: curb
@@ -166,6 +166,7 @@ files:
166
166
  - lib/curlable.rb
167
167
  - lib/spn2.rb
168
168
  - lib/spn2/version.rb
169
+ - lib/spn2_errors.rb
169
170
  - sig/spn2.rbs
170
171
  homepage: https://gitlab.com/matzfan/spn2
171
172
  licenses: