archive_today 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile +8 -5
- data/Gemfile.lock +77 -5
- data/README.md +36 -3
- data/archive_today.gemspec +6 -3
- data/lib/archive_today.rb +5 -2
- data/lib/archive_today/archiver.rb +44 -18
- data/lib/archive_today/version.rb +1 -1
- metadata +59 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d37b8ae11abd0b7a09160e3dc7fa47b7e395a9e9b0ee56c6a0c0f6d686e06dd4
|
4
|
+
data.tar.gz: 88d084d91aeaefb0033e105ee23348f55335acbf92dc8bf4de07f9efcb894a1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16a9e59b313e65a34d06657ee3a450ce8fe6e03c8f1f1ceb436db7ebce3fe5fd85b6a30f47818b49c2872bbe6cf641456d14d5d6e6e5323e1d908055abde6ca3
|
7
|
+
data.tar.gz: a484ae8056b1e643b0e2852ac25d79ad8e97597b8920cf9fb43947ddf9981fb94fbc4e773e7370c26d84bbf7543ffc31e92841e9aaf08adf5e5f5d7d75230a0e
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
source 'https://rubygems.org'
|
2
2
|
|
3
|
+
group :development, :test do
|
4
|
+
gem 'amazing_print'
|
5
|
+
gem 'httplog'
|
6
|
+
gem 'irbtools'
|
7
|
+
gem 'pry'
|
8
|
+
gem 'rake'
|
9
|
+
end
|
10
|
+
|
3
11
|
# Specify your gem's dependencies in archive_today.gemspec
|
4
12
|
gemspec
|
5
|
-
|
6
|
-
gem 'faraday', '~> 1.0'
|
7
|
-
gem 'faraday_middleware', '~> 1.0'
|
8
|
-
gem 'nokogiri', '~> 1.10'
|
9
|
-
gem 'rake', '~> 12.0'
|
data/Gemfile.lock
CHANGED
@@ -1,30 +1,102 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
archive_today (0.
|
4
|
+
archive_today (0.2.0)
|
5
|
+
faraday (~> 1.0)
|
6
|
+
faraday_middleware (~> 1.0)
|
7
|
+
nokogiri (~> 1.10)
|
8
|
+
rake (~> 12.0)
|
5
9
|
|
6
10
|
GEM
|
7
11
|
remote: https://rubygems.org/
|
8
12
|
specs:
|
13
|
+
alias (0.2.3)
|
14
|
+
amazing_print (1.2.1)
|
15
|
+
binding.repl (3.0.0)
|
16
|
+
boson (1.3.0)
|
17
|
+
boson-more (0.3.1)
|
18
|
+
boson (>= 1.3.0)
|
19
|
+
cd (1.0.1)
|
20
|
+
clipboard (1.0.6)
|
21
|
+
coderay (1.1.2)
|
22
|
+
debugging (1.1.1)
|
23
|
+
binding.repl (~> 3.0)
|
24
|
+
paint (>= 0.9, < 3.0)
|
25
|
+
every_day_irb (2.1.0)
|
26
|
+
cd (~> 1.0)
|
27
|
+
fancy_irb (1.2.1)
|
28
|
+
paint (>= 0.9, < 3.0)
|
29
|
+
unicode-display_width (~> 1.1)
|
9
30
|
faraday (1.0.1)
|
10
31
|
multipart-post (>= 1.2, < 3)
|
11
32
|
faraday_middleware (1.0.0)
|
12
33
|
faraday (~> 1.0)
|
34
|
+
ffi (1.13.1)
|
35
|
+
g (1.7.2)
|
36
|
+
hirb (0.7.3)
|
37
|
+
httplog (1.4.2)
|
38
|
+
rack (>= 1.0)
|
39
|
+
rainbow (>= 2.0.0)
|
40
|
+
interactive_editor (0.0.11)
|
41
|
+
spoon (>= 0.0.1)
|
42
|
+
irbtools (1.7.1)
|
43
|
+
alias (~> 0.2.3)
|
44
|
+
binding.repl (~> 3.0)
|
45
|
+
boson (~> 1.3.0)
|
46
|
+
boson-more (~> 0.3.0)
|
47
|
+
clipboard (~> 1.0.5)
|
48
|
+
coderay (~> 1.1.0)
|
49
|
+
debugging (~> 1.0)
|
50
|
+
every_day_irb (>= 1.7.1)
|
51
|
+
fancy_irb (>= 0.7.3)
|
52
|
+
g (>= 1.7.2)
|
53
|
+
hirb (~> 0.7, >= 0.7.3)
|
54
|
+
interactive_editor (>= 0.0.10)
|
55
|
+
method_locator (>= 0.0.4)
|
56
|
+
method_source (>= 0.8.2)
|
57
|
+
methodfinder (~> 2.0)
|
58
|
+
ori (~> 0.1.0)
|
59
|
+
os (~> 0.9)
|
60
|
+
paint (>= 0.8.7)
|
61
|
+
ruby_engine (~> 1.0)
|
62
|
+
ruby_info (~> 1.0)
|
63
|
+
ruby_version (~> 1.0)
|
64
|
+
wirb (>= 1.0.3)
|
65
|
+
method_locator (0.0.4)
|
66
|
+
method_source (1.0.0)
|
67
|
+
methodfinder (2.2.1)
|
13
68
|
mini_portile2 (2.4.0)
|
14
69
|
multipart-post (2.1.1)
|
15
70
|
nokogiri (1.10.10)
|
16
71
|
mini_portile2 (~> 2.4.0)
|
72
|
+
ori (0.1.0)
|
73
|
+
os (0.9.6)
|
74
|
+
paint (2.2.0)
|
75
|
+
pry (0.13.1)
|
76
|
+
coderay (~> 1.1)
|
77
|
+
method_source (~> 1.0)
|
78
|
+
rack (2.2.2)
|
79
|
+
rainbow (3.0.0)
|
17
80
|
rake (12.3.2)
|
81
|
+
ruby_engine (1.0.1)
|
82
|
+
ruby_info (1.0.1)
|
83
|
+
ruby_version (1.0.2)
|
84
|
+
spoon (0.0.6)
|
85
|
+
ffi
|
86
|
+
unicode-display_width (1.7.0)
|
87
|
+
wirb (2.2.1)
|
88
|
+
paint (>= 0.9, < 3.0)
|
18
89
|
|
19
90
|
PLATFORMS
|
20
91
|
ruby
|
21
92
|
|
22
93
|
DEPENDENCIES
|
94
|
+
amazing_print
|
23
95
|
archive_today!
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
rake
|
96
|
+
httplog
|
97
|
+
irbtools
|
98
|
+
pry
|
99
|
+
rake
|
28
100
|
|
29
101
|
BUNDLED WITH
|
30
102
|
2.1.4
|
data/README.md
CHANGED
@@ -20,18 +20,51 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
+
### Class Method
|
24
|
+
|
25
|
+
`ArchiveToday.capture`
|
26
|
+
|
27
|
+
Returns a Hash with keys `:url` and `:screenshot_url`. Note that if the page is in the process of being archived, the screenshot is not generated yet, so `nil` is returned for the screenshot URL.
|
28
|
+
|
29
|
+
#### Args
|
30
|
+
|
31
|
+
- `url` (required) - the target URL for archival
|
32
|
+
- `debug` (optional) - when set to true, this will log HTTP requests and responses
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
require 'archive_today'
|
36
|
+
|
37
|
+
ArchiveToday.capture(url: 'https://example.com')
|
38
|
+
|
39
|
+
# => { url: 'https://archive.is/a1b2c3', screenshot_url: 'https://archive.is/[...].jpg' }
|
40
|
+
```
|
41
|
+
|
42
|
+
### Instance Method
|
43
|
+
|
44
|
+
`ArchiveToday::Archiver`
|
45
|
+
|
46
|
+
This class exposes the same `#capture` method, but you can also query the instance for the cached URLs once the capture response is received.
|
47
|
+
|
48
|
+
#### Args
|
49
|
+
|
50
|
+
- `url` (required) - the target URL for archival
|
51
|
+
- `debug` (optional) - when set to true, this will log HTTP requests and responses
|
52
|
+
|
53
|
+
|
23
54
|
```ruby
|
24
55
|
require 'archive_today'
|
25
56
|
|
26
|
-
ArchiveToday.
|
57
|
+
a = ArchiveToday::Archiver.new(url: 'https://example.com')
|
58
|
+
a.capture
|
59
|
+
|
60
|
+
puts a.screenshot_url
|
27
61
|
|
28
|
-
# => 'https://archive.is/
|
62
|
+
# => 'https://archive.is/[...].jpg'
|
29
63
|
```
|
30
64
|
|
31
65
|
## Roadmap
|
32
66
|
|
33
67
|
- proxies
|
34
|
-
- optionally return URL of captured screenshot
|
35
68
|
|
36
69
|
## Development
|
37
70
|
|
data/archive_today.gemspec
CHANGED
@@ -12,12 +12,15 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.license = "MIT"
|
13
13
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
14
14
|
|
15
|
+
spec.add_dependency 'faraday', '~> 1.0'
|
16
|
+
spec.add_dependency 'faraday_middleware', '~> 1.0'
|
17
|
+
spec.add_dependency 'nokogiri', '~> 1.10'
|
18
|
+
spec.add_dependency 'rake', '~> 12.0'
|
19
|
+
|
15
20
|
spec.metadata["homepage_uri"] = spec.homepage
|
16
21
|
spec.metadata["source_code_uri"] = spec.homepage
|
17
|
-
spec.metadata["changelog_uri"] = "#{spec.homepage}/CHANGELOG.md"
|
22
|
+
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/master/CHANGELOG.md"
|
18
23
|
|
19
|
-
# Specify which files should be added to the gem when it is released.
|
20
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
21
24
|
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
22
25
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
23
26
|
end
|
data/lib/archive_today.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'nokogiri'
|
1
4
|
require 'archive_today/version'
|
2
5
|
require 'archive_today/archiver'
|
3
6
|
|
@@ -5,8 +8,8 @@ module ArchiveToday
|
|
5
8
|
class Error < StandardError; end
|
6
9
|
|
7
10
|
class << self
|
8
|
-
def
|
9
|
-
Archiver.new(url: url, debug: debug).
|
11
|
+
def capture(url:, debug: false)
|
12
|
+
Archiver.new(url: url, debug: debug).capture
|
10
13
|
end
|
11
14
|
end
|
12
15
|
end
|
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'faraday'
|
2
|
-
require 'faraday_middleware'
|
3
|
-
require 'nokogiri'
|
4
1
|
require_relative 'version'
|
5
2
|
|
6
3
|
module ArchiveToday
|
@@ -8,41 +5,69 @@ module ArchiveToday
|
|
8
5
|
BASE_URL = 'https://archive.today/'.freeze
|
9
6
|
DEFAULT_USER_AGENT = "archive_today #{ArchiveToday::VERSION}".freeze
|
10
7
|
|
11
|
-
|
8
|
+
attr_accessor :response
|
9
|
+
attr_reader :debug, :target_url, :user_agent
|
12
10
|
|
13
11
|
def initialize(url:, user_agent: DEFAULT_USER_AGENT, debug: false)
|
14
12
|
@debug = debug
|
15
|
-
@
|
13
|
+
@target_url = url
|
16
14
|
@user_agent = user_agent
|
17
15
|
end
|
18
16
|
|
19
|
-
def
|
20
|
-
puts 'Submitting URL ...'
|
17
|
+
def capture
|
18
|
+
puts 'Submitting URL ...' if debug
|
21
19
|
response = connection.post('/submit/') do |req|
|
22
20
|
req.body = submission_body
|
23
21
|
end
|
24
22
|
raise unless response.success?
|
25
23
|
|
26
|
-
|
24
|
+
self.response = response
|
25
|
+
|
26
|
+
{
|
27
|
+
url: finalized_url,
|
28
|
+
screenshot_url: screenshot_url
|
29
|
+
}
|
27
30
|
end
|
28
31
|
|
29
32
|
private
|
30
33
|
|
31
|
-
def
|
32
|
-
|
34
|
+
def finalized_url
|
35
|
+
archived_url.gsub('/wip', '')
|
36
|
+
end
|
37
|
+
|
38
|
+
def archived_url
|
39
|
+
@archived_url ||= begin
|
40
|
+
headers = response.headers
|
33
41
|
|
34
|
-
|
35
|
-
|
42
|
+
return headers[:location] if headers.has_key?('location')
|
43
|
+
return headers[:refresh].split(';url=').last if headers.has_key?('refresh')
|
44
|
+
|
45
|
+
# TODO: handle the history case mentioned here?
|
46
|
+
# https://github.com/pastpages/archiveis/blob/master/archiveis/api.py#L81
|
47
|
+
response.env.url
|
48
|
+
end
|
49
|
+
end
|
36
50
|
|
37
|
-
|
38
|
-
|
39
|
-
|
51
|
+
def screenshot_url
|
52
|
+
return nil unless archived_url
|
53
|
+
return nil if archived_url.include? '/wip/'
|
54
|
+
|
55
|
+
response = connection.get do |req|
|
56
|
+
req.url "#{archived_url}/image"
|
57
|
+
end
|
58
|
+
html = Nokogiri::HTML(response.body)
|
59
|
+
node = html.at_css('img[itemprop="contentUrl"]')
|
60
|
+
url = node.attr('src')
|
61
|
+
puts "Got screenshot URL: #{url}" if debug && url
|
62
|
+
return url if url
|
63
|
+
|
64
|
+
nil
|
40
65
|
end
|
41
66
|
|
42
67
|
def submission_body
|
43
68
|
URI.encode_www_form(
|
44
69
|
{
|
45
|
-
url:
|
70
|
+
url: target_url,
|
46
71
|
anyway: 1,
|
47
72
|
submitid: unique_submission_id
|
48
73
|
}
|
@@ -50,14 +75,15 @@ module ArchiveToday
|
|
50
75
|
end
|
51
76
|
|
52
77
|
def unique_submission_id
|
53
|
-
puts 'Getting unique submission ID ...'
|
78
|
+
puts 'Getting unique submission ID ...' if debug
|
54
79
|
response = connection.get('/')
|
55
80
|
raise unless response.success?
|
56
81
|
|
57
82
|
html = Nokogiri::HTML(response.body)
|
58
83
|
node = html.at_css('input[name="submitid"]')
|
59
84
|
id = node.attr('value')
|
60
|
-
puts "Got ID: #{id}"
|
85
|
+
puts "Got ID: #{id}" if debug && id
|
86
|
+
return id if id
|
61
87
|
|
62
88
|
nil
|
63
89
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: archive_today
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomholford
|
@@ -9,7 +9,63 @@ autorequire:
|
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-07-13 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faraday_middleware
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.10'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '12.0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '12.0'
|
13
69
|
description: Submit a URL to the Archive.today service to preserve its contents in
|
14
70
|
a Memento-compatible format
|
15
71
|
email:
|
@@ -37,7 +93,7 @@ licenses:
|
|
37
93
|
metadata:
|
38
94
|
homepage_uri: https://github.com/tomholford/archive-today
|
39
95
|
source_code_uri: https://github.com/tomholford/archive-today
|
40
|
-
changelog_uri: https://github.com/tomholford/archive-today/CHANGELOG.md
|
96
|
+
changelog_uri: https://github.com/tomholford/archive-today/blob/master/CHANGELOG.md
|
41
97
|
post_install_message:
|
42
98
|
rdoc_options: []
|
43
99
|
require_paths:
|