archive_today 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile +8 -5
- data/Gemfile.lock +77 -5
- data/README.md +36 -3
- data/archive_today.gemspec +6 -3
- data/lib/archive_today.rb +5 -2
- data/lib/archive_today/archiver.rb +44 -18
- data/lib/archive_today/version.rb +1 -1
- metadata +59 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d37b8ae11abd0b7a09160e3dc7fa47b7e395a9e9b0ee56c6a0c0f6d686e06dd4
|
4
|
+
data.tar.gz: 88d084d91aeaefb0033e105ee23348f55335acbf92dc8bf4de07f9efcb894a1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16a9e59b313e65a34d06657ee3a450ce8fe6e03c8f1f1ceb436db7ebce3fe5fd85b6a30f47818b49c2872bbe6cf641456d14d5d6e6e5323e1d908055abde6ca3
|
7
|
+
data.tar.gz: a484ae8056b1e643b0e2852ac25d79ad8e97597b8920cf9fb43947ddf9981fb94fbc4e773e7370c26d84bbf7543ffc31e92841e9aaf08adf5e5f5d7d75230a0e
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
source 'https://rubygems.org'
|
2
2
|
|
3
|
+
group :development, :test do
|
4
|
+
gem 'amazing_print'
|
5
|
+
gem 'httplog'
|
6
|
+
gem 'irbtools'
|
7
|
+
gem 'pry'
|
8
|
+
gem 'rake'
|
9
|
+
end
|
10
|
+
|
3
11
|
# Specify your gem's dependencies in archive_today.gemspec
|
4
12
|
gemspec
|
5
|
-
|
6
|
-
gem 'faraday', '~> 1.0'
|
7
|
-
gem 'faraday_middleware', '~> 1.0'
|
8
|
-
gem 'nokogiri', '~> 1.10'
|
9
|
-
gem 'rake', '~> 12.0'
|
data/Gemfile.lock
CHANGED
@@ -1,30 +1,102 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
archive_today (0.
|
4
|
+
archive_today (0.2.0)
|
5
|
+
faraday (~> 1.0)
|
6
|
+
faraday_middleware (~> 1.0)
|
7
|
+
nokogiri (~> 1.10)
|
8
|
+
rake (~> 12.0)
|
5
9
|
|
6
10
|
GEM
|
7
11
|
remote: https://rubygems.org/
|
8
12
|
specs:
|
13
|
+
alias (0.2.3)
|
14
|
+
amazing_print (1.2.1)
|
15
|
+
binding.repl (3.0.0)
|
16
|
+
boson (1.3.0)
|
17
|
+
boson-more (0.3.1)
|
18
|
+
boson (>= 1.3.0)
|
19
|
+
cd (1.0.1)
|
20
|
+
clipboard (1.0.6)
|
21
|
+
coderay (1.1.2)
|
22
|
+
debugging (1.1.1)
|
23
|
+
binding.repl (~> 3.0)
|
24
|
+
paint (>= 0.9, < 3.0)
|
25
|
+
every_day_irb (2.1.0)
|
26
|
+
cd (~> 1.0)
|
27
|
+
fancy_irb (1.2.1)
|
28
|
+
paint (>= 0.9, < 3.0)
|
29
|
+
unicode-display_width (~> 1.1)
|
9
30
|
faraday (1.0.1)
|
10
31
|
multipart-post (>= 1.2, < 3)
|
11
32
|
faraday_middleware (1.0.0)
|
12
33
|
faraday (~> 1.0)
|
34
|
+
ffi (1.13.1)
|
35
|
+
g (1.7.2)
|
36
|
+
hirb (0.7.3)
|
37
|
+
httplog (1.4.2)
|
38
|
+
rack (>= 1.0)
|
39
|
+
rainbow (>= 2.0.0)
|
40
|
+
interactive_editor (0.0.11)
|
41
|
+
spoon (>= 0.0.1)
|
42
|
+
irbtools (1.7.1)
|
43
|
+
alias (~> 0.2.3)
|
44
|
+
binding.repl (~> 3.0)
|
45
|
+
boson (~> 1.3.0)
|
46
|
+
boson-more (~> 0.3.0)
|
47
|
+
clipboard (~> 1.0.5)
|
48
|
+
coderay (~> 1.1.0)
|
49
|
+
debugging (~> 1.0)
|
50
|
+
every_day_irb (>= 1.7.1)
|
51
|
+
fancy_irb (>= 0.7.3)
|
52
|
+
g (>= 1.7.2)
|
53
|
+
hirb (~> 0.7, >= 0.7.3)
|
54
|
+
interactive_editor (>= 0.0.10)
|
55
|
+
method_locator (>= 0.0.4)
|
56
|
+
method_source (>= 0.8.2)
|
57
|
+
methodfinder (~> 2.0)
|
58
|
+
ori (~> 0.1.0)
|
59
|
+
os (~> 0.9)
|
60
|
+
paint (>= 0.8.7)
|
61
|
+
ruby_engine (~> 1.0)
|
62
|
+
ruby_info (~> 1.0)
|
63
|
+
ruby_version (~> 1.0)
|
64
|
+
wirb (>= 1.0.3)
|
65
|
+
method_locator (0.0.4)
|
66
|
+
method_source (1.0.0)
|
67
|
+
methodfinder (2.2.1)
|
13
68
|
mini_portile2 (2.4.0)
|
14
69
|
multipart-post (2.1.1)
|
15
70
|
nokogiri (1.10.10)
|
16
71
|
mini_portile2 (~> 2.4.0)
|
72
|
+
ori (0.1.0)
|
73
|
+
os (0.9.6)
|
74
|
+
paint (2.2.0)
|
75
|
+
pry (0.13.1)
|
76
|
+
coderay (~> 1.1)
|
77
|
+
method_source (~> 1.0)
|
78
|
+
rack (2.2.2)
|
79
|
+
rainbow (3.0.0)
|
17
80
|
rake (12.3.2)
|
81
|
+
ruby_engine (1.0.1)
|
82
|
+
ruby_info (1.0.1)
|
83
|
+
ruby_version (1.0.2)
|
84
|
+
spoon (0.0.6)
|
85
|
+
ffi
|
86
|
+
unicode-display_width (1.7.0)
|
87
|
+
wirb (2.2.1)
|
88
|
+
paint (>= 0.9, < 3.0)
|
18
89
|
|
19
90
|
PLATFORMS
|
20
91
|
ruby
|
21
92
|
|
22
93
|
DEPENDENCIES
|
94
|
+
amazing_print
|
23
95
|
archive_today!
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
rake
|
96
|
+
httplog
|
97
|
+
irbtools
|
98
|
+
pry
|
99
|
+
rake
|
28
100
|
|
29
101
|
BUNDLED WITH
|
30
102
|
2.1.4
|
data/README.md
CHANGED
@@ -20,18 +20,51 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
+
### Class Method
|
24
|
+
|
25
|
+
`ArchiveToday.capture`
|
26
|
+
|
27
|
+
Returns a Hash with keys `:url` and `:screenshot_url`. Note that if the page is in the process of being archived, the screenshot is not generated yet, so `nil` is returned for the screenshot URL.
|
28
|
+
|
29
|
+
#### Args
|
30
|
+
|
31
|
+
- `url` (required) - the target URL for archival
|
32
|
+
- `debug` (optional) - when set to true, this will log HTTP requests and responses
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
require 'archive_today'
|
36
|
+
|
37
|
+
ArchiveToday.capture(url: 'https://example.com')
|
38
|
+
|
39
|
+
# => { url: 'https://archive.is/a1b2c3', screenshot_url: 'https://archive.is/[...].jpg' }
|
40
|
+
```
|
41
|
+
|
42
|
+
### Instance Method
|
43
|
+
|
44
|
+
`ArchiveToday::Archiver`
|
45
|
+
|
46
|
+
This class exposes the same `#capture` method, but you can also query the instance for the cached URLs once the capture response is received.
|
47
|
+
|
48
|
+
#### Args
|
49
|
+
|
50
|
+
- `url` (required) - the target URL for archival
|
51
|
+
- `debug` (optional) - when set to true, this will log HTTP requests and responses
|
52
|
+
|
53
|
+
|
23
54
|
```ruby
|
24
55
|
require 'archive_today'
|
25
56
|
|
26
|
-
ArchiveToday.
|
57
|
+
a = ArchiveToday::Archiver.new(url: 'https://example.com')
|
58
|
+
a.capture
|
59
|
+
|
60
|
+
puts a.screenshot_url
|
27
61
|
|
28
|
-
# => 'https://archive.is/
|
62
|
+
# => 'https://archive.is/[...].jpg'
|
29
63
|
```
|
30
64
|
|
31
65
|
## Roadmap
|
32
66
|
|
33
67
|
- proxies
|
34
|
-
- optionally return URL of captured screenshot
|
35
68
|
|
36
69
|
## Development
|
37
70
|
|
data/archive_today.gemspec
CHANGED
@@ -12,12 +12,15 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.license = "MIT"
|
13
13
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
14
14
|
|
15
|
+
spec.add_dependency 'faraday', '~> 1.0'
|
16
|
+
spec.add_dependency 'faraday_middleware', '~> 1.0'
|
17
|
+
spec.add_dependency 'nokogiri', '~> 1.10'
|
18
|
+
spec.add_dependency 'rake', '~> 12.0'
|
19
|
+
|
15
20
|
spec.metadata["homepage_uri"] = spec.homepage
|
16
21
|
spec.metadata["source_code_uri"] = spec.homepage
|
17
|
-
spec.metadata["changelog_uri"] = "#{spec.homepage}/CHANGELOG.md"
|
22
|
+
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/master/CHANGELOG.md"
|
18
23
|
|
19
|
-
# Specify which files should be added to the gem when it is released.
|
20
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
21
24
|
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
22
25
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
23
26
|
end
|
data/lib/archive_today.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'nokogiri'
|
1
4
|
require 'archive_today/version'
|
2
5
|
require 'archive_today/archiver'
|
3
6
|
|
@@ -5,8 +8,8 @@ module ArchiveToday
|
|
5
8
|
class Error < StandardError; end
|
6
9
|
|
7
10
|
class << self
|
8
|
-
def
|
9
|
-
Archiver.new(url: url, debug: debug).
|
11
|
+
def capture(url:, debug: false)
|
12
|
+
Archiver.new(url: url, debug: debug).capture
|
10
13
|
end
|
11
14
|
end
|
12
15
|
end
|
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'faraday'
|
2
|
-
require 'faraday_middleware'
|
3
|
-
require 'nokogiri'
|
4
1
|
require_relative 'version'
|
5
2
|
|
6
3
|
module ArchiveToday
|
@@ -8,41 +5,69 @@ module ArchiveToday
|
|
8
5
|
BASE_URL = 'https://archive.today/'.freeze
|
9
6
|
DEFAULT_USER_AGENT = "archive_today #{ArchiveToday::VERSION}".freeze
|
10
7
|
|
11
|
-
|
8
|
+
attr_accessor :response
|
9
|
+
attr_reader :debug, :target_url, :user_agent
|
12
10
|
|
13
11
|
def initialize(url:, user_agent: DEFAULT_USER_AGENT, debug: false)
|
14
12
|
@debug = debug
|
15
|
-
@
|
13
|
+
@target_url = url
|
16
14
|
@user_agent = user_agent
|
17
15
|
end
|
18
16
|
|
19
|
-
def
|
20
|
-
puts 'Submitting URL ...'
|
17
|
+
def capture
|
18
|
+
puts 'Submitting URL ...' if debug
|
21
19
|
response = connection.post('/submit/') do |req|
|
22
20
|
req.body = submission_body
|
23
21
|
end
|
24
22
|
raise unless response.success?
|
25
23
|
|
26
|
-
|
24
|
+
self.response = response
|
25
|
+
|
26
|
+
{
|
27
|
+
url: finalized_url,
|
28
|
+
screenshot_url: screenshot_url
|
29
|
+
}
|
27
30
|
end
|
28
31
|
|
29
32
|
private
|
30
33
|
|
31
|
-
def
|
32
|
-
|
34
|
+
def finalized_url
|
35
|
+
archived_url.gsub('/wip', '')
|
36
|
+
end
|
37
|
+
|
38
|
+
def archived_url
|
39
|
+
@archived_url ||= begin
|
40
|
+
headers = response.headers
|
33
41
|
|
34
|
-
|
35
|
-
|
42
|
+
return headers[:location] if headers.has_key?('location')
|
43
|
+
return headers[:refresh].split(';url=').last if headers.has_key?('refresh')
|
44
|
+
|
45
|
+
# TODO: handle the history case mentioned here?
|
46
|
+
# https://github.com/pastpages/archiveis/blob/master/archiveis/api.py#L81
|
47
|
+
response.env.url
|
48
|
+
end
|
49
|
+
end
|
36
50
|
|
37
|
-
|
38
|
-
|
39
|
-
|
51
|
+
def screenshot_url
|
52
|
+
return nil unless archived_url
|
53
|
+
return nil if archived_url.include? '/wip/'
|
54
|
+
|
55
|
+
response = connection.get do |req|
|
56
|
+
req.url "#{archived_url}/image"
|
57
|
+
end
|
58
|
+
html = Nokogiri::HTML(response.body)
|
59
|
+
node = html.at_css('img[itemprop="contentUrl"]')
|
60
|
+
url = node.attr('src')
|
61
|
+
puts "Got screenshot URL: #{url}" if debug && url
|
62
|
+
return url if url
|
63
|
+
|
64
|
+
nil
|
40
65
|
end
|
41
66
|
|
42
67
|
def submission_body
|
43
68
|
URI.encode_www_form(
|
44
69
|
{
|
45
|
-
url:
|
70
|
+
url: target_url,
|
46
71
|
anyway: 1,
|
47
72
|
submitid: unique_submission_id
|
48
73
|
}
|
@@ -50,14 +75,15 @@ module ArchiveToday
|
|
50
75
|
end
|
51
76
|
|
52
77
|
def unique_submission_id
|
53
|
-
puts 'Getting unique submission ID ...'
|
78
|
+
puts 'Getting unique submission ID ...' if debug
|
54
79
|
response = connection.get('/')
|
55
80
|
raise unless response.success?
|
56
81
|
|
57
82
|
html = Nokogiri::HTML(response.body)
|
58
83
|
node = html.at_css('input[name="submitid"]')
|
59
84
|
id = node.attr('value')
|
60
|
-
puts "Got ID: #{id}"
|
85
|
+
puts "Got ID: #{id}" if debug && id
|
86
|
+
return id if id
|
61
87
|
|
62
88
|
nil
|
63
89
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: archive_today
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomholford
|
@@ -9,7 +9,63 @@ autorequire:
|
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-07-13 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faraday_middleware
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.10'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '12.0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '12.0'
|
13
69
|
description: Submit a URL to the Archive.today service to preserve its contents in
|
14
70
|
a Memento-compatible format
|
15
71
|
email:
|
@@ -37,7 +93,7 @@ licenses:
|
|
37
93
|
metadata:
|
38
94
|
homepage_uri: https://github.com/tomholford/archive-today
|
39
95
|
source_code_uri: https://github.com/tomholford/archive-today
|
40
|
-
changelog_uri: https://github.com/tomholford/archive-today/CHANGELOG.md
|
96
|
+
changelog_uri: https://github.com/tomholford/archive-today/blob/master/CHANGELOG.md
|
41
97
|
post_install_message:
|
42
98
|
rdoc_options: []
|
43
99
|
require_paths:
|