wayback 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +4 -0
- data/README.md +28 -11
- data/lib/wayback/api/archive.rb +5 -4
- data/lib/wayback/client.rb +1 -1
- data/lib/wayback/configurable.rb +2 -1
- data/lib/wayback/default.rb +6 -1
- data/lib/wayback/version.rb +1 -1
- data/spec/fixtures/list.timemap +8 -8
- data/spec/helper.rb +8 -8
- data/spec/wayback/api/archive_spec.rb +9 -9
- data/spec/wayback/client_spec.rb +1 -0
- data/spec/wayback/error/client_error_spec.rb +1 -1
- data/spec/wayback/error/server_error_spec.rb +1 -1
- data/spec/wayback_spec.rb +2 -2
- metadata +3 -3
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
0.2.0
|
2
|
+
-----
|
3
|
+
* Archive.org has changed their endpoint URLs to http://web.archive.org/web. Version 0.2.0 corrects this issue with the new ENDPOINT_PATH configuration for handling the /web path.
|
4
|
+
|
1
5
|
0.1.0
|
2
6
|
-----
|
3
7
|
* [Initial release](https://github.com/XOlator/wayback_gem)
|
data/README.md
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
# Wayback Gem
|
2
2
|
|
3
|
-
![
|
4
|
-
![
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/wayback.png)][gem]
|
4
|
+
[![Build Status](https://secure.travis-ci.org/XOlator/wayback_gem.png?branch=master)][travis]
|
5
|
+
[![Coverage Status](https://coveralls.io/repos/XOlator/wayback_gem/badge.png?branch=master)][coveralls]
|
6
|
+
|
7
|
+
[gem]: https://rubygems.org/gems/wayback
|
8
|
+
[travis]: http://travis-ci.org/XOlator/wayback_gem
|
9
|
+
[coveralls]: https://coveralls.io/r/XOlator/wayback_gem
|
10
|
+
|
5
11
|
|
6
12
|
A Ruby interface to Archive.org's Wayback Machine Memento API.
|
7
13
|
|
@@ -9,22 +15,30 @@ A Ruby interface to Archive.org's Wayback Machine Memento API.
|
|
9
15
|
gem install wayback
|
10
16
|
|
11
17
|
## Quick Start Guide
|
12
|
-
|
18
|
+
Accessing the Wayback Machine is super-duper easy.
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
require 'wayback'
|
22
|
+
Wayback.page('http://www.xolator.com', :first)
|
23
|
+
```
|
13
24
|
|
14
25
|
|
15
26
|
## Documentation
|
16
|
-
|
27
|
+
You can browse the Rdoc [here][documentation].
|
28
|
+
|
29
|
+
[documentation]: http://rdoc.info/github/XOlator/wayback_gem/master/frames
|
17
30
|
|
18
31
|
|
19
32
|
## Configuration
|
20
33
|
|
21
|
-
There is no real configuration necessary for accessing
|
34
|
+
There is no real configuration necessary for accessing the Wayback Machine Memento API; however, you can change the endpoint and other basic connection options.
|
22
35
|
|
23
|
-
The current defaults for this gem are:
|
36
|
+
The current default configuration for this gem is:
|
24
37
|
|
25
38
|
```ruby
|
26
39
|
Wayback.configure do |c|
|
27
|
-
c.endpoint = 'http://
|
40
|
+
c.endpoint = 'http://web.archive.org'
|
41
|
+
c.endpoint_path = '/web'
|
28
42
|
c.connection_options = {
|
29
43
|
:headers => {:user_agent => "Wayback Ruby Gem #{Wayback::Version}"},
|
30
44
|
:request => {:open_timeout => 5, :timeout => 10},
|
@@ -68,14 +82,17 @@ Wayback.page('http://www.xolator.com', :last)
|
|
68
82
|
|
69
83
|
|
70
84
|
## Additional Notes
|
71
|
-
Based heavily on the [Twitter gem]
|
85
|
+
Based heavily on the [Twitter gem][twitter_gem]. (Xièxie!)
|
72
86
|
|
73
|
-
More information on Archive's Wayback Machine memento API can be found [here]
|
87
|
+
More information on Archive's Wayback Machine memento API can be found [here][wayback_api].
|
74
88
|
|
89
|
+
[twitter_gem]: https://www.github.com/sferik/twitter
|
90
|
+
[wayback_api]: http://mementoweb.org/depot/native/ia/
|
75
91
|
|
76
92
|
|
77
93
|
## Copyright
|
78
|
-
Copyright (c) 2013 XOlator.
|
79
|
-
See [LICENSE][] for details.
|
94
|
+
Copyright (c) 2013 [XOlator][xolator].
|
95
|
+
See [LICENSE][license] for details.
|
80
96
|
|
97
|
+
[xolator]: http://www.xolator.com
|
81
98
|
[license]: LICENSE.md
|
data/lib/wayback/api/archive.rb
CHANGED
@@ -10,19 +10,20 @@ module Wayback
|
|
10
10
|
# Return a list of archived pages
|
11
11
|
#
|
12
12
|
# @return [Wayback::Archive]
|
13
|
-
# @param url [String] The page that of which was archived.
|
13
|
+
# @param url [String] The URI of the page that was archived.
|
14
14
|
# @param options [Hash] A customizable set of options.
|
15
15
|
# @example Return the list of available archives for a web page.
|
16
16
|
# Wayback.list('http://gleu.ch')
|
17
17
|
def list(url, options={})
|
18
|
-
object_from_response(Wayback::Archive, :get, "/
|
18
|
+
object_from_response(Wayback::Archive, :get, "/timemap/link/#{url}", options)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Returns the HTML contents of an archive page, fetched by date
|
22
22
|
#
|
23
23
|
# @raise [Wayback::Error::Unauthorized] Error raised when supplied user credentials are not valid.
|
24
24
|
# @return [Wayback::Page]
|
25
|
-
# @param url [String] The page that of which was archived.
|
25
|
+
# @param url [String] The URI of the page that was archived.
|
26
|
+
# @param date [String, Symbol, Time, Fixnum, Integer] A date or symbol describing which dated archive page to fetch. Symbols include :first and :last. Strings are converted to integer timestamps.
|
26
27
|
# @param options [Hash] A customizable set of options.
|
27
28
|
# @example Return the HTML archive for the page.
|
28
29
|
# Wayback.page('http://gleu.ch')
|
@@ -34,7 +35,7 @@ module Wayback
|
|
34
35
|
date = 0 if date == :first
|
35
36
|
date = Time.now if date == :last
|
36
37
|
date = Time.parse(date).to_i unless [Fixnum,Time,Integer].include?(date.class)
|
37
|
-
object_from_response(Wayback::Page, :get, "
|
38
|
+
object_from_response(Wayback::Page, :get, "/#{date.to_i}/#{url}", options)
|
38
39
|
end
|
39
40
|
|
40
41
|
end
|
data/lib/wayback/client.rb
CHANGED
@@ -46,7 +46,7 @@ module Wayback
|
|
46
46
|
private
|
47
47
|
|
48
48
|
def request(method, path, params={}, signature_params=params)
|
49
|
-
connection.send(method.to_sym, path, params).env
|
49
|
+
connection.send(method.to_sym, path.insert(0, @endpoint_path), params).env
|
50
50
|
rescue Faraday::Error::ClientError
|
51
51
|
raise Wayback::Error::ClientError
|
52
52
|
end
|
data/lib/wayback/configurable.rb
CHANGED
@@ -4,7 +4,7 @@ require 'wayback/error/configuration_error'
|
|
4
4
|
module Wayback
|
5
5
|
module Configurable
|
6
6
|
extend Forwardable
|
7
|
-
attr_accessor :endpoint, :connection_options, :identity_map, :middleware
|
7
|
+
attr_accessor :endpoint, :endpoint_path, :connection_options, :identity_map, :middleware
|
8
8
|
def_delegator :options, :hash
|
9
9
|
|
10
10
|
class << self
|
@@ -12,6 +12,7 @@ module Wayback
|
|
12
12
|
def keys
|
13
13
|
@keys ||= [
|
14
14
|
:endpoint,
|
15
|
+
:endpoint_path,
|
15
16
|
:connection_options,
|
16
17
|
:identity_map,
|
17
18
|
:middleware
|
data/lib/wayback/default.rb
CHANGED
@@ -10,7 +10,8 @@ require 'wayback/version'
|
|
10
10
|
|
11
11
|
module Wayback
|
12
12
|
module Default
|
13
|
-
ENDPOINT = 'http://
|
13
|
+
ENDPOINT = 'http://web.archive.org' unless defined? Wayback::Default::ENDPOINT
|
14
|
+
ENDPOINT_PATH = '/web' unless defined? Wayback::Default::ENDPOINT_PATH
|
14
15
|
CONNECTION_OPTIONS = {
|
15
16
|
:headers => {:user_agent => "Wayback Ruby Gem #{Wayback::Version}"},
|
16
17
|
:request => {:open_timeout => 5, :timeout => 10},
|
@@ -47,6 +48,10 @@ module Wayback
|
|
47
48
|
ENDPOINT
|
48
49
|
end
|
49
50
|
|
51
|
+
def endpoint_path
|
52
|
+
ENDPOINT_PATH
|
53
|
+
end
|
54
|
+
|
50
55
|
def connection_options
|
51
56
|
CONNECTION_OPTIONS
|
52
57
|
end
|
data/lib/wayback/version.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Wayback
|
2
2
|
class Version
|
3
3
|
MAJOR = 0 unless defined? Wayback::Version::MAJOR
|
4
|
-
MINOR =
|
4
|
+
MINOR = 2 unless defined? Wayback::Version::MINOR
|
5
5
|
PATCH = 0 unless defined? Wayback::Version::PATCH
|
6
6
|
PRE = nil unless defined? Wayback::Version::PRE
|
7
7
|
|
data/spec/fixtures/list.timemap
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
<http://
|
1
|
+
<http://web.archive.org/web/timebundle/http://gleu.ch>; rel="timebundle",
|
2
2
|
<http://gleu.ch>; rel="original",
|
3
|
-
<http://
|
4
|
-
<http://
|
5
|
-
<http://
|
6
|
-
<http://
|
7
|
-
<http://
|
8
|
-
<http://
|
9
|
-
<http://
|
3
|
+
<http://web.archive.org/web/timemap/link/http://gleu.ch>; rel="timemap"; type="application/link-format",
|
4
|
+
<http://web.archive.org/web/timegate/http://gleu.ch>; rel="timegate",
|
5
|
+
<http://web.archive.org/web/20110417182251/http://gleu.ch/>; rel="first memento"; datetime="Sun, 17 Apr 2011 18:22:51 GMT",
|
6
|
+
<http://web.archive.org/web/20120117073306/http://gleu.ch/>; rel="memento"; datetime="Tue, 17 Jan 2012 07:33:06 GMT",
|
7
|
+
<http://web.archive.org/web/20130113125339/http://gleu.ch/>; rel="memento"; datetime="Sun, 13 Jan 2013 12:53:39 GMT",
|
8
|
+
<http://web.archive.org/web/20130120084303/http://gleu.ch/>; rel="memento"; datetime="Sun, 20 Jan 2013 08:43:03 GMT",
|
9
|
+
<http://web.archive.org/web/20130129170322/http://gleu.ch/>; rel="last memento"; datetime="Tue, 29 Jan 2013 17:03:22 GMT"
|
data/spec/helper.rb
CHANGED
@@ -25,35 +25,35 @@ RSpec.configure do |config|
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def a_delete(path)
|
28
|
-
a_request(:delete, Wayback::Default::ENDPOINT + path)
|
28
|
+
a_request(:delete, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
29
29
|
end
|
30
30
|
|
31
31
|
def a_get(path)
|
32
|
-
a_request(:get, Wayback::Default::ENDPOINT + path)
|
32
|
+
a_request(:get, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
33
33
|
end
|
34
34
|
|
35
35
|
def a_post(path)
|
36
|
-
a_request(:post, Wayback::Default::ENDPOINT + path)
|
36
|
+
a_request(:post, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
37
37
|
end
|
38
38
|
|
39
39
|
def a_put(path)
|
40
|
-
a_request(:put, Wayback::Default::ENDPOINT + path)
|
40
|
+
a_request(:put, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
41
41
|
end
|
42
42
|
|
43
43
|
def stub_delete(path)
|
44
|
-
stub_request(:delete, Wayback::Default::ENDPOINT + path)
|
44
|
+
stub_request(:delete, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
45
45
|
end
|
46
46
|
|
47
47
|
def stub_get(path)
|
48
|
-
stub_request(:get, Wayback::Default::ENDPOINT + path)
|
48
|
+
stub_request(:get, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
49
49
|
end
|
50
50
|
|
51
51
|
def stub_post(path)
|
52
|
-
stub_request(:post, Wayback::Default::ENDPOINT + path)
|
52
|
+
stub_request(:post, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
53
53
|
end
|
54
54
|
|
55
55
|
def stub_put(path)
|
56
|
-
stub_request(:put, Wayback::Default::ENDPOINT + path)
|
56
|
+
stub_request(:put, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
57
57
|
end
|
58
58
|
|
59
59
|
def fixture_path
|
@@ -10,11 +10,11 @@ describe Wayback::API::Archive do
|
|
10
10
|
|
11
11
|
describe "#list" do
|
12
12
|
before do
|
13
|
-
stub_get("/
|
13
|
+
stub_get("/timemap/link/gleu.ch").to_return(:body => fixture("list.timemap"), :headers => {:content_type => "application/link-format"})
|
14
14
|
end
|
15
15
|
it "requests the correct resource" do
|
16
16
|
@client.list('gleu.ch')
|
17
|
-
expect(a_get("/
|
17
|
+
expect(a_get("/timemap/link/gleu.ch")).to have_been_made
|
18
18
|
end
|
19
19
|
it "returns the link data" do
|
20
20
|
timemap = @client.list('gleu.ch')
|
@@ -27,11 +27,11 @@ describe Wayback::API::Archive do
|
|
27
27
|
|
28
28
|
describe "#page" do
|
29
29
|
before do
|
30
|
-
stub_get("/
|
30
|
+
stub_get("/20130129170322/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
31
31
|
end
|
32
32
|
it "requests the correct resource" do
|
33
33
|
@client.page('gleu.ch', 20130129170322)
|
34
|
-
expect(a_get("/
|
34
|
+
expect(a_get("/20130129170322/gleu.ch")).to have_been_made
|
35
35
|
end
|
36
36
|
it "returns the desired page on date" do
|
37
37
|
page = @client.page('gleu.ch', 20130129170322)
|
@@ -39,31 +39,31 @@ describe Wayback::API::Archive do
|
|
39
39
|
expect(page.html).to match /^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im
|
40
40
|
end
|
41
41
|
it "returns the first desired page" do
|
42
|
-
stub_get("/
|
42
|
+
stub_get("/0/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
43
43
|
page = @client.page('gleu.ch', :first)
|
44
44
|
expect(page).to be_a Wayback::Page
|
45
45
|
expect(page.html).to match /^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im
|
46
46
|
end
|
47
47
|
it "returns the last desired page" do
|
48
|
-
stub_get("
|
48
|
+
stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
49
49
|
page = @client.page('gleu.ch', :last)
|
50
50
|
expect(page).to be_a Wayback::Page
|
51
51
|
expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
52
52
|
end
|
53
53
|
it "returns the desired page for Time" do
|
54
|
-
stub_get("
|
54
|
+
stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
55
55
|
page = @client.page('gleu.ch', Time.now)
|
56
56
|
expect(page).to be_a Wayback::Page
|
57
57
|
expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
58
58
|
end
|
59
59
|
it "returns the desired page for Time string" do
|
60
|
-
stub_get("
|
60
|
+
stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
61
61
|
page = @client.page('gleu.ch', Time.now.to_s)
|
62
62
|
expect(page).to be_a Wayback::Page
|
63
63
|
expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
64
64
|
end
|
65
65
|
# it "handles when error exists" do
|
66
|
-
# stub_get("
|
66
|
+
# stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:status => 204, :body => '', :headers => {:content_type => "text/xml"})
|
67
67
|
# page = @client.page('gleu.ch', Time.now.to_s)
|
68
68
|
# expect(page).to be_a Wayback::Page
|
69
69
|
# expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
data/spec/wayback/client_spec.rb
CHANGED
@@ -11,7 +11,7 @@ describe Wayback::Error::ClientError do
|
|
11
11
|
context "when HTTP status is #{status} and body is #{body.inspect}" do
|
12
12
|
before do
|
13
13
|
body_message = '<wayback><error><title>Hrm.</title><message>Wayback Machine doesn't have that page archived.</message></error></wayback>' unless body.nil?
|
14
|
-
stub_get("/
|
14
|
+
stub_get("/timemap/link/gleu.ch").to_return(:body => body_message, :status => status)
|
15
15
|
end
|
16
16
|
it "raises #{exception.name}" do
|
17
17
|
expect{@client.list('gleu.ch')}.to raise_error exception
|
@@ -9,7 +9,7 @@ describe Wayback::Error::ServerError do
|
|
9
9
|
Wayback::Error::ServerError.errors.each do |status, exception|
|
10
10
|
context "when HTTP status is #{status}" do
|
11
11
|
before do
|
12
|
-
stub_get("/
|
12
|
+
stub_get("/timemap/link/gleu.ch").to_return(:status => status)
|
13
13
|
end
|
14
14
|
it "raises #{exception.name}" do
|
15
15
|
expect{@client.list('gleu.ch')}.to raise_error exception
|
data/spec/wayback_spec.rb
CHANGED
@@ -10,12 +10,12 @@ describe Wayback do
|
|
10
10
|
|
11
11
|
context "when delegating to a client" do
|
12
12
|
before do
|
13
|
-
stub_get("/
|
13
|
+
stub_get("/timemap/link/gleu.ch").to_return(:body => fixture("list.timemap"), :headers => {:content_type => "application/link-format"})
|
14
14
|
end
|
15
15
|
|
16
16
|
it "requests the correct resource" do
|
17
17
|
Wayback.list('gleu.ch')
|
18
|
-
expect(a_get("/
|
18
|
+
expect(a_get("/timemap/link/gleu.ch")).to have_been_made
|
19
19
|
end
|
20
20
|
|
21
21
|
it "returns the same results as a client" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wayback
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-09-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|
@@ -154,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
154
|
version: 1.3.6
|
155
155
|
requirements: []
|
156
156
|
rubyforge_project:
|
157
|
-
rubygems_version: 1.8.
|
157
|
+
rubygems_version: 1.8.25
|
158
158
|
signing_key:
|
159
159
|
specification_version: 3
|
160
160
|
summary: A Ruby interface to the Archive.org's Wayback Machine Memento API.
|