wayback 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +4 -0
- data/README.md +28 -11
- data/lib/wayback/api/archive.rb +5 -4
- data/lib/wayback/client.rb +1 -1
- data/lib/wayback/configurable.rb +2 -1
- data/lib/wayback/default.rb +6 -1
- data/lib/wayback/version.rb +1 -1
- data/spec/fixtures/list.timemap +8 -8
- data/spec/helper.rb +8 -8
- data/spec/wayback/api/archive_spec.rb +9 -9
- data/spec/wayback/client_spec.rb +1 -0
- data/spec/wayback/error/client_error_spec.rb +1 -1
- data/spec/wayback/error/server_error_spec.rb +1 -1
- data/spec/wayback_spec.rb +2 -2
- metadata +3 -3
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
0.2.0
|
2
|
+
-----
|
3
|
+
* Archive.org has changed their endpoint URLs to http://web.archive.org/web. Version 0.2.0 corrects this issue with the new ENDPOINT_PATH configuration for handling the /web path.
|
4
|
+
|
1
5
|
0.1.0
|
2
6
|
-----
|
3
7
|
* [Initial release](https://github.com/XOlator/wayback_gem)
|
data/README.md
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
# Wayback Gem
|
2
2
|
|
3
|
-
][gem]
|
4
|
+
[][travis]
|
5
|
+
[][coveralls]
|
6
|
+
|
7
|
+
[gem]: https://rubygems.org/gems/wayback
|
8
|
+
[travis]: http://travis-ci.org/XOlator/wayback_gem
|
9
|
+
[coveralls]: https://coveralls.io/r/XOlator/wayback_gem
|
10
|
+
|
5
11
|
|
6
12
|
A Ruby interface to Archive.org's Wayback Machine Memento API.
|
7
13
|
|
@@ -9,22 +15,30 @@ A Ruby interface to Archive.org's Wayback Machine Memento API.
|
|
9
15
|
gem install wayback
|
10
16
|
|
11
17
|
## Quick Start Guide
|
12
|
-
|
18
|
+
Accessing the Wayback Machine is super-duper easy.
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
require 'wayback'
|
22
|
+
Wayback.page('http://www.xolator.com', :first)
|
23
|
+
```
|
13
24
|
|
14
25
|
|
15
26
|
## Documentation
|
16
|
-
|
27
|
+
You can browse the Rdoc [here][documentation].
|
28
|
+
|
29
|
+
[documentation]: http://rdoc.info/github/XOlator/wayback_gem/master/frames
|
17
30
|
|
18
31
|
|
19
32
|
## Configuration
|
20
33
|
|
21
|
-
There is no real configuration necessary for accessing
|
34
|
+
No configuration is required to access the Wayback Machine Memento API; however, you can change the endpoint and other basic connection options.
|
22
35
|
|
23
|
-
The current defaults for this gem are:
|
36
|
+
The current default configuration for this gem is:
|
24
37
|
|
25
38
|
```ruby
|
26
39
|
Wayback.configure do |c|
|
27
|
-
c.endpoint = 'http://
|
40
|
+
c.endpoint = 'http://web.archive.org'
|
41
|
+
c.endpoint_path = '/web'
|
28
42
|
c.connection_options = {
|
29
43
|
:headers => {:user_agent => "Wayback Ruby Gem #{Wayback::Version}"},
|
30
44
|
:request => {:open_timeout => 5, :timeout => 10},
|
@@ -68,14 +82,17 @@ Wayback.page('http://www.xolator.com', :last)
|
|
68
82
|
|
69
83
|
|
70
84
|
## Additional Notes
|
71
|
-
Based heavily on the [Twitter gem]
|
85
|
+
Based heavily on the [Twitter gem][twitter_gem]. (Xièxie!)
|
72
86
|
|
73
|
-
More information on Archive's Wayback Machine memento API can be found [here]
|
87
|
+
More information on Archive's Wayback Machine memento API can be found [here][wayback_api].
|
74
88
|
|
89
|
+
[twitter_gem]: https://www.github.com/sferik/twitter
|
90
|
+
[wayback_api]: http://mementoweb.org/depot/native/ia/
|
75
91
|
|
76
92
|
|
77
93
|
## Copyright
|
78
|
-
Copyright (c) 2013 XOlator.
|
79
|
-
See [LICENSE][] for details.
|
94
|
+
Copyright (c) 2013 [XOlator][xolator].
|
95
|
+
See [LICENSE][license] for details.
|
80
96
|
|
97
|
+
[xolator]: http://www.xolator.com
|
81
98
|
[license]: LICENSE.md
|
data/lib/wayback/api/archive.rb
CHANGED
@@ -10,19 +10,20 @@ module Wayback
|
|
10
10
|
# Return a list of archived pages
|
11
11
|
#
|
12
12
|
# @return [Wayback::Archive]
|
13
|
-
# @param url [String] The page that of which was archived.
|
13
|
+
# @param url [String] The URI of the page that was archived.
|
14
14
|
# @param options [Hash] A customizable set of options.
|
15
15
|
# @example Return the list of available archives for a web page.
|
16
16
|
# Wayback.list('http://gleu.ch')
|
17
17
|
def list(url, options={})
|
18
|
-
object_from_response(Wayback::Archive, :get, "/
|
18
|
+
object_from_response(Wayback::Archive, :get, "/timemap/link/#{url}", options)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Returns the HTML contents of an archive page, fetched by date
|
22
22
|
#
|
23
23
|
# @raise [Wayback::Error::Unauthorized] Error raised when supplied user credentials are not valid.
|
24
24
|
# @return [Wayback::Page]
|
25
|
-
# @param url [String] The page that of which was archived.
|
25
|
+
# @param url [String] The URI of the page that was archived.
|
26
|
+
# @param date [String, Symbol, Time, Fixnum, Integer] A date or symbol identifying which dated archive of the page to fetch. Symbols include :first and :last. Strings are converted to integer timestamps.
|
26
27
|
# @param options [Hash] A customizable set of options.
|
27
28
|
# @example Return the HTML archive for the page.
|
28
29
|
# Wayback.page('http://gleu.ch')
|
@@ -34,7 +35,7 @@ module Wayback
|
|
34
35
|
date = 0 if date == :first
|
35
36
|
date = Time.now if date == :last
|
36
37
|
date = Time.parse(date).to_i unless [Fixnum,Time,Integer].include?(date.class)
|
37
|
-
object_from_response(Wayback::Page, :get, "
|
38
|
+
object_from_response(Wayback::Page, :get, "/#{date.to_i}/#{url}", options)
|
38
39
|
end
|
39
40
|
|
40
41
|
end
|
data/lib/wayback/client.rb
CHANGED
@@ -46,7 +46,7 @@ module Wayback
|
|
46
46
|
private
|
47
47
|
|
48
48
|
def request(method, path, params={}, signature_params=params)
|
49
|
-
connection.send(method.to_sym, path, params).env
|
49
|
+
connection.send(method.to_sym, path.insert(0, @endpoint_path), params).env
|
50
50
|
rescue Faraday::Error::ClientError
|
51
51
|
raise Wayback::Error::ClientError
|
52
52
|
end
|
data/lib/wayback/configurable.rb
CHANGED
@@ -4,7 +4,7 @@ require 'wayback/error/configuration_error'
|
|
4
4
|
module Wayback
|
5
5
|
module Configurable
|
6
6
|
extend Forwardable
|
7
|
-
attr_accessor :endpoint, :connection_options, :identity_map, :middleware
|
7
|
+
attr_accessor :endpoint, :endpoint_path, :connection_options, :identity_map, :middleware
|
8
8
|
def_delegator :options, :hash
|
9
9
|
|
10
10
|
class << self
|
@@ -12,6 +12,7 @@ module Wayback
|
|
12
12
|
def keys
|
13
13
|
@keys ||= [
|
14
14
|
:endpoint,
|
15
|
+
:endpoint_path,
|
15
16
|
:connection_options,
|
16
17
|
:identity_map,
|
17
18
|
:middleware
|
data/lib/wayback/default.rb
CHANGED
@@ -10,7 +10,8 @@ require 'wayback/version'
|
|
10
10
|
|
11
11
|
module Wayback
|
12
12
|
module Default
|
13
|
-
ENDPOINT = 'http://
|
13
|
+
ENDPOINT = 'http://web.archive.org' unless defined? Wayback::Default::ENDPOINT
|
14
|
+
ENDPOINT_PATH = '/web' unless defined? Wayback::Default::ENDPOINT_PATH
|
14
15
|
CONNECTION_OPTIONS = {
|
15
16
|
:headers => {:user_agent => "Wayback Ruby Gem #{Wayback::Version}"},
|
16
17
|
:request => {:open_timeout => 5, :timeout => 10},
|
@@ -47,6 +48,10 @@ module Wayback
|
|
47
48
|
ENDPOINT
|
48
49
|
end
|
49
50
|
|
51
|
+
def endpoint_path
|
52
|
+
ENDPOINT_PATH
|
53
|
+
end
|
54
|
+
|
50
55
|
def connection_options
|
51
56
|
CONNECTION_OPTIONS
|
52
57
|
end
|
data/lib/wayback/version.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Wayback
|
2
2
|
class Version
|
3
3
|
MAJOR = 0 unless defined? Wayback::Version::MAJOR
|
4
|
-
MINOR =
|
4
|
+
MINOR = 2 unless defined? Wayback::Version::MINOR
|
5
5
|
PATCH = 0 unless defined? Wayback::Version::PATCH
|
6
6
|
PRE = nil unless defined? Wayback::Version::PRE
|
7
7
|
|
data/spec/fixtures/list.timemap
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
<http://
|
1
|
+
<http://web.archive.org/web/timebundle/http://gleu.ch>; rel="timebundle",
|
2
2
|
<http://gleu.ch>; rel="original",
|
3
|
-
<http://
|
4
|
-
<http://
|
5
|
-
<http://
|
6
|
-
<http://
|
7
|
-
<http://
|
8
|
-
<http://
|
9
|
-
<http://
|
3
|
+
<http://web.archive.org/web/timemap/link/http://gleu.ch>; rel="timemap"; type="application/link-format",
|
4
|
+
<http://web.archive.org/web/timegate/http://gleu.ch>; rel="timegate",
|
5
|
+
<http://web.archive.org/web/20110417182251/http://gleu.ch/>; rel="first memento"; datetime="Sun, 17 Apr 2011 18:22:51 GMT",
|
6
|
+
<http://web.archive.org/web/20120117073306/http://gleu.ch/>; rel="memento"; datetime="Tue, 17 Jan 2012 07:33:06 GMT",
|
7
|
+
<http://web.archive.org/web/20130113125339/http://gleu.ch/>; rel="memento"; datetime="Sun, 13 Jan 2013 12:53:39 GMT",
|
8
|
+
<http://web.archive.org/web/20130120084303/http://gleu.ch/>; rel="memento"; datetime="Sun, 20 Jan 2013 08:43:03 GMT",
|
9
|
+
<http://web.archive.org/web/20130129170322/http://gleu.ch/>; rel="last memento"; datetime="Tue, 29 Jan 2013 17:03:22 GMT"
|
data/spec/helper.rb
CHANGED
@@ -25,35 +25,35 @@ RSpec.configure do |config|
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def a_delete(path)
|
28
|
-
a_request(:delete, Wayback::Default::ENDPOINT + path)
|
28
|
+
a_request(:delete, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
29
29
|
end
|
30
30
|
|
31
31
|
def a_get(path)
|
32
|
-
a_request(:get, Wayback::Default::ENDPOINT + path)
|
32
|
+
a_request(:get, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
33
33
|
end
|
34
34
|
|
35
35
|
def a_post(path)
|
36
|
-
a_request(:post, Wayback::Default::ENDPOINT + path)
|
36
|
+
a_request(:post, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
37
37
|
end
|
38
38
|
|
39
39
|
def a_put(path)
|
40
|
-
a_request(:put, Wayback::Default::ENDPOINT + path)
|
40
|
+
a_request(:put, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
41
41
|
end
|
42
42
|
|
43
43
|
def stub_delete(path)
|
44
|
-
stub_request(:delete, Wayback::Default::ENDPOINT + path)
|
44
|
+
stub_request(:delete, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
45
45
|
end
|
46
46
|
|
47
47
|
def stub_get(path)
|
48
|
-
stub_request(:get, Wayback::Default::ENDPOINT + path)
|
48
|
+
stub_request(:get, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
49
49
|
end
|
50
50
|
|
51
51
|
def stub_post(path)
|
52
|
-
stub_request(:post, Wayback::Default::ENDPOINT + path)
|
52
|
+
stub_request(:post, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
53
53
|
end
|
54
54
|
|
55
55
|
def stub_put(path)
|
56
|
-
stub_request(:put, Wayback::Default::ENDPOINT + path)
|
56
|
+
stub_request(:put, Wayback::Default::ENDPOINT + Wayback::Default::ENDPOINT_PATH + path)
|
57
57
|
end
|
58
58
|
|
59
59
|
def fixture_path
|
@@ -10,11 +10,11 @@ describe Wayback::API::Archive do
|
|
10
10
|
|
11
11
|
describe "#list" do
|
12
12
|
before do
|
13
|
-
stub_get("/
|
13
|
+
stub_get("/timemap/link/gleu.ch").to_return(:body => fixture("list.timemap"), :headers => {:content_type => "application/link-format"})
|
14
14
|
end
|
15
15
|
it "requests the correct resource" do
|
16
16
|
@client.list('gleu.ch')
|
17
|
-
expect(a_get("/
|
17
|
+
expect(a_get("/timemap/link/gleu.ch")).to have_been_made
|
18
18
|
end
|
19
19
|
it "returns the link data" do
|
20
20
|
timemap = @client.list('gleu.ch')
|
@@ -27,11 +27,11 @@ describe Wayback::API::Archive do
|
|
27
27
|
|
28
28
|
describe "#page" do
|
29
29
|
before do
|
30
|
-
stub_get("/
|
30
|
+
stub_get("/20130129170322/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
31
31
|
end
|
32
32
|
it "requests the correct resource" do
|
33
33
|
@client.page('gleu.ch', 20130129170322)
|
34
|
-
expect(a_get("/
|
34
|
+
expect(a_get("/20130129170322/gleu.ch")).to have_been_made
|
35
35
|
end
|
36
36
|
it "returns the desired page on date" do
|
37
37
|
page = @client.page('gleu.ch', 20130129170322)
|
@@ -39,31 +39,31 @@ describe Wayback::API::Archive do
|
|
39
39
|
expect(page.html).to match /^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im
|
40
40
|
end
|
41
41
|
it "returns the first desired page" do
|
42
|
-
stub_get("/
|
42
|
+
stub_get("/0/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
43
43
|
page = @client.page('gleu.ch', :first)
|
44
44
|
expect(page).to be_a Wayback::Page
|
45
45
|
expect(page.html).to match /^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im
|
46
46
|
end
|
47
47
|
it "returns the last desired page" do
|
48
|
-
stub_get("
|
48
|
+
stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
49
49
|
page = @client.page('gleu.ch', :last)
|
50
50
|
expect(page).to be_a Wayback::Page
|
51
51
|
expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
52
52
|
end
|
53
53
|
it "returns the desired page for Time" do
|
54
|
-
stub_get("
|
54
|
+
stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
55
55
|
page = @client.page('gleu.ch', Time.now)
|
56
56
|
expect(page).to be_a Wayback::Page
|
57
57
|
expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
58
58
|
end
|
59
59
|
it "returns the desired page for Time string" do
|
60
|
-
stub_get("
|
60
|
+
stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:body => fixture("page.html"), :headers => {:content_type => "text/html"})
|
61
61
|
page = @client.page('gleu.ch', Time.now.to_s)
|
62
62
|
expect(page).to be_a Wayback::Page
|
63
63
|
expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
64
64
|
end
|
65
65
|
# it "handles when error exists" do
|
66
|
-
# stub_get("
|
66
|
+
# stub_get("/#{Time.now.to_i}/gleu.ch").to_return(:status => 204, :body => '', :headers => {:content_type => "text/xml"})
|
67
67
|
# page = @client.page('gleu.ch', Time.now.to_s)
|
68
68
|
# expect(page).to be_a Wayback::Page
|
69
69
|
# expect(page.html).to match(/^\<\!DOCTYPE html\>.*http\:\/\/gleu\.ch.*\<\/html\>/im)
|
data/spec/wayback/client_spec.rb
CHANGED
@@ -11,7 +11,7 @@ describe Wayback::Error::ClientError do
|
|
11
11
|
context "when HTTP status is #{status} and body is #{body.inspect}" do
|
12
12
|
before do
|
13
13
|
body_message = '<wayback><error><title>Hrm.</title><message>Wayback Machine doesn't have that page archived.</message></error></wayback>' unless body.nil?
|
14
|
-
stub_get("/
|
14
|
+
stub_get("/timemap/link/gleu.ch").to_return(:body => body_message, :status => status)
|
15
15
|
end
|
16
16
|
it "raises #{exception.name}" do
|
17
17
|
expect{@client.list('gleu.ch')}.to raise_error exception
|
@@ -9,7 +9,7 @@ describe Wayback::Error::ServerError do
|
|
9
9
|
Wayback::Error::ServerError.errors.each do |status, exception|
|
10
10
|
context "when HTTP status is #{status}" do
|
11
11
|
before do
|
12
|
-
stub_get("/
|
12
|
+
stub_get("/timemap/link/gleu.ch").to_return(:status => status)
|
13
13
|
end
|
14
14
|
it "raises #{exception.name}" do
|
15
15
|
expect{@client.list('gleu.ch')}.to raise_error exception
|
data/spec/wayback_spec.rb
CHANGED
@@ -10,12 +10,12 @@ describe Wayback do
|
|
10
10
|
|
11
11
|
context "when delegating to a client" do
|
12
12
|
before do
|
13
|
-
stub_get("/
|
13
|
+
stub_get("/timemap/link/gleu.ch").to_return(:body => fixture("list.timemap"), :headers => {:content_type => "application/link-format"})
|
14
14
|
end
|
15
15
|
|
16
16
|
it "requests the correct resource" do
|
17
17
|
Wayback.list('gleu.ch')
|
18
|
-
expect(a_get("/
|
18
|
+
expect(a_get("/timemap/link/gleu.ch")).to have_been_made
|
19
19
|
end
|
20
20
|
|
21
21
|
it "returns the same results as a client" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wayback
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-09-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|
@@ -154,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
154
|
version: 1.3.6
|
155
155
|
requirements: []
|
156
156
|
rubyforge_project:
|
157
|
-
rubygems_version: 1.8.
|
157
|
+
rubygems_version: 1.8.25
|
158
158
|
signing_key:
|
159
159
|
specification_version: 3
|
160
160
|
summary: A Ruby interface to the Archive.org's Wayback Machine Memento API.
|