wayback 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/.yardopts +10 -0
  2. data/CHANGELOG.md +3 -0
  3. data/CONTRIBUTING.md +52 -0
  4. data/LICENSE.md +20 -0
  5. data/README.md +81 -0
  6. data/Rakefile +11 -0
  7. data/lib/wayback.rb +32 -0
  8. data/lib/wayback/api/archive.rb +42 -0
  9. data/lib/wayback/api/utils.rb +27 -0
  10. data/lib/wayback/archive.rb +15 -0
  11. data/lib/wayback/base.rb +127 -0
  12. data/lib/wayback/client.rb +62 -0
  13. data/lib/wayback/configurable.rb +48 -0
  14. data/lib/wayback/default.rb +68 -0
  15. data/lib/wayback/error.rb +31 -0
  16. data/lib/wayback/error/already_favorited.rb +10 -0
  17. data/lib/wayback/error/already_retweeted.rb +10 -0
  18. data/lib/wayback/error/bad_gateway.rb +11 -0
  19. data/lib/wayback/error/bad_request.rb +10 -0
  20. data/lib/wayback/error/client_error.rb +28 -0
  21. data/lib/wayback/error/configuration_error.rb +8 -0
  22. data/lib/wayback/error/decode_error.rb +9 -0
  23. data/lib/wayback/error/forbidden.rb +10 -0
  24. data/lib/wayback/error/gateway_timeout.rb +11 -0
  25. data/lib/wayback/error/identity_map_key_error.rb +9 -0
  26. data/lib/wayback/error/internal_server_error.rb +11 -0
  27. data/lib/wayback/error/not_acceptable.rb +10 -0
  28. data/lib/wayback/error/not_found.rb +10 -0
  29. data/lib/wayback/error/server_error.rb +28 -0
  30. data/lib/wayback/error/service_unavailable.rb +11 -0
  31. data/lib/wayback/error/too_many_requests.rb +12 -0
  32. data/lib/wayback/error/unauthorized.rb +10 -0
  33. data/lib/wayback/error/unprocessable_entity.rb +10 -0
  34. data/lib/wayback/factory.rb +21 -0
  35. data/lib/wayback/identity.rb +50 -0
  36. data/lib/wayback/identity_map.rb +22 -0
  37. data/lib/wayback/page.rb +18 -0
  38. data/lib/wayback/response/parse_memento.rb +61 -0
  39. data/lib/wayback/response/parse_memento_page.rb +23 -0
  40. data/lib/wayback/response/raise_error.rb +31 -0
  41. data/lib/wayback/version.rb +18 -0
  42. data/spec/fixtures/list.timemap +9 -0
  43. data/spec/fixtures/page.html +225 -0
  44. data/spec/helper.rb +65 -0
  45. data/spec/wayback/api/archive_spec.rb +73 -0
  46. data/spec/wayback/archive_spec.rb +23 -0
  47. data/spec/wayback/base_spec.rb +117 -0
  48. data/spec/wayback/client_spec.rb +114 -0
  49. data/spec/wayback/error/client_error_spec.rb +23 -0
  50. data/spec/wayback/error/server_error_spec.rb +20 -0
  51. data/spec/wayback/error_spec.rb +20 -0
  52. data/spec/wayback/identifiable_spec.rb +50 -0
  53. data/spec/wayback/page_spec.rb +36 -0
  54. data/spec/wayback_spec.rb +47 -0
  55. data/wayback.gemspec +26 -0
  56. metadata +175 -0
@@ -0,0 +1,22 @@
module Wayback

  # Tracks objects to help ensure that each object gets loaded only once.
  # See: http://www.martinfowler.com/eaaCatalog/identityMap.html
  class IdentityMap < Hash

    # Looks up a stored object.
    #
    # Called with only an id, a missing key yields nil (it does not raise
    # KeyError like Hash#fetch). When a default value or a block is supplied
    # the call is handed to Hash#fetch, so the standard fallback forms keep
    # working on this Hash subclass.
    #
    # @param id
    # @param default [Array] optional fallback value, as accepted by Hash#fetch
    # @yield [id] optional fallback block, as accepted by Hash#fetch
    # @return [Object]
    def fetch(id, *default, &block)
      return self[id] if default.empty? && block.nil?
      super
    end

    # Stores an object under the given id.
    #
    # @param id
    # @param object
    # @return [Object] the stored object
    def store(id, object)
      self[id] = object
    end

  end

end
@@ -0,0 +1,18 @@
1
+ require 'wayback/identity'
2
+
module Wayback
  # An identity object that exposes an +html+ attribute.
  class Page < Wayback::Identity
    attr_reader :html

    # String form of the page.
    #
    # @return [String] the value of +html+, or an empty string when it is nil/false
    def to_s
      html || ''
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'faraday'
2
+ require 'time'
3
+
module Wayback
  module Response
    # Faraday response middleware that converts an +application/link-format+
    # body (a list of `<uri>; param="value"` link-values) into a Hash.
    class ParseMemento < Faraday::Response::Middleware

      # Parses a link-format body.
      #
      # @param body [String] raw response body
      # @param opts [Array] unused
      # @return [Hash, nil] nil unless a line of the body starts with "<http";
      #   otherwise a Hash with the keys
      #   :id (target of the rel="original" link),
      #   :dates (epoch seconds => {:datetime => String, :uri => String} for
      #   every rel="memento" link that carries a datetime),
      #   :first_date / :last_date (epoch seconds of the "first" / "last" memento).
      # @raise [ArgumentError] from Time.parse when a datetime value is unparsable
      def parse(body, *opts)
        case body
        # Assume it starts with "<http"
        when /^\<http/
          info = {:id => nil, :dates => {}, :first_date => nil, :last_date => nil}

          # Put every link-value on its own line, then squeeze whitespace runs.
          links = body.gsub(/,(\s+)?\</, ",\n<").gsub(/\s(\s+)/, ' ').split("\n")

          links.each do |link|
            uri = link_target(link)
            rels, datetime, date = [], nil, nil

            link_params(link).each do |key, value|
              case key
              when 'datetime'
                datetime, date = value, Time.parse(value).to_i
              when 'rel'
                rels = value.split(' ')
              end
              # 'from' and 'until' are deliberately ignored: not handled by archive.org
            end

            if rels.include?('original')
              info[:id] = uri
            elsif rels.include?('memento')
              info[:last_date] = date if rels.include?('last')
              info[:first_date] = date if rels.include?('first')
              info[:dates][date] = {:datetime => datetime, :uri => uri} unless date.nil?
            end
            # 'timebundle', 'timegate' and 'timemap' links carry nothing we keep.
          end

          info
        else
          nil
        end
      end

      # Replaces the response body with the parsed Hash when the response is
      # served as application/link-format and its status is not one of
      # 204, 301, 302 or 304.
      #
      # @param env [Hash] Faraday response environment
      def on_complete(env)
        headers = env[:response_headers]
        content_type = (headers && headers['content-type']) || ''
        return nil unless respond_to?(:parse) && content_type.match(/^application\/link-format/i)

        env[:body] = parse(env[:body]) unless [204, 301, 302, 304].include?(env[:status])
      end

      private

      # Extracts the target URI between the leading "<" and the first ">".
      # Any character is accepted inside the brackets, so URLs containing
      # "_", "%", "~", "+", "#", "@" and similar are handled. Falls back to
      # the whole line when there is no bracketed target.
      def link_target(link)
        link[/\A<([^>]+)>/, 1] || link
      end

      # Splits the parameters that follow the target into [name, value] pairs.
      # Whitespace around ";" is optional, a trailing "," is dropped, and the
      # surrounding quotes (if any) are removed from the value.
      # NOTE: a quoted value that itself contains ";" is not supported.
      def link_params(link)
        pairs = link.sub(/\A<[^>]*>/, '').split(';').map do |param|
          name, raw = param.split('=', 2)
          next if raw.nil?
          [name.strip, raw.strip.sub(/\s*,\z/, '').gsub(/\A['"]|['"]\z/, '')]
        end
        pairs.compact
      end

    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'faraday'
2
+
module Wayback
  module Response
    # Faraday response middleware that wraps text/html and
    # application/octet-stream bodies in a Hash of the request URL and the body.
    class ParseMementoPage < Faraday::Response::Middleware

      # @param body [Object] raw response body
      # @param opts [Array] unused
      # @return [Object] the body, untouched
      def parse(body, *opts)
        body
      end

      # Rewrites env[:body] to {:id => url, :html => body} for matching
      # content types, except when the status is 204, 301, 302 or 304.
      #
      # @param env [Hash] Faraday response environment
      def on_complete(env)
        headers = env[:response_headers]
        content_type = (headers && headers['content-type']) || ''
        return nil unless respond_to?(:parse) && content_type.match(/^(text\/html|application\/octet-stream)/i)
        return nil if [204, 301, 302, 304].include?(env[:status])

        env[:body] = {:id => env[:url].to_s, :html => parse(env[:body])}
      end

    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'faraday'
2
+ require 'wayback/error/bad_gateway'
3
+ require 'wayback/error/bad_request'
4
+ require 'wayback/error/forbidden'
5
+ require 'wayback/error/gateway_timeout'
6
+ require 'wayback/error/internal_server_error'
7
+ require 'wayback/error/not_acceptable'
8
+ require 'wayback/error/not_found'
9
+ require 'wayback/error/service_unavailable'
10
+ require 'wayback/error/too_many_requests'
11
+ require 'wayback/error/unauthorized'
12
+ require 'wayback/error/unprocessable_entity'
13
+
module Wayback
  module Response
    # Faraday response middleware that raises the error class registered
    # for the response's HTTP status, if there is one.
    class RaiseError < Faraday::Response::Middleware

      # @param app [Object] next element of the Faraday stack
      # @param klass [#errors] object whose +errors+ maps status codes to error classes
      def initialize(app, klass)
        @klass = klass
        super(app)
      end

      # Looks the integer status up in +@klass.errors+ and raises the matching
      # error, built with +from_response(env)+; does nothing when no class is
      # registered for the status.
      #
      # @param env [Hash] Faraday response environment
      def on_complete(env)
        error_class = @klass.errors[env[:status].to_i]
        return unless error_class

        raise error_class.from_response(env)
      end

    end
  end
end
@@ -0,0 +1,18 @@
module Wayback
  # Version number of the library, split into its components.
  class Version
    # Each constant is guarded so that loading this file twice does not
    # redefine it.
    MAJOR = 0 unless defined? Wayback::Version::MAJOR
    MINOR = 1 unless defined? Wayback::Version::MINOR
    PATCH = 0 unless defined? Wayback::Version::PATCH
    PRE = nil unless defined? Wayback::Version::PRE

    # Dotted version string; the PRE component is left out when it is nil.
    #
    # @return [String]
    def self.to_s
      components = [MAJOR, MINOR, PATCH, PRE]
      components.compact.join('.')
    end
  end
end
@@ -0,0 +1,9 @@
1
+ <http://api.wayback.archive.org/list/timebundle/http://gleu.ch>; rel="timebundle",
2
+ <http://gleu.ch>; rel="original",
3
+ <http://api.wayback.archive.org/list/timemap/link/http://gleu.ch>; rel="timemap"; type="application/link-format",
4
+ <http://api.wayback.archive.org/list/timegate/http://gleu.ch>; rel="timegate",
5
+ <http://api.wayback.archive.org/memento/20110417182251/http://gleu.ch/>; rel="first memento"; datetime="Sun, 17 Apr 2011 18:22:51 GMT",
6
+ <http://api.wayback.archive.org/memento/20120117073306/http://gleu.ch/>; rel="memento"; datetime="Tue, 17 Jan 2012 07:33:06 GMT",
7
+ <http://api.wayback.archive.org/memento/20130113125339/http://gleu.ch/>; rel="memento"; datetime="Sun, 13 Jan 2013 12:53:39 GMT",
8
+ <http://api.wayback.archive.org/memento/20130120084303/http://gleu.ch/>; rel="memento"; datetime="Sun, 20 Jan 2013 08:43:03 GMT",
9
+ <http://api.wayback.archive.org/memento/20130129170322/http://gleu.ch/>; rel="last memento"; datetime="Tue, 29 Jan 2013 17:03:22 GMT"
@@ -0,0 +1,225 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><base href="http://gleu.ch/" />
4
+ <!--
5
+ All work is copyrighted 2012 by Greg Leuch and/or its respective owners.
6
+ All other non-copyrighted or non-licensed work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License <http://creativecommons.org/licenses/by-sa/3.0/us>.
7
+ Permissions beyond the scope of this license may be available at: http://gleu.ch/contact.
8
+ -->
9
+
10
+ <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'>
11
+
12
+ <title>Greg Leuch: Creative + User Interaction</title>
13
+
14
+ <link rel="canonical" href="" />
15
+ <meta name="description" content="Greg Leuch is a user interface designer, interaction designer, visual designer, front-end web developer, and open-source artist."/>
16
+ <meta name="keywords" content="greg leuch, gleuch, halvfet, interface, interaction, visual, design, designer, user experience, art, programming, web, css, html, php, rails, javascript, code, general"/>
17
+ <meta name="author" content="Greg Leuch at http://gleu.ch/"/>
18
+ <meta name="robots" content="index, follow" />
19
+ <meta name="googlebot" content="index, follow"/>
20
+ <meta name="revisit-after" content="3 days" />
21
+ <meta name="distribution" content="global" />
22
+
23
+
24
+ <script type="text/javascript" src="/assets/js/jquery-1.8.3.min.js"></script>
25
+ <script type="text/javascript" src="/assets/js/site.js"></script>
26
+
27
+ <meta name="viewport" content="width=device-width; initial-scale=1.0" />
28
+ <link rel="stylesheet" href="/assets/css/site.css" media="all" />
29
+
30
+
31
+ <script type="text/javascript">var _gaq = _gaq || [];_gaq.push(['_setAccount', 'UA-2855868-25']);_gaq.push(['_trackPageview']);(function() {var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);})();</script>
32
+
33
+ </head><body class="home">
34
+ <div id="template_wrapper" class="c">
35
+ <div id="content_area" class="rel c">
36
+
37
+ <article id="content">
38
+
39
+
40
+
41
+ <section class="intro">
42
+ <h1>Portfolio of Greg Leuch</h1>
43
+ <p class="large serif"> Design, web, technology, and art.</p>
44
+ </section>
45
+
46
+ <section class="featured_list">
47
+ <h3>Featured Work &amp; Projects</h3>
48
+
49
+ <div class="first c">
50
+ <ul>
51
+ <li>
52
+ <dl>
53
+ <dd class="image">
54
+ <figure><a href="/projects/pop-block"><img src="/assets/projects/pop-block/feature.png" title="Pop Block" alt="" /></a></figure>
55
+ </dd>
56
+ <dt><a href="/projects/pop-block">Pop Block <ins class="new">New!</ins></a></dt>
57
+ <dd class="about">Manage your content bubble by controlling aspects of your online experience.</dd>
58
+ <dd>web application, browser extensions</dd>
59
+ </dl>
60
+ </li>
61
+
62
+ <li>
63
+ <dl>
64
+ <dd class="image">
65
+ <figure><a href="/projects/shaved-bieber"><img src="/assets/projects/shaved-bieber/feature.png" title="Shaved Bieber" alt="" /></a></figure>
66
+ </dd>
67
+ <dt><a href="/projects/shaved-bieber">Shaved Bieber</a></dt>
68
+ <dd class="about">An innovative tool to clean up the lower regions of web pages from unwanted Justin Bieber mentions.</dd>
69
+ <dd>project: browser extensions</dd>
70
+ </dl>
71
+ </li>
72
+
73
+ <!-- <li>
74
+ <dl>
75
+ <dd class="image">
76
+ <figure><a href="/work/heineken-fever-keeper"><img src="/assets/work/heineken-feverkeeper/feature.png" title="Heineken Fever Keeper" alt="" /></a></figure>
77
+ </dd>
78
+ <dt><a href="/work/heineken-fever-keeper">Heineken Fever Keeper</a></dt>
79
+ <dd class="about">Block the spoilers from tape-delayed UEFA soccer matches.</dd>
80
+ <dd>work: browser extensions</dd>
81
+ </dl>
82
+ </li> -->
83
+
84
+ <li>
85
+ <dl>
86
+ <dd class="image">
87
+ <figure><a href="/projects/greed"><img src="/assets/projects/greed/feature.png" title="G.R.E.E.D." alt="" /></a></figure>
88
+ </dd>
89
+ <dt><a href="/projects/greed">G.R.E.E.D.</a></dt>
90
+ <dd class="about">Greed powers our economy through restrictive licensing deals and claims of copyright.</dd>
91
+ <dd>project: browser extensions</dd>
92
+ </dl>
93
+ </li>
94
+
95
+ <li>
96
+ <dl>
97
+ <dd class="image">
98
+ <figure><a href="/work/know-your-meme"><img src="/assets/work/know-your-meme/feature.png" title="Know Your Meme" alt="" /></a></figure>
99
+ </dd>
100
+ <dt><a href="/work/know-your-meme">Know Your Meme</a></dt>
101
+ <dd class="about">Documenting Internet phenomena: viral videos, image macros, catchphrases, web celebs and more.</dd>
102
+ <dd>web application, design, branding</dd>
103
+ </dl>
104
+ </li>
105
+
106
+ <li>
107
+ <dl>
108
+ <dd class="image">
109
+ <figure><a href="/projects/ctrl-f-d"><img src="/assets/projects/ctrl-f-d/feature.png" title="Ctrl+F'd" alt="" /></a></figure>
110
+ </dd>
111
+ <dt><a href="/projects/ctrl-f-d">Ctrl+F'd</a></dt>
112
+ <dd class="about">Many people believe you cannot keyword search online documents. In other words, they are Ctrl+F'd. A playful experiment in "censoring" a web page by hiding text and images behind blocks.</dd>
113
+ <dd>bookmarklet</dd>
114
+ </dl>
115
+ </li>
116
+
117
+ </ul>
118
+ </div>
119
+ </section>
120
+
121
+ <hr />
122
+
123
+ <footer class="tc">
124
+ <p class="large link a b">View a list of all <a href="/work/all">client work</a> or <a href="/projects/all">projects</a>.</p>
125
+ </footer>
126
+ </article>
127
+
128
+ <aside id="sidebar">
129
+
130
+
131
+ <section class="box">
132
+ <h4>Who</h4>
133
+ <p>Maker of <a href="https://pop-block.com">Pop Block</a>, <a href="http://metafetch.com" target="_blank">Metafetch</a> and other products. <a href="http://fffff.at" target="_blank">F.A.T. Lab</a> virtual fellow. Previously at BuzzFeed, Know Your Meme, Rocketboom, and JESS3.</p>
134
+ </section>
135
+
136
+ <section class="box">
137
+ <h4>What</h4>
138
+ <p>Creative freelancer with a background in user experience design, interface design, interaction design, visual design, e-commerce, and web development.</p>
139
+ </section>
140
+
141
+ <section class="box">
142
+ <h4>Work Availablity</h4>
143
+ <p>Available for full-time or project-based design, development, and idea/strategy freelance work. <em>(Hurry, limited time offer!)</em></p>
144
+ </section>
145
+
146
+ <section id="newsletter_signup" class="box ">
147
+ <h4>Newsletter</h4>
148
+
149
+ <p>Sign up for updates and other announcements.</p>
150
+
151
+ <form action="/newsletter/signup.php" method="post">
152
+ <input type="hidden" name="auth" value="278693195410" />
153
+ <input type="hidden" name="referrer" value="/" />
154
+ <input type="text" name="email" value="" placeholder="Your email address..." />
155
+ <button><i class="icon newsletter button">Sign Up!</i></button>
156
+ </form>
157
+ </section>
158
+
159
+ <section class="box">
160
+ <h4>Online Activity</h4>
161
+ <ul>
162
+ <li><a class="grey dc" href="https://twitter.com/gleuch" title="Twitter" target="_blank">Twitter</a></li>
163
+ <li><a class="grey dc" href="https://github.com/gleuch" title="GitHub" target="_blank">GitHub</a></li>
164
+ <li><a class="grey dc" href="http://instagram.com/gleuch" title="Instagram" target="_blank">Instagram</a></li>
165
+ <li><a class="grey dc" href="http://www.linkedin.com/in/gleuch" title="LinkedIn" target="_blank">LinkedIn</a></li>
166
+ <li><a class="grey dc" href="https://www.facebook.com/gleuch" title="Facebook" target="_blank">Facebook</a></li>
167
+ </ul>
168
+ </section>
169
+ </aside>
170
+
171
+ </div>
172
+
173
+ <header id="header" class="c">
174
+ <h1 title="Greg Leuch"><a href="/"><i class="icon logo">Greg Leuch</i></a></h1>
175
+ <nav>
176
+ <ul class="c">
177
+ <li class=""><a href="/projects" title="Projects" class="">Projects</a></li>
178
+ <li class=""><a href="/work" title="Client Work" class=""><span class="mfull inline">Client</span> Work</a></li>
179
+ <li class=""><a href="/about" title="About" class="">About</a></li>
180
+ <li class=""><a href="/contact" title="Contact" class="">Contact</a></li>
181
+ <!-- <li class=""><a href="http://blog.gleu.ch" title="Blog" class="">Blog</a></li> -->
182
+ <li class="twitter"><a href="https://twitter.com/gleuch" target="_blank"><i class="icon twitter">Twitter: @gleuch</i></a></li>
183
+ <li class="facebook"><a href="https://facebook.com/gleuch" target="_blank"><i class="icon facebook">Facebook: Greg Leuch</i></a></li>
184
+ </ul>
185
+ </nav>
186
+ </header>
187
+
188
+ <footer id="footer" class="c">
189
+ <p>&copy; 2003&ndash;2013 Greg Leuch, unless noted differently.</p>
190
+ </footer>
191
+
192
+ </div>
193
+
194
+ <div id="template_static_bar"></div>
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+ <!--
204
+ FILE ARCHIVED ON 17:03:22 Jan 29, 2013 AND RETRIEVED FROM THE
205
+ INTERNET ARCHIVE ON 16:31:56 Mar 16, 2013.
206
+ JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE.
207
+
208
+ ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C.
209
+ SECTION 108(a)(3)).
210
+ -->
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+ <script type="text/javascript">
220
+ var wmNotice = "Wayback - External links, forms, and search boxes may not function within this collection. Url: http://gleu.ch/ time: 17:03:22 Jan 29, 2013";
221
+ var wmHideNotice = "hide";
222
+ </script>
223
+ <script type="text/javascript" src="/static/js/disclaim.js"></script>
224
+ </body>
225
+ </html>
data/spec/helper.rb ADDED
@@ -0,0 +1,65 @@
# Coverage reporting is configured and started before `wayback` is required.
require 'simplecov'
require 'coveralls'
Coveralls.wear!

coverage_formatters = [
  SimpleCov::Formatter::HTMLFormatter,
  Coveralls::SimpleCov::Formatter
]
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[*coverage_formatters]
SimpleCov.start

require 'wayback'
require 'wayback/identity_map'
require 'rspec'
require 'stringio'
require 'tempfile'
require 'timecop'
require 'webmock/rspec'

# Real network access is blocked, with coveralls.io as the only allowed host.
WebMock.disable_net_connect!(:allow => 'coveralls.io')

# Only the `expect` syntax is enabled for rspec expectations.
RSpec.configure do |config|
  config.expect_with(:rspec) { |expectations| expectations.syntax = :expect }
end
26
+
# Builds an `a_request` matcher for a DELETE to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def a_delete(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:delete, url)
end
30
+
# Builds an `a_request` matcher for a GET to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def a_get(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:get, url)
end
34
+
# Builds an `a_request` matcher for a POST to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def a_post(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:post, url)
end
38
+
# Builds an `a_request` matcher for a PUT to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def a_put(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:put, url)
end
42
+
# Registers a `stub_request` for a DELETE to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def stub_delete(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:delete, url)
end
46
+
# Registers a `stub_request` for a GET to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def stub_get(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:get, url)
end
50
+
# Registers a `stub_request` for a POST to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def stub_post(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:post, url)
end
54
+
# Registers a `stub_request` for a PUT to the default endpoint.
#
# @param path [String] path appended to Wayback::Default::ENDPOINT
def stub_put(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:put, url)
end
58
+
# Absolute path of the `fixtures` directory that sits next to this file.
#
# @return [String]
def fixture_path
  File.expand_path('fixtures', File.dirname(__FILE__))
end
62
+
# Opens a fixture file by name.
#
# @param file [String] file name relative to `fixture_path`
# @return [File] an open handle; the caller is responsible for it
def fixture(file)
  full_path = fixture_path + '/' + file
  File.new(full_path)
end