wayback 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. data/.yardopts +10 -0
  2. data/CHANGELOG.md +3 -0
  3. data/CONTRIBUTING.md +52 -0
  4. data/LICENSE.md +20 -0
  5. data/README.md +81 -0
  6. data/Rakefile +11 -0
  7. data/lib/wayback.rb +32 -0
  8. data/lib/wayback/api/archive.rb +42 -0
  9. data/lib/wayback/api/utils.rb +27 -0
  10. data/lib/wayback/archive.rb +15 -0
  11. data/lib/wayback/base.rb +127 -0
  12. data/lib/wayback/client.rb +62 -0
  13. data/lib/wayback/configurable.rb +48 -0
  14. data/lib/wayback/default.rb +68 -0
  15. data/lib/wayback/error.rb +31 -0
  16. data/lib/wayback/error/already_favorited.rb +10 -0
  17. data/lib/wayback/error/already_retweeted.rb +10 -0
  18. data/lib/wayback/error/bad_gateway.rb +11 -0
  19. data/lib/wayback/error/bad_request.rb +10 -0
  20. data/lib/wayback/error/client_error.rb +28 -0
  21. data/lib/wayback/error/configuration_error.rb +8 -0
  22. data/lib/wayback/error/decode_error.rb +9 -0
  23. data/lib/wayback/error/forbidden.rb +10 -0
  24. data/lib/wayback/error/gateway_timeout.rb +11 -0
  25. data/lib/wayback/error/identity_map_key_error.rb +9 -0
  26. data/lib/wayback/error/internal_server_error.rb +11 -0
  27. data/lib/wayback/error/not_acceptable.rb +10 -0
  28. data/lib/wayback/error/not_found.rb +10 -0
  29. data/lib/wayback/error/server_error.rb +28 -0
  30. data/lib/wayback/error/service_unavailable.rb +11 -0
  31. data/lib/wayback/error/too_many_requests.rb +12 -0
  32. data/lib/wayback/error/unauthorized.rb +10 -0
  33. data/lib/wayback/error/unprocessable_entity.rb +10 -0
  34. data/lib/wayback/factory.rb +21 -0
  35. data/lib/wayback/identity.rb +50 -0
  36. data/lib/wayback/identity_map.rb +22 -0
  37. data/lib/wayback/page.rb +18 -0
  38. data/lib/wayback/response/parse_memento.rb +61 -0
  39. data/lib/wayback/response/parse_memento_page.rb +23 -0
  40. data/lib/wayback/response/raise_error.rb +31 -0
  41. data/lib/wayback/version.rb +18 -0
  42. data/spec/fixtures/list.timemap +9 -0
  43. data/spec/fixtures/page.html +225 -0
  44. data/spec/helper.rb +65 -0
  45. data/spec/wayback/api/archive_spec.rb +73 -0
  46. data/spec/wayback/archive_spec.rb +23 -0
  47. data/spec/wayback/base_spec.rb +117 -0
  48. data/spec/wayback/client_spec.rb +114 -0
  49. data/spec/wayback/error/client_error_spec.rb +23 -0
  50. data/spec/wayback/error/server_error_spec.rb +20 -0
  51. data/spec/wayback/error_spec.rb +20 -0
  52. data/spec/wayback/identifiable_spec.rb +50 -0
  53. data/spec/wayback/page_spec.rb +36 -0
  54. data/spec/wayback_spec.rb +47 -0
  55. data/wayback.gemspec +26 -0
  56. metadata +175 -0
@@ -0,0 +1,22 @@
1
module Wayback
  # A Hash-backed identity map: objects are registered under an id so the
  # same id can later be resolved to the object that was stored for it.
  # See: http://www.martinfowler.com/eaaCatalog/identityMap.html
  class IdentityMap < Hash
    # Looks up the object registered under +id+.
    #
    # Unlike Hash#fetch this takes exactly one argument and does not raise
    # for an unknown id; it returns whatever Hash#[] returns (nil unless
    # the hash was built with a default).
    #
    # @param id
    # @return [Object]
    def fetch(id)
      self[id]
    end

    # Registers +object+ under +id+, replacing any previous entry.
    #
    # @param id
    # @param object
    # @return [Object] the object that was stored
    def store(id, object)
      # Hash#store is the same operation as Hash#[]= and returns the value.
      super(id, object)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'wayback/identity'
2
+
3
module Wayback
  # An archived page, identified through Wayback::Identity and exposing the
  # page markup through the +html+ reader.
  class Page < Wayback::Identity
    # NOTE(review): Wayback::Identity is defined elsewhere and may customise
    # how attr_reader-backed attributes are stored - confirm there.
    attr_reader :html

    # String form of the page.
    #
    # @return [String] the page HTML, or an empty string when no HTML is set
    def to_s
      markup = html
      markup ? markup : ''
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'faraday'
2
+ require 'time'
3
+
4
+ module Wayback
5
+ module Response
6
+ class ParseMemento < Faraday::Response::Middleware
7
+
8
# Parses a Memento TimeMap body (application/link-format) into a summary Hash.
#
# The body is a comma-separated list of link-values such as
#   <http://example.com/>; rel="first memento"; datetime="Sun, 17 Apr 2011 18:22:51 GMT",
# Links are classified by their +rel+ values:
# * "original" - its URI becomes +:id+.
# * "memento"  - recorded in +:dates+ keyed by the epoch seconds of its
#   +datetime+; "first"/"last" additionally set +:first_date+/+:last_date+.
# * anything else (timebundle, timegate, timemap) is ignored.
#
# @param body [Object] raw response body; only a String containing a line
#   that starts with "<http" is parsed
# @param opts [Array] unused, accepted for interface compatibility
# @return [Hash, nil] a Hash with +:id+, +:dates+, +:first_date+ and
#   +:last_date+, or nil when the body is not link-format
def parse(body, *opts)
  case body
  # Assume it starts with "<http"
  when /^\<http/
    info = {:id => nil, :dates => {}, :first_date => nil, :last_date => nil}

    # Put every link-value on its own line, then squeeze whitespace runs.
    links = body.gsub(/,(\s+)?\</, ",\n<").gsub(/\s(\s+)/, ' ').split("\n")

    links.each do |link|
      # Take everything between the angle brackets. A character whitelist
      # would reject valid URIs containing "_", "%", "~", "+" and similar.
      uri = link[/\A<([^>]+)>/, 1]
      next if uri.nil?

      rels, datetime, date = [], nil, nil

      link.split('; ').each do |attribute|
        # key="value", key='value' or key=value, optionally followed by the
        # comma that separates link-values.
        pair = attribute.match(/\A([A-Z0-9\-]+)=(?:['"](.*)['"]|(.*?)),?\z/i)
        next if pair.nil?

        value = pair[2] || pair[3]
        case pair[1]
        when 'datetime'
          begin
            date = Time.parse(value).to_i
            datetime = value
          rescue ArgumentError
            # One unparseable timestamp must not abort the whole TimeMap;
            # the link is left undated and therefore not listed in :dates.
            datetime, date = nil, nil
          end
        when 'rel'
          rels = value.split(' ')
        end
        # 'from' and 'until' are not handled by archive.org
      end

      if rels.include?('original')
        info[:id] = uri
      elsif rels.include?('memento')
        info[:last_date] = date if rels.include?('last')
        info[:first_date] = date if rels.include?('first')
        info[:dates][date] = {:datetime => datetime, :uri => uri} unless date.nil?
      end
    end

    info
  else
    nil
  end
end
52
+
53
# Response hook: replaces the body with the result of #parse when the
# response is declared as application/link-format.
#
# Nothing happens when the content type differs, when no content-type
# header is present, or for status 204, 301, 302 and 304.
#
# @param env [Hash] response environment; reads :response_headers, :status
#   and :body, and writes :body
def on_complete(env)
  return unless respond_to?(:parse)

  headers = env[:response_headers]
  content_type = (headers && headers['content-type']) || ''
  return unless content_type.match(/^application\/link-format/i)
  return if [204, 301, 302, 304].include?(env[:status])

  env[:body] = parse(env[:body])
end
58
+
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,23 @@
1
+ require 'faraday'
2
+
3
+ module Wayback
4
+ module Response
5
+ class ParseMementoPage < Faraday::Response::Middleware
6
+
7
# Pass-through parser: the page body is returned exactly as received.
#
# @param body [Object] response body
# @param opts [Array] unused
# @return [Object] +body+, unchanged
def parse(body, *opts)
  # No transformation is applied to the archived page.
  body
end
10
+
11
# Response hook: wraps an HTML (or octet-stream) body into a Hash holding
# the request URL as +:id+ and the parsed body as +:html+.
#
# Nothing happens when the content type is neither text/html nor
# application/octet-stream, when no content-type header is present, or for
# status 204, 301, 302 and 304.
#
# @param env [Hash] response environment; reads :response_headers, :status,
#   :url and :body, and writes :body
def on_complete(env)
  return unless respond_to?(:parse)

  headers = env[:response_headers]
  content_type = (headers && headers['content-type']) || ''
  return unless content_type.match(/^(text\/html|application\/octet-stream)/i)
  return if [204, 301, 302, 304].include?(env[:status])

  env[:body] = {:id => env[:url].to_s, :html => parse(env[:body])}
end
20
+
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,31 @@
1
+ require 'faraday'
2
+ require 'wayback/error/bad_gateway'
3
+ require 'wayback/error/bad_request'
4
+ require 'wayback/error/forbidden'
5
+ require 'wayback/error/gateway_timeout'
6
+ require 'wayback/error/internal_server_error'
7
+ require 'wayback/error/not_acceptable'
8
+ require 'wayback/error/not_found'
9
+ require 'wayback/error/service_unavailable'
10
+ require 'wayback/error/too_many_requests'
11
+ require 'wayback/error/unauthorized'
12
+ require 'wayback/error/unprocessable_entity'
13
+
14
+ module Wayback
15
+ module Response
16
+ class RaiseError < Faraday::Response::Middleware
17
+
18
# Response hook: raises the error registered for the response status.
#
# The status is converted with #to_i and looked up in +@klass.errors+;
# when an entry exists, the error built by its +from_response+ is raised.
# NOTE(review): +errors+ is presumably a Hash of status code => error
# class - confirm against the classes passed to #initialize.
#
# @param env [Hash] response environment; reads :status
# @return [nil] when no error is registered for the status
def on_complete(env)
  error_class = @klass.errors[env[:status].to_i]
  return unless error_class

  raise error_class.from_response(env)
end
23
+
24
# Builds the middleware around +app+ and remembers +klass+, whose +errors+
# table is consulted by #on_complete.
#
# @param app the next element of the middleware stack, handed to super
# @param klass object responding to +errors+
def initialize(app, klass)
  # Stored before delegating to the superclass constructor.
  @klass = klass
  super(app)
end
28
+
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,18 @@
1
module Wayback
  # Version number of the gem, split into its components.
  class Version
    # Each constant is only assigned when it is not defined yet, so
    # evaluating this file a second time does not reassign it.
    MAJOR = 0 unless defined?(Wayback::Version::MAJOR)
    MINOR = 1 unless defined?(Wayback::Version::MINOR)
    PATCH = 0 unless defined?(Wayback::Version::PATCH)
    PRE = nil unless defined?(Wayback::Version::PRE)

    # Dotted version string; PRE is left out when it is nil.
    #
    # @return [String]
    def self.to_s
      segments = [MAJOR, MINOR, PATCH, PRE]
      segments.compact.join('.')
    end
  end
end
@@ -0,0 +1,9 @@
1
+ <http://api.wayback.archive.org/list/timebundle/http://gleu.ch>; rel="timebundle",
2
+ <http://gleu.ch>; rel="original",
3
+ <http://api.wayback.archive.org/list/timemap/link/http://gleu.ch>; rel="timemap"; type="application/link-format",
4
+ <http://api.wayback.archive.org/list/timegate/http://gleu.ch>; rel="timegate",
5
+ <http://api.wayback.archive.org/memento/20110417182251/http://gleu.ch/>; rel="first memento"; datetime="Sun, 17 Apr 2011 18:22:51 GMT",
6
+ <http://api.wayback.archive.org/memento/20120117073306/http://gleu.ch/>; rel="memento"; datetime="Tue, 17 Jan 2012 07:33:06 GMT",
7
+ <http://api.wayback.archive.org/memento/20130113125339/http://gleu.ch/>; rel="memento"; datetime="Sun, 13 Jan 2013 12:53:39 GMT",
8
+ <http://api.wayback.archive.org/memento/20130120084303/http://gleu.ch/>; rel="memento"; datetime="Sun, 20 Jan 2013 08:43:03 GMT",
9
+ <http://api.wayback.archive.org/memento/20130129170322/http://gleu.ch/>; rel="last memento"; datetime="Tue, 29 Jan 2013 17:03:22 GMT"
@@ -0,0 +1,225 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><base href="http://gleu.ch/" />
4
+ <!--
5
+ All work is copyrighted 2012 by Greg Leuch and/or its respective owners.
6
+ All other non-copyrighted or non-licensed work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License <http://creativecommons.org/licenses/by-sa/3.0/us>.
7
+ Permissions beyond the scope of this license may be available at: http://gleu.ch/contact.
8
+ -->
9
+
10
+ <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'>
11
+
12
+ <title>Greg Leuch: Creative + User Interaction</title>
13
+
14
+ <link rel="canonical" href="" />
15
+ <meta name="description" content="Greg Leuch is a user interface designer, interaction designer, visual designer, front-end web developer, and open-source artist."/>
16
+ <meta name="keywords" content="greg leuch, gleuch, halvfet, interface, interaction, visual, design, designer, user experience, art, programming, web, css, html, php, rails, javascript, code, general"/>
17
+ <meta name="author" content="Greg Leuch at http://gleu.ch/"/>
18
+ <meta name="robots" content="index, follow" />
19
+ <meta name="googlebot" content="index, follow"/>
20
+ <meta name="revisit-after" content="3 days" />
21
+ <meta name="distribution" content="global" />
22
+
23
+
24
+ <script type="text/javascript" src="/assets/js/jquery-1.8.3.min.js"></script>
25
+ <script type="text/javascript" src="/assets/js/site.js"></script>
26
+
27
+ <meta name="viewport" content="width=device-width; initial-scale=1.0" />
28
+ <link rel="stylesheet" href="/assets/css/site.css" media="all" />
29
+
30
+
31
+ <script type="text/javascript">var _gaq = _gaq || [];_gaq.push(['_setAccount', 'UA-2855868-25']);_gaq.push(['_trackPageview']);(function() {var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);})();</script>
32
+
33
+ </head><body class="home">
34
+ <div id="template_wrapper" class="c">
35
+ <div id="content_area" class="rel c">
36
+
37
+ <article id="content">
38
+
39
+
40
+
41
+ <section class="intro">
42
+ <h1>Portfolio of Greg Leuch</h1>
43
+ <p class="large serif"> Design, web, technology, and art.</p>
44
+ </section>
45
+
46
+ <section class="featured_list">
47
+ <h3>Featured Work &amp; Projects</h3>
48
+
49
+ <div class="first c">
50
+ <ul>
51
+ <li>
52
+ <dl>
53
+ <dd class="image">
54
+ <figure><a href="/projects/pop-block"><img src="/assets/projects/pop-block/feature.png" title="Pop Block" alt="" /></a></figure>
55
+ </dd>
56
+ <dt><a href="/projects/pop-block">Pop Block <ins class="new">New!</ins></a></dt>
57
+ <dd class="about">Manage your content bubble by controlling aspects of your online experience.</dd>
58
+ <dd>web application, browser extensions</dd>
59
+ </dl>
60
+ </li>
61
+
62
+ <li>
63
+ <dl>
64
+ <dd class="image">
65
+ <figure><a href="/projects/shaved-bieber"><img src="/assets/projects/shaved-bieber/feature.png" title="Shaved Bieber" alt="" /></a></figure>
66
+ </dd>
67
+ <dt><a href="/projects/shaved-bieber">Shaved Bieber</a></dt>
68
+ <dd class="about">An innovative tool to clean up the lower regions of web pages from unwanted Justin Bieber mentions.</dd>
69
+ <dd>project: browser extensions</dd>
70
+ </dl>
71
+ </li>
72
+
73
+ <!-- <li>
74
+ <dl>
75
+ <dd class="image">
76
+ <figure><a href="/work/heineken-fever-keeper"><img src="/assets/work/heineken-feverkeeper/feature.png" title="Heineken Fever Keeper" alt="" /></a></figure>
77
+ </dd>
78
+ <dt><a href="/work/heineken-fever-keeper">Heineken Fever Keeper</a></dt>
79
+ <dd class="about">Block the spoilers from tape-delayed UEFA soccer matches.</dd>
80
+ <dd>work: browser extensions</dd>
81
+ </dl>
82
+ </li> -->
83
+
84
+ <li>
85
+ <dl>
86
+ <dd class="image">
87
+ <figure><a href="/projects/greed"><img src="/assets/projects/greed/feature.png" title="G.R.E.E.D." alt="" /></a></figure>
88
+ </dd>
89
+ <dt><a href="/projects/greed">G.R.E.E.D.</a></dt>
90
+ <dd class="about">Greed powers our economy through restrictive licensing deals and claims of copyright.</dd>
91
+ <dd>project: browser extensions</dd>
92
+ </dl>
93
+ </li>
94
+
95
+ <li>
96
+ <dl>
97
+ <dd class="image">
98
+ <figure><a href="/work/know-your-meme"><img src="/assets/work/know-your-meme/feature.png" title="Know Your Meme" alt="" /></a></figure>
99
+ </dd>
100
+ <dt><a href="/work/know-your-meme">Know Your Meme</a></dt>
101
+ <dd class="about">Documenting Internet phenomena: viral videos, image macros, catchphrases, web celebs and more.</dd>
102
+ <dd>web application, design, branding</dd>
103
+ </dl>
104
+ </li>
105
+
106
+ <li>
107
+ <dl>
108
+ <dd class="image">
109
+ <figure><a href="/projects/ctrl-f-d"><img src="/assets/projects/ctrl-f-d/feature.png" title="Ctrl+F'd" alt="" /></a></figure>
110
+ </dd>
111
+ <dt><a href="/projects/ctrl-f-d">Ctrl+F'd</a></dt>
112
+ <dd class="about">Many people believe you cannot keyword search online documents. In other words, they are Ctrl+F'd. A playful experiment in "censoring" a web page by hiding text and images behind blocks.</dd>
113
+ <dd>bookmarklet</dd>
114
+ </dl>
115
+ </li>
116
+
117
+ </ul>
118
+ </div>
119
+ </section>
120
+
121
+ <hr />
122
+
123
+ <footer class="tc">
124
+ <p class="large link a b">View a list of all <a href="/work/all">client work</a> or <a href="/projects/all">projects</a>.</p>
125
+ </footer>
126
+ </article>
127
+
128
+ <aside id="sidebar">
129
+
130
+
131
+ <section class="box">
132
+ <h4>Who</h4>
133
+ <p>Maker of <a href="https://pop-block.com">Pop Block</a>, <a href="http://metafetch.com" target="_blank">Metafetch</a> and other products. <a href="http://fffff.at" target="_blank">F.A.T. Lab</a> virtual fellow. Previously at BuzzFeed, Know Your Meme, Rocketboom, and JESS3.</p>
134
+ </section>
135
+
136
+ <section class="box">
137
+ <h4>What</h4>
138
+ <p>Creative freelancer with a background in user experience design, interface design, interaction design, visual design, e-commerce, and web development.</p>
139
+ </section>
140
+
141
+ <section class="box">
142
+ <h4>Work Availablity</h4>
143
+ <p>Available for full-time or project-based design, development, and idea/strategy freelance work. <em>(Hurry, limited time offer!)</em></p>
144
+ </section>
145
+
146
+ <section id="newsletter_signup" class="box ">
147
+ <h4>Newsletter</h4>
148
+
149
+ <p>Sign up for updates and other announcements.</p>
150
+
151
+ <form action="/newsletter/signup.php" method="post">
152
+ <input type="hidden" name="auth" value="278693195410" />
153
+ <input type="hidden" name="referrer" value="/" />
154
+ <input type="text" name="email" value="" placeholder="Your email address..." />
155
+ <button><i class="icon newsletter button">Sign Up!</i></button>
156
+ </form>
157
+ </section>
158
+
159
+ <section class="box">
160
+ <h4>Online Activity</h4>
161
+ <ul>
162
+ <li><a class="grey dc" href="https://twitter.com/gleuch" title="Twitter" target="_blank">Twitter</a></li>
163
+ <li><a class="grey dc" href="https://github.com/gleuch" title="GitHub" target="_blank">GitHub</a></li>
164
+ <li><a class="grey dc" href="http://instagram.com/gleuch" title="Instagram" target="_blank">Instagram</a></li>
165
+ <li><a class="grey dc" href="http://www.linkedin.com/in/gleuch" title="LinkedIn" target="_blank">LinkedIn</a></li>
166
+ <li><a class="grey dc" href="https://www.facebook.com/gleuch" title="Facebook" target="_blank">Facebook</a></li>
167
+ </ul>
168
+ </section>
169
+ </aside>
170
+
171
+ </div>
172
+
173
+ <header id="header" class="c">
174
+ <h1 title="Greg Leuch"><a href="/"><i class="icon logo">Greg Leuch</i></a></h1>
175
+ <nav>
176
+ <ul class="c">
177
+ <li class=""><a href="/projects" title="Projects" class="">Projects</a></li>
178
+ <li class=""><a href="/work" title="Client Work" class=""><span class="mfull inline">Client</span> Work</a></li>
179
+ <li class=""><a href="/about" title="About" class="">About</a></li>
180
+ <li class=""><a href="/contact" title="Contact" class="">Contact</a></li>
181
+ <!-- <li class=""><a href="http://blog.gleu.ch" title="Blog" class="">Blog</a></li> -->
182
+ <li class="twitter"><a href="https://twitter.com/gleuch" target="_blank"><i class="icon twitter">Twitter: @gleuch</i></a></li>
183
+ <li class="facebook"><a href="https://facebook.com/gleuch" target="_blank"><i class="icon facebook">Facebook: Greg Leuch</i></a></li>
184
+ </ul>
185
+ </nav>
186
+ </header>
187
+
188
+ <footer id="footer" class="c">
189
+ <p>&copy; 2003&ndash;2013 Greg Leuch, unless noted differently.</p>
190
+ </footer>
191
+
192
+ </div>
193
+
194
+ <div id="template_static_bar"></div>
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+ <!--
204
+ FILE ARCHIVED ON 17:03:22 Jan 29, 2013 AND RETRIEVED FROM THE
205
+ INTERNET ARCHIVE ON 16:31:56 Mar 16, 2013.
206
+ JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE.
207
+
208
+ ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C.
209
+ SECTION 108(a)(3)).
210
+ -->
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+ <script type="text/javascript">
220
+ var wmNotice = "Wayback - External links, forms, and search boxes may not function within this collection. Url: http://gleu.ch/ time: 17:03:22 Jan 29, 2013";
221
+ var wmHideNotice = "hide";
222
+ </script>
223
+ <script type="text/javascript" src="/static/js/disclaim.js"></script>
224
+ </body>
225
+ </html>
data/spec/helper.rb ADDED
@@ -0,0 +1,65 @@
1
+ require 'simplecov'
2
+ require 'coveralls'
3
# Coverage reporting is switched on here, ahead of the `require 'wayback'`
# further down (presumably so the library's own files are measured).
Coveralls.wear!

# Report coverage both as local HTML and to Coveralls.
coverage_formatters = [
  SimpleCov::Formatter::HTMLFormatter,
  Coveralls::SimpleCov::Formatter
]
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[*coverage_formatters]
SimpleCov.start
10
+
11
+ require 'wayback'
12
+ require 'wayback/identity_map'
13
+ require 'rspec'
14
+ require 'stringio'
15
+ require 'tempfile'
16
+ require 'timecop'
17
+ require 'webmock/rspec'
18
+
19
# Block real HTTP connections during the specs, except to coveralls.io.
WebMock.disable_net_connect!(:allow => 'coveralls.io')

# Use the `expect` syntax for RSpec expectations.
RSpec.configure do |config|
  config.expect_with(:rspec) { |expectations| expectations.syntax = :expect }
end
26
+
27
# Calls a_request for a DELETE to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def a_delete(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:delete, url)
end
30
+
31
# Calls a_request for a GET to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def a_get(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:get, url)
end
34
+
35
# Calls a_request for a POST to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def a_post(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:post, url)
end
38
+
39
# Calls a_request for a PUT to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def a_put(path)
  url = Wayback::Default::ENDPOINT + path
  a_request(:put, url)
end
42
+
43
# Calls stub_request for a DELETE to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def stub_delete(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:delete, url)
end
46
+
47
# Calls stub_request for a GET to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def stub_get(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:get, url)
end
50
+
51
# Calls stub_request for a POST to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def stub_post(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:post, url)
end
54
+
55
# Calls stub_request for a PUT to +path+ appended to the default endpoint.
#
# @param path [String] path added to Wayback::Default::ENDPOINT
def stub_put(path)
  url = Wayback::Default::ENDPOINT + path
  stub_request(:put, url)
end
58
+
59
# Absolute path of the "fixtures" directory that sits next to this file.
#
# @return [String]
def fixture_path
  File.expand_path('fixtures', File.dirname(__FILE__))
end
62
+
63
# Opens the named fixture file from the fixtures directory.
#
# The File is returned open; closing it is left to the caller.
#
# @param file [String] file name relative to #fixture_path
# @return [File]
def fixture(file)
  full_path = fixture_path + '/' + file
  File.new(full_path)
end