cached_web 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cached_web.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Ian Connor
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # CachedWeb
2
+
3
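+ CachedWeb fetches web pages and keeps a copy of each response on local disk, sleeping between requests to the same domain so that repeated runs do not hammer a site.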
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'cached_web'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cached_web
18
+
19
+ ## Usage
20
+
21
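+ Call `CachedWeb.get(:url => "http://example.com/", :expires_in => 3600)`; it returns `[content, final_url, headers]`, served from the local cache when a copy younger than `:expires_in` seconds exists. Use `CachedWeb.fetch_cache(:key => "name") { ... }` to cache the result of an arbitrary block.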
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cached_web/version'
5
+
6
# Packaging metadata for the cached_web gem.
Gem::Specification.new do |gem|
  gem.name          = "cached_web"
  gem.version       = CachedWeb::VERSION
  gem.authors       = ["Ian Connor"]
  gem.email         = ["iconnor@projectlounge.com"]
  gem.description   = %q{Simple gem to cache web locally}
  gem.summary       = %q{Cached web with sleeps and local storage}
  gem.homepage      = ""
  # The repository ships an MIT LICENSE.txt; declare it so it shows up in
  # the built gem's metadata.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # lib/cached_web.rb instantiates Mechanize to perform requests, so it is a
  # runtime dependency. Declaring it as a development dependency meant that
  # `gem install cached_web` did not install it.
  gem.add_dependency "mechanize"
end
@@ -0,0 +1,3 @@
1
# Version file for the gem: reopens CachedWeb only to define its version.
class CachedWeb
  # Version string; cached_web.gemspec reads this as the gem version.
  VERSION = '0.0.1'
end
data/lib/cached_web.rb ADDED
@@ -0,0 +1,146 @@
1
+ require "cached_web/version"
2
+
3
require "digest/md5"
require "fileutils"
require "uri"
require "yaml"

# Fetches web pages through Mechanize and keeps a YAML copy of every response
# on local disk, so repeated requests for the same URL can be answered without
# touching the network. Live requests to one domain are spaced out by a
# minimum wait (5 seconds unless overridden in CachedWeb.min_wait_times).
#
# Cache entries are files named after #escape_key inside CachedWeb.cache_dir.
class CachedWeb

  # Raised by #get_cache when an entry exists but is too old or unreadable.
  # It inherits from ArgumentError because earlier versions signalled a stale
  # entry with an uncaught `throw`, which surfaces as an ArgumentError
  # subclass; callers rescuing that keep working.
  class CacheMiss < ArgumentError; end

  # Directory used for cache files unless CachedWeb.cache_dir= is called.
  DEFAULT_CACHE_DIR = "/tmp/cachedweb"

  # Seconds to leave between two live requests to the same domain when the
  # domain has no entry in CachedWeb.min_wait_times.
  DEFAULT_MIN_WAIT = 5

  @@min_wait_times = {}
  @@last_request_times = {}
  @@cache_dir = DEFAULT_CACHE_DIR

  # Hash of lower-cased host name => minimum seconds between live requests.
  # Mutate it to tune the delay for a domain.
  def self.min_wait_times
    @@min_wait_times
  end

  # Directory the cache files are read from and written to.
  def self.cache_dir
    @@cache_dir
  end

  # Points the cache at another directory; it is created on the first write.
  def self.cache_dir=(dir)
    @@cache_dir = dir
  end

  # Turns a URL or free-form key into a file name.
  #
  # Keys longer than 140 characters become their MD5 hex digest; shorter ones
  # have every character outside [a-z0-9] (case-insensitive) replaced by "_".
  #
  # NOTE(review): the replacement is lossy, so distinct short keys such as
  # "a/b" and "a?b" share one cache file. Left unchanged to keep existing
  # cache files addressable.
  def escape_key(key)
    if key.size > 140
      Digest::MD5.hexdigest(key)
    else
      key.gsub(/[^a-z0-9]/i, "_")
    end
  end

  # Convenience wrapper: CachedWeb.get(params) == CachedWeb.new.get(params).
  def self.get(params)
    new.get(params)
  end

  # Returns [content, final_url, headers] for params[:url].
  #
  # params:
  #   :url        - required; spaces and a literal "%off" are percent-escaped
  #                 before use.
  #   :expires_in - optional maximum age of a cached copy, in seconds.
  #   :cache_only - when truthy, never go to the network; returns
  #                 [nil, nil, nil] if there is no usable cached copy.
  #
  # A cached copy whose content is blank is treated as missing. On a cache hit
  # the headers are the plain Hash written by #set_cache (nil for entries
  # written without headers); on a live fetch they are whatever the Mechanize
  # page object returns from #header.
  #
  # Raises ArgumentError when :url is missing or blank.
  def get(params)
    raw_url = params[:url]
    raise ArgumentError, ":url is required" if blank_value?(raw_url)
    url = raw_url.gsub(" ", "%20").gsub("%off", "%25off")

    # Any failure to read the cache (missing file, stale, corrupt) is a miss.
    page, redirect_url, headers = begin
      get_cache(:url => url, :expires_in => params[:expires_in], :details => true)
    rescue StandardError
      [nil, nil, nil]
    end
    cached = !blank_value?(page)

    if params[:cache_only]
      return cached ? [page, redirect_url, headers] : [nil, nil, nil]
    end
    return [page, redirect_url, headers] if cached

    domain = URI.parse(url).host.to_s.downcase
    wait_for_domain(domain)

    http = agent
    response = begin
      http.get(url)
    ensure
      # Record the attempt even when it fails, so a failing site is not
      # retried in a tight loop. The original code only read this hash entry
      # and never assigned it, which left the throttle inert.
      @@last_request_times[domain] = Time.now
    end

    content = response.body
    redirect_url = response.uri.to_s
    headers = response.header
    set_cache(:url => url, :redirect_url => redirect_url, :headers => headers, :content => content)
    [content, redirect_url, headers]
  end

  # Convenience wrapper around CachedWeb.new.fetch_cache.
  def self.fetch_cache(params, &block)
    new.fetch_cache(params, &block)
  end

  # Returns the cached value for params (see #get_cache). When there is none,
  # yields this instance to the block, stores the block's result and returns
  # it.
  #
  # Only values made of plain YAML types (strings, numbers, symbols, arrays,
  # hashes, nil, booleans) can be read back; anything else is recomputed on
  # every call.
  def fetch_cache(params)
    cached = begin
      get_cache(params)
    rescue StandardError
      nil
    end
    return cached if cached

    fresh = yield self
    # The value must be stored under :content, the key #set_cache reads. It
    # used to be passed as :page, so nothing was ever cached here.
    set_cache(params.merge(:content => fresh))
    fresh
  end

  # POSTs params to url through Mechanize and returns
  # [content, final_url, headers]. On any StandardError the error is printed,
  # reported to Toadhopper when that library and AIRBRAKE_KEY are both
  # defined, and ["", url, {}] is returned.
  def post(url, params)
    puts "Not in cache - POSTing to: #{url}"
    domain = URI.parse(url).host.to_s.downcase
    http = agent
    response = begin
      http.post(url, params)
    ensure
      @@last_request_times[domain] = Time.now
    end

    content = response.body
    redirect_url = response.uri.to_s
    headers = response.header
    # The entry is keyed by URL only, so the response body is deliberately not
    # stored: a later GET of the same URL must not be answered with a POST
    # result. This matches what earlier versions wrote (they passed the body
    # under a key #set_cache ignores); the write just replaces any cached GET
    # body for this URL with an empty entry.
    set_cache(:url => url, :redirect_url => redirect_url, :headers => headers, :content => nil)
    [content, redirect_url, headers]
  rescue StandardError => e
    puts e
    puts e.backtrace
    report_error(e)
    ["", url, {}]
  end

  # Writes a cache entry and returns true.
  #
  # params:
  #   :url or :key  - identifies the entry (:url wins when it is not blank).
  #   :content      - value to store.
  #   :redirect_url - final URL after redirects, optional.
  #   :headers      - response headers, optional; stored as a plain Hash of
  #                   strings.
  #
  # Raises ArgumentError when neither :url nor :key is given.
  def set_cache(params)
    entry = {
      :content      => params[:content],
      :redirect_url => params[:redirect_url],
      :headers      => plain_headers(params[:headers])
    }
    path = cache_path(cache_key(params))

    FileUtils.mkdir_p(File.dirname(path))
    File.open(path, "w") { |out| YAML.dump(entry, out) }
    true
  end

  # Reads a cache entry.
  #
  # params:
  #   :url or :key - identifies the entry (:url wins when it is not blank).
  #   :expires_in  - optional maximum age in seconds, measured from the file's
  #                  modification time.
  #   :details     - when truthy, return [content, redirect_url, headers]
  #                  instead of just the content.
  #
  # Raises Errno::ENOENT when there is no entry, CacheMiss when the entry is
  # older than :expires_in or is not a readable entry, and ArgumentError when
  # neither :url nor :key is given.
  def get_cache(params)
    path = cache_path(cache_key(params))
    expires_in = params[:expires_in]

    # mtime is when the entry was last written. ctime also changes on
    # chmod/chown and is the creation time on Windows.
    saved_at = File.mtime(path)
    if expires_in && saved_at < (Time.now - expires_in)
      raise CacheMiss, "Cache is older than desired, saved on #{saved_at}"
    end

    # Cache files live in a shared temp directory, so they are parsed with
    # safe_load. Symbols are permitted because the entry hash uses symbol
    # keys. Needs Psych 3.1+ (Ruby 2.6+) for the keyword arguments.
    entry = YAML.safe_load(File.read(path), permitted_classes: [Symbol], aliases: true)
    raise CacheMiss, "Cache entry at #{path} is not readable" unless entry.is_a?(Hash)

    content = entry[:content]
    return content unless params[:details]

    [content, entry[:redirect_url], entry[:headers]]
  end

  private

  # Lazily builds the Mechanize agent, loading the library on first use so
  # that pure cache reads do not need it.
  def agent
    @agent ||= begin
      require "mechanize" unless defined?(::Mechanize)
      Mechanize.new
    end
  end

  # Sleeps until the minimum wait for domain has elapsed since the last
  # recorded request to it; returns immediately for a domain not seen yet.
  def wait_for_domain(domain)
    last_request = @@last_request_times[domain]
    return unless last_request

    minimum = @@min_wait_times[domain] || DEFAULT_MIN_WAIT
    time_to_wait = minimum - (Time.now - last_request)
    return unless time_to_wait > 0

    puts "Waiting for #{time_to_wait} on domain #{domain}"
    sleep time_to_wait
  end

  # Picks :url (when not blank) or :key from params and escapes it.
  def cache_key(params)
    raw = blank_value?(params[:url]) ? params[:key] : params[:url]
    raise ArgumentError, "either :url or :key is required" if raw.nil?
    escape_key(raw.to_s)
  end

  # Absolute path of the cache file for an already-escaped key.
  def cache_path(key)
    File.join(@@cache_dir, key)
  end

  # Copies a header collection into a plain Hash of strings so that it can be
  # written and read back with safe YAML loading. Returns nil for non-hashes.
  def plain_headers(headers)
    return nil unless headers.respond_to?(:each_pair)

    plain = {}
    headers.each_pair do |name, value|
      plain[name.to_s] = value.is_a?(Array) ? value.map { |v| v.to_s } : value.to_s
    end
    plain
  end

  # Plain-Ruby stand-in for ActiveSupport's blank?: true for nil, false,
  # empty collections and empty or whitespace-only strings.
  def blank_value?(value)
    return true if value.nil? || value == false
    return value.strip.empty? if value.is_a?(String)
    value.respond_to?(:empty?) ? value.empty? : false
  end

  # Forwards an error to Toadhopper/Airbrake when the host application has
  # loaded it and defined AIRBRAKE_KEY; otherwise does nothing.
  def report_error(error)
    return unless defined?(::Toadhopper) && defined?(::AIRBRAKE_KEY)
    Toadhopper(::AIRBRAKE_KEY).post!(error)
  end

end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cached_web
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ian Connor
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-06-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mechanize
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Simple gem to cache web locally
31
+ email:
32
+ - iconnor@projectlounge.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - cached_web.gemspec
43
+ - lib/cached_web.rb
44
+ - lib/cached_web/version.rb
45
+ homepage: ''
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.24
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Cached web with sleeps and local storage
69
+ test_files: []