cached_web 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
source "https://rubygems.org"

# All of this gem's dependencies are declared in cached_web.gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Ian Connor
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # CachedWeb
2
+
3
+ A simple gem that caches fetched web pages on local disk and sleeps between live requests to the same domain.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'cached_web'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cached_web
18
+
19
+ ## Usage
20
+
21
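+ Call `CachedWeb.get(:url => url, :expires_in => seconds)` to fetch a page through the local cache; it returns `[content, redirect_url, headers]` (`headers` is `nil` when the page is served from the cache).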
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,20 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for cached_web.
#
# Puts the gem's own lib/ directory on the load path so the version constant
# can be read without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'cached_web/version'

Gem::Specification.new do |gem|
  gem.name          = "cached_web"
  gem.version       = CachedWeb::VERSION
  gem.authors       = ["Ian Connor"]
  gem.email         = ["iconnor@projectlounge.com"]
  gem.description   = %q{Simple gem to cache web locally}
  gem.summary       = %q{Cached web with sleeps and local storage}
  gem.homepage      = ""
  # LICENSE.txt shipped with the gem is the MIT license.
  gem.license       = "MIT"

  # Package exactly what git tracks; executables and tests are derived from
  # that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # lib/cached_web.rb instantiates Mechanize when fetching pages, so it is a
  # runtime dependency rather than a development-only one.
  gem.add_dependency "mechanize"
end
@@ -0,0 +1,3 @@
1
# Holds the release number of the cached_web gem; the gemspec reads it.
class CachedWeb
  # Version string of this release.
  VERSION = '0.0.1'
end
data/lib/cached_web.rb ADDED
@@ -0,0 +1,146 @@
1
require "digest/md5"
require "fileutils"
require "uri"
require "yaml"

require "cached_web/version"
2
+
3
# Fetches web pages through a small on-disk cache and spaces out live
# requests to the same domain.
#
# Each cache entry is a YAML file in CachedWeb.cache_dir, named after the
# escaped URL (or an explicit :key). An entry stores the content and the
# final (post-redirect) URL; response headers are not persisted, so they
# come back as nil on a cache hit.
#
# Live requests go through Mechanize, which must already be loaded by the
# application (this file does not require it).
class CachedWeb
  # Raised by #get_cache when an entry exists but is older than :expires_in.
  # It descends from ArgumentError because this condition used to be
  # signalled with a bare `throw`, which Ruby reports as an ArgumentError
  # (sub)class; existing `rescue ArgumentError` / `rescue` callers keep working.
  class StaleCacheError < ArgumentError; end

  # Directory used when no other cache directory has been configured.
  DEFAULT_CACHE_DIR = "/tmp/cachedweb"
  # Minimum number of seconds between two live requests to one domain.
  DEFAULT_MIN_WAIT = 5
  # Keys longer than this are replaced by their MD5 digest.
  MAX_KEY_LENGTH = 140

  # Per-domain overrides of DEFAULT_MIN_WAIT (seconds), keyed by lowercase host.
  @@min_wait_times = {}
  # Time of the last live request per lowercase host.
  @@last_request_times = {}

  class << self
    # Overrides the directory that holds cache files (nil restores the default).
    attr_writer :cache_dir

    # @return [String] directory that holds cache files
    def cache_dir
      @cache_dir || DEFAULT_CACHE_DIR
    end
  end

  # Turns an arbitrary key into a string usable as a file name.
  #
  # @param key [String] URL or cache key
  # @return [String] MD5 hex digest for keys longer than MAX_KEY_LENGTH,
  #   otherwise the key with every non-alphanumeric character replaced by "_"
  def escape_key(key)
    return Digest::MD5.hexdigest(key) if key.size > MAX_KEY_LENGTH

    key.gsub(/[^a-z0-9]/i, "_")
  end

  # Convenience wrapper around #get on a fresh instance.
  def self.get(params)
    CachedWeb.new.get(params)
  end

  # Returns a page from the cache, fetching and caching it on a miss.
  #
  # @param params [Hash] :url (required), :expires_in (seconds, optional),
  #   :cache_only (when truthy, never touch the network)
  # @return [Array] [content, redirect_url, headers]; all nil when
  #   :cache_only is set and nothing usable is cached
  def get(params)
    url = params[:url].gsub(" ", "%20").gsub("%off", "%25off")
    page, redirect_url, headers = cached_details(url, params[:expires_in])

    if params[:cache_only]
      return [nil, nil, nil] if blank_value?(page)
      return [page, redirect_url, headers]
    end
    return [page, redirect_url, headers] unless blank_value?(page)

    domain = domain_of(url)
    throttle(domain)
    response = agent.get(url)
    # Record the request so the next call to this domain is throttled.
    @@last_request_times[domain] = Time.now
    cache_response(url, response)
  end

  # Convenience wrapper around #fetch_cache on a fresh instance.
  def self.fetch_cache(params, &block)
    CachedWeb.new.fetch_cache(params, &block)
  end

  # Returns the cached value for params, or computes it with the block and
  # stores it.
  #
  # @param params [Hash] same lookup keys as #get_cache (:url or :key,
  #   optional :expires_in)
  # @yield [CachedWeb] this instance; the block's result is cached
  # @return [Object] cached or freshly computed value
  def fetch_cache(params)
    cached = begin
      get_cache(params)
    rescue StandardError
      # A missing, stale or unreadable entry is simply a cache miss.
      nil
    end
    return cached if cached

    fresh = yield self
    set_cache(params.merge(:page => fresh))
    fresh
  end

  # POSTs to url and caches the response body under the URL.
  #
  # @param url [String] target URL
  # @param params [Hash] form parameters handed to Mechanize
  # @return [Array] [content, redirect_url, headers], or ["", url, {}] when
  #   the request fails
  def post(url, params)
    puts "Not in cache - POSTing to: #{url}"
    response = agent.post(url, params)
    @@last_request_times[domain_of(url)] = Time.now
    cache_response(url, response)
  rescue StandardError => e
    puts e
    puts e.backtrace
    report_error(e)
    ["", url, {}]
  end

  # Writes an entry to the cache.
  #
  # @param params [Hash] :url or :key identifies the entry; the value is
  #   taken from :content, or from :page when :content is absent;
  #   :redirect_url is stored alongside it
  # @return [true]
  def set_cache(params)
    path = cache_path(params)
    # #get passes :content while #fetch_cache and #post pass :page.
    content = params.key?(:content) ? params[:content] : params[:page]
    entry = { :content => content, :redirect_url => params[:redirect_url] }

    FileUtils.mkdir_p(File.dirname(path))
    File.open(path, "w") { |out| YAML.dump(entry, out) }
    true
  end

  # Reads an entry from the cache.
  #
  # @param params [Hash] :url or :key identifies the entry; :expires_in
  #   (seconds) rejects older entries; :details selects the return shape
  # @return [Object, Array] the content, or [content, redirect_url, nil]
  #   when :details is truthy
  # @raise [Errno::ENOENT] when there is no entry
  # @raise [StaleCacheError] when the entry is older than :expires_in
  def get_cache(params)
    path = cache_path(params)
    expires_in = params[:expires_in]

    # Modification time is when the entry was last written; ctime would also
    # move on metadata changes and is the creation time on Windows.
    saved_at = File.mtime(path)
    if expires_in && saved_at < (Time.now - expires_in)
      raise StaleCacheError, "Cache is older than desired, saved on #{saved_at}"
    end

    entry = load_entry(path)
    return entry[:content] unless params[:details]

    [entry[:content], entry[:redirect_url], nil]
  end

  private

  # Lazily built Mechanize agent shared by all requests of this instance.
  def agent
    @agent ||= Mechanize.new
  end

  # Cache lookup for #get that maps every failure to an all-nil triple.
  def cached_details(url, expires_in)
    get_cache(:url => url, :expires_in => expires_in, :details => true)
  rescue StandardError
    [nil, nil, nil]
  end

  # Stores a Mechanize response under url and returns the result triple.
  def cache_response(url, response)
    content = response.body
    redirect_url = response.uri.to_s
    headers = response.header
    set_cache(:url => url, :redirect_url => redirect_url, :headers => headers, :content => content)
    [content, redirect_url, headers]
  end

  # Lowercase host of url ("" when the URL has no host part).
  def domain_of(url)
    URI.parse(url).host.to_s.downcase
  end

  # Sleeps until the minimum wait for domain has elapsed since its last request.
  def throttle(domain)
    min_wait = @@min_wait_times[domain] || DEFAULT_MIN_WAIT
    last_request = @@last_request_times[domain] || Time.at(0)
    time_to_wait = min_wait - (Time.now - last_request)
    return unless time_to_wait > 0

    puts "Waiting for #{time_to_wait} on domain #{domain}"
    sleep time_to_wait
  end

  # File path of the entry identified by params[:url] or params[:key].
  def cache_path(params)
    raw = blank_value?(params[:url]) ? params[:key] : params[:url]
    raise ArgumentError, "either :url or :key is required" if blank_value?(raw)

    File.join(self.class.cache_dir, escape_key(raw.to_s))
  end

  # Parses a cache file. On Psych versions where load_file is safe by default
  # the Symbol keys of an entry must be permitted explicitly; older versions
  # keep using load_file.
  def load_entry(path)
    if YAML.respond_to?(:unsafe_load_file)
      YAML.safe_load(File.read(path), permitted_classes: [Symbol], aliases: true)
    else
      YAML.load_file(path)
    end
  end

  # True for nil, false, empty collections and empty/whitespace-only strings
  # (plain-Ruby stand-in for ActiveSupport's blank?).
  def blank_value?(value)
    case value
    when nil, false then true
    when String then value.empty? || (value.valid_encoding? && value.strip.empty?)
    else value.respond_to?(:empty?) && value.empty?
    end
  end

  # Forwards an error to Airbrake via Toadhopper when the host application
  # provides both; otherwise does nothing. Reporting is best effort.
  def report_error(error)
    return unless defined?(AIRBRAKE_KEY) && respond_to?(:Toadhopper, true)

    Toadhopper(AIRBRAKE_KEY).post!(error)
  rescue StandardError => e
    puts "Could not report error: #{e}"
  end
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cached_web
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ian Connor
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-06-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mechanize
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Simple gem to cache web locally
31
+ email:
32
+ - iconnor@projectlounge.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - cached_web.gemspec
43
+ - lib/cached_web.rb
44
+ - lib/cached_web/version.rb
45
+ homepage: ''
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.24
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Cached web with sleeps and local storage
69
+ test_files: []