cached_mechanize2 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +59 -0
- data/Rakefile +16 -0
- data/cached_mechanize2.gemspec +23 -0
- data/lib/cached_mechanize.rb +70 -0
- data/lib/file_cache.rb +66 -0
- metadata +94 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 28bb73f528f1529f6ed553497534c95bbfcdaa47
|
4
|
+
data.tar.gz: 63e310f1b7475010f29a5251cabf2c92e6220bf5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 81bc7ffb8527565f6d95dd55a0226dbf958e45f5d94541fa6ccbb8d1485dd5e8e9b9ae9805bf6717155adfba9f8b747dc5094673e468fd03a34b977bd4ab6239
|
7
|
+
data.tar.gz: 9ce2edb824afa7e43d6494302f7f9c55e18992f901559f242e8214c728d78195193a94fb7a5428abc8283a300f8b1647d7635ae57878e144b152d6e852acb049
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 wukerplank
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# CachedMechanize
|
2
|
+
|
3
|
+
is a small wrapper around [Mechanize](https://github.com/sparklemotion/mechanize) to enable caching for GET requests.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'cached_mechanize2', require: 'cached_mechanize'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install cached_mechanize2
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Right now there is only a file based caching backend. There might follow more in the future (redis, etc.).
|
22
|
+
|
23
|
+
To configure the backend use this syntax:
|
24
|
+
|
25
|
+
CachedMechanize.configure do |config|
|
26
|
+
config.cache_class = CachedMechanize::FileCache
|
27
|
+
config.cache_options = {
|
28
|
+
path: "/path/to/my/cached_files"
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
Now you can use this class like you would use Mechanize:
|
33
|
+
|
34
|
+
agent = CachedMechanize.new
|
35
|
+
doc = agent.get('http://www.imdb.com')
|
36
|
+
|
37
|
+
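By default all retrieved pages will be cached for one day (86400 seconds). Note that in version 0.1.0 `get` keeps Mechanize's positional signature (`get(uri, parameters = [], referer = nil, headers = {})`), so it does not read an `expires_after` option: a hash passed as the second argument is sent as request parameters. The option-hash examples below describe the upstream `cached_mechanize` syntax: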
|
38
|
+
|
39
|
+
doc = agent.get('http://www.imdb.com', expires_after: 42)
|
40
|
+
|
41
|
+
Note that the syntax has changed! If you want to provide parameters, additional headers or a referer, you can do it like this:
|
42
|
+
|
43
|
+
doc = agent.get('http://www.imdb.com', {
|
44
|
+
referer: 'http://thatothersite.com',
|
45
|
+
expires_after: 42
|
46
|
+
})
|
47
|
+
|
48
|
+
## TO-DO
|
49
|
+
|
50
|
+
- add more backends
|
51
|
+
- add tests
|
52
|
+
|
53
|
+
## Contributing
|
54
|
+
|
55
|
+
1. Fork it ( http://github.com/wukerplank/cached_mechanize/fork )
|
56
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
57
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
58
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
59
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "bundler/gem_tasks"

# `rake console` starts an interactive IRB session with the gem already
# required, which is handy for trying out CachedMechanize by hand.
desc "Open an IRB session with cached_mechanize loaded"
task :console do
  require 'irb'
  require 'irb/completion'
  require 'pp'
  require 'cached_mechanize'

  # Empty ARGV before starting IRB so the arguments given to rake
  # (e.g. "console") are not handed on to IRB.
  ARGV.clear
  IRB.start
end
|
data/cached_mechanize2.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for cached_mechanize2.
#
# The packaged file list is taken from git, so the gem must be built from
# inside a git checkout.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "cached_mechanize2"
  spec.version       = "0.1.0"
  spec.authors       = ["johnwylie"]
  spec.email         = ["johnwylie70@gmail.com"]
  spec.summary       = %q{A small wrapper around Mechanize to enable caching for GET requests.}
  # A description distinct from the summary; it was previously left empty.
  spec.description   = %q{CachedMechanize subclasses Mechanize and stores the bodies of GET responses in a pluggable cache backend (a gzip file cache is included), so repeated requests for the same page are served from the cache.}
  spec.homepage      = "https://github.com/johnwylie70/cached_mechanize2"
  spec.license       = "MIT"

  # Every file tracked by git is shipped; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "mechanize", "~> 2.7"

  # ">= 1.5" rather than "~> 1.5" so the development bundle also resolves
  # when Bundler 2.x is installed.
  spec.add_development_dependency "bundler", ">= 1.5"
  spec.add_development_dependency "rake"
end
|
data/lib/cached_mechanize.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require "mechanize"
|
2
|
+
|
3
|
+
# Mechanize subclass that stores the body of every GET response in a cache
# backend and serves later requests for the same URI (and parameters) from
# that cache until the entry is older than +expires_after+ seconds.
#
# The backend is any class whose instances respond to
# +present?(key, expires_after)+, +retrieve(key)+ and +store(key, data)+ and
# whose constructor takes the configured options hash.
class CachedMechanize < Mechanize

  # Cache lifetime in seconds used when nothing else is set (1 day).
  DEFAULT_EXPIRES_AFTER = 86_400

  # Backend class and its constructor options. These are class variables, so
  # they are shared by CachedMechanize and all of its subclasses.
  @@cache_class = nil
  @@cache_options = {}

  # Getter/Setter for the cache_class attribute
  def self.cache_class #:nodoc:
    @@cache_class
  end

  def self.cache_class=(cache_class) #:nodoc:
    @@cache_class = cache_class
  end

  # Getter/Setter for the cache_options attribute
  def self.cache_options #:nodoc:
    @@cache_options
  end

  def self.cache_options=(cache_options) #:nodoc:
    @@cache_options = cache_options
  end

  ##
  #
  # Yields the class itself so the cache backend can be configured.
  #
  # Example:
  #
  #   CachedMechanize.configure do |config|
  #     config.cache_class = CachedMechanize::FileCache
  #     config.cache_options = {
  #       path: "/path/to/my/cached_files"
  #     }
  #   end
  #
  def self.configure
    yield self
  end

  # Maximum age in seconds a cached response may have and still be served
  # by this agent. Defaults to DEFAULT_EXPIRES_AFTER.
  attr_accessor :expires_after

  # Accepts the same arguments as Mechanize#initialize and additionally
  # instantiates the configured cache backend (FileCache when none is set).
  def initialize(*args)
    super
    @expires_after = DEFAULT_EXPIRES_AFTER
    @cache = (@@cache_class || CachedMechanize::FileCache).new(@@cache_options)
  end

  # Same signature as Mechanize#get.
  #
  # parameters:
  # - +uri+        String or URI to fetch
  # - +parameters+ request parameters (default: [])
  # - +referer+    (default: nil)
  # - +headers+    (default: {})
  #
  # On a cache hit a Mechanize::Page is built from the cached body and no
  # request is made; otherwise the request is delegated to Mechanize#get and
  # the body of the result is stored before the result is returned.
  def get(uri, parameters = [], referer = nil, headers = {})
    key = cache_key_for(uri, parameters)

    if @cache.present?(key, expires_after)
      return Mechanize::Page.new(URI(uri), nil, @cache.retrieve(key), nil, self)
    end

    page = super(uri, parameters, referer, headers)
    @cache.store(key, page.body)
    page
  end

  private

  # Builds the String key under which a response is cached. The request
  # parameters are part of the key so that requests to the same URI with
  # different parameters do not share an entry. Without parameters the key is
  # just the URI as a String.
  def cache_key_for(uri, parameters)
    base = uri.to_s
    return base if parameters.nil? || parameters.empty?

    separator = base.include?("?") ? "&" : "?"
    "#{base}#{separator}#{URI.encode_www_form(parameters)}"
  end
end
|
69
|
+
|
70
|
+
# Loaded after the class body on purpose: file_cache.rb reopens
# CachedMechanize without naming a superclass. require_relative resolves
# against this file's directory instead of the process working directory.
require_relative "file_cache"
|
data/lib/file_cache.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require "digest/sha1"
|
2
|
+
require "zlib"
|
3
|
+
|
4
|
+
class CachedMechanize

  # File based cache backend. Every entry is stored as a gzip-compressed file
  # inside the configured directory; the file name is the SHA1 hex digest of
  # the entry's key.
  class FileCache

    # Thin wrapper around Zlib for writing and reading gzip files.
    class GzFile
      # Opens +path+ for gzip-compressed writing and yields the writer to the
      # given block.
      def self.open(path)
        Zlib::GzipWriter.open(path) { |gz| yield gz }
      end

      # Returns the decompressed content of the gzip file at +path+.
      # The block form closes the reader even when reading raises.
      def self.read(path)
        Zlib::GzipReader.open(path, &:read)
      end
    end

    ##
    #
    # Options:
    # - +path+ the path where the cached files should be stored
    #   (default: '/tmp'). The directory is not created here.
    #
    def initialize(options={})
      @path = options[:path] || '/tmp'
    end

    # True when an entry for +uri+ exists and is at most +expires_after+
    # seconds old, judged by the file's modification time.
    def present?(uri, expires_after)
      file_age(uri) <= expires_after
    rescue Errno::ENOENT
      # No file (or it vanished between calls): treat as a cache miss.
      false
    end

    # Returns the cached data for +uri+. Raises if there is no such entry.
    def retrieve(uri)
      GzFile.read(file_name(uri))
    end

    # Writes +data+ as the cache entry for +uri+, replacing any existing one.
    def store(uri, data)
      GzFile.open(file_name(uri)) do |file|
        file.write(data)
      end
    end

    private

    # Seconds elapsed since the entry's file was last modified.
    def file_age(uri)
      Time.now - File.mtime(file_name(uri))
    end

    # Full path of the file that holds the entry for +uri+.
    def file_name(uri)
      File.join(@path, uri_to_key(uri))
    end

    # SHA1 hex digest of +uri+. The argument is converted with +to_s+ first so
    # URI objects are accepted as well as Strings.
    def uri_to_key(uri)
      Digest::SHA1.hexdigest(uri.to_s)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cached_mechanize2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- johnwylie
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-04-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.5'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- johnwylie70@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE.txt
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- cached_mechanize2.gemspec
|
68
|
+
- lib/cached_mechanize.rb
|
69
|
+
- lib/file_cache.rb
|
70
|
+
homepage: https://github.com/johnwylie70/cached_mechanize2
|
71
|
+
licenses:
|
72
|
+
- MIT
|
73
|
+
metadata: {}
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
require_paths:
|
77
|
+
- lib
|
78
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
requirements: []
|
89
|
+
rubyforge_project:
|
90
|
+
rubygems_version: 2.0.14
|
91
|
+
signing_key:
|
92
|
+
specification_version: 4
|
93
|
+
summary: A small wrapper around Mechanize to enable caching for GET requests.
|
94
|
+
test_files: []
|