cached_mechanize2 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 28bb73f528f1529f6ed553497534c95bbfcdaa47
4
+ data.tar.gz: 63e310f1b7475010f29a5251cabf2c92e6220bf5
5
+ SHA512:
6
+ metadata.gz: 81bc7ffb8527565f6d95dd55a0226dbf958e45f5d94541fa6ccbb8d1485dd5e8e9b9ae9805bf6717155adfba9f8b747dc5094673e468fd03a34b977bd4ab6239
7
+ data.tar.gz: 9ce2edb824afa7e43d6494302f7f9c55e18992f901559f242e8214c728d78195193a94fb7a5428abc8283a300f8b1647d7635ae57878e144b152d6e852acb049
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ cache/
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
source "https://rubygems.org"

# All dependencies of this gem are declared in cached_mechanize2.gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 wukerplank
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # CachedMechanize
2
+
3
+ CachedMechanize is a small wrapper around [Mechanize](https://github.com/sparklemotion/mechanize) that adds caching for GET requests.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'cached_mechanize'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cached_mechanize
18
+
19
+ ## Usage
20
+
21
+ Right now there is only a file-based caching backend. More backends (Redis, etc.) may follow in the future.
22
+
23
+ To configure the backend use this syntax:
24
+
25
+ CachedMechanize.configure do |config|
26
+ config.cache_class = CachedMechanize::FileCache
27
+ config.cache_options = {
28
+ path: "/path/to/my/cached_files"
29
+ }
30
+ end
31
+
32
+ Now you can use this class like you would use Mechanize:
33
+
34
+ agent = CachedMechanize.new
35
+ doc = agent.get('http://www.imdb.com')
36
+
37
+ By default all retrieved pages will be cached for one day (86400 seconds). To change this behaviour you can provide an `expires_after` option:
38
+
39
+ doc = agent.get('http://www.imdb.com', expires_after: 42)
40
+
41
+ Note that the syntax has changed! If you want to provide parameters, additional headers or a referer, you can do it like this:
42
+
43
+ doc = agent.get('http://www.imdb.com', {
44
+ referer: 'http://thatothersite.com',
45
+ expires_after: 42
46
+ })
47
+
48
+ ## TO-DO
49
+
50
+ - add more backends
51
+ - add tests
52
+
53
+ ## Contributing
54
+
55
+ 1. Fork it ( http://github.com/wukerplank/cached_mechanize/fork )
56
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
57
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
58
+ 4. Push to the branch (`git push origin my-new-feature`)
59
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require "bundler/gem_tasks"
2
+
3
desc "Open an IRB session with cached_mechanize loaded"
task :console do
  require "irb"
  require "irb/completion"
  require "pp"
  require "cached_mechanize"

  # IRB parses ARGV itself, so drop rake's own arguments (e.g. "console")
  # before handing control to it.
  ARGV.clear
  IRB.start
end
@@ -0,0 +1,23 @@
1
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "cached_mechanize2"
  spec.version       = "0.1.0"
  spec.authors       = ["johnwylie"]
  spec.email         = ["johnwylie70@gmail.com"]
  spec.summary       = %q{A small wrapper around Mechanize to enable caching for GET requests.}
  spec.homepage      = "https://github.com/johnwylie70/cached_mechanize2"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "mechanize", "~> 2.7"

  # ">= 1.5" rather than "~> 1.5": the pessimistic constraint would exclude
  # Bundler 2.x and make the development bundle unresolvable there.
  spec.add_development_dependency "bundler", ">= 1.5"
  spec.add_development_dependency "rake"
end
@@ -0,0 +1,70 @@
1
+ require "mechanize"
2
+
3
# A Mechanize agent that caches the bodies of GET responses in a pluggable
# cache backend (CachedMechanize::FileCache unless configured otherwise).
class CachedMechanize < Mechanize

  # Number of seconds a cached page is considered fresh (one day).
  DEFAULT_EXPIRES_AFTER = 86_400

  @@cache_class = nil
  @@cache_options = {}

  # Getter/Setter for the cache_class attribute
  def self.cache_class #:nodoc:
    @@cache_class
  end

  def self.cache_class=(cache_class) #:nodoc:
    @@cache_class = cache_class
  end

  # Getter/Setter for the cache_options attribute
  def self.cache_options #:nodoc:
    @@cache_options
  end

  def self.cache_options=(cache_options) #:nodoc:
    @@cache_options = cache_options
  end

  ##
  # Yields the class itself so that +cache_class+ and +cache_options+ can be
  # assigned in one place.
  #
  # Example:
  #
  #   CachedMechanize.configure do |config|
  #     config.cache_class = CachedMechanize::FileCache
  #     config.cache_options = {
  #       path: "/path/to/my/cached_files"
  #     }
  #   end
  #
  def self.configure
    yield self
  end

  # Builds the agent exactly like Mechanize does, then instantiates the
  # configured cache backend with the configured options.
  def initialize(*args)
    super
    @cache = (@@cache_class || CachedMechanize::FileCache).new(@@cache_options)
  end

  ##
  # Fetches +uri+ like Mechanize#get, but serves the body from the cache when
  # an entry younger than DEFAULT_EXPIRES_AFTER seconds exists.
  #
  # parameters:
  # - +uri+        String or URI of the page
  # - +parameters+ query parameters (default: [])
  # - +referer+    (default: nil)
  # - +headers+    (default: {})
  #
  # On a cache miss the request is delegated to Mechanize and the body of the
  # result is stored. On a hit only the body is available, so the result is
  # rebuilt as a Mechanize::Page without response headers or status code.
  def get(uri, parameters = [], referer = nil, headers = {})
    key = cache_key_for(uri, parameters)

    if @cache.present?(key, DEFAULT_EXPIRES_AFTER)
      return Mechanize::Page.new(URI(uri), nil, @cache.retrieve(key), nil, self)
    end

    page = super(uri, parameters, referer, headers)
    @cache.store(key, page.body)
    page
  end

  private

  # Builds the String under which a request is cached. Requests without
  # parameters keep the plain URI as their key; otherwise the encoded
  # parameters are appended so that different queries against the same URI
  # do not share one cache entry.
  def cache_key_for(uri, parameters)
    query = URI.encode_www_form(Array(parameters))
    query.empty? ? uri.to_s : "#{uri}?#{query}"
  end
end
69
+
70
# Resolved relative to this file rather than the current working directory,
# so the gem also loads when it is required from elsewhere. Kept after the
# class definition because file_cache.rb reopens CachedMechanize without
# naming its superclass.
require_relative "file_cache"
data/lib/file_cache.rb ADDED
@@ -0,0 +1,66 @@
1
+ require "digest/sha1"
2
+ require "zlib"
3
+
4
require "fileutils"

class CachedMechanize

  ##
  # File-system cache backend. Every entry is one gzip-compressed file inside
  # the configured directory, named after the SHA1 hex digest of its URI. The
  # file's modification time is used to decide whether the entry is fresh.
  class FileCache

    # Directory used when no +path+ option is given.
    DEFAULT_PATH = '/tmp'

    # Small helpers for writing and reading gzip-compressed files.
    class GzFile
      # Opens +path+ for gzip-compressed writing and passes the writer to the
      # block; the writer is closed when the block returns.
      def self.open(path, &block)
        Zlib::GzipWriter.open(path, &block)
      end

      # Returns the decompressed contents of the gzip file at +path+.
      # The block form closes the reader even if reading raises.
      def self.read(path)
        Zlib::GzipReader.open(path) { |gz| gz.read }
      end
    end

    ##
    #
    # Options:
    # - +path+ the path where the cached files should be stored
    #   (default: DEFAULT_PATH)
    #
    def initialize(options={})
      @path = options[:path] || DEFAULT_PATH
    end

    # Returns true when an entry for +uri+ exists and is at most
    # +expires_after+ seconds old.
    def present?(uri, expires_after)
      path = file_name(uri)
      File.exist?(path) && (Time.now - File.mtime(path)) <= expires_after
    end

    # Returns the cached data for +uri+. Raises if no entry exists.
    def retrieve(uri)
      GzFile.read(file_name(uri))
    end

    # Stores +data+ as the entry for +uri+, replacing any previous entry.
    def store(uri, data)
      # The configured directory may not exist yet; create it on first use
      # instead of failing with Errno::ENOENT.
      FileUtils.mkdir_p(@path)

      GzFile.open(file_name(uri)) do |file|
        file.write(data)
      end
    end

    private

    # Age of the entry for +uri+ in seconds, based on the file's mtime.
    def file_age(uri)
      Time.now - File.mtime(file_name(uri))
    end

    # Absolute location of the cache file for +uri+.
    def file_name(uri)
      File.join(@path, uri_to_key(uri))
    end

    # SHA1 hex digest of +uri+; +to_s+ lets callers pass URI objects as well
    # as Strings.
    def uri_to_key(uri)
      Digest::SHA1.hexdigest(uri.to_s)
    end
  end

end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cached_mechanize2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - johnwylie
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.5'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.5'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - johnwylie70@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - cached_mechanize2.gemspec
68
+ - lib/cached_mechanize.rb
69
+ - lib/file_cache.rb
70
+ homepage: https://github.com/johnwylie70/cached_mechanize2
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.0.14
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: A small wrapper around Mechanize to enable caching for GET requests.
94
+ test_files: []