cached_mechanize2 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 28bb73f528f1529f6ed553497534c95bbfcdaa47
4
+ data.tar.gz: 63e310f1b7475010f29a5251cabf2c92e6220bf5
5
+ SHA512:
6
+ metadata.gz: 81bc7ffb8527565f6d95dd55a0226dbf958e45f5d94541fa6ccbb8d1485dd5e8e9b9ae9805bf6717155adfba9f8b747dc5094673e468fd03a34b977bd4ab6239
7
+ data.tar.gz: 9ce2edb824afa7e43d6494302f7f9c55e18992f901559f242e8214c728d78195193a94fb7a5428abc8283a300f8b1647d7635ae57878e144b152d6e852acb049
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ cache/
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cached_mechanize2.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 wukerplank
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # CachedMechanize
2
+
3
+ is a small wrapper around [Mechanize](https://github.com/sparklemotion/mechanize) to enable caching for GET requests.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'cached_mechanize2', require: 'cached_mechanize'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cached_mechanize2
18
+
19
+ ## Usage
20
+
21
+ Right now there is only a file-based caching backend. More backends (Redis, etc.) may follow in the future.
22
+
23
+ To configure the backend use this syntax:
24
+
25
+ CachedMechanize.configure do |config|
26
+ config.cache_class = CachedMechanize::FileCache
27
+ config.cache_options = {
28
+ path: "/path/to/my/cached_files"
29
+ }
30
+ end
31
+
32
+ Now you can use this class like you would use Mechanize:
33
+
34
+ agent = CachedMechanize.new
35
+ doc = agent.get('http://www.imdb.com')
36
+
37
+ By default all retrieved pages will be cached for one day (86400 seconds). To change this behaviour you can provide an `expires_after` option:
38
+
39
+ doc = agent.get('http://www.imdb.com', expires_after: 42)
40
+
41
+ Note that the syntax has changed! If you want to provide parameters, additional headers or a referer, you can do it like this:
42
+
43
+ doc = agent.get('http://www.imdb.com', {
44
+ referer: 'http://thatothersite.com',
45
+ expires_after: 42
46
+ })
47
+
48
+ ## TO-DO
49
+
50
+ - add more backends
51
+ - add tests
52
+
53
+ ## Contributing
54
+
55
+ 1. Fork it ( http://github.com/wukerplank/cached_mechanize/fork )
56
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
57
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
58
+ 4. Push to the branch (`git push origin my-new-feature`)
59
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,16 @@
require "bundler/gem_tasks"

# Starts an interactive IRB session with the gem already required, so the
# library can be tried out straight from a checkout (`rake console`).
desc "Open an IRB console with cached_mechanize loaded"
task :console do
  # Make the checkout's lib directory requirable even when rake is not run
  # through `bundle exec`.
  lib = File.expand_path('../lib', __FILE__)
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

  require 'irb'
  require 'irb/completion'
  require 'pp'
  require 'cached_mechanize'

  # IRB reads ARGV on start-up; empty it so rake's own arguments (the task
  # name) are not handed to IRB.
  ARGV.clear
  IRB.start
end
@@ -0,0 +1,23 @@
# coding: utf-8

# Put this checkout's lib directory on the load path (once).
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "cached_mechanize2"
  gem.version  = "0.1.0"
  gem.license  = "MIT"
  gem.summary  = %q{A small wrapper around Mechanize to enable caching for GET requests.}
  gem.homepage = "https://github.com/johnwylie70/cached_mechanize2"

  # Maintainer
  gem.authors = ["johnwylie"]
  gem.email   = ["johnwylie70@gmail.com"]

  # Packaged files: everything tracked by git; executables and test files
  # are picked out of that list by their directory prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_dependency "mechanize", "~> 2.7"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.5"
  gem.add_development_dependency "rake"
end
@@ -0,0 +1,70 @@
require "mechanize"

##
# A Mechanize subclass that caches the bodies of GET requests.
#
# The first time a URI is requested it is fetched through Mechanize and its
# body is handed to the configured cache backend. While the backend reports
# the entry as present, later requests are answered from the cache by
# wrapping the stored body in a new Mechanize::Page.
#
class CachedMechanize < Mechanize

  # How long a cached page is considered fresh, in seconds (one day).
  DEFAULT_EXPIRES_AFTER = 86_400

  # Backend class used to build the cache; nil selects
  # CachedMechanize::FileCache. Shared by all instances.
  @@cache_class = nil

  # Options hash passed to the backend's constructor. Shared by all instances.
  @@cache_options = {}

  # Getter/Setter for the cache_class attribute
  def self.cache_class #:nodoc:
    @@cache_class
  end
  def self.cache_class=(cache_class) #:nodoc:
    @@cache_class = cache_class
  end

  # Getter/Setter for the cache_options attribute
  def self.cache_options #:nodoc:
    @@cache_options
  end
  def self.cache_options=(cache_options) #:nodoc:
    @@cache_options = cache_options
  end

  ##
  #
  # Example:
  #
  #   CachedMechanize.configure do |config|
  #     config.cache_class = CachedMechanize::FileCache
  #     config.cache_options = {
  #       path: "/path/to/my/cached_files"
  #     }
  #   end
  #
  def self.configure
    yield self
  end

  ##
  # Accepts the same arguments as Mechanize.new and additionally builds the
  # cache backend from the class-level configuration.
  #
  def initialize(*args)
    super
    @cache = (@@cache_class || CachedMechanize::FileCache).new(@@cache_options)
  end

  ##
  # Fetches +uri+ with a GET request, answering from the cache when a fresh
  # entry exists.
  #
  # parameters:
  # - +uri+ a String or URI
  # - +parameters+ query parameters (default: [])
  # - +referer+ (default: nil)
  # - +headers+ (default: {})
  #
  # Entries stay fresh for DEFAULT_EXPIRES_AFTER seconds. The query
  # parameters are part of the cache key, so requests to the same URI with
  # different parameters are cached separately.
  #
  # On a cache hit the result is a Mechanize::Page built from the stored
  # body; on a miss it is whatever Mechanize#get returns.
  #
  def get(uri, parameters = [], referer = nil, headers = {})
    key = cache_key_for(uri, parameters)

    if @cache.present?(key, DEFAULT_EXPIRES_AFTER)
      Mechanize::Page.new(URI(uri), nil, @cache.retrieve(key), nil, self)
    else
      page = super(uri, parameters, referer, headers)
      @cache.store(key, page.body)
      page
    end
  end

  private

  # Builds the String key under which a request is cached. Requests without
  # parameters are keyed by the URI alone; otherwise the form-encoded
  # parameters are appended so that distinct queries do not collide.
  def cache_key_for(uri, parameters)
    base = uri.to_s
    return base if parameters.nil? || parameters.empty?

    "#{base}?#{URI.encode_www_form(parameters)}"
  end
end

# Loaded after the class body so that file_cache.rb reopens the class defined
# above (it declares CachedMechanize without a superclass). Resolved relative
# to this file, not to the current working directory.
require_relative "file_cache"
data/lib/file_cache.rb ADDED
@@ -0,0 +1,66 @@
require "digest/sha1"
require "fileutils"
require "zlib"

class CachedMechanize

  ##
  # File based cache backend.
  #
  # Every entry is stored gzip-compressed in a single file inside the
  # configured directory. The file is named after the SHA1 hex digest of the
  # URI, and its modification time is used to decide whether the entry is
  # still fresh.
  #
  class FileCache

    # Small helpers for reading and writing gzip-compressed files.
    class GzFile
      # Opens +path+ for gzip-compressed writing and yields the writer to the
      # block; the writer is closed when the block returns.
      def self.open(path, &block)
        Zlib::GzipWriter.open(path) do |gz|
          yield gz
        end
      end

      # Returns the decompressed content of the gzip file at +path+. The
      # block form closes the reader even when reading raises.
      def self.read(path)
        Zlib::GzipReader.open(path) { |gz| gz.read }
      end
    end

    ##
    #
    # Options:
    # - +path+ the path where the cached files should be stored
    #   (default: '/tmp')
    #
    def initialize(options={})
      @path = options[:path] || '/tmp'
    end

    ##
    # Returns true when an entry for +uri+ exists and is at most
    # +expires_after+ seconds old, false otherwise.
    #
    def present?(uri, expires_after)
      file_age(uri) <= expires_after
    rescue Errno::ENOENT
      # No entry (or it vanished between two calls): treat as a miss.
      false
    end

    ##
    # Returns the stored, decompressed data for +uri+.
    # Raises Errno::ENOENT when there is no entry.
    #
    def retrieve(uri)
      GzFile.read(file_name(uri))
    end

    ##
    # Stores +data+ as the entry for +uri+, creating the cache directory if
    # it does not exist yet. Returns the value of the underlying write call.
    #
    def store(uri, data)
      target  = file_name(uri)
      partial = "#{target}.#{Process.pid}.tmp"

      FileUtils.mkdir_p(@path)

      # Write to a temporary file first and rename it into place, so an
      # interrupted write never leaves a truncated entry that present?
      # would report as valid.
      written = GzFile.open(partial) { |file| file.write(data) }
      File.rename(partial, target)
      written
    ensure
      # Only still present when writing or renaming failed.
      File.delete(partial) if partial && File.exist?(partial)
    end

    private

    # Seconds elapsed since the entry for +uri+ was last written.
    # Raises Errno::ENOENT when there is no entry.
    def file_age(uri)
      Time.now - File.mtime(file_name(uri))
    end

    # Full path of the file that holds the entry for +uri+.
    def file_name(uri)
      File.join(@path, uri_to_key(uri))
    end

    # SHA1 hex digest of +uri+. The URI is converted with to_s first so that
    # URI objects and Strings map to the same key.
    def uri_to_key(uri)
      Digest::SHA1.hexdigest(uri.to_s)
    end
  end

end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cached_mechanize2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - johnwylie
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.5'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.5'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - johnwylie70@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - cached_mechanize2.gemspec
68
+ - lib/cached_mechanize.rb
69
+ - lib/file_cache.rb
70
+ homepage: https://github.com/johnwylie70/cached_mechanize2
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.0.14
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: A small wrapper around Mechanize to enable caching for GET requests.
94
+ test_files: []