epitools 0.4.10 → 0.4.13
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +13 -3
- data/Rakefile +21 -17
- data/VERSION +1 -1
- data/epitools.gemspec +4 -5
- data/lib/epitools/browser.rb +5 -4
- data/lib/epitools/browser/cache.rb +196 -0
- data/lib/epitools/browser/mechanize_progressbar.rb +5 -0
- data/spec/browser_spec.rb +64 -1
- metadata +5 -29
- data/lib/epitools/browser/browser_cache.rb +0 -169
data/README.rdoc
CHANGED
@@ -1,7 +1,17 @@
|
|
1
1
|
= epitools
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
Useful miscellaneous improvements for base Ruby objects, plus some extra
|
4
|
+
data structures and handy wrappers.
|
5
|
+
|
6
|
+
Base classes: Object, Enumerable, Hash, String, Array, Integer, etc.
|
7
|
+
|
8
|
+
Extras:
|
9
|
+
* Path (a better Pathname)
|
10
|
+
* Rash (a hash which can have Regexps as keys, allowing a single (key,value) pair to match many keys.)
|
11
|
+
* Progressbar (better than the progressbar gem)
|
12
|
+
* Colored (enhanced version of defunkt's colored -- adds ANSI colouring methods to String, eg: #red, #green, #light_blue, etc.)
|
13
|
+
* Browser (a fake browser, using mechanize, Progressbar, and CacheDB)
|
14
|
+
|
5
15
|
== Installing
|
6
16
|
|
7
17
|
gem install epitools
|
@@ -10,7 +20,7 @@ Awesome collection of Ruby tools!
|
|
10
20
|
|
11
21
|
This is basically a collection of enhancements to the ruby base types (Hash, Array,
|
12
22
|
Enumerable, etc.). To learn how it works, read the specs in spec/*.rb,
|
13
|
-
or check out the rdoc: http://rdoc.info/
|
23
|
+
or check out the rdoc: http://rdoc.info/gems/epitools/frames
|
14
24
|
|
15
25
|
== Copyright
|
16
26
|
|
data/Rakefile
CHANGED
@@ -1,17 +1,22 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
require
|
1
|
+
### Require all gems...
|
2
|
+
%w[
|
3
|
+
|
4
|
+
rubygems
|
5
|
+
|
6
|
+
rake
|
7
|
+
rake/rdoctask
|
8
|
+
rspec/core
|
9
|
+
rspec/core/rake_task
|
10
|
+
jeweler
|
11
|
+
|
12
|
+
].each { |mod| require mod }
|
13
|
+
|
14
|
+
desc 'Default: specs.'
|
15
|
+
task :default => :spec
|
13
16
|
|
14
|
-
|
17
|
+
#
|
18
|
+
# Jewelerrrr
|
19
|
+
#
|
15
20
|
Jeweler::Tasks.new do |gem|
|
16
21
|
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
22
|
gem.name = "epitools"
|
@@ -32,20 +37,19 @@ Jeweler::Tasks.new do |gem|
|
|
32
37
|
end
|
33
38
|
Jeweler::RubygemsDotOrgTasks.new
|
34
39
|
|
35
|
-
|
36
|
-
require 'rspec/core/rake_task'
|
40
|
+
desc 'Run all the specs.'
|
37
41
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
38
42
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
39
43
|
end
|
40
44
|
|
45
|
+
desc 'Run rcov code coverage'
|
41
46
|
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
42
47
|
spec.pattern = 'spec/**/*_spec.rb'
|
43
48
|
spec.rcov = true
|
44
49
|
end
|
45
50
|
|
46
|
-
task :default => :spec
|
47
51
|
|
48
|
-
|
52
|
+
desc 'Generate documentation for rdoc.'
|
49
53
|
Rake::RDocTask.new do |rdoc|
|
50
54
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
51
55
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.10
|
1
|
+
0.4.13
|
data/epitools.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{epitools}
|
8
|
-
s.version = "0.4.10"
|
8
|
+
s.version = "0.4.13"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["epitron"]
|
12
|
-
s.date = %q{2011-02-
|
12
|
+
s.date = %q{2011-02-14}
|
13
13
|
s.description = %q{Miscellaneous utility libraries to make my life easier.}
|
14
14
|
s.email = %q{chris@ill-logic.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lib/epitools.rb",
|
27
27
|
"lib/epitools/basetypes.rb",
|
28
28
|
"lib/epitools/browser.rb",
|
29
|
-
"lib/epitools/browser/
|
29
|
+
"lib/epitools/browser/cache.rb",
|
30
30
|
"lib/epitools/browser/mechanize_progressbar.rb",
|
31
31
|
"lib/epitools/clitools.rb",
|
32
32
|
"lib/epitools/colored.rb",
|
@@ -62,7 +62,7 @@ Gem::Specification.new do |s|
|
|
62
62
|
s.homepage = %q{http://github.com/epitron/epitools}
|
63
63
|
s.licenses = ["WTFPL"]
|
64
64
|
s.require_paths = ["lib"]
|
65
|
-
s.rubygems_version = %q{1.
|
65
|
+
s.rubygems_version = %q{1.5.2}
|
66
66
|
s.summary = %q{NOT UTILS... METILS!}
|
67
67
|
s.test_files = [
|
68
68
|
"spec/basetypes_spec.rb",
|
@@ -80,7 +80,6 @@ Gem::Specification.new do |s|
|
|
80
80
|
]
|
81
81
|
|
82
82
|
if s.respond_to? :specification_version then
|
83
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
84
83
|
s.specification_version = 3
|
85
84
|
|
86
85
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
data/lib/epitools/browser.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
|
1
2
|
require 'mechanize'
|
2
3
|
require 'uri'
|
3
4
|
require 'fileutils'
|
4
5
|
|
5
|
-
require 'epitools/browser/browser_cache'
|
6
|
+
require 'epitools/browser/cache'
|
6
7
|
require 'epitools/browser/mechanize_progressbar'
|
7
8
|
|
8
9
|
# TODO: Make socksify optional (eg: if proxy is specified)
|
@@ -100,12 +101,12 @@ class Browser
|
|
100
101
|
|
101
102
|
def init_cache!
|
102
103
|
# TODO: Rescue "couldn't load" exception and disable caching
|
103
|
-
@cache =
|
104
|
+
@cache = Cache.new(agent) if @use_cache
|
104
105
|
end
|
105
106
|
|
106
107
|
|
107
108
|
def relative?(url)
|
108
|
-
not url
|
109
|
+
not url[ %r{^https?://} ]
|
109
110
|
end
|
110
111
|
|
111
112
|
|
@@ -141,7 +142,7 @@ class Browser
|
|
141
142
|
cached_already = cache.include?(url)
|
142
143
|
|
143
144
|
puts
|
144
|
-
puts "[ #{url
|
145
|
+
puts "[ GET #{url} (using cache: #{use_cache}) ]"
|
145
146
|
|
146
147
|
delay unless cached_already
|
147
148
|
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'sqlite3'
|
3
|
+
|
4
|
+
def dmsg(msg)
|
5
|
+
|
6
|
+
if $DEBUG
|
7
|
+
case msg
|
8
|
+
when String
|
9
|
+
puts msg
|
10
|
+
else
|
11
|
+
puts msg.inspect
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
class Browser
|
18
|
+
|
19
|
+
#
|
20
|
+
# An SQLite3-backed browser cache (with gzip compressed pages)
|
21
|
+
#
|
22
|
+
class Cache
|
23
|
+
|
24
|
+
include Enumerable
|
25
|
+
|
26
|
+
attr_reader :db, :agent
|
27
|
+
|
28
|
+
def initialize(agent, filename="browsercache.db")
|
29
|
+
@agent = agent
|
30
|
+
@filename = filename
|
31
|
+
|
32
|
+
@db = SQLite3::Database.new(filename)
|
33
|
+
@db.busy_timeout(50)
|
34
|
+
|
35
|
+
create_tables
|
36
|
+
end
|
37
|
+
|
38
|
+
def inspect
|
39
|
+
"#<Browser::Cache filename=#{@filename.inspect}, count=#{count}, size=#{File.size @filename} bytes>"
|
40
|
+
end
|
41
|
+
|
42
|
+
def count
|
43
|
+
db.execute("SELECT COUNT(1) FROM cache").first.first.to_i
|
44
|
+
end
|
45
|
+
|
46
|
+
alias_method :size, :count
|
47
|
+
|
48
|
+
def put(page, original_url=nil, options={})
|
49
|
+
dmsg [:put, original_url]
|
50
|
+
|
51
|
+
raise "Invalid page" unless [:body, :content_type, :uri].all?{|m| page.respond_to? m }
|
52
|
+
|
53
|
+
url = page.uri.to_s
|
54
|
+
|
55
|
+
dmsg [:page_uri, url]
|
56
|
+
dmsg [:original_url, url]
|
57
|
+
|
58
|
+
if url != original_url
|
59
|
+
# redirect original_url to url
|
60
|
+
expire(original_url) if options[:overwrite]
|
61
|
+
db.execute(
|
62
|
+
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
63
|
+
original_url,
|
64
|
+
page.content_type,
|
65
|
+
nil,
|
66
|
+
url
|
67
|
+
)
|
68
|
+
end
|
69
|
+
|
70
|
+
#compressed_body = page.body
|
71
|
+
compressed_body = Zlib::Deflate.deflate(page.body)
|
72
|
+
|
73
|
+
expire(url) if options[:overwrite]
|
74
|
+
db.execute(
|
75
|
+
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
76
|
+
url,
|
77
|
+
page.content_type,
|
78
|
+
SQLite3::Blob.new( compressed_body ),
|
79
|
+
nil
|
80
|
+
)
|
81
|
+
|
82
|
+
true
|
83
|
+
|
84
|
+
rescue SQLite3::SQLException => e
|
85
|
+
p [:exception, e]
|
86
|
+
false
|
87
|
+
end
|
88
|
+
|
89
|
+
def row_to_page(row)
|
90
|
+
url, content_type, compressed_body, redirect = row
|
91
|
+
|
92
|
+
if redirect
|
93
|
+
get(redirect)
|
94
|
+
else
|
95
|
+
#body = compressed_body
|
96
|
+
body = Zlib::Inflate.inflate(compressed_body)
|
97
|
+
|
98
|
+
Mechanize::Page.new(
|
99
|
+
URI.parse(url),
|
100
|
+
{'content-type'=>content_type},
|
101
|
+
body,
|
102
|
+
nil,
|
103
|
+
agent
|
104
|
+
)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def pages_via_sql(*args, &block)
|
109
|
+
dmsg [:pages_via_sql, args]
|
110
|
+
if block_given?
|
111
|
+
db.execute(*args) do |row|
|
112
|
+
yield row_to_page(row)
|
113
|
+
end
|
114
|
+
else
|
115
|
+
db.execute(*args).map{|row| row_to_page(row) }
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def grep(pattern, &block)
|
120
|
+
pages_via_sql("SELECT * FROM cache WHERE url like '%#{pattern}%'", &block)
|
121
|
+
end
|
122
|
+
|
123
|
+
def get(url)
|
124
|
+
pages = pages_via_sql("SELECT * FROM cache WHERE url = ?", url.to_s)
|
125
|
+
|
126
|
+
if pages.any?
|
127
|
+
pages.first
|
128
|
+
else
|
129
|
+
nil
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def includes?(url)
|
134
|
+
db.execute("SELECT url FROM cache WHERE url = ?", url.to_s).any?
|
135
|
+
end
|
136
|
+
|
137
|
+
alias_method :include?, :includes?
|
138
|
+
|
139
|
+
def urls(pattern=nil)
|
140
|
+
if pattern
|
141
|
+
rows = db.execute("SELECT url FROM cache WHERE url LIKE '%#{pattern}%'")
|
142
|
+
else
|
143
|
+
rows = db.execute('SELECT url FROM cache')
|
144
|
+
end
|
145
|
+
rows.map{|row| row.first}
|
146
|
+
end
|
147
|
+
|
148
|
+
def clear(pattern=nil)
|
149
|
+
if pattern
|
150
|
+
db.execute("DELETE FROM cache WHERE url LIKE '%#{pattern}%'")
|
151
|
+
else
|
152
|
+
db.execute("DELETE FROM cache")
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def each(&block)
|
157
|
+
pages_via_sql("SELECT * FROM cache", &block)
|
158
|
+
end
|
159
|
+
|
160
|
+
def each_url
|
161
|
+
db.execute("SELECT url FROM cache") do |row|
|
162
|
+
yield row.first
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
def expire(url)
|
167
|
+
db.execute("DELETE FROM cache WHERE url = ?", url)
|
168
|
+
end
|
169
|
+
|
170
|
+
def recreate_tables
|
171
|
+
drop_tables rescue nil
|
172
|
+
create_tables
|
173
|
+
end
|
174
|
+
|
175
|
+
def delete!
|
176
|
+
File.unlink @filename
|
177
|
+
end
|
178
|
+
|
179
|
+
private
|
180
|
+
|
181
|
+
def list_tables
|
182
|
+
db.execute("SELECT name FROM SQLITE_MASTER WHERE type='table'")
|
183
|
+
end
|
184
|
+
|
185
|
+
def create_tables
|
186
|
+
db.execute("CREATE TABLE IF NOT EXISTS cache ( url varchar(2048), content_type varchar(255), body blob, redirect varchar(2048) )")
|
187
|
+
db.execute("CREATE UNIQUE INDEX IF NOT EXISTS url_index ON cache ( url )")
|
188
|
+
end
|
189
|
+
|
190
|
+
def drop_tables
|
191
|
+
db.execute("DROP TABLE cache")
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
end
|
data/spec/browser_spec.rb
CHANGED
@@ -1,14 +1,77 @@
|
|
1
1
|
require 'epitools/browser'
|
2
2
|
|
3
|
+
class Mechanize::Page
|
4
|
+
def url
|
5
|
+
uri.to_s
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
|
3
11
|
describe Browser do
|
4
12
|
|
5
13
|
before :all do
|
6
14
|
@browser = Browser.new
|
7
15
|
end
|
8
|
-
|
16
|
+
|
17
|
+
after :all do
|
18
|
+
@browser.cache.delete!
|
19
|
+
end
|
20
|
+
|
9
21
|
it "googles" do
|
10
22
|
page = @browser.get("http://google.com")
|
11
23
|
page.body["Feeling Lucky"].should_not be_empty
|
12
24
|
end
|
25
|
+
|
26
|
+
it "googles (cached)" do
|
27
|
+
@browser.get("http://google.com").body
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
describe Browser::Cache do
|
35
|
+
|
36
|
+
before :all do
|
37
|
+
@agent = Mechanize.new
|
38
|
+
Browser::Cache.new(@agent).delete!
|
39
|
+
@cache = Browser::Cache.new(@agent)
|
40
|
+
end
|
41
|
+
|
42
|
+
def new_page(body, url)
|
43
|
+
Mechanize::Page.new(
|
44
|
+
URI.parse(url),
|
45
|
+
{'content-type'=>'text/html'},
|
46
|
+
body,
|
47
|
+
nil,
|
48
|
+
@agent
|
49
|
+
)
|
50
|
+
end
|
51
|
+
|
52
|
+
after :all do
|
53
|
+
@cache.delete!
|
54
|
+
end
|
55
|
+
|
56
|
+
it "writes and reads" do
|
57
|
+
body = "Blah blah blah."
|
58
|
+
url = "http://example.com/url.html"
|
59
|
+
|
60
|
+
page = new_page(body, url)
|
61
|
+
|
62
|
+
page.body.should == body
|
63
|
+
page.url.should == url
|
64
|
+
|
65
|
+
@cache.put page, url
|
66
|
+
@cache.urls.size.should == 1
|
67
|
+
@cache.includes?(url).should == true
|
68
|
+
|
69
|
+
result = @cache.get url
|
70
|
+
|
71
|
+
body.should == page.body
|
72
|
+
body.should == result.body
|
73
|
+
url.should == page.url
|
74
|
+
url.should == result.url
|
75
|
+
end
|
13
76
|
|
14
77
|
end
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epitools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
- 10
|
10
|
-
version: 0.4.10
|
4
|
+
prerelease:
|
5
|
+
version: 0.4.13
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- epitron
|
@@ -15,7 +10,7 @@ autorequire:
|
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date: 2011-02-
|
13
|
+
date: 2011-02-14 00:00:00 -05:00
|
19
14
|
default_executable:
|
20
15
|
dependencies:
|
21
16
|
- !ruby/object:Gem::Dependency
|
@@ -26,11 +21,6 @@ dependencies:
|
|
26
21
|
requirements:
|
27
22
|
- - ~>
|
28
23
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 7
|
30
|
-
segments:
|
31
|
-
- 2
|
32
|
-
- 2
|
33
|
-
- 0
|
34
24
|
version: 2.2.0
|
35
25
|
type: :development
|
36
26
|
version_requirements: *id001
|
@@ -42,11 +32,6 @@ dependencies:
|
|
42
32
|
requirements:
|
43
33
|
- - ~>
|
44
34
|
- !ruby/object:Gem::Version
|
45
|
-
hash: 23
|
46
|
-
segments:
|
47
|
-
- 1
|
48
|
-
- 0
|
49
|
-
- 0
|
50
35
|
version: 1.0.0
|
51
36
|
type: :development
|
52
37
|
version_requirements: *id002
|
@@ -58,9 +43,6 @@ dependencies:
|
|
58
43
|
requirements:
|
59
44
|
- - ">="
|
60
45
|
- !ruby/object:Gem::Version
|
61
|
-
hash: 3
|
62
|
-
segments:
|
63
|
-
- 0
|
64
46
|
version: "0"
|
65
47
|
type: :development
|
66
48
|
version_requirements: *id003
|
@@ -83,7 +65,7 @@ files:
|
|
83
65
|
- lib/epitools.rb
|
84
66
|
- lib/epitools/basetypes.rb
|
85
67
|
- lib/epitools/browser.rb
|
86
|
-
- lib/epitools/browser/browser_cache.rb
|
68
|
+
- lib/epitools/browser/cache.rb
|
87
69
|
- lib/epitools/browser/mechanize_progressbar.rb
|
88
70
|
- lib/epitools/clitools.rb
|
89
71
|
- lib/epitools/colored.rb
|
@@ -129,23 +111,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
111
|
requirements:
|
130
112
|
- - ">="
|
131
113
|
- !ruby/object:Gem::Version
|
132
|
-
hash: 3
|
133
|
-
segments:
|
134
|
-
- 0
|
135
114
|
version: "0"
|
136
115
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
116
|
none: false
|
138
117
|
requirements:
|
139
118
|
- - ">="
|
140
119
|
- !ruby/object:Gem::Version
|
141
|
-
hash: 3
|
142
|
-
segments:
|
143
|
-
- 0
|
144
120
|
version: "0"
|
145
121
|
requirements: []
|
146
122
|
|
147
123
|
rubyforge_project:
|
148
|
-
rubygems_version: 1.
|
124
|
+
rubygems_version: 1.5.2
|
149
125
|
signing_key:
|
150
126
|
specification_version: 3
|
151
127
|
summary: NOT UTILS... METILS!
|
@@ -1,169 +0,0 @@
|
|
1
|
-
require 'mechanize'
|
2
|
-
require 'sqlite3'
|
3
|
-
|
4
|
-
class CacheDB
|
5
|
-
|
6
|
-
include Enumerable
|
7
|
-
|
8
|
-
attr_reader :db, :agent
|
9
|
-
|
10
|
-
def initialize(agent, filename="browsercache.db")
|
11
|
-
@agent = agent
|
12
|
-
@filename = filename
|
13
|
-
|
14
|
-
@db = SQLite3::Database.new(filename)
|
15
|
-
@db.busy_timeout(50)
|
16
|
-
|
17
|
-
create_tables
|
18
|
-
end
|
19
|
-
|
20
|
-
def inspect
|
21
|
-
"#<CacheDB filename=#{@filename.inspect}, count=#{count}, size=#{File.size @filename} bytes>"
|
22
|
-
end
|
23
|
-
|
24
|
-
def count
|
25
|
-
db.execute("SELECT COUNT(1) FROM cache").first.first.to_i
|
26
|
-
end
|
27
|
-
|
28
|
-
alias_method :size, :count
|
29
|
-
|
30
|
-
def put(page, original_url=nil, options={})
|
31
|
-
p [:put, original_url]
|
32
|
-
|
33
|
-
raise "Invalid page" unless [:body, :content_type, :uri].all?{|m| page.respond_to? m }
|
34
|
-
|
35
|
-
url = page.uri.to_s
|
36
|
-
|
37
|
-
p [:page_uri, url]
|
38
|
-
|
39
|
-
if url != original_url
|
40
|
-
# redirect original_url to url
|
41
|
-
expire(original_url) if options[:overwrite]
|
42
|
-
db.execute(
|
43
|
-
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
44
|
-
original_url,
|
45
|
-
page.content_type,
|
46
|
-
nil,
|
47
|
-
url
|
48
|
-
)
|
49
|
-
end
|
50
|
-
|
51
|
-
compressed_body = Zlib::Deflate.deflate(page.body)
|
52
|
-
expire(url) if options[:overwrite]
|
53
|
-
db.execute(
|
54
|
-
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
55
|
-
url,
|
56
|
-
page.content_type,
|
57
|
-
SQLite3::Blob.new( compressed_body ),
|
58
|
-
nil
|
59
|
-
)
|
60
|
-
|
61
|
-
true
|
62
|
-
|
63
|
-
rescue SQLite3::SQLException => e
|
64
|
-
p [:exception, e]
|
65
|
-
false
|
66
|
-
end
|
67
|
-
|
68
|
-
def row_to_page(row)
|
69
|
-
url, content_type, compressed_body, redirect = row
|
70
|
-
|
71
|
-
if redirect
|
72
|
-
get(redirect)
|
73
|
-
else
|
74
|
-
body = Zlib::Inflate.inflate(compressed_body)
|
75
|
-
|
76
|
-
Mechanize::Page.new(
|
77
|
-
URI.parse(url),
|
78
|
-
{'content-type'=>content_type},
|
79
|
-
body,
|
80
|
-
nil,
|
81
|
-
agent
|
82
|
-
)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def pages_via_sql(*args, &block)
|
87
|
-
p [:pages_via_sql, args]
|
88
|
-
if block_given?
|
89
|
-
db.execute(*args) do |row|
|
90
|
-
yield row_to_page(row)
|
91
|
-
end
|
92
|
-
else
|
93
|
-
db.execute(*args).map{|row| row_to_page(row) }
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
def grep(pattern, &block)
|
98
|
-
pages_via_sql("SELECT * FROM cache WHERE url like '%#{pattern}%'", &block)
|
99
|
-
end
|
100
|
-
|
101
|
-
def get(url)
|
102
|
-
pages = pages_via_sql("SELECT * FROM cache WHERE url = ?", url.to_s)
|
103
|
-
|
104
|
-
if pages.any?
|
105
|
-
pages.first
|
106
|
-
else
|
107
|
-
nil
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def includes?(url)
|
112
|
-
db.execute("SELECT url FROM cache WHERE url = ?", url.to_s).any?
|
113
|
-
end
|
114
|
-
|
115
|
-
alias_method :include?, :includes?
|
116
|
-
|
117
|
-
def urls(pattern=nil)
|
118
|
-
if pattern
|
119
|
-
rows = db.execute("SELECT url FROM cache WHERE url LIKE '%#{pattern}%'")
|
120
|
-
else
|
121
|
-
rows = db.execute('SELECT url FROM cache')
|
122
|
-
end
|
123
|
-
rows.map{|row| row.first}
|
124
|
-
end
|
125
|
-
|
126
|
-
def clear(pattern=nil)
|
127
|
-
if pattern
|
128
|
-
db.execute("DELETE FROM cache WHERE url LIKE '%#{pattern}%'")
|
129
|
-
else
|
130
|
-
db.execute("DELETE FROM cache")
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
def each(&block)
|
135
|
-
pages_via_sql("SELECT * FROM cache", &block)
|
136
|
-
end
|
137
|
-
|
138
|
-
def each_url
|
139
|
-
db.execute("SELECT url FROM cache") do |row|
|
140
|
-
yield row.first
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
def expire(url)
|
145
|
-
db.execute("DELETE FROM cache WHERE url = ?", url)
|
146
|
-
end
|
147
|
-
|
148
|
-
def recreate_tables
|
149
|
-
drop_tables rescue nil
|
150
|
-
create_tables
|
151
|
-
end
|
152
|
-
|
153
|
-
private
|
154
|
-
|
155
|
-
def list_tables
|
156
|
-
db.execute("SELECT name FROM SQLITE_MASTER WHERE type='table'")
|
157
|
-
end
|
158
|
-
|
159
|
-
def create_tables
|
160
|
-
db.execute("CREATE TABLE IF NOT EXISTS cache ( url varchar(2048), content_type varchar(255), body blob, redirect varchar(2048) )")
|
161
|
-
db.execute("CREATE UNIQUE INDEX IF NOT EXISTS url_index ON cache ( url )")
|
162
|
-
end
|
163
|
-
|
164
|
-
def drop_tables
|
165
|
-
db.execute("DROP TABLE cache")
|
166
|
-
end
|
167
|
-
|
168
|
-
|
169
|
-
end
|