epitools 0.4.10 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +13 -3
- data/Rakefile +21 -17
- data/VERSION +1 -1
- data/epitools.gemspec +4 -5
- data/lib/epitools/browser.rb +5 -4
- data/lib/epitools/browser/cache.rb +196 -0
- data/lib/epitools/browser/mechanize_progressbar.rb +5 -0
- data/spec/browser_spec.rb +64 -1
- metadata +5 -29
- data/lib/epitools/browser/browser_cache.rb +0 -169
data/README.rdoc
CHANGED
@@ -1,7 +1,17 @@
|
|
1
1
|
= epitools
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
Useful miscellaneous improvements for base Ruby objects, plus some extra
|
4
|
+
data structures and handy wrappers.
|
5
|
+
|
6
|
+
Base classes: Object, Enumerable, Hash, String, Array, Integer, etc.
|
7
|
+
|
8
|
+
Extras:
|
9
|
+
* Path (a better Pathname)
|
10
|
+
* Rash (a hash which can have Regexps as keys, allowing a single (key,value) pair to match many keys.)
|
11
|
+
* Progressbar (better than the progressbar gem)
|
12
|
+
* Colored (enhanced version of defunkt's colored -- adds ANSI colouring methods to String, eg: #red, #green, #light_blue, etc.)
|
13
|
+
* Browser (a fake browser, using mechanize, Progressbar, and CacheDB)
|
14
|
+
|
5
15
|
== Installing
|
6
16
|
|
7
17
|
gem install epitools
|
@@ -10,7 +20,7 @@ Awesome collection of Ruby tools!
|
|
10
20
|
|
11
21
|
This is basically a collection of enhancements to the ruby base types (Hash, Array,
|
12
22
|
Enumerable, etc.). To learn how it works, read the specs in spec/*.rb,
|
13
|
-
or check out the rdoc: http://rdoc.info/
|
23
|
+
or check out the rdoc: http://rdoc.info/gems/epitools/frames
|
14
24
|
|
15
25
|
== Copyright
|
16
26
|
|
data/Rakefile
CHANGED
@@ -1,17 +1,22 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
require
|
1
|
+
### Require all gems...
|
2
|
+
%w[
|
3
|
+
|
4
|
+
rubygems
|
5
|
+
|
6
|
+
rake
|
7
|
+
rake/rdoctask
|
8
|
+
rspec/core
|
9
|
+
rspec/core/rake_task
|
10
|
+
jeweler
|
11
|
+
|
12
|
+
].each { |mod| require mod }
|
13
|
+
|
14
|
+
desc 'Default: specs.'
|
15
|
+
task :default => :spec
|
13
16
|
|
14
|
-
|
17
|
+
#
|
18
|
+
# Jewelerrrr
|
19
|
+
#
|
15
20
|
Jeweler::Tasks.new do |gem|
|
16
21
|
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
22
|
gem.name = "epitools"
|
@@ -32,20 +37,19 @@ Jeweler::Tasks.new do |gem|
|
|
32
37
|
end
|
33
38
|
Jeweler::RubygemsDotOrgTasks.new
|
34
39
|
|
35
|
-
|
36
|
-
require 'rspec/core/rake_task'
|
40
|
+
desc 'Run all the specs.'
|
37
41
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
38
42
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
39
43
|
end
|
40
44
|
|
45
|
+
desc 'Run rcov code coverage'
|
41
46
|
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
42
47
|
spec.pattern = 'spec/**/*_spec.rb'
|
43
48
|
spec.rcov = true
|
44
49
|
end
|
45
50
|
|
46
|
-
task :default => :spec
|
47
51
|
|
48
|
-
|
52
|
+
desc 'Generate documentation for rdoc.'
|
49
53
|
Rake::RDocTask.new do |rdoc|
|
50
54
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
51
55
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.10
|
1
|
+
0.4.13
|
data/epitools.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{epitools}
|
8
|
-
s.version = "0.4.10"
|
8
|
+
s.version = "0.4.13"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["epitron"]
|
12
|
-
s.date = %q{2011-02-
|
12
|
+
s.date = %q{2011-02-14}
|
13
13
|
s.description = %q{Miscellaneous utility libraries to make my life easier.}
|
14
14
|
s.email = %q{chris@ill-logic.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lib/epitools.rb",
|
27
27
|
"lib/epitools/basetypes.rb",
|
28
28
|
"lib/epitools/browser.rb",
|
29
|
-
"lib/epitools/browser/browser_cache.rb",
|
29
|
+
"lib/epitools/browser/cache.rb",
|
30
30
|
"lib/epitools/browser/mechanize_progressbar.rb",
|
31
31
|
"lib/epitools/clitools.rb",
|
32
32
|
"lib/epitools/colored.rb",
|
@@ -62,7 +62,7 @@ Gem::Specification.new do |s|
|
|
62
62
|
s.homepage = %q{http://github.com/epitron/epitools}
|
63
63
|
s.licenses = ["WTFPL"]
|
64
64
|
s.require_paths = ["lib"]
|
65
|
-
s.rubygems_version = %q{1.
|
65
|
+
s.rubygems_version = %q{1.5.2}
|
66
66
|
s.summary = %q{NOT UTILS... METILS!}
|
67
67
|
s.test_files = [
|
68
68
|
"spec/basetypes_spec.rb",
|
@@ -80,7 +80,6 @@ Gem::Specification.new do |s|
|
|
80
80
|
]
|
81
81
|
|
82
82
|
if s.respond_to? :specification_version then
|
83
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
84
83
|
s.specification_version = 3
|
85
84
|
|
86
85
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
data/lib/epitools/browser.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
|
1
2
|
require 'mechanize'
|
2
3
|
require 'uri'
|
3
4
|
require 'fileutils'
|
4
5
|
|
5
|
-
require 'epitools/browser/browser_cache'
|
6
|
+
require 'epitools/browser/cache'
|
6
7
|
require 'epitools/browser/mechanize_progressbar'
|
7
8
|
|
8
9
|
# TODO: Make socksify optional (eg: if proxy is specified)
|
@@ -100,12 +101,12 @@ class Browser
|
|
100
101
|
|
101
102
|
def init_cache!
|
102
103
|
# TODO: Rescue "couldn't load" exception and disable caching
|
103
|
-
@cache =
|
104
|
+
@cache = Cache.new(agent) if @use_cache
|
104
105
|
end
|
105
106
|
|
106
107
|
|
107
108
|
def relative?(url)
|
108
|
-
not url
|
109
|
+
not url[ %r{^https?://} ]
|
109
110
|
end
|
110
111
|
|
111
112
|
|
@@ -141,7 +142,7 @@ class Browser
|
|
141
142
|
cached_already = cache.include?(url)
|
142
143
|
|
143
144
|
puts
|
144
|
-
puts "[ #{url
|
145
|
+
puts "[ GET #{url} (using cache: #{use_cache}) ]"
|
145
146
|
|
146
147
|
delay unless cached_already
|
147
148
|
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'sqlite3'
|
3
|
+
|
4
|
+
def dmsg(msg)
|
5
|
+
|
6
|
+
if $DEBUG
|
7
|
+
case msg
|
8
|
+
when String
|
9
|
+
puts msg
|
10
|
+
else
|
11
|
+
puts msg.inspect
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
class Browser
|
18
|
+
|
19
|
+
#
|
20
|
+
# An SQLite3-backed browser cache (with gzip compressed pages)
|
21
|
+
#
|
22
|
+
class Cache
|
23
|
+
|
24
|
+
include Enumerable
|
25
|
+
|
26
|
+
attr_reader :db, :agent
|
27
|
+
|
28
|
+
def initialize(agent, filename="browsercache.db")
|
29
|
+
@agent = agent
|
30
|
+
@filename = filename
|
31
|
+
|
32
|
+
@db = SQLite3::Database.new(filename)
|
33
|
+
@db.busy_timeout(50)
|
34
|
+
|
35
|
+
create_tables
|
36
|
+
end
|
37
|
+
|
38
|
+
def inspect
|
39
|
+
"#<Browser::Cache filename=#{@filename.inspect}, count=#{count}, size=#{File.size @filename} bytes>"
|
40
|
+
end
|
41
|
+
|
42
|
+
def count
|
43
|
+
db.execute("SELECT COUNT(1) FROM cache").first.first.to_i
|
44
|
+
end
|
45
|
+
|
46
|
+
alias_method :size, :count
|
47
|
+
|
48
|
+
def put(page, original_url=nil, options={})
|
49
|
+
dmsg [:put, original_url]
|
50
|
+
|
51
|
+
raise "Invalid page" unless [:body, :content_type, :uri].all?{|m| page.respond_to? m }
|
52
|
+
|
53
|
+
url = page.uri.to_s
|
54
|
+
|
55
|
+
dmsg [:page_uri, url]
|
56
|
+
dmsg [:original_url, url]
|
57
|
+
|
58
|
+
if url != original_url
|
59
|
+
# redirect original_url to url
|
60
|
+
expire(original_url) if options[:overwrite]
|
61
|
+
db.execute(
|
62
|
+
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
63
|
+
original_url,
|
64
|
+
page.content_type,
|
65
|
+
nil,
|
66
|
+
url
|
67
|
+
)
|
68
|
+
end
|
69
|
+
|
70
|
+
#compressed_body = page.body
|
71
|
+
compressed_body = Zlib::Deflate.deflate(page.body)
|
72
|
+
|
73
|
+
expire(url) if options[:overwrite]
|
74
|
+
db.execute(
|
75
|
+
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
76
|
+
url,
|
77
|
+
page.content_type,
|
78
|
+
SQLite3::Blob.new( compressed_body ),
|
79
|
+
nil
|
80
|
+
)
|
81
|
+
|
82
|
+
true
|
83
|
+
|
84
|
+
rescue SQLite3::SQLException => e
|
85
|
+
p [:exception, e]
|
86
|
+
false
|
87
|
+
end
|
88
|
+
|
89
|
+
def row_to_page(row)
|
90
|
+
url, content_type, compressed_body, redirect = row
|
91
|
+
|
92
|
+
if redirect
|
93
|
+
get(redirect)
|
94
|
+
else
|
95
|
+
#body = compressed_body
|
96
|
+
body = Zlib::Inflate.inflate(compressed_body)
|
97
|
+
|
98
|
+
Mechanize::Page.new(
|
99
|
+
URI.parse(url),
|
100
|
+
{'content-type'=>content_type},
|
101
|
+
body,
|
102
|
+
nil,
|
103
|
+
agent
|
104
|
+
)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def pages_via_sql(*args, &block)
|
109
|
+
dmsg [:pages_via_sql, args]
|
110
|
+
if block_given?
|
111
|
+
db.execute(*args) do |row|
|
112
|
+
yield row_to_page(row)
|
113
|
+
end
|
114
|
+
else
|
115
|
+
db.execute(*args).map{|row| row_to_page(row) }
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def grep(pattern, &block)
|
120
|
+
pages_via_sql("SELECT * FROM cache WHERE url like '%#{pattern}%'", &block)
|
121
|
+
end
|
122
|
+
|
123
|
+
def get(url)
|
124
|
+
pages = pages_via_sql("SELECT * FROM cache WHERE url = ?", url.to_s)
|
125
|
+
|
126
|
+
if pages.any?
|
127
|
+
pages.first
|
128
|
+
else
|
129
|
+
nil
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def includes?(url)
|
134
|
+
db.execute("SELECT url FROM cache WHERE url = ?", url.to_s).any?
|
135
|
+
end
|
136
|
+
|
137
|
+
alias_method :include?, :includes?
|
138
|
+
|
139
|
+
def urls(pattern=nil)
|
140
|
+
if pattern
|
141
|
+
rows = db.execute("SELECT url FROM cache WHERE url LIKE '%#{pattern}%'")
|
142
|
+
else
|
143
|
+
rows = db.execute('SELECT url FROM cache')
|
144
|
+
end
|
145
|
+
rows.map{|row| row.first}
|
146
|
+
end
|
147
|
+
|
148
|
+
def clear(pattern=nil)
|
149
|
+
if pattern
|
150
|
+
db.execute("DELETE FROM cache WHERE url LIKE '%#{pattern}%'")
|
151
|
+
else
|
152
|
+
db.execute("DELETE FROM cache")
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def each(&block)
|
157
|
+
pages_via_sql("SELECT * FROM cache", &block)
|
158
|
+
end
|
159
|
+
|
160
|
+
def each_url
|
161
|
+
db.execute("SELECT url FROM cache") do |row|
|
162
|
+
yield row.first
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
def expire(url)
|
167
|
+
db.execute("DELETE FROM cache WHERE url = ?", url)
|
168
|
+
end
|
169
|
+
|
170
|
+
def recreate_tables
|
171
|
+
drop_tables rescue nil
|
172
|
+
create_tables
|
173
|
+
end
|
174
|
+
|
175
|
+
def delete!
|
176
|
+
File.unlink @filename
|
177
|
+
end
|
178
|
+
|
179
|
+
private
|
180
|
+
|
181
|
+
def list_tables
|
182
|
+
db.execute("SELECT name FROM SQLITE_MASTER WHERE type='table'")
|
183
|
+
end
|
184
|
+
|
185
|
+
def create_tables
|
186
|
+
db.execute("CREATE TABLE IF NOT EXISTS cache ( url varchar(2048), content_type varchar(255), body blob, redirect varchar(2048) )")
|
187
|
+
db.execute("CREATE UNIQUE INDEX IF NOT EXISTS url_index ON cache ( url )")
|
188
|
+
end
|
189
|
+
|
190
|
+
def drop_tables
|
191
|
+
db.execute("DROP TABLE cache")
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
end
|
data/spec/browser_spec.rb
CHANGED
@@ -1,14 +1,77 @@
|
|
1
1
|
require 'epitools/browser'
|
2
2
|
|
3
|
+
class Mechanize::Page
|
4
|
+
def url
|
5
|
+
uri.to_s
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
|
3
11
|
describe Browser do
|
4
12
|
|
5
13
|
before :all do
|
6
14
|
@browser = Browser.new
|
7
15
|
end
|
8
|
-
|
16
|
+
|
17
|
+
after :all do
|
18
|
+
@browser.cache.delete!
|
19
|
+
end
|
20
|
+
|
9
21
|
it "googles" do
|
10
22
|
page = @browser.get("http://google.com")
|
11
23
|
page.body["Feeling Lucky"].should_not be_empty
|
12
24
|
end
|
25
|
+
|
26
|
+
it "googles (cached)" do
|
27
|
+
@browser.get("http://google.com").body
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
describe Browser::Cache do
|
35
|
+
|
36
|
+
before :all do
|
37
|
+
@agent = Mechanize.new
|
38
|
+
Browser::Cache.new(@agent).delete!
|
39
|
+
@cache = Browser::Cache.new(@agent)
|
40
|
+
end
|
41
|
+
|
42
|
+
def new_page(body, url)
|
43
|
+
Mechanize::Page.new(
|
44
|
+
URI.parse(url),
|
45
|
+
{'content-type'=>'text/html'},
|
46
|
+
body,
|
47
|
+
nil,
|
48
|
+
@agent
|
49
|
+
)
|
50
|
+
end
|
51
|
+
|
52
|
+
after :all do
|
53
|
+
@cache.delete!
|
54
|
+
end
|
55
|
+
|
56
|
+
it "writes and reads" do
|
57
|
+
body = "Blah blah blah."
|
58
|
+
url = "http://example.com/url.html"
|
59
|
+
|
60
|
+
page = new_page(body, url)
|
61
|
+
|
62
|
+
page.body.should == body
|
63
|
+
page.url.should == url
|
64
|
+
|
65
|
+
@cache.put page, url
|
66
|
+
@cache.urls.size.should == 1
|
67
|
+
@cache.includes?(url).should == true
|
68
|
+
|
69
|
+
result = @cache.get url
|
70
|
+
|
71
|
+
body.should == page.body
|
72
|
+
body.should == result.body
|
73
|
+
url.should == page.url
|
74
|
+
url.should == result.url
|
75
|
+
end
|
13
76
|
|
14
77
|
end
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epitools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
- 10
|
10
|
-
version: 0.4.10
|
4
|
+
prerelease:
|
5
|
+
version: 0.4.13
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- epitron
|
@@ -15,7 +10,7 @@ autorequire:
|
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date: 2011-02-
|
13
|
+
date: 2011-02-14 00:00:00 -05:00
|
19
14
|
default_executable:
|
20
15
|
dependencies:
|
21
16
|
- !ruby/object:Gem::Dependency
|
@@ -26,11 +21,6 @@ dependencies:
|
|
26
21
|
requirements:
|
27
22
|
- - ~>
|
28
23
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 7
|
30
|
-
segments:
|
31
|
-
- 2
|
32
|
-
- 2
|
33
|
-
- 0
|
34
24
|
version: 2.2.0
|
35
25
|
type: :development
|
36
26
|
version_requirements: *id001
|
@@ -42,11 +32,6 @@ dependencies:
|
|
42
32
|
requirements:
|
43
33
|
- - ~>
|
44
34
|
- !ruby/object:Gem::Version
|
45
|
-
hash: 23
|
46
|
-
segments:
|
47
|
-
- 1
|
48
|
-
- 0
|
49
|
-
- 0
|
50
35
|
version: 1.0.0
|
51
36
|
type: :development
|
52
37
|
version_requirements: *id002
|
@@ -58,9 +43,6 @@ dependencies:
|
|
58
43
|
requirements:
|
59
44
|
- - ">="
|
60
45
|
- !ruby/object:Gem::Version
|
61
|
-
hash: 3
|
62
|
-
segments:
|
63
|
-
- 0
|
64
46
|
version: "0"
|
65
47
|
type: :development
|
66
48
|
version_requirements: *id003
|
@@ -83,7 +65,7 @@ files:
|
|
83
65
|
- lib/epitools.rb
|
84
66
|
- lib/epitools/basetypes.rb
|
85
67
|
- lib/epitools/browser.rb
|
86
|
-
- lib/epitools/browser/browser_cache.rb
|
68
|
+
- lib/epitools/browser/cache.rb
|
87
69
|
- lib/epitools/browser/mechanize_progressbar.rb
|
88
70
|
- lib/epitools/clitools.rb
|
89
71
|
- lib/epitools/colored.rb
|
@@ -129,23 +111,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
111
|
requirements:
|
130
112
|
- - ">="
|
131
113
|
- !ruby/object:Gem::Version
|
132
|
-
hash: 3
|
133
|
-
segments:
|
134
|
-
- 0
|
135
114
|
version: "0"
|
136
115
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
116
|
none: false
|
138
117
|
requirements:
|
139
118
|
- - ">="
|
140
119
|
- !ruby/object:Gem::Version
|
141
|
-
hash: 3
|
142
|
-
segments:
|
143
|
-
- 0
|
144
120
|
version: "0"
|
145
121
|
requirements: []
|
146
122
|
|
147
123
|
rubyforge_project:
|
148
|
-
rubygems_version: 1.
|
124
|
+
rubygems_version: 1.5.2
|
149
125
|
signing_key:
|
150
126
|
specification_version: 3
|
151
127
|
summary: NOT UTILS... METILS!
|
@@ -1,169 +0,0 @@
|
|
1
|
-
require 'mechanize'
|
2
|
-
require 'sqlite3'
|
3
|
-
|
4
|
-
class CacheDB
|
5
|
-
|
6
|
-
include Enumerable
|
7
|
-
|
8
|
-
attr_reader :db, :agent
|
9
|
-
|
10
|
-
def initialize(agent, filename="browsercache.db")
|
11
|
-
@agent = agent
|
12
|
-
@filename = filename
|
13
|
-
|
14
|
-
@db = SQLite3::Database.new(filename)
|
15
|
-
@db.busy_timeout(50)
|
16
|
-
|
17
|
-
create_tables
|
18
|
-
end
|
19
|
-
|
20
|
-
def inspect
|
21
|
-
"#<CacheDB filename=#{@filename.inspect}, count=#{count}, size=#{File.size @filename} bytes>"
|
22
|
-
end
|
23
|
-
|
24
|
-
def count
|
25
|
-
db.execute("SELECT COUNT(1) FROM cache").first.first.to_i
|
26
|
-
end
|
27
|
-
|
28
|
-
alias_method :size, :count
|
29
|
-
|
30
|
-
def put(page, original_url=nil, options={})
|
31
|
-
p [:put, original_url]
|
32
|
-
|
33
|
-
raise "Invalid page" unless [:body, :content_type, :uri].all?{|m| page.respond_to? m }
|
34
|
-
|
35
|
-
url = page.uri.to_s
|
36
|
-
|
37
|
-
p [:page_uri, url]
|
38
|
-
|
39
|
-
if url != original_url
|
40
|
-
# redirect original_url to url
|
41
|
-
expire(original_url) if options[:overwrite]
|
42
|
-
db.execute(
|
43
|
-
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
44
|
-
original_url,
|
45
|
-
page.content_type,
|
46
|
-
nil,
|
47
|
-
url
|
48
|
-
)
|
49
|
-
end
|
50
|
-
|
51
|
-
compressed_body = Zlib::Deflate.deflate(page.body)
|
52
|
-
expire(url) if options[:overwrite]
|
53
|
-
db.execute(
|
54
|
-
"INSERT INTO cache VALUES ( ?, ?, ?, ? )",
|
55
|
-
url,
|
56
|
-
page.content_type,
|
57
|
-
SQLite3::Blob.new( compressed_body ),
|
58
|
-
nil
|
59
|
-
)
|
60
|
-
|
61
|
-
true
|
62
|
-
|
63
|
-
rescue SQLite3::SQLException => e
|
64
|
-
p [:exception, e]
|
65
|
-
false
|
66
|
-
end
|
67
|
-
|
68
|
-
def row_to_page(row)
|
69
|
-
url, content_type, compressed_body, redirect = row
|
70
|
-
|
71
|
-
if redirect
|
72
|
-
get(redirect)
|
73
|
-
else
|
74
|
-
body = Zlib::Inflate.inflate(compressed_body)
|
75
|
-
|
76
|
-
Mechanize::Page.new(
|
77
|
-
URI.parse(url),
|
78
|
-
{'content-type'=>content_type},
|
79
|
-
body,
|
80
|
-
nil,
|
81
|
-
agent
|
82
|
-
)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def pages_via_sql(*args, &block)
|
87
|
-
p [:pages_via_sql, args]
|
88
|
-
if block_given?
|
89
|
-
db.execute(*args) do |row|
|
90
|
-
yield row_to_page(row)
|
91
|
-
end
|
92
|
-
else
|
93
|
-
db.execute(*args).map{|row| row_to_page(row) }
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
def grep(pattern, &block)
|
98
|
-
pages_via_sql("SELECT * FROM cache WHERE url like '%#{pattern}%'", &block)
|
99
|
-
end
|
100
|
-
|
101
|
-
def get(url)
|
102
|
-
pages = pages_via_sql("SELECT * FROM cache WHERE url = ?", url.to_s)
|
103
|
-
|
104
|
-
if pages.any?
|
105
|
-
pages.first
|
106
|
-
else
|
107
|
-
nil
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def includes?(url)
|
112
|
-
db.execute("SELECT url FROM cache WHERE url = ?", url.to_s).any?
|
113
|
-
end
|
114
|
-
|
115
|
-
alias_method :include?, :includes?
|
116
|
-
|
117
|
-
def urls(pattern=nil)
|
118
|
-
if pattern
|
119
|
-
rows = db.execute("SELECT url FROM cache WHERE url LIKE '%#{pattern}%'")
|
120
|
-
else
|
121
|
-
rows = db.execute('SELECT url FROM cache')
|
122
|
-
end
|
123
|
-
rows.map{|row| row.first}
|
124
|
-
end
|
125
|
-
|
126
|
-
def clear(pattern=nil)
|
127
|
-
if pattern
|
128
|
-
db.execute("DELETE FROM cache WHERE url LIKE '%#{pattern}%'")
|
129
|
-
else
|
130
|
-
db.execute("DELETE FROM cache")
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
def each(&block)
|
135
|
-
pages_via_sql("SELECT * FROM cache", &block)
|
136
|
-
end
|
137
|
-
|
138
|
-
def each_url
|
139
|
-
db.execute("SELECT url FROM cache") do |row|
|
140
|
-
yield row.first
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
def expire(url)
|
145
|
-
db.execute("DELETE FROM cache WHERE url = ?", url)
|
146
|
-
end
|
147
|
-
|
148
|
-
def recreate_tables
|
149
|
-
drop_tables rescue nil
|
150
|
-
create_tables
|
151
|
-
end
|
152
|
-
|
153
|
-
private
|
154
|
-
|
155
|
-
def list_tables
|
156
|
-
db.execute("SELECT name FROM SQLITE_MASTER WHERE type='table'")
|
157
|
-
end
|
158
|
-
|
159
|
-
def create_tables
|
160
|
-
db.execute("CREATE TABLE IF NOT EXISTS cache ( url varchar(2048), content_type varchar(255), body blob, redirect varchar(2048) )")
|
161
|
-
db.execute("CREATE UNIQUE INDEX IF NOT EXISTS url_index ON cache ( url )")
|
162
|
-
end
|
163
|
-
|
164
|
-
def drop_tables
|
165
|
-
db.execute("DROP TABLE cache")
|
166
|
-
end
|
167
|
-
|
168
|
-
|
169
|
-
end
|