epitools 0.4.32 → 0.4.33
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/epitools.gemspec +2 -2
- data/lib/epitools/basetypes.rb +8 -0
- data/lib/epitools/browser.rb +49 -29
- data/lib/epitools/browser/cache.rb +25 -8
- data/lib/epitools/browser/mechanize_progressbar.rb +0 -0
- data/spec/browser_spec.rb +7 -1
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.32
|
1
|
+
0.4.33
|
data/epitools.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{epitools}
|
8
|
-
s.version = "0.4.32"
|
8
|
+
s.version = "0.4.33"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["epitron"]
|
12
|
-
s.date = %q{2011-05-
|
12
|
+
s.date = %q{2011-05-16}
|
13
13
|
s.description = %q{Miscellaneous utility libraries to make my life easier.}
|
14
14
|
s.email = %q{chris@ill-logic.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/epitools/basetypes.rb
CHANGED
data/lib/epitools/browser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
|
2
1
|
require 'mechanize'
|
3
2
|
require 'uri'
|
4
3
|
require 'fileutils'
|
4
|
+
require 'json'
|
5
5
|
|
6
6
|
require 'epitools'
|
7
7
|
require 'epitools/browser/cache'
|
@@ -39,6 +39,14 @@ class BrowserOptions < OpenStruct
|
|
39
39
|
end
|
40
40
|
=end
|
41
41
|
|
42
|
+
# Monkeypatches!
|
43
|
+
class Mechanize::File
|
44
|
+
def content_type
|
45
|
+
response['content-type']
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
|
42
50
|
#
|
43
51
|
# A mechanize class that emulates a web-browser, with cache and everything.
|
44
52
|
# Progress bars are enabled by default.
|
@@ -74,7 +82,7 @@ class Browser
|
|
74
82
|
init_agent!
|
75
83
|
init_cache!
|
76
84
|
end
|
77
|
-
|
85
|
+
|
78
86
|
|
79
87
|
def init_agent!
|
80
88
|
@agent = Mechanize.new do |a|
|
@@ -106,18 +114,46 @@ class Browser
|
|
106
114
|
end
|
107
115
|
|
108
116
|
|
117
|
+
def load_cookies!
|
118
|
+
if File.exists? @cookie_file
|
119
|
+
agent.cookie_jar.load @cookie_file
|
120
|
+
true
|
121
|
+
else
|
122
|
+
false
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
|
127
|
+
def save_cookies!
|
128
|
+
agent.cookie_jar.save_as @cookie_file
|
129
|
+
true
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
|
109
134
|
def relative?(url)
|
110
135
|
not url[ %r{^https?://} ]
|
111
136
|
end
|
112
137
|
|
138
|
+
|
139
|
+
def cacheable?(page)
|
140
|
+
case page.content_type
|
141
|
+
when %r{^(text|application)}
|
142
|
+
true
|
143
|
+
end
|
144
|
+
end
|
113
145
|
|
146
|
+
|
114
147
|
def cache_put(page, url)
|
115
|
-
if
|
116
|
-
|
117
|
-
|
148
|
+
if cache.valid_page?(page)
|
149
|
+
if page.content_type =~ %r{(^text/|^application/javascript|javascript)}
|
150
|
+
puts " |_ writing to cache"
|
151
|
+
cache.put(page, url, :overwrite=>true)
|
152
|
+
end
|
118
153
|
end
|
119
154
|
end
|
120
155
|
|
156
|
+
|
121
157
|
#
|
122
158
|
# Retrieve an URL, and return a Mechanize::Page instance (which acts a
|
123
159
|
# bit like a Nokogiri::HTML::Document instance.)
|
@@ -136,9 +172,7 @@ class Browser
|
|
136
172
|
#end
|
137
173
|
|
138
174
|
# Determine the cache setting
|
139
|
-
|
140
|
-
|
141
|
-
use_cache = options[:use_cache] || @use_cache
|
175
|
+
use_cache = options[:use_cache] || options[:cache] || options[:cached] || @use_cache
|
142
176
|
|
143
177
|
cached_already = cache.include?(url)
|
144
178
|
|
@@ -149,20 +183,14 @@ class Browser
|
|
149
183
|
|
150
184
|
begin
|
151
185
|
|
152
|
-
if
|
153
|
-
page = cache.get(url)
|
154
|
-
if page.nil?
|
155
|
-
puts " |_ CACHE FAIL! Re-getting page."
|
156
|
-
page = get(url, false)
|
157
|
-
end
|
186
|
+
if page = cache.get(url)
|
158
187
|
puts " |_ cached (#{page.content_type})"
|
159
188
|
else
|
160
|
-
page = agent.get
|
189
|
+
page = agent.get(url)
|
161
190
|
@last_get = Time.now
|
191
|
+
cache_put(page, url)
|
162
192
|
end
|
163
193
|
|
164
|
-
cache_put(page, url) unless cached_already
|
165
|
-
|
166
194
|
puts
|
167
195
|
|
168
196
|
rescue Net::HTTPBadResponse, Errno::ECONNRESET, SocketError, Timeout::Error, SOCKSError => e
|
@@ -196,22 +224,14 @@ class Browser
|
|
196
224
|
end
|
197
225
|
|
198
226
|
|
199
|
-
#
|
200
|
-
|
227
|
+
#
|
228
|
+
# Delegate certain methods to @agent
|
229
|
+
#
|
230
|
+
[:head, :post, :put, :submit].each do |meth|
|
201
231
|
define_method meth do |*args|
|
202
232
|
agent.send(meth, *args)
|
203
233
|
end
|
204
234
|
end
|
205
235
|
|
206
|
-
private
|
207
|
-
|
208
|
-
def load_cookies!
|
209
|
-
agent.cookie_jar.load @cookie_file if File.exists? @cookie_file
|
210
|
-
end
|
211
|
-
|
212
|
-
def save_cookies!
|
213
|
-
agent.cookie_jar.save_as @cookie_file
|
214
|
-
end
|
215
|
-
|
216
236
|
end
|
217
237
|
|
@@ -48,10 +48,15 @@ class Browser
|
|
48
48
|
|
49
49
|
alias_method :size, :count
|
50
50
|
|
51
|
+
def valid_page?(page)
|
52
|
+
[:body, :content_type, :uri].all?{|m| page.respond_to? m }
|
53
|
+
end
|
54
|
+
|
55
|
+
|
51
56
|
def put(page, original_url=nil, options={})
|
52
57
|
dmsg [:put, original_url]
|
53
58
|
|
54
|
-
raise "Invalid page" unless
|
59
|
+
raise "Invalid page" unless valid_page?(page)
|
55
60
|
|
56
61
|
url = page.uri.to_s
|
57
62
|
|
@@ -98,13 +103,25 @@ class Browser
|
|
98
103
|
#body = compressed_body
|
99
104
|
body = Zlib::Inflate.inflate(compressed_body)
|
100
105
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
106
|
+
if content_type =~ /^(text\/html)|(application\/xhtml\+xml)/i
|
107
|
+
Mechanize::Page.new(
|
108
|
+
#initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
|
109
|
+
URI.parse(url),
|
110
|
+
{'content-type'=>content_type},
|
111
|
+
body,
|
112
|
+
nil,
|
113
|
+
agent
|
114
|
+
)
|
115
|
+
else
|
116
|
+
Mechanize::File.new(
|
117
|
+
#initialize(uri=nil, response=nil, body=nil, code=nil
|
118
|
+
URI.parse(url),
|
119
|
+
{'content-type'=>content_type},
|
120
|
+
body,
|
121
|
+
nil
|
122
|
+
)
|
123
|
+
end
|
124
|
+
|
108
125
|
end
|
109
126
|
end
|
110
127
|
|
File without changes
|
data/spec/browser_spec.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'pp'
|
1
2
|
require 'epitools/browser'
|
2
3
|
|
3
4
|
class Mechanize::Page
|
@@ -7,7 +8,6 @@ class Mechanize::Page
|
|
7
8
|
end
|
8
9
|
|
9
10
|
|
10
|
-
|
11
11
|
describe Browser do
|
12
12
|
|
13
13
|
before :all do
|
@@ -18,6 +18,12 @@ describe Browser do
|
|
18
18
|
@browser.cache.delete!
|
19
19
|
end
|
20
20
|
|
21
|
+
it "caches javascript" do
|
22
|
+
url = "http://code.jquery.com/jquery-1.0.pack.js"
|
23
|
+
page = @browser.get(url)
|
24
|
+
@browser.cache.get(url).should_not == nil
|
25
|
+
end
|
26
|
+
|
21
27
|
it "googles" do
|
22
28
|
page = @browser.get("http://google.com")
|
23
29
|
page.body["Feeling Lucky"].should_not be_empty
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epitools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 77
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 4
|
9
|
-
-
|
10
|
-
version: 0.4.32
|
9
|
+
- 33
|
10
|
+
version: 0.4.33
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- epitron
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-05-
|
18
|
+
date: 2011-05-16 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rspec
|