epitools 0.4.32 → 0.4.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/epitools.gemspec +2 -2
- data/lib/epitools/basetypes.rb +8 -0
- data/lib/epitools/browser.rb +49 -29
- data/lib/epitools/browser/cache.rb +25 -8
- data/lib/epitools/browser/mechanize_progressbar.rb +0 -0
- data/spec/browser_spec.rb +7 -1
- metadata +4 -4
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.4.32
|
|
1
|
+
0.4.33
|
data/epitools.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{epitools}
|
|
8
|
-
s.version = "0.4.32"
|
|
8
|
+
s.version = "0.4.33"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["epitron"]
|
|
12
|
-
s.date = %q{2011-05-
|
|
12
|
+
s.date = %q{2011-05-16}
|
|
13
13
|
s.description = %q{Miscellaneous utility libraries to make my life easier.}
|
|
14
14
|
s.email = %q{chris@ill-logic.com}
|
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/epitools/basetypes.rb
CHANGED
data/lib/epitools/browser.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
|
|
2
1
|
require 'mechanize'
|
|
3
2
|
require 'uri'
|
|
4
3
|
require 'fileutils'
|
|
4
|
+
require 'json'
|
|
5
5
|
|
|
6
6
|
require 'epitools'
|
|
7
7
|
require 'epitools/browser/cache'
|
|
@@ -39,6 +39,14 @@ class BrowserOptions < OpenStruct
|
|
|
39
39
|
end
|
|
40
40
|
=end
|
|
41
41
|
|
|
42
|
+
# Monkeypatches!
|
|
43
|
+
class Mechanize::File
|
|
44
|
+
def content_type
|
|
45
|
+
response['content-type']
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
|
|
42
50
|
#
|
|
43
51
|
# A mechanize class that emulates a web-browser, with cache and everything.
|
|
44
52
|
# Progress bars are enabled by default.
|
|
@@ -74,7 +82,7 @@ class Browser
|
|
|
74
82
|
init_agent!
|
|
75
83
|
init_cache!
|
|
76
84
|
end
|
|
77
|
-
|
|
85
|
+
|
|
78
86
|
|
|
79
87
|
def init_agent!
|
|
80
88
|
@agent = Mechanize.new do |a|
|
|
@@ -106,18 +114,46 @@ class Browser
|
|
|
106
114
|
end
|
|
107
115
|
|
|
108
116
|
|
|
117
|
+
def load_cookies!
|
|
118
|
+
if File.exists? @cookie_file
|
|
119
|
+
agent.cookie_jar.load @cookie_file
|
|
120
|
+
true
|
|
121
|
+
else
|
|
122
|
+
false
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def save_cookies!
|
|
128
|
+
agent.cookie_jar.save_as @cookie_file
|
|
129
|
+
true
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
|
|
109
134
|
def relative?(url)
|
|
110
135
|
not url[ %r{^https?://} ]
|
|
111
136
|
end
|
|
112
137
|
|
|
138
|
+
|
|
139
|
+
def cacheable?(page)
|
|
140
|
+
case page.content_type
|
|
141
|
+
when %r{^(text|application)}
|
|
142
|
+
true
|
|
143
|
+
end
|
|
144
|
+
end
|
|
113
145
|
|
|
146
|
+
|
|
114
147
|
def cache_put(page, url)
|
|
115
|
-
if
|
|
116
|
-
|
|
117
|
-
|
|
148
|
+
if cache.valid_page?(page)
|
|
149
|
+
if page.content_type =~ %r{(^text/|^application/javascript|javascript)}
|
|
150
|
+
puts " |_ writing to cache"
|
|
151
|
+
cache.put(page, url, :overwrite=>true)
|
|
152
|
+
end
|
|
118
153
|
end
|
|
119
154
|
end
|
|
120
155
|
|
|
156
|
+
|
|
121
157
|
#
|
|
122
158
|
# Retrieve an URL, and return a Mechanize::Page instance (which acts a
|
|
123
159
|
# bit like a Nokogiri::HTML::Document instance.)
|
|
@@ -136,9 +172,7 @@ class Browser
|
|
|
136
172
|
#end
|
|
137
173
|
|
|
138
174
|
# Determine the cache setting
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
use_cache = options[:use_cache] || @use_cache
|
|
175
|
+
use_cache = options[:use_cache] || options[:cache] || options[:cached] || @use_cache
|
|
142
176
|
|
|
143
177
|
cached_already = cache.include?(url)
|
|
144
178
|
|
|
@@ -149,20 +183,14 @@ class Browser
|
|
|
149
183
|
|
|
150
184
|
begin
|
|
151
185
|
|
|
152
|
-
if
|
|
153
|
-
page = cache.get(url)
|
|
154
|
-
if page.nil?
|
|
155
|
-
puts " |_ CACHE FAIL! Re-getting page."
|
|
156
|
-
page = get(url, false)
|
|
157
|
-
end
|
|
186
|
+
if page = cache.get(url)
|
|
158
187
|
puts " |_ cached (#{page.content_type})"
|
|
159
188
|
else
|
|
160
|
-
page = agent.get
|
|
189
|
+
page = agent.get(url)
|
|
161
190
|
@last_get = Time.now
|
|
191
|
+
cache_put(page, url)
|
|
162
192
|
end
|
|
163
193
|
|
|
164
|
-
cache_put(page, url) unless cached_already
|
|
165
|
-
|
|
166
194
|
puts
|
|
167
195
|
|
|
168
196
|
rescue Net::HTTPBadResponse, Errno::ECONNRESET, SocketError, Timeout::Error, SOCKSError => e
|
|
@@ -196,22 +224,14 @@ class Browser
|
|
|
196
224
|
end
|
|
197
225
|
|
|
198
226
|
|
|
199
|
-
#
|
|
200
|
-
|
|
227
|
+
#
|
|
228
|
+
# Delegate certain methods to @agent
|
|
229
|
+
#
|
|
230
|
+
[:head, :post, :put, :submit].each do |meth|
|
|
201
231
|
define_method meth do |*args|
|
|
202
232
|
agent.send(meth, *args)
|
|
203
233
|
end
|
|
204
234
|
end
|
|
205
235
|
|
|
206
|
-
private
|
|
207
|
-
|
|
208
|
-
def load_cookies!
|
|
209
|
-
agent.cookie_jar.load @cookie_file if File.exists? @cookie_file
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
def save_cookies!
|
|
213
|
-
agent.cookie_jar.save_as @cookie_file
|
|
214
|
-
end
|
|
215
|
-
|
|
216
236
|
end
|
|
217
237
|
|
|
@@ -48,10 +48,15 @@ class Browser
|
|
|
48
48
|
|
|
49
49
|
alias_method :size, :count
|
|
50
50
|
|
|
51
|
+
def valid_page?(page)
|
|
52
|
+
[:body, :content_type, :uri].all?{|m| page.respond_to? m }
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
|
|
51
56
|
def put(page, original_url=nil, options={})
|
|
52
57
|
dmsg [:put, original_url]
|
|
53
58
|
|
|
54
|
-
raise "Invalid page" unless
|
|
59
|
+
raise "Invalid page" unless valid_page?(page)
|
|
55
60
|
|
|
56
61
|
url = page.uri.to_s
|
|
57
62
|
|
|
@@ -98,13 +103,25 @@ class Browser
|
|
|
98
103
|
#body = compressed_body
|
|
99
104
|
body = Zlib::Inflate.inflate(compressed_body)
|
|
100
105
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
if content_type =~ /^(text\/html)|(application\/xhtml\+xml)/i
|
|
107
|
+
Mechanize::Page.new(
|
|
108
|
+
#initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
|
|
109
|
+
URI.parse(url),
|
|
110
|
+
{'content-type'=>content_type},
|
|
111
|
+
body,
|
|
112
|
+
nil,
|
|
113
|
+
agent
|
|
114
|
+
)
|
|
115
|
+
else
|
|
116
|
+
Mechanize::File.new(
|
|
117
|
+
#initialize(uri=nil, response=nil, body=nil, code=nil
|
|
118
|
+
URI.parse(url),
|
|
119
|
+
{'content-type'=>content_type},
|
|
120
|
+
body,
|
|
121
|
+
nil
|
|
122
|
+
)
|
|
123
|
+
end
|
|
124
|
+
|
|
108
125
|
end
|
|
109
126
|
end
|
|
110
127
|
|
|
File without changes
|
data/spec/browser_spec.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
require 'pp'
|
|
1
2
|
require 'epitools/browser'
|
|
2
3
|
|
|
3
4
|
class Mechanize::Page
|
|
@@ -7,7 +8,6 @@ class Mechanize::Page
|
|
|
7
8
|
end
|
|
8
9
|
|
|
9
10
|
|
|
10
|
-
|
|
11
11
|
describe Browser do
|
|
12
12
|
|
|
13
13
|
before :all do
|
|
@@ -18,6 +18,12 @@ describe Browser do
|
|
|
18
18
|
@browser.cache.delete!
|
|
19
19
|
end
|
|
20
20
|
|
|
21
|
+
it "caches javascript" do
|
|
22
|
+
url = "http://code.jquery.com/jquery-1.0.pack.js"
|
|
23
|
+
page = @browser.get(url)
|
|
24
|
+
@browser.cache.get(url).should_not == nil
|
|
25
|
+
end
|
|
26
|
+
|
|
21
27
|
it "googles" do
|
|
22
28
|
page = @browser.get("http://google.com")
|
|
23
29
|
page.body["Feeling Lucky"].should_not be_empty
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: epitools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 77
|
|
5
5
|
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 0
|
|
8
8
|
- 4
|
|
9
|
-
- 32
|
|
10
|
-
version: 0.4.32
|
|
9
|
+
- 33
|
|
10
|
+
version: 0.4.33
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- epitron
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2011-05-
|
|
18
|
+
date: 2011-05-16 00:00:00 Z
|
|
19
19
|
dependencies:
|
|
20
20
|
- !ruby/object:Gem::Dependency
|
|
21
21
|
name: rspec
|