epitools 0.4.32 → 0.4.33

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.32
1
+ 0.4.33
data/epitools.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{epitools}
8
- s.version = "0.4.32"
8
+ s.version = "0.4.33"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["epitron"]
12
- s.date = %q{2011-05-15}
12
+ s.date = %q{2011-05-16}
13
13
  s.description = %q{Miscellaneous utility libraries to make my life easier.}
14
14
  s.email = %q{chris@ill-logic.com}
15
15
  s.extra_rdoc_files = [
@@ -194,6 +194,14 @@ class String
194
194
  def endswith(substring)
195
195
  self[-substring.size..-1] == substring
196
196
  end
197
+
198
+ #
199
+ # Parse object as JSON
200
+ #
201
+ def from_json
202
+ require 'json' unless defined? JSON
203
+ JSON.parse self
204
+ end
197
205
 
198
206
  end
199
207
 
@@ -1,7 +1,7 @@
1
-
2
1
  require 'mechanize'
3
2
  require 'uri'
4
3
  require 'fileutils'
4
+ require 'json'
5
5
 
6
6
  require 'epitools'
7
7
  require 'epitools/browser/cache'
@@ -39,6 +39,14 @@ class BrowserOptions < OpenStruct
39
39
  end
40
40
  =end
41
41
 
42
+ # Monkeypatches!
43
+ class Mechanize::File
44
+ def content_type
45
+ response['content-type']
46
+ end
47
+ end
48
+
49
+
42
50
  #
43
51
  # A mechanize class that emulates a web-browser, with cache and everything.
44
52
  # Progress bars are enabled by default.
@@ -74,7 +82,7 @@ class Browser
74
82
  init_agent!
75
83
  init_cache!
76
84
  end
77
-
85
+
78
86
 
79
87
  def init_agent!
80
88
  @agent = Mechanize.new do |a|
@@ -106,18 +114,46 @@ class Browser
106
114
  end
107
115
 
108
116
 
117
+ def load_cookies!
118
+ if File.exists? @cookie_file
119
+ agent.cookie_jar.load @cookie_file
120
+ true
121
+ else
122
+ false
123
+ end
124
+ end
125
+
126
+
127
+ def save_cookies!
128
+ agent.cookie_jar.save_as @cookie_file
129
+ true
130
+ end
131
+
132
+
133
+
109
134
  def relative?(url)
110
135
  not url[ %r{^https?://} ]
111
136
  end
112
137
 
138
+
139
+ def cacheable?(page)
140
+ case page.content_type
141
+ when %r{^(text|application)}
142
+ true
143
+ end
144
+ end
113
145
 
146
+
114
147
  def cache_put(page, url)
115
- if page.is_a? Mechanize::Page and page.content_type =~ %r{^text/}
116
- puts " |_ writing to cache"
117
- cache.put(page, url, :overwrite=>true)
148
+ if cache.valid_page?(page)
149
+ if page.content_type =~ %r{(^text/|^application/javascript|javascript)}
150
+ puts " |_ writing to cache"
151
+ cache.put(page, url, :overwrite=>true)
152
+ end
118
153
  end
119
154
  end
120
155
 
156
+
121
157
  #
122
158
  # Retrieve an URL, and return a Mechanize::Page instance (which acts a
123
159
  # bit like a Nokogiri::HTML::Document instance.)
@@ -136,9 +172,7 @@ class Browser
136
172
  #end
137
173
 
138
174
  # Determine the cache setting
139
-
140
-
141
- use_cache = options[:use_cache] || @use_cache
175
+ use_cache = options[:use_cache] || options[:cache] || options[:cached] || @use_cache
142
176
 
143
177
  cached_already = cache.include?(url)
144
178
 
@@ -149,20 +183,14 @@ class Browser
149
183
 
150
184
  begin
151
185
 
152
- if cached_already
153
- page = cache.get(url)
154
- if page.nil?
155
- puts " |_ CACHE FAIL! Re-getting page."
156
- page = get(url, false)
157
- end
186
+ if page = cache.get(url)
158
187
  puts " |_ cached (#{page.content_type})"
159
188
  else
160
- page = agent.get url
189
+ page = agent.get(url)
161
190
  @last_get = Time.now
191
+ cache_put(page, url)
162
192
  end
163
193
 
164
- cache_put(page, url) unless cached_already
165
-
166
194
  puts
167
195
 
168
196
  rescue Net::HTTPBadResponse, Errno::ECONNRESET, SocketError, Timeout::Error, SOCKSError => e
@@ -196,22 +224,14 @@ class Browser
196
224
  end
197
225
 
198
226
 
199
- # Delegation
200
- [:head, :post, :put].each do |meth|
227
+ #
228
+ # Delegate certain methods to @agent
229
+ #
230
+ [:head, :post, :put, :submit].each do |meth|
201
231
  define_method meth do |*args|
202
232
  agent.send(meth, *args)
203
233
  end
204
234
  end
205
235
 
206
- private
207
-
208
- def load_cookies!
209
- agent.cookie_jar.load @cookie_file if File.exists? @cookie_file
210
- end
211
-
212
- def save_cookies!
213
- agent.cookie_jar.save_as @cookie_file
214
- end
215
-
216
236
  end
217
237
 
@@ -48,10 +48,15 @@ class Browser
48
48
 
49
49
  alias_method :size, :count
50
50
 
51
+ def valid_page?(page)
52
+ [:body, :content_type, :uri].all?{|m| page.respond_to? m }
53
+ end
54
+
55
+
51
56
  def put(page, original_url=nil, options={})
52
57
  dmsg [:put, original_url]
53
58
 
54
- raise "Invalid page" unless [:body, :content_type, :uri].all?{|m| page.respond_to? m }
59
+ raise "Invalid page" unless valid_page?(page)
55
60
 
56
61
  url = page.uri.to_s
57
62
 
@@ -98,13 +103,25 @@ class Browser
98
103
  #body = compressed_body
99
104
  body = Zlib::Inflate.inflate(compressed_body)
100
105
 
101
- Mechanize::Page.new(
102
- URI.parse(url),
103
- {'content-type'=>content_type},
104
- body,
105
- nil,
106
- agent
107
- )
106
+ if content_type =~ /^(text\/html)|(application\/xhtml\+xml)/i
107
+ Mechanize::Page.new(
108
+ #initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
109
+ URI.parse(url),
110
+ {'content-type'=>content_type},
111
+ body,
112
+ nil,
113
+ agent
114
+ )
115
+ else
116
+ Mechanize::File.new(
117
+ #initialize(uri=nil, response=nil, body=nil, code=nil
118
+ URI.parse(url),
119
+ {'content-type'=>content_type},
120
+ body,
121
+ nil
122
+ )
123
+ end
124
+
108
125
  end
109
126
  end
110
127
 
File without changes
data/spec/browser_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'pp'
1
2
  require 'epitools/browser'
2
3
 
3
4
  class Mechanize::Page
@@ -7,7 +8,6 @@ class Mechanize::Page
7
8
  end
8
9
 
9
10
 
10
-
11
11
  describe Browser do
12
12
 
13
13
  before :all do
@@ -18,6 +18,12 @@ describe Browser do
18
18
  @browser.cache.delete!
19
19
  end
20
20
 
21
+ it "caches javascript" do
22
+ url = "http://code.jquery.com/jquery-1.0.pack.js"
23
+ page = @browser.get(url)
24
+ @browser.cache.get(url).should_not == nil
25
+ end
26
+
21
27
  it "googles" do
22
28
  page = @browser.get("http://google.com")
23
29
  page.body["Feeling Lucky"].should_not be_empty
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epitools
3
3
  version: !ruby/object:Gem::Version
4
- hash: 79
4
+ hash: 77
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 4
9
- - 32
10
- version: 0.4.32
9
+ - 33
10
+ version: 0.4.33
11
11
  platform: ruby
12
12
  authors:
13
13
  - epitron
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-15 00:00:00 Z
18
+ date: 2011-05-16 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rspec