epitools 0.4.32 → 0.4.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.32
1
+ 0.4.33
data/epitools.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{epitools}
8
- s.version = "0.4.32"
8
+ s.version = "0.4.33"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["epitron"]
12
- s.date = %q{2011-05-15}
12
+ s.date = %q{2011-05-16}
13
13
  s.description = %q{Miscellaneous utility libraries to make my life easier.}
14
14
  s.email = %q{chris@ill-logic.com}
15
15
  s.extra_rdoc_files = [
@@ -194,6 +194,14 @@ class String
194
194
  def endswith(substring)
195
195
  self[-substring.size..-1] == substring
196
196
  end
197
+
198
#
# Treat this string as a JSON document and return the parsed result.
# The json library is loaded on first use, only when the JSON constant
# is not already defined.
#
def from_json
  unless defined?(JSON)
    require 'json'
  end

  JSON.parse(self)
end
197
205
 
198
206
  end
199
207
 
@@ -1,7 +1,7 @@
1
-
2
1
  require 'mechanize'
3
2
  require 'uri'
4
3
  require 'fileutils'
4
+ require 'json'
5
5
 
6
6
  require 'epitools'
7
7
  require 'epitools/browser/cache'
@@ -39,6 +39,14 @@ class BrowserOptions < OpenStruct
39
39
  end
40
40
  =end
41
41
 
42
# Monkeypatches!
#
# Adds a #content_type reader to Mechanize::File that returns the
# 'content-type' entry of the object's response.
Mechanize::File.class_eval do
  def content_type
    response['content-type']
  end
end
48
+
49
+
42
50
  #
43
51
  # A mechanize class that emulates a web-browser, with cache and everything.
44
52
  # Progress bars are enabled by default.
@@ -74,7 +82,7 @@ class Browser
74
82
  init_agent!
75
83
  init_cache!
76
84
  end
77
-
85
+
78
86
 
79
87
  def init_agent!
80
88
  @agent = Mechanize.new do |a|
@@ -106,18 +114,46 @@ class Browser
106
114
  end
107
115
 
108
116
 
117
#
# Load cookies from @cookie_file into the agent's cookie jar.
#
# Returns true when the file exists and was handed to the jar,
# false when there is no such file (the jar is left untouched).
#
def load_cookies!
  # File.exist? is the supported spelling; File.exists? was deprecated
  # and is no longer available on current Ruby versions.
  return false unless File.exist?(@cookie_file)

  agent.cookie_jar.load(@cookie_file)
  true
end
125
+
126
+
127
#
# Write the agent's cookie jar out to @cookie_file. Always returns true.
#
def save_cookies!
  jar = agent.cookie_jar
  jar.save_as(@cookie_file)
  true
end
131
+
132
+
133
+
109
134
  def relative?(url)
110
135
  not url[ %r{^https?://} ]
111
136
  end
112
137
 
138
+
139
#
# Should `page` be considered for caching?
#
# True when page.content_type begins with "text" or "application";
# false otherwise, including when the content type is nil.
#
def cacheable?(page)
  case page.content_type
  when %r{\A(text|application)} then true   # \A anchors to the start of the string, not of any line
  else false                                # a predicate should answer false rather than nil
  end
end
113
145
 
146
+
114
147
  def cache_put(page, url)
115
- if page.is_a? Mechanize::Page and page.content_type =~ %r{^text/}
116
- puts " |_ writing to cache"
117
- cache.put(page, url, :overwrite=>true)
148
+ if cache.valid_page?(page)
149
+ if page.content_type =~ %r{(^text/|^application/javascript|javascript)}
150
+ puts " |_ writing to cache"
151
+ cache.put(page, url, :overwrite=>true)
152
+ end
118
153
  end
119
154
  end
120
155
 
156
+
121
157
  #
122
158
  # Retrieve a URL, and return a Mechanize::Page instance (which acts a
123
159
  # bit like a Nokogiri::HTML::Document instance.)
@@ -136,9 +172,7 @@ class Browser
136
172
  #end
137
173
 
138
174
  # Determine the cache setting
139
-
140
-
141
- use_cache = options[:use_cache] || @use_cache
175
+ use_cache = options[:use_cache] || options[:cache] || options[:cached] || @use_cache
142
176
 
143
177
  cached_already = cache.include?(url)
144
178
 
@@ -149,20 +183,14 @@ class Browser
149
183
 
150
184
  begin
151
185
 
152
- if cached_already
153
- page = cache.get(url)
154
- if page.nil?
155
- puts " |_ CACHE FAIL! Re-getting page."
156
- page = get(url, false)
157
- end
186
+ if page = cache.get(url)
158
187
  puts " |_ cached (#{page.content_type})"
159
188
  else
160
- page = agent.get url
189
+ page = agent.get(url)
161
190
  @last_get = Time.now
191
+ cache_put(page, url)
162
192
  end
163
193
 
164
- cache_put(page, url) unless cached_already
165
-
166
194
  puts
167
195
 
168
196
  rescue Net::HTTPBadResponse, Errno::ECONNRESET, SocketError, Timeout::Error, SOCKSError => e
@@ -196,22 +224,14 @@ class Browser
196
224
  end
197
225
 
198
226
 
199
#
# Define head, post, put and submit on this class; each one passes its
# positional arguments straight through to the same-named method on the agent.
#
[:head, :post, :put, :submit].each do |delegated|
  define_method(delegated) { |*args| agent.send(delegated, *args) }
end
205
235
 
206
- private
207
-
208
- def load_cookies!
209
- agent.cookie_jar.load @cookie_file if File.exists? @cookie_file
210
- end
211
-
212
- def save_cookies!
213
- agent.cookie_jar.save_as @cookie_file
214
- end
215
-
216
236
  end
217
237
 
@@ -48,10 +48,15 @@ class Browser
48
48
 
49
49
  alias_method :size, :count
50
50
 
51
#
# Can `page` be stored in the cache? It must respond to
# #body, #content_type and #uri.
#
def valid_page?(page)
  required = [:body, :content_type, :uri]
  required.all? { |message| page.respond_to?(message) }
end
54
+
55
+
51
56
  def put(page, original_url=nil, options={})
52
57
  dmsg [:put, original_url]
53
58
 
54
- raise "Invalid page" unless [:body, :content_type, :uri].all?{|m| page.respond_to? m }
59
+ raise "Invalid page" unless valid_page?(page)
55
60
 
56
61
  url = page.uri.to_s
57
62
 
@@ -98,13 +103,25 @@ class Browser
98
103
  #body = compressed_body
99
104
  body = Zlib::Inflate.inflate(compressed_body)
100
105
 
101
- Mechanize::Page.new(
102
- URI.parse(url),
103
- {'content-type'=>content_type},
104
- body,
105
- nil,
106
- agent
107
- )
106
+ if content_type =~ /^(text\/html)|(application\/xhtml\+xml)/i
107
+ Mechanize::Page.new(
108
+ #initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
109
+ URI.parse(url),
110
+ {'content-type'=>content_type},
111
+ body,
112
+ nil,
113
+ agent
114
+ )
115
+ else
116
+ Mechanize::File.new(
117
+ #initialize(uri=nil, response=nil, body=nil, code=nil
118
+ URI.parse(url),
119
+ {'content-type'=>content_type},
120
+ body,
121
+ nil
122
+ )
123
+ end
124
+
108
125
  end
109
126
  end
110
127
 
File without changes
data/spec/browser_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'pp'
1
2
  require 'epitools/browser'
2
3
 
3
4
  class Mechanize::Page
@@ -7,7 +8,6 @@ class Mechanize::Page
7
8
  end
8
9
 
9
10
 
10
-
11
11
  describe Browser do
12
12
 
13
13
  before :all do
@@ -18,6 +18,12 @@ describe Browser do
18
18
  @browser.cache.delete!
19
19
  end
20
20
 
21
# Fetching a JavaScript file through the browser should leave a cache entry for its URL.
it "caches javascript" do
  script_url = "http://code.jquery.com/jquery-1.0.pack.js"
  @browser.get(script_url)
  @browser.cache.get(script_url).should_not == nil
end
26
+
21
27
  it "googles" do
22
28
  page = @browser.get("http://google.com")
23
29
  page.body["Feeling Lucky"].should_not be_empty
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epitools
3
3
  version: !ruby/object:Gem::Version
4
- hash: 79
4
+ hash: 77
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 4
9
- - 32
10
- version: 0.4.32
9
+ - 33
10
+ version: 0.4.33
11
11
  platform: ruby
12
12
  authors:
13
13
  - epitron
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-15 00:00:00 Z
18
+ date: 2011-05-16 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rspec