damog-goodies 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/goodies/html.rb +1 -1
- data/lib/goodies/lwr-simple.rb +179 -0
- metadata +4 -3
- data/lib/goodies/lwr.rb +0 -13
data/lib/goodies/html.rb
CHANGED
@@ -2,7 +2,7 @@ require "rubygems"
|
|
2
2
|
require "hpricot"
|
3
3
|
|
4
4
|
# for 'get'
|
5
|
-
require "#{File.join(File.expand_path(File.dirname(__FILE__)), "lwr")}"
|
5
|
+
require "#{File.join(File.expand_path(File.dirname(__FILE__)), "lwr-simple")}"
|
6
6
|
|
7
7
|
# for 'has?'
|
8
8
|
require "#{File.join(File.expand_path(File.dirname(__FILE__)), "array")}"
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# Little tribute to LWP::Simple
|
2
|
+
require "open-uri"
|
3
|
+
require "tempfile"
|
4
|
+
|
5
|
+
module LWR
|
6
|
+
class LWR::Simple
|
7
|
+
|
8
|
+
def self.normalize(url)
|
9
|
+
case url
|
10
|
+
when URI
|
11
|
+
when String
|
12
|
+
url = URI.parse(url)
|
13
|
+
else
|
14
|
+
raise ArgumentError, "URI or String expected, got: "
|
15
|
+
end
|
16
|
+
|
17
|
+
unless url.scheme
|
18
|
+
url = URI.parse("http://" << url.to_s)
|
19
|
+
end
|
20
|
+
|
21
|
+
if url.path.empty?
|
22
|
+
url.path = "/"
|
23
|
+
end
|
24
|
+
|
25
|
+
url
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
module Kernel
|
33
|
+
# get will fetch the document identified by the given URL and return it.
|
34
|
+
# The url argument can be either a simple string or a URI object.
|
35
|
+
#
|
36
|
+
# You will not be able to examine the response code or response headers
|
37
|
+
# (like 'Content-Type') when you are accessing the web using this function.
|
38
|
+
# If you need that information you should use the full OO interface of Net::HTTP.
|
39
|
+
def get(uri)
|
40
|
+
url = LWR::Simple.normalize(uri)
|
41
|
+
|
42
|
+
begin
|
43
|
+
return open(url).read
|
44
|
+
rescue
|
45
|
+
nil
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Get document headers. Returns the following 5 values if successful:
|
50
|
+
# - content_type
|
51
|
+
# - document_length
|
52
|
+
# - modified_time
|
53
|
+
# - expires
|
54
|
+
# - server
|
55
|
+
#
|
56
|
+
# Returns an empty array if it fails.
|
57
|
+
def head(uri)
|
58
|
+
url = LWR::Simple.normalize(uri)
|
59
|
+
|
60
|
+
head = []
|
61
|
+
|
62
|
+
begin
|
63
|
+
Net::HTTP::start(url.host, url.port) do |c|
|
64
|
+
res = c.request_head(url.path)
|
65
|
+
head << res["content-type"]
|
66
|
+
head << res["content-length"]
|
67
|
+
head << res["last-modified"]
|
68
|
+
head << res["expires"]
|
69
|
+
head << res["server"]
|
70
|
+
end
|
71
|
+
ensure
|
72
|
+
return head
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# Get and print a document identified by URL. If the request fails, then the
|
77
|
+
# status code and message are printed on $stderr. The return value is the
|
78
|
+
# HTTPResponse object. See also getputs.
|
79
|
+
def getprint(uri, limit = 10)
|
80
|
+
raise ArgumentError, 'HTTP redirect too deep' if limit == 0
|
81
|
+
|
82
|
+
url = LWR::Simple.normalize(uri)
|
83
|
+
|
84
|
+
begin
|
85
|
+
res = Net::HTTP.get_response(url)
|
86
|
+
rescue => ex
|
87
|
+
$stderr.puts "#{ex.class}: #{ex.message}"
|
88
|
+
else
|
89
|
+
case res
|
90
|
+
when Net::HTTPSuccess
|
91
|
+
puts res.body
|
92
|
+
when Net::HTTPRedirection
|
93
|
+
getprint(res["location"], limit - 1)
|
94
|
+
else
|
95
|
+
$stderr.puts "#{res.code} #{res.message}"
|
96
|
+
end
|
97
|
+
ensure
|
98
|
+
return res
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Gets a document identified by a URL and stores it in the file. The URL
|
103
|
+
# can either be a string or a URI object. File can be either a string or
|
104
|
+
# a File object. If a problem occurs while writing to a file, nil is
|
105
|
+
# returned. If it succeeds, the return value is the HTTPResponse object.
|
106
|
+
def getstore(uri, file)
|
107
|
+
res = nil
|
108
|
+
|
109
|
+
file = file.path if file.is_a? File
|
110
|
+
|
111
|
+
begin
|
112
|
+
File.open(file, "w") do |f|
|
113
|
+
res = f.getprint(uri) # this is why Ruby is so sweet
|
114
|
+
end
|
115
|
+
rescue
|
116
|
+
return nil
|
117
|
+
end
|
118
|
+
|
119
|
+
res
|
120
|
+
end
|
121
|
+
|
122
|
+
def mirror(uri, file)
|
123
|
+
url = LWR::Simple.normalize(uri)
|
124
|
+
req = Net::HTTP::Get.new(url.path)
|
125
|
+
|
126
|
+
case file
|
127
|
+
when String
|
128
|
+
if File.exists? file
|
129
|
+
file_exists = true
|
130
|
+
else
|
131
|
+
file_exists = false
|
132
|
+
end
|
133
|
+
file = File.open(file, "w")
|
134
|
+
when File
|
135
|
+
if File.exists? file.path
|
136
|
+
file_exists = true
|
137
|
+
else
|
138
|
+
file_exists = false
|
139
|
+
end
|
140
|
+
else
|
141
|
+
raise ArgumentError, "Failed arguments"
|
142
|
+
end
|
143
|
+
|
144
|
+
if file_exists
|
145
|
+
req.add_field("If-Modified-Since", file.mtime.httpdate)
|
146
|
+
end
|
147
|
+
|
148
|
+
res = Net::HTTP.new(url.host, url.port).start do |http|
|
149
|
+
http.request(req)
|
150
|
+
end
|
151
|
+
|
152
|
+
case res
|
153
|
+
when Net::HTTPSuccess
|
154
|
+
tmpfile = Tempfile.new("lwr")
|
155
|
+
tmpfile.print res.body
|
156
|
+
|
157
|
+
if res["content-length"] and tmpfile.size < res["content-length"].to_i
|
158
|
+
raise Exception, "Transfer truncated, only #{tmpfile.size} of #{res["content-length"]} bytes received"
|
159
|
+
File.unlink(tmpfile.path)
|
160
|
+
elsif res["content-length"] and tmpfile.size > res["content-length"].to_i
|
161
|
+
raise Exception, "Content-length mismatch, expected #{res["content-length"]} bytes, got #{tmpfile.size}"
|
162
|
+
File.unlink(tmpfile.path)
|
163
|
+
else # OK
|
164
|
+
File.unlink(file.path) if File.exists? file.path
|
165
|
+
File.rename(tmpfile.path, file.path)
|
166
|
+
|
167
|
+
end
|
168
|
+
|
169
|
+
else
|
170
|
+
end
|
171
|
+
|
172
|
+
res
|
173
|
+
|
174
|
+
end
|
175
|
+
|
176
|
+
alias getputs getprint
|
177
|
+
|
178
|
+
end
|
179
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: damog-goodies
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.2"
|
4
|
+
version: "0.3"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Moreno <david@axiombox.com>
|
@@ -9,11 +9,12 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-02-28 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: hpricot
|
17
|
+
type: :runtime
|
17
18
|
version_requirement:
|
18
19
|
version_requirements: !ruby/object:Gem::Requirement
|
19
20
|
requirements:
|
@@ -34,7 +35,7 @@ files:
|
|
34
35
|
- lib/goodies.rb
|
35
36
|
- lib/goodies/array.rb
|
36
37
|
- lib/goodies/html.rb
|
37
|
-
- lib/goodies/lwr.rb
|
38
|
+
- lib/goodies/lwr-simple.rb
|
38
39
|
has_rdoc: true
|
39
40
|
homepage: http://github.com/damog/goodies
|
40
41
|
post_install_message:
|