siefca-httpage 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/httpage.rb +3 -0
- data/lib/httpage/httpage.rb +14 -6
- metadata +1 -1
data/lib/httpage.rb
CHANGED
data/lib/httpage/httpage.rb
CHANGED
@@ -9,7 +9,6 @@ class HTTPage
|
|
9
9
|
attr_affects_buffers :url, :encoding
|
10
10
|
|
11
11
|
attr_accessor :redir_retry, :conn_retry, :timeout, :url
|
12
|
-
attr_reader :real_url
|
13
12
|
attr_writer :encoding
|
14
13
|
|
15
14
|
def initialize(url,redir_retry=5,conn_retry=8,timeout=40)
|
@@ -59,13 +58,17 @@ class HTTPage
|
|
59
58
|
return [default_content_type, default_encoding] if self.response.nil?
|
60
59
|
|
61
60
|
# try meta-tag header
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
61
|
+
enc = nil
|
62
|
+
ctype = nil
|
63
|
+
unless self.body.nil?
|
64
|
+
header = body.scan(/<meta http-equiv\s*=\s*['"]*content-type['"]*\s*content\s*=\s*['"]*\s*(.*?)\s*['"]*\s*\/?>/i)
|
65
|
+
header = header.flatten.first
|
66
|
+
enc = extract_encoding(header)
|
67
|
+
ctype = extract_content_type(header)
|
68
|
+
end
|
66
69
|
|
67
70
|
# try server header
|
68
|
-
if ctype.nil?
|
71
|
+
if ctype.nil? && response.respond_to?(:header)
|
69
72
|
header = response.header['content-type']
|
70
73
|
ctype = extract_content_type(header)
|
71
74
|
enc = extract_encoding(header)
|
@@ -178,6 +181,11 @@ class HTTPage
|
|
178
181
|
return r.respond_to?(:body) ? r.body : nil
|
179
182
|
end
|
180
183
|
|
184
|
+
def real_url
|
185
|
+
return nil if self.response.nil?
|
186
|
+
return @real_url
|
187
|
+
end
|
188
|
+
|
181
189
|
# Strips HTML tags from document.
|
182
190
|
|
183
191
|
def strip_html(text=nil)
|