siefca-httpage 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/httpage/httpage.rb +49 -22
- metadata +1 -1
data/lib/httpage/httpage.rb
CHANGED
@@ -9,16 +9,19 @@ class HTTPage
|
|
9
9
|
attr_affects_buffers :url, :encoding
|
10
10
|
|
11
11
|
attr_accessor :redir_retry, :conn_retry, :timeout, :url
|
12
|
-
|
12
|
+
attr_reader :real_url
|
13
|
+
attr_writer :encoding
|
13
14
|
|
14
15
|
def initialize(url,redir_retry=5,conn_retry=8,timeout=40)
|
15
|
-
@encoding
|
16
|
-
@
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@
|
20
|
-
@
|
21
|
-
|
16
|
+
@encoding = nil
|
17
|
+
@content_type = nil
|
18
|
+
@response = nil
|
19
|
+
@http_req = nil
|
20
|
+
@redir_retry = redir_retry
|
21
|
+
@conn_retry = conn_retry
|
22
|
+
@timeout = timeout
|
23
|
+
@real_url = nil
|
24
|
+
self.url = url
|
22
25
|
end
|
23
26
|
|
24
27
|
# Resets encoding and response buffers.
|
@@ -39,31 +42,42 @@ class HTTPage
|
|
39
42
|
# Returns page encoding.
|
40
43
|
|
41
44
|
def encoding
|
42
|
-
@encoding
|
45
|
+
@encoding, @content_type = get_page_info if @encoding.nil?
|
46
|
+
return @encoding
|
47
|
+
end
|
48
|
+
|
49
|
+
# Returns page content-type.
|
50
|
+
|
51
|
+
def content_type
|
52
|
+
@encoding, @content_type = get_page_info if @content_type.nil?
|
53
|
+
return @content_type
|
43
54
|
end
|
44
55
|
|
45
56
|
# Obtains encoding from document body or server response header.
|
46
57
|
|
47
|
-
def
|
48
|
-
return default_encoding if self.response.nil?
|
58
|
+
def get_page_info(default_encoding='ascii', default_content_type='text/html')
|
59
|
+
return [default_content_type, default_encoding] if self.response.nil?
|
49
60
|
|
50
61
|
# try meta-tag header
|
51
62
|
header = self.response.body.scan(/<meta http-equiv\s*=\s*['"]*content-type['"]*\s*content\s*=\s*['"]*\s*(.*?)\s*['"]*\s*\/?>/i)
|
52
63
|
header = header.flatten.first
|
53
64
|
enc = extract_encoding(header)
|
65
|
+
ctype = extract_content_type(header)
|
54
66
|
|
55
67
|
# try server header
|
56
|
-
if
|
68
|
+
if ctype.nil?
|
57
69
|
header = response.header['content-type']
|
70
|
+
ctype = extract_content_type(header)
|
58
71
|
enc = extract_encoding(header)
|
59
72
|
end
|
60
|
-
|
73
|
+
|
61
74
|
# try default
|
62
|
-
enc
|
75
|
+
enc = default_encoding if enc.nil?
|
76
|
+
ctype = default_content_type if ctype.nil?
|
63
77
|
|
64
|
-
return enc
|
78
|
+
return [ctype, enc]
|
65
79
|
end
|
66
|
-
private :
|
80
|
+
private :get_page_info
|
67
81
|
|
68
82
|
# Extracts encoding from content-type string.
|
69
83
|
|
@@ -92,16 +106,28 @@ class HTTPage
|
|
92
106
|
end
|
93
107
|
private :extract_encoding
|
94
108
|
|
109
|
+
# Extracts content-type from content-type string.
|
110
|
+
|
111
|
+
def extract_content_type(ctype_string)
|
112
|
+
return nil if ctype_string.nil? || ctype_string.empty?
|
113
|
+
ct = ctype_string.chomp.squeeze(' ').split(';').first
|
114
|
+
ct = ct.strip.downcase.to_sym unless ct.nil?
|
115
|
+
return ct
|
116
|
+
end
|
117
|
+
private :extract_content_type
|
118
|
+
|
119
|
+
|
95
120
|
# Fetches document using HTTP and returns response object. It also sets encoding.
|
96
121
|
|
97
122
|
def response
|
98
123
|
return @response unless @response.nil?
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
124
|
+
url = @url
|
125
|
+
found = false
|
126
|
+
response = nil
|
127
|
+
@real_url = nil
|
128
|
+
http_req = @http_req
|
103
129
|
redir_retry = @redir_retry
|
104
|
-
conn_retry
|
130
|
+
conn_retry = @conn_retry
|
105
131
|
|
106
132
|
until found do
|
107
133
|
begin
|
@@ -136,8 +162,9 @@ class HTTPage
|
|
136
162
|
break if (redir_retry < 0 || conn_retry < 0)
|
137
163
|
end
|
138
164
|
if found
|
165
|
+
@real_url = url
|
139
166
|
@response = response
|
140
|
-
@encoding =
|
167
|
+
@encoding, @content_type = get_page_info
|
141
168
|
return response
|
142
169
|
else
|
143
170
|
return nil
|