siefca-httpage 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/httpage/httpage.rb +49 -22
  2. metadata +1 -1
@@ -9,16 +9,19 @@ class HTTPage
9
9
  attr_affects_buffers :url, :encoding
10
10
 
11
11
  attr_accessor :redir_retry, :conn_retry, :timeout, :url
12
- attr_writer :encoding
12
+ attr_reader :real_url
13
+ attr_writer :encoding
13
14
 
14
15
  def initialize(url,redir_retry=5,conn_retry=8,timeout=40)
15
- @encoding = nil
16
- @response = nil
17
- @http_req = nil
18
- @redir_retry = redir_retry
19
- @conn_retry = conn_retry
20
- @timeout = timeout
21
- self.url = url
16
+ @encoding = nil
17
+ @content_type = nil
18
+ @response = nil
19
+ @http_req = nil
20
+ @redir_retry = redir_retry
21
+ @conn_retry = conn_retry
22
+ @timeout = timeout
23
+ @real_url = nil
24
+ self.url = url
22
25
  end
23
26
 
24
27
  # Resets encoding and response buffers.
@@ -39,31 +42,42 @@ class HTTPage
39
42
  # Returns page encoding.
40
43
 
41
44
  def encoding
42
- @encoding ||= get_page_encoding
45
+ @encoding, @content_type = get_page_info if @encoding.nil?
46
+ return @encoding
47
+ end
48
+
49
+ # Returns page content-type.
50
+
51
+ def content_type
52
+ @encoding, @content_type = get_page_info if @content_type.nil?
53
+ return @content_type
43
54
  end
44
55
 
45
56
  # Obtains the content type and encoding from the document body or the server response header.
46
57
 
47
- def get_page_encoding(default_encoding='ascii')
48
- return default_encoding if self.response.nil?
58
+ def get_page_info(default_encoding='ascii', default_content_type='text/html')
59
+ return [default_content_type, default_encoding] if self.response.nil?
49
60
 
50
61
  # try meta-tag header
51
62
  header = self.response.body.scan(/<meta http-equiv\s*=\s*['"]*content-type['"]*\s*content\s*=\s*['"]*\s*(.*?)\s*['"]*\s*\/?>/i)
52
63
  header = header.flatten.first
53
64
  enc = extract_encoding(header)
65
+ ctype = extract_content_type(header)
54
66
 
55
67
  # try server header
56
- if enc.nil?
68
+ if ctype.nil?
57
69
  header = response.header['content-type']
70
+ ctype = extract_content_type(header)
58
71
  enc = extract_encoding(header)
59
72
  end
60
-
73
+
61
74
  # try default
62
- enc = default_encoding if enc.nil?
75
+ enc = default_encoding if enc.nil?
76
+ ctype = default_content_type if ctype.nil?
63
77
 
64
- return enc
78
+ return [ctype, enc]
65
79
  end
66
- private :get_page_encoding
80
+ private :get_page_info
67
81
 
68
82
  # Extracts encoding from content-type string.
69
83
 
@@ -92,16 +106,28 @@ class HTTPage
92
106
  end
93
107
  private :extract_encoding
94
108
 
109
+ # Extracts content-type from content-type string.
110
+
111
+ def extract_content_type(ctype_string)
112
+ return nil if ctype_string.nil? || ctype_string.empty?
113
+ ct = ctype_string.chomp.squeeze(' ').split(';').first
114
+ ct = ct.strip.downcase.to_sym unless ct.nil?
115
+ return ct
116
+ end
117
+ private :extract_content_type
118
+
119
+
95
120
  # Fetches the document using HTTP and returns the response object. It also sets the encoding, content type and real URL.
96
121
 
97
122
  def response
98
123
  return @response unless @response.nil?
99
- found = false
100
- response = nil
101
- url = @url
102
- http_req = @http_req
124
+ url = @url
125
+ found = false
126
+ response = nil
127
+ @real_url = nil
128
+ http_req = @http_req
103
129
  redir_retry = @redir_retry
104
- conn_retry = @conn_retry
130
+ conn_retry = @conn_retry
105
131
 
106
132
  until found do
107
133
  begin
@@ -136,8 +162,9 @@ class HTTPage
136
162
  break if (redir_retry < 0 || conn_retry < 0)
137
163
  end
138
164
  if found
165
+ @real_url = url
139
166
  @response = response
140
- @encoding = get_page_encoding
167
+ @encoding, @content_type = get_page_info
141
168
  return response
142
169
  else
143
170
  return nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: siefca-httpage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Pawe\xC5\x82 Wilk"