siefca-httpage 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/httpage/httpage.rb +49 -22
  2. metadata +1 -1
@@ -9,16 +9,19 @@ class HTTPage
9
9
  attr_affects_buffers :url, :encoding
10
10
 
11
11
  attr_accessor :redir_retry, :conn_retry, :timeout, :url
12
- attr_writer :encoding
12
+ attr_reader :real_url
13
+ attr_writer :encoding
13
14
 
14
15
  def initialize(url,redir_retry=5,conn_retry=8,timeout=40)
15
- @encoding = nil
16
- @response = nil
17
- @http_req = nil
18
- @redir_retry = redir_retry
19
- @conn_retry = conn_retry
20
- @timeout = timeout
21
- self.url = url
16
+ @encoding = nil
17
+ @content_type = nil
18
+ @response = nil
19
+ @http_req = nil
20
+ @redir_retry = redir_retry
21
+ @conn_retry = conn_retry
22
+ @timeout = timeout
23
+ @real_url = nil
24
+ self.url = url
22
25
  end
23
26
 
24
27
  # Resets encoding and response buffers.
@@ -39,31 +42,42 @@ class HTTPage
39
42
  # Returns page encoding.
40
43
 
41
44
  def encoding
42
- @encoding ||= get_page_encoding
45
+ @encoding, @content_type = get_page_info if @encoding.nil?
46
+ return @encoding
47
+ end
48
+
49
+ # Returns page content-type.
50
+
51
+ def content_type
52
+ @encoding, @content_type = get_page_info if @content_type.nil?
53
+ return @content_type
43
54
  end
44
55
 
45
56
  # Obtains encoding and content type from document body or server response header.
46
57
 
47
- def get_page_encoding(default_encoding='ascii')
48
- return default_encoding if self.response.nil?
58
+ def get_page_info(default_encoding='ascii', default_content_type='text/html')
59
+ return [default_content_type, default_encoding] if self.response.nil?
49
60
 
50
61
  # try meta-tag header
51
62
  header = self.response.body.scan(/<meta http-equiv\s*=\s*['"]*content-type['"]*\s*content\s*=\s*['"]*\s*(.*?)\s*['"]*\s*\/?>/i)
52
63
  header = header.flatten.first
53
64
  enc = extract_encoding(header)
65
+ ctype = extract_content_type(header)
54
66
 
55
67
  # try server header
56
- if enc.nil?
68
+ if ctype.nil?
57
69
  header = response.header['content-type']
70
+ ctype = extract_content_type(header)
58
71
  enc = extract_encoding(header)
59
72
  end
60
-
73
+
61
74
  # try default
62
- enc = default_encoding if enc.nil?
75
+ enc = default_encoding if enc.nil?
76
+ ctype = default_content_type if ctype.nil?
63
77
 
64
- return enc
78
+ return [ctype, enc]
65
79
  end
66
- private :get_page_encoding
80
+ private :get_page_info
67
81
 
68
82
  # Extracts encoding from content-type string.
69
83
 
@@ -92,16 +106,28 @@ class HTTPage
92
106
  end
93
107
  private :extract_encoding
94
108
 
109
+ # Extracts content-type from content-type string.
110
+
111
+ def extract_content_type(ctype_string)
112
+ return nil if ctype_string.nil? || ctype_string.empty?
113
+ ct = ctype_string.chomp.squeeze(' ').split(';').first
114
+ ct = ct.strip.downcase.to_sym unless ct.nil?
115
+ return ct
116
+ end
117
+ private :extract_content_type
118
+
119
+
95
120
  # Fetches document using HTTP and returns response object. It also sets encoding, content type and real URL.
96
121
 
97
122
  def response
98
123
  return @response unless @response.nil?
99
- found = false
100
- response = nil
101
- url = @url
102
- http_req = @http_req
124
+ url = @url
125
+ found = false
126
+ response = nil
127
+ @real_url = nil
128
+ http_req = @http_req
103
129
  redir_retry = @redir_retry
104
- conn_retry = @conn_retry
130
+ conn_retry = @conn_retry
105
131
 
106
132
  until found do
107
133
  begin
@@ -136,8 +162,9 @@ class HTTPage
136
162
  break if (redir_retry < 0 || conn_retry < 0)
137
163
  end
138
164
  if found
165
+ @real_url = url
139
166
  @response = response
140
- @encoding = get_page_encoding
167
+ @encoding, @content_type = get_page_info
141
168
  return response
142
169
  else
143
170
  return nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: siefca-httpage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Pawe\xC5\x82 Wilk"