typhoeus_curly 0.1.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,19 @@
1
+ #ifndef TYPHOEUS_EASY
2
+ #define TYPHOEUS_EASY
3
+
4
+ #include <native.h>
5
+
6
/* Entry point for setting up the Easy bindings. The definition is not part
 * of this header. Declared with (void) so the compiler checks that callers
 * pass no arguments; an empty parameter list declares no prototype in C. */
void init_typhoeus_easy(void);
7
/*
 * A read-only byte buffer plus two integer counters.
 * NOTE(review): presumably `size` is the total number of bytes in `memory`
 * and `read` is how many of them have already been consumed -- confirm
 * against the code that fills and drains this struct (not visible here).
 */
typedef struct {
  const char *memory; /* start of the buffer; this struct never writes through it */
  int size;           /* see note above */
  int read;           /* see note above */
} RequestChunk;
12
+
13
+ typedef struct {
14
+ RequestChunk *request_chunk;
15
+ CURL *curl;
16
+ struct curl_slist *headers;
17
+ } CurlEasy;
18
+
19
+ #endif
@@ -0,0 +1,225 @@
1
+ #include <typhoeus_multi.h>
2
+
3
+ static void multi_read_info(VALUE self, CURLM *multi_handle);
4
+
5
+ static void dealloc(CurlMulti *curl_multi) {
6
+ curl_multi_cleanup(curl_multi->multi);
7
+ free(curl_multi);
8
+ }
9
+
10
+ static VALUE multi_add_handle(VALUE self, VALUE easy) {
11
+ CurlEasy *curl_easy;
12
+ Data_Get_Struct(easy, CurlEasy, curl_easy);
13
+ CurlMulti *curl_multi;
14
+ Data_Get_Struct(self, CurlMulti, curl_multi);
15
+ CURLMcode mcode;
16
+
17
+ mcode = curl_multi_add_handle(curl_multi->multi, curl_easy->curl);
18
+ if (mcode != CURLM_CALL_MULTI_PERFORM && mcode != CURLM_OK) {
19
+ rb_raise((VALUE)mcode, "An error occured adding the handle");
20
+ }
21
+
22
+ curl_easy_setopt(curl_easy->curl, CURLOPT_PRIVATE, easy);
23
+ curl_multi->active++;
24
+
25
+ if (mcode == CURLM_CALL_MULTI_PERFORM) {
26
+ curl_multi_perform(curl_multi->multi, &(curl_multi->running));
27
+ }
28
+ //
29
+ // if (curl_multi->running) {
30
+ // printf("call read_info on add<br/>");
31
+ // multi_read_info(self, curl_multi->multi);
32
+ // }
33
+
34
+ return easy;
35
+ }
36
+
37
+ static VALUE multi_remove_handle(VALUE self, VALUE easy) {
38
+ CurlEasy *curl_easy;
39
+ Data_Get_Struct(easy, CurlEasy, curl_easy);
40
+ CurlMulti *curl_multi;
41
+ Data_Get_Struct(self, CurlMulti, curl_multi);
42
+
43
+ curl_multi->active--;
44
+ curl_multi_remove_handle(curl_multi->multi, curl_easy->curl);
45
+
46
+ return easy;
47
+ }
48
+
49
+ static void multi_read_info(VALUE self, CURLM *multi_handle) {
50
+ int msgs_left, result;
51
+ CURLMsg *msg;
52
+ CURLcode ecode;
53
+ CURL *easy_handle;
54
+ VALUE easy;
55
+
56
+ /* check for finished easy handles and remove from the multi handle */
57
+ while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
58
+
59
+ if (msg->msg != CURLMSG_DONE) {
60
+ continue;
61
+ }
62
+
63
+ easy_handle = msg->easy_handle;
64
+ result = msg->data.result;
65
+ if (easy_handle) {
66
+ ecode = curl_easy_getinfo(easy_handle, CURLINFO_PRIVATE, &easy);
67
+ if (ecode != 0) {
68
+ rb_raise(ecode, "error getting easy object");
69
+ }
70
+
71
+ long response_code = -1;
72
+ curl_easy_getinfo(easy_handle, CURLINFO_RESPONSE_CODE, &response_code);
73
+
74
+ // TODO: find out what the real problem is here and fix it.
75
+ // this next bit is a horrible hack. For some reason my tests against a local server on my laptop
76
+ // fail intermittently and return this result number. However, it will succeed if you try it a few
77
+ // more times. Also noteworthy is that this doens't happen when hitting an external server. WTF?!
78
+
79
+ // Sandofsky says:
80
+ // This is caused by OS X first attempting to resolve using IPV6.
81
+ // Hack solution: connect to yourself with 127.0.0.1, not localhost
82
+ // http://curl.haxx.se/mail/tracker-2009-09/0018.html
83
+ if (result == 7) {
84
+ VALUE max_retries = rb_funcall(easy, rb_intern("max_retries?"), 0);
85
+ if (max_retries != Qtrue) {
86
+ multi_remove_handle(self, easy);
87
+ multi_add_handle(self, easy);
88
+ CurlMulti *curl_multi;
89
+ Data_Get_Struct(self, CurlMulti, curl_multi);
90
+ curl_multi_perform(curl_multi->multi, &(curl_multi->running));
91
+
92
+ rb_funcall(easy, rb_intern("increment_retries"), 0);
93
+
94
+ continue;
95
+ }
96
+ }
97
+ multi_remove_handle(self, easy);
98
+
99
+ if (result != 0) {
100
+ rb_funcall(easy, rb_intern("failure"), 0);
101
+ }
102
+ else if ((response_code >= 200 && response_code < 300) || response_code == 0) {
103
+ rb_funcall(easy, rb_intern("success"), 0);
104
+ }
105
+ else if (response_code >= 300 && response_code < 600) {
106
+ rb_funcall(easy, rb_intern("failure"), 0);
107
+ }
108
+ }
109
+ }
110
+ }
111
+
112
+ /* called by multi_perform and fire_and_forget */
113
+ static void rb_curl_multi_run(VALUE self, CURLM *multi_handle, int *still_running) {
114
+ CURLMcode mcode;
115
+
116
+ do {
117
+ mcode = curl_multi_perform(multi_handle, still_running);
118
+ } while (mcode == CURLM_CALL_MULTI_PERFORM);
119
+
120
+ if (mcode != CURLM_OK) {
121
+ rb_raise((VALUE)mcode, "an error occured while running perform");
122
+ }
123
+
124
+ multi_read_info( self, multi_handle );
125
+ }
126
+
127
+ static VALUE fire_and_forget(VALUE self) {
128
+ CurlMulti *curl_multi;
129
+ Data_Get_Struct(self, CurlMulti, curl_multi);
130
+ rb_curl_multi_run( self, curl_multi->multi, &(curl_multi->running) );
131
+ }
132
+
133
+ static VALUE multi_perform(VALUE self) {
134
+ CURLMcode mcode;
135
+ CurlMulti *curl_multi;
136
+ int maxfd, rc;
137
+ fd_set fdread, fdwrite, fdexcep;
138
+
139
+ long timeout;
140
+ struct timeval tv = {0, 0};
141
+
142
+ Data_Get_Struct(self, CurlMulti, curl_multi);
143
+
144
+ rb_curl_multi_run( self, curl_multi->multi, &(curl_multi->running) );
145
+ while(curl_multi->running) {
146
+ FD_ZERO(&fdread);
147
+ FD_ZERO(&fdwrite);
148
+ FD_ZERO(&fdexcep);
149
+
150
+ /* get the curl suggested time out */
151
+ mcode = curl_multi_timeout(curl_multi->multi, &timeout);
152
+ if (mcode != CURLM_OK) {
153
+ rb_raise((VALUE)mcode, "an error occured getting the timeout");
154
+ }
155
+
156
+ if (timeout == 0) { /* no delay */
157
+ rb_curl_multi_run( self, curl_multi->multi, &(curl_multi->running) );
158
+ continue;
159
+ }
160
+ else if (timeout < 0) {
161
+ timeout = 1;
162
+ }
163
+
164
+ tv.tv_sec = timeout / 1000;
165
+ tv.tv_usec = (timeout * 1000) % 1000000;
166
+
167
+ /* load the fd sets from the multi handle */
168
+ mcode = curl_multi_fdset(curl_multi->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
169
+ if (mcode != CURLM_OK) {
170
+ rb_raise((VALUE)mcode, "an error occured getting the fdset");
171
+ }
172
+
173
+ rc = rb_thread_select(maxfd+1, &fdread, &fdwrite, &fdexcep, &tv);
174
+ if (rc < 0) {
175
+ rb_raise(rb_eRuntimeError, "error on thread select");
176
+ }
177
+ rb_curl_multi_run( self, curl_multi->multi, &(curl_multi->running) );
178
+
179
+ }
180
+
181
+ return Qnil;
182
+ }
183
+
184
+ static VALUE active_handle_count(VALUE self) {
185
+ CurlMulti *curl_multi;
186
+ Data_Get_Struct(self, CurlMulti, curl_multi);
187
+
188
+ return INT2NUM(curl_multi->active);
189
+ }
190
+
191
+ static VALUE multi_cleanup(VALUE self) {
192
+ CurlMulti *curl_multi;
193
+ Data_Get_Struct(self, CurlMulti, curl_multi);
194
+
195
+ curl_multi_cleanup(curl_multi->multi);
196
+ curl_multi->active = 0;
197
+ curl_multi->running = 0;
198
+
199
+ return Qnil;
200
+ }
201
+
202
+ static VALUE new(int argc, VALUE *argv, VALUE klass) {
203
+ CurlMulti *curl_multi = ALLOC(CurlMulti);
204
+ curl_multi->multi = curl_multi_init();
205
+ curl_multi->active = 0;
206
+ curl_multi->running = 0;
207
+
208
+ VALUE multi = Data_Wrap_Struct(cTyphoeusMulti, 0, dealloc, curl_multi);
209
+
210
+ rb_obj_call_init(multi, argc, argv);
211
+
212
+ return multi;
213
+ }
214
+
215
+ void init_typhoeus_multi() {
216
+ VALUE klass = cTyphoeusMulti = rb_define_class_under(mTyphoeus, "Multi", rb_cObject);
217
+
218
+ rb_define_singleton_method(klass, "new", new, -1);
219
+ rb_define_private_method(klass, "multi_add_handle", multi_add_handle, 1);
220
+ rb_define_private_method(klass, "multi_remove_handle", multi_remove_handle, 1);
221
+ rb_define_private_method(klass, "multi_perform", multi_perform, 0);
222
+ rb_define_private_method(klass, "multi_cleanup", multi_cleanup, 0);
223
+ rb_define_private_method(klass, "active_handle_count", active_handle_count, 0);
224
+ rb_define_method(klass, "fire_and_forget", fire_and_forget, 0);
225
+ }
@@ -0,0 +1,16 @@
1
+ #ifndef TYPHOEUS_MULTI
2
+ #define TYPHOEUS_MULTI
3
+
4
+ #include <native.h>
5
+ #include <typhoeus_easy.h>
6
+
7
+ VALUE cTyphoeusMulti; /* Ruby class object for Typhoeus::Multi, assigned in init_typhoeus_multi. NOTE(review): this defines the variable in a header; if more than one .c file includes it, linking can fail with duplicate symbols. Consider `extern` here plus one definition in a .c file. */
8
+ typedef struct {
9
+ int running;
10
+ int active;
11
+ CURLM *multi;
12
+ } CurlMulti;
13
+
14
/* Defines the Typhoeus::Multi class and its native methods. Declared with
 * (void) so the compiler checks that callers pass no arguments; an empty
 * parameter list declares no prototype in C. */
void init_typhoeus_multi(void);
15
+
16
+ #endif
data/lib/typhoeus.rb ADDED
@@ -0,0 +1,55 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
+
3
+ require 'rack/utils'
4
+ require 'digest/sha2'
5
+ require 'typhoeus/easy'
6
+ require 'typhoeus/multi'
7
+ require 'typhoeus/native'
8
+ require 'typhoeus/filter'
9
+ require 'typhoeus/remote_method'
10
+ require 'typhoeus/remote'
11
+ require 'typhoeus/remote_proxy_object'
12
+ require 'typhoeus/response'
13
+ require 'typhoeus/request'
14
+ require 'typhoeus/hydra'
15
+
16
module Typhoeus
  VERSION = "0.1.14"

  # Returns the array used as a pool of reusable Easy objects,
  # creating it on first use.
  def self.easy_object_pool
    @easy_objects ||= []
  end

  # Pre-fills the pool with 20 fresh Easy objects.
  def self.init_easy_object_pool
    20.times do
      easy_object_pool << Typhoeus::Easy.new
    end
  end

  # Resets +easy+ and puts it back into the pool for later reuse.
  def self.release_easy_object(easy)
    easy.reset
    easy_object_pool << easy
  end

  # Returns an Easy object: a pooled one if available, otherwise a new one.
  def self.get_easy_object
    if easy_object_pool.empty?
      Typhoeus::Easy.new
    else
      easy_object_pool.pop
    end
  end

  # Adds +easy_object+ to the current thread's Multi, creating that Multi
  # on first use.
  def self.add_easy_request(easy_object)
    Thread.current[:curl_multi] ||= Typhoeus::Multi.new
    Thread.current[:curl_multi].add(easy_object)
  end

  # Stamps every queued easy handle with a common start time and runs the
  # current thread's Multi. Returns whatever Multi#perform returns, or nil
  # when no request has been added on this thread (there is no Multi yet).
  def self.perform_easy_requests
    multi = Thread.current[:curl_multi]
    return nil if multi.nil?

    start_time = Time.now
    multi.easy_handles.each do |easy|
      easy.start_time = start_time
    end
    multi.perform
  end
end
@@ -0,0 +1,240 @@
1
module Typhoeus
  # Ruby-side wrapper around a single libcurl easy handle.
  #
  # The easy_* primitives (easy_setopt_string, easy_setopt_long,
  # easy_perform, easy_getinfo_*, easy_reset, ...) and +version+ are not
  # defined in this file; they are expected to come from the native
  # extension.
  class Easy
    attr_reader :response_body, :response_header, :method, :headers, :url
    attr_accessor :start_time

    CURLINFO_STRING = 1048576

    # Numeric values of the libcurl CURLOPT_* options used by this class.
    OPTION_VALUES = {
      :CURLOPT_URL            => 10002,
      :CURLOPT_HTTPGET        => 80,
      :CURLOPT_HTTPPOST       => 10024,
      :CURLOPT_UPLOAD         => 46,
      :CURLOPT_CUSTOMREQUEST  => 10036,
      :CURLOPT_POSTFIELDS     => 10015,
      :CURLOPT_POSTFIELDSIZE  => 60,
      :CURLOPT_USERAGENT      => 10018,
      :CURLOPT_TIMEOUT_MS     => 155,
      :CURLOPT_NOSIGNAL       => 99,
      :CURLOPT_HTTPHEADER     => 10023,
      :CURLOPT_FOLLOWLOCATION => 52,
      :CURLOPT_MAXREDIRS      => 68,
      :CURLOPT_HTTPAUTH       => 107,
      :CURLOPT_USERPWD        => 10000 + 5,
      :CURLOPT_VERBOSE        => 41
    }

    # Numeric values of the libcurl CURLINFO_* items used by this class.
    INFO_VALUES = {
      :CURLINFO_RESPONSE_CODE  => 2097154,
      :CURLINFO_TOTAL_TIME     => 3145731,
      :CURLINFO_HTTPAUTH_AVAIL => 0x200000 + 23
    }

    # Bit values of the libcurl CURLAUTH_* authentication methods.
    AUTH_TYPES = {
      :CURLAUTH_BASIC        => 1,
      :CURLAUTH_DIGEST       => 2,
      :CURLAUTH_GSSNEGOTIATE => 4,
      :CURLAUTH_NTLM         => 8,
      :CURLAUTH_DIGEST_IE    => 16
    }

    def initialize
      @method = :get
      # Matches the flag written by #post_data= (was misspelled).
      @post_data_set = nil
      @headers = {}
    end

    # Replaces the request header hash.
    def headers=(hash)
      @headers = hash
    end

    # Sets credentials from a hash with :username and :password, plus an
    # optional :method (one of the AUTH_TYPES values).
    def auth=(authinfo)
      set_option(OPTION_VALUES[:CURLOPT_USERPWD], "#{authinfo[:username]}:#{authinfo[:password]}")
      set_option(OPTION_VALUES[:CURLOPT_HTTPAUTH], authinfo[:method]) if authinfo[:method]
    end

    # Returns libcurl's CURLINFO_HTTPAUTH_AVAIL value for the last transfer.
    def auth_methods
      get_info_long(INFO_VALUES[:CURLINFO_HTTPAUTH_AVAIL])
    end

    # Turns libcurl verbose output on (truthy) or off (falsy).
    def verbose=(boolean)
      set_option(OPTION_VALUES[:CURLOPT_VERBOSE], boolean ? 1 : 0)
    end

    # Returns CURLINFO_TOTAL_TIME, which libcurl reports in seconds.
    def total_time_taken
      get_info_double(INFO_VALUES[:CURLINFO_TOTAL_TIME])
    end

    # Returns the HTTP response code of the last transfer.
    def response_code
      get_info_long(INFO_VALUES[:CURLINFO_RESPONSE_CODE])
    end

    # Enables (truthy) or disables (falsy) following of redirects.
    def follow_location=(boolean)
      set_option(OPTION_VALUES[:CURLOPT_FOLLOWLOCATION], boolean ? 1 : 0)
    end

    def max_redirects=(redirects)
      set_option(OPTION_VALUES[:CURLOPT_MAXREDIRS], redirects)
    end

    # Sets the request timeout in milliseconds. Also sets CURLOPT_NOSIGNAL.
    def timeout=(milliseconds)
      @timeout = milliseconds
      set_option(OPTION_VALUES[:CURLOPT_NOSIGNAL], 1)
      set_option(OPTION_VALUES[:CURLOPT_TIMEOUT_MS], milliseconds)
    end

    # Truthy when a timeout was configured, the transfer took longer than
    # it, and no response code was received.
    #
    # @timeout is in milliseconds while total_time_taken is in seconds, so
    # the elapsed time is converted before comparing.
    def timed_out?
      @timeout && (total_time_taken * 1000) > @timeout && response_code == 0
    end

    # Sets the request body. For PUT it goes through the native upload
    # path and blanks the Transfer-Encoding and Expect headers; for every
    # other method it is sent as POST data.
    def request_body=(request_body)
      @request_body = request_body
      if @method == :put
        easy_set_request_body(@request_body)
        headers["Transfer-Encoding"] = ""
        headers["Expect"] = ""
      else
        self.post_data = request_body
      end
    end

    def user_agent=(user_agent)
      set_option(OPTION_VALUES[:CURLOPT_USERAGENT], user_agent)
    end

    def url=(url)
      @url = url
      set_option(OPTION_VALUES[:CURLOPT_URL], url)
    end

    # Selects the HTTP method (:get, :post, :put).
    #
    # NOTE(review): any other value, not just :delete, is sent as a DELETE
    # request. Confirm whether other verbs should be supported.
    def method=(method)
      @method = method
      if method == :get
        set_option(OPTION_VALUES[:CURLOPT_HTTPGET], 1)
      elsif method == :post
        set_option(OPTION_VALUES[:CURLOPT_HTTPPOST], 1)
        self.post_data = ""
      elsif method == :put
        set_option(OPTION_VALUES[:CURLOPT_UPLOAD], 1)
        self.request_body = "" unless @request_body
      else
        set_option(OPTION_VALUES[:CURLOPT_CUSTOMREQUEST], "DELETE")
      end
    end

    # Sets the POST body and its size.
    def post_data=(data)
      @post_data_set = true
      set_option(OPTION_VALUES[:CURLOPT_POSTFIELDS], data)
      set_option(OPTION_VALUES[:CURLOPT_POSTFIELDSIZE], data.length)
    end

    # Encodes +params+ as a query string. Hash values become key[subkey]
    # pairs and Array values repeat the key. The result is used as the POST
    # body when the method is :post, otherwise appended to the URL after "?".
    def params=(params)
      params_string = params.keys.collect do |k|
        value = params[k]
        if value.is_a? Hash
          value.keys.collect {|sk| Rack::Utils.escape("#{k}[#{sk}]") + "=" + Rack::Utils.escape(value[sk].to_s)}
        elsif value.is_a? Array
          key = Rack::Utils.escape(k.to_s)
          value.collect { |v| "#{key}=#{Rack::Utils.escape(v.to_s)}" }.join('&')
        else
          "#{Rack::Utils.escape(k.to_s)}=#{Rack::Utils.escape(params[k].to_s)}"
        end
      end.flatten.join("&")

      if method == :post
        self.post_data = params_string
      else
        self.url = "#{url}?#{params_string}"
      end
    end

    # Sets a libcurl option, choosing the string or long native setter
    # by the type of +value+. is_a? is used so String subclasses are also
    # routed to the string setter.
    def set_option(option, value)
      if value.is_a?(String)
        easy_setopt_string(option, value)
      else
        easy_setopt_long(option, value)
      end
    end

    # Applies the headers, runs the transfer and returns the response code.
    def perform
      set_headers()
      easy_perform()
      response_code()
    end

    # Pushes every header to the native layer as "Key: value" and, if there
    # are any, installs them on the handle.
    def set_headers
      headers.each_pair do |key, value|
        easy_add_header("#{key}: #{value}")
      end
      easy_set_headers() unless headers.empty?
    end

    # Invokes the success callback, if one is registered.
    def success
      @success.call(self) if @success
    end

    def on_success(&block)
      @success = block
    end

    def on_success=(block)
      @success = block
    end

    # Invokes the failure callback, if one is registered.
    def failure
      @failure.call(self) if @failure
    end

    def on_failure(&block)
      @failure = block
    end

    def on_failure=(block)
      @failure = block
    end

    # Number of retries performed so far (0 if none).
    def retries
      @retries ||= 0
    end

    def increment_retries
      @retries ||= 0
      @retries += 1
    end

    # Retry limit; defaults to 40.
    def max_retries
      @max_retries ||= 40
    end

    # True once the retry count has reached the limit.
    def max_retries?
      retries >= max_retries
    end

    # Clears per-request state and resets the native handle.
    def reset
      @retries = 0
      @response_code = 0
      @response_header = ""
      @response_body = ""
      easy_reset()
    end

    def get_info_string(option)
      easy_getinfo_string(option)
    end

    def get_info_long(option)
      easy_getinfo_long(option)
    end

    def get_info_double(option)
      easy_getinfo_double(option)
    end

    def curl_version
      version
    end
  end
end