curl 0.0.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of curl might be problematic. Click here for more details.

Files changed (3) hide show
  1. data/README +6 -0
  2. data/lib/curl.rb +264 -0
  3. metadata +80 -0
data/README ADDED
@@ -0,0 +1,6 @@
1
+ Usage:
2
+
3
+ require 'curl'
4
+ curl = CURL.new
5
+ page = curl.get("http://google.com")
6
+ page.scan(/somebody/)
@@ -0,0 +1,264 @@
1
+
2
require 'cgi'
require 'digest/md5'
require 'fileutils'
require 'open3'
require 'shellwords'
require 'uri'

require 'ap'
7
+
8
+
9
+ include Open3
10
+
11
+
12
# Thin wrapper around the `curl` command-line tool.
#
# Every request is executed by building a shell command line and running it,
# so all caller-supplied values (URLs, file paths, form data, user agent) are
# shell-escaped with Shellwords before being interpolated. The `ref` and
# `header` arguments are the exception: they are inserted verbatim because
# they are raw curl option fragments (see the default `header` of #post).
#
# Usage:
#   curl = CURL.new
#   page = curl.get("http://google.com")
class CURL
  AGENT_ALIASES = {
    'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Windows Mozilla' => 'Mozilla/5.0 Windows; U; Windows NT 5.0; en-US; rv:1.4b Gecko/20030516 Mozilla Firebird/0.6',
    'Windows Mozilla 2' => 'Mozilla/5.0 Windows; U; Windows NT 5.0; ru-US; rv:1.4b Gecko/20030516',
    'Windows Mozilla 3' => 'Mozilla/5.0 Windows; U; Windows NT 5.0; en-UK; rv:1.4b Gecko/20060516',
    'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
    'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
    'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
    'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
    'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
    'IPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1A543a Safari/419.3',
    'IPhone Vkontakt' => 'VKontakte/1.1.8 CFNetwork/342.1 Darwin/9.4.1',
    'Google' => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
    'Yahoo-Slurp' => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)'
  }

  attr_accessor :user_agent

  # @param keys [Hash] options:
  #   :cache           - directory used by #get to cache responses (caching is off when absent)
  #   :cookies_disable - truthy to run curl without a cookie jar
  #   :cookies         - path of the cookie jar (a random file under /tmp/curl/ by default)
  # @yield [self] optional configuration block
  def initialize(keys = {})
    @debug = false
    @cache = keys[:cache] || false
    @cookies_enable = !keys[:cookies_disable]
    @user_agent = AGENT_ALIASES['Google']
    FileUtils.makedirs('/tmp/curl/')
    @cookies_file = keys[:cookies] || "/tmp/curl/curl_#{rand}_#{rand}.jar"
    # An earlier assignment of Accept/Accept-Language/Accept-Charset headers
    # was immediately overwritten by this one and therefore never sent; only
    # the effective value is kept. Note that -k disables TLS verification.
    @setup_params = ' --connect-timeout 6 --max-time 8 --retry 1 --location --compressed --silent -k '
    yield self if block_given?
  end

  # Selects the user agent by a key of AGENT_ALIASES.
  # Raises RuntimeError ("unknown agent alias") for an unknown key.
  def user_agent_alias=(al)
    self.user_agent = AGENT_ALIASES[al] || raise('unknown agent alias')
  end

  # @return [String] path of the cookie jar file
  def cookies
    @cookies_file
  end

  # Routes subsequent requests through an HTTP proxy.
  #
  # @param proxy_uri [URI, String] a URI object, or "[user:password@]host:port"
  # @return [String] the accumulated curl options
  def proxy(proxy_uri)
    add_proxy_params('--proxy', proxy_uri)
  end

  # Routes subsequent requests through a SOCKS5 proxy (host names are
  # resolved by the proxy, per curl's --socks5-hostname).
  #
  # @param socks_uri [URI, String] a URI object, or "[user:password@]host:port"
  # @return [String] the accumulated curl options
  def socks(socks_uri)
    add_proxy_params('--socks5-hostname', socks_uri)
  end

  # Checks whether a SOCKS5 proxy works by fetching yahoo.com through it.
  #
  # curl is invoked with an argument vector (no shell), so `proxy` cannot
  # inject shell syntax.
  #
  # @param proxy [String] "host:port" of the proxy
  # @return [Boolean] true when the response mentions "yahoo"; false otherwise,
  #   including when curl cannot be started
  def self.check(proxy)
    output, = Open3.capture2(
      'curl', '--connect-timeout', '6', '--max-time', '8', '--silent',
      '--socks5', proxy.to_s, 'yahoo.com'
    )
    output.include?('yahoo')
  rescue SystemCallError
    false
  end

  # Turns printing of the generated curl commands on or off.
  def debug=(debug = false)
    @debug = debug
  end

  def debug?
    @debug
  end

  # Fetches a URL, optionally through the on-disk cache.
  #
  # When a cache directory was configured, the response is stored in
  # "<cache>/<first 4 hex chars of MD5(url)>/<MD5(url)>.html" and served from
  # there on later calls. Empty responses are not cached so that a failed
  # fetch is not remembered forever.
  #
  # @param url   [String]
  # @param count [Integer] maximum number of attempts while the response is empty
  # @param ref   [String, nil] raw curl option fragment inserted verbatim
  # @param keys  [Hash] unused; kept for interface compatibility
  # @return [String] response body
  def get(url, count = 3, ref = nil, keys = {})
    return get_raw(url, count, ref) unless @cache

    digest = Digest::MD5.hexdigest(url)
    filename = "#{@cache}/#{digest[0..3]}/#{digest}.html"
    if File.exist?(filename)
      puts "read from cache file '#{filename}'"
      return File.read(filename)
    end

    result = get_raw(url, count, ref)
    unless result.to_s.strip.empty?
      FileUtils.mkdir_p(File.dirname(filename))
      puts "cache to file '#{filename}'"
      # write (not puts) so a cached read returns exactly what was fetched
      File.open(filename, 'w') { |f| f.write(result) }
    end
    result
  end

  # Fetches a URL bypassing the cache, retrying while the response is empty.
  #
  # @param url   [String]
  # @param count [Integer] maximum number of attempts (at least one is made)
  # @param ref   [String, nil] raw curl option fragment inserted verbatim
  # @return [String] response body with literal "\xNN"-style sequences
  #   (backslash, "x", two characters) removed
  def get_raw(url, count = 3, ref = nil)
    cmd = "curl #{cookies_store} #{browser_type} #{@setup_params} #{ref} #{Shellwords.escape(url.to_s)} "
    run_with_retries(cmd, count).gsub(/\\x../, '')
  end

  # Sends a URL-encoded form POST.
  #
  # Data format for the post:
  #   data = { "subm"    => "1",
  #            "sid"     => cap.split("=").last,
  #            "country" => "1" }
  #
  # Values are CGI-escaped and "." is additionally encoded as "%2E"; keys are
  # sent as given. Pairs with a nil/false key or the key 'IDontAgreeBtn' are
  # skipped.
  #
  # @param url       [String]
  # @param post_data [Hash]
  # @param ref       [String, nil] raw curl option fragment inserted verbatim
  # @param count     [Integer] maximum number of attempts while the response is empty
  # @param header    [String] raw curl option fragment inserted verbatim
  # @return [String] response body
  def post(url, post_data, ref = nil, count = 5, header = " --header \"Content-Type: application/x-www-form-urlencoded\" ")
    pairs = []
    post_data.each do |key, val|
      next unless key
      next if key == 'IDontAgreeBtn'

      pairs << "#{key}=#{CGI.escape(val.to_s).gsub('.', '%2E')}"
    end
    data_arg = Shellwords.escape(pairs.join('&'))

    cmd = "curl #{cookies_store} #{browser_type} --data #{data_arg} #{header} #{@setup_params} #{ref} #{Shellwords.escape(url.to_s)} "
    run_with_retries(cmd, count)
  end

  # Sends a multipart/form-data POST (curl -F).
  #
  # Data format for the post:
  #   data = { "subm"    => "1",
  #            "sid"     => cap.split("=").last,
  #            "country" => "1" }
  #
  # Keys whose name contains "file" or "photo" are uploaded as files: their
  # value is treated as a local path (curl "@path" syntax).
  #
  # NOTE(review): this method shadows Object#send for CURL instances; use
  # __send__/public_send for dynamic dispatch on a CURL object.
  # NOTE(review): values are handed to curl's -F, which itself interprets a
  # leading "@" or "<" in a value as "read from file" -- do not pass untrusted
  # values without checking them.
  #
  # @param url       [String]
  # @param post_data [Hash]
  # @param ref       [String, nil] raw curl option fragment inserted verbatim
  # @param count     [Integer] unused (no retry is performed); kept for interface compatibility
  # @return [String] response body
  def send(url, post_data, ref = nil, count = 5)
    fields = []
    post_data.each do |key, val|
      next unless key

      name = key.to_s
      marker = name.include?('file') || name.include?('photo') ? '@' : ''
      fields << " -F #{Shellwords.escape("#{name}=#{marker}#{val}")} "
    end

    cmd = "curl #{cookies_store} #{browser_type} #{fields.join} #{@setup_params} #{ref} #{Shellwords.escape(url.to_s)} "
    debug_log(cmd, :red)
    open_pipe(cmd)
  end

  # Fetches a URL including the response headers (curl -i).
  #
  # @param url      [String]
  # @param location [Boolean] follow redirects; when false, --location is
  #   removed from the configured curl options (the URL itself is untouched)
  # @return [String] headers followed by the body
  def get_header(url, location = false)
    params = location ? @setup_params : @setup_params.gsub('--location', ' ')
    cmd = "curl #{cookies_store} #{browser_type} #{params} #{Shellwords.escape(url.to_s)} -i "
    debug_log(cmd, :red)
    open_pipe(cmd)
  end

  # Downloads a URL to a file using the configured cookies and options.
  #
  # @param url  [String]
  # @param path [String] destination file; parent directories are created
  # @return [String] path
  def save(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg")
    FileUtils.mkdir_p(File.dirname(path))
    cmd = "curl #{cookies_store} #{browser_type} #{@setup_params} #{Shellwords.escape(url.to_s)} --output #{Shellwords.escape(path.to_s)} "
    debug_log(cmd, :red)
    system(cmd)
    path
  end

  # Downloads a URL to a file without cookies, proxy or timeout options.
  #
  # @param url  [String]
  # @param path [String] destination file; parent directories are created
  # @return [String] path
  def save!(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg")
    FileUtils.mkdir_p(File.dirname(path))
    cmd = "curl #{browser_type} --location --compressed --silent #{Shellwords.escape(url.to_s)} --output #{Shellwords.escape(path.to_s)} "
    debug_log(cmd, :red)
    system(cmd)
    path
  end

  # Deletes the cookie jar file, if present.
  def clear
    File.delete(@cookies_file) if File.exist?(@cookies_file)
  end

  # Writes a Netscape-format cookie jar containing the given cookies.
  #
  # @param hash [Hash] cookie name => value
  # @param site [String] domain the cookies belong to
  # @return [String] the content written to the cookie jar
  def init_cook(hash, site = '')
    content = "# Netscape HTTP Cookie File\n# http://curl.haxx.se/rfc/cookie_spec.html\n# This file was generated by libcurl! Edit at your own risk.\n\n"
    hash.each do |key, val|
      content += "#{site}\tTRUE\t/\tFALSE\t0\t#{key}\t#{val}\n"
    end
    content += "\n"
    # Write straight to the jar path instead of parsing it back out of the
    # curl option string, which failed when cookies were disabled.
    File.open(@cookies_file, 'w') { |f| f.write(content) }
    content
  end

  private

  # Appends proxy options (and credentials, when present) to the curl options.
  def add_proxy_params(flag, address)
    uri = address.is_a?(URI) ? address : URI.parse("http://#{address}")
    endpoint = Shellwords.escape("#{uri.host}:#{uri.port}")
    @setup_params = "#{@setup_params} #{flag} #{endpoint} "
    if uri.user
      credentials = Shellwords.escape("#{uri.user}:#{uri.password}")
      @setup_params = "#{@setup_params} --proxy-user #{credentials} "
    end
    @setup_params
  end

  # Runs cmd until it produces non-blank output or `count` attempts were made
  # (at least one attempt is always made). Returns the last output.
  def run_with_retries(cmd, count)
    loop do
      debug_log(cmd, :red)
      result = open_pipe(cmd).to_s
      return result unless result.strip.empty?

      debug_log("empty result, left #{count} try", :yellow)
      count -= 1
      return result unless count > 0
    end
  end

  # Prints message when debugging is on, colored when String responds to the
  # color method (String#red / String#yellow come from awesome_print).
  def debug_log(message, color)
    return unless @debug

    puts(message.respond_to?(color) ? message.public_send(color) : message)
  end

  # Legacy runner kept for compatibility; runs cmd once and returns its stdout.
  def open_pipe_old(cmd, kills = true)
    stdout, = Open3.capture3(cmd)
    stdout
  end

  # Runs cmd through the shell and returns everything it wrote to stdout.
  # Returns an empty String (instead of waiting forever) when there is no output.
  # `kills` is unused; kept for interface compatibility.
  def open_pipe(cmd, kills = true)
    IO.popen(cmd, 'r+') do |pipe|
      pipe.close_write # nothing is sent to the child; let it see EOF on stdin
      pipe.read
    end.to_s
  end

  # curl option fragment selecting the user agent.
  def browser_type
    " --user-agent #{Shellwords.escape(@user_agent.to_s)} "
  end

  # curl option fragment enabling the cookie jar (blank when cookies are disabled).
  def cookies_store
    return ' ' unless @cookies_enable

    jar = Shellwords.escape(@cookies_file.to_s)
    " --cookie #{jar} --cookie-jar #{jar} "
  end
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: curl
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 3
9
+ version: 0.0.3
10
+ platform: ruby
11
+ authors:
12
+ - tg0
13
+ autorequire: curl
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-10-13 00:00:00 +03:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: awesome_print
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 2
31
+ - 1
32
+ version: 0.2.1
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Some simple methods to use shell curl
36
+ email: email@tg0.ru
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files: []
42
+
43
+ files:
44
+ - README
45
+ - lib/curl.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/tg0/curl
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --inline-source
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
63
+ version: "0"
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
71
+ version: "0"
72
+ requirements: []
73
+
74
+ rubyforge_project:
75
+ rubygems_version: 1.3.7
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: shell CURL ruby wrapper.
79
+ test_files: []
80
+