curl 0.0.4 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of curl might be problematic. Click here for more details.
- data/lib/curl.rb +72 -40
- data/lib/string_cleaner.rb +181 -0
- metadata +16 -16
data/lib/curl.rb
CHANGED
@@ -4,6 +4,7 @@ require "open3"
|
|
4
4
|
require 'fileutils'
|
5
5
|
require 'ap'
|
6
6
|
require 'digest/md5'
|
7
|
+
require 'string_cleaner'
|
7
8
|
|
8
9
|
|
9
10
|
include Open3
|
@@ -31,15 +32,20 @@ class CURL
|
|
31
32
|
attr_accessor :user_agent
|
32
33
|
|
33
34
|
def initialize(keys={})
|
35
|
+
@socks_hostname = keys[:socks_hostname] ||= false
|
34
36
|
@cache = ( keys[:cache] ? keys[:cache] : false )
|
37
|
+
@cache_time = ( keys[:cache_time] ? keys[:cache_time] : 3600*24*1 ) # 1 day cache life
|
38
|
+
@connect_timeout = keys[:connect_timeout] || 6
|
39
|
+
@max_time = keys[:max_time] || 8
|
40
|
+
@retry = keys[:retry] || 1
|
35
41
|
@cookies_enable = ( keys[:cookies_disable] ? false : true )
|
36
42
|
@user_agent = AGENT_ALIASES["Google"]#AGENT_ALIASES[AGENT_ALIASES.keys[rand(6)]]
|
37
43
|
FileUtils.makedirs("/tmp/curl/")
|
38
44
|
@cookies_file = keys[:cookies] || "/tmp/curl/curl_#{rand}_#{rand}.jar"
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
45
|
+
# @cookies_file = "/home/ruslan/curl.jar"
|
46
|
+
#--header "Accept-Encoding: deflate"
|
47
|
+
# @setup_params = ' --header "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" --header "Accept-Language: en-us,en;q=0.5" --header "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7" '
|
48
|
+
@setup_params = " --connect-timeout #{@connect_timeout} --max-time #{@max_time} --retry #{@retry} --location --compressed --silent -k "
|
43
49
|
# @setup_params = ' --location --silent '
|
44
50
|
yield self if block_given?
|
45
51
|
end
|
@@ -60,10 +66,11 @@ class CURL
|
|
60
66
|
end
|
61
67
|
|
62
68
|
def socks(socks_uri)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
69
|
+
socks = ( socks_uri.is_a?(URI) ? socks_uri : URI.parse("http://#{socks_uri}") )
|
70
|
+
s = @socks_hostname ? "--socks5-hostname" : "--socks5"
|
71
|
+
@setup_params = "#{@setup_params} #{s} \"#{socks.host}:#{socks.port}\" "
|
72
|
+
@setup_params = "#{@setup_params} --proxy-user \"#{socks.user}:#{socks.password}\" " if socks.user
|
73
|
+
@setup_params
|
67
74
|
end
|
68
75
|
|
69
76
|
def self.check(proxy)
|
@@ -84,27 +91,44 @@ class CURL
|
|
84
91
|
@debug
|
85
92
|
end
|
86
93
|
|
87
|
-
def
|
94
|
+
def cache_path(url)
|
95
|
+
"#{@cache}/#{Digest::MD5.hexdigest(url)[0..1]}/#{Digest::MD5.hexdigest(url)[2..3]}/#{Digest::MD5.hexdigest(url)[4..5]}/#{Digest::MD5.hexdigest(url)[6..7]}"
|
96
|
+
end
|
97
|
+
def cache_file(url)
|
98
|
+
cache_path(url)+"/#{Digest::MD5.hexdigest(url)}.html"
|
99
|
+
end
|
100
|
+
|
101
|
+
def get(url, keys={})
|
102
|
+
ref = keys[:ref] ||= nil
|
103
|
+
count = keys[:count] ||= 3
|
104
|
+
encoding = keys[:encoding] ||= "utf-8"
|
105
|
+
raw = ( keys[:raw]==nil ? false : keys[:raw] )
|
106
|
+
|
88
107
|
if @cache
|
89
|
-
filename =
|
90
|
-
unless File.exists?(filename)
|
91
|
-
FileUtils.mkdir_p(
|
92
|
-
result = get_raw(url,count,ref)
|
93
|
-
puts "cache to file '#{filename}'"
|
108
|
+
filename = cache_file(url)
|
109
|
+
unless File.exists?(filename) && (File.exists?(filename) && File.ctime(filename) > Time.now-@cache_time)
|
110
|
+
FileUtils.mkdir_p(cache_path(url))
|
111
|
+
result = get_raw(url, {:count=>count, :ref=>ref, :encoding=>encoding} ) #+" --output \"#{filename}\" ")
|
112
|
+
puts "cache to file '#{filename}'" if @debug
|
94
113
|
File.open(filename,"w"){|f| f.puts result}
|
95
114
|
return result
|
96
115
|
else
|
97
|
-
puts "read from cache file '#{filename}'"
|
116
|
+
puts "read from cache file '#{filename}'" if @debug
|
98
117
|
return open(filename).read
|
99
118
|
end
|
100
119
|
else
|
101
|
-
return get_raw(url,count,ref)
|
120
|
+
return get_raw(url, {:count=>count , :ref=>ref, :encoding=>encoding, :raw=>raw})
|
102
121
|
end
|
103
122
|
|
104
123
|
end
|
105
124
|
|
106
|
-
def get_raw(url,
|
107
|
-
|
125
|
+
def get_raw(url, keys={})
|
126
|
+
ref = keys[:ref] ||= nil
|
127
|
+
count = keys[:count] ||= 3
|
128
|
+
encoding = keys[:encoding] ||= "utf-8"
|
129
|
+
raw = ( keys[:raw]==nil ? false : keys[:raw] )
|
130
|
+
|
131
|
+
cmd = "curl #{cookies_store} #{browser_type} #{@setup_params} #{ref} \"#{url}\" "
|
108
132
|
if @debug
|
109
133
|
puts cmd.red
|
110
134
|
end
|
@@ -114,8 +138,13 @@ class CURL
|
|
114
138
|
count -= 1
|
115
139
|
result = self.get(url,count) if count > 0
|
116
140
|
end
|
117
|
-
result
|
118
|
-
|
141
|
+
# result.force_encoding(encoding)
|
142
|
+
if raw
|
143
|
+
return result
|
144
|
+
else
|
145
|
+
return ( encoding=="utf-8" ? result.clean : Iconv.new("UTF-8", "WINDOWS-1251").iconv(result) )
|
146
|
+
end
|
147
|
+
|
119
148
|
end
|
120
149
|
|
121
150
|
# формат данных для поста
|
@@ -155,26 +184,29 @@ class CURL
|
|
155
184
|
# }
|
156
185
|
def send(url,post_data, ref = nil,count=5 )
|
157
186
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
187
|
+
post_q = '' # " -F \"method\"=\"post\" "
|
188
|
+
post_data.each do |key,val|
|
189
|
+
pre = ""
|
190
|
+
if key
|
191
|
+
key = key.to_s
|
192
|
+
pre = "@" if key.scan("file").size>0 or key.scan("photo").size>0 or key.scan("@").size>0
|
193
|
+
key = key.to_s.gsub("@",'')
|
194
|
+
val = val.to_s
|
195
|
+
val = val.gsub('"','\"')
|
196
|
+
post_q += " -F \"#{key}\"=#{pre}\"#{val}\" "
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
cmd = "curl #{cookies_store} #{browser_type} #{post_q} #{@setup_params} #{ref} \"#{url}\" "
|
201
|
+
puts cmd.red if @debug
|
202
|
+
|
203
|
+
result = open_pipe(cmd)
|
204
|
+
#if result.to_s.strip.size == 0
|
205
|
+
# puts "empty result, left #{count} try".yellow if @debug
|
206
|
+
# count -= 1
|
207
|
+
# result = self.send(url,post_data,nil,count) if count > 0
|
208
|
+
#end
|
209
|
+
result
|
178
210
|
end
|
179
211
|
|
180
212
|
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require "unidecoder"
|
3
|
+
|
4
|
+
module String::Cleaner
|
5
|
+
|
6
|
+
def clean
|
7
|
+
fix_encoding.fix_endlines.fix_invisible_chars
|
8
|
+
end
|
9
|
+
|
10
|
+
def fix_encoding
|
11
|
+
utf8 = dup
|
12
|
+
if utf8.respond_to?(:force_encoding)
|
13
|
+
utf8.force_encoding("UTF-8") # for Ruby 1.9+
|
14
|
+
unless utf8.valid_encoding? # if invalid UTF-8
|
15
|
+
utf8 = utf8.force_encoding("ISO8859-1")
|
16
|
+
utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
|
17
|
+
end
|
18
|
+
utf8.gsub!(/\u0080|¤/, "€") # special case for euro sign from Windows-1252
|
19
|
+
utf8
|
20
|
+
else
|
21
|
+
require "iconv"
|
22
|
+
utf8 << " "
|
23
|
+
begin
|
24
|
+
Iconv.new("UTF-8", "UTF-8").iconv(utf8)
|
25
|
+
rescue
|
26
|
+
utf8.gsub!(/\x80/n, "\xA4")
|
27
|
+
Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(utf8).gsub("¤", "€")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def fix_endlines
|
33
|
+
gsub(/(?:\r\n|\r)/u, "\n")
|
34
|
+
end
|
35
|
+
|
36
|
+
SPECIAL_SPACES = [
|
37
|
+
0x00A0, # NO-BREAK SPACE
|
38
|
+
0x1680, # OGHAM SPACE MARK
|
39
|
+
0x180E, # MONGOLIAN VOWEL SEPARATOR
|
40
|
+
(0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
|
41
|
+
0x2028, # LINE SEPARATOR
|
42
|
+
0x2029, # PARAGRAPH SEPARATOR
|
43
|
+
0x202F, # NARROW NO-BREAK SPACE
|
44
|
+
0x205F, # MEDIUM MATHEMATICAL SPACE
|
45
|
+
0x3000, # IDEOGRAPHIC SPACE
|
46
|
+
].flatten.collect{|e| [e].pack 'U*'}
|
47
|
+
|
48
|
+
ZERO_WIDTH = [
|
49
|
+
0x200B, # ZERO WIDTH SPACE
|
50
|
+
0x200C, # ZERO WIDTH NON-JOINER
|
51
|
+
0x200D, # ZERO WIDTH JOINER
|
52
|
+
0x2060, # WORD JOINER
|
53
|
+
0xFEFF, # ZERO WIDTH NO-BREAK SPACE
|
54
|
+
].flatten.collect{|e| [e].pack 'U*'}
|
55
|
+
|
56
|
+
def fix_invisible_chars
|
57
|
+
utf8 = self.dup
|
58
|
+
utf8.gsub!(Regexp.new(ZERO_WIDTH.join("|")), "")
|
59
|
+
utf8 = if utf8.respond_to?(:force_encoding)
|
60
|
+
utf8 = (utf8 << " ").split(/\n/u).each{|line|
|
61
|
+
line.gsub!(/[\s\p{C}]/u, " ")
|
62
|
+
}.join("\n").chop!
|
63
|
+
else
|
64
|
+
require "oniguruma"
|
65
|
+
utf8.split(/\n/n).collect{|line|
|
66
|
+
Oniguruma::ORegexp.new("[\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
|
67
|
+
}.join("\n").chop!
|
68
|
+
end
|
69
|
+
utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|") + "|\s"), " ")
|
70
|
+
utf8
|
71
|
+
end
|
72
|
+
|
73
|
+
def trim(chars = "")
|
74
|
+
chars.size>0 ? gsub(/\A[#{chars}]+|[#{chars}]+\z/, "") : strip
|
75
|
+
end
|
76
|
+
|
77
|
+
def to_permalink(separator="-")
|
78
|
+
clean.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
|
79
|
+
end
|
80
|
+
|
81
|
+
def nl2br
|
82
|
+
gsub("\n", "<br/>\n")
|
83
|
+
end
|
84
|
+
|
85
|
+
def to_nicer_sym
|
86
|
+
to_permalink("_").to_sym
|
87
|
+
end
|
88
|
+
|
89
|
+
def chartable(options = {})
|
90
|
+
options = {
|
91
|
+
:clean_binary => true,
|
92
|
+
:translit_symbols => true,
|
93
|
+
}.merge(options)
|
94
|
+
char = "%c"
|
95
|
+
table = {
|
96
|
+
"`" => "'", # dec = 96
|
97
|
+
"¦" => "|", # dec = 166, broken vertical bar
|
98
|
+
"¨" => "", # dec = 168, spacing diaeresis - umlaut
|
99
|
+
"ª" => "", # dec = 170, feminine ordinal indicator
|
100
|
+
"«" => "\"", # dec = 171, left double angle quotes
|
101
|
+
"¬" => "!", # dec = 172, not sign
|
102
|
+
"" => "-", # dec = 173, soft hyphen
|
103
|
+
"¯" => "-", # dec = 175, spacing macron - overline
|
104
|
+
"²" => "2", # dec = 178, superscript two - squared
|
105
|
+
"³" => "3", # dec = 179, superscript three - cubed
|
106
|
+
"´" => "'", # dec = 180, acute accent - spacing acute
|
107
|
+
"·" => "", # dec = 183, middle dot - Georgian comma
|
108
|
+
"¸" => "", # dec = 184, spacing cedilla
|
109
|
+
"¹" => "1", # dec = 185, superscript one
|
110
|
+
"º" => "0", # dec = 186, masculine ordinal indicator
|
111
|
+
"»" => "\"", # dec = 187, right double angle quotes
|
112
|
+
"¿" => "", # dec = 191, inverted question mark
|
113
|
+
"Ý" => "Y", # dec = 221
|
114
|
+
"–" => "-", # hex = 2013, en dash
|
115
|
+
"—" => "-", # hex = 2014, em dash
|
116
|
+
"‚" => "'", # hex = 201A, single low-9 quotation mark
|
117
|
+
"„" => "\"", # hex = 201E, double low-9 quotation mark
|
118
|
+
}
|
119
|
+
if options[:clean_binary]
|
120
|
+
table[char % 0] = "" # null
|
121
|
+
table[char % 1] = "" # start of heading
|
122
|
+
table[char % 2] = "" # start of text
|
123
|
+
table[char % 3] = "" # end of text
|
124
|
+
table[char % 4] = "" # end of transmission
|
125
|
+
table[char % 5] = "" # enquiry
|
126
|
+
table[char % 6] = "" # acknowledge
|
127
|
+
table[char % 7] = "" # bell
|
128
|
+
table[char % 8] = "" # backspace
|
129
|
+
table[char % 9] = " " # tab
|
130
|
+
table[char % 11] = "" # vertical tab
|
131
|
+
table[char % 12] = "" # form feed
|
132
|
+
table[char % 14] = "" # shift out
|
133
|
+
table[char % 15] = "" # shift in
|
134
|
+
table[char % 16] = "" # data link escape
|
135
|
+
table[char % 17] = "" # device control 1
|
136
|
+
table[char % 18] = "" # device control 2
|
137
|
+
table[char % 19] = "" # device control 3
|
138
|
+
table[char % 20] = "" # device control 4
|
139
|
+
table[char % 21] = "" # negative acknowledgement
|
140
|
+
table[char % 22] = "" # synchronous idle
|
141
|
+
table[char % 23] = "" # end of transmission block
|
142
|
+
table[char % 24] = "" # cancel
|
143
|
+
table[char % 25] = "" # end of medium
|
144
|
+
table[char % 26] = "" # substitute
|
145
|
+
table[char % 27] = "" # escape
|
146
|
+
table[char % 28] = "" # file separator
|
147
|
+
table[char % 29] = "" # group separator
|
148
|
+
table[char % 30] = "" # record separator
|
149
|
+
table[char % 31] = "" # unit separator
|
150
|
+
table[char % 127] = "" # delete
|
151
|
+
end
|
152
|
+
if options[:translit_symbols]
|
153
|
+
table["$"] = " dollars " # dec = 36, dollar sign
|
154
|
+
table["%"] = " percent " # dec = 37, percent sign
|
155
|
+
table["&"] = " and " # dec = 38, ampersand
|
156
|
+
table["@"] = " at " # dec = 64, at symbol
|
157
|
+
table[char % 128] = " euros " # windows euro
|
158
|
+
table["¢"] = " cents " # dec = 162, cent sign
|
159
|
+
table["£"] = " pounds " # dec = 163, pound sign
|
160
|
+
table["¤"] = " euros " # dec = 164, currency sign
|
161
|
+
table["¥"] = " yens " # dec = 165, yen sign
|
162
|
+
table["§"] = " section " # dec = 167, section sign
|
163
|
+
table["©"] = " copyright " # dec = 169, copyright sign
|
164
|
+
table["®"] = " registered trademark " # dec = 174, registered trade mark sign
|
165
|
+
table["°"] = " degrees " # dec = 176, degree sign
|
166
|
+
table["±"] = " approx " # dec = 177, plus-or-minus sign
|
167
|
+
table["µ"] = " micro " # dec = 181, micro sign
|
168
|
+
table["¶"] = " paragraph " # dec = 182, pilcrow sign - paragraph sign
|
169
|
+
table["¼"] = " 1/4 " # dec = 188, fraction one quarter
|
170
|
+
table["½"] = " 1/2 " # dec = 189, fraction one half
|
171
|
+
table["¾"] = " 3/4 " # dec = 190, fraction three quarters
|
172
|
+
table["€"] = " euros " # hex = 20AC, unicode euro
|
173
|
+
table["™"] = " trademark " # hex = 2122, trade mark
|
174
|
+
end
|
175
|
+
table
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
class String
|
180
|
+
include String::Cleaner
|
181
|
+
end
|
metadata
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: curl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
version: 0.0.4
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.9
|
10
6
|
platform: ruby
|
11
7
|
authors:
|
12
8
|
- tg0
|
@@ -14,7 +10,7 @@ autorequire:
|
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
12
|
|
17
|
-
date:
|
13
|
+
date: 2011-03-10 00:00:00 +02:00
|
18
14
|
default_executable:
|
19
15
|
dependencies:
|
20
16
|
- !ruby/object:Gem::Dependency
|
@@ -25,13 +21,20 @@ dependencies:
|
|
25
21
|
requirements:
|
26
22
|
- - ">="
|
27
23
|
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
- 2
|
31
|
-
- 1
|
32
24
|
version: 0.2.1
|
33
25
|
type: :runtime
|
34
26
|
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: unidecoder
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 1.1.1
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: *id002
|
35
38
|
description: Some simple methods to use shell curl
|
36
39
|
email: email@tg0.ru
|
37
40
|
executables: []
|
@@ -43,6 +46,7 @@ extra_rdoc_files: []
|
|
43
46
|
files:
|
44
47
|
- README
|
45
48
|
- lib/curl.rb
|
49
|
+
- lib/string_cleaner.rb
|
46
50
|
has_rdoc: true
|
47
51
|
homepage: http://github.com/tg0/curl
|
48
52
|
licenses: []
|
@@ -58,21 +62,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
58
62
|
requirements:
|
59
63
|
- - ">="
|
60
64
|
- !ruby/object:Gem::Version
|
61
|
-
segments:
|
62
|
-
- 0
|
63
65
|
version: "0"
|
64
66
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
67
|
none: false
|
66
68
|
requirements:
|
67
69
|
- - ">="
|
68
70
|
- !ruby/object:Gem::Version
|
69
|
-
segments:
|
70
|
-
- 0
|
71
71
|
version: "0"
|
72
72
|
requirements: []
|
73
73
|
|
74
74
|
rubyforge_project: curl
|
75
|
-
rubygems_version: 1.
|
75
|
+
rubygems_version: 1.5.2
|
76
76
|
signing_key:
|
77
77
|
specification_version: 3
|
78
78
|
summary: shell CURL ruby wrapper.
|