nethttputils 0.2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 71be1aac2cdebb5316afe367b7b7bd2eb8b5b60d
4
+ data.tar.gz: 4026ab5f4299bc880b3f8c13881114f4e49c67fd
5
+ SHA512:
6
+ metadata.gz: 7fe70ebe65005756f847deb19f318d77ae8c46298420f869caa5d4a2cb75de826b0634f758048f8190188b7704469fa73a84435572dd585e325d62b30e26290e
7
+ data.tar.gz: 58debad4370c8c99abbbd149fcb02c553e674d79dcd89cd294d306f59429c7ede3e58518c4b20127107dfeb7fed5476aa25a5e8ffadc4eeabd54d9870ed44233
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Victor Maslov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,288 @@
1
require "json"
require "logger"
require "net/http"
require "openssl"
require "stringio"
require "uri"
require "zlib"
5
+
6
+
7
# Convenience wrapper around Net::HTTP that adds logging, retries with growing
# delays, redirect following with cookie carry-over and rate-limit pauses.
module NetHTTPUtils
  class << self
    # Logger that receives every message emitted by this module; it can be replaced by the caller.
    attr_accessor :logger
  end
  self.logger = Logger.new STDOUT
  # WARN by default; the LOGLEVEL_NetHTTPUtils environment variable may name another
  # Logger severity constant (e.g. "DEBUG").
  self.logger.level = ENV["LOGLEVEL_#{name}"] ? Logger.const_get(ENV["LOGLEVEL_#{name}"]) : Logger::WARN
  self.logger.formatter = lambda do |severity, datetime, _progname, msg|
    "#{severity.to_s[0]} #{datetime.strftime "%y%m%d %H%M%S"} : #{name} : #{msg}\n"
  end

  # Raised by `request_data` when the final response status is neither 20x nor 3xx.
  # `code` holds the numeric HTTP status, the message embeds the response body.
  class Error < RuntimeError
    attr_reader :code
    def initialize body, code = nil
      @code = code
      super "HTTP error ##{code} #{body}"
    end
  end

  class << self

    # Returns `str` without <script>...</script> sections and without any other
    # markup tags, stripped of surrounding whitespace.
    def remove_tags str
      str.gsub(/<script( type="text\/javascript"| src="[^"]+")?>.*?<\/script>/m, "").gsub(/<[^>]*>/, "").strip
    end

    # Performs an HTTP request and returns the final Net::HTTPResponse, following redirects.
    #
    # url   -- String; it is percent-escaped before being parsed
    # mtd   -- :GET, :POST, :PUT, :DELETE or :PATCH (any letter case)
    # type  -- :form or :json, picks the Content-Type (and the JSON body) for POST and PATCH
    # form: -- Hash; becomes the query string for GET, the form/JSON body for POST
    # header: -- Hash of extra request headers
    # auth: -- Array of arguments for Net::HTTPHeader#basic_auth
    # timeout: -- open and read timeout in seconds
    # max_*_retry_delay: -- once the doubled retry delay exceeds this value the error is re-raised
    # patch_request: -- optional callable invoked as (uri, form, request) right after the request is built
    # &block -- forwarded to Net::HTTP#request
    #
    # Cookies received on the way are sent with the follow-up requests and are
    # appended to the returned response as additional "Set-Cookie" fields.
    # TODO: make it private?
    def get_response url, mtd = :GET, type = :form, form: {}, header: {}, auth: nil, timeout: 30, max_timeout_retry_delay: 3600, max_sslerror_retry_delay: 3600, max_read_retry_delay: 3600, max_econnrefused_retry_delay: 3600, patch_request: nil, &block
      # URI.escape is gone since Ruby 3.0 -- see escape_url
      uri = URI.parse escape_url url

      logger.warn "Warning: query params included `url` are discarded because `:form` isn't empty" if uri.query && !form.empty?
      # we can't just merge because URI fails to parse such queries as "/?1"

      mtd = mtd.upcase
      uri.query = URI.encode_www_form form if :GET == mtd && !form.empty?
      cookies = {}

      # builds the Net::HTTP request object for the given URI
      prepare_request = lambda do |target|
        request_class = case mtd
          when :GET    then Net::HTTP::Get
          when :POST   then Net::HTTP::Post
          when :PUT    then Net::HTTP::Put
          when :DELETE then Net::HTTP::Delete
          when :PATCH  then Net::HTTP::Patch
          else raise "unknown method '#{mtd}'"
        end
        request_class.new(target).tap do |request| # somehow Get eats even raw url, not URI object
          patch_request.call target, form, request if patch_request
          request.basic_auth(*auth) if auth
          request["cookie"] = [*request["cookie"], cookies.map{ |k, v| "#{k}=#{v}" }].join "; " unless cookies.empty?
          request.set_form_data form if !form.empty? && mtd == :POST
          if mtd == :POST || mtd == :PATCH
            request["Content-Type"] = case type
              when :form
                "application/x-www-form-urlencoded;charset=UTF-8"
              when :json
                request.body = JSON.dump form # yes this overwrites form data set few lines higher
                "application/json"
              else
                raise "unknown content-type '#{type}'"
            end
          end
          header.each{ |k, v| request[k.to_s] = v }

          logger.info "> #{request} #{request.path}"
          next unless logger.debug?
          logger.debug "curl -s -D - #{request.each_header.map{ |k, v| "-H \"#{k}: #{v}\" " unless k == "host" }.join}#{url.gsub "&", "\\\\&"}"
          logger.debug "> header: #{request.each_header.to_a}"
          logger.debug "> body: #{request.body.inspect.tap{ |body| body[100..-1] = "..." if body.size > 100 }}"
          # compact call stack: "file:line,line -> file:line"
          stack = caller.reverse.map do |level|
            /((?:[^\/:]+\/)?[^\/:]+):([^:]+)/.match(level).captures
          end.chunk(&:first).map do |file, group|
            "#{file}:#{group.map(&:last).chunk{|_|_}.map(&:first).join(",")}"
          end
          logger.debug stack.join " -> "
        end
      end

      # Opens a connection to the host of the given URI, retrying on connection-level errors.
      # The block form of Net::HTTP.start finishes the session before returning, so the
      # returned object is reopened by Net::HTTP#request when it is used.
      start_http = lambda do |target|
        delay = 5
        begin
          Net::HTTP.start(
            target.host, target.port,
            use_ssl: target.scheme == "https",
            # NOTE(review): certificate verification is disabled for every HTTPS request -- confirm this is intended
            verify_mode: OpenSSL::SSL::VERIFY_NONE,
            open_timeout: timeout,
            read_timeout: timeout,
          ) do |http|
            # http.open_timeout = timeout   # seems like when opening hangs, this line in unreachable
            # http.read_timeout = timeout
            http.set_debug_output STDERR if logger.level == Logger::DEBUG # use `logger.debug?`?
            http
          end
        rescue Errno::ECONNREFUSED => e
          e.message.concat " to #{target}"
          raise if max_econnrefused_retry_delay < delay *= 2
          logger.warn "retrying in #{delay} seconds because of #{e.class} '#{e.message}'"
          sleep delay
          retry
        rescue Errno::EHOSTUNREACH, Errno::ENETUNREACH, Errno::ECONNRESET, SocketError => e
          # failed name resolution is not retried
          if e.is_a?(SocketError) && e.message["getaddrinfo: "]
            e.message.concat ": #{target.host}"
            raise e
            # logger.warn "retrying in 60 seconds because of #{e.class} '#{e.message}'"
            # sleep 60
            # retry
          end
          logger.warn "retrying in 5 seconds because of #{e.class} '#{e.message}'"
          sleep 5
          retry
        rescue Errno::ETIMEDOUT, Net::OpenTimeout => e
          raise if max_timeout_retry_delay < delay *= 2
          logger.warn "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{target}"
          sleep delay
          retry
        rescue OpenSSL::SSL::SSLError => e
          raise if max_sslerror_retry_delay < delay *= 2
          logger.error "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{target}"
          sleep delay
          retry
        end
      end

      http = start_http[uri]

      # sends the request, handles rate limits and cookies, follows redirects recursively
      do_request = lambda do |request|
        delay = 1
        response = begin
          http.request request, &block
        rescue Errno::ECONNREFUSED, Net::ReadTimeout, Net::OpenTimeout, Zlib::BufError, Errno::ECONNRESET, OpenSSL::SSL::SSLError => e
          raise if max_read_retry_delay < delay *= 2
          logger.error "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{request.uri}"
          sleep delay
          retry
        end
        # response.instance_variable_set "@nethttputils_close", http.method(:finish)
        # response.singleton_class.instance_eval{ attr_accessor :nethttputils_socket_to_close }

        if response.key? "x-ratelimit-userremaining"
          c = response.fetch("x-ratelimit-userremaining").to_i
          logger.debug "x-ratelimit-userremaining: #{c}"
          logger.debug "x-ratelimit-clientremaining: #{response.fetch("x-ratelimit-clientremaining").to_i}"
          unless 100 < c
            a = response.fetch("x-timer")[/\d+/].to_i
            b = response.fetch("x-ratelimit-userreset").to_i
            t = ratelimit_pause c, a, b
            logger.warn "x-ratelimit sleep #{t} seconds"
            sleep t
          end
        end

        response.to_hash.fetch("set-cookie", []).each do |line|
          k, v = parse_set_cookie line
          cookies[k] = v if k
        end

        case response.code
        when /\A3\d\d\z/
          logger.info "redirect: #{response["location"]}"
          if response["location"]
            new_uri = URI.join request.uri, escape_url(response["location"])
            new_host = new_uri.host
            if http.address != new_host ||
               http.port != new_uri.port ||
               http.use_ssl? != (new_uri.scheme == "https")
              logger.debug "changing host from '#{http.address}' to '#{new_host}'"
              # http.finish
              http = start_http[new_uri]
            end
            do_request.call prepare_request[new_uri]
          else
            # a 3xx without "Location" (e.g. 304) has nowhere to go -- hand it back as is
            response
          end
        when "404"
          logger.error "404 at #{request.method} #{request.uri} with body: #{loggable_body response}"
          response
        when "429"
          logger.error "429 at #{request.method} #{request.uri} with body: #{response.body.inspect}"
          response
        when /\A50\d\z/
          logger.error "#{response.code} at #{request.method} #{request.uri} with body: #{loggable_body response}"
          response
        when /\A20/
          response
        else
          logger.warn "code #{response.code} at #{request.method} #{request.uri}#{
            " and so #{url}" if request.uri.to_s != url
          } from #{
            [__FILE__, caller.map{ |i| i[/(?<=:)\d+/] }].join ":"
          }"
          logger.debug "< header: #{response.to_hash}"
          logger.debug "< body: #{loggable_body response}"
          response
        end
      end

      do_request[prepare_request[uri]].tap do |response|
        cookies.each{ |k, v| response.add_field "Set-Cookie", "#{k}=#{v};" }
        logger.debug response.to_hash
      end
    end

    # Calls `get_response` with the same positional and keyword arguments and returns
    # the response body as a String, gunzipped when the response is still marked as gzip.
    # The path of the final URI is attached to the string as the @uri_path instance variable.
    # Raises NetHTTPUtils::Error when the status is neither 20x nor 3xx.
    def request_data *args, **kwargs, &block
      # keywords are forwarded explicitly: since Ruby 3.0 a Hash captured by *args
      # is no longer turned back into keyword arguments
      response = get_response(*args, **kwargs, &block)
      raise Error.new response.body, response.code.to_i unless response.code[/\A(20\d|3\d\d)\z/]
      if response["content-encoding"] == "gzip"
        Zlib::GzipReader.new(StringIO.new(response.body)).read
      else
        response.body
      end.tap do |string|
        # the body is nil for bodiless responses and nil (like any frozen object) can't be tagged
        string.instance_variable_set :@uri_path, response.uri.path unless string.frozen?
      end
    # ensure
    #   response.instance_variable_get("@nethttputils_close").call if response
    end

    private

    # Percent-escapes characters that are not allowed in a URI, the way URI.escape did
    # before it was removed in Ruby 3.0.
    def escape_url url
      (@rfc2396_parser ||= URI::RFC2396_Parser.new).escape url
    end

    # Splits one "Set-Cookie" header value into [name, value], ignoring the attributes
    # after the first ";". The value may contain "=" and may be empty.
    # Returns nil when the header has no cookie name.
    def parse_set_cookie line
      name, value = line[/\A[^;]*/].split("=", 2)
      return if name.nil? || name.empty?
      [name, value.to_s]
    end

    # Seconds to sleep so that the `remaining` requests are spread over the time left
    # between `now` and `reset`. Never negative, never divides by zero.
    def ratelimit_pause remaining, now, reset
      [(reset - now + 1).fdiv([remaining, 1].max), 0.0].max
    end

    # Body of `response` prepared for a log line. HTML bodies are stripped of tags
    # in place, so the response keeps the stripped text afterwards.
    def loggable_body response
      body = response.body
      return "impossible to reread Net::ReadAdapter -- check the IO you've used in block form" if body.is_a? Net::ReadAdapter
      body.replace remove_tags body if body.is_a?(String) && body[/<html[> ]/]
      body.inspect
    end

  end
end
218
+
219
+
220
# Self-test, executed only when this file is run directly.
# It needs port 8000 to be free and access to several public websites.
if $0 == __FILE__
  STDOUT.sync = true
  print "self testing... "
  require "pp"

  require "webrick"
  require "json"

  # local server that echoes the requested URI and the names of the received headers
  server = WEBrick::HTTPServer.new Port: 8000
  server.mount_proc "/" do |req, res|
    # pp req.dup.tap{ |_| _.instance_variable_set "@config", nil }
    # res.status = WEBrick::HTTPStatus::RC_ACCEPTED
    res.body = JSON.dump [req.unparsed_uri, req.header.keys]
  end
  Thread.abort_on_exception = true
  Thread.new{ server.start }

  get_headers = %w{ accept-encoding accept user-agent host connection }
  post_headers = %w{ accept-encoding accept user-agent host content-type connection content-length }
  echo = lambda{ |path, headers| JSON.dump [path, headers] }
  fail unless echo["/", get_headers] == NetHTTPUtils.request_data("http://localhost:8000/")
  fail unless echo["/?1", get_headers] == NetHTTPUtils.request_data("http://localhost:8000/?1")
  fail unless echo["/?1=2", get_headers] == NetHTTPUtils.request_data("http://localhost:8000/?1=2")
  fail unless echo["/?1=3", get_headers] == NetHTTPUtils.request_data("http://localhost:8000/?1=2&3=4", form: {1=>3})
  fail unless echo["/", post_headers] == NetHTTPUtils.request_data("http://localhost:8000/", :post, form: {1=>2})
  server.shutdown

  # status codes: request_data raises for them, get_response returns the response
  error_bodies = {
    400 => "400 Bad Request",
    404 => "404 Not Found",
    500 => "500 Internal Server Error",
    503 => "503 Service Unavailable",
  }
  fail unless NetHTTPUtils.request_data("http://httpstat.us/200") == "200 OK"
  error_bodies.each_key do |code|
    begin
      fail NetHTTPUtils.request_data "http://httpstat.us/#{code}"
    rescue NetHTTPUtils::Error => e
      raise unless e.code == code
    end
  end
  error_bodies.each do |code, text|
    fail unless NetHTTPUtils.get_response("http://httpstat.us/#{code}").body == text
  end

  # pages that are expected to answer with 404
  [
    "https://imgur.com/a/cccccc",
    "https://imgur.com/mM4Dh7Z",
  ].each do |url|
    begin
      puts NetHTTPUtils.remove_tags NetHTTPUtils.request_data url
      fail
    rescue NetHTTPUtils::Error => e
      raise unless e.code == 404
    end
  end

  # hosts that are expected to fail the name resolution
  [
    "http://minus.com/lkP3hgRJd9npi",
    "http://www.cutehalloweencostumeideas.org/wp-content/uploads/2017/10/Niagara-Falls_04.jpg",
  ].each do |url|
    begin
      fail NetHTTPUtils.request_data url
    rescue SocketError => e
      raise unless e.message["getaddrinfo: "]
    end
  end

  # the negative max delay makes the first open timeout propagate instead of being retried
  begin
    fail NetHTTPUtils.request_data "https://oi64.tinypic.com/29z7oxs.jpg?", timeout: 5, max_timeout_retry_delay: -1
  rescue Net::OpenTimeout
  end
  ## this stopped failing on High Sierra
  # begin
  #   # https://www.virtualself.co/?
  #   fail NetHTTPUtils.request_data "https://bulletinxp.com/curiosity/strange-weather/?", max_sslerror_retry_delay: -1
  # rescue OpenSSL::SSL::SSLError => e
  # end

  puts "OK #{__FILE__}"
end
@@ -0,0 +1,25 @@
1
# Gem specification of nethttputils 0.2.0.0 as shown in this diff view.
# It sets the name, version, summary, a multi-line description (heredoc), the homepage,
# author, e-mail and MIT license, uses "lib" as the require path and packages every
# file listed by `git ls-files` except the spec's test files.
+ Gem::Specification.new do |spec|
2
+ spec.name = "nethttputils"
3
+ spec.version = "0.2.0.0"
4
+ spec.summary = "this tool is like a pet that I adopted young and now I depend on, sorry"
5
+ spec.description = <<-EOF
6
+ Back in 2015 I was a guy automating things at my job and two scripts had a common need --
7
+ they both had to pass the same credentials to Jenkins (via query params, I guess).
8
+
9
+ That common tool with a single method was a Net::HTTP wrapper -- that's where the name from.
10
+ Then when the third script appeared two of them had to pass the Basic Auth.
11
+ The verb POST was added and common logging format, and relatively complex retry logic.
12
+ Then some website had redirects and I had to store cookies, then GZIP and API rate limits...
13
+
14
+ I was not going to gemify this monster but it is now a dependency in many other gems,
15
+ and since Gemfile does not support Github dependencies I have to finally gemify it.
16
+ EOF
17
+
18
+ spec.homepage = "https://github.com/nakilon/nethttputils"
19
+ spec.author = "Victor Maslov aka Nakilon"
20
+ spec.email = "nakilon@gmail.com"
21
+ spec.license = "MIT"
22
+
23
+ spec.require_path = "lib"
24
+ spec.files = `git ls-files -z`.split(?\0) - spec.test_files
25
+ end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nethttputils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Victor Maslov aka Nakilon
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |2
14
+ Back in 2015 I was a guy automating things at my job and two scripts had a common need --
15
+ they both had to pass the same credentials to Jenkins (via query params, I guess).
16
+
17
+ That common tool with a single method was a Net::HTTP wrapper -- that's where the name from.
18
+ Then when the third script appeared two of them had to pass the Basic Auth.
19
+ The verb POST was added and common logging format, and relatively complex retry logic.
20
+ Then some website had redirects and I had to store cookies, then GZIP and API rate limits...
21
+
22
+ I was not going to gemify this monster but it is now a dependency in many other gems,
23
+ and since Gemfile does not support Github dependencies I have to finally gemify it.
24
+ email: nakilon@gmail.com
25
+ executables: []
26
+ extensions: []
27
+ extra_rdoc_files: []
28
+ files:
29
+ - LICENSE
30
+ - Rakefile
31
+ - lib/nethttputils.rb
32
+ - nethttputils.gemspec
33
+ homepage: https://github.com/nakilon/nethttputils
34
+ licenses:
35
+ - MIT
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 2.5.2
54
+ signing_key:
55
+ specification_version: 4
56
+ summary: this tool is like a pet that I adopted young and now I depend on, sorry
57
+ test_files: []