nethttputils 0.2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 71be1aac2cdebb5316afe367b7b7bd2eb8b5b60d
4
+ data.tar.gz: 4026ab5f4299bc880b3f8c13881114f4e49c67fd
5
+ SHA512:
6
+ metadata.gz: 7fe70ebe65005756f847deb19f318d77ae8c46298420f869caa5d4a2cb75de826b0634f758048f8190188b7704469fa73a84435572dd585e325d62b30e26290e
7
+ data.tar.gz: 58debad4370c8c99abbbd149fcb02c553e674d79dcd89cd294d306f59429c7ede3e58518c4b20127107dfeb7fed5476aa25a5e8ffadc4eeabd54d9870ed44233
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Victor Maslov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,288 @@
1
+ require "net/http"
2
+ require "openssl"
3
+
4
+ require "logger"
5
+
6
+
7
+ module NetHTTPUtils
8
+ class << self
9
+ attr_accessor :logger
10
+ end
11
+ self.logger = Logger.new STDOUT
12
+ self.logger.level = ENV["LOGLEVEL_#{name}"] ? Logger.const_get(ENV["LOGLEVEL_#{name}"]) : Logger::WARN
13
+ self.logger.formatter = lambda do |severity, datetime, progname, msg|
14
+ "#{severity.to_s[0]} #{datetime.strftime "%y%m%d %H%M%S"} : #{name} : #{msg}\n"
15
+ end
16
+
17
+ class Error < RuntimeError
18
+ attr_reader :code
19
+ def initialize body, code = nil
20
+ @code = code
21
+ super "HTTP error ##{code} #{body}"
22
+ end
23
+ end
24
+
25
+ class << self
26
+
27
+ def remove_tags str
28
+ str.gsub(/<script( type="text\/javascript"| src="[^"]+")?>.*?<\/script>/m, "").gsub(/<[^>]*>/, "").strip
29
+ end
30
+
31
+ # TODO: make it private?
32
+ def get_response url, mtd = :GET, type = :form, form: {}, header: {}, auth: nil, timeout: 30, max_timeout_retry_delay: 3600, max_sslerror_retry_delay: 3600, max_read_retry_delay: 3600, max_econnrefused_retry_delay: 3600, patch_request: nil, &block
33
+ uri = URI.parse URI.escape url
34
+
35
+ logger.warn "Warning: query params included `url` are discarded because `:form` isn't empty" if uri.query && !form.empty?
36
+ # we can't just merge because URI fails to parse such queries as "/?1"
37
+
38
+ uri.query = URI.encode_www_form form if :GET == (mtd = mtd.upcase) && !form.empty?
39
+ cookies = {}
40
+ prepare_request = lambda do |uri|
41
+ case mtd.upcase
42
+ when :GET ; Net::HTTP::Get
43
+ when :POST ; Net::HTTP::Post
44
+ when :PUT ; Net::HTTP::Put
45
+ when :DELETE ; Net::HTTP::Delete
46
+ when :PATCH ; Net::HTTP::Patch
47
+ else ; raise "unknown method '#{mtd}'"
48
+ end.new(uri).tap do |request| # somehow Get eats even raw url, not URI object
49
+ patch_request.call uri, form, request if patch_request
50
+ request.basic_auth *auth if auth
51
+ request["cookie"] = [*request["cookie"], cookies.map{ |k, v| "#{k}=#{v}" }].join "; " unless cookies.empty?
52
+ request.set_form_data form if !form.empty? && mtd == :POST
53
+ if mtd == :POST || mtd == :PATCH
54
+ request["Content-Type"] = case type
55
+ when :form ; "application/x-www-form-urlencoded;charset=UTF-8"
56
+ when :json ; request.body = JSON.dump form # yes this overwrites form data set few lines higher
57
+ "application/json"
58
+ else ; raise "unknown content-type '#{type}'"
59
+ end
60
+ end
61
+ header.each{ |k, v| request[k.to_s] = v }
62
+
63
+ logger.info "> #{request} #{request.path}"
64
+ next unless logger.debug?
65
+ logger.debug "curl -s -D - #{request.each_header.map{ |k, v| "-H \"#{k}: #{v}\" " unless k == "host" }.join}#{url.gsub "&", "\\\\&"}"
66
+ logger.debug "> header: #{request.each_header.to_a}"
67
+ logger.debug "> body: #{request.body.inspect.tap{ |body| body[100..-1] = "..." if body.size > 100 }}"
68
+ stack = caller.reverse.map do |level|
69
+ /((?:[^\/:]+\/)?[^\/:]+):([^:]+)/.match(level).captures
70
+ end.chunk(&:first).map do |file, group|
71
+ "#{file}:#{group.map(&:last).chunk{|_|_}.map(&:first).join(",")}"
72
+ end
73
+ logger.debug stack.join " -> "
74
+ end
75
+ end
76
+ start_http = lambda do |uri|
77
+ delay = 5
78
+ begin
79
+ Net::HTTP.start(
80
+ uri.host, uri.port,
81
+ use_ssl: uri.scheme == "https",
82
+ verify_mode: OpenSSL::SSL::VERIFY_NONE,
83
+ **({open_timeout: timeout}), # if timeout
84
+ **({read_timeout: timeout}), # if timeout
85
+ ) do |http|
86
+ # http.open_timeout = timeout # seems like when opening hangs, this line in unreachable
87
+ # http.read_timeout = timeout
88
+ http.set_debug_output STDERR if logger.level == Logger::DEBUG # use `logger.debug?`?
89
+ http
90
+ end
91
+ rescue Errno::ECONNREFUSED => e
92
+ e.message.concat " to #{uri}"
93
+ raise if max_econnrefused_retry_delay < delay *= 2
94
+ logger.warn "retrying in #{delay} seconds because of #{e.class} '#{e.message}'"
95
+ sleep delay
96
+ retry
97
+ rescue Errno::EHOSTUNREACH, Errno::ENETUNREACH, Errno::ECONNRESET, SocketError => e
98
+ if e.is_a?(SocketError) && e.message["getaddrinfo: "]
99
+ e.message.concat ": #{uri.host}"
100
+ raise e
101
+ # logger.warn "retrying in 60 seconds because of #{e.class} '#{e.message}'"
102
+ # sleep 60
103
+ # retry
104
+ end
105
+ logger.warn "retrying in 5 seconds because of #{e.class} '#{e.message}'"
106
+ sleep 5
107
+ retry
108
+ rescue Errno::ETIMEDOUT, Net::OpenTimeout => e
109
+ raise if max_timeout_retry_delay < delay *= 2
110
+ logger.warn "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{uri}"
111
+ sleep delay
112
+ retry
113
+ rescue OpenSSL::SSL::SSLError => e
114
+ raise if max_sslerror_retry_delay < delay *= 2
115
+ logger.error "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{uri}"
116
+ sleep delay
117
+ retry
118
+ end
119
+ end
120
+ http = start_http[uri]
121
+ do_request = lambda do |request|
122
+ delay = 1
123
+ response = begin
124
+ http.request request, &block
125
+ rescue Errno::ECONNREFUSED, Net::ReadTimeout, Net::OpenTimeout, Zlib::BufError, Errno::ECONNRESET, OpenSSL::SSL::SSLError => e
126
+ raise if max_read_retry_delay < delay *= 2
127
+ logger.error "retrying in #{delay} seconds because of #{e.class} '#{e.message}' at: #{request.uri}"
128
+ sleep delay
129
+ retry
130
+ end
131
+ # response.instance_variable_set "@nethttputils_close", http.method(:finish)
132
+ # response.singleton_class.instance_eval{ attr_accessor :nethttputils_socket_to_close }
133
+
134
+ if response.key? "x-ratelimit-userremaining"
135
+ c = response.fetch("x-ratelimit-userremaining").to_i
136
+ logger.debug "x-ratelimit-userremaining: #{c}"
137
+ t = response.fetch("x-ratelimit-clientremaining").to_i
138
+ logger.debug "x-ratelimit-clientremaining: #{t}"
139
+ unless 100 < c
140
+ a = response.fetch("x-timer")[/\d+/].to_i
141
+ b = response.fetch("x-ratelimit-userreset").to_i
142
+ t = (b - a + 1).fdiv c
143
+ logger.warn "x-ratelimit sleep #{t} seconds"
144
+ sleep t
145
+ end
146
+ end
147
+
148
+ response.to_hash.fetch("set-cookie", []).each{ |c| k, v = c.split(?=); cookies[k] = v[/[^;]+/] }
149
+ case response.code
150
+ when /\A3\d\d\z/
151
+ logger.info "redirect: #{response["location"]}"
152
+ new_uri = URI.join request.uri, URI.escape(response["location"])
153
+ new_host = new_uri.host
154
+ if http.address != new_host ||
155
+ http.port != new_uri.port ||
156
+ http.use_ssl? != (new_uri.scheme == "https")
157
+ logger.debug "changing host from '#{http.address}' to '#{new_host}'"
158
+ # http.finish
159
+ http = start_http[new_uri]
160
+ end
161
+ do_request.call prepare_request[new_uri]
162
+ when "404"
163
+ logger.error "404 at #{request.method} #{request.uri} with body: #{
164
+ response.body.is_a?(Net::ReadAdapter) ? "impossible to reread Net::ReadAdapter -- check the IO you've used in block form" : response.body.tap do |body|
165
+ body.replace remove_tags body if body[/<html[> ]/]
166
+ end.inspect
167
+ }"
168
+ response
169
+ when "429"
170
+ logger.error "429 at #{request.method} #{request.uri} with body: #{response.body.inspect}"
171
+ response
172
+ when /\A50\d\z/
173
+ logger.error "#{response.code} at #{request.method} #{request.uri} with body: #{
174
+ response.body.tap do |body|
175
+ body.replace remove_tags body if body[/<html[> ]/]
176
+ end.inspect
177
+ }"
178
+ response
179
+ when /\A20/
180
+ response
181
+ else
182
+ logger.warn "code #{response.code} at #{request.method} #{request.uri}#{
183
+ " and so #{url}" if request.uri.to_s != url
184
+ } from #{
185
+ [__FILE__, caller.map{ |i| i[/(?<=:)\d+/] }].join ?:
186
+ }"
187
+ logger.debug "< header: #{response.to_hash}"
188
+ logger.debug "< body: #{
189
+ response.body.tap do |body|
190
+ body.replace remove_tags body if body[/<html[> ]/]
191
+ end.inspect
192
+ }"
193
+ response
194
+ end
195
+ end
196
+ do_request[prepare_request[uri]].tap do |response|
197
+ cookies.each{ |k, v| response.add_field "Set-Cookie", "#{k}=#{v};" }
198
+ logger.debug response.to_hash
199
+ end
200
+ end
201
+
202
+ def request_data *args, &block
203
+ response = get_response *args, &block
204
+ raise Error.new response.body, response.code.to_i unless response.code[/\A(20\d|3\d\d)\z/]
205
+ if response["content-encoding"] == "gzip"
206
+ Zlib::GzipReader.new(StringIO.new(response.body)).read
207
+ else
208
+ response.body
209
+ end.tap do |string|
210
+ string.instance_variable_set :@uri_path, response.uri.path
211
+ end
212
+ # ensure
213
+ # response.instance_variable_get("@nethttputils_close").call if response
214
+ end
215
+
216
+ end
217
+ end
218
+
219
+
220
# Self-test, executed only when this file is run directly. It needs WEBrick,
# a free local port 8000 and access to several public hosts.
if __FILE__ == $0
  STDOUT.sync = true
  print "self testing... "
  require "pp"

  require "webrick"
  require "json"

  # local server echoing the request URI and the names of the received headers
  server = WEBrick::HTTPServer.new Port: 8000
  server.mount_proc "/" do |req, res|
    # pp req.dup.tap{ |_| _.instance_variable_set "@config", nil }
    # res.status = WEBrick::HTTPStatus::RC_ACCEPTED
    res.body = JSON.dump [req.unparsed_uri, req.header.keys]
  end
  Thread.abort_on_exception = true
  Thread.new{ server.start }

  get_headers  = %w{ accept-encoding accept user-agent host connection }
  post_headers = %w{ accept-encoding accept user-agent host content-type connection content-length }
  raise unless NetHTTPUtils.request_data("http://localhost:8000/") == JSON.dump(["/", get_headers])
  raise unless NetHTTPUtils.request_data("http://localhost:8000/?1") == JSON.dump(["/?1", get_headers])
  raise unless NetHTTPUtils.request_data("http://localhost:8000/?1=2") == JSON.dump(["/?1=2", get_headers])
  raise unless NetHTTPUtils.request_data("http://localhost:8000/?1=2&3=4", form: {1=>3}) == JSON.dump(["/?1=3", get_headers])
  raise unless NetHTTPUtils.request_data("http://localhost:8000/", :post, form: {1=>2}) == JSON.dump(["/", post_headers])
  server.shutdown

  # status handling against a public echo service
  raise unless "200 OK" == NetHTTPUtils.request_data("http://httpstat.us/200")
  error_bodies = {
    400 => "400 Bad Request",
    404 => "404 Not Found",
    500 => "500 Internal Server Error",
    503 => "503 Service Unavailable",
  }
  error_bodies.each_key do |status|
    begin
      # reaching `raise` here means no error was raised for an error status
      raise NetHTTPUtils.request_data "http://httpstat.us/#{status}"
    rescue NetHTTPUtils::Error => error
      raise unless error.code == status
    end
  end
  error_bodies.each do |status, text|
    raise unless text == NetHTTPUtils.get_response("http://httpstat.us/#{status}").body
  end

  # pages expected to answer 404
  ["https://imgur.com/a/cccccc", "https://imgur.com/mM4Dh7Z"].each do |page|
    begin
      puts NetHTTPUtils.remove_tags NetHTTPUtils.request_data page
      raise
    rescue NetHTTPUtils::Error => error
      raise unless error.code == 404
    end
  end

  # hosts expected to fail name resolution
  [
    "http://minus.com/lkP3hgRJd9npi",
    "http://www.cutehalloweencostumeideas.org/wp-content/uploads/2017/10/Niagara-Falls_04.jpg",
  ].each do |page|
    begin
      raise NetHTTPUtils.request_data page
    rescue SocketError => error
      raise unless error.message["getaddrinfo: "]
    end
  end

  # a negative bound makes the very first open timeout propagate
  begin
    raise NetHTTPUtils.request_data "https://oi64.tinypic.com/29z7oxs.jpg?", timeout: 5, max_timeout_retry_delay: -1
  rescue Net::OpenTimeout
  end
  ## this stopped failing on High Sierra
  # begin
  #   # https://www.virtualself.co/?
  #   fail NetHTTPUtils.request_data "https://bulletinxp.com/curiosity/strange-weather/?", max_sslerror_retry_delay: -1
  # rescue OpenSSL::SSL::SSLError => e
  # end

  puts "OK #{__FILE__}"
end
@@ -0,0 +1,25 @@
1
# Gem specification for nethttputils: name, version, descriptive texts,
# author/contact details, license and the list of packaged files.
+ Gem::Specification.new do |spec|
2
+ spec.name = "nethttputils"
3
+ spec.version = "0.2.0.0"
4
+ spec.summary = "this tool is like a pet that I adopted young and now I depend on, sorry"
# The heredoc below is stored verbatim (including its line breaks) as the
# long description of the gem.
5
+ spec.description = <<-EOF
6
+ Back in 2015 I was a guy automating things at my job and two scripts had a common need --
7
+ they both had to pass the same credentials to Jenkins (via query params, I guess).
8
+
9
+ That common tool with a single method was a Net::HTTP wrapper -- that's where the name from.
10
+ Then when the third script appeared two of them had to pass the Basic Auth.
11
+ The verb POST was added and common logging format, and relatively complex retry logic.
12
+ Then some website had redirects and I had to store cookies, then GZIP and API rate limits...
13
+
14
+ I was not going to gemify this monster but it is now a dependency in many other gems,
15
+ and since Gemfile does not support Github dependencies I have to finally gemify it.
16
+ EOF
17
+
18
+ spec.homepage = "https://github.com/nakilon/nethttputils"
19
+ spec.author = "Victor Maslov aka Nakilon"
20
+ spec.email = "nakilon@gmail.com"
21
+ spec.license = "MIT"
22
+
# Library code is loaded from lib/. The file list is whatever `git ls-files`
# prints (NUL-separated) minus the spec's test files, so evaluating this
# gemspec shells out to git.
23
+ spec.require_path = "lib"
24
+ spec.files = `git ls-files -z`.split(?\0) - spec.test_files
25
+ end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nethttputils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Victor Maslov aka Nakilon
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |2
14
+ Back in 2015 I was a guy automating things at my job and two scripts had a common need --
15
+ they both had to pass the same credentials to Jenkins (via query params, I guess).
16
+
17
+ That common tool with a single method was a Net::HTTP wrapper -- that's where the name from.
18
+ Then when the third script appeared two of them had to pass the Basic Auth.
19
+ The verb POST was added and common logging format, and relatively complex retry logic.
20
+ Then some website had redirects and I had to store cookies, then GZIP and API rate limits...
21
+
22
+ I was not going to gemify this monster but it is now a dependency in many other gems,
23
+ and since Gemfile does not support Github dependencies I have to finally gemify it.
24
+ email: nakilon@gmail.com
25
+ executables: []
26
+ extensions: []
27
+ extra_rdoc_files: []
28
+ files:
29
+ - LICENSE
30
+ - Rakefile
31
+ - lib/nethttputils.rb
32
+ - nethttputils.gemspec
33
+ homepage: https://github.com/nakilon/nethttputils
34
+ licenses:
35
+ - MIT
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 2.5.2
54
+ signing_key:
55
+ specification_version: 4
56
+ summary: this tool is like a pet that I adopted young and now I depend on, sorry
57
+ test_files: []