sinew 2.0.2 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +26 -0
- data/.rubocop.yml +9 -6
- data/.vscode/settings.json +0 -10
- data/Gemfile +9 -0
- data/README.md +62 -54
- data/Rakefile +33 -18
- data/bin/sinew +2 -0
- data/lib/sinew.rb +0 -1
- data/lib/sinew/connection.rb +52 -0
- data/lib/sinew/connection/log_formatter.rb +22 -0
- data/lib/sinew/connection/rate_limit.rb +29 -0
- data/lib/sinew/core_ext.rb +1 -1
- data/lib/sinew/dsl.rb +10 -6
- data/lib/sinew/main.rb +29 -56
- data/lib/sinew/output.rb +7 -16
- data/lib/sinew/request.rb +22 -87
- data/lib/sinew/response.rb +8 -57
- data/lib/sinew/runtime_options.rb +4 -4
- data/lib/sinew/version.rb +1 -1
- data/sample.sinew +2 -2
- data/sinew.gemspec +16 -18
- metadata +38 -110
- data/.travis.yml +0 -4
- data/lib/sinew/cache.rb +0 -79
- data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
- data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
- data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
- data/test/legacy/eu.httpbin.org/status,500 +0 -1
- data/test/legacy/legacy.sinew +0 -2
- data/test/recipes/array_header.sinew +0 -6
- data/test/recipes/basic.sinew +0 -8
- data/test/recipes/dups.sinew +0 -7
- data/test/recipes/implicit_header.sinew +0 -5
- data/test/recipes/limit.sinew +0 -11
- data/test/recipes/noko.sinew +0 -9
- data/test/recipes/uri.sinew +0 -11
- data/test/recipes/xml.sinew +0 -8
- data/test/test.html +0 -45
- data/test/test_cache.rb +0 -69
- data/test/test_helper.rb +0 -123
- data/test/test_legacy.rb +0 -23
- data/test/test_main.rb +0 -34
- data/test/test_nokogiri_ext.rb +0 -18
- data/test/test_output.rb +0 -56
- data/test/test_recipes.rb +0 -60
- data/test/test_requests.rb +0 -135
- data/test/test_utf8.rb +0 -39
@@ -0,0 +1,29 @@
|
|
1
|
+
module Sinew
|
2
|
+
module Connection
|
3
|
+
class RateLimit < Faraday::Middleware
|
4
|
+
attr_reader :rate_limit
|
5
|
+
|
6
|
+
def initialize(app, options = {})
|
7
|
+
super(app)
|
8
|
+
|
9
|
+
@last_request_tm = @current_request_tm = nil
|
10
|
+
@rate_limit = options.fetch(:rate_limit, 1)
|
11
|
+
end
|
12
|
+
|
13
|
+
def on_request(_env)
|
14
|
+
if @last_request_tm
|
15
|
+
sleep = (@last_request_tm + rate_limit) - Time.now
|
16
|
+
sleep(sleep) if sleep > 0
|
17
|
+
end
|
18
|
+
|
19
|
+
@current_request_tm = Time.now
|
20
|
+
end
|
21
|
+
|
22
|
+
def on_complete(env)
|
23
|
+
# Only rate limit on uncached requests
|
24
|
+
@last_request_tm = @current_request_tm unless env[:httpdisk]
|
25
|
+
@current_request_tm = nil
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/sinew/core_ext.rb
CHANGED
data/lib/sinew/dsl.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require '
|
1
|
+
require 'amazing_print'
|
2
2
|
require 'cgi'
|
3
|
+
require 'json'
|
3
4
|
|
4
5
|
#
|
5
6
|
# The DSL available to .sinew files.
|
@@ -10,7 +11,7 @@ module Sinew
|
|
10
11
|
# this is used to break out of --limit
|
11
12
|
class LimitError < StandardError; end
|
12
13
|
|
13
|
-
attr_reader :sinew, :raw, :
|
14
|
+
attr_reader :sinew, :uri, :raw, :code, :elapsed
|
14
15
|
|
15
16
|
def initialize(sinew)
|
16
17
|
@sinew = sinew
|
@@ -52,14 +53,17 @@ module Sinew
|
|
52
53
|
end
|
53
54
|
|
54
55
|
def http(method, url, options = {})
|
55
|
-
#
|
56
|
-
|
57
|
-
instance_variable_set(i, nil)
|
56
|
+
# these need to be cleared before each request
|
57
|
+
%i[@html @noko @xml @json].each do |i|
|
58
|
+
instance_variable_set(i, nil)
|
58
59
|
end
|
59
60
|
|
60
61
|
# fetch and make response available to callers
|
61
62
|
response = sinew.http(method, url, options)
|
62
|
-
@uri, @raw = response.uri, response.body
|
63
|
+
@uri, @raw, @code = response.uri, response.body, response.code
|
64
|
+
|
65
|
+
# don't confuse the user
|
66
|
+
nil
|
63
67
|
end
|
64
68
|
|
65
69
|
#
|
data/lib/sinew/main.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'scripto'
|
2
|
+
require 'sinew/connection'
|
2
3
|
|
3
4
|
#
|
4
5
|
# Main sinew entry point.
|
@@ -6,21 +7,13 @@ require 'scripto'
|
|
6
7
|
|
7
8
|
module Sinew
|
8
9
|
class Main < Scripto::Main
|
9
|
-
attr_reader :runtime_options
|
10
|
+
attr_reader :runtime_options
|
10
11
|
|
11
12
|
def initialize(options)
|
12
13
|
super(options)
|
13
14
|
|
14
15
|
# init
|
15
16
|
@runtime_options = RuntimeOptions.new
|
16
|
-
@request_tm = Time.at(0)
|
17
|
-
@request_count = 0
|
18
|
-
|
19
|
-
if options[:proxy]
|
20
|
-
addr, port = options[:proxy].split(':')
|
21
|
-
runtime_options.httparty_options[:http_proxyaddr] = addr
|
22
|
-
runtime_options.httparty_options[:http_proxyport] = port || 80
|
23
|
-
end
|
24
17
|
end
|
25
18
|
|
26
19
|
def run
|
@@ -37,24 +30,12 @@ module Sinew
|
|
37
30
|
end
|
38
31
|
|
39
32
|
#
|
40
|
-
# http requests
|
33
|
+
# http requests
|
41
34
|
#
|
42
35
|
|
43
|
-
def cache
|
44
|
-
@cache ||= Cache.new(self)
|
45
|
-
end
|
46
|
-
|
47
36
|
def http(method, url, options = {})
|
48
|
-
request = Request.new(
|
49
|
-
|
50
|
-
# try to get from cache
|
51
|
-
response = cache.get(request)
|
52
|
-
|
53
|
-
# perform if necessary
|
54
|
-
if !response
|
55
|
-
response = perform(request)
|
56
|
-
cache.set(response)
|
57
|
-
end
|
37
|
+
request = Request.new(method, url, request_options(options))
|
38
|
+
response = request.perform(connection)
|
58
39
|
|
59
40
|
# always log error messages
|
60
41
|
if response.error?
|
@@ -64,26 +45,10 @@ module Sinew
|
|
64
45
|
response
|
65
46
|
end
|
66
47
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
70
|
-
response = nil
|
71
|
-
|
72
|
-
tries = runtime_options.retries + 1
|
73
|
-
while tries > 0
|
74
|
-
tries -= 1
|
75
|
-
begin
|
76
|
-
@request_count += 1
|
77
|
-
response = request.perform
|
78
|
-
rescue Timeout::Error
|
79
|
-
response = Response.from_timeout(request)
|
80
|
-
end
|
81
|
-
break if !response.error_500?
|
82
|
-
end
|
83
|
-
|
84
|
-
response
|
48
|
+
def connection
|
49
|
+
@connection ||= Connection.create(options: options, runtime_options: runtime_options)
|
85
50
|
end
|
86
|
-
protected :
|
51
|
+
protected :connection
|
87
52
|
|
88
53
|
#
|
89
54
|
# output
|
@@ -97,23 +62,31 @@ module Sinew
|
|
97
62
|
# helpers
|
98
63
|
#
|
99
64
|
|
100
|
-
def
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
"req #{request.uri}"
|
65
|
+
def request_options(options)
|
66
|
+
options.dup.tap do |req|
|
67
|
+
req[:headers] = {}.tap do |h|
|
68
|
+
[ runtime_options.headers, options[:headers]].each do
|
69
|
+
h.merge!(_1) if _1
|
70
|
+
end
|
107
71
|
end
|
108
|
-
|
72
|
+
req[:proxy] = random_proxy
|
73
|
+
end
|
74
|
+
end
|
75
|
+
protected :request_options
|
76
|
+
|
77
|
+
PROXY_RE = /\A#{URI::PATTERN::HOST}(:\d+)?\Z/.freeze
|
78
|
+
|
79
|
+
def random_proxy
|
80
|
+
return if !options[:proxy]
|
81
|
+
|
82
|
+
proxy = options[:proxy].split(',').sample
|
83
|
+
if proxy !~ PROXY_RE
|
84
|
+
raise ArgumentError, "invalid proxy #{proxy.inspect}, should be host[:port]"
|
109
85
|
end
|
110
86
|
|
111
|
-
|
112
|
-
sleep = (request_tm + runtime_options.rate_limit) - Time.now
|
113
|
-
sleep(sleep) if sleep > 0
|
114
|
-
@request_tm = Time.now
|
87
|
+
"http://#{proxy}"
|
115
88
|
end
|
116
|
-
protected :
|
89
|
+
protected :random_proxy
|
117
90
|
|
118
91
|
def footer
|
119
92
|
output.report
|
data/lib/sinew/output.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'csv'
|
2
2
|
require 'set'
|
3
|
-
require '
|
3
|
+
require 'sterile'
|
4
4
|
|
5
5
|
#
|
6
6
|
# CSV output.
|
@@ -45,6 +45,7 @@ module Sinew
|
|
45
45
|
|
46
46
|
# don't allow duplicate urls
|
47
47
|
return if dup_url?(row)
|
48
|
+
|
48
49
|
rows << row.dup
|
49
50
|
|
50
51
|
# map columns to row, and normalize along the way
|
@@ -101,24 +102,14 @@ module Sinew
|
|
101
102
|
# strip html tags. Note that we replace tags with spaces
|
102
103
|
s = s.gsub(/<[^>]+>/, ' ')
|
103
104
|
|
104
|
-
#
|
105
|
-
# Below uses stringex
|
106
|
-
#
|
107
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
|
108
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
|
109
|
-
#
|
110
|
-
|
111
105
|
# Converts MS Word 'smart punctuation' to ASCII
|
112
|
-
s = s
|
113
|
-
|
114
|
-
# "á".convert_accented_html_entities # => "a"
|
115
|
-
s = s.convert_accented_html_entities
|
106
|
+
s = Sterile.plain_format(s)
|
116
107
|
|
117
|
-
# &
|
118
|
-
s = s
|
108
|
+
# á & etc.
|
109
|
+
s = Sterile.decode_entities(s)
|
119
110
|
|
120
|
-
#
|
121
|
-
s = s
|
111
|
+
# "šţɽĩɳģ" => "string"
|
112
|
+
s = Sterile.transliterate(s)
|
122
113
|
|
123
114
|
# squish
|
124
115
|
s = s.squish
|
data/lib/sinew/request.rb
CHANGED
@@ -1,61 +1,57 @@
|
|
1
|
-
require '
|
2
|
-
require 'httparty'
|
3
|
-
require 'htmlentities'
|
1
|
+
require 'sterile'
|
4
2
|
|
5
3
|
#
|
6
|
-
# Process a single HTTP request.
|
4
|
+
# Process a single HTTP request.
|
7
5
|
#
|
8
6
|
|
9
7
|
module Sinew
|
10
8
|
class Error < StandardError; end
|
11
9
|
|
12
10
|
class Request
|
13
|
-
HTML_ENTITIES = HTMLEntities.new
|
14
11
|
VALID_METHODS = %w[get post patch put delete head options].freeze
|
12
|
+
METHODS_WITH_BODY = %w[patch post put].freeze
|
15
13
|
|
16
|
-
attr_reader :
|
14
|
+
attr_reader :method, :options, :uri
|
17
15
|
|
18
|
-
#
|
19
|
-
|
20
|
-
|
16
|
+
# Supported options:
|
17
|
+
# body: Body of http post
|
18
|
+
# headers: Hash of HTTP headers (combined with runtime_options.headers)
|
19
|
+
# query: Hash of query parameters to add to url
|
20
|
+
def initialize(method, url, options = {})
|
21
21
|
@method = method
|
22
22
|
@options = options.dup
|
23
23
|
@uri = parse_url(url)
|
24
|
-
@cache_key = calculate_cache_key
|
25
24
|
end
|
26
25
|
|
27
26
|
# run the request, return the result
|
28
|
-
def perform
|
27
|
+
def perform(connection)
|
29
28
|
validate!
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
headers = headers.merge(options[:headers]) if options[:headers]
|
37
|
-
options[:headers] = headers
|
30
|
+
body = options.delete(:body)
|
31
|
+
fday_response = connection.send(method, uri, body) do
|
32
|
+
_1.headers.update(options[:headers]) if options[:headers]
|
33
|
+
_1.options[:proxy] = options[:proxy]
|
34
|
+
end
|
38
35
|
|
39
|
-
|
40
|
-
Response.from_network(self, party_response)
|
36
|
+
Response.from_network(self, fday_response)
|
41
37
|
end
|
42
38
|
|
43
39
|
# We accept sloppy urls and attempt to clean them up
|
44
40
|
def parse_url(url)
|
45
|
-
s = url
|
41
|
+
s = url.to_s
|
46
42
|
|
47
43
|
# remove entities
|
48
|
-
s =
|
44
|
+
s = Sterile.decode_entities(s)
|
49
45
|
|
50
46
|
# fix a couple of common encoding bugs
|
51
47
|
s = s.gsub(' ', '%20')
|
52
48
|
s = s.gsub("'", '%27')
|
53
49
|
|
54
|
-
# append query manually (instead of letting
|
55
|
-
#
|
50
|
+
# append query manually (instead of letting Faraday handle it) for consistent
|
51
|
+
# Request#uri and Response#uri
|
56
52
|
query = options.delete(:query)
|
57
53
|
if query.present?
|
58
|
-
q =
|
54
|
+
q = Faraday::Utils.default_params_encoder.encode(query)
|
59
55
|
separator = s.include?('?') ? '&' : '?'
|
60
56
|
s = "#{s}#{separator}#{q}"
|
61
57
|
end
|
@@ -64,44 +60,10 @@ module Sinew
|
|
64
60
|
end
|
65
61
|
protected :parse_url
|
66
62
|
|
67
|
-
def calculate_cache_key
|
68
|
-
dir = pathify(uri.host)
|
69
|
-
|
70
|
-
body_key = if body.is_a?(Hash)
|
71
|
-
HTTParty::HashConversions.to_params(body)
|
72
|
-
else
|
73
|
-
body&.dup
|
74
|
-
end
|
75
|
-
|
76
|
-
# build key, as a hash for before_generate_cache_key
|
77
|
-
key = {
|
78
|
-
method: method.dup,
|
79
|
-
path: uri.path,
|
80
|
-
query: uri.query,
|
81
|
-
body: body_key,
|
82
|
-
}
|
83
|
-
key = sinew.runtime_options.before_generate_cache_key.call(key)
|
84
|
-
|
85
|
-
# strip method for gets
|
86
|
-
key.delete(:method) if key[:method] == 'get'
|
87
|
-
|
88
|
-
# pull out the values, join and pathify
|
89
|
-
path = key.values.select(&:present?).join(',')
|
90
|
-
path = pathify(path)
|
91
|
-
|
92
|
-
# shorten long paths
|
93
|
-
if path.length > 250
|
94
|
-
path = Digest::MD5.hexdigest(path)
|
95
|
-
end
|
96
|
-
|
97
|
-
"#{dir}/#{path}"
|
98
|
-
end
|
99
|
-
protected :calculate_cache_key
|
100
|
-
|
101
63
|
def validate!
|
102
64
|
raise "invalid method #{method}" if !VALID_METHODS.include?(method)
|
103
65
|
raise "invalid url #{uri}" if uri.scheme !~ /^http/
|
104
|
-
raise "can't
|
66
|
+
raise "can't #{method} with a body" if body && !METHODS_WITH_BODY.include?(method)
|
105
67
|
raise "Content-Type doesn't make sense without a body" if content_type && !body
|
106
68
|
end
|
107
69
|
protected :validate!
|
@@ -120,32 +82,5 @@ module Sinew
|
|
120
82
|
headers && headers['Content-Type']
|
121
83
|
end
|
122
84
|
protected :content_type
|
123
|
-
|
124
|
-
def form?
|
125
|
-
content_type == 'application/x-www-form-urlencoded'
|
126
|
-
end
|
127
|
-
protected :form?
|
128
|
-
|
129
|
-
def pathify(s)
|
130
|
-
# remove leading slash
|
131
|
-
s = s.gsub(/^\//, '')
|
132
|
-
# .. => comma
|
133
|
-
s = s.gsub('..', ',')
|
134
|
-
# query separators => comma
|
135
|
-
s = s.gsub(/[?\/&]/, ',')
|
136
|
-
# ,, => comma
|
137
|
-
s = s.gsub(',,', ',')
|
138
|
-
# encode invalid path chars
|
139
|
-
s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
|
140
|
-
hex = i.unpack('H2').first
|
141
|
-
"%#{hex}"
|
142
|
-
end
|
143
|
-
# handle empty case
|
144
|
-
s = '_root_' if s.blank?
|
145
|
-
# always downcase
|
146
|
-
s = s.downcase
|
147
|
-
s
|
148
|
-
end
|
149
|
-
protected :pathify
|
150
85
|
end
|
151
86
|
end
|
data/lib/sinew/response.rb
CHANGED
@@ -2,7 +2,7 @@ require 'stringio'
|
|
2
2
|
require 'zlib'
|
3
3
|
|
4
4
|
#
|
5
|
-
# An HTTP response.
|
5
|
+
# An HTTP response.
|
6
6
|
#
|
7
7
|
|
8
8
|
module Sinew
|
@@ -13,62 +13,13 @@ module Sinew
|
|
13
13
|
# factory methods
|
14
14
|
#
|
15
15
|
|
16
|
-
def self.from_network(request,
|
17
|
-
Response.new.tap do
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.from_cache(request, body, head)
|
27
|
-
Response.new.tap do |response|
|
28
|
-
response.request = request
|
29
|
-
response.body = body
|
30
|
-
|
31
|
-
# defaults
|
32
|
-
response.uri = request.uri
|
33
|
-
response.code = 200
|
34
|
-
response.headers = {}
|
35
|
-
|
36
|
-
# overwrite with cached response headers
|
37
|
-
if head
|
38
|
-
if head !~ /^{/
|
39
|
-
return from_legacy_head(response, head)
|
40
|
-
end
|
41
|
-
head = JSON.parse(head, symbolize_names: true)
|
42
|
-
response.uri = URI.parse(head[:uri])
|
43
|
-
response.code = head[:code]
|
44
|
-
response.headers = head[:headers]
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.from_timeout(request)
|
50
|
-
Response.new.tap do |response|
|
51
|
-
response.request = request
|
52
|
-
response.uri = request.uri
|
53
|
-
response.body = 'timeout'
|
54
|
-
response.code = 999
|
55
|
-
response.headers = {}
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
def self.from_legacy_head(response, head)
|
60
|
-
response.tap do |r|
|
61
|
-
case head
|
62
|
-
when /\ACURLER_ERROR/
|
63
|
-
# error
|
64
|
-
r.code = 999
|
65
|
-
when /\AHTTP/
|
66
|
-
# redirect
|
67
|
-
location = head.scan(/Location: ([^\r\n]+)/).flatten.last
|
68
|
-
r.uri += location
|
69
|
-
else
|
70
|
-
$stderr.puts "unknown cached /head for #{r.uri}"
|
71
|
-
end
|
16
|
+
def self.from_network(request, fday_response)
|
17
|
+
Response.new.tap do
|
18
|
+
_1.request = request
|
19
|
+
_1.uri = fday_response.env.url
|
20
|
+
_1.code = fday_response.status
|
21
|
+
_1.headers = fday_response.headers.to_h
|
22
|
+
_1.body = process_body(fday_response)
|
72
23
|
end
|
73
24
|
end
|
74
25
|
|