sinew 2.0.2 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +26 -0
- data/.rubocop.yml +9 -6
- data/.vscode/settings.json +0 -10
- data/Gemfile +9 -0
- data/README.md +62 -54
- data/Rakefile +33 -18
- data/bin/sinew +2 -0
- data/lib/sinew.rb +0 -1
- data/lib/sinew/connection.rb +52 -0
- data/lib/sinew/connection/log_formatter.rb +22 -0
- data/lib/sinew/connection/rate_limit.rb +29 -0
- data/lib/sinew/core_ext.rb +1 -1
- data/lib/sinew/dsl.rb +10 -6
- data/lib/sinew/main.rb +29 -56
- data/lib/sinew/output.rb +7 -16
- data/lib/sinew/request.rb +22 -87
- data/lib/sinew/response.rb +8 -57
- data/lib/sinew/runtime_options.rb +4 -4
- data/lib/sinew/version.rb +1 -1
- data/sample.sinew +2 -2
- data/sinew.gemspec +16 -18
- metadata +38 -110
- data/.travis.yml +0 -4
- data/lib/sinew/cache.rb +0 -79
- data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
- data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
- data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
- data/test/legacy/eu.httpbin.org/status,500 +0 -1
- data/test/legacy/legacy.sinew +0 -2
- data/test/recipes/array_header.sinew +0 -6
- data/test/recipes/basic.sinew +0 -8
- data/test/recipes/dups.sinew +0 -7
- data/test/recipes/implicit_header.sinew +0 -5
- data/test/recipes/limit.sinew +0 -11
- data/test/recipes/noko.sinew +0 -9
- data/test/recipes/uri.sinew +0 -11
- data/test/recipes/xml.sinew +0 -8
- data/test/test.html +0 -45
- data/test/test_cache.rb +0 -69
- data/test/test_helper.rb +0 -123
- data/test/test_legacy.rb +0 -23
- data/test/test_main.rb +0 -34
- data/test/test_nokogiri_ext.rb +0 -18
- data/test/test_output.rb +0 -56
- data/test/test_recipes.rb +0 -60
- data/test/test_requests.rb +0 -135
- data/test/test_utf8.rb +0 -39
data/lib/sinew/connection/rate_limit.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module Sinew
|
2
|
+
module Connection
|
3
|
+
class RateLimit < Faraday::Middleware
|
4
|
+
attr_reader :rate_limit
|
5
|
+
|
6
|
+
def initialize(app, options = {})
|
7
|
+
super(app)
|
8
|
+
|
9
|
+
@last_request_tm = @current_request_tm = nil
|
10
|
+
@rate_limit = options.fetch(:rate_limit, 1)
|
11
|
+
end
|
12
|
+
|
13
|
+
def on_request(_env)
|
14
|
+
if @last_request_tm
|
15
|
+
sleep = (@last_request_tm + rate_limit) - Time.now
|
16
|
+
sleep(sleep) if sleep > 0
|
17
|
+
end
|
18
|
+
|
19
|
+
@current_request_tm = Time.now
|
20
|
+
end
|
21
|
+
|
22
|
+
def on_complete(env)
|
23
|
+
# Only rate limit on uncached requests
|
24
|
+
@last_request_tm = @current_request_tm unless env[:httpdisk]
|
25
|
+
@current_request_tm = nil
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/sinew/core_ext.rb
CHANGED
data/lib/sinew/dsl.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require '
|
1
|
+
require 'amazing_print'
|
2
2
|
require 'cgi'
|
3
|
+
require 'json'
|
3
4
|
|
4
5
|
#
|
5
6
|
# The DSL available to .sinew files.
|
@@ -10,7 +11,7 @@ module Sinew
|
|
10
11
|
# this is used to break out of --limit
|
11
12
|
class LimitError < StandardError; end
|
12
13
|
|
13
|
-
attr_reader :sinew, :raw, :
|
14
|
+
attr_reader :sinew, :uri, :raw, :code, :elapsed
|
14
15
|
|
15
16
|
def initialize(sinew)
|
16
17
|
@sinew = sinew
|
@@ -52,14 +53,17 @@ module Sinew
|
|
52
53
|
end
|
53
54
|
|
54
55
|
def http(method, url, options = {})
|
55
|
-
#
|
56
|
-
|
57
|
-
instance_variable_set(i, nil)
|
56
|
+
# these need to be cleared before each request
|
57
|
+
%i[@html @noko @xml @json].each do |i|
|
58
|
+
instance_variable_set(i, nil)
|
58
59
|
end
|
59
60
|
|
60
61
|
# fetch and make response available to callers
|
61
62
|
response = sinew.http(method, url, options)
|
62
|
-
@uri, @raw = response.uri, response.body
|
63
|
+
@uri, @raw, @code = response.uri, response.body, response.code
|
64
|
+
|
65
|
+
# don't confuse the user
|
66
|
+
nil
|
63
67
|
end
|
64
68
|
|
65
69
|
#
|
data/lib/sinew/main.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'scripto'
|
2
|
+
require 'sinew/connection'
|
2
3
|
|
3
4
|
#
|
4
5
|
# Main sinew entry point.
|
@@ -6,21 +7,13 @@ require 'scripto'
|
|
6
7
|
|
7
8
|
module Sinew
|
8
9
|
class Main < Scripto::Main
|
9
|
-
attr_reader :runtime_options
|
10
|
+
attr_reader :runtime_options
|
10
11
|
|
11
12
|
def initialize(options)
|
12
13
|
super(options)
|
13
14
|
|
14
15
|
# init
|
15
16
|
@runtime_options = RuntimeOptions.new
|
16
|
-
@request_tm = Time.at(0)
|
17
|
-
@request_count = 0
|
18
|
-
|
19
|
-
if options[:proxy]
|
20
|
-
addr, port = options[:proxy].split(':')
|
21
|
-
runtime_options.httparty_options[:http_proxyaddr] = addr
|
22
|
-
runtime_options.httparty_options[:http_proxyport] = port || 80
|
23
|
-
end
|
24
17
|
end
|
25
18
|
|
26
19
|
def run
|
@@ -37,24 +30,12 @@ module Sinew
|
|
37
30
|
end
|
38
31
|
|
39
32
|
#
|
40
|
-
# http requests
|
33
|
+
# http requests
|
41
34
|
#
|
42
35
|
|
43
|
-
def cache
|
44
|
-
@cache ||= Cache.new(self)
|
45
|
-
end
|
46
|
-
|
47
36
|
def http(method, url, options = {})
|
48
|
-
request = Request.new(
|
49
|
-
|
50
|
-
# try to get from cache
|
51
|
-
response = cache.get(request)
|
52
|
-
|
53
|
-
# perform if necessary
|
54
|
-
if !response
|
55
|
-
response = perform(request)
|
56
|
-
cache.set(response)
|
57
|
-
end
|
37
|
+
request = Request.new(method, url, request_options(options))
|
38
|
+
response = request.perform(connection)
|
58
39
|
|
59
40
|
# always log error messages
|
60
41
|
if response.error?
|
@@ -64,26 +45,10 @@ module Sinew
|
|
64
45
|
response
|
65
46
|
end
|
66
47
|
|
67
|
-
def perform(request)
|
68
|
-
|
69
|
-
|
70
|
-
response = nil
|
71
|
-
|
72
|
-
tries = runtime_options.retries + 1
|
73
|
-
while tries > 0
|
74
|
-
tries -= 1
|
75
|
-
begin
|
76
|
-
@request_count += 1
|
77
|
-
response = request.perform
|
78
|
-
rescue Timeout::Error
|
79
|
-
response = Response.from_timeout(request)
|
80
|
-
end
|
81
|
-
break if !response.error_500?
|
82
|
-
end
|
83
|
-
|
84
|
-
response
|
48
|
+
def connection
|
49
|
+
@connection ||= Connection.create(options: options, runtime_options: runtime_options)
|
85
50
|
end
|
86
|
-
protected :perform
|
51
|
+
protected :connection
|
87
52
|
|
88
53
|
#
|
89
54
|
# output
|
@@ -97,23 +62,31 @@ module Sinew
|
|
97
62
|
# helpers
|
98
63
|
#
|
99
64
|
|
100
|
-
def
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
"req #{request.uri}"
|
65
|
+
def request_options(options)
|
66
|
+
options.dup.tap do |req|
|
67
|
+
req[:headers] = {}.tap do |h|
|
68
|
+
[ runtime_options.headers, options[:headers]].each do
|
69
|
+
h.merge!(_1) if _1
|
70
|
+
end
|
107
71
|
end
|
108
|
-
|
72
|
+
req[:proxy] = random_proxy
|
73
|
+
end
|
74
|
+
end
|
75
|
+
protected :request_options
|
76
|
+
|
77
|
+
PROXY_RE = /\A#{URI::PATTERN::HOST}(:\d+)?\Z/.freeze
|
78
|
+
|
79
|
+
def random_proxy
|
80
|
+
return if !options[:proxy]
|
81
|
+
|
82
|
+
proxy = options[:proxy].split(',').sample
|
83
|
+
if proxy !~ PROXY_RE
|
84
|
+
raise ArgumentError, "invalid proxy #{proxy.inspect}, should be host[:port]"
|
109
85
|
end
|
110
86
|
|
111
|
-
|
112
|
-
sleep = (request_tm + runtime_options.rate_limit) - Time.now
|
113
|
-
sleep(sleep) if sleep > 0
|
114
|
-
@request_tm = Time.now
|
87
|
+
"http://#{proxy}"
|
115
88
|
end
|
116
|
-
protected :
|
89
|
+
protected :random_proxy
|
117
90
|
|
118
91
|
def footer
|
119
92
|
output.report
|
data/lib/sinew/output.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'csv'
|
2
2
|
require 'set'
|
3
|
-
require 'stringex'
|
3
|
+
require 'sterile'
|
4
4
|
|
5
5
|
#
|
6
6
|
# CSV output.
|
@@ -45,6 +45,7 @@ module Sinew
|
|
45
45
|
|
46
46
|
# don't allow duplicate urls
|
47
47
|
return if dup_url?(row)
|
48
|
+
|
48
49
|
rows << row.dup
|
49
50
|
|
50
51
|
# map columns to row, and normalize along the way
|
@@ -101,24 +102,14 @@ module Sinew
|
|
101
102
|
# strip html tags. Note that we replace tags with spaces
|
102
103
|
s = s.gsub(/<[^>]+>/, ' ')
|
103
104
|
|
104
|
-
#
|
105
|
-
# Below uses stringex
|
106
|
-
#
|
107
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
|
108
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
|
109
|
-
#
|
110
|
-
|
111
105
|
# Converts MS Word 'smart punctuation' to ASCII
|
112
|
-
s = s
|
113
|
-
|
114
|
-
# "á".convert_accented_html_entities # => "a"
|
115
|
-
s = s.convert_accented_html_entities
|
106
|
+
s = Sterile.plain_format(s)
|
116
107
|
|
117
|
-
# &
|
118
|
-
s = s
|
108
|
+
# á & etc.
|
109
|
+
s = Sterile.decode_entities(s)
|
119
110
|
|
120
|
-
#
|
121
|
-
s = s
|
111
|
+
# "šţɽĩɳģ" => "string"
|
112
|
+
s = Sterile.transliterate(s)
|
122
113
|
|
123
114
|
# squish
|
124
115
|
s = s.squish
|
data/lib/sinew/request.rb
CHANGED
@@ -1,61 +1,57 @@
|
|
1
|
-
require '
|
2
|
-
require 'httparty'
|
3
|
-
require 'htmlentities'
|
1
|
+
require 'sterile'
|
4
2
|
|
5
3
|
#
|
6
|
-
# Process a single HTTP request.
|
4
|
+
# Process a single HTTP request.
|
7
5
|
#
|
8
6
|
|
9
7
|
module Sinew
|
10
8
|
class Error < StandardError; end
|
11
9
|
|
12
10
|
class Request
|
13
|
-
HTML_ENTITIES = HTMLEntities.new
|
14
11
|
VALID_METHODS = %w[get post patch put delete head options].freeze
|
12
|
+
METHODS_WITH_BODY = %w[patch post put].freeze
|
15
13
|
|
16
|
-
attr_reader :
|
14
|
+
attr_reader :method, :options, :uri
|
17
15
|
|
18
|
-
#
|
19
|
-
|
20
|
-
|
16
|
+
# Supported options:
|
17
|
+
# body: Body of http post
|
18
|
+
# headers: Hash of HTTP headers (combined with runtime_options.headers)
|
19
|
+
# query: Hash of query parameters to add to url
|
20
|
+
def initialize(method, url, options = {})
|
21
21
|
@method = method
|
22
22
|
@options = options.dup
|
23
23
|
@uri = parse_url(url)
|
24
|
-
@cache_key = calculate_cache_key
|
25
24
|
end
|
26
25
|
|
27
26
|
# run the request, return the result
|
28
|
-
def perform
|
27
|
+
def perform(connection)
|
29
28
|
validate!
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
headers = headers.merge(options[:headers]) if options[:headers]
|
37
|
-
options[:headers] = headers
|
30
|
+
body = options.delete(:body)
|
31
|
+
fday_response = connection.send(method, uri, body) do
|
32
|
+
_1.headers.update(options[:headers]) if options[:headers]
|
33
|
+
_1.options[:proxy] = options[:proxy]
|
34
|
+
end
|
38
35
|
|
39
|
-
|
40
|
-
Response.from_network(self, party_response)
|
36
|
+
Response.from_network(self, fday_response)
|
41
37
|
end
|
42
38
|
|
43
39
|
# We accept sloppy urls and attempt to clean them up
|
44
40
|
def parse_url(url)
|
45
|
-
s = url
|
41
|
+
s = url.to_s
|
46
42
|
|
47
43
|
# remove entities
|
48
|
-
s =
|
44
|
+
s = Sterile.decode_entities(s)
|
49
45
|
|
50
46
|
# fix a couple of common encoding bugs
|
51
47
|
s = s.gsub(' ', '%20')
|
52
48
|
s = s.gsub("'", '%27')
|
53
49
|
|
54
|
-
# append query manually (instead of letting
|
55
|
-
#
|
50
|
+
# append query manually (instead of letting Faraday handle it) for consistent
|
51
|
+
# Request#uri and Response#uri
|
56
52
|
query = options.delete(:query)
|
57
53
|
if query.present?
|
58
|
-
q =
|
54
|
+
q = Faraday::Utils.default_params_encoder.encode(query)
|
59
55
|
separator = s.include?('?') ? '&' : '?'
|
60
56
|
s = "#{s}#{separator}#{q}"
|
61
57
|
end
|
@@ -64,44 +60,10 @@ module Sinew
|
|
64
60
|
end
|
65
61
|
protected :parse_url
|
66
62
|
|
67
|
-
def calculate_cache_key
|
68
|
-
dir = pathify(uri.host)
|
69
|
-
|
70
|
-
body_key = if body.is_a?(Hash)
|
71
|
-
HTTParty::HashConversions.to_params(body)
|
72
|
-
else
|
73
|
-
body&.dup
|
74
|
-
end
|
75
|
-
|
76
|
-
# build key, as a hash for before_generate_cache_key
|
77
|
-
key = {
|
78
|
-
method: method.dup,
|
79
|
-
path: uri.path,
|
80
|
-
query: uri.query,
|
81
|
-
body: body_key,
|
82
|
-
}
|
83
|
-
key = sinew.runtime_options.before_generate_cache_key.call(key)
|
84
|
-
|
85
|
-
# strip method for gets
|
86
|
-
key.delete(:method) if key[:method] == 'get'
|
87
|
-
|
88
|
-
# pull out the values, join and pathify
|
89
|
-
path = key.values.select(&:present?).join(',')
|
90
|
-
path = pathify(path)
|
91
|
-
|
92
|
-
# shorten long paths
|
93
|
-
if path.length > 250
|
94
|
-
path = Digest::MD5.hexdigest(path)
|
95
|
-
end
|
96
|
-
|
97
|
-
"#{dir}/#{path}"
|
98
|
-
end
|
99
|
-
protected :calculate_cache_key
|
100
|
-
|
101
63
|
def validate!
|
102
64
|
raise "invalid method #{method}" if !VALID_METHODS.include?(method)
|
103
65
|
raise "invalid url #{uri}" if uri.scheme !~ /^http/
|
104
|
-
raise "can't
|
66
|
+
raise "can't #{method} with a body" if body && !METHODS_WITH_BODY.include?(method)
|
105
67
|
raise "Content-Type doesn't make sense without a body" if content_type && !body
|
106
68
|
end
|
107
69
|
protected :validate!
|
@@ -120,32 +82,5 @@ module Sinew
|
|
120
82
|
headers && headers['Content-Type']
|
121
83
|
end
|
122
84
|
protected :content_type
|
123
|
-
|
124
|
-
def form?
|
125
|
-
content_type == 'application/x-www-form-urlencoded'
|
126
|
-
end
|
127
|
-
protected :form?
|
128
|
-
|
129
|
-
def pathify(s)
|
130
|
-
# remove leading slash
|
131
|
-
s = s.gsub(/^\//, '')
|
132
|
-
# .. => comma
|
133
|
-
s = s.gsub('..', ',')
|
134
|
-
# query separators => comma
|
135
|
-
s = s.gsub(/[?\/&]/, ',')
|
136
|
-
# ,, => comma
|
137
|
-
s = s.gsub(',,', ',')
|
138
|
-
# encode invalid path chars
|
139
|
-
s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
|
140
|
-
hex = i.unpack('H2').first
|
141
|
-
"%#{hex}"
|
142
|
-
end
|
143
|
-
# handle empty case
|
144
|
-
s = '_root_' if s.blank?
|
145
|
-
# always downcase
|
146
|
-
s = s.downcase
|
147
|
-
s
|
148
|
-
end
|
149
|
-
protected :pathify
|
150
85
|
end
|
151
86
|
end
|
data/lib/sinew/response.rb
CHANGED
@@ -2,7 +2,7 @@ require 'stringio'
|
|
2
2
|
require 'zlib'
|
3
3
|
|
4
4
|
#
|
5
|
-
# An HTTP response.
|
5
|
+
# An HTTP response.
|
6
6
|
#
|
7
7
|
|
8
8
|
module Sinew
|
@@ -13,62 +13,13 @@ module Sinew
|
|
13
13
|
# factory methods
|
14
14
|
#
|
15
15
|
|
16
|
-
def self.from_network(request,
|
17
|
-
Response.new.tap do
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.from_cache(request, body, head)
|
27
|
-
Response.new.tap do |response|
|
28
|
-
response.request = request
|
29
|
-
response.body = body
|
30
|
-
|
31
|
-
# defaults
|
32
|
-
response.uri = request.uri
|
33
|
-
response.code = 200
|
34
|
-
response.headers = {}
|
35
|
-
|
36
|
-
# overwrite with cached response headers
|
37
|
-
if head
|
38
|
-
if head !~ /^{/
|
39
|
-
return from_legacy_head(response, head)
|
40
|
-
end
|
41
|
-
head = JSON.parse(head, symbolize_names: true)
|
42
|
-
response.uri = URI.parse(head[:uri])
|
43
|
-
response.code = head[:code]
|
44
|
-
response.headers = head[:headers]
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.from_timeout(request)
|
50
|
-
Response.new.tap do |response|
|
51
|
-
response.request = request
|
52
|
-
response.uri = request.uri
|
53
|
-
response.body = 'timeout'
|
54
|
-
response.code = 999
|
55
|
-
response.headers = {}
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
def self.from_legacy_head(response, head)
|
60
|
-
response.tap do |r|
|
61
|
-
case head
|
62
|
-
when /\ACURLER_ERROR/
|
63
|
-
# error
|
64
|
-
r.code = 999
|
65
|
-
when /\AHTTP/
|
66
|
-
# redirect
|
67
|
-
location = head.scan(/Location: ([^\r\n]+)/).flatten.last
|
68
|
-
r.uri += location
|
69
|
-
else
|
70
|
-
$stderr.puts "unknown cached /head for #{r.uri}"
|
71
|
-
end
|
16
|
+
def self.from_network(request, fday_response)
|
17
|
+
Response.new.tap do
|
18
|
+
_1.request = request
|
19
|
+
_1.uri = fday_response.env.url
|
20
|
+
_1.code = fday_response.status
|
21
|
+
_1.headers = fday_response.headers.to_h
|
22
|
+
_1.body = process_body(fday_response)
|
72
23
|
end
|
73
24
|
end
|
74
25
|
|