sinew 3.0.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -5
- data/.rubocop.yml +30 -48
- data/Gemfile +4 -4
- data/Gemfile.lock +124 -0
- data/README.md +108 -47
- data/Rakefile +16 -15
- data/bin/sinew +13 -41
- data/lib/sinew.rb +23 -9
- data/lib/sinew/args.rb +53 -0
- data/lib/sinew/base.rb +251 -0
- data/lib/sinew/csv.rb +89 -0
- data/lib/sinew/main.rb +46 -72
- data/lib/sinew/{connection → middleware}/log_formatter.rb +2 -1
- data/lib/sinew/nokogiri_ext.rb +12 -21
- data/lib/sinew/response.rb +41 -52
- data/lib/sinew/version.rb +1 -1
- data/sample.rb +13 -0
- data/sample.sinew +4 -4
- data/sinew.gemspec +19 -16
- metadata +31 -21
- data/.vscode/extensions.json +0 -3
- data/.vscode/settings.json +0 -5
- data/lib/sinew/connection.rb +0 -52
- data/lib/sinew/connection/rate_limit.rb +0 -29
- data/lib/sinew/core_ext.rb +0 -59
- data/lib/sinew/dsl.rb +0 -115
- data/lib/sinew/output.rb +0 -133
- data/lib/sinew/request.rb +0 -86
- data/lib/sinew/runtime_options.rb +0 -28
data/Rakefile
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
require 'bundler/setup'
|
2
|
-
|
3
2
|
require 'rake/testtask'
|
4
|
-
require 'sinew/version'
|
5
3
|
|
6
4
|
# load the spec, we use it below
|
7
5
|
spec = Gem::Specification.load('sinew.gemspec')
|
@@ -13,17 +11,20 @@ spec = Gem::Specification.load('sinew.gemspec')
|
|
13
11
|
#
|
14
12
|
|
15
13
|
# test (default)
|
14
|
+
Rake::TestTask.new
|
16
15
|
task default: :test
|
17
16
|
|
18
|
-
Rake::TestTask.new do
|
19
|
-
_1.libs << 'test'
|
20
|
-
_1.warning = false # sterile has a few issues here
|
21
|
-
end
|
22
|
-
|
23
17
|
# Watch rb files, run tests whenever something changes
|
24
18
|
task :watch do
|
25
|
-
|
26
|
-
|
19
|
+
sh "find . -name '*.rb' -o -name '*.sinew' | entr -c rake"
|
20
|
+
end
|
21
|
+
|
22
|
+
#
|
23
|
+
# pry
|
24
|
+
#
|
25
|
+
|
26
|
+
task :pry do
|
27
|
+
sh 'pry -I lib -r sinew.rb'
|
27
28
|
end
|
28
29
|
|
29
30
|
#
|
@@ -31,7 +32,7 @@ end
|
|
31
32
|
#
|
32
33
|
|
33
34
|
task :rubocop do
|
34
|
-
|
35
|
+
sh 'bundle exec rubocop -A .'
|
35
36
|
end
|
36
37
|
|
37
38
|
#
|
@@ -39,15 +40,15 @@ end
|
|
39
40
|
#
|
40
41
|
|
41
42
|
task :build do
|
42
|
-
|
43
|
+
sh 'gem build --quiet sinew.gemspec'
|
43
44
|
end
|
44
45
|
|
45
46
|
task install: :build do
|
46
|
-
|
47
|
+
sh "gem install --quiet sinew-#{spec.version}.gem"
|
47
48
|
end
|
48
49
|
|
49
50
|
task release: %i[rubocop test build] do
|
50
|
-
|
51
|
-
|
52
|
-
|
51
|
+
sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
|
52
|
+
sh 'git push --tags'
|
53
|
+
sh "gem push sinew-#{spec.version}.gem"
|
53
54
|
end
|
data/bin/sinew
CHANGED
@@ -1,53 +1,25 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
$LOAD_PATH.unshift(
|
3
|
+
$LOAD_PATH.unshift(File.join(__dir__, '../lib'))
|
4
4
|
|
5
|
-
|
6
|
-
require 'slop'
|
5
|
+
BIN = File.basename($PROGRAM_NAME)
|
7
6
|
|
8
7
|
#
|
9
|
-
#
|
8
|
+
# Load the bare minimum and parse args with slop. For speed.
|
10
9
|
#
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
o.bool '--force-errors', "don't read errors from cache (but still write)"
|
20
|
-
o.string '--proxy', 'use host[:port] as HTTP proxy'
|
21
|
-
o.bool '--version', 'show version and exit'
|
22
|
-
o.on('--help', 'show this help') do
|
23
|
-
puts o
|
24
|
-
exit
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
if options[:version]
|
29
|
-
puts Sinew::VERSION
|
30
|
-
exit
|
11
|
+
require 'sinew/args'
|
12
|
+
begin
|
13
|
+
slop = Sinew::Args.slop(ARGV)
|
14
|
+
rescue Slop::Error => e
|
15
|
+
$stderr.puts "#{BIN}: #{e}" if e.message != ''
|
16
|
+
$stderr.puts("#{BIN}: try '#{BIN} --help' for more information")
|
17
|
+
exit 1
|
31
18
|
end
|
32
19
|
|
33
20
|
#
|
34
|
-
#
|
21
|
+
# now load everything and run
|
35
22
|
#
|
36
23
|
|
37
|
-
|
38
|
-
|
39
|
-
Scripto.fatal('need a .sinew file to run against')
|
40
|
-
end
|
41
|
-
if !File.exist?(recipe)
|
42
|
-
Scripto.fatal("#{recipe} not found")
|
43
|
-
end
|
44
|
-
if options.arguments.length > 1
|
45
|
-
Scripto.fatal('can only run on one .sinew file')
|
46
|
-
end
|
47
|
-
options = options.to_h.merge(recipe: recipe)
|
48
|
-
|
49
|
-
#
|
50
|
-
# main
|
51
|
-
#
|
52
|
-
|
53
|
-
Sinew::Main.new(options).run
|
24
|
+
require 'sinew'
|
25
|
+
Sinew::Main.new(slop).run
|
data/lib/sinew.rb
CHANGED
@@ -1,9 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
1
|
+
# sinew
|
2
|
+
require 'sinew/args'
|
3
|
+
require 'sinew/base'
|
4
|
+
require 'sinew/csv'
|
5
|
+
require 'sinew/main'
|
6
|
+
require 'sinew/nokogiri_ext'
|
7
|
+
require 'sinew/response'
|
8
|
+
require 'sinew/version'
|
9
|
+
|
10
|
+
# custom faraday middleware
|
11
|
+
require 'sinew/middleware/log_formatter'
|
12
|
+
|
13
|
+
module Sinew
|
14
|
+
# flow control for --limit
|
15
|
+
class LimitError < StandardError; end
|
16
|
+
|
17
|
+
# shortcut for Sinew::Base.new
|
18
|
+
class << self
|
19
|
+
def new(**args)
|
20
|
+
Sinew::Base.new(**args)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/sinew/args.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/slop_duration'
|
3
|
+
require 'sinew/version'
|
4
|
+
require 'slop'
|
5
|
+
|
6
|
+
#
|
7
|
+
# This is used to parse command line arguments with Slop. We don't set any
|
8
|
+
# defaults in here, relying instead on Sloptions in Sinew::Base. That way
|
9
|
+
# defaults are applied for both command line and embedded usage of Sinew::Base.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Sinew
|
13
|
+
module Args
|
14
|
+
def self.slop(args)
|
15
|
+
slop = Slop.parse(args) do |o|
|
16
|
+
o.banner = 'Usage: sinew [options] [recipe.sinew]'
|
17
|
+
o.integer '-l', '--limit', 'quit after emitting this many rows'
|
18
|
+
o.string '--proxy', 'use host[:port] as HTTP proxy (can be a comma-delimited list)'
|
19
|
+
o.integer '--timeout', 'maximum time allowed for the transfer'
|
20
|
+
o.bool '-s', '--silent', 'suppress some output'
|
21
|
+
o.bool '-v', '--verbose', 'dump emitted rows while running'
|
22
|
+
|
23
|
+
o.separator 'From httpdisk:'
|
24
|
+
o.string '--dir', 'set custom cache directory'
|
25
|
+
# note: uses slop_duration from HTTPDisk
|
26
|
+
o.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
|
27
|
+
o.bool '--force', "don't read anything from cache (but still write)"
|
28
|
+
o.bool '--force-errors', "don't read errors from cache (but still write)"
|
29
|
+
|
30
|
+
# generic
|
31
|
+
o.boolean '--version', 'show version' do
|
32
|
+
puts "sinew #{Sinew::VERSION}"
|
33
|
+
exit
|
34
|
+
end
|
35
|
+
o.on('--help', 'show this help') do
|
36
|
+
puts o
|
37
|
+
exit
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# recipe argument
|
42
|
+
recipe = slop.args.first
|
43
|
+
raise Slop::Error, '' if args.empty?
|
44
|
+
raise Slop::Error, 'no RECIPE specified' if !recipe
|
45
|
+
raise Slop::Error, 'more than one RECIPE specified' if slop.args.length > 1
|
46
|
+
raise Slop::Error, "#{recipe} not found" if !File.exist?(recipe)
|
47
|
+
|
48
|
+
slop.to_h.tap do
|
49
|
+
_1[:recipe] = recipe
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/sinew/base.rb
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
require 'amazing_print'
|
2
|
+
require 'faraday-encoding'
|
3
|
+
require 'faraday/logging/formatter'
|
4
|
+
require 'faraday-rate_limiter'
|
5
|
+
require 'httpdisk'
|
6
|
+
|
7
|
+
module Sinew
|
8
|
+
# Sinew base class, for use in standalone scripts or via the sinew binary.
|
9
|
+
class Base
|
10
|
+
attr_reader :csv, :mutex, :options
|
11
|
+
|
12
|
+
def initialize(opts = {})
|
13
|
+
@mutex = Mutex.new
|
14
|
+
|
15
|
+
#
|
16
|
+
# defaults for Sloptions
|
17
|
+
#
|
18
|
+
|
19
|
+
# default :rate_limit, typically 1
|
20
|
+
default_rate_limit = ENV['SINEW_TEST'] ? 0 : 1
|
21
|
+
|
22
|
+
#
|
23
|
+
# note: uses HTTPDisk::Sloptions
|
24
|
+
#
|
25
|
+
|
26
|
+
@options = HTTPDisk::Sloptions.parse(opts) do
|
27
|
+
# cli
|
28
|
+
_1.integer :limit
|
29
|
+
_1.integer :timeout, default: 30
|
30
|
+
_1.boolean :silent
|
31
|
+
_1.on :proxy, type: [:string, Array]
|
32
|
+
_1.boolean :verbose
|
33
|
+
|
34
|
+
# httpdisk
|
35
|
+
_1.string :dir, default: File.join(ENV['HOME'], '.sinew')
|
36
|
+
_1.integer :expires
|
37
|
+
_1.boolean :force
|
38
|
+
_1.boolean :force_errors
|
39
|
+
_1.array :ignore_params
|
40
|
+
|
41
|
+
# more runtime options
|
42
|
+
_1.hash :headers
|
43
|
+
_1.boolean :insecure
|
44
|
+
_1.string :output, required: true
|
45
|
+
_1.hash :params
|
46
|
+
_1.float :rate_limit, default: default_rate_limit
|
47
|
+
_1.integer :retries, default: 2
|
48
|
+
_1.on :url_prefix, type: [:string, URI]
|
49
|
+
_1.boolean :utf8, default: true
|
50
|
+
end
|
51
|
+
|
52
|
+
@csv = CSV.new(opts[:output])
|
53
|
+
end
|
54
|
+
|
55
|
+
#
|
56
|
+
# requests
|
57
|
+
#
|
58
|
+
|
59
|
+
# http get, returns a Response
|
60
|
+
def get(url, params = nil, headers = nil)
|
61
|
+
faraday_response = faraday.get(url, params, headers) do
|
62
|
+
_1.options[:proxy] = random_proxy
|
63
|
+
end
|
64
|
+
Response.new(faraday_response)
|
65
|
+
end
|
66
|
+
|
67
|
+
# http post, returns a Response. Defaults to form body type.
|
68
|
+
def post(url, body = nil, headers = nil)
|
69
|
+
faraday_response = faraday.post(url, body, headers) do
|
70
|
+
_1.options[:proxy] = random_proxy
|
71
|
+
end
|
72
|
+
Response.new(faraday_response)
|
73
|
+
end
|
74
|
+
|
75
|
+
# http post json, returns a Response
|
76
|
+
def post_json(url, body = nil, headers = nil)
|
77
|
+
body = body.to_json
|
78
|
+
headers = (headers || {}).merge('Content-Type' => 'application/json')
|
79
|
+
post(url, body, headers)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Faraday connection for this recipe
|
83
|
+
def faraday
|
84
|
+
mutex.synchronize do
|
85
|
+
@faraday ||= create_faraday
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
#
|
90
|
+
# httpdisk
|
91
|
+
#
|
92
|
+
|
93
|
+
# Returns true if request is cached. Defaults to form body type.
|
94
|
+
def cached?(method, url, params = nil, body = nil)
|
95
|
+
status = status(method, url, params, body)
|
96
|
+
status[:status] != 'miss'
|
97
|
+
end
|
98
|
+
|
99
|
+
# Remove cache file, if any. Defaults to form body type.
|
100
|
+
def uncache(method, url, params = nil, body = nil)
|
101
|
+
status = status(method, url, params, body)
|
102
|
+
path = status[:path]
|
103
|
+
File.unlink(path) if File.exist?(path)
|
104
|
+
end
|
105
|
+
|
106
|
+
# Check httpdisk status for this request. Defaults to form body type.
|
107
|
+
def status(method, url, params = nil, body = nil)
|
108
|
+
# if hash, default to url encoded form
|
109
|
+
# see lib/faraday/request/url_encoded.rb
|
110
|
+
if body.is_a?(Hash)
|
111
|
+
body = Faraday::Utils::ParamsHash[body].to_query
|
112
|
+
end
|
113
|
+
|
114
|
+
env = Faraday::Env.new.tap do
|
115
|
+
_1.method = method.to_s.downcase.to_sym
|
116
|
+
_1.request_headers = {}
|
117
|
+
_1.request_body = body
|
118
|
+
_1.url = faraday.build_url(url, params)
|
119
|
+
end
|
120
|
+
httpdisk.status(env)
|
121
|
+
end
|
122
|
+
|
123
|
+
#
|
124
|
+
# csv
|
125
|
+
#
|
126
|
+
|
127
|
+
# Output a csv header. This usually happens automatically, but you can call
|
128
|
+
# this method directly to ensure a consistent set of columns.
|
129
|
+
def csv_header(*columns)
|
130
|
+
csv.start(columns.flatten)
|
131
|
+
end
|
132
|
+
|
133
|
+
# Output a csv row. Row should be any object that can turn into a hash - a
|
134
|
+
# hash, OpenStruct, etc.
|
135
|
+
def csv_emit(row)
|
136
|
+
row = row.to_h
|
137
|
+
mutex.synchronize do
|
138
|
+
# header if necessary
|
139
|
+
csv_header(row.keys) if !csv.started?
|
140
|
+
|
141
|
+
# emit
|
142
|
+
print = csv.emit(row)
|
143
|
+
puts print.ai if options[:verbose]
|
144
|
+
|
145
|
+
# this is caught by Sinew::Main
|
146
|
+
if csv.count == options[:limit]
|
147
|
+
raise LimitError
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
#
|
153
|
+
# stdout
|
154
|
+
#
|
155
|
+
|
156
|
+
RESET = "\e[0m".freeze
|
157
|
+
RED = "\e[1;37;41m".freeze
|
158
|
+
GREEN = "\e[1;37;42m".freeze
|
159
|
+
|
160
|
+
# Print a nice green banner.
|
161
|
+
def banner(msg, color: GREEN)
|
162
|
+
msg = "#{msg} ".ljust(72, ' ')
|
163
|
+
msg = "[#{Time.new.strftime('%H:%M:%S')}] #{msg}"
|
164
|
+
msg = "#{color}#{msg}#{RESET}" if $stdout.tty?
|
165
|
+
puts msg
|
166
|
+
end
|
167
|
+
|
168
|
+
# Print a scary red banner and exit.
|
169
|
+
def fatal(msg)
|
170
|
+
banner(msg, color: RED)
|
171
|
+
exit 1
|
172
|
+
end
|
173
|
+
|
174
|
+
protected
|
175
|
+
|
176
|
+
# Return a random proxy.
|
177
|
+
def random_proxy
|
178
|
+
return if !options[:proxy]
|
179
|
+
|
180
|
+
proxies = options[:proxy]
|
181
|
+
proxies = proxies.split(',') if !proxies.is_a?(Array)
|
182
|
+
proxies.sample
|
183
|
+
end
|
184
|
+
|
185
|
+
# Create the Faraday connection for making requests.
|
186
|
+
def create_faraday
|
187
|
+
faraday_options = options.slice(:headers, :params)
|
188
|
+
if options[:insecure]
|
189
|
+
faraday_options[:ssl] = { verify: false }
|
190
|
+
end
|
191
|
+
Faraday.new(nil, faraday_options) do
|
192
|
+
# options
|
193
|
+
if options[:url_prefix]
|
194
|
+
_1.url_prefix = options[:url_prefix]
|
195
|
+
end
|
196
|
+
_1.options.timeout = options[:timeout]
|
197
|
+
|
198
|
+
#
|
199
|
+
# middleware that runs on both disk/network requests
|
200
|
+
#
|
201
|
+
|
202
|
+
# cookie middleware
|
203
|
+
_1.use :cookie_jar
|
204
|
+
|
205
|
+
# auto-encode form bodies
|
206
|
+
_1.request :url_encoded
|
207
|
+
|
208
|
+
# Before httpdisk so each redirect segment is cached
|
209
|
+
# Keep track of redirect status for logger
|
210
|
+
_1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
|
211
|
+
|
212
|
+
#
|
213
|
+
# httpdisk
|
214
|
+
#
|
215
|
+
|
216
|
+
httpdisk_options = options.slice(:dir, :expires, :force, :force_errors, :ignore_params, :utf8)
|
217
|
+
_1.use :httpdisk, httpdisk_options
|
218
|
+
|
219
|
+
#
|
220
|
+
# middleware below is only used if httpdisk uses the network
|
221
|
+
#
|
222
|
+
|
223
|
+
# rate limit
|
224
|
+
rate_limit = options[:rate_limit]
|
225
|
+
_1.request :rate_limiter, interval: rate_limit
|
226
|
+
|
227
|
+
# After httpdisk so that only non-cached requests are logged.
|
228
|
+
# Before retry so that we don't log each retry attempt.
|
229
|
+
_1.response :logger, nil, formatter: Middleware::LogFormatter if !options[:silent]
|
230
|
+
|
231
|
+
retry_options = {
|
232
|
+
max_interval: rate_limit, # very important, negates Retry-After: 86400
|
233
|
+
max: options[:retries],
|
234
|
+
methods: %w[delete get head options patch post put trace],
|
235
|
+
retry_statuses: (500..600).to_a,
|
236
|
+
retry_if: ->(_env, _err) { true },
|
237
|
+
}
|
238
|
+
_1.request :retry, retry_options
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
242
|
+
# find connection's httpdisk instance
|
243
|
+
def httpdisk
|
244
|
+
@httpdisk ||= begin
|
245
|
+
app = faraday.app
|
246
|
+
app = app.app until app.is_a?(HTTPDisk::Client)
|
247
|
+
app
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|