sinew 3.0.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -5
- data/.rubocop.yml +30 -48
- data/Gemfile +4 -4
- data/Gemfile.lock +124 -0
- data/README.md +108 -47
- data/Rakefile +16 -15
- data/bin/sinew +13 -41
- data/lib/sinew.rb +23 -9
- data/lib/sinew/args.rb +53 -0
- data/lib/sinew/base.rb +251 -0
- data/lib/sinew/csv.rb +89 -0
- data/lib/sinew/main.rb +46 -72
- data/lib/sinew/{connection → middleware}/log_formatter.rb +2 -1
- data/lib/sinew/nokogiri_ext.rb +12 -21
- data/lib/sinew/response.rb +41 -52
- data/lib/sinew/version.rb +1 -1
- data/sample.rb +13 -0
- data/sample.sinew +4 -4
- data/sinew.gemspec +19 -16
- metadata +31 -21
- data/.vscode/extensions.json +0 -3
- data/.vscode/settings.json +0 -5
- data/lib/sinew/connection.rb +0 -52
- data/lib/sinew/connection/rate_limit.rb +0 -29
- data/lib/sinew/core_ext.rb +0 -59
- data/lib/sinew/dsl.rb +0 -115
- data/lib/sinew/output.rb +0 -133
- data/lib/sinew/request.rb +0 -86
- data/lib/sinew/runtime_options.rb +0 -28
data/Rakefile
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
require 'bundler/setup'
|
2
|
-
|
3
2
|
require 'rake/testtask'
|
4
|
-
require 'sinew/version'
|
5
3
|
|
6
4
|
# load the spec, we use it below
|
7
5
|
spec = Gem::Specification.load('sinew.gemspec')
|
@@ -13,17 +11,20 @@ spec = Gem::Specification.load('sinew.gemspec')
|
|
13
11
|
#
|
14
12
|
|
15
13
|
# test (default)
|
14
|
+
Rake::TestTask.new
|
16
15
|
task default: :test
|
17
16
|
|
18
|
-
Rake::TestTask.new do
|
19
|
-
_1.libs << 'test'
|
20
|
-
_1.warning = false # sterile has a few issues here
|
21
|
-
end
|
22
|
-
|
23
17
|
# Watch rb files, run tests whenever something changes
|
24
18
|
task :watch do
|
25
|
-
|
26
|
-
|
19
|
+
sh "find . -name '*.rb' -o -name '*.sinew' | entr -c rake"
|
20
|
+
end
|
21
|
+
|
22
|
+
#
|
23
|
+
# pry
|
24
|
+
#
|
25
|
+
|
26
|
+
task :pry do
|
27
|
+
sh 'pry -I lib -r sinew.rb'
|
27
28
|
end
|
28
29
|
|
29
30
|
#
|
@@ -31,7 +32,7 @@ end
|
|
31
32
|
#
|
32
33
|
|
33
34
|
task :rubocop do
|
34
|
-
|
35
|
+
sh 'bundle exec rubocop -A .'
|
35
36
|
end
|
36
37
|
|
37
38
|
#
|
@@ -39,15 +40,15 @@ end
|
|
39
40
|
#
|
40
41
|
|
41
42
|
task :build do
|
42
|
-
|
43
|
+
sh 'gem build --quiet sinew.gemspec'
|
43
44
|
end
|
44
45
|
|
45
46
|
task install: :build do
|
46
|
-
|
47
|
+
sh "gem install --quiet sinew-#{spec.version}.gem"
|
47
48
|
end
|
48
49
|
|
49
50
|
task release: %i[rubocop test build] do
|
50
|
-
|
51
|
-
|
52
|
-
|
51
|
+
sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
|
52
|
+
sh 'git push --tags'
|
53
|
+
sh "gem push sinew-#{spec.version}.gem"
|
53
54
|
end
|
data/bin/sinew
CHANGED
@@ -1,53 +1,25 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
$LOAD_PATH.unshift(
|
3
|
+
$LOAD_PATH.unshift(File.join(__dir__, '../lib'))
|
4
4
|
|
5
|
-
|
6
|
-
require 'slop'
|
5
|
+
BIN = File.basename($PROGRAM_NAME)
|
7
6
|
|
8
7
|
#
|
9
|
-
#
|
8
|
+
# Load the bare minimum and parse args with slop. For speed.
|
10
9
|
#
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
o.bool '--force-errors', "don't read errors from cache (but still write)"
|
20
|
-
o.string '--proxy', 'use host[:port] as HTTP proxy'
|
21
|
-
o.bool '--version', 'show version and exit'
|
22
|
-
o.on('--help', 'show this help') do
|
23
|
-
puts o
|
24
|
-
exit
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
if options[:version]
|
29
|
-
puts Sinew::VERSION
|
30
|
-
exit
|
11
|
+
require 'sinew/args'
|
12
|
+
begin
|
13
|
+
slop = Sinew::Args.slop(ARGV)
|
14
|
+
rescue Slop::Error => e
|
15
|
+
$stderr.puts "#{BIN}: #{e}" if e.message != ''
|
16
|
+
$stderr.puts("#{BIN}: try '#{BIN} --help' for more information")
|
17
|
+
exit 1
|
31
18
|
end
|
32
19
|
|
33
20
|
#
|
34
|
-
#
|
21
|
+
# now load everything and run
|
35
22
|
#
|
36
23
|
|
37
|
-
|
38
|
-
|
39
|
-
Scripto.fatal('need a .sinew file to run against')
|
40
|
-
end
|
41
|
-
if !File.exist?(recipe)
|
42
|
-
Scripto.fatal("#{recipe} not found")
|
43
|
-
end
|
44
|
-
if options.arguments.length > 1
|
45
|
-
Scripto.fatal('can only run on one .sinew file')
|
46
|
-
end
|
47
|
-
options = options.to_h.merge(recipe: recipe)
|
48
|
-
|
49
|
-
#
|
50
|
-
# main
|
51
|
-
#
|
52
|
-
|
53
|
-
Sinew::Main.new(options).run
|
24
|
+
require 'sinew'
|
25
|
+
Sinew::Main.new(slop).run
|
data/lib/sinew.rb
CHANGED
@@ -1,9 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
1
|
+
# sinew
|
2
|
+
require 'sinew/args'
|
3
|
+
require 'sinew/base'
|
4
|
+
require 'sinew/csv'
|
5
|
+
require 'sinew/main'
|
6
|
+
require 'sinew/nokogiri_ext'
|
7
|
+
require 'sinew/response'
|
8
|
+
require 'sinew/version'
|
9
|
+
|
10
|
+
# custom faraday middleware
|
11
|
+
require 'sinew/middleware/log_formatter'
|
12
|
+
|
13
|
+
module Sinew
|
14
|
+
# flow control for --limit
|
15
|
+
class LimitError < StandardError; end
|
16
|
+
|
17
|
+
# shortcut for Sinew::Base.new
|
18
|
+
class << self
|
19
|
+
def new(**args)
|
20
|
+
Sinew::Base.new(**args)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/sinew/args.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# manually load dependencies here since this is loaded standalone by bin
|
2
|
+
require 'httpdisk/slop_duration'
|
3
|
+
require 'sinew/version'
|
4
|
+
require 'slop'
|
5
|
+
|
6
|
+
#
|
7
|
+
# This is used to parse command line arguments with Slop. We don't set any
|
8
|
+
# defaults in here, relying instead on Sloptions in Sinew::Base. That way
|
9
|
+
# defaults are applied for both command line and embedded usage of Sinew::Base.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Sinew
|
13
|
+
module Args
|
14
|
+
def self.slop(args)
|
15
|
+
slop = Slop.parse(args) do |o|
|
16
|
+
o.banner = 'Usage: sinew [options] [recipe.sinew]'
|
17
|
+
o.integer '-l', '--limit', 'quit after emitting this many rows'
|
18
|
+
o.string '--proxy', 'use host[:port] as HTTP proxy (can be a comma-delimited list)'
|
19
|
+
o.integer '--timeout', 'maximum time allowed for the transfer'
|
20
|
+
o.bool '-s', '--silent', 'suppress some output'
|
21
|
+
o.bool '-v', '--verbose', 'dump emitted rows while running'
|
22
|
+
|
23
|
+
o.separator 'From httpdisk:'
|
24
|
+
o.string '--dir', 'set custom cache directory'
|
25
|
+
# note: uses slop_duration from HTTPDisk
|
26
|
+
o.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
|
27
|
+
o.bool '--force', "don't read anything from cache (but still write)"
|
28
|
+
o.bool '--force-errors', "don't read errors from cache (but still write)"
|
29
|
+
|
30
|
+
# generic
|
31
|
+
o.boolean '--version', 'show version' do
|
32
|
+
puts "sinew #{Sinew::VERSION}"
|
33
|
+
exit
|
34
|
+
end
|
35
|
+
o.on('--help', 'show this help') do
|
36
|
+
puts o
|
37
|
+
exit
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# recipe argument
|
42
|
+
recipe = slop.args.first
|
43
|
+
raise Slop::Error, '' if args.empty?
|
44
|
+
raise Slop::Error, 'no RECIPE specified' if !recipe
|
45
|
+
raise Slop::Error, 'more than one RECIPE specified' if slop.args.length > 1
|
46
|
+
raise Slop::Error, "#{recipe} not found" if !File.exist?(recipe)
|
47
|
+
|
48
|
+
slop.to_h.tap do
|
49
|
+
_1[:recipe] = recipe
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/sinew/base.rb
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
require 'amazing_print'
|
2
|
+
require 'faraday-encoding'
|
3
|
+
require 'faraday/logging/formatter'
|
4
|
+
require 'faraday-rate_limiter'
|
5
|
+
require 'httpdisk'
|
6
|
+
|
7
|
+
module Sinew
|
8
|
+
# Sinew base class, for use in standalone scripts or via the sinew binary.
|
9
|
+
class Base
|
10
|
+
attr_reader :csv, :mutex, :options
|
11
|
+
|
12
|
+
def initialize(opts = {})
|
13
|
+
@mutex = Mutex.new
|
14
|
+
|
15
|
+
#
|
16
|
+
# defaults for Sloptions
|
17
|
+
#
|
18
|
+
|
19
|
+
# default :rate_limit, typically 1
|
20
|
+
default_rate_limit = ENV['SINEW_TEST'] ? 0 : 1
|
21
|
+
|
22
|
+
#
|
23
|
+
# note: uses HTTPDisk::Sloptions
|
24
|
+
#
|
25
|
+
|
26
|
+
@options = HTTPDisk::Sloptions.parse(opts) do
|
27
|
+
# cli
|
28
|
+
_1.integer :limit
|
29
|
+
_1.integer :timeout, default: 30
|
30
|
+
_1.boolean :silent
|
31
|
+
_1.on :proxy, type: [:string, Array]
|
32
|
+
_1.boolean :verbose
|
33
|
+
|
34
|
+
# httpdisk
|
35
|
+
_1.string :dir, default: File.join(ENV['HOME'], '.sinew')
|
36
|
+
_1.integer :expires
|
37
|
+
_1.boolean :force
|
38
|
+
_1.boolean :force_errors
|
39
|
+
_1.array :ignore_params
|
40
|
+
|
41
|
+
# more runtime options
|
42
|
+
_1.hash :headers
|
43
|
+
_1.boolean :insecure
|
44
|
+
_1.string :output, required: true
|
45
|
+
_1.hash :params
|
46
|
+
_1.float :rate_limit, default: default_rate_limit
|
47
|
+
_1.integer :retries, default: 2
|
48
|
+
_1.on :url_prefix, type: [:string, URI]
|
49
|
+
_1.boolean :utf8, default: true
|
50
|
+
end
|
51
|
+
|
52
|
+
@csv = CSV.new(opts[:output])
|
53
|
+
end
|
54
|
+
|
55
|
+
#
|
56
|
+
# requests
|
57
|
+
#
|
58
|
+
|
59
|
+
# http get, returns a Response
|
60
|
+
def get(url, params = nil, headers = nil)
|
61
|
+
faraday_response = faraday.get(url, params, headers) do
|
62
|
+
_1.options[:proxy] = random_proxy
|
63
|
+
end
|
64
|
+
Response.new(faraday_response)
|
65
|
+
end
|
66
|
+
|
67
|
+
# http post, returns a Response. Defaults to form body type.
|
68
|
+
def post(url, body = nil, headers = nil)
|
69
|
+
faraday_response = faraday.post(url, body, headers) do
|
70
|
+
_1.options[:proxy] = random_proxy
|
71
|
+
end
|
72
|
+
Response.new(faraday_response)
|
73
|
+
end
|
74
|
+
|
75
|
+
# http post json, returns a Response
|
76
|
+
def post_json(url, body = nil, headers = nil)
|
77
|
+
body = body.to_json
|
78
|
+
headers = (headers || {}).merge('Content-Type' => 'application/json')
|
79
|
+
post(url, body, headers)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Faraday connection for this recipe
|
83
|
+
def faraday
|
84
|
+
mutex.synchronize do
|
85
|
+
@faraday ||= create_faraday
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
#
|
90
|
+
# httpdisk
|
91
|
+
#
|
92
|
+
|
93
|
+
# Returns true if request is cached. Defaults to form body type.
|
94
|
+
def cached?(method, url, params = nil, body = nil)
|
95
|
+
status = status(method, url, params, body)
|
96
|
+
status[:status] != 'miss'
|
97
|
+
end
|
98
|
+
|
99
|
+
# Remove cache file, if any. Defaults to form body type.
|
100
|
+
def uncache(method, url, params = nil, body = nil)
|
101
|
+
status = status(method, url, params, body)
|
102
|
+
path = status[:path]
|
103
|
+
File.unlink(path) if File.exist?(path)
|
104
|
+
end
|
105
|
+
|
106
|
+
# Check httpdisk status for this request. Defaults to form body type.
|
107
|
+
def status(method, url, params = nil, body = nil)
|
108
|
+
# if hash, default to url encoded form
|
109
|
+
# see lib/faraday/request/url_encoded.rb
|
110
|
+
if body.is_a?(Hash)
|
111
|
+
body = Faraday::Utils::ParamsHash[body].to_query
|
112
|
+
end
|
113
|
+
|
114
|
+
env = Faraday::Env.new.tap do
|
115
|
+
_1.method = method.to_s.downcase.to_sym
|
116
|
+
_1.request_headers = {}
|
117
|
+
_1.request_body = body
|
118
|
+
_1.url = faraday.build_url(url, params)
|
119
|
+
end
|
120
|
+
httpdisk.status(env)
|
121
|
+
end
|
122
|
+
|
123
|
+
#
|
124
|
+
# csv
|
125
|
+
#
|
126
|
+
|
127
|
+
# Output a csv header. This usually happens automatically, but you can call
|
128
|
+
# this method directly to ensure a consistent set of columns.
|
129
|
+
def csv_header(*columns)
|
130
|
+
csv.start(columns.flatten)
|
131
|
+
end
|
132
|
+
|
133
|
+
# Output a csv row. Row should be any object that can turn into a hash - a
|
134
|
+
# hash, OpenStruct, etc.
|
135
|
+
def csv_emit(row)
|
136
|
+
row = row.to_h
|
137
|
+
mutex.synchronize do
|
138
|
+
# header if necessary
|
139
|
+
csv_header(row.keys) if !csv.started?
|
140
|
+
|
141
|
+
# emit
|
142
|
+
print = csv.emit(row)
|
143
|
+
puts print.ai if options[:verbose]
|
144
|
+
|
145
|
+
# this is caught by Sinew::Main
|
146
|
+
if csv.count == options[:limit]
|
147
|
+
raise LimitError
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
#
|
153
|
+
# stdout
|
154
|
+
#
|
155
|
+
|
156
|
+
RESET = "\e[0m".freeze
|
157
|
+
RED = "\e[1;37;41m".freeze
|
158
|
+
GREEN = "\e[1;37;42m".freeze
|
159
|
+
|
160
|
+
# Print a nice green banner.
|
161
|
+
def banner(msg, color: GREEN)
|
162
|
+
msg = "#{msg} ".ljust(72, ' ')
|
163
|
+
msg = "[#{Time.new.strftime('%H:%M:%S')}] #{msg}"
|
164
|
+
msg = "#{color}#{msg}#{RESET}" if $stdout.tty?
|
165
|
+
puts msg
|
166
|
+
end
|
167
|
+
|
168
|
+
# Print a scary red banner and exit.
|
169
|
+
def fatal(msg)
|
170
|
+
banner(msg, color: RED)
|
171
|
+
exit 1
|
172
|
+
end
|
173
|
+
|
174
|
+
protected
|
175
|
+
|
176
|
+
# Return a random proxy.
|
177
|
+
def random_proxy
|
178
|
+
return if !options[:proxy]
|
179
|
+
|
180
|
+
proxies = options[:proxy]
|
181
|
+
proxies = proxies.split(',') if !proxies.is_a?(Array)
|
182
|
+
proxies.sample
|
183
|
+
end
|
184
|
+
|
185
|
+
# Create the Faraday connection for making requests.
|
186
|
+
def create_faraday
|
187
|
+
faraday_options = options.slice(:headers, :params)
|
188
|
+
if options[:insecure]
|
189
|
+
faraday_options[:ssl] = { verify: false }
|
190
|
+
end
|
191
|
+
Faraday.new(nil, faraday_options) do
|
192
|
+
# options
|
193
|
+
if options[:url_prefix]
|
194
|
+
_1.url_prefix = options[:url_prefix]
|
195
|
+
end
|
196
|
+
_1.options.timeout = options[:timeout]
|
197
|
+
|
198
|
+
#
|
199
|
+
# middleware that runs on both disk/network requests
|
200
|
+
#
|
201
|
+
|
202
|
+
# cookie middleware
|
203
|
+
_1.use :cookie_jar
|
204
|
+
|
205
|
+
# auto-encode form bodies
|
206
|
+
_1.request :url_encoded
|
207
|
+
|
208
|
+
# Before httpdisk so each redirect segment is cached
|
209
|
+
# Keep track of redirect status for logger
|
210
|
+
_1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
|
211
|
+
|
212
|
+
#
|
213
|
+
# httpdisk
|
214
|
+
#
|
215
|
+
|
216
|
+
httpdisk_options = options.slice(:dir, :expires, :force, :force_errors, :ignore_params, :utf8)
|
217
|
+
_1.use :httpdisk, httpdisk_options
|
218
|
+
|
219
|
+
#
|
220
|
+
# middleware below is only used if httpdisk uses the network
|
221
|
+
#
|
222
|
+
|
223
|
+
# rate limit
|
224
|
+
rate_limit = options[:rate_limit]
|
225
|
+
_1.request :rate_limiter, interval: rate_limit
|
226
|
+
|
227
|
+
# After httpdisk so that only non-cached requests are logged.
|
228
|
+
# Before retry so that we don't log each retry attempt.
|
229
|
+
_1.response :logger, nil, formatter: Middleware::LogFormatter if !options[:silent]
|
230
|
+
|
231
|
+
retry_options = {
|
232
|
+
max_interval: rate_limit, # very important, negates Retry-After: 86400
|
233
|
+
max: options[:retries],
|
234
|
+
methods: %w[delete get head options patch post put trace],
|
235
|
+
retry_statuses: (500..600).to_a,
|
236
|
+
retry_if: ->(_env, _err) { true },
|
237
|
+
}
|
238
|
+
_1.request :retry, retry_options
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
242
|
+
# find connection's httpdisk instance
|
243
|
+
def httpdisk
|
244
|
+
@httpdisk ||= begin
|
245
|
+
app = faraday.app
|
246
|
+
app = app.app until app.is_a?(HTTPDisk::Client)
|
247
|
+
app
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|