sinew 3.0.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,7 +1,5 @@
1
1
  require 'bundler/setup'
2
-
3
2
  require 'rake/testtask'
4
- require 'sinew/version'
5
3
 
6
4
  # load the spec, we use it below
7
5
  spec = Gem::Specification.load('sinew.gemspec')
@@ -13,17 +11,20 @@ spec = Gem::Specification.load('sinew.gemspec')
13
11
  #
14
12
 
15
13
  # test (default)
14
+ Rake::TestTask.new
16
15
  task default: :test
17
16
 
18
- Rake::TestTask.new do
19
- _1.libs << 'test'
20
- _1.warning = false # sterile has a few issues here
21
- end
22
-
23
17
  # Watch rb files, run tests whenever something changes
24
18
  task :watch do
25
- # https://superuser.com/a/665208 / https://unix.stackexchange.com/a/42288
26
- system("while true; do find . -name '*.rb' | entr -c -d rake; test $? -gt 128 && break; done")
19
+ sh "find . -name '*.rb' -o -name '*.sinew' | entr -c rake"
20
+ end
21
+
22
+ #
23
+ # pry
24
+ #
25
+
26
+ task :pry do
27
+ sh 'pry -I lib -r sinew.rb'
27
28
  end
28
29
 
29
30
  #
@@ -31,7 +32,7 @@ end
31
32
  #
32
33
 
33
34
  task :rubocop do
34
- system('bundle exec rubocop -A .', exception: true)
35
+ sh 'bundle exec rubocop -A .'
35
36
  end
36
37
 
37
38
  #
@@ -39,15 +40,15 @@ end
39
40
  #
40
41
 
41
42
  task :build do
42
- system 'gem build --quiet sinew.gemspec', exception: true
43
+ sh 'gem build --quiet sinew.gemspec'
43
44
  end
44
45
 
45
46
  task install: :build do
46
- system "gem install --quiet sinew-#{spec.version}.gem", exception: true
47
+ sh "gem install --quiet sinew-#{spec.version}.gem"
47
48
  end
48
49
 
49
50
  task release: %i[rubocop test build] do
50
- system "git tag -a #{spec.version} -m 'Tagging #{spec.version}'", exception: true
51
- system 'git push --tags', exception: true
52
- system "gem push sinew-#{spec.version}.gem", exception: true
51
+ sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
52
+ sh 'git push --tags'
53
+ sh "gem push sinew-#{spec.version}.gem"
53
54
  end
data/bin/sinew CHANGED
@@ -1,53 +1,25 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- $LOAD_PATH.unshift("#{__dir__}/../lib")
3
+ $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
4
4
 
5
- require 'sinew'
6
- require 'slop'
5
+ BIN = File.basename($PROGRAM_NAME)
7
6
 
8
7
  #
9
- # options
8
+ # Load the bare minimum and parse args with slop. For speed.
10
9
  #
11
10
 
12
- options = Slop.parse do |o|
13
- o.banner = 'Usage: sinew [options] <gub.sinew>'
14
- o.bool '-v', '--verbose', 'dump emitted rows while running'
15
- o.bool '-q', '--quiet', 'suppress some output'
16
- o.integer '-l', '--limit', 'quit after emitting this many rows'
17
- o.string '-c', '--cache', 'set custom cache directory', default: "#{ENV['HOME']}/.sinew"
18
- o.bool '--force', "don't read anything from cache (but still write)"
19
- o.bool '--force-errors', "don't read errors from cache (but still write)"
20
- o.string '--proxy', 'use host[:port] as HTTP proxy'
21
- o.bool '--version', 'show version and exit'
22
- o.on('--help', 'show this help') do
23
- puts o
24
- exit
25
- end
26
- end
27
-
28
- if options[:version]
29
- puts Sinew::VERSION
30
- exit
11
+ require 'sinew/args'
12
+ begin
13
+ slop = Sinew::Args.slop(ARGV)
14
+ rescue Slop::Error => e
15
+ $stderr.puts "#{BIN}: #{e}" if e.message != ''
16
+ $stderr.puts("#{BIN}: try '#{BIN} --help' for more information")
17
+ exit 1
31
18
  end
32
19
 
33
20
  #
34
- # recipe
21
+ # now load everything and run
35
22
  #
36
23
 
37
- recipe = options.arguments.first
38
- if !recipe
39
- Scripto.fatal('need a .sinew file to run against')
40
- end
41
- if !File.exist?(recipe)
42
- Scripto.fatal("#{recipe} not found")
43
- end
44
- if options.arguments.length > 1
45
- Scripto.fatal('can only run on one .sinew file')
46
- end
47
- options = options.to_h.merge(recipe: recipe)
48
-
49
- #
50
- # main
51
- #
52
-
53
- Sinew::Main.new(options).run
24
+ require 'sinew'
25
+ Sinew::Main.new(slop).run
data/lib/sinew.rb CHANGED
@@ -1,9 +1,23 @@
1
- require_relative 'sinew/core_ext'
2
- require_relative 'sinew/dsl'
3
- require_relative 'sinew/main'
4
- require_relative 'sinew/nokogiri_ext'
5
- require_relative 'sinew/output'
6
- require_relative 'sinew/request'
7
- require_relative 'sinew/response'
8
- require_relative 'sinew/runtime_options'
9
- require_relative 'sinew/version'
1
+ # sinew
2
+ require 'sinew/args'
3
+ require 'sinew/base'
4
+ require 'sinew/csv'
5
+ require 'sinew/main'
6
+ require 'sinew/nokogiri_ext'
7
+ require 'sinew/response'
8
+ require 'sinew/version'
9
+
10
+ # custom faraday middleware
11
+ require 'sinew/middleware/log_formatter'
12
+
13
+ module Sinew
14
+ # flow control for --limit
15
+ class LimitError < StandardError; end
16
+
17
+ # shortcut for Sinew::Base.new
18
+ class << self
19
+ def new(**args)
20
+ Sinew::Base.new(**args)
21
+ end
22
+ end
23
+ end
data/lib/sinew/args.rb ADDED
@@ -0,0 +1,53 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/slop_duration'
3
+ require 'sinew/version'
4
+ require 'slop'
5
+
6
+ #
7
+ # This is used to parse command line arguments with Slop. We don't set any
8
+ # defaults in here, relying instead on Sloptions in Sinew::Base. That way
9
+ # defaults are applied for both command line and embedded usage of Sinew::Base.
10
+ #
11
+
12
+ module Sinew
13
+ module Args
14
+ def self.slop(args)
15
+ slop = Slop.parse(args) do |o|
16
+ o.banner = 'Usage: sinew [options] [recipe.sinew]'
17
+ o.integer '-l', '--limit', 'quit after emitting this many rows'
18
+ o.string '--proxy', 'use host[:port] as HTTP proxy (can be a comma-delimited list)'
19
+ o.integer '--timeout', 'maximum time allowed for the transfer'
20
+ o.bool '-s', '--silent', 'suppress some output'
21
+ o.bool '-v', '--verbose', 'dump emitted rows while running'
22
+
23
+ o.separator 'From httpdisk:'
24
+ o.string '--dir', 'set custom cache directory'
25
+ # note: uses slop_duration from HTTPDisk
26
+ o.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
27
+ o.bool '--force', "don't read anything from cache (but still write)"
28
+ o.bool '--force-errors', "don't read errors from cache (but still write)"
29
+
30
+ # generic
31
+ o.boolean '--version', 'show version' do
32
+ puts "sinew #{Sinew::VERSION}"
33
+ exit
34
+ end
35
+ o.on('--help', 'show this help') do
36
+ puts o
37
+ exit
38
+ end
39
+ end
40
+
41
+ # recipe argument
42
+ recipe = slop.args.first
43
+ raise Slop::Error, '' if args.empty?
44
+ raise Slop::Error, 'no RECIPE specified' if !recipe
45
+ raise Slop::Error, 'more than one RECIPE specified' if slop.args.length > 1
46
+ raise Slop::Error, "#{recipe} not found" if !File.exist?(recipe)
47
+
48
+ slop.to_h.tap do
49
+ _1[:recipe] = recipe
50
+ end
51
+ end
52
+ end
53
+ end
data/lib/sinew/base.rb ADDED
@@ -0,0 +1,251 @@
1
+ require 'amazing_print'
2
+ require 'faraday-encoding'
3
+ require 'faraday/logging/formatter'
4
+ require 'faraday-rate_limiter'
5
+ require 'httpdisk'
6
+
7
+ module Sinew
8
+ # Sinew base class, for use in standalone scripts or via the sinew binary.
9
+ class Base
10
+ attr_reader :csv, :mutex, :options
11
+
12
+ def initialize(opts = {})
13
+ @mutex = Mutex.new
14
+
15
+ #
16
+ # defaults for Sloptions
17
+ #
18
+
19
+ # default :rate_limit, typically 1
20
+ default_rate_limit = ENV['SINEW_TEST'] ? 0 : 1
21
+
22
+ #
23
+ # note: uses HTTPDisk::Sloptions
24
+ #
25
+
26
+ @options = HTTPDisk::Sloptions.parse(opts) do
27
+ # cli
28
+ _1.integer :limit
29
+ _1.integer :timeout, default: 30
30
+ _1.boolean :silent
31
+ _1.on :proxy, type: [:string, Array]
32
+ _1.boolean :verbose
33
+
34
+ # httpdisk
35
+ _1.string :dir, default: File.join(ENV['HOME'], '.sinew')
36
+ _1.integer :expires
37
+ _1.boolean :force
38
+ _1.boolean :force_errors
39
+ _1.array :ignore_params
40
+
41
+ # more runtime options
42
+ _1.hash :headers
43
+ _1.boolean :insecure
44
+ _1.string :output, required: true
45
+ _1.hash :params
46
+ _1.float :rate_limit, default: default_rate_limit
47
+ _1.integer :retries, default: 2
48
+ _1.on :url_prefix, type: [:string, URI]
49
+ _1.boolean :utf8, default: true
50
+ end
51
+
52
+ @csv = CSV.new(opts[:output])
53
+ end
54
+
55
+ #
56
+ # requests
57
+ #
58
+
59
+ # http get, returns a Response
60
+ def get(url, params = nil, headers = nil)
61
+ faraday_response = faraday.get(url, params, headers) do
62
+ _1.options[:proxy] = random_proxy
63
+ end
64
+ Response.new(faraday_response)
65
+ end
66
+
67
+ # http post, returns a Response. Defaults to form body type.
68
+ def post(url, body = nil, headers = nil)
69
+ faraday_response = faraday.post(url, body, headers) do
70
+ _1.options[:proxy] = random_proxy
71
+ end
72
+ Response.new(faraday_response)
73
+ end
74
+
75
+ # http post json, returns a Response
76
+ def post_json(url, body = nil, headers = nil)
77
+ body = body.to_json
78
+ headers = (headers || {}).merge('Content-Type' => 'application/json')
79
+ post(url, body, headers)
80
+ end
81
+
82
+ # Faraday connection for this recipe
83
+ def faraday
84
+ mutex.synchronize do
85
+ @faraday ||= create_faraday
86
+ end
87
+ end
88
+
89
+ #
90
+ # httpdisk
91
+ #
92
+
93
+ # Returns true if request is cached. Defaults to form body type.
94
+ def cached?(method, url, params = nil, body = nil)
95
+ status = status(method, url, params, body)
96
+ status[:status] != 'miss'
97
+ end
98
+
99
+ # Remove cache file, if any. Defaults to form body type.
100
+ def uncache(method, url, params = nil, body = nil)
101
+ status = status(method, url, params, body)
102
+ path = status[:path]
103
+ File.unlink(path) if File.exist?(path)
104
+ end
105
+
106
+ # Check httpdisk status for this request. Defaults to form body type.
107
+ def status(method, url, params = nil, body = nil)
108
+ # if hash, default to url encoded form
109
+ # see lib/faraday/request/url_encoded.rb
110
+ if body.is_a?(Hash)
111
+ body = Faraday::Utils::ParamsHash[body].to_query
112
+ end
113
+
114
+ env = Faraday::Env.new.tap do
115
+ _1.method = method.to_s.downcase.to_sym
116
+ _1.request_headers = {}
117
+ _1.request_body = body
118
+ _1.url = faraday.build_url(url, params)
119
+ end
120
+ httpdisk.status(env)
121
+ end
122
+
123
+ #
124
+ # csv
125
+ #
126
+
127
+ # Output a csv header. This usually happens automatically, but you can call
128
+ # this method directly to ensure a consistent set of columns.
129
+ def csv_header(*columns)
130
+ csv.start(columns.flatten)
131
+ end
132
+
133
+ # Output a csv row. Row should be any object that can turn into a hash - a
134
+ # hash, OpenStruct, etc.
135
+ def csv_emit(row)
136
+ row = row.to_h
137
+ mutex.synchronize do
138
+ # header if necessary
139
+ csv_header(row.keys) if !csv.started?
140
+
141
+ # emit
142
+ print = csv.emit(row)
143
+ puts print.ai if options[:verbose]
144
+
145
+ # this is caught by Sinew::Main
146
+ if csv.count == options[:limit]
147
+ raise LimitError
148
+ end
149
+ end
150
+ end
151
+
152
+ #
153
+ # stdout
154
+ #
155
+
156
+ RESET = "\e[0m".freeze
157
+ RED = "\e[1;37;41m".freeze
158
+ GREEN = "\e[1;37;42m".freeze
159
+
160
+ # Print a nice green banner.
161
+ def banner(msg, color: GREEN)
162
+ msg = "#{msg} ".ljust(72, ' ')
163
+ msg = "[#{Time.new.strftime('%H:%M:%S')}] #{msg}"
164
+ msg = "#{color}#{msg}#{RESET}" if $stdout.tty?
165
+ puts msg
166
+ end
167
+
168
+ # Print a scary red banner and exit.
169
+ def fatal(msg)
170
+ banner(msg, color: RED)
171
+ exit 1
172
+ end
173
+
174
+ protected
175
+
176
+ # Return a random proxy.
177
+ def random_proxy
178
+ return if !options[:proxy]
179
+
180
+ proxies = options[:proxy]
181
+ proxies = proxies.split(',') if !proxies.is_a?(Array)
182
+ proxies.sample
183
+ end
184
+
185
+ # Create the Faraday connection for making requests.
186
+ def create_faraday
187
+ faraday_options = options.slice(:headers, :params)
188
+ if options[:insecure]
189
+ faraday_options[:ssl] = { verify: false }
190
+ end
191
+ Faraday.new(nil, faraday_options) do
192
+ # options
193
+ if options[:url_prefix]
194
+ _1.url_prefix = options[:url_prefix]
195
+ end
196
+ _1.options.timeout = options[:timeout]
197
+
198
+ #
199
+ # middleware that runs on both disk/network requests
200
+ #
201
+
202
+ # cookie middleware
203
+ _1.use :cookie_jar
204
+
205
+ # auto-encode form bodies
206
+ _1.request :url_encoded
207
+
208
+ # Before httpdisk so each redirect segment is cached
209
+ # Keep track of redirect status for logger
210
+ _1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
211
+
212
+ #
213
+ # httpdisk
214
+ #
215
+
216
+ httpdisk_options = options.slice(:dir, :expires, :force, :force_errors, :ignore_params, :utf8)
217
+ _1.use :httpdisk, httpdisk_options
218
+
219
+ #
220
+ # middleware below is only used if httpdisk uses the network
221
+ #
222
+
223
+ # rate limit
224
+ rate_limit = options[:rate_limit]
225
+ _1.request :rate_limiter, interval: rate_limit
226
+
227
+ # After httpdisk so that only non-cached requests are logged.
228
+ # Before retry so that we don't log each retry attempt.
229
+ _1.response :logger, nil, formatter: Middleware::LogFormatter if !options[:silent]
230
+
231
+ retry_options = {
232
+ max_interval: rate_limit, # very important, negates Retry-After: 86400
233
+ max: options[:retries],
234
+ methods: %w[delete get head options patch post put trace],
235
+ retry_statuses: (500..600).to_a,
236
+ retry_if: ->(_env, _err) { true },
237
+ }
238
+ _1.request :retry, retry_options
239
+ end
240
+ end
241
+
242
+ # find connection's httpdisk instance
243
+ def httpdisk
244
+ @httpdisk ||= begin
245
+ app = faraday.app
246
+ app = app.app until app.is_a?(HTTPDisk::Client)
247
+ app
248
+ end
249
+ end
250
+ end
251
+ end