sinew 2.0.3 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.gitignore +3 -5
  4. data/.rubocop.yml +31 -46
  5. data/Gemfile +9 -0
  6. data/Gemfile.lock +124 -0
  7. data/README.md +146 -81
  8. data/Rakefile +36 -20
  9. data/bin/sinew +13 -39
  10. data/lib/sinew.rb +23 -10
  11. data/lib/sinew/args.rb +53 -0
  12. data/lib/sinew/base.rb +251 -0
  13. data/lib/sinew/csv.rb +89 -0
  14. data/lib/sinew/main.rb +45 -98
  15. data/lib/sinew/middleware/log_formatter.rb +23 -0
  16. data/lib/sinew/nokogiri_ext.rb +12 -21
  17. data/lib/sinew/response.rb +39 -99
  18. data/lib/sinew/version.rb +1 -1
  19. data/sample.rb +13 -0
  20. data/sample.sinew +4 -4
  21. data/sinew.gemspec +26 -25
  22. metadata +46 -108
  23. data/.travis.yml +0 -4
  24. data/.vscode/extensions.json +0 -3
  25. data/.vscode/settings.json +0 -15
  26. data/lib/sinew/cache.rb +0 -79
  27. data/lib/sinew/core_ext.rb +0 -59
  28. data/lib/sinew/dsl.rb +0 -114
  29. data/lib/sinew/output.rb +0 -149
  30. data/lib/sinew/request.rb +0 -151
  31. data/lib/sinew/runtime_options.rb +0 -28
  32. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  33. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  34. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  35. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  36. data/test/legacy/legacy.sinew +0 -2
  37. data/test/recipes/array_header.sinew +0 -6
  38. data/test/recipes/basic.sinew +0 -8
  39. data/test/recipes/dups.sinew +0 -7
  40. data/test/recipes/implicit_header.sinew +0 -5
  41. data/test/recipes/limit.sinew +0 -11
  42. data/test/recipes/noko.sinew +0 -9
  43. data/test/recipes/uri.sinew +0 -11
  44. data/test/recipes/xml.sinew +0 -8
  45. data/test/test.html +0 -45
  46. data/test/test_cache.rb +0 -69
  47. data/test/test_helper.rb +0 -123
  48. data/test/test_legacy.rb +0 -23
  49. data/test/test_main.rb +0 -34
  50. data/test/test_nokogiri_ext.rb +0 -18
  51. data/test/test_output.rb +0 -56
  52. data/test/test_recipes.rb +0 -60
  53. data/test/test_requests.rb +0 -135
  54. data/test/test_utf8.rb +0 -39
data/Rakefile CHANGED
@@ -1,38 +1,54 @@
1
- require 'bundler'
2
1
  require 'bundler/setup'
3
-
4
- require 'rake'
5
2
  require 'rake/testtask'
6
- require 'sinew/version'
3
+
4
+ # load the spec, we use it below
5
+ spec = Gem::Specification.load('sinew.gemspec')
7
6
 
8
7
  #
9
- # gem
8
+ # testing
9
+ # don't forget about TESTOPTS="--verbose" rake
10
+ # also: rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
10
11
  #
11
12
 
12
- task gem: :build
13
- task :build do
14
- system 'gem build --quiet sinew.gemspec'
13
+ # test (default)
14
+ Rake::TestTask.new
15
+ task default: :test
16
+
17
+ # Watch rb and sinew files, run tests whenever something changes
18
+ task :watch do
19
+ sh "find . -name '*.rb' -o -name '*.sinew' | entr -c rake"
15
20
  end
16
21
 
17
- task install: :build do
18
- system "sudo gem install --quiet sinew-#{Sinew::VERSION}.gem"
22
+ #
23
+ # pry
24
+ #
25
+
26
+ task :pry do
27
+ sh 'pry -I lib -r sinew.rb'
19
28
  end
20
29
 
21
- task release: :build do
22
- system "git tag -a #{Sinew::VERSION} -m 'Tagging #{Sinew::VERSION}'"
23
- system 'git push --tags'
24
- system "gem push sinew-#{Sinew::VERSION}.gem"
30
+ #
31
+ # rubocop
32
+ #
33
+
34
+ task :rubocop do
35
+ sh 'bundle exec rubocop -A .'
25
36
  end
26
37
 
27
38
  #
28
- # minitest
39
+ # gem
29
40
  #
30
41
 
31
- Rake::TestTask.new(:test) do |t|
32
- t.warning = false
42
+ task :build do
43
+ sh 'gem build --quiet sinew.gemspec'
33
44
  end
34
45
 
35
- task default: :test
46
+ task install: :build do
47
+ sh "gem install --quiet sinew-#{spec.version}.gem"
48
+ end
36
49
 
37
- # to test:
38
- # block ; rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
50
+ task release: %i[rubocop test build] do
51
+ sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
52
+ sh 'git push --tags'
53
+ sh "gem push sinew-#{spec.version}.gem"
54
+ end
data/bin/sinew CHANGED
@@ -1,51 +1,25 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- $LOAD_PATH.unshift("#{__dir__}/../lib")
3
+ $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
4
4
 
5
- require 'sinew'
6
- require 'slop'
5
+ BIN = File.basename($PROGRAM_NAME)
7
6
 
8
7
  #
9
- # options
8
+ # Load the bare minimum and parse args with slop. For speed.
10
9
  #
11
10
 
12
- options = Slop.parse do |o|
13
- o.banner = 'Usage: sinew [options] <gub.sinew>'
14
- o.bool '-v', '--verbose', 'dump emitted rows while running'
15
- o.bool '-q', '--quiet', 'suppress some output'
16
- o.integer '-l', '--limit', 'quit after emitting this many rows'
17
- o.string '-c', '--cache', 'set custom cache directory', default: "#{ENV['HOME']}/.sinew"
18
- o.string '--proxy', 'use host[:port] as HTTP proxy'
19
- o.bool '--version', 'show version and exit'
20
- o.on('--help', 'show this help') do
21
- puts o
22
- exit
23
- end
24
- end
25
-
26
- if options[:version]
27
- puts Sinew::VERSION
28
- exit
11
+ require 'sinew/args'
12
+ begin
13
+ slop = Sinew::Args.slop(ARGV)
14
+ rescue Slop::Error => e
15
+ $stderr.puts "#{BIN}: #{e}" if e.message != ''
16
+ $stderr.puts("#{BIN}: try '#{BIN} --help' for more information")
17
+ exit 1
29
18
  end
30
19
 
31
20
  #
32
- # recipe
21
+ # now load everything and run
33
22
  #
34
23
 
35
- recipe = options.arguments.first
36
- if !recipe
37
- Scripto.fatal('need a .sinew file to run against')
38
- end
39
- if !File.exist?(recipe)
40
- Scripto.fatal("#{recipe} not found")
41
- end
42
- if options.arguments.length > 1
43
- Scripto.fatal('can only run on one .sinew file')
44
- end
45
- options = options.to_h.merge(recipe: recipe)
46
-
47
- #
48
- # main
49
- #
50
-
51
- Sinew::Main.new(options).run
24
+ require 'sinew'
25
+ Sinew::Main.new(slop).run
data/lib/sinew.rb CHANGED
@@ -1,10 +1,23 @@
1
- require_relative 'sinew/cache'
2
- require_relative 'sinew/core_ext'
3
- require_relative 'sinew/dsl'
4
- require_relative 'sinew/main'
5
- require_relative 'sinew/nokogiri_ext'
6
- require_relative 'sinew/output'
7
- require_relative 'sinew/request'
8
- require_relative 'sinew/response'
9
- require_relative 'sinew/runtime_options'
10
- require_relative 'sinew/version'
1
+ # sinew
2
+ require 'sinew/args'
3
+ require 'sinew/base'
4
+ require 'sinew/csv'
5
+ require 'sinew/main'
6
+ require 'sinew/nokogiri_ext'
7
+ require 'sinew/response'
8
+ require 'sinew/version'
9
+
10
+ # custom faraday middleware
11
+ require 'sinew/middleware/log_formatter'
12
+
13
+ module Sinew
14
+ # flow control for --limit
15
+ class LimitError < StandardError; end
16
+
17
+ # shortcut for Sinew::Base.new
18
+ class << self
19
+ def new(**args)
20
+ Sinew::Base.new(**args)
21
+ end
22
+ end
23
+ end
data/lib/sinew/args.rb ADDED
@@ -0,0 +1,53 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/slop_duration'
3
+ require 'sinew/version'
4
+ require 'slop'
5
+
6
+ #
7
+ # This is used to parse command line arguments with Slop. We don't set any
8
+ # defaults in here, relying instead on Sloptions in Sinew::Base. That way
9
+ # defaults are applied for both command line and embedded usage of Sinew::Base.
10
+ #
11
+
12
+ module Sinew
13
+ module Args
14
+ def self.slop(args)
15
+ slop = Slop.parse(args) do |o|
16
+ o.banner = 'Usage: sinew [options] [recipe.sinew]'
17
+ o.integer '-l', '--limit', 'quit after emitting this many rows'
18
+ o.string '--proxy', 'use host[:port] as HTTP proxy (can be a comma-delimited list)'
19
+ o.integer '--timeout', 'maximum time allowed for the transfer'
20
+ o.bool '-s', '--silent', 'suppress some output'
21
+ o.bool '-v', '--verbose', 'dump emitted rows while running'
22
+
23
+ o.separator 'From httpdisk:'
24
+ o.string '--dir', 'set custom cache directory'
25
+ # note: uses slop_duration from HTTPDisk
26
+ o.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
27
+ o.bool '--force', "don't read anything from cache (but still write)"
28
+ o.bool '--force-errors', "don't read errors from cache (but still write)"
29
+
30
+ # generic
31
+ o.boolean '--version', 'show version' do
32
+ puts "sinew #{Sinew::VERSION}"
33
+ exit
34
+ end
35
+ o.on('--help', 'show this help') do
36
+ puts o
37
+ exit
38
+ end
39
+ end
40
+
41
+ # recipe argument
42
+ recipe = slop.args.first
43
+ raise Slop::Error, '' if args.empty?
44
+ raise Slop::Error, 'no RECIPE specified' if !recipe
45
+ raise Slop::Error, 'more than one RECIPE specified' if slop.args.length > 1
46
+ raise Slop::Error, "#{recipe} not found" if !File.exist?(recipe)
47
+
48
+ slop.to_h.tap do
49
+ _1[:recipe] = recipe
50
+ end
51
+ end
52
+ end
53
+ end
data/lib/sinew/base.rb ADDED
@@ -0,0 +1,251 @@
1
+ require 'amazing_print'
2
+ require 'faraday-encoding'
3
+ require 'faraday/logging/formatter'
4
+ require 'faraday-rate_limiter'
5
+ require 'httpdisk'
6
+
7
+ module Sinew
8
+ # Sinew base class, for use in standalone scripts or via the sinew binary.
9
+ class Base
10
+ attr_reader :csv, :mutex, :options
11
+
12
+ def initialize(opts = {})
13
+ @mutex = Mutex.new
14
+
15
+ #
16
+ # defaults for Sloptions
17
+ #
18
+
19
+ # default :rate_limit, typically 1
20
+ default_rate_limit = ENV['SINEW_TEST'] ? 0 : 1
21
+
22
+ #
23
+ # note: uses HTTPDisk::Sloptions
24
+ #
25
+
26
+ @options = HTTPDisk::Sloptions.parse(opts) do
27
+ # cli
28
+ _1.integer :limit
29
+ _1.integer :timeout, default: 30
30
+ _1.boolean :silent
31
+ _1.on :proxy, type: [:string, Array]
32
+ _1.boolean :verbose
33
+
34
+ # httpdisk
35
+ _1.string :dir, default: File.join(ENV['HOME'], '.sinew')
36
+ _1.integer :expires
37
+ _1.boolean :force
38
+ _1.boolean :force_errors
39
+ _1.array :ignore_params
40
+
41
+ # more runtime options
42
+ _1.hash :headers
43
+ _1.boolean :insecure
44
+ _1.string :output, required: true
45
+ _1.hash :params
46
+ _1.float :rate_limit, default: default_rate_limit
47
+ _1.integer :retries, default: 2
48
+ _1.on :url_prefix, type: [:string, URI]
49
+ _1.boolean :utf8, default: true
50
+ end
51
+
52
+ @csv = CSV.new(opts[:output])
53
+ end
54
+
55
+ #
56
+ # requests
57
+ #
58
+
59
+ # http get, returns a Response
60
+ def get(url, params = nil, headers = nil)
61
+ faraday_response = faraday.get(url, params, headers) do
62
+ _1.options[:proxy] = random_proxy
63
+ end
64
+ Response.new(faraday_response)
65
+ end
66
+
67
+ # http post, returns a Response. Defaults to form body type.
68
+ def post(url, body = nil, headers = nil)
69
+ faraday_response = faraday.post(url, body, headers) do
70
+ _1.options[:proxy] = random_proxy
71
+ end
72
+ Response.new(faraday_response)
73
+ end
74
+
75
+ # http post json, returns a Response
76
+ def post_json(url, body = nil, headers = nil)
77
+ body = body.to_json
78
+ headers = (headers || {}).merge('Content-Type' => 'application/json')
79
+ post(url, body, headers)
80
+ end
81
+
82
+ # Faraday connection for this recipe
83
+ def faraday
84
+ mutex.synchronize do
85
+ @faraday ||= create_faraday
86
+ end
87
+ end
88
+
89
+ #
90
+ # httpdisk
91
+ #
92
+
93
+ # Returns true if request is cached. Defaults to form body type.
94
+ def cached?(method, url, params = nil, body = nil)
95
+ status = status(method, url, params, body)
96
+ status[:status] != 'miss'
97
+ end
98
+
99
+ # Remove cache file, if any. Defaults to form body type.
100
+ def uncache(method, url, params = nil, body = nil)
101
+ status = status(method, url, params, body)
102
+ path = status[:path]
103
+ File.unlink(path) if File.exist?(path)
104
+ end
105
+
106
+ # Check httpdisk status for this request. Defaults to form body type.
107
+ def status(method, url, params = nil, body = nil)
108
+ # if hash, default to url encoded form
109
+ # see lib/faraday/request/url_encoded.rb
110
+ if body.is_a?(Hash)
111
+ body = Faraday::Utils::ParamsHash[body].to_query
112
+ end
113
+
114
+ env = Faraday::Env.new.tap do
115
+ _1.method = method.to_s.downcase.to_sym
116
+ _1.request_headers = {}
117
+ _1.request_body = body
118
+ _1.url = faraday.build_url(url, params)
119
+ end
120
+ httpdisk.status(env)
121
+ end
122
+
123
+ #
124
+ # csv
125
+ #
126
+
127
+ # Output a csv header. This usually happens automatically, but you can call
128
+ # this method directly to ensure a consistent set of columns.
129
+ def csv_header(*columns)
130
+ csv.start(columns.flatten)
131
+ end
132
+
133
+ # Output a csv row. Row should be any object that can turn into a hash - a
134
+ # hash, OpenStruct, etc.
135
+ def csv_emit(row)
136
+ row = row.to_h
137
+ mutex.synchronize do
138
+ # header if necessary
139
+ csv_header(row.keys) if !csv.started?
140
+
141
+ # emit
142
+ print = csv.emit(row)
143
+ puts print.ai if options[:verbose]
144
+
145
+ # this is caught by Sinew::Main
146
+ if csv.count == options[:limit]
147
+ raise LimitError
148
+ end
149
+ end
150
+ end
151
+
152
+ #
153
+ # stdout
154
+ #
155
+
156
+ RESET = "\e[0m".freeze
157
+ RED = "\e[1;37;41m".freeze
158
+ GREEN = "\e[1;37;42m".freeze
159
+
160
+ # Print a nice green banner.
161
+ def banner(msg, color: GREEN)
162
+ msg = "#{msg} ".ljust(72, ' ')
163
+ msg = "[#{Time.new.strftime('%H:%M:%S')}] #{msg}"
164
+ msg = "#{color}#{msg}#{RESET}" if $stdout.tty?
165
+ puts msg
166
+ end
167
+
168
+ # Print a scary red banner and exit.
169
+ def fatal(msg)
170
+ banner(msg, color: RED)
171
+ exit 1
172
+ end
173
+
174
+ protected
175
+
176
+ # Return a random proxy.
177
+ def random_proxy
178
+ return if !options[:proxy]
179
+
180
+ proxies = options[:proxy]
181
+ proxies = proxies.split(',') if !proxies.is_a?(Array)
182
+ proxies.sample
183
+ end
184
+
185
+ # Create the Faraday connection for making requests.
186
+ def create_faraday
187
+ faraday_options = options.slice(:headers, :params)
188
+ if options[:insecure]
189
+ faraday_options[:ssl] = { verify: false }
190
+ end
191
+ Faraday.new(nil, faraday_options) do
192
+ # options
193
+ if options[:url_prefix]
194
+ _1.url_prefix = options[:url_prefix]
195
+ end
196
+ _1.options.timeout = options[:timeout]
197
+
198
+ #
199
+ # middleware that runs on both disk/network requests
200
+ #
201
+
202
+ # cookie middleware
203
+ _1.use :cookie_jar
204
+
205
+ # auto-encode form bodies
206
+ _1.request :url_encoded
207
+
208
+ # Before httpdisk so each redirect segment is cached
209
+ # Keep track of redirect status for logger
210
+ _1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
211
+
212
+ #
213
+ # httpdisk
214
+ #
215
+
216
+ httpdisk_options = options.slice(:dir, :expires, :force, :force_errors, :ignore_params, :utf8)
217
+ _1.use :httpdisk, httpdisk_options
218
+
219
+ #
220
+ # middleware below is only used if httpdisk uses the network
221
+ #
222
+
223
+ # rate limit
224
+ rate_limit = options[:rate_limit]
225
+ _1.request :rate_limiter, interval: rate_limit
226
+
227
+ # After httpdisk so that only non-cached requests are logged.
228
+ # Before retry so that we don't log each retry attempt.
229
+ _1.response :logger, nil, formatter: Middleware::LogFormatter if !options[:silent]
230
+
231
+ retry_options = {
232
+ max_interval: rate_limit, # very important, negates Retry-After: 86400
233
+ max: options[:retries],
234
+ methods: %w[delete get head options patch post put trace],
235
+ retry_statuses: (500..600).to_a,
236
+ retry_if: ->(_env, _err) { true },
237
+ }
238
+ _1.request :retry, retry_options
239
+ end
240
+ end
241
+
242
+ # find connection's httpdisk instance
243
+ def httpdisk
244
+ @httpdisk ||= begin
245
+ app = faraday.app
246
+ app = app.app until app.is_a?(HTTPDisk::Client)
247
+ app
248
+ end
249
+ end
250
+ end
251
+ end