sinew 2.0.3 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.gitignore +3 -5
  4. data/.rubocop.yml +31 -46
  5. data/Gemfile +9 -0
  6. data/Gemfile.lock +124 -0
  7. data/README.md +146 -81
  8. data/Rakefile +36 -20
  9. data/bin/sinew +13 -39
  10. data/lib/sinew.rb +23 -10
  11. data/lib/sinew/args.rb +53 -0
  12. data/lib/sinew/base.rb +251 -0
  13. data/lib/sinew/csv.rb +89 -0
  14. data/lib/sinew/main.rb +45 -98
  15. data/lib/sinew/middleware/log_formatter.rb +23 -0
  16. data/lib/sinew/nokogiri_ext.rb +12 -21
  17. data/lib/sinew/response.rb +39 -99
  18. data/lib/sinew/version.rb +1 -1
  19. data/sample.rb +13 -0
  20. data/sample.sinew +4 -4
  21. data/sinew.gemspec +26 -25
  22. metadata +46 -108
  23. data/.travis.yml +0 -4
  24. data/.vscode/extensions.json +0 -3
  25. data/.vscode/settings.json +0 -15
  26. data/lib/sinew/cache.rb +0 -79
  27. data/lib/sinew/core_ext.rb +0 -59
  28. data/lib/sinew/dsl.rb +0 -114
  29. data/lib/sinew/output.rb +0 -149
  30. data/lib/sinew/request.rb +0 -151
  31. data/lib/sinew/runtime_options.rb +0 -28
  32. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  33. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  34. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  35. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  36. data/test/legacy/legacy.sinew +0 -2
  37. data/test/recipes/array_header.sinew +0 -6
  38. data/test/recipes/basic.sinew +0 -8
  39. data/test/recipes/dups.sinew +0 -7
  40. data/test/recipes/implicit_header.sinew +0 -5
  41. data/test/recipes/limit.sinew +0 -11
  42. data/test/recipes/noko.sinew +0 -9
  43. data/test/recipes/uri.sinew +0 -11
  44. data/test/recipes/xml.sinew +0 -8
  45. data/test/test.html +0 -45
  46. data/test/test_cache.rb +0 -69
  47. data/test/test_helper.rb +0 -123
  48. data/test/test_legacy.rb +0 -23
  49. data/test/test_main.rb +0 -34
  50. data/test/test_nokogiri_ext.rb +0 -18
  51. data/test/test_output.rb +0 -56
  52. data/test/test_recipes.rb +0 -60
  53. data/test/test_requests.rb +0 -135
  54. data/test/test_utf8.rb +0 -39
data/Rakefile CHANGED
@@ -1,38 +1,54 @@
1
- require 'bundler'
2
1
  require 'bundler/setup'
3
-
4
- require 'rake'
5
2
  require 'rake/testtask'
6
- require 'sinew/version'
3
+
4
+ # load the spec, we use it below
5
+ spec = Gem::Specification.load('sinew.gemspec')
7
6
 
8
7
  #
9
- # gem
8
+ # testing
9
+ # don't forget about TESTOPTS="--verbose" rake
10
+ # also: rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
10
11
  #
11
12
 
12
- task gem: :build
13
- task :build do
14
- system 'gem build --quiet sinew.gemspec'
13
+ # test (default)
14
+ Rake::TestTask.new
15
+ task default: :test
16
+
17
+ # Watch rb files, run tests whenever something changes
18
+ task :watch do
19
+ sh "find . -name '*.rb' -o -name '*.sinew' | entr -c rake"
15
20
  end
16
21
 
17
- task install: :build do
18
- system "sudo gem install --quiet sinew-#{Sinew::VERSION}.gem"
22
+ #
23
+ # pry
24
+ #
25
+
26
+ task :pry do
27
+ sh 'pry -I lib -r sinew.rb'
19
28
  end
20
29
 
21
- task release: :build do
22
- system "git tag -a #{Sinew::VERSION} -m 'Tagging #{Sinew::VERSION}'"
23
- system 'git push --tags'
24
- system "gem push sinew-#{Sinew::VERSION}.gem"
30
+ #
31
+ # rubocop
32
+ #
33
+
34
+ task :rubocop do
35
+ sh 'bundle exec rubocop -A .'
25
36
  end
26
37
 
27
38
  #
28
- # minitest
39
+ # gem
29
40
  #
30
41
 
31
- Rake::TestTask.new(:test) do |t|
32
- t.warning = false
42
+ task :build do
43
+ sh 'gem build --quiet sinew.gemspec'
33
44
  end
34
45
 
35
- task default: :test
46
+ task install: :build do
47
+ sh "gem install --quiet sinew-#{spec.version}.gem"
48
+ end
36
49
 
37
- # to test:
38
- # block ; rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
50
+ task release: %i[rubocop test build] do
51
+ sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
52
+ sh 'git push --tags'
53
+ sh "gem push sinew-#{spec.version}.gem"
54
+ end
data/bin/sinew CHANGED
@@ -1,51 +1,25 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- $LOAD_PATH.unshift("#{__dir__}/../lib")
3
+ $LOAD_PATH.unshift(File.join(__dir__, '../lib'))
4
4
 
5
- require 'sinew'
6
- require 'slop'
5
+ BIN = File.basename($PROGRAM_NAME)
7
6
 
8
7
  #
9
- # options
8
+ # Load the bare minimum and parse args with slop. For speed.
10
9
  #
11
10
 
12
- options = Slop.parse do |o|
13
- o.banner = 'Usage: sinew [options] <gub.sinew>'
14
- o.bool '-v', '--verbose', 'dump emitted rows while running'
15
- o.bool '-q', '--quiet', 'suppress some output'
16
- o.integer '-l', '--limit', 'quit after emitting this many rows'
17
- o.string '-c', '--cache', 'set custom cache directory', default: "#{ENV['HOME']}/.sinew"
18
- o.string '--proxy', 'use host[:port] as HTTP proxy'
19
- o.bool '--version', 'show version and exit'
20
- o.on('--help', 'show this help') do
21
- puts o
22
- exit
23
- end
24
- end
25
-
26
- if options[:version]
27
- puts Sinew::VERSION
28
- exit
11
+ require 'sinew/args'
12
+ begin
13
+ slop = Sinew::Args.slop(ARGV)
14
+ rescue Slop::Error => e
15
+ $stderr.puts "#{BIN}: #{e}" if e.message != ''
16
+ $stderr.puts("#{BIN}: try '#{BIN} --help' for more information")
17
+ exit 1
29
18
  end
30
19
 
31
20
  #
32
- # recipe
21
+ # now load everything and run
33
22
  #
34
23
 
35
- recipe = options.arguments.first
36
- if !recipe
37
- Scripto.fatal('need a .sinew file to run against')
38
- end
39
- if !File.exist?(recipe)
40
- Scripto.fatal("#{recipe} not found")
41
- end
42
- if options.arguments.length > 1
43
- Scripto.fatal('can only run on one .sinew file')
44
- end
45
- options = options.to_h.merge(recipe: recipe)
46
-
47
- #
48
- # main
49
- #
50
-
51
- Sinew::Main.new(options).run
24
+ require 'sinew'
25
+ Sinew::Main.new(slop).run
data/lib/sinew.rb CHANGED
@@ -1,10 +1,23 @@
1
- require_relative 'sinew/cache'
2
- require_relative 'sinew/core_ext'
3
- require_relative 'sinew/dsl'
4
- require_relative 'sinew/main'
5
- require_relative 'sinew/nokogiri_ext'
6
- require_relative 'sinew/output'
7
- require_relative 'sinew/request'
8
- require_relative 'sinew/response'
9
- require_relative 'sinew/runtime_options'
10
- require_relative 'sinew/version'
1
+ # sinew
2
+ require 'sinew/args'
3
+ require 'sinew/base'
4
+ require 'sinew/csv'
5
+ require 'sinew/main'
6
+ require 'sinew/nokogiri_ext'
7
+ require 'sinew/response'
8
+ require 'sinew/version'
9
+
10
+ # custom faraday middleware
11
+ require 'sinew/middleware/log_formatter'
12
+
13
+ module Sinew
14
+ # flow control for --limit
15
+ class LimitError < StandardError; end
16
+
17
+ # shortcut for Sinew::Base.new
18
+ class << self
19
+ def new(**args)
20
+ Sinew::Base.new(**args)
21
+ end
22
+ end
23
+ end
data/lib/sinew/args.rb ADDED
@@ -0,0 +1,53 @@
1
+ # manually load dependencies here since this is loaded standalone by bin
2
+ require 'httpdisk/slop_duration'
3
+ require 'sinew/version'
4
+ require 'slop'
5
+
6
+ #
7
+ # This is used to parse command line arguments with Slop. We don't set any
8
+ # defaults in here, relying instead on Sloptions in Sinew::Base. That way
9
+ # defaults are applied for both command line and embedded usage of Sinew::Base.
10
+ #
11
+
12
+ module Sinew
13
+ module Args
14
+ def self.slop(args)
15
+ slop = Slop.parse(args) do |o|
16
+ o.banner = 'Usage: sinew [options] [recipe.sinew]'
17
+ o.integer '-l', '--limit', 'quit after emitting this many rows'
18
+ o.string '--proxy', 'use host[:port] as HTTP proxy (can be a comma-delimited list)'
19
+ o.integer '--timeout', 'maximum time allowed for the transfer'
20
+ o.bool '-s', '--silent', 'suppress some output'
21
+ o.bool '-v', '--verbose', 'dump emitted rows while running'
22
+
23
+ o.separator 'From httpdisk:'
24
+ o.string '--dir', 'set custom cache directory'
25
+ # note: uses slop_duration from HTTPDisk
26
+ o.duration '--expires', 'when to expire cached requests (ex: 1h, 2d, 3w)'
27
+ o.bool '--force', "don't read anything from cache (but still write)"
28
+ o.bool '--force-errors', "don't read errors from cache (but still write)"
29
+
30
+ # generic
31
+ o.boolean '--version', 'show version' do
32
+ puts "sinew #{Sinew::VERSION}"
33
+ exit
34
+ end
35
+ o.on('--help', 'show this help') do
36
+ puts o
37
+ exit
38
+ end
39
+ end
40
+
41
+ # recipe argument
42
+ recipe = slop.args.first
43
+ raise Slop::Error, '' if args.empty?
44
+ raise Slop::Error, 'no RECIPE specified' if !recipe
45
+ raise Slop::Error, 'more than one RECIPE specified' if slop.args.length > 1
46
+ raise Slop::Error, "#{recipe} not found" if !File.exist?(recipe)
47
+
48
+ slop.to_h.tap do
49
+ _1[:recipe] = recipe
50
+ end
51
+ end
52
+ end
53
+ end
data/lib/sinew/base.rb ADDED
@@ -0,0 +1,251 @@
1
+ require 'amazing_print'
2
+ require 'faraday-encoding'
3
+ require 'faraday/logging/formatter'
4
+ require 'faraday-rate_limiter'
5
+ require 'httpdisk'
6
+
7
+ module Sinew
8
+ # Sinew base class, for use in standalone scripts or via the sinew binary.
9
+ class Base
10
+ attr_reader :csv, :mutex, :options
11
+
12
+ def initialize(opts = {})
13
+ @mutex = Mutex.new
14
+
15
+ #
16
+ # defaults for Sloptions
17
+ #
18
+
19
+ # default :rate_limit, typically 1
20
+ default_rate_limit = ENV['SINEW_TEST'] ? 0 : 1
21
+
22
+ #
23
+ # note: uses HTTPDisk::Sloptions
24
+ #
25
+
26
+ @options = HTTPDisk::Sloptions.parse(opts) do
27
+ # cli
28
+ _1.integer :limit
29
+ _1.integer :timeout, default: 30
30
+ _1.boolean :silent
31
+ _1.on :proxy, type: [:string, Array]
32
+ _1.boolean :verbose
33
+
34
+ # httpdisk
35
+ _1.string :dir, default: File.join(ENV['HOME'], '.sinew')
36
+ _1.integer :expires
37
+ _1.boolean :force
38
+ _1.boolean :force_errors
39
+ _1.array :ignore_params
40
+
41
+ # more runtime options
42
+ _1.hash :headers
43
+ _1.boolean :insecure
44
+ _1.string :output, required: true
45
+ _1.hash :params
46
+ _1.float :rate_limit, default: default_rate_limit
47
+ _1.integer :retries, default: 2
48
+ _1.on :url_prefix, type: [:string, URI]
49
+ _1.boolean :utf8, default: true
50
+ end
51
+
52
+ @csv = CSV.new(opts[:output])
53
+ end
54
+
55
+ #
56
+ # requests
57
+ #
58
+
59
+ # http get, returns a Response
60
+ def get(url, params = nil, headers = nil)
61
+ faraday_response = faraday.get(url, params, headers) do
62
+ _1.options[:proxy] = random_proxy
63
+ end
64
+ Response.new(faraday_response)
65
+ end
66
+
67
+ # http post, returns a Response. Defaults to form body type.
68
+ def post(url, body = nil, headers = nil)
69
+ faraday_response = faraday.post(url, body, headers) do
70
+ _1.options[:proxy] = random_proxy
71
+ end
72
+ Response.new(faraday_response)
73
+ end
74
+
75
+ # http post json, returns a Response
76
+ def post_json(url, body = nil, headers = nil)
77
+ body = body.to_json
78
+ headers = (headers || {}).merge('Content-Type' => 'application/json')
79
+ post(url, body, headers)
80
+ end
81
+
82
+ # Faraday connection for this recipe
83
+ def faraday
84
+ mutex.synchronize do
85
+ @faraday ||= create_faraday
86
+ end
87
+ end
88
+
89
+ #
90
+ # httpdisk
91
+ #
92
+
93
+ # Returns true if request is cached. Defaults to form body type.
94
+ def cached?(method, url, params = nil, body = nil)
95
+ status = status(method, url, params, body)
96
+ status[:status] != 'miss'
97
+ end
98
+
99
+ # Remove cache file, if any. Defaults to form body type.
100
+ def uncache(method, url, params = nil, body = nil)
101
+ status = status(method, url, params, body)
102
+ path = status[:path]
103
+ File.unlink(path) if File.exist?(path)
104
+ end
105
+
106
+ # Check httpdisk status for this request. Defaults to form body type.
107
+ def status(method, url, params = nil, body = nil)
108
+ # if hash, default to url encoded form
109
+ # see lib/faraday/request/url_encoded.rb
110
+ if body.is_a?(Hash)
111
+ body = Faraday::Utils::ParamsHash[body].to_query
112
+ end
113
+
114
+ env = Faraday::Env.new.tap do
115
+ _1.method = method.to_s.downcase.to_sym
116
+ _1.request_headers = {}
117
+ _1.request_body = body
118
+ _1.url = faraday.build_url(url, params)
119
+ end
120
+ httpdisk.status(env)
121
+ end
122
+
123
+ #
124
+ # csv
125
+ #
126
+
127
+ # Output a csv header. This usually happens automatically, but you can call
128
+ # this method directly to ensure a consistent set of columns.
129
+ def csv_header(*columns)
130
+ csv.start(columns.flatten)
131
+ end
132
+
133
+ # Output a csv row. Row should be any object that can turn into a hash - a
134
+ # hash, OpenStruct, etc.
135
+ def csv_emit(row)
136
+ row = row.to_h
137
+ mutex.synchronize do
138
+ # header if necessary
139
+ csv_header(row.keys) if !csv.started?
140
+
141
+ # emit
142
+ print = csv.emit(row)
143
+ puts print.ai if options[:verbose]
144
+
145
+ # this is caught by Sinew::Main
146
+ if csv.count == options[:limit]
147
+ raise LimitError
148
+ end
149
+ end
150
+ end
151
+
152
+ #
153
+ # stdout
154
+ #
155
+
156
+ RESET = "\e[0m".freeze
157
+ RED = "\e[1;37;41m".freeze
158
+ GREEN = "\e[1;37;42m".freeze
159
+
160
+ # Print a nice green banner.
161
+ def banner(msg, color: GREEN)
162
+ msg = "#{msg} ".ljust(72, ' ')
163
+ msg = "[#{Time.new.strftime('%H:%M:%S')}] #{msg}"
164
+ msg = "#{color}#{msg}#{RESET}" if $stdout.tty?
165
+ puts msg
166
+ end
167
+
168
+ # Print a scary red banner and exit.
169
+ def fatal(msg)
170
+ banner(msg, color: RED)
171
+ exit 1
172
+ end
173
+
174
+ protected
175
+
176
+ # Return a random proxy.
177
+ def random_proxy
178
+ return if !options[:proxy]
179
+
180
+ proxies = options[:proxy]
181
+ proxies = proxies.split(',') if !proxies.is_a?(Array)
182
+ proxies.sample
183
+ end
184
+
185
+ # Create the Faraday connection for making requests.
186
+ def create_faraday
187
+ faraday_options = options.slice(:headers, :params)
188
+ if options[:insecure]
189
+ faraday_options[:ssl] = { verify: false }
190
+ end
191
+ Faraday.new(nil, faraday_options) do
192
+ # options
193
+ if options[:url_prefix]
194
+ _1.url_prefix = options[:url_prefix]
195
+ end
196
+ _1.options.timeout = options[:timeout]
197
+
198
+ #
199
+ # middleware that runs on both disk/network requests
200
+ #
201
+
202
+ # cookie middleware
203
+ _1.use :cookie_jar
204
+
205
+ # auto-encode form bodies
206
+ _1.request :url_encoded
207
+
208
+ # Before httpdisk so each redirect segment is cached
209
+ # Keep track of redirect status for logger
210
+ _1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
211
+
212
+ #
213
+ # httpdisk
214
+ #
215
+
216
+ httpdisk_options = options.slice(:dir, :expires, :force, :force_errors, :ignore_params, :utf8)
217
+ _1.use :httpdisk, httpdisk_options
218
+
219
+ #
220
+ # middleware below only used it httpdisk uses the network
221
+ #
222
+
223
+ # rate limit
224
+ rate_limit = options[:rate_limit]
225
+ _1.request :rate_limiter, interval: rate_limit
226
+
227
+ # After httpdisk so that only non-cached requests are logged.
228
+ # Before retry so that we don't log each retry attempt.
229
+ _1.response :logger, nil, formatter: Middleware::LogFormatter if !options[:silent]
230
+
231
+ retry_options = {
232
+ max_interval: rate_limit, # very important, negates Retry-After: 86400
233
+ max: options[:retries],
234
+ methods: %w[delete get head options patch post put trace],
235
+ retry_statuses: (500..600).to_a,
236
+ retry_if: ->(_env, _err) { true },
237
+ }
238
+ _1.request :retry, retry_options
239
+ end
240
+ end
241
+
242
+ # find connection's httpdisk instance
243
+ def httpdisk
244
+ @httpdisk ||= begin
245
+ app = faraday.app
246
+ app = app.app until app.is_a?(HTTPDisk::Client)
247
+ app
248
+ end
249
+ end
250
+ end
251
+ end