sinew 2.0.5 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.rubocop.yml +9 -6
  4. data/.vscode/settings.json +0 -10
  5. data/Gemfile +9 -0
  6. data/README.md +13 -17
  7. data/Rakefile +33 -18
  8. data/bin/sinew +2 -0
  9. data/lib/sinew.rb +0 -1
  10. data/lib/sinew/connection.rb +52 -0
  11. data/lib/sinew/connection/log_formatter.rb +22 -0
  12. data/lib/sinew/connection/rate_limit.rb +29 -0
  13. data/lib/sinew/core_ext.rb +1 -1
  14. data/lib/sinew/dsl.rb +2 -1
  15. data/lib/sinew/main.rb +7 -55
  16. data/lib/sinew/output.rb +7 -23
  17. data/lib/sinew/request.rb +20 -71
  18. data/lib/sinew/response.rb +8 -57
  19. data/lib/sinew/runtime_options.rb +4 -4
  20. data/lib/sinew/version.rb +1 -1
  21. data/sample.sinew +2 -2
  22. data/sinew.gemspec +16 -17
  23. metadata +41 -99
  24. data/.travis.yml +0 -4
  25. data/lib/sinew/cache.rb +0 -79
  26. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  27. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  28. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  29. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  30. data/test/legacy/legacy.sinew +0 -2
  31. data/test/recipes/array_header.sinew +0 -6
  32. data/test/recipes/basic.sinew +0 -8
  33. data/test/recipes/dups.sinew +0 -7
  34. data/test/recipes/implicit_header.sinew +0 -5
  35. data/test/recipes/limit.sinew +0 -11
  36. data/test/recipes/noko.sinew +0 -9
  37. data/test/recipes/uri.sinew +0 -11
  38. data/test/recipes/xml.sinew +0 -8
  39. data/test/test.html +0 -45
  40. data/test/test_cache.rb +0 -69
  41. data/test/test_helper.rb +0 -126
  42. data/test/test_legacy.rb +0 -23
  43. data/test/test_main.rb +0 -34
  44. data/test/test_nokogiri_ext.rb +0 -18
  45. data/test/test_output.rb +0 -56
  46. data/test/test_recipes.rb +0 -60
  47. data/test/test_requests.rb +0 -164
  48. data/test/test_utf8.rb +0 -39
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b383fb9d0a1d57acfafd78d8e3ff0185b81acb5bcd368d4c0cca9a8999aa0a52
4
- data.tar.gz: 0dede0f01c7a53056a38705c6fc134cd33a2f15868a9b4fee1b6f9fa85361d31
3
+ metadata.gz: df55f2168ff4242fceb31d083b8d16f1046139fa7acb8a9c4fc3f06f7884e113
4
+ data.tar.gz: 520967eba4ea2d8446690736f2c28d34642b452c0f4e5003dcb89ce373c116e5
5
5
  SHA512:
6
- metadata.gz: e867516dd43bed9f6dd524475c70b933b30029788521224eed08d269ea1264f5d52a3740e967a2cc61ec96861e1875ecaf2f43af403bd917cac05ce2fd394119
7
- data.tar.gz: 719ab64ac523e9cf553171bf318a59938655e4a40fd4c6544ec5c5188c7a81227a30b9ba7c70a6ace90a41b476a62081f2ac4cc88a490d0be2e05ade7e8b3dce
6
+ metadata.gz: 7443bccc5fc4e1bd112ce50b3d17445f0c21f5b351a6b5be586aadd63f36396312370ec6115d8116701165b3af19fcb852f85d18d0fbe7b4bf0d797312d3fa40
7
+ data.tar.gz: 9ca4f3c424e021100f518ca4f2231f515b38dbeb402ff4fce07c13a2440f19b0fabcc2a2920e51aa3f6228507550d3c94cb70a46fd22711ba3add90e4fc28004
@@ -0,0 +1,26 @@
1
+ name: test
2
+
3
+ on:
4
+ push:
5
+ paths-ignore:
6
+ - '**.md'
7
+ pull_request:
8
+ paths-ignore:
9
+ - '**.md'
10
+ workflow_dispatch:
11
+
12
+ jobs:
13
+ test:
14
+ strategy:
15
+ max-parallel: 3
16
+ matrix:
17
+ os: [ubuntu, macos]
18
+ ruby-version: [3.0, 2.7]
19
+ runs-on: ${{ matrix.os }}-latest
20
+ steps:
21
+ - uses: actions/checkout@v2
22
+ - uses: ruby/setup-ruby@v1
23
+ with:
24
+ ruby-version: ${{ matrix.ruby-version }}
25
+ - run: bundle install
26
+ - run: bundle exec rake test
data/.rubocop.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  AllCops:
2
- Exclude:
3
- TargetRubyVersion: 2.3
2
+ TargetRubyVersion: 2.7
3
+ NewCops: enable
4
4
 
5
5
  # amd: customizations
6
6
  Layout/SpaceInsideArrayLiteralBrackets:
@@ -22,18 +22,19 @@ Style/TrailingCommaInHashLiteral:
22
22
 
23
23
  # amd: these seem extreme
24
24
  Lint/AssignmentInCondition: { Enabled: false } # I do this all the time
25
- Lint/HandleExceptions: { Enabled: false } # blank rescues are useful
25
+ Lint/SuppressedException: { Enabled: false } # blank rescues are useful
26
26
  Naming/BinaryOperatorParameterName: { Enabled: false } # silly
27
27
  Naming/HeredocDelimiterNaming: { Enabled: false } # silly
28
- Naming/UncommunicativeMethodParamName: { Enabled: false } # silly
29
- Performance/RegexpMatch: { Enabled: false } # =~ is fine
30
- Performance/TimesMap: { Enabled: false } # silly
28
+ Naming/MethodParameterName: { Enabled: false } # silly
29
+ Style/AccessorGrouping: { Enabled: false } # silly
30
+ Style/AsciiComments: { Enabled: false } # silly
31
31
  Style/ClassAndModuleChildren: { Enabled: false } # silly
32
32
  Style/Documentation: { Enabled: false } # we don't need this
33
33
  Style/DoubleNegation: { Enabled: false } # silly
34
34
  Style/FormatStringToken: { Enabled: false } # we like printf here
35
35
  Style/FrozenStringLiteralComment: { Enabled: false } # seems excessive
36
36
  Style/GuardClause: { Enabled: false } # confusing
37
+ Style/HashTransformValues: { Enabled: false } # breaks code by trying to apply to an array
37
38
  Style/IfUnlessModifier: { Enabled: false } # personally I hate unless
38
39
  Style/NegatedIf: { Enabled: false } # these are fine
39
40
  Style/Next: { Enabled: false } # these are fine
@@ -41,7 +42,9 @@ Style/NumericPredicate: { Enabled: false } # silly
41
42
  Style/ParallelAssignment: { Enabled: false } # these are fine
42
43
  Style/PerlBackrefs: { Enabled: false } # these are fine
43
44
  Style/RaiseArgs: { Enabled: false } # silly
45
+ Style/RedundantAssignment: { Enabled: false } # these are usually on purpose
44
46
  Style/RegexpLiteral: { Enabled: false } # these are fine
47
+ Style/SoleNestedConditional: { Enabled: false } # these are fine
45
48
  Style/StderrPuts: { Enabled: false } # this is awful
46
49
 
47
50
  # amd: these Metric rules are annoying, disable
@@ -1,15 +1,5 @@
1
1
  {
2
- "editor.formatOnSave": true,
3
- "editor.formatOnSaveTimeout": 1500,
4
- "editor.tabSize": 2,
5
- "editor.wordSeparators": "`~#$%^&*()-=+[{]}\\|;:'\",.<>/",
6
2
  "files.associations": {
7
3
  "*.sinew": "ruby"
8
- },
9
- "files.insertFinalNewline": true,
10
- "files.trimTrailingWhitespace": true,
11
- "ruby.format": "rubocop",
12
- "ruby.lint": {
13
- "rubocop": true
14
4
  }
15
5
  }
data/Gemfile CHANGED
@@ -1,2 +1,11 @@
1
1
  source 'http://rubygems.org'
2
+
3
+ group :development do
4
+ gem 'minitest'
5
+ gem 'mocha'
6
+ gem 'rake'
7
+ gem 'rubocop', '~> 0.91.0', require: false
8
+ gem 'webmock'
9
+ end
10
+
2
11
  gemspec
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- ![Travis](https://travis-ci.org/gurgeous/sinew.svg?branch=master)
1
+ [![Build Status](https://github.com/gurgeous/sinew/workflows/test/badge.svg?branch=master)](https://github.com/gurgeous/sinew/actions)
2
2
 
3
3
  ## Welcome to Sinew
4
4
 
@@ -20,7 +20,7 @@ gem 'sinew'
20
20
 
21
21
  <!--- markdown-toc --no-firsth1 --maxdepth 1 readme.md -->
22
22
 
23
- - [Sinew 2](#sinew-2-may-2018)
23
+ - [Sinew 3](#sinew-3-may-2021)
24
24
  - [Quick Example](#quick-example)
25
25
  - [How it Works](#how-it-works)
26
26
  - [DSL Reference](#dsl-reference)
@@ -29,27 +29,21 @@ gem 'sinew'
29
29
  - [Changelog](#changelog)
30
30
  - [License](#license)
31
31
 
32
- ## Sinew 2 (May 2018)
32
+ ## Sinew 3 (May 2021)
33
33
 
34
- I am pleased to announce the release of Sinew 2.0, a complete rewrite of Sinew for the modern era. Enhancements include:
35
-
36
- - Remove dependencies on active_support, curl and tidy. We use HTTParty now.
37
- - Much easier to customize requests in `.sinew` files. For example, setting User-Agent or Bearer tokens.
38
- - More operations like `post_json` or the generic `http`. These methods are thin wrappers around HTTParty.
39
- - New end-of-run report.
40
- - Tests, rubocop, vscode settings, travis, etc.
34
+ I am pleased to announce the release of Sinew 3.0. Sinew has been streamlined and updated to use the [Faraday](https://lostisland.github.io/faraday/) HTTP client with [httpdisk](https://github.com/gurgeous/httpdisk/) middleware for caching.
41
35
 
42
36
  **Breaking change**
43
37
 
44
- Sinew uses a new format for cached responses. Old Sinew 1 cache directories must be removed before running Sinew again. Sinew 2 might choke on Sinew 1 cache directores when reading `head/`. This is not tested or supported.
38
+ Sinew 3 uses a new format for cached responses. Old Sinew 2 cache directories should be removed before running Sinew again.
45
39
 
46
40
  ## Quick Example
47
41
 
48
- Here's an example for collecting the links from httpbin.org:
42
+ Here's an example for collecting the links from httpbingo.org:
49
43
 
50
44
  ```ruby
51
45
  # get the url
52
- get "http://httpbin.org"
46
+ get "http://httpbingo.org"
53
47
 
54
48
  # use nokogiri to collect links
55
49
  noko.css("ul li a").each do |a|
@@ -114,9 +108,9 @@ Sinew creates a CSV file with the same name as the recipe, and `csv_emit(hash)`
114
108
 
115
109
  #### Caching
116
110
 
117
- Requests are made using HTTParty, and all responses are cached on disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
111
+ Sinew uses [httpdisk](https://github.com/gurgeous/httpdisk/) to aggressively cache all HTTP responses to disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
118
112
 
119
- The files in `~/.sinew` have nice names and are designed to be human readable. This helps when writing recipes. Sinew never deletes files from the cache - that's up to you!
113
+ Sinew never deletes files from the cache - that's up to you!
120
114
 
121
115
  Because all requests are cached, you can run Sinew repeatedly with confidence. Run it over and over again while you build up your recipe.
122
116
 
@@ -162,7 +156,7 @@ Writing Sinew recipes is fun and easy. The builtin caching means you can iterate
162
156
  noko.css("table")[4].css("td").select { |i| i[:width].to_i > 80 }.map(&:text)
163
157
  ```
164
158
 
165
- - Debug your recipes using plain old `puts`, or better yet use `ap` from [awesome_print](https://github.com/michaeldv/awesome_print).
159
+ - Debug your recipes using plain old `puts`, or better yet use `ap` from [amazing_print](https://github.com/amazing-print/amazing_print).
166
160
  - Run `sinew -v` to get a report on every `csv_emit`. Very handy.
167
161
  - Add the CSV files to your git repo. That way you can version them and get diffs!
168
162
 
@@ -173,8 +167,10 @@ noko.css("table")[4].css("td").select { |i| i[:width].to_i > 80 }.map(&:text)
173
167
 
174
168
  ## Changelog
175
169
 
176
- #### 2.0.5 (unreleased)
170
+ #### 3.0.0 (May 2021)
177
171
 
172
+ - Major rewrite of network and caching layer. See above.
173
+ - Use Faraday HTTP client with httpdisk middleware for caching.
178
174
  - Supports multiple proxies (`--proxy host1,host2,...`)
179
175
 
180
176
  #### 2.0.4 (May 2018)
data/Rakefile CHANGED
@@ -1,38 +1,53 @@
1
- require 'bundler'
2
1
  require 'bundler/setup'
3
2
 
4
- require 'rake'
5
3
  require 'rake/testtask'
6
4
  require 'sinew/version'
7
5
 
6
+ # load the spec, we use it below
7
+ spec = Gem::Specification.load('sinew.gemspec')
8
+
8
9
  #
9
- # gem
10
+ # testing
11
+ # don't forget about TESTOPTS="--verbose" rake
12
+ # also: rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
10
13
  #
11
14
 
12
- task gem: :build
13
- task :build do
14
- system 'gem build --quiet sinew.gemspec'
15
+ # test (default)
16
+ task default: :test
17
+
18
+ Rake::TestTask.new do
19
+ _1.libs << 'test'
20
+ _1.warning = false # sterile has a few issues here
15
21
  end
16
22
 
17
- task install: :build do
18
- system "sudo gem install --quiet sinew-#{Sinew::VERSION}.gem"
23
+ # Watch rb files, run tests whenever something changes
24
+ task :watch do
25
+ # https://superuser.com/a/665208 / https://unix.stackexchange.com/a/42288
26
+ system("while true; do find . -name '*.rb' | entr -c -d rake; test $? -gt 128 && break; done")
19
27
  end
20
28
 
21
- task release: :build do
22
- system "git tag -a #{Sinew::VERSION} -m 'Tagging #{Sinew::VERSION}'"
23
- system 'git push --tags'
24
- system "gem push sinew-#{Sinew::VERSION}.gem"
29
+ #
30
+ # rubocop
31
+ #
32
+
33
+ task :rubocop do
34
+ system('bundle exec rubocop -A .', exception: true)
25
35
  end
26
36
 
27
37
  #
28
- # minitest
38
+ # gem
29
39
  #
30
40
 
31
- Rake::TestTask.new(:test) do |t|
32
- t.warning = false
41
+ task :build do
42
+ system 'gem build --quiet sinew.gemspec', exception: true
33
43
  end
34
44
 
35
- task default: :test
45
+ task install: :build do
46
+ system "gem install --quiet sinew-#{spec.version}.gem", exception: true
47
+ end
36
48
 
37
- # to test:
38
- # block ; rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
49
+ task release: %i[rubocop test build] do
50
+ system "git tag -a #{spec.version} -m 'Tagging #{spec.version}'", exception: true
51
+ system 'git push --tags', exception: true
52
+ system "gem push sinew-#{spec.version}.gem", exception: true
53
+ end
data/bin/sinew CHANGED
@@ -15,6 +15,8 @@ options = Slop.parse do |o|
15
15
  o.bool '-q', '--quiet', 'suppress some output'
16
16
  o.integer '-l', '--limit', 'quit after emitting this many rows'
17
17
  o.string '-c', '--cache', 'set custom cache directory', default: "#{ENV['HOME']}/.sinew"
18
+ o.bool '--force', "don't read anything from cache (but still write)"
19
+ o.bool '--force-errors', "don't read errors from cache (but still write)"
18
20
  o.string '--proxy', 'use host[:port] as HTTP proxy'
19
21
  o.bool '--version', 'show version and exit'
20
22
  o.on('--help', 'show this help') do
data/lib/sinew.rb CHANGED
@@ -1,4 +1,3 @@
1
- require_relative 'sinew/cache'
2
1
  require_relative 'sinew/core_ext'
3
2
  require_relative 'sinew/dsl'
4
3
  require_relative 'sinew/main'
@@ -0,0 +1,52 @@
1
+ require 'faraday'
2
+ require 'faraday-encoding'
3
+ require 'faraday/logging/formatter'
4
+ require 'httpdisk'
5
+ require 'sinew/connection/log_formatter'
6
+ require 'sinew/connection/rate_limit'
7
+
8
+ module Sinew
9
+ module Connection
10
+ def self.create(options:, runtime_options:)
11
+ connection_options = {}
12
+ connection_options[:ssl] = { verify: false } if runtime_options.insecure
13
+
14
+ Faraday.new(nil, connection_options) do
15
+ _1.use RateLimit, rate_limit: runtime_options.rate_limit
16
+
17
+ # auto-encode form bodies
18
+ _1.request :url_encoded
19
+
20
+ # Before httpdisk so each redirect segment is cached
21
+ # Keep track of redirect status for logger
22
+ _1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
23
+
24
+ # set Ruby string encoding based on Content-Type (should be above httpdisk)
25
+ _1.response :encoding
26
+
27
+ # disk caching
28
+ httpdisk_options = {
29
+ dir: options[:cache],
30
+ force: options[:force],
31
+ force_errors: options[:force_errors],
32
+ }.merge(runtime_options.httpdisk_options)
33
+
34
+ _1.use :httpdisk, httpdisk_options
35
+
36
+ # After httpdisk so that only non-cached requests are logged.
37
+ # Before retry so that we don't log each retry attempt.
38
+ _1.response :logger, nil, formatter: LogFormatter if !options[:quiet]
39
+
40
+ # After httpdisk so transient failures are not cached
41
+ retry_options = {
42
+ interval: runtime_options.rate_limit,
43
+ max: runtime_options.retries,
44
+ methods: %w[delete get head options patch post put trace],
45
+ retry_statuses: (500..600).to_a,
46
+ retry_if: ->(_env, _err) { true },
47
+ }
48
+ _1.request :retry, retry_options
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,22 @@
1
+ module Sinew
2
+ module Connection
3
+ class LogFormatter < Faraday::Logging::Formatter
4
+ def request(env)
5
+ info('req') do
6
+ # Only log the initial request, not the redirects
7
+ return if env[:redirect]
8
+
9
+ msg = apply_filters(env.url.to_s)
10
+ msg = "#{msg} (#{env.method})" if env.method != :get
11
+ msg = "#{msg} => #{env.request.proxy.uri}" if env.request.proxy
12
+
13
+ msg
14
+ end
15
+ end
16
+
17
+ def response(env)
18
+ # silent
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,29 @@
1
+ module Sinew
2
+ module Connection
3
+ class RateLimit < Faraday::Middleware
4
+ attr_reader :rate_limit
5
+
6
+ def initialize(app, options = {})
7
+ super(app)
8
+
9
+ @last_request_tm = @current_request_tm = nil
10
+ @rate_limit = options.fetch(:rate_limit, 1)
11
+ end
12
+
13
+ def on_request(_env)
14
+ if @last_request_tm
15
+ sleep = (@last_request_tm + rate_limit) - Time.now
16
+ sleep(sleep) if sleep > 0
17
+ end
18
+
19
+ @current_request_tm = Time.now
20
+ end
21
+
22
+ def on_complete(env)
23
+ # Only rate limit on uncached requests
24
+ @last_request_tm = @current_request_tm unless env[:httpdisk]
25
+ @current_request_tm = nil
26
+ end
27
+ end
28
+ end
29
+ end
@@ -30,7 +30,7 @@ class String
30
30
  elsif limit >= size
31
31
  dup
32
32
  else
33
- self[-limit..-1]
33
+ self[-limit..]
34
34
  end
35
35
  end
36
36
 
data/lib/sinew/dsl.rb CHANGED
@@ -1,5 +1,6 @@
1
- require 'awesome_print'
1
+ require 'amazing_print'
2
2
  require 'cgi'
3
+ require 'json'
3
4
 
4
5
  #
5
6
  # The DSL available to .sinew files.
data/lib/sinew/main.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'scripto'
2
+ require 'sinew/connection'
2
3
 
3
4
  #
4
5
  # Main sinew entry point.
@@ -6,15 +7,13 @@ require 'scripto'
6
7
 
7
8
  module Sinew
8
9
  class Main < Scripto::Main
9
- attr_reader :runtime_options, :request_tm, :request_count
10
+ attr_reader :runtime_options
10
11
 
11
12
  def initialize(options)
12
13
  super(options)
13
14
 
14
15
  # init
15
16
  @runtime_options = RuntimeOptions.new
16
- @request_tm = Time.at(0)
17
- @request_count = 0
18
17
  end
19
18
 
20
19
  def run
@@ -31,24 +30,12 @@ module Sinew
31
30
  end
32
31
 
33
32
  #
34
- # http requests and caching
33
+ # http requests
35
34
  #
36
35
 
37
- def cache
38
- @cache ||= Cache.new(self)
39
- end
40
-
41
36
  def http(method, url, options = {})
42
37
  request = Request.new(self, method, url, options)
43
-
44
- # try to get from cache
45
- response = cache.get(request)
46
-
47
- # perform if necessary
48
- if !response
49
- response = perform(request)
50
- cache.set(response)
51
- end
38
+ response = request.perform(connection)
52
39
 
53
40
  # always log error messages
54
41
  if response.error?
@@ -58,26 +45,10 @@ module Sinew
58
45
  response
59
46
  end
60
47
 
61
- def perform(request)
62
- before_perform_request(request)
63
-
64
- response = nil
65
-
66
- tries = runtime_options.retries + 1
67
- while tries > 0
68
- tries -= 1
69
- begin
70
- @request_count += 1
71
- response = request.perform
72
- rescue HTTParty::RedirectionTooDeep, OpenSSL::SSL::SSLError, SocketError, SystemCallError, Timeout::Error => e
73
- response = Response.from_error(request, e)
74
- end
75
- break if !response.error_500?
76
- end
77
-
78
- response
48
+ def connection
49
+ @connection ||= Connection.create(options: options, runtime_options: runtime_options)
79
50
  end
80
- protected :perform
51
+ protected :connection
81
52
 
82
53
  #
83
54
  # output
@@ -91,25 +62,6 @@ module Sinew
91
62
  # helpers
92
63
  #
93
64
 
94
- def before_perform_request(request)
95
- # log
96
- if !quiet?
97
- msg = if request.method != 'get'
98
- "req #{request.uri} (#{request.method})"
99
- else
100
- "req #{request.uri}"
101
- end
102
- msg = "#{msg} => #{request.proxy}" if request.proxy
103
- $stderr.puts msg
104
- end
105
-
106
- # rate limit
107
- sleep = (request_tm + runtime_options.rate_limit) - Time.now
108
- sleep(sleep) if sleep > 0
109
- @request_tm = Time.now
110
- end
111
- protected :before_perform_request
112
-
113
65
  def footer
114
66
  output.report
115
67
  finished = output.count > 0 ? "Finished #{output.filename}" : 'Finished'