sinew 2.0.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.rubocop.yml +9 -6
  4. data/.vscode/settings.json +0 -10
  5. data/Gemfile +9 -0
  6. data/README.md +13 -17
  7. data/Rakefile +33 -18
  8. data/bin/sinew +2 -0
  9. data/lib/sinew.rb +0 -1
  10. data/lib/sinew/connection.rb +52 -0
  11. data/lib/sinew/connection/log_formatter.rb +22 -0
  12. data/lib/sinew/connection/rate_limit.rb +29 -0
  13. data/lib/sinew/core_ext.rb +1 -1
  14. data/lib/sinew/dsl.rb +2 -1
  15. data/lib/sinew/main.rb +7 -55
  16. data/lib/sinew/output.rb +7 -23
  17. data/lib/sinew/request.rb +20 -71
  18. data/lib/sinew/response.rb +8 -57
  19. data/lib/sinew/runtime_options.rb +4 -4
  20. data/lib/sinew/version.rb +1 -1
  21. data/sample.sinew +2 -2
  22. data/sinew.gemspec +16 -17
  23. metadata +41 -99
  24. data/.travis.yml +0 -4
  25. data/lib/sinew/cache.rb +0 -79
  26. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  27. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  28. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  29. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  30. data/test/legacy/legacy.sinew +0 -2
  31. data/test/recipes/array_header.sinew +0 -6
  32. data/test/recipes/basic.sinew +0 -8
  33. data/test/recipes/dups.sinew +0 -7
  34. data/test/recipes/implicit_header.sinew +0 -5
  35. data/test/recipes/limit.sinew +0 -11
  36. data/test/recipes/noko.sinew +0 -9
  37. data/test/recipes/uri.sinew +0 -11
  38. data/test/recipes/xml.sinew +0 -8
  39. data/test/test.html +0 -45
  40. data/test/test_cache.rb +0 -69
  41. data/test/test_helper.rb +0 -126
  42. data/test/test_legacy.rb +0 -23
  43. data/test/test_main.rb +0 -34
  44. data/test/test_nokogiri_ext.rb +0 -18
  45. data/test/test_output.rb +0 -56
  46. data/test/test_recipes.rb +0 -60
  47. data/test/test_requests.rb +0 -164
  48. data/test/test_utf8.rb +0 -39
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b383fb9d0a1d57acfafd78d8e3ff0185b81acb5bcd368d4c0cca9a8999aa0a52
4
- data.tar.gz: 0dede0f01c7a53056a38705c6fc134cd33a2f15868a9b4fee1b6f9fa85361d31
3
+ metadata.gz: df55f2168ff4242fceb31d083b8d16f1046139fa7acb8a9c4fc3f06f7884e113
4
+ data.tar.gz: 520967eba4ea2d8446690736f2c28d34642b452c0f4e5003dcb89ce373c116e5
5
5
  SHA512:
6
- metadata.gz: e867516dd43bed9f6dd524475c70b933b30029788521224eed08d269ea1264f5d52a3740e967a2cc61ec96861e1875ecaf2f43af403bd917cac05ce2fd394119
7
- data.tar.gz: 719ab64ac523e9cf553171bf318a59938655e4a40fd4c6544ec5c5188c7a81227a30b9ba7c70a6ace90a41b476a62081f2ac4cc88a490d0be2e05ade7e8b3dce
6
+ metadata.gz: 7443bccc5fc4e1bd112ce50b3d17445f0c21f5b351a6b5be586aadd63f36396312370ec6115d8116701165b3af19fcb852f85d18d0fbe7b4bf0d797312d3fa40
7
+ data.tar.gz: 9ca4f3c424e021100f518ca4f2231f515b38dbeb402ff4fce07c13a2440f19b0fabcc2a2920e51aa3f6228507550d3c94cb70a46fd22711ba3add90e4fc28004
@@ -0,0 +1,26 @@
1
+ name: test
2
+
3
+ on:
4
+ push:
5
+ paths-ignore:
6
+ - '**.md'
7
+ pull_request:
8
+ paths-ignore:
9
+ - '**.md'
10
+ workflow_dispatch:
11
+
12
+ jobs:
13
+ test:
14
+ strategy:
15
+ max-parallel: 3
16
+ matrix:
17
+ os: [ubuntu, macos]
18
+ ruby-version: [3.0, 2.7]
19
+ runs-on: ${{ matrix.os }}-latest
20
+ steps:
21
+ - uses: actions/checkout@v2
22
+ - uses: ruby/setup-ruby@v1
23
+ with:
24
+ ruby-version: ${{ matrix.ruby-version }}
25
+ - run: bundle install
26
+ - run: bundle exec rake test
data/.rubocop.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  AllCops:
2
- Exclude:
3
- TargetRubyVersion: 2.3
2
+ TargetRubyVersion: 2.7
3
+ NewCops: enable
4
4
 
5
5
  # amd: customizations
6
6
  Layout/SpaceInsideArrayLiteralBrackets:
@@ -22,18 +22,19 @@ Style/TrailingCommaInHashLiteral:
22
22
 
23
23
  # amd: these seem extreme
24
24
  Lint/AssignmentInCondition: { Enabled: false } # I do this all the time
25
- Lint/HandleExceptions: { Enabled: false } # blank rescues are useful
25
+ Lint/SuppressedException: { Enabled: false } # blank rescues are useful
26
26
  Naming/BinaryOperatorParameterName: { Enabled: false } # silly
27
27
  Naming/HeredocDelimiterNaming: { Enabled: false } # silly
28
- Naming/UncommunicativeMethodParamName: { Enabled: false } # silly
29
- Performance/RegexpMatch: { Enabled: false } # =~ is fine
30
- Performance/TimesMap: { Enabled: false } # silly
28
+ Naming/MethodParameterName: { Enabled: false } # silly
29
+ Style/AccessorGrouping: { Enabled: false } # silly
30
+ Style/AsciiComments: { Enabled: false } # silly
31
31
  Style/ClassAndModuleChildren: { Enabled: false } # silly
32
32
  Style/Documentation: { Enabled: false } # we don't need this
33
33
  Style/DoubleNegation: { Enabled: false } # silly
34
34
  Style/FormatStringToken: { Enabled: false } # we like printf here
35
35
  Style/FrozenStringLiteralComment: { Enabled: false } # seems excessive
36
36
  Style/GuardClause: { Enabled: false } # confusing
37
+ Style/HashTransformValues: { Enabled: false } # breaks code by trying to apply to an array
37
38
  Style/IfUnlessModifier: { Enabled: false } # personally I hate unless
38
39
  Style/NegatedIf: { Enabled: false } # these are fine
39
40
  Style/Next: { Enabled: false } # these are fine
@@ -41,7 +42,9 @@ Style/NumericPredicate: { Enabled: false } # silly
41
42
  Style/ParallelAssignment: { Enabled: false } # these are fine
42
43
  Style/PerlBackrefs: { Enabled: false } # these are fine
43
44
  Style/RaiseArgs: { Enabled: false } # silly
45
+ Style/RedundantAssignment: { Enabled: false } # these are usually on purpose
44
46
  Style/RegexpLiteral: { Enabled: false } # these are fine
47
+ Style/SoleNestedConditional: { Enabled: false } # these are fine
45
48
  Style/StderrPuts: { Enabled: false } # this is awful
46
49
 
47
50
  # amd: these Metric rules are annoying, disable
@@ -1,15 +1,5 @@
1
1
  {
2
- "editor.formatOnSave": true,
3
- "editor.formatOnSaveTimeout": 1500,
4
- "editor.tabSize": 2,
5
- "editor.wordSeparators": "`~#$%^&*()-=+[{]}\\|;:'\",.<>/",
6
2
  "files.associations": {
7
3
  "*.sinew": "ruby"
8
- },
9
- "files.insertFinalNewline": true,
10
- "files.trimTrailingWhitespace": true,
11
- "ruby.format": "rubocop",
12
- "ruby.lint": {
13
- "rubocop": true
14
4
  }
15
5
  }
data/Gemfile CHANGED
@@ -1,2 +1,11 @@
1
1
  source 'http://rubygems.org'
2
+
3
+ group :development do
4
+ gem 'minitest'
5
+ gem 'mocha'
6
+ gem 'rake'
7
+ gem 'rubocop', '~> 0.91.0', require: false
8
+ gem 'webmock'
9
+ end
10
+
2
11
  gemspec
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- ![Travis](https://travis-ci.org/gurgeous/sinew.svg?branch=master)
1
+ [![Build Status](https://github.com/gurgeous/sinew/workflows/test/badge.svg?branch=master)](https://github.com/gurgeous/sinew/actions)
2
2
 
3
3
  ## Welcome to Sinew
4
4
 
@@ -20,7 +20,7 @@ gem 'sinew'
20
20
 
21
21
  <!--- markdown-toc --no-firsth1 --maxdepth 1 readme.md -->
22
22
 
23
- - [Sinew 2](#sinew-2-may-2018)
23
+ - [Sinew 3](#sinew-3-may-2021)
24
24
  - [Quick Example](#quick-example)
25
25
  - [How it Works](#how-it-works)
26
26
  - [DSL Reference](#dsl-reference)
@@ -29,27 +29,21 @@ gem 'sinew'
29
29
  - [Changelog](#changelog)
30
30
  - [License](#license)
31
31
 
32
- ## Sinew 2 (May 2018)
32
+ ## Sinew 3 (May 2021)
33
33
 
34
- I am pleased to announce the release of Sinew 2.0, a complete rewrite of Sinew for the modern era. Enhancements include:
35
-
36
- - Remove dependencies on active_support, curl and tidy. We use HTTParty now.
37
- - Much easier to customize requests in `.sinew` files. For example, setting User-Agent or Bearer tokens.
38
- - More operations like `post_json` or the generic `http`. These methods are thin wrappers around HTTParty.
39
- - New end-of-run report.
40
- - Tests, rubocop, vscode settings, travis, etc.
34
+ I am pleased to announce the release of Sinew 3.0. Sinew has been streamlined and updated to use the [Faraday](https://lostisland.github.io/faraday/) HTTP client with [httpdisk](https://github.com/gurgeous/httpdisk/) middleware for caching.
41
35
 
42
36
  **Breaking change**
43
37
 
44
- Sinew uses a new format for cached responses. Old Sinew 1 cache directories must be removed before running Sinew again. Sinew 2 might choke on Sinew 1 cache directories when reading `head/`. This is not tested or supported.
38
+ Sinew 3 uses a new format for cached responses. Old Sinew 2 cache directories should be removed before running Sinew again.
45
39
 
46
40
  ## Quick Example
47
41
 
48
- Here's an example for collecting the links from httpbin.org:
42
+ Here's an example for collecting the links from httpbingo.org:
49
43
 
50
44
  ```ruby
51
45
  # get the url
52
- get "http://httpbin.org"
46
+ get "http://httpbingo.org"
53
47
 
54
48
  # use nokogiri to collect links
55
49
  noko.css("ul li a").each do |a|
@@ -114,9 +108,9 @@ Sinew creates a CSV file with the same name as the recipe, and `csv_emit(hash)`
114
108
 
115
109
  #### Caching
116
110
 
117
- Requests are made using HTTParty, and all responses are cached on disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
111
+ Sinew uses [httpdisk](https://github.com/gurgeous/httpdisk/) to aggressively cache all HTTP responses to disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
118
112
 
119
- The files in `~/.sinew` have nice names and are designed to be human readable. This helps when writing recipes. Sinew never deletes files from the cache - that's up to you!
113
+ Sinew never deletes files from the cache - that's up to you!
120
114
 
121
115
  Because all requests are cached, you can run Sinew repeatedly with confidence. Run it over and over again while you build up your recipe.
122
116
 
@@ -162,7 +156,7 @@ Writing Sinew recipes is fun and easy. The builtin caching means you can iterate
162
156
  noko.css("table")[4].css("td").select { |i| i[:width].to_i > 80 }.map(&:text)
163
157
  ```
164
158
 
165
- - Debug your recipes using plain old `puts`, or better yet use `ap` from [awesome_print](https://github.com/michaeldv/awesome_print).
159
+ - Debug your recipes using plain old `puts`, or better yet use `ap` from [amazing_print](https://github.com/amazing-print/amazing_print).
166
160
  - Run `sinew -v` to get a report on every `csv_emit`. Very handy.
167
161
  - Add the CSV files to your git repo. That way you can version them and get diffs!
168
162
 
@@ -173,8 +167,10 @@ noko.css("table")[4].css("td").select { |i| i[:width].to_i > 80 }.map(&:text)
173
167
 
174
168
  ## Changelog
175
169
 
176
- #### 2.0.5 (unreleased)
170
+ #### 3.0.0 (May 2021)
177
171
 
172
+ - Major rewrite of network and caching layer. See above.
173
+ - Use Faraday HTTP client with httpdisk middleware for caching.
178
174
  - Supports multiple proxies (`--proxy host1,host2,...`)
179
175
 
180
176
  #### 2.0.4 (May 2018)
data/Rakefile CHANGED
@@ -1,38 +1,53 @@
1
- require 'bundler'
2
1
  require 'bundler/setup'
3
2
 
4
- require 'rake'
5
3
  require 'rake/testtask'
6
4
  require 'sinew/version'
7
5
 
6
+ # load the spec, we use it below
7
+ spec = Gem::Specification.load('sinew.gemspec')
8
+
8
9
  #
9
- # gem
10
+ # testing
11
+ # don't forget about TESTOPTS="--verbose" rake
12
+ # also: rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
10
13
  #
11
14
 
12
- task gem: :build
13
- task :build do
14
- system 'gem build --quiet sinew.gemspec'
15
+ # test (default)
16
+ task default: :test
17
+
18
+ Rake::TestTask.new do
19
+ _1.libs << 'test'
20
+ _1.warning = false # sterile has a few issues here
15
21
  end
16
22
 
17
- task install: :build do
18
- system "sudo gem install --quiet sinew-#{Sinew::VERSION}.gem"
23
+ # Watch rb files, run tests whenever something changes
24
+ task :watch do
25
+ # https://superuser.com/a/665208 / https://unix.stackexchange.com/a/42288
26
+ system("while true; do find . -name '*.rb' | entr -c -d rake; test $? -gt 128 && break; done")
19
27
  end
20
28
 
21
- task release: :build do
22
- system "git tag -a #{Sinew::VERSION} -m 'Tagging #{Sinew::VERSION}'"
23
- system 'git push --tags'
24
- system "gem push sinew-#{Sinew::VERSION}.gem"
29
+ #
30
+ # rubocop
31
+ #
32
+
33
+ task :rubocop do
34
+ system('bundle exec rubocop -A .', exception: true)
25
35
  end
26
36
 
27
37
  #
28
- # minitest
38
+ # gem
29
39
  #
30
40
 
31
- Rake::TestTask.new(:test) do |t|
32
- t.warning = false
41
+ task :build do
42
+ system 'gem build --quiet sinew.gemspec', exception: true
33
43
  end
34
44
 
35
- task default: :test
45
+ task install: :build do
46
+ system "gem install --quiet sinew-#{spec.version}.gem", exception: true
47
+ end
36
48
 
37
- # to test:
38
- # block ; rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
49
+ task release: %i[rubocop test build] do
50
+ system "git tag -a #{spec.version} -m 'Tagging #{spec.version}'", exception: true
51
+ system 'git push --tags', exception: true
52
+ system "gem push sinew-#{spec.version}.gem", exception: true
53
+ end
data/bin/sinew CHANGED
@@ -15,6 +15,8 @@ options = Slop.parse do |o|
15
15
  o.bool '-q', '--quiet', 'suppress some output'
16
16
  o.integer '-l', '--limit', 'quit after emitting this many rows'
17
17
  o.string '-c', '--cache', 'set custom cache directory', default: "#{ENV['HOME']}/.sinew"
18
+ o.bool '--force', "don't read anything from cache (but still write)"
19
+ o.bool '--force-errors', "don't read errors from cache (but still write)"
18
20
  o.string '--proxy', 'use host[:port] as HTTP proxy'
19
21
  o.bool '--version', 'show version and exit'
20
22
  o.on('--help', 'show this help') do
data/lib/sinew.rb CHANGED
@@ -1,4 +1,3 @@
1
- require_relative 'sinew/cache'
2
1
  require_relative 'sinew/core_ext'
3
2
  require_relative 'sinew/dsl'
4
3
  require_relative 'sinew/main'
@@ -0,0 +1,52 @@
1
+ require 'faraday'
2
+ require 'faraday-encoding'
3
+ require 'faraday/logging/formatter'
4
+ require 'httpdisk'
5
+ require 'sinew/connection/log_formatter'
6
+ require 'sinew/connection/rate_limit'
7
+
8
+ module Sinew
9
+ module Connection
10
+ def self.create(options:, runtime_options:)
11
+ connection_options = {}
12
+ connection_options[:ssl] = { verify: false } if runtime_options.insecure
13
+
14
+ Faraday.new(nil, connection_options) do
15
+ _1.use RateLimit, rate_limit: runtime_options.rate_limit
16
+
17
+ # auto-encode form bodies
18
+ _1.request :url_encoded
19
+
20
+ # Before httpdisk so each redirect segment is cached
21
+ # Keep track of redirect status for logger
22
+ _1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
23
+
24
+ # set Ruby string encoding based on Content-Type (should be above httpdisk)
25
+ _1.response :encoding
26
+
27
+ # disk caching
28
+ httpdisk_options = {
29
+ dir: options[:cache],
30
+ force: options[:force],
31
+ force_errors: options[:force_errors],
32
+ }.merge(runtime_options.httpdisk_options)
33
+
34
+ _1.use :httpdisk, httpdisk_options
35
+
36
+ # After httpdisk so that only non-cached requests are logged.
37
+ # Before retry so that we don't log each retry attempt.
38
+ _1.response :logger, nil, formatter: LogFormatter if !options[:quiet]
39
+
40
+ # After httpdisk so transient failures are not cached
41
+ retry_options = {
42
+ interval: runtime_options.rate_limit,
43
+ max: runtime_options.retries,
44
+ methods: %w[delete get head options patch post put trace],
45
+ retry_statuses: (500..600).to_a,
46
+ retry_if: ->(_env, _err) { true },
47
+ }
48
+ _1.request :retry, retry_options
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,22 @@
1
+ module Sinew
2
+ module Connection
3
+ class LogFormatter < Faraday::Logging::Formatter
4
+ def request(env)
5
+ info('req') do
6
+ # Only log the initial request, not the redirects
7
+ return if env[:redirect]
8
+
9
+ msg = apply_filters(env.url.to_s)
10
+ msg = "#{msg} (#{env.method})" if env.method != :get
11
+ msg = "#{msg} => #{env.request.proxy.uri}" if env.request.proxy
12
+
13
+ msg
14
+ end
15
+ end
16
+
17
+ def response(env)
18
+ # silent
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,29 @@
1
+ module Sinew
2
+ module Connection
3
+ class RateLimit < Faraday::Middleware
4
+ attr_reader :rate_limit
5
+
6
+ def initialize(app, options = {})
7
+ super(app)
8
+
9
+ @last_request_tm = @current_request_tm = nil
10
+ @rate_limit = options.fetch(:rate_limit, 1)
11
+ end
12
+
13
+ def on_request(_env)
14
+ if @last_request_tm
15
+ sleep = (@last_request_tm + rate_limit) - Time.now
16
+ sleep(sleep) if sleep > 0
17
+ end
18
+
19
+ @current_request_tm = Time.now
20
+ end
21
+
22
+ def on_complete(env)
23
+ # Only rate limit on uncached requests
24
+ @last_request_tm = @current_request_tm unless env[:httpdisk]
25
+ @current_request_tm = nil
26
+ end
27
+ end
28
+ end
29
+ end
@@ -30,7 +30,7 @@ class String
30
30
  elsif limit >= size
31
31
  dup
32
32
  else
33
- self[-limit..-1]
33
+ self[-limit..]
34
34
  end
35
35
  end
36
36
 
data/lib/sinew/dsl.rb CHANGED
@@ -1,5 +1,6 @@
1
- require 'awesome_print'
1
+ require 'amazing_print'
2
2
  require 'cgi'
3
+ require 'json'
3
4
 
4
5
  #
5
6
  # The DSL available to .sinew files.
data/lib/sinew/main.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'scripto'
2
+ require 'sinew/connection'
2
3
 
3
4
  #
4
5
  # Main sinew entry point.
@@ -6,15 +7,13 @@ require 'scripto'
6
7
 
7
8
  module Sinew
8
9
  class Main < Scripto::Main
9
- attr_reader :runtime_options, :request_tm, :request_count
10
+ attr_reader :runtime_options
10
11
 
11
12
  def initialize(options)
12
13
  super(options)
13
14
 
14
15
  # init
15
16
  @runtime_options = RuntimeOptions.new
16
- @request_tm = Time.at(0)
17
- @request_count = 0
18
17
  end
19
18
 
20
19
  def run
@@ -31,24 +30,12 @@ module Sinew
31
30
  end
32
31
 
33
32
  #
34
- # http requests and caching
33
+ # http requests
35
34
  #
36
35
 
37
- def cache
38
- @cache ||= Cache.new(self)
39
- end
40
-
41
36
  def http(method, url, options = {})
42
37
  request = Request.new(self, method, url, options)
43
-
44
- # try to get from cache
45
- response = cache.get(request)
46
-
47
- # perform if necessary
48
- if !response
49
- response = perform(request)
50
- cache.set(response)
51
- end
38
+ response = request.perform(connection)
52
39
 
53
40
  # always log error messages
54
41
  if response.error?
@@ -58,26 +45,10 @@ module Sinew
58
45
  response
59
46
  end
60
47
 
61
- def perform(request)
62
- before_perform_request(request)
63
-
64
- response = nil
65
-
66
- tries = runtime_options.retries + 1
67
- while tries > 0
68
- tries -= 1
69
- begin
70
- @request_count += 1
71
- response = request.perform
72
- rescue HTTParty::RedirectionTooDeep, OpenSSL::SSL::SSLError, SocketError, SystemCallError, Timeout::Error => e
73
- response = Response.from_error(request, e)
74
- end
75
- break if !response.error_500?
76
- end
77
-
78
- response
48
+ def connection
49
+ @connection ||= Connection.create(options: options, runtime_options: runtime_options)
79
50
  end
80
- protected :perform
51
+ protected :connection
81
52
 
82
53
  #
83
54
  # output
@@ -91,25 +62,6 @@ module Sinew
91
62
  # helpers
92
63
  #
93
64
 
94
- def before_perform_request(request)
95
- # log
96
- if !quiet?
97
- msg = if request.method != 'get'
98
- "req #{request.uri} (#{request.method})"
99
- else
100
- "req #{request.uri}"
101
- end
102
- msg = "#{msg} => #{request.proxy}" if request.proxy
103
- $stderr.puts msg
104
- end
105
-
106
- # rate limit
107
- sleep = (request_tm + runtime_options.rate_limit) - Time.now
108
- sleep(sleep) if sleep > 0
109
- @request_tm = Time.now
110
- end
111
- protected :before_perform_request
112
-
113
65
  def footer
114
66
  output.report
115
67
  finished = output.count > 0 ? "Finished #{output.filename}" : 'Finished'