sinew 2.0.2 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.rubocop.yml +9 -6
  4. data/.vscode/settings.json +0 -10
  5. data/Gemfile +9 -0
  6. data/README.md +62 -54
  7. data/Rakefile +33 -18
  8. data/bin/sinew +2 -0
  9. data/lib/sinew.rb +0 -1
  10. data/lib/sinew/connection.rb +52 -0
  11. data/lib/sinew/connection/log_formatter.rb +22 -0
  12. data/lib/sinew/connection/rate_limit.rb +29 -0
  13. data/lib/sinew/core_ext.rb +1 -1
  14. data/lib/sinew/dsl.rb +10 -6
  15. data/lib/sinew/main.rb +29 -56
  16. data/lib/sinew/output.rb +7 -16
  17. data/lib/sinew/request.rb +22 -87
  18. data/lib/sinew/response.rb +8 -57
  19. data/lib/sinew/runtime_options.rb +4 -4
  20. data/lib/sinew/version.rb +1 -1
  21. data/sample.sinew +2 -2
  22. data/sinew.gemspec +16 -18
  23. metadata +38 -110
  24. data/.travis.yml +0 -4
  25. data/lib/sinew/cache.rb +0 -79
  26. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  27. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  28. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  29. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  30. data/test/legacy/legacy.sinew +0 -2
  31. data/test/recipes/array_header.sinew +0 -6
  32. data/test/recipes/basic.sinew +0 -8
  33. data/test/recipes/dups.sinew +0 -7
  34. data/test/recipes/implicit_header.sinew +0 -5
  35. data/test/recipes/limit.sinew +0 -11
  36. data/test/recipes/noko.sinew +0 -9
  37. data/test/recipes/uri.sinew +0 -11
  38. data/test/recipes/xml.sinew +0 -8
  39. data/test/test.html +0 -45
  40. data/test/test_cache.rb +0 -69
  41. data/test/test_helper.rb +0 -123
  42. data/test/test_legacy.rb +0 -23
  43. data/test/test_main.rb +0 -34
  44. data/test/test_nokogiri_ext.rb +0 -18
  45. data/test/test_output.rb +0 -56
  46. data/test/test_recipes.rb +0 -60
  47. data/test/test_requests.rb +0 -135
  48. data/test/test_utf8.rb +0 -39
data/lib/sinew/connection/rate_limit.rb ADDED
@@ -0,0 +1,29 @@
1
+ module Sinew
2
+ module Connection
3
+ class RateLimit < Faraday::Middleware
4
+ attr_reader :rate_limit
5
+
6
+ def initialize(app, options = {})
7
+ super(app)
8
+
9
+ @last_request_tm = @current_request_tm = nil
10
+ @rate_limit = options.fetch(:rate_limit, 1)
11
+ end
12
+
13
+ def on_request(_env)
14
+ if @last_request_tm
15
+ sleep = (@last_request_tm + rate_limit) - Time.now
16
+ sleep(sleep) if sleep > 0
17
+ end
18
+
19
+ @current_request_tm = Time.now
20
+ end
21
+
22
+ def on_complete(env)
23
+ # Only rate limit on uncached requests
24
+ @last_request_tm = @current_request_tm unless env[:httpdisk]
25
+ @current_request_tm = nil
26
+ end
27
+ end
28
+ end
29
+ end
data/lib/sinew/core_ext.rb CHANGED
@@ -30,7 +30,7 @@ class String
30
30
  elsif limit >= size
31
31
  dup
32
32
  else
33
- self[-limit..-1]
33
+ self[-limit..]
34
34
  end
35
35
  end
36
36
 
data/lib/sinew/dsl.rb CHANGED
@@ -1,5 +1,6 @@
1
- require 'awesome_print'
1
+ require 'amazing_print'
2
2
  require 'cgi'
3
+ require 'json'
3
4
 
4
5
  #
5
6
  # The DSL available to .sinew files.
@@ -10,7 +11,7 @@ module Sinew
10
11
  # this is used to break out of --limit
11
12
  class LimitError < StandardError; end
12
13
 
13
- attr_reader :sinew, :raw, :uri, :elapsed
14
+ attr_reader :sinew, :uri, :raw, :code, :elapsed
14
15
 
15
16
  def initialize(sinew)
16
17
  @sinew = sinew
@@ -52,14 +53,17 @@ module Sinew
52
53
  end
53
54
 
54
55
  def http(method, url, options = {})
55
- # reset
56
- instance_variables.each do |i|
57
- instance_variable_set(i, nil) if i != :@sinew
56
+ # these need to be cleared before each request
57
+ %i[@html @noko @xml @json].each do |i|
58
+ instance_variable_set(i, nil)
58
59
  end
59
60
 
60
61
  # fetch and make response available to callers
61
62
  response = sinew.http(method, url, options)
62
- @uri, @raw = response.uri, response.body
63
+ @uri, @raw, @code = response.uri, response.body, response.code
64
+
65
+ # don't confuse the user
66
+ nil
63
67
  end
64
68
 
65
69
  #
data/lib/sinew/main.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'scripto'
2
+ require 'sinew/connection'
2
3
 
3
4
  #
4
5
  # Main sinew entry point.
@@ -6,21 +7,13 @@ require 'scripto'
6
7
 
7
8
  module Sinew
8
9
  class Main < Scripto::Main
9
- attr_reader :runtime_options, :request_tm, :request_count
10
+ attr_reader :runtime_options
10
11
 
11
12
  def initialize(options)
12
13
  super(options)
13
14
 
14
15
  # init
15
16
  @runtime_options = RuntimeOptions.new
16
- @request_tm = Time.at(0)
17
- @request_count = 0
18
-
19
- if options[:proxy]
20
- addr, port = options[:proxy].split(':')
21
- runtime_options.httparty_options[:http_proxyaddr] = addr
22
- runtime_options.httparty_options[:http_proxyport] = port || 80
23
- end
24
17
  end
25
18
 
26
19
  def run
@@ -37,24 +30,12 @@ module Sinew
37
30
  end
38
31
 
39
32
  #
40
- # http requests and caching
33
+ # http requests
41
34
  #
42
35
 
43
- def cache
44
- @cache ||= Cache.new(self)
45
- end
46
-
47
36
  def http(method, url, options = {})
48
- request = Request.new(self, method, url, options)
49
-
50
- # try to get from cache
51
- response = cache.get(request)
52
-
53
- # perform if necessary
54
- if !response
55
- response = perform(request)
56
- cache.set(response)
57
- end
37
+ request = Request.new(method, url, request_options(options))
38
+ response = request.perform(connection)
58
39
 
59
40
  # always log error messages
60
41
  if response.error?
@@ -64,26 +45,10 @@ module Sinew
64
45
  response
65
46
  end
66
47
 
67
- def perform(request)
68
- before_perform_request(request)
69
-
70
- response = nil
71
-
72
- tries = runtime_options.retries + 1
73
- while tries > 0
74
- tries -= 1
75
- begin
76
- @request_count += 1
77
- response = request.perform
78
- rescue Timeout::Error
79
- response = Response.from_timeout(request)
80
- end
81
- break if !response.error_500?
82
- end
83
-
84
- response
48
+ def connection
49
+ @connection ||= Connection.create(options: options, runtime_options: runtime_options)
85
50
  end
86
- protected :perform
51
+ protected :connection
87
52
 
88
53
  #
89
54
  # output
@@ -97,23 +62,31 @@ module Sinew
97
62
  # helpers
98
63
  #
99
64
 
100
- def before_perform_request(request)
101
- # log
102
- if !quiet?
103
- msg = if request.method != 'get'
104
- "req #{request.uri} (#{request.method})"
105
- else
106
- "req #{request.uri}"
65
+ def request_options(options)
66
+ options.dup.tap do |req|
67
+ req[:headers] = {}.tap do |h|
68
+ [ runtime_options.headers, options[:headers]].each do
69
+ h.merge!(_1) if _1
70
+ end
107
71
  end
108
- $stderr.puts msg
72
+ req[:proxy] = random_proxy
73
+ end
74
+ end
75
+ protected :request_options
76
+
77
+ PROXY_RE = /\A#{URI::PATTERN::HOST}(:\d+)?\Z/.freeze
78
+
79
+ def random_proxy
80
+ return if !options[:proxy]
81
+
82
+ proxy = options[:proxy].split(',').sample
83
+ if proxy !~ PROXY_RE
84
+ raise ArgumentError, "invalid proxy #{proxy.inspect}, should be host[:port]"
109
85
  end
110
86
 
111
- # rate limit
112
- sleep = (request_tm + runtime_options.rate_limit) - Time.now
113
- sleep(sleep) if sleep > 0
114
- @request_tm = Time.now
87
+ "http://#{proxy}"
115
88
  end
116
- protected :before_perform_request
89
+ protected :random_proxy
117
90
 
118
91
  def footer
119
92
  output.report
data/lib/sinew/output.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'csv'
2
2
  require 'set'
3
- require 'stringex'
3
+ require 'sterile'
4
4
 
5
5
  #
6
6
  # CSV output.
@@ -45,6 +45,7 @@ module Sinew
45
45
 
46
46
  # don't allow duplicate urls
47
47
  return if dup_url?(row)
48
+
48
49
  rows << row.dup
49
50
 
50
51
  # map columns to row, and normalize along the way
@@ -101,24 +102,14 @@ module Sinew
101
102
  # strip html tags. Note that we replace tags with spaces
102
103
  s = s.gsub(/<[^>]+>/, ' ')
103
104
 
104
- #
105
- # Below uses stringex
106
- #
107
- # github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
108
- # github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
109
- #
110
-
111
105
  # Converts MS Word 'smart punctuation' to ASCII
112
- s = s.convert_smart_punctuation
113
-
114
- # "&aacute;".convert_accented_html_entities # => "a"
115
- s = s.convert_accented_html_entities
106
+ s = Sterile.plain_format(s)
116
107
 
117
- # &amp, &frac, etc.
118
- s = s.convert_miscellaneous_html_entities
108
+ # &aacute; &amp; etc.
109
+ s = Sterile.decode_entities(s)
119
110
 
120
- # convert unicode => regular characters
121
- s = s.to_ascii
111
+ # "šţɽĩɳģ" => "string"
112
+ s = Sterile.transliterate(s)
122
113
 
123
114
  # squish
124
115
  s = s.squish
data/lib/sinew/request.rb CHANGED
@@ -1,61 +1,57 @@
1
- require 'digest/md5'
2
- require 'httparty'
3
- require 'htmlentities'
1
+ require 'sterile'
4
2
 
5
3
  #
6
- # Process a single HTTP request. Mostly a wrapper around HTTParty.
4
+ # Process a single HTTP request.
7
5
  #
8
6
 
9
7
  module Sinew
10
8
  class Error < StandardError; end
11
9
 
12
10
  class Request
13
- HTML_ENTITIES = HTMLEntities.new
14
11
  VALID_METHODS = %w[get post patch put delete head options].freeze
12
+ METHODS_WITH_BODY = %w[patch post put].freeze
15
13
 
16
- attr_reader :sinew, :method, :uri, :options, :cache_key
14
+ attr_reader :method, :options, :uri
17
15
 
18
- # Options are largely compatible with HTTParty, except for :method.
19
- def initialize(sinew, method, url, options = {})
20
- @sinew = sinew
16
+ # Supported options:
17
+ # body: Body of http post
18
+ # headers: Hash of HTTP headers (combined with runtime_options.headers)
19
+ # query: Hash of query parameters to add to url
20
+ def initialize(method, url, options = {})
21
21
  @method = method
22
22
  @options = options.dup
23
23
  @uri = parse_url(url)
24
- @cache_key = calculate_cache_key
25
24
  end
26
25
 
27
26
  # run the request, return the result
28
- def perform
27
+ def perform(connection)
29
28
  validate!
30
29
 
31
- # merge optons
32
- options = self.options.merge(sinew.runtime_options.httparty_options)
33
-
34
- # merge headers
35
- headers = sinew.runtime_options.headers
36
- headers = headers.merge(options[:headers]) if options[:headers]
37
- options[:headers] = headers
30
+ body = options.delete(:body)
31
+ fday_response = connection.send(method, uri, body) do
32
+ _1.headers.update(options[:headers]) if options[:headers]
33
+ _1.options[:proxy] = options[:proxy]
34
+ end
38
35
 
39
- party_response = HTTParty.send(method, uri, options)
40
- Response.from_network(self, party_response)
36
+ Response.from_network(self, fday_response)
41
37
  end
42
38
 
43
39
  # We accept sloppy urls and attempt to clean them up
44
40
  def parse_url(url)
45
- s = url
41
+ s = url.to_s
46
42
 
47
43
  # remove entities
48
- s = HTML_ENTITIES.decode(s)
44
+ s = Sterile.decode_entities(s)
49
45
 
50
46
  # fix a couple of common encoding bugs
51
47
  s = s.gsub(' ', '%20')
52
48
  s = s.gsub("'", '%27')
53
49
 
54
- # append query manually (instead of letting HTTParty handle it) so we can
55
- # include it in cache_key
50
+ # append query manually (instead of letting Faraday handle it) for consistent
51
+ # Request#uri and Response#uri
56
52
  query = options.delete(:query)
57
53
  if query.present?
58
- q = HTTParty::HashConversions.to_params(query)
54
+ q = Faraday::Utils.default_params_encoder.encode(query)
59
55
  separator = s.include?('?') ? '&' : '?'
60
56
  s = "#{s}#{separator}#{q}"
61
57
  end
@@ -64,44 +60,10 @@ module Sinew
64
60
  end
65
61
  protected :parse_url
66
62
 
67
- def calculate_cache_key
68
- dir = pathify(uri.host)
69
-
70
- body_key = if body.is_a?(Hash)
71
- HTTParty::HashConversions.to_params(body)
72
- else
73
- body&.dup
74
- end
75
-
76
- # build key, as a hash for before_generate_cache_key
77
- key = {
78
- method: method.dup,
79
- path: uri.path,
80
- query: uri.query,
81
- body: body_key,
82
- }
83
- key = sinew.runtime_options.before_generate_cache_key.call(key)
84
-
85
- # strip method for gets
86
- key.delete(:method) if key[:method] == 'get'
87
-
88
- # pull out the values, join and pathify
89
- path = key.values.select(&:present?).join(',')
90
- path = pathify(path)
91
-
92
- # shorten long paths
93
- if path.length > 250
94
- path = Digest::MD5.hexdigest(path)
95
- end
96
-
97
- "#{dir}/#{path}"
98
- end
99
- protected :calculate_cache_key
100
-
101
63
  def validate!
102
64
  raise "invalid method #{method}" if !VALID_METHODS.include?(method)
103
65
  raise "invalid url #{uri}" if uri.scheme !~ /^http/
104
- raise "can't get with a body" if method == 'get' && body
66
+ raise "can't #{method} with a body" if body && !METHODS_WITH_BODY.include?(method)
105
67
  raise "Content-Type doesn't make sense without a body" if content_type && !body
106
68
  end
107
69
  protected :validate!
@@ -120,32 +82,5 @@ module Sinew
120
82
  headers && headers['Content-Type']
121
83
  end
122
84
  protected :content_type
123
-
124
- def form?
125
- content_type == 'application/x-www-form-urlencoded'
126
- end
127
- protected :form?
128
-
129
- def pathify(s)
130
- # remove leading slash
131
- s = s.gsub(/^\//, '')
132
- # .. => comma
133
- s = s.gsub('..', ',')
134
- # query separators => comma
135
- s = s.gsub(/[?\/&]/, ',')
136
- # ,, => comma
137
- s = s.gsub(',,', ',')
138
- # encode invalid path chars
139
- s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
140
- hex = i.unpack('H2').first
141
- "%#{hex}"
142
- end
143
- # handle empty case
144
- s = '_root_' if s.blank?
145
- # always downcase
146
- s = s.downcase
147
- s
148
- end
149
- protected :pathify
150
85
  end
151
86
  end
data/lib/sinew/response.rb CHANGED
@@ -2,7 +2,7 @@ require 'stringio'
2
2
  require 'zlib'
3
3
 
4
4
  #
5
- # An HTTP response. Mostly a wrapper around HTTParty.
5
+ # An HTTP response.
6
6
  #
7
7
 
8
8
  module Sinew
@@ -13,62 +13,13 @@ module Sinew
13
13
  # factory methods
14
14
  #
15
15
 
16
- def self.from_network(request, party_response)
17
- Response.new.tap do |response|
18
- response.request = request
19
- response.uri = party_response.request.last_uri
20
- response.code = party_response.code
21
- response.headers = party_response.headers.to_h
22
- response.body = process_body(party_response)
23
- end
24
- end
25
-
26
- def self.from_cache(request, body, head)
27
- Response.new.tap do |response|
28
- response.request = request
29
- response.body = body
30
-
31
- # defaults
32
- response.uri = request.uri
33
- response.code = 200
34
- response.headers = {}
35
-
36
- # overwrite with cached response headers
37
- if head
38
- if head !~ /^{/
39
- return from_legacy_head(response, head)
40
- end
41
- head = JSON.parse(head, symbolize_names: true)
42
- response.uri = URI.parse(head[:uri])
43
- response.code = head[:code]
44
- response.headers = head[:headers]
45
- end
46
- end
47
- end
48
-
49
- def self.from_timeout(request)
50
- Response.new.tap do |response|
51
- response.request = request
52
- response.uri = request.uri
53
- response.body = 'timeout'
54
- response.code = 999
55
- response.headers = {}
56
- end
57
- end
58
-
59
- def self.from_legacy_head(response, head)
60
- response.tap do |r|
61
- case head
62
- when /\ACURLER_ERROR/
63
- # error
64
- r.code = 999
65
- when /\AHTTP/
66
- # redirect
67
- location = head.scan(/Location: ([^\r\n]+)/).flatten.last
68
- r.uri += location
69
- else
70
- $stderr.puts "unknown cached /head for #{r.uri}"
71
- end
16
+ def self.from_network(request, fday_response)
17
+ Response.new.tap do
18
+ _1.request = request
19
+ _1.uri = fday_response.env.url
20
+ _1.code = fday_response.status
21
+ _1.headers = fday_response.headers.to_h
22
+ _1.body = process_body(fday_response)
72
23
  end
73
24
  end
74
25