sinew 3.0.0 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df55f2168ff4242fceb31d083b8d16f1046139fa7acb8a9c4fc3f06f7884e113
4
- data.tar.gz: 520967eba4ea2d8446690736f2c28d34642b452c0f4e5003dcb89ce373c116e5
3
+ metadata.gz: ba5558019816540d71e1bb44029f2733aff53649a3f4da72a9905e0a67d06ad9
4
+ data.tar.gz: 446c245782cad55f1caa36e01b0e8c98748295b47eb5b1911175fff2f79589b2
5
5
  SHA512:
6
- metadata.gz: 7443bccc5fc4e1bd112ce50b3d17445f0c21f5b351a6b5be586aadd63f36396312370ec6115d8116701165b3af19fcb852f85d18d0fbe7b4bf0d797312d3fa40
7
- data.tar.gz: 9ca4f3c424e021100f518ca4f2231f515b38dbeb402ff4fce07c13a2440f19b0fabcc2a2920e51aa3f6228507550d3c94cb70a46fd22711ba3add90e4fc28004
6
+ metadata.gz: cef8c1145a21e84f560b44821071ffc7b57ef965167b633e1c837f7b7d9dbfce340b14d1afb2a35891c7b1ed4aa4f08e47ca7405cf382acca7eae855a47d3a71
7
+ data.tar.gz: 8dc7b67511fc541cccef23b69463abd9a8081f9c75583c1d6c6756dda0561ce7a91be35ee06f7d886ea7488b9005c969e14fa10794cac92475a3063c8968abec
data/README.md CHANGED
@@ -39,7 +39,7 @@ Sinew 3 uses a new format for cached responses. Old Sinew 2 cache directories sh
39
39
 
40
40
  ## Quick Example
41
41
 
42
- Here's an example for collecting the links from httpbingo.org:
42
+ Here's an example for collecting the links from httpbingo.org. Paste this into a file called `sample.sinew` and run `sinew sample.sinew`. It will create a `sample.csv` file containing the href and text for each link:
43
43
 
44
44
  ```ruby
45
45
  # get the url
@@ -56,8 +56,6 @@ noko.css("ul li a").each do |a|
56
56
  end
57
57
  ```
58
58
 
59
- If you paste this into a file called `sample.sinew` and run `sinew sample.sinew`, it will create a `sample.csv` file containing the href and text for each link.
60
-
61
59
  ## How it Works
62
60
 
63
61
  There are three main features provided by Sinew.
@@ -108,7 +106,7 @@ Sinew creates a CSV file with the same name as the recipe, and `csv_emit(hash)`
108
106
 
109
107
  #### Caching
110
108
 
111
- Sinew uses [sinew](https://github.com/gurgeous/sinew/) to aggressively cache all HTTP responses to disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
109
+ Sinew uses [httpdisk](https://github.com/gurgeous/httpdisk/) to aggressively cache all HTTP responses to disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
112
110
 
113
111
  Sinew never deletes files from the cache - that's up to you!
114
112
 
data/lib/sinew/main.rb CHANGED
@@ -34,7 +34,7 @@ module Sinew
34
34
  #
35
35
 
36
36
  def http(method, url, options = {})
37
- request = Request.new(self, method, url, options)
37
+ request = Request.new(method, url, request_options(options))
38
38
  response = request.perform(connection)
39
39
 
40
40
  # always log error messages
@@ -62,6 +62,32 @@ module Sinew
62
62
  # helpers
63
63
  #
64
64
 
65
+ def request_options(options)
66
+ options.dup.tap do |req|
67
+ req[:headers] = {}.tap do |h|
68
+ [ runtime_options.headers, options[:headers]].each do
69
+ h.merge!(_1) if _1
70
+ end
71
+ end
72
+ req[:proxy] = random_proxy
73
+ end
74
+ end
75
+ protected :request_options
76
+
77
+ PROXY_RE = /\A#{URI::PATTERN::HOST}(:\d+)?\Z/.freeze
78
+
79
+ def random_proxy
80
+ return if !options[:proxy]
81
+
82
+ proxy = options[:proxy].split(',').sample
83
+ if proxy !~ PROXY_RE
84
+ raise ArgumentError, "invalid proxy #{proxy.inspect}, should be host[:port]"
85
+ end
86
+
87
+ "http://#{proxy}"
88
+ end
89
+ protected :random_proxy
90
+
65
91
  def footer
66
92
  output.report
67
93
  finished = output.count > 0 ? "Finished #{output.filename}" : 'Finished'
data/lib/sinew/request.rb CHANGED
@@ -1,5 +1,4 @@
1
- require 'digest/md5'
2
- require 'htmlentities'
1
+ require 'sterile'
3
2
 
4
3
  #
5
4
  # Process a single HTTP request.
@@ -9,42 +8,29 @@ module Sinew
9
8
  class Error < StandardError; end
10
9
 
11
10
  class Request
12
- HTML_ENTITIES = HTMLEntities.new
13
11
  VALID_METHODS = %w[get post patch put delete head options].freeze
14
12
  METHODS_WITH_BODY = %w[patch post put].freeze
15
13
 
16
- attr_reader :sinew, :method, :uri, :options
14
+ attr_reader :method, :options, :uri
17
15
 
18
16
  # Supported options:
19
17
  # body: Body of http post
20
18
  # headers: Hash of HTTP headers (combined with runtime_options.headers)
21
19
  # query: Hash of query parameters to add to url
22
- def initialize(sinew, method, url, options = {})
23
- @sinew = sinew
20
+ def initialize(method, url, options = {})
24
21
  @method = method
25
22
  @options = options.dup
26
23
  @uri = parse_url(url)
27
24
  end
28
25
 
29
- def proxy
30
- @proxy ||= begin
31
- if proxies = sinew.options[:proxy]
32
- proxies.split(',').sample
33
- end
34
- end
35
- end
36
-
37
26
  # run the request, return the result
38
27
  def perform(connection)
39
28
  validate!
40
29
 
41
- headers = sinew.runtime_options.headers
42
- headers = headers.merge(options[:headers]) if options[:headers]
43
-
44
30
  body = options.delete(:body)
45
-
46
- fday_response = connection.send(method, uri, body, headers) do
47
- _1.options[:proxy] = proxy
31
+ fday_response = connection.send(method, uri, body) do
32
+ _1.headers.update(options[:headers]) if options[:headers]
33
+ _1.options[:proxy] = options[:proxy]
48
34
  end
49
35
 
50
36
  Response.from_network(self, fday_response)
@@ -52,10 +38,10 @@ module Sinew
52
38
 
53
39
  # We accept sloppy urls and attempt to clean them up
54
40
  def parse_url(url)
55
- s = url
41
+ s = url.to_s
56
42
 
57
43
  # remove entities
58
- s = HTML_ENTITIES.decode(s)
44
+ s = Sterile.decode_entities(s)
59
45
 
60
46
  # fix a couple of common encoding bugs
61
47
  s = s.gsub(' ', '%20')
@@ -96,32 +82,5 @@ module Sinew
96
82
  headers && headers['Content-Type']
97
83
  end
98
84
  protected :content_type
99
-
100
- def form?
101
- content_type == 'application/x-www-form-urlencoded'
102
- end
103
- protected :form?
104
-
105
- def pathify(s)
106
- # remove leading slash
107
- s = s.gsub(/^\//, '')
108
- # .. => comma
109
- s = s.gsub('..', ',')
110
- # query separators => comma
111
- s = s.gsub(/[?\/&]/, ',')
112
- # ,, => comma
113
- s = s.gsub(',,', ',')
114
- # encode invalid path chars
115
- s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
116
- hex = i.unpack1('H2')
117
- "%#{hex}"
118
- end
119
- # handle empty case
120
- s = '_root_' if s.blank?
121
- # always downcase
122
- s = s.downcase
123
- s
124
- end
125
- protected :pathify
126
85
  end
127
86
  end
data/lib/sinew/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Sinew
2
2
  # Gem version
3
- VERSION = '3.0.0'.freeze
3
+ VERSION = '3.0.1'.freeze
4
4
  end
data/sinew.gemspec CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |s|
24
24
  s.add_runtime_dependency 'amazing_print', '~> 1.3'
25
25
  s.add_runtime_dependency 'faraday', '~> 1.4'
26
26
  s.add_runtime_dependency 'faraday-encoding', '~> 0'
27
- s.add_runtime_dependency 'htmlentities', '~> 4.3'
28
27
  s.add_runtime_dependency 'httpdisk', '~> 0'
29
28
  s.add_runtime_dependency 'nokogiri', '~> 1.11'
30
29
  s.add_runtime_dependency 'scripto', '~> 0'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sinew
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-05-11 00:00:00.000000000 Z
12
+ date: 2021-06-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: amazing_print
@@ -53,20 +53,6 @@ dependencies:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
- - !ruby/object:Gem::Dependency
57
- name: htmlentities
58
- requirement: !ruby/object:Gem::Requirement
59
- requirements:
60
- - - "~>"
61
- - !ruby/object:Gem::Version
62
- version: '4.3'
63
- type: :runtime
64
- prerelease: false
65
- version_requirements: !ruby/object:Gem::Requirement
66
- requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '4.3'
70
56
  - !ruby/object:Gem::Dependency
71
57
  name: httpdisk
72
58
  requirement: !ruby/object:Gem::Requirement