sinew 3.0.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -4
- data/lib/sinew/main.rb +27 -1
- data/lib/sinew/request.rb +8 -49
- data/lib/sinew/version.rb +1 -1
- data/sinew.gemspec +0 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba5558019816540d71e1bb44029f2733aff53649a3f4da72a9905e0a67d06ad9
|
4
|
+
data.tar.gz: 446c245782cad55f1caa36e01b0e8c98748295b47eb5b1911175fff2f79589b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cef8c1145a21e84f560b44821071ffc7b57ef965167b633e1c837f7b7d9dbfce340b14d1afb2a35891c7b1ed4aa4f08e47ca7405cf382acca7eae855a47d3a71
|
7
|
+
data.tar.gz: 8dc7b67511fc541cccef23b69463abd9a8081f9c75583c1d6c6756dda0561ce7a91be35ee06f7d886ea7488b9005c969e14fa10794cac92475a3063c8968abec
|
data/README.md
CHANGED
@@ -39,7 +39,7 @@ Sinew 3 uses a new format for cached responses. Old Sinew 2 cache directories sh
|
|
39
39
|
|
40
40
|
## Quick Example
|
41
41
|
|
42
|
-
Here's an example for collecting the links from httpbingo.org:
|
42
|
+
Here's an example for collecting the links from httpbingo.org. Paste this into a file called `sample.sinew` and run `sinew sample.sinew`. It will create a `sample.csv` file containing the href and text for each link:
|
43
43
|
|
44
44
|
```ruby
|
45
45
|
# get the url
|
@@ -56,8 +56,6 @@ noko.css("ul li a").each do |a|
|
|
56
56
|
end
|
57
57
|
```
|
58
58
|
|
59
|
-
If you paste this into a file called `sample.sinew` and run `sinew sample.sinew`, it will create a `sample.csv` file containing the href and text for each link.
|
60
|
-
|
61
59
|
## How it Works
|
62
60
|
|
63
61
|
There are three main features provided by Sinew.
|
@@ -108,7 +106,7 @@ Sinew creates a CSV file with the same name as the recipe, and `csv_emit(hash)`
|
|
108
106
|
|
109
107
|
#### Caching
|
110
108
|
|
111
|
-
Sinew uses [
|
109
|
+
Sinew uses [httpdisk](https://github.com/gurgeous/httpdisk/) to aggressively cache all HTTP responses to disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
|
112
110
|
|
113
111
|
Sinew never deletes files from the cache - that's up to you!
|
114
112
|
|
data/lib/sinew/main.rb
CHANGED
@@ -34,7 +34,7 @@ module Sinew
|
|
34
34
|
#
|
35
35
|
|
36
36
|
def http(method, url, options = {})
|
37
|
-
request = Request.new(self, method, url, options)
|
37
|
+
request = Request.new(method, url, request_options(options))
|
38
38
|
response = request.perform(connection)
|
39
39
|
|
40
40
|
# always log error messages
|
@@ -62,6 +62,32 @@ module Sinew
|
|
62
62
|
# helpers
|
63
63
|
#
|
64
64
|
|
65
|
+
def request_options(options)
|
66
|
+
options.dup.tap do |req|
|
67
|
+
req[:headers] = {}.tap do |h|
|
68
|
+
[ runtime_options.headers, options[:headers]].each do
|
69
|
+
h.merge!(_1) if _1
|
70
|
+
end
|
71
|
+
end
|
72
|
+
req[:proxy] = random_proxy
|
73
|
+
end
|
74
|
+
end
|
75
|
+
protected :request_options
|
76
|
+
|
77
|
+
PROXY_RE = /\A#{URI::PATTERN::HOST}(:\d+)?\Z/.freeze
|
78
|
+
|
79
|
+
def random_proxy
|
80
|
+
return if !options[:proxy]
|
81
|
+
|
82
|
+
proxy = options[:proxy].split(',').sample
|
83
|
+
if proxy !~ PROXY_RE
|
84
|
+
raise ArgumentError, "invalid proxy #{proxy.inspect}, should be host[:port]"
|
85
|
+
end
|
86
|
+
|
87
|
+
"http://#{proxy}"
|
88
|
+
end
|
89
|
+
protected :random_proxy
|
90
|
+
|
65
91
|
def footer
|
66
92
|
output.report
|
67
93
|
finished = output.count > 0 ? "Finished #{output.filename}" : 'Finished'
|
data/lib/sinew/request.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require '
|
2
|
-
require 'htmlentities'
|
1
|
+
require 'sterile'
|
3
2
|
|
4
3
|
#
|
5
4
|
# Process a single HTTP request.
|
@@ -9,42 +8,29 @@ module Sinew
|
|
9
8
|
class Error < StandardError; end
|
10
9
|
|
11
10
|
class Request
|
12
|
-
HTML_ENTITIES = HTMLEntities.new
|
13
11
|
VALID_METHODS = %w[get post patch put delete head options].freeze
|
14
12
|
METHODS_WITH_BODY = %w[patch post put].freeze
|
15
13
|
|
16
|
-
attr_reader :sinew, :method, :options, :uri
|
14
|
+
attr_reader :method, :options, :uri
|
17
15
|
|
18
16
|
# Supported options:
|
19
17
|
# body: Body of http post
|
20
18
|
# headers: Hash of HTTP headers (combined with runtime_options.headers)
|
21
19
|
# query: Hash of query parameters to add to url
|
22
|
-
def initialize(sinew, method, url, options = {})
|
23
|
-
@sinew = sinew
|
20
|
+
def initialize(method, url, options = {})
|
24
21
|
@method = method
|
25
22
|
@options = options.dup
|
26
23
|
@uri = parse_url(url)
|
27
24
|
end
|
28
25
|
|
29
|
-
def proxy
|
30
|
-
@proxy ||= begin
|
31
|
-
if proxies = sinew.options[:proxy]
|
32
|
-
proxies.split(',').sample
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
26
|
# run the request, return the result
|
38
27
|
def perform(connection)
|
39
28
|
validate!
|
40
29
|
|
41
|
-
headers = sinew.runtime_options.headers
|
42
|
-
headers = headers.merge(options[:headers]) if options[:headers]
|
43
|
-
|
44
30
|
body = options.delete(:body)
|
45
|
-
|
46
|
-
|
47
|
-
_1.options[:proxy] = proxy
|
31
|
+
fday_response = connection.send(method, uri, body) do
|
32
|
+
_1.headers.update(options[:headers]) if options[:headers]
|
33
|
+
_1.options[:proxy] = options[:proxy]
|
48
34
|
end
|
49
35
|
|
50
36
|
Response.from_network(self, fday_response)
|
@@ -52,10 +38,10 @@ module Sinew
|
|
52
38
|
|
53
39
|
# We accept sloppy urls and attempt to clean them up
|
54
40
|
def parse_url(url)
|
55
|
-
s = url
|
41
|
+
s = url.to_s
|
56
42
|
|
57
43
|
# remove entities
|
58
|
-
s = HTML_ENTITIES.decode(s)
|
44
|
+
s = Sterile.decode_entities(s)
|
59
45
|
|
60
46
|
# fix a couple of common encoding bugs
|
61
47
|
s = s.gsub(' ', '%20')
|
@@ -96,32 +82,5 @@ module Sinew
|
|
96
82
|
headers && headers['Content-Type']
|
97
83
|
end
|
98
84
|
protected :content_type
|
99
|
-
|
100
|
-
def form?
|
101
|
-
content_type == 'application/x-www-form-urlencoded'
|
102
|
-
end
|
103
|
-
protected :form?
|
104
|
-
|
105
|
-
def pathify(s)
|
106
|
-
# remove leading slash
|
107
|
-
s = s.gsub(/^\//, '')
|
108
|
-
# .. => comma
|
109
|
-
s = s.gsub('..', ',')
|
110
|
-
# query separators => comma
|
111
|
-
s = s.gsub(/[?\/&]/, ',')
|
112
|
-
# ,, => comma
|
113
|
-
s = s.gsub(',,', ',')
|
114
|
-
# encode invalid path chars
|
115
|
-
s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
|
116
|
-
hex = i.unpack1('H2')
|
117
|
-
"%#{hex}"
|
118
|
-
end
|
119
|
-
# handle empty case
|
120
|
-
s = '_root_' if s.blank?
|
121
|
-
# always downcase
|
122
|
-
s = s.downcase
|
123
|
-
s
|
124
|
-
end
|
125
|
-
protected :pathify
|
126
85
|
end
|
127
86
|
end
|
data/lib/sinew/version.rb
CHANGED
data/sinew.gemspec
CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |s|
|
|
24
24
|
s.add_runtime_dependency 'amazing_print', '~> 1.3'
|
25
25
|
s.add_runtime_dependency 'faraday', '~> 1.4'
|
26
26
|
s.add_runtime_dependency 'faraday-encoding', '~> 0'
|
27
|
-
s.add_runtime_dependency 'htmlentities', '~> 4.3'
|
28
27
|
s.add_runtime_dependency 'httpdisk', '~> 0'
|
29
28
|
s.add_runtime_dependency 'nokogiri', '~> 1.11'
|
30
29
|
s.add_runtime_dependency 'scripto', '~> 0'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sinew
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Doppelt
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-06-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: amazing_print
|
@@ -53,20 +53,6 @@ dependencies:
|
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
|
-
- !ruby/object:Gem::Dependency
|
57
|
-
name: htmlentities
|
58
|
-
requirement: !ruby/object:Gem::Requirement
|
59
|
-
requirements:
|
60
|
-
- - "~>"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '4.3'
|
63
|
-
type: :runtime
|
64
|
-
prerelease: false
|
65
|
-
version_requirements: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '4.3'
|
70
56
|
- !ruby/object:Gem::Dependency
|
71
57
|
name: httpdisk
|
72
58
|
requirement: !ruby/object:Gem::Requirement
|