sinew 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -4
- data/lib/sinew/main.rb +27 -1
- data/lib/sinew/request.rb +8 -49
- data/lib/sinew/version.rb +1 -1
- data/sinew.gemspec +0 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba5558019816540d71e1bb44029f2733aff53649a3f4da72a9905e0a67d06ad9
|
4
|
+
data.tar.gz: 446c245782cad55f1caa36e01b0e8c98748295b47eb5b1911175fff2f79589b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cef8c1145a21e84f560b44821071ffc7b57ef965167b633e1c837f7b7d9dbfce340b14d1afb2a35891c7b1ed4aa4f08e47ca7405cf382acca7eae855a47d3a71
|
7
|
+
data.tar.gz: 8dc7b67511fc541cccef23b69463abd9a8081f9c75583c1d6c6756dda0561ce7a91be35ee06f7d886ea7488b9005c969e14fa10794cac92475a3063c8968abec
|
data/README.md
CHANGED
@@ -39,7 +39,7 @@ Sinew 3 uses a new format for cached responses. Old Sinew 2 cache directories sh
|
|
39
39
|
|
40
40
|
## Quick Example
|
41
41
|
|
42
|
-
Here's an example for collecting the links from httpbingo.org:
|
42
|
+
Here's an example for collecting the links from httpbingo.org. Paste this into a file called `sample.sinew` and run `sinew sample.sinew`. It will create a `sample.csv` file containing the href and text for each link:
|
43
43
|
|
44
44
|
```ruby
|
45
45
|
# get the url
|
@@ -56,8 +56,6 @@ noko.css("ul li a").each do |a|
|
|
56
56
|
end
|
57
57
|
```
|
58
58
|
|
59
|
-
If you paste this into a file called `sample.sinew` and run `sinew sample.sinew`, it will create a `sample.csv` file containing the href and text for each link.
|
60
|
-
|
61
59
|
## How it Works
|
62
60
|
|
63
61
|
There are three main features provided by Sinew.
|
@@ -108,7 +106,7 @@ Sinew creates a CSV file with the same name as the recipe, and `csv_emit(hash)`
|
|
108
106
|
|
109
107
|
#### Caching
|
110
108
|
|
111
|
-
Sinew uses [
|
109
|
+
Sinew uses [httpdisk](https://github.com/gurgeous/httpdisk/) to aggressively cache all HTTP responses to disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
|
112
110
|
|
113
111
|
Sinew never deletes files from the cache - that's up to you!
|
114
112
|
|
data/lib/sinew/main.rb
CHANGED
@@ -34,7 +34,7 @@ module Sinew
|
|
34
34
|
#
|
35
35
|
|
36
36
|
def http(method, url, options = {})
|
37
|
-
request = Request.new(self, method, url, options)
|
37
|
+
request = Request.new(method, url, request_options(options))
|
38
38
|
response = request.perform(connection)
|
39
39
|
|
40
40
|
# always log error messages
|
@@ -62,6 +62,32 @@ module Sinew
|
|
62
62
|
# helpers
|
63
63
|
#
|
64
64
|
|
65
|
+
def request_options(options)
|
66
|
+
options.dup.tap do |req|
|
67
|
+
req[:headers] = {}.tap do |h|
|
68
|
+
[ runtime_options.headers, options[:headers]].each do
|
69
|
+
h.merge!(_1) if _1
|
70
|
+
end
|
71
|
+
end
|
72
|
+
req[:proxy] = random_proxy
|
73
|
+
end
|
74
|
+
end
|
75
|
+
protected :request_options
|
76
|
+
|
77
|
+
PROXY_RE = /\A#{URI::PATTERN::HOST}(:\d+)?\Z/.freeze
|
78
|
+
|
79
|
+
def random_proxy
|
80
|
+
return if !options[:proxy]
|
81
|
+
|
82
|
+
proxy = options[:proxy].split(',').sample
|
83
|
+
if proxy !~ PROXY_RE
|
84
|
+
raise ArgumentError, "invalid proxy #{proxy.inspect}, should be host[:port]"
|
85
|
+
end
|
86
|
+
|
87
|
+
"http://#{proxy}"
|
88
|
+
end
|
89
|
+
protected :random_proxy
|
90
|
+
|
65
91
|
def footer
|
66
92
|
output.report
|
67
93
|
finished = output.count > 0 ? "Finished #{output.filename}" : 'Finished'
|
data/lib/sinew/request.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require '
|
2
|
-
require 'htmlentities'
|
1
|
+
require 'sterile'
|
3
2
|
|
4
3
|
#
|
5
4
|
# Process a single HTTP request.
|
@@ -9,42 +8,29 @@ module Sinew
|
|
9
8
|
class Error < StandardError; end
|
10
9
|
|
11
10
|
class Request
|
12
|
-
HTML_ENTITIES = HTMLEntities.new
|
13
11
|
VALID_METHODS = %w[get post patch put delete head options].freeze
|
14
12
|
METHODS_WITH_BODY = %w[patch post put].freeze
|
15
13
|
|
16
|
-
attr_reader :sinew, :method, :options, :uri
|
14
|
+
attr_reader :method, :options, :uri
|
17
15
|
|
18
16
|
# Supported options:
|
19
17
|
# body: Body of http post
|
20
18
|
# headers: Hash of HTTP headers (combined with runtime_options.headers)
|
21
19
|
# query: Hash of query parameters to add to url
|
22
|
-
def initialize(sinew, method, url, options = {})
|
23
|
-
@sinew = sinew
|
20
|
+
def initialize(method, url, options = {})
|
24
21
|
@method = method
|
25
22
|
@options = options.dup
|
26
23
|
@uri = parse_url(url)
|
27
24
|
end
|
28
25
|
|
29
|
-
def proxy
|
30
|
-
@proxy ||= begin
|
31
|
-
if proxies = sinew.options[:proxy]
|
32
|
-
proxies.split(',').sample
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
26
|
# run the request, return the result
|
38
27
|
def perform(connection)
|
39
28
|
validate!
|
40
29
|
|
41
|
-
headers = sinew.runtime_options.headers
|
42
|
-
headers = headers.merge(options[:headers]) if options[:headers]
|
43
|
-
|
44
30
|
body = options.delete(:body)
|
45
|
-
|
46
|
-
|
47
|
-
_1.options[:proxy] = proxy
|
31
|
+
fday_response = connection.send(method, uri, body) do
|
32
|
+
_1.headers.update(options[:headers]) if options[:headers]
|
33
|
+
_1.options[:proxy] = options[:proxy]
|
48
34
|
end
|
49
35
|
|
50
36
|
Response.from_network(self, fday_response)
|
@@ -52,10 +38,10 @@ module Sinew
|
|
52
38
|
|
53
39
|
# We accept sloppy urls and attempt to clean them up
|
54
40
|
def parse_url(url)
|
55
|
-
s = url
|
41
|
+
s = url.to_s
|
56
42
|
|
57
43
|
# remove entities
|
58
|
-
s = HTML_ENTITIES.decode(s)
|
44
|
+
s = Sterile.decode_entities(s)
|
59
45
|
|
60
46
|
# fix a couple of common encoding bugs
|
61
47
|
s = s.gsub(' ', '%20')
|
@@ -96,32 +82,5 @@ module Sinew
|
|
96
82
|
headers && headers['Content-Type']
|
97
83
|
end
|
98
84
|
protected :content_type
|
99
|
-
|
100
|
-
def form?
|
101
|
-
content_type == 'application/x-www-form-urlencoded'
|
102
|
-
end
|
103
|
-
protected :form?
|
104
|
-
|
105
|
-
def pathify(s)
|
106
|
-
# remove leading slash
|
107
|
-
s = s.gsub(/^\//, '')
|
108
|
-
# .. => comma
|
109
|
-
s = s.gsub('..', ',')
|
110
|
-
# query separators => comma
|
111
|
-
s = s.gsub(/[?\/&]/, ',')
|
112
|
-
# ,, => comma
|
113
|
-
s = s.gsub(',,', ',')
|
114
|
-
# encode invalid path chars
|
115
|
-
s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
|
116
|
-
hex = i.unpack1('H2')
|
117
|
-
"%#{hex}"
|
118
|
-
end
|
119
|
-
# handle empty case
|
120
|
-
s = '_root_' if s.blank?
|
121
|
-
# always downcase
|
122
|
-
s = s.downcase
|
123
|
-
s
|
124
|
-
end
|
125
|
-
protected :pathify
|
126
85
|
end
|
127
86
|
end
|
data/lib/sinew/version.rb
CHANGED
data/sinew.gemspec
CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |s|
|
|
24
24
|
s.add_runtime_dependency 'amazing_print', '~> 1.3'
|
25
25
|
s.add_runtime_dependency 'faraday', '~> 1.4'
|
26
26
|
s.add_runtime_dependency 'faraday-encoding', '~> 0'
|
27
|
-
s.add_runtime_dependency 'htmlentities', '~> 4.3'
|
28
27
|
s.add_runtime_dependency 'httpdisk', '~> 0'
|
29
28
|
s.add_runtime_dependency 'nokogiri', '~> 1.11'
|
30
29
|
s.add_runtime_dependency 'scripto', '~> 0'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sinew
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Doppelt
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-06-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: amazing_print
|
@@ -53,20 +53,6 @@ dependencies:
|
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
|
-
- !ruby/object:Gem::Dependency
|
57
|
-
name: htmlentities
|
58
|
-
requirement: !ruby/object:Gem::Requirement
|
59
|
-
requirements:
|
60
|
-
- - "~>"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '4.3'
|
63
|
-
type: :runtime
|
64
|
-
prerelease: false
|
65
|
-
version_requirements: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '4.3'
|
70
56
|
- !ruby/object:Gem::Dependency
|
71
57
|
name: httpdisk
|
72
58
|
requirement: !ruby/object:Gem::Requirement
|