sinew 2.0.5 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +26 -0
- data/.rubocop.yml +9 -6
- data/.vscode/settings.json +0 -10
- data/Gemfile +9 -0
- data/README.md +13 -17
- data/Rakefile +33 -18
- data/bin/sinew +2 -0
- data/lib/sinew.rb +0 -1
- data/lib/sinew/connection.rb +52 -0
- data/lib/sinew/connection/log_formatter.rb +22 -0
- data/lib/sinew/connection/rate_limit.rb +29 -0
- data/lib/sinew/core_ext.rb +1 -1
- data/lib/sinew/dsl.rb +2 -1
- data/lib/sinew/main.rb +7 -55
- data/lib/sinew/output.rb +7 -23
- data/lib/sinew/request.rb +20 -71
- data/lib/sinew/response.rb +8 -57
- data/lib/sinew/runtime_options.rb +4 -4
- data/lib/sinew/version.rb +1 -1
- data/sample.sinew +2 -2
- data/sinew.gemspec +16 -17
- metadata +41 -99
- data/.travis.yml +0 -4
- data/lib/sinew/cache.rb +0 -79
- data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
- data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
- data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
- data/test/legacy/eu.httpbin.org/status,500 +0 -1
- data/test/legacy/legacy.sinew +0 -2
- data/test/recipes/array_header.sinew +0 -6
- data/test/recipes/basic.sinew +0 -8
- data/test/recipes/dups.sinew +0 -7
- data/test/recipes/implicit_header.sinew +0 -5
- data/test/recipes/limit.sinew +0 -11
- data/test/recipes/noko.sinew +0 -9
- data/test/recipes/uri.sinew +0 -11
- data/test/recipes/xml.sinew +0 -8
- data/test/test.html +0 -45
- data/test/test_cache.rb +0 -69
- data/test/test_helper.rb +0 -126
- data/test/test_legacy.rb +0 -23
- data/test/test_main.rb +0 -34
- data/test/test_nokogiri_ext.rb +0 -18
- data/test/test_output.rb +0 -56
- data/test/test_recipes.rb +0 -60
- data/test/test_requests.rb +0 -164
- data/test/test_utf8.rb +0 -39
data/lib/sinew/output.rb
CHANGED
@@ -1,13 +1,6 @@
|
|
1
1
|
require 'csv'
|
2
2
|
require 'set'
|
3
|
-
require '
|
4
|
-
|
5
|
-
#
|
6
|
-
# Stringex customizations
|
7
|
-
#
|
8
|
-
|
9
|
-
# turn '&amp;' into '&', not 'and'
|
10
|
-
Stringex::Localization::DefaultConversions::HTML_ENTITIES[:amp] = '&'
|
3
|
+
require 'sterile'
|
11
4
|
|
12
5
|
#
|
13
6
|
# CSV output.
|
@@ -52,6 +45,7 @@ module Sinew
|
|
52
45
|
|
53
46
|
# don't allow duplicate urls
|
54
47
|
return if dup_url?(row)
|
48
|
+
|
55
49
|
rows << row.dup
|
56
50
|
|
57
51
|
# map columns to row, and normalize along the way
|
@@ -108,24 +102,14 @@ module Sinew
|
|
108
102
|
# strip html tags. Note that we replace tags with spaces
|
109
103
|
s = s.gsub(/<[^>]+>/, ' ')
|
110
104
|
|
111
|
-
#
|
112
|
-
# Below uses stringex
|
113
|
-
#
|
114
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
|
115
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
|
116
|
-
#
|
117
|
-
|
118
105
|
# Converts MS Word 'smart punctuation' to ASCII
|
119
|
-
s = s
|
120
|
-
|
121
|
-
# "&aacute;".convert_accented_html_entities # => "a"
|
122
|
-
s = s.convert_accented_html_entities
|
106
|
+
s = Sterile.plain_format(s)
|
123
107
|
|
124
|
-
# &
|
125
|
-
s = s
|
108
|
+
# &aacute; &amp; etc.
|
109
|
+
s = Sterile.decode_entities(s)
|
126
110
|
|
127
|
-
#
|
128
|
-
s = s
|
111
|
+
# "šţɽĩɳģ" => "string"
|
112
|
+
s = Sterile.transliterate(s)
|
129
113
|
|
130
114
|
# squish
|
131
115
|
s = s.squish
|
data/lib/sinew/request.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
require 'digest/md5'
|
2
|
-
require 'httparty'
|
3
2
|
require 'htmlentities'
|
4
3
|
|
5
4
|
#
|
6
|
-
# Process a single HTTP request.
|
5
|
+
# Process a single HTTP request.
|
7
6
|
#
|
8
7
|
|
9
8
|
module Sinew
|
@@ -12,16 +11,19 @@ module Sinew
|
|
12
11
|
class Request
|
13
12
|
HTML_ENTITIES = HTMLEntities.new
|
14
13
|
VALID_METHODS = %w[get post patch put delete head options].freeze
|
14
|
+
METHODS_WITH_BODY = %w[patch post put].freeze
|
15
15
|
|
16
|
-
attr_reader :sinew, :method, :uri, :options
|
16
|
+
attr_reader :sinew, :method, :uri, :options
|
17
17
|
|
18
|
-
#
|
18
|
+
# Supported options:
|
19
|
+
# body: Body of http post
|
20
|
+
# headers: Hash of HTTP headers (combined with runtime_options.headers)
|
21
|
+
# query: Hash of query parameters to add to url
|
19
22
|
def initialize(sinew, method, url, options = {})
|
20
23
|
@sinew = sinew
|
21
24
|
@method = method
|
22
25
|
@options = options.dup
|
23
26
|
@uri = parse_url(url)
|
24
|
-
@cache_key = calculate_cache_key
|
25
27
|
end
|
26
28
|
|
27
29
|
def proxy
|
@@ -33,28 +35,19 @@ module Sinew
|
|
33
35
|
end
|
34
36
|
|
35
37
|
# run the request, return the result
|
36
|
-
def perform
|
38
|
+
def perform(connection)
|
37
39
|
validate!
|
38
40
|
|
39
|
-
|
40
|
-
|
41
|
-
# merge proxy
|
42
|
-
if proxy = self.proxy
|
43
|
-
addr, port = proxy.split(':')
|
44
|
-
party_options[:http_proxyaddr] = addr
|
45
|
-
party_options[:http_proxyport] = port || 80
|
46
|
-
end
|
41
|
+
headers = sinew.runtime_options.headers
|
42
|
+
headers = headers.merge(options[:headers]) if options[:headers]
|
47
43
|
|
48
|
-
|
49
|
-
party_options = party_options.merge(sinew.runtime_options.httparty_options)
|
44
|
+
body = options.delete(:body)
|
50
45
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
party_options[:headers] = headers
|
46
|
+
fday_response = connection.send(method, uri, body, headers) do
|
47
|
+
_1.options[:proxy] = proxy
|
48
|
+
end
|
55
49
|
|
56
|
-
|
57
|
-
Response.from_network(self, party_response)
|
50
|
+
Response.from_network(self, fday_response)
|
58
51
|
end
|
59
52
|
|
60
53
|
# We accept sloppy urls and attempt to clean them up
|
@@ -68,11 +61,11 @@ module Sinew
|
|
68
61
|
s = s.gsub(' ', '%20')
|
69
62
|
s = s.gsub("'", '%27')
|
70
63
|
|
71
|
-
# append query manually (instead of letting
|
72
|
-
#
|
64
|
+
# append query manually (instead of letting Faraday handle it) for consistent
|
65
|
+
# Request#uri and Response#uri
|
73
66
|
query = options.delete(:query)
|
74
67
|
if query.present?
|
75
|
-
q =
|
68
|
+
q = Faraday::Utils.default_params_encoder.encode(query)
|
76
69
|
separator = s.include?('?') ? '&' : '?'
|
77
70
|
s = "#{s}#{separator}#{q}"
|
78
71
|
end
|
@@ -81,54 +74,10 @@ module Sinew
|
|
81
74
|
end
|
82
75
|
protected :parse_url
|
83
76
|
|
84
|
-
def calculate_cache_key
|
85
|
-
dir = pathify(uri.host)
|
86
|
-
|
87
|
-
body_key = if body.is_a?(Hash)
|
88
|
-
HTTParty::HashConversions.to_params(body)
|
89
|
-
else
|
90
|
-
body&.dup
|
91
|
-
end
|
92
|
-
|
93
|
-
# Build key, as a hash for before_generate_cache_key. Note that :scheme is
|
94
|
-
# just a placeholder in case someone wants to add it for real, so that
|
95
|
-
# it'll appear in the correct order. We remove the placerholder after we
|
96
|
-
# call the proc.
|
97
|
-
key = {
|
98
|
-
method: method.dup,
|
99
|
-
scheme: 'placeholder',
|
100
|
-
path: uri.path,
|
101
|
-
query: uri.query,
|
102
|
-
body: body_key,
|
103
|
-
}
|
104
|
-
|
105
|
-
args = [ key ]
|
106
|
-
if sinew.runtime_options.before_generate_cache_key.arity == 2
|
107
|
-
args << uri
|
108
|
-
end
|
109
|
-
key = sinew.runtime_options.before_generate_cache_key.call(*args)
|
110
|
-
|
111
|
-
# strip defaults
|
112
|
-
key.delete(:scheme) if key[:scheme] == 'placeholder'
|
113
|
-
key.delete(:method) if key[:method] == 'get'
|
114
|
-
|
115
|
-
# pull out the values, join and pathify
|
116
|
-
path = key.values.select(&:present?).join(',')
|
117
|
-
path = pathify(path)
|
118
|
-
|
119
|
-
# shorten long paths
|
120
|
-
if path.length > 250
|
121
|
-
path = Digest::MD5.hexdigest(path)
|
122
|
-
end
|
123
|
-
|
124
|
-
"#{dir}/#{path}"
|
125
|
-
end
|
126
|
-
protected :calculate_cache_key
|
127
|
-
|
128
77
|
def validate!
|
129
78
|
raise "invalid method #{method}" if !VALID_METHODS.include?(method)
|
130
79
|
raise "invalid url #{uri}" if uri.scheme !~ /^http/
|
131
|
-
raise "can't
|
80
|
+
raise "can't #{method} with a body" if body && !METHODS_WITH_BODY.include?(method)
|
132
81
|
raise "Content-Type doesn't make sense without a body" if content_type && !body
|
133
82
|
end
|
134
83
|
protected :validate!
|
@@ -164,7 +113,7 @@ module Sinew
|
|
164
113
|
s = s.gsub(',,', ',')
|
165
114
|
# encode invalid path chars
|
166
115
|
s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
|
167
|
-
hex = i.
|
116
|
+
hex = i.unpack1('H2')
|
168
117
|
"%#{hex}"
|
169
118
|
end
|
170
119
|
# handle empty case
|
data/lib/sinew/response.rb
CHANGED
@@ -2,7 +2,7 @@ require 'stringio'
|
|
2
2
|
require 'zlib'
|
3
3
|
|
4
4
|
#
|
5
|
-
# An HTTP response.
|
5
|
+
# An HTTP response.
|
6
6
|
#
|
7
7
|
|
8
8
|
module Sinew
|
@@ -13,62 +13,13 @@ module Sinew
|
|
13
13
|
# factory methods
|
14
14
|
#
|
15
15
|
|
16
|
-
def self.from_network(request,
|
17
|
-
Response.new.tap do
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.from_cache(request, body, head)
|
27
|
-
Response.new.tap do |response|
|
28
|
-
response.request = request
|
29
|
-
response.body = body
|
30
|
-
|
31
|
-
# defaults
|
32
|
-
response.uri = request.uri
|
33
|
-
response.code = 200
|
34
|
-
response.headers = {}
|
35
|
-
|
36
|
-
# overwrite with cached response headers
|
37
|
-
if head
|
38
|
-
if head !~ /^{/
|
39
|
-
return from_legacy_head(response, head)
|
40
|
-
end
|
41
|
-
head = JSON.parse(head, symbolize_names: true)
|
42
|
-
response.uri = URI.parse(head[:uri])
|
43
|
-
response.code = head[:code]
|
44
|
-
response.headers = head[:headers]
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.from_error(request, error)
|
50
|
-
Response.new.tap do |response|
|
51
|
-
response.request = request
|
52
|
-
response.uri = request.uri
|
53
|
-
response.body = error.to_s
|
54
|
-
response.code = 999
|
55
|
-
response.headers = {}
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
def self.from_legacy_head(response, head)
|
60
|
-
response.tap do |r|
|
61
|
-
case head
|
62
|
-
when /\ACURLER_ERROR/
|
63
|
-
# error
|
64
|
-
r.code = 999
|
65
|
-
when /\AHTTP/
|
66
|
-
# redirect
|
67
|
-
location = head.scan(/Location: ([^\r\n]+)/).flatten.last
|
68
|
-
r.uri += location
|
69
|
-
else
|
70
|
-
$stderr.puts "unknown cached /head for #{r.uri}"
|
71
|
-
end
|
16
|
+
def self.from_network(request, fday_response)
|
17
|
+
Response.new.tap do
|
18
|
+
_1.request = request
|
19
|
+
_1.uri = fday_response.env.url
|
20
|
+
_1.code = fday_response.status
|
21
|
+
_1.headers = fday_response.headers.to_h
|
22
|
+
_1.body = process_body(fday_response)
|
72
23
|
end
|
73
24
|
end
|
74
25
|
|
@@ -7,8 +7,8 @@ module Sinew
|
|
7
7
|
attr_accessor :retries
|
8
8
|
attr_accessor :rate_limit
|
9
9
|
attr_accessor :headers
|
10
|
-
attr_accessor :
|
11
|
-
attr_accessor :
|
10
|
+
attr_accessor :httpdisk_options
|
11
|
+
attr_accessor :insecure
|
12
12
|
|
13
13
|
def initialize
|
14
14
|
self.retries = 3
|
@@ -16,8 +16,8 @@ module Sinew
|
|
16
16
|
self.headers = {
|
17
17
|
'User-Agent' => "sinew/#{VERSION}",
|
18
18
|
}
|
19
|
-
self.
|
20
|
-
self.
|
19
|
+
self.httpdisk_options = {}
|
20
|
+
self.insecure = false
|
21
21
|
|
22
22
|
# for testing
|
23
23
|
if ENV['SINEW_TEST']
|
data/lib/sinew/version.rb
CHANGED
data/sample.sinew
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
get 'http://
|
1
|
+
get 'http://httpbingo.org'
|
2
2
|
noko.css('ul li a').each do |a|
|
3
3
|
row = {}
|
4
4
|
row[:url] = a[:href]
|
@@ -6,4 +6,4 @@ noko.css('ul li a').each do |a|
|
|
6
6
|
csv_emit(row)
|
7
7
|
end
|
8
8
|
|
9
|
-
get 'http://
|
9
|
+
get 'http://httpbingo.org/redirect/2'
|
data/sinew.gemspec
CHANGED
@@ -5,30 +5,29 @@ require 'sinew/version'
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = 'sinew'
|
7
7
|
s.version = Sinew::VERSION
|
8
|
-
s.platform = Gem::Platform::RUBY
|
9
8
|
s.license = 'MIT'
|
10
|
-
s.authors = [ 'Adam Doppelt' ]
|
9
|
+
s.authors = [ 'Adam Doppelt', 'Nathan Kriege' ]
|
11
10
|
s.email = [ 'amd@gurge.com' ]
|
12
11
|
s.homepage = 'http://github.com/gurgeous/sinew'
|
13
12
|
s.summary = 'Sinew - structured web crawling using recipes.'
|
14
13
|
s.description = 'Crawl web sites easily using ruby recipes, with caching and nokogiri.'
|
15
|
-
s.required_ruby_version = '
|
14
|
+
s.required_ruby_version = '>= 2.7'
|
16
15
|
|
17
|
-
s
|
16
|
+
# what's in the gem?
|
17
|
+
s.files = Dir.chdir(File.expand_path(__dir__)) do
|
18
|
+
`git ls-files -z`.split("\x0").reject { _1.match(%r{^test/}) }
|
19
|
+
end
|
20
|
+
s.bindir = 'bin'
|
21
|
+
s.executables = s.files.grep(%r{^#{s.bindir}/}) { File.basename(_1) }
|
22
|
+
s.require_paths = [ 'lib' ]
|
18
23
|
|
19
|
-
s.add_runtime_dependency '
|
24
|
+
s.add_runtime_dependency 'amazing_print', '~> 1.3'
|
25
|
+
s.add_runtime_dependency 'faraday', '~> 1.4'
|
26
|
+
s.add_runtime_dependency 'faraday-encoding', '~> 0'
|
20
27
|
s.add_runtime_dependency 'htmlentities', '~> 4.3'
|
21
|
-
s.add_runtime_dependency '
|
22
|
-
s.add_runtime_dependency 'nokogiri', '~> 1.
|
28
|
+
s.add_runtime_dependency 'httpdisk', '~> 0'
|
29
|
+
s.add_runtime_dependency 'nokogiri', '~> 1.11'
|
23
30
|
s.add_runtime_dependency 'scripto', '~> 0'
|
24
|
-
s.add_runtime_dependency 'slop', '~> 4.
|
25
|
-
s.add_runtime_dependency '
|
26
|
-
s.add_development_dependency 'minitest', '~> 5.11'
|
27
|
-
s.add_development_dependency 'rake', '~> 12.3'
|
28
|
-
s.add_development_dependency 'webmock', '~> 3.4'
|
29
|
-
|
30
|
-
s.files = `git ls-files`.split("\n")
|
31
|
-
s.test_files = `git ls-files -- test/*`.split("\n")
|
32
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
33
|
-
s.require_paths = [ 'lib' ]
|
31
|
+
s.add_runtime_dependency 'slop', '~> 4.8'
|
32
|
+
s.add_runtime_dependency 'sterile', '~> 1.0'
|
34
33
|
end
|
metadata
CHANGED
@@ -1,73 +1,74 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sinew
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Doppelt
|
8
|
-
|
8
|
+
- Nathan Kriege
|
9
|
+
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2021-05-11 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
+
name: amazing_print
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
16
17
|
requirements:
|
17
18
|
- - "~>"
|
18
19
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
20
|
+
version: '1.3'
|
20
21
|
type: :runtime
|
21
22
|
prerelease: false
|
22
23
|
version_requirements: !ruby/object:Gem::Requirement
|
23
24
|
requirements:
|
24
25
|
- - "~>"
|
25
26
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
27
|
+
version: '1.3'
|
27
28
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
29
|
+
name: faraday
|
29
30
|
requirement: !ruby/object:Gem::Requirement
|
30
31
|
requirements:
|
31
32
|
- - "~>"
|
32
33
|
- !ruby/object:Gem::Version
|
33
|
-
version: '4
|
34
|
+
version: '1.4'
|
34
35
|
type: :runtime
|
35
36
|
prerelease: false
|
36
37
|
version_requirements: !ruby/object:Gem::Requirement
|
37
38
|
requirements:
|
38
39
|
- - "~>"
|
39
40
|
- !ruby/object:Gem::Version
|
40
|
-
version: '4
|
41
|
+
version: '1.4'
|
41
42
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
43
|
+
name: faraday-encoding
|
43
44
|
requirement: !ruby/object:Gem::Requirement
|
44
45
|
requirements:
|
45
46
|
- - "~>"
|
46
47
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0
|
48
|
+
version: '0'
|
48
49
|
type: :runtime
|
49
50
|
prerelease: false
|
50
51
|
version_requirements: !ruby/object:Gem::Requirement
|
51
52
|
requirements:
|
52
53
|
- - "~>"
|
53
54
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0
|
55
|
+
version: '0'
|
55
56
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
57
|
+
name: htmlentities
|
57
58
|
requirement: !ruby/object:Gem::Requirement
|
58
59
|
requirements:
|
59
60
|
- - "~>"
|
60
61
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
62
|
+
version: '4.3'
|
62
63
|
type: :runtime
|
63
64
|
prerelease: false
|
64
65
|
version_requirements: !ruby/object:Gem::Requirement
|
65
66
|
requirements:
|
66
67
|
- - "~>"
|
67
68
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
69
|
+
version: '4.3'
|
69
70
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
71
|
+
name: httpdisk
|
71
72
|
requirement: !ruby/object:Gem::Requirement
|
72
73
|
requirements:
|
73
74
|
- - "~>"
|
@@ -81,75 +82,61 @@ dependencies:
|
|
81
82
|
- !ruby/object:Gem::Version
|
82
83
|
version: '0'
|
83
84
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
85
|
+
name: nokogiri
|
85
86
|
requirement: !ruby/object:Gem::Requirement
|
86
87
|
requirements:
|
87
88
|
- - "~>"
|
88
89
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
90
|
+
version: '1.11'
|
90
91
|
type: :runtime
|
91
92
|
prerelease: false
|
92
93
|
version_requirements: !ruby/object:Gem::Requirement
|
93
94
|
requirements:
|
94
95
|
- - "~>"
|
95
96
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
97
|
+
version: '1.11'
|
97
98
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
99
|
+
name: scripto
|
99
100
|
requirement: !ruby/object:Gem::Requirement
|
100
101
|
requirements:
|
101
102
|
- - "~>"
|
102
103
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
104
|
+
version: '0'
|
104
105
|
type: :runtime
|
105
106
|
prerelease: false
|
106
107
|
version_requirements: !ruby/object:Gem::Requirement
|
107
108
|
requirements:
|
108
109
|
- - "~>"
|
109
110
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: minitest
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - "~>"
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '5.11'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - "~>"
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '5.11'
|
111
|
+
version: '0'
|
125
112
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
113
|
+
name: slop
|
127
114
|
requirement: !ruby/object:Gem::Requirement
|
128
115
|
requirements:
|
129
116
|
- - "~>"
|
130
117
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
132
|
-
type: :
|
118
|
+
version: '4.8'
|
119
|
+
type: :runtime
|
133
120
|
prerelease: false
|
134
121
|
version_requirements: !ruby/object:Gem::Requirement
|
135
122
|
requirements:
|
136
123
|
- - "~>"
|
137
124
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
125
|
+
version: '4.8'
|
139
126
|
- !ruby/object:Gem::Dependency
|
140
|
-
name:
|
127
|
+
name: sterile
|
141
128
|
requirement: !ruby/object:Gem::Requirement
|
142
129
|
requirements:
|
143
130
|
- - "~>"
|
144
131
|
- !ruby/object:Gem::Version
|
145
|
-
version: '
|
146
|
-
type: :
|
132
|
+
version: '1.0'
|
133
|
+
type: :runtime
|
147
134
|
prerelease: false
|
148
135
|
version_requirements: !ruby/object:Gem::Requirement
|
149
136
|
requirements:
|
150
137
|
- - "~>"
|
151
138
|
- !ruby/object:Gem::Version
|
152
|
-
version: '
|
139
|
+
version: '1.0'
|
153
140
|
description: Crawl web sites easily using ruby recipes, with caching and nokogiri.
|
154
141
|
email:
|
155
142
|
- amd@gurge.com
|
@@ -158,9 +145,9 @@ executables:
|
|
158
145
|
extensions: []
|
159
146
|
extra_rdoc_files: []
|
160
147
|
files:
|
148
|
+
- ".github/workflows/test.yml"
|
161
149
|
- ".gitignore"
|
162
150
|
- ".rubocop.yml"
|
163
|
-
- ".travis.yml"
|
164
151
|
- ".vscode/extensions.json"
|
165
152
|
- ".vscode/settings.json"
|
166
153
|
- Gemfile
|
@@ -169,7 +156,9 @@ files:
|
|
169
156
|
- Rakefile
|
170
157
|
- bin/sinew
|
171
158
|
- lib/sinew.rb
|
172
|
-
- lib/sinew/
|
159
|
+
- lib/sinew/connection.rb
|
160
|
+
- lib/sinew/connection/log_formatter.rb
|
161
|
+
- lib/sinew/connection/rate_limit.rb
|
173
162
|
- lib/sinew/core_ext.rb
|
174
163
|
- lib/sinew/dsl.rb
|
175
164
|
- lib/sinew/main.rb
|
@@ -181,74 +170,27 @@ files:
|
|
181
170
|
- lib/sinew/version.rb
|
182
171
|
- sample.sinew
|
183
172
|
- sinew.gemspec
|
184
|
-
- test/legacy/eu.httpbin.org/head/redirect,3
|
185
|
-
- test/legacy/eu.httpbin.org/head/status,500
|
186
|
-
- test/legacy/eu.httpbin.org/redirect,3
|
187
|
-
- test/legacy/eu.httpbin.org/status,500
|
188
|
-
- test/legacy/legacy.sinew
|
189
|
-
- test/recipes/array_header.sinew
|
190
|
-
- test/recipes/basic.sinew
|
191
|
-
- test/recipes/dups.sinew
|
192
|
-
- test/recipes/implicit_header.sinew
|
193
|
-
- test/recipes/limit.sinew
|
194
|
-
- test/recipes/noko.sinew
|
195
|
-
- test/recipes/uri.sinew
|
196
|
-
- test/recipes/xml.sinew
|
197
|
-
- test/test.html
|
198
|
-
- test/test_cache.rb
|
199
|
-
- test/test_helper.rb
|
200
|
-
- test/test_legacy.rb
|
201
|
-
- test/test_main.rb
|
202
|
-
- test/test_nokogiri_ext.rb
|
203
|
-
- test/test_output.rb
|
204
|
-
- test/test_recipes.rb
|
205
|
-
- test/test_requests.rb
|
206
|
-
- test/test_utf8.rb
|
207
173
|
homepage: http://github.com/gurgeous/sinew
|
208
174
|
licenses:
|
209
175
|
- MIT
|
210
176
|
metadata: {}
|
211
|
-
post_install_message:
|
177
|
+
post_install_message:
|
212
178
|
rdoc_options: []
|
213
179
|
require_paths:
|
214
180
|
- lib
|
215
181
|
required_ruby_version: !ruby/object:Gem::Requirement
|
216
182
|
requirements:
|
217
|
-
- - "
|
183
|
+
- - ">="
|
218
184
|
- !ruby/object:Gem::Version
|
219
|
-
version: '2.
|
185
|
+
version: '2.7'
|
220
186
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
221
187
|
requirements:
|
222
188
|
- - ">="
|
223
189
|
- !ruby/object:Gem::Version
|
224
190
|
version: '0'
|
225
191
|
requirements: []
|
226
|
-
|
227
|
-
|
228
|
-
signing_key:
|
192
|
+
rubygems_version: 3.1.4
|
193
|
+
signing_key:
|
229
194
|
specification_version: 4
|
230
195
|
summary: Sinew - structured web crawling using recipes.
|
231
|
-
test_files:
|
232
|
-
- test/legacy/eu.httpbin.org/head/redirect,3
|
233
|
-
- test/legacy/eu.httpbin.org/head/status,500
|
234
|
-
- test/legacy/eu.httpbin.org/redirect,3
|
235
|
-
- test/legacy/eu.httpbin.org/status,500
|
236
|
-
- test/legacy/legacy.sinew
|
237
|
-
- test/recipes/array_header.sinew
|
238
|
-
- test/recipes/basic.sinew
|
239
|
-
- test/recipes/dups.sinew
|
240
|
-
- test/recipes/implicit_header.sinew
|
241
|
-
- test/recipes/limit.sinew
|
242
|
-
- test/recipes/noko.sinew
|
243
|
-
- test/recipes/uri.sinew
|
244
|
-
- test/recipes/xml.sinew
|
245
|
-
- test/test.html
|
246
|
-
- test/test_cache.rb
|
247
|
-
- test/test_helper.rb
|
248
|
-
- test/test_legacy.rb
|
249
|
-
- test/test_main.rb
|
250
|
-
- test/test_nokogiri_ext.rb
|
251
|
-
- test/test_output.rb
|
252
|
-
- test/test_recipes.rb
|
253
|
-
- test/test_requests.rb
|
254
|
-
- test/test_utf8.rb
|
196
|
+
test_files: []
|