sinew 2.0.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,8 @@
1
+ require 'stringio'
2
+ require 'zlib'
3
+
1
4
  #
2
- # An HTTP response. Mostly a wrapper around HTTParty.
5
+ # An HTTP response.
3
6
  #
4
7
 
5
8
  module Sinew
@@ -10,69 +13,36 @@ module Sinew
10
13
  # factory methods
11
14
  #
12
15
 
13
- def self.from_network(request, party_response)
14
- Response.new.tap do |response|
15
- response.request = request
16
- response.uri = party_response.request.last_uri
17
- response.code = party_response.code
18
- response.headers = party_response.headers.to_h
19
-
20
- # force to utf-8 as best we can
21
- body = party_response.body
22
- if body.encoding != Encoding::UTF_8
23
- body = body.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
24
- end
25
- response.body = body
16
+ def self.from_network(request, fday_response)
17
+ Response.new.tap do
18
+ _1.request = request
19
+ _1.uri = fday_response.env.url
20
+ _1.code = fday_response.status
21
+ _1.headers = fday_response.headers.to_h
22
+ _1.body = process_body(fday_response)
26
23
  end
27
24
  end
28
25
 
29
- def self.from_cache(request, body, head)
30
- Response.new.tap do |response|
31
- response.request = request
32
- response.body = body
26
+ # helper for decoding bodies before parsing
27
+ def self.process_body(response)
28
+ body = response.body
33
29
 
34
- # defaults
35
- response.uri = request.uri
36
- response.code = 200
37
- response.headers = {}
30
+ # inflate if necessary
31
+ bits = body[0, 10].force_encoding('BINARY')
32
+ if bits =~ /\A\x1f\x8b/n
33
+ body = Zlib::GzipReader.new(StringIO.new(body)).read
34
+ end
38
35
 
39
- # overwrite with cached response headers
40
- if head
41
- if head !~ /^{/
42
- return from_legacy_head(response, head)
36
+ # force to utf-8 if we think this could be text
37
+ if body.encoding != Encoding::UTF_8
38
+ if content_type = response.headers['content-type']
39
+ if content_type =~ /\b(html|javascript|json|text|xml)\b/
40
+ body = body.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
43
41
  end
44
- head = JSON.parse(head, symbolize_names: true)
45
- response.uri = URI.parse(head[:uri])
46
- response.code = head[:code]
47
- response.headers = head[:headers]
48
42
  end
49
43
  end
50
- end
51
-
52
- def self.from_timeout(request)
53
- Response.new.tap do |response|
54
- response.request = request
55
- response.uri = request.uri
56
- response.body = 'timeout'
57
- response.code = 999
58
- response.headers = {}
59
- end
60
- end
61
44
 
62
- def self.from_legacy_head(response, head)
63
- response.tap do |response|
64
- case head
65
- when /\ACURLER_ERROR/
66
- # error
67
- response.code = 999
68
- when /\AHTTP/
69
- # redirect
70
- location = head.scan(/Location: ([^\r\n]+)/).flatten.last
71
- response.uri += location
72
- else
73
- $stderr.puts "unknown cached /head for #{response.uri}"
74
- end
75
- end
45
+ body
76
46
  end
77
47
 
78
48
  #
@@ -7,7 +7,8 @@ module Sinew
7
7
  attr_accessor :retries
8
8
  attr_accessor :rate_limit
9
9
  attr_accessor :headers
10
- attr_accessor :before_generate_cache_key
10
+ attr_accessor :httpdisk_options
11
+ attr_accessor :insecure
11
12
 
12
13
  def initialize
13
14
  self.retries = 3
@@ -15,7 +16,8 @@ module Sinew
15
16
  self.headers = {
16
17
  'User-Agent' => "sinew/#{VERSION}",
17
18
  }
18
- self.before_generate_cache_key = ->(i) { i }
19
+ self.httpdisk_options = {}
20
+ self.insecure = false
19
21
 
20
22
  # for testing
21
23
  if ENV['SINEW_TEST']
data/lib/sinew/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Sinew
2
2
  # Gem version
3
- VERSION = '2.0.1'.freeze
3
+ VERSION = '3.0.0'.freeze
4
4
  end
data/sample.sinew CHANGED
@@ -1,4 +1,4 @@
1
- get 'http://httpbin.org'
1
+ get 'http://httpbingo.org'
2
2
  noko.css('ul li a').each do |a|
3
3
  row = {}
4
4
  row[:url] = a[:href]
@@ -6,4 +6,4 @@ noko.css('ul li a').each do |a|
6
6
  csv_emit(row)
7
7
  end
8
8
 
9
- get 'http://httpbin.org/redirect/2'
9
+ get 'http://httpbingo.org/redirect/2'
data/sinew.gemspec CHANGED
@@ -5,30 +5,29 @@ require 'sinew/version'
5
5
  Gem::Specification.new do |s|
6
6
  s.name = 'sinew'
7
7
  s.version = Sinew::VERSION
8
- s.platform = Gem::Platform::RUBY
9
8
  s.license = 'MIT'
10
- s.authors = [ 'Adam Doppelt' ]
9
+ s.authors = [ 'Adam Doppelt', 'Nathan Kriege' ]
11
10
  s.email = [ 'amd@gurge.com' ]
12
11
  s.homepage = 'http://github.com/gurgeous/sinew'
13
12
  s.summary = 'Sinew - structured web crawling using recipes.'
14
13
  s.description = 'Crawl web sites easily using ruby recipes, with caching and nokogiri.'
15
- s.required_ruby_version = '~> 2.3'
14
+ s.required_ruby_version = '>= 2.7'
16
15
 
17
- s.rubyforge_project = 'sinew'
16
+ # what's in the gem?
17
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
18
+ `git ls-files -z`.split("\x0").reject { _1.match(%r{^test/}) }
19
+ end
20
+ s.bindir = 'bin'
21
+ s.executables = s.files.grep(%r{^#{s.bindir}/}) { File.basename(_1) }
22
+ s.require_paths = [ 'lib' ]
18
23
 
19
- s.add_runtime_dependency 'awesome_print', '~> 1.8'
24
+ s.add_runtime_dependency 'amazing_print', '~> 1.3'
25
+ s.add_runtime_dependency 'faraday', '~> 1.4'
26
+ s.add_runtime_dependency 'faraday-encoding', '~> 0'
20
27
  s.add_runtime_dependency 'htmlentities', '~> 4.3'
21
- s.add_runtime_dependency 'httparty', '~> 0.16'
22
- s.add_runtime_dependency 'nokogiri', '~> 1.8'
28
+ s.add_runtime_dependency 'httpdisk', '~> 0'
29
+ s.add_runtime_dependency 'nokogiri', '~> 1.11'
23
30
  s.add_runtime_dependency 'scripto', '~> 0'
24
- s.add_runtime_dependency 'slop', '~> 4.6'
25
- s.add_runtime_dependency 'stringex', '~> 2.8'
26
- s.add_development_dependency 'minitest', '~> 5.11'
27
- s.add_development_dependency 'rake', '~> 12.3'
28
- s.add_development_dependency 'webmock', '~> 3.4'
29
-
30
- s.files = `git ls-files`.split("\n")
31
- s.test_files = `git ls-files -- test/*`.split("\n")
32
- s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
33
- s.require_paths = [ 'lib' ]
31
+ s.add_runtime_dependency 'slop', '~> 4.8'
32
+ s.add_runtime_dependency 'sterile', '~> 1.0'
34
33
  end
metadata CHANGED
@@ -1,73 +1,74 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sinew
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
8
- autorequire:
8
+ - Nathan Kriege
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2018-05-02 00:00:00.000000000 Z
12
+ date: 2021-05-11 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
- name: awesome_print
15
+ name: amazing_print
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
17
18
  - - "~>"
18
19
  - !ruby/object:Gem::Version
19
- version: '1.8'
20
+ version: '1.3'
20
21
  type: :runtime
21
22
  prerelease: false
22
23
  version_requirements: !ruby/object:Gem::Requirement
23
24
  requirements:
24
25
  - - "~>"
25
26
  - !ruby/object:Gem::Version
26
- version: '1.8'
27
+ version: '1.3'
27
28
  - !ruby/object:Gem::Dependency
28
- name: htmlentities
29
+ name: faraday
29
30
  requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
32
  - - "~>"
32
33
  - !ruby/object:Gem::Version
33
- version: '4.3'
34
+ version: '1.4'
34
35
  type: :runtime
35
36
  prerelease: false
36
37
  version_requirements: !ruby/object:Gem::Requirement
37
38
  requirements:
38
39
  - - "~>"
39
40
  - !ruby/object:Gem::Version
40
- version: '4.3'
41
+ version: '1.4'
41
42
  - !ruby/object:Gem::Dependency
42
- name: httparty
43
+ name: faraday-encoding
43
44
  requirement: !ruby/object:Gem::Requirement
44
45
  requirements:
45
46
  - - "~>"
46
47
  - !ruby/object:Gem::Version
47
- version: '0.16'
48
+ version: '0'
48
49
  type: :runtime
49
50
  prerelease: false
50
51
  version_requirements: !ruby/object:Gem::Requirement
51
52
  requirements:
52
53
  - - "~>"
53
54
  - !ruby/object:Gem::Version
54
- version: '0.16'
55
+ version: '0'
55
56
  - !ruby/object:Gem::Dependency
56
- name: nokogiri
57
+ name: htmlentities
57
58
  requirement: !ruby/object:Gem::Requirement
58
59
  requirements:
59
60
  - - "~>"
60
61
  - !ruby/object:Gem::Version
61
- version: '1.8'
62
+ version: '4.3'
62
63
  type: :runtime
63
64
  prerelease: false
64
65
  version_requirements: !ruby/object:Gem::Requirement
65
66
  requirements:
66
67
  - - "~>"
67
68
  - !ruby/object:Gem::Version
68
- version: '1.8'
69
+ version: '4.3'
69
70
  - !ruby/object:Gem::Dependency
70
- name: scripto
71
+ name: httpdisk
71
72
  requirement: !ruby/object:Gem::Requirement
72
73
  requirements:
73
74
  - - "~>"
@@ -81,75 +82,61 @@ dependencies:
81
82
  - !ruby/object:Gem::Version
82
83
  version: '0'
83
84
  - !ruby/object:Gem::Dependency
84
- name: slop
85
+ name: nokogiri
85
86
  requirement: !ruby/object:Gem::Requirement
86
87
  requirements:
87
88
  - - "~>"
88
89
  - !ruby/object:Gem::Version
89
- version: '4.6'
90
+ version: '1.11'
90
91
  type: :runtime
91
92
  prerelease: false
92
93
  version_requirements: !ruby/object:Gem::Requirement
93
94
  requirements:
94
95
  - - "~>"
95
96
  - !ruby/object:Gem::Version
96
- version: '4.6'
97
+ version: '1.11'
97
98
  - !ruby/object:Gem::Dependency
98
- name: stringex
99
+ name: scripto
99
100
  requirement: !ruby/object:Gem::Requirement
100
101
  requirements:
101
102
  - - "~>"
102
103
  - !ruby/object:Gem::Version
103
- version: '2.8'
104
+ version: '0'
104
105
  type: :runtime
105
106
  prerelease: false
106
107
  version_requirements: !ruby/object:Gem::Requirement
107
108
  requirements:
108
109
  - - "~>"
109
110
  - !ruby/object:Gem::Version
110
- version: '2.8'
111
- - !ruby/object:Gem::Dependency
112
- name: minitest
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '5.11'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '5.11'
111
+ version: '0'
125
112
  - !ruby/object:Gem::Dependency
126
- name: rake
113
+ name: slop
127
114
  requirement: !ruby/object:Gem::Requirement
128
115
  requirements:
129
116
  - - "~>"
130
117
  - !ruby/object:Gem::Version
131
- version: '12.3'
132
- type: :development
118
+ version: '4.8'
119
+ type: :runtime
133
120
  prerelease: false
134
121
  version_requirements: !ruby/object:Gem::Requirement
135
122
  requirements:
136
123
  - - "~>"
137
124
  - !ruby/object:Gem::Version
138
- version: '12.3'
125
+ version: '4.8'
139
126
  - !ruby/object:Gem::Dependency
140
- name: webmock
127
+ name: sterile
141
128
  requirement: !ruby/object:Gem::Requirement
142
129
  requirements:
143
130
  - - "~>"
144
131
  - !ruby/object:Gem::Version
145
- version: '3.4'
146
- type: :development
132
+ version: '1.0'
133
+ type: :runtime
147
134
  prerelease: false
148
135
  version_requirements: !ruby/object:Gem::Requirement
149
136
  requirements:
150
137
  - - "~>"
151
138
  - !ruby/object:Gem::Version
152
- version: '3.4'
139
+ version: '1.0'
153
140
  description: Crawl web sites easily using ruby recipes, with caching and nokogiri.
154
141
  email:
155
142
  - amd@gurge.com
@@ -158,9 +145,9 @@ executables:
158
145
  extensions: []
159
146
  extra_rdoc_files: []
160
147
  files:
148
+ - ".github/workflows/test.yml"
161
149
  - ".gitignore"
162
150
  - ".rubocop.yml"
163
- - ".travis.yml"
164
151
  - ".vscode/extensions.json"
165
152
  - ".vscode/settings.json"
166
153
  - Gemfile
@@ -169,7 +156,9 @@ files:
169
156
  - Rakefile
170
157
  - bin/sinew
171
158
  - lib/sinew.rb
172
- - lib/sinew/cache.rb
159
+ - lib/sinew/connection.rb
160
+ - lib/sinew/connection/log_formatter.rb
161
+ - lib/sinew/connection/rate_limit.rb
173
162
  - lib/sinew/core_ext.rb
174
163
  - lib/sinew/dsl.rb
175
164
  - lib/sinew/main.rb
@@ -181,56 +170,27 @@ files:
181
170
  - lib/sinew/version.rb
182
171
  - sample.sinew
183
172
  - sinew.gemspec
184
- - test/legacy/eu.httpbin.org/head/redirect,3
185
- - test/legacy/eu.httpbin.org/head/status,500
186
- - test/legacy/eu.httpbin.org/redirect,3
187
- - test/legacy/eu.httpbin.org/status,500
188
- - test/legacy/legacy.sinew
189
- - test/test.html
190
- - test/test_cache.rb
191
- - test/test_helper.rb
192
- - test/test_legacy.rb
193
- - test/test_main.rb
194
- - test/test_nokogiri_ext.rb
195
- - test/test_output.rb
196
- - test/test_requests.rb
197
- - test/test_utf8.rb
198
173
  homepage: http://github.com/gurgeous/sinew
199
174
  licenses:
200
175
  - MIT
201
176
  metadata: {}
202
- post_install_message:
177
+ post_install_message:
203
178
  rdoc_options: []
204
179
  require_paths:
205
180
  - lib
206
181
  required_ruby_version: !ruby/object:Gem::Requirement
207
182
  requirements:
208
- - - "~>"
183
+ - - ">="
209
184
  - !ruby/object:Gem::Version
210
- version: '2.3'
185
+ version: '2.7'
211
186
  required_rubygems_version: !ruby/object:Gem::Requirement
212
187
  requirements:
213
188
  - - ">="
214
189
  - !ruby/object:Gem::Version
215
190
  version: '0'
216
191
  requirements: []
217
- rubyforge_project: sinew
218
- rubygems_version: 2.7.6
219
- signing_key:
192
+ rubygems_version: 3.1.4
193
+ signing_key:
220
194
  specification_version: 4
221
195
  summary: Sinew - structured web crawling using recipes.
222
- test_files:
223
- - test/legacy/eu.httpbin.org/head/redirect,3
224
- - test/legacy/eu.httpbin.org/head/status,500
225
- - test/legacy/eu.httpbin.org/redirect,3
226
- - test/legacy/eu.httpbin.org/status,500
227
- - test/legacy/legacy.sinew
228
- - test/test.html
229
- - test/test_cache.rb
230
- - test/test_helper.rb
231
- - test/test_legacy.rb
232
- - test/test_main.rb
233
- - test/test_nokogiri_ext.rb
234
- - test/test_output.rb
235
- - test/test_requests.rb
236
- - test/test_utf8.rb
196
+ test_files: []