sinew 2.0.2 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.rubocop.yml +9 -6
  4. data/.vscode/settings.json +0 -10
  5. data/Gemfile +9 -0
  6. data/README.md +62 -54
  7. data/Rakefile +33 -18
  8. data/bin/sinew +2 -0
  9. data/lib/sinew.rb +0 -1
  10. data/lib/sinew/connection.rb +52 -0
  11. data/lib/sinew/connection/log_formatter.rb +22 -0
  12. data/lib/sinew/connection/rate_limit.rb +29 -0
  13. data/lib/sinew/core_ext.rb +1 -1
  14. data/lib/sinew/dsl.rb +10 -6
  15. data/lib/sinew/main.rb +29 -56
  16. data/lib/sinew/output.rb +7 -16
  17. data/lib/sinew/request.rb +22 -87
  18. data/lib/sinew/response.rb +8 -57
  19. data/lib/sinew/runtime_options.rb +4 -4
  20. data/lib/sinew/version.rb +1 -1
  21. data/sample.sinew +2 -2
  22. data/sinew.gemspec +16 -18
  23. metadata +38 -110
  24. data/.travis.yml +0 -4
  25. data/lib/sinew/cache.rb +0 -79
  26. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  27. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  28. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  29. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  30. data/test/legacy/legacy.sinew +0 -2
  31. data/test/recipes/array_header.sinew +0 -6
  32. data/test/recipes/basic.sinew +0 -8
  33. data/test/recipes/dups.sinew +0 -7
  34. data/test/recipes/implicit_header.sinew +0 -5
  35. data/test/recipes/limit.sinew +0 -11
  36. data/test/recipes/noko.sinew +0 -9
  37. data/test/recipes/uri.sinew +0 -11
  38. data/test/recipes/xml.sinew +0 -8
  39. data/test/test.html +0 -45
  40. data/test/test_cache.rb +0 -69
  41. data/test/test_helper.rb +0 -123
  42. data/test/test_legacy.rb +0 -23
  43. data/test/test_main.rb +0 -34
  44. data/test/test_nokogiri_ext.rb +0 -18
  45. data/test/test_output.rb +0 -56
  46. data/test/test_recipes.rb +0 -60
  47. data/test/test_requests.rb +0 -135
  48. data/test/test_utf8.rb +0 -39
@@ -7,8 +7,8 @@ module Sinew
7
7
  attr_accessor :retries
8
8
  attr_accessor :rate_limit
9
9
  attr_accessor :headers
10
- attr_accessor :httparty_options
11
- attr_accessor :before_generate_cache_key
10
+ attr_accessor :httpdisk_options
11
+ attr_accessor :insecure
12
12
 
13
13
  def initialize
14
14
  self.retries = 3
@@ -16,8 +16,8 @@ module Sinew
16
16
  self.headers = {
17
17
  'User-Agent' => "sinew/#{VERSION}",
18
18
  }
19
- self.httparty_options = {}
20
- self.before_generate_cache_key = ->(i) { i }
19
+ self.httpdisk_options = {}
20
+ self.insecure = false
21
21
 
22
22
  # for testing
23
23
  if ENV['SINEW_TEST']
data/lib/sinew/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Sinew
2
2
  # Gem version
3
- VERSION = '2.0.2'.freeze
3
+ VERSION = '3.0.1'.freeze
4
4
  end
data/sample.sinew CHANGED
@@ -1,4 +1,4 @@
1
- get 'http://httpbin.org'
1
+ get 'http://httpbingo.org'
2
2
  noko.css('ul li a').each do |a|
3
3
  row = {}
4
4
  row[:url] = a[:href]
@@ -6,4 +6,4 @@ noko.css('ul li a').each do |a|
6
6
  csv_emit(row)
7
7
  end
8
8
 
9
- get 'http://httpbin.org/redirect/2'
9
+ get 'http://httpbingo.org/redirect/2'
data/sinew.gemspec CHANGED
@@ -5,30 +5,28 @@ require 'sinew/version'
5
5
  Gem::Specification.new do |s|
6
6
  s.name = 'sinew'
7
7
  s.version = Sinew::VERSION
8
- s.platform = Gem::Platform::RUBY
9
8
  s.license = 'MIT'
10
- s.authors = [ 'Adam Doppelt' ]
9
+ s.authors = [ 'Adam Doppelt', 'Nathan Kriege' ]
11
10
  s.email = [ 'amd@gurge.com' ]
12
11
  s.homepage = 'http://github.com/gurgeous/sinew'
13
12
  s.summary = 'Sinew - structured web crawling using recipes.'
14
13
  s.description = 'Crawl web sites easily using ruby recipes, with caching and nokogiri.'
15
- s.required_ruby_version = '~> 2.3'
14
+ s.required_ruby_version = '>= 2.7'
16
15
 
17
- s.rubyforge_project = 'sinew'
16
+ # what's in the gem?
17
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
18
+ `git ls-files -z`.split("\x0").reject { _1.match(%r{^test/}) }
19
+ end
20
+ s.bindir = 'bin'
21
+ s.executables = s.files.grep(%r{^#{s.bindir}/}) { File.basename(_1) }
22
+ s.require_paths = [ 'lib' ]
18
23
 
19
- s.add_runtime_dependency 'awesome_print', '~> 1.8'
20
- s.add_runtime_dependency 'htmlentities', '~> 4.3'
21
- s.add_runtime_dependency 'httparty', '~> 0.16'
22
- s.add_runtime_dependency 'nokogiri', '~> 1.8'
24
+ s.add_runtime_dependency 'amazing_print', '~> 1.3'
25
+ s.add_runtime_dependency 'faraday', '~> 1.4'
26
+ s.add_runtime_dependency 'faraday-encoding', '~> 0'
27
+ s.add_runtime_dependency 'httpdisk', '~> 0'
28
+ s.add_runtime_dependency 'nokogiri', '~> 1.11'
23
29
  s.add_runtime_dependency 'scripto', '~> 0'
24
- s.add_runtime_dependency 'slop', '~> 4.6'
25
- s.add_runtime_dependency 'stringex', '~> 2.8'
26
- s.add_development_dependency 'minitest', '~> 5.11'
27
- s.add_development_dependency 'rake', '~> 12.3'
28
- s.add_development_dependency 'webmock', '~> 3.4'
29
-
30
- s.files = `git ls-files`.split("\n")
31
- s.test_files = `git ls-files -- test/*`.split("\n")
32
- s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
33
- s.require_paths = [ 'lib' ]
30
+ s.add_runtime_dependency 'slop', '~> 4.8'
31
+ s.add_runtime_dependency 'sterile', '~> 1.0'
34
32
  end
metadata CHANGED
@@ -1,73 +1,60 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sinew
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
8
- autorequire:
8
+ - Nathan Kriege
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2018-05-03 00:00:00.000000000 Z
12
+ date: 2021-06-04 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
- name: awesome_print
15
+ name: amazing_print
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
17
18
  - - "~>"
18
19
  - !ruby/object:Gem::Version
19
- version: '1.8'
20
+ version: '1.3'
20
21
  type: :runtime
21
22
  prerelease: false
22
23
  version_requirements: !ruby/object:Gem::Requirement
23
24
  requirements:
24
25
  - - "~>"
25
26
  - !ruby/object:Gem::Version
26
- version: '1.8'
27
+ version: '1.3'
27
28
  - !ruby/object:Gem::Dependency
28
- name: htmlentities
29
+ name: faraday
29
30
  requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
32
  - - "~>"
32
33
  - !ruby/object:Gem::Version
33
- version: '4.3'
34
+ version: '1.4'
34
35
  type: :runtime
35
36
  prerelease: false
36
37
  version_requirements: !ruby/object:Gem::Requirement
37
38
  requirements:
38
39
  - - "~>"
39
40
  - !ruby/object:Gem::Version
40
- version: '4.3'
41
+ version: '1.4'
41
42
  - !ruby/object:Gem::Dependency
42
- name: httparty
43
+ name: faraday-encoding
43
44
  requirement: !ruby/object:Gem::Requirement
44
45
  requirements:
45
46
  - - "~>"
46
47
  - !ruby/object:Gem::Version
47
- version: '0.16'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '0.16'
55
- - !ruby/object:Gem::Dependency
56
- name: nokogiri
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '1.8'
48
+ version: '0'
62
49
  type: :runtime
63
50
  prerelease: false
64
51
  version_requirements: !ruby/object:Gem::Requirement
65
52
  requirements:
66
53
  - - "~>"
67
54
  - !ruby/object:Gem::Version
68
- version: '1.8'
55
+ version: '0'
69
56
  - !ruby/object:Gem::Dependency
70
- name: scripto
57
+ name: httpdisk
71
58
  requirement: !ruby/object:Gem::Requirement
72
59
  requirements:
73
60
  - - "~>"
@@ -81,75 +68,61 @@ dependencies:
81
68
  - !ruby/object:Gem::Version
82
69
  version: '0'
83
70
  - !ruby/object:Gem::Dependency
84
- name: slop
71
+ name: nokogiri
85
72
  requirement: !ruby/object:Gem::Requirement
86
73
  requirements:
87
74
  - - "~>"
88
75
  - !ruby/object:Gem::Version
89
- version: '4.6'
76
+ version: '1.11'
90
77
  type: :runtime
91
78
  prerelease: false
92
79
  version_requirements: !ruby/object:Gem::Requirement
93
80
  requirements:
94
81
  - - "~>"
95
82
  - !ruby/object:Gem::Version
96
- version: '4.6'
83
+ version: '1.11'
97
84
  - !ruby/object:Gem::Dependency
98
- name: stringex
85
+ name: scripto
99
86
  requirement: !ruby/object:Gem::Requirement
100
87
  requirements:
101
88
  - - "~>"
102
89
  - !ruby/object:Gem::Version
103
- version: '2.8'
90
+ version: '0'
104
91
  type: :runtime
105
92
  prerelease: false
106
93
  version_requirements: !ruby/object:Gem::Requirement
107
94
  requirements:
108
95
  - - "~>"
109
96
  - !ruby/object:Gem::Version
110
- version: '2.8'
111
- - !ruby/object:Gem::Dependency
112
- name: minitest
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '5.11'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '5.11'
97
+ version: '0'
125
98
  - !ruby/object:Gem::Dependency
126
- name: rake
99
+ name: slop
127
100
  requirement: !ruby/object:Gem::Requirement
128
101
  requirements:
129
102
  - - "~>"
130
103
  - !ruby/object:Gem::Version
131
- version: '12.3'
132
- type: :development
104
+ version: '4.8'
105
+ type: :runtime
133
106
  prerelease: false
134
107
  version_requirements: !ruby/object:Gem::Requirement
135
108
  requirements:
136
109
  - - "~>"
137
110
  - !ruby/object:Gem::Version
138
- version: '12.3'
111
+ version: '4.8'
139
112
  - !ruby/object:Gem::Dependency
140
- name: webmock
113
+ name: sterile
141
114
  requirement: !ruby/object:Gem::Requirement
142
115
  requirements:
143
116
  - - "~>"
144
117
  - !ruby/object:Gem::Version
145
- version: '3.4'
146
- type: :development
118
+ version: '1.0'
119
+ type: :runtime
147
120
  prerelease: false
148
121
  version_requirements: !ruby/object:Gem::Requirement
149
122
  requirements:
150
123
  - - "~>"
151
124
  - !ruby/object:Gem::Version
152
- version: '3.4'
125
+ version: '1.0'
153
126
  description: Crawl web sites easily using ruby recipes, with caching and nokogiri.
154
127
  email:
155
128
  - amd@gurge.com
@@ -158,9 +131,9 @@ executables:
158
131
  extensions: []
159
132
  extra_rdoc_files: []
160
133
  files:
134
+ - ".github/workflows/test.yml"
161
135
  - ".gitignore"
162
136
  - ".rubocop.yml"
163
- - ".travis.yml"
164
137
  - ".vscode/extensions.json"
165
138
  - ".vscode/settings.json"
166
139
  - Gemfile
@@ -169,7 +142,9 @@ files:
169
142
  - Rakefile
170
143
  - bin/sinew
171
144
  - lib/sinew.rb
172
- - lib/sinew/cache.rb
145
+ - lib/sinew/connection.rb
146
+ - lib/sinew/connection/log_formatter.rb
147
+ - lib/sinew/connection/rate_limit.rb
173
148
  - lib/sinew/core_ext.rb
174
149
  - lib/sinew/dsl.rb
175
150
  - lib/sinew/main.rb
@@ -181,74 +156,27 @@ files:
181
156
  - lib/sinew/version.rb
182
157
  - sample.sinew
183
158
  - sinew.gemspec
184
- - test/legacy/eu.httpbin.org/head/redirect,3
185
- - test/legacy/eu.httpbin.org/head/status,500
186
- - test/legacy/eu.httpbin.org/redirect,3
187
- - test/legacy/eu.httpbin.org/status,500
188
- - test/legacy/legacy.sinew
189
- - test/recipes/array_header.sinew
190
- - test/recipes/basic.sinew
191
- - test/recipes/dups.sinew
192
- - test/recipes/implicit_header.sinew
193
- - test/recipes/limit.sinew
194
- - test/recipes/noko.sinew
195
- - test/recipes/uri.sinew
196
- - test/recipes/xml.sinew
197
- - test/test.html
198
- - test/test_cache.rb
199
- - test/test_helper.rb
200
- - test/test_legacy.rb
201
- - test/test_main.rb
202
- - test/test_nokogiri_ext.rb
203
- - test/test_output.rb
204
- - test/test_recipes.rb
205
- - test/test_requests.rb
206
- - test/test_utf8.rb
207
159
  homepage: http://github.com/gurgeous/sinew
208
160
  licenses:
209
161
  - MIT
210
162
  metadata: {}
211
- post_install_message:
163
+ post_install_message:
212
164
  rdoc_options: []
213
165
  require_paths:
214
166
  - lib
215
167
  required_ruby_version: !ruby/object:Gem::Requirement
216
168
  requirements:
217
- - - "~>"
169
+ - - ">="
218
170
  - !ruby/object:Gem::Version
219
- version: '2.3'
171
+ version: '2.7'
220
172
  required_rubygems_version: !ruby/object:Gem::Requirement
221
173
  requirements:
222
174
  - - ">="
223
175
  - !ruby/object:Gem::Version
224
176
  version: '0'
225
177
  requirements: []
226
- rubyforge_project: sinew
227
- rubygems_version: 2.7.6
228
- signing_key:
178
+ rubygems_version: 3.1.4
179
+ signing_key:
229
180
  specification_version: 4
230
181
  summary: Sinew - structured web crawling using recipes.
231
- test_files:
232
- - test/legacy/eu.httpbin.org/head/redirect,3
233
- - test/legacy/eu.httpbin.org/head/status,500
234
- - test/legacy/eu.httpbin.org/redirect,3
235
- - test/legacy/eu.httpbin.org/status,500
236
- - test/legacy/legacy.sinew
237
- - test/recipes/array_header.sinew
238
- - test/recipes/basic.sinew
239
- - test/recipes/dups.sinew
240
- - test/recipes/implicit_header.sinew
241
- - test/recipes/limit.sinew
242
- - test/recipes/noko.sinew
243
- - test/recipes/uri.sinew
244
- - test/recipes/xml.sinew
245
- - test/test.html
246
- - test/test_cache.rb
247
- - test/test_helper.rb
248
- - test/test_legacy.rb
249
- - test/test_main.rb
250
- - test/test_nokogiri_ext.rb
251
- - test/test_output.rb
252
- - test/test_recipes.rb
253
- - test/test_requests.rb
254
- - test/test_utf8.rb
182
+ test_files: []
data/.travis.yml DELETED
@@ -1,4 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.3.7
4
- - 2.5.1
data/lib/sinew/cache.rb DELETED
@@ -1,79 +0,0 @@
1
- require 'fileutils'
2
- require 'tempfile'
3
-
4
- #
5
- # This class handles the caching of http responses on disk.
6
- #
7
-
8
- module Sinew
9
- class Cache
10
- attr_reader :sinew
11
-
12
- def initialize(sinew)
13
- @sinew = sinew
14
- end
15
-
16
- def get(request)
17
- body = read_if_exist(body_path(request))
18
- return nil if !body
19
-
20
- head = read_if_exist(head_path(request))
21
- Response.from_cache(request, body, head)
22
- end
23
-
24
- def set(response)
25
- body_path = body_path(response.request)
26
- head_path = head_path(response.request)
27
-
28
- FileUtils.mkdir_p(File.dirname(body_path))
29
- FileUtils.mkdir_p(File.dirname(head_path))
30
-
31
- # write body, and head if necessary
32
- atomic_write(body_path, response.body)
33
- if head_necessary?(response)
34
- head = JSON.pretty_generate(response.head_as_json)
35
- atomic_write(head_path, head)
36
- end
37
- end
38
-
39
- def root_dir
40
- sinew.options[:cache]
41
- end
42
- protected :root_dir
43
-
44
- def head_necessary?(response)
45
- response.error? || response.redirected?
46
- end
47
- protected :head_necessary?
48
-
49
- def body_path(request)
50
- "#{root_dir}/#{request.cache_key}"
51
- end
52
- protected :body_path
53
-
54
- def head_path(request)
55
- body_path = body_path(request)
56
- dir, base = File.dirname(body_path), File.basename(body_path)
57
- "#{dir}/head/#{base}"
58
- end
59
- protected :head_path
60
-
61
- def read_if_exist(path)
62
- if File.exist?(path)
63
- IO.read(path, mode: 'r:UTF-8')
64
- end
65
- end
66
- protected :read_if_exist
67
-
68
- def atomic_write(path, data)
69
- tmp = Tempfile.new('sinew', encoding: 'UTF-8')
70
- tmp.write(data)
71
- tmp.close
72
- FileUtils.chmod(0o644, tmp.path)
73
- FileUtils.mv(tmp.path, path)
74
- ensure
75
- FileUtils.rm(tmp.path, force: true)
76
- end
77
- protected :atomic_write
78
- end
79
- end