sinew 2.0.2 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.rubocop.yml +9 -6
  4. data/.vscode/settings.json +0 -10
  5. data/Gemfile +9 -0
  6. data/README.md +62 -54
  7. data/Rakefile +33 -18
  8. data/bin/sinew +2 -0
  9. data/lib/sinew.rb +0 -1
  10. data/lib/sinew/connection.rb +52 -0
  11. data/lib/sinew/connection/log_formatter.rb +22 -0
  12. data/lib/sinew/connection/rate_limit.rb +29 -0
  13. data/lib/sinew/core_ext.rb +1 -1
  14. data/lib/sinew/dsl.rb +10 -6
  15. data/lib/sinew/main.rb +29 -56
  16. data/lib/sinew/output.rb +7 -16
  17. data/lib/sinew/request.rb +22 -87
  18. data/lib/sinew/response.rb +8 -57
  19. data/lib/sinew/runtime_options.rb +4 -4
  20. data/lib/sinew/version.rb +1 -1
  21. data/sample.sinew +2 -2
  22. data/sinew.gemspec +16 -18
  23. metadata +38 -110
  24. data/.travis.yml +0 -4
  25. data/lib/sinew/cache.rb +0 -79
  26. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  27. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  28. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  29. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  30. data/test/legacy/legacy.sinew +0 -2
  31. data/test/recipes/array_header.sinew +0 -6
  32. data/test/recipes/basic.sinew +0 -8
  33. data/test/recipes/dups.sinew +0 -7
  34. data/test/recipes/implicit_header.sinew +0 -5
  35. data/test/recipes/limit.sinew +0 -11
  36. data/test/recipes/noko.sinew +0 -9
  37. data/test/recipes/uri.sinew +0 -11
  38. data/test/recipes/xml.sinew +0 -8
  39. data/test/test.html +0 -45
  40. data/test/test_cache.rb +0 -69
  41. data/test/test_helper.rb +0 -123
  42. data/test/test_legacy.rb +0 -23
  43. data/test/test_main.rb +0 -34
  44. data/test/test_nokogiri_ext.rb +0 -18
  45. data/test/test_output.rb +0 -56
  46. data/test/test_recipes.rb +0 -60
  47. data/test/test_requests.rb +0 -135
  48. data/test/test_utf8.rb +0 -39
@@ -7,8 +7,8 @@ module Sinew
7
7
  attr_accessor :retries
8
8
  attr_accessor :rate_limit
9
9
  attr_accessor :headers
10
- attr_accessor :httparty_options
11
- attr_accessor :before_generate_cache_key
10
+ attr_accessor :httpdisk_options
11
+ attr_accessor :insecure
12
12
 
13
13
  def initialize
14
14
  self.retries = 3
@@ -16,8 +16,8 @@ module Sinew
16
16
  self.headers = {
17
17
  'User-Agent' => "sinew/#{VERSION}",
18
18
  }
19
- self.httparty_options = {}
20
- self.before_generate_cache_key = ->(i) { i }
19
+ self.httpdisk_options = {}
20
+ self.insecure = false
21
21
 
22
22
  # for testing
23
23
  if ENV['SINEW_TEST']
data/lib/sinew/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Sinew
2
2
  # Gem version
3
- VERSION = '2.0.2'.freeze
3
+ VERSION = '3.0.1'.freeze
4
4
  end
data/sample.sinew CHANGED
@@ -1,4 +1,4 @@
1
- get 'http://httpbin.org'
1
+ get 'http://httpbingo.org'
2
2
  noko.css('ul li a').each do |a|
3
3
  row = {}
4
4
  row[:url] = a[:href]
@@ -6,4 +6,4 @@ noko.css('ul li a').each do |a|
6
6
  csv_emit(row)
7
7
  end
8
8
 
9
- get 'http://httpbin.org/redirect/2'
9
+ get 'http://httpbingo.org/redirect/2'
data/sinew.gemspec CHANGED
@@ -5,30 +5,28 @@ require 'sinew/version'
5
5
  Gem::Specification.new do |s|
6
6
  s.name = 'sinew'
7
7
  s.version = Sinew::VERSION
8
- s.platform = Gem::Platform::RUBY
9
8
  s.license = 'MIT'
10
- s.authors = [ 'Adam Doppelt' ]
9
+ s.authors = [ 'Adam Doppelt', 'Nathan Kriege' ]
11
10
  s.email = [ 'amd@gurge.com' ]
12
11
  s.homepage = 'http://github.com/gurgeous/sinew'
13
12
  s.summary = 'Sinew - structured web crawling using recipes.'
14
13
  s.description = 'Crawl web sites easily using ruby recipes, with caching and nokogiri.'
15
- s.required_ruby_version = '~> 2.3'
14
+ s.required_ruby_version = '>= 2.7'
16
15
 
17
- s.rubyforge_project = 'sinew'
16
+ # what's in the gem?
17
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
18
+ `git ls-files -z`.split("\x0").reject { _1.match(%r{^test/}) }
19
+ end
20
+ s.bindir = 'bin'
21
+ s.executables = s.files.grep(%r{^#{s.bindir}/}) { File.basename(_1) }
22
+ s.require_paths = [ 'lib' ]
18
23
 
19
- s.add_runtime_dependency 'awesome_print', '~> 1.8'
20
- s.add_runtime_dependency 'htmlentities', '~> 4.3'
21
- s.add_runtime_dependency 'httparty', '~> 0.16'
22
- s.add_runtime_dependency 'nokogiri', '~> 1.8'
24
+ s.add_runtime_dependency 'amazing_print', '~> 1.3'
25
+ s.add_runtime_dependency 'faraday', '~> 1.4'
26
+ s.add_runtime_dependency 'faraday-encoding', '~> 0'
27
+ s.add_runtime_dependency 'httpdisk', '~> 0'
28
+ s.add_runtime_dependency 'nokogiri', '~> 1.11'
23
29
  s.add_runtime_dependency 'scripto', '~> 0'
24
- s.add_runtime_dependency 'slop', '~> 4.6'
25
- s.add_runtime_dependency 'stringex', '~> 2.8'
26
- s.add_development_dependency 'minitest', '~> 5.11'
27
- s.add_development_dependency 'rake', '~> 12.3'
28
- s.add_development_dependency 'webmock', '~> 3.4'
29
-
30
- s.files = `git ls-files`.split("\n")
31
- s.test_files = `git ls-files -- test/*`.split("\n")
32
- s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
33
- s.require_paths = [ 'lib' ]
30
+ s.add_runtime_dependency 'slop', '~> 4.8'
31
+ s.add_runtime_dependency 'sterile', '~> 1.0'
34
32
  end
metadata CHANGED
@@ -1,73 +1,60 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sinew
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
8
- autorequire:
8
+ - Nathan Kriege
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2018-05-03 00:00:00.000000000 Z
12
+ date: 2021-06-04 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
- name: awesome_print
15
+ name: amazing_print
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
17
18
  - - "~>"
18
19
  - !ruby/object:Gem::Version
19
- version: '1.8'
20
+ version: '1.3'
20
21
  type: :runtime
21
22
  prerelease: false
22
23
  version_requirements: !ruby/object:Gem::Requirement
23
24
  requirements:
24
25
  - - "~>"
25
26
  - !ruby/object:Gem::Version
26
- version: '1.8'
27
+ version: '1.3'
27
28
  - !ruby/object:Gem::Dependency
28
- name: htmlentities
29
+ name: faraday
29
30
  requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
32
  - - "~>"
32
33
  - !ruby/object:Gem::Version
33
- version: '4.3'
34
+ version: '1.4'
34
35
  type: :runtime
35
36
  prerelease: false
36
37
  version_requirements: !ruby/object:Gem::Requirement
37
38
  requirements:
38
39
  - - "~>"
39
40
  - !ruby/object:Gem::Version
40
- version: '4.3'
41
+ version: '1.4'
41
42
  - !ruby/object:Gem::Dependency
42
- name: httparty
43
+ name: faraday-encoding
43
44
  requirement: !ruby/object:Gem::Requirement
44
45
  requirements:
45
46
  - - "~>"
46
47
  - !ruby/object:Gem::Version
47
- version: '0.16'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '0.16'
55
- - !ruby/object:Gem::Dependency
56
- name: nokogiri
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '1.8'
48
+ version: '0'
62
49
  type: :runtime
63
50
  prerelease: false
64
51
  version_requirements: !ruby/object:Gem::Requirement
65
52
  requirements:
66
53
  - - "~>"
67
54
  - !ruby/object:Gem::Version
68
- version: '1.8'
55
+ version: '0'
69
56
  - !ruby/object:Gem::Dependency
70
- name: scripto
57
+ name: httpdisk
71
58
  requirement: !ruby/object:Gem::Requirement
72
59
  requirements:
73
60
  - - "~>"
@@ -81,75 +68,61 @@ dependencies:
81
68
  - !ruby/object:Gem::Version
82
69
  version: '0'
83
70
  - !ruby/object:Gem::Dependency
84
- name: slop
71
+ name: nokogiri
85
72
  requirement: !ruby/object:Gem::Requirement
86
73
  requirements:
87
74
  - - "~>"
88
75
  - !ruby/object:Gem::Version
89
- version: '4.6'
76
+ version: '1.11'
90
77
  type: :runtime
91
78
  prerelease: false
92
79
  version_requirements: !ruby/object:Gem::Requirement
93
80
  requirements:
94
81
  - - "~>"
95
82
  - !ruby/object:Gem::Version
96
- version: '4.6'
83
+ version: '1.11'
97
84
  - !ruby/object:Gem::Dependency
98
- name: stringex
85
+ name: scripto
99
86
  requirement: !ruby/object:Gem::Requirement
100
87
  requirements:
101
88
  - - "~>"
102
89
  - !ruby/object:Gem::Version
103
- version: '2.8'
90
+ version: '0'
104
91
  type: :runtime
105
92
  prerelease: false
106
93
  version_requirements: !ruby/object:Gem::Requirement
107
94
  requirements:
108
95
  - - "~>"
109
96
  - !ruby/object:Gem::Version
110
- version: '2.8'
111
- - !ruby/object:Gem::Dependency
112
- name: minitest
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - "~>"
116
- - !ruby/object:Gem::Version
117
- version: '5.11'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - "~>"
123
- - !ruby/object:Gem::Version
124
- version: '5.11'
97
+ version: '0'
125
98
  - !ruby/object:Gem::Dependency
126
- name: rake
99
+ name: slop
127
100
  requirement: !ruby/object:Gem::Requirement
128
101
  requirements:
129
102
  - - "~>"
130
103
  - !ruby/object:Gem::Version
131
- version: '12.3'
132
- type: :development
104
+ version: '4.8'
105
+ type: :runtime
133
106
  prerelease: false
134
107
  version_requirements: !ruby/object:Gem::Requirement
135
108
  requirements:
136
109
  - - "~>"
137
110
  - !ruby/object:Gem::Version
138
- version: '12.3'
111
+ version: '4.8'
139
112
  - !ruby/object:Gem::Dependency
140
- name: webmock
113
+ name: sterile
141
114
  requirement: !ruby/object:Gem::Requirement
142
115
  requirements:
143
116
  - - "~>"
144
117
  - !ruby/object:Gem::Version
145
- version: '3.4'
146
- type: :development
118
+ version: '1.0'
119
+ type: :runtime
147
120
  prerelease: false
148
121
  version_requirements: !ruby/object:Gem::Requirement
149
122
  requirements:
150
123
  - - "~>"
151
124
  - !ruby/object:Gem::Version
152
- version: '3.4'
125
+ version: '1.0'
153
126
  description: Crawl web sites easily using ruby recipes, with caching and nokogiri.
154
127
  email:
155
128
  - amd@gurge.com
@@ -158,9 +131,9 @@ executables:
158
131
  extensions: []
159
132
  extra_rdoc_files: []
160
133
  files:
134
+ - ".github/workflows/test.yml"
161
135
  - ".gitignore"
162
136
  - ".rubocop.yml"
163
- - ".travis.yml"
164
137
  - ".vscode/extensions.json"
165
138
  - ".vscode/settings.json"
166
139
  - Gemfile
@@ -169,7 +142,9 @@ files:
169
142
  - Rakefile
170
143
  - bin/sinew
171
144
  - lib/sinew.rb
172
- - lib/sinew/cache.rb
145
+ - lib/sinew/connection.rb
146
+ - lib/sinew/connection/log_formatter.rb
147
+ - lib/sinew/connection/rate_limit.rb
173
148
  - lib/sinew/core_ext.rb
174
149
  - lib/sinew/dsl.rb
175
150
  - lib/sinew/main.rb
@@ -181,74 +156,27 @@ files:
181
156
  - lib/sinew/version.rb
182
157
  - sample.sinew
183
158
  - sinew.gemspec
184
- - test/legacy/eu.httpbin.org/head/redirect,3
185
- - test/legacy/eu.httpbin.org/head/status,500
186
- - test/legacy/eu.httpbin.org/redirect,3
187
- - test/legacy/eu.httpbin.org/status,500
188
- - test/legacy/legacy.sinew
189
- - test/recipes/array_header.sinew
190
- - test/recipes/basic.sinew
191
- - test/recipes/dups.sinew
192
- - test/recipes/implicit_header.sinew
193
- - test/recipes/limit.sinew
194
- - test/recipes/noko.sinew
195
- - test/recipes/uri.sinew
196
- - test/recipes/xml.sinew
197
- - test/test.html
198
- - test/test_cache.rb
199
- - test/test_helper.rb
200
- - test/test_legacy.rb
201
- - test/test_main.rb
202
- - test/test_nokogiri_ext.rb
203
- - test/test_output.rb
204
- - test/test_recipes.rb
205
- - test/test_requests.rb
206
- - test/test_utf8.rb
207
159
  homepage: http://github.com/gurgeous/sinew
208
160
  licenses:
209
161
  - MIT
210
162
  metadata: {}
211
- post_install_message:
163
+ post_install_message:
212
164
  rdoc_options: []
213
165
  require_paths:
214
166
  - lib
215
167
  required_ruby_version: !ruby/object:Gem::Requirement
216
168
  requirements:
217
- - - "~>"
169
+ - - ">="
218
170
  - !ruby/object:Gem::Version
219
- version: '2.3'
171
+ version: '2.7'
220
172
  required_rubygems_version: !ruby/object:Gem::Requirement
221
173
  requirements:
222
174
  - - ">="
223
175
  - !ruby/object:Gem::Version
224
176
  version: '0'
225
177
  requirements: []
226
- rubyforge_project: sinew
227
- rubygems_version: 2.7.6
228
- signing_key:
178
+ rubygems_version: 3.1.4
179
+ signing_key:
229
180
  specification_version: 4
230
181
  summary: Sinew - structured web crawling using recipes.
231
- test_files:
232
- - test/legacy/eu.httpbin.org/head/redirect,3
233
- - test/legacy/eu.httpbin.org/head/status,500
234
- - test/legacy/eu.httpbin.org/redirect,3
235
- - test/legacy/eu.httpbin.org/status,500
236
- - test/legacy/legacy.sinew
237
- - test/recipes/array_header.sinew
238
- - test/recipes/basic.sinew
239
- - test/recipes/dups.sinew
240
- - test/recipes/implicit_header.sinew
241
- - test/recipes/limit.sinew
242
- - test/recipes/noko.sinew
243
- - test/recipes/uri.sinew
244
- - test/recipes/xml.sinew
245
- - test/test.html
246
- - test/test_cache.rb
247
- - test/test_helper.rb
248
- - test/test_legacy.rb
249
- - test/test_main.rb
250
- - test/test_nokogiri_ext.rb
251
- - test/test_output.rb
252
- - test/test_recipes.rb
253
- - test/test_requests.rb
254
- - test/test_utf8.rb
182
+ test_files: []
data/.travis.yml DELETED
@@ -1,4 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.3.7
4
- - 2.5.1
data/lib/sinew/cache.rb DELETED
@@ -1,79 +0,0 @@
1
- require 'fileutils'
2
- require 'tempfile'
3
-
4
- #
5
- # This class handles the caching of http responses on disk.
6
- #
7
-
8
- module Sinew
9
- class Cache
10
- attr_reader :sinew
11
-
12
- def initialize(sinew)
13
- @sinew = sinew
14
- end
15
-
16
- def get(request)
17
- body = read_if_exist(body_path(request))
18
- return nil if !body
19
-
20
- head = read_if_exist(head_path(request))
21
- Response.from_cache(request, body, head)
22
- end
23
-
24
- def set(response)
25
- body_path = body_path(response.request)
26
- head_path = head_path(response.request)
27
-
28
- FileUtils.mkdir_p(File.dirname(body_path))
29
- FileUtils.mkdir_p(File.dirname(head_path))
30
-
31
- # write body, and head if necessary
32
- atomic_write(body_path, response.body)
33
- if head_necessary?(response)
34
- head = JSON.pretty_generate(response.head_as_json)
35
- atomic_write(head_path, head)
36
- end
37
- end
38
-
39
- def root_dir
40
- sinew.options[:cache]
41
- end
42
- protected :root_dir
43
-
44
- def head_necessary?(response)
45
- response.error? || response.redirected?
46
- end
47
- protected :head_necessary?
48
-
49
- def body_path(request)
50
- "#{root_dir}/#{request.cache_key}"
51
- end
52
- protected :body_path
53
-
54
- def head_path(request)
55
- body_path = body_path(request)
56
- dir, base = File.dirname(body_path), File.basename(body_path)
57
- "#{dir}/head/#{base}"
58
- end
59
- protected :head_path
60
-
61
- def read_if_exist(path)
62
- if File.exist?(path)
63
- IO.read(path, mode: 'r:UTF-8')
64
- end
65
- end
66
- protected :read_if_exist
67
-
68
- def atomic_write(path, data)
69
- tmp = Tempfile.new('sinew', encoding: 'UTF-8')
70
- tmp.write(data)
71
- tmp.close
72
- FileUtils.chmod(0o644, tmp.path)
73
- FileUtils.mv(tmp.path, path)
74
- ensure
75
- FileUtils.rm(tmp.path, force: true)
76
- end
77
- protected :atomic_write
78
- end
79
- end