tanakai 1.7.0 → 1.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/lib/tanakai/base/saver.rb +4 -6
- data/lib/tanakai/base.rb +3 -1
- data/lib/tanakai/cli/ansible_command_builder.rb +1 -1
- data/lib/tanakai/cli/generator.rb +1 -1
- data/lib/tanakai/cli.rb +1 -1
- data/lib/tanakai/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 412cdf33eda65d65336652df303c4eb55cf395f5154e53f88a1c901ef4360c55
|
4
|
+
data.tar.gz: eb0217edc65c54bbe6b243596965b5073e0c3a308e5b52249d9de7027e8382e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87cdf732e266dc15d8a160d8ca7d4161f838b05f870d1fcc6b74f2c62f85a380ac98df778660206ab93927e22cd03dc5855b90062285461f48db041baeaa4cb4
|
7
|
+
data.tar.gz: 96b44cde5d58de7579dccd0394545f035412b01e26a46e2d686969176d6ea36589089ab3306a5972d2ae8cca4e8eb3d57b3c89173d96fe6531b5be8e528eae59
|
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,14 @@
|
|
3
3
|
## Next
|
4
4
|
* Your contribution here
|
5
5
|
|
6
|
+
## 1.7.2
|
7
|
+
### Fixes
|
8
|
+
* [#6](https://github.com/glaucocustodio/tanakai/pull/6): Fix url validation - [MrChriss](https://github.com/MrChriss)
|
9
|
+
|
10
|
+
## 1.7.1
|
11
|
+
### Fixes
|
12
|
+
* [#5](https://github.com/glaucocustodio/tanakai/pull/5): Replace `File.exists`/`Dir.exists` that have been removed on ruby 3.2 - [MrChriss](https://github.com/MrChriss)
|
13
|
+
|
6
14
|
## 1.7.0
|
7
15
|
### New
|
8
16
|
* Allow passing `data:` to `crawl!` - [glaucocustodio](https://github.com/glaucocustodio)
|
@@ -130,7 +138,6 @@
|
|
130
138
|
* Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
|
131
139
|
* Fix requests retries logic
|
132
140
|
|
133
|
-
|
134
141
|
## 1.0.1
|
135
142
|
* Add missing `logger` method to pipeline
|
136
143
|
* Fix `set_proxy` in Mechanize and Poltergeist builders
|
data/lib/tanakai/base/saver.rb
CHANGED
@@ -42,7 +42,7 @@ module Tanakai
|
|
42
42
|
def save_to_json(item)
|
43
43
|
data = JSON.generate([item])
|
44
44
|
|
45
|
-
if @index > 1 || append && File.exists?(path)
|
45
|
+
if @index > 1 || append && File.exist?(path)
|
46
46
|
file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
|
47
47
|
File.open(path, "w") do |f|
|
48
48
|
f.write(file_content + data.sub(/\A\[/, ""))
|
@@ -55,7 +55,7 @@ module Tanakai
|
|
55
55
|
def save_to_pretty_json(item)
|
56
56
|
data = JSON.pretty_generate([item])
|
57
57
|
|
58
|
-
if @index > 1 || append && File.exists?(path)
|
58
|
+
if @index > 1 || append && File.exist?(path)
|
59
59
|
file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
|
60
60
|
File.open(path, "w") do |f|
|
61
61
|
f.write(file_content + data.sub(/\A\[\n/, ""))
|
@@ -68,7 +68,7 @@ module Tanakai
|
|
68
68
|
def save_to_jsonlines(item)
|
69
69
|
data = JSON.generate(item)
|
70
70
|
|
71
|
-
if @index > 1 || append && File.exists?(path)
|
71
|
+
if @index > 1 || append && File.exist?(path)
|
72
72
|
File.open(path, "a") { |file| file.write("\n" + data) }
|
73
73
|
else
|
74
74
|
File.open(path, "w") { |file| file.write(data) }
|
@@ -78,7 +78,7 @@ module Tanakai
|
|
78
78
|
def save_to_csv(item)
|
79
79
|
data = flatten_hash(item)
|
80
80
|
|
81
|
-
if @index > 1 || append && File.exists?(path)
|
81
|
+
if @index > 1 || append && File.exist?(path)
|
82
82
|
CSV.open(path, "a+", force_quotes: true) do |csv|
|
83
83
|
csv << data.values
|
84
84
|
end
|
@@ -102,5 +102,3 @@ module Tanakai
|
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
105
|
-
|
106
|
-
|
data/lib/tanakai/base.rb
CHANGED
@@ -192,7 +192,9 @@ module Tanakai
|
|
192
192
|
end
|
193
193
|
|
194
194
|
def request_to(handler, delay = nil, url:, data: {}, response_type: :html)
|
195
|
-
|
195
|
+
if %w[http https].exclude?(Addressable::URI.parse(url).scheme)
|
196
|
+
raise InvalidUrlError, "Requested url scheme is invalid: #{url}"
|
197
|
+
end
|
196
198
|
|
197
199
|
if @config[:skip_duplicate_requests] && !unique_request?(url)
|
198
200
|
add_event(:duplicate_requests) if self.with_info
|
data/lib/tanakai/cli/ansible_command_builder.rb
CHANGED
@@ -31,7 +31,7 @@ module Tanakai
|
|
31
31
|
"--extra-vars", "ansible_python_interpreter=/usr/bin/python3"
|
32
32
|
]
|
33
33
|
|
34
|
-
if File.exists? "config/automation.yml"
|
34
|
+
if File.exist? "config/automation.yml"
|
35
35
|
require 'yaml'
|
36
36
|
if config = YAML.load_file("config/automation.yml").dig(@playbook)
|
37
37
|
config.each { |key, value| @vars[key] = value unless @vars[key] }
|
data/lib/tanakai/cli/generator.rb
CHANGED
@@ -17,7 +17,7 @@ module Tanakai
|
|
17
17
|
|
18
18
|
def generate_spider(spider_name, in_project:)
|
19
19
|
spider_path = in_project ? "spiders/#{spider_name}.rb" : "./#{spider_name}.rb"
|
20
|
-
raise "Spider #{spider_path} already exists" if File.exists? spider_path
|
20
|
+
raise "Spider #{spider_path} already exist?" if File.exist? spider_path
|
21
21
|
|
22
22
|
spider_class = to_spider_class(spider_name)
|
23
23
|
create_file spider_path do
|
data/lib/tanakai/cli.rb
CHANGED
data/lib/tanakai/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tanakai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Afanasev
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-
|
12
|
+
date: 2023-11-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: thor
|