tanakai 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/lib/tanakai/base/saver.rb +4 -6
- data/lib/tanakai/base.rb +3 -1
- data/lib/tanakai/cli/ansible_command_builder.rb +1 -1
- data/lib/tanakai/cli/generator.rb +1 -1
- data/lib/tanakai/cli.rb +1 -1
- data/lib/tanakai/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 412cdf33eda65d65336652df303c4eb55cf395f5154e53f88a1c901ef4360c55
|
4
|
+
data.tar.gz: eb0217edc65c54bbe6b243596965b5073e0c3a308e5b52249d9de7027e8382e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87cdf732e266dc15d8a160d8ca7d4161f838b05f870d1fcc6b74f2c62f85a380ac98df778660206ab93927e22cd03dc5855b90062285461f48db041baeaa4cb4
|
7
|
+
data.tar.gz: 96b44cde5d58de7579dccd0394545f035412b01e26a46e2d686969176d6ea36589089ab3306a5972d2ae8cca4e8eb3d57b3c89173d96fe6531b5be8e528eae59
|
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,14 @@
|
|
3
3
|
## Next
|
4
4
|
* Your contribution here
|
5
5
|
|
6
|
+
## 1.7.2
|
7
|
+
### Fixes
|
8
|
+
* [#6](https://github.com/glaucocustodio/tanakai/pull/6): Fix URL validation - [MrChriss](https://github.com/MrChriss)
|
9
|
+
|
10
|
+
## 1.7.1
|
11
|
+
### Fixes
|
12
|
+
* [#5](https://github.com/glaucocustodio/tanakai/pull/5): Replace `File.exists?`/`Dir.exists?`, which were removed in Ruby 3.2 - [MrChriss](https://github.com/MrChriss)
|
13
|
+
|
6
14
|
## 1.7.0
|
7
15
|
### New
|
8
16
|
* Allow passing `data:` to `crawl!` - [glaucocustodio](https://github.com/glaucocustodio)
|
@@ -130,7 +138,6 @@
|
|
130
138
|
* Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
|
131
139
|
* Fix requests retries logic
|
132
140
|
|
133
|
-
|
134
141
|
## 1.0.1
|
135
142
|
* Add missing `logger` method to pipeline
|
136
143
|
* Fix `set_proxy` in Mechanize and Poltergeist builders
|
data/lib/tanakai/base/saver.rb
CHANGED
@@ -42,7 +42,7 @@ module Tanakai
|
|
42
42
|
def save_to_json(item)
|
43
43
|
data = JSON.generate([item])
|
44
44
|
|
45
|
-
if @index > 1 || append && File.exists?(path)
|
45
|
+
if @index > 1 || append && File.exist?(path)
|
46
46
|
file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
|
47
47
|
File.open(path, "w") do |f|
|
48
48
|
f.write(file_content + data.sub(/\A\[/, ""))
|
@@ -55,7 +55,7 @@ module Tanakai
|
|
55
55
|
def save_to_pretty_json(item)
|
56
56
|
data = JSON.pretty_generate([item])
|
57
57
|
|
58
|
-
if @index > 1 || append && File.exists?(path)
|
58
|
+
if @index > 1 || append && File.exist?(path)
|
59
59
|
file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
|
60
60
|
File.open(path, "w") do |f|
|
61
61
|
f.write(file_content + data.sub(/\A\[\n/, ""))
|
@@ -68,7 +68,7 @@ module Tanakai
|
|
68
68
|
def save_to_jsonlines(item)
|
69
69
|
data = JSON.generate(item)
|
70
70
|
|
71
|
-
if @index > 1 || append && File.exists?(path)
|
71
|
+
if @index > 1 || append && File.exist?(path)
|
72
72
|
File.open(path, "a") { |file| file.write("\n" + data) }
|
73
73
|
else
|
74
74
|
File.open(path, "w") { |file| file.write(data) }
|
@@ -78,7 +78,7 @@ module Tanakai
|
|
78
78
|
def save_to_csv(item)
|
79
79
|
data = flatten_hash(item)
|
80
80
|
|
81
|
-
if @index > 1 || append && File.exists?(path)
|
81
|
+
if @index > 1 || append && File.exist?(path)
|
82
82
|
CSV.open(path, "a+", force_quotes: true) do |csv|
|
83
83
|
csv << data.values
|
84
84
|
end
|
@@ -102,5 +102,3 @@ module Tanakai
|
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
105
|
-
|
106
|
-
|
data/lib/tanakai/base.rb
CHANGED
@@ -192,7 +192,9 @@ module Tanakai
|
|
192
192
|
end
|
193
193
|
|
194
194
|
def request_to(handler, delay = nil, url:, data: {}, response_type: :html)
|
195
|
-
|
195
|
+
if %w[http https].exclude?(Addressable::URI.parse(url).scheme)
|
196
|
+
raise InvalidUrlError, "Requested url scheme is invalid: #{url}"
|
197
|
+
end
|
196
198
|
|
197
199
|
if @config[:skip_duplicate_requests] && !unique_request?(url)
|
198
200
|
add_event(:duplicate_requests) if self.with_info
|
data/lib/tanakai/cli/ansible_command_builder.rb
CHANGED
@@ -31,7 +31,7 @@ module Tanakai
|
|
31
31
|
"--extra-vars", "ansible_python_interpreter=/usr/bin/python3"
|
32
32
|
]
|
33
33
|
|
34
|
-
if File.exists? "config/automation.yml"
|
34
|
+
if File.exist? "config/automation.yml"
|
35
35
|
require 'yaml'
|
36
36
|
if config = YAML.load_file("config/automation.yml").dig(@playbook)
|
37
37
|
config.each { |key, value| @vars[key] = value unless @vars[key] }
|
data/lib/tanakai/cli/generator.rb
CHANGED
@@ -17,7 +17,7 @@ module Tanakai
|
|
17
17
|
|
18
18
|
def generate_spider(spider_name, in_project:)
|
19
19
|
spider_path = in_project ? "spiders/#{spider_name}.rb" : "./#{spider_name}.rb"
|
20
|
-
raise "Spider #{spider_path} already exists" if File.exists? spider_path
|
20
|
+
raise "Spider #{spider_path} already exist?" if File.exist? spider_path
|
21
21
|
|
22
22
|
spider_class = to_spider_class(spider_name)
|
23
23
|
create_file spider_path do
|
data/lib/tanakai/cli.rb
CHANGED
data/lib/tanakai/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tanakai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Afanasev
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-
|
12
|
+
date: 2023-11-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: thor
|