tanakai 1.6.0 → 1.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +19 -6
- data/README.md +6 -0
- data/lib/tanakai/base/saver.rb +4 -6
- data/lib/tanakai/base.rb +5 -5
- data/lib/tanakai/cli/ansible_command_builder.rb +1 -1
- data/lib/tanakai/cli/generator.rb +1 -1
- data/lib/tanakai/cli.rb +1 -1
- data/lib/tanakai/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f639e8c843d1effdd2fa5268fe01ee0fda5adcb741ecda49b5e8f2c8a51f55a3
|
4
|
+
data.tar.gz: d03287426dc9e1e802ef149ad9edf5f83813ea067d22f9f6fb8fa7ec33bb3c5a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 12b9a122343c1599c87caf97cd527bf98c83db50e4f5fab40b4657932c41b41c5f3437a26297141a2e19f064f5083ba9b12c099d4bc20f537b0cf440aa92d9e2
|
7
|
+
data.tar.gz: 756e98178ef2c1fe80d9dfca936eab248e46e1c199665c23973fa9b96ad513a9bad2e4d0498e6ea3d3442a3aa76bf744109d91161e39c89aa0b29cdf83385ed0
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,18 +1,32 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## Next
|
4
|
+
* Your contribution here
|
5
|
+
|
6
|
+
## 1.7.1
|
7
|
+
### Fixes
|
8
|
+
* [#5](https://github.com/glaucocustodio/tanakai/pull/5): Replace `File.exists?`/`Dir.exists?`, which were removed in Ruby 3.2 - [MrChriss](https://github.com/MrChriss)
|
9
|
+
|
10
|
+
## 1.7.0
|
11
|
+
### New
|
12
|
+
* Allow passing `data:` to `crawl!` - [glaucocustodio](https://github.com/glaucocustodio)
|
13
|
+
|
14
|
+
### Fixes
|
15
|
+
* [#4](https://github.com/glaucocustodio/tanakai/pull/4): Fix keyword args on `crawl!` - [milk1000cc](https://github.com/milk1000cc)
|
16
|
+
|
3
17
|
## 1.6.0
|
4
18
|
### New
|
5
|
-
* Add support to Ruby 3
|
19
|
+
* Add support to Ruby 3 - [glaucocustodio](https://github.com/glaucocustodio)
|
6
20
|
|
7
21
|
## 1.5.1
|
8
22
|
### New
|
9
|
-
* Add `response_type` to `in_parallel`
|
23
|
+
* Add `response_type` to `in_parallel` - [glaucocustodio](https://github.com/glaucocustodio)
|
10
24
|
|
11
25
|
## 1.5.0
|
12
26
|
### New
|
13
|
-
* First release as Tanakai
|
14
|
-
* Add support to [Apparition](https://github.com/twalpole/apparition)
|
15
|
-
* Add support to [Cuprite](https://github.com/rubycdp/cuprite)
|
27
|
+
* First release as Tanakai - [glaucocustodio](https://github.com/glaucocustodio)
|
28
|
+
* Add support to [Apparition](https://github.com/twalpole/apparition) - [glaucocustodio](https://github.com/glaucocustodio)
|
29
|
+
* Add support to [Cuprite](https://github.com/rubycdp/cuprite) - [glaucocustodio](https://github.com/glaucocustodio)
|
16
30
|
|
17
31
|
## 1.4.0
|
18
32
|
### New
|
@@ -120,7 +134,6 @@
|
|
120
134
|
* Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
|
121
135
|
* Fix requests retries logic
|
122
136
|
|
123
|
-
|
124
137
|
## 1.0.1
|
125
138
|
* Add missing `logger` method to pipeline
|
126
139
|
* Fix `set_proxy` in Mechanize and Poltergeist builders
|
data/README.md
CHANGED
@@ -1355,6 +1355,12 @@ end # =>
|
|
1355
1355
|
# {:spider_name=>"example_spider", :status=>:completed, :environment=>"development", :start_time=>2018-08-22 18:49:22 +0400, :stop_time=>2018-08-22 18:49:23 +0400, :running_time=>0.801, :visits=>{:requests=>1, :responses=>1}, :items=>{:sent=>0, :processed=>0}, :error=>nil}
|
1356
1356
|
```
|
1357
1357
|
|
1358
|
+
You can also pass `data` to `crawl!`:
|
1359
|
+
|
1360
|
+
```ruby
|
1361
|
+
ExampleSpider.crawl!(data: { foo: "bar" })
|
1362
|
+
```
|
1363
|
+
|
1358
1364
|
So what if you don't care about stats and just want to send a request to a particular spider method and get that method's return value? Use `.parse!` instead:
|
1359
1365
|
|
1360
1366
|
#### `.parse!(:method_name, url:)` method
|
data/lib/tanakai/base/saver.rb
CHANGED
@@ -42,7 +42,7 @@ module Tanakai
|
|
42
42
|
def save_to_json(item)
|
43
43
|
data = JSON.generate([item])
|
44
44
|
|
45
|
-
if @index > 1 || append && File.
|
45
|
+
if @index > 1 || append && File.exist?(path)
|
46
46
|
file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
|
47
47
|
File.open(path, "w") do |f|
|
48
48
|
f.write(file_content + data.sub(/\A\[/, ""))
|
@@ -55,7 +55,7 @@ module Tanakai
|
|
55
55
|
def save_to_pretty_json(item)
|
56
56
|
data = JSON.pretty_generate([item])
|
57
57
|
|
58
|
-
if @index > 1 || append && File.
|
58
|
+
if @index > 1 || append && File.exist?(path)
|
59
59
|
file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
|
60
60
|
File.open(path, "w") do |f|
|
61
61
|
f.write(file_content + data.sub(/\A\[\n/, ""))
|
@@ -68,7 +68,7 @@ module Tanakai
|
|
68
68
|
def save_to_jsonlines(item)
|
69
69
|
data = JSON.generate(item)
|
70
70
|
|
71
|
-
if @index > 1 || append && File.
|
71
|
+
if @index > 1 || append && File.exist?(path)
|
72
72
|
File.open(path, "a") { |file| file.write("\n" + data) }
|
73
73
|
else
|
74
74
|
File.open(path, "w") { |file| file.write(data) }
|
@@ -78,7 +78,7 @@ module Tanakai
|
|
78
78
|
def save_to_csv(item)
|
79
79
|
data = flatten_hash(item)
|
80
80
|
|
81
|
-
if @index > 1 || append && File.
|
81
|
+
if @index > 1 || append && File.exist?(path)
|
82
82
|
CSV.open(path, "a+", force_quotes: true) do |csv|
|
83
83
|
csv << data.values
|
84
84
|
end
|
@@ -102,5 +102,3 @@ module Tanakai
|
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
105
|
-
|
106
|
-
|
data/lib/tanakai/base.rb
CHANGED
@@ -100,7 +100,7 @@ module Tanakai
|
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
103
|
-
def self.crawl!(exception_on_fail: true)
|
103
|
+
def self.crawl!(exception_on_fail: true, data: {})
|
104
104
|
logger.error "Spider: already running: #{name}" and return false if running?
|
105
105
|
|
106
106
|
@storage = Storage.new
|
@@ -124,13 +124,13 @@ module Tanakai
|
|
124
124
|
if start_urls
|
125
125
|
start_urls.each do |start_url|
|
126
126
|
if start_url.class == Hash
|
127
|
-
spider.request_to(:parse, start_url)
|
127
|
+
spider.request_to(:parse, url: start_url, data: data)
|
128
128
|
else
|
129
|
-
spider.request_to(:parse, url: start_url)
|
129
|
+
spider.request_to(:parse, url: start_url, data: data)
|
130
130
|
end
|
131
131
|
end
|
132
132
|
else
|
133
|
-
spider.parse
|
133
|
+
spider.parse(data: data)
|
134
134
|
end
|
135
135
|
rescue StandardError, SignalException, SystemExit => e
|
136
136
|
@run_info.merge!(status: :failed, error: e.inspect)
|
@@ -160,7 +160,7 @@ module Tanakai
|
|
160
160
|
if args.present?
|
161
161
|
spider.public_send(handler, *args)
|
162
162
|
elsif request.present?
|
163
|
-
spider.request_to(handler, request)
|
163
|
+
spider.request_to(handler, **request)
|
164
164
|
else
|
165
165
|
spider.public_send(handler)
|
166
166
|
end
|
@@ -31,7 +31,7 @@ module Tanakai
|
|
31
31
|
"--extra-vars", "ansible_python_interpreter=/usr/bin/python3"
|
32
32
|
]
|
33
33
|
|
34
|
-
if File.
|
34
|
+
if File.exist? "config/automation.yml"
|
35
35
|
require 'yaml'
|
36
36
|
if config = YAML.load_file("config/automation.yml").dig(@playbook)
|
37
37
|
config.each { |key, value| @vars[key] = value unless @vars[key] }
|
@@ -17,7 +17,7 @@ module Tanakai
|
|
17
17
|
|
18
18
|
def generate_spider(spider_name, in_project:)
|
19
19
|
spider_path = in_project ? "spiders/#{spider_name}.rb" : "./#{spider_name}.rb"
|
20
|
-
raise "Spider #{spider_path} already
|
20
|
+
raise "Spider #{spider_path} already exist?" if File.exist? spider_path
|
21
21
|
|
22
22
|
spider_class = to_spider_class(spider_name)
|
23
23
|
create_file spider_path do
|
data/lib/tanakai/cli.rb
CHANGED
data/lib/tanakai/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tanakai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Afanasev
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-02
|
12
|
+
date: 2023-11-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: thor
|