tanakai 1.6.0 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ea3cd20cfaedaebf473e853b66ebe58958e89b7525246444e3c8aeef46a4bf0
4
- data.tar.gz: a2c51b86487d6392a58b533237731996639fe0037c9aca22a6140c3c968eaf7d
3
+ metadata.gz: f639e8c843d1effdd2fa5268fe01ee0fda5adcb741ecda49b5e8f2c8a51f55a3
4
+ data.tar.gz: d03287426dc9e1e802ef149ad9edf5f83813ea067d22f9f6fb8fa7ec33bb3c5a
5
5
  SHA512:
6
- metadata.gz: 52d9a730a0a9e08c0a49ee4177a0370f5ed2a12ac9e3925f0a83b0c232dcedb1645d1b6860cb19c8453bbc5777cec02403654e2282e57ad75c5c2cb898b6dc1b
7
- data.tar.gz: '0969ee651ec787b9fa1e47b8d776571b6f4751c29d3dd15bb0c696181ceab8bc826db6f54486df6426151f25f626f4725bf79c51ec8cc8cebebbe6cfa057bfa3'
6
+ metadata.gz: 12b9a122343c1599c87caf97cd527bf98c83db50e4f5fab40b4657932c41b41c5f3437a26297141a2e19f064f5083ba9b12c099d4bc20f537b0cf440aa92d9e2
7
+ data.tar.gz: 756e98178ef2c1fe80d9dfca936eab248e46e1c199665c23973fa9b96ad513a9bad2e4d0498e6ea3d3442a3aa76bf744109d91161e39c89aa0b29cdf83385ed0
data/.gitignore CHANGED
@@ -11,3 +11,4 @@ Gemfile.lock
11
11
  *.retry
12
12
  .tags*
13
13
  *.gem
14
+ .DS_Store
data/CHANGELOG.md CHANGED
@@ -1,18 +1,32 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## Next
4
+ * Your contribution here
5
+
6
+ ## 1.7.1
7
+ ### Fixes
8
+ * [#5](https://github.com/glaucocustodio/tanakai/pull/5): Replace `File.exists?`/`Dir.exists?`, which were removed in Ruby 3.2 - [MrChriss](https://github.com/MrChriss)
9
+
10
+ ## 1.7.0
11
+ ### New
12
+ * Allow passing `data:` to `crawl!` - [glaucocustodio](https://github.com/glaucocustodio)
13
+
14
+ ### Fixes
15
+ * [#4](https://github.com/glaucocustodio/tanakai/pull/4): Fix keyword args on `crawl!` - [milk1000cc](https://github.com/milk1000cc)
16
+
3
17
  ## 1.6.0
4
18
  ### New
5
- * Add support to Ruby 3
19
+ * Add support to Ruby 3 - [glaucocustodio](https://github.com/glaucocustodio)
6
20
 
7
21
  ## 1.5.1
8
22
  ### New
9
- * Add `response_type` to `in_parallel`
23
+ * Add `response_type` to `in_parallel` - [glaucocustodio](https://github.com/glaucocustodio)
10
24
 
11
25
  ## 1.5.0
12
26
  ### New
13
- * First release as Tanakai
14
- * Add support to [Apparition](https://github.com/twalpole/apparition)
15
- * Add support to [Cuprite](https://github.com/rubycdp/cuprite)
27
+ * First release as Tanakai - [glaucocustodio](https://github.com/glaucocustodio)
28
+ * Add support to [Apparition](https://github.com/twalpole/apparition) - [glaucocustodio](https://github.com/glaucocustodio)
29
+ * Add support to [Cuprite](https://github.com/rubycdp/cuprite) - [glaucocustodio](https://github.com/glaucocustodio)
16
30
 
17
31
  ## 1.4.0
18
32
  ### New
@@ -120,7 +134,6 @@
120
134
  * Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
121
135
  * Fix requests retries logic
122
136
 
123
-
124
137
  ## 1.0.1
125
138
  * Add missing `logger` method to pipeline
126
139
  * Fix `set_proxy` in Mechanize and Poltergeist builders
data/README.md CHANGED
@@ -1355,6 +1355,12 @@ end # =>
1355
1355
  # {:spider_name=>"example_spider", :status=>:completed, :environment=>"development", :start_time=>2018-08-22 18:49:22 +0400, :stop_time=>2018-08-22 18:49:23 +0400, :running_time=>0.801, :visits=>{:requests=>1, :responses=>1}, :items=>{:sent=>0, :processed=>0}, :error=>nil}
1356
1356
  ```
1357
1357
 
1358
+ You can also pass `data` to `crawl!`:
1359
+
1360
+ ```ruby
1361
+ ExampleSpider.crawl!(data: { foo: "bar" })
1362
+ ```
1363
+
1358
1364
  So what if you don't care about stats and just want to process a request to a particular spider method and get the return value from this method? Use `.parse!` instead:
1359
1365
 
1360
1366
  #### `.parse!(:method_name, url:)` method
@@ -42,7 +42,7 @@ module Tanakai
42
42
  def save_to_json(item)
43
43
  data = JSON.generate([item])
44
44
 
45
- if @index > 1 || append && File.exists?(path)
45
+ if @index > 1 || append && File.exist?(path)
46
46
  file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
47
47
  File.open(path, "w") do |f|
48
48
  f.write(file_content + data.sub(/\A\[/, ""))
@@ -55,7 +55,7 @@ module Tanakai
55
55
  def save_to_pretty_json(item)
56
56
  data = JSON.pretty_generate([item])
57
57
 
58
- if @index > 1 || append && File.exists?(path)
58
+ if @index > 1 || append && File.exist?(path)
59
59
  file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
60
60
  File.open(path, "w") do |f|
61
61
  f.write(file_content + data.sub(/\A\[\n/, ""))
@@ -68,7 +68,7 @@ module Tanakai
68
68
  def save_to_jsonlines(item)
69
69
  data = JSON.generate(item)
70
70
 
71
- if @index > 1 || append && File.exists?(path)
71
+ if @index > 1 || append && File.exist?(path)
72
72
  File.open(path, "a") { |file| file.write("\n" + data) }
73
73
  else
74
74
  File.open(path, "w") { |file| file.write(data) }
@@ -78,7 +78,7 @@ module Tanakai
78
78
  def save_to_csv(item)
79
79
  data = flatten_hash(item)
80
80
 
81
- if @index > 1 || append && File.exists?(path)
81
+ if @index > 1 || append && File.exist?(path)
82
82
  CSV.open(path, "a+", force_quotes: true) do |csv|
83
83
  csv << data.values
84
84
  end
@@ -102,5 +102,3 @@ module Tanakai
102
102
  end
103
103
  end
104
104
  end
105
-
106
-
data/lib/tanakai/base.rb CHANGED
@@ -100,7 +100,7 @@ module Tanakai
100
100
  end
101
101
  end
102
102
 
103
- def self.crawl!(exception_on_fail: true)
103
+ def self.crawl!(exception_on_fail: true, data: {})
104
104
  logger.error "Spider: already running: #{name}" and return false if running?
105
105
 
106
106
  @storage = Storage.new
@@ -124,13 +124,13 @@ module Tanakai
124
124
  if start_urls
125
125
  start_urls.each do |start_url|
126
126
  if start_url.class == Hash
127
- spider.request_to(:parse, start_url)
127
+ spider.request_to(:parse, url: start_url, data: data)
128
128
  else
129
- spider.request_to(:parse, url: start_url)
129
+ spider.request_to(:parse, url: start_url, data: data)
130
130
  end
131
131
  end
132
132
  else
133
- spider.parse
133
+ spider.parse(data: data)
134
134
  end
135
135
  rescue StandardError, SignalException, SystemExit => e
136
136
  @run_info.merge!(status: :failed, error: e.inspect)
@@ -160,7 +160,7 @@ module Tanakai
160
160
  if args.present?
161
161
  spider.public_send(handler, *args)
162
162
  elsif request.present?
163
- spider.request_to(handler, request)
163
+ spider.request_to(handler, **request)
164
164
  else
165
165
  spider.public_send(handler)
166
166
  end
@@ -31,7 +31,7 @@ module Tanakai
31
31
  "--extra-vars", "ansible_python_interpreter=/usr/bin/python3"
32
32
  ]
33
33
 
34
- if File.exists? "config/automation.yml"
34
+ if File.exist? "config/automation.yml"
35
35
  require 'yaml'
36
36
  if config = YAML.load_file("config/automation.yml").dig(@playbook)
37
37
  config.each { |key, value| @vars[key] = value unless @vars[key] }
@@ -17,7 +17,7 @@ module Tanakai
17
17
 
18
18
  def generate_spider(spider_name, in_project:)
19
19
  spider_path = in_project ? "spiders/#{spider_name}.rb" : "./#{spider_name}.rb"
20
- raise "Spider #{spider_path} already exists" if File.exists? spider_path
20
+ raise "Spider #{spider_path} already exist?" if File.exist? spider_path
21
21
 
22
22
  spider_class = to_spider_class(spider_name)
23
23
  create_file spider_path do
data/lib/tanakai/cli.rb CHANGED
@@ -174,7 +174,7 @@ module Tanakai
174
174
  private
175
175
 
176
176
  def inside_project?
177
- Dir.exists?("spiders") && File.exists?("./config/boot.rb")
177
+ Dir.exist?("spiders") && File.exist?("./config/boot.rb")
178
178
  end
179
179
  end
180
180
  end
@@ -1,3 +1,3 @@
1
1
  module Tanakai
2
- VERSION = "1.6.0"
2
+ VERSION = "1.7.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tanakai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Afanasev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-02-16 00:00:00.000000000 Z
12
+ date: 2023-11-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thor