tanakai 1.6.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ea3cd20cfaedaebf473e853b66ebe58958e89b7525246444e3c8aeef46a4bf0
4
- data.tar.gz: a2c51b86487d6392a58b533237731996639fe0037c9aca22a6140c3c968eaf7d
3
+ metadata.gz: f639e8c843d1effdd2fa5268fe01ee0fda5adcb741ecda49b5e8f2c8a51f55a3
4
+ data.tar.gz: d03287426dc9e1e802ef149ad9edf5f83813ea067d22f9f6fb8fa7ec33bb3c5a
5
5
  SHA512:
6
- metadata.gz: 52d9a730a0a9e08c0a49ee4177a0370f5ed2a12ac9e3925f0a83b0c232dcedb1645d1b6860cb19c8453bbc5777cec02403654e2282e57ad75c5c2cb898b6dc1b
7
- data.tar.gz: '0969ee651ec787b9fa1e47b8d776571b6f4751c29d3dd15bb0c696181ceab8bc826db6f54486df6426151f25f626f4725bf79c51ec8cc8cebebbe6cfa057bfa3'
6
+ metadata.gz: 12b9a122343c1599c87caf97cd527bf98c83db50e4f5fab40b4657932c41b41c5f3437a26297141a2e19f064f5083ba9b12c099d4bc20f537b0cf440aa92d9e2
7
+ data.tar.gz: 756e98178ef2c1fe80d9dfca936eab248e46e1c199665c23973fa9b96ad513a9bad2e4d0498e6ea3d3442a3aa76bf744109d91161e39c89aa0b29cdf83385ed0
data/.gitignore CHANGED
@@ -11,3 +11,4 @@ Gemfile.lock
11
11
  *.retry
12
12
  .tags*
13
13
  *.gem
14
+ .DS_Store
data/CHANGELOG.md CHANGED
@@ -1,18 +1,32 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## Next
4
+ * Your contribution here
5
+
6
+ ## 1.7.1
7
+ ### Fixes
8
+ * [#5](https://github.com/glaucocustodio/tanakai/pull/5): Replace `File.exists?`/`Dir.exists?`, which were removed in Ruby 3.2 - [MrChriss](https://github.com/MrChriss)
9
+
10
+ ## 1.7.0
11
+ ### New
12
+ * Allow passing `data:` to `crawl!` - [glaucocustodio](https://github.com/glaucocustodio)
13
+
14
+ ### Fixes
15
+ * [#4](https://github.com/glaucocustodio/tanakai/pull/4): Fix keyword args on `crawl!` - [milk1000cc](https://github.com/milk1000cc)
16
+
3
17
  ## 1.6.0
4
18
  ### New
5
- * Add support to Ruby 3
19
+ * Add support to Ruby 3 - [glaucocustodio](https://github.com/glaucocustodio)
6
20
 
7
21
  ## 1.5.1
8
22
  ### New
9
- * Add `response_type` to `in_parallel`
23
+ * Add `response_type` to `in_parallel` - [glaucocustodio](https://github.com/glaucocustodio)
10
24
 
11
25
  ## 1.5.0
12
26
  ### New
13
- * First release as Tanakai
14
- * Add support to [Apparition](https://github.com/twalpole/apparition)
15
- * Add support to [Cuprite](https://github.com/rubycdp/cuprite)
27
+ * First release as Tanakai - [glaucocustodio](https://github.com/glaucocustodio)
28
+ * Add support to [Apparition](https://github.com/twalpole/apparition) - [glaucocustodio](https://github.com/glaucocustodio)
29
+ * Add support to [Cuprite](https://github.com/rubycdp/cuprite) - [glaucocustodio](https://github.com/glaucocustodio)
16
30
 
17
31
  ## 1.4.0
18
32
  ### New
@@ -120,7 +134,6 @@
120
134
  * Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
121
135
  * Fix requests retries logic
122
136
 
123
-
124
137
  ## 1.0.1
125
138
  * Add missing `logger` method to pipeline
126
139
  * Fix `set_proxy` in Mechanize and Poltergeist builders
data/README.md CHANGED
@@ -1355,6 +1355,12 @@ end # =>
1355
1355
  # {:spider_name=>"example_spider", :status=>:completed, :environment=>"development", :start_time=>2018-08-22 18:49:22 +0400, :stop_time=>2018-08-22 18:49:23 +0400, :running_time=>0.801, :visits=>{:requests=>1, :responses=>1}, :items=>{:sent=>0, :processed=>0}, :error=>nil}
1356
1356
  ```
1357
1357
 
1358
+ You can also pass `data` to `crawl!`:
1359
+
1360
+ ```ruby
1361
+ ExampleSpider.crawl!(data: { foo: "bar" })
1362
+ ```
1363
+
1358
1364
  So what if you don't care about stats and just want to send a request to a particular spider method and get that method's return value? Use `.parse!` instead:
1359
1365
 
1360
1366
  #### `.parse!(:method_name, url:)` method
@@ -42,7 +42,7 @@ module Tanakai
42
42
  def save_to_json(item)
43
43
  data = JSON.generate([item])
44
44
 
45
- if @index > 1 || append && File.exists?(path)
45
+ if @index > 1 || append && File.exist?(path)
46
46
  file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
47
47
  File.open(path, "w") do |f|
48
48
  f.write(file_content + data.sub(/\A\[/, ""))
@@ -55,7 +55,7 @@ module Tanakai
55
55
  def save_to_pretty_json(item)
56
56
  data = JSON.pretty_generate([item])
57
57
 
58
- if @index > 1 || append && File.exists?(path)
58
+ if @index > 1 || append && File.exist?(path)
59
59
  file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
60
60
  File.open(path, "w") do |f|
61
61
  f.write(file_content + data.sub(/\A\[\n/, ""))
@@ -68,7 +68,7 @@ module Tanakai
68
68
  def save_to_jsonlines(item)
69
69
  data = JSON.generate(item)
70
70
 
71
- if @index > 1 || append && File.exists?(path)
71
+ if @index > 1 || append && File.exist?(path)
72
72
  File.open(path, "a") { |file| file.write("\n" + data) }
73
73
  else
74
74
  File.open(path, "w") { |file| file.write(data) }
@@ -78,7 +78,7 @@ module Tanakai
78
78
  def save_to_csv(item)
79
79
  data = flatten_hash(item)
80
80
 
81
- if @index > 1 || append && File.exists?(path)
81
+ if @index > 1 || append && File.exist?(path)
82
82
  CSV.open(path, "a+", force_quotes: true) do |csv|
83
83
  csv << data.values
84
84
  end
@@ -102,5 +102,3 @@ module Tanakai
102
102
  end
103
103
  end
104
104
  end
105
-
106
-
data/lib/tanakai/base.rb CHANGED
@@ -100,7 +100,7 @@ module Tanakai
100
100
  end
101
101
  end
102
102
 
103
- def self.crawl!(exception_on_fail: true)
103
+ def self.crawl!(exception_on_fail: true, data: {})
104
104
  logger.error "Spider: already running: #{name}" and return false if running?
105
105
 
106
106
  @storage = Storage.new
@@ -124,13 +124,13 @@ module Tanakai
124
124
  if start_urls
125
125
  start_urls.each do |start_url|
126
126
  if start_url.class == Hash
127
- spider.request_to(:parse, start_url)
127
+ spider.request_to(:parse, url: start_url, data: data)
128
128
  else
129
- spider.request_to(:parse, url: start_url)
129
+ spider.request_to(:parse, url: start_url, data: data)
130
130
  end
131
131
  end
132
132
  else
133
- spider.parse
133
+ spider.parse(data: data)
134
134
  end
135
135
  rescue StandardError, SignalException, SystemExit => e
136
136
  @run_info.merge!(status: :failed, error: e.inspect)
@@ -160,7 +160,7 @@ module Tanakai
160
160
  if args.present?
161
161
  spider.public_send(handler, *args)
162
162
  elsif request.present?
163
- spider.request_to(handler, request)
163
+ spider.request_to(handler, **request)
164
164
  else
165
165
  spider.public_send(handler)
166
166
  end
@@ -31,7 +31,7 @@ module Tanakai
31
31
  "--extra-vars", "ansible_python_interpreter=/usr/bin/python3"
32
32
  ]
33
33
 
34
- if File.exists? "config/automation.yml"
34
+ if File.exist? "config/automation.yml"
35
35
  require 'yaml'
36
36
  if config = YAML.load_file("config/automation.yml").dig(@playbook)
37
37
  config.each { |key, value| @vars[key] = value unless @vars[key] }
@@ -17,7 +17,7 @@ module Tanakai
17
17
 
18
18
  def generate_spider(spider_name, in_project:)
19
19
  spider_path = in_project ? "spiders/#{spider_name}.rb" : "./#{spider_name}.rb"
20
- raise "Spider #{spider_path} already exists" if File.exists? spider_path
20
+ raise "Spider #{spider_path} already exist?" if File.exist? spider_path
21
21
 
22
22
  spider_class = to_spider_class(spider_name)
23
23
  create_file spider_path do
data/lib/tanakai/cli.rb CHANGED
@@ -174,7 +174,7 @@ module Tanakai
174
174
  private
175
175
 
176
176
  def inside_project?
177
- Dir.exists?("spiders") && File.exists?("./config/boot.rb")
177
+ Dir.exist?("spiders") && File.exist?("./config/boot.rb")
178
178
  end
179
179
  end
180
180
  end
@@ -1,3 +1,3 @@
1
1
  module Tanakai
2
- VERSION = "1.6.0"
2
+ VERSION = "1.7.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tanakai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Afanasev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-02-16 00:00:00.000000000 Z
12
+ date: 2023-11-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thor