tanakai 1.7.0 → 1.7.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a544dd8c9d448beccd646a4a536a5597bd9e6348717c4f8103f708798fbc21c5
4
- data.tar.gz: '093f88b3185e3999f4bed6b6c9df472a1d7c32c4f1e95e044bd313ec4e444f50'
3
+ metadata.gz: 412cdf33eda65d65336652df303c4eb55cf395f5154e53f88a1c901ef4360c55
4
+ data.tar.gz: eb0217edc65c54bbe6b243596965b5073e0c3a308e5b52249d9de7027e8382e9
5
5
  SHA512:
6
- metadata.gz: 0bf1db16739720f015902830f588a3a463331b6d36d241fdd0d397bd1d8cb2e8ef921bf8d10a552016b7b2e43e415f938b974001e63a347a8feec9bb8b9b270f
7
- data.tar.gz: c080531acf9b98fb00c94ca994c3e32281c5eada7a38d15175a481f55e4b8cbe11b707950a9674fdfd09909b6ded2ece608f753b5fb5fb919ae79b6810c97547
6
+ metadata.gz: 87cdf732e266dc15d8a160d8ca7d4161f838b05f870d1fcc6b74f2c62f85a380ac98df778660206ab93927e22cd03dc5855b90062285461f48db041baeaa4cb4
7
+ data.tar.gz: 96b44cde5d58de7579dccd0394545f035412b01e26a46e2d686969176d6ea36589089ab3306a5972d2ae8cca4e8eb3d57b3c89173d96fe6531b5be8e528eae59
data/CHANGELOG.md CHANGED
@@ -3,6 +3,14 @@
3
3
  ## Next
4
4
  * Your contribution here
5
5
 
6
+ ## 1.7.2
7
+ ### Fixes
8
+ * [#6](https://github.com/glaucocustodio/tanakai/pull/6): Fix url validation - [MrChriss](https://github.com/MrChriss)
9
+
10
+ ## 1.7.1
11
+ ### Fixes
12
+ * [#5](https://github.com/glaucocustodio/tanakai/pull/5): Replace `File.exists?`/`Dir.exists?`, which were removed in Ruby 3.2, with `File.exist?`/`Dir.exist?` - [MrChriss](https://github.com/MrChriss)
13
+
6
14
  ## 1.7.0
7
15
  ### New
8
16
  * Allow passing `data:` to `crawl!` - [glaucocustodio](https://github.com/glaucocustodio)
@@ -130,7 +138,6 @@
130
138
  * Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
131
139
  * Fix requests retries logic
132
140
 
133
-
134
141
  ## 1.0.1
135
142
  * Add missing `logger` method to pipeline
136
143
  * Fix `set_proxy` in Mechanize and Poltergeist builders
@@ -42,7 +42,7 @@ module Tanakai
42
42
  def save_to_json(item)
43
43
  data = JSON.generate([item])
44
44
 
45
- if @index > 1 || append && File.exists?(path)
45
+ if @index > 1 || append && File.exist?(path)
46
46
  file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
47
47
  File.open(path, "w") do |f|
48
48
  f.write(file_content + data.sub(/\A\[/, ""))
@@ -55,7 +55,7 @@ module Tanakai
55
55
  def save_to_pretty_json(item)
56
56
  data = JSON.pretty_generate([item])
57
57
 
58
- if @index > 1 || append && File.exists?(path)
58
+ if @index > 1 || append && File.exist?(path)
59
59
  file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
60
60
  File.open(path, "w") do |f|
61
61
  f.write(file_content + data.sub(/\A\[\n/, ""))
@@ -68,7 +68,7 @@ module Tanakai
68
68
  def save_to_jsonlines(item)
69
69
  data = JSON.generate(item)
70
70
 
71
- if @index > 1 || append && File.exists?(path)
71
+ if @index > 1 || append && File.exist?(path)
72
72
  File.open(path, "a") { |file| file.write("\n" + data) }
73
73
  else
74
74
  File.open(path, "w") { |file| file.write(data) }
@@ -78,7 +78,7 @@ module Tanakai
78
78
  def save_to_csv(item)
79
79
  data = flatten_hash(item)
80
80
 
81
- if @index > 1 || append && File.exists?(path)
81
+ if @index > 1 || append && File.exist?(path)
82
82
  CSV.open(path, "a+", force_quotes: true) do |csv|
83
83
  csv << data.values
84
84
  end
@@ -102,5 +102,3 @@ module Tanakai
102
102
  end
103
103
  end
104
104
  end
105
-
106
-
data/lib/tanakai/base.rb CHANGED
@@ -192,7 +192,9 @@ module Tanakai
192
192
  end
193
193
 
194
194
  def request_to(handler, delay = nil, url:, data: {}, response_type: :html)
195
- raise InvalidUrlError, "Requested url is invalid: #{url}" unless URI.parse(url).kind_of?(URI::HTTP)
195
+ if %w[http https].exclude?(Addressable::URI.parse(url).scheme)
196
+ raise InvalidUrlError, "Requested url scheme is invalid: #{url}"
197
+ end
196
198
 
197
199
  if @config[:skip_duplicate_requests] && !unique_request?(url)
198
200
  add_event(:duplicate_requests) if self.with_info
@@ -31,7 +31,7 @@ module Tanakai
31
31
  "--extra-vars", "ansible_python_interpreter=/usr/bin/python3"
32
32
  ]
33
33
 
34
- if File.exists? "config/automation.yml"
34
+ if File.exist? "config/automation.yml"
35
35
  require 'yaml'
36
36
  if config = YAML.load_file("config/automation.yml").dig(@playbook)
37
37
  config.each { |key, value| @vars[key] = value unless @vars[key] }
@@ -17,7 +17,7 @@ module Tanakai
17
17
 
18
18
  def generate_spider(spider_name, in_project:)
19
19
  spider_path = in_project ? "spiders/#{spider_name}.rb" : "./#{spider_name}.rb"
20
- raise "Spider #{spider_path} already exists" if File.exists? spider_path
20
+ raise "Spider #{spider_path} already exist?" if File.exist? spider_path
21
21
 
22
22
  spider_class = to_spider_class(spider_name)
23
23
  create_file spider_path do
data/lib/tanakai/cli.rb CHANGED
@@ -174,7 +174,7 @@ module Tanakai
174
174
  private
175
175
 
176
176
  def inside_project?
177
- Dir.exists?("spiders") && File.exists?("./config/boot.rb")
177
+ Dir.exist?("spiders") && File.exist?("./config/boot.rb")
178
178
  end
179
179
  end
180
180
  end
@@ -1,3 +1,3 @@
1
1
  module Tanakai
2
- VERSION = "1.7.0"
2
+ VERSION = "1.7.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tanakai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Afanasev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-10-25 00:00:00.000000000 Z
12
+ date: 2023-11-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thor