tanakai 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +5 -0
  4. data/CHANGELOG.md +118 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +2038 -0
  8. data/Rakefile +10 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/exe/tanakai +6 -0
  12. data/lib/tanakai/automation/deploy.yml +54 -0
  13. data/lib/tanakai/automation/setup/chromium_chromedriver.yml +26 -0
  14. data/lib/tanakai/automation/setup/firefox_geckodriver.yml +20 -0
  15. data/lib/tanakai/automation/setup/phantomjs.yml +33 -0
  16. data/lib/tanakai/automation/setup/ruby_environment.yml +124 -0
  17. data/lib/tanakai/automation/setup.yml +45 -0
  18. data/lib/tanakai/base/saver.rb +106 -0
  19. data/lib/tanakai/base/storage.rb +54 -0
  20. data/lib/tanakai/base.rb +326 -0
  21. data/lib/tanakai/base_helper.rb +22 -0
  22. data/lib/tanakai/browser_builder/apparition_builder.rb +58 -0
  23. data/lib/tanakai/browser_builder/cuprite_builder.rb +54 -0
  24. data/lib/tanakai/browser_builder/mechanize_builder.rb +154 -0
  25. data/lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb +175 -0
  26. data/lib/tanakai/browser_builder/selenium_chrome_builder.rb +199 -0
  27. data/lib/tanakai/browser_builder/selenium_firefox_builder.rb +204 -0
  28. data/lib/tanakai/browser_builder.rb +20 -0
  29. data/lib/tanakai/capybara_configuration.rb +10 -0
  30. data/lib/tanakai/capybara_ext/apparition/driver.rb +13 -0
  31. data/lib/tanakai/capybara_ext/cuprite/driver.rb +13 -0
  32. data/lib/tanakai/capybara_ext/driver/base.rb +62 -0
  33. data/lib/tanakai/capybara_ext/mechanize/driver.rb +71 -0
  34. data/lib/tanakai/capybara_ext/poltergeist/driver.rb +13 -0
  35. data/lib/tanakai/capybara_ext/selenium/driver.rb +34 -0
  36. data/lib/tanakai/capybara_ext/session/config.rb +22 -0
  37. data/lib/tanakai/capybara_ext/session.rb +249 -0
  38. data/lib/tanakai/cli/ansible_command_builder.rb +71 -0
  39. data/lib/tanakai/cli/generator.rb +57 -0
  40. data/lib/tanakai/cli.rb +183 -0
  41. data/lib/tanakai/core_ext/array.rb +14 -0
  42. data/lib/tanakai/core_ext/hash.rb +5 -0
  43. data/lib/tanakai/core_ext/numeric.rb +19 -0
  44. data/lib/tanakai/core_ext/string.rb +7 -0
  45. data/lib/tanakai/pipeline.rb +33 -0
  46. data/lib/tanakai/runner.rb +60 -0
  47. data/lib/tanakai/template/.gitignore +18 -0
  48. data/lib/tanakai/template/Gemfile +28 -0
  49. data/lib/tanakai/template/README.md +3 -0
  50. data/lib/tanakai/template/config/application.rb +37 -0
  51. data/lib/tanakai/template/config/automation.yml +13 -0
  52. data/lib/tanakai/template/config/boot.rb +22 -0
  53. data/lib/tanakai/template/config/initializers/.keep +0 -0
  54. data/lib/tanakai/template/config/schedule.rb +57 -0
  55. data/lib/tanakai/template/db/.keep +0 -0
  56. data/lib/tanakai/template/helpers/application_helper.rb +3 -0
  57. data/lib/tanakai/template/lib/.keep +0 -0
  58. data/lib/tanakai/template/log/.keep +0 -0
  59. data/lib/tanakai/template/pipelines/saver.rb +11 -0
  60. data/lib/tanakai/template/pipelines/validator.rb +24 -0
  61. data/lib/tanakai/template/spiders/application_spider.rb +143 -0
  62. data/lib/tanakai/template/tmp/.keep +0 -0
  63. data/lib/tanakai/version.rb +3 -0
  64. data/lib/tanakai.rb +54 -0
  65. data/tanakai.gemspec +50 -0
  66. metadata +382 -0
@@ -0,0 +1,143 @@
1
+ # ApplicationSpider is a default base spider class. You can set here
2
+ # default settings for all spiders inherited from ApplicationSpider.
3
+ # To generate a new spider, run: `$ tanakai generate spider spider_name`
4
+
5
+ class ApplicationSpider < Tanakai::Base
6
+ include ApplicationHelper
7
+
8
+ # Default engine for spiders (available engines: :mechanize, :poltergeist_phantomjs,
9
+ # :selenium_firefox, :selenium_chrome)
10
+ @engine = :poltergeist_phantomjs
11
+
12
+ # Pipelines list, by order.
13
+ # To process item through pipelines pass item to the `send_item` method
14
+ @pipelines = [:validator, :saver]
15
+
16
+ # Default config. Set here options which are default for all spiders inherited
17
+ # from ApplicationSpider. Child's class config will be deep merged with this one
18
+ @config = {
19
+ # Custom headers, format: hash. Example: { "some header" => "some value", "another header" => "another value" }
20
+ # Works only for :mechanize and :poltergeist_phantomjs engines (Selenium doesn't allow setting/getting headers)
21
+ # headers: {},
22
+
23
+ # Custom User Agent, format: string or lambda.
24
+ # Use lambda if you want to rotate user agents before each run:
25
+ # user_agent: -> { ARRAY_OF_USER_AGENTS.sample }
26
+ # Works for all engines
27
+ # user_agent: "Mozilla/5.0 Firefox/61.0",
28
+
29
+ # Custom cookies, format: array of hashes.
30
+ # Format for a single cookie: { name: "cookie name", value: "cookie value", domain: ".example.com" }
31
+ # Works for all engines
32
+ # cookies: [],
33
+
34
+ # Proxy, format: string or lambda. Format of a proxy string: "ip:port:protocol:user:password"
35
+ # `protocol` can be http or socks5. User and password are optional.
36
+ # Use lambda if you want to rotate proxies before each run:
37
+ # proxy: -> { ARRAY_OF_PROXIES.sample }
38
+ # Works for all engines, but keep in mind that Selenium drivers don't support proxies
39
+ # with authorization. Also, Mechanize doesn't support socks5 proxy format (only http)
40
+ # proxy: "3.4.5.6:3128:http:user:pass",
41
+
42
+ # If enabled, browser will ignore any https errors. It's handy while using a proxy
43
+ # with self-signed SSL cert (for example Crawlera or Mitmproxy)
44
+ # Also, it allows visiting webpages with an expired SSL certificate.
45
+ # Works for all engines
46
+ ignore_ssl_errors: true,
47
+
48
+ # Custom window size, works for all engines
49
+ # window_size: [1366, 768],
50
+
51
+ # Skip images downloading if true, works for all engines
52
+ disable_images: true,
53
+
54
+ # Selenium engines only: headless mode, `:native` or `:virtual_display` (default is :native)
55
+ # Although native mode has a better performance, virtual display mode
56
+ # sometimes can be useful. For example, some websites can detect (and block)
57
+ # headless chrome, so you can use virtual_display mode instead
58
+ # headless_mode: :native,
59
+
60
+ # This option tells the browser not to use a proxy for the provided list of domains or IP addresses.
61
+ # Format: array of strings. Works only for :selenium_firefox and :selenium_chrome
62
+ # proxy_bypass_list: [],
63
+
64
+ # Option to provide custom SSL certificate. Works only for :poltergeist_phantomjs and :mechanize
65
+ # ssl_cert_path: "path/to/ssl_cert",
66
+
67
+ # Inject some JavaScript code to the browser.
68
+ # Format: array of strings, where each string is a path to JS file.
69
+ # Works only for poltergeist_phantomjs engine (Selenium doesn't support JS code injection)
70
+ # extensions: ["lib/code_to_inject.js"],
71
+
72
+ # Automatically skip duplicated (already visited) urls when using `request_to` method.
73
+ # Possible values: `true` or `hash` with options.
74
+ # In case of `true`, all visited urls will be added to the storage's scope `:requests_urls`
75
+ # and if a url is already contained in this scope, the request will be skipped.
76
+ # You can configure this setting by providing additional options as hash:
77
+ # `skip_duplicate_requests: { scope: :custom_scope, check_only: true }`, where:
78
+ # `scope:` - use a custom scope instead of `:requests_urls`
79
+ # `check_only:` - if true, then scope will be only checked for url, url will not
80
+ # be added to the scope if the scope doesn't contain it.
81
+ # works for all drivers
82
+ # skip_duplicate_requests: true,
83
+
84
+ # Automatically skip provided errors while requesting a page.
85
+ # If raised error matches one of the errors in the list, then this error will be caught,
86
+ # and request will be skipped.
87
+ # It is a good idea to skip errors like NotFound(404), etc.
88
+ # Format: array where elements are error classes or/and hashes. You can use hash format
89
+ # for more flexibility: `{ error: "RuntimeError", message: "404 => Net::HTTPNotFound" }`.
90
+ # Provided `message:` will be compared with a full error message using `String#include?`. Also
91
+ # you can use regex instead: `{ error: "RuntimeError", message: /404|403/ }`.
92
+ # skip_request_errors: [{ error: RuntimeError, message: "404 => Net::HTTPNotFound" }],
93
+
94
+ # Automatically retry provided errors with a few attempts while requesting a page.
95
+ # If raised error matches one of the errors in the list, then this error will be caught
96
+ # and the request will be processed again within a delay. There are 3 attempts:
97
+ # first: delay 15 sec, second: delay 30 sec, third: delay 45 sec.
98
+ # If after 3 attempts there is still an exception, then the exception will be raised.
99
+ # It is a good idea to retry errors like `ReadTimeout`, `HTTPBadGateway`, etc.
100
+ # Format: same as for the `skip_request_errors` option.
101
+ # retry_request_errors: [Net::ReadTimeout],
102
+
103
+ # Handle page encoding while parsing html response using Nokogiri. There are two modes:
104
+ # Auto (`:auto`) (try to fetch correct encoding from <meta http-equiv="Content-Type"> or <meta charset> tags)
105
+ # Manual (set the required encoding explicitly), example: `encoding: "GB2312"`
106
+ # By default this option is unset.
107
+ # encoding: nil,
108
+
109
+ # Restart browser if one of the options is true:
110
+ restart_if: {
111
+ # Restart browser if provided memory limit (in kilobytes) is exceeded (works for all engines)
112
+ # memory_limit: 350_000,
113
+
114
+ # Restart browser if provided requests limit is exceeded (works for all engines)
115
+ # requests_limit: 100
116
+ },
117
+
118
+ # Perform several actions before each request:
119
+ before_request: {
120
+ # Change proxy before each request. The `proxy:` option above must be present
121
+ # and be a lambda. Works only for poltergeist and mechanize engines
122
+ # (Selenium doesn't support proxy rotation).
123
+ # change_proxy: true,
124
+
125
+ # Change user agent before each request. The `user_agent:` option above must be present
126
+ # and be a lambda. Works only for poltergeist and mechanize engines
127
+ # (Selenium doesn't support getting/setting headers).
128
+ # change_user_agent: true,
129
+
130
+ # Clear all cookies before each request, works for all engines
131
+ # clear_cookies: true,
132
+
133
+ # If you want to clear all cookies + set custom cookies (the `cookies:` option above must be present)
134
+ # use this option instead (works for all engines)
135
+ # clear_and_set_cookies: true,
136
+
137
+ # Global option to set delay between requests.
138
+ # Delay can be `Integer`, `Float` or `Range` (`2..5`). In case of a range,
139
+ # delay number will be chosen randomly for each request: `rand (2..5) # => 3`
140
+ # delay: 1..3
141
+ }
142
+ }
143
+ end
File without changes
@@ -0,0 +1,3 @@
1
+ module Tanakai
2
+ VERSION = "1.5.0"
3
+ end
data/lib/tanakai.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'ostruct'
2
+ require 'logger'
3
+ require 'json'
4
+ require 'active_support'
5
+ require 'active_support/core_ext'
6
+ require 'rbcat'
7
+
8
+ require_relative 'tanakai/version'
9
+
10
+ require_relative 'tanakai/core_ext/numeric'
11
+ require_relative 'tanakai/core_ext/string'
12
+ require_relative 'tanakai/core_ext/array'
13
+ require_relative 'tanakai/core_ext/hash'
14
+
15
+ require_relative 'tanakai/browser_builder'
16
+ require_relative 'tanakai/base_helper'
17
+ require_relative 'tanakai/pipeline'
18
+ require_relative 'tanakai/base'
19
+
20
+ module Tanakai
21
+ class << self
22
+ def configuration
23
+ @configuration ||= OpenStruct.new
24
+ end
25
+
26
+ def configure
27
+ yield(configuration)
28
+ end
29
+
30
+ def env
31
+ ENV.fetch("TANAKAI_ENV") { "development" }
32
+ end
33
+
34
+ def time_zone
35
+ ENV["TZ"]
36
+ end
37
+
38
+ def time_zone=(value)
39
+ ENV.store("TZ", value)
40
+ end
41
+
42
+ def list
43
+ Base.descendants.map do |klass|
44
+ next unless klass.name
45
+ [klass.name, klass]
46
+ end.compact.to_h
47
+ end
48
+
49
+ def find_by_name(name)
50
+ return unless name
51
+ Base.descendants.find { |klass| klass.name == name }
52
+ end
53
+ end
54
+ end
data/tanakai.gemspec ADDED
@@ -0,0 +1,50 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "tanakai/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "tanakai"
8
+ spec.version = Tanakai::VERSION
9
+ spec.authors = ["Victor Afanasev", "Glauco Custódio"]
10
+ spec.email = ["vicfreefly@gmail.com"]
11
+
12
+ spec.summary = "Maintained fork of Kimurai, a modern web scraping framework written in Ruby and based on Capybara/Nokogiri"
13
+ spec.homepage = "https://github.com/glaucocustodio/tanakai"
14
+ spec.license = "MIT"
15
+
16
+ # Specify which files should be added to the gem when it is released.
17
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
18
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
19
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
20
+ end
21
+ spec.bindir = "exe"
22
+ spec.executables = "tanakai"
23
+ spec.require_paths = ["lib"]
24
+ spec.required_ruby_version = ">= 2.5.0"
25
+
26
+ spec.add_dependency "thor"
27
+ spec.add_dependency "cliver"
28
+ spec.add_dependency "activesupport"
29
+ spec.add_dependency "murmurhash3"
30
+ spec.add_dependency "nokogiri"
31
+
32
+ spec.add_dependency "capybara", ">= 2.15", "< 4.0"
33
+ spec.add_dependency "capybara-mechanize"
34
+ spec.add_dependency "poltergeist"
35
+ spec.add_dependency "selenium-webdriver"
36
+ spec.add_dependency "apparition"
37
+ spec.add_dependency "cuprite"
38
+
39
+ spec.add_dependency "headless"
40
+ spec.add_dependency "pmap"
41
+
42
+ spec.add_dependency "whenever"
43
+
44
+ spec.add_dependency "rbcat", "~> 0.2"
45
+ spec.add_dependency "pry"
46
+
47
+ spec.add_development_dependency "bundler", "~> 1.16"
48
+ spec.add_development_dependency "rake", "~> 10.0"
49
+ spec.add_development_dependency "minitest", "~> 5.0"
50
+ end
metadata ADDED
@@ -0,0 +1,382 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tanakai
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Victor Afanasev
8
+ - Glauco Custódio
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2022-08-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: thor
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: cliver
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: activesupport
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: murmurhash3
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: nokogiri
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: capybara
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '2.15'
91
+ - - "<"
92
+ - !ruby/object:Gem::Version
93
+ version: '4.0'
94
+ type: :runtime
95
+ prerelease: false
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '2.15'
101
+ - - "<"
102
+ - !ruby/object:Gem::Version
103
+ version: '4.0'
104
+ - !ruby/object:Gem::Dependency
105
+ name: capybara-mechanize
106
+ requirement: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ - !ruby/object:Gem::Dependency
119
+ name: poltergeist
120
+ requirement: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ type: :runtime
126
+ prerelease: false
127
+ version_requirements: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ - !ruby/object:Gem::Dependency
133
+ name: selenium-webdriver
134
+ requirement: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ type: :runtime
140
+ prerelease: false
141
+ version_requirements: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ - !ruby/object:Gem::Dependency
147
+ name: apparition
148
+ requirement: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ type: :runtime
154
+ prerelease: false
155
+ version_requirements: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ - !ruby/object:Gem::Dependency
161
+ name: cuprite
162
+ requirement: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ type: :runtime
168
+ prerelease: false
169
+ version_requirements: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ - !ruby/object:Gem::Dependency
175
+ name: headless
176
+ requirement: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ type: :runtime
182
+ prerelease: false
183
+ version_requirements: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ - !ruby/object:Gem::Dependency
189
+ name: pmap
190
+ requirement: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ type: :runtime
196
+ prerelease: false
197
+ version_requirements: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ - !ruby/object:Gem::Dependency
203
+ name: whenever
204
+ requirement: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ type: :runtime
210
+ prerelease: false
211
+ version_requirements: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ - !ruby/object:Gem::Dependency
217
+ name: rbcat
218
+ requirement: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.2'
223
+ type: :runtime
224
+ prerelease: false
225
+ version_requirements: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - "~>"
228
+ - !ruby/object:Gem::Version
229
+ version: '0.2'
230
+ - !ruby/object:Gem::Dependency
231
+ name: pry
232
+ requirement: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: '0'
237
+ type: :runtime
238
+ prerelease: false
239
+ version_requirements: !ruby/object:Gem::Requirement
240
+ requirements:
241
+ - - ">="
242
+ - !ruby/object:Gem::Version
243
+ version: '0'
244
+ - !ruby/object:Gem::Dependency
245
+ name: bundler
246
+ requirement: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - "~>"
249
+ - !ruby/object:Gem::Version
250
+ version: '1.16'
251
+ type: :development
252
+ prerelease: false
253
+ version_requirements: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - "~>"
256
+ - !ruby/object:Gem::Version
257
+ version: '1.16'
258
+ - !ruby/object:Gem::Dependency
259
+ name: rake
260
+ requirement: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - "~>"
263
+ - !ruby/object:Gem::Version
264
+ version: '10.0'
265
+ type: :development
266
+ prerelease: false
267
+ version_requirements: !ruby/object:Gem::Requirement
268
+ requirements:
269
+ - - "~>"
270
+ - !ruby/object:Gem::Version
271
+ version: '10.0'
272
+ - !ruby/object:Gem::Dependency
273
+ name: minitest
274
+ requirement: !ruby/object:Gem::Requirement
275
+ requirements:
276
+ - - "~>"
277
+ - !ruby/object:Gem::Version
278
+ version: '5.0'
279
+ type: :development
280
+ prerelease: false
281
+ version_requirements: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - "~>"
284
+ - !ruby/object:Gem::Version
285
+ version: '5.0'
286
+ description:
287
+ email:
288
+ - vicfreefly@gmail.com
289
+ executables:
290
+ - tanakai
291
+ extensions: []
292
+ extra_rdoc_files: []
293
+ files:
294
+ - ".gitignore"
295
+ - ".travis.yml"
296
+ - CHANGELOG.md
297
+ - Gemfile
298
+ - LICENSE.txt
299
+ - README.md
300
+ - Rakefile
301
+ - bin/console
302
+ - bin/setup
303
+ - exe/tanakai
304
+ - lib/tanakai.rb
305
+ - lib/tanakai/automation/deploy.yml
306
+ - lib/tanakai/automation/setup.yml
307
+ - lib/tanakai/automation/setup/chromium_chromedriver.yml
308
+ - lib/tanakai/automation/setup/firefox_geckodriver.yml
309
+ - lib/tanakai/automation/setup/phantomjs.yml
310
+ - lib/tanakai/automation/setup/ruby_environment.yml
311
+ - lib/tanakai/base.rb
312
+ - lib/tanakai/base/saver.rb
313
+ - lib/tanakai/base/storage.rb
314
+ - lib/tanakai/base_helper.rb
315
+ - lib/tanakai/browser_builder.rb
316
+ - lib/tanakai/browser_builder/apparition_builder.rb
317
+ - lib/tanakai/browser_builder/cuprite_builder.rb
318
+ - lib/tanakai/browser_builder/mechanize_builder.rb
319
+ - lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb
320
+ - lib/tanakai/browser_builder/selenium_chrome_builder.rb
321
+ - lib/tanakai/browser_builder/selenium_firefox_builder.rb
322
+ - lib/tanakai/capybara_configuration.rb
323
+ - lib/tanakai/capybara_ext/apparition/driver.rb
324
+ - lib/tanakai/capybara_ext/cuprite/driver.rb
325
+ - lib/tanakai/capybara_ext/driver/base.rb
326
+ - lib/tanakai/capybara_ext/mechanize/driver.rb
327
+ - lib/tanakai/capybara_ext/poltergeist/driver.rb
328
+ - lib/tanakai/capybara_ext/selenium/driver.rb
329
+ - lib/tanakai/capybara_ext/session.rb
330
+ - lib/tanakai/capybara_ext/session/config.rb
331
+ - lib/tanakai/cli.rb
332
+ - lib/tanakai/cli/ansible_command_builder.rb
333
+ - lib/tanakai/cli/generator.rb
334
+ - lib/tanakai/core_ext/array.rb
335
+ - lib/tanakai/core_ext/hash.rb
336
+ - lib/tanakai/core_ext/numeric.rb
337
+ - lib/tanakai/core_ext/string.rb
338
+ - lib/tanakai/pipeline.rb
339
+ - lib/tanakai/runner.rb
340
+ - lib/tanakai/template/.gitignore
341
+ - lib/tanakai/template/Gemfile
342
+ - lib/tanakai/template/README.md
343
+ - lib/tanakai/template/config/application.rb
344
+ - lib/tanakai/template/config/automation.yml
345
+ - lib/tanakai/template/config/boot.rb
346
+ - lib/tanakai/template/config/initializers/.keep
347
+ - lib/tanakai/template/config/schedule.rb
348
+ - lib/tanakai/template/db/.keep
349
+ - lib/tanakai/template/helpers/application_helper.rb
350
+ - lib/tanakai/template/lib/.keep
351
+ - lib/tanakai/template/log/.keep
352
+ - lib/tanakai/template/pipelines/saver.rb
353
+ - lib/tanakai/template/pipelines/validator.rb
354
+ - lib/tanakai/template/spiders/application_spider.rb
355
+ - lib/tanakai/template/tmp/.keep
356
+ - lib/tanakai/version.rb
357
+ - tanakai.gemspec
358
+ homepage: https://github.com/glaucocustodio/tanakai
359
+ licenses:
360
+ - MIT
361
+ metadata: {}
362
+ post_install_message:
363
+ rdoc_options: []
364
+ require_paths:
365
+ - lib
366
+ required_ruby_version: !ruby/object:Gem::Requirement
367
+ requirements:
368
+ - - ">="
369
+ - !ruby/object:Gem::Version
370
+ version: 2.5.0
371
+ required_rubygems_version: !ruby/object:Gem::Requirement
372
+ requirements:
373
+ - - ">="
374
+ - !ruby/object:Gem::Version
375
+ version: '0'
376
+ requirements: []
377
+ rubygems_version: 3.1.2
378
+ signing_key:
379
+ specification_version: 4
380
+ summary: Maintained fork of Kimurai, a modern web scraping framework written in Ruby
381
+ and based on Capybara/Nokogiri
382
+ test_files: []