tanakai 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +5 -0
  4. data/CHANGELOG.md +118 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +2038 -0
  8. data/Rakefile +10 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/exe/tanakai +6 -0
  12. data/lib/tanakai/automation/deploy.yml +54 -0
  13. data/lib/tanakai/automation/setup/chromium_chromedriver.yml +26 -0
  14. data/lib/tanakai/automation/setup/firefox_geckodriver.yml +20 -0
  15. data/lib/tanakai/automation/setup/phantomjs.yml +33 -0
  16. data/lib/tanakai/automation/setup/ruby_environment.yml +124 -0
  17. data/lib/tanakai/automation/setup.yml +45 -0
  18. data/lib/tanakai/base/saver.rb +106 -0
  19. data/lib/tanakai/base/storage.rb +54 -0
  20. data/lib/tanakai/base.rb +326 -0
  21. data/lib/tanakai/base_helper.rb +22 -0
  22. data/lib/tanakai/browser_builder/apparition_builder.rb +58 -0
  23. data/lib/tanakai/browser_builder/cuprite_builder.rb +54 -0
  24. data/lib/tanakai/browser_builder/mechanize_builder.rb +154 -0
  25. data/lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb +175 -0
  26. data/lib/tanakai/browser_builder/selenium_chrome_builder.rb +199 -0
  27. data/lib/tanakai/browser_builder/selenium_firefox_builder.rb +204 -0
  28. data/lib/tanakai/browser_builder.rb +20 -0
  29. data/lib/tanakai/capybara_configuration.rb +10 -0
  30. data/lib/tanakai/capybara_ext/apparition/driver.rb +13 -0
  31. data/lib/tanakai/capybara_ext/cuprite/driver.rb +13 -0
  32. data/lib/tanakai/capybara_ext/driver/base.rb +62 -0
  33. data/lib/tanakai/capybara_ext/mechanize/driver.rb +71 -0
  34. data/lib/tanakai/capybara_ext/poltergeist/driver.rb +13 -0
  35. data/lib/tanakai/capybara_ext/selenium/driver.rb +34 -0
  36. data/lib/tanakai/capybara_ext/session/config.rb +22 -0
  37. data/lib/tanakai/capybara_ext/session.rb +249 -0
  38. data/lib/tanakai/cli/ansible_command_builder.rb +71 -0
  39. data/lib/tanakai/cli/generator.rb +57 -0
  40. data/lib/tanakai/cli.rb +183 -0
  41. data/lib/tanakai/core_ext/array.rb +14 -0
  42. data/lib/tanakai/core_ext/hash.rb +5 -0
  43. data/lib/tanakai/core_ext/numeric.rb +19 -0
  44. data/lib/tanakai/core_ext/string.rb +7 -0
  45. data/lib/tanakai/pipeline.rb +33 -0
  46. data/lib/tanakai/runner.rb +60 -0
  47. data/lib/tanakai/template/.gitignore +18 -0
  48. data/lib/tanakai/template/Gemfile +28 -0
  49. data/lib/tanakai/template/README.md +3 -0
  50. data/lib/tanakai/template/config/application.rb +37 -0
  51. data/lib/tanakai/template/config/automation.yml +13 -0
  52. data/lib/tanakai/template/config/boot.rb +22 -0
  53. data/lib/tanakai/template/config/initializers/.keep +0 -0
  54. data/lib/tanakai/template/config/schedule.rb +57 -0
  55. data/lib/tanakai/template/db/.keep +0 -0
  56. data/lib/tanakai/template/helpers/application_helper.rb +3 -0
  57. data/lib/tanakai/template/lib/.keep +0 -0
  58. data/lib/tanakai/template/log/.keep +0 -0
  59. data/lib/tanakai/template/pipelines/saver.rb +11 -0
  60. data/lib/tanakai/template/pipelines/validator.rb +24 -0
  61. data/lib/tanakai/template/spiders/application_spider.rb +143 -0
  62. data/lib/tanakai/template/tmp/.keep +0 -0
  63. data/lib/tanakai/version.rb +3 -0
  64. data/lib/tanakai.rb +54 -0
  65. data/tanakai.gemspec +50 -0
  66. metadata +382 -0
@@ -0,0 +1,143 @@
1
+ # ApplicationSpider is a default base spider class. You can set here
2
+ # default settings for all spiders inherited from ApplicationSpider.
3
+ # To generate a new spider, run: `$ tanakai generate spider spider_name`
4
+
5
+ class ApplicationSpider < Tanakai::Base
6
+ include ApplicationHelper
7
+
8
+ # Default engine for spiders (available engines: :mechanize, :poltergeist_phantomjs,
9
+ # :selenium_firefox, :selenium_chrome)
10
+ @engine = :poltergeist_phantomjs
11
+
12
+ # Pipelines list, by order.
13
+ # To process item through pipelines pass item to the `send_item` method
14
+ @pipelines = [:validator, :saver]
15
+
16
+ # Default config. Set here options which are default for all spiders inherited
17
+ # from ApplicationSpider. Child's class config will be deep merged with this one
18
+ @config = {
19
+ # Custom headers, format: hash. Example: { "some header" => "some value", "another header" => "another value" }
20
+ # Works only for :mechanize and :poltergeist_phantomjs engines (Selenium doesn't allow to set/get headers)
21
+ # headers: {},
22
+
23
+ # Custom User Agent, format: string or lambda.
24
+ # Use lambda if you want to rotate user agents before each run:
25
+ # user_agent: -> { ARRAY_OF_USER_AGENTS.sample }
26
+ # Works for all engines
27
+ # user_agent: "Mozilla/5.0 Firefox/61.0",
28
+
29
+ # Custom cookies, format: array of hashes.
30
+ # Format for a single cookie: { name: "cookie name", value: "cookie value", domain: ".example.com" }
31
+ # Works for all engines
32
+ # cookies: [],
33
+
34
+ # Proxy, format: string or lambda. Format of a proxy string: "ip:port:protocol:user:password"
35
+ # `protocol` can be http or socks5. User and password are optional.
36
+ # Use lambda if you want to rotate proxies before each run:
37
+ # proxy: -> { ARRAY_OF_PROXIES.sample }
38
+ # Works for all engines, but keep in mind that Selenium drivers don't support proxies
39
+ # with authorization. Also, Mechanize doesn't support socks5 proxy format (only http)
40
+ # proxy: "3.4.5.6:3128:http:user:pass",
41
+
42
+ # If enabled, browser will ignore any https errors. It's handy while using a proxy
43
+ # with self-signed SSL cert (for example Crawlera or Mitmproxy)
44
+ # Also, it allows visiting webpages with an expired SSL certificate.
45
+ # Works for all engines
46
+ ignore_ssl_errors: true,
47
+
48
+ # Custom window size, works for all engines
49
+ # window_size: [1366, 768],
50
+
51
+ # Skip images downloading if true, works for all engines
52
+ disable_images: true,
53
+
54
+ # Selenium engines only: headless mode, `:native` or `:virtual_display` (default is :native)
55
+ # Although native mode has a better performance, virtual display mode
56
+ # sometimes can be useful. For example, some websites can detect (and block)
57
+ # headless chrome, so you can use virtual_display mode instead
58
+ # headless_mode: :native,
59
+
60
+ # This option tells the browser not to use a proxy for the provided list of domains or IP addresses.
61
+ # Format: array of strings. Works only for :selenium_firefox and :selenium_chrome
62
+ # proxy_bypass_list: [],
63
+
64
+ # Option to provide custom SSL certificate. Works only for :poltergeist_phantomjs and :mechanize
65
+ # ssl_cert_path: "path/to/ssl_cert",
66
+
67
+ # Inject some JavaScript code to the browser.
68
+ # Format: array of strings, where each string is a path to JS file.
69
+ # Works only for poltergeist_phantomjs engine (Selenium doesn't support JS code injection)
70
+ # extensions: ["lib/code_to_inject.js"],
71
+
72
+ # Automatically skip duplicated (already visited) urls when using `request_to` method.
73
+ # Possible values: `true` or `hash` with options.
74
+ # In case of `true`, all visited urls will be added to the storage's scope `:requests_urls`
75
+ # and if the url is already in this scope, the request will be skipped.
76
+ # You can configure this setting by providing additional options as hash:
77
+ # `skip_duplicate_requests: { scope: :custom_scope, check_only: true }`, where:
78
+ # `scope:` - use a custom scope instead of `:requests_urls`
79
+ # `check_only:` - if true, then scope will be only checked for url, url will not
80
+ # be added to the scope if the scope doesn't contain it.
81
+ # works for all drivers
82
+ # skip_duplicate_requests: true,
83
+
84
+ # Automatically skip provided errors while requesting a page.
85
+ # If raised error matches one of the errors in the list, then this error will be caught,
86
+ # and request will be skipped.
87
+ # It is a good idea to skip errors like NotFound(404), etc.
88
+ # Format: array where elements are error classes or/and hashes. You can use hash format
89
+ # for more flexibility: `{ error: "RuntimeError", message: "404 => Net::HTTPNotFound" }`.
90
+ # Provided `message:` will be compared with a full error message using `String#include?`. Also
91
+ # you can use regex instead: `{ error: "RuntimeError", message: /404|403/ }`.
92
+ # skip_request_errors: [{ error: RuntimeError, message: "404 => Net::HTTPNotFound" }],
93
+
94
+ # Automatically retry provided errors with a few attempts while requesting a page.
95
+ # If raised error matches one of the errors in the list, then this error will be caught
96
+ # and the request will be processed again after a delay. There are 3 attempts:
97
+ # first: delay 15 sec, second: delay 30 sec, third: delay 45 sec.
98
+ # If after 3 attempts there is still an exception, then the exception will be raised.
99
+ # It is a good idea to retry errors like `ReadTimeout`, `HTTPBadGateway`, etc.
100
+ # Format: same as for the `skip_request_errors` option.
101
+ # retry_request_errors: [Net::ReadTimeout],
102
+
103
+ # Handle page encoding while parsing html response using Nokogiri. There are two modes:
104
+ # Auto (`:auto`) (try to fetch correct encoding from <meta http-equiv="Content-Type"> or <meta charset> tags)
105
+ # Manual: set the required encoding explicitly, example: `encoding: "GB2312"`
106
+ # By default this option is unset.
107
+ # encoding: nil,
108
+
109
+ # Restart browser if one of the options is true:
110
+ restart_if: {
111
+ # Restart browser if provided memory limit (in kilobytes) is exceeded (works for all engines)
112
+ # memory_limit: 350_000,
113
+
114
+ # Restart browser if provided requests limit is exceeded (works for all engines)
115
+ # requests_limit: 100
116
+ },
117
+
118
+ # Perform several actions before each request:
119
+ before_request: {
120
+ # Change proxy before each request. The `proxy:` option above should be present
121
+ # and have lambda format. Works only for poltergeist and mechanize engines
122
+ # (Selenium doesn't support proxy rotation).
123
+ # change_proxy: true,
124
+
125
+ # Change user agent before each request. The `user_agent:` option above should be present
126
+ # and have lambda format. Works only for poltergeist and mechanize engines
127
+ # (selenium doesn't support to get/set headers).
128
+ # change_user_agent: true,
129
+
130
+ # Clear all cookies before each request, works for all engines
131
+ # clear_cookies: true,
132
+
133
+ # If you want to clear all cookies + set custom cookies (`cookies:` option above should be present)
134
+ # use this option instead (works for all engines)
135
+ # clear_and_set_cookies: true,
136
+
137
+ # Global option to set delay between requests.
138
+ # Delay can be `Integer`, `Float` or `Range` (`2..5`). In case of a range,
139
+ # delay number will be chosen randomly for each request: `rand (2..5) # => 3`
140
+ # delay: 1..3
141
+ }
142
+ }
143
+ end
File without changes
@@ -0,0 +1,3 @@
1
+ module Tanakai
2
+ VERSION = "1.5.0"
3
+ end
data/lib/tanakai.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'ostruct'
2
+ require 'logger'
3
+ require 'json'
4
+ require 'active_support'
5
+ require 'active_support/core_ext'
6
+ require 'rbcat'
7
+
8
+ require_relative 'tanakai/version'
9
+
10
+ require_relative 'tanakai/core_ext/numeric'
11
+ require_relative 'tanakai/core_ext/string'
12
+ require_relative 'tanakai/core_ext/array'
13
+ require_relative 'tanakai/core_ext/hash'
14
+
15
+ require_relative 'tanakai/browser_builder'
16
+ require_relative 'tanakai/base_helper'
17
+ require_relative 'tanakai/pipeline'
18
+ require_relative 'tanakai/base'
19
+
20
+ module Tanakai
21
+ class << self
22
+ def configuration
23
+ @configuration ||= OpenStruct.new
24
+ end
25
+
26
+ def configure
27
+ yield(configuration)
28
+ end
29
+
30
+ def env
31
+ ENV.fetch("TANAKAI_ENV") { "development" }
32
+ end
33
+
34
+ def time_zone
35
+ ENV["TZ"]
36
+ end
37
+
38
+ def time_zone=(value)
39
+ ENV.store("TZ", value)
40
+ end
41
+
42
+ def list
43
+ Base.descendants.map do |klass|
44
+ next unless klass.name
45
+ [klass.name, klass]
46
+ end.compact.to_h
47
+ end
48
+
49
+ def find_by_name(name)
50
+ return unless name
51
+ Base.descendants.find { |klass| klass.name == name }
52
+ end
53
+ end
54
+ end
data/tanakai.gemspec ADDED
@@ -0,0 +1,50 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "tanakai/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "tanakai"
8
+ spec.version = Tanakai::VERSION
9
+ spec.authors = ["Victor Afanasev", "Glauco Custódio"]
10
+ spec.email = ["vicfreefly@gmail.com"]
11
+
12
+ spec.summary = "Maintained fork of Kimurai, a modern web scraping framework written in Ruby and based on Capybara/Nokogiri"
13
+ spec.homepage = "https://github.com/glaucocustodio/tanakai"
14
+ spec.license = "MIT"
15
+
16
+ # Specify which files should be added to the gem when it is released.
17
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
18
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
19
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
20
+ end
21
+ spec.bindir = "exe"
22
+ spec.executables = "tanakai"
23
+ spec.require_paths = ["lib"]
24
+ spec.required_ruby_version = ">= 2.5.0"
25
+
26
+ spec.add_dependency "thor"
27
+ spec.add_dependency "cliver"
28
+ spec.add_dependency "activesupport"
29
+ spec.add_dependency "murmurhash3"
30
+ spec.add_dependency "nokogiri"
31
+
32
+ spec.add_dependency "capybara", ">= 2.15", "< 4.0"
33
+ spec.add_dependency "capybara-mechanize"
34
+ spec.add_dependency "poltergeist"
35
+ spec.add_dependency "selenium-webdriver"
36
+ spec.add_dependency "apparition"
37
+ spec.add_dependency "cuprite"
38
+
39
+ spec.add_dependency "headless"
40
+ spec.add_dependency "pmap"
41
+
42
+ spec.add_dependency "whenever"
43
+
44
+ spec.add_dependency "rbcat", "~> 0.2"
45
+ spec.add_dependency "pry"
46
+
47
+ spec.add_development_dependency "bundler", "~> 1.16"
48
+ spec.add_development_dependency "rake", "~> 10.0"
49
+ spec.add_development_dependency "minitest", "~> 5.0"
50
+ end
metadata ADDED
@@ -0,0 +1,382 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tanakai
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Victor Afanasev
8
+ - Glauco Custódio
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2022-08-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: thor
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: cliver
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: activesupport
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: murmurhash3
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: nokogiri
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: capybara
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '2.15'
91
+ - - "<"
92
+ - !ruby/object:Gem::Version
93
+ version: '4.0'
94
+ type: :runtime
95
+ prerelease: false
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '2.15'
101
+ - - "<"
102
+ - !ruby/object:Gem::Version
103
+ version: '4.0'
104
+ - !ruby/object:Gem::Dependency
105
+ name: capybara-mechanize
106
+ requirement: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ - !ruby/object:Gem::Dependency
119
+ name: poltergeist
120
+ requirement: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ type: :runtime
126
+ prerelease: false
127
+ version_requirements: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ - !ruby/object:Gem::Dependency
133
+ name: selenium-webdriver
134
+ requirement: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ type: :runtime
140
+ prerelease: false
141
+ version_requirements: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ - !ruby/object:Gem::Dependency
147
+ name: apparition
148
+ requirement: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ type: :runtime
154
+ prerelease: false
155
+ version_requirements: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ - !ruby/object:Gem::Dependency
161
+ name: cuprite
162
+ requirement: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ type: :runtime
168
+ prerelease: false
169
+ version_requirements: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ - !ruby/object:Gem::Dependency
175
+ name: headless
176
+ requirement: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ type: :runtime
182
+ prerelease: false
183
+ version_requirements: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ - !ruby/object:Gem::Dependency
189
+ name: pmap
190
+ requirement: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ type: :runtime
196
+ prerelease: false
197
+ version_requirements: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ - !ruby/object:Gem::Dependency
203
+ name: whenever
204
+ requirement: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ type: :runtime
210
+ prerelease: false
211
+ version_requirements: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ - !ruby/object:Gem::Dependency
217
+ name: rbcat
218
+ requirement: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: '0.2'
223
+ type: :runtime
224
+ prerelease: false
225
+ version_requirements: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - "~>"
228
+ - !ruby/object:Gem::Version
229
+ version: '0.2'
230
+ - !ruby/object:Gem::Dependency
231
+ name: pry
232
+ requirement: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: '0'
237
+ type: :runtime
238
+ prerelease: false
239
+ version_requirements: !ruby/object:Gem::Requirement
240
+ requirements:
241
+ - - ">="
242
+ - !ruby/object:Gem::Version
243
+ version: '0'
244
+ - !ruby/object:Gem::Dependency
245
+ name: bundler
246
+ requirement: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - "~>"
249
+ - !ruby/object:Gem::Version
250
+ version: '1.16'
251
+ type: :development
252
+ prerelease: false
253
+ version_requirements: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - "~>"
256
+ - !ruby/object:Gem::Version
257
+ version: '1.16'
258
+ - !ruby/object:Gem::Dependency
259
+ name: rake
260
+ requirement: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - "~>"
263
+ - !ruby/object:Gem::Version
264
+ version: '10.0'
265
+ type: :development
266
+ prerelease: false
267
+ version_requirements: !ruby/object:Gem::Requirement
268
+ requirements:
269
+ - - "~>"
270
+ - !ruby/object:Gem::Version
271
+ version: '10.0'
272
+ - !ruby/object:Gem::Dependency
273
+ name: minitest
274
+ requirement: !ruby/object:Gem::Requirement
275
+ requirements:
276
+ - - "~>"
277
+ - !ruby/object:Gem::Version
278
+ version: '5.0'
279
+ type: :development
280
+ prerelease: false
281
+ version_requirements: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - "~>"
284
+ - !ruby/object:Gem::Version
285
+ version: '5.0'
286
+ description:
287
+ email:
288
+ - vicfreefly@gmail.com
289
+ executables:
290
+ - tanakai
291
+ extensions: []
292
+ extra_rdoc_files: []
293
+ files:
294
+ - ".gitignore"
295
+ - ".travis.yml"
296
+ - CHANGELOG.md
297
+ - Gemfile
298
+ - LICENSE.txt
299
+ - README.md
300
+ - Rakefile
301
+ - bin/console
302
+ - bin/setup
303
+ - exe/tanakai
304
+ - lib/tanakai.rb
305
+ - lib/tanakai/automation/deploy.yml
306
+ - lib/tanakai/automation/setup.yml
307
+ - lib/tanakai/automation/setup/chromium_chromedriver.yml
308
+ - lib/tanakai/automation/setup/firefox_geckodriver.yml
309
+ - lib/tanakai/automation/setup/phantomjs.yml
310
+ - lib/tanakai/automation/setup/ruby_environment.yml
311
+ - lib/tanakai/base.rb
312
+ - lib/tanakai/base/saver.rb
313
+ - lib/tanakai/base/storage.rb
314
+ - lib/tanakai/base_helper.rb
315
+ - lib/tanakai/browser_builder.rb
316
+ - lib/tanakai/browser_builder/apparition_builder.rb
317
+ - lib/tanakai/browser_builder/cuprite_builder.rb
318
+ - lib/tanakai/browser_builder/mechanize_builder.rb
319
+ - lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb
320
+ - lib/tanakai/browser_builder/selenium_chrome_builder.rb
321
+ - lib/tanakai/browser_builder/selenium_firefox_builder.rb
322
+ - lib/tanakai/capybara_configuration.rb
323
+ - lib/tanakai/capybara_ext/apparition/driver.rb
324
+ - lib/tanakai/capybara_ext/cuprite/driver.rb
325
+ - lib/tanakai/capybara_ext/driver/base.rb
326
+ - lib/tanakai/capybara_ext/mechanize/driver.rb
327
+ - lib/tanakai/capybara_ext/poltergeist/driver.rb
328
+ - lib/tanakai/capybara_ext/selenium/driver.rb
329
+ - lib/tanakai/capybara_ext/session.rb
330
+ - lib/tanakai/capybara_ext/session/config.rb
331
+ - lib/tanakai/cli.rb
332
+ - lib/tanakai/cli/ansible_command_builder.rb
333
+ - lib/tanakai/cli/generator.rb
334
+ - lib/tanakai/core_ext/array.rb
335
+ - lib/tanakai/core_ext/hash.rb
336
+ - lib/tanakai/core_ext/numeric.rb
337
+ - lib/tanakai/core_ext/string.rb
338
+ - lib/tanakai/pipeline.rb
339
+ - lib/tanakai/runner.rb
340
+ - lib/tanakai/template/.gitignore
341
+ - lib/tanakai/template/Gemfile
342
+ - lib/tanakai/template/README.md
343
+ - lib/tanakai/template/config/application.rb
344
+ - lib/tanakai/template/config/automation.yml
345
+ - lib/tanakai/template/config/boot.rb
346
+ - lib/tanakai/template/config/initializers/.keep
347
+ - lib/tanakai/template/config/schedule.rb
348
+ - lib/tanakai/template/db/.keep
349
+ - lib/tanakai/template/helpers/application_helper.rb
350
+ - lib/tanakai/template/lib/.keep
351
+ - lib/tanakai/template/log/.keep
352
+ - lib/tanakai/template/pipelines/saver.rb
353
+ - lib/tanakai/template/pipelines/validator.rb
354
+ - lib/tanakai/template/spiders/application_spider.rb
355
+ - lib/tanakai/template/tmp/.keep
356
+ - lib/tanakai/version.rb
357
+ - tanakai.gemspec
358
+ homepage: https://github.com/glaucocustodio/tanakai
359
+ licenses:
360
+ - MIT
361
+ metadata: {}
362
+ post_install_message:
363
+ rdoc_options: []
364
+ require_paths:
365
+ - lib
366
+ required_ruby_version: !ruby/object:Gem::Requirement
367
+ requirements:
368
+ - - ">="
369
+ - !ruby/object:Gem::Version
370
+ version: 2.5.0
371
+ required_rubygems_version: !ruby/object:Gem::Requirement
372
+ requirements:
373
+ - - ">="
374
+ - !ruby/object:Gem::Version
375
+ version: '0'
376
+ requirements: []
377
+ rubygems_version: 3.1.2
378
+ signing_key:
379
+ specification_version: 4
380
+ summary: Maintained fork of Kimurai, a modern web scraping framework written in Ruby
381
+ and based on Capybara/Nokogiri
382
+ test_files: []