tanakai 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +118 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +2038 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/tanakai +6 -0
- data/lib/tanakai/automation/deploy.yml +54 -0
- data/lib/tanakai/automation/setup/chromium_chromedriver.yml +26 -0
- data/lib/tanakai/automation/setup/firefox_geckodriver.yml +20 -0
- data/lib/tanakai/automation/setup/phantomjs.yml +33 -0
- data/lib/tanakai/automation/setup/ruby_environment.yml +124 -0
- data/lib/tanakai/automation/setup.yml +45 -0
- data/lib/tanakai/base/saver.rb +106 -0
- data/lib/tanakai/base/storage.rb +54 -0
- data/lib/tanakai/base.rb +326 -0
- data/lib/tanakai/base_helper.rb +22 -0
- data/lib/tanakai/browser_builder/apparition_builder.rb +58 -0
- data/lib/tanakai/browser_builder/cuprite_builder.rb +54 -0
- data/lib/tanakai/browser_builder/mechanize_builder.rb +154 -0
- data/lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb +175 -0
- data/lib/tanakai/browser_builder/selenium_chrome_builder.rb +199 -0
- data/lib/tanakai/browser_builder/selenium_firefox_builder.rb +204 -0
- data/lib/tanakai/browser_builder.rb +20 -0
- data/lib/tanakai/capybara_configuration.rb +10 -0
- data/lib/tanakai/capybara_ext/apparition/driver.rb +13 -0
- data/lib/tanakai/capybara_ext/cuprite/driver.rb +13 -0
- data/lib/tanakai/capybara_ext/driver/base.rb +62 -0
- data/lib/tanakai/capybara_ext/mechanize/driver.rb +71 -0
- data/lib/tanakai/capybara_ext/poltergeist/driver.rb +13 -0
- data/lib/tanakai/capybara_ext/selenium/driver.rb +34 -0
- data/lib/tanakai/capybara_ext/session/config.rb +22 -0
- data/lib/tanakai/capybara_ext/session.rb +249 -0
- data/lib/tanakai/cli/ansible_command_builder.rb +71 -0
- data/lib/tanakai/cli/generator.rb +57 -0
- data/lib/tanakai/cli.rb +183 -0
- data/lib/tanakai/core_ext/array.rb +14 -0
- data/lib/tanakai/core_ext/hash.rb +5 -0
- data/lib/tanakai/core_ext/numeric.rb +19 -0
- data/lib/tanakai/core_ext/string.rb +7 -0
- data/lib/tanakai/pipeline.rb +33 -0
- data/lib/tanakai/runner.rb +60 -0
- data/lib/tanakai/template/.gitignore +18 -0
- data/lib/tanakai/template/Gemfile +28 -0
- data/lib/tanakai/template/README.md +3 -0
- data/lib/tanakai/template/config/application.rb +37 -0
- data/lib/tanakai/template/config/automation.yml +13 -0
- data/lib/tanakai/template/config/boot.rb +22 -0
- data/lib/tanakai/template/config/initializers/.keep +0 -0
- data/lib/tanakai/template/config/schedule.rb +57 -0
- data/lib/tanakai/template/db/.keep +0 -0
- data/lib/tanakai/template/helpers/application_helper.rb +3 -0
- data/lib/tanakai/template/lib/.keep +0 -0
- data/lib/tanakai/template/log/.keep +0 -0
- data/lib/tanakai/template/pipelines/saver.rb +11 -0
- data/lib/tanakai/template/pipelines/validator.rb +24 -0
- data/lib/tanakai/template/spiders/application_spider.rb +143 -0
- data/lib/tanakai/template/tmp/.keep +0 -0
- data/lib/tanakai/version.rb +3 -0
- data/lib/tanakai.rb +54 -0
- data/tanakai.gemspec +50 -0
- metadata +382 -0
@@ -0,0 +1,143 @@
|
|
1
|
+
# ApplicationSpider is a default base spider class. You can set here
|
2
|
+
# default settings for all spiders inherited from ApplicationSpider.
|
3
|
+
# To generate a new spider, run: `$ tanakai generate spider spider_name`
|
4
|
+
|
5
|
+
class ApplicationSpider < Tanakai::Base
|
6
|
+
include ApplicationHelper
|
7
|
+
|
8
|
+
# Default engine for spiders (available engines: :mechanize, :poltergeist_phantomjs,
|
9
|
+
# :selenium_firefox, :selenium_chrome)
|
10
|
+
@engine = :poltergeist_phantomjs
|
11
|
+
|
12
|
+
# Pipelines list, by order.
|
13
|
+
# To process item through pipelines pass item to the `send_item` method
|
14
|
+
@pipelines = [:validator, :saver]
|
15
|
+
|
16
|
+
# Default config. Set here options which are default for all spiders inherited
|
17
|
+
# from ApplicationSpider. Child's class config will be deep merged with this one
|
18
|
+
@config = {
|
19
|
+
# Custom headers, format: hash. Example: { "some header" => "some value", "another header" => "another value" }
|
20
|
+
# Works only for :mechanize and :poltergeist_phantomjs engines (Selenium doesn't allow to set/get headers)
|
21
|
+
# headers: {},
|
22
|
+
|
23
|
+
# Custom User Agent, format: string or lambda.
|
24
|
+
# Use lambda if you want to rotate user agents before each run:
|
25
|
+
# user_agent: -> { ARRAY_OF_USER_AGENTS.sample }
|
26
|
+
# Works for all engines
|
27
|
+
# user_agent: "Mozilla/5.0 Firefox/61.0",
|
28
|
+
|
29
|
+
# Custom cookies, format: array of hashes.
|
30
|
+
# Format for a single cookie: { name: "cookie name", value: "cookie value", domain: ".example.com" }
|
31
|
+
# Works for all engines
|
32
|
+
# cookies: [],
|
33
|
+
|
34
|
+
# Proxy, format: string or lambda. Format of a proxy string: "ip:port:protocol:user:password"
|
35
|
+
# `protocol` can be http or socks5. User and password are optional.
|
36
|
+
# Use lambda if you want to rotate proxies before each run:
|
37
|
+
# proxy: -> { ARRAY_OF_PROXIES.sample }
|
38
|
+
# Works for all engines, but keep in mind that Selenium drivers don't support proxies
|
39
|
+
# with authorization. Also, Mechanize doesn't support socks5 proxy format (only http)
|
40
|
+
# proxy: "3.4.5.6:3128:http:user:pass",
|
41
|
+
|
42
|
+
# If enabled, browser will ignore any https errors. It's handy while using a proxy
|
43
|
+
# with self-signed SSL cert (for example Crawlera or Mitmproxy)
|
44
|
+
# Also, it allows visiting webpages with an expired SSL certificate.
|
45
|
+
# Works for all engines
|
46
|
+
ignore_ssl_errors: true,
|
47
|
+
|
48
|
+
# Custom window size, works for all engines
|
49
|
+
# window_size: [1366, 768],
|
50
|
+
|
51
|
+
# Skip images downloading if true, works for all engines
|
52
|
+
disable_images: true,
|
53
|
+
|
54
|
+
# Selenium engines only: headless mode, `:native` or `:virtual_display` (default is :native)
|
55
|
+
# Although native mode has a better performance, virtual display mode
|
56
|
+
# sometimes can be useful. For example, some websites can detect (and block)
|
57
|
+
# headless chrome, so you can use virtual_display mode instead
|
58
|
+
# headless_mode: :native,
|
59
|
+
|
60
|
+
# This option tells the browser not to use a proxy for the provided list of domains or IP addresses.
|
61
|
+
# Format: array of strings. Works only for :selenium_firefox and :selenium_chrome
|
62
|
+
# proxy_bypass_list: [],
|
63
|
+
|
64
|
+
# Option to provide custom SSL certificate. Works only for :poltergeist_phantomjs and :mechanize
|
65
|
+
# ssl_cert_path: "path/to/ssl_cert",
|
66
|
+
|
67
|
+
# Inject some JavaScript code to the browser.
|
68
|
+
# Format: array of strings, where each string is a path to JS file.
|
69
|
+
# Works only for poltergeist_phantomjs engine (Selenium doesn't support JS code injection)
|
70
|
+
# extensions: ["lib/code_to_inject.js"],
|
71
|
+
|
72
|
+
# Automatically skip duplicated (already visited) urls when using `request_to` method.
|
73
|
+
# Possible values: `true` or `hash` with options.
|
74
|
+
# In case of `true`, all visited urls will be added to the storage's scope `:requests_urls`
|
75
|
+
# and if a url is already present in this scope, the request will be skipped.
|
76
|
+
# You can configure this setting by providing additional options as hash:
|
77
|
+
# `skip_duplicate_requests: { scope: :custom_scope, check_only: true }`, where:
|
78
|
+
# `scope:` - use a custom scope instead of `:requests_urls`
|
79
|
+
# `check_only:` - if true, then scope will be only checked for url, url will not
|
80
|
+
# be added to the scope if the scope doesn't contain it.
|
81
|
+
# works for all drivers
|
82
|
+
# skip_duplicate_requests: true,
|
83
|
+
|
84
|
+
# Automatically skip provided errors while requesting a page.
|
85
|
+
# If raised error matches one of the errors in the list, then this error will be caught,
|
86
|
+
# and request will be skipped.
|
87
|
+
# It is a good idea to skip errors like NotFound(404), etc.
|
88
|
+
# Format: array where elements are error classes or/and hashes. You can use hash format
|
89
|
+
# for more flexibility: `{ error: "RuntimeError", message: "404 => Net::HTTPNotFound" }`.
|
90
|
+
# Provided `message:` will be compared with a full error message using `String#include?`. Also
|
91
|
+
# you can use regex instead: `{ error: "RuntimeError", message: /404|403/ }`.
|
92
|
+
# skip_request_errors: [{ error: RuntimeError, message: "404 => Net::HTTPNotFound" }],
|
93
|
+
|
94
|
+
# Automatically retry provided errors with a few attempts while requesting a page.
|
95
|
+
# If raised error matches one of the errors in the list, then this error will be caught
|
96
|
+
# and the request will be processed again within a delay. There are 3 attempts:
|
97
|
+
# first: delay 15 sec, second: delay 30 sec, third: delay 45 sec.
|
98
|
+
# If after 3 attempts there is still an exception, then the exception will be raised.
|
99
|
+
# It is a good idea to retry errors like `ReadTimeout`, `HTTPBadGateway`, etc.
|
100
|
+
# Format: same as for the `skip_request_errors` option.
|
101
|
+
# retry_request_errors: [Net::ReadTimeout],
|
102
|
+
|
103
|
+
# Handle page encoding while parsing html response using Nokogiri. There are two modes:
|
104
|
+
# Auto (`:auto`) (try to fetch correct encoding from <meta http-equiv="Content-Type"> or <meta charset> tags)
|
105
|
+
# Manual (set the required encoding explicitly), example: `encoding: "GB2312"`
|
106
|
+
# By default this option is unset.
|
107
|
+
# encoding: nil,
|
108
|
+
|
109
|
+
# Restart browser if one of the options is true:
|
110
|
+
restart_if: {
|
111
|
+
# Restart browser if provided memory limit (in kilobytes) is exceeded (works for all engines)
|
112
|
+
# memory_limit: 350_000,
|
113
|
+
|
114
|
+
# Restart browser if provided requests limit is exceeded (works for all engines)
|
115
|
+
# requests_limit: 100
|
116
|
+
},
|
117
|
+
|
118
|
+
# Perform several actions before each request:
|
119
|
+
before_request: {
|
120
|
+
# Change proxy before each request. The `proxy:` option above should be present
|
121
|
+
# and be a lambda. Works only for poltergeist and mechanize engines
|
122
|
+
# (Selenium doesn't support proxy rotation).
|
123
|
+
# change_proxy: true,
|
124
|
+
|
125
|
+
# Change user agent before each request. The `user_agent:` option above should be present
|
126
|
+
# and be a lambda. Works only for poltergeist and mechanize engines
|
127
|
+
# (Selenium doesn't support getting/setting headers).
|
128
|
+
# change_user_agent: true,
|
129
|
+
|
130
|
+
# Clear all cookies before each request, works for all engines
|
131
|
+
# clear_cookies: true,
|
132
|
+
|
133
|
+
# If you want to clear all cookies + set custom cookies (`cookies:` option above should be present)
|
134
|
+
# use this option instead (works for all engines)
|
135
|
+
# clear_and_set_cookies: true,
|
136
|
+
|
137
|
+
# Global option to set delay between requests.
|
138
|
+
# Delay can be `Integer`, `Float` or `Range` (`2..5`). In case of a range,
|
139
|
+
# delay number will be chosen randomly for each request: `rand (2..5) # => 3`
|
140
|
+
# delay: 1..3
|
141
|
+
}
|
142
|
+
}
|
143
|
+
end
|
File without changes
|
data/lib/tanakai.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'logger'
|
3
|
+
require 'json'
|
4
|
+
require 'active_support'
|
5
|
+
require 'active_support/core_ext'
|
6
|
+
require 'rbcat'
|
7
|
+
|
8
|
+
require_relative 'tanakai/version'
|
9
|
+
|
10
|
+
require_relative 'tanakai/core_ext/numeric'
|
11
|
+
require_relative 'tanakai/core_ext/string'
|
12
|
+
require_relative 'tanakai/core_ext/array'
|
13
|
+
require_relative 'tanakai/core_ext/hash'
|
14
|
+
|
15
|
+
require_relative 'tanakai/browser_builder'
|
16
|
+
require_relative 'tanakai/base_helper'
|
17
|
+
require_relative 'tanakai/pipeline'
|
18
|
+
require_relative 'tanakai/base'
|
19
|
+
|
20
|
+
# Top-level namespace of the framework. Its singleton methods expose the
# process-wide configuration object, environment helpers and lookup of the
# spider classes that descend from `Tanakai::Base`.
module Tanakai
  class << self
    # Global settings container, created lazily on first access and reused
    # for every later call.
    #
    # @return [OpenStruct]
    def configuration
      @configuration ||= OpenStruct.new
    end

    # Yields the global configuration so the caller can set options on it.
    #
    # @yieldparam config [OpenStruct] the object returned by {configuration}
    # @return [Object] the block's value; when no block is given the
    #   configuration itself is returned instead of raising LocalJumpError
    def configure
      return configuration unless block_given?

      yield(configuration)
    end

    # Current environment name, read from the TANAKAI_ENV variable.
    #
    # @return [String] value of TANAKAI_ENV, or "development" when unset
    def env
      ENV.fetch("TANAKAI_ENV", "development")
    end

    # @return [String, nil] current value of the TZ environment variable
    def time_zone
      ENV["TZ"]
    end

    # Writes the TZ environment variable of the current process.
    #
    # @param value [String, nil] new TZ value (nil removes the variable)
    def time_zone=(value)
      ENV["TZ"] = value
    end

    # Maps class name to class for every descendant of `Base`.
    # Relies on `Base.descendants` (presumably ActiveSupport's
    # Class#descendants, since the file requires active_support/core_ext).
    # Classes without a name (anonymous classes) are left out.
    #
    # @return [Hash{String => Class}]
    def list
      Base.descendants.each_with_object({}) do |klass, spiders|
        spiders[klass.name] = klass if klass.name
      end
    end

    # Looks up a `Base` descendant by its class name.
    #
    # @param name [String, nil] class name to search for
    # @return [Class, nil] the matching class; nil when `name` is nil/false
    #   or nothing matches
    def find_by_name(name)
      return unless name

      Base.descendants.find { |klass| klass.name == name }
    end
  end
end
|
data/tanakai.gemspec
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require "tanakai/version"
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "tanakai"
|
8
|
+
spec.version = Tanakai::VERSION
|
9
|
+
spec.authors = ["Victor Afanasev", "Glauco Custódio"]
|
10
|
+
spec.email = ["vicfreefly@gmail.com"]
|
11
|
+
|
12
|
+
spec.summary = "Maintained fork of Kimurai, a modern web scraping framework written in Ruby and based on Capybara/Nokogiri"
|
13
|
+
spec.homepage = "https://github.com/glaucocustodio/tanakai"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
# Specify which files should be added to the gem when it is released.
|
17
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
18
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
19
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
20
|
+
end
|
21
|
+
spec.bindir = "exe"
|
22
|
+
spec.executables = "tanakai"
|
23
|
+
spec.require_paths = ["lib"]
|
24
|
+
spec.required_ruby_version = ">= 2.5.0"
|
25
|
+
|
26
|
+
spec.add_dependency "thor"
|
27
|
+
spec.add_dependency "cliver"
|
28
|
+
spec.add_dependency "activesupport"
|
29
|
+
spec.add_dependency "murmurhash3"
|
30
|
+
spec.add_dependency "nokogiri"
|
31
|
+
|
32
|
+
spec.add_dependency "capybara", ">= 2.15", "< 4.0"
|
33
|
+
spec.add_dependency "capybara-mechanize"
|
34
|
+
spec.add_dependency "poltergeist"
|
35
|
+
spec.add_dependency "selenium-webdriver"
|
36
|
+
spec.add_dependency "apparition"
|
37
|
+
spec.add_dependency "cuprite"
|
38
|
+
|
39
|
+
spec.add_dependency "headless"
|
40
|
+
spec.add_dependency "pmap"
|
41
|
+
|
42
|
+
spec.add_dependency "whenever"
|
43
|
+
|
44
|
+
spec.add_dependency "rbcat", "~> 0.2"
|
45
|
+
spec.add_dependency "pry"
|
46
|
+
|
47
|
+
spec.add_development_dependency "bundler", "~> 1.16"
|
48
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
49
|
+
spec.add_development_dependency "minitest", "~> 5.0"
|
50
|
+
end
|
metadata
ADDED
@@ -0,0 +1,382 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tanakai
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.5.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Victor Afanasev
|
8
|
+
- Glauco Custódio
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2022-08-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: thor
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: cliver
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: activesupport
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: murmurhash3
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: nokogiri
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: capybara
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '2.15'
|
91
|
+
- - "<"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '4.0'
|
94
|
+
type: :runtime
|
95
|
+
prerelease: false
|
96
|
+
version_requirements: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '2.15'
|
101
|
+
- - "<"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '4.0'
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
name: capybara-mechanize
|
106
|
+
requirement: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
type: :runtime
|
112
|
+
prerelease: false
|
113
|
+
version_requirements: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
- !ruby/object:Gem::Dependency
|
119
|
+
name: poltergeist
|
120
|
+
requirement: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
type: :runtime
|
126
|
+
prerelease: false
|
127
|
+
version_requirements: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
- !ruby/object:Gem::Dependency
|
133
|
+
name: selenium-webdriver
|
134
|
+
requirement: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
type: :runtime
|
140
|
+
prerelease: false
|
141
|
+
version_requirements: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
- !ruby/object:Gem::Dependency
|
147
|
+
name: apparition
|
148
|
+
requirement: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
type: :runtime
|
154
|
+
prerelease: false
|
155
|
+
version_requirements: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
- !ruby/object:Gem::Dependency
|
161
|
+
name: cuprite
|
162
|
+
requirement: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
type: :runtime
|
168
|
+
prerelease: false
|
169
|
+
version_requirements: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
- !ruby/object:Gem::Dependency
|
175
|
+
name: headless
|
176
|
+
requirement: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
181
|
+
type: :runtime
|
182
|
+
prerelease: false
|
183
|
+
version_requirements: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
- !ruby/object:Gem::Dependency
|
189
|
+
name: pmap
|
190
|
+
requirement: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
195
|
+
type: :runtime
|
196
|
+
prerelease: false
|
197
|
+
version_requirements: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - ">="
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0'
|
202
|
+
- !ruby/object:Gem::Dependency
|
203
|
+
name: whenever
|
204
|
+
requirement: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - ">="
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0'
|
209
|
+
type: :runtime
|
210
|
+
prerelease: false
|
211
|
+
version_requirements: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - ">="
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
- !ruby/object:Gem::Dependency
|
217
|
+
name: rbcat
|
218
|
+
requirement: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0.2'
|
223
|
+
type: :runtime
|
224
|
+
prerelease: false
|
225
|
+
version_requirements: !ruby/object:Gem::Requirement
|
226
|
+
requirements:
|
227
|
+
- - "~>"
|
228
|
+
- !ruby/object:Gem::Version
|
229
|
+
version: '0.2'
|
230
|
+
- !ruby/object:Gem::Dependency
|
231
|
+
name: pry
|
232
|
+
requirement: !ruby/object:Gem::Requirement
|
233
|
+
requirements:
|
234
|
+
- - ">="
|
235
|
+
- !ruby/object:Gem::Version
|
236
|
+
version: '0'
|
237
|
+
type: :runtime
|
238
|
+
prerelease: false
|
239
|
+
version_requirements: !ruby/object:Gem::Requirement
|
240
|
+
requirements:
|
241
|
+
- - ">="
|
242
|
+
- !ruby/object:Gem::Version
|
243
|
+
version: '0'
|
244
|
+
- !ruby/object:Gem::Dependency
|
245
|
+
name: bundler
|
246
|
+
requirement: !ruby/object:Gem::Requirement
|
247
|
+
requirements:
|
248
|
+
- - "~>"
|
249
|
+
- !ruby/object:Gem::Version
|
250
|
+
version: '1.16'
|
251
|
+
type: :development
|
252
|
+
prerelease: false
|
253
|
+
version_requirements: !ruby/object:Gem::Requirement
|
254
|
+
requirements:
|
255
|
+
- - "~>"
|
256
|
+
- !ruby/object:Gem::Version
|
257
|
+
version: '1.16'
|
258
|
+
- !ruby/object:Gem::Dependency
|
259
|
+
name: rake
|
260
|
+
requirement: !ruby/object:Gem::Requirement
|
261
|
+
requirements:
|
262
|
+
- - "~>"
|
263
|
+
- !ruby/object:Gem::Version
|
264
|
+
version: '10.0'
|
265
|
+
type: :development
|
266
|
+
prerelease: false
|
267
|
+
version_requirements: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - "~>"
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: '10.0'
|
272
|
+
- !ruby/object:Gem::Dependency
|
273
|
+
name: minitest
|
274
|
+
requirement: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - "~>"
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: '5.0'
|
279
|
+
type: :development
|
280
|
+
prerelease: false
|
281
|
+
version_requirements: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - "~>"
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: '5.0'
|
286
|
+
description:
|
287
|
+
email:
|
288
|
+
- vicfreefly@gmail.com
|
289
|
+
executables:
|
290
|
+
- tanakai
|
291
|
+
extensions: []
|
292
|
+
extra_rdoc_files: []
|
293
|
+
files:
|
294
|
+
- ".gitignore"
|
295
|
+
- ".travis.yml"
|
296
|
+
- CHANGELOG.md
|
297
|
+
- Gemfile
|
298
|
+
- LICENSE.txt
|
299
|
+
- README.md
|
300
|
+
- Rakefile
|
301
|
+
- bin/console
|
302
|
+
- bin/setup
|
303
|
+
- exe/tanakai
|
304
|
+
- lib/tanakai.rb
|
305
|
+
- lib/tanakai/automation/deploy.yml
|
306
|
+
- lib/tanakai/automation/setup.yml
|
307
|
+
- lib/tanakai/automation/setup/chromium_chromedriver.yml
|
308
|
+
- lib/tanakai/automation/setup/firefox_geckodriver.yml
|
309
|
+
- lib/tanakai/automation/setup/phantomjs.yml
|
310
|
+
- lib/tanakai/automation/setup/ruby_environment.yml
|
311
|
+
- lib/tanakai/base.rb
|
312
|
+
- lib/tanakai/base/saver.rb
|
313
|
+
- lib/tanakai/base/storage.rb
|
314
|
+
- lib/tanakai/base_helper.rb
|
315
|
+
- lib/tanakai/browser_builder.rb
|
316
|
+
- lib/tanakai/browser_builder/apparition_builder.rb
|
317
|
+
- lib/tanakai/browser_builder/cuprite_builder.rb
|
318
|
+
- lib/tanakai/browser_builder/mechanize_builder.rb
|
319
|
+
- lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb
|
320
|
+
- lib/tanakai/browser_builder/selenium_chrome_builder.rb
|
321
|
+
- lib/tanakai/browser_builder/selenium_firefox_builder.rb
|
322
|
+
- lib/tanakai/capybara_configuration.rb
|
323
|
+
- lib/tanakai/capybara_ext/apparition/driver.rb
|
324
|
+
- lib/tanakai/capybara_ext/cuprite/driver.rb
|
325
|
+
- lib/tanakai/capybara_ext/driver/base.rb
|
326
|
+
- lib/tanakai/capybara_ext/mechanize/driver.rb
|
327
|
+
- lib/tanakai/capybara_ext/poltergeist/driver.rb
|
328
|
+
- lib/tanakai/capybara_ext/selenium/driver.rb
|
329
|
+
- lib/tanakai/capybara_ext/session.rb
|
330
|
+
- lib/tanakai/capybara_ext/session/config.rb
|
331
|
+
- lib/tanakai/cli.rb
|
332
|
+
- lib/tanakai/cli/ansible_command_builder.rb
|
333
|
+
- lib/tanakai/cli/generator.rb
|
334
|
+
- lib/tanakai/core_ext/array.rb
|
335
|
+
- lib/tanakai/core_ext/hash.rb
|
336
|
+
- lib/tanakai/core_ext/numeric.rb
|
337
|
+
- lib/tanakai/core_ext/string.rb
|
338
|
+
- lib/tanakai/pipeline.rb
|
339
|
+
- lib/tanakai/runner.rb
|
340
|
+
- lib/tanakai/template/.gitignore
|
341
|
+
- lib/tanakai/template/Gemfile
|
342
|
+
- lib/tanakai/template/README.md
|
343
|
+
- lib/tanakai/template/config/application.rb
|
344
|
+
- lib/tanakai/template/config/automation.yml
|
345
|
+
- lib/tanakai/template/config/boot.rb
|
346
|
+
- lib/tanakai/template/config/initializers/.keep
|
347
|
+
- lib/tanakai/template/config/schedule.rb
|
348
|
+
- lib/tanakai/template/db/.keep
|
349
|
+
- lib/tanakai/template/helpers/application_helper.rb
|
350
|
+
- lib/tanakai/template/lib/.keep
|
351
|
+
- lib/tanakai/template/log/.keep
|
352
|
+
- lib/tanakai/template/pipelines/saver.rb
|
353
|
+
- lib/tanakai/template/pipelines/validator.rb
|
354
|
+
- lib/tanakai/template/spiders/application_spider.rb
|
355
|
+
- lib/tanakai/template/tmp/.keep
|
356
|
+
- lib/tanakai/version.rb
|
357
|
+
- tanakai.gemspec
|
358
|
+
homepage: https://github.com/glaucocustodio/tanakai
|
359
|
+
licenses:
|
360
|
+
- MIT
|
361
|
+
metadata: {}
|
362
|
+
post_install_message:
|
363
|
+
rdoc_options: []
|
364
|
+
require_paths:
|
365
|
+
- lib
|
366
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
367
|
+
requirements:
|
368
|
+
- - ">="
|
369
|
+
- !ruby/object:Gem::Version
|
370
|
+
version: 2.5.0
|
371
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
372
|
+
requirements:
|
373
|
+
- - ">="
|
374
|
+
- !ruby/object:Gem::Version
|
375
|
+
version: '0'
|
376
|
+
requirements: []
|
377
|
+
rubygems_version: 3.1.2
|
378
|
+
signing_key:
|
379
|
+
specification_version: 4
|
380
|
+
summary: Maintained fork of Kimurai, a modern web scraping framework written in Ruby
|
381
|
+
and based on Capybara/Nokogiri
|
382
|
+
test_files: []
|