tanakai 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +118 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +2038 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/tanakai +6 -0
- data/lib/tanakai/automation/deploy.yml +54 -0
- data/lib/tanakai/automation/setup/chromium_chromedriver.yml +26 -0
- data/lib/tanakai/automation/setup/firefox_geckodriver.yml +20 -0
- data/lib/tanakai/automation/setup/phantomjs.yml +33 -0
- data/lib/tanakai/automation/setup/ruby_environment.yml +124 -0
- data/lib/tanakai/automation/setup.yml +45 -0
- data/lib/tanakai/base/saver.rb +106 -0
- data/lib/tanakai/base/storage.rb +54 -0
- data/lib/tanakai/base.rb +326 -0
- data/lib/tanakai/base_helper.rb +22 -0
- data/lib/tanakai/browser_builder/apparition_builder.rb +58 -0
- data/lib/tanakai/browser_builder/cuprite_builder.rb +54 -0
- data/lib/tanakai/browser_builder/mechanize_builder.rb +154 -0
- data/lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb +175 -0
- data/lib/tanakai/browser_builder/selenium_chrome_builder.rb +199 -0
- data/lib/tanakai/browser_builder/selenium_firefox_builder.rb +204 -0
- data/lib/tanakai/browser_builder.rb +20 -0
- data/lib/tanakai/capybara_configuration.rb +10 -0
- data/lib/tanakai/capybara_ext/apparition/driver.rb +13 -0
- data/lib/tanakai/capybara_ext/cuprite/driver.rb +13 -0
- data/lib/tanakai/capybara_ext/driver/base.rb +62 -0
- data/lib/tanakai/capybara_ext/mechanize/driver.rb +71 -0
- data/lib/tanakai/capybara_ext/poltergeist/driver.rb +13 -0
- data/lib/tanakai/capybara_ext/selenium/driver.rb +34 -0
- data/lib/tanakai/capybara_ext/session/config.rb +22 -0
- data/lib/tanakai/capybara_ext/session.rb +249 -0
- data/lib/tanakai/cli/ansible_command_builder.rb +71 -0
- data/lib/tanakai/cli/generator.rb +57 -0
- data/lib/tanakai/cli.rb +183 -0
- data/lib/tanakai/core_ext/array.rb +14 -0
- data/lib/tanakai/core_ext/hash.rb +5 -0
- data/lib/tanakai/core_ext/numeric.rb +19 -0
- data/lib/tanakai/core_ext/string.rb +7 -0
- data/lib/tanakai/pipeline.rb +33 -0
- data/lib/tanakai/runner.rb +60 -0
- data/lib/tanakai/template/.gitignore +18 -0
- data/lib/tanakai/template/Gemfile +28 -0
- data/lib/tanakai/template/README.md +3 -0
- data/lib/tanakai/template/config/application.rb +37 -0
- data/lib/tanakai/template/config/automation.yml +13 -0
- data/lib/tanakai/template/config/boot.rb +22 -0
- data/lib/tanakai/template/config/initializers/.keep +0 -0
- data/lib/tanakai/template/config/schedule.rb +57 -0
- data/lib/tanakai/template/db/.keep +0 -0
- data/lib/tanakai/template/helpers/application_helper.rb +3 -0
- data/lib/tanakai/template/lib/.keep +0 -0
- data/lib/tanakai/template/log/.keep +0 -0
- data/lib/tanakai/template/pipelines/saver.rb +11 -0
- data/lib/tanakai/template/pipelines/validator.rb +24 -0
- data/lib/tanakai/template/spiders/application_spider.rb +143 -0
- data/lib/tanakai/template/tmp/.keep +0 -0
- data/lib/tanakai/version.rb +3 -0
- data/lib/tanakai.rb +54 -0
- data/tanakai.gemspec +50 -0
- metadata +382 -0
@@ -0,0 +1,143 @@
|
|
1
|
+
# ApplicationSpider is a default base spider class. You can set here
|
2
|
+
# default settings for all spiders inherited from ApplicationSpider.
|
3
|
+
# To generate a new spider, run: `$ tanakai generate spider spider_name`
|
4
|
+
|
5
|
+
class ApplicationSpider < Tanakai::Base
|
6
|
+
include ApplicationHelper
|
7
|
+
|
8
|
+
# Default engine for spiders (available engines: :mechanize, :poltergeist_phantomjs,
|
9
|
+
# :selenium_firefox, :selenium_chrome)
|
10
|
+
@engine = :poltergeist_phantomjs
|
11
|
+
|
12
|
+
# Pipelines list, by order.
|
13
|
+
# To process item through pipelines pass item to the `send_item` method
|
14
|
+
@pipelines = [:validator, :saver]
|
15
|
+
|
16
|
+
# Default config. Set here options which are default for all spiders inherited
|
17
|
+
# from ApplicationSpider. Child's class config will be deep merged with this one
|
18
|
+
@config = {
|
19
|
+
# Custom headers, format: hash. Example: { "some header" => "some value", "another header" => "another value" }
|
20
|
+
# Works only for :mechanize and :poltergeist_phantomjs engines (Selenium doesn't allow to set/get headers)
|
21
|
+
# headers: {},
|
22
|
+
|
23
|
+
# Custom User Agent, format: string or lambda.
|
24
|
+
# Use lambda if you want to rotate user agents before each run:
|
25
|
+
# user_agent: -> { ARRAY_OF_USER_AGENTS.sample }
|
26
|
+
# Works for all engines
|
27
|
+
# user_agent: "Mozilla/5.0 Firefox/61.0",
|
28
|
+
|
29
|
+
# Custom cookies, format: array of hashes.
|
30
|
+
# Format for a single cookie: { name: "cookie name", value: "cookie value", domain: ".example.com" }
|
31
|
+
# Works for all engines
|
32
|
+
# cookies: [],
|
33
|
+
|
34
|
+
# Proxy, format: string or lambda. Format of a proxy string: "ip:port:protocol:user:password"
|
35
|
+
# `protocol` can be http or socks5. User and password are optional.
|
36
|
+
# Use lambda if you want to rotate proxies before each run:
|
37
|
+
# proxy: -> { ARRAY_OF_PROXIES.sample }
|
38
|
+
# Works for all engines, but keep in mind that Selenium drivers don't support proxies
|
39
|
+
# with authorization. Also, Mechanize doesn't support socks5 proxy format (only http)
|
40
|
+
# proxy: "3.4.5.6:3128:http:user:pass",
|
41
|
+
|
42
|
+
# If enabled, browser will ignore any https errors. It's handy while using a proxy
|
43
|
+
# with self-signed SSL cert (for example Crawlera or Mitmproxy)
|
44
|
+
# It also allows visiting webpages with an expired SSL certificate.
|
45
|
+
# Works for all engines
|
46
|
+
ignore_ssl_errors: true,
|
47
|
+
|
48
|
+
# Custom window size, works for all engines
|
49
|
+
# window_size: [1366, 768],
|
50
|
+
|
51
|
+
# Skip images downloading if true, works for all engines
|
52
|
+
disable_images: true,
|
53
|
+
|
54
|
+
# Selenium engines only: headless mode, `:native` or `:virtual_display` (default is :native)
|
55
|
+
# Although native mode has a better performance, virtual display mode
|
56
|
+
# sometimes can be useful. For example, some websites can detect (and block)
|
57
|
+
# headless chrome, so you can use virtual_display mode instead
|
58
|
+
# headless_mode: :native,
|
59
|
+
|
60
|
+
# This option tells the browser not to use a proxy for the provided list of domains or IP addresses.
|
61
|
+
# Format: array of strings. Works only for :selenium_firefox and :selenium_chrome
|
62
|
+
# proxy_bypass_list: [],
|
63
|
+
|
64
|
+
# Option to provide custom SSL certificate. Works only for :poltergeist_phantomjs and :mechanize
|
65
|
+
# ssl_cert_path: "path/to/ssl_cert",
|
66
|
+
|
67
|
+
# Inject some JavaScript code to the browser.
|
68
|
+
# Format: array of strings, where each string is a path to JS file.
|
69
|
+
# Works only for poltergeist_phantomjs engine (Selenium doesn't support JS code injection)
|
70
|
+
# extensions: ["lib/code_to_inject.js"],
|
71
|
+
|
72
|
+
# Automatically skip duplicated (already visited) urls when using `request_to` method.
|
73
|
+
# Possible values: `true` or `hash` with options.
|
74
|
+
# In case of `true`, all visited urls will be added to the storage's scope `:requests_urls`
|
75
|
+
# and if the url is already in this scope, the request will be skipped.
|
76
|
+
# You can configure this setting by providing additional options as hash:
|
77
|
+
# `skip_duplicate_requests: { scope: :custom_scope, check_only: true }`, where:
|
78
|
+
# `scope:` - use a custom scope instead of `:requests_urls`
|
79
|
+
# `check_only:` - if true, then scope will be only checked for url, url will not
|
80
|
+
# be added to the scope if the scope doesn't contain it.
|
81
|
+
# works for all drivers
|
82
|
+
# skip_duplicate_requests: true,
|
83
|
+
|
84
|
+
# Automatically skip provided errors while requesting a page.
|
85
|
+
# If raised error matches one of the errors in the list, then this error will be caught,
|
86
|
+
# and request will be skipped.
|
87
|
+
# It is a good idea to skip errors like NotFound(404), etc.
|
88
|
+
# Format: array where elements are error classes or/and hashes. You can use hash format
|
89
|
+
# for more flexibility: `{ error: "RuntimeError", message: "404 => Net::HTTPNotFound" }`.
|
90
|
+
# Provided `message:` will be compared with a full error message using `String#include?`. Also
|
91
|
+
# you can use regex instead: `{ error: "RuntimeError", message: /404|403/ }`.
|
92
|
+
# skip_request_errors: [{ error: RuntimeError, message: "404 => Net::HTTPNotFound" }],
|
93
|
+
|
94
|
+
# Automatically retry provided errors with a few attempts while requesting a page.
|
95
|
+
# If raised error matches one of the errors in the list, then this error will be caught
|
96
|
+
# and the request will be processed again within a delay. There are 3 attempts:
|
97
|
+
# first: delay 15 sec, second: delay 30 sec, third: delay 45 sec.
|
98
|
+
# If after 3 attempts there is still an exception, then the exception will be raised.
|
99
|
+
# It is a good idea to retry errors like `ReadTimeout`, `HTTPBadGateway`, etc.
|
100
|
+
# Format: same as for the `skip_request_errors` option.
|
101
|
+
# retry_request_errors: [Net::ReadTimeout],
|
102
|
+
|
103
|
+
# Handle page encoding while parsing html response using Nokogiri. There are two modes:
|
104
|
+
# Auto (`:auto`) (try to fetch correct encoding from <meta http-equiv="Content-Type"> or <meta charset> tags)
|
105
|
+
# Manual: set the required encoding explicitly, example: `encoding: "GB2312"`
|
106
|
+
# By default this option is unset.
|
107
|
+
# encoding: nil,
|
108
|
+
|
109
|
+
# Restart browser if one of the options is true:
|
110
|
+
restart_if: {
|
111
|
+
# Restart browser if provided memory limit (in kilobytes) is exceeded (works for all engines)
|
112
|
+
# memory_limit: 350_000,
|
113
|
+
|
114
|
+
# Restart browser if provided requests limit is exceeded (works for all engines)
|
115
|
+
# requests_limit: 100
|
116
|
+
},
|
117
|
+
|
118
|
+
# Perform several actions before each request:
|
119
|
+
before_request: {
|
120
|
+
# Change proxy before each request. The `proxy:` option above must be present
|
121
|
+
# and be a lambda. Works only for poltergeist and mechanize engines
|
122
|
+
# (Selenium doesn't support proxy rotation).
|
123
|
+
# change_proxy: true,
|
124
|
+
|
125
|
+
# Change user agent before each request. The `user_agent:` option above must be present
|
126
|
+
# and be a lambda. Works only for poltergeist and mechanize engines
|
127
|
+
# (Selenium doesn't support getting/setting headers).
|
128
|
+
# change_user_agent: true,
|
129
|
+
|
130
|
+
# Clear all cookies before each request, works for all engines
|
131
|
+
# clear_cookies: true,
|
132
|
+
|
133
|
+
# If you want to clear all cookies + set custom cookies (the `cookies:` option above must be present)
|
134
|
+
# use this option instead (works for all engines)
|
135
|
+
# clear_and_set_cookies: true,
|
136
|
+
|
137
|
+
# Global option to set delay between requests.
|
138
|
+
# Delay can be `Integer`, `Float` or `Range` (`2..5`). In case of a range,
|
139
|
+
# delay number will be chosen randomly for each request: `rand (2..5) # => 3`
|
140
|
+
# delay: 1..3
|
141
|
+
}
|
142
|
+
}
|
143
|
+
end
|
File without changes
|
data/lib/tanakai.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'logger'
|
3
|
+
require 'json'
|
4
|
+
require 'active_support'
|
5
|
+
require 'active_support/core_ext'
|
6
|
+
require 'rbcat'
|
7
|
+
|
8
|
+
require_relative 'tanakai/version'
|
9
|
+
|
10
|
+
require_relative 'tanakai/core_ext/numeric'
|
11
|
+
require_relative 'tanakai/core_ext/string'
|
12
|
+
require_relative 'tanakai/core_ext/array'
|
13
|
+
require_relative 'tanakai/core_ext/hash'
|
14
|
+
|
15
|
+
require_relative 'tanakai/browser_builder'
|
16
|
+
require_relative 'tanakai/base_helper'
|
17
|
+
require_relative 'tanakai/pipeline'
|
18
|
+
require_relative 'tanakai/base'
|
19
|
+
|
20
|
+
# Top-level namespace for the framework. Exposes process-wide settings
# (configuration object, environment name, time zone) and lookup helpers
# for the spider classes that inherit from Tanakai::Base.
module Tanakai
  # Lazily created, memoized OpenStruct holding arbitrary settings.
  #
  # @return [OpenStruct] the same instance on every call
  def self.configuration
    @configuration ||= OpenStruct.new
  end

  # Yields the shared configuration object so callers can assign settings.
  #
  # @yieldparam config [OpenStruct] the object returned by {.configuration}
  def self.configure
    yield configuration
  end

  # Name of the current environment, read from the TANAKAI_ENV variable.
  #
  # @return [String] the variable's value, or "development" when it is unset
  def self.env
    ENV.fetch("TANAKAI_ENV", "development")
  end

  # Current value of the TZ environment variable.
  #
  # @return [String, nil] nil when TZ is not set
  def self.time_zone
    ENV["TZ"]
  end

  # Writes +value+ into the TZ environment variable.
  #
  # @param value [String, nil] new TZ value
  def self.time_zone=(value)
    ENV["TZ"] = value
  end

  # Maps class name => class for every entry of Base.descendants that has a
  # name; entries whose name is nil (anonymous classes) are left out.
  # NOTE(review): Base.descendants is presumably ActiveSupport's
  # Class#descendants - Base is defined outside this file, confirm there.
  #
  # @return [Hash{String => Class}]
  def self.list
    Base.descendants.each_with_object({}) do |spider, registry|
      registry[spider.name] = spider if spider.name
    end
  end

  # Finds the first entry of Base.descendants whose class name equals +name+.
  #
  # @param name [String, nil] class name to look up
  # @return [Class, nil] nil when +name+ is nil/false or nothing matches
  def self.find_by_name(name)
    Base.descendants.find { |spider| spider.name == name } if name
  end
end
|
data/tanakai.gemspec
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
# Put lib/ on the load path so the version constant can be required
# straight from the source tree.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "tanakai/version"

Gem::Specification.new do |spec|
  # --- Identity ---
  spec.name          = "tanakai"
  spec.version       = Tanakai::VERSION
  spec.authors       = ["Victor Afanasev", "Glauco Custódio"]
  spec.email         = ["vicfreefly@gmail.com"]

  spec.summary       = "Maintained fork of Kimurai, a modern web scraping framework written in Ruby and based on Capybara/Nokogiri"
  spec.homepage      = "https://github.com/glaucocustodio/tanakai"
  spec.license       = "MIT"

  # --- Packaged files ---
  # Ship every git-tracked file except those under test/, spec/ or features/.
  # `git ls-files -z` emits NUL-separated paths, hence the split on "\x0".
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match?(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = "tanakai"
  spec.require_paths = ["lib"]
  spec.required_ruby_version = ">= 2.5.0"

  # --- Runtime dependencies ---
  spec.add_dependency("thor")
  spec.add_dependency("cliver")
  spec.add_dependency("activesupport")
  spec.add_dependency("murmurhash3")
  spec.add_dependency("nokogiri")

  spec.add_dependency("capybara", ">= 2.15", "< 4.0")
  spec.add_dependency("capybara-mechanize")
  spec.add_dependency("poltergeist")
  spec.add_dependency("selenium-webdriver")
  spec.add_dependency("apparition")
  spec.add_dependency("cuprite")

  spec.add_dependency("headless")
  spec.add_dependency("pmap")

  spec.add_dependency("whenever")

  spec.add_dependency("rbcat", "~> 0.2")
  spec.add_dependency("pry")

  # --- Development dependencies ---
  spec.add_development_dependency("bundler", "~> 1.16")
  spec.add_development_dependency("rake", "~> 10.0")
  spec.add_development_dependency("minitest", "~> 5.0")
end
|
metadata
ADDED
@@ -0,0 +1,382 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tanakai
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.5.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Victor Afanasev
|
8
|
+
- Glauco Custódio
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2022-08-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: thor
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: cliver
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: activesupport
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: murmurhash3
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: nokogiri
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: capybara
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '2.15'
|
91
|
+
- - "<"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '4.0'
|
94
|
+
type: :runtime
|
95
|
+
prerelease: false
|
96
|
+
version_requirements: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '2.15'
|
101
|
+
- - "<"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '4.0'
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
name: capybara-mechanize
|
106
|
+
requirement: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
type: :runtime
|
112
|
+
prerelease: false
|
113
|
+
version_requirements: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
- !ruby/object:Gem::Dependency
|
119
|
+
name: poltergeist
|
120
|
+
requirement: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
type: :runtime
|
126
|
+
prerelease: false
|
127
|
+
version_requirements: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
- !ruby/object:Gem::Dependency
|
133
|
+
name: selenium-webdriver
|
134
|
+
requirement: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
type: :runtime
|
140
|
+
prerelease: false
|
141
|
+
version_requirements: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
- !ruby/object:Gem::Dependency
|
147
|
+
name: apparition
|
148
|
+
requirement: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
type: :runtime
|
154
|
+
prerelease: false
|
155
|
+
version_requirements: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
- !ruby/object:Gem::Dependency
|
161
|
+
name: cuprite
|
162
|
+
requirement: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
type: :runtime
|
168
|
+
prerelease: false
|
169
|
+
version_requirements: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
- !ruby/object:Gem::Dependency
|
175
|
+
name: headless
|
176
|
+
requirement: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
181
|
+
type: :runtime
|
182
|
+
prerelease: false
|
183
|
+
version_requirements: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
- !ruby/object:Gem::Dependency
|
189
|
+
name: pmap
|
190
|
+
requirement: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
195
|
+
type: :runtime
|
196
|
+
prerelease: false
|
197
|
+
version_requirements: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - ">="
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0'
|
202
|
+
- !ruby/object:Gem::Dependency
|
203
|
+
name: whenever
|
204
|
+
requirement: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - ">="
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0'
|
209
|
+
type: :runtime
|
210
|
+
prerelease: false
|
211
|
+
version_requirements: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - ">="
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
- !ruby/object:Gem::Dependency
|
217
|
+
name: rbcat
|
218
|
+
requirement: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0.2'
|
223
|
+
type: :runtime
|
224
|
+
prerelease: false
|
225
|
+
version_requirements: !ruby/object:Gem::Requirement
|
226
|
+
requirements:
|
227
|
+
- - "~>"
|
228
|
+
- !ruby/object:Gem::Version
|
229
|
+
version: '0.2'
|
230
|
+
- !ruby/object:Gem::Dependency
|
231
|
+
name: pry
|
232
|
+
requirement: !ruby/object:Gem::Requirement
|
233
|
+
requirements:
|
234
|
+
- - ">="
|
235
|
+
- !ruby/object:Gem::Version
|
236
|
+
version: '0'
|
237
|
+
type: :runtime
|
238
|
+
prerelease: false
|
239
|
+
version_requirements: !ruby/object:Gem::Requirement
|
240
|
+
requirements:
|
241
|
+
- - ">="
|
242
|
+
- !ruby/object:Gem::Version
|
243
|
+
version: '0'
|
244
|
+
- !ruby/object:Gem::Dependency
|
245
|
+
name: bundler
|
246
|
+
requirement: !ruby/object:Gem::Requirement
|
247
|
+
requirements:
|
248
|
+
- - "~>"
|
249
|
+
- !ruby/object:Gem::Version
|
250
|
+
version: '1.16'
|
251
|
+
type: :development
|
252
|
+
prerelease: false
|
253
|
+
version_requirements: !ruby/object:Gem::Requirement
|
254
|
+
requirements:
|
255
|
+
- - "~>"
|
256
|
+
- !ruby/object:Gem::Version
|
257
|
+
version: '1.16'
|
258
|
+
- !ruby/object:Gem::Dependency
|
259
|
+
name: rake
|
260
|
+
requirement: !ruby/object:Gem::Requirement
|
261
|
+
requirements:
|
262
|
+
- - "~>"
|
263
|
+
- !ruby/object:Gem::Version
|
264
|
+
version: '10.0'
|
265
|
+
type: :development
|
266
|
+
prerelease: false
|
267
|
+
version_requirements: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - "~>"
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: '10.0'
|
272
|
+
- !ruby/object:Gem::Dependency
|
273
|
+
name: minitest
|
274
|
+
requirement: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - "~>"
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: '5.0'
|
279
|
+
type: :development
|
280
|
+
prerelease: false
|
281
|
+
version_requirements: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - "~>"
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: '5.0'
|
286
|
+
description:
|
287
|
+
email:
|
288
|
+
- vicfreefly@gmail.com
|
289
|
+
executables:
|
290
|
+
- tanakai
|
291
|
+
extensions: []
|
292
|
+
extra_rdoc_files: []
|
293
|
+
files:
|
294
|
+
- ".gitignore"
|
295
|
+
- ".travis.yml"
|
296
|
+
- CHANGELOG.md
|
297
|
+
- Gemfile
|
298
|
+
- LICENSE.txt
|
299
|
+
- README.md
|
300
|
+
- Rakefile
|
301
|
+
- bin/console
|
302
|
+
- bin/setup
|
303
|
+
- exe/tanakai
|
304
|
+
- lib/tanakai.rb
|
305
|
+
- lib/tanakai/automation/deploy.yml
|
306
|
+
- lib/tanakai/automation/setup.yml
|
307
|
+
- lib/tanakai/automation/setup/chromium_chromedriver.yml
|
308
|
+
- lib/tanakai/automation/setup/firefox_geckodriver.yml
|
309
|
+
- lib/tanakai/automation/setup/phantomjs.yml
|
310
|
+
- lib/tanakai/automation/setup/ruby_environment.yml
|
311
|
+
- lib/tanakai/base.rb
|
312
|
+
- lib/tanakai/base/saver.rb
|
313
|
+
- lib/tanakai/base/storage.rb
|
314
|
+
- lib/tanakai/base_helper.rb
|
315
|
+
- lib/tanakai/browser_builder.rb
|
316
|
+
- lib/tanakai/browser_builder/apparition_builder.rb
|
317
|
+
- lib/tanakai/browser_builder/cuprite_builder.rb
|
318
|
+
- lib/tanakai/browser_builder/mechanize_builder.rb
|
319
|
+
- lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb
|
320
|
+
- lib/tanakai/browser_builder/selenium_chrome_builder.rb
|
321
|
+
- lib/tanakai/browser_builder/selenium_firefox_builder.rb
|
322
|
+
- lib/tanakai/capybara_configuration.rb
|
323
|
+
- lib/tanakai/capybara_ext/apparition/driver.rb
|
324
|
+
- lib/tanakai/capybara_ext/cuprite/driver.rb
|
325
|
+
- lib/tanakai/capybara_ext/driver/base.rb
|
326
|
+
- lib/tanakai/capybara_ext/mechanize/driver.rb
|
327
|
+
- lib/tanakai/capybara_ext/poltergeist/driver.rb
|
328
|
+
- lib/tanakai/capybara_ext/selenium/driver.rb
|
329
|
+
- lib/tanakai/capybara_ext/session.rb
|
330
|
+
- lib/tanakai/capybara_ext/session/config.rb
|
331
|
+
- lib/tanakai/cli.rb
|
332
|
+
- lib/tanakai/cli/ansible_command_builder.rb
|
333
|
+
- lib/tanakai/cli/generator.rb
|
334
|
+
- lib/tanakai/core_ext/array.rb
|
335
|
+
- lib/tanakai/core_ext/hash.rb
|
336
|
+
- lib/tanakai/core_ext/numeric.rb
|
337
|
+
- lib/tanakai/core_ext/string.rb
|
338
|
+
- lib/tanakai/pipeline.rb
|
339
|
+
- lib/tanakai/runner.rb
|
340
|
+
- lib/tanakai/template/.gitignore
|
341
|
+
- lib/tanakai/template/Gemfile
|
342
|
+
- lib/tanakai/template/README.md
|
343
|
+
- lib/tanakai/template/config/application.rb
|
344
|
+
- lib/tanakai/template/config/automation.yml
|
345
|
+
- lib/tanakai/template/config/boot.rb
|
346
|
+
- lib/tanakai/template/config/initializers/.keep
|
347
|
+
- lib/tanakai/template/config/schedule.rb
|
348
|
+
- lib/tanakai/template/db/.keep
|
349
|
+
- lib/tanakai/template/helpers/application_helper.rb
|
350
|
+
- lib/tanakai/template/lib/.keep
|
351
|
+
- lib/tanakai/template/log/.keep
|
352
|
+
- lib/tanakai/template/pipelines/saver.rb
|
353
|
+
- lib/tanakai/template/pipelines/validator.rb
|
354
|
+
- lib/tanakai/template/spiders/application_spider.rb
|
355
|
+
- lib/tanakai/template/tmp/.keep
|
356
|
+
- lib/tanakai/version.rb
|
357
|
+
- tanakai.gemspec
|
358
|
+
homepage: https://github.com/glaucocustodio/tanakai
|
359
|
+
licenses:
|
360
|
+
- MIT
|
361
|
+
metadata: {}
|
362
|
+
post_install_message:
|
363
|
+
rdoc_options: []
|
364
|
+
require_paths:
|
365
|
+
- lib
|
366
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
367
|
+
requirements:
|
368
|
+
- - ">="
|
369
|
+
- !ruby/object:Gem::Version
|
370
|
+
version: 2.5.0
|
371
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
372
|
+
requirements:
|
373
|
+
- - ">="
|
374
|
+
- !ruby/object:Gem::Version
|
375
|
+
version: '0'
|
376
|
+
requirements: []
|
377
|
+
rubygems_version: 3.1.2
|
378
|
+
signing_key:
|
379
|
+
specification_version: 4
|
380
|
+
summary: Maintained fork of Kimurai, a modern web scraping framework written in Ruby
|
381
|
+
and based on Capybara/Nokogiri
|
382
|
+
test_files: []
|