kimurai 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/CHANGELOG.md +21 -0
- data/Gemfile +2 -2
- data/README.md +476 -648
- data/Rakefile +6 -6
- data/bin/console +3 -4
- data/exe/kimurai +0 -1
- data/kimurai.gemspec +38 -37
- data/lib/kimurai/base/saver.rb +15 -19
- data/lib/kimurai/base/storage.rb +1 -1
- data/lib/kimurai/base.rb +38 -38
- data/lib/kimurai/base_helper.rb +5 -4
- data/lib/kimurai/browser_builder/mechanize_builder.rb +121 -119
- data/lib/kimurai/browser_builder/selenium_chrome_builder.rb +160 -152
- data/lib/kimurai/browser_builder/selenium_firefox_builder.rb +162 -160
- data/lib/kimurai/browser_builder.rb +1 -7
- data/lib/kimurai/capybara_configuration.rb +1 -1
- data/lib/kimurai/capybara_ext/driver/base.rb +50 -46
- data/lib/kimurai/capybara_ext/mechanize/driver.rb +51 -50
- data/lib/kimurai/capybara_ext/selenium/driver.rb +33 -29
- data/lib/kimurai/capybara_ext/session.rb +31 -38
- data/lib/kimurai/cli/generator.rb +15 -15
- data/lib/kimurai/cli.rb +49 -86
- data/lib/kimurai/core_ext/array.rb +2 -2
- data/lib/kimurai/core_ext/hash.rb +1 -1
- data/lib/kimurai/core_ext/numeric.rb +4 -4
- data/lib/kimurai/pipeline.rb +2 -1
- data/lib/kimurai/runner.rb +6 -6
- data/lib/kimurai/template/Gemfile +2 -2
- data/lib/kimurai/template/config/boot.rb +4 -4
- data/lib/kimurai/template/config/schedule.rb +15 -15
- data/lib/kimurai/template/spiders/application_spider.rb +8 -14
- data/lib/kimurai/version.rb +1 -1
- data/lib/kimurai.rb +7 -3
- metadata +58 -65
- data/.travis.yml +0 -5
- data/lib/kimurai/automation/deploy.yml +0 -54
- data/lib/kimurai/automation/setup/chromium_chromedriver.yml +0 -26
- data/lib/kimurai/automation/setup/firefox_geckodriver.yml +0 -20
- data/lib/kimurai/automation/setup/phantomjs.yml +0 -33
- data/lib/kimurai/automation/setup/ruby_environment.yml +0 -124
- data/lib/kimurai/automation/setup.yml +0 -44
- data/lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb +0 -175
- data/lib/kimurai/capybara_ext/poltergeist/driver.rb +0 -13
- data/lib/kimurai/cli/ansible_command_builder.rb +0 -71
- data/lib/kimurai/template/config/automation.yml +0 -13
|
@@ -6,17 +6,17 @@ Bundler.require(:default, Kimurai.env)
|
|
|
6
6
|
require 'dotenv/load'
|
|
7
7
|
|
|
8
8
|
# require initializers
|
|
9
|
-
Dir.glob(File.join(
|
|
9
|
+
Dir.glob(File.join('./config/initializers', '*.rb'), &method(:require))
|
|
10
10
|
|
|
11
11
|
# require helpers
|
|
12
|
-
Dir.glob(File.join(
|
|
12
|
+
Dir.glob(File.join('./helpers', '*.rb'), &method(:require))
|
|
13
13
|
|
|
14
14
|
# require pipelines
|
|
15
|
-
Dir.glob(File.join(
|
|
15
|
+
Dir.glob(File.join('./pipelines', '*.rb'), &method(:require))
|
|
16
16
|
|
|
17
17
|
# require spiders recursively in the `spiders/` folder
|
|
18
18
|
require_relative '../spiders/application_spider'
|
|
19
|
-
require_all
|
|
19
|
+
require_all 'spiders'
|
|
20
20
|
|
|
21
21
|
# require Kimurai configuration
|
|
22
22
|
require_relative 'application'
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
require 'tzinfo'
|
|
3
3
|
|
|
4
4
|
# Export current PATH to the cron
|
|
5
|
-
env :PATH, ENV[
|
|
5
|
+
env :PATH, ENV['PATH']
|
|
6
6
|
|
|
7
7
|
# Use 24 hour format when using `at:` option
|
|
8
8
|
set :chronic_options, hours24: true
|
|
@@ -19,34 +19,34 @@ def local_to_utc(time_string, zone:)
|
|
|
19
19
|
TZInfo::Timezone.get(zone).local_to_utc(Time.parse(time_string))
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
-
#
|
|
22
|
+
# NOTE: by default Whenever exports cron commands with :environment == "production".
|
|
23
23
|
# Note: Whenever can only append log data to a log file (>>). If you want
|
|
24
24
|
# to overwrite (>) log file before each run, pass lambda:
|
|
25
25
|
# crawl "google_spider.com", output: -> { "> log/google_spider.com.log 2>&1" }
|
|
26
26
|
|
|
27
27
|
# Project job types
|
|
28
|
-
job_type :crawl,
|
|
29
|
-
job_type :runner,
|
|
28
|
+
job_type :crawl, 'cd :path && KIMURAI_ENV=:environment bundle exec kimurai crawl :task :output'
|
|
29
|
+
job_type :runner, 'cd :path && KIMURAI_ENV=:environment bundle exec kimurai runner --jobs :task :output'
|
|
30
30
|
|
|
31
31
|
# Single file job type
|
|
32
|
-
job_type :single,
|
|
32
|
+
job_type :single, 'cd :path && KIMURAI_ENV=:environment ruby :task :output'
|
|
33
33
|
# Single with bundle exec
|
|
34
|
-
job_type :single_bundle,
|
|
34
|
+
job_type :single_bundle, 'cd :path && KIMURAI_ENV=:environment bundle exec ruby :task :output'
|
|
35
35
|
|
|
36
36
|
### Schedule ###
|
|
37
37
|
# Usage (check examples here https://github.com/javan/whenever#example-schedulerb-file):
|
|
38
38
|
# every 1.day do
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
# Example to schedule a single spider in the project:
|
|
40
|
+
# crawl "google_spider.com", output: "log/google_spider.com.log"
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
42
|
+
# Example to schedule all spiders in the project using runner. Each spider will write
|
|
43
|
+
# its own output to the `log/spider_name.log` file (handled by the runner itself).
|
|
44
|
+
# Runner output will be written to log/runner.log file.
|
|
45
|
+
# The numeric argument is the count of concurrent jobs:
|
|
46
|
+
# runner 3, output:"log/runner.log"
|
|
47
47
|
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
# Example to schedule a single spider (without project):
|
|
49
|
+
# single "single_spider.rb", output: "single_spider.log"
|
|
50
50
|
# end
|
|
51
51
|
|
|
52
52
|
### How to set a cron schedule ###
|
|
@@ -5,19 +5,18 @@
|
|
|
5
5
|
class ApplicationSpider < Kimurai::Base
|
|
6
6
|
include ApplicationHelper
|
|
7
7
|
|
|
8
|
-
# Default engine for spiders (available engines: :mechanize, :
|
|
9
|
-
|
|
10
|
-
@engine = :poltergeist_phantomjs
|
|
8
|
+
# Default engine for spiders (available engines: :mechanize, :selenium_firefox, :selenium_chrome)
|
|
9
|
+
@engine = :selenium_chrome
|
|
11
10
|
|
|
12
11
|
# Pipelines list, by order.
|
|
13
12
|
# To process item through pipelines pass item to the `send_item` method
|
|
14
|
-
@pipelines = [
|
|
13
|
+
@pipelines = %i[validator saver]
|
|
15
14
|
|
|
16
15
|
# Default config. Set here options which are default for all spiders inherited
|
|
17
16
|
# from ApplicationSpider. Child's class config will be deep merged with this one
|
|
18
17
|
@config = {
|
|
19
18
|
# Custom headers, format: hash. Example: { "some header" => "some value", "another header" => "another value" }
|
|
20
|
-
# Works
|
|
19
|
+
# Works for :mechanize engine. Selenium doesn't allow setting/getting headers.
|
|
21
20
|
# headers: {},
|
|
22
21
|
|
|
23
22
|
# Custom User Agent, format: string or lambda.
|
|
@@ -49,7 +48,7 @@ class ApplicationSpider < Kimurai::Base
|
|
|
49
48
|
# window_size: [1366, 768],
|
|
50
49
|
|
|
51
50
|
# Skip images downloading if true, works for all engines
|
|
52
|
-
disable_images: true,
|
|
51
|
+
# disable_images: true,
|
|
53
52
|
|
|
54
53
|
# Selenium engines only: headless mode, `:native` or `:virtual_display` (default is :native)
|
|
55
54
|
# Although native mode has a better performance, virtual display mode
|
|
@@ -61,14 +60,9 @@ class ApplicationSpider < Kimurai::Base
|
|
|
61
60
|
# Format: array of strings. Works only for :selenium_firefox and :selenium_chrome
|
|
62
61
|
# proxy_bypass_list: [],
|
|
63
62
|
|
|
64
|
-
# Option to provide custom SSL certificate. Works only for :
|
|
63
|
+
# Option to provide custom SSL certificate. Works only for :mechanize
|
|
65
64
|
# ssl_cert_path: "path/to/ssl_cert",
|
|
66
65
|
|
|
67
|
-
# Inject some JavaScript code to the browser.
|
|
68
|
-
# Format: array of strings, where each string is a path to JS file.
|
|
69
|
-
# Works only for poltergeist_phantomjs engine (Selenium doesn't support JS code injection)
|
|
70
|
-
# extensions: ["lib/code_to_inject.js"],
|
|
71
|
-
|
|
72
66
|
# Automatically skip duplicated (already visited) urls when using `request_to` method.
|
|
73
67
|
# Possible values: `true` or `hash` with options.
|
|
74
68
|
# In case of `true`, all visited urls will be added to the storage's scope `:requests_urls`
|
|
@@ -118,12 +112,12 @@ class ApplicationSpider < Kimurai::Base
|
|
|
118
112
|
# Perform several actions before each request:
|
|
119
113
|
before_request: {
|
|
120
114
|
# Change proxy before each request. The `proxy:` option above should be present
|
|
121
|
-
# and has lambda format. Works
|
|
115
|
+
# and have lambda format. Works for :mechanize engine.
|
|
122
116
|
# (Selenium doesn't support proxy rotation).
|
|
123
117
|
# change_proxy: true,
|
|
124
118
|
|
|
125
119
|
# Change user agent before each request. The `user_agent:` option above should be present
|
|
126
|
-
# and has lambda format. Works
|
|
120
|
+
# and have lambda format. Works for :mechanize engine.
|
|
127
121
|
# (Selenium doesn't support getting/setting headers).
|
|
128
122
|
# change_user_agent: true,
|
|
129
123
|
|
data/lib/kimurai/version.rb
CHANGED
data/lib/kimurai.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
require 'ostruct'
|
|
2
2
|
require 'logger'
|
|
3
3
|
require 'json'
|
|
4
|
+
require 'uri'
|
|
5
|
+
|
|
4
6
|
require 'active_support'
|
|
5
7
|
require 'active_support/core_ext'
|
|
6
8
|
require 'rbcat'
|
|
@@ -28,26 +30,28 @@ module Kimurai
|
|
|
28
30
|
end
|
|
29
31
|
|
|
30
32
|
def env
|
|
31
|
-
ENV.fetch(
|
|
33
|
+
ENV.fetch('KIMURAI_ENV', 'development')
|
|
32
34
|
end
|
|
33
35
|
|
|
34
36
|
def time_zone
|
|
35
|
-
ENV[
|
|
37
|
+
ENV['TZ']
|
|
36
38
|
end
|
|
37
39
|
|
|
38
40
|
def time_zone=(value)
|
|
39
|
-
ENV.store(
|
|
41
|
+
ENV.store('TZ', value)
|
|
40
42
|
end
|
|
41
43
|
|
|
42
44
|
def list
|
|
43
45
|
Base.descendants.map do |klass|
|
|
44
46
|
next unless klass.name
|
|
47
|
+
|
|
45
48
|
[klass.name, klass]
|
|
46
49
|
end.compact.to_h
|
|
47
50
|
end
|
|
48
51
|
|
|
49
52
|
def find_by_name(name)
|
|
50
53
|
return unless name
|
|
54
|
+
|
|
51
55
|
Base.descendants.find { |klass| klass.name == name }
|
|
52
56
|
end
|
|
53
57
|
end
|
metadata
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kimurai
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Victor Afanasev
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name:
|
|
13
|
+
name: activesupport
|
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
|
16
15
|
requirements:
|
|
17
16
|
- - ">="
|
|
@@ -39,7 +38,7 @@ dependencies:
|
|
|
39
38
|
- !ruby/object:Gem::Version
|
|
40
39
|
version: '0'
|
|
41
40
|
- !ruby/object:Gem::Dependency
|
|
42
|
-
name:
|
|
41
|
+
name: csv
|
|
43
42
|
requirement: !ruby/object:Gem::Requirement
|
|
44
43
|
requirements:
|
|
45
44
|
- - ">="
|
|
@@ -81,27 +80,21 @@ dependencies:
|
|
|
81
80
|
- !ruby/object:Gem::Version
|
|
82
81
|
version: '0'
|
|
83
82
|
- !ruby/object:Gem::Dependency
|
|
84
|
-
name:
|
|
83
|
+
name: ostruct
|
|
85
84
|
requirement: !ruby/object:Gem::Requirement
|
|
86
85
|
requirements:
|
|
87
86
|
- - ">="
|
|
88
87
|
- !ruby/object:Gem::Version
|
|
89
|
-
version: '
|
|
90
|
-
- - "<"
|
|
91
|
-
- !ruby/object:Gem::Version
|
|
92
|
-
version: '4.0'
|
|
88
|
+
version: '0'
|
|
93
89
|
type: :runtime
|
|
94
90
|
prerelease: false
|
|
95
91
|
version_requirements: !ruby/object:Gem::Requirement
|
|
96
92
|
requirements:
|
|
97
93
|
- - ">="
|
|
98
94
|
- !ruby/object:Gem::Version
|
|
99
|
-
version: '
|
|
100
|
-
- - "<"
|
|
101
|
-
- !ruby/object:Gem::Version
|
|
102
|
-
version: '4.0'
|
|
95
|
+
version: '0'
|
|
103
96
|
- !ruby/object:Gem::Dependency
|
|
104
|
-
name:
|
|
97
|
+
name: thor
|
|
105
98
|
requirement: !ruby/object:Gem::Requirement
|
|
106
99
|
requirements:
|
|
107
100
|
- - ">="
|
|
@@ -115,7 +108,7 @@ dependencies:
|
|
|
115
108
|
- !ruby/object:Gem::Version
|
|
116
109
|
version: '0'
|
|
117
110
|
- !ruby/object:Gem::Dependency
|
|
118
|
-
name:
|
|
111
|
+
name: mutex_m
|
|
119
112
|
requirement: !ruby/object:Gem::Requirement
|
|
120
113
|
requirements:
|
|
121
114
|
- - ">="
|
|
@@ -129,7 +122,7 @@ dependencies:
|
|
|
129
122
|
- !ruby/object:Gem::Version
|
|
130
123
|
version: '0'
|
|
131
124
|
- !ruby/object:Gem::Dependency
|
|
132
|
-
name:
|
|
125
|
+
name: nkf
|
|
133
126
|
requirement: !ruby/object:Gem::Requirement
|
|
134
127
|
requirements:
|
|
135
128
|
- - ">="
|
|
@@ -143,7 +136,7 @@ dependencies:
|
|
|
143
136
|
- !ruby/object:Gem::Version
|
|
144
137
|
version: '0'
|
|
145
138
|
- !ruby/object:Gem::Dependency
|
|
146
|
-
name:
|
|
139
|
+
name: reline
|
|
147
140
|
requirement: !ruby/object:Gem::Requirement
|
|
148
141
|
requirements:
|
|
149
142
|
- - ">="
|
|
@@ -157,49 +150,49 @@ dependencies:
|
|
|
157
150
|
- !ruby/object:Gem::Version
|
|
158
151
|
version: '0'
|
|
159
152
|
- !ruby/object:Gem::Dependency
|
|
160
|
-
name:
|
|
153
|
+
name: capybara
|
|
161
154
|
requirement: !ruby/object:Gem::Requirement
|
|
162
155
|
requirements:
|
|
163
|
-
- - "
|
|
156
|
+
- - "~>"
|
|
164
157
|
- !ruby/object:Gem::Version
|
|
165
|
-
version: '
|
|
158
|
+
version: '3.40'
|
|
166
159
|
type: :runtime
|
|
167
160
|
prerelease: false
|
|
168
161
|
version_requirements: !ruby/object:Gem::Requirement
|
|
169
162
|
requirements:
|
|
170
|
-
- - "
|
|
163
|
+
- - "~>"
|
|
171
164
|
- !ruby/object:Gem::Version
|
|
172
|
-
version: '
|
|
165
|
+
version: '3.40'
|
|
173
166
|
- !ruby/object:Gem::Dependency
|
|
174
|
-
name:
|
|
167
|
+
name: capybara-mechanize
|
|
175
168
|
requirement: !ruby/object:Gem::Requirement
|
|
176
169
|
requirements:
|
|
177
|
-
- - "
|
|
170
|
+
- - "~>"
|
|
178
171
|
- !ruby/object:Gem::Version
|
|
179
|
-
version: '
|
|
172
|
+
version: '1.13'
|
|
180
173
|
type: :runtime
|
|
181
174
|
prerelease: false
|
|
182
175
|
version_requirements: !ruby/object:Gem::Requirement
|
|
183
176
|
requirements:
|
|
184
|
-
- - "
|
|
177
|
+
- - "~>"
|
|
185
178
|
- !ruby/object:Gem::Version
|
|
186
|
-
version: '
|
|
179
|
+
version: '1.13'
|
|
187
180
|
- !ruby/object:Gem::Dependency
|
|
188
|
-
name:
|
|
181
|
+
name: selenium-webdriver
|
|
189
182
|
requirement: !ruby/object:Gem::Requirement
|
|
190
183
|
requirements:
|
|
191
184
|
- - "~>"
|
|
192
185
|
- !ruby/object:Gem::Version
|
|
193
|
-
version: '
|
|
186
|
+
version: '4.27'
|
|
194
187
|
type: :runtime
|
|
195
188
|
prerelease: false
|
|
196
189
|
version_requirements: !ruby/object:Gem::Requirement
|
|
197
190
|
requirements:
|
|
198
191
|
- - "~>"
|
|
199
192
|
- !ruby/object:Gem::Version
|
|
200
|
-
version: '
|
|
193
|
+
version: '4.27'
|
|
201
194
|
- !ruby/object:Gem::Dependency
|
|
202
|
-
name:
|
|
195
|
+
name: headless
|
|
203
196
|
requirement: !ruby/object:Gem::Requirement
|
|
204
197
|
requirements:
|
|
205
198
|
- - ">="
|
|
@@ -213,48 +206,61 @@ dependencies:
|
|
|
213
206
|
- !ruby/object:Gem::Version
|
|
214
207
|
version: '0'
|
|
215
208
|
- !ruby/object:Gem::Dependency
|
|
216
|
-
name:
|
|
209
|
+
name: pmap
|
|
217
210
|
requirement: !ruby/object:Gem::Requirement
|
|
218
211
|
requirements:
|
|
219
|
-
- - "
|
|
212
|
+
- - ">="
|
|
220
213
|
- !ruby/object:Gem::Version
|
|
221
|
-
version: '
|
|
222
|
-
type: :
|
|
214
|
+
version: '0'
|
|
215
|
+
type: :runtime
|
|
223
216
|
prerelease: false
|
|
224
217
|
version_requirements: !ruby/object:Gem::Requirement
|
|
225
218
|
requirements:
|
|
226
|
-
- - "
|
|
219
|
+
- - ">="
|
|
227
220
|
- !ruby/object:Gem::Version
|
|
228
|
-
version: '
|
|
221
|
+
version: '0'
|
|
229
222
|
- !ruby/object:Gem::Dependency
|
|
230
|
-
name:
|
|
223
|
+
name: whenever
|
|
231
224
|
requirement: !ruby/object:Gem::Requirement
|
|
232
225
|
requirements:
|
|
233
|
-
- - "
|
|
226
|
+
- - ">="
|
|
234
227
|
- !ruby/object:Gem::Version
|
|
235
|
-
version: '
|
|
236
|
-
type: :
|
|
228
|
+
version: '0'
|
|
229
|
+
type: :runtime
|
|
237
230
|
prerelease: false
|
|
238
231
|
version_requirements: !ruby/object:Gem::Requirement
|
|
239
232
|
requirements:
|
|
240
|
-
- - "
|
|
233
|
+
- - ">="
|
|
234
|
+
- !ruby/object:Gem::Version
|
|
235
|
+
version: '0'
|
|
236
|
+
- !ruby/object:Gem::Dependency
|
|
237
|
+
name: pry
|
|
238
|
+
requirement: !ruby/object:Gem::Requirement
|
|
239
|
+
requirements:
|
|
240
|
+
- - ">="
|
|
241
|
+
- !ruby/object:Gem::Version
|
|
242
|
+
version: '0'
|
|
243
|
+
type: :runtime
|
|
244
|
+
prerelease: false
|
|
245
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
246
|
+
requirements:
|
|
247
|
+
- - ">="
|
|
241
248
|
- !ruby/object:Gem::Version
|
|
242
|
-
version: '
|
|
249
|
+
version: '0'
|
|
243
250
|
- !ruby/object:Gem::Dependency
|
|
244
|
-
name:
|
|
251
|
+
name: rbcat
|
|
245
252
|
requirement: !ruby/object:Gem::Requirement
|
|
246
253
|
requirements:
|
|
247
254
|
- - "~>"
|
|
248
255
|
- !ruby/object:Gem::Version
|
|
249
|
-
version: '
|
|
250
|
-
type: :
|
|
256
|
+
version: '1.0'
|
|
257
|
+
type: :runtime
|
|
251
258
|
prerelease: false
|
|
252
259
|
version_requirements: !ruby/object:Gem::Requirement
|
|
253
260
|
requirements:
|
|
254
261
|
- - "~>"
|
|
255
262
|
- !ruby/object:Gem::Version
|
|
256
|
-
version: '
|
|
257
|
-
description:
|
|
263
|
+
version: '1.0'
|
|
258
264
|
email:
|
|
259
265
|
- vicfreefly@gmail.com
|
|
260
266
|
executables:
|
|
@@ -263,7 +269,7 @@ extensions: []
|
|
|
263
269
|
extra_rdoc_files: []
|
|
264
270
|
files:
|
|
265
271
|
- ".gitignore"
|
|
266
|
-
- ".
|
|
272
|
+
- ".rubocop.yml"
|
|
267
273
|
- CHANGELOG.md
|
|
268
274
|
- Gemfile
|
|
269
275
|
- LICENSE.txt
|
|
@@ -274,30 +280,21 @@ files:
|
|
|
274
280
|
- exe/kimurai
|
|
275
281
|
- kimurai.gemspec
|
|
276
282
|
- lib/kimurai.rb
|
|
277
|
-
- lib/kimurai/automation/deploy.yml
|
|
278
|
-
- lib/kimurai/automation/setup.yml
|
|
279
|
-
- lib/kimurai/automation/setup/chromium_chromedriver.yml
|
|
280
|
-
- lib/kimurai/automation/setup/firefox_geckodriver.yml
|
|
281
|
-
- lib/kimurai/automation/setup/phantomjs.yml
|
|
282
|
-
- lib/kimurai/automation/setup/ruby_environment.yml
|
|
283
283
|
- lib/kimurai/base.rb
|
|
284
284
|
- lib/kimurai/base/saver.rb
|
|
285
285
|
- lib/kimurai/base/storage.rb
|
|
286
286
|
- lib/kimurai/base_helper.rb
|
|
287
287
|
- lib/kimurai/browser_builder.rb
|
|
288
288
|
- lib/kimurai/browser_builder/mechanize_builder.rb
|
|
289
|
-
- lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb
|
|
290
289
|
- lib/kimurai/browser_builder/selenium_chrome_builder.rb
|
|
291
290
|
- lib/kimurai/browser_builder/selenium_firefox_builder.rb
|
|
292
291
|
- lib/kimurai/capybara_configuration.rb
|
|
293
292
|
- lib/kimurai/capybara_ext/driver/base.rb
|
|
294
293
|
- lib/kimurai/capybara_ext/mechanize/driver.rb
|
|
295
|
-
- lib/kimurai/capybara_ext/poltergeist/driver.rb
|
|
296
294
|
- lib/kimurai/capybara_ext/selenium/driver.rb
|
|
297
295
|
- lib/kimurai/capybara_ext/session.rb
|
|
298
296
|
- lib/kimurai/capybara_ext/session/config.rb
|
|
299
297
|
- lib/kimurai/cli.rb
|
|
300
|
-
- lib/kimurai/cli/ansible_command_builder.rb
|
|
301
298
|
- lib/kimurai/cli/generator.rb
|
|
302
299
|
- lib/kimurai/core_ext/array.rb
|
|
303
300
|
- lib/kimurai/core_ext/hash.rb
|
|
@@ -309,7 +306,6 @@ files:
|
|
|
309
306
|
- lib/kimurai/template/Gemfile
|
|
310
307
|
- lib/kimurai/template/README.md
|
|
311
308
|
- lib/kimurai/template/config/application.rb
|
|
312
|
-
- lib/kimurai/template/config/automation.yml
|
|
313
309
|
- lib/kimurai/template/config/boot.rb
|
|
314
310
|
- lib/kimurai/template/config/initializers/.keep
|
|
315
311
|
- lib/kimurai/template/config/schedule.rb
|
|
@@ -326,7 +322,6 @@ homepage: https://github.com/vifreefly/kimuraframework
|
|
|
326
322
|
licenses:
|
|
327
323
|
- MIT
|
|
328
324
|
metadata: {}
|
|
329
|
-
post_install_message:
|
|
330
325
|
rdoc_options: []
|
|
331
326
|
require_paths:
|
|
332
327
|
- lib
|
|
@@ -334,16 +329,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
334
329
|
requirements:
|
|
335
330
|
- - ">="
|
|
336
331
|
- !ruby/object:Gem::Version
|
|
337
|
-
version:
|
|
332
|
+
version: 3.1.0
|
|
338
333
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
339
334
|
requirements:
|
|
340
335
|
- - ">="
|
|
341
336
|
- !ruby/object:Gem::Version
|
|
342
337
|
version: '0'
|
|
343
338
|
requirements: []
|
|
344
|
-
|
|
345
|
-
rubygems_version: 2.7.6
|
|
346
|
-
signing_key:
|
|
339
|
+
rubygems_version: 4.0.1
|
|
347
340
|
specification_version: 4
|
|
348
341
|
summary: Modern web scraping framework written in Ruby and based on Capybara/Nokogiri
|
|
349
342
|
test_files: []
|
data/.travis.yml
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
- hosts: all
|
|
3
|
-
vars:
|
|
4
|
-
rbenv_root_path: /home/{{ ansible_user_id }}/.rbenv
|
|
5
|
-
rbenv_shims_path: "{{ rbenv_root_path }}/shims"
|
|
6
|
-
repo_url:
|
|
7
|
-
repo_name:
|
|
8
|
-
repo_key_path:
|
|
9
|
-
|
|
10
|
-
tasks:
|
|
11
|
-
- name: Copy custom git ssh key to /tmp/private_key (if provided)
|
|
12
|
-
when: repo_key_path is not none
|
|
13
|
-
copy:
|
|
14
|
-
src: "{{ repo_key_path }}"
|
|
15
|
-
dest: /tmp/private_key
|
|
16
|
-
mode: 0600
|
|
17
|
-
|
|
18
|
-
- name: Clone/pull project repo to ~/{{ repo_name }} user directory (using ssh-agent forwarding or https)
|
|
19
|
-
when: repo_key_path is none
|
|
20
|
-
git:
|
|
21
|
-
repo: "{{ repo_url }}"
|
|
22
|
-
dest: "~/{{ repo_name }}"
|
|
23
|
-
force: true
|
|
24
|
-
accept_hostkey: true
|
|
25
|
-
|
|
26
|
-
- name: Clone/pull project repo to ~/{{ repo_name }} user directory (using custom git ssh key)
|
|
27
|
-
when: repo_key_path is not none
|
|
28
|
-
git:
|
|
29
|
-
repo: "{{ repo_url }}"
|
|
30
|
-
dest: "~/{{ repo_name }}"
|
|
31
|
-
force: true
|
|
32
|
-
accept_hostkey: true
|
|
33
|
-
key_file: /tmp/private_key
|
|
34
|
-
|
|
35
|
-
- name: Delete custom git ssh key from /tmp/private_key (if provided)
|
|
36
|
-
when: repo_key_path is not none
|
|
37
|
-
file:
|
|
38
|
-
state: absent
|
|
39
|
-
path: /tmp/private_key
|
|
40
|
-
|
|
41
|
-
- name: Run bundle install
|
|
42
|
-
command: bundle install
|
|
43
|
-
args:
|
|
44
|
-
chdir: ~/{{ repo_name }}
|
|
45
|
-
environment:
|
|
46
|
-
PATH: "{{ rbenv_root_path }}/bin:{{ rbenv_root_path }}/shims:{{ ansible_env.PATH }}"
|
|
47
|
-
|
|
48
|
-
- name: Run whenever to update crontab
|
|
49
|
-
command: whenever --update-crontab
|
|
50
|
-
args:
|
|
51
|
-
chdir: ~/{{ repo_name }}
|
|
52
|
-
environment:
|
|
53
|
-
PATH: "{{ rbenv_root_path }}/bin:{{ rbenv_root_path }}/shims:{{ ansible_env.PATH }}"
|
|
54
|
-
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
- name: Install chromium browser
|
|
3
|
-
apt:
|
|
4
|
-
pkg: chromium-browser
|
|
5
|
-
state: present
|
|
6
|
-
|
|
7
|
-
- name: Get current chromedriver version
|
|
8
|
-
shell: chromedriver --version
|
|
9
|
-
args:
|
|
10
|
-
executable: /bin/bash
|
|
11
|
-
register: current_chromedriver_version
|
|
12
|
-
changed_when: false
|
|
13
|
-
ignore_errors: true
|
|
14
|
-
|
|
15
|
-
- name: Install unzip tool to unarchive chromedriver archive
|
|
16
|
-
apt:
|
|
17
|
-
pkg: unzip
|
|
18
|
-
state: present
|
|
19
|
-
|
|
20
|
-
- name: Download chromedriver binary archive and unarchive it to /usr/local/bin
|
|
21
|
-
unarchive:
|
|
22
|
-
src: https://chromedriver.storage.googleapis.com/{{ chromedriver }}/chromedriver_linux64.zip
|
|
23
|
-
dest: /usr/local/bin
|
|
24
|
-
remote_src: true
|
|
25
|
-
mode: a+x
|
|
26
|
-
when: chromedriver not in current_chromedriver_version.stdout_lines
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
- name: Install firefox
|
|
3
|
-
apt:
|
|
4
|
-
pkg: firefox
|
|
5
|
-
state: present
|
|
6
|
-
|
|
7
|
-
- name: Get current geckodriver version
|
|
8
|
-
shell: geckodriver --version
|
|
9
|
-
args:
|
|
10
|
-
executable: /bin/bash
|
|
11
|
-
register: current_geckodriver_version
|
|
12
|
-
changed_when: false
|
|
13
|
-
ignore_errors: true
|
|
14
|
-
|
|
15
|
-
- name: Download geckodriver binary archive and unarchive it to /usr/local/bin
|
|
16
|
-
unarchive:
|
|
17
|
-
src: https://github.com/mozilla/geckodriver/releases/download/v{{ geckodriver }}/geckodriver-v{{ geckodriver }}-linux64.tar.gz
|
|
18
|
-
dest: /usr/local/bin
|
|
19
|
-
remote_src: true
|
|
20
|
-
when: geckodriver not in current_geckodriver_version.stdout
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
- name: Install dependencies for PhantomJS
|
|
3
|
-
apt:
|
|
4
|
-
pkg: "{{ item }}"
|
|
5
|
-
state: present
|
|
6
|
-
with_items:
|
|
7
|
-
- chrpath
|
|
8
|
-
- libxft-dev
|
|
9
|
-
- libfreetype6
|
|
10
|
-
- libfreetype6-dev
|
|
11
|
-
- libfontconfig1
|
|
12
|
-
- libfontconfig1-dev
|
|
13
|
-
|
|
14
|
-
- name: Get current phantomjs version
|
|
15
|
-
shell: phantomjs -v
|
|
16
|
-
args:
|
|
17
|
-
executable: /bin/bash
|
|
18
|
-
register: current_phantomjs_version
|
|
19
|
-
changed_when: false
|
|
20
|
-
ignore_errors: true
|
|
21
|
-
|
|
22
|
-
- name: Download phantomJS archive and unarchive it to /usr/local/lib
|
|
23
|
-
unarchive:
|
|
24
|
-
src: https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-{{ phantomjs }}-linux-x86_64.tar.bz2
|
|
25
|
-
dest: /usr/local/lib
|
|
26
|
-
remote_src: true
|
|
27
|
-
when: phantomjs not in current_phantomjs_version.stdout
|
|
28
|
-
|
|
29
|
-
- name: Link PhantomJS binary to /usr/local/bin/phantomjs
|
|
30
|
-
file:
|
|
31
|
-
src: /usr/local/lib/phantomjs-{{ phantomjs }}-linux-x86_64/bin/phantomjs
|
|
32
|
-
dest: /usr/local/bin/phantomjs
|
|
33
|
-
state: link
|