kimurai-dashboard 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/kimurai-dashboard.gemspec +30 -0
- data/lib/kimurai/dashboard.rb +49 -0
- data/lib/kimurai/dashboard/app.rb +151 -0
- data/lib/kimurai/dashboard/base.rb +53 -0
- data/lib/kimurai/dashboard/helpers.rb +78 -0
- data/lib/kimurai/dashboard/models/run.rb +74 -0
- data/lib/kimurai/dashboard/models/session.rb +37 -0
- data/lib/kimurai/dashboard/models/spider.rb +20 -0
- data/lib/kimurai/dashboard/public/application.css +61 -0
- data/lib/kimurai/dashboard/public/application.js +4 -0
- data/lib/kimurai/dashboard/runner.rb +31 -0
- data/lib/kimurai/dashboard/version.rb +5 -0
- data/lib/kimurai/dashboard/views/_header.erb +19 -0
- data/lib/kimurai/dashboard/views/layout.erb +29 -0
- data/lib/kimurai/dashboard/views/runs/_errors.erb +18 -0
- data/lib/kimurai/dashboard/views/runs/_table.erb +38 -0
- data/lib/kimurai/dashboard/views/runs/index.erb +11 -0
- data/lib/kimurai/dashboard/views/runs/show.erb +142 -0
- data/lib/kimurai/dashboard/views/sessions/index.erb +46 -0
- data/lib/kimurai/dashboard/views/sessions/show.erb +87 -0
- data/lib/kimurai/dashboard/views/spiders/index.erb +40 -0
- data/lib/kimurai/dashboard/views/spiders/show.erb +46 -0
- metadata +172 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d45c04e24e2f570510b38e3027958d190e39cb8cc3991f03925691acc5c0a4cf
|
4
|
+
data.tar.gz: 8833f523ebc1f303d48a5ad9107c0ecc1e33f36d0e0aa987206f13f30508e26a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e12f971ce5e9c6dab110cae9e6975f4ace11ffac1c1a607d855e5d32b2662bdb3619fd08a4921b0f58f90dc30634a18ade29ead57273bcbc8b525c9786e6fc13
|
7
|
+
data.tar.gz: ff659649fb65ab2c7953e9544dc6ea3bc3a0196bd150d812316fb3f9ca8652a667c31c37e6d6a5643ff8804d5f29d3dbd57bc63b13ebc7eb5a05df2401f78833
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Victor Afanasev
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Kimurai::Dashboard
|
2
|
+
|
3
|
+
A simple dashboard for the [Kimurai web scraping framework](https://github.com/vifreefly/kimuraframework). Requires Kimurai `>= 1.2.0`.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
Add this line to your Kimurai project's Gemfile:
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
# add this line after `gem 'kimurai'`
|
10
|
+
gem 'kimurai-dashboard', require: false
|
11
|
+
```
|
12
|
+
|
13
|
+
and then execute `$ bundle`.
|
14
|
+
|
15
|
+
## Configuration
|
16
|
+
You need to provide `stats_database_url` to enable stats collection and to save information about the project's spider runs and sessions to a database. The database URL format is described at https://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html. You can use a _sqlite_, _postgres_ or _mysql_ database (check the Kimurai project's Gemfile and uncomment the preferred gem).
|
17
|
+
|
18
|
+
Example for SQLite:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
# Gemfile
|
22
|
+
gem 'sqlite3'
|
23
|
+
```
|
24
|
+
|
25
|
+
**Note that the dashboard should be required only after `stats_database_url` has been provided:**
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
# config/boot.rb
|
29
|
+
# ...
|
30
|
+
|
31
|
+
Kimurai.configuration.stats_database_url = "sqlite://db/spiders_runs_#{Kimurai.env}.sqlite3"
|
32
|
+
# Important: require dashboard ONLY after stats_database_url was provided:
|
33
|
+
require 'kimurai/dashboard'
|
34
|
+
```
|
35
|
+
|
36
|
+
There are also optional settings for the dashboard:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
# config/application.rb
|
40
|
+
|
41
|
+
Kimurai.configure do |config|
|
42
|
+
# ...
|
43
|
+
|
44
|
+
config.dashboard = {
|
45
|
+
bind_address: "0.0.0.0",
|
46
|
+
port: 3001,
|
47
|
+
basic_auth: { username: "admin", password: "123456" }
|
48
|
+
}
|
49
|
+
end
|
50
|
+
```
|
51
|
+
|
52
|
+
## Usage
|
53
|
+
After successful configuration, all spiders (whether run individually with `kimurai start` or in a queue with `kimurai runner`) will save their stats to the database.
|
54
|
+
|
55
|
+
Run `$ bundle exec kimurai dashboard` and navigate to the dashboard URL to see the stats.
|
56
|
+
|
57
|
+
## License
|
58
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "kimurai/dashboard"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
# Put the gem's own lib/ directory on the load path so the version file
# required below resolves when the gemspec is evaluated from a checkout.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "kimurai/dashboard/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "kimurai-dashboard"
  spec.version  = Kimurai::Dashboard::VERSION
  spec.authors  = ["Victor Afanasev"]
  spec.email    = ["vicfreefly@gmail.com"]

  spec.summary  = "Simple dashboard for a Kimurai web scraping framework"
  spec.homepage = "https://github.com/vifreefly/kimurai-dashboard"
  spec.license  = "MIT"

  # Package every file tracked by git except tests/specs/features.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.grep_v(%r{^(test|spec|features)/})
  end
  spec.require_paths = ["lib"]
  spec.required_ruby_version = ">= 2.5.0"

  # Runtime dependencies
  spec.add_dependency "kimurai", "~> 1.2.0"
  %w[sequel sinatra-contrib pagy].each do |gem_name|
    spec.add_dependency gem_name
  end

  # Development-only dependencies
  {
    "bundler"  => "~> 1.16",
    "rake"     => "~> 10.0",
    "minitest" => "~> 5.0"
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
require 'json'
|
3
|
+
require 'kimurai/dashboard/version'
|
4
|
+
|
5
|
+
require_relative 'dashboard/base'
|
6
|
+
require_relative 'dashboard/runner'
|
7
|
+
|
8
|
+
module Kimurai
  # Connects to the stats database and makes sure the tables used by the
  # dashboard models (sessions, spiders, runs) exist.
  module Dashboard
    # The connection URL comes from `Kimurai.configuration.stats_database_url`;
    # when that is unset it falls back to (and is assigned from) the
    # STATS_DATABASE_URL environment variable.
    DB = Sequel.connect(Kimurai.configuration.stats_database_url ||= ENV["STATS_DATABASE_URL"])

    # NOTE(review): the `empty: false` option used on several columns below is
    # kept exactly as in the original schema. It is not among the column
    # options documented by Sequel (`null: false` is the documented way to
    # forbid NULLs) -- confirm whether a NOT NULL constraint was intended
    # before changing it, since that would alter the schema of new databases.

    DB.create_table?(:sessions) do
      # The id is supplied by the caller rather than generated by the database.
      primary_key :id, type: :integer, auto_increment: false
      string :status
      datetime :start_time, empty: false
      datetime :stop_time
      string :environment
      integer :concurrent_jobs
      text :spiders
      text :error
    end

    # `spiders` must exist before `runs` is created: `runs` declares a foreign
    # key to it, and databases that validate the referenced table at CREATE
    # time reject a reference to a table that does not exist yet.
    DB.create_table?(:spiders) do
      primary_key :id
      string :name, empty: false, unique: true
    end

    DB.create_table?(:runs) do
      primary_key :id
      string :spider_name, empty: false
      string :status
      string :environment
      datetime :start_time, empty: false
      datetime :stop_time
      float :running_time
      foreign_key :session_id, :sessions
      foreign_key :spider_id, :spiders
      text :visits
      text :items
      text :events
      text :error
      text :server
    end
  end
end
|
46
|
+
|
47
|
+
require_relative 'dashboard/models/session'
|
48
|
+
require_relative 'dashboard/models/run'
|
49
|
+
require_relative 'dashboard/models/spider'
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'sinatra/respond_with'
|
3
|
+
require 'sinatra/json'
|
4
|
+
require 'sinatra/namespace'
|
5
|
+
require 'sinatra/reloader'
|
6
|
+
require 'sinatra/streaming'
|
7
|
+
require 'pagy'
|
8
|
+
require_relative 'helpers'
|
9
|
+
|
10
|
+
module Kimurai
  module Dashboard
    # Sinatra application serving the dashboard UI: paginated listings and
    # detail pages for sessions, runs and spiders, plus a raw log endpoint for
    # the latest run of a spider.
    #
    # Optional settings are read from `Kimurai.configuration.dashboard`
    # (`:bind_address`, `:port`, `:basic_auth`).
    class App < Sinatra::Base
      include Pagy::Backend

      register Sinatra::RespondWith, Sinatra::Namespace
      enable :logging
      set :environment, Kimurai.env.to_sym

      if (bind_address = Kimurai.configuration.dashboard&.dig(:bind_address))
        set :bind, bind_address
      end

      if (port = Kimurai.configuration.dashboard&.dig(:port))
        set :port, port
      end

      configure :development do
        require 'pry'
        register Sinatra::Reloader
      end

      helpers Sinatra::Streaming
      helpers do
        include Helpers
        include Rack::Utils
        alias_method :h, :escape_html
      end

      if (auth = Kimurai.configuration.dashboard&.dig(:basic_auth))
        use Rack::Auth::Basic, "Protected Area" do |username, password|
          # Constant-time comparison so response timing does not leak how much
          # of a credential matched. `&` (not `&&`) makes sure both
          # comparisons always run.
          Rack::Utils.secure_compare(auth[:username].to_s, username.to_s) &
            Rack::Utils.secure_compare(auth[:password].to_s, password.to_s)
        end
      end

      ###

      get "/" do
        redirect "/spiders"
      end

      namespace "/sessions" do
        # List all sessions, newest first.
        get do
          @sessions = Session.reverse_order(:id)
          @pagy, @sessions = pagy(@sessions) unless @sessions.count.zero?

          respond_to do |f|
            f.html { erb :'sessions/index' }
          end
        end

        # Show a single session.
        get "/:id" do
          @session = Session.find(id: params[:id].to_i)
          halt 404, "Error, can't find session!" unless @session

          respond_to do |f|
            f.html { erb :'sessions/show' }
          end
        end
      end

      namespace "/runs" do
        # List runs, newest first, optionally filtered by spider and/or session.
        get do
          @runs = Run.reverse_order(:id)

          # Only these two parameter names are ever forwarded to `send`, so the
          # request cannot invoke arbitrary dataset methods.
          filters = params.slice("spider_id", "session_id")
          filters.each do |filter_name, value|
            @runs = @runs.send(filter_name, value)
          end

          @pagy, @runs = pagy(@runs) unless @runs.count.zero?
          respond_to do |f|
            f.html { erb :'runs/index', locals: { filters: filters }}
          end
        end

        # Show a single run together with its difference from the previous run.
        get "/:id" do
          @run = Run.find(id: params[:id].to_i)
          halt 404, "Error, can't find run!" unless @run

          respond_to do |f|
            f.html { erb :'runs/show', locals: { difference: @run.difference_between_previous_run }}
          end
        end

        # Return the spider's log file; only served when the run is the latest
        # one (per `Run#latest?`) and the file exists.
        get "/:id/log" do
          @run = Run.find(id: params[:id].to_i)
          halt 404, "Error, can't find run with id: #{params[:id]}" unless @run

          log_name = "./log/#{@run.spider_name}.log"

          # File.exist? -- the `exists?` alias is deprecated and was removed in Ruby 3.2.
          unless @run.latest? && File.exist?(log_name)
            halt 404, "Log file is not available for this run"
          end

          content_type 'text/event-stream'
          File.readlines(log_name)
        end
      end

      namespace "/spiders" do
        # List all known spiders.
        get do
          @spiders = Spider
          @pagy, @spiders = pagy(@spiders) unless @spiders.count.zero?

          respond_to do |f|
            f.html { erb :'spiders/index' }
          end
        end

        # Show a spider, looked up by numeric id or otherwise by name, along
        # with its runs (newest first, 15 per page).
        get "/:id_or_name" do
          id_or_name = params[:id_or_name]

          # \A/\z anchor the whole value; ^/$ would only anchor a single line.
          @spider =
            if id_or_name.match?(/\A\d+\z/)
              Spider.find(id: id_or_name.to_i)
            else
              Spider.find(name: id_or_name)
            end

          halt 404, "Error, can't find spider!" unless @spider

          @spider_runs = @spider.runs_dataset.reverse_order(:id)
          @pagy, @spider_runs = pagy(@spider_runs, items: 15) unless @spider_runs.count.zero?

          respond_to do |f|
            f.html { erb :'spiders/show' }
          end
        end
      end

      private

      # Builds the variables hash handed to Pagy for a collection: total count,
      # the current page taken from the `page` request parameter, and the page
      # size (`vars[:items]`, defaulting to 25).
      # NOTE(review): presumably overrides the Pagy::Backend hook of the same
      # name -- confirm against the Pagy version in use.
      def pagy_get_vars(collection, vars)
        {
          count: collection.count,
          page_param: "page",
          page: params["page"],
          items: vars[:items] || 25
        }
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'kimurai/base'
|
2
|
+
require 'socket'
|
3
|
+
|
4
|
+
module Kimurai
  class Base
    class << self
      # Keep a handle on the framework's own implementation so the wrapper
      # defined below can delegate to it.
      alias_method :original_crawl!, :crawl!
    end

    # Runs the spider through the original `crawl!` while recording the run in
    # the dashboard database.
    #
    # While the crawl is in progress a background thread repeatedly saves the
    # class-level `@run_info` into a `Dashboard::Run` record; when the crawl
    # finishes (normally or with an error) the thread is stopped and the final
    # state is saved once more.
    #
    # @param continue [Boolean] forwarded to the original `crawl!`
    # @param exception_on_fail [Boolean] when true, an error reported by the
    #   original `crawl!` is raised; when false it is returned
    # @return [false] if the spider is already running
    # @return [Object] the final info returned by the original `crawl!`, or
    #   `[final_info, error]` when it failed and `exception_on_fail` is false
    def self.crawl!(continue: false, exception_on_fail: true)
      if running?
        logger.error "Spider: already running: #{name}"
        return false
      end

      spider = Dashboard::Spider.find_or_create(name: name)
      run = Dashboard::Run.new(spider_id: spider.id)

      # Saves either the final info (when given) or the current @run_info
      # snapshot into the run record.
      updater = proc do |final_info|
        if final_info
          run.set(final_info)
          run.save
        elsif @run_info
          # Attach session and server details only once per run.
          unless @run_info[:server]
            @run_info.merge!(
              session_id: ENV["SESSION_ID"]&.to_i,
              server: {
                hostname: Socket.gethostname,
                ipv4: Socket.ip_address_list.find { |ai| ai.ipv4? && !ai.ipv4_loopback? }&.ip_address,
                process_pid: Process.pid
              }
            )
          end

          running_time = (Time.now - @run_info[:start_time]).round(3)
          run.set(@run_info.merge!(running_time: running_time))
          run.save
        end
      end

      # Sleep 0.5s, save a snapshot, and -- when something was saved -- sleep a
      # further 1.5s before the next iteration.
      task = Thread.new do
        loop { sleep 0.5 and updater.call and sleep 1.5 }
      end

      final_info, error = original_crawl!(continue: continue, exception_on_fail: false)
      if error
        exception_on_fail ? raise(error) : [final_info, error]
      else
        final_info
      end
    ensure
      # `ensure` also runs for the early `return false` above and when setup
      # fails before `task`/`updater` are assigned; both are nil in those cases
      # and must not be called.
      task&.terminate
      updater&.call(final_info)
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'erb'
require 'pagy'
require 'pagy/extras/bootstrap'
|
3
|
+
|
4
|
+
module Kimurai
  module Dashboard
    # View helpers for the dashboard templates: small HTML builders
    # (links, breadcrumbs, status badges) and formatting utilities for run
    # statistics.
    module Helpers
      include Pagy::Frontend

      # Bootstrap badge style used for each known status.
      BADGE_STYLES = {
        "running"    => "primary",
        "processing" => "primary",
        "completed"  => "success",
        "failed"     => "danger",
        "stopped"    => "light"
      }.freeze

      # Returns `object.to_hash` merged with `object.deserialized_values`
      # (the latter wins on key collisions).
      def to_hash(object)
        object.to_hash.merge(object.deserialized_values)
      end

      # Turns each element into an anchor tag pointing at "<base>/<element>".
      #
      # NOTE(review): `base` and the elements are interpolated without HTML
      # escaping -- confirm callers only pass trusted values.
      #
      # @param elements [Enumerable] values used as both link target and label
      # @param base [String] path prefix for the links
      # @return [Array<String>] one `<a>` tag per element
      def convert_to_links(elements, base:)
        elements.map { |element| %(<a href="#{base}/#{element}">#{element}</a>) }
      end

      # Renders a Bootstrap breadcrumb `<nav>`.
      #
      # NOTE(review): paths and labels are interpolated without HTML escaping
      # -- confirm callers escape untrusted labels before passing them in.
      #
      # @param hash [Hash{String => Object}] path => label pairs; an empty path
      #   marks the current (active, non-linked) item
      # @return [String] HTML markup
      def breadcrumbs(hash)
        elements = hash.map do |path, value|
          if path.empty?
            %Q{<li class="breadcrumb-item active" aria-current="page">#{value}</li>}
          else
            %Q{<li class="breadcrumb-item"><a href="#{path}">#{value}</a></li>}
          end
        end
        %Q{<nav aria-label="breadcrumb">
<ol class="breadcrumb"> #{elements.join}</ol>
</nav>}
      end

      # Collapses a stats hash into a flat list of its values, replacing every
      # nested hash with its number of entries.
      def minimize_stats(stats)
        stats.values.map { |stat| stat.is_a?(Hash) ? stat.size : stat }
      end

      # Returns a Bootstrap badge for a known status string; any other value
      # is returned unchanged.
      def get_badge(status)
        style = BADGE_STYLES[status]
        return status unless style

        %Q{<span class="badge badge-#{style}">#{status}</span>}
      end

      # Renders the active filters as a muted paragraph.
      #
      # The filters originate from request parameters, so names and values are
      # HTML-escaped before being placed into the markup.
      #
      # @param filters [Hash] filter name => value
      # @return [String] HTML markup
      def render_filters(filters)
        f = filters.map do |k, v|
          "#{ERB::Util.html_escape(k)} = #{ERB::Util.html_escape(v)}"
        end.join(", ")
        %Q{<p class="text-muted"> Filters: #{f} </p>}
      end

      # Formats a "(previous <link>, +N%)" note comparing against a previous run.
      #
      # @param prev_value [Object] value from the previous run (link label)
      # @param prev_diff [Object, nil] percentage difference; omitted when nil/false
      # @param prev_run_id [Object] id of the previous run (link target)
      # @return [String] HTML fragment
      def format_difference(prev_value, prev_diff, prev_run_id)
        previous = %Q{previous <a href="/runs/#{prev_run_id}">#{prev_value}</a>}
        return "(#{previous})" unless prev_diff

        diff = prev_diff.to_s
        # Prefix "+" unless the text starts with "-" or "0". A new string is
        # built so a String argument is never modified in place.
        diff = "+#{diff}" unless diff.start_with?("-", "0")

        "(#{previous}, #{diff}%)"
      end

      # Sums the counts of an errors hash (error => count).
      def errors_count(hash)
        hash.sum { |_, count| count }
      end
    end
  end
end
|