kimurai-dashboard 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d45c04e24e2f570510b38e3027958d190e39cb8cc3991f03925691acc5c0a4cf
4
+ data.tar.gz: 8833f523ebc1f303d48a5ad9107c0ecc1e33f36d0e0aa987206f13f30508e26a
5
+ SHA512:
6
+ metadata.gz: e12f971ce5e9c6dab110cae9e6975f4ace11ffac1c1a607d855e5d32b2662bdb3619fd08a4921b0f58f90dc30634a18ade29ead57273bcbc8b525c9786e6fc13
7
+ data.tar.gz: ff659649fb65ab2c7953e9544dc6ea3bc3a0196bd150d812316fb3f9ca8652a667c31c37e6d6a5643ff8804d5f29d3dbd57bc63b13ebc7eb5a05df2401f78833
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.1
5
+ before_install: gem install bundler -v 1.16.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in kimurai-dashboard.gemspec
6
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Victor Afanasev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,58 @@
1
+ # Kimurai::Dashboard
2
+
3
+ A simple dashboard for the [Kimurai web scraping framework](https://github.com/vifreefly/kimuraframework). Requires Kimurai `>= 1.2.0`.
4
+
5
+ ## Installation
6
+ Add this line to your Kimurai project's Gemfile:
7
+
8
+ ```ruby
9
+ # add this line after `gem 'kimurai'`
10
+ gem 'kimurai-dashboard', require: false
11
+ ```
12
+
13
+ and then execute `$ bundle`.
14
+
15
+ ## Configuration
16
+ You need to provide `stats_database_url` to enable stats and save information about the project's spider runs and sessions to a database. For the database URL format, see https://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html. You can use a _sqlite_, _postgres_ or _mysql_ database (check your Kimurai project's Gemfile and uncomment the preferred gem).
17
+
18
+ Example for SQLite:
19
+
20
+ ```ruby
21
+ # Gemfile
22
+ gem 'sqlite3'
23
+ ```
24
+
25
+ **Note that the dashboard must be required only after `stats_database_url` has been provided:**
26
+
27
+ ```ruby
28
+ # config/boot.rb
29
+ # ...
30
+
31
+ Kimurai.configuration.stats_database_url = "sqlite://db/spiders_runs_#{Kimurai.env}.sqlite3"
32
+ # Important: require dashboard ONLY after stats_database_url was provided:
33
+ require 'kimurai/dashboard'
34
+ ```
35
+
36
+ Also, there are optional settings for a dashboard:
37
+
38
+ ```ruby
39
+ # config/application.rb
40
+
41
+ Kimurai.configure do |config|
42
+ # ...
43
+
44
+ config.dashboard = {
45
+ bind_address: "0.0.0.0",
46
+ port: 3001,
47
+ basic_auth: { username: "admin", password: "123456" }
48
+ }
49
+ end
50
+ ```
51
+
52
+ ## Usage
53
+ After successful configuration, all spiders (whether run individually with `kimurai start` or in a queue with `kimurai runner`) will save stats to the database.
54
+
55
+ Run `$ bundle exec kimurai dashboard` and navigate to the dashboard URL to see the stats.
56
+
57
+ ## License
58
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
# Defines the `test` task: runs every test/**/*_test.rb file with the
# test/ and lib/ directories added to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# A bare `rake` invocation runs the test task.
task default: :test
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "kimurai/dashboard"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,30 @@
1
+
# Gem specification for kimurai-dashboard.
# The gem's own lib/ directory is put on the load path first so that the
# version constant can be required below.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.include?(lib_dir) || $LOAD_PATH.unshift(lib_dir)
require "kimurai/dashboard/version"

Gem::Specification.new do |gem|
  gem.name          = "kimurai-dashboard"
  gem.version       = Kimurai::Dashboard::VERSION
  gem.authors       = ["Victor Afanasev"]
  gem.email         = ["vicfreefly@gmail.com"]

  gem.summary       = "Simple dashboard for a Kimurai web scraping framework"
  gem.homepage      = "https://github.com/vifreefly/kimurai-dashboard"
  gem.license       = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  project_root = File.expand_path('..', __FILE__)
  gem.files = Dir.chdir(project_root) do
    tracked_files = `git ls-files -z`.split("\x0")
    tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  end
  gem.require_paths = ["lib"]
  gem.required_ruby_version = ">= 2.5.0"

  # Runtime dependencies.
  gem.add_dependency "kimurai", "~> 1.2.0"
  gem.add_dependency "sequel"
  gem.add_dependency "sinatra-contrib"
  gem.add_dependency "pagy"

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.16"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.0"
end
@@ -0,0 +1,49 @@
1
+ require 'sequel'
2
+ require 'json'
3
+ require 'kimurai/dashboard/version'
4
+
5
+ require_relative 'dashboard/base'
6
+ require_relative 'dashboard/runner'
7
+
module Kimurai
  # Stats storage for the dashboard: opens the database connection and makes
  # sure the `sessions`, `spiders` and `runs` tables exist.
  module Dashboard
    # Connection URL: the configured `stats_database_url`, falling back to the
    # STATS_DATABASE_URL environment variable (the fallback is written back to
    # the configuration, as before).
    stats_database_url = (Kimurai.configuration.stats_database_url ||= ENV["STATS_DATABASE_URL"])

    # Fail with an actionable message; Sequel.connect(nil) raises the same
    # error class but only complains about the argument type.
    unless stats_database_url
      raise Sequel::Error,
        "Kimurai::Dashboard: stats_database_url is not configured. Set " \
        "Kimurai.configuration.stats_database_url (or STATS_DATABASE_URL) before requiring 'kimurai/dashboard'"
    end

    DB = Sequel.connect(stats_database_url)

    # Column types use Sequel's generic (capitalized) type methods so that the
    # schema is valid on SQLite, PostgreSQL and MySQL alike; lowercase names
    # such as `string` are passed through verbatim as SQL type names.
    # NOT NULL is expressed with `null: false` (`empty:` is not a Sequel option
    # and was silently ignored).

    DB.create_table?(:sessions) do
      # The id is supplied by the caller rather than generated by the database.
      primary_key :id, type: :integer, auto_increment: false
      String :status
      DateTime :start_time, null: false
      DateTime :stop_time
      String :environment
      Integer :concurrent_jobs
      String :spiders, text: true
      String :error, text: true
    end

    # Must exist before `runs`, which declares a foreign key to it; databases
    # that validate foreign keys at creation time reject a missing target table.
    DB.create_table?(:spiders) do
      primary_key :id
      String :name, null: false, unique: true
    end

    DB.create_table?(:runs) do
      primary_key :id
      String :spider_name, null: false
      String :status
      String :environment
      DateTime :start_time, null: false
      DateTime :stop_time
      Float :running_time
      foreign_key :session_id, :sessions
      foreign_key :spider_id, :spiders
      String :visits, text: true
      String :items, text: true
      String :events, text: true
      String :error, text: true
      String :server, text: true
    end
  end
end
46
+
47
+ require_relative 'dashboard/models/session'
48
+ require_relative 'dashboard/models/run'
49
+ require_relative 'dashboard/models/spider'
@@ -0,0 +1,151 @@
1
+ require 'sinatra/base'
2
+ require 'sinatra/respond_with'
3
+ require 'sinatra/json'
4
+ require 'sinatra/namespace'
5
+ require 'sinatra/reloader'
6
+ require 'sinatra/streaming'
7
+ require 'pagy'
8
+ require_relative 'helpers'
9
+
module Kimurai
  module Dashboard
    # Sinatra application that renders the dashboard pages for sessions, runs
    # and spiders.
    #
    # Bind address, port and HTTP basic auth are taken from the optional
    # `Kimurai.configuration.dashboard` hash when it is set.
    class App < Sinatra::Base
      include Pagy::Backend

      register Sinatra::RespondWith, Sinatra::Namespace
      enable :logging
      set :environment, Kimurai.env.to_sym

      # nil when no dashboard settings were configured.
      dashboard_config = Kimurai.configuration.dashboard

      if (bind_address = dashboard_config&.dig(:bind_address))
        set :bind, bind_address
      end

      if (port = dashboard_config&.dig(:port))
        set :port, port
      end

      configure :development do
        require 'pry'
        register Sinatra::Reloader
      end

      helpers Sinatra::Streaming
      helpers do
        include Helpers
        include Rack::Utils
        alias_method :h, :escape_html
      end

      if (auth = dashboard_config&.dig(:basic_auth))
        use Rack::Auth::Basic, "Protected Area" do |username, password|
          # Constant-time comparison of both fields, combined with the
          # non-short-circuiting `&`, so that response timing does not reveal
          # which part of the credentials was wrong.
          Rack::Utils.secure_compare(auth[:username].to_s, username.to_s) &
            Rack::Utils.secure_compare(auth[:password].to_s, password.to_s)
        end
      end

      ###

      get "/" do
        redirect "/spiders"
      end

      namespace "/sessions" do
        # Paginated list of sessions, newest first.
        get do
          @sessions = Session.reverse_order(:id)
          @pagy, @sessions = pagy(@sessions) unless @sessions.count.zero?

          respond_to do |f|
            f.html { erb :'sessions/index' }
          end
        end

        # Details of a single session.
        get "/:id" do
          @session = Session.find(id: params[:id].to_i)
          halt 404, "Error, can't find session!" unless @session

          respond_to do |f|
            f.html { erb :'sessions/show' }
          end
        end
      end

      namespace "/runs" do
        # Paginated list of runs, newest first, optionally filtered by
        # spider_id and/or session_id.
        get do
          @runs = Run.reverse_order(:id)

          # Only these two parameter names are ever used as method names, so
          # the dynamic dispatch below is not driven by arbitrary user input.
          filters = params.slice("spider_id", "session_id")
          filters.each do |filter_name, value|
            @runs = @runs.send(filter_name, value)
          end

          @pagy, @runs = pagy(@runs) unless @runs.count.zero?
          respond_to do |f|
            f.html { erb :'runs/index', locals: { filters: filters }}
          end
        end

        # Details of a single run, including the difference from the previous run.
        get "/:id" do
          @run = Run.find(id: params[:id].to_i)
          halt 404, "Error, can't find run!" unless @run

          respond_to do |f|
            f.html { erb :'runs/show', locals: { difference: @run.difference_between_previous_run }}
          end
        end

        # Contents of the spider's log file; only served for the latest run.
        get "/:id/log" do
          @run = Run.find(id: params[:id].to_i)
          # params[:id] is raw request input echoed into an HTML response, so escape it.
          halt 404, "Error, can't find run with id: #{h(params[:id])}" unless @run

          log_name = "./log/#{@run.spider_name}.log"

          # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
          if @run.latest? && File.exist?(log_name)
            content_type 'text/event-stream'
            File.readlines(log_name)
          else
            halt 404, "Log file is not available for this run"
          end
        end
      end

      namespace "/spiders" do
        # Paginated list of spiders.
        get do
          @spiders = Spider
          @pagy, @spiders = pagy(@spiders) unless @spiders.count.zero?

          respond_to do |f|
            f.html { erb :'spiders/index' }
          end
        end

        # A spider looked up by numeric id or by name, with its runs (newest first).
        get "/:id_or_name" do
          @spider =
            # \A...\z anchors the whole string; ^...$ would also accept a
            # digits-only line inside a multi-line value.
            if params[:id_or_name].match?(/\A\d+\z/)
              Spider.find(id: params[:id_or_name].to_i)
            else
              Spider.find(name: params[:id_or_name])
            end

          halt 404, "Error, can't find spider!" unless @spider

          @spider_runs = @spider.runs_dataset.reverse_order(:id)
          @pagy, @spider_runs = pagy(@spider_runs, items: 15) unless @spider_runs.count.zero?

          respond_to do |f|
            f.html { erb :'spiders/show' }
          end
        end
      end

      private

      # Pagination variables handed to Pagy: total count, the current page from
      # the "page" request parameter, and 25 items per page unless overridden.
      def pagy_get_vars(collection, vars)
        {
          count: collection.count,
          page_param: "page",
          page: params["page"],
          items: vars[:items] || 25
        }
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'kimurai/base'
2
+ require 'socket'
3
+
module Kimurai
  class Base
    class << self
      # Keep the framework's own implementation reachable so the wrapper below
      # can delegate to it.
      alias_method :original_crawl!, :crawl!
    end

    # Wraps the original `crawl!` so that the spider's progress is persisted
    # to the dashboard database while it runs and once more when it finishes.
    #
    # @param continue [Boolean] forwarded to the original `crawl!`
    # @param exception_on_fail [Boolean] when true, re-raise the error reported
    #   by the original `crawl!`; when false, return `[final_info, error]`
    # @return [false] when the spider is already running
    # @return [Object] `final_info` on success, or `[final_info, error]` on
    #   failure with `exception_on_fail: false`
    def self.crawl!(continue: false, exception_on_fail: true)
      if running?
        logger.error "Spider: already running: #{name}"
        return false
      end

      spider = Dashboard::Spider.find_or_create(name: name)
      run = Dashboard::Run.new(spider_id: spider.id)

      # Saves either the final info (when given) or the current @run_info
      # snapshot to the run record. Returns nil when there is nothing to save yet.
      updater = proc do |final_info|
        if final_info
          run.set(final_info)
          run.save
        elsif @run_info
          # Attach session and host details once, the first time info is available.
          unless @run_info[:server]
            @run_info.merge!(
              session_id: ENV["SESSION_ID"]&.to_i,
              server: {
                hostname: Socket.gethostname,
                ipv4: Socket.ip_address_list.find { |ai| ai.ipv4? && !ai.ipv4_loopback? }&.ip_address,
                process_pid: Process.pid
              }
            )
          end

          running_time = (Time.now - run_info[:start_time]).round(3)
          run.set(@run_info.merge!(running_time: running_time))
          run.save
        end
      end

      # Background poller: retries every 0.5s until a snapshot could be saved,
      # then waits a further 1.5s after each successful save.
      task = Thread.new do
        loop do
          sleep 0.5
          sleep 1.5 if updater.call
        end
      end

      final_info, error = original_crawl!(continue: continue, exception_on_fail: false)
      if error
        exception_on_fail ? raise(error) : [final_info, error]
      else
        final_info
      end
    ensure
      # This clause also runs for the early `return false` above and when the
      # setup code raises; `task` and `updater` are still nil then, so they are
      # guarded to avoid replacing the result with a NoMethodError.
      task&.terminate
      updater&.call(final_info)
    end
  end
end
@@ -0,0 +1,78 @@
require 'erb'
require 'pagy'
require 'pagy/extras/bootstrap'
3
+
4
+ module Kimurai
5
+ module Dashboard
6
+ module Helpers
7
+ include Pagy::Frontend
8
+
# Returns the object's hash representation with its deserialized values
# merged on top (deserialized values win on key collisions).
def to_hash(object)
  attributes = object.to_hash
  attributes.merge(object.deserialized_values)
end
12
+
# Turns each element into an HTML anchor pointing at "<base>/<element>",
# using the element itself as the link text.
def convert_to_links(elements, base:)
  elements.map do |element|
    target = "#{base}/#{element}"
    %(<a href="#{target}">#{element}</a>)
  end
end
16
+
# Builds Bootstrap breadcrumb markup from a path => label mapping.
# An entry with an empty path is rendered as the active (current) item
# without a link; every other entry becomes a link to its path.
def breadcrumbs(hash)
  items = hash.map do |path, value|
    next %Q{<li class="breadcrumb-item"><a href="#{path}">#{value}</a></li>} unless path.empty?

    %Q{<li class="breadcrumb-item active" aria-current="page">#{value}</li>}
  end

  # The leading whitespace of the second and third lines is part of the
  # generated markup and is kept as-is.
  [
    %Q{<nav aria-label="breadcrumb">},
    %Q{          <ol class="breadcrumb"> #{items.join}</ol>},
    %Q{        </nav>}
  ].join("\n")
end
29
+
# Collapses a stats hash into a flat list of values: a value that is itself
# a Hash is replaced by its number of entries, anything else is kept as-is.
def minimize_stats(stats)
  stats.values.map do |stat|
    next stat unless stat.instance_of?(Hash)

    stat.size
  end
end
33
+
# Returns a Bootstrap badge for a known status ("running", "processing",
# "completed", "failed", "stopped"); any other value is returned unchanged.
def get_badge(status)
  badge_style = {
    "running"    => "primary",
    "processing" => "primary",
    "completed"  => "success",
    "failed"     => "danger",
    "stopped"    => "light"
  }[status]
  return status unless badge_style

  %Q{<span class="badge badge-#{badge_style}">#{status}</span>}
end
50
+
# Renders the active filters as a muted paragraph, e.g. "Filters: spider_id = 1".
#
# Filter values originate from request parameters, so both names and values
# are HTML-escaped before being embedded in the markup.
#
# @param filters [Hash] filter name => value
# @return [String] HTML paragraph
def render_filters(filters)
  f = filters.map { |k, v| "#{ERB::Util.html_escape(k)} = #{ERB::Util.html_escape(v)}" }.join(", ")
  %Q{<p class="text-muted"> Filters: #{f} </p>}
end
55
+
# Formats a "(previous <link>, +N%)" note comparing a value with the previous run.
#
# @param prev_value [Object] value from the previous run, used as the link text
# @param prev_diff [Numeric, String, nil] percentage difference; when nil or
#   false only the link to the previous run is rendered
# @param prev_run_id [Object] id of the previous run, used in the link target
# @return [String] HTML fragment
def format_difference(prev_value, prev_diff, prev_run_id)
  previous = %Q{previous <a href="/runs/#{prev_run_id}">#{prev_value}</a>}
  return "(#{previous})" unless prev_diff

  # Decide the sign numerically when possible so that positive fractions such
  # as 0.5 get a "+" too; other values fall back to inspecting the text.
  # The sign is prepended to a new string, so a String argument is never
  # mutated (and a frozen one does not raise).
  needs_plus =
    if prev_diff.respond_to?(:positive?)
      prev_diff.positive?
    else
      prev_diff.to_s !~ /^[-0]/i
    end

  formatted_diff = "#{'+' if needs_plus}#{prev_diff}%"
  "(#{previous}, #{formatted_diff})"
end
72
+
# Total of all counts in a key => count mapping.
def errors_count(hash)
  hash.sum { |_key, count| count }
end
76
+ end
77
+ end
78
+ end