kimurai-dashboard 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d45c04e24e2f570510b38e3027958d190e39cb8cc3991f03925691acc5c0a4cf
4
+ data.tar.gz: 8833f523ebc1f303d48a5ad9107c0ecc1e33f36d0e0aa987206f13f30508e26a
5
+ SHA512:
6
+ metadata.gz: e12f971ce5e9c6dab110cae9e6975f4ace11ffac1c1a607d855e5d32b2662bdb3619fd08a4921b0f58f90dc30634a18ade29ead57273bcbc8b525c9786e6fc13
7
+ data.tar.gz: ff659649fb65ab2c7953e9544dc6ea3bc3a0196bd150d812316fb3f9ca8652a667c31c37e6d6a5643ff8804d5f29d3dbd57bc63b13ebc7eb5a05df2401f78833
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.1
5
+ before_install: gem install bundler -v 1.16.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in kimurai-dashboard.gemspec
6
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Victor Afanasev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,58 @@
1
+ # Kimurai::Dashboard
2
+
3
+ Simple dashboard for the [Kimurai web scraping framework](https://github.com/vifreefly/kimuraframework). Requires Kimurai `~> 1.2.0` (that is, `>= 1.2.0` and `< 1.3`).
4
+
5
+ ## Installation
6
+ Add this line to your Kimurai project's Gemfile:
7
+
8
+ ```ruby
9
+ # add this line after `gem 'kimurai'`
10
+ gem 'kimurai-dashboard', require: false
11
+ ```
12
+
13
+ and then execute `$ bundle`.
14
+
15
+ ## Configuration
16
+ To enable stats, set `stats_database_url`; information about the project's spider runs and sessions is then saved to that database. The database URL format is described at https://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html. You can use a _sqlite_, _postgres_ or _mysql_ database (check your Kimurai project's Gemfile and uncomment the gem for the database you prefer).
17
+
18
+ Example for SQLite:
19
+
20
+ ```ruby
21
+ # Gemfile
22
+ gem 'sqlite3'
23
+ ```
24
+
25
+ **Note that the dashboard must be required only after `stats_database_url` has been set:**
26
+
27
+ ```ruby
28
+ # config/boot.rb
29
+ # ...
30
+
31
+ Kimurai.configuration.stats_database_url = "sqlite://db/spiders_runs_#{Kimurai.env}.sqlite3"
32
+ # Important: require dashboard ONLY after stats_database_url was provided:
33
+ require 'kimurai/dashboard'
34
+ ```
35
+
36
+ Also, there are optional settings for a dashboard:
37
+
38
+ ```ruby
39
+ # config/application.rb
40
+
41
+ Kimurai.configure do |config|
42
+ # ...
43
+
44
+ config.dashboard = {
45
+ bind_address: "0.0.0.0",
46
+ port: 3001,
47
+ basic_auth: { username: "admin", password: "123456" }
48
+ }
49
+ end
50
+ ```
51
+
52
+ ## Usage
53
+ Once configured, all spiders (whether run individually with `kimurai start` or in a queue with `kimurai runner`) will save their stats to the database.
54
+
55
+ Run `$ bundle exec kimurai dashboard` and navigate to a dashboard url to see the stats.
56
+
57
+ ## License
58
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
# Defines the `test` task: runs every test/**/*_test.rb file with both the
# test/ and lib/ directories on the load path.
Rake::TestTask.new(:test) do |test_task|
  %w[test lib].each { |dir| test_task.libs << dir }
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# Running `rake` with no arguments runs the test suite.
task default: :test
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "kimurai/dashboard"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,30 @@
1
+
2
# Put this checkout's lib/ directory on the load path so the version file can
# be required below.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "kimurai/dashboard/version"

Gem::Specification.new do |spec|
  spec.name          = "kimurai-dashboard"
  spec.version       = Kimurai::Dashboard::VERSION
  spec.authors       = ["Victor Afanasev"]
  spec.email         = ["vicfreefly@gmail.com"]

  spec.summary       = "Simple dashboard for a Kimurai web scraping framework"
  spec.homepage      = "https://github.com/vifreefly/kimurai-dashboard"
  spec.license       = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    tracked_files = `git ls-files -z`.split("\x0")
    tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  end
  spec.require_paths = ["lib"]
  spec.required_ruby_version = ">= 2.5.0"

  # Runtime dependencies
  spec.add_dependency "kimurai", "~> 1.2.0"
  spec.add_dependency "sequel"
  spec.add_dependency "sinatra-contrib"
  spec.add_dependency "pagy"

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
end
@@ -0,0 +1,49 @@
1
+ require 'sequel'
2
+ require 'json'
3
+ require 'kimurai/dashboard/version'
4
+
5
+ require_relative 'dashboard/base'
6
+ require_relative 'dashboard/runner'
7
+
8
module Kimurai
  # Stats storage for the dashboard: opens the database connection and makes
  # sure the `sessions`, `spiders` and `runs` tables exist.
  module Dashboard
    # The connection URL is Kimurai.configuration.stats_database_url; when that
    # is unset it is filled in from ENV["STATS_DATABASE_URL"].
    DB = Sequel.connect(Kimurai.configuration.stats_database_url ||= ENV["STATS_DATABASE_URL"])

    DB.create_table?(:sessions) do
      # The id is supplied by the caller rather than generated by the database.
      primary_key :id, type: :integer, auto_increment: false
      string :status
      datetime :start_time, empty: false
      datetime :stop_time
      string :environment
      integer :concurrent_jobs
      text :spiders
      text :error
    end

    # `runs` references `spiders` through a foreign key, so `spiders` must be
    # created first: databases that validate references at table-creation
    # time reject a foreign key that points at a table which does not exist yet.
    DB.create_table?(:spiders) do
      primary_key :id
      string :name, empty: false, unique: true
    end

    DB.create_table?(:runs) do
      primary_key :id
      string :spider_name, empty: false
      string :status
      string :environment
      datetime :start_time, empty: false
      datetime :stop_time
      float :running_time
      foreign_key :session_id, :sessions
      foreign_key :spider_id, :spiders
      text :visits
      text :items
      text :events
      text :error
      text :server
    end
  end
end
46
+
47
+ require_relative 'dashboard/models/session'
48
+ require_relative 'dashboard/models/run'
49
+ require_relative 'dashboard/models/spider'
@@ -0,0 +1,151 @@
1
+ require 'sinatra/base'
2
+ require 'sinatra/respond_with'
3
+ require 'sinatra/json'
4
+ require 'sinatra/namespace'
5
+ require 'sinatra/reloader'
6
+ require 'sinatra/streaming'
7
+ require 'pagy'
8
+ require_relative 'helpers'
9
+
10
module Kimurai
  module Dashboard
    # Sinatra application serving the dashboard pages for sessions, runs and
    # spiders.
    #
    # Optional settings are read from Kimurai.configuration.dashboard:
    # :bind_address, :port and :basic_auth ({ username:, password: }).
    class App < Sinatra::Base
      include Pagy::Backend

      register Sinatra::RespondWith, Sinatra::Namespace
      enable :logging
      set :environment, Kimurai.env.to_sym

      dashboard_config = Kimurai.configuration.dashboard

      bind_address = dashboard_config&.dig(:bind_address)
      set :bind, bind_address if bind_address

      port = dashboard_config&.dig(:port)
      set :port, port if port

      configure :development do
        require 'pry'
        register Sinatra::Reloader
      end

      helpers Sinatra::Streaming
      helpers do
        include Helpers
        include Rack::Utils
        alias_method :h, :escape_html
      end

      if (auth = dashboard_config&.dig(:basic_auth))
        use Rack::Auth::Basic, "Protected Area" do |username, password|
          # Constant-time comparison, and `&` instead of `&&` so both checks
          # always run: response timing should not reveal which credential
          # (or how much of it) was correct.
          Rack::Utils.secure_compare(auth[:username].to_s, username.to_s) &
            Rack::Utils.secure_compare(auth[:password].to_s, password.to_s)
        end
      end

      ###

      get "/" do
        redirect "/spiders"
      end

      namespace "/sessions" do
        # List of sessions, newest first.
        get do
          @sessions = Session.reverse_order(:id)
          @pagy, @sessions = pagy(@sessions) unless @sessions.count.zero?

          respond_to do |f|
            f.html { erb :'sessions/index' }
          end
        end

        # Single session page; responds with 404 when the id is unknown.
        get "/:id" do
          @session = Session.find(id: params[:id].to_i)
          halt 404, "Error, can't find session!" unless @session

          respond_to do |f|
            f.html { erb :'sessions/show' }
          end
        end
      end

      namespace "/runs" do
        # List of runs, newest first, optionally narrowed by the `spider_id`
        # and/or `session_id` query parameters.
        get do
          @runs = Run.reverse_order(:id)

          # Only these two parameter names are ever dispatched to the dataset.
          filters = params.slice("spider_id", "session_id")
          filters.each do |filter_name, value|
            @runs = @runs.public_send(filter_name, value)
          end

          @pagy, @runs = pagy(@runs) unless @runs.count.zero?
          respond_to do |f|
            f.html { erb :'runs/index', locals: { filters: filters }}
          end
        end

        # Single run page; responds with 404 when the id is unknown.
        get "/:id" do
          @run = Run.find(id: params[:id].to_i)
          halt 404, "Error, can't find run!" unless @run

          respond_to do |f|
            f.html { erb :'runs/show', locals: { difference: @run.difference_between_previous_run }}
          end
        end

        # Contents of ./log/<spider_name>.log, served only for the run that
        # `Run#latest?` reports as the latest one.
        get "/:id/log" do
          @run = Run.find(id: params[:id].to_i)
          halt 404, "Error, can't find run with id: #{params[:id]}" unless @run

          log_name = "./log/#{@run.spider_name}.log"

          # File.exist? is used because File.exists? was removed in Ruby 3.2.
          if @run.latest? && File.exist?(log_name)
            content_type 'text/event-stream'
            File.readlines(log_name)
          else
            halt 404, "Log file is not available for this run"
          end
        end
      end

      namespace "/spiders" do
        # List of spiders.
        get do
          @spiders = Spider
          @pagy, @spiders = pagy(@spiders) unless @spiders.count.zero?

          respond_to do |f|
            f.html { erb :'spiders/index' }
          end
        end

        # Single spider page with its runs (newest first). The path segment is
        # treated as an id when it consists solely of digits, otherwise as a
        # spider name. Responds with 404 when nothing matches.
        get "/:id_or_name" do
          id_or_name = params[:id_or_name]
          @spider =
            # \A...\z anchors the whole string; ^...$ would also accept a
            # value that merely contains an all-digit line.
            if id_or_name.match?(/\A\d+\z/)
              Spider.find(id: id_or_name.to_i)
            else
              Spider.find(name: id_or_name)
            end

          halt 404, "Error, can't find spider!" unless @spider

          @spider_runs = @spider.runs_dataset.reverse_order(:id)
          @pagy, @spider_runs = pagy(@spider_runs, items: 15) unless @spider_runs.count.zero?

          respond_to do |f|
            f.html { erb :'spiders/show' }
          end
        end
      end

      private

      # Pagination variables handed to Pagy: total count of the collection,
      # the current page from the "page" request parameter, and the page size
      # (vars[:items], 25 when not given).
      def pagy_get_vars(collection, vars)
        {
          count: collection.count,
          page_param: "page",
          page: params["page"],
          items: vars[:items] || 25
        }
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'kimurai/base'
2
+ require 'socket'
3
+
4
module Kimurai
  class Base
    class << self
      # Keep the framework's implementation reachable under another name so
      # the override below can delegate to it.
      alias_method :original_crawl!, :crawl!
    end

    # Wraps the original crawl! so that the run's progress is persisted to the
    # stats database while the spider is working, and once more when it ends.
    #
    # @param continue [Boolean] forwarded unchanged to the original crawl!
    # @param exception_on_fail [Boolean] when true, an error reported by the
    #   original crawl! is raised; when false, `[final_info, error]` is returned
    # @return [false] when the spider is already running (nothing is recorded)
    # @return the final info returned by the original crawl! on success
    def self.crawl!(continue: false, exception_on_fail: true)
      # Bail out before any bookkeeping is set up. Previously this early
      # return still ran the cleanup code, which called the not-yet-assigned
      # updater and raised NoMethodError instead of returning false.
      if running?
        logger.error "Spider: already running: #{name}"
        return false
      end

      spider = Dashboard::Spider.find_or_create(name: name)
      run = Dashboard::Run.new(spider_id: spider.id)

      # Saves the run record. With `final_info` it stores that; otherwise it
      # stores the current @run_info (if any) plus the elapsed running time.
      # Returns nil when there is nothing to store yet.
      updater = proc do |final_info|
        if final_info
          run.set(final_info)
          run.save
        elsif @run_info
          # Session and server details are added once, on the first update.
          unless @run_info[:server]
            @run_info.merge!(
              session_id: ENV["SESSION_ID"]&.to_i,
              server: {
                hostname: Socket.gethostname,
                ipv4: Socket.ip_address_list.find { |ai| ai.ipv4? && !ai.ipv4_loopback? }&.ip_address,
                process_pid: Process.pid
              }
            )
          end

          running_time = (Time.now - @run_info[:start_time]).round(3)
          run.set(@run_info.merge!(running_time: running_time))
          run.save
        end
      end

      # Background refresh: poll every 0.5s until there is something to save,
      # then wait a further 1.5s after each successful save.
      task = Thread.new do
        loop do
          sleep 0.5
          sleep 1.5 if updater.call
        end
      end

      begin
        final_info, error = original_crawl!(continue: continue, exception_on_fail: false)
        if error
          exception_on_fail ? raise(error) : [final_info, error]
        else
          final_info
        end
      ensure
        # Stop the refresher, then write the final state. If the crawl raised,
        # final_info is nil and the last known @run_info is saved instead.
        task.terminate
        updater.call(final_info)
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
require 'erb'
require 'pagy'
require 'pagy/extras/bootstrap'
3
+
4
+ module Kimurai
5
+ module Dashboard
6
+ module Helpers
7
+ include Pagy::Frontend
8
+
9
# Returns the object's `to_hash` result with the entries of its
# `deserialized_values` merged on top (deserialized values win on key clashes).
def to_hash(object)
  attributes = object.to_hash
  attributes.merge(object.deserialized_values)
end
12
+
13
# Builds one anchor tag per element, each pointing at "<base>/<element>" and
# using the element itself as the link text.
#
# @param elements [Enumerable] values to link to
# @param base [String] path prefix placed before every element
# @return [Array<String>] HTML anchor strings, in input order
def convert_to_links(elements, base:)
  elements.map do |element|
    target = "#{base}/#{element}"
    %(<a href="#{target}">#{element}</a>)
  end
end
16
+
17
# Renders a breadcrumb <nav> from a path => label mapping. An entry whose path
# is empty is rendered as the active (current, unlinked) item; every other
# entry becomes a link to its path.
def breadcrumbs(hash)
  items = hash.map do |path, value|
    next %Q{<li class="breadcrumb-item active" aria-current="page">#{value}</li>} if path.empty?

    %Q{<li class="breadcrumb-item"><a href="#{path}">#{value}</a></li>}
  end

  %Q{<nav aria-label="breadcrumb">
<ol class="breadcrumb"> #{items.join}</ol>
</nav>}
end
29
+
30
# Condenses a stats hash into a flat list of its values, replacing every
# hash-like value with its number of entries.
#
# `is_a?(Hash)` is used instead of comparing the exact class so that Hash
# subclasses are summarized as well.
#
# @param stats [Hash] stat name => value (scalar or nested hash)
# @return [Array] one entry per stat, in the hash's order
def minimize_stats(stats)
  stats.values.map { |stat| stat.is_a?(Hash) ? stat.size : stat }
end
33
+
34
# Returns the badge markup for a known status ("running", "processing",
# "completed", "failed", "stopped"); any other value is returned unchanged.
def get_badge(status)
  badge_styles = {
    "running"    => "primary",
    "processing" => "primary",
    "completed"  => "success",
    "failed"     => "danger",
    "stopped"    => "light"
  }

  style = badge_styles[status]
  return status unless style

  %Q{<span class="badge badge-#{style}">#{status}</span>}
end
50
+
51
# Renders the active filters as a muted paragraph, e.g. "spider_id = 3".
#
# Names and values are HTML-escaped before being interpolated, because the
# returned string is markup and its content must not be able to inject tags.
#
# @param filters [Hash] filter name => value
# @return [String] HTML paragraph listing the filters, comma separated
def render_filters(filters)
  rendered = filters.map do |name, value|
    "#{ERB::Util.html_escape(name)} = #{ERB::Util.html_escape(value)}"
  end

  %Q{<p class="text-muted"> Filters: #{rendered.join(", ")} </p>}
end
55
+
56
# Formats a "(previous <link>, +N%)" note comparing a value with the one from
# a previous run.
#
# The sign is taken from the number itself, so positive fractions such as 0.5
# get their "+" (the old check looked for a leading "0" in the text and left
# them unsigned). Zero and negative numbers get no "+". Non-numeric
# differences keep the textual rule: "+" unless the text starts with "-" or
# "0". The caller's object is never modified.
#
# @param prev_value value shown as the link text
# @param prev_diff [Numeric, String, nil] percentage difference; nil/false
#   omits the percentage part
# @param prev_run_id id of the previous run, used in the "/runs/<id>" link
# @return [String]
def format_difference(prev_value, prev_diff, prev_run_id)
  previous = %Q{previous <a href="/runs/#{prev_run_id}">#{prev_value}</a>}
  return "(#{previous})" unless prev_diff

  diff_text = prev_diff.to_s
  needs_plus =
    if prev_diff.is_a?(Numeric) && prev_diff.real?
      prev_diff.positive?
    else
      diff_text !~ /\A[-0]/
    end
  sign = needs_plus ? "+" : ""

  "(#{previous}, #{sign}#{diff_text}%)"
end
72
+
73
# Adds up the counts (second element of every pair) in an
# error name => count mapping.
def errors_count(hash)
  hash.sum { |_, count| count }
end
76
+ end
77
+ end
78
+ end