outrider 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +3 -0
- data/Capfile +30 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +119 -0
- data/LICENSE.txt +21 -0
- data/README.md +261 -0
- data/Rakefile +52 -0
- data/app/run.rb +15 -0
- data/bin/console +14 -0
- data/bin/outrider +8 -0
- data/bin/setup +7 -0
- data/config.ru +2 -0
- data/config/messages.yml +1 -0
- data/config/schema.sql +40 -0
- data/lib/ignite.rb +8 -0
- data/lib/outrider.rb +94 -0
- data/lib/outrider/commandify.rb +49 -0
- data/lib/outrider/engine.rb +20 -0
- data/lib/outrider/intel.rb +14 -0
- data/lib/outrider/project.rb +146 -0
- data/lib/outrider/tools.rb +224 -0
- data/lib/outrider/version.rb +3 -0
- data/outrider.gemspec +27 -0
- data/projects/nz_herald/auxiliary.rb +56 -0
- data/projects/stuff/auxiliary.rb +71 -0
- data/projects/test_project/auxiliary.rb +63 -0
- data/projects/theage/auxiliary.rb +29 -0
- data/public/index.html +0 -0
- data/tmp/x.txt +1 -0
- metadata +122 -0
data/Rakefile
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require "bundler/gem_tasks"
|
|
2
|
+
require "rspec/core/rake_task"
|
|
3
|
+
require "net/ssh"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
# Define the :spec task that :default depends on; without it a bare `rake`
# aborts because no task named "spec" exists.
RSpec::Core::RakeTask.new(:spec)

task :default => :spec
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Reads the settings of one environment from ~/.outrider/config/hosts.yml
# for use by the Rake tasks.
class TaskHelper
  # The section of hosts.yml stored under the requested environment key
  # (nil when the file has no such key).
  attr_reader :data

  # env - key of the environment section to read, e.g. 'prod' or 'dev'.
  def initialize(env)
    hosts_file = File.join(Dir.home, "/.outrider/config/hosts.yml")
    environments = YAML.load_file(hosts_file)
    @data = environments[env]
  end
end
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
namespace :project do
  # Used to quote the task arguments before they are placed in shell commands.
  require "shellwords"

  desc "Builds new project on development machine and production"
  task :build, [:title, :domain] do |_task, args|
    puts ":: Building project #{args}"

    # Host settings are read when the task runs rather than when the Rakefile
    # is loaded, so other rake tasks do not require hosts.yml to exist.
    prod = TaskHelper.new('prod').data
    dev  = TaskHelper.new('dev').data

    # Escape the arguments so values containing spaces, "&", "?" and similar
    # characters reach the ignite script as a single argument.
    title  = Shellwords.escape(args[:title].to_s)
    domain = Shellwords.escape(args[:domain].to_s)

    # Build remote command
    command = "#{prod['ruby']}; #{prod['ignite_path']} create_project_db_row -p #{title} -d #{domain}"

    # add project to local system
    sh "#{dev['ignite_path']} create_project -p #{title} -d #{domain}"

    # ssh and run on production server
    begin
      ssh = Net::SSH.start(prod['host'], prod['user'], :port => prod['port'], :password => prod['password'])
      begin
        puts ssh.exec!(command)
      ensure
        # Close the session even when the remote command raises.
        ssh.close
      end
    rescue StandardError => e
      # StandardError (not Exception) so interrupts and exits still propagate.
      puts "Unable to connect to #{prod['host']} using #{prod['user']} :: #{e}"
    end
  end
end
|
data/app/run.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
require 'sinatra'
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Root page: a plain liveness message.
get('/') { "Outrider reporting for duty" }

# Fixture page 1: a heading, one link to fixture page 2 and a content paragraph.
get '/test/1' do
  [
    "<h1 class='test_class'>Test 1</h1>",
    "<a href='http://outriderapp.com/test/2'>Link</a>",
    "<br /><p class='content'>This page is no use to you</p>"
  ].join
end

# Fixture page 2: links back to page 1, to an external site and to
# internal/external PDF files.
get '/test/2' do
  [
    "<h1 class='test_class'>Test 2</h1>",
    "<a href='http://outriderapp.com/test/1'>Link</a>",
    "<a href='http://google.com'>External Link</a>",
    "<a href='http://outriderapp.com/test/test.pdf'>Internal File Link</a>",
    "<a href='http://google.com/test.pdf'>External File Link</a>",
    "<br /><p class='content'>This page is no use to you</p>"
  ].join
end
|
data/bin/console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "outrider"
|
|
5
|
+
|
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
8
|
+
|
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
10
|
+
# require "pry"
|
|
11
|
+
# Pry.start
|
|
12
|
+
|
|
13
|
+
require "irb"
|
|
14
|
+
IRB.start
|
data/bin/outrider
ADDED
data/bin/setup
ADDED
data/config.ru
ADDED
data/config/messages.yml
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Returned by Outrider#operate when no command is given or the active project
# does not respond to the requested command.
no_method: "Method doesn't exist"
|
data/config/schema.sql
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# ************************************************************
|
|
2
|
+
# Database: outrider
|
|
3
|
+
# ************************************************************
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Dump of table projects
|
|
8
|
+
# ------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
# One row per project: its title and the domain it points at.
CREATE TABLE `projects` (
  `id`     int(11) unsigned NOT NULL AUTO_INCREMENT,
  `title`  varchar(255) DEFAULT NULL,
  `domain` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Dump of table raw_data
|
|
20
|
+
# ------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
# One row per URL belonging to a project; new rows start with status 'unscraped'.
# NOTE(review): `date_created` carries ON UPDATE CURRENT_TIMESTAMP, so it is
# refreshed on every row update - confirm that this is intended.
# NOTE(review): `project_id` is a signed int with no foreign key, while
# `projects`.`id` is unsigned - presumably it references that column; verify.
CREATE TABLE `raw_data` (
  `id`                       int(11) unsigned NOT NULL AUTO_INCREMENT,
  `date_created`             timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `url`                      varchar(255) DEFAULT NULL,
  `title_raw`                varchar(255) DEFAULT NULL,
  `content_raw`              text,
  `status`                   varchar(255) DEFAULT 'unscraped',
  `author`                   varchar(255) DEFAULT NULL,
  `date_published_raw`       varchar(255) DEFAULT NULL,
  `project_id`               int(11) DEFAULT NULL,
  `date_published_timestamp` datetime DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Our seed data
|
|
38
|
+
# -------------------------------------------------------------
|
|
39
|
+
# Seed row for the bundled test project. The statement is terminated with ";"
# (a trailing "," after the last VALUES tuple is a syntax error).
INSERT INTO `projects` (`title`, `domain`)
VALUES ('test_project','http://outriderapp.com/test/1');
|
data/lib/ignite.rb
ADDED
data/lib/outrider.rb
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#require_relative "outrider/version"
|
|
2
|
+
require_relative "outrider/engine"
|
|
3
|
+
require_relative "outrider/tools"
|
|
4
|
+
require_relative "outrider/commandify"
|
|
5
|
+
require_relative "outrider/project"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Provides an interface as to what commands can be run.
#
# Holds the active project object, the loaded configuration and the database
# connection, and dispatches command names to the project object.
class Outrider
  attr_reader :command_list, :project, :db, :config

  # When initialized, do so with our base project facade, and change it later
  # (see #set_project_object) once everything else is initialized based on the
  # specified project.
  def initialize
    @project = Project.new

    @config = {
      :database => load_global_config('database', "Couldn't load database configuration"),
      :messages => load_yaml(__FILE__, "../config/messages.yml", "Couldn't load messages config file")
    }
    @db = load_database
  end

  # Swaps the base project for the class defined in
  # projects/<project>/auxiliary.rb.
  #
  # project - name of the project folder.
  #
  # Returns the new project instance, or false when the auxiliary file does
  # not exist.
  def set_project_object(project)
    project_path = OutriderTools::Store::get_filepath __FILE__, "../projects/#{project}/auxiliary.rb"
    return false unless File.exist? project_path

    require_relative "../projects/#{project}/auxiliary"
    # Initialize object for the project we're working on
    @project = project.classify.constantize.new
  end

  # Runs a command on the current project.
  #
  # command - name of the method to call on the project (may be nil).
  # options - options passed to that method as its single argument.
  #
  # Returns the method's result, or the configured "no_method" message when
  # command is nil or the project has no public method of that name.
  def operate(command, options = {})
    return @config[:messages]["no_method"] if command.nil? || !@project.respond_to?(command)

    # respond_to? only vouches for public methods, so dispatch with
    # public_send rather than send.
    @project.public_send(command, options)
  end

  private

  # Loads a YAML file located relative to file_object.
  # Returns the parsed content, or error_message when the file is missing.
  def load_yaml(file_object, filename, error_message)
    file = OutriderTools::Store::get_filepath file_object, filename
    # load_file opens and closes the file itself (no dangling File handle).
    File.exist?(file) ? YAML.load_file(file) : error_message
  end

  # Loads ~/.outrider/config/<filename>.yml.
  # Returns the parsed content, or error_message when the file is missing.
  def load_global_config(filename, error_message)
    file = File.join(Dir.home, "/.outrider/config/#{filename}.yml")
    File.exist?(file) ? YAML.load_file(file) : error_message
  end

  # Connects ActiveRecord using the loaded database configuration.
  def load_database
    ActiveRecord::Base.establish_connection(@config[:database])
  end
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Turns the command line (ARGV) into an action name plus its options.
module Commandify
  # Parses the global flags, takes the next ARGV entry as the sub-command and
  # then parses the sub-command options.
  #
  # Prints "Invalid Command" when the sub-command is not a known one, but
  # still returns it.
  #
  # Returns a Hash with :action (the sub-command, nil when ARGV is empty) and
  # :options (the parsed sub-command options).
  def self.process
    # SYSTEM API METHODS Do not modify this line
    sub_commands = %w(create_project create_project_db_row delete_project crawl intel test_super)

    # Place custom command options here. See instructions at http://manageiq.github.io/trollop/
    # concat (not <<) adds each custom name as its own entry; << would append
    # one nested array that include? below could never match.
    sub_commands.concat %w()

    # Global flags; parsing stops at the first sub-command.
    Trollop::options do
      banner "CLI for Outrider data processing tools"
      opt :dry_run, "Don't actually do anything", :short => "-n"
      stop_on sub_commands
    end

    command = ARGV.shift

    # Do not modify this
    command_opts = Trollop::options do
      # REQUIRED. Do not mess with these. Do not duplicate arguments or their short form. Run tests after modifying
      opt :domain, "The domain", :short => "-d", :type => String, :default => ''
      opt :limit, "Limit", :short => "-l", :type => Integer, :default => 1000
      opt :project, "The name of the project", :short => "-p", :type => String, :default => ''
      opt :filename, "Write data to a filename", :short => "-f", :type => String, :default => ''
      opt :restrict, "Can only be crawled within the domain", :short => "-r", :default => true
      opt :set_project, "If we need to set project", :short => "-s", :default => true
      opt :intel_command, "What's the secondary command", :short => "-i", :type => String, :default => ''

      # CUSTOM. Place custom command options here
    end

    puts "Invalid Command" unless sub_commands.include? command

    {
      :action => command,
      :options => command_opts
    }
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Runs one parsed command: builds an Outrider, optionally selects the project
# and dispatches the action.
class Engine
  # Hash with :action and :options, as produced by Commandify.process.
  attr_accessor :commands

  # commands - the parsed command hash; read from the command line when omitted.
  def initialize(commands = Commandify.process)
    @commands = commands
  end

  # Executes the stored command and returns whatever Outrider#operate returns.
  def run
    action = @commands[:action]
    p "No Method Given" if action.nil?

    runner = Outrider.new
    options = @commands[:options]
    if options[:set_project] == true
      runner.set_project_object(options[:project])
    end
    runner.operate(action, options)
  end
end
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# Required here so every class method below can rely on them (delete uses
# FileUtils even when create_folder has never run in this process).
require 'fileutils'
require 'logger'

# Base class for all projects. Offers the built-in commands (create/delete a
# project) and holds the project's database metadata in #config.
class Project
  attr_reader :config, :logger

  # Logger shared by this class's methods; writes to STDOUT.
  @@log = Logger.new(STDOUT)

  def initialize
    @config = {}
  end

  # Looks the project up by title and, when found, stores its id, title and
  # domain in @config. @config is left untouched when no row matches.
  def set_config(name)
    project_meta = Projects.find_by(title: name)
    return if project_meta.nil?

    @config = {
      :id => project_meta.id,
      :title => project_meta.title,
      :domain => project_meta.domain
    }
  end

  # Creates projects/<project>/auxiliary.rb containing a default subclass of
  # Project for the project named in options[:project].
  def self.create_folder(options)
    class_name = options[:project].classify
    project_name = options[:project].parameterize.underscore
    file_path = OutriderTools::Store::get_filepath(__FILE__, "../../projects/#{options[:project]}/auxiliary.rb")

    # create directories if they dont exist
    dirname = File.dirname(file_path)
    unless File.directory?(dirname)
      FileUtils.mkdir_p(dirname)
      @@log.info "Making directory: #{dirname}"
    end

    # generate our default project class
    File.open(file_path, 'w') do |file|
      file.write(%Q{class #{class_name} < Project\n\tdef initialize\n\t\tproject_name :#{project_name}\n\tend\nend})
      @@log.info "Auxiliary File Created in: #{file.path}"
    end
  end

  # Inserts the project row and a first 'unscraped' raw_data row for its
  # domain, using options[:project] and options[:domain].
  def self.create_db_row(options)
    project = Projects.create({ :title => options[:project], :domain => options[:domain] })
    # TODO it might get stuck here if this domain already is in the raw data table
    ProjectData.create({ :url => options[:domain], :status => 'unscraped', :project_id => project.id })
    @@log.info "Project created in database: #{project.id}"
  end

  # Removes the project's folder and its row in the projects table.
  #
  # Returns false without touching anything when options[:project] is blank or
  # is not a plain folder name: a blank name would otherwise resolve to the
  # projects directory itself and remove every project.
  def self.delete(options)
    unless safe_project_name?(options[:project])
      @@log.error "Not deleting: invalid project name #{options[:project].inspect}"
      return false
    end

    # delete folder
    folder_path = OutriderTools::Store::get_filepath(__FILE__, "../../projects/#{options[:project]}")
    FileUtils.rm_rf(folder_path)
    @@log.info "Deleting: #{folder_path}"

    # delete from database
    project = Projects.find_by(title: options[:project])
    project.destroy unless project.nil?
    @@log.info "Deleting: project from database: #{options[:project]}"
  end

  # True when name is a non-blank, single path component.
  def self.safe_project_name?(name)
    name = name.to_s
    return false if name.strip.empty?
    return false if name == '.' || name == '..'

    !name.include?('/') && !name.include?('\\')
  end
  private_class_method :safe_project_name?

  # This method is here simply to help run our unit tests.
  def test_super(options)
    p "Super Test Called"
    return "Super Test Called"
  end

  # Loads this project's metadata by name; the class generated by
  # create_folder calls this from its initialize.
  def project_name(name)
    set_config name.to_s
  end

  # A command line tool that lets us build a project
  # /lib/ignite.rb create_project -p project -d domain.com
  def create_project(options)
    Project::create_folder options
    Project::create_db_row options
  end

  # Creates only the database rows for a project (no project folder).
  def create_project_db_row(options)
    Project::create_db_row options
  end

  # Deletes the project's folder and database row; see Project.delete.
  def delete_project(options)
    Project::delete options
  end
end
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Access to active record classes
|
|
130
|
+
# This makes them usable throughout our code, as project.rb
|
|
131
|
+
# is autoloaded
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ActiveRecord model mapped to the `projects` table.
class Projects < ActiveRecord::Base
  self.table_name = "projects"
  # has_many :project_data
end
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ActiveRecord model mapped to the `raw_data` table.
class ProjectData < ActiveRecord::Base
  self.table_name = "raw_data"
  # belongs_to :projects
end
|
|
145
|
+
|
|
146
|
+
|