outrider 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ require "net/ssh"
4
+ require "yaml"
5
+
6
+ task :default => :spec
7
+
8
+
9
+
10
# Reads the settings for one named environment out of the per-user
# hosts file at ~/.outrider/config/hosts.yml.
class TaskHelper
  # Whatever hosts.yml stores under the requested environment key.
  attr_reader :data

  # env - top-level key to look up in hosts.yml (the tasks in this
  #       Rakefile ask for 'prod' and 'dev').
  def initialize(env)
    hosts_file = File.join(Dir.home, "/.outrider/config/hosts.yml")
    all_hosts = YAML.load_file(hosts_file)
    @data = all_hosts[env]
  end
end
19
+
20
+
21
+
22
+
23
+
24
namespace :project do
  # Only needed in this namespace, to quote task arguments for the shell.
  require "shellwords"

  # Host settings are read once, while the Rakefile is being loaded.
  @prod = TaskHelper.new('prod').data
  @dev = TaskHelper.new('dev').data

  desc "Builds new project on development machine and production"
  task :build, [:title, :domain] do |_t, args|
    puts ":: Building project #{args}"

    # Task arguments are free-form text that ends up on two command lines
    # (local and remote), so escape them before interpolating.
    title = Shellwords.escape(args[:title].to_s)
    domain = Shellwords.escape(args[:domain].to_s)

    # Build remote command
    remote_command = "#{@prod['ruby']}; #{@prod['ignite_path']} create_project_db_row -p #{title} -d #{domain}"

    # add project to local system
    sh "#{@dev['ignite_path']} create_project -p #{title} -d #{domain}"

    # ssh and run on production server
    begin
      # Block form: the session is closed even when exec! raises, and the
      # block's value (the remote output) is returned by start.
      output = Net::SSH.start(@prod['host'], @prod['user'], :port => @prod['port'], :password => @prod['password']) do |ssh|
        ssh.exec!(remote_command)
      end
      puts output
    rescue StandardError => e
      # StandardError rather than Exception, so Ctrl-C and exit still work.
      puts "Unable to connect to #{@prod['host']} using #{@prod['user']} :: #{e}"
    end
  end
end
data/app/run.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'sinatra'
2
+
3
+
4
+
5
# Landing page: a plain-text liveness message.
get('/') { "Outrider reporting for duty" }

# Fixture page 1: a heading, one link to fixture page 2 and a content paragraph.
get('/test/1') do
  "<h1 class='test_class'>Test 1</h1><a href='http://outriderapp.com/test/2'>Link</a><br /><p class='content'>This page is no use to you</p>"
end

# Fixture page 2: links back to page 1, to an external site, and to an
# internal and an external PDF, followed by a content paragraph.
get('/test/2') do
  "<h1 class='test_class'>Test 2</h1><a href='http://outriderapp.com/test/1'>Link</a><a href='http://google.com'>External Link</a><a href='http://outriderapp.com/test/test.pdf'>Internal File Link</a><a href='http://google.com/test.pdf'>External File Link</a><br /><p class='content'>This page is no use to you</p>"
end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "outrider"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/outrider ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Outrider loads in everything we need
require 'outrider'
# The engine is initialized - this sets everything up for us
engine = Engine.new
# The process of interpreting commands is started
engine.run
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
#!/bin/bash
# Strict mode: stop on a failing command, an unset variable, or a failing
# stage inside a pipeline.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'

bundle install

# Do any other automated setup that you need to do here
data/config.ru ADDED
@@ -0,0 +1,2 @@
1
# Load the Sinatra routes from app/run.rb (path resolved relative to this
# file), then hand the resulting application to Rack.
app_entry = ::File.expand_path('../app/run', __FILE__)
require app_entry
run Sinatra::Application
@@ -0,0 +1 @@
1
+ no_method: "Method doesn't exist"
data/config/schema.sql ADDED
@@ -0,0 +1,40 @@
1
+ # ************************************************************
2
+ # Database: outrider
3
+ # ************************************************************
4
+
5
+
6
+
7
# Table: projects
# One row per project: an auto-increment id, a title and a domain.
# ------------------------------------------------------------

CREATE TABLE `projects` (
  `id`     INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
  `title`  VARCHAR(255) DEFAULT NULL,
  `domain` VARCHAR(255) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
16
+
17
+
18
+
19
# Table: raw_data
# One row per URL belonging to a project; `status` defaults to 'unscraped'.
# NOTE(review): `date_created` carries ON UPDATE CURRENT_TIMESTAMP, so MySQL
# rewrites it whenever the row is updated - confirm that is intended.
# NOTE(review): `project_id` is a signed INT while `projects`.`id` is
# UNSIGNED - confirm before adding a foreign key.
# ------------------------------------------------------------

CREATE TABLE `raw_data` (
  `id`                       INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
  `date_created`             TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `url`                      VARCHAR(255) DEFAULT NULL,
  `title_raw`                VARCHAR(255) DEFAULT NULL,
  `content_raw`              TEXT,
  `status`                   VARCHAR(255) DEFAULT 'unscraped',
  `author`                   VARCHAR(255) DEFAULT NULL,
  `date_published_raw`       VARCHAR(255) DEFAULT NULL,
  `project_id`               INT(11) DEFAULT NULL,
  `date_published_timestamp` DATETIME DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
35
+
36
+
37
# Our seed data
# Registers the built-in test project. The statement must end with ';'
# (a trailing ',' after the last VALUES tuple is a syntax error).
# -------------------------------------------------------------
INSERT INTO `projects` (`title`, `domain`)
VALUES ('test_project','http://outriderapp.com/test/1');
data/lib/ignite.rb ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Outrider loads in everything we need
require_relative './outrider.rb'
# The engine is initialized - this sets everything up for us
engine = Engine.new
# The process of interpreting commands is started
engine.run
data/lib/outrider.rb ADDED
@@ -0,0 +1,94 @@
1
+ #require_relative "outrider/version"
2
+ require_relative "outrider/engine"
3
+ require_relative "outrider/tools"
4
+ require_relative "outrider/commandify"
5
+ require_relative "outrider/project"
6
+
7
+
8
# Provides an interface as to what commands can be run.
#
# Holds the active project object, the loaded configuration and the database
# connection, and forwards commands to the project.
class Outrider
  attr_reader :command_list, :project, :db, :config

  # When initialized, do so with our base project facade,
  # and change it later once everything else is initialized based on the
  # specified project.
  def initialize
    @project = Project.new

    @config = {
      :database => load_global_config('database', "Couldn't load database configuration"),
      :messages => load_yaml(__FILE__, "../config/messages.yml", "Couldn't load messages config file")
    }
    @db = load_database
  end

  # Swaps the base project for the project-specific class defined in
  # projects/<project>/auxiliary.rb.
  #
  # project - project name; it selects the folder to load and, via
  #           classify/constantize, the class to instantiate.
  #
  # Returns the new project object, or false when the auxiliary file is missing.
  def set_project_object(project)
    project_path = OutriderTools::Store::get_filepath __FILE__, "../projects/#{project}/auxiliary.rb"
    return false unless File.exist? project_path

    require_relative "../projects/#{project}/auxiliary"
    # Initialize object for the project we're working on
    @project = project.classify.constantize.new
  end

  # Runs +command+ on the current project, passing +options+ through.
  #
  # Returns the command's result, or the "no_method" message when command is
  # nil or the project does not respond to it.
  # NOTE(review): if the messages file failed to load, @config[:messages] is
  # the fallback String and the "no_method" lookup yields nil.
  def operate(command, options = {})
    if command.nil? || !@project.respond_to?(command)
      return @config[:messages]["no_method"]
    end

    # respond_to? only vouches for public methods, so dispatch publicly too.
    @project.public_send(command, options)
  end

  private

  # Loads a YAML file located relative to +file_object+.
  # Returns the parsed content, or +error_message+ when the file is absent.
  def load_yaml(file_object, filename, error_message)
    read_yaml_or OutriderTools::Store::get_filepath(file_object, filename), error_message
  end

  # Loads ~/.outrider/config/<filename>.yml.
  # Returns the parsed content, or +error_message+ when the file is absent.
  def load_global_config(filename, error_message)
    read_yaml_or File.join(Dir.home, ".outrider", "config", "#{filename}.yml"), error_message
  end

  # Shared loader for the two methods above. YAML.load_file opens and closes
  # the file itself, so no file handle is left open.
  def read_yaml_or(path, fallback)
    File.exist?(path) ? YAML.load_file(path) : fallback
  end

  # Connects ActiveRecord using the :database entry of @config.
  def load_database
    ActiveRecord::Base.establish_connection(@config[:database])
  end
end
@@ -0,0 +1,49 @@
1
# Turns the process's command line (ARGV) into an action name plus an
# options hash, using Trollop.
module Commandify
  # Parses global options, takes the next ARGV entry as the sub-command, then
  # parses that sub-command's options.
  #
  # Prints "Invalid Command" when the sub-command is not a known one, but
  # still returns it.
  #
  # Returns a Hash with :action (the sub-command string, or nil when none was
  # given) and :options (the parsed sub-command options).
  def self.process
    # SYSTEM API METHODS Do not modify this line
    sub_commands = %w(create_project create_project_db_row delete_project crawl intel test_super)

    # Place custom command options here. See instructions at http://manageiq.github.io/trollop/
    # concat (not <<) so that custom names are added as individual entries
    # instead of as one nested array that include? could never match.
    sub_commands.concat %w()

    # Global options: parsing stops at the first sub-command name.
    Trollop::options do
      banner "CLI for Outrider data processing tools"
      opt :dry_run, "Don't actually do anything", :short => "-n"
      stop_on sub_commands
    end

    command = ARGV.shift

    # Do not modify this
    command_opts = Trollop::options do
      # REQUIRED. Do not mess with these. Do not duplicate arguments or their short form. Run tests after modifying
      opt :domain, "The domain", :short => "-d", :type => String, :default => ''
      opt :limit, "Limit", :short => "-l", :type => Integer, :default => 1000
      opt :project, "The name of the project", :short => "-p", :type => String, :default => ''
      opt :filename, "Write data to a filename", :short => "-f", :type => String, :default => ''
      opt :restrict, "Can only be crawled within the domain", :short => "-r", :default => true
      opt :set_project, "If we need to set project", :short => "-s", :default => true
      opt :intel_command, "What's the secondary command", :short => "-i", :type => String, :default => ''

      # CUSTOM. Place custom command options here
    end

    puts "Invalid Command" unless sub_commands.include? command

    {
      :action => command,
      :options => command_opts
    }
  end
end
@@ -0,0 +1,20 @@
1
# Drives one command-line invocation: takes the parsed command hash and
# executes it through an Outrider instance.
class Engine
  # Hash with :action and :options, as produced by Commandify::process.
  attr_accessor :commands

  # commands - parsed command hash; defaults to parsing the command line.
  def initialize(commands = Commandify::process)
    @commands = commands
  end

  # Prints a notice when no action was given, optionally selects the
  # project-specific object, then runs the action.
  #
  # Returns whatever Outrider#operate returns.
  def run
    action = @commands[:action]
    p "No Method Given" if action.nil?

    outrider = Outrider.new
    options = @commands[:options]
    # Only an explicit true switches projects.
    outrider.set_project_object(options[:project]) if options[:set_project] == true
    outrider.operate(action, options)
  end
end
@@ -0,0 +1,14 @@
1
## Intel is outrider's interface for running
## statistical analysis libraries against its data
##

module OutriderIntel
  # Counts how many times each element occurs in +words+.
  #
  # Returns a Hash mapping element => count; the hash's default value is 0,
  # so looking up an element that never occurred yields 0.
  def self.word_frequency(words)
    counts = Hash.new(0)
    words.each { |word| counts[word] += 1 }
    counts
  end
end
@@ -0,0 +1,146 @@
1
# Base project facade. Project-specific classes generated by create_folder
# inherit from it; it also implements the built-in project commands.
class Project
  # Needed by both create_folder and delete, so load it with the class
  # rather than only inside create_folder.
  require 'fileutils'

  attr_reader :config, :logger

  @@log = Logger.new(STDOUT)

  def initialize
    @config = {}
  end

  # Looks the project up by title and, when found, stores its id, title and
  # domain in @config. Leaves @config untouched when no such project exists.
  def set_config(name)
    project_meta = Projects.find_by(title: name)
    return if project_meta.nil?

    @config = {
      :id => project_meta.id,
      :title => project_meta.title,
      :domain => project_meta.domain
    }
  end

  # Writes projects/<project>/auxiliary.rb containing a Project subclass
  # named after options[:project], creating the directory when needed.
  def self.create_folder(options)
    class_name = options[:project].classify
    project_name = options[:project].parameterize.underscore
    file_path = OutriderTools::Store::get_filepath(__FILE__, "../../projects/#{options[:project]}/auxiliary.rb")

    # create directories if they dont exist
    dirname = File.dirname(file_path)
    unless File.directory?(dirname)
      FileUtils.mkdir_p(dirname)
      @@log.info "Making directory: #{dirname}"
    end

    # generate our default project class
    File.open(file_path, 'w') do |file|
      file.write(%Q{class #{class_name} < Project\n\tdef initialize\n\t\tproject_name :#{project_name}\n\tend\nend})
      @@log.info "Auxiliary File Created in: #{file.path}"
    end
  end

  # Creates the project row plus a first 'unscraped' raw_data row pointing at
  # the project's domain.
  def self.create_db_row(options)
    # create project in database
    project = Projects.create({ :title => options[:project], :domain => options[:domain] })
    # TODO it might get stuck here if this domain already is in the raw data table
    ProjectData.create({ :url => options[:domain], :status => 'unscraped', :project_id => project.id })
    @@log.info "Project created in database: #{project.id}"
  end

  # Removes the project's folder and its database row.
  #
  # Returns false without deleting anything when options[:project] is blank,
  # is "." or "..", or contains a path separator. The -p option defaults to
  # '', and with such a name the computed path presumably resolves to the
  # shared projects directory (or above it), which rm_rf would then wipe.
  def self.delete(options)
    name = options[:project].to_s
    unless deletable_project_name?(name)
      @@log.error "Refusing to delete project with unsafe name: #{name.inspect}"
      return false
    end

    # delete folder
    folder_path = OutriderTools::Store::get_filepath(__FILE__, "../../projects/#{options[:project]}")
    FileUtils.rm_rf(folder_path)
    @@log.info "Deleting: #{folder_path}"

    # delete from database
    project = Projects.find_by(title: options[:project])
    project.destroy unless project.nil?
    @@log.info "Deleting: project from database: #{options[:project]}"
  end

  # True when +name+ designates exactly one folder directly under projects/.
  def self.deletable_project_name?(name)
    stripped = name.strip
    return false if stripped.empty? || %w[. ..].include?(stripped)

    (name =~ %r{[/\\]}).nil?
  end
  private_class_method :deletable_project_name?

  #
  # These methods are here simply to help run our unit tests
  def test_super(options)
    p "Super Test Called"
    "Super Test Called"
  end

  # Called from generated subclasses' initialize to load their config.
  def project_name(name)
    set_config name.to_s
  end

  # A command line tool that lets us build a project
  # /lib/ignite.rb create_project -p project -d domain.com
  #
  def create_project(options)
    Project::create_folder options
    Project::create_db_row options
  end

  # Command: create only the database rows for a project.
  def create_project_db_row(options)
    Project::create_db_row options
  end

  # Command: delete a project's folder and database row.
  def delete_project(options)
    Project::delete options
  end
end
124
+
125
+
126
+
127
+
128
+
129
+ # Access to active record classes
130
+ # This makes them usable throughout our code, as project.rb
131
+ # is autoloaded
132
+
133
+
134
# ActiveRecord model mapped onto the `projects` table.
class Projects < ActiveRecord::Base
  self.table_name = "projects"
  #has_many :project_data
end
138
+
139
+
140
+
141
# ActiveRecord model mapped onto the `raw_data` table.
class ProjectData < ActiveRecord::Base
  self.table_name = "raw_data"
  #belongs_to :projects
end
145
+
146
+