farmstead 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +26 -0
- data/.rspec +3 -0
- data/.rubocop.yml +148 -0
- data/.travis.yml +5 -0
- data/.yardopts +2 -0
- data/Gemfile +35 -0
- data/LICENSE.txt +21 -0
- data/README.md +110 -0
- data/Rakefile +6 -0
- data/bin/console +18 -0
- data/bin/setup +8 -0
- data/examples/myproject.yml +23 -0
- data/farmstead.gemspec +26 -0
- data/lib/farmstead.rb +46 -0
- data/lib/farmstead/cli.rb +47 -0
- data/lib/farmstead/cowardlylion.rb +41 -0
- data/lib/farmstead/glenda.rb +99 -0
- data/lib/farmstead/project.rb +79 -0
- data/lib/farmstead/scarecrow.rb +55 -0
- data/lib/farmstead/service.rb +89 -0
- data/lib/farmstead/tinman.rb +49 -0
- data/lib/farmstead/version.rb +3 -0
- data/scaffold/.dockerignore.erb +6 -0
- data/scaffold/.env.erb +9 -0
- data/scaffold/Dockerfile-dorothy.erb +24 -0
- data/scaffold/Dockerfile.erb +25 -0
- data/scaffold/Gemfile.erb +36 -0
- data/scaffold/app/controllers/application_controller.rb.erb +6 -0
- data/scaffold/docker-compose.yml.erb +72 -0
- data/scaffold/exec.sh.erb +25 -0
- data/scaffold/exec.sh.erb.old +57 -0
- data/scaffold/supervisord.conf.erb +10 -0
- metadata +121 -0
data/lib/farmstead.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2004-2018 Ken Jenney
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require "kafka"
|
27
|
+
require "mysql2"
|
28
|
+
require "json"
|
29
|
+
require "nokogiri"
|
30
|
+
require "httparty"
|
31
|
+
require "open-uri"
|
32
|
+
require "mechanize"
|
33
|
+
|
34
|
+
require "farmstead/version"
|
35
|
+
require "farmstead/project"
|
36
|
+
require "farmstead/cli"
|
37
|
+
require "farmstead/service"
|
38
|
+
require "farmstead/tinman"
|
39
|
+
require "farmstead/cowardlylion"
|
40
|
+
require "farmstead/glenda"
|
41
|
+
require "farmstead/scarecrow"
|
42
|
+
require "farmstead/tinman"
|
43
|
+
|
44
|
+
module Farmstead
|
45
|
+
# Top-level namespace for the gem; the farmstead/* files required above reopen this module to define their classes.
|
46
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "thor"
|
2
|
+
# require 'farmstead/cli/net'
|
3
|
+
|
4
|
+
module Farmstead
  # Command-line interface for Farmstead, built on Thor.
  #
  # `new` scaffolds and deploys a project. The remaining commands
  # (tinman, scarecrow, cowardlylion, glenda) each build a fresh instance of
  # the matching service class and invoke the method named on the command line.
  class CLI < Thor
    class_option :verbose, aliases: "-v", type: :boolean, desc: "Be verbose"
    class_option :config, aliases: "-c", type: :string, desc: "Config file"
    class_option :database, aliases: "-d", type: :string, desc: "Database"
    class_option :deploy, aliases: "-x", type: :string, desc: "Deployment Method"

    desc "new project_name", "Create a new project"
    # Creates a project called +project_name+, copying the optional
    # --database, --config and --deploy values onto it before creation.
    def new(project_name)
      project = Farmstead::Project.new
      project.name = project_name
      project.database = options[:database] if options[:database]
      project.config = options[:config] if options[:config]
      project.deploy = options[:deploy] if options[:deploy]
      project.create
    end

    desc "tinman command", "Send a command to tinman"
    def tinman(command)
      dispatch(Farmstead::Tinman.new, command)
    end

    desc "scarecrow command", "Send a command to scarecrow"
    def scarecrow(command)
      dispatch(Farmstead::Scarecrow.new, command)
    end

    desc "cowardlylion command", "Send a command to cowardlylion"
    def cowardlylion(command)
      dispatch(Farmstead::Cowardlylion.new, command)
    end

    desc "glenda command", "Send a command to glenda"
    def glenda(command)
      dispatch(Farmstead::Glenda.new, command)
    end

    # desc "net COMMANDS", "Net control Module"
    # subcommand "net", Socialinvestigator::CLI::Net

    private

    # Invokes +command+ on +service+.
    #
    # The command name comes straight from the command line, so it is only
    # dispatched when it names a public method that the service adds on top of
    # Object. This keeps private/Kernel methods (exit, system, eval, ...) and
    # generic Object methods out of reach, which plain `send` did not.
    #
    # Raises Thor::Error when the command is not an exposed service method.
    def dispatch(service, command)
      exposed = (service.public_methods - Object.public_instance_methods).map(&:to_s)
      unless exposed.include?(command.to_s)
        raise Thor::Error, "Unknown command '#{command}' for #{service.class.name}"
      end

      service.public_send(command)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# CowardlyLion - the Miller
|
2
|
+
#
|
3
|
+
# Cowardlylion is responsible for arranging data into usable blocks
|
4
|
+
#
|
5
|
+
#
|
6
|
+
# CowardlyLion is running as a Consumer and it will automatically pick up
|
7
|
+
# messages from the Forest topic and do its job and then send
|
8
|
+
# a message as a Producer to the Road topic
|
9
|
+
#
|
10
|
+
# Every micro-service inherits the Service class
|
11
|
+
module Farmstead
  # CowardlyLion consumes messages from the 'Forest' topic, tags each payload
  # and republishes it on the 'Road' topic.
  #
  # It inherits Service because it uses @consumer and write_message, both of
  # which are defined there; without the superclass every method below fails.
  class Cowardlylion < Service
    # Does nothing useful: the real work happens in #magic_work, which is
    # driven by #consumer. Loops forever, printing a line every 300 seconds.
    def producer
      loop do
        puts "Do nothing"
        sleep 300
      end
    end

    # Subscribes to the Forest topic and hands every message value to
    # #magic_work, marking the message as processed afterwards.
    # A TERM signal stops the consumer.
    def consumer
      @consumer.subscribe('Forest')
      trap('TERM') { @consumer.stop }
      @consumer.each_message do |message|
        puts "Received: #{message.value}"
        magic_work(message.value)
        @consumer.mark_message_as_processed(message)
      end
    end

    # Parses +site+ (a JSON string), sets its 'cowardlylion' key to 'true'
    # and writes the re-serialized JSON to the Road topic.
    def magic_work(site)
      hash = JSON.parse(site)
      hash['cowardlylion'] = 'true'
      json = hash.to_json
      puts "Writing: #{json}"
      write_message(json, topic: 'Road')
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# Glenda - the Scheduler
|
2
|
+
#
|
3
|
+
#
|
4
|
+
# It works off of the DB when
|
5
|
+
# 1) A new site is added
|
6
|
+
# 2) A scheduled site pull is configured to happen
|
7
|
+
#
|
8
|
+
# It then takes the config from the DB and passes it to the Wood topic
|
9
|
+
#
|
10
|
+
# Tinman is running as a Consumer and it will automatically pick up the message
|
11
|
+
# and do its job and then send a message (as a Producer) to the Field topic
|
12
|
+
#
|
13
|
+
# Scarecrow is running as a Consumer and it will automatically pick up the
|
14
|
+
# message and do its job and then send a message (as a Producer)
|
15
|
+
# to the Forest topic
|
16
|
+
#
|
17
|
+
# CowardlyLion is running as a Consumer and it will automatically pick up the
|
18
|
+
# message and do its job and then send a message (as a Producer)
|
19
|
+
# to the Road topic
|
20
|
+
#
|
21
|
+
# Glenda is running as a Consumer and it will automatically pick up messages
|
22
|
+
# from the Road topic. This is the final product of scraping a site. It's stored
|
23
|
+
# in a Hash. Glenda imports the Hash into the MySQL database where it is
|
24
|
+
# presented by Dorothy
|
25
|
+
#
|
26
|
+
# Topics are created when Kafka comes up
|
27
|
+
# HINT: See .env
|
28
|
+
# Every micro-service inherits the Service class
|
29
|
+
module Farmstead
  # Glenda polls the `sites` table for rows that have not been picked up,
  # publishes them on the 'Wood' topic, and consumes finished results from the
  # 'Road' topic.
  class Glenda < Service
    # Loops forever: checks for new sites, then sleeps 3 seconds.
    # The periodic task check (#regular_tasks) is currently disabled.
    def producer
      loop do
        puts 'Checking sites'
        check_sites
        puts 'Checking tasks'
        # regular_tasks
        sleep 3
      end
    end

    # Subscribes to the Road topic. Each message value is parsed as JSON,
    # passed to #import_site, and the site with the message's 'id' is flagged
    # as processed. A TERM signal stops the consumer.
    def consumer
      @consumer.subscribe('Road')
      trap('TERM') { @consumer.stop }
      @consumer.each_message do |message|
        puts "Received: #{message.value}"
        hash = JSON.parse(message.value)
        # JSON.parse yields String keys, so the id must be read with 'id'
        # (a Symbol lookup always returned nil here).
        siteid = hash['id']
        import_site(hash, siteid)
        mark_processed(siteid)
        @consumer.mark_message_as_processed(message)
      end
    end

    # Publishes every site whose `pickedup` column is 'false' to the Wood
    # topic and then flags it as picked up.
    #
    # Returns false when there is nothing to do.
    def check_sites
      sites = db.query("SELECT * FROM sites WHERE pickedup = 'false'")
      return false if sites.count.zero?

      sites.each do |site|
        json = site.to_json
        siteid = get_from_json(json, 'id')
        # import_site(json, siteid)
        write_message(json, topic: 'Wood')
        mark_pickedup(siteid)
      end
    end

    # Sets `pickedup` to 'true' for the site with the given id.
    # The id is coerced with Integer() so that only a number can ever be
    # interpolated into the statement; a non-numeric id raises.
    def mark_pickedup(siteid)
      db.query("UPDATE sites SET pickedup = 'true' WHERE id = #{Integer(siteid)}")
    end

    # Sets `processed` to 'true' for the site with the given id.
    # See #mark_pickedup for the Integer() coercion.
    def mark_processed(siteid)
      db.query("UPDATE sites SET processed = 'true' WHERE id = #{Integer(siteid)}")
    end

    # Publishes every task whose `processed` column is 'false' to the Wood
    # topic. Checks for processing tasks that need to be completed at
    # specific times.
    #
    # Returns false when there is nothing to do.
    def regular_tasks
      tasks = db.query("SELECT * FROM tasks WHERE processed = 'false'")
      return false if tasks.count.zero?

      tasks.each do |task|
        json = task.to_json
        # The id is read the same way #check_sites does; the previously
        # called `get_id` helper is not defined on this class or on Service.
        taskid = get_from_json(json, 'id')
        write_message(json, topic: 'Wood')
        # NOTE(review): this updates the `sites` table using a task id, as
        # the original code did - confirm whether `tasks` should be updated.
        mark_pickedup(taskid)
      end
    end

    # Imports site data as a Hash into the MySQL DB.
    # Currently a stub: returns +sitehash+ unchanged and ignores +siteid+.
    def import_site(sitehash, siteid)
      sitehash
    end

    private

    # Returns the MySQL client, connecting on first use. Service#initialize
    # does not call mysql_init, so @mysql is nil until something asks for it.
    def db
      @mysql || mysql_init
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "erb"
|
2
|
+
|
3
|
+
module Farmstead
  # Creates a Farmstead project: a directory named after the project, filled
  # with files rendered from the ERB templates in the scaffold directory, and
  # then deployed by running the generated exec.sh.
  class Project
    # Scaffold templates shipped with the gem (<gem root>/scaffold), resolved
    # relative to this file so it works regardless of the caller's directory.
    DEFAULT_SCAFFOLD_DIR = File.expand_path("../../scaffold", __dir__)

    attr_accessor :name
    attr_accessor :config
    attr_accessor :database
    attr_accessor :deploy
    # Optional override for the template directory (see #scaffold_dir).
    attr_writer :scaffold_dir

    # Directory the ERB templates are read from; defaults to the gem's own.
    def scaffold_dir
      @scaffold_dir || DEFAULT_SCAFFOLD_DIR
    end

    # Runs the whole pipeline: (re)create the directory, render the
    # templates, start the deployment.
    def create
      create_directory
      generate_files
      start_deploy
    end

    # Runs `bash exec.sh` inside the project directory.
    #
    # The block form of Dir.chdir restores the previous working directory
    # afterwards. Returns what Kernel#system returns (true, false or nil) and
    # warns on stderr when the script did not succeed.
    def start_deploy
      Dir.chdir(@name) do
        succeeded = system("bash", "exec.sh")
        warn "farmstead: exec.sh did not complete successfully" unless succeeded
        succeeded
      end
    end

    # Creates OR RE-Creates the Project Directory
    def create_directory
      remove_dir(@name) if Dir.exist?(@name)
      Dir.mkdir(@name)
    end

    # Renders every *.erb file below #scaffold_dir (dotfiles included) into
    # the project directory, preserving sub-directories and dropping the
    # ".erb" suffix. Templates are evaluated with this method's binding, so
    # they can read the project's instance variables (@name, @database, ...).
    def generate_files
      root = File.expand_path(scaffold_dir)
      pattern = File.join(root, "**", "*.erb")
      Dir.glob(pattern, File::FNM_DOTMATCH).sort.each do |template_path|
        next unless File.file?(template_path)

        # Path of the template relative to the scaffold root.
        relative = template_path[(root.length + 1)..-1]
        folder = File.dirname(relative)
        create_recursive(folder) unless folder == "."

        results = ERB.new(File.read(template_path)).result(binding)
        copy_to_directory(results, File.join(@name, relative.chomp(".erb")))
      end
    end

    # Creates +path+ (slash-separated, relative to the project directory)
    # one segment at a time, skipping segments that already exist. Leading
    # or doubled slashes are ignored.
    def create_recursive(path)
      path.split("/").reject(&:empty?).inject(@name) do |parent, segment|
        target = File.join(parent, segment)
        Dir.mkdir(target) unless File.directory?(target)
        target
      end
    end

    # Recursively deletes +path+.
    #
    # Symlinks are unlinked rather than followed, so a link pointing at a
    # directory outside the project never causes that directory's contents
    # to be removed.
    def remove_dir(path)
      if File.directory?(path) && !File.symlink?(path)
        Dir.foreach(path) do |entry|
          next if entry == "." || entry == ".."

          remove_dir(File.join(path, entry))
        end
        Dir.delete(path)
      else
        File.delete(path)
      end
    end

    # Appends +str+ (plus a trailing newline if it lacks one) to +file_name+.
    # File.open is used instead of Kernel#open so a path beginning with "|"
    # can never be run as a command.
    def copy_to_directory(str, file_name)
      File.open(file_name, "a") do |f|
        f.puts str
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Scarecrow - the Harvester
|
2
|
+
#
|
3
|
+
# Scarecrow is responsible for extracting data from a source
|
4
|
+
#
|
5
|
+
# Scarecrow is running as a Consumer and it will automatically pick up
|
6
|
+
# messages from the Field Topic and do its job
|
7
|
+
# and then send a message as a Producer to the Forest Topic
|
8
|
+
#
|
9
|
+
# Every micro-service inherits the Service class
|
10
|
+
module Farmstead
  # Scarecrow reads messages from the 'Field' topic, tags each payload and
  # republishes it on the 'Forest' topic.
  class Scarecrow < Service
    # Placeholder producer: prints a line, sleeps 300 seconds, repeats forever.
    def producer
      loop do
        puts 'Do something'
        sleep 300
      end
    end

    # Subscribes to the Field topic and feeds each message value to
    # #magic_work before marking the message as processed.
    # A TERM signal stops the consumer.
    def consumer
      @consumer.subscribe('Field')
      trap('TERM') { @consumer.stop }
      @consumer.each_message do |incoming|
        puts "Received: #{incoming.value}"
        magic_work(incoming.value)
        @consumer.mark_message_as_processed(incoming)
      end
    end

    # Parses +site+ as JSON, sets its 'scarecrow' key to 'true' and writes
    # the re-serialized document to the Forest topic.
    def magic_work(site)
      payload = JSON.parse(site).tap { |data| data['scarecrow'] = 'true' }.to_json
      puts "Writing: #{payload}"
      write_message(payload, topic: 'Forest')
    end

    # Call the Site Class (currently only prints a placeholder line).
    def call_class
      puts 'this'
    end

    # Opens a Chrome browser through Watir, loads a page, prints its title
    # and closes the browser again.
    # NOTE(review): Watir is not required anywhere in the visible sources -
    # confirm it is loaded before this method is used.
    def selenium
      chrome = Watir::Browser.new(:chrome)
      chrome.goto('http://www.stackoverflow.com')
      puts chrome.title
      # chrome.text_field(title: 'Search').set 'Hello World!'
      # chrome.button(type: 'submit').click
      # puts chrome.title
      chrome.quit
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# Base Class for all micro-services
|
2
|
+
module Farmstead
  # Shared plumbing for the micro-services: reads configuration from the
  # environment, builds a Kafka client with one producer and one consumer,
  # and offers small helpers for messaging, JSON access and file logging.
  class Service
    # Reads the environment and sets up the Kafka client, producer and
    # consumer. The consumer group id and client id are both the SERVICE
    # environment value. No MySQL connection is opened here; see #mysql_init.
    def initialize
      read_environment
      #mysql_init
      @kafka = Kafka.new(
        seed_brokers: ["#{@broker_host}:9092"],
        client_id: @service_name
      )
      @producer = @kafka.producer
      @consumer = @kafka.consumer(group_id: @service_name)
      @logfile = "/tmp/farmlog"
    end

    # Copies the Kafka/MySQL/service settings from ENV into instance
    # variables. Missing variables are left as nil.
    def read_environment
      @broker_host = ENV['KAFKA_ADVERTISED_HOST_NAME']
      @mysql_host = ENV['MYSQL_HOST']
      @mysql_pass = ENV['MYSQL_PASSWORD']
      @mysql_user = ENV['MYSQL_USER']
      @mysql_db = ENV['MYSQL_DATABASE']
      @service_name = ENV['SERVICE']
      #@selenium_hub = ENV['SELENIUM_HUB']
    end

    # Opens a MySQL connection with the settings gathered by
    # #read_environment, stores it in @mysql and returns it.
    def mysql_init
      @mysql = Mysql2::Client.new(
        host: @mysql_host,
        username: @mysql_user,
        password: @mysql_pass,
        database: @mysql_db
      )
    end

    # Default producer loop; prints 'Producing' forever.
    # The service subclasses in this gem define their own version.
    def producer
      loop do
        puts 'Producing'
      end
    end

    # Default consumer loop; prints 'Consuming' forever.
    # The service subclasses in this gem define their own version.
    def consumer
      loop do
        puts 'Consuming'
      end
    end

    # Each Farmstead site has a Class that defines it
    # Each service runs one or more methods on that Class
    #
    # Looks up the rows of `sites` whose name equals +site+. The name is
    # escaped before it is placed in the statement. +service+ is not used yet.
    #
    # Returns false when no row matches, otherwise the result of iterating
    # the matching rows.
    def find_definitions(site, service)
      escaped = mysql.escape(site.to_s)
      mysite = mysql.query("SELECT * FROM sites WHERE name = '#{escaped}'")
      return false if mysite.count.zero?

      mysite.each do |sited|
        json = sited.to_json
        _config = get_from_json(json, 'config')
        # TODO: Convert to hash where each element is a service and then
        # associate it with the service name passed to this method
      end
    end

    # Runs #find_definitions with a fixed sample site and service.
    def doit
      find_definitions('Yahoo', 'scarecrow')
    end

    # Queues +message+ for +topic+ and delivers it immediately.
    #
    # +topic+ may be a topic name or a Hash with a :topic key. The callers in
    # this gem write `write_message(json, topic: 'Road')`, which reaches this
    # method as a Hash, so both forms are normalized here and the name is
    # handed to the producer as the `topic:` keyword.
    def write_message(message, topic)
      topic_name = topic.is_a?(Hash) ? topic.fetch(:topic) : topic
      @producer.produce(message, topic: topic_name)
      @producer.deliver_messages
    end

    # Appends +text+ followed by a newline to +filename+.
    def write_file(filename, text)
      File.open(filename, 'a') { |file| file.write("#{text}\n") }
    end

    # Parses +json+ and returns the value stored under +element+.
    def get_from_json(json, element)
      hash = JSON.parse(json)
      hash[element]
    end

    # Appends the current time to the log file.
    def print_time
      time1 = Time.now
      write_file(@logfile, "Current Time : #{time1.inspect}")
    end

    private

    # Returns the MySQL client, connecting on first use because
    # #initialize does not call #mysql_init.
    def mysql
      @mysql || mysql_init
    end
  end
end
|