farmstead 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +26 -0
- data/.rspec +3 -0
- data/.rubocop.yml +148 -0
- data/.travis.yml +5 -0
- data/.yardopts +2 -0
- data/Gemfile +35 -0
- data/LICENSE.txt +21 -0
- data/README.md +110 -0
- data/Rakefile +6 -0
- data/bin/console +18 -0
- data/bin/setup +8 -0
- data/examples/myproject.yml +23 -0
- data/farmstead.gemspec +26 -0
- data/lib/farmstead.rb +46 -0
- data/lib/farmstead/cli.rb +47 -0
- data/lib/farmstead/cowardlylion.rb +41 -0
- data/lib/farmstead/glenda.rb +99 -0
- data/lib/farmstead/project.rb +79 -0
- data/lib/farmstead/scarecrow.rb +55 -0
- data/lib/farmstead/service.rb +89 -0
- data/lib/farmstead/tinman.rb +49 -0
- data/lib/farmstead/version.rb +3 -0
- data/scaffold/.dockerignore.erb +6 -0
- data/scaffold/.env.erb +9 -0
- data/scaffold/Dockerfile-dorothy.erb +24 -0
- data/scaffold/Dockerfile.erb +25 -0
- data/scaffold/Gemfile.erb +36 -0
- data/scaffold/app/controllers/application_controller.rb.erb +6 -0
- data/scaffold/docker-compose.yml.erb +72 -0
- data/scaffold/exec.sh.erb +25 -0
- data/scaffold/exec.sh.erb.old +57 -0
- data/scaffold/supervisord.conf.erb +10 -0
- metadata +121 -0
data/lib/farmstead.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2004-2018 Ken Jenney
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require "kafka"
|
27
|
+
require "mysql2"
|
28
|
+
require "json"
|
29
|
+
require "nokogiri"
|
30
|
+
require "httparty"
|
31
|
+
require "open-uri"
|
32
|
+
require "mechanize"
|
33
|
+
|
34
|
+
require "farmstead/version"
|
35
|
+
require "farmstead/project"
|
36
|
+
require "farmstead/cli"
|
37
|
+
require "farmstead/service"
|
38
|
+
require "farmstead/tinman"
|
39
|
+
require "farmstead/cowardlylion"
|
40
|
+
require "farmstead/glenda"
|
41
|
+
require "farmstead/scarecrow"
|
42
|
+
require "farmstead/tinman"
|
43
|
+
|
44
|
+
module Farmstead
|
45
|
+
# Top-level namespace for the Farmstead CLI, project generator and services.
|
46
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "thor"
|
2
|
+
# require 'farmstead/cli/net'
|
3
|
+
|
4
|
+
module Farmstead
|
5
|
+
# Thor-based command line interface for Farmstead.
#
# `new` scaffolds a project; the remaining commands instantiate one of the
# micro-service classes and invoke a single zero-argument method on it.
class CLI < Thor
  class_option :verbose, aliases: "-v", type: :boolean, desc: "Be verbose"
  class_option :config, aliases: "-c", type: :string, desc: "Config file"
  class_option :database, aliases: "-d", type: :string, desc: "Database"
  class_option :deploy, aliases: "-x", type: :string, desc: "Deployment Method"

  desc "new project_name", "Create a new project"
  # Builds a Farmstead::Project from the command line options and creates it.
  #
  # @param project_name [String] name assigned to the project
  def new(project_name)
    project = Farmstead::Project.new
    project.name = project_name
    project.database = options[:database] if options[:database]
    project.config = options[:config] if options[:config]
    project.deploy = options[:deploy] if options[:deploy]
    project.create
  end

  desc "tinman command", "Send a command to tinman"
  # @param command [String] name of the Tinman method to invoke
  def tinman(command)
    dispatch(Farmstead::Tinman, command)
  end

  desc "scarecrow command", "Send a command to scarecrow"
  # @param command [String] name of the Scarecrow method to invoke
  def scarecrow(command)
    dispatch(Farmstead::Scarecrow, command)
  end

  desc "cowardlylion command", "Send a command to cowardlylion"
  # @param command [String] name of the Cowardlylion method to invoke
  def cowardlylion(command)
    dispatch(Farmstead::Cowardlylion, command)
  end

  desc "glenda command", "Send a command to glenda"
  # @param command [String] name of the Glenda method to invoke
  def glenda(command)
    dispatch(Farmstead::Glenda, command)
  end

  # desc "net COMMANDS", "Net control Module"
  # subcommand "net", Socialinvestigator::CLI::Net

  private

  # Instantiates +service_class+ and calls +command+ on it.
  #
  # The command comes straight from the command line, so it is checked against
  # the public methods the service adds on top of Object and invoked with
  # public_send; private methods and generic Object methods (exit, instance_eval,
  # ...) are rejected instead of being executed.
  #
  # @param service_class [Class] service class to instantiate
  # @param command [String] method name supplied by the user
  # @raise [Thor::Error] when the service exposes no such public command
  def dispatch(service_class, command)
    allowed = service_class.public_instance_methods - Object.public_instance_methods
    unless allowed.map(&:to_s).include?(command.to_s)
      raise Thor::Error, "Unknown #{service_class.name} command: #{command}"
    end

    service_class.new.public_send(command)
  end
end
|
47
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# CowardlyLion - the Miller
|
2
|
+
#
|
3
|
+
# Cowardlylion is responsible for arranging data into usable blocks
|
4
|
+
#
|
5
|
+
#
|
6
|
+
# CowardlyLion is running as a Consumer and it will automatically pick up
|
7
|
+
# messages from the Forest topic and do its job and then send
|
8
|
+
# a message as a Producer to the Road topic
|
9
|
+
#
|
10
|
+
# Every micro-service inherits the Service class
|
11
|
+
module Farmstead
|
12
|
+
# Consumes messages from the 'Forest' topic, tags them and republishes them to
# the 'Road' topic.
#
# Inherits Service so that @consumer and write_message, which the methods
# below rely on, are actually available.
class Cowardlylion < Service
  # Idle loop: prints a line and sleeps 300 seconds, forever. Publishing
  # happens in magic_work, driven by the consumer.
  def producer
    loop do
      puts "Do nothing"
      sleep 300
    end
  end

  # Subscribes to the 'Forest' topic and hands every message value to
  # magic_work, marking the message as processed afterwards. A TERM signal
  # stops the consumer.
  def consumer
    @consumer.subscribe('Forest')
    trap('TERM') { @consumer.stop }
    @consumer.each_message do |message|
      puts "Received: #{message.value}"
      magic_work(message.value)
      @consumer.mark_message_as_processed(message)
    end
  end

  # Parses the JSON payload, sets its 'cowardlylion' key to the string 'true'
  # and writes the re-serialised document to the 'Road' topic.
  #
  # @param site [String] JSON document received from the 'Forest' topic
  def magic_work(site)
    hash = JSON.parse(site)
    hash['cowardlylion'] = 'true'
    json = hash.to_json
    puts "Writing: #{json}"
    write_message(json, topic: 'Road')
  end
end
|
41
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# Glenda - the Scheduler
|
2
|
+
#
|
3
|
+
#
|
4
|
+
# It works off of the DB when
|
5
|
+
# 1) A new site is added
|
6
|
+
# 2) A scheduled site pull is configured to happen
|
7
|
+
#
|
8
|
+
# It then takes the config from the DB and passes it to the Wood topic
|
9
|
+
#
|
10
|
+
# Tinman is running as a Consumer and it will automatically pick up the message
|
11
|
+
# and do its job and then send a message (as a Producer) to the Field topic
|
12
|
+
#
|
13
|
+
# Scarecrow is running as a Consumer and it will automatically pick up the
|
14
|
+
# message and do its job and then send a message (as a Producer)
|
15
|
+
# to the Forest topic
|
16
|
+
#
|
17
|
+
# CowardlyLion is running as a Consumer and it will automatically pick up the
|
18
|
+
# message and do its job and then send a message (as a Producer)
|
19
|
+
# to the Road topic
|
20
|
+
#
|
21
|
+
# Glenda is running as a Consumer and it will automatically pick up messages
|
22
|
+
# from the Road topic. This is the final product of scraping a site. It's stored
|
23
|
+
# in a Hash. Glenda imports the Hash into the MySQL database where it is
|
24
|
+
# presented by Dorothy
|
25
|
+
#
|
26
|
+
# Topics are created when Kafka comes up
|
27
|
+
# HINT: See .env
|
28
|
+
# Every micro-service inherits the Service class
|
29
|
+
module Farmstead
|
30
|
+
# Scheduler service: publishes unpicked rows of the `sites` table to the
# 'Wood' topic and consumes finished results from the 'Road' topic.
class Glenda < Service
  # Runs forever: checks for new sites, then sleeps 3 seconds.
  # (regular_tasks is currently disabled.)
  def producer
    loop do
      puts 'Checking sites'
      check_sites
      puts 'Checking tasks'
      # regular_tasks
      sleep 3
    end
  end

  # Subscribes to the 'Road' topic. Each message value is parsed as JSON,
  # passed to import_site and the matching site row is flagged as processed.
  # A TERM signal stops the consumer.
  def consumer
    @consumer.subscribe('Road')
    trap('TERM') { @consumer.stop }
    @consumer.each_message do |message|
      puts "Received: #{message.value}"
      hash = JSON.parse(message.value)
      # JSON.parse produces String keys, so the id lives under 'id', not :id.
      siteid = hash['id']
      import_site(hash, siteid)
      mark_processed(siteid)
      @consumer.mark_message_as_processed(message)
    end
  end

  # Publishes every site whose `pickedup` column is 'false' to the 'Wood'
  # topic and then flags it as picked up.
  #
  # @return [false, Object] false when there is nothing to do, otherwise the
  #   value returned by iterating the result set
  def check_sites
    sites = database.query("SELECT * FROM sites WHERE pickedup = 'false'")
    return false if sites.count.zero?

    sites.each do |site|
      json = site.to_json
      siteid = get_from_json(json, 'id')
      # import_site(json, siteid)
      write_message(json, topic: 'Wood')
      mark_pickedup(siteid)
    end
  end

  # Sets `pickedup` to 'true' for the given site.
  #
  # @param siteid [Integer, String] numeric site id
  # @raise [ArgumentError, TypeError] when siteid is not an integer value
  def mark_pickedup(siteid)
    # Integer() guarantees only a number is ever interpolated into the SQL.
    database.query("UPDATE sites SET pickedup = 'true' WHERE id = #{Integer(siteid)}")
  end

  # Sets `processed` to 'true' for the given site.
  #
  # @param siteid [Integer, String] numeric site id
  # @raise [ArgumentError, TypeError] when siteid is not an integer value
  def mark_processed(siteid)
    database.query("UPDATE sites SET processed = 'true' WHERE id = #{Integer(siteid)}")
  end

  # Publishes every row of `tasks` whose `processed` column is 'false' to the
  # 'Wood' topic. Intended for processing tasks that must run at specific
  # times; not called at the moment (see producer).
  #
  # @return [false, Object] false when there are no pending tasks
  def regular_tasks
    tasks = database.query("SELECT * FROM tasks WHERE processed = 'false'")
    return false if tasks.count.zero?

    tasks.each do |task|
      json = task.to_json
      # Same id lookup as check_sites (the former get_id helper does not exist).
      taskid = get_from_json(json, 'id')
      write_message(json, topic: 'Wood')
      # NOTE(review): mark_pickedup updates the `sites` table, not `tasks` -
      # confirm this is intended before enabling regular_tasks.
      mark_pickedup(taskid)
    end
  end

  # Placeholder for importing site data into MySQL; currently returns the
  # hash untouched.
  #
  # @param sitehash [Hash] parsed site document
  # @param siteid [Object] id of the site (unused for now)
  # @return [Hash] sitehash, unchanged
  def import_site(sitehash, siteid)
    sitehash
  end

  private

  # Returns the MySQL client, connecting on first use. Service#initialize does
  # not call mysql_init, so @mysql would otherwise still be nil here.
  def database
    @mysql || mysql_init
  end
end
|
99
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "erb"
|
2
|
+
|
3
|
+
module Farmstead
|
4
|
+
# creates a Farmstead Project
|
5
|
+
# Creates a Farmstead project: a directory named after the project, filled
# with files rendered from the ERB templates in the scaffold directory, and
# then deployed by running the generated exec.sh.
class Project
  # Scaffold shipped with the gem (<gem root>/scaffold).
  DEFAULT_SCAFFOLD_DIR = File.expand_path("../../scaffold", __dir__)

  attr_accessor :name
  attr_accessor :config
  attr_accessor :database
  attr_accessor :deploy
  # Optional override of the directory the templates are read from.
  attr_writer :scaffold_dir

  # @return [String] absolute path of the directory holding the templates
  def scaffold_dir
    File.expand_path(@scaffold_dir || DEFAULT_SCAFFOLD_DIR)
  end

  # Creates the directory, renders the templates and starts the deployment.
  def create
    create_directory
    generate_files
    start_deploy
  end

  # Changes into the project directory and runs `bash exec.sh`, printing the
  # result of the call.
  def start_deploy
    Dir.chdir @name
    status = system("bash exec.sh")
    p status.inspect
  end

  # Creates OR RE-Creates the Project Directory
  #
  # @raise [ArgumentError] when the name is blank or resolves to the current
  #   directory or one of its ancestors (which would otherwise be wiped)
  def create_directory
    validate_name!
    remove_dir(@name) if Dir.exist?(@name)
    Dir.mkdir(@name)
  end

  # Renders every *.erb file below scaffold_dir (dotfiles included) into the
  # project directory, keeping the relative path and dropping the ".erb"
  # suffix. Templates are evaluated with this method's binding.
  def generate_files
    root = scaffold_dir
    pattern = File.join(root, "**", "*.erb")
    Dir.glob(pattern, File::FNM_DOTMATCH).each do |scaffoldpath|
      next unless File.file?(scaffoldpath)

      # Path relative to the scaffold root, with a leading "/".
      file = scaffoldpath[root.length..-1]
      foldername = File.dirname(file)
      # Create folder structure of subdirectories
      create_recursive(foldername) unless foldername == "/"
      projectpath = "#{@name}#{file}".chomp(".erb")
      template = File.read(scaffoldpath)
      results = ERB.new(template).result(binding)
      copy_to_directory(results, projectpath)
    end
  end

  # Creates +path+ and all of its parents inside the project directory.
  # Directories that already exist are left alone.
  #
  # @param path [String] slash separated path relative to the project root
  def create_recursive(path)
    directory = ""
    path.split("/").each do |sub_directory|
      next if sub_directory.empty?

      directory += "#{sub_directory}/"
      target = File.join(@name, directory)
      Dir.mkdir(target) unless File.directory?(target)
    end
  end

  # Recursively removes +path+. Symbolic links are unlinked, never followed,
  # so nothing outside of +path+ is deleted.
  #
  # @param path [String] file or directory to remove
  def remove_dir(path)
    if File.directory?(path) && !File.symlink?(path)
      (Dir.entries(path) - %w[. ..]).each do |entry|
        remove_dir("#{path}/#{entry}")
      end
      Dir.delete(path)
    else
      File.delete(path)
    end
  end

  # Appends the rendered template text to +file_name+.
  #
  # @param str [String] rendered template
  # @param file_name [String] destination path
  def copy_to_directory(str, file_name)
    # File.open rather than Kernel#open: the latter treats a leading "|" in
    # the path as a command to run.
    File.open(file_name, "a") do |f|
      f.puts str
    end
  end

  private

  # Refuses project names that would make create_directory delete the working
  # directory or one of its parents.
  def validate_name!
    raise ArgumentError, "project name is required" if @name.nil? || @name.to_s.strip.empty?

    target = File.expand_path(@name.to_s)
    cwd = Dir.pwd
    return unless cwd == target || cwd.start_with?("#{target.chomp('/')}/")

    raise ArgumentError, "refusing to use #{@name.inspect} as project directory"
  end
end
|
79
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Scarecrow - the Harvester
|
2
|
+
#
|
3
|
+
# Scarecrow is responsible for extracting data from a source
|
4
|
+
#
|
5
|
+
# Scarecrow is running as a Consumer and it will automatically pick up
|
6
|
+
# messages from the Field Topic and do its job
|
7
|
+
# and then send a message as a Producer to the Forest Topic
|
8
|
+
#
|
9
|
+
# Every micro-service inherits the Service class
|
10
|
+
module Farmstead
|
11
|
+
# Consumes messages from the 'Field' topic, tags them and republishes them to
# the 'Forest' topic.
class Scarecrow < Service
  # Idle loop: prints a line and sleeps 300 seconds, forever. Publishing
  # happens in magic_work, driven by the consumer.
  def producer
    loop do
      puts 'Do something'
      sleep 300
    end
  end

  # Subscribes to the 'Field' topic and hands every message value to
  # magic_work, marking the message as processed afterwards. A TERM signal
  # stops the consumer.
  def consumer
    @consumer.subscribe('Field')
    trap('TERM') { @consumer.stop }
    @consumer.each_message do |message|
      puts "Received: #{message.value}"
      magic_work(message.value)
      @consumer.mark_message_as_processed(message)
    end
  end

  # Parses the JSON payload, sets its 'scarecrow' key to the string 'true'
  # and writes the re-serialised document to the 'Forest' topic.
  #
  # @param site [String] JSON document received from the 'Field' topic
  def magic_work(site)
    hash = JSON.parse(site)
    hash['scarecrow'] = 'true'
    json = hash.to_json
    puts "Writing: #{json}"
    write_message(json, topic: 'Forest')
  end

  # Call the Site Class (placeholder: only prints 'this').
  def call_class
    puts 'this'
  end

  # Opens a Chrome browser through Watir, loads a fixed page and prints its
  # title. The browser is always closed, even when navigation fails.
  #
  # NOTE(review): none of the requires in farmstead.rb load Watir - confirm it
  # is required somewhere before this method is used.
  def selenium
    browser = Watir::Browser.new :chrome
    begin
      browser.goto 'http://www.stackoverflow.com'
      puts browser.title
      # browser.text_field(title: 'Search').set 'Hello World!'
      # browser.button(type: 'submit').click
      # puts browser.title
    ensure
      browser.quit
    end
  end
end
|
55
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# Base Class for all micro-services
|
2
|
+
module Farmstead
|
3
|
+
# Base Class for all micro-services.
#
# Reads its configuration from the environment, builds a Kafka client with a
# producer and a consumer, and offers small helpers for messaging, JSON and
# file logging.
class Service
  # Reads the environment and creates the Kafka client, producer and
  # consumer. MySQL is not connected here; see mysql_init.
  def initialize
    read_environment
    # mysql_init
    @kafka = Kafka.new(
      seed_brokers: ["#{@broker_host}:9092"],
      client_id: @service_name
    )
    @producer = @kafka.producer
    @consumer = @kafka.consumer(group_id: @service_name)
    @logfile = "/tmp/farmlog"
  end

  # Copies the Kafka, MySQL and service settings from ENV into instance
  # variables. Missing variables are stored as nil.
  def read_environment
    @broker_host = ENV['KAFKA_ADVERTISED_HOST_NAME']
    @mysql_host = ENV['MYSQL_HOST']
    @mysql_pass = ENV['MYSQL_PASSWORD']
    @mysql_user = ENV['MYSQL_USER']
    @mysql_db = ENV['MYSQL_DATABASE']
    @service_name = ENV['SERVICE']
    # @selenium_hub = ENV['SELENIUM_HUB']
  end

  # Connects to MySQL with the settings gathered by read_environment.
  #
  # @return [Mysql2::Client] the new client, also stored in @mysql
  def mysql_init
    @mysql = Mysql2::Client.new(
      host: @mysql_host,
      username: @mysql_user,
      password: @mysql_pass,
      database: @mysql_db
    )
  end

  # Default producer: prints 'Producing' in an endless loop. Services
  # override this.
  def producer
    loop do
      puts 'Producing'
    end
  end

  # Default consumer: prints 'Consuming' in an endless loop. Services
  # override this.
  def consumer
    loop do
      puts 'Consuming'
    end
  end

  # Each Farmstead site has a Class that defines it
  # Each service runs one or more methods on that Class
  #
  # Looks up the rows of `sites` with the given name and reads the 'config'
  # element of each; nothing is done with the config yet.
  #
  # @param site [String] site name
  # @param service [String] service name (not used yet)
  # @return [false, Object] false when no site matches
  def find_definitions(site, service)
    # Connect on demand (initialize does not) and escape the name before it
    # is placed into the statement.
    client = @mysql || mysql_init
    mysite = client.query("SELECT * FROM sites WHERE name = '#{client.escape(site.to_s)}'")
    return false if mysite.count.zero?

    mysite.each do |sited|
      json = sited.to_json
      _config = get_from_json(json, 'config')
      # TODO: Convert to hash where each element is a service and then
      # associate it with the service name passed to this method
    end
  end

  # Runs find_definitions for the site 'Yahoo' and the service 'scarecrow'.
  def doit
    find_definitions('Yahoo', 'scarecrow')
  end

  # Produces +message+ on a Kafka topic and delivers it immediately.
  #
  # @param message [String] message value
  # @param topic [String, Hash] topic name, or a hash carrying it under
  #   :topic - which is what `write_message(json, topic: 'Road')` passes
  def write_message(message, topic)
    # Unwrap the hash form so the topic always reaches the producer as a real
    # keyword argument (a positional hash is not converted on Ruby 3).
    topic = topic[:topic] || topic['topic'] if topic.is_a?(Hash)
    @producer.produce(message, topic: topic)
    @producer.deliver_messages
  end

  # Appends +text+ plus a newline to +filename+.
  #
  # @param filename [String] file to append to
  # @param text [String] line to write
  def write_file(filename, text)
    File.open(filename, 'a') { |file| file.write("#{text}\n") }
  end

  # Gets the value of an element from json
  #
  # @param json [String] JSON document
  # @param element [String] key to look up
  # @return [Object, nil] the value stored under +element+
  def get_from_json(json, element)
    JSON.parse(json)[element]
  end

  # Appends the current time to the log file.
  def print_time
    now = Time.new
    write_file(@logfile, "Current Time : #{now.inspect}")
  end
end
|
89
|
+
end
|