farmstead 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ #--
4
+ # Copyright (c) 2004-2018 Ken Jenney
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #++
25
+
26
+ require "kafka"
27
+ require "mysql2"
28
+ require "json"
29
+ require "nokogiri"
30
+ require "httparty"
31
+ require "open-uri"
32
+ require "mechanize"
33
+
34
+ require "farmstead/version"
35
+ require "farmstead/project"
36
+ require "farmstead/cli"
37
+ require "farmstead/service"
38
+ require "farmstead/tinman"
39
+ require "farmstead/cowardlylion"
40
+ require "farmstead/glenda"
41
+ require "farmstead/scarecrow"
42
+ require "farmstead/tinman"
43
+
44
# Top-level namespace of the Farmstead gem. This file only declares the
# namespace; the classes living under it come from the files required above.
module Farmstead
  # Your code goes here...
end
@@ -0,0 +1,47 @@
1
+ require "thor"
2
+ # require 'farmstead/cli/net'
3
+
4
module Farmstead
  # Thor-based command line interface.
  #
  # `new` scaffolds a project through Farmstead::Project. The remaining
  # commands (tinman, scarecrow, cowardlylion, glenda) instantiate the
  # matching service class and invoke the method named on the command line.
  class CLI < Thor
    class_option :verbose, aliases: "-v", type: :boolean, desc: "Be verbose"
    class_option :config, aliases: "-c", type: :string, desc: "Config file"
    class_option :database, aliases: "-d", type: :string, desc: "Database"
    class_option :deploy, aliases: "-x", type: :string, desc: "Deployment Method"

    desc "new project_name", "Create a new project"
    # Builds a Project, copies over any of the database/config/deploy
    # options that were supplied, and runs Project#create.
    #
    # @param project_name [String] name (and directory) of the new project
    def new(project_name)
      project = Farmstead::Project.new
      project.name = project_name
      %i[database config deploy].each do |setting|
        value = options[setting]
        project.public_send("#{setting}=", value) if value
      end
      project.create
    end

    desc "tinman command", "Send a command to tinman"
    # @param command [String] name of the Tinman method to invoke
    def tinman(command)
      dispatch(Farmstead::Tinman, command)
    end

    desc "scarecrow command", "Send a command to scarecrow"
    # @param command [String] name of the Scarecrow method to invoke
    def scarecrow(command)
      dispatch(Farmstead::Scarecrow, command)
    end

    desc "cowardlylion command", "Send a command to cowardlylion"
    # @param command [String] name of the Cowardlylion method to invoke
    def cowardlylion(command)
      dispatch(Farmstead::Cowardlylion, command)
    end

    desc "glenda command", "Send a command to glenda"
    # @param command [String] name of the Glenda method to invoke
    def glenda(command)
      dispatch(Farmstead::Glenda, command)
    end

    # desc "net COMMANDS", "Net control Module"
    # subcommand "net", Socialinvestigator::CLI::Net

    private

    # Instantiates +service_class+ and calls its public method +command+.
    #
    # The command name comes straight from the command line, so it is
    # checked with respond_to? and invoked with public_send; a plain `send`
    # would also reach private Kernel methods such as `exit` or `exec`.
    #
    # @param service_class [Class] service class to instantiate
    # @param command [String] public method name to call on the instance
    # @raise [Thor::Error] when the service has no such public method
    def dispatch(service_class, command)
      instance = service_class.new
      unless instance.respond_to?(command)
        raise Thor::Error, "Unknown command '#{command}' for #{service_class.name}"
      end

      instance.public_send(command)
    end
  end
end
@@ -0,0 +1,41 @@
1
# CowardlyLion - the Miller
#
# Cowardlylion is responsible for arranging data into usable blocks
#
# CowardlyLion runs as a Consumer: it picks up messages from the Forest
# topic, does its job and then sends a message as a Producer to the Road
# topic
#
# Every micro-service inherits the Service class
module Farmstead
  # Inherits Service because the methods below rely on the @consumer set up
  # in Service#initialize and on Service#write_message; without the
  # superclass both are missing and consumer/magic_work fail.
  class Cowardlylion < Service
    # Does nothing useful: prints a placeholder line every 300 seconds,
    # forever. The real work happens in magic_work, driven by consumer.
    def producer
      loop do
        puts "Do nothing"
        sleep 300
      end
    end

    # Subscribes to the Forest topic and passes every message value to
    # magic_work, marking the message as processed afterwards. A TERM
    # signal stops the consumer.
    def consumer
      @consumer.subscribe('Forest')
      trap('TERM') { @consumer.stop }
      @consumer.each_message do |message|
        puts "Received: #{message.value}"
        magic_work(message.value)
        @consumer.mark_message_as_processed(message)
      end
    end

    # Parses the incoming JSON, tags it with 'cowardlylion' => 'true' and
    # writes the result to the Road topic.
    #
    # @param site [String] JSON document received from the Forest topic
    def magic_work(site)
      payload = JSON.parse(site)
      payload['cowardlylion'] = 'true'
      json = payload.to_json
      puts "Writing: #{json}"
      write_message(json, topic: 'Road')
    end
  end
end
@@ -0,0 +1,99 @@
1
# Glenda - the Scheduler
#
# It works off of the DB when
# 1) A new site is added
# 2) A scheduled site pull is configured to happen
#
# It then takes the config from the DB and passes it to the Wood topic
#
# Tinman is running as a Consumer and it will automatically pick up the message
# and do its job and then send a message (as a Producer) to the Field topic
#
# Scarecrow is running as a Consumer and it will automatically pick up the
# message and do its job and then send a message (as a Producer)
# to the Forest topic
#
# CowardlyLion is running as a Consumer and it will automatically pick up the
# message and do its job and then send a message (as a Producer)
# to the Road topic
#
# Glenda is running as a Consumer and it will automatically pick up messages
# from the Road topic. This is the final product of scraping a site. It's stored
# in a Hash. Glenda imports the Hash into the MySQL database where it is
# presented by Dorothy
#
# Topics are created when Kafka comes up
# HINT: See .env
# Every micro-service inherits the Service class
module Farmstead
  class Glenda < Service
    # Runs an infinite loop: every 3 seconds it checks the sites table for
    # rows that have not been picked up yet. The task check is currently
    # disabled.
    def producer
      loop do
        puts 'Checking sites'
        check_sites
        puts 'Checking tasks'
        # regular_tasks
        sleep 3
      end
    end

    # Subscribes to the Road topic. For each message the JSON payload is
    # parsed, handed to import_site and the site row is flagged as
    # processed. A TERM signal stops the consumer.
    def consumer
      @consumer.subscribe('Road')
      trap('TERM') { @consumer.stop }
      @consumer.each_message do |message|
        puts "Received: #{message.value}"
        hash = JSON.parse(message.value)
        # JSON.parse produces String keys, so the id lives under 'id';
        # looking it up with :id always yields nil.
        siteid = hash['id']
        import_site(hash, siteid)
        mark_processed(siteid)
        @consumer.mark_message_as_processed(message)
      end
    end

    # Looks for sites whose pickedup column is 'false', publishes each row
    # as JSON to the Wood topic and flags it as picked up.
    #
    # @return [false, Object] false when there is nothing to do, otherwise
    #   the result of iterating the rows
    def check_sites
      sites = mysql_connection.query("SELECT * FROM sites WHERE pickedup = 'false'")
      return false if sites.count.zero?

      sites.each do |site|
        json = site.to_json
        siteid = get_from_json(json, 'id')
        # import_site(json, siteid)
        write_message(json, topic: 'Wood')
        mark_pickedup(siteid)
      end
    end

    # Sets the value of pickedup to 'true' for the given site id.
    #
    # @param siteid [Object] value of sites.id
    def mark_pickedup(siteid)
      flag_site('pickedup', siteid)
    end

    # Sets the value of processed to 'true' for the given site id.
    #
    # @param siteid [Object] value of sites.id
    def mark_processed(siteid)
      flag_site('processed', siteid)
    end

    # Checks for any processing tasks that need to be completed at
    # specific times: rows of the tasks table whose processed column is
    # 'false' are published as JSON to the Wood topic.
    #
    # @return [false, Object] false when there is nothing to do, otherwise
    #   the result of iterating the rows
    def regular_tasks
      tasks = mysql_connection.query("SELECT * FROM tasks WHERE processed = 'false'")
      return false if tasks.count.zero?

      tasks.each do |task|
        json = task.to_json
        # No get_id helper exists on this class or on Service, so the id is
        # read the same way check_sites reads it.
        taskid = get_from_json(json, 'id')
        write_message(json, topic: 'Wood')
        # NOTE(review): mark_pickedup updates the sites table, not tasks;
        # confirm which table and column a handled task should touch.
        mark_pickedup(taskid)
      end
    end

    # Imports site data as a Hash into MySQL DB.
    # TODO: not implemented yet; currently hands the Hash back untouched.
    #
    # @param sitehash [Hash] parsed site payload
    # @param siteid [Object] id of the site (unused for now)
    # @return [Hash] sitehash, unchanged
    def import_site(sitehash, siteid)
      sitehash
    end

    private

    # Returns the MySQL client, connecting through Service#mysql_init on
    # first use. Service#initialize does not open the connection (its
    # mysql_init call is commented out), so @mysql would otherwise be nil.
    def mysql_connection
      @mysql ||= mysql_init
    end

    # Sets +column+ to 'true' on the sites row with the given id. The id is
    # passed as a bound parameter instead of being interpolated into the
    # SQL text; +column+ is only ever one of the literals used above.
    def flag_site(column, siteid)
      statement = mysql_connection.prepare(
        "UPDATE sites SET #{column} = 'true' WHERE id = ?"
      )
      statement.execute(siteid)
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require "erb"
2
+
3
require "fileutils"

module Farmstead
  # Creates a Farmstead project: a directory named after the project that
  # is filled with the rendered ERB templates found under ./scaffold and
  # then deployed by running its exec.sh.
  class Project
    # Directory (relative to the working directory) holding the templates.
    SCAFFOLD_DIR = "scaffold"

    attr_accessor :name
    attr_accessor :config
    attr_accessor :database
    attr_accessor :deploy

    # Runs the whole workflow: (re)create the directory, render the
    # templates into it and start the deployment script.
    def create
      create_directory
      generate_files
      start_deploy
    end

    # Changes into the project directory, runs `bash exec.sh` and prints the
    # inspected result of Kernel#system (true, false or nil).
    # NOTE(review): the working directory stays changed after this call.
    def start_deploy
      Dir.chdir @name
      status = system("bash exec.sh")
      p status.inspect
    end

    # Creates OR RE-Creates the Project Directory: an existing directory of
    # the same name is removed first.
    def create_directory
      remove_dir(@name) if Dir.exist?(@name)
      Dir.mkdir(@name)
    end

    # Renders every *.erb template below scaffold/ (dotfiles included) into
    # the project directory, keeping the relative path and dropping the
    # ".erb" suffix.
    #
    # Only the leading "scaffold/" segment is stripped from a template path,
    # and only templates below scaffold/ are considered; unrelated .erb
    # files in the working directory are ignored.
    def generate_files
      pattern = File.join(SCAFFOLD_DIR, "**", "*.erb")
      prefix = %r{\A#{Regexp.escape(SCAFFOLD_DIR)}/}
      Dir.glob(pattern, File::FNM_DOTMATCH).each do |template_path|
        relative = template_path.sub(prefix, "")
        subdirectory = File.dirname(relative)
        # Templates directly under scaffold/ have no subdirectory to create.
        create_recursive(subdirectory) unless subdirectory == "."
        destination = File.join(@name, relative.chomp(".erb"))
        copy_to_directory(render_template(template_path), destination)
      end
    end

    # Creates +path+ (and any missing parents) inside the project
    # directory. Directories that already exist are left alone, so several
    # templates may share a folder.
    #
    # @param path [String] directory path relative to the project root
    def create_recursive(path)
      FileUtils.mkdir_p(File.join(@name, path))
    end

    # Removes a file or a whole directory tree. Symbolic links are removed
    # as links; the directories they point to are not descended into.
    #
    # @param path [String] file or directory to delete
    # @raise [Errno::ENOENT] when +path+ does not exist
    def remove_dir(path)
      FileUtils.rm_r(path)
    end

    # Appends +str+ (plus a trailing newline when missing) to +file_name+,
    # creating the file if necessary.
    #
    # @param str [String] rendered template content
    # @param file_name [String] destination path
    def copy_to_directory(str, file_name)
      File.open(file_name, "a") { |f| f.puts str }
    end

    private

    # Renders one ERB template with this object's binding, so templates can
    # use the project's instance variables and accessors.
    def render_template(template_path)
      ERB.new(File.read(template_path)).result(binding)
    end
  end
end
@@ -0,0 +1,55 @@
1
# Scarecrow - the Harvester
#
# Scarecrow is responsible for extracting data from a source
#
# Scarecrow is running as a Consumer and it will automatically pick up
# messages from the Field Topic and do its job
# and then send a message as a Producer to the Forest Topic
#
# Every micro-service inherits the Service class
module Farmstead
  class Scarecrow < Service
    # Placeholder producer: prints a line every 300 seconds, forever.
    # Messages to the Forest topic are written by magic_work instead.
    def producer
      loop do
        puts 'Do something'
        sleep 300
      end
    end

    # Subscribes to the Field topic and passes every message value to
    # magic_work, marking the message as processed afterwards. A TERM
    # signal stops the consumer.
    def consumer
      @consumer.subscribe('Field')
      trap('TERM') { @consumer.stop }
      @consumer.each_message do |message|
        puts "Received: #{message.value}"
        magic_work(message.value)
        @consumer.mark_message_as_processed(message)
      end
    end

    # Parses the incoming JSON, tags it with 'scarecrow' => 'true' and
    # writes the result to the Forest topic.
    #
    # @param site [String] JSON document received from the Field topic
    def magic_work(site)
      payload = JSON.parse(site)
      payload['scarecrow'] = 'true'
      json = payload.to_json
      puts "Writing: #{json}"
      write_message(json, topic: 'Forest')
    end

    # Call the Site Class
    # TODO: placeholder, only prints 'this'.
    def call_class
      puts 'this'
    end

    # Opens a Chrome browser, loads a fixed page and prints its title.
    # The browser is closed in an ensure clause so it does not stay open
    # when navigation or the title lookup raises.
    # NOTE(review): Watir is referenced here but no require for it is
    # visible in the gem's entry file - confirm it is loaded before use.
    def selenium
      browser = Watir::Browser.new :chrome
      begin
        browser.goto 'http://www.stackoverflow.com'
        puts browser.title
        # browser.text_field(title: 'Search').set 'Hello World!'
        # browser.button(type: 'submit').click
        # puts browser.title
      ensure
        browser.quit
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
# Base Class for all micro-services
module Farmstead
  # Holds what the services share: configuration read from the
  # environment, a Kafka client with one producer and one consumer, and a
  # few JSON/file helpers.
  class Service
    # Reads the environment and builds the Kafka client, producer and
    # consumer. The MySQL connection is not opened here; it is created on
    # first use by the methods that need it.
    def initialize
      read_environment
      #mysql_init
      @kafka = Kafka.new(
        seed_brokers: ["#{@broker_host}:9092"],
        client_id: @service_name
      )
      @producer = @kafka.producer
      @consumer = @kafka.consumer(group_id: @service_name)
      @logfile = "/tmp/farmlog"
    end

    # Copies connection settings from environment variables into instance
    # variables. Variables that are not set yield nil.
    def read_environment
      @broker_host = ENV['KAFKA_ADVERTISED_HOST_NAME']
      @mysql_host = ENV['MYSQL_HOST']
      @mysql_pass = ENV['MYSQL_PASSWORD']
      @mysql_user = ENV['MYSQL_USER']
      @mysql_db = ENV['MYSQL_DATABASE']
      @service_name = ENV['SERVICE']
      #@selenium_hub = ENV['SELENIUM_HUB']
    end

    # Opens the MySQL connection using the settings gathered by
    # read_environment and stores it in @mysql.
    #
    # @return [Mysql2::Client]
    def mysql_init
      @mysql = Mysql2::Client.new(
        host: @mysql_host,
        username: @mysql_user,
        password: @mysql_pass,
        database: @mysql_db
      )
    end

    # Default producer: prints 'Producing' in an endless loop.
    # The service subclasses define their own producer.
    def producer
      loop do
        puts 'Producing'
      end
    end

    # Default consumer: prints 'Consuming' in an endless loop.
    # The service subclasses define their own consumer.
    def consumer
      loop do
        puts 'Consuming'
      end
    end

    # Each Farmstead site has a Class that defines it
    # Each service runs one or more methods on that Class
    #
    # Looks up the rows of the sites table whose name equals +site+. The
    # name is passed as a bound parameter rather than interpolated into
    # the SQL text.
    #
    # @param site [String] site name to look up
    # @param service [String] service name (not used yet)
    # @return [false, Object] false when no row matches, otherwise the
    #   result of iterating the rows
    def find_definitions(site, service)
      statement = mysql_connection.prepare('SELECT * FROM sites WHERE name = ?')
      mysite = statement.execute(site)
      return false if mysite.count.zero?

      mysite.each do |sited|
        json = sited.to_json
        config = get_from_json(json, 'config')
        # Convert to hash where each element is a service and then associate it
        # with the service name passed to this method
      end
    end

    # Runs find_definitions with fixed sample arguments.
    def doit
      find_definitions('Yahoo', 'scarecrow')
    end

    # Queues +message+ on the producer and delivers it immediately.
    #
    # Every caller in the gem writes `write_message(json, topic: 'Road')`,
    # which reaches this method as a Hash. Forwarding that Hash positionally
    # to the producer only works where a trailing Hash is implicitly
    # converted to keywords (Ruby 2.x); on Ruby 3 it raises ArgumentError.
    # The argument is therefore normalised and passed on as keywords.
    #
    # @param message [String] payload to send
    # @param topic [String, Hash] topic name, or a Hash of produce options
    #   such as { topic: 'Road' }
    def write_message(message, topic)
      options = topic.is_a?(Hash) ? topic : { topic: topic }
      @producer.produce(message, **options)
      @producer.deliver_messages
    end

    # Appends +text+ followed by a newline to +filename+.
    #
    # @param filename [String] file to append to (created if missing)
    # @param text [String] line to write
    def write_file(filename, text)
      File.open(filename, 'a') { |file| file.write("#{text}\n") }
    end

    # Gets the value of an element from json
    #
    # @param json [String] JSON document
    # @param element [String] key to read
    # @return [Object, nil] the value stored under +element+
    def get_from_json(json, element)
      JSON.parse(json)[element]
    end

    # Appends the current time to the log file.
    def print_time
      write_file(@logfile, "Current Time : #{Time.new.inspect}")
    end

    private

    # Returns the MySQL client, connecting through mysql_init on first
    # use, since initialize leaves @mysql unset.
    def mysql_connection
      @mysql ||= mysql_init
    end
  end
end