farmstead 0.0.15 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +12 -1
- data/farmstead.gemspec +1 -0
- data/lib/farmstead/api/defaultroutes.rb +14 -0
- data/lib/farmstead/api/helpers.rb +13 -0
- data/lib/farmstead/cli.rb +6 -0
- data/lib/farmstead/config.rb +4 -0
- data/lib/farmstead/pipeline/extract.rb +24 -20
- data/lib/farmstead/pipeline/load.rb +23 -19
- data/lib/farmstead/pipeline/manager.rb +115 -0
- data/lib/farmstead/pipeline/transform.rb +23 -19
- data/lib/farmstead/scaffold/Dockerfile.erb +1 -0
- data/lib/farmstead/scaffold/Gemfile.erb +1 -1
- data/lib/farmstead/scaffold/docker-compose.yml.erb +4 -4
- data/lib/farmstead/scaffold/extract/extracter.rb.erb +5 -3
- data/lib/farmstead/scaffold/load/loader.rb.erb +2 -1
- data/lib/farmstead/scaffold/project.rb.erb +4 -4
- data/lib/farmstead/scaffold/supervisord.conf.erb +2 -2
- data/lib/farmstead/scaffold/transform/transformer.rb.erb +2 -1
- data/lib/farmstead/version.rb +1 -1
- data/lib/farmstead.rb +6 -1
- metadata +20 -8
- data/examples/myproject/extract/extracter.rb +0 -7
- data/examples/myproject/load/loader.rb +0 -7
- data/examples/myproject/myproject.rb +0 -25
- data/examples/myproject/myproject.yml +0 -17
- data/examples/myproject/transform/transformer.rb +0 -7
- data/lib/farmstead/manager.rb +0 -99
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6853da09a2d0ccd7d0dcbae85a830e6968fd2b5f
|
4
|
+
data.tar.gz: bdd81aa444d6adc620bd4535e7d65e4ba2c4ef17
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 878aaa7062cabba0158015b61707e1cda3e686e19c677d9fa5652a796eb5ce01be0b8533d54c0808ba1e9368bb5c98c329c03ffcedc39af2a27734137990544e
|
7
|
+
data.tar.gz: 1250b8c9ad1ff619e0679e818848d3e732edd3c268369735c11d48571b72bdcf1b87196e01515f58c79af01f883cd0c2831f492a518e37abf960339e0a16ba4e
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
farmstead (0.0.15)
|
4
|
+
farmstead (0.0.16)
|
5
5
|
httparty (~> 0.15.6)
|
6
6
|
mechanize (~> 2.7)
|
7
7
|
mysql2 (~> 0.4.10)
|
8
8
|
nokogiri (~> 1.8)
|
9
9
|
os (~> 1.0)
|
10
10
|
ruby-kafka (~> 0.5.1)
|
11
|
+
sinatra (~> 2.0.0)
|
11
12
|
thor (~> 0.20.0)
|
12
13
|
|
13
14
|
GEM
|
@@ -41,6 +42,7 @@ GEM
|
|
41
42
|
mime-types-data (3.2016.0521)
|
42
43
|
mini_portile2 (2.3.0)
|
43
44
|
multi_xml (0.6.0)
|
45
|
+
mustermann (1.0.1)
|
44
46
|
mysql2 (0.4.10)
|
45
47
|
net-http-digest_auth (1.4.1)
|
46
48
|
net-http-persistent (2.9.4)
|
@@ -48,6 +50,9 @@ GEM
|
|
48
50
|
mini_portile2 (~> 2.3.0)
|
49
51
|
ntlm-http (0.1.1)
|
50
52
|
os (1.0.0)
|
53
|
+
rack (2.0.3)
|
54
|
+
rack-protection (2.0.0)
|
55
|
+
rack
|
51
56
|
rake (10.5.0)
|
52
57
|
rb-fsevent (0.10.2)
|
53
58
|
rb-inotify (0.9.10)
|
@@ -66,7 +71,13 @@ GEM
|
|
66
71
|
rspec-support (~> 3.7.0)
|
67
72
|
rspec-support (3.7.0)
|
68
73
|
ruby-kafka (0.5.2)
|
74
|
+
sinatra (2.0.0)
|
75
|
+
mustermann (~> 1.0)
|
76
|
+
rack (~> 2.0)
|
77
|
+
rack-protection (= 2.0.0)
|
78
|
+
tilt (~> 2.0)
|
69
79
|
thor (0.20.0)
|
80
|
+
tilt (2.0.8)
|
70
81
|
unf (0.1.4)
|
71
82
|
unf_ext
|
72
83
|
unf_ext (0.0.7.4)
|
data/farmstead.gemspec
CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.add_runtime_dependency "nokogiri", "~> 1.8"
|
31
31
|
spec.add_runtime_dependency "mechanize", "~> 2.7"
|
32
32
|
spec.add_runtime_dependency "httparty", "~> 0.15.6"
|
33
|
+
spec.add_runtime_dependency "sinatra", "~> 2.0.0"
|
33
34
|
|
34
35
|
spec.add_development_dependency "dotenv", "~> 0.11.1"
|
35
36
|
spec.add_development_dependency "listen", "~> 3.0"
|
data/lib/farmstead/cli.rb
CHANGED
@@ -28,6 +28,12 @@ module Farmstead
|
|
28
28
|
Farmstead::Project.deploy
|
29
29
|
end
|
30
30
|
|
31
|
+
desc "pipelined", "Test a pipeline API"
|
32
|
+
def pipeline
|
33
|
+
Farmstead.const_set("OPERATION", "started")
|
34
|
+
require_relative "farmstead/extract"
|
35
|
+
end
|
36
|
+
|
31
37
|
desc "test COMMANDS", "Test commands"
|
32
38
|
subcommand "test", Farmstead::CLITest
|
33
39
|
|
@@ -8,33 +8,37 @@
|
|
8
8
|
#
|
9
9
|
# Every micro-service inherits the Service class
|
10
10
|
module Farmstead
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
OPERATION = 1
|
12
|
+
module Extract
|
13
|
+
class Producer < Farmstead::Service
|
14
|
+
def doit
|
15
|
+
loop do
|
16
|
+
puts "Do something"
|
17
|
+
sleep 300
|
18
|
+
end
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
20
22
|
# Subscribed to the Field topic
|
21
23
|
# Works on message
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
24
|
+
class Consumer < Farmstead::Service
|
25
|
+
def doit
|
26
|
+
@consumer.subscribe('Field')
|
27
|
+
trap('TERM') { @consumer.stop }
|
28
|
+
@consumer.each_message do |message|
|
29
|
+
puts "Received: #{message.value}"
|
30
|
+
magic_work(message.value)
|
31
|
+
@consumer.mark_message_as_processed(message)
|
32
|
+
end
|
29
33
|
end
|
30
|
-
end
|
31
34
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
35
|
+
def magic_work(site)
|
36
|
+
hash = JSON.parse(site)
|
37
|
+
hash['scarecrow'] = 'true'
|
38
|
+
json = hash.to_json
|
39
|
+
puts "Writing: #{json}"
|
40
|
+
write_message(json, topic: 'Forest')
|
41
|
+
end
|
38
42
|
end
|
39
43
|
end
|
40
44
|
end
|
@@ -8,33 +8,37 @@
|
|
8
8
|
#
|
9
9
|
# Every micro-service inherits the Service class
|
10
10
|
module Farmstead
|
11
|
-
|
11
|
+
module Load
|
12
12
|
# Picks up JSON generated by WebDriver and save it to Forest topic
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
class Producer < Farmstead::Service
|
14
|
+
def doit
|
15
|
+
loop do
|
16
|
+
puts "Do something"
|
17
|
+
sleep 300
|
18
|
+
end
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
20
22
|
# Subscribed to the Field topic
|
21
23
|
# Works on message
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
24
|
+
class Consumer < Farmstead::Service
|
25
|
+
def doit
|
26
|
+
@consumer.subscribe('Field')
|
27
|
+
trap('TERM') { @consumer.stop }
|
28
|
+
@consumer.each_message do |message|
|
29
|
+
puts "Received: #{message.value}"
|
30
|
+
magic_work(message.value)
|
31
|
+
@consumer.mark_message_as_processed(message)
|
32
|
+
end
|
29
33
|
end
|
30
|
-
end
|
31
34
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
35
|
+
def magic_work(site)
|
36
|
+
hash = JSON.parse(site)
|
37
|
+
hash['scarecrow'] = 'true'
|
38
|
+
json = hash.to_json
|
39
|
+
puts "Writing: #{json}"
|
40
|
+
write_message(json, topic: 'Forest')
|
41
|
+
end
|
38
42
|
end
|
39
43
|
end
|
40
44
|
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# Manager
|
2
|
+
#
|
3
|
+
# It works off of the DB when
|
4
|
+
# 1) A new site is added
|
5
|
+
# 2) A scheduled site pull is configured to happen
|
6
|
+
#
|
7
|
+
# It then takes the config from the DB and passed it to the Wood topic
|
8
|
+
#
|
9
|
+
# Tinman is running as a Consumer and it will automatically pick up the message
|
10
|
+
# and do it's job and then send a message (as a Producer) to the Field topic
|
11
|
+
#
|
12
|
+
# Scarecrow is running as a Consumer and it will automatically pick up the
|
13
|
+
# message and do it's job and then send a message (as a Producer)
|
14
|
+
# to the Forest topic
|
15
|
+
#
|
16
|
+
# CowardlyLion is running as a Consumer and it will automatically pick up the
|
17
|
+
# message and do it's job and then send a message (as a Producer)
|
18
|
+
# to the Road topic
|
19
|
+
#
|
20
|
+
# Glenda is running as a Consumer and it will automatically pick up messages
|
21
|
+
# from the Road topic. This is the final product of scraping a site. It's stored
|
22
|
+
# in a Hash. Glenda imports the Hash into the MySQL database where it is
|
23
|
+
# presented by Dorothy
|
24
|
+
#
|
25
|
+
# Topics are created when Kafka comes up
|
26
|
+
# HINT: See .env
|
27
|
+
# Every micro-service inherits the Service class
|
28
|
+
module Farmstead
|
29
|
+
module Manager
|
30
|
+
class Producer < Farmstead::Service
|
31
|
+
def doit
|
32
|
+
loop do
|
33
|
+
puts 'Checking sites'
|
34
|
+
check_sites
|
35
|
+
puts 'Checking tasks'
|
36
|
+
# regular_tasks
|
37
|
+
sleep 3
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Checks for any new sites to be processed
|
42
|
+
# Adds them to the message queue
|
43
|
+
def check_sites
|
44
|
+
sites = @mysql.query("SELECT * FROM sites WHERE pickedup = 'false'")
|
45
|
+
return false if sites.count.zero?
|
46
|
+
sites.each do |site|
|
47
|
+
json = site.to_json
|
48
|
+
siteid = get_from_json(json, 'id')
|
49
|
+
# import_site(json, siteid)
|
50
|
+
write_message(json, topic: 'Wood')
|
51
|
+
mark_pickedup(siteid)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Subscribed to the Road topic
|
57
|
+
# Imports Hash into MySQL Database for each message
|
58
|
+
class Consumer < Farmstead::Service
|
59
|
+
def doit
|
60
|
+
@consumer.subscribe('Road')
|
61
|
+
trap('TERM') { @consumer.stop }
|
62
|
+
@consumer.each_message do |message|
|
63
|
+
puts "Received: #{message.value}"
|
64
|
+
hash = JSON.parse(message.value)
|
65
|
+
import_site(hash, hash[:id])
|
66
|
+
mark_processed(hash[:id])
|
67
|
+
@consumer.mark_message_as_processed(message)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Checks for any new sites to be processed
|
72
|
+
# Adds them to the message queue
|
73
|
+
def check_sites
|
74
|
+
sites = @mysql.query("SELECT * FROM sites WHERE pickedup = 'false'")
|
75
|
+
return false if sites.count.zero?
|
76
|
+
sites.each do |site|
|
77
|
+
json = site.to_json
|
78
|
+
siteid = get_from_json(json, 'id')
|
79
|
+
# import_site(json, siteid)
|
80
|
+
write_message(json, topic: 'Wood')
|
81
|
+
mark_pickedup(siteid)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Sets the value of pickedup to true
|
86
|
+
def mark_pickedup(siteid)
|
87
|
+
@mysql.query("UPDATE sites SET pickedup = 'true' WHERE id = #{siteid}")
|
88
|
+
end
|
89
|
+
|
90
|
+
# Sets the value of processed to true
|
91
|
+
def mark_processed(siteid)
|
92
|
+
@mysql.query("UPDATE sites SET processed = 'true' WHERE id = #{siteid}")
|
93
|
+
end
|
94
|
+
|
95
|
+
# Checks for any processing tasks that need to be
|
96
|
+
# completed at speicifc times
|
97
|
+
def regular_tasks
|
98
|
+
tasks = @mysql.query("SELECT * FROM tasks WHERE processed = 'false'")
|
99
|
+
return false if tasks.count.zero?
|
100
|
+
tasks.each do |task|
|
101
|
+
json = task.to_json
|
102
|
+
taskid = get_id(task)
|
103
|
+
write_message(json, topic: 'Wood')
|
104
|
+
mark_pickedup(taskid)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Imports site data as a Hash into MySQL DB
|
109
|
+
def import_site(sitehash, siteid)
|
110
|
+
sitehash
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
@@ -11,33 +11,37 @@
|
|
11
11
|
#
|
12
12
|
# Every micro-service inherits the Service class
|
13
13
|
module Farmstead
|
14
|
-
|
14
|
+
module Transform
|
15
15
|
# Does nothing...work is handled by magic_work
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
class Producer < Farmstead::Service
|
17
|
+
def doit
|
18
|
+
loop do
|
19
|
+
puts "Do nothing"
|
20
|
+
sleep 300
|
21
|
+
end
|
20
22
|
end
|
21
23
|
end
|
22
24
|
|
23
25
|
# Subscribed to the Field topic
|
24
26
|
# Works on message
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
class Consumer < Farmstead::Service
|
28
|
+
def doit
|
29
|
+
@consumer.subscribe('Forest')
|
30
|
+
trap('TERM') { @consumer.stop }
|
31
|
+
@consumer.each_message do |message|
|
32
|
+
puts "Received: #{message.value}"
|
33
|
+
magic_work(message.value)
|
34
|
+
@consumer.mark_message_as_processed(message)
|
35
|
+
end
|
32
36
|
end
|
33
|
-
end
|
34
37
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
def magic_work(site)
|
39
|
+
hash = JSON.parse(site)
|
40
|
+
hash['cowardlylion'] = 'true'
|
41
|
+
json = hash.to_json
|
42
|
+
puts "Writing: #{json}"
|
43
|
+
write_message(json, topic: 'Road')
|
44
|
+
end
|
41
45
|
end
|
42
46
|
end
|
43
47
|
end
|
@@ -29,7 +29,7 @@ services:
|
|
29
29
|
- 'mysql'
|
30
30
|
build: .
|
31
31
|
environment:
|
32
|
-
SERVICE:
|
32
|
+
SERVICE: Manager
|
33
33
|
ports:
|
34
34
|
- '3000:3000'
|
35
35
|
env_file:
|
@@ -40,7 +40,7 @@ services:
|
|
40
40
|
- 'manage'
|
41
41
|
build: .
|
42
42
|
environment:
|
43
|
-
SERVICE:
|
43
|
+
SERVICE: Extracter
|
44
44
|
env_file:
|
45
45
|
- '.env'
|
46
46
|
|
@@ -49,7 +49,7 @@ services:
|
|
49
49
|
- 'manage'
|
50
50
|
build: .
|
51
51
|
environment:
|
52
|
-
SERVICE:
|
52
|
+
SERVICE: Transformer
|
53
53
|
env_file:
|
54
54
|
- '.env'
|
55
55
|
|
@@ -58,6 +58,6 @@ services:
|
|
58
58
|
- 'manage'
|
59
59
|
build: .
|
60
60
|
environment:
|
61
|
-
SERVICE:
|
61
|
+
SERVICE: Loader
|
62
62
|
env_file:
|
63
63
|
- '.env'
|
@@ -5,8 +5,8 @@
|
|
5
5
|
# 1st - the class you want to call
|
6
6
|
# 2nd - the method you want to call
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
dynamic_module = ARGV[0].capitalize
|
9
|
+
dynamic_class = ARGV[1]
|
10
10
|
|
11
11
|
# Extend Farmstead
|
12
12
|
require "farmstead"
|
@@ -20,6 +20,6 @@ module <%= @name.capitalize %>
|
|
20
20
|
include Farmstead
|
21
21
|
end
|
22
22
|
|
23
|
-
klass = Object.const_get "<%= @name.capitalize %>::#{dynamic_class}"
|
23
|
+
klass = Object.const_get "<%= @name.capitalize %>::#{dynamic_module}::#{dynamic_class}"
|
24
24
|
service = klass.new
|
25
|
-
service.send(
|
25
|
+
service.send("doit")
|
@@ -2,9 +2,9 @@
|
|
2
2
|
nodaemon=true
|
3
3
|
|
4
4
|
[program:producer]
|
5
|
-
command=/usr/local/bin/ruby /service/project.rb %(ENV_SERVICE)s
|
5
|
+
command=/usr/local/bin/ruby /service/project.rb %(ENV_SERVICE)s Producer
|
6
6
|
redirect_stderr=true
|
7
7
|
|
8
8
|
[program:consumer]
|
9
|
-
command=/usr/local/bin/ruby /service/project.rb %(ENV_SERVICE)s
|
9
|
+
command=/usr/local/bin/ruby /service/project.rb %(ENV_SERVICE)s Consumer
|
10
10
|
redirect_stderr=true
|
data/lib/farmstead/version.rb
CHANGED
data/lib/farmstead.rb
CHANGED
@@ -30,16 +30,21 @@ require "nokogiri"
|
|
30
30
|
require "httparty"
|
31
31
|
require "open-uri"
|
32
32
|
require "mechanize"
|
33
|
+
require "sinatra/base"
|
33
34
|
|
34
35
|
require "farmstead/version"
|
35
36
|
require "farmstead/project"
|
36
37
|
require "farmstead/cli"
|
38
|
+
require "farmstead/config"
|
37
39
|
require "farmstead/service"
|
38
|
-
require "farmstead/manager"
|
40
|
+
require "farmstead/pipeline/manager"
|
39
41
|
require "farmstead/pipeline/extract"
|
40
42
|
require "farmstead/pipeline/transform"
|
41
43
|
require "farmstead/pipeline/load"
|
42
44
|
|
45
|
+
require "farmstead/api/defaultroutes.rb"
|
46
|
+
require "farmstead/api/helpers.rb"
|
47
|
+
|
43
48
|
module Farmstead
|
44
49
|
# Your code goes here...
|
45
50
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: farmstead
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.15
|
4
|
+
version: 0.0.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ken Jenney
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-
|
11
|
+
date: 2018-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 0.15.6
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: sinatra
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 2.0.0
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 2.0.0
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: dotenv
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -200,18 +214,16 @@ files:
|
|
200
214
|
- README.md
|
201
215
|
- Rakefile
|
202
216
|
- bin/farmstead
|
203
|
-
- examples/myproject/extract/extracter.rb
|
204
|
-
- examples/myproject/load/loader.rb
|
205
|
-
- examples/myproject/myproject.rb
|
206
|
-
- examples/myproject/myproject.yml
|
207
|
-
- examples/myproject/transform/transformer.rb
|
208
217
|
- farmstead.gemspec
|
209
218
|
- lib/farmstead.rb
|
219
|
+
- lib/farmstead/api/defaultroutes.rb
|
220
|
+
- lib/farmstead/api/helpers.rb
|
210
221
|
- lib/farmstead/cli.rb
|
211
222
|
- lib/farmstead/cli/test.rb
|
212
|
-
- lib/farmstead/manager.rb
|
223
|
+
- lib/farmstead/config.rb
|
213
224
|
- lib/farmstead/pipeline/extract.rb
|
214
225
|
- lib/farmstead/pipeline/load.rb
|
226
|
+
- lib/farmstead/pipeline/manager.rb
|
215
227
|
- lib/farmstead/pipeline/transform.rb
|
216
228
|
- lib/farmstead/project.rb
|
217
229
|
- lib/farmstead/scaffold/.dockerignore.erb
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Primary script to kick off services in project
|
4
|
-
# Pass two arguments to this script
|
5
|
-
# 1st - the class you want to call
|
6
|
-
# 2nd - the method you want to call
|
7
|
-
|
8
|
-
dynamic_class = ARGV[0]
|
9
|
-
dynamic_method = ARGV[1]
|
10
|
-
|
11
|
-
# Extend Farmstead
|
12
|
-
require "farmstead"
|
13
|
-
|
14
|
-
# Load project classes
|
15
|
-
require_relative "extract/extracter"
|
16
|
-
require_relative "load/loader"
|
17
|
-
require_relative "transform/transformer"
|
18
|
-
|
19
|
-
module MyProject
|
20
|
-
include Farmstead
|
21
|
-
end
|
22
|
-
|
23
|
-
klass = Object.const_get "MyProject::#{dynamic_class}"
|
24
|
-
service = klass.new
|
25
|
-
service.send(dynamic_method)
|
@@ -1,17 +0,0 @@
|
|
1
|
-
- name: My Project
|
2
|
-
database:
|
3
|
-
- type: mysql
|
4
|
-
- MYSQL_ROOT_PASSWORD: Rc2$NE99p5%^
|
5
|
-
- MYSQL_DATABASE: farmstead
|
6
|
-
- MYSQL_USER: farmstead
|
7
|
-
- MYSQL_PASSWORD: farmstead
|
8
|
-
- MYSQL_HOST: mysql
|
9
|
-
kafka:
|
10
|
-
- advertise_from_local_ip: false
|
11
|
-
- advertised_ip: 192.168.1.2
|
12
|
-
- zookeeper_address: "zookeeper:2181"
|
13
|
-
- topics
|
14
|
-
- "Manage:1:1"
|
15
|
-
- "Extract:1:1"
|
16
|
-
- "Load:1:1"
|
17
|
-
- "Transform:1:1"
|
data/lib/farmstead/manager.rb
DELETED
@@ -1,99 +0,0 @@
|
|
1
|
-
# Manager
|
2
|
-
#
|
3
|
-
# It works off of the DB when
|
4
|
-
# 1) A new site is added
|
5
|
-
# 2) A scheduled site pull is configured to happen
|
6
|
-
#
|
7
|
-
# It then takes the config from the DB and passed it to the Wood topic
|
8
|
-
#
|
9
|
-
# Tinman is running as a Consumer and it will automatically pick up the message
|
10
|
-
# and do it's job and then send a message (as a Producer) to the Field topic
|
11
|
-
#
|
12
|
-
# Scarecrow is running as a Consumer and it will automatically pick up the
|
13
|
-
# message and do it's job and then send a message (as a Producer)
|
14
|
-
# to the Forest topic
|
15
|
-
#
|
16
|
-
# CowardlyLion is running as a Consumer and it will automatically pick up the
|
17
|
-
# message and do it's job and then send a message (as a Producer)
|
18
|
-
# to the Road topic
|
19
|
-
#
|
20
|
-
# Glenda is running as a Consumer and it will automatically pick up messages
|
21
|
-
# from the Road topic. This is the final product of scraping a site. It's stored
|
22
|
-
# in a Hash. Glenda imports the Hash into the MySQL database where it is
|
23
|
-
# presented by Dorothy
|
24
|
-
#
|
25
|
-
# Topics are created when Kafka comes up
|
26
|
-
# HINT: See .env
|
27
|
-
# Every micro-service inherits the Service class
|
28
|
-
module Farmstead
|
29
|
-
class Manager < Service
|
30
|
-
# Runs on an infinite loop processing records
|
31
|
-
# on MySQL DB and writing messages accordingly
|
32
|
-
def producer
|
33
|
-
loop do
|
34
|
-
puts 'Checking sites'
|
35
|
-
check_sites
|
36
|
-
puts 'Checking tasks'
|
37
|
-
# regular_tasks
|
38
|
-
sleep 3
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
# Subscribed to the Road topic
|
43
|
-
# Imports Hash into MySQL Database for each message
|
44
|
-
def consumer
|
45
|
-
@consumer.subscribe('Road')
|
46
|
-
trap('TERM') { @consumer.stop }
|
47
|
-
@consumer.each_message do |message|
|
48
|
-
puts "Received: #{message.value}"
|
49
|
-
hash = JSON.parse(message.value)
|
50
|
-
import_site(hash, hash[:id])
|
51
|
-
mark_processed(hash[:id])
|
52
|
-
@consumer.mark_message_as_processed(message)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
# Checks for any new sites to be processed
|
57
|
-
# Adds them to the message queue
|
58
|
-
def check_sites
|
59
|
-
sites = @mysql.query("SELECT * FROM sites WHERE pickedup = 'false'")
|
60
|
-
return false if sites.count.zero?
|
61
|
-
sites.each do |site|
|
62
|
-
json = site.to_json
|
63
|
-
siteid = get_from_json(json, 'id')
|
64
|
-
# import_site(json, siteid)
|
65
|
-
write_message(json, topic: 'Wood')
|
66
|
-
mark_pickedup(siteid)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
# Sets the value of pickedup to true
|
71
|
-
def mark_pickedup(siteid)
|
72
|
-
@mysql.query("UPDATE sites SET pickedup = 'true' WHERE id = #{siteid}")
|
73
|
-
end
|
74
|
-
|
75
|
-
# Sets the value of processed to true
|
76
|
-
def mark_processed(siteid)
|
77
|
-
@mysql.query("UPDATE sites SET processed = 'true' WHERE id = #{siteid}")
|
78
|
-
end
|
79
|
-
|
80
|
-
# Checks for any processing tasks that need to be
|
81
|
-
# completed at speicifc times
|
82
|
-
def regular_tasks
|
83
|
-
tasks = @mysql.query("SELECT * FROM tasks WHERE processed = 'false'")
|
84
|
-
return false if tasks.count.zero?
|
85
|
-
tasks.each do |task|
|
86
|
-
json = task.to_json
|
87
|
-
taskid = get_id(task)
|
88
|
-
write_message(json, topic: 'Wood')
|
89
|
-
mark_pickedup(taskid)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Imports site data as a Hash into MySQL DB
|
94
|
-
def import_site(sitehash, siteid)
|
95
|
-
sitehash
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|