datapipe 0.0.2 → 0.0.3
- data/bin/datapipe +18 -3
- data/lib/DataPipe.rb +1 -0
- data/lib/DbToDir.rb +12 -7
- data/lib/DbToJson.rb +29 -0
- data/lib/DirToDb.rb +27 -0
- data/lib/Host.rb +10 -5
- data/lib/Jobs.rb +27 -1
- data/lib/JsonToPgsql.rb +25 -0
- data/lib/PathFromRemote.rb +27 -0
- data/lib/Stream.rb +9 -0
- data/lib/StreamBuilder.rb +32 -0
- data/lib/StreamParser.rb +70 -0
- data/lib/WWW.rb +68 -0
- data/lib/helper_functions.rb +3 -0
- metadata +16 -8
data/bin/datapipe
CHANGED
@@ -10,9 +10,24 @@ $:.unshift "/guyirvine.com/Stream/DataPipe/lib"
 require 'rubygems'
 require 'DataPipe'
 
-
-
+@hash = Hash.new
+
+datapipe_thread = Thread.new do
+    begin
+        h = DataPipe::Host.new
+        h.hash = @hash
+        h.run()
+    rescue Exception=>e
+        puts e.message
+        puts e.backtrace
+    end
 end
 
-
+
+s = DataPipe::WWW.new
+s.hash = @hash
+Rack::Handler::Thin.run s, :Port => 9292
+
+
+Thread.kill( datapipe_thread )
 
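The executable now starts the DSL host in a background thread and serves the new DataPipe::WWW status app through Thin on port 9292; Rack::Handler::Thin.run blocks, so the host thread is only killed once the web server exits. A quick smoke test against a running instance (a sketch, assuming the default port):

    require 'net/http'

    # The root route lists the top-level resources exposed by the status app.
    puts Net::HTTP.get( URI( 'http://localhost:9292/' ) )   # => ["jobs","errors"]
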
data/lib/DataPipe.rb
CHANGED
data/lib/DbToDir.rb
CHANGED
@@ -1,15 +1,17 @@
-require 'FluidDb'
-require "json"
+require 'StreamBuilder'
 
 
 def DbToDir( db_env_name, sql, splitField, path, prefix )
-    Dir.mkdir( path ) unless Dir.exists?( path )
-
+    # Dir.mkdir( path ) unless Dir.exists?( path )
 
     db = DataPipe.getFluidDb( db_env_name )
 
     hash = Hash.new
-    db.queryForResultset( sql, [] ).each do |r|
+    rst = db.queryForResultset( sql, [] )
+    columns = rst[0].keys if rst.length > 0
+
+
+    rst.each do |r|
         hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
 
         hash[r[splitField]] << r
@@ -19,9 +21,12 @@ def DbToDir( db_env_name, sql, splitField, path, prefix )
     Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
 
     hash.each do |k,v|
-        File.write( "#{basePath}#{k}.js", v.to_json )
+        s = StreamBuilder.new
+            .f( columns )
+        v.each { |r| s.add *r.values }
+        File.write( "#{basePath}#{k}.js", s.serialize )
     end
-
+
     return hash
 end
 
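DbToDir now serialises each split group through the new StreamBuilder instead of dumping the row array as plain JSON; the old behaviour lives on in the new DbToJson (and in DirToDb, which despite its file name currently defines a second DbToDir function). For two rows with columns id and name (illustrative values), the per-key file contents change roughly like this:

    # old DbToDir / new DbToJson output for one key's group:
    #   [{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]
    # new DbToDir output for the same rows, via StreamBuilder#serialize:
    #   {"v":1,"d":{},"f":["id","name"],"l":[1,"Alice",2,"Bob"]}
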
data/lib/DbToJson.rb
ADDED
@@ -0,0 +1,29 @@
+require 'FluidDb'
+require "json"
+
+
+def DbToJson( db_env_name, sql, splitField, path, prefix )
+    Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+    db = DataPipe.getFluidDb( db_env_name )
+
+    hash = Hash.new
+    rst = db.queryForResultset( sql, [] )
+    columns = rst[0].keys if rst.length > 0
+    rst.each do |r|
+        hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+        hash[r[splitField]] << r
+    end
+
+    basePath = "#{path}/#{prefix}-"
+    Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+    hash.each do |k,v|
+        File.write( "#{basePath}#{k}.js", v.to_json )
+    end
+
+    return hash
+end
+
data/lib/DirToDb.rb
ADDED
@@ -0,0 +1,27 @@
+require 'FluidDb'
+require "json"
+
+
+def DbToDir( db_env_name, sql, splitField, path, prefix )
+    Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+    db = DataPipe.getFluidDb( db_env_name )
+
+    hash = Hash.new
+    db.queryForResultset( sql, [] ).each do |r|
+        hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+        hash[r[splitField]] << r
+    end
+
+    basePath = "#{path}/#{prefix}-"
+    Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+    hash.each do |k,v|
+        File.write( "#{basePath}#{k}.js", v.to_json )
+    end
+
+    return hash
+end
+
data/lib/Host.rb
CHANGED
@@ -1,22 +1,29 @@
+require 'rubygems'
+require 'rack'
+require 'thin'
+
 
 module DataPipe
-
+
     class Host
+
+        attr_accessor :hash
 
         def run
 
             libs = ENV["LIB"] ||= "./lib"
             dsl_paths = ENV["DSL"] ||= "./dsl"
-
+            puts "dsl_paths: #{dsl_paths}"
 
             libs.split( ";" ).each do |path|
                 DataPipe.log "Adding libdir: #{path}"
                 $:.unshift path
-
+            end
             loop = true
 
 
             jobs = Jobs.new
+            @hash['jobs'] = jobs
             while loop do
                 begin
                     dsl_paths.split( ";" ).each do |dsl_dir|
@@ -26,7 +33,6 @@ module DataPipe
                     end
                 end
 
-
                 sleep 0.5
             rescue SystemExit, Interrupt
                 puts "Exiting on request ..."
@@ -34,7 +40,6 @@ module DataPipe
             end
         end
 
-        end
 
     end
 
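Host now carries the shared hash that links it to the web app (run registers the Jobs registry under hash['jobs']) and logs its DSL search paths. Configuration is unchanged and comes from the environment; a minimal launch sketch (paths and the job name are hypothetical):

    ENV['LIB'] = './lib;./shared/lib'        # ';'-separated dirs pushed onto $LOAD_PATH
    ENV['DSL'] = './dsl'                     # ';'-separated dirs scanned for *.dsl jobs
    ENV['nightly_sync_CRON'] = '30 2 * * *'  # per-job schedule, keyed '<job name>_CRON'

    h = DataPipe::Host.new
    h.hash = Hash.new   # must be assigned; run stores the Jobs registry in hash['jobs']
    h.run               # polls the DSL dirs every 0.5s until interrupted
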
data/lib/Jobs.rb
CHANGED
@@ -4,14 +4,32 @@ module DataPipe
 
 
 class Job
+
+    attr_reader :name, :next, :errorList
 
     def initialize( path )
         @path = path
         @name = File.basename( path, ".dsl" )
         @cronString = ""
+
+        @errorList = Array.new
         self.setCron
     end
 
+    def addError( e )
+        # Job Error -> Time, Exception Class Name, msg, backtrace
+        @errorList << "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+    end
+
+    def clearError
+        @errorList = Array.new
+    end
+
+    def runNow
+        @next = Time.now - 1
+    end
+
+
     def setCron
         tmp = ENV["#{@name}_CRON"] ||= "0 0 * * *"
         return if tmp == @cronString
@@ -29,12 +47,15 @@ class Job
         DataPipe.log "path: #{@path}", true
         DataPipe.log "dsl: #{@name}"
         load @path
+        self.clearError
 
     rescue SystemExit, Interrupt
         raise
     rescue Exception => e
-        string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+        # string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+        string = e.message
         DataPipe.log_dsl @name, string
+        self.addError( e )
     end
 
         self.setCron
@@ -43,16 +64,21 @@ class Job
 end
 
 class Jobs
+
+    attr_reader :hash, :byName
 
     def initialize
         @hash = Hash.new
+        @byName = Hash.new
     end
 
+
 
     def call( path )
         if @hash[path].nil? then
             j = Job.new( path )
             @hash[path] = j
+            @byName[j.name.downcase] = j
             j.run
         else
             @hash[path].call
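Job now keeps a per-job error list, cleared on each successful load and appended to on each failure, and can be forced to run by pulling its next-run time into the past; Jobs gains a lowercase-name index so the web app can address jobs by name. A sketch of the new surface (the DSL path is hypothetical):

    jobs = DataPipe::Jobs.new
    jobs.call( './dsl/nightly_sync.dsl' )   # first call creates the Job and runs it
    job = jobs.byName['nightly_sync']
    job.errorList    # => ["ClassName: message\n<backtrace>", ...] after failures
    job.runNow       # sets job.next to Time.now - 1 so the next poll re-runs it
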
data/lib/JsonToPgsql.rb
ADDED
@@ -0,0 +1,25 @@
+require "FluidDb"
+require "json"
+
+
+def JsonToPgsql( source_env_name, destination_env_name, tableName, columns )
+    d = DataPipe.getFluidDb( destination_env_name )
+
+    d.execute( "TRUNCATE TABLE #{tableName}", [])
+
+    results = s.connection.exec( sql )
+
+    d.connection.exec( "COPY #{tableName} (#{columns.join( "," )}) FROM STDIN WITH DELIMITER AS '|' CSV;" )
+
+    JSON.parse( IO.read( DataPipe.getEnvVar( source_env_name ) ) ).each do |row|
+        l = Array.new
+        columns.each do |name|
+            l << row[name]
+        end
+        d.connection.put_copy_data "#{l.join( '|' )}\n"
+    end
+    d.connection.put_copy_end
+
+    DataPipe.log "#{tableName}: #{count}", true
+end
+
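JsonToPgsql bulk-loads a JSON file (its path read from the environment variable named by source_env_name) into a Postgres table over the COPY protocol instead of per-row INSERTs. Note that, as shipped, the body still references undefined locals (s, sql, count), apparently left over from a sibling loader, so a call such as the following (env var and table names hypothetical) will raise NameError until those lines are removed:

    JsonToPgsql( 'USERS_JSON', 'WAREHOUSE_DB', 'users', ['id', 'name', 'email'] )
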
data/lib/PathFromRemote.rb
ADDED
@@ -0,0 +1,27 @@
+require 'net/sftp'
+require 'uri'
+
+
+def PathFromRemote( remoteUri, localPath, prefix )
+
+    uri = URI.parse( remoteUri )
+    DataPipe.log "remoteUri: #{remoteUri}, localPath: #{localPath}, prefix: #{prefix}", true
+
+    Net::SFTP.start( uri.host, uri.user, :password => uri.password ) do |sftp|
+        Dir.glob( "#{localPath}/#{prefix}*" ).each do |path|
+            File.rm( path )
+        end
+
+        sftp.dir.foreach(uri.path) do |entry|
+            name = entry.name
+            if name[0,prefix.length] == prefix && entry.file? then
+                DataPipe.log "sftp.rm: #{uri.path}/#{name}"
+                sftp.download!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+                sftp.remove!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+            end
+        end
+
+    end
+
+end
+
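PathFromRemote clears matching files from the local directory, then fetches and deletes every remote file whose name starts with the given prefix. Two caveats in the shipped code: File.rm is not a standard Ruby method (File.delete or FileUtils.rm is), and sftp.download! is called without a local destination, which makes Net::SFTP return the file contents in memory; they are discarded rather than written under localPath. The intended call shape (URI and paths hypothetical):

    PathFromRemote( 'sftp://user:secret@files.example.com/outbox', './incoming', 'export-' )
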
data/lib/Stream.rb
ADDED
data/lib/StreamBuilder.rb
ADDED
@@ -0,0 +1,32 @@
+require 'json'
+
+
+class StreamBuilder
+
+    def initialize
+        @h = Hash['v',1,'d',{},'f',[],'l',[]]
+    end
+
+    def set( name, value )
+        @h['d'][name] = value
+        return self
+    end
+
+    def f( fieldList )
+        @h['f'] = fieldList
+
+        return self
+    end
+
+    def add( *args )
+        @h['l'].concat args
+
+        return self
+    end
+
+    def serialize
+        return @h.to_json
+    end
+
+end
+
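StreamBuilder assembles the version-1 stream document that DbToDir now writes: 'v' is the format version, 'd' holds default fields shared by every record, 'f' the per-record field names, and 'l' a flat value list, one format-sized chunk per record. A minimal sketch:

    require 'StreamBuilder'

    s = StreamBuilder.new
    s.f( ['id', 'name'] )
     .set( 'source', 'crm' )
     .add( 1, 'Alice' )
     .add( 2, 'Bob' )
    puts s.serialize
    # => {"v":1,"d":{"source":"crm"},"f":["id","name"],"l":[1,"Alice",2,"Bob"]}
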
data/lib/StreamParser.rb
ADDED
@@ -0,0 +1,70 @@
+module Telemetry
+
+    require 'json'
+    require 'date'
+
+    class StreamParserError<StandardError
+    end
+
+    class VersionNotSpecifiedError<StreamParserError
+    end
+    class VersionNotSupportedError<StreamParserError
+    end
+    class MissingFormatError<StreamParserError
+    end
+    class InvalidFormatError<StreamParserError
+    end
+    class MissingListError<StreamParserError
+    end
+    class MeasurementFieldNotSuppliedError<StreamParserError
+    end
+
+    class StreamParser
+
+        attr_reader :version, :list, :all_fields
+
+        def method_missing( meth, *args, &block )
+            raise VersionNotSupportedError.new if meth[0,5] == "parse"
+
+            raise NoMethodError.new( "method: #{meth}" )
+        end
+
+        def parse1
+            raise MissingFormatError.new if @r['f'].nil?
+            raise InvalidFormatError.new if @r['f'].length == 0
+            raise MissingListError.new if @r['l'].nil?
+
+            @defaults = @r['d'] || {}
+
+            @list = Array.new
+            format = @r['f']
+
+            @all_fields = @defaults.keys + @r['f']
+
+            #Break list up into chunks, each chunk being the size of the format record
+            @r['l'].each_slice( format.length ).with_index do |el,idx|
+
+                obj = @defaults.clone
+                format.each_with_index do |name,idx|
+                    obj[name] = el[idx]
+                end
+                @list << obj
+            end
+
+        end
+
+        def initialize( payload )
+            @r = JSON.parse( payload )
+
+            raise VersionNotSpecifiedError.new if @r['v'].nil?
+            @version = @r['v']
+
+            self.send "parse#{version}"
+
+            return self
+        end
+
+    end
+
+end
+
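StreamParser is the matching reader: it validates the version and format, then rebuilds one hash per record by merging the defaults with each format-sized slice of the value list; unknown versions fall through method_missing to VersionNotSupportedError. Round-tripping the document built above:

    payload = '{"v":1,"d":{"source":"crm"},"f":["id","name"],"l":[1,"Alice",2,"Bob"]}'
    parsed = Telemetry::StreamParser.new( payload )
    parsed.all_fields   # => ["source", "id", "name"]
    parsed.list         # => [{"source"=>"crm", "id"=>1, "name"=>"Alice"},
                        #     {"source"=>"crm", "id"=>2, "name"=>"Bob"}]
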
data/lib/WWW.rb
ADDED
@@ -0,0 +1,68 @@
+require 'rubygems'
+require 'rack'
+require 'json'
+
+
+module DataPipe
+
+    class WWW
+
+        attr_accessor :hash
+
+        def call(env)
+            @root = File.expand_path(File.dirname(__FILE__))
+            path = Rack::Utils.unescape(env['PATH_INFO'])
+            # path += 'index.html' if path == '/'
+            file = @root + "#{path}"
+
+            params = Rack::Utils.parse_nested_query(env['QUERY_STRING'])
+
+            request = Rack::Request.new(env)
+            response = Rack::Response.new()
+            parts = request.path_info.downcase.split( "/" )
+            section = parts[1]
+            case true
+            when request.request_method == "GET" && request.path_info.downcase == "/" then
+                l = ['jobs', 'errors']
+                [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+            when request.request_method == "GET" && section == "jobs" && parts.length == 2 then
+                l = Array.new
+                @hash['jobs'].hash.each do |k,job|
+                    l << Hash['name',job.name,'next',job.next,'errors',job.errorList.length]
+                end
+                [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+
+
+            when request.request_method == "GET" && section == "jobs" && parts.length == 3 then
+                jobName = parts[2]
+                [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+
+                job = @hash['jobs'].byName[jobName]
+                h = Hash['name',job.name,'next',job.next,'errorList',job.errorList]
+                [ 200, {'Content-Type' => 'application/json'}, h.to_json ]
+
+
+            when request.request_method == "POST" && section == "jobs" && parts.length == 4 && parts[3] == 'run' then
+                jobName = parts[2]
+                [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+                @hash['jobs'].byName[ jobName ].runNow
+                [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+
+            when request.request_method == "POST" && parts.length == 3 && parts[2] == 'run' then
+                @hash['jobs'].byName[ parts[1] ].runNow
+                [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+            else
+                [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+            end
+
+
+        end
+    end
+end
+
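WWW is a small Rack app over the shared jobs registry: GET / lists the top-level resources, GET /jobs summarises every job, GET /jobs/<name> returns one job with its full errorList, and POST /jobs/<name>/run (names are matched lowercased) schedules an immediate run via runNow. One caveat: the 404 tuples sit behind if-modifiers without a return, so an unknown job name falls through to a nil dereference instead of a 404. Driving the API from Ruby (job name hypothetical):

    require 'net/http'
    require 'json'

    jobs = JSON.parse( Net::HTTP.get( URI( 'http://localhost:9292/jobs' ) ) )
    jobs.each { |j| puts "#{j['name']} next=#{j['next']} errors=#{j['errors']}" }

    # Trigger an immediate run of one job.
    Net::HTTP.new( 'localhost', 9292 ).post( '/jobs/nightly_sync/run', '' )
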
data/lib/helper_functions.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: datapipe
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-
+date: 2014-05-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: json
-  requirement: &
+  requirement: &70316502296420 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70316502296420
 - !ruby/object:Gem::Dependency
   name: fluiddb
-  requirement: &
+  requirement: &70316502295960 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70316502295960
 - !ruby/object:Gem::Dependency
   name: parse-cron
-  requirement: &
+  requirement: &70316502295500 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70316502295500
 description: Helping to move data around your system
 email: guy@guyirvine.com
 executables:
@@ -53,12 +53,20 @@ extra_rdoc_files: []
 files:
 - lib/DataPipe.rb
 - lib/DbToDir.rb
+- lib/DbToJson.rb
+- lib/DirToDb.rb
 - lib/helper_functions.rb
 - lib/Host.rb
 - lib/Jobs.rb
+- lib/JsonToPgsql.rb
+- lib/PathFromRemote.rb
 - lib/PathToRemote.rb
 - lib/PgsqlToPgsql.rb
 - lib/SqlServerToPgsql.rb
+- lib/Stream.rb
+- lib/StreamBuilder.rb
+- lib/StreamParser.rb
+- lib/WWW.rb
 - bin/datapipe
 - LICENSE
 - README.md