datapipe 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/datapipe +18 -3
- data/lib/DataPipe.rb +1 -0
- data/lib/DbToDir.rb +12 -7
- data/lib/DbToJson.rb +29 -0
- data/lib/DirToDb.rb +27 -0
- data/lib/Host.rb +10 -5
- data/lib/Jobs.rb +27 -1
- data/lib/JsonToPgsql.rb +25 -0
- data/lib/PathFromRemote.rb +27 -0
- data/lib/Stream.rb +9 -0
- data/lib/StreamBuilder.rb +32 -0
- data/lib/StreamParser.rb +70 -0
- data/lib/WWW.rb +68 -0
- data/lib/helper_functions.rb +3 -0
- metadata +16 -8
data/bin/datapipe
CHANGED
@@ -10,9 +10,24 @@ $:.unshift "/guyirvine.com/Stream/DataPipe/lib"
 require 'rubygems'
 require 'DataPipe'
 
-
-
+@hash = Hash.new
+
+datapipe_thread = Thread.new do
+    begin
+        h = DataPipe::Host.new
+        h.hash = @hash
+        h.run()
+    rescue Exception=>e
+        puts e.message
+        puts e.backtrace
+    end
 end
 
-
+
+s = DataPipe::WWW.new
+s.hash = @hash
+Rack::Handler::Thin.run s, :Port => 9292
+
+
+Thread.kill( datapipe_thread )
 
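The executable now keeps the DSL host on a background thread and serves a status API through Rack and Thin on the hardcoded port 9292; the shared @hash gives the web layer a handle on the host's job registry. A minimal sketch of probing that API from another process, assuming the daemon is running locally (the host name is an assumption):

    require 'net/http'
    require 'json'

    # Top-level index: the WWW app answers GET / with its section names.
    puts Net::HTTP.get( URI( "http://localhost:9292/" ) )    # => ["jobs","errors"]

    # GET /jobs lists each registered job with its next run time and error count.
    JSON.parse( Net::HTTP.get( URI( "http://localhost:9292/jobs" ) ) ).each do |j|
        puts "#{j['name']} next: #{j['next']} errors: #{j['errors']}"
    end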
data/lib/DataPipe.rb
CHANGED
data/lib/DbToDir.rb
CHANGED
@@ -1,15 +1,17 @@
-require '
-require "json"
+require 'StreamBuilder'
 
 
 def DbToDir( db_env_name, sql, splitField, path, prefix )
-    Dir.mkdir( path ) unless Dir.exists?( path )
-
+    # Dir.mkdir( path ) unless Dir.exists?( path )
 
     db = DataPipe.getFluidDb( db_env_name )
 
     hash = Hash.new
-    db.queryForResultset( sql, [] )
+    rst = db.queryForResultset( sql, [] )
+    columns = rst[0].keys if rst.length > 0
+
+
+    rst.each do |r|
         hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
 
         hash[r[splitField]] << r
@@ -19,9 +21,12 @@ def DbToDir( db_env_name, sql, splitField, path, prefix )
     Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
 
     hash.each do |k,v|
-
+        s = StreamBuilder.new
+                .f( columns )
+        v.each { |r| s.add *r.values }
+        File.write( "#{basePath}#{k}.js", s.serialize )
     end
-
+
     return hash
 end
 
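DbToDir now writes each split file in the versioned stream envelope produced by the new StreamBuilder (added later in this diff) rather than a plain JSON array. A sketch of what one such <path>/<prefix>-<key>.js file contains, using hypothetical columns and rows, and assuming the gem's lib directory is on the load path:

    require 'StreamBuilder'

    s = StreamBuilder.new.f( ["id", "name"] )   # 'f' records the column names
    s.add( 1, "a" )                             # rows are flattened into one list
    s.add( 2, "b" )
    puts s.serialize
    # => {"v":1,"d":{},"f":["id","name"],"l":[1,"a",2,"b"]}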
data/lib/DbToJson.rb
ADDED
@@ -0,0 +1,29 @@
+require 'FluidDb'
+require "json"
+
+
+def DbToJson( db_env_name, sql, splitField, path, prefix )
+    Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+    db = DataPipe.getFluidDb( db_env_name )
+
+    hash = Hash.new
+    rst = db.queryForResultset( sql, [] )
+    columns = rst[0].keys if rst.length > 0
+    rst.each do |r|
+        hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+        hash[r[splitField]] << r
+    end
+
+    basePath = "#{path}/#{prefix}-"
+    Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+    hash.each do |k,v|
+        File.write( "#{basePath}#{k}.js", v.to_json )
+    end
+
+    return hash
+end
+
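DbToJson preserves the 0.0.2 behaviour of DbToDir: each split key still gets a plain JSON array of row hashes. A sketch of the per-key file content, with hypothetical rows:

    require 'json'

    v = [ { "id" => 1, "region" => "nz" }, { "id" => 2, "region" => "nz" } ]
    puts v.to_json    # => [{"id":1,"region":"nz"},{"id":2,"region":"nz"}]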
data/lib/DirToDb.rb
ADDED
@@ -0,0 +1,27 @@
+require 'FluidDb'
+require "json"
+
+
+def DbToDir( db_env_name, sql, splitField, path, prefix )
+    Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+    db = DataPipe.getFluidDb( db_env_name )
+
+    hash = Hash.new
+    db.queryForResultset( sql, [] ).each do |r|
+        hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+        hash[r[splitField]] << r
+    end
+
+    basePath = "#{path}/#{prefix}-"
+    Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+    hash.each do |k,v|
+        File.write( "#{basePath}#{k}.js", v.to_json )
+    end
+
+    return hash
+end
+
data/lib/Host.rb
CHANGED
@@ -1,22 +1,29 @@
+require 'rubygems'
+require 'rack'
+require 'thin'
+
 
 module DataPipe
-
+
     class Host
+
+        attr_accessor :hash
 
         def run
 
             libs = ENV["LIB"] ||= "./lib"
             dsl_paths = ENV["DSL"] ||= "./dsl"
-
+            puts "dsl_paths: #{dsl_paths}"
 
             libs.split( ";" ).each do |path|
                 DataPipe.log "Adding libdir: #{path}"
                 $:.unshift path
-
+            end
             loop = true
 
 
             jobs = Jobs.new
+            @hash['jobs'] = jobs
             while loop do
                 begin
                     dsl_paths.split( ";" ).each do |dsl_dir|
@@ -26,7 +33,6 @@ module DataPipe
                     end
                 end
 
-
                sleep 0.5
             rescue SystemExit, Interrupt
                 puts "Exiting on request ..."
@@ -34,7 +40,6 @@ module DataPipe
             end
         end
 
-        end
 
     end
 
data/lib/Jobs.rb
CHANGED
@@ -4,14 +4,32 @@ module DataPipe
 
 
 class Job
+
+    attr_reader :name, :next, :errorList
 
     def initialize( path )
         @path = path
         @name = File.basename( path, ".dsl" )
         @cronString = ""
+
+        @errorList = Array.new
         self.setCron
     end
 
+    def addError( e )
+        #Job Error -> Time, Exception Class Name, nsg, backtrace
+        @errorList << "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+    end
+
+    def clearError
+        @errorList = Array.new
+    end
+
+    def runNow
+        @next = Time.now - 1
+    end
+
+
     def setCron
         tmp = ENV["#{@name}_CRON"] ||= "0 0 * * *"
         return if tmp == @cronString
@@ -29,12 +47,15 @@ class Job
         DataPipe.log "path: #{@path}", true
         DataPipe.log "dsl: #{@name}"
         load @path
+        self.clearError
 
     rescue SystemExit, Interrupt
         raise
     rescue Exception => e
-        string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+        # string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+        string = e.message
         DataPipe.log_dsl @name, string
+        self.addError( e )
     end
 
     self.setCron
@@ -43,16 +64,21 @@ class Job
 end
 
 class Jobs
+
+    attr_reader :hash, :byName
 
     def initialize
         @hash = Hash.new
+        @byName = Hash.new
     end
 
+
 
     def call( path )
         if @hash[path].nil? then
             j = Job.new( path )
             @hash[path] = j
+            @byName[j.name.downcase] = j
             j.run
         else
             @hash[path].call
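A job's name is its DSL filename without the extension, and setCron still reads the schedule from an environment variable derived from that name; the new runNow simply backdates @next so the scheduler's next pass fires the job immediately. A sketch, where the job name nightly_extract is hypothetical:

    # dsl/nightly_extract.dsl is scheduled via an env var named after the file;
    # the default remains "0 0 * * *" (daily at midnight).
    ENV["nightly_extract_CRON"] = "*/5 * * * *"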
data/lib/JsonToPgsql.rb
ADDED
@@ -0,0 +1,25 @@
+require "FluidDb"
+require "json"
+
+
+def JsonToPgsql( source_env_name, destination_env_name, tableName, columns )
+    d = DataPipe.getFluidDb( destination_env_name )
+
+    d.execute( "TRUNCATE TABLE #{tableName}", [])
+
+    results = s.connection.exec( sql )
+
+    d.connection.exec( "COPY #{tableName} (#{columns.join( "," )}) FROM STDIN WITH DELIMITER AS '|' CSV;" )
+
+    JSON.parse( IO.read( DataPipe.getEnvVar( source_env_name ) ) ).each do |row|
+        l = Array.new
+        columns.each do |name|
+            l << row[name]
+        end
+        d.connection.put_copy_data "#{l.join( '|' )}\n"
+    end
+    d.connection.put_copy_end
+
+    DataPipe.log "#{tableName}: #{count}", true
+end
+
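The helper truncates the destination table and then streams rows in over PostgreSQL's COPY protocol through the raw pg connection; note that, as released, the body also references s, sql, and count, none of which are defined in this file. A sketch of the underlying COPY pattern it relies on, where the people table and connection parameters are assumptions:

    require 'pg'

    conn = PG.connect( dbname: "example" )
    # exec with COPY ... FROM STDIN puts the connection into copy-in mode.
    conn.exec( "COPY people (id,name) FROM STDIN WITH DELIMITER AS '|' CSV;" )
    conn.put_copy_data "1|alice\n"
    conn.put_copy_data "2|bob\n"
    conn.put_copy_end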
data/lib/PathFromRemote.rb
ADDED
@@ -0,0 +1,27 @@
+require 'net/sftp'
+require 'uri'
+
+
+def PathFromRemote( remoteUri, localPath, prefix )
+
+    uri = URI.parse( remoteUri )
+    DataPipe.log "remoteUri: #{remoteUri}, localPath: #{localPath}, prefix: #{prefix}", true
+
+    Net::SFTP.start( uri.host, uri.user, :password => uri.password ) do |sftp|
+        Dir.glob( "#{localPath}/#{prefix}*" ).each do |path|
+            File.rm( path )
+        end
+
+        sftp.dir.foreach(uri.path) do |entry|
+            name = entry.name
+            if name[0,prefix.length] == prefix && entry.file? then
+                DataPipe.log "sftp.rm: #{uri.path}/#{name}"
+                sftp.download!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+                sftp.remove!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+            end
+        end
+
+    end
+
+end
+
data/lib/Stream.rb
ADDED
data/lib/StreamBuilder.rb
ADDED
@@ -0,0 +1,32 @@
+require 'json'
+
+
+class StreamBuilder
+
+    def initialize
+        @h = Hash['v',1,'d',{},'f',[],'l',[]]
+    end
+
+    def set( name, value )
+        @h['d'][name] = value
+        return self
+    end
+
+    def f( fieldList )
+        @h['f'] = fieldList
+
+        return self
+    end
+
+    def add( *args )
+        @h['l'].concat args
+
+        return self
+    end
+
+    def serialize
+        return @h.to_json
+    end
+
+end
+
data/lib/StreamParser.rb
ADDED
@@ -0,0 +1,70 @@
+module Telemetry
+
+require 'json'
+require 'date'
+
+class StreamParserError<StandardError
+end
+
+class VersionNotSpecifiedError<StreamParserError
+end
+class VersionNotSupportedError<StreamParserError
+end
+class MissingFormatError<StreamParserError
+end
+class InvalidFormatError<StreamParserError
+end
+class MissingListError<StreamParserError
+end
+class MeasurementFieldNotSuppliedError<StreamParserError
+end
+
+class StreamParser
+
+    attr_reader :version, :list, :all_fields
+
+    def method_missing( meth, *args, &block )
+        raise VersionNotSupportedError.new if meth[0,5] == "parse"
+
+        raise NoMethodError.new( "method: #{meth}" )
+    end
+
+    def parse1
+        raise MissingFormatError.new if @r['f'].nil?
+        raise InvalidFormatError.new if @r['f'].length == 0
+        raise MissingListError.new if @r['l'].nil?
+
+        @defaults = @r['d'] || {}
+
+        @list = Array.new
+        format = @r['f']
+
+        @all_fields = @defaults.keys + @r['f']
+
+        #Break list up into chunks, each chunk being the size of the format record
+        @r['l'].each_slice( format.length ).with_index do |el,idx|
+
+            obj = @defaults.clone
+            format.each_with_index do |name,idx|
+                obj[name] = el[idx]
+            end
+            @list << obj
+        end
+
+    end
+
+    def initialize( payload )
+        @r = JSON.parse( payload )
+
+        raise VersionNotSpecifiedError.new if @r['v'].nil?
+        @version = @r['v']
+
+        self.send "parse#{version}"
+
+        return self
+    end
+
+end
+
+end
+
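StreamParser is the consuming side of the StreamBuilder envelope: it dispatches on the v field (parse1 for version 1), merges the d defaults into every record, and rebuilds row hashes by slicing the flat l list into chunks the width of the f format. A round-trip sketch, assuming both new files are on the load path:

    require 'StreamBuilder'
    require 'StreamParser'

    s = StreamBuilder.new
    s.set( "region", "nz" )       # a default merged into every record
    s.f( ["id", "name"] )
    s.add( 1, "a" )
    s.add( 2, "b" )

    p = Telemetry::StreamParser.new( s.serialize )
    p.list
    # => [{"region"=>"nz", "id"=>1, "name"=>"a"}, {"region"=>"nz", "id"=>2, "name"=>"b"}]
    p.all_fields
    # => ["region", "id", "name"]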
data/lib/WWW.rb
ADDED
@@ -0,0 +1,68 @@
+require 'rubygems'
+require 'rack'
+require 'json'
+
+
+module DataPipe
+
+    class WWW
+
+        attr_accessor :hash
+
+        def call(env)
+            @root = File.expand_path(File.dirname(__FILE__))
+            path = Rack::Utils.unescape(env['PATH_INFO'])
+            # path += 'index.html' if path == '/'
+            file = @root + "#{path}"
+
+            params = Rack::Utils.parse_nested_query(env['QUERY_STRING'])
+
+            request = Rack::Request.new(env)
+            response = Rack::Response.new()
+            parts = request.path_info.downcase.split( "/" )
+            section = parts[1]
+            case true
+            when request.request_method == "GET" && request.path_info.downcase == "/" then
+                l = ['jobs', 'errors']
+                [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+            when request.request_method == "GET" && section == "jobs" && parts.length == 2 then
+                l = Array.new
+                @hash['jobs'].hash.each do |k,job|
+                    l << Hash['name',job.name,'next',job.next,'errors',job.errorList.length]
+                end
+                [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+
+
+            when request.request_method == "GET" && section == "jobs" && parts.length == 3 then
+                jobName = parts[2]
+                [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+
+                job = @hash['jobs'].byName[jobName]
+                h = Hash['name',job.name,'next',job.next,'errorList',job.errorList]
+                [ 200, {'Content-Type' => 'application/json'}, h.to_json ]
+
+
+            when request.request_method == "POST" && section == "jobs" && parts.length == 4 && parts[3] == 'run' then
+                jobName = parts[2]
+                [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+                @hash['jobs'].byName[ jobName ].runNow
+                [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+
+            when request.request_method == "POST" && parts.length == 3 && parts[2] == 'run' then
+                @hash['jobs'].byName[ parts[1] ].runNow
+                [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+            else
+                [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+            end
+
+
+        end
+    end
+end
+
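Together the routes form a small JSON status API plus a trigger endpoint; job lookups go through Jobs#byName, which stores names lowercased. A sketch of forcing a run over HTTP (the job name nightly_extract is hypothetical; Net::HTTP.post needs Ruby 2.4+):

    require 'net/http'

    res = Net::HTTP.post( URI( "http://localhost:9292/jobs/nightly_extract/run" ), "" )
    puts res.body    # => "Ok"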
data/lib/helper_functions.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: datapipe
 version: !ruby/object:Gem::Version
-  version: 0.0.
+  version: 0.0.3
 prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-
+date: 2014-05-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: json
-  requirement: &
+  requirement: &70316502296420 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70316502296420
 - !ruby/object:Gem::Dependency
   name: fluiddb
-  requirement: &
+  requirement: &70316502295960 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70316502295960
 - !ruby/object:Gem::Dependency
   name: parse-cron
-  requirement: &
+  requirement: &70316502295500 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70316502295500
 description: Helping to move data around your system
 email: guy@guyirvine.com
 executables:
@@ -53,12 +53,20 @@ extra_rdoc_files: []
 files:
 - lib/DataPipe.rb
 - lib/DbToDir.rb
+- lib/DbToJson.rb
+- lib/DirToDb.rb
 - lib/helper_functions.rb
 - lib/Host.rb
 - lib/Jobs.rb
+- lib/JsonToPgsql.rb
+- lib/PathFromRemote.rb
 - lib/PathToRemote.rb
 - lib/PgsqlToPgsql.rb
 - lib/SqlServerToPgsql.rb
+- lib/Stream.rb
+- lib/StreamBuilder.rb
+- lib/StreamParser.rb
+- lib/WWW.rb
 - bin/datapipe
 - LICENSE
 - README.md