datapipe 0.0.2 → 0.0.3

bin/datapipe CHANGED
@@ -10,9 +10,24 @@ $:.unshift "/guyirvine.com/Stream/DataPipe/lib"
  require 'rubygems'
  require 'DataPipe'
 
- def run_datapipe()
-     DataPipe::Host.new().run()
+ @hash = Hash.new
+
+ datapipe_thread = Thread.new do
+     begin
+         h = DataPipe::Host.new
+         h.hash = @hash
+         h.run()
+     rescue Exception=>e
+         puts e.message
+         puts e.backtrace
+     end
  end
 
- run_datapipe
+
+ s = DataPipe::WWW.new
+ s.hash = @hash
+ Rack::Handler::Thin.run s, :Port => 9292
+
+
+ Thread.kill( datapipe_thread )
 
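The launcher now keeps the polling Host on a background thread and serves a small status app on port 9292. A minimal sketch of querying it from Ruby, assuming a datapipe instance is running locally on the default port:

    require 'net/http'
    require 'json'

    # Hypothetical status check against the Rack app started above.
    body = Net::HTTP.get( URI( "http://localhost:9292/jobs" ) )
    JSON.parse( body ).each do |job|
        puts "#{job['name']} next=#{job['next']} errors=#{job['errors']}"
    end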

lib/DataPipe.rb CHANGED
@@ -6,6 +6,7 @@ module DataPipe
  require 'helper_functions'
  require 'Jobs'
  require 'Host'
+ require 'WWW'
 
 
  class DataPipelineError<StandardError

lib/DbToDir.rb CHANGED
@@ -1,15 +1,17 @@
- require 'FluidDb'
- require "json"
+ require 'StreamBuilder'
 
 
  def DbToDir( db_env_name, sql, splitField, path, prefix )
-     Dir.mkdir( path ) unless Dir.exists?( path )
-
+     # Dir.mkdir( path ) unless Dir.exists?( path )
 
      db = DataPipe.getFluidDb( db_env_name )
 
      hash = Hash.new
-     db.queryForResultset( sql, [] ).each do |r|
+     rst = db.queryForResultset( sql, [] )
+     columns = rst[0].keys if rst.length > 0
+
+
+     rst.each do |r|
          hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
 
          hash[r[splitField]] << r
@@ -19,9 +21,12 @@ def DbToDir( db_env_name, sql, splitField, path, prefix )
      Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
 
      hash.each do |k,v|
-         File.write( "#{basePath}#{k}.js", v.to_json )
+         s = StreamBuilder.new
+             .f( columns )
+         v.each { |r| s.add *r.values }
+         File.write( "#{basePath}#{k}.js", s.serialize )
      end
-
+
      return hash
  end
 
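DbToDir now emits StreamBuilder envelopes instead of plain JSON arrays. Illustrative only, using the StreamBuilder introduced later in this diff and made-up column names: for two-column rows the per-key file holds the field names once and the values as a flat list:

    s = StreamBuilder.new.f( ['region', 'qty'] )
    s.add 'nz', 1
    s.add 'nz', 2
    s.serialize
    # => {"v":1,"d":{},"f":["region","qty"],"l":["nz",1,"nz",2]}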

lib/DbToJson.rb ADDED
@@ -0,0 +1,29 @@
+ require 'FluidDb'
+ require "json"
+
+
+ def DbToJson( db_env_name, sql, splitField, path, prefix )
+     Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+     db = DataPipe.getFluidDb( db_env_name )
+
+     hash = Hash.new
+     rst = db.queryForResultset( sql, [] )
+     columns = rst[0].keys if rst.length > 0
+     rst.each do |r|
+         hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+         hash[r[splitField]] << r
+     end
+
+     basePath = "#{path}/#{prefix}-"
+     Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+     hash.each do |k,v|
+         File.write( "#{basePath}#{k}.js", v.to_json )
+     end
+
+     return hash
+ end
+
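DbToJson keeps the old array-of-rows output that DbToDir used to produce (the `columns` local is computed but never used here). A hypothetical call, with the connection string read from the named environment variable by DataPipe.getFluidDb:

    # 'REPORT_DB', the query, and the paths are all made-up example values.
    DbToJson( "REPORT_DB", "SELECT region, qty FROM sales", "region", "/tmp/feeds", "sales" )
    # => writes /tmp/feeds/sales-<region>.js, one JSON array of rows per region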

lib/DirToDb.rb ADDED
@@ -0,0 +1,27 @@
+ require 'FluidDb'
+ require "json"
+
+
+ def DbToDir( db_env_name, sql, splitField, path, prefix )
+     Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+     db = DataPipe.getFluidDb( db_env_name )
+
+     hash = Hash.new
+     db.queryForResultset( sql, [] ).each do |r|
+         hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+         hash[r[splitField]] << r
+     end
+
+     basePath = "#{path}/#{prefix}-"
+     Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+     hash.each do |k,v|
+         File.write( "#{basePath}#{k}.js", v.to_json )
+     end
+
+     return hash
+ end
+

lib/Host.rb CHANGED
@@ -1,22 +1,29 @@
+ require 'rubygems'
+ require 'rack'
+ require 'thin'
+
 
  module DataPipe
-
+
      class Host
+
+         attr_accessor :hash
 
          def run
 
              libs = ENV["LIB"] ||= "./lib"
              dsl_paths = ENV["DSL"] ||= "./dsl"
-
+             puts "dsl_paths: #{dsl_paths}"
 
              libs.split( ";" ).each do |path|
                  DataPipe.log "Adding libdir: #{path}"
                  $:.unshift path
-
+             end
              loop = true
 
 
              jobs = Jobs.new
+             @hash['jobs'] = jobs
              while loop do
                  begin
                      dsl_paths.split( ";" ).each do |dsl_dir|
@@ -26,7 +33,6 @@ module DataPipe
                      end
                  end
 
-
                  sleep 0.5
              rescue SystemExit, Interrupt
                  puts "Exiting on request ..."
@@ -34,7 +40,6 @@ module DataPipe
              end
          end
 
-     end
 
  end
 
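Host#run now publishes its Jobs registry into the shared hash that the WWW app reads, and this release also closes the previously unbalanced libdir loop. A minimal launch sketch, with the semicolon-separated search paths as made-up examples:

    ENV["LIB"] = "./lib;./shared/lib"     # libdirs pushed onto $LOAD_PATH
    ENV["DSL"] = "./dsl"                  # directories polled for *.dsl jobs

    h = DataPipe::Host.new
    h.hash = Hash.new                     # shared with DataPipe::WWW
    h.run                                 # blocks, polling every 0.5s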

lib/Jobs.rb CHANGED
@@ -4,14 +4,32 @@ module DataPipe
 
 
  class Job
+
+     attr_reader :name, :next, :errorList
 
      def initialize( path )
          @path = path
          @name = File.basename( path, ".dsl" )
          @cronString = ""
+
+         @errorList = Array.new
          self.setCron
      end
 
+     def addError( e )
+         # Job Error -> Time, Exception Class Name, msg, backtrace
+         @errorList << "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+     end
+
+     def clearError
+         @errorList = Array.new
+     end
+
+     def runNow
+         @next = Time.now - 1
+     end
+
+
      def setCron
          tmp = ENV["#{@name}_CRON"] ||= "0 0 * * *"
          return if tmp == @cronString
@@ -29,12 +47,15 @@
              DataPipe.log "path: #{@path}", true
              DataPipe.log "dsl: #{@name}"
              load @path
+             self.clearError
 
          rescue SystemExit, Interrupt
              raise
          rescue Exception => e
-             string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+             # string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+             string = e.message
              DataPipe.log_dsl @name, string
+             self.addError( e )
          end
 
          self.setCron
@@ -43,16 +64,21 @@ class Job
  end
 
  class Jobs
+
+     attr_reader :hash, :byName
 
      def initialize
          @hash = Hash.new
+         @byName = Hash.new
      end
 
+
 
      def call( path )
          if @hash[path].nil? then
              j = Job.new( path )
              @hash[path] = j
+             @byName[j.name.downcase] = j
              j.run
          else
              @hash[path].call
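Jobs now indexes by lowercased name so the web app can address individual jobs, and each Job keeps its own error history. A sketch of the new surface, assuming a DSL file at ./dsl/nightly_load.dsl (a made-up name):

    job = DataPipe::Job.new( "./dsl/nightly_load.dsl" )
    job.run                 # loads the DSL; failures are captured via addError
    puts job.errorList      # class name, message and backtrace per failure
    job.runNow              # pushes @next into the past so the next poll re-runs it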

lib/JsonToPgsql.rb ADDED
@@ -0,0 +1,25 @@
+ require "FluidDb"
+ require "json"
+
+
+ def JsonToPgsql( source_env_name, destination_env_name, tableName, columns )
+     d = DataPipe.getFluidDb( destination_env_name )
+
+     d.execute( "TRUNCATE TABLE #{tableName}", [])
+
+     results = s.connection.exec( sql )
+
+     d.connection.exec( "COPY #{tableName} (#{columns.join( "," )}) FROM STDIN WITH DELIMITER AS '|' CSV;" )
+
+     JSON.parse( IO.read( DataPipe.getEnvVar( source_env_name ) ) ).each do |row|
+         l = Array.new
+         columns.each do |name|
+             l << row[name]
+         end
+         d.connection.put_copy_data "#{l.join( '|' )}\n"
+     end
+     d.connection.put_copy_end
+
+     DataPipe.log "#{tableName}: #{count}", true
+ end
+
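As committed, this loader references `s`, `sql`, and `count`, none of which are defined in this scope, so calling it would raise NameError; the stray lines look like leftovers from the older PgsqlToPgsql-style loaders. A minimal corrected sketch under that assumption, keeping only the FluidDb calls the diff itself uses:

    def JsonToPgsql( source_env_name, destination_env_name, tableName, columns )
        d = DataPipe.getFluidDb( destination_env_name )

        d.execute( "TRUNCATE TABLE #{tableName}", [] )
        d.connection.exec( "COPY #{tableName} (#{columns.join( ',' )}) FROM STDIN WITH DELIMITER AS '|' CSV;" )

        count = 0
        JSON.parse( IO.read( DataPipe.getEnvVar( source_env_name ) ) ).each do |row|
            d.connection.put_copy_data "#{columns.map { |name| row[name] }.join( '|' )}\n"
            count += 1
        end
        d.connection.put_copy_end

        DataPipe.log "#{tableName}: #{count}", true
    end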

lib/PathFromRemote.rb ADDED
@@ -0,0 +1,27 @@
+ require 'net/sftp'
+ require 'uri'
+
+
+ def PathFromRemote( remoteUri, localPath, prefix )
+
+     uri = URI.parse( remoteUri )
+     DataPipe.log "remoteUri: #{remoteUri}, localPath: #{localPath}, prefix: #{prefix}", true
+
+     Net::SFTP.start( uri.host, uri.user, :password => uri.password ) do |sftp|
+         Dir.glob( "#{localPath}/#{prefix}*" ).each do |path|
+             File.rm( path )
+         end
+
+         sftp.dir.foreach(uri.path) do |entry|
+             name = entry.name
+             if name[0,prefix.length] == prefix && entry.file? then
+                 DataPipe.log "sftp.rm: #{uri.path}/#{name}"
+                 sftp.download!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+                 sftp.remove!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+             end
+         end
+
+     end
+
+ end
+
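Two snags in this version: `File.rm` is not a Ruby core method (the rest of the gem uses `File.delete`), and `sftp.download!` with a single argument returns the remote file's contents rather than writing anything under localPath. A corrected sketch, assuming the intent is to mirror matching remote files into localPath and then delete them remotely:

    def PathFromRemote( remoteUri, localPath, prefix )
        uri = URI.parse( remoteUri )

        Net::SFTP.start( uri.host, uri.user, :password => uri.password ) do |sftp|
            # Drop any stale local copies for this prefix first
            Dir.glob( "#{localPath}/#{prefix}*" ).each { |path| File.delete( path ) }

            sftp.dir.foreach( uri.path ) do |entry|
                name = entry.name
                next unless name[0, prefix.length] == prefix && entry.file?

                sftp.download!( "#{uri.path}/#{name}", "#{localPath}/#{name}" )
                sftp.remove!( "#{uri.path}/#{name}" )
            end
        end
    end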

lib/Stream.rb ADDED
@@ -0,0 +1,9 @@
+ module Telemetry
+
+
+     class StreamError<StandardError
+     end
+
+
+ end
+

lib/StreamBuilder.rb ADDED
@@ -0,0 +1,32 @@
+ require 'json'
+
+
+ class StreamBuilder
+
+     def initialize
+         @h = Hash['v',1,'d',{},'f',[],'l',[]]
+     end
+
+     def set( name, value )
+         @h['d'][name] = value
+         return self
+     end
+
+     def f( fieldList )
+         @h['f'] = fieldList
+
+         return self
+     end
+
+     def add( *args )
+         @h['l'].concat args
+
+         return self
+     end
+
+     def serialize
+         return @h.to_json
+     end
+
+ end
+
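The envelope carries a version ('v'), per-stream defaults ('d'), field names ('f') and a flat value list ('l'), so field names are written once rather than per row. A quick usage sketch with made-up field names:

    s = StreamBuilder.new
    s.set( 'source', 'crm' )       # default merged into every parsed record
    s.f( ['name', 'qty'] )
    s.add 'widgets', 3
    s.add 'sprockets', 5
    s.serialize
    # => {"v":1,"d":{"source":"crm"},"f":["name","qty"],"l":["widgets",3,"sprockets",5]}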

lib/StreamParser.rb ADDED
@@ -0,0 +1,70 @@
+ module Telemetry
+
+     require 'json'
+     require 'date'
+
+     class StreamParserError<StandardError
+     end
+
+     class VersionNotSpecifiedError<StreamParserError
+     end
+     class VersionNotSupportedError<StreamParserError
+     end
+     class MissingFormatError<StreamParserError
+     end
+     class InvalidFormatError<StreamParserError
+     end
+     class MissingListError<StreamParserError
+     end
+     class MeasurementFieldNotSuppliedError<StreamParserError
+     end
+
+     class StreamParser
+
+         attr_reader :version, :list, :all_fields
+
+         def method_missing( meth, *args, &block )
+             raise VersionNotSupportedError.new if meth[0,5] == "parse"
+
+             raise NoMethodError.new( "method: #{meth}" )
+         end
+
+         def parse1
+             raise MissingFormatError.new if @r['f'].nil?
+             raise InvalidFormatError.new if @r['f'].length == 0
+             raise MissingListError.new if @r['l'].nil?
+
+             @defaults = @r['d'] || {}
+
+             @list = Array.new
+             format = @r['f']
+
+             @all_fields = @defaults.keys + @r['f']
+
+             # Break list up into chunks, each chunk being the size of the format record
+             @r['l'].each_slice( format.length ).with_index do |el,idx|
+
+                 obj = @defaults.clone
+                 format.each_with_index do |name,idx|
+                     obj[name] = el[idx]
+                 end
+                 @list << obj
+             end
+
+         end
+
+         def initialize( payload )
+             @r = JSON.parse( payload )
+
+             raise VersionNotSpecifiedError.new if @r['v'].nil?
+             @version = @r['v']
+
+             self.send "parse#{version}"
+
+             return self
+         end
+
+     end
+
+ end
+
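StreamParser dispatches on the payload's 'v' field (method_missing turns any unknown parseN into VersionNotSupportedError) and re-inflates the flat list back into records, merging the defaults into each. A round trip with the builder above:

    payload = StreamBuilder.new
        .set( 'source', 'crm' )
        .f( ['name', 'qty'] )
        .add( 'widgets', 3, 'sprockets', 5 )
        .serialize

    parser = Telemetry::StreamParser.new( payload )
    parser.version      # => 1
    parser.all_fields   # => ["source", "name", "qty"]
    parser.list         # => [{"source"=>"crm", "name"=>"widgets", "qty"=>3},
                        #     {"source"=>"crm", "name"=>"sprockets", "qty"=>5}]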

lib/WWW.rb ADDED
@@ -0,0 +1,68 @@
+ require 'rubygems'
+ require 'rack'
+ require 'json'
+
+
+ module DataPipe
+
+     class WWW
+
+         attr_accessor :hash
+
+         def call(env)
+             @root = File.expand_path(File.dirname(__FILE__))
+             path = Rack::Utils.unescape(env['PATH_INFO'])
+             # path += 'index.html' if path == '/'
+             file = @root + "#{path}"
+
+             params = Rack::Utils.parse_nested_query(env['QUERY_STRING'])
+
+             request = Rack::Request.new(env)
+             response = Rack::Response.new()
+             parts = request.path_info.downcase.split( "/" )
+             section = parts[1]
+             case true
+             when request.request_method == "GET" && request.path_info.downcase == "/" then
+                 l = ['jobs', 'errors']
+                 [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+             when request.request_method == "GET" && section == "jobs" && parts.length == 2 then
+                 l = Array.new
+                 @hash['jobs'].hash.each do |k,job|
+                     l << Hash['name',job.name,'next',job.next,'errors',job.errorList.length]
+                 end
+                 [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+
+
+             when request.request_method == "GET" && section == "jobs" && parts.length == 3 then
+                 jobName = parts[2]
+                 [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+
+                 job = @hash['jobs'].byName[jobName]
+                 h = Hash['name',job.name,'next',job.next,'errorList',job.errorList]
+                 [ 200, {'Content-Type' => 'application/json'}, h.to_json ]
+
+
+             when request.request_method == "POST" && section == "jobs" && parts.length == 4 && parts[3] == 'run' then
+                 jobName = parts[2]
+                 [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+                 @hash['jobs'].byName[ jobName ].runNow
+                 [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+
+             when request.request_method == "POST" && parts.length == 3 && parts[2] == 'run' then
+                 @hash['jobs'].byName[ parts[1] ].runNow
+                 [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+             else
+                 [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+             end
+
+
+         end
+     end
+ end
+
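Note the inline `[404, ...] if ...` guards are evaluated and discarded rather than returned, so requests for unknown jobs fall through to the code below them. A hypothetical client for the endpoints, assuming the default port from bin/datapipe and a job named nightly_load:

    require 'net/http'
    require 'json'

    base = URI( "http://localhost:9292" )

    JSON.parse( Net::HTTP.get( base + "/jobs" ) )               # every job: name, next, error count
    JSON.parse( Net::HTTP.get( base + "/jobs/nightly_load" ) )  # one job, full errorList
    Net::HTTP.post_form( base + "/jobs/nightly_load/run", {} )  # schedule an immediate run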

lib/helper_functions.rb CHANGED
@@ -1,5 +1,8 @@
  module DataPipe
 
+     require 'FluidDb'
+
+
      def DataPipe.log( string, verbose=false )
          type = verbose ? "VERB" : "INFO"
          if !ENV["VERBOSE"].nil? || !verbose then

metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: datapipe
  version: !ruby/object:Gem::Version
-   version: 0.0.2
+   version: 0.0.3
  prerelease:
  platform: ruby
  authors:
@@ -9,11 +9,11 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-05-01 00:00:00.000000000 Z
+ date: 2014-05-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: json
-   requirement: &70106395416780 !ruby/object:Gem::Requirement
+   requirement: &70316502296420 !ruby/object:Gem::Requirement
      none: false
      requirements:
      - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
        version: '0'
    type: :runtime
    prerelease: false
-   version_requirements: *70106395416780
+   version_requirements: *70316502296420
  - !ruby/object:Gem::Dependency
    name: fluiddb
-   requirement: &70106395416340 !ruby/object:Gem::Requirement
+   requirement: &70316502295960 !ruby/object:Gem::Requirement
      none: false
      requirements:
      - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
        version: '0'
    type: :runtime
    prerelease: false
-   version_requirements: *70106395416340
+   version_requirements: *70316502295960
  - !ruby/object:Gem::Dependency
    name: parse-cron
-   requirement: &70106395415920 !ruby/object:Gem::Requirement
+   requirement: &70316502295500 !ruby/object:Gem::Requirement
      none: false
      requirements:
      - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
        version: '0'
    type: :runtime
    prerelease: false
-   version_requirements: *70106395415920
+   version_requirements: *70316502295500
  description: Helping to move data around your system
  email: guy@guyirvine.com
  executables:
@@ -53,12 +53,20 @@ extra_rdoc_files: []
  files:
  - lib/DataPipe.rb
  - lib/DbToDir.rb
+ - lib/DbToJson.rb
+ - lib/DirToDb.rb
  - lib/helper_functions.rb
  - lib/Host.rb
  - lib/Jobs.rb
+ - lib/JsonToPgsql.rb
+ - lib/PathFromRemote.rb
  - lib/PathToRemote.rb
  - lib/PgsqlToPgsql.rb
  - lib/SqlServerToPgsql.rb
+ - lib/Stream.rb
+ - lib/StreamBuilder.rb
+ - lib/StreamParser.rb
+ - lib/WWW.rb
  - bin/datapipe
  - LICENSE
  - README.md