datapipe 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,9 +10,24 @@ $:.unshift "/guyirvine.com/Stream/DataPipe/lib"
  require 'rubygems'
  require 'DataPipe'

- def run_datapipe()
-     DataPipe::Host.new().run()
+ @hash = Hash.new
+
+ datapipe_thread = Thread.new do
+     begin
+         h = DataPipe::Host.new
+         h.hash = @hash
+         h.run()
+     rescue Exception => e
+         puts e.message
+         puts e.backtrace
+     end
  end

- run_datapipe
+
+ s = DataPipe::WWW.new
+ s.hash = @hash
+ Rack::Handler::Thin.run s, :Port => 9292
+
+
+ Thread.kill( datapipe_thread )

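With this change the executable no longer blocks in `DataPipe::Host#run`: the host loop moves onto a background thread and shares state with a Thin-served Rack app (`DataPipe::WWW`, added later in this diff) through `@hash`. Note that `Rack::Handler::Thin.run` blocks, so the `Thread.kill` only fires once the web server exits. A minimal sketch of talking to the new status API, assuming an instance is running locally on the default port 9292 (the endpoint shape comes from `WWW#call`, shown below):

    # Sketch: poll the job-status endpoint added in 0.0.3 (hypothetical client code).
    require 'net/http'
    require 'json'

    jobs = JSON.parse( Net::HTTP.get( URI( 'http://localhost:9292/jobs' ) ) )
    jobs.each do |job|
        puts "#{job['name']} next=#{job['next']} errors=#{job['errors']}"
    end
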
@@ -6,6 +6,7 @@ module DataPipe
  require 'helper_functions'
  require 'Jobs'
  require 'Host'
+ require 'WWW'


  class DataPipelineError < StandardError
@@ -1,15 +1,17 @@
- require 'FluidDb'
- require "json"
+ require 'StreamBuilder'


  def DbToDir( db_env_name, sql, splitField, path, prefix )
-     Dir.mkdir( path ) unless Dir.exists?( path )
-
+     # Dir.mkdir( path ) unless Dir.exists?( path )

      db = DataPipe.getFluidDb( db_env_name )

      hash = Hash.new
-     db.queryForResultset( sql, [] ).each do |r|
+     rst = db.queryForResultset( sql, [] )
+     columns = rst[0].keys if rst.length > 0
+
+
+     rst.each do |r|
          hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])

          hash[r[splitField]] << r
@@ -19,9 +21,12 @@ def DbToDir( db_env_name, sql, splitField, path, prefix )
      Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }

      hash.each do |k,v|
-         File.write( "#{basePath}#{k}.js", v.to_json )
+         s = StreamBuilder.new
+                 .f( columns )
+         v.each { |r| s.add *r.values }
+         File.write( "#{basePath}#{k}.js", s.serialize )
      end
-
+
      return hash
  end

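This rewrite changes the on-disk format of each split file: 0.0.2 wrote the rows verbatim with `to_json`, while 0.0.3 writes the versioned envelope produced by `StreamBuilder` (added later in this diff). Two details worth noting: the leading-dot continuation on `.f( columns )` requires Ruby 1.9+, and `columns` is only assigned when the resultset is non-empty. A sketch of the shape change, assuming two rows with columns `a` and `b` and `lib/StreamBuilder.rb` loaded:

    # Sketch: one split file's contents before and after this change.
    rows = [ { 'a' => 1, 'b' => 2 }, { 'a' => 3, 'b' => 4 } ]

    rows.to_json
    # 0.0.2 => [{"a":1,"b":2},{"a":3,"b":4}]

    s = StreamBuilder.new.f( %w[a b] )
    rows.each { |r| s.add( *r.values ) }
    s.serialize
    # 0.0.3 => {"v":1,"d":{},"f":["a","b"],"l":[1,2,3,4]}
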
@@ -0,0 +1,29 @@
+ require 'FluidDb'
+ require "json"
+
+
+ def DbToJson( db_env_name, sql, splitField, path, prefix )
+     Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+     db = DataPipe.getFluidDb( db_env_name )
+
+     hash = Hash.new
+     rst = db.queryForResultset( sql, [] )
+     columns = rst[0].keys if rst.length > 0
+     rst.each do |r|
+         hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+         hash[r[splitField]] << r
+     end
+
+     basePath = "#{path}/#{prefix}-"
+     Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+     hash.each do |k,v|
+         File.write( "#{basePath}#{k}.js", v.to_json )
+     end
+
+     return hash
+ end
+
@@ -0,0 +1,27 @@
+ require 'FluidDb'
+ require "json"
+
+
+ def DbToDir( db_env_name, sql, splitField, path, prefix )
+     Dir.mkdir( path ) unless Dir.exists?( path )
+
+
+     db = DataPipe.getFluidDb( db_env_name )
+
+     hash = Hash.new
+     db.queryForResultset( sql, [] ).each do |r|
+         hash[r[splitField]] = Array.new unless hash.has_key?(r[splitField])
+
+         hash[r[splitField]] << r
+     end
+
+     basePath = "#{path}/#{prefix}-"
+     Dir.glob( "#{basePath}*" ).each { |f| File.delete(f) }
+
+     hash.each do |k,v|
+         File.write( "#{basePath}#{k}.js", v.to_json )
+     end
+
+     return hash
+ end
+
@@ -1,22 +1,29 @@
+ require 'rubygems'
+ require 'rack'
+ require 'thin'
+

  module DataPipe
-
+
      class Host
+
+         attr_accessor :hash

          def run

              libs = ENV["LIB"] ||= "./lib"
              dsl_paths = ENV["DSL"] ||= "./dsl"
-
+             puts "dsl_paths: #{dsl_paths}"

              libs.split( ";" ).each do |path|
                  DataPipe.log "Adding libdir: #{path}"
                  $:.unshift path
-
+             end
              loop = true


              jobs = Jobs.new
+             @hash['jobs'] = jobs
              while loop do
                  begin
                      dsl_paths.split( ";" ).each do |dsl_dir|
@@ -26,7 +33,6 @@ module DataPipe
                          end
                      end

-
                      sleep 0.5
                  rescue SystemExit, Interrupt
                      puts "Exiting on request ..."
@@ -34,7 +40,6 @@ module DataPipe
                  end
              end

-             end

          end

@@ -4,14 +4,32 @@ module DataPipe


  class Job
+
+     attr_reader :name, :next, :errorList

      def initialize( path )
          @path = path
          @name = File.basename( path, ".dsl" )
          @cronString = ""
+
+         @errorList = Array.new
          self.setCron
      end

+     def addError( e )
+         # Job Error -> Time, Exception Class Name, msg, backtrace
+         @errorList << "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+     end
+
+     def clearError
+         @errorList = Array.new
+     end
+
+     def runNow
+         @next = Time.now - 1
+     end
+
+
      def setCron
          tmp = ENV["#{@name}_CRON"] ||= "0 0 * * *"
          return if tmp == @cronString
@@ -29,12 +47,15 @@ class Job
              DataPipe.log "path: #{@path}", true
              DataPipe.log "dsl: #{@name}"
              load @path
+             self.clearError

          rescue SystemExit, Interrupt
              raise
          rescue Exception => e
-             string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+             # string = "#{e.class.name}: #{e.message}\n#{e.backtrace.join( "\n" )}"
+             string = e.message
              DataPipe.log_dsl @name, string
+             self.addError( e )
          end

          self.setCron
@@ -43,16 +64,21 @@ class Job
  end

  class Jobs
+
+     attr_reader :hash, :byName

      def initialize
          @hash = Hash.new
+         @byName = Hash.new
      end

+

      def call( path )
          if @hash[path].nil? then
              j = Job.new( path )
              @hash[path] = j
+             @byName[j.name.downcase] = j
              j.run
          else
              @hash[path].call
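`Jobs` now indexes each `Job` by its downcased basename, and `Job#runNow` simply moves `@next` into the past so the host's polling loop re-runs the job on its next tick (the time check in `Job#call` is not shown in this diff). A sketch of the interplay, with a hypothetical DSL path:

    # Sketch: force an immediate re-run through the new byName index.
    jobs = Jobs.new
    jobs.call( './dsl/NightlyExport.dsl' )    # first call registers and runs the job

    job = jobs.byName['nightlyexport']        # keys are downcased basenames
    job.runNow                                # @next = Time.now - 1 ...
    jobs.call( './dsl/NightlyExport.dsl' )    # ... so the next poll runs it again
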
@@ -0,0 +1,25 @@
+ require "FluidDb"
+ require "json"
+
+
+ def JsonToPgsql( source_env_name, destination_env_name, tableName, columns )
+     d = DataPipe.getFluidDb( destination_env_name )
+
+     d.execute( "TRUNCATE TABLE #{tableName}", [] )
+
+     results = s.connection.exec( sql )
+
+     d.connection.exec( "COPY #{tableName} (#{columns.join( "," )}) FROM STDIN WITH DELIMITER AS '|' CSV;" )
+
+     JSON.parse( IO.read( DataPipe.getEnvVar( source_env_name ) ) ).each do |row|
+         l = Array.new
+         columns.each do |name|
+             l << row[name]
+         end
+         d.connection.put_copy_data "#{l.join( '|' )}\n"
+     end
+     d.connection.put_copy_end
+
+     DataPipe.log "#{tableName}: #{count}", true
+ end
+
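As published, `JsonToPgsql` cannot run: `s` and `sql` on the `results = ...` line are undefined (it reads like a leftover from one of the `*ToPgsql` siblings), and `count` in the final log call is never assigned, so the function raises `NameError`. A sketch of the apparent intent, with the stray line dropped and a row counter added (my reconstruction, not the released code):

    # Sketch: JsonToPgsql without the undefined locals (s, sql, count).
    def JsonToPgsql( source_env_name, destination_env_name, tableName, columns )
        d = DataPipe.getFluidDb( destination_env_name )

        d.execute( "TRUNCATE TABLE #{tableName}", [] )
        d.connection.exec( "COPY #{tableName} (#{columns.join( ',' )}) FROM STDIN WITH DELIMITER AS '|' CSV;" )

        count = 0
        JSON.parse( IO.read( DataPipe.getEnvVar( source_env_name ) ) ).each do |row|
            d.connection.put_copy_data "#{columns.map { |name| row[name] }.join( '|' )}\n"
            count += 1
        end
        d.connection.put_copy_end

        DataPipe.log "#{tableName}: #{count}", true
    end
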
@@ -0,0 +1,27 @@
+ require 'net/sftp'
+ require 'uri'
+
+
+ def PathFromRemote( remoteUri, localPath, prefix )
+
+     uri = URI.parse( remoteUri )
+     DataPipe.log "remoteUri: #{remoteUri}, localPath: #{localPath}, prefix: #{prefix}", true
+
+     Net::SFTP.start( uri.host, uri.user, :password => uri.password ) do |sftp|
+         Dir.glob( "#{localPath}/#{prefix}*" ).each do |path|
+             File.rm( path )
+         end
+
+         sftp.dir.foreach(uri.path) do |entry|
+             name = entry.name
+             if name[0,prefix.length] == prefix && entry.file? then
+                 DataPipe.log "sftp.rm: #{uri.path}/#{name}"
+                 sftp.download!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+                 sftp.remove!( "#{uri.path}/#{name}" ) if name[0,prefix.length] == prefix && entry.file?
+             end
+         end
+
+     end
+
+ end
+
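Two problems are visible in this new file: `File.rm` is not a Ruby method (the core API is `File.delete`), and the one-argument form of `sftp.download!` returns the remote file's contents as a string without writing anything under `localPath`. The `sftp.rm:` log label is also misleading, and the trailing `if` guards repeat the enclosing condition. A sketch of the inner loop as presumably intended, inside the `Net::SFTP.start` block (two-argument `download!` writes to a local path):

    # Sketch: fetch-then-delete with the file actually saved under localPath.
    sftp.dir.foreach( uri.path ) do |entry|
        name = entry.name
        next unless entry.file? && name[0, prefix.length] == prefix

        DataPipe.log "sftp.download!: #{uri.path}/#{name}"
        sftp.download!( "#{uri.path}/#{name}", "#{localPath}/#{name}" )
        sftp.remove!( "#{uri.path}/#{name}" )
    end
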
@@ -0,0 +1,9 @@
+ module Telemetry
+
+
+     class StreamError < StandardError
+     end
+
+
+ end
+
@@ -0,0 +1,32 @@
+ require 'json'
+
+
+ class StreamBuilder
+
+     def initialize
+         @h = Hash['v',1,'d',{},'f',[],'l',[]]
+     end
+
+     def set( name, value )
+         @h['d'][name] = value
+         return self
+     end
+
+     def f( fieldList )
+         @h['f'] = fieldList
+
+         return self
+     end
+
+     def add( *args )
+         @h['l'].concat args
+
+         return self
+     end
+
+     def serialize
+         return @h.to_json
+     end
+
+ end
+
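`StreamBuilder` accumulates a four-key hash: `v` (format version), `d` (default fields shared by every record), `f` (per-record field names), and `l` (a flat list of values in field order, `concat`-ed by `add`). Every setter returns `self`, so calls chain. A short usage sketch:

    # Sketch: build and serialize a two-record stream.
    s = StreamBuilder.new
    s.set( 'source', 'db1' )     # default applied to every record
    s.f( %w[name qty] )          # field names for each record
    s.add( 'widget', 3 )         # values are flattened into 'l' in field order
    s.add( 'gadget', 7 )
    puts s.serialize
    # => {"v":1,"d":{"source":"db1"},"f":["name","qty"],"l":["widget",3,"gadget",7]}
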
@@ -0,0 +1,70 @@
+ module Telemetry
+
+     require 'json'
+     require 'date'
+
+     class StreamParserError < StandardError
+     end
+
+     class VersionNotSpecifiedError < StreamParserError
+     end
+     class VersionNotSupportedError < StreamParserError
+     end
+     class MissingFormatError < StreamParserError
+     end
+     class InvalidFormatError < StreamParserError
+     end
+     class MissingListError < StreamParserError
+     end
+     class MeasurementFieldNotSuppliedError < StreamParserError
+     end
+
+     class StreamParser
+
+         attr_reader :version, :list, :all_fields
+
+         def method_missing( meth, *args, &block )
+             raise VersionNotSupportedError.new if meth[0,5] == "parse"
+
+             raise NoMethodError.new( "method: #{meth}" )
+         end
+
+         def parse1
+             raise MissingFormatError.new if @r['f'].nil?
+             raise InvalidFormatError.new if @r['f'].length == 0
+             raise MissingListError.new if @r['l'].nil?
+
+             @defaults = @r['d'] || {}
+
+             @list = Array.new
+             format = @r['f']
+
+             @all_fields = @defaults.keys + @r['f']
+
+             # Break list up into chunks, each chunk being the size of the format record
+             @r['l'].each_slice( format.length ).with_index do |el,idx|
+
+                 obj = @defaults.clone
+                 format.each_with_index do |name,idx|
+                     obj[name] = el[idx]
+                 end
+                 @list << obj
+             end
+
+         end
+
+         def initialize( payload )
+             @r = JSON.parse( payload )
+
+             raise VersionNotSpecifiedError.new if @r['v'].nil?
+             @version = @r['v']
+
+             self.send "parse#{version}"
+
+             return self
+         end
+
+     end
+
+ end
+
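`StreamParser` reverses the envelope: `parse1` slices the flat `l` list into chunks of `f`'s length and merges each chunk over the `d` defaults, while an unknown version string falls into `method_missing` and raises `VersionNotSupportedError`. Parsing the payload built in the `StreamBuilder` sketch above:

    # Sketch: round-trip the StreamBuilder example through the parser.
    payload = '{"v":1,"d":{"source":"db1"},"f":["name","qty"],"l":["widget",3,"gadget",7]}'
    p = Telemetry::StreamParser.new( payload )
    p.version      # => 1
    p.all_fields   # => ["source", "name", "qty"]
    p.list         # => [{"source"=>"db1", "name"=>"widget", "qty"=>3},
                   #     {"source"=>"db1", "name"=>"gadget", "qty"=>7}]
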
@@ -0,0 +1,68 @@
+ require 'rubygems'
+ require 'rack'
+ require 'json'
+
+
+ module DataPipe
+
+     class WWW
+
+         attr_accessor :hash
+
+         def call(env)
+             @root = File.expand_path(File.dirname(__FILE__))
+             path = Rack::Utils.unescape(env['PATH_INFO'])
+             # path += 'index.html' if path == '/'
+             file = @root + "#{path}"
+
+             params = Rack::Utils.parse_nested_query(env['QUERY_STRING'])
+
+             request = Rack::Request.new(env)
+             response = Rack::Response.new()
+             parts = request.path_info.downcase.split( "/" )
+             section = parts[1]
+             case true
+             when request.request_method == "GET" && request.path_info.downcase == "/" then
+                 l = ['jobs', 'errors']
+                 [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+             when request.request_method == "GET" && section == "jobs" && parts.length == 2 then
+                 l = Array.new
+                 @hash['jobs'].hash.each do |k,job|
+                     l << Hash['name',job.name,'next',job.next,'errors',job.errorList.length]
+                 end
+                 [ 200, {'Content-Type' => 'application/json'}, l.to_json ]
+
+
+
+             when request.request_method == "GET" && section == "jobs" && parts.length == 3 then
+                 jobName = parts[2]
+                 [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+
+                 job = @hash['jobs'].byName[jobName]
+                 h = Hash['name',job.name,'next',job.next,'errorList',job.errorList]
+                 [ 200, {'Content-Type' => 'application/json'}, h.to_json ]
+
+
+             when request.request_method == "POST" && section == "jobs" && parts.length == 4 && parts[3] == 'run' then
+                 jobName = parts[2]
+                 [404, {'Content-Type' => 'text/plain'}, "Not Found"] if @hash['jobs'].byName[jobName].nil?
+
+                 @hash['jobs'].byName[ jobName ].runNow
+                 [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+
+             when request.request_method == "POST" && parts.length == 3 && parts[2] == 'run' then
+                 @hash['jobs'].byName[ parts[1] ].runNow
+                 [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+
+             else
+                 [ 200, {'Content-Type' => 'text/plain'}, "Ok" ]
+             end
+
+
+         end
+     end
+ end
+
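One bug worth flagging in the new Rack app: the two `[404, ...] if ... .nil?` lines are bare expressions, not returns, so a request for an unknown job name falls through and raises `NoMethodError` on `nil`. (The string bodies also rely on Rack tolerating a bare `String`; the Rack spec wants a body that responds to `each`, i.e. `[body]`.) A sketch of a guard that actually short-circuits, using a hypothetical helper:

    # Sketch: a not-found guard that returns instead of falling through.
    def job_response( jobs, jobName )
        job = jobs.byName[jobName]
        return [ 404, {'Content-Type' => 'text/plain'}, ["Not Found"] ] if job.nil?

        h = Hash['name', job.name, 'next', job.next, 'errorList', job.errorList]
        [ 200, {'Content-Type' => 'application/json'}, [h.to_json] ]
    end
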
@@ -1,5 +1,8 @@
  module DataPipe

+     require 'FluidDb'
+
+
      def DataPipe.log( string, verbose=false )
          type = verbose ? "VERB" : "INFO"
          if !ENV["VERBOSE"].nil? || !verbose then
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: datapipe
  version: !ruby/object:Gem::Version
-   version: 0.0.2
+   version: 0.0.3
  prerelease:
  platform: ruby
  authors:
@@ -9,11 +9,11 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-05-01 00:00:00.000000000 Z
+ date: 2014-05-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: json
-   requirement: &70106395416780 !ruby/object:Gem::Requirement
+   requirement: &70316502296420 !ruby/object:Gem::Requirement
      none: false
      requirements:
      - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
          version: '0'
    type: :runtime
    prerelease: false
-   version_requirements: *70106395416780
+   version_requirements: *70316502296420
  - !ruby/object:Gem::Dependency
    name: fluiddb
-   requirement: &70106395416340 !ruby/object:Gem::Requirement
+   requirement: &70316502295960 !ruby/object:Gem::Requirement
      none: false
      requirements:
      - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
          version: '0'
    type: :runtime
    prerelease: false
-   version_requirements: *70106395416340
+   version_requirements: *70316502295960
  - !ruby/object:Gem::Dependency
    name: parse-cron
-   requirement: &70106395415920 !ruby/object:Gem::Requirement
+   requirement: &70316502295500 !ruby/object:Gem::Requirement
      none: false
      requirements:
      - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
          version: '0'
    type: :runtime
    prerelease: false
-   version_requirements: *70106395415920
+   version_requirements: *70316502295500
  description: Helping to move data around your system
  email: guy@guyirvine.com
  executables:
@@ -53,12 +53,20 @@ extra_rdoc_files: []
  files:
  - lib/DataPipe.rb
  - lib/DbToDir.rb
+ - lib/DbToJson.rb
+ - lib/DirToDb.rb
  - lib/helper_functions.rb
  - lib/Host.rb
  - lib/Jobs.rb
+ - lib/JsonToPgsql.rb
+ - lib/PathFromRemote.rb
  - lib/PathToRemote.rb
  - lib/PgsqlToPgsql.rb
  - lib/SqlServerToPgsql.rb
+ - lib/Stream.rb
+ - lib/StreamBuilder.rb
+ - lib/StreamParser.rb
+ - lib/WWW.rb
  - bin/datapipe
  - LICENSE
  - README.md