dataflow-rb 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c772f31da3f0e1cb5c01cc6a58a44dc7c85ad9c4
4
- data.tar.gz: 91fe0d8ddffb416e94144f59dcc5ec085d2b402b
3
+ metadata.gz: da8a0cc4aa93a9a282f672e830d2ab8931e6fe58
4
+ data.tar.gz: a4a205460bcda2715d1e5bd16b4fe0982a0f652c
5
5
  SHA512:
6
- metadata.gz: 7de37f3f90d3ee18dc86243fccebf136894ee7cd7265abba1bcee15a1217b743f4fbf32722449e3d20698e19b470b831351ff773288f23e1138def65de1869c0
7
- data.tar.gz: 27008fb14a1e89e1924858912f6db56f519ddf39eb9d56b8ff2ab36f344584817310d8fd72be91586968ddc89ca8785610da99eae4865fb779ce134e4af3e36b
6
+ metadata.gz: 14cdd199d230e5048d599372798343274bc130cc906dcb4f39449bb4dd54eec89bd06047ef16560e0cedc15588d333701550bdc0ad5ba37d6511b9935d7b7d5d
7
+ data.tar.gz: 1d1658b28845cd78128d44e0f9acae8848117ebc304ec37f059f326faa2d22f9547c47405773becfa49f99a7071c6c11ce02b73926b14994f9c6c4f0c7643489
@@ -1,4 +1,17 @@
1
+ dist: trusty
2
+ sudo: required
1
3
  language: ruby
2
4
  rvm:
3
- - 2.3.3
4
- before_install: gem install bundler -v 1.14.3
5
+ - 2.3.1
6
+ before_install:
7
+ - gem install bundler -v 1.14.3
8
+ - mysql -e 'CREATE DATABASE dataflow_test;'
9
+ - psql -c 'create database dataflow_test;' -U postgres
10
+ services:
11
+ - mongodb
12
+ - mysql
13
+ - postgresql
14
+ env:
15
+ - MOJACO_MYSQL_USER=root MOJACO_POSTGRESQL_USER=postgres
16
+ addons:
17
+ postgresql: "9.6"
@@ -0,0 +1,15 @@
1
+ # Changelog
2
+
3
+
4
+
5
+ #### 0.9.2
6
+ - [2f3129c] Fix bug when joining datasets directly in SQL
7
+ - Updated the readme with some information on how to use the gem
8
+ - Set up .travis.yml
9
+
10
+ #### 0.9.1
11
+ - Fixed the gem public information
12
+
13
+ #### 0.9.0
14
+ - Extracted the open-source version
15
+
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://travis-ci.org/Phybbit/dataflow-rb.svg?branch=master)](https://travis-ci.org/Phybbit/dataflow-rb)
2
+
1
3
  # Dataflow
2
4
 
3
5
  The purpose of this gem is to help build complex dataflows and to support automating long-running batch processes.
@@ -31,9 +33,51 @@ Or install it yourself as:
31
33
 
32
34
  $ gem install dataflow-rb
33
35
 
36
+ You also need to install:
37
+ - mongodb 3.2 (required)
38
+ - postgresql (optional)
39
+ - mysql (optional)
40
+
34
41
  ## Usage
35
42
 
36
- TODO: Write usage instructions here
43
+ ```ruby
44
+ require 'dataflow-rb'
45
+
46
+ # Create a data node
47
+ node1 = Dataflow::Nodes::DataNode.create(db_name: 'test', name: 'data_source1')
48
+ node1.add(records: [{id: 1, first_name: 'hello'}])
49
+ node1.all
50
+ # => [{"id"=>1, "first_name"=>"hello"}]
51
+
52
+ node2 = Dataflow::Nodes::DataNode.create(db_name: 'test', name: 'data_source2')
53
+ node2.add(records: [{id: 1, last_name: 'world'}])
54
+ node2.all
55
+ # => [{"id"=>1, "last_name"=>"world"}]
56
+
57
+ # We will keep the results of the computation in this dataset
58
+ result_node = Dataflow::Nodes::DataNode.create(db_name: 'test', name: 'result')
59
+
60
+ # Join the 2 datasets by id:
61
+ compute_node = Dataflow::Nodes::JoinNode.create(
62
+ name: 'join',
63
+ dependency_ids: [node1, node2],
64
+ data_node_id: result_node,
65
+ key1: 'id',
66
+ key2: 'id'
67
+ )
68
+ compute_node.compute
69
+ compute_node.data_node.all
70
+ # => [{"id"=>1, "first_name"=>"hello", "last_name"=>"world"}]
71
+ compute_node.all # this is just a facade for the above
72
+ # => [{"id"=>1, "first_name"=>"hello", "last_name"=>"world"}]
73
+
74
+ # Fetch the data again later:
75
+ result_node = Dataflow::Nodes::DataNode.find_by(name: 'result')
76
+ # or the shorthand:
77
+ result_node = Dataflow.data_node('result')
78
+ result_node.all
79
+ # => [{"id"=>1, "first_name"=>"hello", "last_name"=>"world"}]
80
+ ```
37
81
 
38
82
  ## Development
39
83
 
@@ -21,8 +21,8 @@ module Dataflow
21
21
  when 'postgresql'
22
22
  host = ENV['MOJACO_POSTGRESQL_ADDRESS'] || '127.0.0.1'
23
23
  port = ENV['MOJACO_POSTGRESQL_PORT'] || '5432'
24
- user = ENV['MOJACO_POSTGRESQL_USER'] || 'eurico'
25
- password = ENV['MOJACO_POSTGRESQL_PASSWORD'] || 'eurico'
24
+ user = ENV['MOJACO_POSTGRESQL_USER']
25
+ password = ENV['MOJACO_POSTGRESQL_PASSWORD']
26
26
  end
27
27
 
28
28
  db_name ||= settings.db_name
@@ -52,7 +52,7 @@ module Dataflow
52
52
 
53
53
  private
54
54
 
55
- def execute_sql_join
55
+ def sql_join_query
56
56
  fields = required_schema.keys
57
57
  select_keys = dependencies[0].schema.keys.map { |x| "d1.#{x}" } + (dependencies[1].schema.keys - dependencies[0].schema.keys).map { |x| "d2.#{x}" }
58
58
  query = "INSERT INTO #{write_dataset_name} (#{fields.join(',')})
@@ -60,10 +60,16 @@ module Dataflow
60
60
  FROM #{dependencies[0].read_dataset_name} as d1
61
61
  INNER JOIN #{dependencies[1].read_dataset_name} as d2
62
62
  ON d1.#{key1} = d2.#{key2}"
63
- p query
64
- db_adapter.client[query].to_a
65
63
  end
66
64
 
65
+ def execute_sql_join # runs the statement built by sql_join_query and returns the result as an array
66
+ query = sql_join_query
67
+ # TODO: find a cleaner interface than reaching into the data node's db_adapter via send
68
+ sql_adapter = data_node.send(:db_adapter) # use the data node's adapter to run the query
69
+ sql_adapter.client[query].to_a # presumably to_a is what triggers execution (lazy dataset) -- confirm
70
+ end
71
+
72
+
67
73
  def compute_batch(records:)
68
74
  join(n1_records: records)
69
75
  end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Dataflow
3
- VERSION = '0.9.1'
3
+ VERSION = '0.9.2'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataflow-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eurico Doirado
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-12 00:00:00.000000000 Z
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -302,6 +302,7 @@ files:
302
302
  - ".gitignore"
303
303
  - ".rspec"
304
304
  - ".travis.yml"
305
+ - CHANGELOG.md
305
306
  - Gemfile
306
307
  - LICENSE
307
308
  - README.md