dataflow-rb 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c772f31da3f0e1cb5c01cc6a58a44dc7c85ad9c4
4
- data.tar.gz: 91fe0d8ddffb416e94144f59dcc5ec085d2b402b
3
+ metadata.gz: da8a0cc4aa93a9a282f672e830d2ab8931e6fe58
4
+ data.tar.gz: a4a205460bcda2715d1e5bd16b4fe0982a0f652c
5
5
  SHA512:
6
- metadata.gz: 7de37f3f90d3ee18dc86243fccebf136894ee7cd7265abba1bcee15a1217b743f4fbf32722449e3d20698e19b470b831351ff773288f23e1138def65de1869c0
7
- data.tar.gz: 27008fb14a1e89e1924858912f6db56f519ddf39eb9d56b8ff2ab36f344584817310d8fd72be91586968ddc89ca8785610da99eae4865fb779ce134e4af3e36b
6
+ metadata.gz: 14cdd199d230e5048d599372798343274bc130cc906dcb4f39449bb4dd54eec89bd06047ef16560e0cedc15588d333701550bdc0ad5ba37d6511b9935d7b7d5d
7
+ data.tar.gz: 1d1658b28845cd78128d44e0f9acae8848117ebc304ec37f059f326faa2d22f9547c47405773becfa49f99a7071c6c11ce02b73926b14994f9c6c4f0c7643489
@@ -1,4 +1,17 @@
1
+ dist: trusty
2
+ sudo: required
1
3
  language: ruby
2
4
  rvm:
3
- - 2.3.3
4
- before_install: gem install bundler -v 1.14.3
5
+ - 2.3.1
6
+ before_install:
7
+ - gem install bundler -v 1.14.3
8
+ - mysql -e 'CREATE DATABASE dataflow_test;'
9
+ - psql -c 'create database dataflow_test;' -U postgres
10
+ services:
11
+ - mongodb
12
+ - mysql
13
+ - postgresql
14
+ env:
15
+ - MOJACO_MYSQL_USER=root MOJACO_POSTGRESQL_USER=postgres
16
+ addons:
17
+ postgresql: "9.6"
@@ -0,0 +1,15 @@
1
+ # Changelog
2
+
3
+
4
+
5
+ #### 0.9.2
6
+ - [2f3129c] Fix bug when joining datasets directly in SQL
7
+ - Updated the readme with some information on how to use the gem
8
+ - Set up .travis.yml
9
+
10
+ #### 0.9.1
11
+ - Fixed the gem public information
12
+
13
+ #### 0.9.0
14
+ - Extracted the open-source version
15
+
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://travis-ci.org/Phybbit/dataflow-rb.svg?branch=master)](https://travis-ci.org/Phybbit/dataflow-rb)
2
+
1
3
  # Dataflow
2
4
 
3
5
  The purpose of this gem is to help build complex dataflows and support automating long-running batch processes.
@@ -31,9 +33,51 @@ Or install it yourself as:
31
33
 
32
34
  $ gem install dataflow-rb
33
35
 
36
+ You also need to install:
37
+ - mongodb 3.2 (required)
38
+ - postgresql (optional)
39
+ - mysql (optional)
40
+
34
41
  ## Usage
35
42
 
36
- TODO: Write usage instructions here
43
+ ```ruby
44
+ require 'dataflow-rb'
45
+
46
+ # Create a data node
47
+ node1 = Dataflow::Nodes::DataNode.create(db_name: 'test', name: 'data_source1')
48
+ node1.add(records: [{id: 1, first_name: 'hello'}])
49
+ node1.all
50
+ # => [{"id"=>1, "first_name"=>"hello"}]
51
+
52
+ node2 = Dataflow::Nodes::DataNode.create(db_name: 'test', name: 'data_source2')
53
+ node2.add(records: [{id: 1, last_name: 'world'}])
54
+ node2.all
55
+ # => [{"id"=>1, "last_name"=>"world"}]
56
+
57
+ # We will keep the results of the computation in this dataset
58
+ result_node = Dataflow::Nodes::DataNode.create(db_name: 'test', name: 'result')
59
+
60
+ # Join the 2 datasets by id:
61
+ compute_node = Dataflow::Nodes::JoinNode.create(
62
+ name: 'join',
63
+ dependency_ids: [node1, node2],
64
+ data_node_id: result_node,
65
+ key1: 'id',
66
+ key2: 'id'
67
+ )
68
+ compute_node.compute
69
+ compute_node.data_node.all
70
+ # => [{"id"=>1, "first_name"=>"hello", "last_name"=>"world"}]
71
+ compute_node.all # this is just a facade for the above
72
+ # => [{"id"=>1, "first_name"=>"hello", "last_name"=>"world"}]
73
+
74
+ # Fetch the data again later:
75
+ result_node = Dataflow::Nodes::DataNode.find_by(name: 'result')
76
+ # or the shorthand:
77
+ result_node = Dataflow.data_node('result')
78
+ result_node.all
79
+ # => [{"id"=>1, "first_name"=>"hello", "last_name"=>"world"}]
80
+ ```
37
81
 
38
82
  ## Development
39
83
 
@@ -21,8 +21,8 @@ module Dataflow
21
21
  when 'postgresql'
22
22
  host = ENV['MOJACO_POSTGRESQL_ADDRESS'] || '127.0.0.1'
23
23
  port = ENV['MOJACO_POSTGRESQL_PORT'] || '5432'
24
- user = ENV['MOJACO_POSTGRESQL_USER'] || 'eurico'
25
- password = ENV['MOJACO_POSTGRESQL_PASSWORD'] || 'eurico'
24
+ user = ENV['MOJACO_POSTGRESQL_USER']
25
+ password = ENV['MOJACO_POSTGRESQL_PASSWORD']
26
26
  end
27
27
 
28
28
  db_name ||= settings.db_name
@@ -52,7 +52,7 @@ module Dataflow
52
52
 
53
53
  private
54
54
 
55
- def execute_sql_join
55
+ def sql_join_query
56
56
  fields = required_schema.keys
57
57
  select_keys = dependencies[0].schema.keys.map { |x| "d1.#{x}" } + (dependencies[1].schema.keys - dependencies[0].schema.keys).map { |x| "d2.#{x}" }
58
58
  query = "INSERT INTO #{write_dataset_name} (#{fields.join(',')})
@@ -60,10 +60,16 @@ module Dataflow
60
60
  FROM #{dependencies[0].read_dataset_name} as d1
61
61
  INNER JOIN #{dependencies[1].read_dataset_name} as d2
62
62
  ON d1.#{key1} = d2.#{key2}"
63
- p query
64
- db_adapter.client[query].to_a
65
63
  end
66
64
 
65
+ def execute_sql_join
66
+ query = sql_join_query
67
+ # TODO: work on a better way to interface this
68
+ sql_adapter = data_node.send(:db_adapter)
69
+ sql_adapter.client[query].to_a
70
+ end
71
+
72
+
67
73
  def compute_batch(records:)
68
74
  join(n1_records: records)
69
75
  end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Dataflow
3
- VERSION = '0.9.1'
3
+ VERSION = '0.9.2'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataflow-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eurico Doirado
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-12 00:00:00.000000000 Z
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -302,6 +302,7 @@ files:
302
302
  - ".gitignore"
303
303
  - ".rspec"
304
304
  - ".travis.yml"
305
+ - CHANGELOG.md
305
306
  - Gemfile
306
307
  - LICENSE
307
308
  - README.md