datamancer 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/datamancer.rb +1 -0
- data/lib/datamancer/datastream.rb +18 -0
- data/lib/datamancer/extract.rb +14 -5
- data/lib/datamancer/load.rb +46 -1
- data/lib/datamancer/transform.rb +38 -2
- data/lib/datamancer/version.rb +1 -1
- data/spec/datastream_spec.rb +20 -0
- metadata +34 -45
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 89a4e4a23249888271ce7e622cef69ac4872369a
|
4
|
+
data.tar.gz: e74f08a6e9b943a74e1527d39ded6ffdd96adcf3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 44b412595b669021ecc519d5d9e0eb70b9c8160a92480e5960c759d64afd55f7f0145393aa311d0831a668fb34cd0f1421245571adbd985b8681d90f114ee43b
|
7
|
+
data.tar.gz: 41c415bfd5434eac51d8d210fd29c5bb8ff92e959b8df9c93e208ca8d61898739e3cd801c7847cfbe645b4b50b81740f0785e34d65898e084423934307d6d7e9
|
data/lib/datamancer.rb
CHANGED
data/lib/datamancer/extract.rb
CHANGED
@@ -69,20 +69,29 @@ module Datamancer
|
|
69
69
|
csv
|
70
70
|
|
71
71
|
when Hash
|
72
|
-
|
72
|
+
|
73
|
+
#TODO: Test for column names with spaces.
|
74
|
+
#TODO: Implement all the SQL escaping cases.
|
75
|
+
|
76
|
+
columns = @fields.map { |field, mapping| "[#{mapping}] AS [#{field}]" }.join(', ')
|
73
77
|
|
74
78
|
@fields.keys.each_with_index do |field, index|
|
75
79
|
@fields[field] = index
|
76
80
|
end
|
77
81
|
|
78
|
-
# TODO: Test top.
|
82
|
+
# TODO: Test top, distinct, where.
|
79
83
|
# TODO: Top for CSV.
|
80
84
|
# TODO: Top for support several databases.
|
81
85
|
|
82
|
-
if args[:
|
83
|
-
db.select_rows(
|
86
|
+
if args[:distinct]
|
87
|
+
db.select_rows(
|
88
|
+
"SELECT DISTINCT #{columns} FROM #{table}")
|
89
|
+
elsif args[:top]
|
90
|
+
db.select_rows(
|
91
|
+
"SELECT TOP #{args[:top]} #{columns} FROM #{table} #{'WHERE ' + args[:where] if args[:where]}")
|
84
92
|
else
|
85
|
-
db.select_rows(
|
93
|
+
db.select_rows(
|
94
|
+
"SELECT #{columns} FROM #{table} #{'WHERE ' + args[:where] if args[:where]}")
|
86
95
|
end
|
87
96
|
end
|
88
97
|
|
data/lib/datamancer/load.rb
CHANGED
@@ -1,5 +1,50 @@
|
|
1
1
|
module Datamancer
|
2
2
|
|
3
|
+
# Connects to the destination database and yields once per input row,
# exposing each field of the current row as a reader method on the receiver.
#
# While the block runs, these helpers are available on the receiver:
# * one reader per key of the first input row, named after the key with
#   spaces replaced by underscores and lowercased (same rule as +transform+);
# * +db+, which returns the current ActiveRecord connection;
# * +query(sql)+, which executes +sql+ on that connection.
#
# Unlike +load+, no table is required here (the table check is disabled).
#
# @param input [#first, #each] rows, each one a Hash-like object of field => value
# @param args [Hash] options; +:to+ is the connection specification handed to
#   ActiveRecord::Base.establish_connection
# @yield once per row, with no block arguments
# @return whatever +input.each+ returns (the input itself for an Array)
# @raise [ArgumentError] if +args+ is not a Hash or has no +:to+ entry
def raw(input, args)
  unless args.is_a?(Hash) && args[:to]
    raise ArgumentError,
          'Raw requires a destination, i.e. raw(data, to: destination)'
  end

  ::ActiveRecord::Base.establish_connection(args[:to])

  # TODO: Test this.
  # TODO: Method-overriding safeguard.

  # Readers are derived from the first row only; an empty input simply
  # defines none instead of failing on nil.
  (input.first || {}).each_key do |key|
    define_singleton_method key.to_s.gsub(' ', '_').downcase do
      # Some methods applied to fields might modify the original fields.
      # Fields could be duplicated (@input_row[key].dup) should this become
      # a common problem.
      @input_row[key]
    end
  end

  define_singleton_method(:db) { ::ActiveRecord::Base.connection }

  define_singleton_method(:query) do |sql|
    ::ActiveRecord::Base.connection.execute sql
  end

  input.each do |row|
    # The readers defined above look the current row up through this ivar.
    @input_row = row

    yield if block_given?
  end
end
|
46
|
+
|
47
|
+
|
3
48
|
def load input, args
|
4
49
|
|
5
50
|
raise ArgumentError,
|
@@ -64,7 +109,7 @@ module Datamancer
|
|
64
109
|
table = args[:table] || args[:to][:table]
|
65
110
|
|
66
111
|
raise ArgumentError,
|
67
|
-
'Load requires a database table, i.e. load(to: destination, table: table_name)' unless table
|
112
|
+
'Load requires a database table, i.e. load(data, to: destination, table: table_name)' unless table
|
68
113
|
|
69
114
|
::ActiveRecord::Base.connection.delete("DELETE FROM #{table}") unless args[:append]
|
70
115
|
batch_size = args[:batch] || 1000
|
data/lib/datamancer/transform.rb
CHANGED
@@ -1,5 +1,35 @@
|
|
1
1
|
module Datamancer
|
2
2
|
|
3
|
+
# Appends the rows of +right+ to the rows of +left+, giving every output row
# the same set of columns.
#
# The columns are the keys of the first row of each side (merged, with the
# right side winning on type when both define a key). Keys that only appear
# in later rows are not part of the output. A cell that is missing or nil is
# filled with a default chosen from the sample value found in those first
# rows: '' for strings, 0 for numbers, nil otherwise.
#
# @param left [Array<Hash>] first batch of rows (may be empty)
# @param right [Array<Hash>] second batch of rows (may be empty)
# @return [Array<Hash>] new row hashes, left rows first, then right rows
def add(left, right)
  # Either side may be empty; fall back to an empty template for that side.
  template = (left.first || {}).merge(right.first || {})

  (left + right).map do |input_row|
    template.each_with_object({}) do |(key, sample), output_row|
      value = input_row[key]

      output_row[key] =
        if !value.nil?
          # Only nil counts as "missing", so an explicit false is preserved.
          value
        else
          case sample
          # A fresh string per cell, so padded cells never share one object.
          when String then ''.dup
          when Numeric then 0
          end
        end
    end
  end
end
|
32
|
+
|
3
33
|
def join left, right, attribute
|
4
34
|
|
5
35
|
attribute = attribute.to_sym
|
@@ -59,6 +89,8 @@ module Datamancer
|
|
59
89
|
|
60
90
|
def transform input, args = {}
|
61
91
|
|
92
|
+
# TODO: Write messages that better explain these errors.
|
93
|
+
|
62
94
|
if args[:join]
|
63
95
|
raise ArgumentError unless args[:on]
|
64
96
|
raise ArgumentError unless input.first.keys.include?(args[:on].to_sym)
|
@@ -67,6 +99,10 @@ module Datamancer
|
|
67
99
|
input = join input, args[:join], args[:on]
|
68
100
|
end
|
69
101
|
|
102
|
+
if args[:add]
|
103
|
+
input = add input, args[:add]
|
104
|
+
end
|
105
|
+
|
70
106
|
if args[:unique]
|
71
107
|
input = unique input, args[:unique]
|
72
108
|
end
|
@@ -74,13 +110,13 @@ module Datamancer
|
|
74
110
|
# TODO: Method-overriding safeguard.
|
75
111
|
|
76
112
|
input.first.each_key do |key|
|
77
|
-
define_singleton_method key.downcase do
|
113
|
+
define_singleton_method key.to_s.gsub(' ', '_').downcase do
|
78
114
|
|
79
115
|
# Some methods applied to fields might modify the original fields.
|
80
116
|
# Fields could be duplicated in case this be a common problem.
|
81
117
|
|
82
118
|
#@input_row[key].dup
|
83
|
-
|
119
|
+
|
84
120
|
@input_row[key]
|
85
121
|
end
|
86
122
|
end
|
data/lib/datamancer/version.rb
CHANGED
data/spec/datastream_spec.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Exercises row filtering on a datastream, represented here as a plain Array
# of Hashes. NOTE(review): `where` is presumably added to Array by
# lib/datamancer/datastream.rb — confirm, as that file is not visible here.
describe Datamancer do

  before(:all) do
    @datastream = [
      { a: 1, b: 2, c: 3 },
      { a: 1, b: 1, c: 3 },
      { a: 1, b: 1, c: 1 }
    ]
  end

  it 'selects rows that match a criteria' do
    # Only rows where both :a and :b equal 1 should remain, in input order.
    matching_rows = @datastream.where(a: 1, b: 1)

    expected_rows = [
      { a: 1, b: 1, c: 3 },
      { a: 1, b: 1, c: 1 }
    ]

    expect(matching_rows).to eq(expected_rows)
  end
end
|
metadata
CHANGED
@@ -1,112 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datamancer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matías Battocchia
|
9
|
-
autorequire:
|
8
|
+
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-03-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
|
-
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
17
16
|
requirements:
|
18
17
|
- - ~>
|
19
18
|
- !ruby/object:Gem::Version
|
20
19
|
version: '1.3'
|
21
|
-
|
22
|
-
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
|
-
none: false
|
28
|
-
prerelease: false
|
29
|
-
type: :development
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
|
-
version_requirements: !ruby/object:Gem::Requirement
|
33
|
-
requirements:
|
34
|
-
- - '>='
|
35
|
-
- !ruby/object:Gem::Version
|
36
|
-
version: '0'
|
37
|
-
none: false
|
38
29
|
requirement: !ruby/object:Gem::Requirement
|
39
30
|
requirements:
|
40
31
|
- - '>='
|
41
32
|
- !ruby/object:Gem::Version
|
42
33
|
version: '0'
|
43
|
-
none: false
|
44
|
-
prerelease: false
|
45
34
|
type: :development
|
46
|
-
|
47
|
-
name: rspec
|
35
|
+
prerelease: false
|
48
36
|
version_requirements: !ruby/object:Gem::Requirement
|
49
37
|
requirements:
|
50
38
|
- - '>='
|
51
39
|
- !ruby/object:Gem::Version
|
52
40
|
version: '0'
|
53
|
-
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
54
43
|
requirement: !ruby/object:Gem::Requirement
|
55
44
|
requirements:
|
56
45
|
- - '>='
|
57
46
|
- !ruby/object:Gem::Version
|
58
47
|
version: '0'
|
59
|
-
none: false
|
60
|
-
prerelease: false
|
61
48
|
type: :development
|
62
|
-
|
63
|
-
name: activerecord
|
49
|
+
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
52
|
- - '>='
|
67
53
|
- !ruby/object:Gem::Version
|
68
54
|
version: '0'
|
69
|
-
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activerecord
|
70
57
|
requirement: !ruby/object:Gem::Requirement
|
71
58
|
requirements:
|
72
59
|
- - '>='
|
73
60
|
- !ruby/object:Gem::Version
|
74
61
|
version: '0'
|
75
|
-
none: false
|
76
|
-
prerelease: false
|
77
62
|
type: :development
|
78
|
-
|
79
|
-
name: sqlite3
|
63
|
+
prerelease: false
|
80
64
|
version_requirements: !ruby/object:Gem::Requirement
|
81
65
|
requirements:
|
82
66
|
- - '>='
|
83
67
|
- !ruby/object:Gem::Version
|
84
68
|
version: '0'
|
85
|
-
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sqlite3
|
86
71
|
requirement: !ruby/object:Gem::Requirement
|
87
72
|
requirements:
|
88
73
|
- - '>='
|
89
74
|
- !ruby/object:Gem::Version
|
90
75
|
version: '0'
|
91
|
-
none: false
|
92
|
-
prerelease: false
|
93
76
|
type: :development
|
94
|
-
|
95
|
-
name: activerecord-jdbcsqlite3-adapter
|
77
|
+
prerelease: false
|
96
78
|
version_requirements: !ruby/object:Gem::Requirement
|
97
79
|
requirements:
|
98
80
|
- - '>='
|
99
81
|
- !ruby/object:Gem::Version
|
100
82
|
version: '0'
|
101
|
-
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activerecord-jdbcsqlite3-adapter
|
102
85
|
requirement: !ruby/object:Gem::Requirement
|
103
86
|
requirements:
|
104
87
|
- - '>='
|
105
88
|
- !ruby/object:Gem::Version
|
106
89
|
version: '0'
|
107
|
-
none: false
|
108
|
-
prerelease: false
|
109
90
|
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
110
97
|
description: A magical extract, transform, load (ETL) library for data integration.
|
111
98
|
email:
|
112
99
|
- matias@riseup.net
|
@@ -121,6 +108,7 @@ files:
|
|
121
108
|
- Rakefile
|
122
109
|
- datamancer.gemspec
|
123
110
|
- lib/datamancer.rb
|
111
|
+
- lib/datamancer/datastream.rb
|
124
112
|
- lib/datamancer/extract.rb
|
125
113
|
- lib/datamancer/load.rb
|
126
114
|
- lib/datamancer/transform.rb
|
@@ -131,6 +119,7 @@ files:
|
|
131
119
|
- spec/data/source.csv
|
132
120
|
- spec/data/source.sqlite3
|
133
121
|
- spec/data/source2.csv
|
122
|
+
- spec/datastream_spec.rb
|
134
123
|
- spec/extract_spec.rb
|
135
124
|
- spec/load_spec.rb
|
136
125
|
- spec/spec_helper.rb
|
@@ -138,7 +127,8 @@ files:
|
|
138
127
|
homepage: https://github.com/matiasbattocchia/datamancer
|
139
128
|
licenses:
|
140
129
|
- MIT
|
141
|
-
|
130
|
+
metadata: {}
|
131
|
+
post_install_message:
|
142
132
|
rdoc_options: []
|
143
133
|
require_paths:
|
144
134
|
- lib
|
@@ -147,18 +137,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
147
137
|
- - '>='
|
148
138
|
- !ruby/object:Gem::Version
|
149
139
|
version: '0'
|
150
|
-
none: false
|
151
140
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
141
|
requirements:
|
153
142
|
- - '>='
|
154
143
|
- !ruby/object:Gem::Version
|
155
144
|
version: '0'
|
156
|
-
none: false
|
157
145
|
requirements: []
|
158
|
-
rubyforge_project:
|
159
|
-
rubygems_version:
|
160
|
-
signing_key:
|
161
|
-
specification_version:
|
146
|
+
rubyforge_project:
|
147
|
+
rubygems_version: 2.0.2
|
148
|
+
signing_key:
|
149
|
+
specification_version: 4
|
162
150
|
summary: ''
|
163
151
|
test_files:
|
164
152
|
- spec/data/destination.csv
|
@@ -167,6 +155,7 @@ test_files:
|
|
167
155
|
- spec/data/source.csv
|
168
156
|
- spec/data/source.sqlite3
|
169
157
|
- spec/data/source2.csv
|
158
|
+
- spec/datastream_spec.rb
|
170
159
|
- spec/extract_spec.rb
|
171
160
|
- spec/load_spec.rb
|
172
161
|
- spec/spec_helper.rb
|