datamancer 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/datamancer.rb +1 -0
- data/lib/datamancer/datastream.rb +18 -0
- data/lib/datamancer/extract.rb +14 -5
- data/lib/datamancer/load.rb +46 -1
- data/lib/datamancer/transform.rb +38 -2
- data/lib/datamancer/version.rb +1 -1
- data/spec/datastream_spec.rb +20 -0
- metadata +34 -45
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 89a4e4a23249888271ce7e622cef69ac4872369a
|
4
|
+
data.tar.gz: e74f08a6e9b943a74e1527d39ded6ffdd96adcf3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 44b412595b669021ecc519d5d9e0eb70b9c8160a92480e5960c759d64afd55f7f0145393aa311d0831a668fb34cd0f1421245571adbd985b8681d90f114ee43b
|
7
|
+
data.tar.gz: 41c415bfd5434eac51d8d210fd29c5bb8ff92e959b8df9c93e208ca8d61898739e3cd801c7847cfbe645b4b50b81740f0785e34d65898e084423934307d6d7e9
|
data/lib/datamancer.rb
CHANGED
data/lib/datamancer/extract.rb
CHANGED
@@ -69,20 +69,29 @@ module Datamancer
|
|
69
69
|
csv
|
70
70
|
|
71
71
|
when Hash
|
72
|
-
|
72
|
+
|
73
|
+
#TODO: Test for column names with spaces.
|
74
|
+
#TODO: Implement all the SQL escaping cases.
|
75
|
+
|
76
|
+
columns = @fields.map { |field, mapping| "[#{mapping}] AS [#{field}]" }.join(', ')
|
73
77
|
|
74
78
|
@fields.keys.each_with_index do |field, index|
|
75
79
|
@fields[field] = index
|
76
80
|
end
|
77
81
|
|
78
|
-
# TODO: Test top.
|
82
|
+
# TODO: Test top, distinct, where.
|
79
83
|
# TODO: Top for CSV.
|
80
84
|
# TODO: Top for support several databases.
|
81
85
|
|
82
|
-
if args[:top]
|
83
|
-
db.select_rows(
|
86
|
+
if args[:distinct]
|
87
|
+
db.select_rows(
|
88
|
+
"SELECT DISTINCT #{columns} FROM #{table}")
|
89
|
+
elsif args[:top]
|
90
|
+
db.select_rows(
|
91
|
+
"SELECT TOP #{args[:top]} #{columns} FROM #{table} #{'WHERE ' + args[:where] if args[:where]}")
|
84
92
|
else
|
85
|
-
db.select_rows(
|
93
|
+
db.select_rows(
|
94
|
+
"SELECT #{columns} FROM #{table} #{'WHERE ' + args[:where] if args[:where]}")
|
86
95
|
end
|
87
96
|
end
|
88
97
|
|
data/lib/datamancer/load.rb
CHANGED
@@ -1,5 +1,50 @@
|
|
1
1
|
module Datamancer
|
2
2
|
|
3
|
+
def raw input, args
|
4
|
+
|
5
|
+
raise ArgumentError,
|
6
|
+
'Raw requires a destination, i.e. raw(data, to: destination)' unless
|
7
|
+
args.is_a?(Hash) && args[:to]
|
8
|
+
|
9
|
+
::ActiveRecord::Base.establish_connection(args[:to])
|
10
|
+
|
11
|
+
# TODO: Test this.
|
12
|
+
|
13
|
+
# table = args[:table] || args[:to][:table]
|
14
|
+
|
15
|
+
# raise ArgumentError,
|
16
|
+
# 'Raw requires a database table, i.e. raw(data, to: destination, table: table_name)' unless table
|
17
|
+
|
18
|
+
# TODO: Method-overriding safeguard.
|
19
|
+
|
20
|
+
input.first.each_key do |key|
|
21
|
+
define_singleton_method key.downcase do
|
22
|
+
|
23
|
+
# Some methods applied to fields might modify the original fields.
|
24
|
+
# Fields could be duplicated in case this be a common problem.
|
25
|
+
|
26
|
+
#@input_row[key].dup
|
27
|
+
|
28
|
+
@input_row[key]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
define_singleton_method :db do
|
33
|
+
::ActiveRecord::Base.connection
|
34
|
+
end
|
35
|
+
|
36
|
+
define_singleton_method :query do |query|
|
37
|
+
::ActiveRecord::Base.connection.execute query
|
38
|
+
end
|
39
|
+
|
40
|
+
input.each do |row|
|
41
|
+
@input_row = row
|
42
|
+
|
43
|
+
yield if block_given?
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
|
3
48
|
def load input, args
|
4
49
|
|
5
50
|
raise ArgumentError,
|
@@ -64,7 +109,7 @@ module Datamancer
|
|
64
109
|
table = args[:table] || args[:to][:table]
|
65
110
|
|
66
111
|
raise ArgumentError,
|
67
|
-
'Load requires a database table, i.e. load(to: destination, table: table_name)' unless table
|
112
|
+
'Load requires a database table, i.e. load(data, to: destination, table: table_name)' unless table
|
68
113
|
|
69
114
|
::ActiveRecord::Base.connection.delete("DELETE FROM #{table}") unless args[:append]
|
70
115
|
batch_size = args[:batch] || 1000
|
data/lib/datamancer/transform.rb
CHANGED
@@ -1,5 +1,35 @@
|
|
1
1
|
module Datamancer
|
2
2
|
|
3
|
+
def add left, right
|
4
|
+
first_row = left.first.merge right.first
|
5
|
+
|
6
|
+
keys = first_row.keys
|
7
|
+
|
8
|
+
valores_por_defecto = {}
|
9
|
+
|
10
|
+
keys.each do |key|
|
11
|
+
valores_por_defecto[key] = case first_row[key]
|
12
|
+
when String then ''
|
13
|
+
when Numeric then 0
|
14
|
+
else nil end
|
15
|
+
end
|
16
|
+
|
17
|
+
output = []
|
18
|
+
|
19
|
+
(left + right).each do |input_row|
|
20
|
+
|
21
|
+
output_row = {}
|
22
|
+
|
23
|
+
keys.each do |key|
|
24
|
+
output_row[key] = input_row[key] || valores_por_defecto[key]
|
25
|
+
end
|
26
|
+
|
27
|
+
output << output_row
|
28
|
+
end
|
29
|
+
|
30
|
+
output
|
31
|
+
end
|
32
|
+
|
3
33
|
def join left, right, attribute
|
4
34
|
|
5
35
|
attribute = attribute.to_sym
|
@@ -59,6 +89,8 @@ module Datamancer
|
|
59
89
|
|
60
90
|
def transform input, args = {}
|
61
91
|
|
92
|
+
# TODO: Mensajes que expliquen mejor los estos errores.
|
93
|
+
|
62
94
|
if args[:join]
|
63
95
|
raise ArgumentError unless args[:on]
|
64
96
|
raise ArgumentError unless input.first.keys.include?(args[:on].to_sym)
|
@@ -67,6 +99,10 @@ module Datamancer
|
|
67
99
|
input = join input, args[:join], args[:on]
|
68
100
|
end
|
69
101
|
|
102
|
+
if args[:add]
|
103
|
+
input = add input, args[:add]
|
104
|
+
end
|
105
|
+
|
70
106
|
if args[:unique]
|
71
107
|
input = unique input, args[:unique]
|
72
108
|
end
|
@@ -74,13 +110,13 @@ module Datamancer
|
|
74
110
|
# TODO: Method-overriding safeguard.
|
75
111
|
|
76
112
|
input.first.each_key do |key|
|
77
|
-
define_singleton_method key.downcase do
|
113
|
+
define_singleton_method key.to_s.gsub(' ', '_').downcase do
|
78
114
|
|
79
115
|
# Some methods applied to fields might modify the original fields.
|
80
116
|
# Fields could be duplicated in case this be a common problem.
|
81
117
|
|
82
118
|
#@input_row[key].dup
|
83
|
-
|
119
|
+
|
84
120
|
@input_row[key]
|
85
121
|
end
|
86
122
|
end
|
data/lib/datamancer/version.rb
CHANGED
data/spec/datastream_spec.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Datamancer do
|
4
|
+
|
5
|
+
before(:all) do
|
6
|
+
@datastream = [
|
7
|
+
{a: 1, b: 2, c: 3},
|
8
|
+
{a: 1, b: 1, c: 3},
|
9
|
+
{a: 1, b: 1, c: 1}
|
10
|
+
]
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'selects rows that match a criteria' do
|
14
|
+
expect(
|
15
|
+
@datastream.where(a: 1, b: 1)
|
16
|
+
).to eq(
|
17
|
+
[{a: 1, b: 1, c: 3},
|
18
|
+
{a: 1, b: 1, c: 1}])
|
19
|
+
end
|
20
|
+
end
|
metadata
CHANGED
@@ -1,112 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datamancer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matías Battocchia
|
9
|
-
autorequire:
|
8
|
+
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-03-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
|
-
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
17
16
|
requirements:
|
18
17
|
- - ~>
|
19
18
|
- !ruby/object:Gem::Version
|
20
19
|
version: '1.3'
|
21
|
-
|
22
|
-
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
|
-
none: false
|
28
|
-
prerelease: false
|
29
|
-
type: :development
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
|
-
version_requirements: !ruby/object:Gem::Requirement
|
33
|
-
requirements:
|
34
|
-
- - '>='
|
35
|
-
- !ruby/object:Gem::Version
|
36
|
-
version: '0'
|
37
|
-
none: false
|
38
29
|
requirement: !ruby/object:Gem::Requirement
|
39
30
|
requirements:
|
40
31
|
- - '>='
|
41
32
|
- !ruby/object:Gem::Version
|
42
33
|
version: '0'
|
43
|
-
none: false
|
44
|
-
prerelease: false
|
45
34
|
type: :development
|
46
|
-
|
47
|
-
name: rspec
|
35
|
+
prerelease: false
|
48
36
|
version_requirements: !ruby/object:Gem::Requirement
|
49
37
|
requirements:
|
50
38
|
- - '>='
|
51
39
|
- !ruby/object:Gem::Version
|
52
40
|
version: '0'
|
53
|
-
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
54
43
|
requirement: !ruby/object:Gem::Requirement
|
55
44
|
requirements:
|
56
45
|
- - '>='
|
57
46
|
- !ruby/object:Gem::Version
|
58
47
|
version: '0'
|
59
|
-
none: false
|
60
|
-
prerelease: false
|
61
48
|
type: :development
|
62
|
-
|
63
|
-
name: activerecord
|
49
|
+
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
52
|
- - '>='
|
67
53
|
- !ruby/object:Gem::Version
|
68
54
|
version: '0'
|
69
|
-
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activerecord
|
70
57
|
requirement: !ruby/object:Gem::Requirement
|
71
58
|
requirements:
|
72
59
|
- - '>='
|
73
60
|
- !ruby/object:Gem::Version
|
74
61
|
version: '0'
|
75
|
-
none: false
|
76
|
-
prerelease: false
|
77
62
|
type: :development
|
78
|
-
|
79
|
-
name: sqlite3
|
63
|
+
prerelease: false
|
80
64
|
version_requirements: !ruby/object:Gem::Requirement
|
81
65
|
requirements:
|
82
66
|
- - '>='
|
83
67
|
- !ruby/object:Gem::Version
|
84
68
|
version: '0'
|
85
|
-
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sqlite3
|
86
71
|
requirement: !ruby/object:Gem::Requirement
|
87
72
|
requirements:
|
88
73
|
- - '>='
|
89
74
|
- !ruby/object:Gem::Version
|
90
75
|
version: '0'
|
91
|
-
none: false
|
92
|
-
prerelease: false
|
93
76
|
type: :development
|
94
|
-
|
95
|
-
name: activerecord-jdbcsqlite3-adapter
|
77
|
+
prerelease: false
|
96
78
|
version_requirements: !ruby/object:Gem::Requirement
|
97
79
|
requirements:
|
98
80
|
- - '>='
|
99
81
|
- !ruby/object:Gem::Version
|
100
82
|
version: '0'
|
101
|
-
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activerecord-jdbcsqlite3-adapter
|
102
85
|
requirement: !ruby/object:Gem::Requirement
|
103
86
|
requirements:
|
104
87
|
- - '>='
|
105
88
|
- !ruby/object:Gem::Version
|
106
89
|
version: '0'
|
107
|
-
none: false
|
108
|
-
prerelease: false
|
109
90
|
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
110
97
|
description: A magical extract, transform, load (ETL) library for data integration.
|
111
98
|
email:
|
112
99
|
- matias@riseup.net
|
@@ -121,6 +108,7 @@ files:
|
|
121
108
|
- Rakefile
|
122
109
|
- datamancer.gemspec
|
123
110
|
- lib/datamancer.rb
|
111
|
+
- lib/datamancer/datastream.rb
|
124
112
|
- lib/datamancer/extract.rb
|
125
113
|
- lib/datamancer/load.rb
|
126
114
|
- lib/datamancer/transform.rb
|
@@ -131,6 +119,7 @@ files:
|
|
131
119
|
- spec/data/source.csv
|
132
120
|
- spec/data/source.sqlite3
|
133
121
|
- spec/data/source2.csv
|
122
|
+
- spec/datastream_spec.rb
|
134
123
|
- spec/extract_spec.rb
|
135
124
|
- spec/load_spec.rb
|
136
125
|
- spec/spec_helper.rb
|
@@ -138,7 +127,8 @@ files:
|
|
138
127
|
homepage: https://github.com/matiasbattocchia/datamancer
|
139
128
|
licenses:
|
140
129
|
- MIT
|
141
|
-
|
130
|
+
metadata: {}
|
131
|
+
post_install_message:
|
142
132
|
rdoc_options: []
|
143
133
|
require_paths:
|
144
134
|
- lib
|
@@ -147,18 +137,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
147
137
|
- - '>='
|
148
138
|
- !ruby/object:Gem::Version
|
149
139
|
version: '0'
|
150
|
-
none: false
|
151
140
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
141
|
requirements:
|
153
142
|
- - '>='
|
154
143
|
- !ruby/object:Gem::Version
|
155
144
|
version: '0'
|
156
|
-
none: false
|
157
145
|
requirements: []
|
158
|
-
rubyforge_project:
|
159
|
-
rubygems_version:
|
160
|
-
signing_key:
|
161
|
-
specification_version:
|
146
|
+
rubyforge_project:
|
147
|
+
rubygems_version: 2.0.2
|
148
|
+
signing_key:
|
149
|
+
specification_version: 4
|
162
150
|
summary: ''
|
163
151
|
test_files:
|
164
152
|
- spec/data/destination.csv
|
@@ -167,6 +155,7 @@ test_files:
|
|
167
155
|
- spec/data/source.csv
|
168
156
|
- spec/data/source.sqlite3
|
169
157
|
- spec/data/source2.csv
|
158
|
+
- spec/datastream_spec.rb
|
170
159
|
- spec/extract_spec.rb
|
171
160
|
- spec/load_spec.rb
|
172
161
|
- spec/spec_helper.rb
|