setl_tool 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/setl_tool/extract.rb +51 -0
- data/lib/setl_tool/load.rb +36 -0
- data/lib/setl_tool/transform.rb +70 -0
- data/lib/setl_tool.rb +42 -0
- metadata +49 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
# Licensed to you under one or more contributor license agreements.
|
2
|
+
# See the NOTICE file distributed with this work for additional
|
3
|
+
# information regarding copyright ownership. Ernesto Angel Celis de la
|
4
|
+
# Fuente licenses this file to you under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with the
|
6
|
+
# License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
require "csv"
|
17
|
+
|
18
|
+
# Extract data into an in-memory table
|
19
|
+
class Extract

  # In-memory table filled in by the last successful call to #data
  # (nil until #data has been called).
  attr_reader :table

  # Remembers where and how to read the source; nothing is read here.
  #
  # src_type:: source format; currently only "csv" is supported
  # headers::  forwarded to CSV.read as its :headers option; true if the
  #            source has column names in its first row
  # src_path:: path to the csv file
  def initialize(src_type,
                 headers,
                 src_path)
    @src_type = src_type
    @headers = headers
    @src_path = src_path
  end

  # Reads the source described by the constructor arguments, stores the
  # result in @table and returns it.
  #
  # Raises ArgumentError when @src_type is not a supported format; earlier
  # versions printed a debug string to stdout and returned nil instead,
  # which hid the problem from the caller.
  def data
    case @src_type
    when "csv"
      # TODO FIX headers: with headers enabled CSV.read returns a
      # CSV::Table, otherwise an Array of Arrays.
      @table = CSV.read(@src_path, headers: @headers)
    else
      raise ArgumentError, "unsupported source type: #{@src_type.inspect}"
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Licensed to you under one or more contributor license agreements.
|
2
|
+
# See the NOTICE file distributed with this work for additional
|
3
|
+
# information regarding copyright ownership. Ernesto Angel Celis de la
|
4
|
+
# Fuente licenses this file to you under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with the
|
6
|
+
# License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
require 'pg'
|
17
|
+
|
18
|
+
# Thin wrapper around a PostgreSQL connection used to run the generated
# statements.
class Load
  # Connection settings as given to the constructor.
  attr_accessor :db_host
  attr_accessor :db_name
  attr_accessor :db_user
  attr_accessor :db_passwd
  # Connection object returned by PG.connect.
  attr_accessor :conn

  # Opens the connection immediately and switches it to verbose error
  # reporting.
  #
  # The four settings are also kept on the instance so that the accessors
  # declared above return what was passed in; previously they were never
  # assigned and always returned nil.
  def initialize(db_host, db_name, db_user, db_passwd)
    @db_host = db_host
    @db_name = db_name
    @db_user = db_user
    @db_passwd = db_passwd
    @conn = PG.connect(host: db_host,
                       dbname: db_name,
                       user: db_user,
                       password: db_passwd)
    @conn.set_error_verbosity(PG::PQERRORS_VERBOSE)
  end

  # Sends stmt to the server unchanged and returns whatever the
  # connection's exec returns. No quoting or validation is done here, so
  # stmt must already be a safe, complete SQL statement.
  def insert(stmt)
    @conn.exec(stmt)
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# Licensed to you under one or more contributor license agreements.
|
2
|
+
# See the NOTICE file distributed with this work for additional
|
3
|
+
# information regarding copyright ownership. Ernesto Angel Celis de la
|
4
|
+
# Fuente licenses this file to you under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with the
|
6
|
+
# License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
# Transforms from an in memory table to a known SQL dialect statement
|
17
|
+
class Transform

  # Target table in the RDBMS engine; defaults to "<src_type>_import"
  attr_accessor :target_table
  # In-memory source data table: an enumerable of rows, where each row is
  # either an Array of fields or responds to #fields (e.g. CSV::Row)
  attr_accessor :table
  # don't use, reserved (one comma-separated string of SQL literals per row)
  attr_reader :values
  # SQL statement built by the last call to #data, or nil when there was
  # nothing to insert
  attr_reader :stmt

  # Currently only CSV to PostgreSQL is supported
  #
  # src_type::  format of the in-memory table ("csv")
  # target_db:: SQL dialect to generate ("pg")
  def initialize(src_type = "csv",
                 target_db = "pg")
    @src_type = src_type
    @target_db = target_db
    @target_table = "#{@src_type}_import"
    @values = []
  end

  # Execute data transformation.
  #
  # Converts @table into SQL literals and, for the "pg" target, builds and
  # returns the INSERT statement (also available through #stmt). Returns
  # nil for an unknown target_db or when the table has no rows.
  def data
    case @src_type
    when "csv" then process_csv
    end
    case @target_db
    when "pg" then to_pg
    end
  end

  private

  # Rebuilds @values from @table. The list is replaced rather than
  # appended to, so calling #data twice does not duplicate rows.
  def process_csv
    @values = @table.map do |row|
      # CSV::Row enumerates [header, value] pairs; only the values belong
      # in the statement.
      fields = row.respond_to?(:fields) ? row.fields : row
      fields.map { |field| sql_literal(field) }.join(', ')
    end
  end

  # Renders one field as a SQL literal: nil becomes NULL, numbers and
  # booleans are emitted bare, everything else is single-quoted with
  # embedded single quotes doubled.
  def sql_literal(value)
    case value
    when nil then "NULL"
    when Numeric, true, false then value.to_s
    else "'" + value.to_s.gsub("'", "''") + "'"
    end
  end

  # Builds a single multi-row INSERT from @values. With no rows there is no
  # valid statement to emit, so @stmt is set to nil.
  def to_pg
    if @values.empty?
      @stmt = nil
    else
      tuples = @values.map { |row| "(#{row})" }.join(',')
      @stmt = "INSERT INTO #{@target_table} VALUES #{tuples};"
    end
  end
end
|
69
|
+
|
70
|
+
|
data/lib/setl_tool.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Licensed to you under one or more contributor license agreements.
|
2
|
+
# See the NOTICE file distributed with this work for additional
|
3
|
+
# information regarding copyright ownership. Ernesto Angel Celis de la
|
4
|
+
# Fuente licenses this file to you under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with the
|
6
|
+
# License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
require 'setl_tool/extract'
|
17
|
+
require 'setl_tool/transform'
|
18
|
+
require 'setl_tool/load'
|
19
|
+
|
20
|
+
# Entry point that wires an Extract and a Transform together for one
# source file.
class Setl_tool

  # Not assigned by the constructor; nil until set by the caller.
  attr_accessor :target_db
  attr_accessor :target_table
  # Extract built from the constructor arguments
  attr_reader :extract
  # Transform built for the same source type as the extractor
  attr_reader :transform
  # Not assigned by the constructor; the caller supplies a loader
  attr_accessor :load

  # TODO Change headers to true
  #
  # src_type:: source format, handed to both Extract and Transform
  # headers::  whether the source has column names in its first row
  # src_path:: path to the source file (the only required argument)
  def initialize(src_type = "csv",
                 headers = false,
                 src_path)
    @src_type = src_type
    @headers = headers
    @src_path = src_path
    @extract = Extract.new(@src_type,
                           @headers,
                           @src_path)
    # Pass the source type through so the transform stays in step with the
    # extractor instead of silently assuming its own default.
    @transform = Transform.new(@src_type)
    #@load = Load.new
  end

end
|
metadata
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: setl_tool
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ernesto Celis
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-08-05 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Just what the summary and name implies
|
15
|
+
email: ecelis@sdf.org
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/setl_tool.rb
|
21
|
+
- lib/setl_tool/extract.rb
|
22
|
+
- lib/setl_tool/load.rb
|
23
|
+
- lib/setl_tool/transform.rb
|
24
|
+
homepage: https://bitbucket.org/ecelis/seth-gem
|
25
|
+
licenses:
|
26
|
+
- Apache 2.0
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
none: false
|
33
|
+
requirements:
|
34
|
+
- - ! '>='
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 1.8.23
|
46
|
+
signing_key:
|
47
|
+
specification_version: 3
|
48
|
+
summary: Simple ETL
|
49
|
+
test_files: []
|