sip 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/LICENSE +674 -0
- data/README.rdoc +32 -0
- data/Rakefile +21 -0
- data/bin/sip +83 -0
- data/bin/transpart +114 -0
- data/docs/classes/Sip.html +169 -0
- data/docs/classes/Sip/CmdOpts.html +179 -0
- data/docs/classes/Sip/Config.html +362 -0
- data/docs/classes/Sip/DBBase.html +368 -0
- data/docs/classes/Sip/HadoopException.html +111 -0
- data/docs/classes/Sip/Hive.html +295 -0
- data/docs/classes/Sip/HiveQueryException.html +111 -0
- data/docs/classes/Sip/ImportScriptExecutionError.html +111 -0
- data/docs/classes/Sip/MySQLSipper.html +273 -0
- data/docs/classes/Sip/NoSuchColumn.html +111 -0
- data/docs/classes/Sip/NoSuchTable.html +111 -0
- data/docs/classes/Sip/PastFailureException.html +111 -0
- data/docs/classes/Sip/Sipper.html +454 -0
- data/docs/classes/Sip/UnsupportedDatabaseType.html +111 -0
- data/docs/classes/Sip/Utils.html +269 -0
- data/docs/classes/Struct.html +146 -0
- data/docs/created.rid +1 -0
- data/docs/files/README_rdoc.html +174 -0
- data/docs/files/lib/sip/cmdopts_rb.html +101 -0
- data/docs/files/lib/sip/config_rb.html +108 -0
- data/docs/files/lib/sip/databases/dbbase_rb.html +108 -0
- data/docs/files/lib/sip/databases/mysql_rb.html +108 -0
- data/docs/files/lib/sip/exceptions_rb.html +101 -0
- data/docs/files/lib/sip/extensions_rb.html +101 -0
- data/docs/files/lib/sip/hive_rb.html +101 -0
- data/docs/files/lib/sip/sipper_rb.html +101 -0
- data/docs/files/lib/sip/utils_rb.html +110 -0
- data/docs/files/lib/sip/version_rb.html +101 -0
- data/docs/files/lib/sip_rb.html +117 -0
- data/docs/fr_class_index.html +42 -0
- data/docs/fr_file_index.html +38 -0
- data/docs/fr_method_index.html +72 -0
- data/docs/index.html +24 -0
- data/docs/rdoc-style.css +208 -0
- data/lib/sip.rb +10 -0
- data/lib/sip/cmdopts.rb +20 -0
- data/lib/sip/config.rb +80 -0
- data/lib/sip/databases/dbbase.rb +56 -0
- data/lib/sip/databases/mysql.rb +52 -0
- data/lib/sip/exceptions.rb +9 -0
- data/lib/sip/extensions.rb +5 -0
- data/lib/sip/hive.rb +62 -0
- data/lib/sip/sipper.rb +118 -0
- data/lib/sip/templates/export.sh +73 -0
- data/lib/sip/utils.rb +58 -0
- data/lib/sip/version.rb +3 -0
- data/test/database_interaction_test.rb +7 -0
- data/test/hive_test.rb +28 -0
- data/test/sipper_test.rb +25 -0
- metadata +125 -0
data/docs/index.html
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
     PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">

<!--

    Sip - SQL to Hive importer

    Frameset entry page: a top row holding the file, class and method
    index frames, and a bottom frame (docwin) that opens on the README.

  -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>Sip - SQL to Hive importer</title>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<frameset rows="20%, 80%">
    <frameset cols="25%,35%,45%">
        <!-- Every frame carries a title so assistive technology can tell them apart. -->
        <frame src="fr_file_index.html"   title="Files" name="Files" />
        <frame src="fr_class_index.html"  title="Classes" name="Classes" />
        <frame src="fr_method_index.html" title="Methods" name="Methods" />
    </frameset>
    <frame src="files/README_rdoc.html" title="Documentation" name="docwin" />
</frameset>
</html>
|
data/docs/rdoc-style.css
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
|
2
|
+
/* Page-wide defaults; the body keeps a 40px left margin. */
body {
  font-family: Verdana,Arial,Helvetica,sans-serif;
  font-size: 90%;
  margin: 0 0 0 40px;
  padding: 0;
  background: white;
}

/* Headings h1-h4 start with no margin; h2-h4 then get top spacing. */
h1, h2, h3, h4 {
  margin: 0;
  color: #efefef;
  background: transparent;
}
h1 {
  font-size: 150%;
}
h2, h3, h4 {
  margin-top: 1em;
}

/* Links: colours swap on hover. */
a {
  background: #eef;
  color: #039;
  text-decoration: none;
}
a:hover {
  background: #039;
  color: #eef;
}

/* Override the base stylesheet's Anchor inside a table cell */
td > a {
  background: transparent;
  color: #039;
  text-decoration: none;
}

/* and inside a section title */
.section-title > a {
  background: transparent;
  color: #eee;
  text-decoration: none;
}
|
31
|
+
|
32
|
+
/* === Structural elements =================================== */

/* Index panes: the negative left margin cancels a 40px body margin. */
div#index {
  margin: 0 0 0 -40px;
  padding: 0;
  font-size: 90%;
}

div#index a {
  margin-left: 0.7em;
}

div#index .section-bar {
  margin-left: 0;
  padding-left: 0.7em;
  background: #ccc;
  font-size: small;
}

/* Class/file header banners share everything except the background colour. */
div#classHeader,
div#fileHeader {
  width: auto;
  color: white;
  padding: 0.5em 1.5em;
  margin: 0 0 0 -40px;
  border-bottom: 3px solid #006;
}

/* Links and table cells inside a banner inherit its background. */
div#classHeader a,
div#fileHeader a,
div#classHeader td,
div#fileHeader td {
  background: inherit;
  color: white;
}

div#fileHeader {
  background: #057;
}

div#classHeader {
  background: #048;
}

.class-name-in-header {
  font-size: 180%;
  font-weight: bold;
}

div#bodyContent {
  padding: 0 1.5em;
}

div#description {
  padding: 0.5em 1.5em;
  background: #efefef;
  border: 1px dotted #999;
}

/* NOTE(review): only the h1 selector is scoped to div#description; h2-h6 in
   this list match everywhere on the page. Kept as written, since narrowing
   the list would change how headings outside the description render. */
div#description h1,
h2, h3, h4, h5, h6 {
  color: #125;
  background: transparent;
}

div#validator-badges {
  text-align: center;
}
div#validator-badges img {
  border: 0;
}

div#copyright {
  color: #333;
  background: #efefef;
  font: 0.75em sans-serif;
  margin-top: 5em;
  margin-bottom: 0;
  padding: 0.5em 2em;
}
|
117
|
+
|
118
|
+
|
119
|
+
/* === Classes =================================== */

table.header-table {
  color: white;
  font-size: small;
}

.type-note {
  font-size: small;
  color: #DEDEDE;
}

.xxsection-bar {
  background: #eee;
  color: #333;
  padding: 3px;
}

.section-bar {
  color: #333;
  border-bottom: 1px solid #999;
  margin-left: -20px;
}

.section-title {
  background: #79a;
  color: #eee;
  padding: 3px;
  margin-top: 2em;
  margin-left: -30px;
  border: 1px solid #999;
}

/* Vertical alignment helpers for table rows. */
.top-aligned-row {
  vertical-align: top;
}
.bottom-aligned-row {
  vertical-align: bottom;
}

/* --- Context section classes ----------------------- */

.context-row { }
.context-item-name {
  font-family: monospace;
  font-weight: bold;
  color: black;
}
.context-item-value {
  font-size: small;
  color: #448;
}
.context-item-desc {
  color: #333;
  padding-left: 2em;
}

/* --- Method classes -------------------------- */

.method-detail {
  background: #efefef;
  padding: 0;
  margin-top: 0.5em;
  margin-bottom: 1em;
  border: 1px dotted #ccc;
}
.method-heading {
  color: black;
  background: #ccc;
  border-bottom: 1px solid #666;
  padding: 0.2em 0.5em 0;
}
.method-signature {
  color: black;
  background: inherit;
}
.method-name {
  font-weight: bold;
}
.method-args {
  font-style: italic;
}
.method-description {
  padding: 0 0.5em;
}
|
181
|
+
|
182
|
+
/* --- Source code sections -------------------- */

a.source-toggle {
  font-size: 90%;
}

/* Dark panel for method source listings; overflowing content is clipped. */
div.method-source-code {
  background: #262626;
  color: #ffdead;
  margin: 1em;
  padding: 0.5em;
  border: 1px dashed #999;
  overflow: hidden;
}

div.method-source-code pre {
  color: #ffdead;
  overflow: hidden;
}

/* --- Ruby keyword styles --------------------- */

.standalone-code {
  background: #221111;
  color: #ffdead;
  overflow: hidden;
}

/* Token colours; each keeps a transparent background. */
.ruby-constant   { color: #7fffd4; background: transparent; }
.ruby-keyword    { color: #00ffff; background: transparent; }
.ruby-ivar       { color: #eedd82; background: transparent; }
.ruby-operator   { color: #00ffee; background: transparent; }
.ruby-identifier { color: #ffdead; background: transparent; }
.ruby-node       { color: #ffa07a; background: transparent; }
.ruby-comment    { color: #b22222; font-weight: bold; background: transparent; }
.ruby-regexp     { color: #ffa07a; background: transparent; }
.ruby-value      { color: #7fffd4; background: transparent; }
|
data/lib/sip.rb
ADDED
data/lib/sip/cmdopts.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Sip
  # A Hash of command-line options that can render itself as an option string.
  #
  # A key longer than one character is rendered in long form (--key or
  # --key=value); a one-character key in short form (-k or -k value).
  # A nil value marks an option that is rendered without a value.
  class CmdOpts < Hash
    # Registers each given key as a value-less option.
    # Returns the array of keys that was passed in.
    def set(*k)
      k.each { |flag| store(flag, nil) }
    end

    # Renders all options, space separated, optionally wrapped in +prefix+
    # and +suffix+. Parts that are nil or the empty string are left out, so
    # no stray spaces appear.
    def to_s(prefix=nil, suffix=nil)
      rendered = map do |name, value|
        long = name.length > 1
        if value.nil?
          long ? "--#{name}" : "-#{name}"
        elsif long
          "--#{name}=#{value}"
        else
          "-#{name} #{value}"
        end
      end

      pieces = [prefix.nil? ? "" : prefix, rendered.join(" "), suffix.nil? ? "" : suffix]
      pieces.reject { |piece| piece == "" }.join(" ")
    end
  end
end
|
data/lib/sip/config.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Sip
  # Hash-backed Sip configuration, normally loaded from a YAML file.
  #
  # On construction every entry under 'databases' is merged over
  # DBCONF_DEFAULT and every entry under its 'tables' over TABLECONF_DEFAULT.
  # Two convenience keys are added in memory: 'dbname' on each database entry
  # and 'tablename' on each table entry. They are stripped again when the
  # configuration is written out by #save_file.
  class Config < Hash

    # Defaults applied to every database entry.
    DBCONF_DEFAULT = {
      'type' => 'mysql',
      'host' => 'localhost',
      'dbport' => nil
    }.freeze

    # Defaults applied to every table entry.
    TABLECONF_DEFAULT = {
      'incremental_index' => 'id',
      'method' => 'append',
      'incremental_index_value' => 0,
      'partition_by' => nil,
      'columns' => nil
    }.freeze

    # Builds a Config from the YAML document at +location+.
    def self.load_file(location)
      Config.new YAML.load_file(location)
    end

    # +initial_values+ is the parsed configuration hash. A missing or empty
    # document, a missing 'databases'/'tables' section, or an entry with no
    # settings is treated as empty instead of raising.
    def initialize(initial_values)
      # temp_keys are ones we'll delete before saving to a file
      @temp_keys = []
      merge!(initial_values || {})
      self['databases'] ||= {}

      # initialize defaults, including setting dbname and tablename ease of use keys
      self['databases'].keys.each { |dbname|
        dbconf = DBCONF_DEFAULT.merge(self['databases'][dbname] || {})
        dbconf['dbname'] = dbname

        tables = dbconf['tables'] || {}
        tables.keys.each { |tablename|
          # hive_table_name defaults to "<db>_<table>" unless the table entry sets it
          tableconf = {'hive_table_name' => "#{dbname}_#{tablename}"}.merge(TABLECONF_DEFAULT).merge(tables[tablename] || {})
          tableconf['tablename'] = tablename
          tables[tablename] = tableconf
        }
        dbconf['tables'] = tables

        self['databases'][dbname] = dbconf
      }
    end

    # Writes the configuration to +location+ as YAML, leaving out the temp
    # keys registered through #set_temp and the 'dbname'/'tablename'
    # convenience keys. The in-memory configuration is not modified, so it
    # stays usable after saving.
    def save_file(location)
      snapshot = persistable_copy
      File.open(location, 'w') { |f| YAML.dump(snapshot, f) }
    end

    # Returns the configuration hash of database +dbname+.
    def dbconf(dbname)
      self['databases'][dbname]
    end

    # Returns the configuration hash of table +tablename+ in +dbname+.
    def tconf(dbname, tablename)
      dbconf(dbname)['tables'][tablename]
    end

    # Replaces the configuration of database +dbname+ with +conf+.
    def store_database(dbname, conf)
      self['databases'][dbname] = conf
    end

    # Replaces the configuration of table +tablename+ in +dbname+ with +conf+.
    def store_table(dbname, tablename, conf)
      self['databases'][dbname]['tables'][tablename] = conf
    end

    # Copies each of +keys+ from +other+ into this config and marks it as
    # temporary, i.e. excluded from #save_file output.
    def set_temp(other, keys)
      keys.each { |k|
        @temp_keys << k
        self[k] = other[k]
      }
    end

    private

    # Plain-Hash copy of the config without temp keys and without the
    # unnecessary dbname and tablename keys.
    def persistable_copy
      h = Hash.new.merge self
      @temp_keys.each { |k| h.delete k }

      if h['databases']
        databases = {}
        h['databases'].each { |dbname, dbconf|
          databases[dbname] = without_helper_keys(dbconf)
        }
        h['databases'] = databases
      end
      h
    end

    # Copy of one database entry with 'dbname' removed and, for every table,
    # 'tablename' removed. Key order is otherwise preserved.
    def without_helper_keys(dbconf)
      stripped = dbconf.reject { |key, _value| key == 'dbname' }
      if dbconf['tables']
        tables = {}
        dbconf['tables'].each { |tablename, tableconf|
          tables[tablename] = tableconf.reject { |key, _value| key == 'tablename' }
        }
        stripped['tables'] = tables
      end
      stripped
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Sip
  # Common behaviour for database backends.
  #
  # This class calls #query, #convert_to_hive_type and
  # #cmd_line_execute_string but does not define them; a concrete backend
  # (see make_interface) has to provide all three.
  class DBBase
    # Connection settings this interface was built with.
    attr_reader :args

    # Builds the backend for database +type+ ('mysql' is the only one
    # handled). The backend file is required lazily, so its driver is only
    # loaded when actually used.
    # Raises UnsupportedDatabaseType for any other +type+.
    def self.make_interface(type, args, sipper)
      unless type == 'mysql'
        raise UnsupportedDatabaseType, "DB type #{type.to_s} not supported."
      end

      require 'sip/databases/mysql'
      MySQLSipper.new args, sipper
    end

    def initialize(args, sipper)
      @args = args
      @sipper = sipper
    end

    # Result rows of SHOW tables.
    def tables
      query('SHOW tables')
    end

    # Number of rows in +table+.
    def rowcount(table)
      # Double quotes are required here so that the table name is interpolated.
      query("SELECT count(1) FROM #{table}").first.first.to_i
    end

    # Largest value of +field+ in +tablename+, converted with to_i.
    def get_column_max(tablename, field)
      query("SELECT max(#{field}) FROM #{tablename}").first.first.to_i
    end

    # [name, hive_type] pairs for every column of +table+.
    def hive_columns(table)
      columns(table).map { |name, type|
        [name, convert_to_hive_type(type)]
      }
    end

    # First two fields of every DESCRIBE row of +table+; #hive_columns
    # consumes them as [name, type].
    def columns(table)
      query("DESCRIBE #{table}").map { |col|
        col.slice(0,2)
      }
    end

    # The entries of +cols+ that are columns of +table+, in table order.
    def order_column_list(table, cols)
      columns(table).map { |name, _type| name }.select { |c| cols.include? c }
    end

    # Command line that selects every column of tableconf['tablename'],
    # optionally restricted to first <= incremental_index <= last.
    # A nil bound is left out of the WHERE clause.
    def generate_command(tableconf, first=nil, last=nil)
      table = tableconf['tablename']
      index = tableconf['incremental_index']

      select = "SELECT #{columns(table).map { |name, _type| name }.join(',')} FROM #{table}"
      wheres = []
      wheres << "#{index} >= #{first}" unless first.nil?
      wheres << "#{index} <= #{last}" unless last.nil?
      select += " WHERE #{wheres.join(" AND ")}" unless wheres.empty?
      cmd_line_execute_string select
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require "mysql"
|
2
|
+
|
3
|
+
module Sip
  # MySQL backend: runs metadata queries over a Mysql connection and builds
  # the mysql client command line used to export rows.
  class MySQLSipper < DBBase
    # MySQL base type name => Hive column type. Anything not listed maps to
    # STRING.
    # NOTE(review): "decimal" maps to FLOAT while "numeric" maps to DOUBLE;
    # kept as found, but confirm FLOAT precision is acceptable for decimals.
    HIVE_TYPES = {
      "tinyint"   => "TINYINT",
      "smallint"  => "SMALLINT",
      "mediumint" => "INT",
      "int"       => "INT",
      "bigint"    => "BIGINT",
      "decimal"   => "FLOAT",
      "numeric"   => "DOUBLE",
      "float"     => "FLOAT",
      "real"      => "DOUBLE",
      "double"    => "DOUBLE",
      "boolean"   => "BOOLEAN"
    }.freeze

    # Opens the connection from args 'host', 'username', 'password',
    # 'dbname' and the port (see #port).
    def initialize(args, sipper)
      super(args, sipper)
      @connection = Mysql.new @args['host'], @args['username'], @args['password'], @args['dbname'], port
    end

    # Returns the mysql client command line that runs +select+ against the
    # configured database. The client path is resolved with `which mysql`.
    #
    # -u, -h and -P are only emitted when a value is configured: CmdOpts
    # renders a nil value as a bare flag, and a bare "-P" would take the
    # following token as its argument.
    # NOTE(review): +select+ is wrapped in single quotes without escaping, so
    # a statement containing a single quote would break the command line.
    def cmd_line_execute_string(select)
      opts = CmdOpts.new
      opts.set 'N', 'B', 'C', 'q'
      opts['u'] = @args['username'] unless @args['username'].nil?
      # NOTE(review): a nil password still renders as a bare --password;
      # confirm that is the intended behaviour.
      opts['password'] = @args['password']
      opts['h'] = @args['host'] unless @args['host'].nil?
      opts['e'] = "'#{select}'"
      opts['P'] = port unless port.nil?
      path = `which mysql`
      opts.to_s(path.strip, @args['dbname'])
    end

    # Logs and runs +q+. Returns an array of result rows, or nil when the
    # driver returns no result object.
    def query(q)
      @sipper.log "Running MySQL Query: #{q}"
      c = @connection.query(q)
      return nil if c.nil?
      results = []
      c.num_rows.times { results << c.fetch_row }
      results
    end

    # Maps a MySQL column type such as "int(11)", "int unsigned" or
    # "varchar(20)" to a Hive column type. Only the leading type word is
    # considered; unknown types map to "STRING".
    def convert_to_hive_type(typename)
      base = typename.to_s[/\A[A-Za-z]+/].to_s.downcase
      HIVE_TYPES.fetch(base, "STRING")
    end

    def close
      @connection.close
    end

    private

    # Configured port: 'port' if set, otherwise 'dbport', otherwise nil.
    def port
      @args['port'] || @args['dbport']
    end
  end
end
|