sip 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/LICENSE +674 -0
- data/README.rdoc +32 -0
- data/Rakefile +21 -0
- data/bin/sip +83 -0
- data/bin/transpart +114 -0
- data/docs/classes/Sip.html +169 -0
- data/docs/classes/Sip/CmdOpts.html +179 -0
- data/docs/classes/Sip/Config.html +362 -0
- data/docs/classes/Sip/DBBase.html +368 -0
- data/docs/classes/Sip/HadoopException.html +111 -0
- data/docs/classes/Sip/Hive.html +295 -0
- data/docs/classes/Sip/HiveQueryException.html +111 -0
- data/docs/classes/Sip/ImportScriptExecutionError.html +111 -0
- data/docs/classes/Sip/MySQLSipper.html +273 -0
- data/docs/classes/Sip/NoSuchColumn.html +111 -0
- data/docs/classes/Sip/NoSuchTable.html +111 -0
- data/docs/classes/Sip/PastFailureException.html +111 -0
- data/docs/classes/Sip/Sipper.html +454 -0
- data/docs/classes/Sip/UnsupportedDatabaseType.html +111 -0
- data/docs/classes/Sip/Utils.html +269 -0
- data/docs/classes/Struct.html +146 -0
- data/docs/created.rid +1 -0
- data/docs/files/README_rdoc.html +174 -0
- data/docs/files/lib/sip/cmdopts_rb.html +101 -0
- data/docs/files/lib/sip/config_rb.html +108 -0
- data/docs/files/lib/sip/databases/dbbase_rb.html +108 -0
- data/docs/files/lib/sip/databases/mysql_rb.html +108 -0
- data/docs/files/lib/sip/exceptions_rb.html +101 -0
- data/docs/files/lib/sip/extensions_rb.html +101 -0
- data/docs/files/lib/sip/hive_rb.html +101 -0
- data/docs/files/lib/sip/sipper_rb.html +101 -0
- data/docs/files/lib/sip/utils_rb.html +110 -0
- data/docs/files/lib/sip/version_rb.html +101 -0
- data/docs/files/lib/sip_rb.html +117 -0
- data/docs/fr_class_index.html +42 -0
- data/docs/fr_file_index.html +38 -0
- data/docs/fr_method_index.html +72 -0
- data/docs/index.html +24 -0
- data/docs/rdoc-style.css +208 -0
- data/lib/sip.rb +10 -0
- data/lib/sip/cmdopts.rb +20 -0
- data/lib/sip/config.rb +80 -0
- data/lib/sip/databases/dbbase.rb +56 -0
- data/lib/sip/databases/mysql.rb +52 -0
- data/lib/sip/exceptions.rb +9 -0
- data/lib/sip/extensions.rb +5 -0
- data/lib/sip/hive.rb +62 -0
- data/lib/sip/sipper.rb +118 -0
- data/lib/sip/templates/export.sh +73 -0
- data/lib/sip/utils.rb +58 -0
- data/lib/sip/version.rb +3 -0
- data/test/database_interaction_test.rb +7 -0
- data/test/hive_test.rb +28 -0
- data/test/sipper_test.rb +25 -0
- metadata +125 -0
data/docs/index.html
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
|
5
|
+
|
6
|
+
<!--
|
7
|
+
|
8
|
+
Sip - SQL to Hive importer
|
9
|
+
|
10
|
+
-->
|
11
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
12
|
+
<head>
|
13
|
+
<title>Sip - SQL to Hive importer</title>
|
14
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
15
|
+
</head>
|
16
|
+
<frameset rows="20%, 80%">
|
17
|
+
<frameset cols="25%,35%,45%">
|
18
|
+
<frame src="fr_file_index.html" title="Files" name="Files" />
|
19
|
+
<frame src="fr_class_index.html" name="Classes" />
|
20
|
+
<frame src="fr_method_index.html" name="Methods" />
|
21
|
+
</frameset>
|
22
|
+
<frame src="files/README_rdoc.html" name="docwin" />
|
23
|
+
</frameset>
|
24
|
+
</html>
|
data/docs/rdoc-style.css
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
|
2
|
+
body {
|
3
|
+
font-family: Verdana,Arial,Helvetica,sans-serif;
|
4
|
+
font-size: 90%;
|
5
|
+
margin: 0;
|
6
|
+
margin-left: 40px;
|
7
|
+
padding: 0;
|
8
|
+
background: white;
|
9
|
+
}
|
10
|
+
|
11
|
+
h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
|
12
|
+
h1 { font-size: 150%; }
|
13
|
+
h2,h3,h4 { margin-top: 1em; }
|
14
|
+
|
15
|
+
a { background: #eef; color: #039; text-decoration: none; }
|
16
|
+
a:hover { background: #039; color: #eef; }
|
17
|
+
|
18
|
+
/* Override the base stylesheet's Anchor inside a table cell */
|
19
|
+
td > a {
|
20
|
+
background: transparent;
|
21
|
+
color: #039;
|
22
|
+
text-decoration: none;
|
23
|
+
}
|
24
|
+
|
25
|
+
/* and inside a section title */
|
26
|
+
.section-title > a {
|
27
|
+
background: transparent;
|
28
|
+
color: #eee;
|
29
|
+
text-decoration: none;
|
30
|
+
}
|
31
|
+
|
32
|
+
/* === Structural elements =================================== */
|
33
|
+
|
34
|
+
div#index {
|
35
|
+
margin: 0;
|
36
|
+
margin-left: -40px;
|
37
|
+
padding: 0;
|
38
|
+
font-size: 90%;
|
39
|
+
}
|
40
|
+
|
41
|
+
|
42
|
+
div#index a {
|
43
|
+
margin-left: 0.7em;
|
44
|
+
}
|
45
|
+
|
46
|
+
div#index .section-bar {
|
47
|
+
margin-left: 0px;
|
48
|
+
padding-left: 0.7em;
|
49
|
+
background: #ccc;
|
50
|
+
font-size: small;
|
51
|
+
}
|
52
|
+
|
53
|
+
|
54
|
+
div#classHeader, div#fileHeader {
|
55
|
+
width: auto;
|
56
|
+
color: white;
|
57
|
+
padding: 0.5em 1.5em 0.5em 1.5em;
|
58
|
+
margin: 0;
|
59
|
+
margin-left: -40px;
|
60
|
+
border-bottom: 3px solid #006;
|
61
|
+
}
|
62
|
+
|
63
|
+
div#classHeader a, div#fileHeader a {
|
64
|
+
background: inherit;
|
65
|
+
color: white;
|
66
|
+
}
|
67
|
+
|
68
|
+
div#classHeader td, div#fileHeader td {
|
69
|
+
background: inherit;
|
70
|
+
color: white;
|
71
|
+
}
|
72
|
+
|
73
|
+
|
74
|
+
div#fileHeader {
|
75
|
+
background: #057;
|
76
|
+
}
|
77
|
+
|
78
|
+
div#classHeader {
|
79
|
+
background: #048;
|
80
|
+
}
|
81
|
+
|
82
|
+
|
83
|
+
.class-name-in-header {
|
84
|
+
font-size: 180%;
|
85
|
+
font-weight: bold;
|
86
|
+
}
|
87
|
+
|
88
|
+
|
89
|
+
div#bodyContent {
|
90
|
+
padding: 0 1.5em 0 1.5em;
|
91
|
+
}
|
92
|
+
|
93
|
+
div#description {
|
94
|
+
padding: 0.5em 1.5em;
|
95
|
+
background: #efefef;
|
96
|
+
border: 1px dotted #999;
|
97
|
+
}
|
98
|
+
|
99
|
+
div#description h1,h2,h3,h4,h5,h6 {
|
100
|
+
color: #125;;
|
101
|
+
background: transparent;
|
102
|
+
}
|
103
|
+
|
104
|
+
div#validator-badges {
|
105
|
+
text-align: center;
|
106
|
+
}
|
107
|
+
div#validator-badges img { border: 0; }
|
108
|
+
|
109
|
+
div#copyright {
|
110
|
+
color: #333;
|
111
|
+
background: #efefef;
|
112
|
+
font: 0.75em sans-serif;
|
113
|
+
margin-top: 5em;
|
114
|
+
margin-bottom: 0;
|
115
|
+
padding: 0.5em 2em;
|
116
|
+
}
|
117
|
+
|
118
|
+
|
119
|
+
/* === Classes =================================== */
|
120
|
+
|
121
|
+
table.header-table {
|
122
|
+
color: white;
|
123
|
+
font-size: small;
|
124
|
+
}
|
125
|
+
|
126
|
+
.type-note {
|
127
|
+
font-size: small;
|
128
|
+
color: #DEDEDE;
|
129
|
+
}
|
130
|
+
|
131
|
+
.xxsection-bar {
|
132
|
+
background: #eee;
|
133
|
+
color: #333;
|
134
|
+
padding: 3px;
|
135
|
+
}
|
136
|
+
|
137
|
+
.section-bar {
|
138
|
+
color: #333;
|
139
|
+
border-bottom: 1px solid #999;
|
140
|
+
margin-left: -20px;
|
141
|
+
}
|
142
|
+
|
143
|
+
|
144
|
+
.section-title {
|
145
|
+
background: #79a;
|
146
|
+
color: #eee;
|
147
|
+
padding: 3px;
|
148
|
+
margin-top: 2em;
|
149
|
+
margin-left: -30px;
|
150
|
+
border: 1px solid #999;
|
151
|
+
}
|
152
|
+
|
153
|
+
.top-aligned-row { vertical-align: top }
|
154
|
+
.bottom-aligned-row { vertical-align: bottom }
|
155
|
+
|
156
|
+
/* --- Context section classes ----------------------- */
|
157
|
+
|
158
|
+
.context-row { }
|
159
|
+
.context-item-name { font-family: monospace; font-weight: bold; color: black; }
|
160
|
+
.context-item-value { font-size: small; color: #448; }
|
161
|
+
.context-item-desc { color: #333; padding-left: 2em; }
|
162
|
+
|
163
|
+
/* --- Method classes -------------------------- */
|
164
|
+
.method-detail {
|
165
|
+
background: #efefef;
|
166
|
+
padding: 0;
|
167
|
+
margin-top: 0.5em;
|
168
|
+
margin-bottom: 1em;
|
169
|
+
border: 1px dotted #ccc;
|
170
|
+
}
|
171
|
+
.method-heading {
|
172
|
+
color: black;
|
173
|
+
background: #ccc;
|
174
|
+
border-bottom: 1px solid #666;
|
175
|
+
padding: 0.2em 0.5em 0 0.5em;
|
176
|
+
}
|
177
|
+
.method-signature { color: black; background: inherit; }
|
178
|
+
.method-name { font-weight: bold; }
|
179
|
+
.method-args { font-style: italic; }
|
180
|
+
.method-description { padding: 0 0.5em 0 0.5em; }
|
181
|
+
|
182
|
+
/* --- Source code sections -------------------- */
|
183
|
+
|
184
|
+
a.source-toggle { font-size: 90%; }
|
185
|
+
div.method-source-code {
|
186
|
+
background: #262626;
|
187
|
+
color: #ffdead;
|
188
|
+
margin: 1em;
|
189
|
+
padding: 0.5em;
|
190
|
+
border: 1px dashed #999;
|
191
|
+
overflow: hidden;
|
192
|
+
}
|
193
|
+
|
194
|
+
div.method-source-code pre { color: #ffdead; overflow: hidden; }
|
195
|
+
|
196
|
+
/* --- Ruby keyword styles --------------------- */
|
197
|
+
|
198
|
+
.standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
|
199
|
+
|
200
|
+
.ruby-constant { color: #7fffd4; background: transparent; }
|
201
|
+
.ruby-keyword { color: #00ffff; background: transparent; }
|
202
|
+
.ruby-ivar { color: #eedd82; background: transparent; }
|
203
|
+
.ruby-operator { color: #00ffee; background: transparent; }
|
204
|
+
.ruby-identifier { color: #ffdead; background: transparent; }
|
205
|
+
.ruby-node { color: #ffa07a; background: transparent; }
|
206
|
+
.ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
|
207
|
+
.ruby-regexp { color: #ffa07a; background: transparent; }
|
208
|
+
.ruby-value { color: #7fffd4; background: transparent; }
|
data/lib/sip.rb
ADDED
data/lib/sip/cmdopts.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Sip
  # A Hash of command-line options that can render itself as an argument
  # string.
  #
  # Keys are option names and values are option arguments; a +nil+ value marks
  # a bare flag. Options are rendered in the hash's iteration order. Values are
  # emitted verbatim -- no quoting or escaping is applied by this class.
  class CmdOpts < Hash
    # Registers every given name as a bare flag (an option without a value).
    def set(*k)
      k.each { |key| self[key] = nil }
    end

    # Renders the options as a single space-separated string.
    #
    # Names longer than one character use the long form ("--name" or
    # "--name=value"); single-character names use the short form ("-n" or
    # "-n value").
    #
    # prefix:: optional text placed before the options (e.g. the executable)
    # suffix:: optional text placed after the options (e.g. a positional arg)
    #
    # Empty segments (nil/empty prefix or suffix, or no options at all) are
    # skipped so the result never contains leading, trailing or doubled
    # separators.
    def to_s(prefix=nil, suffix=nil)
      opts = map { |name, value| format_option(name, value) }.join(" ")
      [prefix.to_s, opts, suffix.to_s].reject(&:empty?).join(" ")
    end

    private

    # Formats one option. The name is converted with to_s first so that
    # non-String keys (Symbols, Integers) are accepted as well.
    def format_option(name, value)
      name = name.to_s
      long = name.length > 1
      if value.nil?
        long ? "--#{name}" : "-#{name}"
      else
        long ? "--#{name}=#{value}" : "-#{name} #{value}"
      end
    end
  end
end
|
data/lib/sip/config.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Sip
  # In-memory representation of the sip configuration, backed by a YAML file.
  #
  # The configuration is a Hash whose 'databases' entry maps database names to
  # per-database settings; each database's 'tables' entry maps table names to
  # per-table settings. On construction every database and table entry is
  # filled in with defaults and given a convenience key holding its own name
  # ('dbname' / 'tablename'). Those convenience keys are never written to disk.
  class Config < Hash

    # Defaults applied to every database entry.
    # NOTE(review): the default key here is 'dbport', while MySQLSipper reads
    # 'port' -- confirm which key is intended.
    DBCONF_DEFAULT = {
      'type' => 'mysql',
      'host' => 'localhost',
      'dbport' => nil
    }

    # Defaults applied to every table entry.
    TABLECONF_DEFAULT = {
      'incremental_index' => 'id',
      'method' => 'append',
      'incremental_index_value' => 0,
      'partition_by' => nil,
      'columns' => nil
    }

    # Builds a Config from the YAML document stored at +location+.
    def self.load_file(location)
      Config.new YAML.load_file(location)
    end

    # initial_values:: Hash of raw settings. nil/false (what YAML yields for an
    #                  empty document) is treated as an empty configuration.
    #
    # A missing 'databases' section, a database without 'tables', and a table
    # without any settings are all treated as empty rather than raising.
    def initialize(initial_values)
      # temp_keys are ones we'll delete before saving to a file
      @temp_keys = []
      merge!(initial_values || {})
      self['databases'] ||= {}

      # Apply defaults and set the 'dbname' / 'tablename' ease-of-use keys.
      # Iterate over a snapshot of the keys because entries are replaced.
      self['databases'].keys.each do |dbname|
        dbconf = DBCONF_DEFAULT.merge(self['databases'][dbname] || {})
        dbconf['dbname'] = dbname
        tables = (dbconf['tables'] ||= {})

        tables.keys.each do |tablename|
          # The generated Hive table name is the weakest default; explicit
          # table settings win over everything.
          tableconf = {'hive_table_name' => "#{dbname}_#{tablename}"}
          tableconf = tableconf.merge(TABLECONF_DEFAULT).merge(tables[tablename] || {})
          tableconf['tablename'] = tablename
          tables[tablename] = tableconf
        end

        self['databases'][dbname] = dbconf
      end
    end

    # Writes the configuration to +location+ as YAML.
    #
    # Temporary keys registered through set_temp and the 'dbname' / 'tablename'
    # convenience keys are left out of the file. They are removed from a copy
    # only, so this object remains fully usable after saving.
    def save_file(location)
      File.open(location, 'w') { |f| YAML.dump(persistable_hash, f) }
    end

    # Returns the settings Hash of database +dbname+.
    def dbconf(dbname)
      self['databases'][dbname]
    end

    # Returns the settings Hash of table +tablename+ in database +dbname+.
    def tconf(dbname, tablename)
      dbconf(dbname)['tables'][tablename]
    end

    # Replaces the settings of database +dbname+ with +conf+.
    def store_database(dbname, conf)
      self['databases'][dbname] = conf
    end

    # Replaces the settings of table +tablename+ in database +dbname+.
    def store_table(dbname, tablename, conf)
      self['databases'][dbname]['tables'][tablename] = conf
    end

    # Copies the given +keys+ from +other+ into this config and marks them as
    # temporary, i.e. excluded from save_file.
    def set_temp(other, keys)
      keys.each do |k|
        @temp_keys << k
        self[k] = other[k]
      end
    end

    private

    # Plain-Hash copy of this config suitable for dumping: temporary keys are
    # dropped and the per-database / per-table convenience keys are removed.
    def persistable_hash
      h = {}
      each { |k, v| h[k] = v unless @temp_keys.include?(k) }

      databases = h['databases']
      return h if databases.nil?

      h['databases'] = {}
      databases.each do |dbname, dbconf|
        db_copy = copy_without(dbconf, 'dbname')
        unless dbconf['tables'].nil?
          db_copy['tables'] = {}
          dbconf['tables'].each do |tablename, tableconf|
            db_copy['tables'][tablename] = copy_without(tableconf, 'tablename')
          end
        end
        h['databases'][dbname] = db_copy
      end
      h
    end

    # Shallow copy of +hash+ with +key+ omitted.
    def copy_without(hash, key)
      copy = {}
      hash.each { |k, v| copy[k] = v unless k == key }
      copy
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Sip
  # Base class for source-database adapters.
  #
  # Concrete adapters are expected to provide:
  # * query(sql)                      -- run +sql+ and return the rows as an
  #                                      Array of Arrays
  # * convert_to_hive_type(type)      -- map a native column type to a Hive type
  # * cmd_line_execute_string(select) -- build the command line that runs
  #                                      +select+ with the database's client
  class DBBase
    attr_reader :args

    # Creates the adapter matching +type+.
    #
    # type::   database type name; only 'mysql' is supported
    # args::   connection settings handed to the adapter
    # sipper:: the owning sipper object, handed to the adapter
    #
    # Raises UnsupportedDatabaseType for any other +type+.
    def self.make_interface(type, args, sipper)
      if type == 'mysql'
        # Loaded lazily so the driver is only required when actually used.
        require 'sip/databases/mysql'
        MySQLSipper.new args, sipper
      else
        raise UnsupportedDatabaseType, "DB type #{type} not supported."
      end
    end

    def initialize(args, sipper)
      @args = args
      @sipper = sipper
    end

    # Result rows of "SHOW tables".
    def tables
      query('SHOW tables')
    end

    # Number of rows in +table+.
    def rowcount(table)
      # Must be a double-quoted string: with single quotes the table name is
      # not interpolated and the literal text "#{table}" is sent to the server.
      query("SELECT count(1) FROM #{table}").first.first.to_i
    end

    # Largest value of +field+ in +tablename+, converted with to_i.
    def get_column_max(tablename, field)
      query("SELECT max(#{field}) FROM #{tablename}").first.first.to_i
    end

    # [name, hive_type] pairs for every column of +table+.
    def hive_columns(table)
      columns(table).map { |name, type| [name, convert_to_hive_type(type)] }
    end

    # First two fields of every "DESCRIBE" row for +table+ (used by the rest of
    # this class as [name, type] pairs).
    def columns(table)
      query("DESCRIBE #{table}").map { |col| col.slice(0, 2) }
    end

    # The names from +cols+ that exist in +table+, in the table's column order.
    def order_column_list(table, cols)
      columns(table).map { |name, _type| name }.select { |name| cols.include? name }
    end

    # Builds the client command line that selects every column of the table
    # described by +tableconf+.
    #
    # tableconf:: table settings; 'tablename' and 'incremental_index' are read
    # first::     optional inclusive lower bound on the incremental index
    # last::      optional inclusive upper bound on the incremental index
    def generate_command(tableconf, first=nil, last=nil)
      table = tableconf['tablename']
      index = tableconf['incremental_index']

      select = "SELECT #{columns(table).map { |name, _type| name }.join(',')} FROM #{table}"

      wheres = []
      wheres << "#{index} >= #{first}" unless first.nil?
      wheres << "#{index} <= #{last}" unless last.nil?
      select += " WHERE #{wheres.join(" AND ")}" unless wheres.empty?

      cmd_line_execute_string select
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require "mysql"
|
2
|
+
|
3
|
+
module Sip
  # DBBase adapter for MySQL. Queries go through a Mysql connection opened at
  # construction time; bulk exports are expressed as a `mysql` client command
  # line (see cmd_line_execute_string).
  class MySQLSipper < DBBase
    # MySQL base type name => Hive column type. Anything not listed here is
    # imported as STRING.
    HIVE_TYPES = {
      "tinyint"   => "TINYINT",
      "smallint"  => "SMALLINT",
      "mediumint" => "INT",
      "int"       => "INT",
      "bigint"    => "BIGINT",
      "decimal"   => "FLOAT",
      "numeric"   => "DOUBLE",
      "float"     => "FLOAT",
      "real"      => "DOUBLE",
      "double"    => "DOUBLE",
      "boolean"   => "BOOLEAN"
    }

    # Opens the connection using the 'host', 'username', 'password', 'dbname'
    # and 'port' entries of +args+.
    def initialize(args, sipper)
      super(args, sipper)
      @connection = Mysql::new @args['host'], @args['username'], @args['password'], @args['dbname'], @args['port']
    end

    # Returns the `mysql` client command line that runs +select+ against the
    # configured database.
    #
    # The statement is wrapped in single quotes as-is.
    # NOTE(review): a statement that itself contains a single quote is not
    # escaped here -- confirm callers never produce one.
    def cmd_line_execute_string(select)
      opts = CmdOpts.new
      opts.set 'N', 'B', 'C', 'q'
      # A nil value would be rendered by CmdOpts as a bare flag. For options
      # that require an argument (e.g. "-P") the client would then consume the
      # next word -- the database name -- as that argument. Unset settings are
      # therefore left out entirely.
      set_unless_nil opts, 'u', @args['username']
      set_unless_nil opts, 'password', @args['password']
      set_unless_nil opts, 'h', @args['host']
      opts['e'] = "'#{select}'"
      set_unless_nil opts, 'P', @args['port']
      path = `which mysql`
      opts.to_s(path.strip, @args['dbname'])
    end

    # Logs and runs +q+. Returns the result rows as an Array of row Arrays, or
    # nil when the driver returns no result set.
    def query(q)
      @sipper.log "Running MySQL Query: #{q}"
      c = @connection.query(q)
      return nil if c.nil?
      results = []
      c.num_rows.times { results << c.fetch_row }
      results
    end

    # Maps a MySQL column type to the Hive type used for the import.
    #
    # Only the leading type word is considered, so display widths and
    # modifiers ("int(11)", "int unsigned", "double precision") do not affect
    # the result. Unknown types map to "STRING".
    def convert_to_hive_type(typename)
      base = typename.to_s[/\A[A-Za-z]+/].to_s.downcase
      HIVE_TYPES.fetch(base, "STRING")
    end

    # Closes the database connection.
    def close
      @connection.close
    end

    private

    # Stores +value+ under +name+ in +opts+ unless the value is nil.
    def set_unless_nil(opts, name, value)
      opts[name] = value unless value.nil?
    end
  end
end
|