sip 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/LICENSE +674 -0
- data/README.rdoc +32 -0
- data/Rakefile +21 -0
- data/bin/sip +83 -0
- data/bin/transpart +114 -0
- data/docs/classes/Sip.html +169 -0
- data/docs/classes/Sip/CmdOpts.html +179 -0
- data/docs/classes/Sip/Config.html +362 -0
- data/docs/classes/Sip/DBBase.html +368 -0
- data/docs/classes/Sip/HadoopException.html +111 -0
- data/docs/classes/Sip/Hive.html +295 -0
- data/docs/classes/Sip/HiveQueryException.html +111 -0
- data/docs/classes/Sip/ImportScriptExecutionError.html +111 -0
- data/docs/classes/Sip/MySQLSipper.html +273 -0
- data/docs/classes/Sip/NoSuchColumn.html +111 -0
- data/docs/classes/Sip/NoSuchTable.html +111 -0
- data/docs/classes/Sip/PastFailureException.html +111 -0
- data/docs/classes/Sip/Sipper.html +454 -0
- data/docs/classes/Sip/UnsupportedDatabaseType.html +111 -0
- data/docs/classes/Sip/Utils.html +269 -0
- data/docs/classes/Struct.html +146 -0
- data/docs/created.rid +1 -0
- data/docs/files/README_rdoc.html +174 -0
- data/docs/files/lib/sip/cmdopts_rb.html +101 -0
- data/docs/files/lib/sip/config_rb.html +108 -0
- data/docs/files/lib/sip/databases/dbbase_rb.html +108 -0
- data/docs/files/lib/sip/databases/mysql_rb.html +108 -0
- data/docs/files/lib/sip/exceptions_rb.html +101 -0
- data/docs/files/lib/sip/extensions_rb.html +101 -0
- data/docs/files/lib/sip/hive_rb.html +101 -0
- data/docs/files/lib/sip/sipper_rb.html +101 -0
- data/docs/files/lib/sip/utils_rb.html +110 -0
- data/docs/files/lib/sip/version_rb.html +101 -0
- data/docs/files/lib/sip_rb.html +117 -0
- data/docs/fr_class_index.html +42 -0
- data/docs/fr_file_index.html +38 -0
- data/docs/fr_method_index.html +72 -0
- data/docs/index.html +24 -0
- data/docs/rdoc-style.css +208 -0
- data/lib/sip.rb +10 -0
- data/lib/sip/cmdopts.rb +20 -0
- data/lib/sip/config.rb +80 -0
- data/lib/sip/databases/dbbase.rb +56 -0
- data/lib/sip/databases/mysql.rb +52 -0
- data/lib/sip/exceptions.rb +9 -0
- data/lib/sip/extensions.rb +5 -0
- data/lib/sip/hive.rb +62 -0
- data/lib/sip/sipper.rb +118 -0
- data/lib/sip/templates/export.sh +73 -0
- data/lib/sip/utils.rb +58 -0
- data/lib/sip/version.rb +3 -0
- data/test/database_interaction_test.rb +7 -0
- data/test/hive_test.rb +28 -0
- data/test/sipper_test.rb +25 -0
- metadata +125 -0
data/docs/created.rid
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Fri, 25 Mar 2011 16:22:17 -0400
|
@@ -0,0 +1,174 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: README.rdoc</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>README.rdoc</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>README.rdoc
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Fri Mar 25 16:22:16 -0400 2011</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
<div id="description">
|
72
|
+
<h1>SIP: SQL to Hive Importer</h1>
|
73
|
+
<p>
|
74
|
+
SIP is a <a
|
75
|
+
href="http://en.wikipedia.org/wiki/Extract,_transform,_load">ETL</a> tool
|
76
|
+
for extracting SQL databases and importing them into <a
|
77
|
+
href="http://hive.apache.org">Hive</a>. It was created because the ability
|
78
|
+
to transform columns and partition data was an absolute requirement, and no
|
79
|
+
other tool provided that functionality.
|
80
|
+
</p>
|
81
|
+
<p>
|
82
|
+
Unique features include:
|
83
|
+
</p>
|
84
|
+
<ul>
|
85
|
+
<li>The ability to transform columns (using Ruby code)
|
86
|
+
|
87
|
+
</li>
|
88
|
+
<li>A single, simple, human readable configuration file
|
89
|
+
|
90
|
+
</li>
|
91
|
+
<li>The ability to partition tables in Hive based on the value of any (possibly
|
92
|
+
transformed) columns
|
93
|
+
|
94
|
+
</li>
|
95
|
+
</ul>
|
96
|
+
<p>
|
97
|
+
Bug reports and pull requests welcome on <a
|
98
|
+
href="https://github.com/livingsocial/sip">GitHub</a>.
|
99
|
+
</p>
|
100
|
+
<h2>Requirements</h2>
|
101
|
+
<ul>
|
102
|
+
<li>A working Hadoop installation
|
103
|
+
|
104
|
+
</li>
|
105
|
+
<li>Hive on the namenode
|
106
|
+
|
107
|
+
</li>
|
108
|
+
<li>Ruby >= 1.8.6 on all datanodes
|
109
|
+
|
110
|
+
</li>
|
111
|
+
<li>SQL Ruby libs
|
112
|
+
|
113
|
+
</li>
|
114
|
+
</ul>
|
115
|
+
<h2>Installation, Configuration, and Use</h2>
|
116
|
+
<p>
|
117
|
+
If there is no primary key (default: id), incremental_index must be set to blank.
|
118
|
+
</p>
|
119
|
+
<pre>
|
120
|
+
sip [--db &lt;dbname&gt;] [--table &lt;tablename&gt;] [-c &lt;config location&gt;]
|
121
|
+
</pre>
|
122
|
+
<h2>How it Works</h2>
|
123
|
+
<p>
|
124
|
+
Per table to be imported, SIP determines the queries necessary to perform
|
125
|
+
an export and then creates scripts (one per datanode) that are then run
|
126
|
+
individually in parallel. Each script:
|
127
|
+
</p>
|
128
|
+
<ol>
|
129
|
+
<li>Copies a transformation / partition (transpart) script to the datanode
|
130
|
+
|
131
|
+
</li>
|
132
|
+
<li>Performs the SQL extraction, piping output through the transpart script
|
133
|
+
|
134
|
+
</li>
|
135
|
+
<li>Uploads all partitions to HDFS
|
136
|
+
|
137
|
+
</li>
|
138
|
+
</ol>
|
139
|
+
<p>
|
140
|
+
Then, all of the partitions are imported from HDFS into Hive. Easy squeezy.
|
141
|
+
</p>
|
142
|
+
|
143
|
+
</div>
|
144
|
+
|
145
|
+
|
146
|
+
</div>
|
147
|
+
|
148
|
+
|
149
|
+
</div>
|
150
|
+
|
151
|
+
|
152
|
+
<!-- if includes -->
|
153
|
+
|
154
|
+
<div id="section">
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
<!-- if method_list -->
|
164
|
+
|
165
|
+
|
166
|
+
</div>
|
167
|
+
|
168
|
+
|
169
|
+
<div id="validator-badges">
|
170
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
171
|
+
</div>
|
172
|
+
|
173
|
+
</body>
|
174
|
+
</html>
|
@@ -0,0 +1,101 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: cmdopts.rb</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>cmdopts.rb</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>lib/sip/cmdopts.rb
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Mon Mar 14 17:18:33 -0400 2011</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
</div>
|
74
|
+
|
75
|
+
|
76
|
+
</div>
|
77
|
+
|
78
|
+
|
79
|
+
<!-- if includes -->
|
80
|
+
|
81
|
+
<div id="section">
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
<!-- if method_list -->
|
91
|
+
|
92
|
+
|
93
|
+
</div>
|
94
|
+
|
95
|
+
|
96
|
+
<div id="validator-badges">
|
97
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
98
|
+
</div>
|
99
|
+
|
100
|
+
</body>
|
101
|
+
</html>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: config.rb</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>config.rb</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>lib/sip/config.rb
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Wed Mar 23 14:50:15 -0400 2011</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
|
72
|
+
<div id="requires-list">
|
73
|
+
<h3 class="section-bar">Required files</h3>
|
74
|
+
|
75
|
+
<div class="name-list">
|
76
|
+
yaml
|
77
|
+
</div>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
</div>
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
|
86
|
+
<!-- if includes -->
|
87
|
+
|
88
|
+
<div id="section">
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
<!-- if method_list -->
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
|
103
|
+
<div id="validator-badges">
|
104
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
105
|
+
</div>
|
106
|
+
|
107
|
+
</body>
|
108
|
+
</html>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: dbbase.rb</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>dbbase.rb</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>lib/sip/databases/dbbase.rb
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Wed Mar 23 14:50:15 -0400 2011</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
|
72
|
+
<div id="requires-list">
|
73
|
+
<h3 class="section-bar">Required files</h3>
|
74
|
+
|
75
|
+
<div class="name-list">
|
76
|
+
sip/databases/mysql
|
77
|
+
</div>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
</div>
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
|
86
|
+
<!-- if includes -->
|
87
|
+
|
88
|
+
<div id="section">
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
<!-- if method_list -->
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
|
103
|
+
<div id="validator-badges">
|
104
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
105
|
+
</div>
|
106
|
+
|
107
|
+
</body>
|
108
|
+
</html>
|