sip 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/Gemfile +2 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +32 -0
  4. data/Rakefile +21 -0
  5. data/bin/sip +83 -0
  6. data/bin/transpart +114 -0
  7. data/docs/classes/Sip.html +169 -0
  8. data/docs/classes/Sip/CmdOpts.html +179 -0
  9. data/docs/classes/Sip/Config.html +362 -0
  10. data/docs/classes/Sip/DBBase.html +368 -0
  11. data/docs/classes/Sip/HadoopException.html +111 -0
  12. data/docs/classes/Sip/Hive.html +295 -0
  13. data/docs/classes/Sip/HiveQueryException.html +111 -0
  14. data/docs/classes/Sip/ImportScriptExecutionError.html +111 -0
  15. data/docs/classes/Sip/MySQLSipper.html +273 -0
  16. data/docs/classes/Sip/NoSuchColumn.html +111 -0
  17. data/docs/classes/Sip/NoSuchTable.html +111 -0
  18. data/docs/classes/Sip/PastFailureException.html +111 -0
  19. data/docs/classes/Sip/Sipper.html +454 -0
  20. data/docs/classes/Sip/UnsupportedDatabaseType.html +111 -0
  21. data/docs/classes/Sip/Utils.html +269 -0
  22. data/docs/classes/Struct.html +146 -0
  23. data/docs/created.rid +1 -0
  24. data/docs/files/README_rdoc.html +174 -0
  25. data/docs/files/lib/sip/cmdopts_rb.html +101 -0
  26. data/docs/files/lib/sip/config_rb.html +108 -0
  27. data/docs/files/lib/sip/databases/dbbase_rb.html +108 -0
  28. data/docs/files/lib/sip/databases/mysql_rb.html +108 -0
  29. data/docs/files/lib/sip/exceptions_rb.html +101 -0
  30. data/docs/files/lib/sip/extensions_rb.html +101 -0
  31. data/docs/files/lib/sip/hive_rb.html +101 -0
  32. data/docs/files/lib/sip/sipper_rb.html +101 -0
  33. data/docs/files/lib/sip/utils_rb.html +110 -0
  34. data/docs/files/lib/sip/version_rb.html +101 -0
  35. data/docs/files/lib/sip_rb.html +117 -0
  36. data/docs/fr_class_index.html +42 -0
  37. data/docs/fr_file_index.html +38 -0
  38. data/docs/fr_method_index.html +72 -0
  39. data/docs/index.html +24 -0
  40. data/docs/rdoc-style.css +208 -0
  41. data/lib/sip.rb +10 -0
  42. data/lib/sip/cmdopts.rb +20 -0
  43. data/lib/sip/config.rb +80 -0
  44. data/lib/sip/databases/dbbase.rb +56 -0
  45. data/lib/sip/databases/mysql.rb +52 -0
  46. data/lib/sip/exceptions.rb +9 -0
  47. data/lib/sip/extensions.rb +5 -0
  48. data/lib/sip/hive.rb +62 -0
  49. data/lib/sip/sipper.rb +118 -0
  50. data/lib/sip/templates/export.sh +73 -0
  51. data/lib/sip/utils.rb +58 -0
  52. data/lib/sip/version.rb +3 -0
  53. data/test/database_interaction_test.rb +7 -0
  54. data/test/hive_test.rb +28 -0
  55. data/test/sipper_test.rb +25 -0
  56. metadata +125 -0
@@ -0,0 +1 @@
1
+ Fri, 25 Mar 2011 16:22:17 -0400
@@ -0,0 +1,174 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: README.rdoc</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>README.rdoc</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>README.rdoc
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Fri Mar 25 16:22:16 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <h1>SIP: SQL to Hive Importer</h1>
73
+ <p>
74
+ SIP is a <a
75
+ href="http://en.wikipedia.org/wiki/Extract,_transform,_load">ETL</a> tool
76
+ for extracting SQL databases and importing them into <a
77
+ href="http://hive.apache.org">Hive</a>. It was created because the ability
78
+ to transform columns and partition data was an absolute requirement, and no
79
+ other tool provided that functionality.
80
+ </p>
81
+ <p>
82
+ Unique features include:
83
+ </p>
84
+ <ul>
85
+ <li>The ability to transform columns (using Ruby code)
86
+
87
+ </li>
88
+ <li>A single, simple, human readable configuration file
89
+
90
+ </li>
91
+ <li>The ability to parition tables in Hive based on the value of any (possibly
92
+ transformed) columns
93
+
94
+ </li>
95
+ </ul>
96
+ <p>
97
+ Bug reports and pull requests welcome on <a
98
+ href="https://github.com/livingsocial/sip">Github</a>.
99
+ </p>
100
+ <h2>Requirements</h2>
101
+ <ul>
102
+ <li>A working Hadoop installation
103
+
104
+ </li>
105
+ <li>Hive on the namenode
106
+
107
+ </li>
108
+ <li>Ruby &gt;= 1.8.6 on all datanodes
109
+
110
+ </li>
111
+ <li>SQL Ruby libs
112
+
113
+ </li>
114
+ </ul>
115
+ <h2>Installation, Configuration, and Use</h2>
116
+ <p>
117
+ if no primary key (default: id), must set incremental_index to blank
118
+ </p>
119
+ <pre>
120
+ sip [--db &lt;dbname&gt;] [--table &lt;tablename&gt;] [-c &lt;config location&gt;]
121
+ </pre>
122
+ <h2>How it Works</h2>
123
+ <p>
124
+ Per table to be imported, SIP determines the queries necessary to perform
125
+ an export and the creates scripts (one per datanode) that are then run
126
+ individually in parallel. Each script:
127
+ </p>
128
+ <ol>
129
+ <li>Copies a transformation / partition (transpart) script to the datanode
130
+
131
+ </li>
132
+ <li>Performs the SQL extraction, piping output through the transpart script
133
+
134
+ </li>
135
+ <li>Uploads all partitions to HDFS
136
+
137
+ </li>
138
+ </ol>
139
+ <p>
140
+ Then, all of the partitions are imported from HDFS into Hive. Easy squeezy.
141
+ </p>
142
+
143
+ </div>
144
+
145
+
146
+ </div>
147
+
148
+
149
+ </div>
150
+
151
+
152
+ <!-- if includes -->
153
+
154
+ <div id="section">
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+ <!-- if method_list -->
164
+
165
+
166
+ </div>
167
+
168
+
169
+ <div id="validator-badges">
170
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
171
+ </div>
172
+
173
+ </body>
174
+ </html>
@@ -0,0 +1,101 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: cmdopts.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>cmdopts.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/sip/cmdopts.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Mon Mar 14 17:18:33 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+
72
+
73
+ </div>
74
+
75
+
76
+ </div>
77
+
78
+
79
+ <!-- if includes -->
80
+
81
+ <div id="section">
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+ <!-- if method_list -->
91
+
92
+
93
+ </div>
94
+
95
+
96
+ <div id="validator-badges">
97
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
98
+ </div>
99
+
100
+ </body>
101
+ </html>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: config.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>config.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/sip/config.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Wed Mar 23 14:50:15 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+
72
+ <div id="requires-list">
73
+ <h3 class="section-bar">Required files</h3>
74
+
75
+ <div class="name-list">
76
+ yaml&nbsp;&nbsp;
77
+ </div>
78
+ </div>
79
+
80
+ </div>
81
+
82
+
83
+ </div>
84
+
85
+
86
+ <!-- if includes -->
87
+
88
+ <div id="section">
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+ <!-- if method_list -->
98
+
99
+
100
+ </div>
101
+
102
+
103
+ <div id="validator-badges">
104
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
105
+ </div>
106
+
107
+ </body>
108
+ </html>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: dbbase.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>dbbase.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/sip/databases/dbbase.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Wed Mar 23 14:50:15 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+
72
+ <div id="requires-list">
73
+ <h3 class="section-bar">Required files</h3>
74
+
75
+ <div class="name-list">
76
+ sip/databases/mysql&nbsp;&nbsp;
77
+ </div>
78
+ </div>
79
+
80
+ </div>
81
+
82
+
83
+ </div>
84
+
85
+
86
+ <!-- if includes -->
87
+
88
+ <div id="section">
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+ <!-- if method_list -->
98
+
99
+
100
+ </div>
101
+
102
+
103
+ <div id="validator-badges">
104
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
105
+ </div>
106
+
107
+ </body>
108
+ </html>