sip 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. data/Gemfile +2 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +32 -0
  4. data/Rakefile +21 -0
  5. data/bin/sip +83 -0
  6. data/bin/transpart +114 -0
  7. data/docs/classes/Sip.html +169 -0
  8. data/docs/classes/Sip/CmdOpts.html +179 -0
  9. data/docs/classes/Sip/Config.html +362 -0
  10. data/docs/classes/Sip/DBBase.html +368 -0
  11. data/docs/classes/Sip/HadoopException.html +111 -0
  12. data/docs/classes/Sip/Hive.html +295 -0
  13. data/docs/classes/Sip/HiveQueryException.html +111 -0
  14. data/docs/classes/Sip/ImportScriptExecutionError.html +111 -0
  15. data/docs/classes/Sip/MySQLSipper.html +273 -0
  16. data/docs/classes/Sip/NoSuchColumn.html +111 -0
  17. data/docs/classes/Sip/NoSuchTable.html +111 -0
  18. data/docs/classes/Sip/PastFailureException.html +111 -0
  19. data/docs/classes/Sip/Sipper.html +454 -0
  20. data/docs/classes/Sip/UnsupportedDatabaseType.html +111 -0
  21. data/docs/classes/Sip/Utils.html +269 -0
  22. data/docs/classes/Struct.html +146 -0
  23. data/docs/created.rid +1 -0
  24. data/docs/files/README_rdoc.html +174 -0
  25. data/docs/files/lib/sip/cmdopts_rb.html +101 -0
  26. data/docs/files/lib/sip/config_rb.html +108 -0
  27. data/docs/files/lib/sip/databases/dbbase_rb.html +108 -0
  28. data/docs/files/lib/sip/databases/mysql_rb.html +108 -0
  29. data/docs/files/lib/sip/exceptions_rb.html +101 -0
  30. data/docs/files/lib/sip/extensions_rb.html +101 -0
  31. data/docs/files/lib/sip/hive_rb.html +101 -0
  32. data/docs/files/lib/sip/sipper_rb.html +101 -0
  33. data/docs/files/lib/sip/utils_rb.html +110 -0
  34. data/docs/files/lib/sip/version_rb.html +101 -0
  35. data/docs/files/lib/sip_rb.html +117 -0
  36. data/docs/fr_class_index.html +42 -0
  37. data/docs/fr_file_index.html +38 -0
  38. data/docs/fr_method_index.html +72 -0
  39. data/docs/index.html +24 -0
  40. data/docs/rdoc-style.css +208 -0
  41. data/lib/sip.rb +10 -0
  42. data/lib/sip/cmdopts.rb +20 -0
  43. data/lib/sip/config.rb +80 -0
  44. data/lib/sip/databases/dbbase.rb +56 -0
  45. data/lib/sip/databases/mysql.rb +52 -0
  46. data/lib/sip/exceptions.rb +9 -0
  47. data/lib/sip/extensions.rb +5 -0
  48. data/lib/sip/hive.rb +62 -0
  49. data/lib/sip/sipper.rb +118 -0
  50. data/lib/sip/templates/export.sh +73 -0
  51. data/lib/sip/utils.rb +58 -0
  52. data/lib/sip/version.rb +3 -0
  53. data/test/database_interaction_test.rb +7 -0
  54. data/test/hive_test.rb +28 -0
  55. data/test/sipper_test.rb +25 -0
  56. metadata +125 -0
@@ -0,0 +1 @@
1
+ Fri, 25 Mar 2011 16:22:17 -0400
@@ -0,0 +1,174 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: README.rdoc</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>README.rdoc</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>README.rdoc
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Fri Mar 25 16:22:16 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <h1>SIP: SQL to Hive Importer</h1>
73
+ <p>
74
+ SIP is a <a
75
+ href="http://en.wikipedia.org/wiki/Extract,_transform,_load">ETL</a> tool
76
+ for extracting SQL databases and importing them into <a
77
+ href="http://hive.apache.org">Hive</a>. It was created because the ability
78
+ to transform columns and partition data was an absolute requirement, and no
79
+ other tool provided that functionality.
80
+ </p>
81
+ <p>
82
+ Unique features include:
83
+ </p>
84
+ <ul>
85
+ <li>The ability to transform columns (using Ruby code)
86
+
87
+ </li>
88
+ <li>A single, simple, human readable configuration file
89
+
90
+ </li>
91
+ <li>The ability to parition tables in Hive based on the value of any (possibly
92
+ transformed) columns
93
+
94
+ </li>
95
+ </ul>
96
+ <p>
97
+ Bug reports and pull requests welcome on <a
98
+ href="https://github.com/livingsocial/sip">Github</a>.
99
+ </p>
100
+ <h2>Requirements</h2>
101
+ <ul>
102
+ <li>A working Hadoop installation
103
+
104
+ </li>
105
+ <li>Hive on the namenode
106
+
107
+ </li>
108
+ <li>Ruby &gt;= 1.8.6 on all datanodes
109
+
110
+ </li>
111
+ <li>SQL Ruby libs
112
+
113
+ </li>
114
+ </ul>
115
+ <h2>Installation, Configuration, and Use</h2>
116
+ <p>
117
+ if no primary key (default: id), must set incremental_index to blank
118
+ </p>
119
+ <pre>
120
+ sip [--db &lt;dbname&gt;] [--table &lt;tablename&gt;] [-c &lt;config location&gt;]
121
+ </pre>
122
+ <h2>How it Works</h2>
123
+ <p>
124
+ Per table to be imported, SIP determines the queries necessary to perform
125
+ an export and the creates scripts (one per datanode) that are then run
126
+ individually in parallel. Each script:
127
+ </p>
128
+ <ol>
129
+ <li>Copies a transformation / partition (transpart) script to the datanode
130
+
131
+ </li>
132
+ <li>Performs the SQL extraction, piping output through the transpart script
133
+
134
+ </li>
135
+ <li>Uploads all partitions to HDFS
136
+
137
+ </li>
138
+ </ol>
139
+ <p>
140
+ Then, all of the partitions are imported from HDFS into Hive. Easy squeezy.
141
+ </p>
142
+
143
+ </div>
144
+
145
+
146
+ </div>
147
+
148
+
149
+ </div>
150
+
151
+
152
+ <!-- if includes -->
153
+
154
+ <div id="section">
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+ <!-- if method_list -->
164
+
165
+
166
+ </div>
167
+
168
+
169
+ <div id="validator-badges">
170
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
171
+ </div>
172
+
173
+ </body>
174
+ </html>
@@ -0,0 +1,101 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: cmdopts.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>cmdopts.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/sip/cmdopts.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Mon Mar 14 17:18:33 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+
72
+
73
+ </div>
74
+
75
+
76
+ </div>
77
+
78
+
79
+ <!-- if includes -->
80
+
81
+ <div id="section">
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+ <!-- if method_list -->
91
+
92
+
93
+ </div>
94
+
95
+
96
+ <div id="validator-badges">
97
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
98
+ </div>
99
+
100
+ </body>
101
+ </html>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: config.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>config.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/sip/config.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Wed Mar 23 14:50:15 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+
72
+ <div id="requires-list">
73
+ <h3 class="section-bar">Required files</h3>
74
+
75
+ <div class="name-list">
76
+ yaml&nbsp;&nbsp;
77
+ </div>
78
+ </div>
79
+
80
+ </div>
81
+
82
+
83
+ </div>
84
+
85
+
86
+ <!-- if includes -->
87
+
88
+ <div id="section">
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+ <!-- if method_list -->
98
+
99
+
100
+ </div>
101
+
102
+
103
+ <div id="validator-badges">
104
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
105
+ </div>
106
+
107
+ </body>
108
+ </html>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: dbbase.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>dbbase.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/sip/databases/dbbase.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Wed Mar 23 14:50:15 -0400 2011</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+
72
+ <div id="requires-list">
73
+ <h3 class="section-bar">Required files</h3>
74
+
75
+ <div class="name-list">
76
+ sip/databases/mysql&nbsp;&nbsp;
77
+ </div>
78
+ </div>
79
+
80
+ </div>
81
+
82
+
83
+ </div>
84
+
85
+
86
+ <!-- if includes -->
87
+
88
+ <div id="section">
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+ <!-- if method_list -->
98
+
99
+
100
+ </div>
101
+
102
+
103
+ <div id="validator-badges">
104
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
105
+ </div>
106
+
107
+ </body>
108
+ </html>