sip 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/Gemfile +2 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +32 -0
  4. data/Rakefile +21 -0
  5. data/bin/sip +83 -0
  6. data/bin/transpart +114 -0
  7. data/docs/classes/Sip.html +169 -0
  8. data/docs/classes/Sip/CmdOpts.html +179 -0
  9. data/docs/classes/Sip/Config.html +362 -0
  10. data/docs/classes/Sip/DBBase.html +368 -0
  11. data/docs/classes/Sip/HadoopException.html +111 -0
  12. data/docs/classes/Sip/Hive.html +295 -0
  13. data/docs/classes/Sip/HiveQueryException.html +111 -0
  14. data/docs/classes/Sip/ImportScriptExecutionError.html +111 -0
  15. data/docs/classes/Sip/MySQLSipper.html +273 -0
  16. data/docs/classes/Sip/NoSuchColumn.html +111 -0
  17. data/docs/classes/Sip/NoSuchTable.html +111 -0
  18. data/docs/classes/Sip/PastFailureException.html +111 -0
  19. data/docs/classes/Sip/Sipper.html +454 -0
  20. data/docs/classes/Sip/UnsupportedDatabaseType.html +111 -0
  21. data/docs/classes/Sip/Utils.html +269 -0
  22. data/docs/classes/Struct.html +146 -0
  23. data/docs/created.rid +1 -0
  24. data/docs/files/README_rdoc.html +174 -0
  25. data/docs/files/lib/sip/cmdopts_rb.html +101 -0
  26. data/docs/files/lib/sip/config_rb.html +108 -0
  27. data/docs/files/lib/sip/databases/dbbase_rb.html +108 -0
  28. data/docs/files/lib/sip/databases/mysql_rb.html +108 -0
  29. data/docs/files/lib/sip/exceptions_rb.html +101 -0
  30. data/docs/files/lib/sip/extensions_rb.html +101 -0
  31. data/docs/files/lib/sip/hive_rb.html +101 -0
  32. data/docs/files/lib/sip/sipper_rb.html +101 -0
  33. data/docs/files/lib/sip/utils_rb.html +110 -0
  34. data/docs/files/lib/sip/version_rb.html +101 -0
  35. data/docs/files/lib/sip_rb.html +117 -0
  36. data/docs/fr_class_index.html +42 -0
  37. data/docs/fr_file_index.html +38 -0
  38. data/docs/fr_method_index.html +72 -0
  39. data/docs/index.html +24 -0
  40. data/docs/rdoc-style.css +208 -0
  41. data/lib/sip.rb +10 -0
  42. data/lib/sip/cmdopts.rb +20 -0
  43. data/lib/sip/config.rb +80 -0
  44. data/lib/sip/databases/dbbase.rb +56 -0
  45. data/lib/sip/databases/mysql.rb +52 -0
  46. data/lib/sip/exceptions.rb +9 -0
  47. data/lib/sip/extensions.rb +5 -0
  48. data/lib/sip/hive.rb +62 -0
  49. data/lib/sip/sipper.rb +118 -0
  50. data/lib/sip/templates/export.sh +73 -0
  51. data/lib/sip/utils.rb +58 -0
  52. data/lib/sip/version.rb +3 -0
  53. data/test/database_interaction_test.rb +7 -0
  54. data/test/hive_test.rb +28 -0
  55. data/test/sipper_test.rb +25 -0
  56. metadata +125 -0
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5
+
6
+ <!--
7
+
8
+ Sip - SQL to Hive importer
9
+
10
+ -->
11
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
12
+ <head>
13
+ <title>Sip - SQL to Hive importer</title>
14
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
15
+ </head>
16
+ <frameset rows="20%, 80%">
17
+ <frameset cols="25%,35%,45%">
18
+ <frame src="fr_file_index.html" title="Files" name="Files" />
19
+ <frame src="fr_class_index.html" name="Classes" />
20
+ <frame src="fr_method_index.html" name="Methods" />
21
+ </frameset>
22
+ <frame src="files/README_rdoc.html" name="docwin" />
23
+ </frameset>
24
+ </html>
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
+ div#description h1,h2,h3,h4,h5,h6 {
100
+ color: #125;
101
+ background: transparent;
102
+ }
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }
@@ -0,0 +1,10 @@
1
+ require 'sip/version'
2
+ require 'sip/utils'
3
+ require 'sip/exceptions'
4
+ require 'sip/extensions'
5
+ require 'sip/sipper'
6
+ require 'sip/databases/dbbase'
7
+ require 'sip/databases/mysql'
8
+ require 'sip/hive'
9
+ require 'sip/cmdopts'
10
+ require 'sip/config'
@@ -0,0 +1,20 @@
1
module Sip
  # A Hash of command-line options (option name => value) that can render
  # itself as a shell argument string.
  #
  # An option whose value is nil is rendered as a bare flag. Options are
  # emitted in the Hash's own iteration order.
  class CmdOpts < Hash
    # Registers each given name as a bare flag, i.e. stores it with a nil value.
    #
    # k - one or more option names.
    #
    # Returns the array of names that was passed in.
    def set(*k)
      k.each { |key| self[key] = nil }
    end

    # Renders the options as one space-separated string.
    #
    # Names longer than one character are rendered as "--name" or
    # "--name=value"; one-character names as "-n" or "-n value". Names are
    # converted with to_s first, so non-String keys (Symbols, Integers) work.
    # Values are inserted verbatim; any shell quoting is the caller's job.
    #
    # prefix - text placed before the options (e.g. the executable), or nil.
    # suffix - text placed after the options (e.g. a positional arg), or nil.
    #
    # Returns the String; empty prefix, options and suffix parts are omitted.
    def to_s(prefix=nil, suffix=nil)
      opts = map { |k, v|
        name = k.to_s
        long = name.length > 1
        if v.nil?
          long ? "--#{name}" : "-#{name}"
        else
          long ? "--#{name}=#{v}" : "-#{name} #{v}"
        end
      }.join(" ")
      [prefix.to_s, opts, suffix.to_s].reject { |part| part == "" }.join(" ")
    end
  end
end
@@ -0,0 +1,80 @@
1
+ require 'yaml'
2
+
3
module Sip
  # Sip configuration, held as a Hash with a top-level 'databases' entry:
  #
  #   databases:
  #     <dbname>:
  #       ...connection settings...
  #       tables:
  #         <tablename>: { ...table settings... }
  #
  # On construction every database and table entry is filled in with defaults
  # and given convenience 'dbname' / 'tablename' keys. Those convenience keys,
  # and any keys registered through set_temp, are left out when the
  # configuration is written back to disk.
  class Config < Hash

    # Defaults merged underneath every database entry.
    # NOTE(review): MySQLSipper reads the 'port' key; confirm which key name
    # config files are expected to use.
    DBCONF_DEFAULT = {
      'type' => 'mysql',
      'host' => 'localhost',
      'dbport' => nil
    }.freeze

    # Defaults merged underneath every table entry.
    TABLECONF_DEFAULT = {
      'incremental_index' => 'id',
      'method' => 'append',
      'incremental_index_value' => 0,
      'partition_by' => nil,
      'columns' => nil
    }.freeze

    # Builds a Config from the YAML file at +location+.
    def self.load_file(location)
      new YAML.load_file(location)
    end

    # initial_values - Hash of settings, typically parsed from YAML. nil or
    #                  false (what YAML yields for an empty file) is treated
    #                  as an empty Hash.
    #
    # The nested hashes inside +initial_values+ are copied while defaults are
    # applied, so the caller's data is not modified.
    def initialize(initial_values)
      super()
      # temp_keys are ones we'll leave out when saving to a file
      @temp_keys = []
      merge!(initial_values || {})

      databases = {}
      (self['databases'] || {}).each { |dbname, dbconf|
        databases[dbname] = expand_database(dbname, dbconf)
      }
      self['databases'] = databases
    end

    # Writes the configuration to +location+ as YAML.
    #
    # The 'dbname' / 'tablename' convenience keys and every key registered via
    # set_temp are omitted from the file. The in-memory configuration is left
    # untouched, so it stays fully usable after saving.
    def save_file(location)
      File.open(location, 'w') { |f| YAML.dump(serializable_hash, f) }
    end

    # Returns the settings Hash for database +dbname+.
    def dbconf(dbname)
      self['databases'][dbname]
    end

    # Returns the settings Hash for table +tablename+ of database +dbname+.
    def tconf(dbname, tablename)
      dbconf(dbname)['tables'][tablename]
    end

    # Replaces the settings of database +dbname+ with +conf+ (stored as given,
    # without applying defaults).
    def store_database(dbname, conf)
      self['databases'][dbname] = conf
    end

    # Replaces the settings of table +tablename+ in database +dbname+ with
    # +conf+ (stored as given, without applying defaults).
    def store_table(dbname, tablename, conf)
      (dbconf(dbname)['tables'] ||= {})[tablename] = conf
    end

    # Copies the given +keys+ from +other+ into this config and marks them as
    # temporary, so that save_file does not write them out.
    def set_temp(other, keys)
      keys.each { |k|
        @temp_keys << k unless @temp_keys.include?(k)
        self[k] = other[k]
      }
    end

    private

    # Returns a new database Hash: defaults, then the given settings, plus the
    # 'dbname' key and a 'tables' Hash whose entries have all been expanded.
    def expand_database(dbname, dbconf)
      expanded = DBCONF_DEFAULT.merge(dbconf || {})
      expanded['dbname'] = dbname
      tables = {}
      (expanded['tables'] || {}).each { |tablename, tableconf|
        tables[tablename] = expand_table(dbname, tablename, tableconf)
      }
      expanded['tables'] = tables
      expanded
    end

    # Returns a new table Hash: a derived 'hive_table_name', then defaults,
    # then the given settings (which win), plus the 'tablename' key.
    def expand_table(dbname, tablename, tableconf)
      expanded = {'hive_table_name' => "#{dbname}_#{tablename}"}.merge(TABLECONF_DEFAULT).merge(tableconf || {})
      expanded['tablename'] = tablename
      expanded
    end

    # Returns a plain Hash copy of the config that is suitable for dumping:
    # temp keys removed and 'dbname' / 'tablename' stripped from copies of the
    # nested hashes.
    def serializable_hash
      h = {}.merge(self)
      @temp_keys.each { |k| h.delete k }
      return h unless h['databases'].is_a?(Hash)

      databases = {}
      h['databases'].each { |dbname, dbconf|
        db = (dbconf || {}).reject { |k, _| k == 'dbname' }
        if db['tables'].is_a?(Hash)
          tables = {}
          db['tables'].each { |tablename, tableconf|
            tables[tablename] = (tableconf || {}).reject { |k, _| k == 'tablename' }
          }
          db['tables'] = tables
        end
        databases[dbname] = db
      }
      h['databases'] = databases
      h
    end
  end
end
@@ -0,0 +1,56 @@
1
module Sip
  # Base class for source-database interfaces.
  #
  # The methods here call three methods that this class does not define and
  # that a subclass therefore has to supply:
  #
  #   query(sql)                       - run +sql+, return an array of rows
  #   convert_to_hive_type(type)       - map a column type to a Hive type
  #   cmd_line_execute_string(select)  - build a shell command running +select+
  class DBBase
    attr_reader :args

    # Builds the interface object for the database +type+.
    #
    # type   - database type name; only 'mysql' is handled.
    # args   - connection settings passed on to the interface.
    # sipper - passed on to the interface.
    #
    # Raises UnsupportedDatabaseType for any other +type+.
    def self.make_interface(type, args, sipper)
      unless type == 'mysql'
        raise UnsupportedDatabaseType, "DB type #{type} not supported."
      end

      # loaded lazily so the mysql library is only needed when actually used
      require 'sip/databases/mysql'
      MySQLSipper.new args, sipper
    end

    def initialize(args, sipper)
      @args = args
      @sipper = sipper
    end

    # Returns the rows produced by 'SHOW tables'.
    def tables
      query('SHOW tables')
    end

    # Returns the number of rows in +table+ as an Integer.
    def rowcount(table)
      # must be double-quoted: a single-quoted string would send the literal
      # text '#{table}' to the database instead of the table name
      query("SELECT count(1) FROM #{table}").first.first.to_i
    end

    # Returns the maximum value of column +field+ in +tablename+, converted
    # with to_i.
    def get_column_max(tablename, field)
      query("SELECT max(#{field}) FROM #{tablename}").first.first.to_i
    end

    # Returns [name, hive_type] pairs for the columns of +table+.
    def hive_columns(table)
      columns(table).map { |name, type|
        [name, convert_to_hive_type(type)]
      }
    end

    # Returns the first two fields of every 'DESCRIBE <table>' row
    # (used by hive_columns as [name, type] pairs).
    def columns(table)
      query("DESCRIBE #{table}").map { |col| col.slice(0, 2) }
    end

    # Returns the entries of +cols+ that are columns of +table+, in the order
    # the table declares them.
    def order_column_list(table, cols)
      columns(table).map { |name, _type| name }.select { |c| cols.include? c }
    end

    # Builds the shell command that selects all columns of a table.
    #
    # tableconf - Hash providing 'tablename' and 'incremental_index'.
    # first     - if given, only rows with incremental_index >= first.
    # last      - if given, only rows with incremental_index <= last.
    #
    # Returns whatever cmd_line_execute_string returns for the SELECT.
    def generate_command(tableconf, first=nil, last=nil)
      table = tableconf['tablename']
      index = tableconf['incremental_index']

      select = "SELECT #{columns(table).map { |name, _type| name }.join(',')} FROM #{table}"
      wheres = []
      wheres << "#{index} >= #{first}" unless first.nil?
      wheres << "#{index} <= #{last}" unless last.nil?
      select += " WHERE #{wheres.join(" AND ")}" unless wheres.empty?
      cmd_line_execute_string select
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require "mysql"
2
+
3
module Sip
  # MySQL implementation of the DBBase interface: runs queries over a Mysql
  # connection and builds `mysql` command lines for exporting data.
  class MySQLSipper < DBBase
    # args   - Hash of connection settings; reads 'host', 'username',
    #          'password', 'dbname' and 'port' (falling back to 'dbport').
    # sipper - object used for logging (its #log method is called by #query).
    def initialize(args, sipper)
      super(args, sipper)
      @connection = Mysql::new @args['host'], @args['username'], @args['password'], @args['dbname'], configured_port
    end

    # Builds the shell command line that runs +select+ through the `mysql`
    # command-line client with the -N -B -C -q flags set.
    #
    # Connection options whose setting is nil are left out entirely: a bare
    # "-P" or "-u" would swallow the following argument, and a bare
    # "--password" makes the client prompt interactively.
    #
    # Returns the command String.
    def cmd_line_execute_string(select)
      # close the quote, emit an escaped quote, reopen: ' -> '\''
      quoted_select = "'" + select.gsub("'") { "'\\''" } + "'"

      opts = CmdOpts.new
      opts.set 'N', 'B', 'C', 'q'
      opts['u'] = @args['username'] unless @args['username'].nil?
      opts['password'] = @args['password'] unless @args['password'].nil?
      opts['h'] = @args['host'] unless @args['host'].nil?
      opts['e'] = quoted_select
      opts['P'] = configured_port unless configured_port.nil?

      path = `which mysql`.strip
      # fall back to PATH lookup at execution time rather than emitting a
      # command that has no executable in front of its options
      path = 'mysql' if path.empty?
      opts.to_s(path, @args['dbname'])
    end

    # Runs the SQL string +q+ on the connection, logging it first.
    #
    # Returns an Array of rows, or nil when the connection returns no result
    # object for the statement.
    def query(q)
      @sipper.log "Running MySQL Query: #{q}"
      c = @connection.query(q)
      return nil if c.nil?
      results = []
      c.num_rows.times { results << c.fetch_row }
      results
    end

    # Maps a MySQL column type as reported by DESCRIBE (e.g. "int(11)",
    # "double unsigned", "varchar(255)") to a Hive column type.
    #
    # Only the leading type word is considered, so display widths and
    # modifiers such as "unsigned" do not affect the result. Anything not
    # listed becomes STRING.
    def convert_to_hive_type(typename)
      case typename.to_s.downcase[/\A[a-z]+/]
      when "tinyint" then "TINYINT"
      when "smallint" then "SMALLINT"
      when "mediumint", "int" then "INT"
      when "bigint" then "BIGINT"
      when "float" then "FLOAT"
      # decimal and numeric are the same MySQL type; use the widest
      # floating-point type for both instead of single-precision FLOAT
      when "decimal", "numeric", "real", "double" then "DOUBLE"
      when "boolean" then "BOOLEAN"
      else "STRING"
      end
    end

    # Closes the database connection.
    def close
      @connection.close
    end

    private

    # Port to connect on: the 'port' setting, else 'dbport' (the key
    # Sip::Config supplies a default for), else nil for the client default.
    def configured_port
      @args['port'] || @args['dbport']
    end
  end
end