sip 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. data/Gemfile +2 -0
  2. data/LICENSE +674 -0
  3. data/README.rdoc +32 -0
  4. data/Rakefile +21 -0
  5. data/bin/sip +83 -0
  6. data/bin/transpart +114 -0
  7. data/docs/classes/Sip.html +169 -0
  8. data/docs/classes/Sip/CmdOpts.html +179 -0
  9. data/docs/classes/Sip/Config.html +362 -0
  10. data/docs/classes/Sip/DBBase.html +368 -0
  11. data/docs/classes/Sip/HadoopException.html +111 -0
  12. data/docs/classes/Sip/Hive.html +295 -0
  13. data/docs/classes/Sip/HiveQueryException.html +111 -0
  14. data/docs/classes/Sip/ImportScriptExecutionError.html +111 -0
  15. data/docs/classes/Sip/MySQLSipper.html +273 -0
  16. data/docs/classes/Sip/NoSuchColumn.html +111 -0
  17. data/docs/classes/Sip/NoSuchTable.html +111 -0
  18. data/docs/classes/Sip/PastFailureException.html +111 -0
  19. data/docs/classes/Sip/Sipper.html +454 -0
  20. data/docs/classes/Sip/UnsupportedDatabaseType.html +111 -0
  21. data/docs/classes/Sip/Utils.html +269 -0
  22. data/docs/classes/Struct.html +146 -0
  23. data/docs/created.rid +1 -0
  24. data/docs/files/README_rdoc.html +174 -0
  25. data/docs/files/lib/sip/cmdopts_rb.html +101 -0
  26. data/docs/files/lib/sip/config_rb.html +108 -0
  27. data/docs/files/lib/sip/databases/dbbase_rb.html +108 -0
  28. data/docs/files/lib/sip/databases/mysql_rb.html +108 -0
  29. data/docs/files/lib/sip/exceptions_rb.html +101 -0
  30. data/docs/files/lib/sip/extensions_rb.html +101 -0
  31. data/docs/files/lib/sip/hive_rb.html +101 -0
  32. data/docs/files/lib/sip/sipper_rb.html +101 -0
  33. data/docs/files/lib/sip/utils_rb.html +110 -0
  34. data/docs/files/lib/sip/version_rb.html +101 -0
  35. data/docs/files/lib/sip_rb.html +117 -0
  36. data/docs/fr_class_index.html +42 -0
  37. data/docs/fr_file_index.html +38 -0
  38. data/docs/fr_method_index.html +72 -0
  39. data/docs/index.html +24 -0
  40. data/docs/rdoc-style.css +208 -0
  41. data/lib/sip.rb +10 -0
  42. data/lib/sip/cmdopts.rb +20 -0
  43. data/lib/sip/config.rb +80 -0
  44. data/lib/sip/databases/dbbase.rb +56 -0
  45. data/lib/sip/databases/mysql.rb +52 -0
  46. data/lib/sip/exceptions.rb +9 -0
  47. data/lib/sip/extensions.rb +5 -0
  48. data/lib/sip/hive.rb +62 -0
  49. data/lib/sip/sipper.rb +118 -0
  50. data/lib/sip/templates/export.sh +73 -0
  51. data/lib/sip/utils.rb +58 -0
  52. data/lib/sip/version.rb +3 -0
  53. data/test/database_interaction_test.rb +7 -0
  54. data/test/hive_test.rb +28 -0
  55. data/test/sipper_test.rb +25 -0
  56. metadata +125 -0
@@ -0,0 +1,24 @@
1
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
     PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">

<!--

    Sip - SQL to Hive importer

    Frameset entry page: a top row holding the file, class and method
    index frames, and a bottom row ("docwin") that initially shows the
    README page.

  -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>Sip - SQL to Hive importer</title>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<frameset rows="20%, 80%">
    <!-- Column widths add up to exactly 100% (the previous values summed to 105%). -->
    <frameset cols="25%,35%,40%">
        <frame src="fr_file_index.html"   title="Files"   name="Files" />
        <frame src="fr_class_index.html"  title="Classes" name="Classes" />
        <frame src="fr_method_index.html" title="Methods" name="Methods" />
    </frameset>
    <frame src="files/README_rdoc.html" title="Documentation" name="docwin" />
</frameset>
</html>
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
/* Heading colour inside the description box.
   NOTE(review): only the h1 selector is scoped to div#description; the bare
   h2-h6 selectors match those headings anywhere on the page. Scoping them
   would change the colour of headings elsewhere, so confirm before changing. */
div#description h1,h2,h3,h4,h5,h6 {
  color: #125;
  background: transparent;
}
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }
@@ -0,0 +1,10 @@
1
+ require 'sip/version'
2
+ require 'sip/utils'
3
+ require 'sip/exceptions'
4
+ require 'sip/extensions'
5
+ require 'sip/sipper'
6
+ require 'sip/databases/dbbase'
7
+ require 'sip/databases/mysql'
8
+ require 'sip/hive'
9
+ require 'sip/cmdopts'
10
+ require 'sip/config'
@@ -0,0 +1,20 @@
1
module Sip
  # A Hash of command line options that can render itself as an option string.
  #
  # Keys are option names and values are option arguments. A nil value marks a
  # bare flag. Names that are one character long render in short form
  # ("-k" or "-k value"); longer names render in long form ("--key" or
  # "--key=value"). Values are interpolated as-is: no shell quoting is applied.
  class CmdOpts < Hash
    # Registers each given name as a bare flag by storing nil as its value.
    def set(*k)
      k.each { |key| self[key] = nil }
    end

    # Renders all options, in the Hash's iteration order, separated by spaces.
    #
    # prefix:: optional text placed before the options (e.g. the executable)
    # suffix:: optional text placed after the options (e.g. a trailing argument)
    #
    # A nil or empty prefix/suffix is left out entirely, so the result never
    # has leading, trailing or doubled separator spaces because of them.
    def to_s(prefix=nil, suffix=nil)
      # Keys are converted with to_s so that Symbol and Integer option names
      # (e.g. a numeric flag such as 9) render instead of raising.
      opts = map { |name, value| render_option(name.to_s, value) }.join(" ")
      [prefix, opts, suffix].reject { |part| part.nil? || part == "" }.join(" ")
    end

    private

    # Renders a single option in short or long form, with or without a value.
    def render_option(name, value)
      long = name.length > 1
      if value.nil?
        long ? "--#{name}" : "-#{name}"
      else
        long ? "--#{name}=#{value}" : "-#{name} #{value}"
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ require 'yaml'
2
+
3
module Sip
  # Hash-backed configuration, normally loaded from and saved to a YAML file.
  #
  # The top-level 'databases' key maps database names to their settings, and
  # each database's 'tables' key maps table names to table settings. On
  # construction every database and table entry is filled in with defaults and
  # gets a convenience 'dbname' / 'tablename' key holding its own name; those
  # convenience keys are left out again when the configuration is saved.
  class Config < Hash

    DBCONF_DEFAULT = {
      'type' => 'mysql',
      'host' => 'localhost',
      'dbport' => nil
    }

    TABLECONF_DEFAULT = {
      'incremental_index' => 'id',
      'method' => 'append',
      'incremental_index_value' => 0,
      'partition_by' => nil,
      'columns' => nil
    }

    # Builds a Config from the YAML file at +location+.
    def self.load_file(location)
      Config.new YAML.load_file(location)
    end

    # initial_values:: Hash of settings to start from. nil/false (what YAML
    #                  yields for an empty file) is treated as an empty Hash.
    def initialize(initial_values)
      # temp_keys are ones we'll delete before saving to a file
      @temp_keys = []
      merge!(initial_values || {})

      # A missing 'databases' section, a database without settings, or a
      # database without 'tables' is treated as empty instead of raising.
      self['databases'] ||= {}

      # initialize defaults, including setting dbname and tablename ease of use keys
      self['databases'].keys.each { |dbname|
        dbconf = DBCONF_DEFAULT.merge(self['databases'][dbname] || {})
        dbconf['dbname'] = dbname
        tables = (dbconf['tables'] ||= {})
        tables.keys.each { |tablename|
          tableconf = {'hive_table_name' => "#{dbname}_#{tablename}"}.merge(TABLECONF_DEFAULT).merge(tables[tablename] || {})
          tableconf['tablename'] = tablename
          tables[tablename] = tableconf
        }
        self['databases'][dbname] = dbconf
      }
    end

    # Writes the configuration to +location+ as YAML.
    #
    # Temporary keys (see set_temp) and the convenience 'dbname' / 'tablename'
    # keys are omitted from the file. The in-memory configuration is left
    # untouched, so it stays fully usable after saving.
    def save_file(location)
      # Build the data first so a failure here cannot leave a truncated file.
      data = persistable_hash
      File.open(location, 'w') { |f| YAML.dump(data, f) }
    end

    # Settings Hash of the database named +dbname+.
    def dbconf(dbname)
      self['databases'][dbname]
    end

    # Settings Hash of table +tablename+ in database +dbname+.
    def tconf(dbname, tablename)
      dbconf(dbname)['tables'][tablename]
    end

    # Replaces the settings of database +dbname+ with +conf+.
    def store_database(dbname, conf)
      self['databases'][dbname] = conf
    end

    # Replaces the settings of table +tablename+ in database +dbname+ with +conf+.
    def store_table(dbname, tablename, conf)
      self['databases'][dbname]['tables'][tablename] = conf
    end

    # Copies the given +keys+ from +other+ into this configuration and marks
    # them as temporary, i.e. they are not written out by save_file.
    def set_temp(other, keys)
      keys.each { |k|
        @temp_keys << k
        self[k] = other[k]
      }
    end

    private

    # Plain-Hash copy of the configuration without temporary keys and without
    # the unnecessary dbname and tablename keys.
    def persistable_hash
      data = {}
      each { |key, value| data[key] = value unless @temp_keys.include?(key) }

      if data['databases']
        data['databases'] = data['databases'].each_with_object({}) { |(dbname, dbconf), dbs|
          stripped = dbconf.reject { |key, _| key == 'dbname' }
          # A database stored via store_database may have no 'tables' entry.
          if stripped['tables']
            stripped['tables'] = stripped['tables'].each_with_object({}) { |(tablename, tableconf), tables|
              tables[tablename] = tableconf.reject { |key, _| key == 'tablename' }
            }
          end
          dbs[dbname] = stripped
        }
      end

      data
    end
  end
end
@@ -0,0 +1,56 @@
1
module Sip
  # Common behaviour for database adapters.
  #
  # This class calls +query+, +convert_to_hive_type+ and
  # +cmd_line_execute_string+ but does not define them; a concrete adapter
  # subclass has to provide those methods.
  class DBBase
    attr_reader :args

    # Creates the adapter for the given database +type+.
    #
    # Only 'mysql' is supported; any other type raises
    # UnsupportedDatabaseType.
    def self.make_interface(type, args, sipper)
      if type == 'mysql'
        # Loaded on demand so the MySQL driver is only needed when used.
        require 'sip/databases/mysql'
        MySQLSipper.new args, sipper
      else
        raise UnsupportedDatabaseType, "DB type #{type} not supported."
      end
    end

    # args::   connection settings, exposed through the +args+ reader
    # sipper:: object kept in @sipper for use by subclasses
    def initialize(args, sipper)
      @args = args
      @sipper = sipper
    end

    # Result of running SHOW tables.
    def tables
      query('SHOW tables')
    end

    # Number of rows in +table+, as an Integer.
    def rowcount(table)
      # Double quotes are required here: a single-quoted string would send the
      # literal text "#{table}" to the database instead of the table name.
      query("SELECT count(1) FROM #{table}").first.first.to_i
    end

    # Maximum value of column +field+ in +tablename+, converted with to_i.
    def get_column_max(tablename, field)
      query("SELECT max(#{field}) FROM #{tablename}").first.first.to_i
    end

    # [name, hive_type] pairs for every column of +table+.
    def hive_columns(table)
      columns(table).map { |name, type|
        [name, convert_to_hive_type(type)]
      }
    end

    # First two fields of every DESCRIBE row for +table+ (used by the other
    # methods here as column name and column type).
    def columns(table)
      query("DESCRIBE #{table}").map { |col|
        col.slice(0,2)
      }
    end

    # The entries of +cols+ that are columns of +table+, in the table's own
    # column order.
    def order_column_list(table, cols)
      columns(table).map { |name, _type| name }.select { |name| cols.include? name }
    end

    # Builds the command line that selects all columns of the configured
    # table, optionally restricted to incremental index values between
    # +first+ and +last+ (both inclusive; either bound may be nil).
    def generate_command(tableconf, first=nil, last=nil)
      tablename = tableconf['tablename']
      index = tableconf['incremental_index']
      column_names = columns(tablename).map { |name, _type| name }

      select = "SELECT #{column_names.join(',')} FROM #{tablename}"
      wheres = []
      wheres << "#{index} >= #{first}" unless first.nil?
      wheres << "#{index} <= #{last}" unless last.nil?
      select += " WHERE #{wheres.join(" AND ")}" unless wheres.empty?
      cmd_line_execute_string select
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require "mysql"
2
+
3
module Sip
  # MySQL adapter: runs queries over a Mysql connection and builds `mysql`
  # client command lines for exporting data.
  class MySQLSipper < DBBase
    # Opens the connection using the 'host', 'username', 'password', 'dbname'
    # and 'port' entries of +args+.
    #
    # NOTE(review): this class reads the port from 'port'; confirm that the
    # configuration supplies it under that key.
    def initialize(args, sipper)
      super(args, sipper)
      @connection = Mysql::new @args['host'], @args['username'], @args['password'], @args['dbname'], @args['port']
    end

    # Builds the `mysql` client command line that executes +select+ against
    # the configured database.
    #
    # The statement is wrapped in single quotes as-is, so it must not itself
    # contain single quotes.
    def cmd_line_execute_string(select)
      opts = CmdOpts.new
      opts.set 'N', 'B', 'C', 'q'
      # A short option with a nil value renders as a bare flag (e.g. "-P"),
      # which would swallow the next token on the command line. Unset values
      # are therefore left out.
      opts['u'] = @args['username'] unless @args['username'].nil?
      opts['password'] = @args['password']
      opts['h'] = @args['host'] unless @args['host'].nil?
      opts['e'] = "'#{select}'"
      opts['P'] = @args['port'] unless @args['port'].nil?
      path = `which mysql`
      opts.to_s(path.strip, @args['dbname'])
    end

    # Runs +q+ and returns all result rows as an Array, or nil when the
    # connection returns no result object. The query is logged through the
    # sipper first.
    def query(q)
      @sipper.log "Running MySQL Query: #{q}"
      result = @connection.query(q)
      return nil if result.nil?
      Array.new(result.num_rows) { result.fetch_row }
    end

    # Maps a MySQL column type such as "int(11)" or "bigint unsigned" to a
    # Hive type name. Anything not listed below maps to "STRING".
    def convert_to_hive_type(typename)
      # Only the leading type word matters: this drops a width such as "(11)"
      # as well as modifiers such as " unsigned", and ignores letter case.
      case typename.to_s.downcase[/\A[a-z]+/]
      when "tinyint" then "TINYINT"
      when "smallint" then "SMALLINT" # Hive has no MEDIUMINT type
      when "mediumint" then "INT"
      when "int" then "INT"
      when "bigint" then "BIGINT"
      # NOTE(review): FLOAT loses precision for decimal columns; kept as-is
      # to avoid changing existing table schemas. Consider DOUBLE.
      when "decimal" then "FLOAT"
      when "numeric" then "DOUBLE"
      when "float" then "FLOAT"
      when "real" then "DOUBLE"
      when "double" then "DOUBLE"
      when "boolean" then "BOOLEAN"
      else "STRING"
      end
    end

    # Closes the database connection.
    def close
      @connection.close
    end
  end
end