hipster_sql_to_hbase 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.document +4 -0
  3. data/Gemfile +5 -0
  4. data/LICENSE +20 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.md +39 -0
  7. data/README.rdoc +19 -0
  8. data/Rakefile +31 -0
  9. data/VERSION +1 -0
  10. data/hipster_sql_to_hbase.gemspec +55 -0
  11. data/lib/adapter/Hbase.thrift +914 -0
  12. data/lib/adapter/hbase.rb +59 -0
  13. data/lib/adapter/hbase/hbase.rb +2966 -0
  14. data/lib/adapter/hbase/hbase_constants.rb +14 -0
  15. data/lib/adapter/hbase/hbase_types.rb +282 -0
  16. data/lib/datatype_extras.rb +18 -0
  17. data/lib/executor.rb +91 -0
  18. data/lib/hipster_sql_to_hbase.rb +167 -0
  19. data/lib/result_tree_to_hbase_converter.rb +119 -0
  20. data/lib/result_tree_to_json_converter.rb +40 -0
  21. data/lib/sql_parser/sql.treetop +21 -0
  22. data/lib/sql_parser/sql_chars.treetop +5 -0
  23. data/lib/sql_parser/sql_create_table.treetop +47 -0
  24. data/lib/sql_parser/sql_datatypes.treetop +71 -0
  25. data/lib/sql_parser/sql_delete.treetop +64 -0
  26. data/lib/sql_parser/sql_drop_table.treetop +26 -0
  27. data/lib/sql_parser/sql_from_clause.treetop +12 -0
  28. data/lib/sql_parser/sql_group_by_clause.treetop +15 -0
  29. data/lib/sql_parser/sql_helpers.treetop +19 -0
  30. data/lib/sql_parser/sql_insert.treetop +118 -0
  31. data/lib/sql_parser/sql_key_value_pair.treetop +91 -0
  32. data/lib/sql_parser/sql_limit.treetop +7 -0
  33. data/lib/sql_parser/sql_order_by_clause.treetop +53 -0
  34. data/lib/sql_parser/sql_primitives.treetop +118 -0
  35. data/lib/sql_parser/sql_row_support.treetop +72 -0
  36. data/lib/sql_parser/sql_select.treetop +82 -0
  37. data/lib/sql_parser/sql_select_clause.treetop +17 -0
  38. data/lib/sql_parser/sql_show_tables.treetop +26 -0
  39. data/lib/sql_parser/sql_tokens.treetop +125 -0
  40. data/lib/sql_parser/sql_transaction.treetop +43 -0
  41. data/lib/sql_parser/sql_truncate.treetop +11 -0
  42. data/lib/sql_parser/sql_update.treetop +82 -0
  43. data/lib/sql_parser/sql_where_condition.treetop +46 -0
  44. data/lib/sql_treetop_load.rb +23 -0
  45. data/spec/hipster_sql_to_hbase_spec.rb +171 -0
  46. data/spec/spec_helper.rb +3 -0
  47. metadata +192 -0
@@ -0,0 +1,119 @@
1
+ require 'securerandom'
2
+ require 'thrift'
3
+ require_relative "executor"
4
+
5
+ require File.join(File.dirname(__FILE__), 'adapter', 'hbase')
6
+
7
+ module HipsterSqlToHbase
8
+
9
# An Array of Thrift call descriptors that knows how to execute itself.
# The converter in this file fills it with hashes of the form
# {:method => <thrift method name>, :arguments => [...]}.
class ThriftCallGroup < Array
  # arr::  enumerable of call descriptors; every element is appended in order.
  # incr:: flag stored per instance and forwarded untouched to
  #        Executor#execute (defaults to false).
  def initialize(arr, incr = false)
    # Initialise the underlying Array explicitly before filling it.
    super()
    arr.each { |call| self << call }
    @incr = incr
  end

  # Passes this group, the optional host/port and the stored incr flag to a
  # new HipsterSqlToHbase::Executor and returns whatever its #execute returns.
  def execute(host = nil, port = nil)
    HipsterSqlToHbase::Executor.new.execute(self, host, port, @incr)
  end
end
23
+
24
# Transforms ResultTree hashes into ThriftCallGroup objects that describe the
# HBase (Thrift) calls for a parsed SQL sentence.
class ResultTreeToHbaseConverter

  # Dispatches to the public "<query_type>_sentence" method, handing it
  # result_tree[:query_hash]. Raises NoMethodError when the query type has no
  # matching method.
  def convert(result_tree)
    public_send("#{result_tree[:query_type]}_sentence", result_tree[:query_hash])
  end

  # INSERT: builds one "mutateRow" call per entry of hash[:values]. Each row
  # gets a fresh SecureRandom.uuid as its row key and one mutation per column,
  # pairing hash[:columns][i] with value_set[i] (converted with to_s).
  # The resulting group is created with incr = true.
  def insert_sentence(hash)
    thrift_calls = hash[:values].map do |value_set|
      # NOTE(review): this method references HBase::Mutation while
      # create_table_sentence references Hbase::ColumnDescriptor; confirm that
      # the adapter defines both constants.
      mutations = hash[:columns].each_with_index.map do |col, i|
        HBase::Mutation.new(column: col, value: value_set[i].to_s)
      end
      {:method => "mutateRow", :arguments => [hash[:into], SecureRandom.uuid, mutations, {}]}
    end

    HipsterSqlToHbase::ThriftCallGroup.new(thrift_calls, true)
  end

  # SELECT: builds a single "getRowsByScanner" call whose arguments are the
  # table (hash[:from]), the selected columns (hash[:select]), the filter
  # string derived from hash[:where] (empty string when there is no WHERE)
  # and an empty attributes hash.
  def select_sentence(hash)
    filter_string = recurse_where(hash[:where] || [])
    call = {
      :method => "getRowsByScanner",
      :arguments => [hash[:from], hash[:select], filter_string, {}]
    }

    HipsterSqlToHbase::ThriftCallGroup.new([call])
  end

  # CREATE TABLE: builds a single "createTable" call with one
  # Hbase::ColumnDescriptor (only its name set) per entry of hash[:columns].
  def create_table_sentence(hash)
    descriptors = hash[:columns].map do |col_name|
      Hbase::ColumnDescriptor.new.tap { |descriptor| descriptor.name = col_name }
    end

    HipsterSqlToHbase::ThriftCallGroup.new(
      [{:method => "createTable", :arguments => [hash[:table], descriptors]}]
    )
  end

  private

  # Turns a (possibly nested) WHERE array into one filter string.
  # Hash entries become filter pairs, nested Arrays are converted recursively
  # and wrapped in parentheses, Strings are copied verbatim. All parts are
  # joined with single spaces. Any other element type raises a RuntimeError.
  def recurse_where(where_arr)
    parts = where_arr.map do |val|
      case val
      when Hash   then filters_from_key_value_pair(val)
      when Array  then "(#{recurse_where(val)})"
      when String then val
      else raise "Recursive where undefined error."
      end
    end
    parts.join(" ")
  end

  # Builds the "(ValueFilter(...) AND DependentColumnFilter(...))" string for
  # one condition hash with the keys :column, :condition and :value.
  # LIKE conditions are emitted as a regexstring comparison; every other
  # condition is emitted as a binary comparison using the condition verbatim.
  # The kvp hash is never modified, so converting the same tree repeatedly
  # yields the same filter each time.
  # NOTE(review): column and value are interpolated unescaped; a value that
  # contains a single quote will likely break the filter string — confirm.
  def filters_from_key_value_pair(kvp)
    column_filter = "DependentColumnFilter('#{kvp[:column]}','')"
    if kvp[:condition].to_s == "LIKE"
      "(ValueFilter(=,'regexstring:#{like_pattern_to_regex(kvp[:value])}') AND #{column_filter})"
    else
      "(ValueFilter(#{kvp[:condition]},'binary:#{kvp[:value]}') AND #{column_filter})"
    end
  end

  # Translates a SQL LIKE pattern into a regular expression source string
  # without touching the argument. After Regexp.escape:
  #   leading  %  -> "^.*"     trailing %  -> ".*$"     other %  -> ".*?"
  #   leading  _  -> "^."      trailing _  -> ".$"      other _  -> "."
  # An inner % or _ directly preceded by a backslash is left as it is.
  # NOTE(review): a pattern without a leading wildcard gets no "^" anchor
  # (and one without a trailing wildcard no "$"); whether that matches LIKE
  # semantics depends on how the HBase comparator applies the regex — confirm.
  def like_pattern_to_regex(like_value)
    regex = Regexp.escape(like_value)
    # Regexp.escape rewrites newlines as "\n" escapes, so the string is a
    # single line and \A / \z address its real start and end.
    regex = regex.sub(/\A%/, "^.*").sub(/%\z/, ".*$")
    regex = regex.gsub(/(?<!\\)%/, ".*?")
    regex = regex.sub(/\A_/, "^.").sub(/_\z/, ".$")
    regex.gsub(/(?<!\\)_/, ".")
  end

end
119
+ end
@@ -0,0 +1,40 @@
1
+ require 'json'
2
+
3
+ require File.join(File.dirname(__FILE__), 'adapter', 'hbase')
4
+
5
+ module HipsterSqlToHbase
6
# Converts ResultTree hashes into JSON documents. Only INSERT is implemented;
# the SELECT and CREATE TABLE handlers are placeholders that return nil.
class ResultTreeToJsonConverter
  # Dispatches to the public "<query_type>_sentence" method, handing it
  # result_tree[:query_hash]. Raises NoMethodError when the query type has no
  # matching method.
  def convert(result_tree)
    public_send("#{result_tree[:query_type]}_sentence", result_tree[:query_hash])
  end

  # INSERT: returns a JSON string of the form
  #   {"write":{"table":<hash[:into]>,"objects":[{<column>:<value>,...},...]}}
  # with one object per entry of hash[:values]; column i of hash[:columns] is
  # paired with value_set[i].
  def insert_sentence(hash)
    objects = hash[:values].map do |value_set|
      hash[:columns].each_with_index.each_with_object({}) do |(col, i), object|
        object[col.to_sym] = value_set[i]
      end
    end

    JSON.generate({:write => {:table => hash[:into], :objects => objects}})
  end

  # SELECT: not implemented yet; always returns nil.
  # The optional parameter lets #convert, which always passes the query hash,
  # reach this method instead of failing with an ArgumentError.
  def select_sentence(hash = nil)

  end

  # CREATE TABLE: not implemented yet; always returns nil.
  # Accepts the query hash for the same reason as #select_sentence.
  def create_table_sentence(hash = nil)

  end

  private

  # Placeholder; currently returns nil.
  def qualifier_filters_from_cols(cols)

  end

  # Placeholder; currently returns nil.
  def value_filters_from_vals(vals)

  end

end
40
+ end
@@ -0,0 +1,21 @@
1
# Root grammar: mixes in the per-statement grammars (defined in their own
# files) and exposes a single entry rule, sql_statement.
# NOTE(review): no UPDATE grammar is included here — confirm that is intended.
grammar SQL
  include SQLSelect
  include SQLDelete
  include SQLTruncate
  include DropTable #TODO: Fix my name
  include SQLTransaction
  include SQLInsert
  include SQLShowTables
  include SQLCreateTable

  # Ordered choice: the alternatives are tried from top to bottom and the
  # first one that matches wins.
  rule sql_statement
    select_expression
    / delete
    / truncate
    / drop_table
    / transaction_statement
    / insert
    / show_tables
    / create_table
  end
end
@@ -0,0 +1,5 @@
1
# Character-level helper grammar.
grammar SQLChars
  # Matches a single opening or closing square bracket.
  rule sql_embeded_language_char
    "["
    / "]"
  end
end
@@ -0,0 +1,47 @@
1
# Grammar for CREATE TABLE <name> (<column> <datatype>, ...).
grammar SQLCreateTable
  include SQLRowSupport
  include SQLDataTypes

  # Entry rule. The node exposes:
  #   eval       -> {:columns => [...], :table_name => ...}
  #   query_type -> :create_table
  #   tree       -> {:table => ..., :columns => [...]} (eval with renamed keys)
  rule create_table
    "CREATE" SPACE+ "TABLE" SPACE+ table_name SPACE+ OPEN_PARENS columns_and_datatypes CLOSE_PARENS {
      def eval
        {
          :columns => columns_and_datatypes.eval,
          :table_name => table_name.eval
        }
      end

      def query_type
        :create_table
      end

      def tree
        parsed = eval
        {:table => parsed[:table_name], :columns => parsed[:columns]}
      end
    }
  end

  # Comma-separated list of column definitions, parsed right-recursively.
  # eval yields one flat array with the results of every column definition.
  rule columns_and_datatypes
    column_with_datatype COMMA columns_and_datatypes {
      def eval
        (column_with_datatype.eval + columns_and_datatypes.eval).flatten
      end
    }
    /
    column_with_datatype
  end

  # A single "<column> <datatype>" pair with optional surrounding spaces.
  # eval returns a one-element array holding column_name.eval; the datatype
  # is parsed but not part of the result.
  rule column_with_datatype
    SPACE* column_name SPACE+ datatype SPACE* {
      def eval
        [column_name.eval]
      end
    }
  end
end
@@ -0,0 +1,71 @@
1
# Grammar for the column datatypes accepted in column definitions.
# The keyword tokens (VARCHAR_KEYWORD, INT_KEYWORD, ...) are not defined here.
grammar SQLDataTypes
  include SQLPrimitives

  # Any supported datatype: bit field, numeric type or character type.
  rule datatype
    bit_field / numeric_type / char_type
  end

  # Character type with an optional size.
  rule char_type
    char_field field_size_with_space?
  end

  rule char_field
    VARCHAR_KEYWORD
  end

  # Integer types are tried before decimal types.
  rule numeric_type
    int_type
    / decimal_type
  end

  # Decimal keyword followed by optional size, unsigned and zerofill parts.
  rule decimal_type
    decimal_field field_size_with_space? unsigned? zerofill?
  end

  rule decimal_field
    REAL_KEYWORD
    / DOUBLE_KEYWORD
    / FLOAT_KEYWORD
    / DECIMAL_KEYWORD
    / NUMERIC_KEYWORD
  end

  # Integer keyword followed by its options.
  rule int_type
    int_field int_options
  end

  # INTEGER_KEYWORD is listed before INT_KEYWORD in this ordered choice.
  rule int_field
    TINY_INT_KEYWORD
    / SMALL_INT_KEYWORD
    / INTEGER_KEYWORD
    / INT_KEYWORD
    / BIG_INT_KEYWORD
  end

  # Optional size, unsigned and zerofill parts, in that order.
  rule int_options
    field_size_with_space? unsigned? zerofill?
  end

  # "BIT" with an optional size directly attached (no leading spaces).
  rule bit_field
    "BIT" field_size?
  end

  rule unsigned
    SPACE UNSIGNED_KEYWORD
  end

  rule zerofill
    SPACE ZEROFILL_KEYWORD
  end

  # A field size that may be preceded by any number of spaces.
  rule field_size_with_space
    SPACE* field_size
  end

  # Parenthesised size: either "(n)" or "(n, m)", spaces allowed inside.
  rule field_size
    OPEN_PARENS SPACE* integer SPACE* CLOSE_PARENS
    /
    OPEN_PARENS SPACE* integer SPACE* COMMA SPACE* integer SPACE* CLOSE_PARENS
  end
end
71
+
@@ -0,0 +1,64 @@
1
# Grammar for single-table DELETE statements with optional directives and
# optional WHERE / ORDER BY / LIMIT clauses.
grammar SQLDelete
  include SQLRowSupport
  include SQLWhereCondition
  include SQLOrderByClause
  include SQLLimit
  include SQLHelpers

  # Entry rule; only the single-table form exists.
  rule delete
    single_table_delete
  end

  # "DELETE [directives] FROM <table> [WHERE ...] [ORDER BY ...] [LIMIT ...]".
  # eval builds a DeleteStatement (defined outside this grammar) from the
  # evaluated table name, where, order-by and limit parts, in that order.
  rule single_table_delete
    common_delete_clause
    table_name
    where_condition_or_empty
    order_by_condition_or_empty
    limit_condition_or_empty {
      def eval
        target = table_name.eval
        where = where_condition_or_empty.eval
        order = order_by_condition_or_empty.eval
        limit = limit_condition_or_empty.eval
        DeleteStatement.new(target, where, order, limit)
      end
    }
  end

  # "DELETE " + optional directives + "FROM ".
  rule common_delete_clause
    "DELETE" SPACE optional_delete_directives "FROM" SPACE
  end

  # Directives are accepted only in this fixed order.
  rule optional_delete_directives
    optional_low_priority optional_quick optional_ignore
  end

  rule optional_low_priority
    low_priority
    / EMPTY_STRING
  end

  rule optional_quick
    quick
    / EMPTY_STRING
  end

  rule optional_ignore
    ignore
    / EMPTY_STRING
  end

  # Each directive keyword consumes the space that follows it.
  rule ignore
    "IGNORE" SPACE
  end

  rule quick
    "QUICK" SPACE
  end

  rule low_priority
    "LOW_PRIORITY" SPACE
  end
end
@@ -0,0 +1,26 @@
1
# Grammar for DROP [TEMPORARY] TABLE <name> [RESTRICT | CASCADE].
grammar DropTable
  include SQLPrimitives
  include SQLRowSupport

  # eval passes the table name, converted to a Symbol, to
  # DataStore.drop_table (DataStore is not defined in this grammar).
  # The TEMPORARY / RESTRICT / CASCADE modifiers are parsed but not used.
  rule drop_table
    "DROP" (temporary)? SPACE "TABLE" SPACE
    table_name
    (restrict / cascade)? {
      def eval
        target = table_name.eval.to_sym
        DataStore.drop_table(target)
      end
    }
  end

  # Each modifier consumes the space in front of its keyword.
  rule temporary
    SPACE "TEMPORARY"
  end

  rule restrict
    SPACE "RESTRICT"
  end

  rule cascade
    SPACE "CASCADE"
  end
end
@@ -0,0 +1,12 @@
1
# Grammar for the FROM clause.
grammar SQLFromClause
  include SQLPrimitives
  include SQLRowSupport

  # "FROM <table>[, <table>...]"; the node's tables method returns the
  # evaluated table-name list.
  rule from
    "FROM" SPACE one_or_more_table_names {
      def tables; one_or_more_table_names.eval; end
    }
  end
end
@@ -0,0 +1,15 @@
1
# Grammar for the GROUP BY clause.
grammar SQLGroupByClause
  include SQLPrimitives
  include SQLRowSupport
  include SQLHelpers

  # Matches the literal "GROUP BY " (exactly one space between and after the
  # keywords) followed by the column list. eval builds a GroupBy (defined
  # outside this grammar) from the column names converted to Symbols.
  rule group_by
    "GROUP BY " one_or_more_column_names {
      def eval
        GroupBy.new(*one_or_more_column_names.eval.map(&:to_sym))
      end
    }
  end
end
@@ -0,0 +1,19 @@
1
# Shared "clause or nothing" rules. Each one matches a SPACE followed by the
# clause, in which case eval delegates to the clause, or falls back to
# EMPTY_STRING. The clause rules and tokens come from the including grammar.
grammar SQLHelpers
  rule where_condition_or_empty
    SPACE where_condition { def eval; where_condition.eval; end } / EMPTY_STRING
  end

  rule limit_condition_or_empty
    SPACE limit {
      def eval
        limit.eval
      end
    }
    /
    EMPTY_STRING
  end

  rule order_by_condition_or_empty
    SPACE order_by {
      def eval
        order_by.eval
      end
    }
    /
    EMPTY_STRING
  end
end