sequel_impala 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +50 -0
  3. data/LICENSE +463 -0
  4. data/README.md +45 -0
  5. data/Rakefile +39 -0
  6. data/lib/driver/commons-collections-3.2.1.jar +0 -0
  7. data/lib/driver/commons-configuration-1.10.jar +0 -0
  8. data/lib/driver/commons-logging-1.2.jar +0 -0
  9. data/lib/driver/hadoop-auth-2.9.0.jar +0 -0
  10. data/lib/driver/hadoop-common-2.9.0.jar +0 -0
  11. data/lib/driver/hadoop-core-2.6.0.jar +0 -0
  12. data/lib/driver/hive-exec-1.1.0.jar +0 -0
  13. data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
  14. data/lib/driver/hive-metastore-1.1.0.jar +0 -0
  15. data/lib/driver/hive-service-1.1.0.jar +0 -0
  16. data/lib/driver/httpclient-4.3.jar +0 -0
  17. data/lib/driver/httpcore-4.3.jar +0 -0
  18. data/lib/driver/libfb303-0.9.0.jar +0 -0
  19. data/lib/driver/log4j-1.2.17.jar +0 -0
  20. data/lib/driver/slf4j-api-1.7.5.jar +0 -0
  21. data/lib/driver/stax2-api-3.1.4.jar +0 -0
  22. data/lib/driver/woodstox-core-asl-4.4.1.jar +0 -0
  23. data/lib/impala.rb +55 -0
  24. data/lib/impala/connection.rb +180 -0
  25. data/lib/impala/cursor.rb +200 -0
  26. data/lib/impala/progress_reporter.rb +40 -0
  27. data/lib/impala/protocol.rb +8 -0
  28. data/lib/impala/protocol/beeswax_constants.rb +15 -0
  29. data/lib/impala/protocol/beeswax_service.rb +747 -0
  30. data/lib/impala/protocol/beeswax_types.rb +193 -0
  31. data/lib/impala/protocol/exec_stats_constants.rb +13 -0
  32. data/lib/impala/protocol/exec_stats_types.rb +133 -0
  33. data/lib/impala/protocol/facebook_service.rb +706 -0
  34. data/lib/impala/protocol/fb303_constants.rb +15 -0
  35. data/lib/impala/protocol/fb303_types.rb +25 -0
  36. data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
  37. data/lib/impala/protocol/hive_metastore_types.rb +698 -0
  38. data/lib/impala/protocol/impala_hive_server2_service.rb +137 -0
  39. data/lib/impala/protocol/impala_service.rb +443 -0
  40. data/lib/impala/protocol/impala_service_constants.rb +13 -0
  41. data/lib/impala/protocol/impala_service_types.rb +192 -0
  42. data/lib/impala/protocol/status_constants.rb +13 -0
  43. data/lib/impala/protocol/status_types.rb +46 -0
  44. data/lib/impala/protocol/t_c_l_i_service.rb +1108 -0
  45. data/lib/impala/protocol/t_c_l_i_service_constants.rb +72 -0
  46. data/lib/impala/protocol/t_c_l_i_service_types.rb +1802 -0
  47. data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
  48. data/lib/impala/protocol/types_constants.rb +13 -0
  49. data/lib/impala/protocol/types_types.rb +332 -0
  50. data/lib/impala/sasl_transport.rb +117 -0
  51. data/lib/impala/thrift_patch.rb +31 -0
  52. data/lib/impala/version.rb +3 -0
  53. data/lib/jdbc/hive2.rb +52 -0
  54. data/lib/jdbc/impala.rb +50 -0
  55. data/lib/rbhive.rb +8 -0
  56. data/lib/rbhive/connection.rb +150 -0
  57. data/lib/rbhive/explain_result.rb +46 -0
  58. data/lib/rbhive/result_set.rb +37 -0
  59. data/lib/rbhive/schema_definition.rb +86 -0
  60. data/lib/rbhive/t_c_l_i_connection.rb +466 -0
  61. data/lib/rbhive/t_c_l_i_result_set.rb +3 -0
  62. data/lib/rbhive/t_c_l_i_schema_definition.rb +87 -0
  63. data/lib/rbhive/table_schema.rb +122 -0
  64. data/lib/rbhive/version.rb +3 -0
  65. data/lib/sequel/adapters/impala.rb +220 -0
  66. data/lib/sequel/adapters/jdbc/hive2.rb +36 -0
  67. data/lib/sequel/adapters/jdbc/impala.rb +38 -0
  68. data/lib/sequel/adapters/rbhive.rb +177 -0
  69. data/lib/sequel/adapters/shared/impala.rb +808 -0
  70. data/lib/sequel/extensions/csv_to_parquet.rb +166 -0
  71. data/lib/thrift/facebook_service.rb +700 -0
  72. data/lib/thrift/fb303_constants.rb +9 -0
  73. data/lib/thrift/fb303_types.rb +19 -0
  74. data/lib/thrift/hive_metastore_constants.rb +41 -0
  75. data/lib/thrift/hive_metastore_types.rb +630 -0
  76. data/lib/thrift/hive_service_constants.rb +13 -0
  77. data/lib/thrift/hive_service_types.rb +72 -0
  78. data/lib/thrift/queryplan_constants.rb +13 -0
  79. data/lib/thrift/queryplan_types.rb +261 -0
  80. data/lib/thrift/sasl_client_transport.rb +161 -0
  81. data/lib/thrift/serde_constants.rb +92 -0
  82. data/lib/thrift/serde_types.rb +7 -0
  83. data/lib/thrift/t_c_l_i_service.rb +1054 -0
  84. data/lib/thrift/t_c_l_i_service_constants.rb +72 -0
  85. data/lib/thrift/t_c_l_i_service_types.rb +1768 -0
  86. data/lib/thrift/thrift_hive.rb +508 -0
  87. data/lib/thrift/thrift_hive_metastore.rb +3856 -0
  88. data/spec/database_test.rb +56 -0
  89. data/spec/dataset_test.rb +1268 -0
  90. data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
  91. data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
  92. data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  93. data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  94. data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
  95. data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
  96. data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
  97. data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
  98. data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
  99. data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
  100. data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
  101. data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
  102. data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  103. data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
  104. data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
  105. data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
  106. data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
  107. data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
  108. data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  109. data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
  110. data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  111. data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
  112. data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
  113. data/spec/files/reversible_migrations/001_reversible.rb +5 -0
  114. data/spec/files/reversible_migrations/002_reversible.rb +5 -0
  115. data/spec/files/reversible_migrations/003_reversible.rb +5 -0
  116. data/spec/files/reversible_migrations/004_reversible.rb +5 -0
  117. data/spec/files/reversible_migrations/005_reversible.rb +10 -0
  118. data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
  119. data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
  120. data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
  121. data/spec/impala_test.rb +290 -0
  122. data/spec/migrator_test.rb +240 -0
  123. data/spec/plugin_test.rb +91 -0
  124. data/spec/prepared_statement_test.rb +327 -0
  125. data/spec/schema_test.rb +356 -0
  126. data/spec/spec_helper.rb +19 -0
  127. data/spec/timezone_test.rb +86 -0
  128. data/spec/type_test.rb +99 -0
  129. metadata +294 -0
require 'socket'

module Thrift
  # Monkey-patch mixed into Thrift transports (via +prepend+ below) so that
  # callers can get at the underlying transport object right after it is
  # opened.
  #
  # Rationale from the original author: many Impala queries run for tens of
  # minutes, and the connection must not be dropped while waiting, so the
  # intent is to enable TCP keepalive on the socket. Thrift does not expose
  # an easy way to reach the socket it opens, hence this patch.
  #
  # NOTE(review): despite the name and the stated intent, this module does
  # NOT set SO_KEEPALIVE itself -- #open only yields @transport to a block,
  # leaving it to the caller to configure the socket. Confirm that callers
  # passing a block actually enable keepalive.
  module KeepAlive
    # Opens the transport as usual, then yields the wrapped transport
    # (@transport, typically the underlying socket) when a block is given.
    def open
      super
      yield @transport if block_given?
    end
  end

  # Reopen the stock buffered transport and hook in KeepAlive ahead of it.
  class BufferedTransport
    prepend KeepAlive
  end

  # Same hook for the SASL transport used for secured Impala clusters.
  class ImpalaSaslClientTransport
    prepend KeepAlive
  end
end
module Impala
  # Version of the bundled impala-ruby client code.
  VERSION = "0.4.3"
end
warn 'jdbc-hive2 is only for use with JRuby' unless defined?(JRUBY_VERSION)

module Jdbc
  # Loader for the Hive2 JDBC driver and its dependency jars bundled under
  # this gem's lib/driver directory.
  module Hive2
    DRIVER_VERSION = '1.1.0'
    VERSION = DRIVER_VERSION + '.0'

    # Relative paths (from this gem's lib dir) of every jar the Hive JDBC
    # driver needs, in load order.
    def self.driver_jar
      # %w, not %W: nothing here is interpolated.
      %w(
        driver/libfb303-0.9.0.jar
        driver/slf4j-api-1.7.5.jar
        driver/hadoop-common-2.9.0.jar
        driver/hadoop-auth-2.9.0.jar
        driver/hadoop-core-2.6.0.jar
        driver/commons-configuration-1.10.jar
        driver/commons-collections-3.2.1.jar
        driver/commons-logging-1.2.jar
        driver/hive-exec-1.1.0.jar
        driver/hive-jdbc-1.1.0.jar
        driver/hive-metastore-1.1.0.jar
        driver/hive-service-1.1.0.jar
        driver/httpcore-4.3.jar
        driver/httpclient-4.3.jar
        driver/log4j-1.2.17.jar
        driver/woodstox-core-asl-4.4.1.jar
        driver/stax2-api-3.1.4.jar
      )
    end

    # Loads every driver jar via +method+ (:load by default; pass :require
    # for idempotent loading).
    def self.load_driver(method = :load)
      driver_jar.each do |jar|
        send method, jar
      end
    end

    # Fully-qualified Java class name of the JDBC driver.
    def self.driver_name
      'org.apache.hive.jdbc.HiveDriver'
    end

    # Backwards-compat behavior: autoload the driver on require when the
    # jdbc.driver.autoload Java system property is set (JRuby only).
    if defined?(JRUBY_VERSION) &&
       Java::JavaLang::Boolean.get_boolean('jdbc.driver.autoload')
      warn "autoloading jdbc driver on require 'jdbc/hive2'" if $VERBOSE
      load_driver :require
    end
  end
end
warn 'jdbc-impala is only for use with JRuby' unless defined?(JRUBY_VERSION)

module Jdbc
  # Loader for Cloudera's Impala JDBC 4.1 driver jars.
  #
  # The jars are not bundled with this gem: IMPALA_JDBC_JARS must name a
  # directory containing them, otherwise requiring this file raises
  # LoadError (a deliberate load-time side effect that callers rely on).
  module Impala
    DRIVER_VERSION = '2.5.41.1061'
    VERSION = DRIVER_VERSION
    JAR_ROOT = ENV['IMPALA_JDBC_JARS']
    unless JAR_ROOT && File.directory?(JAR_ROOT)
      warn "must specify IMPALA_JDBC_JARS environment variable for directory containing necessary jar files for Impala JDBC 4.1 driver version #{VERSION}"
      raise LoadError, "cannot load such file -- jdbc/impala"
    end

    # Absolute paths of every jar the Impala JDBC driver needs.
    def self.driver_jar
      # %w, not %W: nothing here is interpolated.
      %w(
        ImpalaJDBC41.jar
        TCLIServiceClient.jar
        commons-codec-1.3.jar
        commons-logging-1.1.1.jar
        hive_metastore.jar
        hive_service.jar
        httpclient-4.1.3.jar
        httpcore-4.1.3.jar
        libfb303-0.9.0.jar
        libthrift-0.9.0.jar
        log4j-1.2.14.jar
        ql.jar
        slf4j-api-1.5.11.jar
        slf4j-log4j12-1.5.11.jar
        zookeeper-3.4.6.jar
      ).map { |f| File.join(JAR_ROOT, f) }
    end

    # Loads every driver jar via +method+ (:load by default; pass :require
    # for idempotent loading).
    def self.load_driver(method = :load)
      driver_jar.each do |jar|
        send method, jar
      end
    end

    # Fully-qualified Java class name of the JDBC driver.
    def self.driver_name
      'com.cloudera.impala.jdbc41.Driver'
    end

    # Backwards-compat behavior: autoload the driver on require when the
    # jdbc.driver.autoload Java system property is set (JRuby only).
    if defined?(JRUBY_VERSION) &&
       Java::JavaLang::Boolean.get_boolean('jdbc.driver.autoload')
      warn "autoloading jdbc driver on require 'jdbc/impala'" if $VERBOSE
      load_driver :require
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require File.join(File.dirname(__FILE__), 'rbhive', 'connection')
2
+ require File.join(File.dirname(__FILE__), 'rbhive', 'table_schema')
3
+ require File.join(File.dirname(__FILE__), 'rbhive', 'result_set')
4
+ require File.join(File.dirname(__FILE__), 'rbhive', 'explain_result')
5
+ require File.join(File.dirname(__FILE__), 'rbhive', 'schema_definition')
6
+ require File.join(File.dirname(__FILE__), *%w[rbhive t_c_l_i_result_set])
7
+ require File.join(File.dirname(__FILE__), *%w[rbhive t_c_l_i_schema_definition])
8
+ require File.join(File.dirname(__FILE__), *%w[rbhive t_c_l_i_connection])
# Suppress warnings while loading the thrift autogenerated files.
old_verbose, $VERBOSE = $VERBOSE, nil
require File.join(File.split(File.dirname(__FILE__)).first, *%w[thrift thrift_hive])
# restore warnings
$VERBOSE = old_verbose

module RBHive
  # Opens a connection to the given Hive server, yields it to the block,
  # and guarantees the connection is closed afterwards.
  # Returns the block's value.
  def connect(server, port=10_000)
    connection = RBHive::Connection.new(server, port)
    begin
      connection.open
      yield(connection)
    ensure
      connection.close
    end
  end
  module_function :connect

  # Minimal logger that writes every level (fatal..debug) to STDOUT.
  class StdOutLogger
    %w(fatal error warn info debug).each do |level|
      define_method level.to_sym do |message|
        STDOUT.puts(message)
      end
    end
  end

  # Mutex-guarded wrapper around the raw Thrift Hive client.
  class Connection
    attr_reader :client

    def initialize(server, port=10_000, logger=StdOutLogger.new)
      @socket = Thrift::Socket.new(server, port)
      @transport = Thrift::BufferedTransport.new(@socket)
      @protocol = Thrift::BinaryProtocol.new(@transport)
      @client = Hive::Thrift::ThriftHive::Client.new(@protocol)
      @logger = logger
      @logger.info("Connecting to #{server} on port #{port}")
      @mutex = Mutex.new
    end

    def open
      @transport.open
    end

    def close
      @transport.close
    end

    # Executes a query under the mutex; discards results.
    def execute(query)
      execute_safe(query)
    end

    # Runs EXPLAIN on the query and wraps the plan rows.
    def explain(query)
      safe do
        execute_unsafe("EXPLAIN "+ query)
        ExplainResult.new(client.fetchAll)
      end
    end

    def priority=(priority)
      set("mapred.job.priority", priority)
    end

    def queue=(queue)
      set("mapred.job.queue.name", queue)
    end

    # Sets a Hive/Hadoop configuration variable for this session.
    def set(name, value)
      @logger.info("Setting #{name}=#{value}")
      client.execute("SET #{name}=#{value}")
    end

    # Executes the query and returns every row as a coerced ResultSet.
    def fetch(query)
      safe do
        execute_unsafe(query)
        rows = client.fetchAll
        the_schema = SchemaDefinition.new(client.getSchema, rows.first)
        ResultSet.new(rows, the_schema)
      end
    end

    # Executes the query and yields one ResultSet per batch of rows.
    def fetch_in_batch(query, batch_size=1_000)
      safe do
        execute_unsafe(query)
        until (next_batch = client.fetchN(batch_size)).empty?
          the_schema ||= SchemaDefinition.new(client.getSchema, next_batch.first)
          yield ResultSet.new(next_batch, the_schema)
        end
      end
    end

    # Executes the query and returns only the first coerced row.
    def first(query)
      safe do
        execute_unsafe(query)
        row = client.fetchOne
        the_schema = SchemaDefinition.new(client.getSchema, row)
        ResultSet.new([row], the_schema).first
      end
    end

    # Schema of the last executed query.
    def schema(example_row=[])
      safe { SchemaDefinition.new(client.getSchema, example_row) }
    end

    def create_table(schema)
      execute(schema.create_table_statement)
    end

    def drop_table(name)
      name = name.name if name.is_a?(TableSchema)
      execute("DROP TABLE `#{name}`")
    end

    def replace_columns(schema)
      execute(schema.replace_columns_statement)
    end

    def add_columns(schema)
      execute(schema.add_columns_statement)
    end

    # Delegate anything else straight to the raw Thrift client.
    def method_missing(meth, *args)
      client.send(meth, *args)
    end

    # Keep respond_to? honest about the method_missing delegation above.
    def respond_to_missing?(meth, include_private = false)
      client.respond_to?(meth, include_private) || super
    end

    private

    def execute_safe(query)
      safe { execute_unsafe(query) }
    end

    def execute_unsafe(query)
      @logger.info("Executing Hive Query: #{query}")
      client.execute(query)
    end

    # Serializes all access to the shared Thrift client.
    def safe
      @mutex.synchronize { yield }
    end
  end
end
# Wraps the raw row output of a Hive "EXPLAIN <query>" and exposes the
# plan's sections (abstract syntax tree, stage dependencies, ...) by name.
class ExplainResult
  # rows - Array of String lines as returned by the Hive client's fetchAll.
  def initialize(rows)
    @rows = rows
  end

  # First line of the "ABSTRACT SYNTAX TREE" section (nil when that section
  # is empty; raises NoMethodError when it is missing entirely, as before).
  def ast
    by_section[:abstract_syntax_tree].first
  end

  # Number of stages in the plan.
  def stage_count
    stage_dependencies.length
  end

  # Lines of the "STAGE DEPENDENCIES" section, or [] when absent.
  def stage_dependencies
    by_section[:stage_dependencies] || []
  end

  # All raw rows joined with newlines.
  def to_tsv
    @rows.join("\n")
  end

  # The unparsed plan rows.
  def raw
    @rows
  end

  def to_s
    to_tsv
  end

  private

  # Parses @rows into { :section_name => [stripped lines] }. A row starting
  # with an uppercase letter opens a new section (name downcased, trailing
  # ':' removed, spaces turned into underscores); blank rows are skipped.
  # Memoized: @rows never changes after construction, and the original
  # recomputed this on every accessor call.
  def by_section
    @by_section ||= begin
      current_section = nil
      @rows.inject({}) do |sections, row|
        if row =~ /^[A-Z]/
          current_section = row.chomp(':').downcase.gsub(' ', '_').to_sym
          sections[current_section] = []
        elsif !row.empty?
          sections[current_section] << row.strip
        end
        sections
      end
    end
  end
end
module RBHive
  # An Array of schema-coerced result rows with export helpers.
  class ResultSet < Array
    # rows   - raw rows from the Hive client.
    # schema - a SchemaDefinition used to coerce rows and name columns.
    def initialize(rows, schema)
      @schema = schema
      coerced = rows.map { |raw_row| @schema.coerce_row(raw_row) }
      super(coerced)
    end

    # Column names in schema order.
    def column_names
      @schema.column_names
    end

    # Map of column name => column type.
    def column_type_map
      @schema.column_type_map
    end

    # Comma-separated rendering; written to +out_file+ when given,
    # otherwise returned as a String.
    def to_csv(out_file=nil)
      to_separated_output(",", out_file)
    end

    # Tab-separated variant of #to_csv.
    def to_tsv(out_file=nil)
      to_separated_output("\t", out_file)
    end

    # The rows as arrays (schema column order) instead of hashes. Memoized.
    def as_arrays
      @as_arrays ||= map { |row| @schema.coerce_row_to_array(row) }
    end

    private

    # Joins each row with +sep+ and rows with newlines; returns the string
    # unless +out_file+ is given, in which case it is written there instead.
    def to_separated_output(sep, out_file)
      lines = map { |row| @schema.coerce_row_to_array(row).join(sep) }
      sv = lines.join("\n")
      return sv if out_file.nil?
      File.open(out_file, 'w+') { |f| f << sv }
    end
  end
end
require 'json'

module RBHive
  # Interprets the Thrift schema of a Hive query result and coerces raw
  # tab-separated row strings into typed Ruby hashes.
  class SchemaDefinition
    attr_reader :schema

    # The `rescue` fallbacks keep this loadable on very old Rubies that
    # lack Float::NAN / Float::INFINITY.
    NAN = Float::NAN rescue 0.0/0.0
    INFINITY = Float::INFINITY rescue 1.0/0.0
    # Hive column type => conversion method applied to the raw String value.
    TYPES = {
      :boolean => :to_s,
      :string => :to_s,
      :bigint => :to_i,
      :float => :to_f,
      :double => :to_f,
      :int => :to_i,
      :smallint => :to_i,
      :tinyint => :to_i,
    }

    # schema      - Thrift schema object exposing #fieldSchemas.
    # example_row - a raw tab-separated row String (or nil); used only to
    #               detect extra partition columns missing from the schema.
    def initialize(schema, example_row)
      @schema = schema
      @example_row = example_row ? example_row.split("\t") : []
    end

    # Column names as symbols, deduplicated and padded for partitions.
    # Memoized.
    def column_names
      @column_names ||= begin
        schema_names = @schema.fieldSchemas.map {|c| c.name }

        # In rare cases Hive can return two identical column names
        # consider SELECT a.foo, b.foo...
        # in this case you get two columns called foo with no disambiguation.
        # as a (far from ideal) solution we detect this edge case and rename them
        # a.foo => foo1, b.foo => foo2
        # otherwise we will trample one of the columns during Hash mapping.
        # First pass tags the 2nd+ occurrence with its count; second pass
        # tags the first occurrence; third pass turns the marker into '_'.
        s = Hash.new(0)
        schema_names.map! { |c| s[c] += 1; s[c] > 1 ? "#{c}---|---#{s[c]}" : c }
        schema_names.map! { |c| s[c] > 1 ? "#{c}---|---1" : c }
        schema_names.map! { |c| c.gsub('---|---', '_').to_sym }

        # Lets fix the fact that Hive doesn't return schema data for partitions on SELECT * queries
        # For now we will call them :_p1, :_p2, etc. to avoid collisions.
        offset = 0
        while schema_names.length < @example_row.length
          schema_names.push(:"_p#{offset+=1}")
        end
        schema_names
      end
    end

    # Map of column name => type symbol (memoized).
    def column_type_map
      @column_type_map ||= column_names.inject({}) do |hsh, c|
        definition = @schema.fieldSchemas.find {|s| s.name.to_sym == c }
        # If the column isn't in the schema (eg partitions in SELECT * queries) assume they are strings
        hsh[c] = definition ? definition.type.to_sym : :string
        hsh
      end
    end

    # Splits a raw tab-separated row and coerces each value, returning a
    # Hash keyed by column name.
    def coerce_row(row)
      column_names.zip(row.split("\t")).inject({}) do |hsh, (column_name, value)|
        hsh[column_name] = coerce_column(column_name, value)
        hsh
      end
    end

    # Coerces a single raw String value according to its column's type.
    # Handles Hive's literal "Infinity"/"NaN" markers for numeric columns
    # and JSON-parses array-typed columns.
    def coerce_column(column_name, value)
      type = column_type_map[column_name]
      return INFINITY if (type != :string && value == "Infinity")
      return NAN if (type != :string && value == "NaN")
      return coerce_complex_value(value) if type.to_s =~ /^array/
      conversion_method = TYPES[type]
      conversion_method ? value.send(conversion_method) : value
    end

    # Projects an already-coerced row Hash back into column order.
    def coerce_row_to_array(row)
      column_names.map { |n| row[n] }
    end

    # Parses a complex (array) value from its JSON encoding; nil-ish and
    # literal 'null' inputs become nil.
    def coerce_complex_value(value)
      return nil if value.nil?
      return nil if value.length == 0
      return nil if value == 'null'
      JSON.parse(value)
    end
  end
end