altertable-lakehouse 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
# Gem specification for the altertable-lakehouse Ruby client.
# The gem's own lib/ directory is put on the load path so the version
# constant can be required before the gem is installed.
lib_dir = File.expand_path("lib", __dir__)
$LOAD_PATH.include?(lib_dir) || $LOAD_PATH.unshift(lib_dir)
require "altertable/lakehouse/version"

Gem::Specification.new do |spec|
  spec.name = "altertable-lakehouse"
  spec.version = Altertable::Lakehouse::VERSION
  spec.authors = ["Altertable AI"]
  spec.email = ["support@altertable.ai"]

  spec.summary = "Official Ruby client for Altertable Lakehouse"
  spec.description = "Official Ruby client for Altertable Lakehouse API."
  spec.homepage = "https://github.com/altertable-ai/altertable-lakehouse-ruby"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/altertable-ai/altertable-lakehouse-ruby"
  spec.metadata["changelog_uri"] = "https://github.com/altertable-ai/altertable-lakehouse-ruby/blob/main/CHANGELOG.md"

  # Package every git-tracked file except those under test/, spec/,
  # features/ and sorbet/.
  tracked_files = Dir.chdir(__dir__) { `git ls-files -z`.split("\x0") }
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features|sorbet)/}) }
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Only runtime dependency.
  spec.add_dependency "base64"

  # Development-only gems, registered in the listed order. The HTTP
  # libraries are declared here rather than as runtime dependencies.
  {
    "faraday" => ["~> 2.12"],
    "faraday-retry" => ["~> 2.0"],
    "faraday-net_http" => [],
    "httpx" => [], # For testing optional support
    "rake" => ["~> 13.0"],
    "rspec" => ["~> 3.0"],
    "rubocop" => ["~> 1.50"],
    "testcontainers" => [],
    "rbs" => [],
    "sorbet" => [],
    "sorbet-runtime" => []
  }.each do |gem_name, requirements|
    spec.add_development_dependency(gem_name, *requirements)
  end
end
@@ -0,0 +1,192 @@
1
module Altertable
  module Lakehouse
    # Pluggable HTTP transports. Every adapter exposes +get+, +post+ and
    # +delete+ with the same keyword signature and returns a Response.
    # Each concrete adapter requires its HTTP library lazily, inside
    # +initialize+, so loading this file does not load any of them.
    module Adapters
      # Transport-neutral response: integer status, body and headers as
      # handed back by the underlying HTTP library.
      Response = Struct.new(:status, :body, :headers)

      # Abstract adapter. Stores the connection settings; the HTTP verbs
      # must be provided by subclasses.
      class Base
        # base_url: root URL that request paths are resolved against
        # timeout:  timeout value handed to the underlying library
        # headers:  headers sent with every request
        def initialize(base_url:, timeout:, headers: {})
          @base_url = base_url
          @timeout = timeout
          @headers = headers
        end

        def get(path, body: nil, params: {}, headers: {}, &_block)
          raise NotImplementedError
        end

        def post(path, body: nil, params: {}, headers: {}, &_block)
          raise NotImplementedError
        end

        def delete(path, body: nil, params: {}, headers: {}, &_block)
          raise NotImplementedError
        end
      end

      # Adapter backed by Faraday with the retry middleware
      # (max 3 retries, 0.05s initial interval, backoff factor 2).
      class FaradayAdapter < Base
        def initialize(base_url:, timeout:, headers: {})
          super
          require "faraday"
          require "faraday/retry"
          require "faraday/net_http"

          @conn = Faraday.new(url: @base_url) do |f|
            @headers.each { |k, v| f.headers[k] = v }
            f.options.timeout = @timeout
            f.request :retry, max: 3, interval: 0.05, backoff_factor: 2
            f.adapter Faraday.default_adapter
          end
        end

        # +body+ and any block are accepted for interface parity and ignored.
        def get(path, body: nil, params: {}, headers: {}, &_block) # rubocop:disable Lint/UnusedMethodArgument
          translate_errors { wrap_response(@conn.get(path, params, headers)) }
        end

        # When a block is given it is installed as Faraday's +on_data+
        # callback so the response body is delivered in chunks.
        def post(path, body: nil, params: {}, headers: {}, &block)
          translate_errors do
            resp = @conn.post(path) do |req|
              req.params = params if params
              req.headers = req.headers.merge(headers) unless headers.empty?
              req.body = body
              req.options.on_data = block if block
            end
            wrap_response(resp)
          end
        end

        # +body+ and any block are accepted for interface parity and ignored.
        def delete(path, body: nil, params: {}, headers: {}, &_block) # rubocop:disable Lint/UnusedMethodArgument
          translate_errors { wrap_response(@conn.delete(path, params, headers)) }
        end

        private

        # Runs the block and converts Faraday transport failures into the
        # client's own error classes.
        def translate_errors
          yield
        rescue Faraday::ConnectionFailed => e
          raise Altertable::Lakehouse::NetworkError, e.message
        rescue Faraday::TimeoutError => e
          raise Altertable::Lakehouse::TimeoutError, e.message
        end

        def wrap_response(resp)
          Response.new(resp.status, resp.body, resp.headers)
        end
      end

      # Adapter backed by HTTPX with its :retries plugin.
      class HttpxAdapter < Base
        def initialize(base_url:, timeout:, headers: {})
          super
          require "httpx"
          # NOTE(review): HTTPX documents `origin:` / `base_path:` for
          # resolving relative request paths - confirm that `base_url:` is
          # accepted by the httpx versions this gem targets.
          @client = HTTPX.plugin(:retries).with(
            timeout: { operation_timeout: @timeout },
            headers: @headers,
            base_url: @base_url
          )
        end

        # +body+ and any block are accepted for interface parity and ignored.
        def get(path, body: nil, params: {}, headers: {}, &_block) # rubocop:disable Lint/UnusedMethodArgument
          resp = @client.with(headers: headers).get(path, params: params)
          wrap_response(resp)
        end

        # With a block, the response is requested in streaming mode and each
        # body chunk is passed to the block together with the
        # content-length header value.
        def post(path, body: nil, params: {}, headers: {}, &block)
          client = @client.with(headers: headers)
          unless block
            resp = client.post(path, body: body, params: params)
            return wrap_response(resp)
          end

          # NOTE(review): `stream: true` appears to belong to HTTPX's :stream
          # plugin - confirm it works without loading that plugin.
          response = client.request("POST", path, body: body, params: params, stream: true)

          # Fail before touching the body: an error response has none to iterate.
          if response.is_a?(HTTPX::ErrorResponse)
            raise Altertable::Lakehouse::NetworkError, response.error.message
          end

          response.body.each do |chunk|
            block.call(chunk, response.headers["content-length"])
          end
          wrap_response(response)
        end

        # +body+ and any block are accepted for interface parity and ignored.
        def delete(path, body: nil, params: {}, headers: {}, &_block) # rubocop:disable Lint/UnusedMethodArgument
          resp = @client.with(headers: headers).delete(path, params: params)
          wrap_response(resp)
        end

        private

        # Raises NetworkError for HTTPX error responses, otherwise wraps
        # the response with its body rendered as a string.
        def wrap_response(resp)
          if resp.is_a?(HTTPX::ErrorResponse)
            raise Altertable::Lakehouse::NetworkError, resp.error.message
          end

          Response.new(resp.status, resp.to_s, resp.headers)
        end
      end

      # Dependency-free adapter built on Net::HTTP. A new connection is
      # opened for every request and no retry policy is configured here.
      class NetHttpAdapter < Base
        def initialize(base_url:, timeout:, headers: {})
          super
          require "net/http"
          require "uri"
          @uri = URI.parse(@base_url)
        end

        def get(path, body: nil, params: {}, headers: {}, &block) # rubocop:disable Lint/UnusedMethodArgument
          request(Net::HTTP::Get, path, params: params, headers: headers, &block)
        end

        def post(path, body: nil, params: {}, headers: {}, &block)
          request(Net::HTTP::Post, path, body: body, params: params, headers: headers, &block)
        end

        def delete(path, body: nil, params: {}, headers: {}, &block) # rubocop:disable Lint/UnusedMethodArgument
          request(Net::HTTP::Delete, path, params: params, headers: headers, &block)
        end

        private

        # Performs one request of class +klass+ against +path+.
        #
        # With a block, the body of a successful (2xx) response is passed to
        # the block chunk by chunk together with the response's content
        # length, and the returned Response has a nil body. Non-2xx bodies
        # are never passed to the block; they are returned in the Response.
        #
        # Raises TimeoutError when reading or writing times out and
        # NetworkError when the connection cannot be established or is
        # dropped, matching the split used by FaradayAdapter.
        def request(klass, path, body: nil, params: {}, headers: {}, &block)
          uri = URI.join(@uri, path)
          uri.query = URI.encode_www_form(params) unless params.nil? || params.empty?

          req = klass.new(uri)
          @headers.merge(headers).each { |k, v| req[k] = v }
          req.body = body if body

          Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
                                              open_timeout: @timeout, read_timeout: @timeout) do |http|
            if block
              streamed = http.request(req) do |response|
                next unless response.is_a?(Net::HTTPSuccess)

                response.read_body { |chunk| block.call(chunk, response.content_length) }
              end
              # Once the block has consumed the body, Net::HTTP no longer
              # holds it as a String; expose nil rather than an internal object.
              remaining = streamed.body
              Response.new(streamed.code.to_i, remaining.is_a?(String) ? remaining : nil, streamed.to_hash)
            else
              resp = http.request(req)
              Response.new(resp.code.to_i, resp.body, resp.to_hash)
            end
          end
        rescue Net::ReadTimeout, Net::WriteTimeout => e
          raise Altertable::Lakehouse::TimeoutError, e.message
        rescue SocketError, Net::OpenTimeout,
               Errno::ECONNABORTED, Errno::ECONNREFUSED, Errno::ECONNRESET,
               Errno::EHOSTUNREACH, Errno::ENETUNREACH, Errno::EPIPE => e
          raise Altertable::Lakehouse::NetworkError, e.message
        end
      end
    end
  end
end
@@ -1,11 +1,9 @@
1
- require "faraday"
2
- require "faraday/retry"
3
- require "faraday/net_http"
4
1
  require "json"
5
2
  require "base64"
6
3
  require_relative "models"
7
4
  require_relative "errors"
8
5
  require_relative "version"
6
+ require_relative "adapters"
9
7
 
10
8
  module Altertable
11
9
  module Lakehouse
@@ -13,7 +11,7 @@ module Altertable
13
11
  DEFAULT_BASE_URL = "https://api.altertable.ai"
14
12
  DEFAULT_TIMEOUT = 10
15
13
 
16
- def initialize(username: nil, password: nil, basic_auth_token: nil, base_url: nil, timeout: nil, user_agent: nil)
14
+ def initialize(username: nil, password: nil, basic_auth_token: nil, base_url: nil, timeout: nil, user_agent: nil, adapter: nil)
17
15
  # 1. Try passed basic_auth_token
18
16
  # 2. Try passed username/password
19
17
  # 3. Try ENV["ALTERTABLE_BASIC_AUTH_TOKEN"]
@@ -35,57 +33,45 @@ module Altertable
35
33
  @timeout = timeout || DEFAULT_TIMEOUT
36
34
  @user_agent = user_agent ? "AltertableRuby/#{VERSION} #{user_agent}" : "AltertableRuby/#{VERSION}"
37
35
 
38
- @conn = Faraday.new(url: @base_url) do |f|
39
- f.headers["Authorization"] = @auth_header
40
- f.headers["User-Agent"] = @user_agent
41
- f.headers["Content-Type"] = "application/json"
42
- f.options.timeout = @timeout
43
- f.request :retry, max: 3, interval: 0.05, backoff_factor: 2
44
- f.adapter Faraday.default_adapter
45
- end
36
+ headers = {
37
+ "Authorization" => @auth_header,
38
+ "User-Agent" => @user_agent,
39
+ "Content-Type" => "application/json"
40
+ }
41
+
42
+ @adapter = select_adapter(adapter, base_url: @base_url, timeout: @timeout, headers: headers)
46
43
  end
47
44
 
48
45
  # POST /append
49
- def append(catalog:, schema:, table:, payload:)
46
# Sends +payload+ to POST /append for the given catalog/schema/table.
# +sync+ is added to the query string only when it is not nil, so an
# explicit +false+ is still forwarded.
def append(catalog:, schema:, table:, payload:, sync: nil)
  query = { catalog: catalog, schema: schema, table: table }
  query[:sync] = sync unless sync.nil?
  request_body = Models::AppendRequest.new(payload).to_h
  Models::AppendResponse.from_h(request(:post, "/append", body: request_body, query: query))
end
55
53
 
54
+ # GET /tasks/:task_id
55
# Fetches GET /tasks/:task_id and builds a Models::TaskResponse from the
# parsed response. +task_id+ is interpolated into the path as given.
def get_task(task_id)
  Models::TaskResponse.from_h(request(:get, "/tasks/#{task_id}"))
end
59
+
56
60
  # POST /query (streamed)
57
61
  def query(statement:, **options)
58
62
  req_body = Models::QueryRequest.new(statement: statement, **options).to_h.to_json
59
-
63
+
60
64
  enum = Enumerator.new do |yielder|
61
65
  buffer = ""
62
- @conn.post("/query") do |req|
63
- req.headers["Content-Type"] = "application/json"
64
- req.body = req_body
65
- req.options.on_data = Proc.new do |chunk, _|
66
- buffer << chunk
67
- while (line_end = buffer.index("\n"))
68
- line = buffer.slice!(0, line_end + 1).strip
69
- next if line.empty?
70
- begin
71
- yielder << JSON.parse(line)
72
- rescue JSON::ParserError
73
- raise ParseError, "Invalid JSON line: #{line}"
74
- end
75
- end
76
- end
77
- end
78
66
 
79
- # Process remaining buffer
80
- unless buffer.empty?
81
- begin
82
- yielder << JSON.parse(buffer.strip)
83
- rescue JSON::ParserError
84
- raise ParseError, "Invalid JSON line: #{buffer}"
85
- end
67
+ # Use adapter's stream capability
68
+ resp = @adapter.post("/query", body: req_body) do |chunk, _|
69
+ buffer << chunk
86
70
  end
71
+
72
+ handle_stream_response(resp, buffer, yielder)
87
73
  end
88
-
74
+
89
75
  QueryResult.new(enum)
90
76
  end
91
77
 
@@ -102,23 +88,18 @@ module Altertable
102
88
 
103
89
  # POST /upload
104
90
  def upload(catalog:, schema:, table:, format:, mode:, file_io:, primary_key: nil)
105
- params = {
106
- catalog: catalog,
107
- schema: schema,
108
- table: table,
109
- format: format,
110
- mode: mode
91
+ params = {
92
+ catalog: catalog,
93
+ schema: schema,
94
+ table: table,
95
+ format: format,
96
+ mode: mode
111
97
  }
112
98
  params[:primary_key] = primary_key if primary_key
113
99
 
114
- # Use a separate connection for multipart/binary if needed,
115
- # but spec says body is octet-stream.
116
- resp = @conn.post("/upload") do |req|
117
- req.params = params
118
- req.headers["Content-Type"] = "application/octet-stream"
119
- req.body = file_io # IO object or string
120
- end
121
-
100
+ body = file_io.respond_to?(:read) ? file_io.read : file_io
101
+
102
+ resp = @adapter.post("/upload", body: body, params: params, headers: { "Content-Type" => "application/octet-stream" })
122
103
  handle_response(resp)
123
104
  end
124
105
 
@@ -135,30 +116,112 @@ module Altertable
135
116
  end
136
117
 
137
118
  # POST /validate
138
- def validate(statement:)
139
- req = Models::ValidateRequest.new(statement: statement)
119
# Sends +statement+ (with optional catalog, schema and session id) to
# POST /validate and wraps the parsed response in a
# Models::ValidateResponse.
def validate(statement:, catalog: nil, schema: nil, session_id: nil)
  fields = { statement: statement, catalog: catalog, schema: schema, session_id: session_id }
  payload = Models::ValidateRequest.new(**fields).to_h
  Models::ValidateResponse.from_h(request(:post, "/validate", body: payload))
end
143
129
 
130
+ # POST /autocomplete
131
# Sends +statement+ and the optional context fields to POST /autocomplete
# and wraps the parsed response in a Models::AutocompleteResponse.
def autocomplete(statement:, catalog: nil, schema: nil, session_id: nil, max_suggestions: nil)
  fields = {
    statement: statement,
    catalog: catalog,
    schema: schema,
    session_id: session_id,
    max_suggestions: max_suggestions
  }
  payload = Models::AutocompleteRequest.new(**fields).to_h
  Models::AutocompleteResponse.from_h(request(:post, "/autocomplete", body: payload))
end
142
+
143
+ # POST /explain
144
# Sends +statement+ and the optional context fields to POST /explain and
# wraps the parsed response in a Models::ExplainResponse.
def explain(statement:, catalog: nil, schema: nil, session_id: nil, include_plan: nil)
  fields = {
    statement: statement,
    catalog: catalog,
    schema: schema,
    session_id: session_id,
    include_plan: include_plan
  }
  payload = Models::ExplainRequest.new(**fields).to_h
  Models::ExplainResponse.from_h(request(:post, "/explain", body: payload))
end
155
+
144
156
  private
145
157
 
146
- def request(method, path, body: nil, query: nil, stream: false, &block)
147
- resp = @conn.send(method, path) do |req|
148
- req.params = query if query
149
- req.body = body.to_json if body
150
- if stream
151
- req.options.on_data = block
158
# Instantiates the HTTP adapter to use, passing +options+ as keyword
# arguments. +name+ may be :faraday, :httpx or :net_http; any other value
# (including nil) triggers auto-detection: Faraday if it is loaded or can
# be required, otherwise HTTPX under the same test, otherwise Net::HTTP.
def select_adapter(name, options)
  adapter_class =
    case name
    when :faraday then Adapters::FaradayAdapter
    when :httpx then Adapters::HttpxAdapter
    when :net_http then Adapters::NetHttpAdapter
    else
      if defined?(Faraday) || try_require("faraday")
        Adapters::FaradayAdapter
      elsif defined?(HTTPX) || try_require("httpx")
        Adapters::HttpxAdapter
      else
        Adapters::NetHttpAdapter
      end
    end

  adapter_class.new(**options)
end
177
+
178
# Requires +gem_name+ and reports whether that worked: true when the
# require completes, false when it raises LoadError.
def try_require(gem_name)
  begin
    require gem_name
  rescue LoadError
    return false
  end

  true
end
184
+
185
# Dispatches +method+ (:get, :post, :delete) to the adapter and runs the
# result through handle_response. Hash bodies are serialised to JSON,
# any other body is passed through untouched, and a nil +query+ becomes
# an empty params hash.
def request(method, path, body: nil, query: nil)
  payload = body.is_a?(Hash) ? body.to_json : body
  handle_response(@adapter.send(method, path, body: payload, params: query || {}))
end
189
+
190
# Converts a finished /query response into parsed rows.
#
# resp    - adapter response; only +status+ is consulted
# buffer  - the complete response body accumulated by the caller
# yielder - receives each parsed NDJSON line via +<<+
#
# On a 2xx status every non-blank line of +buffer+ is parsed as JSON and
# pushed to +yielder+ in order. The buffer is complete by the time this
# runs, so a line that fails to parse is corrupt data rather than a
# partial chunk and is reported instead of being dropped.
#
# Raises BadRequestError on 400, AuthError on 401, ParseError when a line
# is not valid JSON, and ApiError for any other non-2xx status.
def handle_stream_response(resp, buffer, yielder)
  case resp.status
  when 400
    raise BadRequestError, "Bad Request: #{buffer.strip}"
  when 401
    raise AuthError, "Unauthorized"
  when 200..299
    buffer.each_line do |raw_line|
      line = raw_line.strip
      next if line.empty?

      begin
        yielder << JSON.parse(line)
      rescue JSON::ParserError
        raise ParseError, "Invalid JSON line: #{line}"
      end
    end
  else
    raise ApiError, "API Error #{resp.status}: #{buffer.strip}"
  end
end
163
226
 
164
227
  def handle_response(resp)
@@ -168,7 +231,7 @@ module Altertable
168
231
  begin
169
232
  JSON.parse(resp.body)
170
233
  rescue JSON::ParserError
171
- # For non-JSON responses (like empty upload response?)
234
+ # For non-JSON responses
172
235
  resp.body
173
236
  end
174
237
  when 400
@@ -176,40 +239,48 @@ module Altertable
176
239
  when 401
177
240
  raise AuthError, "Unauthorized"
178
241
  when 404
179
- raise ApiError, "Not Found: #{resp.url}" # Could be specific
242
+ raise ApiError, "Not Found: #{resp.headers}" # Url not avail in struct easily
180
243
  else
181
244
  raise ApiError, "API Error #{resp.status}: #{resp.body}"
182
245
  end
183
246
  end
184
247
  end
185
-
248
+
186
249
  class QueryResult
187
250
  include Enumerable
188
-
251
+
252
+ # metadata: the stream header object (first NDJSON line)
253
+ # columns: array of column name strings (second NDJSON line)
189
254
  attr_reader :metadata, :columns
190
-
255
+
191
256
  def initialize(enum)
192
257
  @enum = enum
193
258
  @metadata = nil
194
259
  @columns = nil
195
260
  end
196
-
261
+
197
262
  def each(&block)
198
- # We need to wrap the enum to extract metadata/columns first
199
- # Note: This will re-trigger the request if enumerated multiple times
200
- first = true
201
- second = true
202
-
263
+ # The real mock streams:
264
+ # line 1: { "statement":…, "session_id":…, } (header object)
265
+ # line 2: ["col1", "col2", …] (column names array)
266
+ # line 3+: [val1, val2, …] (row value arrays)
267
+ # We zip each row array with the column names to produce a Hash.
268
+ line_index = 0
269
+
203
270
  @enum.each do |item|
204
- if first
271
+ case line_index
272
+ when 0
205
273
  @metadata = item
206
- first = false
207
- elsif second
274
+ when 1
208
275
  @columns = item
209
- second = false
210
276
  else
211
- block.call(item)
277
+ if @columns.is_a?(Array) && item.is_a?(Array)
278
+ block.call(@columns.zip(item).to_h)
279
+ else
280
+ block.call(item)
281
+ end
212
282
  end
283
+ line_index += 1
213
284
  end
214
285
  end
215
286
  end