mangleframes 0.3.4.tar.gz → 0.3.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mangleframes-0.3.4 → mangleframes-0.3.5}/PKG-INFO +1 -1
- {mangleframes-0.3.4 → mangleframes-0.3.5}/pyproject.toml +1 -1
- {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/__init__.py +1 -1
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/client.rs +190 -7
- mangleframes-0.3.5/viewer/src/benchmark.rs +242 -0
- mangleframes-0.3.5/viewer/src/main.rs +194 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/spark_client.rs +30 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/sql_builder.rs +18 -3
- mangleframes-0.3.4/viewer/src/main.rs +0 -109
- {mangleframes-0.3.4 → mangleframes-0.3.5}/Cargo.lock +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/Cargo.toml +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/alerts.py +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/launcher.py +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/session.py +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/Cargo.toml +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/build.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/base.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/catalog.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/commands.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/common.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/expressions.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/ml.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/ml_common.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/relations.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/types.proto +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/error.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/lib.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/proto/spark.connect.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/Cargo.toml +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/index.html +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/package-lock.json +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/package.json +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/postcss.config.js +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/App.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/JoinAnalyzer.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/Reconciliation.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/SQLEditor.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/__tests__/JoinAnalyzer.test.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/ColumnDropdown.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/ColumnStats.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/DataGrid.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/SchemaView.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDBuilder.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDCanvas.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDConfigModal.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDTableList.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDToolbar.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDValidationPanel.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/TableNode.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/__tests__/ERDDragDrop.test.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/index.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/ContextPanel.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/Layout.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/MainContent.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/Sidebar.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/StatusBar.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/TabBar.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/TopBar.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/quality/AlertBuilder.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/quality/QualityDashboard.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/index.css +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/lib/api.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/lib/erdValidation.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/main.tsx +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/stores/dataStore.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/stores/erdStore.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/stores/uiStore.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/test/setup.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/tailwind.config.js +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/tsconfig.json +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/tsconfig.node.json +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/vite.config.ts +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/alert_handlers.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/arrow_reader.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/dashboard.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/export.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/handlers.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/history_analysis.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/history_handlers.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/join_handlers.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/perf.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/reconcile_handlers.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/stats.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/test_helpers.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/web_server.rs +0 -0
- {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/websocket.rs +0 -0
spark-connect/src/client.rs

@@ -16,7 +16,8 @@ use uuid::Uuid;
 use crate::error::SparkConnectError;
 use crate::proto::spark_connect_service_client::SparkConnectServiceClient;
 use crate::proto::{
-    ExecutePlanRequest, Plan, Relation, Sql, UserContext,
+    ExecutePlanRequest, Plan, ReattachExecuteRequest, ReattachOptions, Relation, Sql, UserContext,
+    execute_plan_request::{RequestOption, request_option},
     execute_plan_response::ResponseType,
 };
 
@@ -247,26 +248,140 @@ impl SparkConnectClient {
             ClientInner::Proxy(c) => c.clone().execute_plan(request).await?,
         };
         let mut stream = response.into_inner();
-
         let mut batches = Vec::new();
-        let mut arrow_data = Vec::new();
 
         while let Some(resp) = stream.message().await? {
             if let Some(response_type) = resp.response_type {
                 if let ResponseType::ArrowBatch(batch) = response_type {
-
+                    // Each ArrowBatch is a complete IPC stream - parse it separately
+                    let parsed = parse_arrow_ipc(&batch.data)?;
+                    batches.extend(parsed);
                 }
             }
         }
 
-
-
+        let elapsed_ms = start.elapsed().as_millis();
+        let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
+        info!(
+            "SQL executed in {}ms, {} rows returned",
+            elapsed_ms, row_count
+        );
+
+        if batches.is_empty() {
+            return Err(SparkConnectError::NoData);
+        }
+
+        Ok(batches)
+    }
+
+    /// Execute SQL with reattachable execution for large result sets (>10K rows).
+    /// Uses ReattachExecute RPC to continue fetching when server sends partial results.
+    pub async fn sql_reattachable(
+        &self,
+        query: &str,
+        limit: u32,
+    ) -> Result<Vec<RecordBatch>, SparkConnectError> {
+        let start = Instant::now();
+        info!("Executing reattachable SQL via Spark Connect: {}", query);
+
+        let sql_relation = Relation {
+            common: None,
+            rel_type: Some(crate::proto::relation::RelType::Sql(Sql {
+                query: query.to_string(),
+                args: Default::default(),
+                pos_args: vec![],
+                named_arguments: Default::default(),
+                pos_arguments: vec![],
+            })),
+        };
+
+        let relation = if limit < u32::MAX {
+            Relation {
+                common: None,
+                rel_type: Some(crate::proto::relation::RelType::Limit(Box::new(
+                    crate::proto::Limit {
+                        input: Some(Box::new(sql_relation)),
+                        limit: limit as i32,
+                    },
+                ))),
+            }
+        } else {
+            sql_relation
+        };
+
+        let plan = Plan {
+            op_type: Some(crate::proto::plan::OpType::Root(relation)),
+        };
+
+        let operation_id = Uuid::new_v4().to_string();
+        let reattach_option = RequestOption {
+            request_option: Some(request_option::RequestOption::ReattachOptions(
+                ReattachOptions { reattachable: true },
+            )),
+        };
+
+        let request = ExecutePlanRequest {
+            session_id: self.session_id.clone(),
+            user_context: Some(UserContext {
+                user_id: "spark-connect-rs".to_string(),
+                user_name: "spark-connect-rs".to_string(),
+                extensions: vec![],
+            }),
+            operation_id: Some(operation_id.clone()),
+            plan: Some(plan),
+            client_type: Some("spark-connect-rs".to_string()),
+            request_options: vec![reattach_option],
+            tags: vec![],
+            client_observed_server_side_session_id: None,
+        };
+
+        let mut batches = Vec::new();
+        let mut last_response_id: Option<String> = None;
+        let mut result_complete = false;
+
+        // Initial execution
+        info!("Starting reattachable execution for operation {}", operation_id);
+        let (complete, resp_id) = self
+            .process_execute_stream(request, &mut batches)
+            .await?;
+        result_complete = complete;
+        last_response_id = resp_id;
+        info!(
+            "Initial stream ended: result_complete={}, last_response_id={:?}, batches_count={}",
+            result_complete, last_response_id, batches.len()
+        );
+
+        // Reattach loop: continue fetching if ResultComplete was not received
+        while !result_complete {
+            info!(
+                "Reattaching to operation {} from response {:?}",
+                operation_id, last_response_id
+            );
+
+            let reattach_request = ReattachExecuteRequest {
+                session_id: self.session_id.clone(),
+                user_context: Some(UserContext {
+                    user_id: "spark-connect-rs".to_string(),
+                    user_name: "spark-connect-rs".to_string(),
+                    extensions: vec![],
+                }),
+                operation_id: operation_id.clone(),
+                client_type: Some("spark-connect-rs".to_string()),
+                last_response_id: last_response_id.clone(),
+                client_observed_server_side_session_id: None,
+            };
+
+            let (complete, resp_id) = self
+                .process_reattach_stream(reattach_request, &mut batches)
+                .await?;
+            result_complete = complete;
+            last_response_id = resp_id;
         }
 
         let elapsed_ms = start.elapsed().as_millis();
         let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
         info!(
-            "SQL executed in {}ms, {} rows returned",
+            "Reattachable SQL executed in {}ms, {} rows returned",
             elapsed_ms, row_count
         );
 
@@ -277,6 +392,74 @@ impl SparkConnectClient {
         Ok(batches)
     }
 
+    /// Process ExecutePlan response stream, returns (result_complete, last_response_id).
+    async fn process_execute_stream(
+        &self,
+        request: ExecutePlanRequest,
+        batches: &mut Vec<RecordBatch>,
+    ) -> Result<(bool, Option<String>), SparkConnectError> {
+        let response = match &self.inner {
+            ClientInner::Direct(c) => c.clone().execute_plan(request).await?,
+            ClientInner::Proxy(c) => c.clone().execute_plan(request).await?,
+        };
+        self.process_response_stream(response.into_inner(), batches)
+            .await
+    }
+
+    /// Process ReattachExecute response stream, returns (result_complete, last_response_id).
+    async fn process_reattach_stream(
+        &self,
+        request: ReattachExecuteRequest,
+        batches: &mut Vec<RecordBatch>,
+    ) -> Result<(bool, Option<String>), SparkConnectError> {
+        let response = match &self.inner {
+            ClientInner::Direct(c) => c.clone().reattach_execute(request).await?,
+            ClientInner::Proxy(c) => c.clone().reattach_execute(request).await?,
+        };
+        self.process_response_stream(response.into_inner(), batches)
+            .await
+    }
+
+    /// Process a response stream, collecting Arrow record batches.
+    /// Returns (result_complete, last_response_id).
+    async fn process_response_stream(
+        &self,
+        mut stream: tonic::Streaming<crate::proto::ExecutePlanResponse>,
+        batches: &mut Vec<RecordBatch>,
+    ) -> Result<(bool, Option<String>), SparkConnectError> {
+        let mut result_complete = false;
+        let mut last_response_id: Option<String> = None;
+        let mut batch_count = 0;
+        let mut total_rows = 0i64;
+
+        while let Some(resp) = stream.message().await? {
+            last_response_id = Some(resp.response_id.clone());
+
+            if let Some(response_type) = resp.response_type {
+                match response_type {
+                    ResponseType::ArrowBatch(batch) => {
+                        batch_count += 1;
+                        total_rows += batch.row_count;
+                        // Each ArrowBatch is a complete IPC stream - parse it separately
+                        let parsed = parse_arrow_ipc(&batch.data)?;
+                        batches.extend(parsed);
+                    }
+                    ResponseType::ResultComplete(_) => {
+                        info!("Received ResultComplete after {} batches, {} rows", batch_count, total_rows);
+                        result_complete = true;
+                    }
+                    _ => {}
+                }
+            }
+        }
+
+        info!(
+            "Stream ended: {} batches, {} rows, result_complete={}",
+            batch_count, total_rows, result_complete
+        );
+        Ok((result_complete, last_response_id))
+    }
+
     /// Execute SQL and return a single row as JSON Value.
     pub async fn sql_single_row(
         &self,
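
Note: the new sql_reattachable implements the Spark Connect reattachable-execution flow: the initial ExecutePlan carries ReattachOptions { reattachable: true }, and whenever the stream ends before a ResultComplete message arrives, the client issues ReattachExecute with the last observed response_id until the server signals completion. A minimal calling sketch under assumptions (a connected SparkConnectClient named `client`, and a placeholder table name):

    // Sketch only: `client` and the table name are assumptions for illustration.
    // The reattach loop inside sql_reattachable keeps pulling partial results
    // until the server sends ResultComplete.
    let batches = client
        .sql_reattachable("SELECT * FROM my_catalog.my_schema.events", 100_000)
        .await?;
    let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
    println!("fetched {} rows in {} Arrow batches", total_rows, batches.len());
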
mangleframes-0.3.5/viewer/src/benchmark.rs (new file)

@@ -0,0 +1,242 @@
+//! Benchmark harness for measuring Spark Connect client performance.
+//!
+//! Measures query execution time across different row counts to compare
+//! against alternative approaches (Python subprocess, HTTP service).
+
+use std::time::Instant;
+
+use serde::{Deserialize, Serialize};
+use tracing::info;
+
+use crate::spark_client::DatabricksClient;
+use crate::sql_builder;
+
+/// Single benchmark iteration result.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct IterationResult {
+    pub iteration: usize,
+    pub rows_fetched: usize,
+    pub total_ms: u64,
+    pub rows_per_sec: f64,
+}
+
+/// Aggregated results for a single row count.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RowCountResult {
+    pub row_count: usize,
+    pub iterations: Vec<IterationResult>,
+    pub avg_ms: f64,
+    pub min_ms: u64,
+    pub max_ms: u64,
+    pub p50_ms: u64,
+    pub p95_ms: u64,
+    pub avg_rows_per_sec: f64,
+}
+
+/// Full benchmark suite results.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BenchmarkResults {
+    pub scenario: String,
+    pub table_name: String,
+    pub timestamp: String,
+    pub row_counts: Vec<RowCountResult>,
+}
+
+/// Configuration for benchmark run.
+#[derive(Debug, Clone)]
+pub struct BenchmarkConfig {
+    pub table_name: String,
+    pub row_counts: Vec<usize>,
+    pub iterations: usize,
+    pub warmup_iterations: usize,
+}
+
+impl Default for BenchmarkConfig {
+    fn default() -> Self {
+        Self {
+            table_name: String::new(),
+            row_counts: vec![1000, 10_000, 100_000],
+            iterations: 10,
+            warmup_iterations: 2,
+        }
+    }
+}
+
+/// Run benchmark suite against Databricks via Rust Spark Connect client.
+pub async fn run_benchmark(
+    client: &DatabricksClient,
+    config: &BenchmarkConfig,
+) -> Result<BenchmarkResults, String> {
+    info!(
+        "Starting Rust client benchmark: table={}, row_counts={:?}, iterations={}",
+        config.table_name, config.row_counts, config.iterations
+    );
+
+    let mut row_count_results = Vec::with_capacity(config.row_counts.len());
+
+    for &row_count in &config.row_counts {
+        info!("Benchmarking {} rows...", row_count);
+
+        // Warmup runs (not counted)
+        for i in 0..config.warmup_iterations {
+            info!("  Warmup {}/{}", i + 1, config.warmup_iterations);
+            let sql = sql_builder::select_data_sql(&config.table_name, row_count, 0);
+            if let Err(e) = client.execute_sql_reattachable(&sql, row_count).await {
+                return Err(format!("Warmup query failed: {}", e));
+            }
+        }
+
+        // Actual benchmark iterations (use reattachable for >10K rows)
+        let mut iterations = Vec::with_capacity(config.iterations);
+
+        for i in 0..config.iterations {
+            let sql = sql_builder::select_data_sql(&config.table_name, row_count, 0);
+
+            let start = Instant::now();
+            let response = client
+                .execute_sql_reattachable(&sql, row_count)
+                .await
+                .map_err(|e| format!("Query failed: {}", e))?;
+            let total_ms = start.elapsed().as_millis() as u64;
+
+            let rows_fetched = response.row_count as usize;
+            let rows_per_sec = if total_ms > 0 {
+                (rows_fetched as f64) / (total_ms as f64 / 1000.0)
+            } else {
+                0.0
+            };
+
+            info!(
+                "  Iteration {}/{}: {}ms, {} rows, {:.0} rows/sec",
+                i + 1,
+                config.iterations,
+                total_ms,
+                rows_fetched,
+                rows_per_sec
+            );
+
+            iterations.push(IterationResult {
+                iteration: i + 1,
+                rows_fetched,
+                total_ms,
+                rows_per_sec,
+            });
+        }
+
+        let result = aggregate_iterations(&iterations, row_count);
+        info!(
+            "  Summary: avg={}ms, p50={}ms, p95={}ms, {:.0} rows/sec",
+            result.avg_ms as u64, result.p50_ms, result.p95_ms, result.avg_rows_per_sec
+        );
+
+        row_count_results.push(result);
+    }
+
+    Ok(BenchmarkResults {
+        scenario: "rust_direct".to_string(),
+        table_name: config.table_name.clone(),
+        timestamp: chrono::Utc::now().to_rfc3339(),
+        row_counts: row_count_results,
+    })
+}
+
+fn aggregate_iterations(iterations: &[IterationResult], row_count: usize) -> RowCountResult {
+    let mut timings: Vec<u64> = iterations.iter().map(|i| i.total_ms).collect();
+    timings.sort_unstable();
+
+    let sum_ms: u64 = timings.iter().sum();
+    let avg_ms = sum_ms as f64 / timings.len() as f64;
+    let min_ms = *timings.first().unwrap_or(&0);
+    let max_ms = *timings.last().unwrap_or(&0);
+    let p50_ms = percentile(&timings, 50);
+    let p95_ms = percentile(&timings, 95);
+
+    // Use actual rows_fetched, not the requested row_count
+    let total_rows_fetched: usize = iterations.iter().map(|i| i.rows_fetched).sum();
+    let avg_rows_fetched = total_rows_fetched as f64 / iterations.len() as f64;
+
+    let avg_rows_per_sec = if avg_ms > 0.0 {
+        avg_rows_fetched / (avg_ms / 1000.0)
+    } else {
+        0.0
+    };
+
+    RowCountResult {
+        row_count,
+        iterations: iterations.to_vec(),
+        avg_ms,
+        min_ms,
+        max_ms,
+        p50_ms,
+        p95_ms,
+        avg_rows_per_sec,
+    }
+}
+
+fn percentile(sorted: &[u64], p: usize) -> u64 {
+    if sorted.is_empty() {
+        return 0;
+    }
+    let idx = (p * sorted.len() / 100).min(sorted.len() - 1);
+    sorted[idx]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_percentile_empty() {
+        assert_eq!(percentile(&[], 50), 0);
+    }
+
+    #[test]
+    fn test_percentile_single() {
+        assert_eq!(percentile(&[100], 50), 100);
+    }
+
+    #[test]
+    fn test_percentile_multiple() {
+        let sorted: Vec<u64> = (1..=100).collect();
+        assert!(percentile(&sorted, 50) >= 50);
+        assert!(percentile(&sorted, 95) >= 95);
+    }
+
+    #[test]
+    fn test_aggregate_iterations() {
+        let iterations = vec![
+            IterationResult {
+                iteration: 1,
+                rows_fetched: 1000,
+                total_ms: 100,
+                rows_per_sec: 10000.0,
+            },
+            IterationResult {
+                iteration: 2,
+                rows_fetched: 1000,
+                total_ms: 120,
+                rows_per_sec: 8333.0,
+            },
+            IterationResult {
+                iteration: 3,
+                rows_fetched: 1000,
+                total_ms: 110,
+                rows_per_sec: 9090.0,
+            },
+        ];
+
+        let result = aggregate_iterations(&iterations, 1000);
+        assert_eq!(result.row_count, 1000);
+        assert_eq!(result.min_ms, 100);
+        assert_eq!(result.max_ms, 120);
+        assert!((result.avg_ms - 110.0).abs() < 0.1);
+    }
+
+    #[test]
+    fn test_benchmark_config_default() {
+        let config = BenchmarkConfig::default();
+        assert_eq!(config.row_counts, vec![1000, 10_000, 100_000]);
+        assert_eq!(config.iterations, 10);
+        assert_eq!(config.warmup_iterations, 2);
+    }
+}
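
Note: run_benchmark performs uncounted warmup queries, times each iteration against the actual rows fetched, and aggregates with a simple nearest-rank percentile. A hedged sketch of driving it directly (assuming `client` is an already-connected spark_client::DatabricksClient and the table name is a placeholder):

    // Sketch: `client` and the table name are assumptions for illustration.
    let config = benchmark::BenchmarkConfig {
        table_name: "my_catalog.my_schema.my_table".to_string(),
        // Default keeps 1K/10K/100K rows, 10 iterations, 2 warmups.
        ..benchmark::BenchmarkConfig::default()
    };
    let results = benchmark::run_benchmark(&client, &config).await?;
    println!("{}", serde_json::to_string_pretty(&results)?);
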
mangleframes-0.3.5/viewer/src/main.rs (new file)

@@ -0,0 +1,194 @@
+//! MangleFrames Viewer - Web-based PySpark DataFrame viewer.
+
+mod alert_handlers;
+mod arrow_reader;
+mod benchmark;
+mod dashboard;
+mod export;
+mod handlers;
+mod history_analysis;
+mod history_handlers;
+mod join_handlers;
+mod perf;
+mod reconcile_handlers;
+mod spark_client;
+mod sql_builder;
+mod stats;
+#[cfg(test)]
+mod test_helpers;
+mod web_server;
+mod websocket;
+
+use std::sync::Arc;
+
+use clap::{Parser, Subcommand};
+use tracing::info;
+use tracing_subscriber::EnvFilter;
+
+use crate::web_server::AppState;
+
+#[derive(Parser)]
+#[command(name = "mangleframes-viewer")]
+#[command(about = "Web-based DataFrame viewer via Spark Connect")]
+struct Args {
+    /// Web server port
+    #[arg(short, long, default_value = "8765", global = true)]
+    port: u16,
+
+    /// Connect via Spark Connect proxy (e.g., sc://localhost:15002)
+    #[arg(long, global = true)]
+    proxy_url: Option<String>,
+
+    /// Databricks workspace host (not needed when using --proxy-url)
+    #[arg(long, env = "DATABRICKS_HOST", global = true)]
+    databricks_host: Option<String>,
+
+    /// Databricks personal access token (not needed when using --proxy-url)
+    #[arg(long, env = "DATABRICKS_TOKEN", global = true)]
+    databricks_token: Option<String>,
+
+    /// Databricks cluster ID (for cluster mode)
+    #[arg(long, env = "DATABRICKS_CLUSTER_ID", global = true)]
+    databricks_cluster_id: Option<String>,
+
+    /// Use Databricks serverless compute (no cluster ID needed)
+    #[arg(long, global = true)]
+    serverless: bool,
+
+    #[command(subcommand)]
+    command: Option<Command>,
+}
+
+#[derive(Subcommand)]
+enum Command {
+    /// Run performance benchmark against Databricks
+    Benchmark {
+        /// Table name to benchmark (e.g., catalog.schema.table)
+        #[arg(short, long)]
+        table: String,
+
+        /// Row counts to test (comma-separated)
+        #[arg(short, long, default_value = "1000,10000,100000")]
+        row_counts: String,
+
+        /// Number of iterations per row count
+        #[arg(short, long, default_value = "10")]
+        iterations: usize,
+
+        /// Number of warmup iterations (not counted)
+        #[arg(short, long, default_value = "2")]
+        warmup: usize,
+
+        /// Output file for JSON results
+        #[arg(short, long)]
+        output: Option<String>,
+    },
+}
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(EnvFilter::from_default_env())
+        .init();
+
+    let args = Args::parse();
+
+    let client = Arc::new(spark_client::DatabricksClient::new());
+
+    // Connect via proxy or directly to Databricks
+    if let Some(ref proxy_url) = args.proxy_url {
+        info!("Connecting via Spark Connect proxy at {}", proxy_url);
+        client
+            .connect_via_proxy(proxy_url)
+            .await
+            .map_err(|e| anyhow::anyhow!("Proxy connection failed: {}", e))?;
+    } else {
+        // Direct Databricks connection requires host and token
+        let host = args
+            .databricks_host
+            .as_deref()
+            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_HOST is required"))?;
+        let token = args
+            .databricks_token
+            .as_deref()
+            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_TOKEN is required"))?;
+
+        // Determine cluster_id: None for serverless, Some for cluster mode
+        let cluster_id = if args.serverless {
+            info!("Initializing Databricks serverless mode");
+            None
+        } else if let Some(ref id) = args.databricks_cluster_id {
+            info!("Initializing Databricks cluster mode (cluster: {})", id);
+            Some(id.as_str())
+        } else {
+            info!("Initializing Databricks serverless mode (default)");
+            None
+        };
+
+        client
+            .connect(host, token, cluster_id)
+            .await
+            .map_err(|e| anyhow::anyhow!("Databricks connection failed: {}", e))?;
+    }
+
+    // Handle subcommand or default to web server
+    match args.command {
+        Some(Command::Benchmark {
+            table,
+            row_counts,
+            iterations,
+            warmup,
+            output,
+        }) => {
+            run_benchmark_command(&client, &table, &row_counts, iterations, warmup, output).await
+        }
+        None => {
+            let state = AppState::new(Some(client));
+            info!("Starting web server on http://localhost:{}", args.port);
+            info!("Open this URL in your browser to view the UI");
+            web_server::run(state, args.port).await
+        }
+    }
+}
+
+async fn run_benchmark_command(
+    client: &spark_client::DatabricksClient,
+    table: &str,
+    row_counts_str: &str,
+    iterations: usize,
+    warmup: usize,
+    output: Option<String>,
+) -> anyhow::Result<()> {
+    let row_counts: Vec<usize> = row_counts_str
+        .split(',')
+        .filter_map(|s| s.trim().parse().ok())
+        .collect();
+
+    if row_counts.is_empty() {
+        return Err(anyhow::anyhow!("No valid row counts provided"));
+    }
+
+    info!("Running benchmark: table={}, row_counts={:?}", table, row_counts);
+
+    let config = benchmark::BenchmarkConfig {
+        table_name: table.to_string(),
+        row_counts,
+        iterations,
+        warmup_iterations: warmup,
+    };
+
+    let results = benchmark::run_benchmark(client, &config)
+        .await
+        .map_err(|e| anyhow::anyhow!("Benchmark failed: {}", e))?;
+
+    let json = serde_json::to_string_pretty(&results)?;
+
+    if let Some(path) = output {
+        std::fs::write(&path, &json)?;
+        info!("Results written to {}", path);
+    } else {
+        println!("{}", json);
+    }
+
+    Ok(())
+}
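
Note: the new main.rs keeps the web server as the default behavior and adds a benchmark subcommand; the connection flags are marked global = true so they may appear before or after the subcommand. A sketch exercising the clap definition above with a hypothetical argv (table name is a placeholder; this would live inside the crate since Args and Command are private):

    // Sketch: hypothetical argv; `--serverless` is global, so it works
    // either before or after the `benchmark` subcommand.
    let args = Args::parse_from([
        "mangleframes-viewer",
        "--serverless",
        "benchmark",
        "--table", "my_catalog.my_schema.my_table",
        "--row-counts", "1000,10000",
        "--iterations", "5",
    ]);
    assert!(matches!(args.command, Some(Command::Benchmark { .. })));
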
viewer/src/spark_client.rs

@@ -103,6 +103,36 @@ impl DatabricksClient {
         })
     }
 
+    /// Execute SQL with reattachable execution for large result sets (>10K rows).
+    /// Uses ReattachExecute RPC to ensure complete results.
+    pub async fn execute_sql_reattachable(
+        &self,
+        query: &str,
+        limit: usize,
+    ) -> Result<SqlResponse, SparkConnectError> {
+        let guard = self.client.read().await;
+        let client = guard
+            .as_ref()
+            .ok_or_else(|| SparkConnectError::Config("Not connected".to_string()))?;
+
+        let start = Instant::now();
+        let batches = client.sql_reattachable(query, limit as u32).await?;
+        let execution_ms = start.elapsed().as_millis() as u64;
+
+        let row_count: u64 = batches.iter().map(|b| b.num_rows() as u64).sum();
+
+        info!(
+            "Reattachable SQL executed via Spark Connect in {}ms, {} rows",
+            execution_ms, row_count
+        );
+
+        Ok(SqlResponse {
+            batches,
+            row_count,
+            execution_ms,
+        })
+    }
+
     /// Register Arrow batches as a temporary view in Spark.
     pub async fn create_temp_view(
         &self,
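
Note: execute_sql_reattachable is a thin wrapper that takes the shared lock on the inner Spark Connect client, delegates to sql_reattachable, and returns a SqlResponse with row count and timing. A hedged usage sketch (assuming a connected DatabricksClient named `client`; the wrapper casts the usize limit down to u32):

    // Sketch: `client` and the query are assumptions for illustration.
    let resp = client
        .execute_sql_reattachable("SELECT id, ts FROM my_catalog.my_schema.events", 50_000)
        .await?;
    println!(
        "{} rows in {}ms across {} batches",
        resp.row_count, resp.execution_ms, resp.batches.len()
    );
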
viewer/src/sql_builder.rs

@@ -3,8 +3,12 @@
 use serde_json::Value;
 
 /// Quote an identifier to prevent SQL injection.
+/// Handles multi-part names (catalog.schema.table) by quoting each part separately.
 pub fn quote_identifier(name: &str) -> String {
-
+    name.split('.')
+        .map(|part| format!("`{}`", part.replace('`', "``")))
+        .collect::<Vec<_>>()
+        .join(".")
 }
 
 /// Quote multiple identifiers and join with commas.
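
Note: quote_identifier now treats dots as part separators, so a qualified name is quoted per part and embedded backticks are doubled within each part; the trade-off is that a single identifier containing a literal dot is now split as well. A small sketch of the resulting behavior (values consistent with the tests in the hunks below):

    assert_eq!(quote_identifier("catalog.schema.table"), "`catalog`.`schema`.`table`");
    assert_eq!(quote_identifier("col-name"), "`col-name`");
    // Embedded backticks are doubled per part, preventing identifier escape:
    assert_eq!(quote_identifier("tab`le"), "`tab``le`");
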
@@ -1279,7 +1283,18 @@ mod tests {
     #[test]
     fn test_quote_identifier_special_chars() {
         assert_eq!(quote_identifier("col-name"), "`col-name`");
-
+    }
+
+    #[test]
+    fn test_quote_identifier_multipart_names() {
+        assert_eq!(quote_identifier("catalog.schema.table"), "`catalog`.`schema`.`table`");
+        assert_eq!(quote_identifier("schema.table"), "`schema`.`table`");
+        assert_eq!(quote_identifier("simple"), "`simple`");
+    }
+
+    #[test]
+    fn test_quote_identifier_multipart_with_backticks() {
+        assert_eq!(quote_identifier("cat`alog.sch`ema.tab`le"), "`cat``alog`.`sch``ema`.`tab``le`");
     }
 
     // ============ quote_identifiers tests ============
@@ -1326,7 +1341,7 @@ mod tests {
     #[test]
     fn test_describe_table_sql_qualified_name() {
         let sql = describe_table_sql("catalog.schema.table");
-        assert_eq!(sql, "DESCRIBE TABLE `catalog
+        assert_eq!(sql, "DESCRIBE TABLE `catalog`.`schema`.`table`");
     }
 
     // ============ select_data_sql tests ============
mangleframes-0.3.4/viewer/src/main.rs (deleted)

@@ -1,109 +0,0 @@
-//! MangleFrames Viewer - Web-based PySpark DataFrame viewer.
-
-mod alert_handlers;
-mod arrow_reader;
-mod dashboard;
-mod export;
-mod handlers;
-mod history_analysis;
-mod history_handlers;
-mod join_handlers;
-mod perf;
-mod reconcile_handlers;
-mod spark_client;
-mod sql_builder;
-mod stats;
-#[cfg(test)]
-mod test_helpers;
-mod web_server;
-mod websocket;
-
-use std::sync::Arc;
-
-use clap::Parser;
-use tracing::info;
-use tracing_subscriber::EnvFilter;
-
-use crate::web_server::AppState;
-
-#[derive(Parser)]
-#[command(name = "mangleframes-viewer")]
-#[command(about = "Web-based DataFrame viewer via Spark Connect")]
-struct Args {
-    /// Web server port
-    #[arg(short, long, default_value = "8765")]
-    port: u16,
-
-    /// Connect via Spark Connect proxy (e.g., sc://localhost:15002)
-    #[arg(long)]
-    proxy_url: Option<String>,
-
-    /// Databricks workspace host (not needed when using --proxy-url)
-    #[arg(long, env = "DATABRICKS_HOST")]
-    databricks_host: Option<String>,
-
-    /// Databricks personal access token (not needed when using --proxy-url)
-    #[arg(long, env = "DATABRICKS_TOKEN")]
-    databricks_token: Option<String>,
-
-    /// Databricks cluster ID (for cluster mode)
-    #[arg(long, env = "DATABRICKS_CLUSTER_ID")]
-    databricks_cluster_id: Option<String>,
-
-    /// Use Databricks serverless compute (no cluster ID needed)
-    #[arg(long)]
-    serverless: bool,
-}
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    tracing_subscriber::fmt()
-        .with_env_filter(EnvFilter::from_default_env())
-        .init();
-
-    let args = Args::parse();
-
-    let client = Arc::new(spark_client::DatabricksClient::new());
-
-    // Connect via proxy or directly to Databricks
-    if let Some(ref proxy_url) = args.proxy_url {
-        info!("Connecting via Spark Connect proxy at {}", proxy_url);
-        client
-            .connect_via_proxy(proxy_url)
-            .await
-            .map_err(|e| anyhow::anyhow!("Proxy connection failed: {}", e))?;
-    } else {
-        // Direct Databricks connection requires host and token
-        let host = args
-            .databricks_host
-            .as_deref()
-            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_HOST is required"))?;
-        let token = args
-            .databricks_token
-            .as_deref()
-            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_TOKEN is required"))?;
-
-        // Determine cluster_id: None for serverless, Some for cluster mode
-        let cluster_id = if args.serverless {
-            info!("Initializing Databricks serverless mode");
-            None
-        } else if let Some(ref id) = args.databricks_cluster_id {
-            info!("Initializing Databricks cluster mode (cluster: {})", id);
-            Some(id.as_str())
-        } else {
-            info!("Initializing Databricks serverless mode (default)");
-            None
-        };
-
-        client
-            .connect(host, token, cluster_id)
-            .await
-            .map_err(|e| anyhow::anyhow!("Databricks connection failed: {}", e))?;
-    }
-
-    let state = AppState::new(Some(client));
-
-    info!("Starting web server on http://localhost:{}", args.port);
-    info!("Open this URL in your browser to view the UI");
-    web_server::run(state, args.port).await
-}
All remaining files listed above were renamed from mangleframes-0.3.4 to mangleframes-0.3.5 without content changes.