mangleframes 0.3.4.tar.gz → 0.3.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. {mangleframes-0.3.4 → mangleframes-0.3.5}/PKG-INFO +1 -1
  2. {mangleframes-0.3.4 → mangleframes-0.3.5}/pyproject.toml +1 -1
  3. {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/__init__.py +1 -1
  4. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/client.rs +190 -7
  5. mangleframes-0.3.5/viewer/src/benchmark.rs +242 -0
  6. mangleframes-0.3.5/viewer/src/main.rs +194 -0
  7. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/spark_client.rs +30 -0
  8. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/sql_builder.rs +18 -3
  9. mangleframes-0.3.4/viewer/src/main.rs +0 -109
  10. {mangleframes-0.3.4 → mangleframes-0.3.5}/Cargo.lock +0 -0
  11. {mangleframes-0.3.4 → mangleframes-0.3.5}/Cargo.toml +0 -0
  12. {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/alerts.py +0 -0
  13. {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/launcher.py +0 -0
  14. {mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/session.py +0 -0
  15. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/Cargo.toml +0 -0
  16. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/build.rs +0 -0
  17. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/base.proto +0 -0
  18. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/catalog.proto +0 -0
  19. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/commands.proto +0 -0
  20. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/common.proto +0 -0
  21. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/expressions.proto +0 -0
  22. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/ml.proto +0 -0
  23. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/ml_common.proto +0 -0
  24. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/relations.proto +0 -0
  25. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/proto/spark/connect/types.proto +0 -0
  26. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/error.rs +0 -0
  27. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/lib.rs +0 -0
  28. {mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/proto/spark.connect.rs +0 -0
  29. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/Cargo.toml +0 -0
  30. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/index.html +0 -0
  31. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/package-lock.json +0 -0
  32. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/package.json +0 -0
  33. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/postcss.config.js +0 -0
  34. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/App.tsx +0 -0
  35. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/JoinAnalyzer.tsx +0 -0
  36. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/Reconciliation.tsx +0 -0
  37. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/SQLEditor.tsx +0 -0
  38. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/analysis/__tests__/JoinAnalyzer.test.tsx +0 -0
  39. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/ColumnDropdown.tsx +0 -0
  40. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/ColumnStats.tsx +0 -0
  41. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/DataGrid.tsx +0 -0
  42. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/data/SchemaView.tsx +0 -0
  43. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDBuilder.tsx +0 -0
  44. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDCanvas.tsx +0 -0
  45. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDConfigModal.tsx +0 -0
  46. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDTableList.tsx +0 -0
  47. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDToolbar.tsx +0 -0
  48. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/ERDValidationPanel.tsx +0 -0
  49. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/TableNode.tsx +0 -0
  50. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/__tests__/ERDDragDrop.test.tsx +0 -0
  51. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/erd/index.ts +0 -0
  52. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/ContextPanel.tsx +0 -0
  53. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/Layout.tsx +0 -0
  54. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/MainContent.tsx +0 -0
  55. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/Sidebar.tsx +0 -0
  56. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/StatusBar.tsx +0 -0
  57. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/TabBar.tsx +0 -0
  58. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/layout/TopBar.tsx +0 -0
  59. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/quality/AlertBuilder.tsx +0 -0
  60. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/components/quality/QualityDashboard.tsx +0 -0
  61. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/index.css +0 -0
  62. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/lib/api.ts +0 -0
  63. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/lib/erdValidation.ts +0 -0
  64. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/main.tsx +0 -0
  65. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/stores/dataStore.ts +0 -0
  66. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/stores/erdStore.ts +0 -0
  67. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/stores/uiStore.ts +0 -0
  68. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/src/test/setup.ts +0 -0
  69. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/tailwind.config.js +0 -0
  70. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/tsconfig.json +0 -0
  71. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/tsconfig.node.json +0 -0
  72. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/frontend/vite.config.ts +0 -0
  73. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/alert_handlers.rs +0 -0
  74. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/arrow_reader.rs +0 -0
  75. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/dashboard.rs +0 -0
  76. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/export.rs +0 -0
  77. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/handlers.rs +0 -0
  78. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/history_analysis.rs +0 -0
  79. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/history_handlers.rs +0 -0
  80. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/join_handlers.rs +0 -0
  81. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/perf.rs +0 -0
  82. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/reconcile_handlers.rs +0 -0
  83. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/stats.rs +0 -0
  84. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/test_helpers.rs +0 -0
  85. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/web_server.rs +0 -0
  86. {mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/websocket.rs +0 -0
{mangleframes-0.3.4 → mangleframes-0.3.5}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mangleframes
-Version: 0.3.4
+Version: 0.3.5
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Rust
 Classifier: License :: OSI Approved :: MIT License
{mangleframes-0.3.4 → mangleframes-0.3.5}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "mangleframes"
-version = "0.3.4"
+version = "0.3.5"
 description = "PySpark DataFrame viewer with modern web UI"
 requires-python = ">=3.12"
 license = { text = "MIT" }
{mangleframes-0.3.4 → mangleframes-0.3.5}/python/mangleframes/__init__.py
@@ -32,7 +32,7 @@ from .session import SparkSession, get_proxy_port, get_spark_session
 if TYPE_CHECKING:
     from pyspark.sql import DataFrame
 
-__version__ = "0.3.4"
+__version__ = "0.3.5"
 
 # Import alert classes for convenience (optional dependency)
 try:
{mangleframes-0.3.4 → mangleframes-0.3.5}/spark-connect/src/client.rs
@@ -16,7 +16,8 @@ use uuid::Uuid;
 use crate::error::SparkConnectError;
 use crate::proto::spark_connect_service_client::SparkConnectServiceClient;
 use crate::proto::{
-    ExecutePlanRequest, Plan, Relation, Sql, UserContext,
+    ExecutePlanRequest, Plan, ReattachExecuteRequest, ReattachOptions, Relation, Sql, UserContext,
+    execute_plan_request::{RequestOption, request_option},
     execute_plan_response::ResponseType,
 };
 
@@ -247,26 +248,140 @@ impl SparkConnectClient {
             ClientInner::Proxy(c) => c.clone().execute_plan(request).await?,
         };
         let mut stream = response.into_inner();
-
         let mut batches = Vec::new();
-        let mut arrow_data = Vec::new();
 
         while let Some(resp) = stream.message().await? {
             if let Some(response_type) = resp.response_type {
                 if let ResponseType::ArrowBatch(batch) = response_type {
-                    arrow_data.extend_from_slice(&batch.data);
+                    // Each ArrowBatch is a complete IPC stream - parse it separately
+                    let parsed = parse_arrow_ipc(&batch.data)?;
+                    batches.extend(parsed);
                 }
             }
         }
 
-        if !arrow_data.is_empty() {
-            batches = parse_arrow_ipc(&arrow_data)?;
+        let elapsed_ms = start.elapsed().as_millis();
+        let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
+        info!(
+            "SQL executed in {}ms, {} rows returned",
+            elapsed_ms, row_count
+        );
+
+        if batches.is_empty() {
+            return Err(SparkConnectError::NoData);
+        }
+
+        Ok(batches)
+    }
+
+    /// Execute SQL with reattachable execution for large result sets (>10K rows).
+    /// Uses ReattachExecute RPC to continue fetching when server sends partial results.
+    pub async fn sql_reattachable(
+        &self,
+        query: &str,
+        limit: u32,
+    ) -> Result<Vec<RecordBatch>, SparkConnectError> {
+        let start = Instant::now();
+        info!("Executing reattachable SQL via Spark Connect: {}", query);
+
+        let sql_relation = Relation {
+            common: None,
+            rel_type: Some(crate::proto::relation::RelType::Sql(Sql {
+                query: query.to_string(),
+                args: Default::default(),
+                pos_args: vec![],
+                named_arguments: Default::default(),
+                pos_arguments: vec![],
+            })),
+        };
+
+        let relation = if limit < u32::MAX {
+            Relation {
+                common: None,
+                rel_type: Some(crate::proto::relation::RelType::Limit(Box::new(
+                    crate::proto::Limit {
+                        input: Some(Box::new(sql_relation)),
+                        limit: limit as i32,
+                    },
+                ))),
+            }
+        } else {
+            sql_relation
+        };
+
+        let plan = Plan {
+            op_type: Some(crate::proto::plan::OpType::Root(relation)),
+        };
+
+        let operation_id = Uuid::new_v4().to_string();
+        let reattach_option = RequestOption {
+            request_option: Some(request_option::RequestOption::ReattachOptions(
+                ReattachOptions { reattachable: true },
+            )),
+        };
+
+        let request = ExecutePlanRequest {
+            session_id: self.session_id.clone(),
+            user_context: Some(UserContext {
+                user_id: "spark-connect-rs".to_string(),
+                user_name: "spark-connect-rs".to_string(),
+                extensions: vec![],
+            }),
+            operation_id: Some(operation_id.clone()),
+            plan: Some(plan),
+            client_type: Some("spark-connect-rs".to_string()),
+            request_options: vec![reattach_option],
+            tags: vec![],
+            client_observed_server_side_session_id: None,
+        };
+
+        let mut batches = Vec::new();
+        let mut last_response_id: Option<String> = None;
+        let mut result_complete = false;
+
+        // Initial execution
+        info!("Starting reattachable execution for operation {}", operation_id);
+        let (complete, resp_id) = self
+            .process_execute_stream(request, &mut batches)
+            .await?;
+        result_complete = complete;
+        last_response_id = resp_id;
+        info!(
+            "Initial stream ended: result_complete={}, last_response_id={:?}, batches_count={}",
+            result_complete, last_response_id, batches.len()
+        );
+
+        // Reattach loop: continue fetching if ResultComplete was not received
+        while !result_complete {
+            info!(
+                "Reattaching to operation {} from response {:?}",
+                operation_id, last_response_id
+            );
+
+            let reattach_request = ReattachExecuteRequest {
+                session_id: self.session_id.clone(),
+                user_context: Some(UserContext {
+                    user_id: "spark-connect-rs".to_string(),
+                    user_name: "spark-connect-rs".to_string(),
+                    extensions: vec![],
+                }),
+                operation_id: operation_id.clone(),
+                client_type: Some("spark-connect-rs".to_string()),
+                last_response_id: last_response_id.clone(),
+                client_observed_server_side_session_id: None,
+            };
+
+            let (complete, resp_id) = self
+                .process_reattach_stream(reattach_request, &mut batches)
+                .await?;
+            result_complete = complete;
+            last_response_id = resp_id;
         }
 
         let elapsed_ms = start.elapsed().as_millis();
         let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
         info!(
-            "SQL executed in {}ms, {} rows returned",
+            "Reattachable SQL executed in {}ms, {} rows returned",
             elapsed_ms, row_count
         );
 
@@ -277,6 +392,74 @@ impl SparkConnectClient {
         Ok(batches)
     }
 
+    /// Process ExecutePlan response stream, returns (result_complete, last_response_id).
+    async fn process_execute_stream(
+        &self,
+        request: ExecutePlanRequest,
+        batches: &mut Vec<RecordBatch>,
+    ) -> Result<(bool, Option<String>), SparkConnectError> {
+        let response = match &self.inner {
+            ClientInner::Direct(c) => c.clone().execute_plan(request).await?,
+            ClientInner::Proxy(c) => c.clone().execute_plan(request).await?,
+        };
+        self.process_response_stream(response.into_inner(), batches)
+            .await
+    }
+
+    /// Process ReattachExecute response stream, returns (result_complete, last_response_id).
+    async fn process_reattach_stream(
+        &self,
+        request: ReattachExecuteRequest,
+        batches: &mut Vec<RecordBatch>,
+    ) -> Result<(bool, Option<String>), SparkConnectError> {
+        let response = match &self.inner {
+            ClientInner::Direct(c) => c.clone().reattach_execute(request).await?,
+            ClientInner::Proxy(c) => c.clone().reattach_execute(request).await?,
+        };
+        self.process_response_stream(response.into_inner(), batches)
+            .await
+    }
+
+    /// Process a response stream, collecting Arrow record batches.
+    /// Returns (result_complete, last_response_id).
+    async fn process_response_stream(
+        &self,
+        mut stream: tonic::Streaming<crate::proto::ExecutePlanResponse>,
+        batches: &mut Vec<RecordBatch>,
+    ) -> Result<(bool, Option<String>), SparkConnectError> {
+        let mut result_complete = false;
+        let mut last_response_id: Option<String> = None;
+        let mut batch_count = 0;
+        let mut total_rows = 0i64;
+
+        while let Some(resp) = stream.message().await? {
+            last_response_id = Some(resp.response_id.clone());
+
+            if let Some(response_type) = resp.response_type {
+                match response_type {
+                    ResponseType::ArrowBatch(batch) => {
+                        batch_count += 1;
+                        total_rows += batch.row_count;
+                        // Each ArrowBatch is a complete IPC stream - parse it separately
+                        let parsed = parse_arrow_ipc(&batch.data)?;
+                        batches.extend(parsed);
+                    }
+                    ResponseType::ResultComplete(_) => {
+                        info!("Received ResultComplete after {} batches, {} rows", batch_count, total_rows);
+                        result_complete = true;
+                    }
+                    _ => {}
+                }
+            }
+        }
+
+        info!(
+            "Stream ended: {} batches, {} rows, result_complete={}",
+            batch_count, total_rows, result_complete
+        );
+        Ok((result_complete, last_response_id))
+    }
+
     /// Execute SQL and return a single row as JSON Value.
     pub async fn sql_single_row(
         &self,
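The reattachable path above is new in 0.3.5. For orientation, a minimal, hypothetical caller might look like the sketch below; it is not part of the package, it assumes the surrounding module already has SparkConnectClient, SparkConnectError and the Arrow RecordBatch type in scope, and the query, table name and limit are purely illustrative.

    // Illustrative sketch only - names outside this diff are assumptions.
    async fn fetch_large_result(
        client: &SparkConnectClient,
    ) -> Result<usize, SparkConnectError> {
        // sql_reattachable keeps reattaching to the operation until the server
        // reports ResultComplete, so large results are not silently truncated.
        let batches = client
            .sql_reattachable("SELECT * FROM some_catalog.some_schema.some_table", 100_000)
            .await?;
        // num_rows() comes from the Arrow RecordBatch values the client returns.
        Ok(batches.iter().map(|b| b.num_rows()).sum())
    }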
mangleframes-0.3.5/viewer/src/benchmark.rs
@@ -0,0 +1,242 @@
+//! Benchmark harness for measuring Spark Connect client performance.
+//!
+//! Measures query execution time across different row counts to compare
+//! against alternative approaches (Python subprocess, HTTP service).
+
+use std::time::Instant;
+
+use serde::{Deserialize, Serialize};
+use tracing::info;
+
+use crate::spark_client::DatabricksClient;
+use crate::sql_builder;
+
+/// Single benchmark iteration result.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct IterationResult {
+    pub iteration: usize,
+    pub rows_fetched: usize,
+    pub total_ms: u64,
+    pub rows_per_sec: f64,
+}
+
+/// Aggregated results for a single row count.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RowCountResult {
+    pub row_count: usize,
+    pub iterations: Vec<IterationResult>,
+    pub avg_ms: f64,
+    pub min_ms: u64,
+    pub max_ms: u64,
+    pub p50_ms: u64,
+    pub p95_ms: u64,
+    pub avg_rows_per_sec: f64,
+}
+
+/// Full benchmark suite results.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BenchmarkResults {
+    pub scenario: String,
+    pub table_name: String,
+    pub timestamp: String,
+    pub row_counts: Vec<RowCountResult>,
+}
+
+/// Configuration for benchmark run.
+#[derive(Debug, Clone)]
+pub struct BenchmarkConfig {
+    pub table_name: String,
+    pub row_counts: Vec<usize>,
+    pub iterations: usize,
+    pub warmup_iterations: usize,
+}
+
+impl Default for BenchmarkConfig {
+    fn default() -> Self {
+        Self {
+            table_name: String::new(),
+            row_counts: vec![1000, 10_000, 100_000],
+            iterations: 10,
+            warmup_iterations: 2,
+        }
+    }
+}
+
+/// Run benchmark suite against Databricks via Rust Spark Connect client.
+pub async fn run_benchmark(
+    client: &DatabricksClient,
+    config: &BenchmarkConfig,
+) -> Result<BenchmarkResults, String> {
+    info!(
+        "Starting Rust client benchmark: table={}, row_counts={:?}, iterations={}",
+        config.table_name, config.row_counts, config.iterations
+    );
+
+    let mut row_count_results = Vec::with_capacity(config.row_counts.len());
+
+    for &row_count in &config.row_counts {
+        info!("Benchmarking {} rows...", row_count);
+
+        // Warmup runs (not counted)
+        for i in 0..config.warmup_iterations {
+            info!(" Warmup {}/{}", i + 1, config.warmup_iterations);
+            let sql = sql_builder::select_data_sql(&config.table_name, row_count, 0);
+            if let Err(e) = client.execute_sql_reattachable(&sql, row_count).await {
+                return Err(format!("Warmup query failed: {}", e));
+            }
+        }
+
+        // Actual benchmark iterations (use reattachable for >10K rows)
+        let mut iterations = Vec::with_capacity(config.iterations);
+
+        for i in 0..config.iterations {
+            let sql = sql_builder::select_data_sql(&config.table_name, row_count, 0);
+
+            let start = Instant::now();
+            let response = client
+                .execute_sql_reattachable(&sql, row_count)
+                .await
+                .map_err(|e| format!("Query failed: {}", e))?;
+            let total_ms = start.elapsed().as_millis() as u64;
+
+            let rows_fetched = response.row_count as usize;
+            let rows_per_sec = if total_ms > 0 {
+                (rows_fetched as f64) / (total_ms as f64 / 1000.0)
+            } else {
+                0.0
+            };
+
+            info!(
+                " Iteration {}/{}: {}ms, {} rows, {:.0} rows/sec",
+                i + 1,
+                config.iterations,
+                total_ms,
+                rows_fetched,
+                rows_per_sec
+            );
+
+            iterations.push(IterationResult {
+                iteration: i + 1,
+                rows_fetched,
+                total_ms,
+                rows_per_sec,
+            });
+        }
+
+        let result = aggregate_iterations(&iterations, row_count);
+        info!(
+            " Summary: avg={}ms, p50={}ms, p95={}ms, {:.0} rows/sec",
+            result.avg_ms as u64, result.p50_ms, result.p95_ms, result.avg_rows_per_sec
+        );
+
+        row_count_results.push(result);
+    }
+
+    Ok(BenchmarkResults {
+        scenario: "rust_direct".to_string(),
+        table_name: config.table_name.clone(),
+        timestamp: chrono::Utc::now().to_rfc3339(),
+        row_counts: row_count_results,
+    })
+}
+
+fn aggregate_iterations(iterations: &[IterationResult], row_count: usize) -> RowCountResult {
+    let mut timings: Vec<u64> = iterations.iter().map(|i| i.total_ms).collect();
+    timings.sort_unstable();
+
+    let sum_ms: u64 = timings.iter().sum();
+    let avg_ms = sum_ms as f64 / timings.len() as f64;
+    let min_ms = *timings.first().unwrap_or(&0);
+    let max_ms = *timings.last().unwrap_or(&0);
+    let p50_ms = percentile(&timings, 50);
+    let p95_ms = percentile(&timings, 95);
+
+    // Use actual rows_fetched, not the requested row_count
+    let total_rows_fetched: usize = iterations.iter().map(|i| i.rows_fetched).sum();
+    let avg_rows_fetched = total_rows_fetched as f64 / iterations.len() as f64;
+
+    let avg_rows_per_sec = if avg_ms > 0.0 {
+        avg_rows_fetched / (avg_ms / 1000.0)
+    } else {
+        0.0
+    };
+
+    RowCountResult {
+        row_count,
+        iterations: iterations.to_vec(),
+        avg_ms,
+        min_ms,
+        max_ms,
+        p50_ms,
+        p95_ms,
+        avg_rows_per_sec,
+    }
+}
+
+fn percentile(sorted: &[u64], p: usize) -> u64 {
+    if sorted.is_empty() {
+        return 0;
+    }
+    let idx = (p * sorted.len() / 100).min(sorted.len() - 1);
+    sorted[idx]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_percentile_empty() {
+        assert_eq!(percentile(&[], 50), 0);
+    }
+
+    #[test]
+    fn test_percentile_single() {
+        assert_eq!(percentile(&[100], 50), 100);
+    }
+
+    #[test]
+    fn test_percentile_multiple() {
+        let sorted: Vec<u64> = (1..=100).collect();
+        assert!(percentile(&sorted, 50) >= 50);
+        assert!(percentile(&sorted, 95) >= 95);
+    }
+
+    #[test]
+    fn test_aggregate_iterations() {
+        let iterations = vec![
+            IterationResult {
+                iteration: 1,
+                rows_fetched: 1000,
+                total_ms: 100,
+                rows_per_sec: 10000.0,
+            },
+            IterationResult {
+                iteration: 2,
+                rows_fetched: 1000,
+                total_ms: 120,
+                rows_per_sec: 8333.0,
+            },
+            IterationResult {
+                iteration: 3,
+                rows_fetched: 1000,
+                total_ms: 110,
+                rows_per_sec: 9090.0,
+            },
+        ];
+
+        let result = aggregate_iterations(&iterations, 1000);
+        assert_eq!(result.row_count, 1000);
+        assert_eq!(result.min_ms, 100);
+        assert_eq!(result.max_ms, 120);
+        assert!((result.avg_ms - 110.0).abs() < 0.1);
+    }
+
+    #[test]
+    fn test_benchmark_config_default() {
+        let config = BenchmarkConfig::default();
+        assert_eq!(config.row_counts, vec![1000, 10_000, 100_000]);
+        assert_eq!(config.iterations, 10);
+        assert_eq!(config.warmup_iterations, 2);
+    }
+}
mangleframes-0.3.5/viewer/src/main.rs
@@ -0,0 +1,194 @@
+//! MangleFrames Viewer - Web-based PySpark DataFrame viewer.
+
+mod alert_handlers;
+mod arrow_reader;
+mod benchmark;
+mod dashboard;
+mod export;
+mod handlers;
+mod history_analysis;
+mod history_handlers;
+mod join_handlers;
+mod perf;
+mod reconcile_handlers;
+mod spark_client;
+mod sql_builder;
+mod stats;
+#[cfg(test)]
+mod test_helpers;
+mod web_server;
+mod websocket;
+
+use std::sync::Arc;
+
+use clap::{Parser, Subcommand};
+use tracing::info;
+use tracing_subscriber::EnvFilter;
+
+use crate::web_server::AppState;
+
+#[derive(Parser)]
+#[command(name = "mangleframes-viewer")]
+#[command(about = "Web-based DataFrame viewer via Spark Connect")]
+struct Args {
+    /// Web server port
+    #[arg(short, long, default_value = "8765", global = true)]
+    port: u16,
+
+    /// Connect via Spark Connect proxy (e.g., sc://localhost:15002)
+    #[arg(long, global = true)]
+    proxy_url: Option<String>,
+
+    /// Databricks workspace host (not needed when using --proxy-url)
+    #[arg(long, env = "DATABRICKS_HOST", global = true)]
+    databricks_host: Option<String>,
+
+    /// Databricks personal access token (not needed when using --proxy-url)
+    #[arg(long, env = "DATABRICKS_TOKEN", global = true)]
+    databricks_token: Option<String>,
+
+    /// Databricks cluster ID (for cluster mode)
+    #[arg(long, env = "DATABRICKS_CLUSTER_ID", global = true)]
+    databricks_cluster_id: Option<String>,
+
+    /// Use Databricks serverless compute (no cluster ID needed)
+    #[arg(long, global = true)]
+    serverless: bool,
+
+    #[command(subcommand)]
+    command: Option<Command>,
+}
+
+#[derive(Subcommand)]
+enum Command {
+    /// Run performance benchmark against Databricks
+    Benchmark {
+        /// Table name to benchmark (e.g., catalog.schema.table)
+        #[arg(short, long)]
+        table: String,
+
+        /// Row counts to test (comma-separated)
+        #[arg(short, long, default_value = "1000,10000,100000")]
+        row_counts: String,
+
+        /// Number of iterations per row count
+        #[arg(short, long, default_value = "10")]
+        iterations: usize,
+
+        /// Number of warmup iterations (not counted)
+        #[arg(short, long, default_value = "2")]
+        warmup: usize,
+
+        /// Output file for JSON results
+        #[arg(short, long)]
+        output: Option<String>,
+    },
+}
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(EnvFilter::from_default_env())
+        .init();
+
+    let args = Args::parse();
+
+    let client = Arc::new(spark_client::DatabricksClient::new());
+
+    // Connect via proxy or directly to Databricks
+    if let Some(ref proxy_url) = args.proxy_url {
+        info!("Connecting via Spark Connect proxy at {}", proxy_url);
+        client
+            .connect_via_proxy(proxy_url)
+            .await
+            .map_err(|e| anyhow::anyhow!("Proxy connection failed: {}", e))?;
+    } else {
+        // Direct Databricks connection requires host and token
+        let host = args
+            .databricks_host
+            .as_deref()
+            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_HOST is required"))?;
+        let token = args
+            .databricks_token
+            .as_deref()
+            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_TOKEN is required"))?;
+
+        // Determine cluster_id: None for serverless, Some for cluster mode
+        let cluster_id = if args.serverless {
+            info!("Initializing Databricks serverless mode");
+            None
+        } else if let Some(ref id) = args.databricks_cluster_id {
+            info!("Initializing Databricks cluster mode (cluster: {})", id);
+            Some(id.as_str())
+        } else {
+            info!("Initializing Databricks serverless mode (default)");
+            None
+        };
+
+        client
+            .connect(host, token, cluster_id)
+            .await
+            .map_err(|e| anyhow::anyhow!("Databricks connection failed: {}", e))?;
+    }
+
+    // Handle subcommand or default to web server
+    match args.command {
+        Some(Command::Benchmark {
+            table,
+            row_counts,
+            iterations,
+            warmup,
+            output,
+        }) => {
+            run_benchmark_command(&client, &table, &row_counts, iterations, warmup, output).await
+        }
+        None => {
+            let state = AppState::new(Some(client));
+            info!("Starting web server on http://localhost:{}", args.port);
+            info!("Open this URL in your browser to view the UI");
+            web_server::run(state, args.port).await
+        }
+    }
+}
+
+async fn run_benchmark_command(
+    client: &spark_client::DatabricksClient,
+    table: &str,
+    row_counts_str: &str,
+    iterations: usize,
+    warmup: usize,
+    output: Option<String>,
+) -> anyhow::Result<()> {
+    let row_counts: Vec<usize> = row_counts_str
+        .split(',')
+        .filter_map(|s| s.trim().parse().ok())
+        .collect();
+
+    if row_counts.is_empty() {
+        return Err(anyhow::anyhow!("No valid row counts provided"));
+    }
+
+    info!("Running benchmark: table={}, row_counts={:?}", table, row_counts);
+
+    let config = benchmark::BenchmarkConfig {
+        table_name: table.to_string(),
+        row_counts,
+        iterations,
+        warmup_iterations: warmup,
+    };
+
+    let results = benchmark::run_benchmark(client, &config)
+        .await
+        .map_err(|e| anyhow::anyhow!("Benchmark failed: {}", e))?;
+
+    let json = serde_json::to_string_pretty(&results)?;
+
+    if let Some(path) = output {
+        std::fs::write(&path, &json)?;
+        info!("Results written to {}", path);
+    } else {
+        println!("{}", json);
+    }
+
+    Ok(())
+}
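For orientation, the new subcommand would be invoked roughly as mangleframes-viewer benchmark --table catalog.schema.table --row-counts 1000,10000,100000 --iterations 10 --output results.json; this invocation is inferred from the clap definitions above rather than taken from the package, and the existing connection flags or DATABRICKS_* environment variables still apply.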
{mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/spark_client.rs
@@ -103,6 +103,36 @@ impl DatabricksClient {
         })
     }
 
+    /// Execute SQL with reattachable execution for large result sets (>10K rows).
+    /// Uses ReattachExecute RPC to ensure complete results.
+    pub async fn execute_sql_reattachable(
+        &self,
+        query: &str,
+        limit: usize,
+    ) -> Result<SqlResponse, SparkConnectError> {
+        let guard = self.client.read().await;
+        let client = guard
+            .as_ref()
+            .ok_or_else(|| SparkConnectError::Config("Not connected".to_string()))?;
+
+        let start = Instant::now();
+        let batches = client.sql_reattachable(query, limit as u32).await?;
+        let execution_ms = start.elapsed().as_millis() as u64;
+
+        let row_count: u64 = batches.iter().map(|b| b.num_rows() as u64).sum();
+
+        info!(
+            "Reattachable SQL executed via Spark Connect in {}ms, {} rows",
+            execution_ms, row_count
+        );
+
+        Ok(SqlResponse {
+            batches,
+            row_count,
+            execution_ms,
+        })
+    }
+
     /// Register Arrow batches as a temporary view in Spark.
     pub async fn create_temp_view(
         &self,
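A hypothetical caller of the viewer-side wrapper, for orientation (not part of the package: it assumes an already-connected DatabricksClient, and the query and limit are illustrative; execute_sql_reattachable and the SqlResponse fields come from this diff):

    // Illustrative sketch only - names outside this diff are assumptions.
    async fn print_query_timing(client: &DatabricksClient) -> Result<(), SparkConnectError> {
        let response = client
            .execute_sql_reattachable("SELECT * FROM example_table", 50_000)
            .await?;
        println!(
            "{} rows in {} ms across {} Arrow batches",
            response.row_count,
            response.execution_ms,
            response.batches.len()
        );
        Ok(())
    }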
{mangleframes-0.3.4 → mangleframes-0.3.5}/viewer/src/sql_builder.rs
@@ -3,8 +3,12 @@
 use serde_json::Value;
 
 /// Quote an identifier to prevent SQL injection.
+/// Handles multi-part names (catalog.schema.table) by quoting each part separately.
 pub fn quote_identifier(name: &str) -> String {
-    format!("`{}`", name.replace('`', "``"))
+    name.split('.')
+        .map(|part| format!("`{}`", part.replace('`', "``")))
+        .collect::<Vec<_>>()
+        .join(".")
 }
 
 /// Quote multiple identifiers and join with commas.
@@ -1279,7 +1283,18 @@ mod tests {
     #[test]
     fn test_quote_identifier_special_chars() {
         assert_eq!(quote_identifier("col-name"), "`col-name`");
-        assert_eq!(quote_identifier("col.name"), "`col.name`");
+    }
+
+    #[test]
+    fn test_quote_identifier_multipart_names() {
+        assert_eq!(quote_identifier("catalog.schema.table"), "`catalog`.`schema`.`table`");
+        assert_eq!(quote_identifier("schema.table"), "`schema`.`table`");
+        assert_eq!(quote_identifier("simple"), "`simple`");
+    }
+
+    #[test]
+    fn test_quote_identifier_multipart_with_backticks() {
+        assert_eq!(quote_identifier("cat`alog.sch`ema.tab`le"), "`cat``alog`.`sch``ema`.`tab``le`");
     }
 
     // ============ quote_identifiers tests ============
@@ -1326,7 +1341,7 @@
     #[test]
     fn test_describe_table_sql_qualified_name() {
         let sql = describe_table_sql("catalog.schema.table");
-        assert_eq!(sql, "DESCRIBE TABLE `catalog.schema.table`");
+        assert_eq!(sql, "DESCRIBE TABLE `catalog`.`schema`.`table`");
     }
 
     // ============ select_data_sql tests ============
mangleframes-0.3.4/viewer/src/main.rs
@@ -1,109 +0,0 @@
-//! MangleFrames Viewer - Web-based PySpark DataFrame viewer.
-
-mod alert_handlers;
-mod arrow_reader;
-mod dashboard;
-mod export;
-mod handlers;
-mod history_analysis;
-mod history_handlers;
-mod join_handlers;
-mod perf;
-mod reconcile_handlers;
-mod spark_client;
-mod sql_builder;
-mod stats;
-#[cfg(test)]
-mod test_helpers;
-mod web_server;
-mod websocket;
-
-use std::sync::Arc;
-
-use clap::Parser;
-use tracing::info;
-use tracing_subscriber::EnvFilter;
-
-use crate::web_server::AppState;
-
-#[derive(Parser)]
-#[command(name = "mangleframes-viewer")]
-#[command(about = "Web-based DataFrame viewer via Spark Connect")]
-struct Args {
-    /// Web server port
-    #[arg(short, long, default_value = "8765")]
-    port: u16,
-
-    /// Connect via Spark Connect proxy (e.g., sc://localhost:15002)
-    #[arg(long)]
-    proxy_url: Option<String>,
-
-    /// Databricks workspace host (not needed when using --proxy-url)
-    #[arg(long, env = "DATABRICKS_HOST")]
-    databricks_host: Option<String>,
-
-    /// Databricks personal access token (not needed when using --proxy-url)
-    #[arg(long, env = "DATABRICKS_TOKEN")]
-    databricks_token: Option<String>,
-
-    /// Databricks cluster ID (for cluster mode)
-    #[arg(long, env = "DATABRICKS_CLUSTER_ID")]
-    databricks_cluster_id: Option<String>,
-
-    /// Use Databricks serverless compute (no cluster ID needed)
-    #[arg(long)]
-    serverless: bool,
-}
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    tracing_subscriber::fmt()
-        .with_env_filter(EnvFilter::from_default_env())
-        .init();
-
-    let args = Args::parse();
-
-    let client = Arc::new(spark_client::DatabricksClient::new());
-
-    // Connect via proxy or directly to Databricks
-    if let Some(ref proxy_url) = args.proxy_url {
-        info!("Connecting via Spark Connect proxy at {}", proxy_url);
-        client
-            .connect_via_proxy(proxy_url)
-            .await
-            .map_err(|e| anyhow::anyhow!("Proxy connection failed: {}", e))?;
-    } else {
-        // Direct Databricks connection requires host and token
-        let host = args
-            .databricks_host
-            .as_deref()
-            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_HOST is required"))?;
-        let token = args
-            .databricks_token
-            .as_deref()
-            .ok_or_else(|| anyhow::anyhow!("DATABRICKS_TOKEN is required"))?;
-
-        // Determine cluster_id: None for serverless, Some for cluster mode
-        let cluster_id = if args.serverless {
-            info!("Initializing Databricks serverless mode");
-            None
-        } else if let Some(ref id) = args.databricks_cluster_id {
-            info!("Initializing Databricks cluster mode (cluster: {})", id);
-            Some(id.as_str())
-        } else {
-            info!("Initializing Databricks serverless mode (default)");
-            None
-        };
-
-        client
-            .connect(host, token, cluster_id)
-            .await
-            .map_err(|e| anyhow::anyhow!("Databricks connection failed: {}", e))?;
-    }
-
-    let state = AppState::new(Some(client));
-
-    info!("Starting web server on http://localhost:{}", args.port);
-    info!("Open this URL in your browser to view the UI");
-    web_server::run(state, args.port).await
-}