mangleframes 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {mangleframes-0.1.1 → mangleframes-0.1.2}/PKG-INFO +1 -1
  2. {mangleframes-0.1.1 → mangleframes-0.1.2}/pyproject.toml +1 -1
  3. {mangleframes-0.1.1 → mangleframes-0.1.2}/python/mangleframes/__init__.py +4 -1
  4. {mangleframes-0.1.1 → mangleframes-0.1.2}/python/mangleframes/protocol.py +53 -19
  5. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/main.rs +15 -2
  6. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/static/app.js +3 -3
  7. {mangleframes-0.1.1 → mangleframes-0.1.2}/Cargo.lock +0 -0
  8. {mangleframes-0.1.1 → mangleframes-0.1.2}/Cargo.toml +0 -0
  9. {mangleframes-0.1.1 → mangleframes-0.1.2}/python/mangleframes/launcher.py +0 -0
  10. {mangleframes-0.1.1 → mangleframes-0.1.2}/python/mangleframes/server.py +0 -0
  11. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/Cargo.toml +0 -0
  12. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/arrow_reader.rs +0 -0
  13. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/export.rs +0 -0
  14. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/handlers.rs +0 -0
  15. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/query_engine.rs +0 -0
  16. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/socket_client.rs +0 -0
  17. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/stats.rs +0 -0
  18. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/web_server.rs +0 -0
  19. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/src/websocket.rs +0 -0
  20. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/static/index.html +0 -0
  21. {mangleframes-0.1.1 → mangleframes-0.1.2}/viewer/static/style.css +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mangleframes
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Classifier: Programming Language :: Python :: 3
5
5
  Classifier: Programming Language :: Rust
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "mangleframes"
7
- version = "0.1.1"
7
+ version = "0.1.2"
8
8
  description = "PySpark DataFrame viewer with modern web UI"
9
9
  requires-python = ">=3.9"
10
10
  license = { text = "MIT" }
@@ -5,12 +5,13 @@ import time
5
5
  from typing import TYPE_CHECKING
6
6
 
7
7
  from .launcher import launch_viewer, open_browser
8
+ from .protocol import clear_stats_cache
8
9
  from .server import DataFrameServer
9
10
 
10
11
  if TYPE_CHECKING:
11
12
  from pyspark.sql import DataFrame
12
13
 
13
- __version__ = "0.1.1"
14
+ __version__ = "0.1.2"
14
15
  __all__ = ["register", "unregister", "show"]
15
16
 
16
17
  _registry: dict[str, DataFrame] = {}
@@ -23,6 +24,7 @@ def register(name: str, df: DataFrame) -> None:
23
24
  global _server
24
25
 
25
26
  _registry[name] = df
27
+ clear_stats_cache(name) # Invalidate cached stats for this name
26
28
 
27
29
  if _server is None:
28
30
  _server = DataFrameServer(_registry)
@@ -33,6 +35,7 @@ def unregister(name: str) -> None:
33
35
  """Remove a DataFrame from the viewer."""
34
36
  if name in _registry:
35
37
  del _registry[name]
38
+ clear_stats_cache(name)
36
39
 
37
40
 
38
41
  def show(port: int = 8765, block: bool = True) -> None:
@@ -6,6 +6,7 @@ import struct
6
6
  from typing import TYPE_CHECKING, Any
7
7
 
8
8
  import pyarrow as pa
9
+ from pyspark.sql import functions as F
9
10
 
10
11
  if TYPE_CHECKING:
11
12
  from pyspark.sql import DataFrame
@@ -13,6 +14,17 @@ if TYPE_CHECKING:
13
14
  STATUS_OK = 0
14
15
  STATUS_ERROR = 1
15
16
 
17
+ # Cache for computed stats (cleared when DataFrame is re-registered)
18
+ _stats_cache: dict[str, dict] = {}
19
+
20
+
21
+ def clear_stats_cache(name: str | None = None) -> None:
22
+ """Clear cached stats for a DataFrame or all DataFrames."""
23
+ if name is None:
24
+ _stats_cache.clear()
25
+ elif name in _stats_cache:
26
+ del _stats_cache[name]
27
+
16
28
 
17
29
  def encode_response(status: int, payload: bytes) -> bytes:
18
30
  """Encode response with status and length prefix."""
@@ -66,37 +78,59 @@ def handle_get(registry: dict[str, DataFrame], name: str, limit: int) -> bytes:
66
78
  return encode_response(STATUS_OK, sink.getvalue().to_pybytes())
67
79
 
68
80
 
81
+ def _is_numeric_type(dtype_str: str) -> bool:
82
+ """Check if a Spark type string represents a numeric type."""
83
+ dtype_lower = dtype_str.lower()
84
+ return any(t in dtype_lower for t in ["int", "long", "double", "float", "decimal"])
85
+
86
+
69
87
  def handle_stats(registry: dict[str, DataFrame], name: str) -> bytes:
70
- """Return basic statistics for a DataFrame."""
88
+ """Return basic statistics for a DataFrame using single aggregation."""
71
89
  if name not in registry:
72
90
  return encode_error(f"DataFrame '{name}' not found")
73
91
 
92
+ # Return cached stats if available
93
+ if name in _stats_cache:
94
+ return encode_json_response(_stats_cache[name])
95
+
74
96
  df = registry[name]
75
- row_count = df.count()
97
+ fields = df.schema.fields
76
98
 
99
+ # Build all aggregation expressions in one pass
100
+ agg_exprs = [F.count(F.lit(1)).alias("__total")]
101
+ for field in fields:
102
+ col_name = field.name
103
+ agg_exprs.append(
104
+ F.sum(F.when(F.col(col_name).isNull(), 1).otherwise(0)).alias(f"{col_name}__nulls")
105
+ )
106
+ if _is_numeric_type(str(field.dataType)):
107
+ agg_exprs.append(F.min(col_name).alias(f"{col_name}__min"))
108
+ agg_exprs.append(F.max(col_name).alias(f"{col_name}__max"))
109
+
110
+ # Single Spark action
111
+ result = df.agg(*agg_exprs).collect()[0]
112
+ row_count = result["__total"]
113
+
114
+ # Extract stats from result
77
115
  column_stats = []
78
- for field in df.schema.fields:
116
+ for field in fields:
79
117
  col_name = field.name
80
- stats = {"name": col_name, "type": str(field.dataType), "nullable": field.nullable}
81
-
82
- null_count_row = df.filter(df[col_name].isNull()).count()
83
- stats["null_count"] = null_count_row
118
+ dtype_str = str(field.dataType)
119
+ stats = {"name": col_name, "type": dtype_str, "nullable": field.nullable}
120
+ stats["null_count"] = result[f"{col_name}__nulls"] or 0
84
121
 
85
- dtype = str(field.dataType).lower()
86
- if any(t in dtype for t in ["int", "long", "double", "float", "decimal"]):
87
- agg_result = df.agg({col_name: "min"}).collect()[0][0]
88
- stats["min"] = str(agg_result) if agg_result is not None else None
89
-
90
- agg_result = df.agg({col_name: "max"}).collect()[0][0]
91
- stats["max"] = str(agg_result) if agg_result is not None else None
122
+ if _is_numeric_type(dtype_str):
123
+ min_val = result[f"{col_name}__min"]
124
+ max_val = result[f"{col_name}__max"]
125
+ stats["min"] = str(min_val) if min_val is not None else None
126
+ stats["max"] = str(max_val) if max_val is not None else None
92
127
 
93
128
  column_stats.append(stats)
94
129
 
95
- return encode_json_response({
96
- "name": name,
97
- "row_count": row_count,
98
- "columns": column_stats,
99
- })
130
+ stats_data = {"name": name, "row_count": row_count, "columns": column_stats}
131
+ _stats_cache[name] = stats_data # Cache for future requests
132
+
133
+ return encode_json_response(stats_data)
100
134
 
101
135
 
102
136
  def dispatch_command(
@@ -17,7 +17,7 @@ use tracing::info;
17
17
  use tracing_subscriber::EnvFilter;
18
18
 
19
19
  use crate::socket_client::SocketClient;
20
- use crate::web_server::AppState;
20
+ use crate::web_server::{AppState, CachedFrame};
21
21
 
22
22
  #[derive(Parser)]
23
23
  #[command(name = "mangleframes-viewer")]
@@ -43,7 +43,20 @@ async fn main() -> anyhow::Result<()> {
43
43
 
44
44
  info!("Connecting to Python server at {:?}", args.socket);
45
45
  let client = Arc::new(SocketClient::new(&args.socket));
46
- let state = AppState::new(client);
46
+ let state = AppState::new(client.clone());
47
+
48
+ // Preload first frame into cache for instant display
49
+ if let Ok(frames) = client.list_frames() {
50
+ if let Some(first) = frames.first() {
51
+ info!("Preloading frame: {}", first);
52
+ if let Ok(data) = client.get_frame(first, 1000) {
53
+ if let Ok(batches) = arrow_reader::parse_arrow_stream(&data) {
54
+ let mut cache = state.cache.write().await;
55
+ cache.insert(first.clone(), CachedFrame { batches, stats: None });
56
+ }
57
+ }
58
+ }
59
+ }
47
60
 
48
61
  if !args.no_browser {
49
62
  let url = format!("http://localhost:{}", args.port);
@@ -297,9 +297,9 @@ function init() {
297
297
  state.offset = 0;
298
298
  state.sortCol = null;
299
299
 
300
- await loadSchema(name);
301
- await loadData(name);
302
- loadStats(name);
300
+ // Fetch schema and data in parallel for faster display
301
+ await Promise.all([loadSchema(name), loadData(name)]);
302
+ loadStats(name); // Fire-and-forget, updates UI when ready
303
303
  };
304
304
 
305
305
  $('refresh-btn').onclick = () => {
File without changes
File without changes