figpack-0.2.37-py3-none-any.whl → figpack-0.2.39-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

figpack/__init__.py CHANGED
@@ -2,7 +2,7 @@
 figpack - A Python package for creating shareable, interactive visualizations in the browser
 """
 
-__version__ = "0.2.37"
+__version__ = "0.2.39"
 
 from .cli import view_figure
 from .core import FigpackView, FigpackExtension, ExtensionView
@@ -9,6 +9,7 @@ from .figpack_view import FigpackView
 from .figpack_extension import FigpackExtension
 from .extension_view import ExtensionView
 from .zarr import Group, _check_zarr_version
+from ._zarr_consolidate import consolidate_zarr_chunks
 
 thisdir = pathlib.Path(__file__).parent.resolve()
 
@@ -72,12 +73,15 @@ def prepare_figure_bundle(
         # Generate extension manifest
         _write_extension_manifest(required_extensions, tmpdir)
 
+        # Create the .zmetadata file
         zarr.consolidate_metadata(zarr_group._zarr_group.store)
 
         # It's important that we remove all the metadata files except for the
-        # consolidated one, because otherwise we may get inconstencies
-        # once we start editing the zarr data from the browser.
+        # consolidated one so there is a single source of truth.
         _remove_metadata_files_except_consolidated(pathlib.Path(tmpdir) / "data.zarr")
+
+        # Consolidate zarr chunks into larger files to reduce upload count
+        consolidate_zarr_chunks(pathlib.Path(tmpdir) / "data.zarr")
     finally:
         if _check_zarr_version() == 3:
             zarr.config.set({"default_zarr_format": old_default_zarr_format})  # type: ignore
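The net effect on the bundle: the many small zarr chunk files are replaced by a few _consolidated_<n>.dat files, and .zmetadata gains a "refs" mapping from each original chunk path to [consolidated file, byte offset, length] (see the new _zarr_consolidate module below). With made-up array names and sizes, the added section might look like:

"refs": {
  "myarray/0.0": ["_consolidated_0.dat", 0, 524288],
  "myarray/0.1": ["_consolidated_0.dat", 524288, 524288]
}
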
@@ -42,6 +42,9 @@ class FileUploadCORSRequestHandler(CORSRequestHandler):
             "Accept-Ranges, Content-Encoding, Content-Length, Content-Range",
         )
 
+        # Always send Accept-Ranges header to indicate byte-range support
+        self.send_header("Accept-Ranges", "bytes")
+
         # Prevent browser caching - important for when we are editing figures in place
         # This ensures the browser always fetches the latest version of files
         self.send_header("Cache-Control", "no-cache, no-store, must-revalidate")
@@ -29,6 +29,9 @@ class CORSRequestHandler(SimpleHTTPRequestHandler):
             "Accept-Ranges, Content-Encoding, Content-Length, Content-Range",
         )
 
+        # Always send Accept-Ranges header to indicate byte-range support
+        self.send_header("Accept-Ranges", "bytes")
+
         # Prevent browser caching - important for when we are editing figures in place
         # This ensures the browser always fetches the latest version of files
         self.send_header("Cache-Control", "no-cache, no-store, must-revalidate")
@@ -45,6 +48,99 @@ class CORSRequestHandler(SimpleHTTPRequestHandler):
         """Reject PUT requests when file upload is not enabled."""
         self.send_error(405, "Method Not Allowed")
 
+    def do_GET(self):
+        """Handle GET requests with support for Range requests."""
+        # Translate path and check if file exists
+        path = self.translate_path(self.path)
+
+        # Check if path is a file
+        if not os.path.isfile(path):
+            # Let parent class handle directories and 404s
+            return super().do_GET()
+
+        # Check for Range header
+        range_header = self.headers.get("Range")
+
+        if range_header is None:
+            # No range request, use parent's implementation
+            return super().do_GET()
+
+        # Parse range header
+        try:
+            # Range header format: "bytes=start-end"
+            if not range_header.startswith("bytes="):
+                # Invalid range format, ignore and serve full file
+                return super().do_GET()
+
+            range_spec = range_header[6:]  # Remove "bytes=" prefix
+
+            # Get file size
+            file_size = os.path.getsize(path)
+
+            # Parse range specification
+            if "-" not in range_spec:
+                # Invalid format
+                self.send_error(400, "Invalid Range header")
+                return
+
+            range_parts = range_spec.split("-", 1)
+
+            # Determine start and end positions
+            if range_parts[0]:  # Start position specified
+                start = int(range_parts[0])
+                if range_parts[1]:  # End position also specified
+                    end = int(range_parts[1])
+                else:  # Open-ended range (e.g., "1024-")
+                    end = file_size - 1
+            else:  # Suffix range (e.g., "-500" means last 500 bytes)
+                if not range_parts[1]:
+                    self.send_error(400, "Invalid Range header")
+                    return
+                suffix_length = int(range_parts[1])
+                start = max(0, file_size - suffix_length)
+                end = file_size - 1
+
+            # Validate range
+            if start < 0 or end >= file_size or start > end:
+                self.send_response(416, "Range Not Satisfiable")
+                self.send_header("Content-Range", f"bytes */{file_size}")
+                self.end_headers()
+                return
+
+            # Calculate content length
+            content_length = end - start + 1
+
+            # Guess content type
+            import mimetypes
+
+            content_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
+
+            # Send 206 Partial Content response
+            self.send_response(206, "Partial Content")
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Length", str(content_length))
+            self.send_header("Content-Range", f"bytes {start}-{end}/{file_size}")
+            self.end_headers()
+
+            # Send the requested byte range
+            with open(path, "rb") as f:
+                f.seek(start)
+                remaining = content_length
+                while remaining > 0:
+                    chunk_size = min(8192, remaining)
+                    chunk = f.read(chunk_size)
+                    if not chunk:
+                        break
+                    self.wfile.write(chunk)
+                    remaining -= len(chunk)
+
+        except ValueError:
+            # Invalid range values
+            self.send_error(400, "Invalid Range header")
+        except Exception as e:
+            # Log error and return 500
+            self.send_error(500, f"Internal Server Error: {str(e)}")
+
     def log_message(self, format, *args):
         pass
 
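A ranged GET against this handler should come back as 206 Partial Content with a Content-Range header and exactly the requested bytes. A client-side sketch, again assuming localhost:8080 and a hypothetical file path:

import urllib.request

req = urllib.request.Request(
    "http://localhost:8080/data.zarr/_consolidated_0.dat",  # hypothetical path
    headers={"Range": "bytes=0-1023"},
)
with urllib.request.urlopen(req) as resp:
    assert resp.status == 206  # Partial Content
    print(resp.headers["Content-Range"])  # e.g. "bytes 0-1023/104857600"
    data = resp.read()  # exactly 1024 bytes
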
figpack/_zarr_consolidate.py ADDED
@@ -0,0 +1,185 @@
+import os
+import pathlib
+import json
+from typing import Dict, List, Tuple
+
+
+def consolidate_zarr_chunks(
+    zarr_dir: pathlib.Path, max_file_size: int = 100_000_000
+) -> None:
+    """
+    Consolidate zarr chunk files into larger files to reduce the number of files
+    that need to be uploaded. Updates the .zmetadata file with refs mapping.
+
+    Args:
+        zarr_dir: Path to the zarr directory
+        max_file_size: Maximum size for each consolidated file in bytes (default: 100 MB)
+    """
+    if not zarr_dir.is_dir():
+        raise ValueError(f"Expected a directory, got: {zarr_dir}")
+
+    # Read the existing .zmetadata file
+    zmetadata_path = zarr_dir / ".zmetadata"
+    if not zmetadata_path.exists():
+        raise ValueError(f"No .zmetadata file found at {zmetadata_path}")
+
+    with open(zmetadata_path, "r") as f:
+        zmetadata = json.load(f)
+
+    # Collect all chunk files (non-metadata files)
+    chunk_files = _collect_chunk_files(zarr_dir)
+
+    if not chunk_files:
+        # No chunk files to consolidate
+        return
+
+    # Group chunk files into consolidated files
+    consolidated_groups = _group_files_by_size(chunk_files, max_file_size)
+
+    # Create consolidated files and build refs mapping
+    refs: Dict[str, List] = {}
+    for group_idx, file_group in enumerate(consolidated_groups):
+        consolidated_filename = f"_consolidated_{group_idx}.dat"
+        consolidated_path = zarr_dir / consolidated_filename
+
+        # Write the consolidated file and track byte offsets
+        current_offset = 0
+        with open(consolidated_path, "wb") as consolidated_file:
+            for file_path, relative_path in file_group:
+                # Read the chunk file
+                with open(file_path, "rb") as chunk_file:
+                    chunk_data = chunk_file.read()
+
+                # Write to consolidated file
+                consolidated_file.write(chunk_data)
+
+                # Add to refs mapping
+                refs[relative_path] = [
+                    consolidated_filename,
+                    current_offset,
+                    len(chunk_data),
+                ]
+
+                # Update offset
+                current_offset += len(chunk_data)
+
+    # Update .zmetadata with refs
+    zmetadata["refs"] = refs
+
+    # Write updated .zmetadata
+    with open(zmetadata_path, "w") as f:
+        json.dump(zmetadata, f, indent=2)
+
+    # Delete original chunk files
+    for file_path, _ in chunk_files:
+        try:
+            file_path.unlink()
+        except Exception as e:
+            print(f"Warning: could not remove file {file_path}: {e}")
+
+    # Clean up empty directories
+    _remove_empty_directories(zarr_dir)
+
+
+def _collect_chunk_files(zarr_dir: pathlib.Path) -> List[Tuple[pathlib.Path, str]]:
+    """
+    Collect all chunk files in the zarr directory (excluding metadata files).
+
+    Args:
+        zarr_dir: Path to the zarr directory
+
+    Returns:
+        List of tuples (absolute_path, relative_path) for each chunk file
+    """
+    chunk_files = []
+    metadata_files = {".zmetadata", ".zarray", ".zgroup", ".zattrs"}
+
+    for root, dirs, files in os.walk(zarr_dir):
+        for file in files:
+            # Skip metadata files
+            if file in metadata_files or file.startswith("_consolidated_"):
+                continue
+
+            file_path = pathlib.Path(root) / file
+            # Get relative path from zarr_dir
+            relative_path = file_path.relative_to(zarr_dir).as_posix()
+
+            chunk_files.append((file_path, relative_path))
+
+    return chunk_files
+
+
+def _group_files_by_size(
+    files: List[Tuple[pathlib.Path, str]], max_size: int
+) -> List[List[Tuple[pathlib.Path, str]]]:
+    """
+    Group files into bins where each bin's total size is <= max_size.
+
+    Uses a simple first-fit bin packing algorithm.
+
+    Args:
+        files: List of (file_path, relative_path) tuples
+        max_size: Maximum total size for each group in bytes
+
+    Returns:
+        List of groups, where each group is a list of (file_path, relative_path) tuples
+    """
+    # Get file sizes
+    files_with_sizes = []
+    for file_path, relative_path in files:
+        try:
+            size = file_path.stat().st_size
+            files_with_sizes.append((file_path, relative_path, size))
+        except Exception as e:
+            print(f"Warning: could not get size of {file_path}: {e}")
+            continue
+
+    # Sort by size (largest first) for better packing
+    files_with_sizes.sort(key=lambda x: x[2], reverse=True)
+
+    # First-fit bin packing
+    groups: List[List[Tuple[pathlib.Path, str]]] = []
+    group_sizes: List[int] = []
+
+    for file_path, relative_path, size in files_with_sizes:
+        # If file is larger than max_size, put it in its own group
+        if size > max_size:
+            groups.append([(file_path, relative_path)])
+            group_sizes.append(size)
+            continue
+
+        # Try to fit into existing group
+        placed = False
+        for i, group_size in enumerate(group_sizes):
+            if group_size + size <= max_size:
+                groups[i].append((file_path, relative_path))
+                group_sizes[i] += size
+                placed = True
+                break
+
+        # If doesn't fit anywhere, create new group
+        if not placed:
+            groups.append([(file_path, relative_path)])
+            group_sizes.append(size)
+
+    return groups
+
+
+def _remove_empty_directories(zarr_dir: pathlib.Path) -> None:
+    """
+    Remove empty directories within the zarr directory.
+
+    Args:
+        zarr_dir: Path to the zarr directory
+    """
+    # Walk bottom-up so we can remove empty parent directories
+    for root, dirs, files in os.walk(zarr_dir, topdown=False):
+        for dir_name in dirs:
+            dir_path = pathlib.Path(root) / dir_name
+            try:
+                # Only remove if directory is empty
+                if not any(dir_path.iterdir()):
+                    dir_path.rmdir()
+            except Exception:
+                # Directory not empty or other error, skip
+                pass
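
Combined with the Range support above, this should let the viewer fetch any original chunk out of a packed file with a single byte-range request. A minimal usage sketch, with a hypothetical bundle path and chunk key:

import json
import pathlib

from figpack import consolidate_zarr_chunks

zarr_dir = pathlib.Path("figure_bundle/data.zarr")  # hypothetical bundle location
consolidate_zarr_chunks(zarr_dir, max_file_size=100_000_000)

# Resolve one original chunk through the refs mapping written into .zmetadata
zmetadata = json.loads((zarr_dir / ".zmetadata").read_text())
filename, offset, length = zmetadata["refs"]["myarray/0.0"]  # hypothetical chunk key
with open(zarr_dir / filename, "rb") as f:
    f.seek(offset)
    chunk_bytes = f.read(length)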