figpack 0.2.38__tar.gz → 0.2.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of figpack has been flagged by the registry as possibly problematic; see the registry's advisory page for details.
- {figpack-0.2.38/figpack.egg-info → figpack-0.2.39}/PKG-INFO +2 -2
- {figpack-0.2.38 → figpack-0.2.39}/README.md +1 -1
- {figpack-0.2.38 → figpack-0.2.39}/figpack/__init__.py +1 -1
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/_bundle_utils.py +6 -2
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/_file_handler.py +3 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/_server_manager.py +96 -0
- figpack-0.2.39/figpack/core/_zarr_consolidate.py +185 -0
- figpack-0.2.38/figpack/figpack-figure-dist/assets/index-DsU-DhF6.js → figpack-0.2.39/figpack/figpack-figure-dist/assets/index-ST_DU17U.js +23 -23
- {figpack-0.2.38 → figpack-0.2.39}/figpack/figpack-figure-dist/index.html +1 -1
- {figpack-0.2.38 → figpack-0.2.39/figpack.egg-info}/PKG-INFO +2 -2
- {figpack-0.2.38 → figpack-0.2.39}/figpack.egg-info/SOURCES.txt +2 -1
- {figpack-0.2.38 → figpack-0.2.39}/pyproject.toml +2 -2
- {figpack-0.2.38 → figpack-0.2.39}/LICENSE +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/MANIFEST.in +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/cli.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/__init__.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/_save_figure.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/_show_view.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/_upload_bundle.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/_view_figure.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/config.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/extension_view.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/figpack_extension.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/figpack_view.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/core/zarr.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/extensions.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/figpack-figure-dist/assets/index-V5m_wCvw.css +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/figpack-figure-dist/assets/neurosift-logo-CLsuwLMO.png +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/Box.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/CaptionedView.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/DataFrame.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/Gallery.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/GalleryItem.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/Iframe.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/Image.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/LayoutItem.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/Markdown.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/MatplotlibFigure.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/MountainLayout.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/MountainLayoutItem.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/MultiChannelTimeseries.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/PlotlyExtension/PlotlyExtension.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/PlotlyExtension/__init__.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/PlotlyExtension/_plotly_extension.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/PlotlyExtension/plotly_view.js +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/Spectrogram.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/Splitter.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/TabLayout.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/TabLayoutItem.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/TimeseriesGraph.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack/views/__init__.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack.egg-info/dependency_links.txt +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack.egg-info/entry_points.txt +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack.egg-info/requires.txt +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/figpack.egg-info/top_level.txt +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/setup.cfg +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_box.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_cli.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_core.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_dataframe.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_extension_system.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_figpack_view.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_file_handler.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_gallery.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_image.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_markdown.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_matplotlib_figure.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_multichannel_timeseries.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_plotly_figure.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_server_manager.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_spectrogram.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_splitter.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_tablayout.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_timeseries_graph.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_upload_bundle.py +0 -0
- {figpack-0.2.38 → figpack-0.2.39}/tests/test_view_figure.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: figpack
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.39
|
|
4
4
|
Summary: A Python package for creating shareable, interactive visualizations in the browser
|
|
5
5
|
Author-email: Jeremy Magland <jmagland@flatironinstitute.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -133,7 +133,7 @@ If you use figpack in your research, please cite it:
|
|
|
133
133
|
|
|
134
134
|
Or in APA format:
|
|
135
135
|
|
|
136
|
-
> Magland, J. (2025). figpack (Version 0.2.
|
|
136
|
+
> Magland, J. (2025). figpack (Version 0.2.39) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.17419621
|
|
137
137
|
|
|
138
138
|
## Contributing
|
|
139
139
|
|
|
@@ -57,7 +57,7 @@ If you use figpack in your research, please cite it:
|
|
|
57
57
|
|
|
58
58
|
Or in APA format:
|
|
59
59
|
|
|
60
|
-
> Magland, J. (2025). figpack (Version 0.2.
|
|
60
|
+
> Magland, J. (2025). figpack (Version 0.2.39) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.17419621
|
|
61
61
|
|
|
62
62
|
## Contributing
|
|
63
63
|
|
|
@@ -9,6 +9,7 @@ from .figpack_view import FigpackView
|
|
|
9
9
|
from .figpack_extension import FigpackExtension
|
|
10
10
|
from .extension_view import ExtensionView
|
|
11
11
|
from .zarr import Group, _check_zarr_version
|
|
12
|
+
from ._zarr_consolidate import consolidate_zarr_chunks
|
|
12
13
|
|
|
13
14
|
thisdir = pathlib.Path(__file__).parent.resolve()
|
|
14
15
|
|
|
@@ -72,12 +73,15 @@ def prepare_figure_bundle(
|
|
|
72
73
|
# Generate extension manifest
|
|
73
74
|
_write_extension_manifest(required_extensions, tmpdir)
|
|
74
75
|
|
|
76
|
+
# Create the .zmetadata file
|
|
75
77
|
zarr.consolidate_metadata(zarr_group._zarr_group.store)
|
|
76
78
|
|
|
77
79
|
# It's important that we remove all the metadata files except for the
|
|
78
|
-
# consolidated one
|
|
79
|
-
# once we start editing the zarr data from the browser.
|
|
80
|
+
# consolidated one so there is a single source of truth.
|
|
80
81
|
_remove_metadata_files_except_consolidated(pathlib.Path(tmpdir) / "data.zarr")
|
|
82
|
+
|
|
83
|
+
# Consolidate zarr chunks into larger files to reduce upload count
|
|
84
|
+
consolidate_zarr_chunks(pathlib.Path(tmpdir) / "data.zarr")
|
|
81
85
|
finally:
|
|
82
86
|
if _check_zarr_version() == 3:
|
|
83
87
|
zarr.config.set({"default_zarr_format": old_default_zarr_format}) # type: ignore
|
|
@@ -42,6 +42,9 @@ class FileUploadCORSRequestHandler(CORSRequestHandler):
|
|
|
42
42
|
"Accept-Ranges, Content-Encoding, Content-Length, Content-Range",
|
|
43
43
|
)
|
|
44
44
|
|
|
45
|
+
# Always send Accept-Ranges header to indicate byte-range support
|
|
46
|
+
self.send_header("Accept-Ranges", "bytes")
|
|
47
|
+
|
|
45
48
|
# Prevent browser caching - important for when we are editing figures in place
|
|
46
49
|
# This ensures the browser always fetches the latest version of files
|
|
47
50
|
self.send_header("Cache-Control", "no-cache, no-store, must-revalidate")
|
|
@@ -29,6 +29,9 @@ class CORSRequestHandler(SimpleHTTPRequestHandler):
|
|
|
29
29
|
"Accept-Ranges, Content-Encoding, Content-Length, Content-Range",
|
|
30
30
|
)
|
|
31
31
|
|
|
32
|
+
# Always send Accept-Ranges header to indicate byte-range support
|
|
33
|
+
self.send_header("Accept-Ranges", "bytes")
|
|
34
|
+
|
|
32
35
|
# Prevent browser caching - important for when we are editing figures in place
|
|
33
36
|
# This ensures the browser always fetches the latest version of files
|
|
34
37
|
self.send_header("Cache-Control", "no-cache, no-store, must-revalidate")
|
|
@@ -45,6 +48,99 @@ class CORSRequestHandler(SimpleHTTPRequestHandler):
|
|
|
45
48
|
"""Reject PUT requests when file upload is not enabled."""
|
|
46
49
|
self.send_error(405, "Method Not Allowed")
|
|
47
50
|
|
|
51
|
+
def do_GET(self):
    """Handle GET requests, with support for HTTP Range (partial content) requests.

    Directories, missing files, requests without a Range header, and range
    units other than "bytes" are all delegated to the parent class, which
    serves the full resource. Only a single byte range is supported, in the
    forms "bytes=start-end", "bytes=start-" and "bytes=-suffix"; malformed
    specs get 400, unsatisfiable ones get 416 (RFC 7233).
    """
    # Local import: only needed on the range-serving path.
    import mimetypes

    path = self.translate_path(self.path)

    # Let the parent class handle directories and 404s.
    if not os.path.isfile(path):
        return super().do_GET()

    range_header = self.headers.get("Range")
    if range_header is None:
        # No range requested; parent serves the whole file.
        return super().do_GET()

    if not range_header.startswith("bytes="):
        # Unknown range unit: RFC 7233 permits ignoring it and serving
        # the full representation.
        return super().do_GET()

    range_spec = range_header[len("bytes="):].strip()
    file_size = os.path.getsize(path)

    # Parse the range spec into an inclusive (start, end) pair. All parse
    # errors are reported BEFORE any response headers are written.
    try:
        if "-" not in range_spec:
            self.send_error(400, "Invalid Range header")
            return
        first, last = range_spec.split("-", 1)
        if first:  # "start-end" or open-ended "start-"
            start = int(first)
            end = int(last) if last else file_size - 1
        else:  # suffix range "-N": the final N bytes
            if not last:
                self.send_error(400, "Invalid Range header")
                return
            suffix_length = int(last)
            start = max(0, file_size - suffix_length)
            end = file_size - 1
    except ValueError:
        # Non-numeric values (this also rejects multi-range specs).
        self.send_error(400, "Invalid Range header")
        return

    if start < 0 or end >= file_size or start > end:
        # Unsatisfiable range: advertise the actual size per RFC 7233.
        self.send_response(416, "Range Not Satisfiable")
        self.send_header("Content-Range", f"bytes */{file_size}")
        self.end_headers()
        return

    content_length = end - start + 1
    content_type = mimetypes.guess_type(path)[0] or "application/octet-stream"

    try:
        # Send 206 Partial Content response headers.
        self.send_response(206, "Partial Content")
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(content_length))
        self.send_header("Content-Range", f"bytes {start}-{end}/{file_size}")
        self.end_headers()

        # Stream the requested byte range in fixed-size chunks.
        with open(path, "rb") as f:
            f.seek(start)
            remaining = content_length
            while remaining > 0:
                chunk = f.read(min(8192, remaining))
                if not chunk:
                    break
                self.wfile.write(chunk)
                remaining -= len(chunk)
    except ConnectionError:
        # Client disconnected mid-transfer; headers (and possibly part of
        # the body) are already on the wire, so a send_error here would
        # just fail again. Drop the connection silently.
        pass
    except Exception as e:
        # Best effort: may itself fail if headers were already sent.
        self.send_error(500, f"Internal Server Error: {str(e)}")
+
|
|
48
144
|
def log_message(self, format, *args):
    # Intentionally suppress the default per-request logging of
    # SimpleHTTPRequestHandler to keep the console quiet while serving.
    pass
|
|
50
146
|
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
import json
|
|
4
|
+
from typing import Dict, List, Tuple
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def consolidate_zarr_chunks(
    zarr_dir: pathlib.Path, max_file_size: int = 100_000_000
) -> None:
    """
    Consolidate zarr chunk files into larger files to reduce the number of files
    that need to be uploaded. Updates the .zmetadata file with a "refs" mapping
    of relative chunk path -> [consolidated_filename, byte_offset, byte_length].

    Args:
        zarr_dir: Path to the zarr directory
        max_file_size: Maximum size for each consolidated file in bytes (default: 100 MB)

    Raises:
        ValueError: If zarr_dir is not a directory or contains no .zmetadata file.
    """
    if not zarr_dir.is_dir():
        raise ValueError(f"Expected a directory, got: {zarr_dir}")

    # Read the existing .zmetadata file
    zmetadata_path = zarr_dir / ".zmetadata"
    if not zmetadata_path.exists():
        raise ValueError(f"No .zmetadata file found at {zmetadata_path}")

    with open(zmetadata_path, "r") as f:
        zmetadata = json.load(f)

    # Collect all chunk files (non-metadata files)
    chunk_files = _collect_chunk_files(zarr_dir)

    if not chunk_files:
        # No chunk files to consolidate
        return

    # Group chunk files into consolidated files
    consolidated_groups = _group_files_by_size(chunk_files, max_file_size)

    # Create consolidated files and build the refs mapping
    refs: Dict[str, List] = {}
    for group_idx, file_group in enumerate(consolidated_groups):
        consolidated_filename = f"_consolidated_{group_idx}.dat"
        consolidated_path = zarr_dir / consolidated_filename

        # Stream each chunk into the consolidated file, tracking byte
        # offsets. Chunks are copied in fixed-size pieces so that even a
        # chunk file larger than max_file_size (oversized files get a
        # group of their own) is never loaded fully into memory.
        current_offset = 0
        with open(consolidated_path, "wb") as consolidated_file:
            for file_path, relative_path in file_group:
                bytes_written = 0
                with open(file_path, "rb") as chunk_file:
                    while True:
                        piece = chunk_file.read(1024 * 1024)
                        if not piece:
                            break
                        consolidated_file.write(piece)
                        bytes_written += len(piece)

                # Record where this chunk now lives
                refs[relative_path] = [
                    consolidated_filename,
                    current_offset,
                    bytes_written,
                ]
                current_offset += bytes_written

    # Update .zmetadata with the refs mapping (single source of truth)
    zmetadata["refs"] = refs
    with open(zmetadata_path, "w") as f:
        json.dump(zmetadata, f, indent=2)

    # Delete the now-redundant original chunk files
    for file_path, _ in chunk_files:
        try:
            file_path.unlink()
        except Exception as e:
            print(f"Warning: could not remove file {file_path}: {e}")

    # Clean up directories left empty by the chunk removal
    _remove_empty_directories(zarr_dir)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _collect_chunk_files(zarr_dir: pathlib.Path) -> List[Tuple[pathlib.Path, str]]:
|
|
85
|
+
"""
|
|
86
|
+
Collect all chunk files in the zarr directory (excluding metadata files).
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
zarr_dir: Path to the zarr directory
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
List of tuples (absolute_path, relative_path) for each chunk file
|
|
93
|
+
"""
|
|
94
|
+
chunk_files = []
|
|
95
|
+
metadata_files = {".zmetadata", ".zarray", ".zgroup", ".zattrs"}
|
|
96
|
+
|
|
97
|
+
for root, dirs, files in os.walk(zarr_dir):
|
|
98
|
+
for file in files:
|
|
99
|
+
# Skip metadata files
|
|
100
|
+
if file in metadata_files or file.startswith("_consolidated_"):
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
file_path = pathlib.Path(root) / file
|
|
104
|
+
# Get relative path from zarr_dir
|
|
105
|
+
relative_path = file_path.relative_to(zarr_dir).as_posix()
|
|
106
|
+
|
|
107
|
+
chunk_files.append((file_path, relative_path))
|
|
108
|
+
|
|
109
|
+
return chunk_files
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _group_files_by_size(
|
|
113
|
+
files: List[Tuple[pathlib.Path, str]], max_size: int
|
|
114
|
+
) -> List[List[Tuple[pathlib.Path, str]]]:
|
|
115
|
+
"""
|
|
116
|
+
Group files into bins where each bin's total size is <= max_size.
|
|
117
|
+
|
|
118
|
+
Uses a simple first-fit bin packing algorithm.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
files: List of (file_path, relative_path) tuples
|
|
122
|
+
max_size: Maximum total size for each group in bytes
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
List of groups, where each group is a list of (file_path, relative_path) tuples
|
|
126
|
+
"""
|
|
127
|
+
# Get file sizes
|
|
128
|
+
files_with_sizes = []
|
|
129
|
+
for file_path, relative_path in files:
|
|
130
|
+
try:
|
|
131
|
+
size = file_path.stat().st_size
|
|
132
|
+
files_with_sizes.append((file_path, relative_path, size))
|
|
133
|
+
except Exception as e:
|
|
134
|
+
print(f"Warning: could not get size of {file_path}: {e}")
|
|
135
|
+
continue
|
|
136
|
+
|
|
137
|
+
# Sort by size (largest first) for better packing
|
|
138
|
+
files_with_sizes.sort(key=lambda x: x[2], reverse=True)
|
|
139
|
+
|
|
140
|
+
# First-fit bin packing
|
|
141
|
+
groups: List[List[Tuple[pathlib.Path, str]]] = []
|
|
142
|
+
group_sizes: List[int] = []
|
|
143
|
+
|
|
144
|
+
for file_path, relative_path, size in files_with_sizes:
|
|
145
|
+
# If file is larger than max_size, put it in its own group
|
|
146
|
+
if size > max_size:
|
|
147
|
+
groups.append([(file_path, relative_path)])
|
|
148
|
+
group_sizes.append(size)
|
|
149
|
+
continue
|
|
150
|
+
|
|
151
|
+
# Try to fit into existing group
|
|
152
|
+
placed = False
|
|
153
|
+
for i, group_size in enumerate(group_sizes):
|
|
154
|
+
if group_size + size <= max_size:
|
|
155
|
+
groups[i].append((file_path, relative_path))
|
|
156
|
+
group_sizes[i] += size
|
|
157
|
+
placed = True
|
|
158
|
+
break
|
|
159
|
+
|
|
160
|
+
# If doesn't fit anywhere, create new group
|
|
161
|
+
if not placed:
|
|
162
|
+
groups.append([(file_path, relative_path)])
|
|
163
|
+
group_sizes.append(size)
|
|
164
|
+
|
|
165
|
+
return groups
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _remove_empty_directories(zarr_dir: pathlib.Path) -> None:
|
|
169
|
+
"""
|
|
170
|
+
Remove empty directories within the zarr directory.
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
zarr_dir: Path to the zarr directory
|
|
174
|
+
"""
|
|
175
|
+
# Walk bottom-up so we can remove empty parent directories
|
|
176
|
+
for root, dirs, files in os.walk(zarr_dir, topdown=False):
|
|
177
|
+
for dir_name in dirs:
|
|
178
|
+
dir_path = pathlib.Path(root) / dir_name
|
|
179
|
+
try:
|
|
180
|
+
# Only remove if directory is empty
|
|
181
|
+
if not any(dir_path.iterdir()):
|
|
182
|
+
dir_path.rmdir()
|
|
183
|
+
except Exception:
|
|
184
|
+
# Directory not empty or other error, skip
|
|
185
|
+
pass
|