figpack 0.2.27__py3-none-any.whl → 0.2.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
+from typing import Optional, Union
 import hashlib
 import json
 import pathlib
@@ -114,62 +115,31 @@ def _upload_single_file_with_signed_url(
         else:
             break
 
+    assert last_exception is not None
     raise last_exception
 
 
 MAX_WORKERS_FOR_UPLOAD = 16
 
 
-def _compute_deterministic_figure_hash(tmpdir_path: pathlib.Path) -> str:
-    """
-    Compute a deterministic figure ID based on SHA1 hashes of all files
-
-    Returns:
-        str: 40-character SHA1 hash representing the content of all files
-    """
-    file_hashes = []
-
-    # Collect all files and their hashes
-    for file_path in sorted(tmpdir_path.rglob("*")):
-        if file_path.is_file():
-            relative_path = file_path.relative_to(tmpdir_path)
-
-            # Compute SHA1 hash of file content
-            sha1_hash = hashlib.sha1()
-            with open(file_path, "rb") as f:
-                for chunk in iter(lambda: f.read(4096), b""):
-                    sha1_hash.update(chunk)
-
-            # Include both the relative path and content hash to ensure uniqueness
-            file_info = f"{relative_path}:{sha1_hash.hexdigest()}"
-            file_hashes.append(file_info)
-
-    # Create final hash from all file hashes
-    combined_hash = hashlib.sha1()
-    for file_hash in file_hashes:
-        combined_hash.update(file_hash.encode("utf-8"))
-
-    return combined_hash.hexdigest()
-
-
 def _create_or_get_figure(
-    figure_hash: str,
-    api_key: str,
-    total_files: int = None,
-    total_size: int = None,
-    title: str = None,
+    api_key: Optional[str],
+    total_files: Optional[int] = None,
+    total_size: Optional[int] = None,
+    title: Optional[str] = None,
     ephemeral: bool = False,
+    source_url: Optional[str] = None,
 ) -> dict:
     """
     Create a new figure or get existing figure information
 
     Args:
-        figure_hash: The hash of the figure
         api_key: The API key for authentication (required for non-ephemeral)
         total_files: Optional total number of files
        total_size: Optional total size of files
        title: Optional title for the figure
        ephemeral: Whether to create an ephemeral figure
+        source_url: Optional source URL for the figure (must be unique)
 
     Returns:
         dict: Figure information from the API
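
Note on the added `assert last_exception is not None`: it exists for static type checkers, which otherwise see `last_exception` as `Optional[Exception]` at the `raise`. A minimal sketch of the retry shape this tail belongs to (the loop and helper names here are assumptions, not figpack's actual code):

from typing import Callable, Optional

def upload_with_retries(upload_once: Callable[[], None], max_retries: int = 3) -> None:
    last_exception: Optional[Exception] = None
    for _ in range(max_retries):
        try:
            upload_once()
            return  # success, no retry needed
        except Exception as e:
            last_exception = e  # remember the most recent failure
    # All attempts failed; the assert narrows Optional[Exception] to
    # Exception so the checker accepts the re-raise.
    assert last_exception is not None
    raise last_exception
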
@@ -178,8 +148,7 @@ def _create_or_get_figure(
     if not ephemeral and api_key is None:
         raise ValueError("API key is required for non-ephemeral figures")
 
-    payload = {
-        "figureHash": figure_hash,
+    payload: dict[str, Union[str, int]] = {
         "figpackVersion": __version__,
         "bucket": FIGPACK_BUCKET,
     }
@@ -196,6 +165,8 @@ def _create_or_get_figure(
         payload["title"] = title
     if ephemeral:
         payload["ephemeral"] = True
+    if source_url is not None:
+        payload["sourceUrl"] = source_url
 
     # Use the same endpoint for both regular and ephemeral figures
     response = requests.post(f"{FIGPACK_API_BASE_URL}/api/figures/create", json=payload)
@@ -206,12 +177,12 @@ def _create_or_get_figure(
             error_msg = error_data.get("message", "Unknown error")
         except:
             error_msg = f"HTTP {response.status_code}"
-        raise Exception(f"Failed to create figure {figure_hash}: {error_msg}")
+        raise Exception(f"Failed to create figure: {error_msg}")
 
     response_data = response.json()
     if not response_data.get("success"):
         raise Exception(
-            f"Failed to create figure {figure_hash}: {response_data.get('message', 'Unknown error')}"
+            f"Failed to create figure: {response_data.get('message', 'Unknown error')}"
         )
 
     return response_data
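
The new `payload` annotation matters because the dict later receives both string and integer values; without an explicit `dict[str, Union[str, int]]`, a checker infers `dict[str, str]` from the initial literal and rejects the later assignments. A tiny illustration (values are made up):

from typing import Union

payload: dict[str, Union[str, int]] = {
    "figpackVersion": "0.2.40",
}
payload["totalFiles"] = 42   # accepted: int is in the declared value union
payload["ephemeral"] = True  # accepted: bool is an int subtype to checkers
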
@@ -252,10 +223,11 @@ def _finalize_figure(figure_url: str, api_key: str) -> dict:
 
 def _upload_bundle(
     tmpdir: str,
-    api_key: str,
-    title: str = None,
+    api_key: Optional[str],
+    title: Optional[str] = None,
     ephemeral: bool = False,
     use_consolidated_metadata_only: bool = False,
+    source_url: Optional[str] = None,
 ) -> str:
     """
     Upload the prepared bundle to the cloud using the new database-driven approach
@@ -267,12 +239,10 @@ def _upload_bundle(
         ephemeral: Whether to create an ephemeral figure
         use_consolidated_metadata_only: If True, excludes individual zarr metadata files
             (.zgroup, .zarray, .zattrs) since they are included in .zmetadata
+        source_url: Optional source URL for the figure (must be unique)
     """
     tmpdir_path = pathlib.Path(tmpdir)
 
-    # Compute deterministic figure ID based on file contents
-    figure_hash = _compute_deterministic_figure_hash(tmpdir_path)
-
     # Collect all files to upload
     all_files = []
     for file_path in tmpdir_path.rglob("*"):
@@ -293,7 +263,12 @@ def _upload_bundle(
 
     # Find available figure ID and create/get figure in database with metadata
     result = _create_or_get_figure(
-        figure_hash, api_key, total_files, total_size, title=title, ephemeral=ephemeral
+        api_key,
+        total_files,
+        total_size,
+        title=title,
+        ephemeral=ephemeral,
+        source_url=source_url,
     )
     figure_info = result.get("figure", {})
     figure_url = figure_info.get("figureUrl")
@@ -329,7 +304,9 @@ def _upload_bundle(
 
         # Get signed URLs for this batch
         try:
-            signed_urls_map = _get_batch_signed_urls(figure_url, batch, api_key)
+            signed_urls_map = _get_batch_signed_urls(
+                figure_url, batch, api_key if api_key else ""
+            )
         except Exception as e:
             print(f"Failed to get signed URLs for batch {batch_num}: {e}")
             raise
@@ -400,7 +377,9 @@ def _upload_bundle(
     try:
         # Use batch API for manifest
         manifest_batch = [("manifest.json", temp_file_path)]
-        signed_urls_map = _get_batch_signed_urls(figure_url, manifest_batch, api_key)
+        signed_urls_map = _get_batch_signed_urls(
+            figure_url, manifest_batch, api_key if api_key else ""
+        )
 
         if "manifest.json" not in signed_urls_map:
             raise Exception("No signed URL returned for manifest.json")
@@ -418,12 +397,45 @@ def _upload_bundle(
 
     # Finalize the figure upload
     print("Finalizing figure...")
-    _finalize_figure(figure_url, api_key)
+    _finalize_figure(figure_url, api_key if api_key else "")
    print("Upload completed successfully")
 
     return figure_url
 
 
+def get_figure_by_source_url(source_url: str) -> Optional[str]:
+    """
+    Query the API for a figure URL by its source URL
+
+    Args:
+        source_url: The source URL to search for
+
+    Returns:
+        Optional[str]: The figure URL if found, None otherwise
+    """
+    payload = {"sourceUrl": source_url}
+
+    response = requests.post(
+        f"{FIGPACK_API_BASE_URL}/api/figures/find-by-source-url", json=payload
+    )
+
+    if not response.ok:
+        if response.status_code == 404:
+            return None
+        try:
+            error_data = response.json()
+            error_msg = error_data.get("message", "Unknown error")
+        except:
+            error_msg = f"HTTP {response.status_code}"
+        raise Exception(f"Failed to query figure by source URL: {error_msg}")
+
+    response_data = response.json()
+    if not response_data.get("success"):
+        return None
+
+    return response_data.get("figureUrl")
+
+
 def _determine_content_type(file_path: str) -> str:
     """
     Determine content type for upload based on file extension
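
`get_figure_by_source_url` treats both an HTTP 404 and a non-success response body as "not found", so callers get a single `None` path. A usage sketch (the import location is an assumption; adjust to wherever figpack exposes the function):

from figpack import get_figure_by_source_url  # assumption: actual module may differ

existing = get_figure_by_source_url("https://example.org/data/session-001.nwb")
if existing is None:
    print("No figure registered for this source URL yet")
else:
    print(f"Reusing existing figure: {existing}")
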
@@ -34,10 +34,10 @@ def serve_files(
         enable_file_upload: Whether to enable PUT requests for file uploads
         max_file_size: Maximum file size in bytes for uploads (default 10MB)
     """
-    tmpdir = pathlib.Path(tmpdir)
-    tmpdir = tmpdir.resolve()
-    if not tmpdir.exists() or not tmpdir.is_dir():
-        raise SystemExit(f"Directory not found: {tmpdir}")
+    tmpdir_2 = pathlib.Path(tmpdir)
+    tmpdir_2 = tmpdir_2.resolve()
+    if not tmpdir_2.exists() or not tmpdir_2.is_dir():
+        raise SystemExit(f"Directory not found: {tmpdir_2}")
 
     # Create a temporary server manager instance for this specific directory
     # Note: We can't use the singleton ProcessServerManager here because it serves
@@ -56,29 +56,34 @@ def serve_files(
     # Choose handler based on file upload requirement
     if enable_file_upload:
 
-        def handler_factory(*args, **kwargs):
+        def handler_factory_upload_enabled(*args, **kwargs):
            return FileUploadCORSRequestHandler(
                 *args,
-                directory=str(tmpdir),
+                directory=str(tmpdir_2),
                 allow_origin=allow_origin,
                 enable_file_upload=True,
                 max_file_size=max_file_size,
                 **kwargs,
             )
 
-        upload_status = " (file upload enabled)" if enable_file_upload else ""
+        upload_status = (
+            " (file upload enabled)" if handler_factory_upload_enabled else ""
+        )
+
+        httpd = ThreadingHTTPServer(("0.0.0.0", port), handler_factory_upload_enabled)  # type: ignore
     else:
 
         def handler_factory(*args, **kwargs):
             return CORSRequestHandler(
-                *args, directory=str(tmpdir), allow_origin=allow_origin, **kwargs
+                *args, directory=str(tmpdir_2), allow_origin=allow_origin, **kwargs
             )
 
         upload_status = ""
 
-    httpd = ThreadingHTTPServer(("0.0.0.0", port), handler_factory)
+        httpd = ThreadingHTTPServer(("0.0.0.0", port), handler_factory)  # type: ignore
+
     print(
-        f"Serving {tmpdir} at http://localhost:{port} (CORS → {allow_origin}){upload_status}"
+        f"Serving {tmpdir_2} at http://localhost:{port} (CORS → {allow_origin}){upload_status}"
     )
     thread = threading.Thread(target=httpd.serve_forever, daemon=True)
     thread.start()
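
The handler factories above rely on the fact that `ThreadingHTTPServer` instantiates whatever callable it is given once per connection, so a closure can inject extra constructor arguments. A standalone sketch of the same pattern using only the standard library:

import pathlib
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer

serve_dir = pathlib.Path(".").resolve()

def handler_factory(*args, **kwargs):
    # Called by the server for each connection; the closure supplies
    # the directory argument that the stock handler accepts.
    return SimpleHTTPRequestHandler(*args, directory=str(serve_dir), **kwargs)

httpd = ThreadingHTTPServer(("127.0.0.1", 8000), handler_factory)  # type: ignore
# httpd.serve_forever()  # blocking; figpack runs this in a daemon thread

`functools.partial(SimpleHTTPRequestHandler, directory=str(serve_dir))` is an equivalent spelling; the `# type: ignore` mirrors the diff, since the stubs expect a handler class rather than an arbitrary callable.
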
@@ -0,0 +1,185 @@
+import os
+import pathlib
+import json
+from typing import Dict, List, Tuple
+
+
+def consolidate_zarr_chunks(
+    zarr_dir: pathlib.Path, max_file_size: int = 100_000_000
+) -> None:
+    """
+    Consolidate zarr chunk files into larger files to reduce the number of files
+    that need to be uploaded. Updates the .zmetadata file with refs mapping.
+
+    Args:
+        zarr_dir: Path to the zarr directory
+        max_file_size: Maximum size for each consolidated file in bytes (default: 100 MB)
+    """
+    if not zarr_dir.is_dir():
+        raise ValueError(f"Expected a directory, got: {zarr_dir}")
+
+    # Read the existing .zmetadata file
+    zmetadata_path = zarr_dir / ".zmetadata"
+    if not zmetadata_path.exists():
+        raise ValueError(f"No .zmetadata file found at {zmetadata_path}")
+
+    with open(zmetadata_path, "r") as f:
+        zmetadata = json.load(f)
+
+    # Collect all chunk files (non-metadata files)
+    chunk_files = _collect_chunk_files(zarr_dir)
+
+    if not chunk_files:
+        # No chunk files to consolidate
+        return
+
+    # Group chunk files into consolidated files
+    consolidated_groups = _group_files_by_size(chunk_files, max_file_size)
+
+    # Create consolidated files and build refs mapping
+    refs: Dict[str, List] = {}
+    for group_idx, file_group in enumerate(consolidated_groups):
+        consolidated_filename = f"_consolidated_{group_idx}.dat"
+        consolidated_path = zarr_dir / consolidated_filename
+
+        # Write the consolidated file and track byte offsets
+        current_offset = 0
+        with open(consolidated_path, "wb") as consolidated_file:
+            for file_path, relative_path in file_group:
+                # Read the chunk file
+                with open(file_path, "rb") as chunk_file:
+                    chunk_data = chunk_file.read()
+
+                # Write to consolidated file
+                consolidated_file.write(chunk_data)
+
+                # Add to refs mapping
+                refs[relative_path] = [
+                    consolidated_filename,
+                    current_offset,
+                    len(chunk_data),
+                ]
+
+                # Update offset
+                current_offset += len(chunk_data)
+
+    # Update .zmetadata with refs
+    zmetadata["refs"] = refs
+
+    # Write updated .zmetadata
+    with open(zmetadata_path, "w") as f:
+        json.dump(zmetadata, f, indent=2)
+
+    # Delete original chunk files
+    for file_path, _ in chunk_files:
+        try:
+            file_path.unlink()
+        except Exception as e:
+            print(f"Warning: could not remove file {file_path}: {e}")
+
+    # Clean up empty directories
+    _remove_empty_directories(zarr_dir)
+
+
+def _collect_chunk_files(zarr_dir: pathlib.Path) -> List[Tuple[pathlib.Path, str]]:
+    """
+    Collect all chunk files in the zarr directory (excluding metadata files).
+
+    Args:
+        zarr_dir: Path to the zarr directory
+
+    Returns:
+        List of tuples (absolute_path, relative_path) for each chunk file
+    """
+    chunk_files = []
+    metadata_files = {".zmetadata", ".zarray", ".zgroup", ".zattrs"}
+
+    for root, dirs, files in os.walk(zarr_dir):
+        for file in files:
+            # Skip metadata files
+            if file in metadata_files or file.startswith("_consolidated_"):
+                continue
+
+            file_path = pathlib.Path(root) / file
+            # Get relative path from zarr_dir
+            relative_path = file_path.relative_to(zarr_dir).as_posix()
+
+            chunk_files.append((file_path, relative_path))
+
+    return chunk_files
+
+
+def _group_files_by_size(
+    files: List[Tuple[pathlib.Path, str]], max_size: int
+) -> List[List[Tuple[pathlib.Path, str]]]:
+    """
+    Group files into bins where each bin's total size is <= max_size.
+
+    Uses a simple first-fit bin packing algorithm.
+
+    Args:
+        files: List of (file_path, relative_path) tuples
+        max_size: Maximum total size for each group in bytes
+
+    Returns:
+        List of groups, where each group is a list of (file_path, relative_path) tuples
+    """
+    # Get file sizes
+    files_with_sizes = []
+    for file_path, relative_path in files:
+        try:
+            size = file_path.stat().st_size
+            files_with_sizes.append((file_path, relative_path, size))
+        except Exception as e:
+            print(f"Warning: could not get size of {file_path}: {e}")
+            continue
+
+    # Sort by size (largest first) for better packing
+    files_with_sizes.sort(key=lambda x: x[2], reverse=True)
+
+    # First-fit bin packing
+    groups: List[List[Tuple[pathlib.Path, str]]] = []
+    group_sizes: List[int] = []
+
+    for file_path, relative_path, size in files_with_sizes:
+        # If file is larger than max_size, put it in its own group
+        if size > max_size:
+            groups.append([(file_path, relative_path)])
+            group_sizes.append(size)
+            continue
+
+        # Try to fit into existing group
+        placed = False
+        for i, group_size in enumerate(group_sizes):
+            if group_size + size <= max_size:
+                groups[i].append((file_path, relative_path))
+                group_sizes[i] += size
+                placed = True
+                break
+
+        # If doesn't fit anywhere, create new group
+        if not placed:
+            groups.append([(file_path, relative_path)])
+            group_sizes.append(size)
+
+    return groups
+
+
+def _remove_empty_directories(zarr_dir: pathlib.Path) -> None:
+    """
+    Remove empty directories within the zarr directory.
+
+    Args:
+        zarr_dir: Path to the zarr directory
+    """
+    # Walk bottom-up so we can remove empty parent directories
+    for root, dirs, files in os.walk(zarr_dir, topdown=False):
+        for dir_name in dirs:
+            dir_path = pathlib.Path(root) / dir_name
+            try:
+                # Only remove if directory is empty
+                if not any(dir_path.iterdir()):
+                    dir_path.rmdir()
+            except Exception:
+                # Directory not empty or other error, skip
+                pass
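
For context on the new module: the `refs` entries written into `.zmetadata` have the shape `relative_path -> [consolidated_filename, byte_offset, length]`, so a reader can recover any original chunk with one seek and one bounded read. A sketch of the read side (illustrative only; figpack's actual reader may differ):

import json
import pathlib

def read_consolidated_chunk(zarr_dir: pathlib.Path, relative_path: str) -> bytes:
    # Look up where the chunk landed inside the consolidated files.
    with open(zarr_dir / ".zmetadata", "r") as f:
        refs = json.load(f)["refs"]
    filename, offset, length = refs[relative_path]
    # Seek to the recorded offset and read exactly the chunk's bytes.
    with open(zarr_dir / filename, "rb") as f:
        f.seek(offset)
        return f.read(length)
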
@@ -2,9 +2,13 @@
 Base class for views that use figpack extensions
 """
 
+from typing import TYPE_CHECKING
+
 from .figpack_view import FigpackView
-from .figpack_extension import FigpackExtension
-from ..core.zarr import Group
+
+if TYPE_CHECKING:
+    from .figpack_extension import FigpackExtension
+    from .zarr import Group
 
 
 class ExtensionView(FigpackView):
@@ -12,7 +16,7 @@ class ExtensionView(FigpackView):
     Base class for views that are rendered by figpack extensions
     """
 
-    def __init__(self, *, extension: FigpackExtension, view_type: str):
+    def __init__(self, *, extension: "FigpackExtension", view_type: str) -> None:
         """
         Initialize an extension-based view
 
@@ -23,7 +27,7 @@ class ExtensionView(FigpackView):
         self.extension = extension
         self.view_type = view_type
 
-    def write_to_zarr_group(self, group: Group) -> None:
+    def write_to_zarr_group(self, group: "Group") -> None:
         """
         Write the extension view metadata to a Zarr group.
         Subclasses should call super().write_to_zarr_group(group) first,
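
The `TYPE_CHECKING` move above is the standard recipe for breaking a runtime import cycle: the guarded imports execute only under a static checker, and the quoted annotations are forward references that are never evaluated at runtime. The pattern in isolation:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by mypy/pyright only; skipped at runtime, so no circular import.
    from .figpack_extension import FigpackExtension

class SomeExtensionView:
    def __init__(self, *, extension: "FigpackExtension") -> None:
        # Quoted annotation = forward reference; resolved lazily, if ever.
        self.extension = extension
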
@@ -17,7 +17,7 @@ class FigpackExtension:
         javascript_code: str,
         additional_files: Optional[Dict[str, str]] = None,
         version: str = "1.0.0",
-    ):
+    ) -> None:
         """
         Initialize a figpack extension
 
@@ -5,7 +5,7 @@ Base view class for figpack visualization components
 import os
 import random
 import string
-from typing import Union
+from typing import Optional
 
 from .zarr import Group
 
@@ -19,17 +19,17 @@ class FigpackView:
         self,
         *,
         title: str,
-        description: Union[str, None] = None,
-        port: Union[int, None] = None,
-        open_in_browser: Union[bool, None] = None,
-        upload: Union[bool, None] = None,
-        inline: Union[bool, None] = None,
+        description: Optional[str] = None,
+        port: Optional[int] = None,
+        open_in_browser: Optional[bool] = None,
+        upload: Optional[bool] = None,
+        inline: Optional[bool] = None,
         inline_height: int = 600,
-        ephemeral: Union[bool, None] = None,
-        allow_origin: Union[str, None] = None,
-        wait_for_input: Union[bool, None] = None,
-        _dev: Union[bool, None] = None,
-    ):
+        ephemeral: Optional[bool] = None,
+        allow_origin: Optional[str] = None,
+        wait_for_input: Optional[bool] = None,
+        _dev: Optional[bool] = None,
+    ) -> None:
         """
         Display a figpack view component with intelligent environment detection and flexible display options.
         See https://flatironinstitute.github.io/figpack/show_function.html for complete documentation.
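
The `Union[X, None]` to `Optional[X]` swap in this signature is purely notational; the two spellings are identical to a type checker:

from typing import Optional, Union

def f(description: Union[str, None] = None) -> None: ...
def g(description: Optional[str] = None) -> None: ...
# f and g have exactly the same parameter type: Optional[str] is
# defined as Union[str, None].
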
@@ -86,6 +86,8 @@ class FigpackView:
             inline = False
         elif _is_in_notebook() and not upload:
             inline = True
+        else:
+            inline = False
 
         # determine open_in_browser
         if open_in_browser is None:
@@ -111,10 +113,22 @@
             upload = True
             ephemeral = True
 
+        if ephemeral is None:
+            ephemeral = False
+
+        if upload is None:
+            upload = False
+
         # determine _dev
         if _dev is None:
             _dev = os.environ.get("FIGPACK_DEV") == "1"
 
+        if port is None and os.environ.get("FIGPACK_PORT"):
+            try:
+                port = int(os.environ.get("FIGPACK_PORT", ""))
+            except Exception:
+                pass
+
         # determine wait_for_input
         if wait_for_input is None:
             wait_for_input = not _is_in_notebook()
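
The new `FIGPACK_PORT` handling applies only when `port` was not passed explicitly, and a malformed value is silently ignored rather than raised. The tolerant-parse idiom on its own (the helper name is illustrative, not figpack's):

import os
from typing import Optional

def port_from_env(var: str = "FIGPACK_PORT") -> Optional[int]:
    raw = os.environ.get(var)
    if not raw:
        return None  # unset or empty: caller keeps its default
    try:
        return int(raw)
    except ValueError:
        return None  # malformed value ignored, mirroring the diff
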
@@ -123,6 +137,8 @@
         if ephemeral and not upload:
             raise ValueError("ephemeral=True requires upload=True to be set")
 
+        _local_figure_name: Optional[str] = None
+
         if _dev:
             if open_in_browser:
                 print("** Note: In dev mode, open_in_browser is forced to False **")
@@ -160,7 +176,7 @@
             _local_figure_name=_local_figure_name if _dev else None,
         )
 
-    def save(self, output_path: str, *, title: str) -> None:
+    def save(self, output_path: str, *, title: str, description: str = "") -> None:
         """
         Save as figure either to a folder or to a .tar.gz file
         Args:
@@ -168,7 +184,7 @@
         """
         from ._save_figure import _save_figure
 
-        _save_figure(self, output_path, title=title)
+        _save_figure(self, output_path, title=title, description=description)
 
     def write_to_zarr_group(self, group: Group) -> None:
         """
figpack/core/zarr.py CHANGED
@@ -33,13 +33,13 @@ class Group:
         if _check_zarr_version() == 2:
             self._zarr_group.create_dataset(name, **kwargs)
         elif _check_zarr_version() == 3:
-            self._zarr_group.create_array(name, **kwargs)
+            self._zarr_group.create_array(name, **kwargs)  # type: ignore
         else:
             raise RuntimeError("Unsupported Zarr version")
 
     @property
     def attrs(self) -> Dict[str, Any]:
-        return self._zarr_group.attrs
+        return self._zarr_group.attrs  # type: ignore
 
     def __getitem__(self, key: str) -> Any:
         return self._zarr_group[key]
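
The `# type: ignore` comments here accommodate the zarr 2/3 API split: a zarr 2 `Group` exposes `create_dataset` while a zarr 3 `Group` exposes `create_array`, and one set of type stubs cannot describe both. A hedged sketch of the dispatch (`_zarr_major_version` is a stand-in for figpack's `_check_zarr_version`):

import zarr

def _zarr_major_version() -> int:
    # Stand-in for figpack's _check_zarr_version helper.
    return int(zarr.__version__.split(".")[0])

def create_array_compat(group, name, **kwargs):
    if _zarr_major_version() == 2:
        return group.create_dataset(name, **kwargs)  # zarr 2 API
    if _zarr_major_version() == 3:
        return group.create_array(name, **kwargs)  # zarr 3 API
    raise RuntimeError("Unsupported Zarr version")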