hippius 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to the public registry. It is provided for informational purposes only.
hippius_sdk/ipfs.py CHANGED
@@ -1,7 +1,7 @@
  """
  IPFS operations for the Hippius SDK.
  """
-
+ import asyncio
  import hashlib
  import json
  import os
@@ -10,7 +10,7 @@ import shutil
  import tempfile
  import time
  import uuid
- from typing import Any, Dict, List, Optional
+ from typing import Any, Callable, Dict, List, Optional

  import httpx
  import requests
@@ -37,6 +37,12 @@ try:
  except ImportError:
  ERASURE_CODING_AVAILABLE = False

+ # Configuration constants
+ PARALLEL_EC_CHUNKS = 20 # Maximum number of concurrent chunk downloads
+ PARALLEL_ORIGINAL_CHUNKS = (
+ 15 # Maximum number of original chunks to process in parallel
+ )
+

  class IPFSClient:
  """Client for interacting with IPFS."""
@@ -60,7 +66,7 @@ class IPFSClient:
  """
  # Load configuration values if not explicitly provided
  if gateway is None:
- gateway = get_config_value("ipfs", "gateway", "https://ipfs.io")
+ gateway = get_config_value("ipfs", "gateway", "https://get.hippius.network")

  if api_url is None:
  api_url = get_config_value(
@@ -78,11 +84,12 @@ class IPFSClient:
  self.base_url = api_url

  try:
- self.client = AsyncIPFSClient(api_url)
+ self.client = AsyncIPFSClient(api_url=api_url, gateway=self.gateway)
  except httpx.ConnectError as e:
- print(f"Warning: Could not connect to IPFS node at {api_url}: {e}")
- # Try to connect to local IPFS daemon as fallback
- self.client = AsyncIPFSClient()
+ print(
+ f"Warning: Falling back to local IPFS daemon, but still using gateway={self.gateway}"
+ )
+ self.client = AsyncIPFSClient(gateway=self.gateway)

  self._initialize_encryption(encrypt_by_default, encryption_key)

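For context, a minimal construction sketch. It assumes IPFSClient is importable from hippius_sdk.ipfs and accepts gateway and api_url keyword arguments, which the hunks above suggest but do not fully show; the API URL value is illustrative.

from hippius_sdk.ipfs import IPFSClient

# The default gateway in 0.2.3 is https://get.hippius.network (see the hunk above);
# passing it explicitly here just makes that assumption visible.
client = IPFSClient(
    gateway="https://get.hippius.network",
    api_url="http://localhost:5001",  # illustrative local IPFS API endpoint
)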
@@ -477,8 +484,6 @@ class IPFSClient:

  # Download the file with retry logic
  retries = 0
- last_error = None
-
  while retries < max_retries:
  try:
  # Download the file
@@ -499,7 +504,6 @@ class IPFSClient:

  except (requests.exceptions.RequestException, IOError) as e:
  # Save the error and retry
- last_error = e
  retries += 1

  if retries < max_retries:
@@ -736,6 +740,7 @@ class IPFSClient:
  encrypt: Optional[bool] = None,
  max_retries: int = 3,
  verbose: bool = True,
+ progress_callback: Optional[Callable[[str, int, int], None]] = None,
  ) -> Dict[str, Any]:
  """
  Split a file using erasure coding, then upload the chunks to IPFS.
@@ -753,6 +758,8 @@ class IPFSClient:
  encrypt: Whether to encrypt the file before encoding (defaults to self.encrypt_by_default)
  max_retries: Maximum number of retry attempts for IPFS uploads
  verbose: Whether to print progress information
+ progress_callback: Optional callback function for progress updates
+ Function receives (stage_name, current, total)

  Returns:
  dict: Metadata including the original file info and chunk information
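The new progress_callback parameter documents a (stage_name, current, total) contract. A minimal callback satisfying that contract might look like the sketch below; the callable itself is illustrative.

def report_progress(stage_name: str, current: int, total: int) -> None:
    # stage_name identifies the phase (e.g. "upload" in the hunks below);
    # current and total are completed and expected counts for that phase.
    pct = (current / total * 100) if total else 0.0
    print(f"[{stage_name}] {current}/{total} ({pct:.0f}%)")

The same callable can be passed as progress_callback to this method or to the marketplace upload method further below.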
@@ -934,14 +941,19 @@ class IPFSClient:

  # Step 4: Upload all chunks to IPFS
  if verbose:
- print(f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS...")
+ print(
+ f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS in parallel..."
+ )

  chunk_uploads = 0
  chunk_data = []
+ batch_size = 20 # Number of concurrent uploads

  # Create a temporary directory for the chunks
  with tempfile.TemporaryDirectory() as temp_dir:
- # Write and upload each encoded chunk
+ # Prepare all chunks for upload
+ all_chunk_info = []
+
  for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
  for share_idx, share_data in enumerate(encoded_chunks):
  # Create a name for this chunk that includes needed info
@@ -952,29 +964,64 @@ class IPFSClient:
  with open(chunk_path, "wb") as f:
  f.write(share_data)

- # Upload the chunk to IPFS
- try:
- chunk_cid = await self.upload_file(
- chunk_path, max_retries=max_retries
- )
-
- # Store info about this chunk
- chunk_info = {
+ # Store info for async upload
+ all_chunk_info.append(
+ {
  "name": chunk_name,
- "cid": chunk_cid,
+ "path": chunk_path,
  "original_chunk": original_idx,
  "share_idx": share_idx,
  "size": len(share_data),
  }
- chunk_data.append(chunk_info)
+ )
+
+ # Create a semaphore to limit concurrent uploads
+ semaphore = asyncio.Semaphore(batch_size)
+
+ # Track total uploads for progress reporting
+ total_chunks = len(all_chunk_info)

+ # Initialize progress tracking if callback provided
+ if progress_callback:
+ progress_callback("upload", 0, total_chunks)
+
+ if verbose:
+ print(f"Uploading {total_chunks} erasure-coded chunks to IPFS...")
+
+ # Define upload task for a single chunk
+ async def upload_chunk(chunk_info):
+ nonlocal chunk_uploads
+
+ async with semaphore:
+ try:
+ chunk_cid = await self.upload_file(
+ chunk_info["path"], max_retries=max_retries
+ )
+ chunk_info["cid"] = chunk_cid
  chunk_uploads += 1
+
+ # Update progress through callback
+ if progress_callback:
+ progress_callback("upload", chunk_uploads, total_chunks)
+
  if verbose and chunk_uploads % 10 == 0:
+ print(f" Uploaded {chunk_uploads}/{total_chunks} chunks")
+ return chunk_info
+ except Exception as e:
+ if verbose:
  print(
- f" Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
+ f"Error uploading chunk {chunk_info['name']}: {str(e)}"
  )
- except Exception as e:
- print(f"Error uploading chunk {chunk_name}: {str(e)}")
+ return None
+
+ # Create tasks for all chunk uploads
+ upload_tasks = [upload_chunk(chunk_info) for chunk_info in all_chunk_info]
+
+ # Wait for all uploads to complete
+ completed_uploads = await asyncio.gather(*upload_tasks)
+
+ # Filter out failed uploads
+ chunk_data = [upload for upload in completed_uploads if upload is not None]

  # Add all chunk info to metadata
  metadata["chunks"] = chunk_data
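The new upload path first collects descriptors for every share in all_chunk_info, then uploads them concurrently behind asyncio.Semaphore(batch_size) and filters out failures after asyncio.gather. A standalone sketch of that bounded-concurrency pattern (not the SDK's code) is:

import asyncio

async def upload_all(items, upload_one, limit: int = 20):
    """Bounded-concurrency upload: the gather-plus-semaphore pattern used above."""
    semaphore = asyncio.Semaphore(limit)

    async def worker(item):
        async with semaphore:          # at most `limit` uploads in flight
            try:
                return await upload_one(item)
            except Exception:
                return None            # failed uploads are filtered out below

    results = await asyncio.gather(*(worker(item) for item in items))
    return [r for r in results if r is not None]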
@@ -1012,7 +1059,7 @@ class IPFSClient:
  temp_dir: str = None,
  max_retries: int = 3,
  verbose: bool = True,
- ) -> str:
+ ) -> Dict:
  """
  Reconstruct a file from erasure-coded chunks using its metadata.

@@ -1024,7 +1071,7 @@ class IPFSClient:
  verbose: Whether to print progress information

  Returns:
- str: Path to the reconstructed file
+ Dict: containing file reconstruction info.

  Raises:
  ValueError: If reconstruction fails
@@ -1082,6 +1129,9 @@ class IPFSClient:
  )
  if is_encrypted:
  print("Encrypted: Yes")
+ print(
+ f"Using parallel download with max {PARALLEL_ORIGINAL_CHUNKS} original chunks and {PARALLEL_EC_CHUNKS} chunk downloads concurrently"
+ )

  # Step 3: Group chunks by their original chunk index
  chunks_by_original = {}
@@ -1091,136 +1141,157 @@ class IPFSClient:
  chunks_by_original[orig_idx] = []
  chunks_by_original[orig_idx].append(chunk)

- # Step 4: For each original chunk, download at least k shares
+ # Step 4: Process all original chunks in parallel
  if verbose:
  total_original_chunks = len(chunks_by_original)
- total_chunks_to_download = total_original_chunks * k
+ total_chunks_needed = total_original_chunks * k
  print(
- f"Downloading and reconstructing {total_chunks_to_download} chunks..."
+ f"Downloading and reconstructing {total_chunks_needed} chunks in parallel..."
  )

- reconstructed_chunks = []
- chunks_downloaded = 0
- chunks_failed = 0
-
- for orig_idx in sorted(chunks_by_original.keys()):
- available_chunks = chunks_by_original[orig_idx]
-
- if len(available_chunks) < k:
- raise ValueError(
- f"Not enough chunks available for original chunk {orig_idx}. "
- f"Need {k}, but only have {len(available_chunks)}."
- )
+ # Create semaphores to limit concurrency
+ encoded_chunks_semaphore = asyncio.Semaphore(PARALLEL_EC_CHUNKS)
+ original_chunks_semaphore = asyncio.Semaphore(PARALLEL_ORIGINAL_CHUNKS)
+
+ # Process a single original chunk and its required downloads
+ async def process_original_chunk(orig_idx, available_chunks):
+ # Limit number of original chunks processing at once
+ async with original_chunks_semaphore:
+ if verbose:
+ print(f"Processing original chunk {orig_idx}...")
+
+ if len(available_chunks) < k:
+ raise ValueError(
+ f"Not enough chunks available for original chunk {orig_idx}. "
+ f"Need {k}, but only have {len(available_chunks)}."
+ )

- # Try to download all available chunks, but we only need k successful ones
- downloaded_shares = []
- share_indexes = []
- chunks_to_try = available_chunks.copy()
+ # Try slightly more than k chunks (k+2) to handle some failures
+ num_to_try = min(k + 2, len(available_chunks))
+ chunks_to_try = random.sample(available_chunks, num_to_try)

- # Shuffle to get a better variety of chunks
- random.shuffle(chunks_to_try)
+ # Track downloaded chunks
+ download_tasks = []

- for chunk in chunks_to_try:
- # Break if we already have k chunks
- if len(downloaded_shares) >= k:
- break
+ # Start parallel downloads for chunks
+ for chunk in chunks_to_try:
+ chunk_path = os.path.join(temp_dir, f"{chunk['name']}")

- chunk_path = os.path.join(temp_dir, chunk["name"])
- try:
- # Extract the CID string from the chunk's cid dictionary
+ # Extract CID
  chunk_cid = (
  chunk["cid"]["cid"]
  if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
  else chunk["cid"]
  )
- await self.download_file(
- chunk_cid, chunk_path, max_retries=max_retries
- )
- chunks_downloaded += 1

- # Read the chunk data
- with open(chunk_path, "rb") as f:
- share_data = f.read()
-
- downloaded_shares.append(share_data)
- share_indexes.append(chunk["share_idx"])
-
- except Exception as e:
- if verbose:
- print(f"Error downloading chunk {chunk['name']}: {str(e)}")
- chunks_failed += 1
- # Continue to the next chunk
+ # Create download task
+ async def download_chunk(cid, path, chunk_info):
+ async with encoded_chunks_semaphore:
+ try:
+ await self.download_file(
+ cid, path, max_retries=max_retries
+ )
+
+ # Read chunk data
+ with open(path, "rb") as f:
+ share_data = f.read()
+
+ return {
+ "success": True,
+ "data": share_data,
+ "share_idx": chunk_info["share_idx"],
+ "name": chunk_info["name"],
+ }
+ except Exception as e:
+ if verbose:
+ print(
+ f"Error downloading chunk {chunk_info['name']}: {str(e)}"
+ )
+ return {
+ "success": False,
+ "error": str(e),
+ "name": chunk_info["name"],
+ }
+
+ # Create task
+ task = asyncio.create_task(
+ download_chunk(chunk_cid, chunk_path, chunk)
+ )
+ download_tasks.append(task)
+
+ # Process downloads as they complete
+ downloaded_shares = []
+ share_indexes = []
+
+ for done_task in asyncio.as_completed(download_tasks):
+ result = await done_task
+
+ if result["success"]:
+ downloaded_shares.append(result["data"])
+ share_indexes.append(result["share_idx"])
+
+ # Once we have k chunks, cancel remaining downloads
+ if len(downloaded_shares) >= k:
+ for task in download_tasks:
+ if not task.done():
+ task.cancel()
+ break
+
+ # Check if we have enough chunks
+ if len(downloaded_shares) < k:
+ raise ValueError(
+ f"Failed to download enough chunks for original chunk {orig_idx}. "
+ f"Need {k}, but only downloaded {len(downloaded_shares)}."
+ )

- # If we don't have enough chunks, fail
- if len(downloaded_shares) < k:
- raise ValueError(
- f"Failed to download enough chunks for original chunk {orig_idx}. "
- f"Need {k}, but only downloaded {len(downloaded_shares)}."
+ # Reconstruct this chunk
+ decoder = zfec.Decoder(k, m)
+ reconstructed_data = decoder.decode(
+ downloaded_shares, share_indexes
  )

- # Reconstruct this chunk
- decoder = zfec.Decoder(k, m)
- reconstructed_data = decoder.decode(downloaded_shares, share_indexes)
+ if not isinstance(reconstructed_data, list):
+ raise TypeError(
+ f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+ )

- if not isinstance(reconstructed_data, list):
- # Handle unexpected output type
- raise TypeError(
- f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
- )
+ # Calculate the actual size of this original chunk
+ is_last_chunk = orig_idx == max(chunks_by_original.keys())
+ original_chunk_size = total_original_size - orig_idx * chunk_size
+ if not is_last_chunk:
+ original_chunk_size = min(chunk_size, original_chunk_size)
+
+ # Recombine the sub-blocks
+ reconstructed_chunk = b""
+ total_bytes = 0
+ for sub_block in reconstructed_data:
+ bytes_to_take = min(
+ len(sub_block), original_chunk_size - total_bytes
+ )
+ if bytes_to_take <= 0:
+ break

- # Calculate the actual size of this original chunk
- # For all chunks except possibly the last one, it should be chunk_size
- is_last_chunk = orig_idx == max(chunks_by_original.keys())
- original_chunk_size = total_original_size - orig_idx * chunk_size
- if not is_last_chunk:
- original_chunk_size = min(chunk_size, original_chunk_size)
-
- # Recombine the sub-blocks, respecting the original chunk size
- reconstructed_chunk = b""
- total_bytes = 0
- for sub_block in reconstructed_data:
- # Calculate how many bytes we should take from this sub-block
- bytes_to_take = min(
- len(sub_block), original_chunk_size - total_bytes
- )
- if bytes_to_take <= 0:
- break
+ reconstructed_chunk += sub_block[:bytes_to_take]
+ total_bytes += bytes_to_take

- reconstructed_chunk += sub_block[:bytes_to_take]
- total_bytes += bytes_to_take
+ return reconstructed_chunk

- reconstructed_chunks.append(reconstructed_chunk)
+ # Create tasks for all original chunks and process them in parallel
+ chunk_tasks = []
+ for orig_idx in sorted(chunks_by_original.keys()):
+ chunk_tasks.append(
+ process_original_chunk(orig_idx, chunks_by_original[orig_idx])
+ )

- # Add debugging information if verbose
- if verbose:
- progress_pct = (orig_idx + 1) / total_original_chunks * 100
- print(
- f" Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
- )
- if (
- orig_idx == 0 or is_last_chunk
- ): # Only show debug for first and last chunks to avoid spam
- print(f" Debug info for chunk {orig_idx}:")
- print(f" Original chunk size: {original_chunk_size} bytes")
- print(
- f" Reconstructed chunk size: {len(reconstructed_chunk)} bytes"
- )
- print(f" Share indexes used: {share_indexes}")
- print(f" Sub-blocks received: {len(reconstructed_data)}")
+ # Wait for all chunks to be reconstructed
+ reconstructed_chunks = await asyncio.gather(*chunk_tasks)

  if verbose:
  download_time = time.time() - start_time
- print(
- f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
- )
- if chunks_failed > 0:
- print(
- f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
- )
+ print(f"Chunk reconstruction completed in {download_time:.2f} seconds")

  # Step 5: Combine the reconstructed chunks into a file
- if verbose:
- print("Combining reconstructed chunks...")
+ print("Combining reconstructed chunks...")

  # Process chunks to remove padding correctly
  processed_chunks = []
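Per original chunk, the new code launches k + 2 downloads, consumes them with asyncio.as_completed, and cancels whatever is still in flight once k shares have arrived. A generic sketch of that "first k successes" pattern, independent of the SDK, is:

import asyncio

async def first_k_successes(coros, k: int):
    """Run coroutines concurrently and return once k of them have succeeded.

    A sketch of the pattern used above: remaining tasks are cancelled as soon
    as enough results have been collected.
    """
    tasks = [asyncio.create_task(c) for c in coros]
    results = []
    try:
        for done in asyncio.as_completed(tasks):
            try:
                results.append(await done)
            except Exception:
                continue  # a failed download simply does not count toward k
            if len(results) >= k:
                break
    finally:
        for t in tasks:
            if not t.done():
                t.cancel()
    if len(results) < k:
        raise ValueError(f"Only {len(results)} of the required {k} succeeded")
    return results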
@@ -1293,7 +1364,10 @@ class IPFSClient:
  print(f"Reconstruction complete in {total_time:.2f} seconds!")
  print(f"File saved to: {output_file}")

- return output_file
+ return {
+ "output_path": output_file,
+ "size_bytes": size_processed,
+ }

  finally:
  # Clean up temporary directory if we created it
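Since the reconstruction method now returns a dict rather than the bare output path, callers unpack output_path and size_bytes. A hedged usage sketch, assuming the method is named reconstruct_from_erasure_code (the hunks above show only part of its signature) and using placeholder arguments:

import asyncio
from hippius_sdk.ipfs import IPFSClient

async def main() -> None:
    client = IPFSClient()
    # "QmMetadataCid" and "restored.bin" are placeholders
    result = await client.reconstruct_from_erasure_code("QmMetadataCid", "restored.bin")
    print(result["output_path"], result["size_bytes"])

asyncio.run(main())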
@@ -1311,6 +1385,7 @@ class IPFSClient:
  substrate_client=None,
  max_retries: int = 3,
  verbose: bool = True,
+ progress_callback: Optional[Callable[[str, int, int], None]] = None,
  ) -> Dict[str, Any]:
  """
  Erasure code a file, upload the chunks to IPFS, and store in the Hippius marketplace.
@@ -1327,6 +1402,8 @@ class IPFSClient:
  substrate_client: SubstrateClient to use (or None to create one)
  max_retries: Maximum number of retry attempts
  verbose: Whether to print progress information
+ progress_callback: Optional callback function for progress updates
+ Function receives (stage_name, current, total)

  Returns:
  dict: Result including metadata CID and transaction hash
@@ -1344,6 +1421,7 @@ class IPFSClient:
  encrypt=encrypt,
  max_retries=max_retries,
  verbose=verbose,
+ progress_callback=progress_callback,
  )

  # Step 2: Create substrate client if we need it
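The marketplace upload method now forwards progress_callback to the erasure-coding step, so one callback can drive progress reporting for the whole upload stage. A hedged usage sketch; the method name store_erasure_coded_file and the positional file argument are assumptions, as the hunks above show only parameters and docstring:

import asyncio
from hippius_sdk.ipfs import IPFSClient

def on_progress(stage: str, current: int, total: int) -> None:
    print(f"{stage}: {current}/{total}")

async def main() -> None:
    client = IPFSClient()
    result = await client.store_erasure_coded_file(
        "big_file.bin",                # illustrative path
        progress_callback=on_progress,
    )
    print("metadata CID:", result.get("metadata_cid"))

asyncio.run(main())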
@@ -1418,3 +1496,335 @@ class IPFSClient:
  print(f"Error storing files in marketplace: {str(e)}")
  # Return the metadata even if storage fails
  return {"metadata": metadata, "metadata_cid": metadata_cid, "error": str(e)}
+
+ async def delete_file(
+ self, cid: str, cancel_from_blockchain: bool = True
+ ) -> Dict[str, Any]:
+ """
+ Delete a file from IPFS and optionally cancel its storage on the blockchain.
+
+ Args:
+ cid: Content Identifier (CID) of the file to delete
+ cancel_from_blockchain: Whether to also cancel the storage request from the blockchain
+
+ Returns:
+ Dict containing the result of the operation
+ """
+ result = {
+ "cid": cid,
+ "unpin_result": None,
+ "blockchain_result": None,
+ "timing": {
+ "start_time": time.time(),
+ "end_time": None,
+ "duration_seconds": None,
+ },
+ }
+
+ # First, unpin from IPFS
+ try:
+ print(f"Unpinning file from IPFS: {cid}")
+ try:
+ # Try to check if file exists in IPFS before unpinning
+ await self.exists(cid)
+ except Exception as exists_e:
+ print(f"ERROR: Error checking file existence: {exists_e}")
+
+ unpin_result = await self.client.unpin(cid)
+ result["unpin_result"] = unpin_result
+ print("Successfully unpinned from IPFS")
+ except Exception as e:
+ print(f"Warning: Failed to unpin file from IPFS: {e}")
+ raise
+
+ # Then, if requested, cancel from blockchain
+ if cancel_from_blockchain:
+ try:
+ # Create a substrate client
+ print(f"DEBUG: Creating SubstrateClient for blockchain cancellation...")
+ substrate_client = SubstrateClient()
+ print(
+ f"DEBUG: Substrate client created with URL: {substrate_client.url}"
+ )
+ print(f"DEBUG: Calling cancel_storage_request with CID: {cid}")
+
+ tx_hash = await substrate_client.cancel_storage_request(cid)
+ print(f"DEBUG: Received transaction hash: {tx_hash}")
+
+ # Check the return value - special cases for when blockchain cancellation isn't available
+ if tx_hash == "no-blockchain-cancellation-available":
+ print(
+ "Blockchain cancellation not available, but IPFS unpinning was successful"
+ )
+ result["blockchain_result"] = {
+ "status": "not_available",
+ "message": "Blockchain cancellation not available, but IPFS unpinning was successful",
+ }
+ elif tx_hash.startswith("ipfs-unpinned-only"):
+ error_msg = tx_hash.replace("ipfs-unpinned-only-", "")
+ print(
+ f"IPFS unpinning successful, but blockchain cancellation failed: {error_msg}"
+ )
+ result["blockchain_result"] = {
+ "status": "failed",
+ "error": error_msg,
+ "message": "IPFS unpinning successful, but blockchain cancellation failed",
+ }
+ else:
+ # Standard successful transaction
+ result["blockchain_result"] = {
+ "transaction_hash": tx_hash,
+ "status": "success",
+ }
+ print(f"Successfully canceled storage request from blockchain")
+ print(
+ f"DEBUG: Blockchain cancellation succeeded with transaction hash: {tx_hash}"
+ )
+ except Exception as e:
+ print(f"Warning: Failed to cancel storage from blockchain: {e}")
+ print(
+ f"DEBUG: Blockchain cancellation exception: {type(e).__name__}: {str(e)}"
+ )
+ if hasattr(e, "__dict__"):
+ print(f"DEBUG: Exception attributes: {e.__dict__}")
+ result["blockchain_error"] = str(e)
+
+ # Calculate timing
+ result["timing"]["end_time"] = time.time()
+ result["timing"]["duration_seconds"] = (
+ result["timing"]["end_time"] - result["timing"]["start_time"]
+ )
+
+ return result
+
+ async def delete_ec_file(
+ self,
+ metadata_cid: str,
+ cancel_from_blockchain: bool = True,
+ parallel_limit: int = 20,
+ ) -> Dict[str, Any]:
+ """
+ Delete an erasure-coded file, including all its chunks in parallel.
+
+ Args:
+ metadata_cid: CID of the metadata file for the erasure-coded file
+ cancel_from_blockchain: Whether to cancel storage from blockchain
+ parallel_limit: Maximum number of concurrent deletion operations
+
+ Returns:
+ Dict containing the result of the operation
+ """
+ result = {
+ "metadata_cid": metadata_cid,
+ "deleted_chunks": [],
+ "failed_chunks": [],
+ "blockchain_result": None,
+ "timing": {
+ "start_time": time.time(),
+ "end_time": None,
+ "duration_seconds": None,
+ },
+ }
+
+ # Track deletions for reporting
+ deleted_chunks_lock = asyncio.Lock()
+ failed_chunks_lock = asyncio.Lock()
+
+ # First, get the metadata to find all chunks
+ try:
+ print(f"Downloading metadata file (CID: {metadata_cid})...")
+ start_time = time.time()
+ metadata_content = await self.client.cat(metadata_cid)
+ metadata = json.loads(metadata_content.decode("utf-8"))
+ metadata_download_time = time.time() - start_time
+
+ print(f"Metadata downloaded in {metadata_download_time:.2f} seconds")
+
+ # Extract chunk CIDs
+ chunks = []
+ total_chunks = 0
+
+ for chunk_data in metadata.get("chunks", []):
+ for ec_chunk in chunk_data.get("ec_chunks", []):
+ chunk_cid = ec_chunk.get("cid")
+ if chunk_cid:
+ chunks.append(chunk_cid)
+ total_chunks += 1
+
+ print(f"Found {total_chunks} chunks to delete")
+
+ # Create a semaphore to limit concurrent operations
+ sem = asyncio.Semaphore(parallel_limit)
+
+ # Define the chunk deletion function
+ async def delete_chunk(chunk_cid):
+ async with sem:
+ try:
+ print(f"Unpinning chunk: {chunk_cid}")
+ await self.client.unpin(chunk_cid)
+
+ # Record success
+ async with deleted_chunks_lock:
+ result["deleted_chunks"].append(chunk_cid)
+
+ # Cancel from blockchain if requested
+ if cancel_from_blockchain:
+ try:
+ substrate_client = SubstrateClient()
+ tx_hash = await substrate_client.cancel_storage_request(
+ chunk_cid
+ )
+
+ # Add blockchain result
+ if "chunk_results" not in result["blockchain_result"]:
+ result["blockchain_result"] = {}
+ result["blockchain_result"]["chunk_results"] = []
+
+ # Handle special return values from cancel_storage_request
+ if tx_hash == "no-blockchain-cancellation-available":
+ result["blockchain_result"]["chunk_results"].append(
+ {
+ "cid": chunk_cid,
+ "status": "not_available",
+ "message": "Blockchain cancellation not available",
+ }
+ )
+ elif tx_hash.startswith("ipfs-unpinned-only"):
+ error_msg = tx_hash.replace(
+ "ipfs-unpinned-only-", ""
+ )
+ result["blockchain_result"]["chunk_results"].append(
+ {
+ "cid": chunk_cid,
+ "status": "failed",
+ "error": error_msg,
+ }
+ )
+ else:
+ # Standard successful transaction
+ result["blockchain_result"]["chunk_results"].append(
+ {
+ "cid": chunk_cid,
+ "transaction_hash": tx_hash,
+ "status": "success",
+ }
+ )
+ except Exception as e:
+ print(
+ f"Warning: Failed to cancel blockchain storage for chunk {chunk_cid}: {e}"
+ )
+
+ if "chunk_results" not in result["blockchain_result"]:
+ result["blockchain_result"] = {}
+ result["blockchain_result"]["chunk_results"] = []
+
+ result["blockchain_result"]["chunk_results"].append(
+ {
+ "cid": chunk_cid,
+ "error": str(e),
+ "status": "failed",
+ }
+ )
+
+ return True
+ except Exception as e:
+ error_msg = f"Failed to delete chunk {chunk_cid}: {e}"
+ print(f"Warning: {error_msg}")
+
+ # Record failure
+ async with failed_chunks_lock:
+ result["failed_chunks"].append(
+ {"cid": chunk_cid, "error": str(e)}
+ )
+
+ return False
+
+ # Start deleting chunks in parallel
+ print(
+ f"Starting parallel deletion of {total_chunks} chunks with max {parallel_limit} concurrent operations"
+ )
+ delete_tasks = [delete_chunk(cid) for cid in chunks]
+ await asyncio.gather(*delete_tasks)
+
+ # Delete the metadata file itself
+ print(f"Unpinning metadata file: {metadata_cid}")
+ response = await self.client.unpin(metadata_cid)
+
+ print(">>>", response)
+ raise SystemExit
+
+ # Cancel metadata from blockchain if requested
+ if cancel_from_blockchain:
+ try:
+ print(f"Canceling blockchain storage request for metadata file...")
+ substrate_client = SubstrateClient()
+ tx_hash = await substrate_client.cancel_storage_request(
+ metadata_cid
+ )
+
+ # Handle special return values from cancel_storage_request
+ if tx_hash == "no-blockchain-cancellation-available":
+ print(
+ "Blockchain cancellation not available for metadata, but IPFS unpinning was successful"
+ )
+ result["blockchain_result"] = {
+ "status": "not_available",
+ "message": "Blockchain cancellation not available, but IPFS unpinning was successful",
+ }
+ elif tx_hash.startswith("ipfs-unpinned-only"):
+ error_msg = tx_hash.replace("ipfs-unpinned-only-", "")
+ print(
+ f"IPFS unpinning successful, but blockchain cancellation failed for metadata: {error_msg}"
+ )
+ result["blockchain_result"] = {
+ "status": "failed",
+ "error": error_msg,
+ "message": "IPFS unpinning successful, but blockchain cancellation failed",
+ }
+ else:
+ # Standard successful transaction
+ result["blockchain_result"] = {
+ "metadata_transaction_hash": tx_hash,
+ "status": "success",
+ }
+ print(
+ f"Successfully canceled blockchain storage for metadata file"
+ )
+ except Exception as e:
+ print(
+ f"Warning: Failed to cancel blockchain storage for metadata file: {e}"
+ )
+
+ if not result["blockchain_result"]:
+ result["blockchain_result"] = {}
+
+ result["blockchain_result"]["metadata_error"] = str(e)
+ result["blockchain_result"]["status"] = "failed"
+
+ # Calculate and record timing information
+ end_time = time.time()
+ duration = end_time - result["timing"]["start_time"]
+
+ result["timing"]["end_time"] = end_time
+ result["timing"]["duration_seconds"] = duration
+
+ deleted_count = len(result["deleted_chunks"])
+ failed_count = len(result["failed_chunks"])
+
+ print(f"Deletion complete in {duration:.2f} seconds!")
+ print(f"Successfully deleted: {deleted_count}/{total_chunks} chunks")
+
+ if failed_count > 0:
+ print(f"Failed to delete: {failed_count}/{total_chunks} chunks")
+
+ return result
+ except Exception as e:
+ # Record end time even if there was an error
+ result["timing"]["end_time"] = time.time()
+ result["timing"]["duration_seconds"] = (
+ result["timing"]["end_time"] - result["timing"]["start_time"]
+ )
+
+ error_msg = f"Error deleting erasure-coded file: {e}"
+ print(f"Error: {error_msg}")
+ raise RuntimeError(error_msg)
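The new delete_file coroutine unpins a CID and records unpin, blockchain, and timing details in its result dict. A hedged usage sketch with a placeholder CID:

import asyncio
from hippius_sdk.ipfs import IPFSClient

async def main() -> None:
    client = IPFSClient()
    # "QmExampleCid" is a placeholder; the keyword argument matches the diff above.
    result = await client.delete_file("QmExampleCid", cancel_from_blockchain=True)
    print("unpin result:", result["unpin_result"])
    print("blockchain result:", result["blockchain_result"])
    print(f"took {result['timing']['duration_seconds']:.2f}s")

asyncio.run(main())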