hippius 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hippius-0.2.1.dist-info → hippius-0.2.3.dist-info}/METADATA +8 -7
- hippius-0.2.3.dist-info/RECORD +16 -0
- hippius_sdk/__init__.py +1 -1
- hippius_sdk/cli.py +277 -2628
- hippius_sdk/cli_assets.py +8 -0
- hippius_sdk/cli_handlers.py +2370 -0
- hippius_sdk/cli_parser.py +602 -0
- hippius_sdk/cli_rich.py +253 -0
- hippius_sdk/client.py +56 -8
- hippius_sdk/config.py +1 -1
- hippius_sdk/ipfs.py +540 -130
- hippius_sdk/ipfs_core.py +22 -1
- hippius_sdk/substrate.py +215 -525
- hippius_sdk/utils.py +84 -2
- hippius-0.2.1.dist-info/RECORD +0 -12
- {hippius-0.2.1.dist-info → hippius-0.2.3.dist-info}/WHEEL +0 -0
- {hippius-0.2.1.dist-info → hippius-0.2.3.dist-info}/entry_points.txt +0 -0
hippius_sdk/ipfs.py
CHANGED
@@ -1,7 +1,7 @@
 """
 IPFS operations for the Hippius SDK.
 """
-
+import asyncio
 import hashlib
 import json
 import os
@@ -10,7 +10,7 @@ import shutil
 import tempfile
 import time
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 import httpx
 import requests
@@ -37,6 +37,12 @@ try:
 except ImportError:
     ERASURE_CODING_AVAILABLE = False
 
+# Configuration constants
+PARALLEL_EC_CHUNKS = 20  # Maximum number of concurrent chunk downloads
+PARALLEL_ORIGINAL_CHUNKS = (
+    15  # Maximum number of original chunks to process in parallel
+)
+
 
 class IPFSClient:
     """Client for interacting with IPFS."""
@@ -60,7 +66,7 @@
         """
         # Load configuration values if not explicitly provided
         if gateway is None:
-            gateway = get_config_value("ipfs", "gateway", "https://
+            gateway = get_config_value("ipfs", "gateway", "https://get.hippius.network")
 
         if api_url is None:
             api_url = get_config_value(
@@ -78,11 +84,12 @@
         self.base_url = api_url
 
         try:
-            self.client = AsyncIPFSClient(api_url)
+            self.client = AsyncIPFSClient(api_url=api_url, gateway=self.gateway)
         except httpx.ConnectError as e:
-            print(
-
-
+            print(
+                f"Warning: Falling back to local IPFS daemon, but still using gateway={self.gateway}"
+            )
+            self.client = AsyncIPFSClient(gateway=self.gateway)
 
         self._initialize_encryption(encrypt_by_default, encryption_key)
 
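The constructor change threads the configured gateway into both the primary AsyncIPFSClient and the local-daemon fallback, so reads keep going through the configured gateway even when the API endpoint is unreachable. A minimal sketch of the same connect-then-fall-back pattern, assuming a version probe against the RPC endpoint (the probe path and the local daemon address are illustrative, not taken from AsyncIPFSClient):

    import httpx

    def make_api_client(api_url: str, gateway: str) -> httpx.Client:
        try:
            client = httpx.Client(base_url=api_url)
            client.post("/api/v0/version")  # Kubo RPC endpoints accept POST
            return client
        except httpx.ConnectError:
            # Fall back to the local daemon, but keep the configured gateway
            # for downloads, mirroring the warning printed above.
            print(f"Warning: falling back to local IPFS daemon, gateway={gateway}")
            return httpx.Client(base_url="http://127.0.0.1:5001")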
@@ -477,8 +484,6 @@
 
         # Download the file with retry logic
         retries = 0
-        last_error = None
-
         while retries < max_retries:
             try:
                 # Download the file
@@ -499,7 +504,6 @@
 
             except (requests.exceptions.RequestException, IOError) as e:
                 # Save the error and retry
-                last_error = e
                 retries += 1
 
                 if retries < max_retries:
@@ -736,6 +740,7 @@
         encrypt: Optional[bool] = None,
         max_retries: int = 3,
         verbose: bool = True,
+        progress_callback: Optional[Callable[[str, int, int], None]] = None,
     ) -> Dict[str, Any]:
         """
         Split a file using erasure coding, then upload the chunks to IPFS.
@@ -753,6 +758,8 @@
             encrypt: Whether to encrypt the file before encoding (defaults to self.encrypt_by_default)
             max_retries: Maximum number of retry attempts for IPFS uploads
             verbose: Whether to print progress information
+            progress_callback: Optional callback function for progress updates
+                Function receives (stage_name, current, total)
 
         Returns:
             dict: Metadata including the original file info and chunk information
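The docstring pins the callback contract to three positional arguments: (stage_name, current, total). A caller-side sketch against that contract; the method's leading parameters are elided in this diff, so the commented call is indicative only:

    def print_progress(stage_name: str, current: int, total: int) -> None:
        # Matches the documented (stage_name, current, total) contract.
        pct = current / total * 100 if total else 0.0
        print(f"[{stage_name}] {current}/{total} ({pct:.1f}%)")

    # Indicative call, other parameters elided in this diff:
    #   await ipfs_client.erasure_code_file(..., progress_callback=print_progress)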
@@ -934,14 +941,19 @@
 
         # Step 4: Upload all chunks to IPFS
         if verbose:
-            print(
+            print(
+                f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS in parallel..."
+            )
 
         chunk_uploads = 0
         chunk_data = []
+        batch_size = 20  # Number of concurrent uploads
 
         # Create a temporary directory for the chunks
         with tempfile.TemporaryDirectory() as temp_dir:
-            #
+            # Prepare all chunks for upload
+            all_chunk_info = []
+
             for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
                 for share_idx, share_data in enumerate(encoded_chunks):
                     # Create a name for this chunk that includes needed info
@@ -952,29 +964,64 @@
                     with open(chunk_path, "wb") as f:
                         f.write(share_data)
 
-                    #
-
-
-                        chunk_path, max_retries=max_retries
-                    )
-
-                    # Store info about this chunk
-                    chunk_info = {
+                    # Store info for async upload
+                    all_chunk_info.append(
+                        {
                             "name": chunk_name,
-                        "
+                            "path": chunk_path,
                             "original_chunk": original_idx,
                             "share_idx": share_idx,
                             "size": len(share_data),
                         }
-
+                    )
+
+            # Create a semaphore to limit concurrent uploads
+            semaphore = asyncio.Semaphore(batch_size)
+
+            # Track total uploads for progress reporting
+            total_chunks = len(all_chunk_info)
 
+            # Initialize progress tracking if callback provided
+            if progress_callback:
+                progress_callback("upload", 0, total_chunks)
+
+            if verbose:
+                print(f"Uploading {total_chunks} erasure-coded chunks to IPFS...")
+
+            # Define upload task for a single chunk
+            async def upload_chunk(chunk_info):
+                nonlocal chunk_uploads
+
+                async with semaphore:
+                    try:
+                        chunk_cid = await self.upload_file(
+                            chunk_info["path"], max_retries=max_retries
+                        )
+                        chunk_info["cid"] = chunk_cid
                         chunk_uploads += 1
+
+                        # Update progress through callback
+                        if progress_callback:
+                            progress_callback("upload", chunk_uploads, total_chunks)
+
                         if verbose and chunk_uploads % 10 == 0:
+                            print(f"  Uploaded {chunk_uploads}/{total_chunks} chunks")
+                        return chunk_info
+                    except Exception as e:
+                        if verbose:
                             print(
-                                f"
+                                f"Error uploading chunk {chunk_info['name']}: {str(e)}"
                             )
-
-
+                        return None
+
+            # Create tasks for all chunk uploads
+            upload_tasks = [upload_chunk(chunk_info) for chunk_info in all_chunk_info]
+
+            # Wait for all uploads to complete
+            completed_uploads = await asyncio.gather(*upload_tasks)
+
+            # Filter out failed uploads
+            chunk_data = [upload for upload in completed_uploads if upload is not None]
 
         # Add all chunk info to metadata
         metadata["chunks"] = chunk_data
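The upload step replaces a serial loop with a bounded fan-out: an asyncio.Semaphore caps in-flight uploads at batch_size, asyncio.gather drives every task, and failed uploads collapse to None before filtering. The same pattern in isolation, with generic names rather than SDK API:

    import asyncio
    from typing import Any, Awaitable, Callable, List, Optional

    async def bounded_map(
        worker: Callable[[Any], Awaitable[Any]], items: List[Any], limit: int = 20
    ) -> List[Any]:
        semaphore = asyncio.Semaphore(limit)

        async def run(item: Any) -> Optional[Any]:
            async with semaphore:
                try:
                    return await worker(item)
                except Exception:
                    return None  # failed items are dropped, as in the diff

        results = await asyncio.gather(*(run(i) for i in items))
        return [r for r in results if r is not None]

Creating all tasks up front and letting the semaphore throttle them keeps result order stable, and each surviving chunk entry still carries its own original_chunk and share_idx, so later reconstruction does not depend on upload order.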
@@ -1012,7 +1059,7 @@
         temp_dir: str = None,
         max_retries: int = 3,
         verbose: bool = True,
-    ) ->
+    ) -> Dict:
         """
         Reconstruct a file from erasure-coded chunks using its metadata.
 
@@ -1024,7 +1071,7 @@
             verbose: Whether to print progress information
 
         Returns:
-
+            Dict: containing file reconstruction info.
 
         Raises:
             ValueError: If reconstruction fails
@@ -1082,6 +1129,9 @@
             )
             if is_encrypted:
                 print("Encrypted: Yes")
+            print(
+                f"Using parallel download with max {PARALLEL_ORIGINAL_CHUNKS} original chunks and {PARALLEL_EC_CHUNKS} chunk downloads concurrently"
+            )
 
         # Step 3: Group chunks by their original chunk index
         chunks_by_original = {}
@@ -1091,136 +1141,157 @@
                 chunks_by_original[orig_idx] = []
             chunks_by_original[orig_idx].append(chunk)
 
-        # Step 4:
+        # Step 4: Process all original chunks in parallel
         if verbose:
             total_original_chunks = len(chunks_by_original)
-
+            total_chunks_needed = total_original_chunks * k
             print(
-                f"Downloading and reconstructing {
+                f"Downloading and reconstructing {total_chunks_needed} chunks in parallel..."
             )
 
-
-
-
-
-
-
-
-
-
-                f"
-
-            )
+        # Create semaphores to limit concurrency
+        encoded_chunks_semaphore = asyncio.Semaphore(PARALLEL_EC_CHUNKS)
+        original_chunks_semaphore = asyncio.Semaphore(PARALLEL_ORIGINAL_CHUNKS)
+
+        # Process a single original chunk and its required downloads
+        async def process_original_chunk(orig_idx, available_chunks):
+            # Limit number of original chunks processing at once
+            async with original_chunks_semaphore:
+                if verbose:
+                    print(f"Processing original chunk {orig_idx}...")
+
+                if len(available_chunks) < k:
+                    raise ValueError(
+                        f"Not enough chunks available for original chunk {orig_idx}. "
+                        f"Need {k}, but only have {len(available_chunks)}."
+                    )
 
-
-
-
-                chunks_to_try = available_chunks.copy()
+                # Try slightly more than k chunks (k+2) to handle some failures
+                num_to_try = min(k + 2, len(available_chunks))
+                chunks_to_try = random.sample(available_chunks, num_to_try)
 
-
-
+                # Track downloaded chunks
+                download_tasks = []
 
-
-
-
-                    break
+                # Start parallel downloads for chunks
+                for chunk in chunks_to_try:
+                    chunk_path = os.path.join(temp_dir, f"{chunk['name']}")
 
-
-                    try:
-                        # Extract the CID string from the chunk's cid dictionary
+                    # Extract CID
                     chunk_cid = (
                         chunk["cid"]["cid"]
                         if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
                         else chunk["cid"]
                     )
-                        await self.download_file(
-                            chunk_cid, chunk_path, max_retries=max_retries
-                        )
-                        chunks_downloaded += 1
 
-                        #
-
-
-
-
-
-
-
-
-
-
-
+                    # Create download task
+                    async def download_chunk(cid, path, chunk_info):
+                        async with encoded_chunks_semaphore:
+                            try:
+                                await self.download_file(
+                                    cid, path, max_retries=max_retries
+                                )
+
+                                # Read chunk data
+                                with open(path, "rb") as f:
+                                    share_data = f.read()
+
+                                return {
+                                    "success": True,
+                                    "data": share_data,
+                                    "share_idx": chunk_info["share_idx"],
+                                    "name": chunk_info["name"],
+                                }
+                            except Exception as e:
+                                if verbose:
+                                    print(
+                                        f"Error downloading chunk {chunk_info['name']}: {str(e)}"
+                                    )
+                                return {
+                                    "success": False,
+                                    "error": str(e),
+                                    "name": chunk_info["name"],
+                                }
+
+                    # Create task
+                    task = asyncio.create_task(
+                        download_chunk(chunk_cid, chunk_path, chunk)
+                    )
+                    download_tasks.append(task)
+
+                # Process downloads as they complete
+                downloaded_shares = []
+                share_indexes = []
+
+                for done_task in asyncio.as_completed(download_tasks):
+                    result = await done_task
+
+                    if result["success"]:
+                        downloaded_shares.append(result["data"])
+                        share_indexes.append(result["share_idx"])
+
+                    # Once we have k chunks, cancel remaining downloads
+                    if len(downloaded_shares) >= k:
+                        for task in download_tasks:
+                            if not task.done():
+                                task.cancel()
+                        break
+
+                # Check if we have enough chunks
+                if len(downloaded_shares) < k:
+                    raise ValueError(
+                        f"Failed to download enough chunks for original chunk {orig_idx}. "
+                        f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                    )
 
-
-
-
-
-                        f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                # Reconstruct this chunk
+                decoder = zfec.Decoder(k, m)
+                reconstructed_data = decoder.decode(
+                    downloaded_shares, share_indexes
                 )
 
-
-
-
+                if not isinstance(reconstructed_data, list):
+                    raise TypeError(
+                        f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+                    )
 
-
-
-
-
-
+                # Calculate the actual size of this original chunk
+                is_last_chunk = orig_idx == max(chunks_by_original.keys())
+                original_chunk_size = total_original_size - orig_idx * chunk_size
+                if not is_last_chunk:
+                    original_chunk_size = min(chunk_size, original_chunk_size)
+
+                # Recombine the sub-blocks
+                reconstructed_chunk = b""
+                total_bytes = 0
+                for sub_block in reconstructed_data:
+                    bytes_to_take = min(
+                        len(sub_block), original_chunk_size - total_bytes
+                    )
+                    if bytes_to_take <= 0:
+                        break
 
-
-
-                is_last_chunk = orig_idx == max(chunks_by_original.keys())
-                original_chunk_size = total_original_size - orig_idx * chunk_size
-                if not is_last_chunk:
-                    original_chunk_size = min(chunk_size, original_chunk_size)
-
-                # Recombine the sub-blocks, respecting the original chunk size
-                reconstructed_chunk = b""
-                total_bytes = 0
-                for sub_block in reconstructed_data:
-                    # Calculate how many bytes we should take from this sub-block
-                    bytes_to_take = min(
-                        len(sub_block), original_chunk_size - total_bytes
-                    )
-                    if bytes_to_take <= 0:
-                        break
+                    reconstructed_chunk += sub_block[:bytes_to_take]
+                    total_bytes += bytes_to_take
 
-                    reconstructed_chunk
-                    total_bytes += bytes_to_take
+                return reconstructed_chunk
 
-
+        # Create tasks for all original chunks and process them in parallel
+        chunk_tasks = []
+        for orig_idx in sorted(chunks_by_original.keys()):
+            chunk_tasks.append(
+                process_original_chunk(orig_idx, chunks_by_original[orig_idx])
+            )
 
-
-
-                    progress_pct = (orig_idx + 1) / total_original_chunks * 100
-                    print(
-                        f"  Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
-                    )
-                    if (
-                        orig_idx == 0 or is_last_chunk
-                    ):  # Only show debug for first and last chunks to avoid spam
-                        print(f"  Debug info for chunk {orig_idx}:")
-                        print(f"  Original chunk size: {original_chunk_size} bytes")
-                        print(
-                            f"  Reconstructed chunk size: {len(reconstructed_chunk)} bytes"
-                        )
-                        print(f"  Share indexes used: {share_indexes}")
-                        print(f"  Sub-blocks received: {len(reconstructed_data)}")
+        # Wait for all chunks to be reconstructed
+        reconstructed_chunks = await asyncio.gather(*chunk_tasks)
 
         if verbose:
             download_time = time.time() - start_time
-            print(
-                f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
-            )
-            if chunks_failed > 0:
-                print(
-                    f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
-                )
+            print(f"Chunk reconstruction completed in {download_time:.2f} seconds")
 
         # Step 5: Combine the reconstructed chunks into a file
-
-            print("Combining reconstructed chunks...")
+        print("Combining reconstructed chunks...")
 
         # Process chunks to remove padding correctly
         processed_chunks = []
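Reconstruction needs only k of the m shares per original chunk, so the new code samples k+2 candidates, downloads them concurrently under PARALLEL_EC_CHUNKS, and cancels the stragglers as soon as k succeed via asyncio.as_completed. The race-to-k core as a standalone sketch:

    import asyncio
    from typing import Any, List

    async def first_k(tasks: List[asyncio.Task], k: int) -> List[Any]:
        # Collect the first k successful results, then cancel the rest.
        results: List[Any] = []
        for done in asyncio.as_completed(tasks):
            try:
                results.append(await done)
            except Exception:
                continue  # one failure just means waiting for another task
            if len(results) >= k:
                for t in tasks:
                    if not t.done():
                        t.cancel()
                break
        if len(results) < k:
            raise ValueError(f"needed {k} results, got only {len(results)}")
        return results

In the diff itself, per-download failures are returned as {"success": False, ...} dicts rather than raised, which keeps one bad chunk from tearing down the whole download set.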
@@ -1293,7 +1364,10 @@
             print(f"Reconstruction complete in {total_time:.2f} seconds!")
             print(f"File saved to: {output_file}")
 
-            return
+            return {
+                "output_path": output_file,
+                "size_bytes": size_processed,
+            }
 
         finally:
             # Clean up temporary directory if we created it
@@ -1311,6 +1385,7 @@
         substrate_client=None,
         max_retries: int = 3,
         verbose: bool = True,
+        progress_callback: Optional[Callable[[str, int, int], None]] = None,
     ) -> Dict[str, Any]:
         """
         Erasure code a file, upload the chunks to IPFS, and store in the Hippius marketplace.
@@ -1327,6 +1402,8 @@
             substrate_client: SubstrateClient to use (or None to create one)
             max_retries: Maximum number of retry attempts
             verbose: Whether to print progress information
+            progress_callback: Optional callback function for progress updates
+                Function receives (stage_name, current, total)
 
         Returns:
             dict: Result including metadata CID and transaction hash
@@ -1344,6 +1421,7 @@
             encrypt=encrypt,
             max_retries=max_retries,
             verbose=verbose,
+            progress_callback=progress_callback,
         )
 
         # Step 2: Create substrate client if we need it
@@ -1418,3 +1496,335 @@
             print(f"Error storing files in marketplace: {str(e)}")
             # Return the metadata even if storage fails
             return {"metadata": metadata, "metadata_cid": metadata_cid, "error": str(e)}
+
+    async def delete_file(
+        self, cid: str, cancel_from_blockchain: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Delete a file from IPFS and optionally cancel its storage on the blockchain.
+
+        Args:
+            cid: Content Identifier (CID) of the file to delete
+            cancel_from_blockchain: Whether to also cancel the storage request from the blockchain
+
+        Returns:
+            Dict containing the result of the operation
+        """
+        result = {
+            "cid": cid,
+            "unpin_result": None,
+            "blockchain_result": None,
+            "timing": {
+                "start_time": time.time(),
+                "end_time": None,
+                "duration_seconds": None,
+            },
+        }
+
+        # First, unpin from IPFS
+        try:
+            print(f"Unpinning file from IPFS: {cid}")
+            try:
+                # Try to check if file exists in IPFS before unpinning
+                await self.exists(cid)
+            except Exception as exists_e:
+                print(f"ERROR: Error checking file existence: {exists_e}")
+
+            unpin_result = await self.client.unpin(cid)
+            result["unpin_result"] = unpin_result
+            print("Successfully unpinned from IPFS")
+        except Exception as e:
+            print(f"Warning: Failed to unpin file from IPFS: {e}")
+            raise
+
+        # Then, if requested, cancel from blockchain
+        if cancel_from_blockchain:
+            try:
+                # Create a substrate client
+                print(f"DEBUG: Creating SubstrateClient for blockchain cancellation...")
+                substrate_client = SubstrateClient()
+                print(
+                    f"DEBUG: Substrate client created with URL: {substrate_client.url}"
+                )
+                print(f"DEBUG: Calling cancel_storage_request with CID: {cid}")
+
+                tx_hash = await substrate_client.cancel_storage_request(cid)
+                print(f"DEBUG: Received transaction hash: {tx_hash}")
+
+                # Check the return value - special cases for when blockchain cancellation isn't available
+                if tx_hash == "no-blockchain-cancellation-available":
+                    print(
+                        "Blockchain cancellation not available, but IPFS unpinning was successful"
+                    )
+                    result["blockchain_result"] = {
+                        "status": "not_available",
+                        "message": "Blockchain cancellation not available, but IPFS unpinning was successful",
+                    }
+                elif tx_hash.startswith("ipfs-unpinned-only"):
+                    error_msg = tx_hash.replace("ipfs-unpinned-only-", "")
+                    print(
+                        f"IPFS unpinning successful, but blockchain cancellation failed: {error_msg}"
+                    )
+                    result["blockchain_result"] = {
+                        "status": "failed",
+                        "error": error_msg,
+                        "message": "IPFS unpinning successful, but blockchain cancellation failed",
+                    }
+                else:
+                    # Standard successful transaction
+                    result["blockchain_result"] = {
+                        "transaction_hash": tx_hash,
+                        "status": "success",
+                    }
+                    print(f"Successfully canceled storage request from blockchain")
+                    print(
+                        f"DEBUG: Blockchain cancellation succeeded with transaction hash: {tx_hash}"
+                    )
+            except Exception as e:
+                print(f"Warning: Failed to cancel storage from blockchain: {e}")
+                print(
+                    f"DEBUG: Blockchain cancellation exception: {type(e).__name__}: {str(e)}"
+                )
+                if hasattr(e, "__dict__"):
+                    print(f"DEBUG: Exception attributes: {e.__dict__}")
+                result["blockchain_error"] = str(e)
+
+        # Calculate timing
+        result["timing"]["end_time"] = time.time()
+        result["timing"]["duration_seconds"] = (
+            result["timing"]["end_time"] - result["timing"]["start_time"]
+        )
+
+        return result
+
+    async def delete_ec_file(
+        self,
+        metadata_cid: str,
+        cancel_from_blockchain: bool = True,
+        parallel_limit: int = 20,
+    ) -> Dict[str, Any]:
+        """
+        Delete an erasure-coded file, including all its chunks in parallel.
+
+        Args:
+            metadata_cid: CID of the metadata file for the erasure-coded file
+            cancel_from_blockchain: Whether to cancel storage from blockchain
+            parallel_limit: Maximum number of concurrent deletion operations
+
+        Returns:
+            Dict containing the result of the operation
+        """
+        result = {
+            "metadata_cid": metadata_cid,
+            "deleted_chunks": [],
+            "failed_chunks": [],
+            "blockchain_result": None,
+            "timing": {
+                "start_time": time.time(),
+                "end_time": None,
+                "duration_seconds": None,
+            },
+        }
+
+        # Track deletions for reporting
+        deleted_chunks_lock = asyncio.Lock()
+        failed_chunks_lock = asyncio.Lock()
+
+        # First, get the metadata to find all chunks
+        try:
+            print(f"Downloading metadata file (CID: {metadata_cid})...")
+            start_time = time.time()
+            metadata_content = await self.client.cat(metadata_cid)
+            metadata = json.loads(metadata_content.decode("utf-8"))
+            metadata_download_time = time.time() - start_time
+
+            print(f"Metadata downloaded in {metadata_download_time:.2f} seconds")
+
+            # Extract chunk CIDs
+            chunks = []
+            total_chunks = 0
+
+            for chunk_data in metadata.get("chunks", []):
+                for ec_chunk in chunk_data.get("ec_chunks", []):
+                    chunk_cid = ec_chunk.get("cid")
+                    if chunk_cid:
+                        chunks.append(chunk_cid)
+                        total_chunks += 1
+
+            print(f"Found {total_chunks} chunks to delete")
+
+            # Create a semaphore to limit concurrent operations
+            sem = asyncio.Semaphore(parallel_limit)
+
+            # Define the chunk deletion function
+            async def delete_chunk(chunk_cid):
+                async with sem:
+                    try:
+                        print(f"Unpinning chunk: {chunk_cid}")
+                        await self.client.unpin(chunk_cid)
+
+                        # Record success
+                        async with deleted_chunks_lock:
+                            result["deleted_chunks"].append(chunk_cid)
+
+                        # Cancel from blockchain if requested
+                        if cancel_from_blockchain:
+                            try:
+                                substrate_client = SubstrateClient()
+                                tx_hash = await substrate_client.cancel_storage_request(
+                                    chunk_cid
+                                )
+
+                                # Add blockchain result
+                                if "chunk_results" not in result["blockchain_result"]:
+                                    result["blockchain_result"] = {}
+                                    result["blockchain_result"]["chunk_results"] = []
+
+                                # Handle special return values from cancel_storage_request
+                                if tx_hash == "no-blockchain-cancellation-available":
+                                    result["blockchain_result"]["chunk_results"].append(
+                                        {
+                                            "cid": chunk_cid,
+                                            "status": "not_available",
+                                            "message": "Blockchain cancellation not available",
+                                        }
+                                    )
+                                elif tx_hash.startswith("ipfs-unpinned-only"):
+                                    error_msg = tx_hash.replace(
+                                        "ipfs-unpinned-only-", ""
+                                    )
+                                    result["blockchain_result"]["chunk_results"].append(
+                                        {
+                                            "cid": chunk_cid,
+                                            "status": "failed",
+                                            "error": error_msg,
+                                        }
+                                    )
+                                else:
+                                    # Standard successful transaction
+                                    result["blockchain_result"]["chunk_results"].append(
+                                        {
+                                            "cid": chunk_cid,
+                                            "transaction_hash": tx_hash,
+                                            "status": "success",
+                                        }
+                                    )
+                            except Exception as e:
+                                print(
+                                    f"Warning: Failed to cancel blockchain storage for chunk {chunk_cid}: {e}"
+                                )
+
+                                if "chunk_results" not in result["blockchain_result"]:
+                                    result["blockchain_result"] = {}
+                                    result["blockchain_result"]["chunk_results"] = []
+
+                                result["blockchain_result"]["chunk_results"].append(
+                                    {
+                                        "cid": chunk_cid,
+                                        "error": str(e),
+                                        "status": "failed",
+                                    }
+                                )
+
+                        return True
+                    except Exception as e:
+                        error_msg = f"Failed to delete chunk {chunk_cid}: {e}"
+                        print(f"Warning: {error_msg}")
+
+                        # Record failure
+                        async with failed_chunks_lock:
+                            result["failed_chunks"].append(
+                                {"cid": chunk_cid, "error": str(e)}
+                            )
+
+                        return False
+
+            # Start deleting chunks in parallel
+            print(
+                f"Starting parallel deletion of {total_chunks} chunks with max {parallel_limit} concurrent operations"
+            )
+            delete_tasks = [delete_chunk(cid) for cid in chunks]
+            await asyncio.gather(*delete_tasks)
+
+            # Delete the metadata file itself
+            print(f"Unpinning metadata file: {metadata_cid}")
+            response = await self.client.unpin(metadata_cid)
+
+            print(">>>", response)
+            raise SystemExit
+
+            # Cancel metadata from blockchain if requested
+            if cancel_from_blockchain:
+                try:
+                    print(f"Canceling blockchain storage request for metadata file...")
+                    substrate_client = SubstrateClient()
+                    tx_hash = await substrate_client.cancel_storage_request(
+                        metadata_cid
+                    )
+
+                    # Handle special return values from cancel_storage_request
+                    if tx_hash == "no-blockchain-cancellation-available":
+                        print(
+                            "Blockchain cancellation not available for metadata, but IPFS unpinning was successful"
+                        )
+                        result["blockchain_result"] = {
+                            "status": "not_available",
+                            "message": "Blockchain cancellation not available, but IPFS unpinning was successful",
+                        }
+                    elif tx_hash.startswith("ipfs-unpinned-only"):
+                        error_msg = tx_hash.replace("ipfs-unpinned-only-", "")
+                        print(
+                            f"IPFS unpinning successful, but blockchain cancellation failed for metadata: {error_msg}"
+                        )
+                        result["blockchain_result"] = {
+                            "status": "failed",
+                            "error": error_msg,
+                            "message": "IPFS unpinning successful, but blockchain cancellation failed",
+                        }
+                    else:
+                        # Standard successful transaction
+                        result["blockchain_result"] = {
+                            "metadata_transaction_hash": tx_hash,
+                            "status": "success",
+                        }
+                        print(
+                            f"Successfully canceled blockchain storage for metadata file"
+                        )
+                except Exception as e:
+                    print(
+                        f"Warning: Failed to cancel blockchain storage for metadata file: {e}"
+                    )
+
+                    if not result["blockchain_result"]:
+                        result["blockchain_result"] = {}
+
+                    result["blockchain_result"]["metadata_error"] = str(e)
+                    result["blockchain_result"]["status"] = "failed"
+
+            # Calculate and record timing information
+            end_time = time.time()
+            duration = end_time - result["timing"]["start_time"]
+
+            result["timing"]["end_time"] = end_time
+            result["timing"]["duration_seconds"] = duration
+
+            deleted_count = len(result["deleted_chunks"])
+            failed_count = len(result["failed_chunks"])
+
+            print(f"Deletion complete in {duration:.2f} seconds!")
+            print(f"Successfully deleted: {deleted_count}/{total_chunks} chunks")
+
+            if failed_count > 0:
+                print(f"Failed to delete: {failed_count}/{total_chunks} chunks")
+
+            return result
+        except Exception as e:
+            # Record end time even if there was an error
+            result["timing"]["end_time"] = time.time()
+            result["timing"]["duration_seconds"] = (
+                result["timing"]["end_time"] - result["timing"]["start_time"]
+            )
+
+            error_msg = f"Error deleting erasure-coded file: {e}"
+            print(f"Error: {error_msg}")
+            raise RuntimeError(error_msg)
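delete_ec_file walks the metadata's chunks list, unpins every ec_chunk CID concurrently under a semaphore, and guards the shared success/failure lists with asyncio.Lock. The bookkeeping pattern reduced to a sketch, where unpin stands for any async deletion callable rather than the SDK client:

    import asyncio
    from typing import Awaitable, Callable, Dict, List

    async def delete_many(
        cids: List[str],
        unpin: Callable[[str], Awaitable[None]],
        limit: int = 20,
    ) -> Dict[str, list]:
        sem = asyncio.Semaphore(limit)
        lock = asyncio.Lock()
        deleted: List[str] = []
        failed: List[dict] = []

        async def worker(cid: str) -> None:
            async with sem:
                try:
                    await unpin(cid)
                    async with lock:
                        deleted.append(cid)
                except Exception as e:
                    async with lock:
                        failed.append({"cid": cid, "error": str(e)})

        await asyncio.gather(*(worker(c) for c in cids))
        return {"deleted": deleted, "failed": failed}

Recording per-CID outcomes instead of failing fast mirrors the method's result dict, which reports deleted_chunks and failed_chunks separately so a partial deletion is still observable.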