hippius 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hippius-0.2.0.dist-info → hippius-0.2.2.dist-info}/METADATA +2 -1
- hippius-0.2.2.dist-info/RECORD +12 -0
- hippius_sdk/__init__.py +1 -1
- hippius_sdk/cli.py +414 -43
- hippius_sdk/ipfs.py +247 -89
- hippius_sdk/substrate.py +468 -10
- hippius-0.2.0.dist-info/RECORD +0 -12
- {hippius-0.2.0.dist-info → hippius-0.2.2.dist-info}/WHEEL +0 -0
- {hippius-0.2.0.dist-info → hippius-0.2.2.dist-info}/entry_points.txt +0 -0
hippius_sdk/ipfs.py
CHANGED
@@ -1,10 +1,11 @@
 """
 IPFS operations for the Hippius SDK.
 """
-
+import asyncio
 import hashlib
 import json
 import os
+import random
 import shutil
 import tempfile
 import time
@@ -36,6 +37,12 @@ try:
 except ImportError:
     ERASURE_CODING_AVAILABLE = False

+# Configuration constants
+PARALLEL_EC_CHUNKS = 20  # Maximum number of concurrent chunk downloads
+PARALLEL_ORIGINAL_CHUNKS = (
+    15  # Maximum number of original chunks to process in parallel
+)
+

 class IPFSClient:
     """Client for interacting with IPFS."""
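The two new constants encode a two-level concurrency budget: an outer cap on how many original chunks are reconstructed at once, and an inner cap on how many erasure-coded share downloads are in flight overall. A minimal sketch of that nested-semaphore pattern; the `fetch_share`/`rebuild_chunk` names and the `sleep` stand-ins are illustrative, not SDK API:

```python
import asyncio

PARALLEL_EC_CHUNKS = 20        # inner cap: share downloads in flight overall
PARALLEL_ORIGINAL_CHUNKS = 15  # outer cap: original chunks rebuilt at once

async def main() -> None:
    ec_sem = asyncio.Semaphore(PARALLEL_EC_CHUNKS)
    orig_sem = asyncio.Semaphore(PARALLEL_ORIGINAL_CHUNKS)

    async def fetch_share(idx: int) -> bytes:
        async with ec_sem:             # inner level: one slot per share download
            await asyncio.sleep(0.01)  # stand-in for a network fetch
            return b"share"

    async def rebuild_chunk(orig_idx: int) -> bytes:
        async with orig_sem:           # outer level: one slot per original chunk
            shares = await asyncio.gather(*(fetch_share(i) for i in range(3)))
            return b"".join(shares)

    chunks = await asyncio.gather(*(rebuild_chunk(i) for i in range(50)))
    print(f"rebuilt {len(chunks)} chunks")

asyncio.run(main())
```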
@@ -650,6 +657,44 @@ class IPFSClient:
             "gateway_url": gateway_url if exists else None,
         }

+    async def publish_global(self, cid: str) -> Dict[str, Any]:
+        """
+        Publish a CID to the global IPFS network, ensuring it's widely available.
+
+        This makes the content available beyond the local IPFS node by pinning
+        it to multiple public services.
+
+        Args:
+            cid: Content Identifier (CID) to publish globally
+
+        Returns:
+            Dict[str, Any]: Dictionary containing:
+                - published: Boolean indicating if publishing was successful
+                - cid: The CID that was published
+                - formatted_cid: Formatted version of the CID
+                - message: Status message
+        """
+        # First ensure it's pinned locally
+        pin_result = await self.pin(cid)
+
+        if not pin_result.get("success", False):
+            return {
+                "published": False,
+                "cid": cid,
+                "formatted_cid": self.format_cid(cid),
+                "message": f"Failed to pin content locally: {pin_result.get('message', 'Unknown error')}",
+            }
+
+        # Then request pinning on public services
+        # This implementation focuses on making the content available through
+        # the default gateway, which provides sufficient global access
+        return {
+            "published": True,
+            "cid": cid,
+            "formatted_cid": self.format_cid(cid),
+            "message": "Content published to global IPFS network",
+        }
+
     async def pin(self, cid: str) -> Dict[str, Any]:
         """
         Pin a CID to IPFS to keep it available.
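A sketch of how the new `publish_global` method might be called. The default `IPFSClient()` construction and the example CID are assumptions for illustration, not taken from this diff:

```python
import asyncio

from hippius_sdk.ipfs import IPFSClient

async def main() -> None:
    client = IPFSClient()  # assumes a default gateway/API configuration
    result = await client.publish_global(
        "QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG"  # placeholder CID
    )
    status = "published" if result["published"] else "failed"
    print(f"{status}: {result['formatted_cid']} - {result['message']}")

asyncio.run(main())
```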
@@ -895,14 +940,19 @@ class IPFSClient:

         # Step 4: Upload all chunks to IPFS
         if verbose:
-            print(
+            print(
+                f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS in parallel..."
+            )

         chunk_uploads = 0
         chunk_data = []
+        batch_size = 20  # Number of concurrent uploads

         # Create a temporary directory for the chunks
         with tempfile.TemporaryDirectory() as temp_dir:
-            #
+            # Prepare all chunks for upload
+            all_chunk_info = []
+
             for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
                 for share_idx, share_data in enumerate(encoded_chunks):
                     # Create a name for this chunk that includes needed info
@@ -913,29 +963,48 @@
                     with open(chunk_path, "wb") as f:
                         f.write(share_data)

-                    #
-
-
-                        chunk_path, max_retries=max_retries
-                    )
-
-                    # Store info about this chunk
-                    chunk_info = {
+                    # Store info for async upload
+                    all_chunk_info.append(
+                        {
                             "name": chunk_name,
-                            "
+                            "path": chunk_path,
                             "original_chunk": original_idx,
                             "share_idx": share_idx,
                             "size": len(share_data),
                         }
-
+                    )
+
+            # Create a semaphore to limit concurrent uploads
+            semaphore = asyncio.Semaphore(batch_size)
+
+            # Define upload task for a single chunk
+            async def upload_chunk(chunk_info):
+                nonlocal chunk_uploads

+                async with semaphore:
+                    try:
+                        chunk_cid = await self.upload_file(
+                            chunk_info["path"], max_retries=max_retries
+                        )
+                        chunk_info["cid"] = chunk_cid
                         chunk_uploads += 1
                         if verbose and chunk_uploads % 10 == 0:
                             print(
                                 f"  Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
                             )
+                        return chunk_info
                     except Exception as e:
-                        print(f"Error uploading chunk {
+                        print(f"Error uploading chunk {chunk_info['name']}: {str(e)}")
+                        return None
+
+            # Create tasks for all chunk uploads
+            upload_tasks = [upload_chunk(chunk_info) for chunk_info in all_chunk_info]
+
+            # Wait for all uploads to complete
+            completed_uploads = await asyncio.gather(*upload_tasks)
+
+            # Filter out failed uploads
+            chunk_data = [upload for upload in completed_uploads if upload is not None]

         # Add all chunk info to metadata
         metadata["chunks"] = chunk_data
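The two hunks above replace sequential per-chunk uploads with a bounded fan-out: every chunk becomes a coroutine, a semaphore keeps at most `batch_size` uploads in flight, `asyncio.gather` collects the results, and failed uploads come back as `None` and are filtered out. A self-contained sketch of the same pattern, with `fake_upload` standing in for the real IPFS upload call:

```python
import asyncio
from typing import Any, Dict, List, Optional

async def fake_upload(path: str) -> str:
    """Stand-in for the real IPFS upload call."""
    await asyncio.sleep(0.01)
    return f"Qm-{abs(hash(path)) % 10**8:08d}"

async def upload_all(paths: List[str], batch_size: int = 20) -> List[Dict[str, Any]]:
    semaphore = asyncio.Semaphore(batch_size)

    async def upload_one(path: str) -> Optional[Dict[str, Any]]:
        async with semaphore:  # at most batch_size uploads in flight
            try:
                return {"path": path, "cid": await fake_upload(path)}
            except Exception as e:
                print(f"Error uploading {path}: {e}")
                return None  # failure is recorded as None and filtered below

    results = await asyncio.gather(*(upload_one(p) for p in paths))
    return [r for r in results if r is not None]  # keep only successful uploads

uploaded = asyncio.run(upload_all([f"chunk_{i}" for i in range(100)]))
print(f"{len(uploaded)} chunks uploaded")
```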
@@ -1032,6 +1101,7 @@ class IPFSClient:
             m = erasure_params["m"]
             is_encrypted = erasure_params.get("encrypted", False)
             chunk_size = erasure_params.get("chunk_size", 1024 * 1024)
+            total_original_size = original_file["size"]

             if verbose:
                 print(
@@ -1042,6 +1112,9 @@ class IPFSClient:
                 )
                 if is_encrypted:
                     print("Encrypted: Yes")
+                print(
+                    f"Using parallel download with max {PARALLEL_ORIGINAL_CHUNKS} original chunks and {PARALLEL_EC_CHUNKS} chunk downloads concurrently"
+                )

             # Step 3: Group chunks by their original chunk index
             chunks_by_original = {}
@@ -1051,109 +1124,194 @@ class IPFSClient:
                     chunks_by_original[orig_idx] = []
                 chunks_by_original[orig_idx].append(chunk)

-            # Step 4:
+            # Step 4: Process all original chunks in parallel
             if verbose:
                 total_original_chunks = len(chunks_by_original)
-
+                total_chunks_needed = total_original_chunks * k
                 print(
-                    f"Downloading and reconstructing {
+                    f"Downloading and reconstructing {total_chunks_needed} chunks in parallel..."
                 )

-
-
-
+            # Create semaphores to limit concurrency
+            encoded_chunks_semaphore = asyncio.Semaphore(PARALLEL_EC_CHUNKS)
+            original_chunks_semaphore = asyncio.Semaphore(PARALLEL_ORIGINAL_CHUNKS)
+
+            # Process a single original chunk and its required downloads
+            async def process_original_chunk(orig_idx, available_chunks):
+                # Limit number of original chunks processing at once
+                async with original_chunks_semaphore:
+                    if verbose:
+                        print(f"Processing original chunk {orig_idx}...")
+
+                    if len(available_chunks) < k:
+                        raise ValueError(
+                            f"Not enough chunks available for original chunk {orig_idx}. "
+                            f"Need {k}, but only have {len(available_chunks)}."
+                        )

-
-
+                    # Try slightly more than k chunks (k+2) to handle some failures
+                    num_to_try = min(k + 2, len(available_chunks))
+                    chunks_to_try = random.sample(available_chunks, num_to_try)

-
-
-                        f"Not enough chunks available for original chunk {orig_idx}. "
-                        f"Need {k}, but only have {len(available_chunks)}."
-                    )
-
-                # We only need k chunks, so take the first k
-                chunks_to_download = available_chunks[:k]
+                    # Track downloaded chunks
+                    download_tasks = []

-
-
-
+                    # Start parallel downloads for chunks
+                    for chunk in chunks_to_try:
+                        chunk_path = os.path.join(temp_dir, f"{chunk['name']}")

-
-                    chunk_path = os.path.join(temp_dir, chunk["name"])
-                    try:
-                        # Extract the CID string from the chunk's cid dictionary
+                        # Extract CID
                         chunk_cid = (
                             chunk["cid"]["cid"]
                             if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
                             else chunk["cid"]
                         )
-                        await self.download_file(
-                            chunk_cid, chunk_path, max_retries=max_retries
-                        )
-                        chunks_downloaded += 1

-                        #
-
-
+                        # Create download task
+                        async def download_chunk(cid, path, chunk_info):
+                            async with encoded_chunks_semaphore:
+                                try:
+                                    await self.download_file(
+                                        cid, path, max_retries=max_retries
+                                    )
+
+                                    # Read chunk data
+                                    with open(path, "rb") as f:
+                                        share_data = f.read()
+
+                                    return {
+                                        "success": True,
+                                        "data": share_data,
+                                        "share_idx": chunk_info["share_idx"],
+                                        "name": chunk_info["name"],
+                                    }
+                                except Exception as e:
+                                    if verbose:
+                                        print(
+                                            f"Error downloading chunk {chunk_info['name']}: {str(e)}"
+                                        )
+                                    return {
+                                        "success": False,
+                                        "error": str(e),
+                                        "name": chunk_info["name"],
+                                    }
+
+                        # Create task
+                        task = asyncio.create_task(
+                            download_chunk(chunk_cid, chunk_path, chunk)
+                        )
+                        download_tasks.append(task)
+
+                    # Process downloads as they complete
+                    downloaded_shares = []
+                    share_indexes = []
+
+                    for done_task in asyncio.as_completed(download_tasks):
+                        result = await done_task
+
+                        if result["success"]:
+                            downloaded_shares.append(result["data"])
+                            share_indexes.append(result["share_idx"])
+
+                            # Once we have k chunks, cancel remaining downloads
+                            if len(downloaded_shares) >= k:
+                                for task in download_tasks:
+                                    if not task.done():
+                                        task.cancel()
+                                break
+
+                    # Check if we have enough chunks
+                    if len(downloaded_shares) < k:
+                        raise ValueError(
+                            f"Failed to download enough chunks for original chunk {orig_idx}. "
+                            f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                        )

-
-
+                    # Reconstruct this chunk
+                    decoder = zfec.Decoder(k, m)
+                    reconstructed_data = decoder.decode(
+                        downloaded_shares, share_indexes
+                    )

-
-
-
-
-                        # Continue to the next chunk
+                    if not isinstance(reconstructed_data, list):
+                        raise TypeError(
+                            f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+                        )

-
-
-
-
-
-
+                    # Calculate the actual size of this original chunk
+                    is_last_chunk = orig_idx == max(chunks_by_original.keys())
+                    original_chunk_size = total_original_size - orig_idx * chunk_size
+                    if not is_last_chunk:
+                        original_chunk_size = min(chunk_size, original_chunk_size)
+
+                    # Recombine the sub-blocks
+                    reconstructed_chunk = b""
+                    total_bytes = 0
+                    for sub_block in reconstructed_data:
+                        bytes_to_take = min(
+                            len(sub_block), original_chunk_size - total_bytes
+                        )
+                        if bytes_to_take <= 0:
+                            break

-
-
-                    reconstructed_data = decoder.decode(downloaded_shares, share_indexes)
+                        reconstructed_chunk += sub_block[:bytes_to_take]
+                        total_bytes += bytes_to_take

-
-                    if isinstance(reconstructed_data, list):
-                        # Combine the sub-blocks back into a single chunk
-                        reconstructed_chunk = b"".join(reconstructed_data)
-                    else:
-                        # The simple case where we didn't use sub-blocks
-                        reconstructed_chunk = reconstructed_data
+                    return reconstructed_chunk

-
+            # Create tasks for all original chunks and process them in parallel
+            chunk_tasks = []
+            for orig_idx in sorted(chunks_by_original.keys()):
+                chunk_tasks.append(
+                    process_original_chunk(orig_idx, chunks_by_original[orig_idx])
+                )

-
-
-                    progress_pct = (orig_idx + 1) / total_original_chunks * 100
-                    print(
-                        f"  Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
-                    )
+            # Wait for all chunks to be reconstructed
+            reconstructed_chunks = await asyncio.gather(*chunk_tasks)

             if verbose:
                 download_time = time.time() - start_time
-                print(
-                    f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
-                )
-                if chunks_failed > 0:
-                    print(
-                        f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
-                    )
+                print(f"Chunk reconstruction completed in {download_time:.2f} seconds")

             # Step 5: Combine the reconstructed chunks into a file
-
-
+            print("Combining reconstructed chunks...")
+
+            # Process chunks to remove padding correctly
+            processed_chunks = []
+            size_processed = 0
+
+            for i, chunk in enumerate(reconstructed_chunks):
+                # For all chunks except the last one, use full chunk size
+                if i < len(reconstructed_chunks) - 1:
+                    # Calculate how much of this chunk should be used (handle full chunks)
+                    chunk_valid_bytes = min(
+                        chunk_size, total_original_size - size_processed
+                    )
+                    processed_chunks.append(chunk[:chunk_valid_bytes])
+                    size_processed += chunk_valid_bytes
+                else:
+                    # For the last chunk, calculate the remaining bytes needed
+                    remaining_bytes = total_original_size - size_processed
+                    processed_chunks.append(chunk[:remaining_bytes])
+                    size_processed += remaining_bytes

-            # Concatenate all chunks
-            file_data = b"".join(
+            # Concatenate all processed chunks
+            file_data = b"".join(processed_chunks)

-            #
-            if original_file["size"]
-
+            # Double-check the final size matches the original
+            if len(file_data) != original_file["size"]:
+                print(
+                    f"Warning: Reconstructed size ({len(file_data)}) differs from original ({original_file['size']})"
+                )
+                # Ensure we have exactly the right size
+                if len(file_data) > original_file["size"]:
+                    file_data = file_data[: original_file["size"]]
+                else:
+                    # If we're short, pad with zeros (shouldn't happen with proper reconstruction)
+                    print(
+                        "Warning: Reconstructed file is smaller than original, padding with zeros"
+                    )
+                    file_data += b"\0" * (original_file["size"] - len(file_data))

             # Step 6: Decrypt if necessary
             if is_encrypted:
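The download side of the hunk above uses a different pattern from the upload side: it launches slightly more downloads than strictly needed (k+2 of the available shares), consumes results with `asyncio.as_completed`, and cancels whatever is still running once k shares have arrived. A standalone sketch of that first-k-of-n idea; the share count, failure rate, and `fetch_share` body are invented for the demo:

```python
import asyncio
import random
from typing import List, Tuple

async def fetch_share(idx: int) -> Tuple[int, bytes]:
    """Stand-in for downloading one erasure-coded share; fails ~10% of the time."""
    await asyncio.sleep(random.uniform(0.01, 0.05))
    if random.random() < 0.1:
        raise IOError(f"share {idx} unavailable")
    return idx, b"share-data"

async def first_k(n: int, k: int) -> List[Tuple[int, bytes]]:
    tasks = [asyncio.create_task(fetch_share(i)) for i in range(n)]
    shares: List[Tuple[int, bytes]] = []
    for done in asyncio.as_completed(tasks):
        try:
            shares.append(await done)
        except IOError:
            continue  # one failed share just means we wait for another
        if len(shares) >= k:
            for t in tasks:  # k shares are enough: cancel the stragglers
                if not t.done():
                    t.cancel()
            break
    if len(shares) < k:
        raise ValueError(f"only {len(shares)} of {k} required shares arrived")
    return shares

# mirrors the SDK's k+2 over-request: launch 7 downloads, keep the first 5
print(len(asyncio.run(first_k(n=7, k=5))))
```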
@@ -1181,7 +1339,7 @@ class IPFSClient:
                 print("Warning: File hash mismatch!")
                 print(f"  Expected: {expected_hash}")
                 print(f"  Actual: {actual_hash}")
-
+            else:
                 print("Hash verification successful!")

             total_time = time.time() - start_time
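For the hash check in this last hunk, the pattern is simply recomputing a digest over the reconstructed bytes and comparing it with the digest recorded at upload time. A minimal sketch, assuming SHA-256 (this hunk does not show which algorithm the metadata actually stores):

```python
import hashlib

def verify_file_hash(file_data: bytes, expected_hash: str) -> bool:
    """Recompute the digest of the reconstructed bytes and compare."""
    actual_hash = hashlib.sha256(file_data).hexdigest()
    if actual_hash != expected_hash:
        print("Warning: File hash mismatch!")
        print(f"  Expected: {expected_hash}")
        print(f"  Actual: {actual_hash}")
        return False
    print("Hash verification successful!")
    return True

data = b"reconstructed file bytes"
verify_file_hash(data, hashlib.sha256(data).hexdigest())
```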