hippius 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hippius-0.2.1 → hippius-0.2.2}/PKG-INFO +1 -1
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/__init__.py +1 -1
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/cli.py +6 -6
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/ipfs.py +170 -116
- {hippius-0.2.1 → hippius-0.2.2}/pyproject.toml +1 -1
- {hippius-0.2.1 → hippius-0.2.2}/README.md +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/client.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/config.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/ipfs_core.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/substrate.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/utils.py +0 -0
hippius_sdk/cli.py

@@ -790,7 +790,7 @@ async def handle_erasure_code(
     chunk_size,
     miner_ids,
     encrypt=None,
-    publish=
+    publish=True,
     verbose=True,
 ):
     """Handle the erasure-code command"""
@@ -2086,8 +2086,8 @@ examples:
   # Erasure code a file (Reed-Solomon)
   hippius erasure-code large_file.mp4 --k 3 --m 5

-  # Erasure code
-  hippius erasure-code large_file.avi --publish
+  # Erasure code without publishing to global IPFS network
+  hippius erasure-code large_file.avi --no-publish

   # Reconstruct an erasure-coded file
   hippius reconstruct QmMetadataHash reconstructed_file.mp4
@@ -2326,9 +2326,9 @@ examples:
     "--no-encrypt", action="store_true", help="Do not encrypt the file"
 )
 erasure_code_parser.add_argument(
-    "--publish",
+    "--no-publish",
     action="store_true",
-    help="
+    help="Do not upload and publish the erasure-coded file to the global IPFS network",
 )
 erasure_code_parser.add_argument(
     "--verbose", action="store_true", help="Enable verbose output", default=True
@@ -2687,7 +2687,7 @@ examples:
         args.chunk_size,
         miner_ids,
         encrypt=args.encrypt,
-        publish=args.
+        publish=not args.no_publish,
         verbose=args.verbose,
     )

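
The cli.py changes above invert the flag: publishing is now the default, and --no-publish opts out, wired through publish=not args.no_publish. A minimal standalone sketch of this standard argparse inversion pattern (the parser below is illustrative, not SDK code):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--no-publish",
    action="store_true",  # defaults to False, so publish defaults to True
    help="Do not upload and publish the erasure-coded file to the global IPFS network",
)

args = parser.parse_args([])              # flag omitted
assert (not args.no_publish) is True      # publish=True by default

args = parser.parse_args(["--no-publish"])
assert (not args.no_publish) is False     # publishing disabled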
hippius_sdk/ipfs.py

@@ -1,7 +1,7 @@
 """
 IPFS operations for the Hippius SDK.
 """
-
+import asyncio
 import hashlib
 import json
 import os
@@ -37,6 +37,12 @@ try:
 except ImportError:
     ERASURE_CODING_AVAILABLE = False

+# Configuration constants
+PARALLEL_EC_CHUNKS = 20  # Maximum number of concurrent chunk downloads
+PARALLEL_ORIGINAL_CHUNKS = (
+    15  # Maximum number of original chunks to process in parallel
+)
+

 class IPFSClient:
     """Client for interacting with IPFS."""
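
For context on what these chunks are: the SDK erasure-codes each original chunk into m shares with zfec, and any k of them reconstruct it. A minimal round-trip sketch using the zfec API this module relies on (k=3, m=5 here; the payload and the dropped-share indices are illustrative, and real code must pad data to a multiple of k):

import zfec

k, m = 3, 5
data = b"0123456789AB"  # 12 bytes, already divisible by k
block_size = len(data) // k
blocks = [data[i * block_size : (i + 1) * block_size] for i in range(k)]

# Encode k equal-sized blocks into m shares; any k shares suffice
shares = zfec.Encoder(k, m).encode(blocks)

# Simulate losing two shares; keep the indices of the survivors
survivors = [shares[0], shares[2], shares[4]]
indexes = [0, 2, 4]

# Decode returns the k primary blocks, which concatenate to the original
recovered = zfec.Decoder(k, m).decode(survivors, indexes)
assert b"".join(recovered) == data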
@@ -934,14 +940,19 @@ class IPFSClient:

         # Step 4: Upload all chunks to IPFS
         if verbose:
-            print(
+            print(
+                f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS in parallel..."
+            )

         chunk_uploads = 0
         chunk_data = []
+        batch_size = 20  # Number of concurrent uploads

         # Create a temporary directory for the chunks
         with tempfile.TemporaryDirectory() as temp_dir:
-            #
+            # Prepare all chunks for upload
+            all_chunk_info = []
+
             for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
                 for share_idx, share_data in enumerate(encoded_chunks):
                     # Create a name for this chunk that includes needed info
@@ -952,29 +963,48 @@ class IPFSClient:
                     with open(chunk_path, "wb") as f:
                         f.write(share_data)

-                    #
-
-
-                        chunk_path, max_retries=max_retries
-                    )
-
-                    # Store info about this chunk
-                    chunk_info = {
+                    # Store info for async upload
+                    all_chunk_info.append(
+                        {
                             "name": chunk_name,
-                            "
+                            "path": chunk_path,
                             "original_chunk": original_idx,
                             "share_idx": share_idx,
                             "size": len(share_data),
                         }
-
+                    )

+            # Create a semaphore to limit concurrent uploads
+            semaphore = asyncio.Semaphore(batch_size)
+
+            # Define upload task for a single chunk
+            async def upload_chunk(chunk_info):
+                nonlocal chunk_uploads
+
+                async with semaphore:
+                    try:
+                        chunk_cid = await self.upload_file(
+                            chunk_info["path"], max_retries=max_retries
+                        )
+                        chunk_info["cid"] = chunk_cid
                         chunk_uploads += 1
                         if verbose and chunk_uploads % 10 == 0:
                             print(
                                 f"  Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
                             )
+                        return chunk_info
                     except Exception as e:
-                        print(f"Error uploading chunk {
+                        print(f"Error uploading chunk {chunk_info['name']}: {str(e)}")
+                        return None
+
+            # Create tasks for all chunk uploads
+            upload_tasks = [upload_chunk(chunk_info) for chunk_info in all_chunk_info]
+
+            # Wait for all uploads to complete
+            completed_uploads = await asyncio.gather(*upload_tasks)
+
+            # Filter out failed uploads
+            chunk_data = [upload for upload in completed_uploads if upload is not None]

             # Add all chunk info to metadata
             metadata["chunks"] = chunk_data
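
The new upload path above is the semaphore-plus-gather pattern: every chunk gets its own task, a semaphore caps how many run at once, and failures return None so they can be filtered out afterwards. A self-contained sketch of the same idea; fake_upload stands in for self.upload_file and is not part of the SDK:

import asyncio
from typing import Optional

async def fake_upload(path: str) -> str:
    await asyncio.sleep(0.01)  # pretend network I/O
    return f"cid-for-{path}"

async def upload_all(paths, batch_size: int = 20):
    semaphore = asyncio.Semaphore(batch_size)

    async def upload_one(path: str) -> Optional[str]:
        async with semaphore:  # at most batch_size uploads in flight
            try:
                return await fake_upload(path)
            except Exception:
                return None  # mirror the SDK: drop failed uploads

    results = await asyncio.gather(*(upload_one(p) for p in paths))
    return [cid for cid in results if cid is not None]

cids = asyncio.run(upload_all([f"chunk_{i}" for i in range(50)]))
print(len(cids), "uploads succeeded")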
@@ -1082,6 +1112,9 @@ class IPFSClient:
             )
             if is_encrypted:
                 print("Encrypted: Yes")
+            print(
+                f"Using parallel download with max {PARALLEL_ORIGINAL_CHUNKS} original chunks and {PARALLEL_EC_CHUNKS} chunk downloads concurrently"
+            )

         # Step 3: Group chunks by their original chunk index
         chunks_by_original = {}
@@ -1091,136 +1124,157 @@
                     chunks_by_original[orig_idx] = []
                 chunks_by_original[orig_idx].append(chunk)

-            # Step 4:
+            # Step 4: Process all original chunks in parallel
             if verbose:
                 total_original_chunks = len(chunks_by_original)
-
+                total_chunks_needed = total_original_chunks * k
                 print(
-                    f"Downloading and reconstructing {
+                    f"Downloading and reconstructing {total_chunks_needed} chunks in parallel..."
                 )

-
-
-
-
-
-
-
-
-
-                    f"
-
-                )
+            # Create semaphores to limit concurrency
+            encoded_chunks_semaphore = asyncio.Semaphore(PARALLEL_EC_CHUNKS)
+            original_chunks_semaphore = asyncio.Semaphore(PARALLEL_ORIGINAL_CHUNKS)
+
+            # Process a single original chunk and its required downloads
+            async def process_original_chunk(orig_idx, available_chunks):
+                # Limit number of original chunks processing at once
+                async with original_chunks_semaphore:
+                    if verbose:
+                        print(f"Processing original chunk {orig_idx}...")
+
+                    if len(available_chunks) < k:
+                        raise ValueError(
+                            f"Not enough chunks available for original chunk {orig_idx}. "
+                            f"Need {k}, but only have {len(available_chunks)}."
+                        )

-
-
-
-                chunks_to_try = available_chunks.copy()
+                    # Try slightly more than k chunks (k+2) to handle some failures
+                    num_to_try = min(k + 2, len(available_chunks))
+                    chunks_to_try = random.sample(available_chunks, num_to_try)

-
-
+                    # Track downloaded chunks
+                    download_tasks = []

-
-
-
-                        break
+                    # Start parallel downloads for chunks
+                    for chunk in chunks_to_try:
+                        chunk_path = os.path.join(temp_dir, f"{chunk['name']}")

-
-                try:
-                    # Extract the CID string from the chunk's cid dictionary
+                        # Extract CID
                         chunk_cid = (
                             chunk["cid"]["cid"]
                             if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
                             else chunk["cid"]
                         )
-                    await self.download_file(
-                        chunk_cid, chunk_path, max_retries=max_retries
-                    )
-                    chunks_downloaded += 1

-                    #
-
-
-
-
-
-
-
-
-
-
+                        # Create download task
+                        async def download_chunk(cid, path, chunk_info):
+                            async with encoded_chunks_semaphore:
+                                try:
+                                    await self.download_file(
+                                        cid, path, max_retries=max_retries
+                                    )
+
+                                    # Read chunk data
+                                    with open(path, "rb") as f:
+                                        share_data = f.read()
+
+                                    return {
+                                        "success": True,
+                                        "data": share_data,
+                                        "share_idx": chunk_info["share_idx"],
+                                        "name": chunk_info["name"],
+                                    }
+                                except Exception as e:
+                                    if verbose:
+                                        print(
+                                            f"Error downloading chunk {chunk_info['name']}: {str(e)}"
+                                        )
+                                    return {
+                                        "success": False,
+                                        "error": str(e),
+                                        "name": chunk_info["name"],
+                                    }
+
+                        # Create task
+                        task = asyncio.create_task(
+                            download_chunk(chunk_cid, chunk_path, chunk)
+                        )
+                        download_tasks.append(task)
+
+                    # Process downloads as they complete
+                    downloaded_shares = []
+                    share_indexes = []
+
+                    for done_task in asyncio.as_completed(download_tasks):
+                        result = await done_task
+
+                        if result["success"]:
+                            downloaded_shares.append(result["data"])
+                            share_indexes.append(result["share_idx"])
+
+                            # Once we have k chunks, cancel remaining downloads
+                            if len(downloaded_shares) >= k:
+                                for task in download_tasks:
+                                    if not task.done():
+                                        task.cancel()
+                                break
+
+                    # Check if we have enough chunks
+                    if len(downloaded_shares) < k:
+                        raise ValueError(
+                            f"Failed to download enough chunks for original chunk {orig_idx}. "
+                            f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                        )

-
-
-
-
-                    f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                    # Reconstruct this chunk
+                    decoder = zfec.Decoder(k, m)
+                    reconstructed_data = decoder.decode(
+                        downloaded_shares, share_indexes
                     )

-
-
-
+                    if not isinstance(reconstructed_data, list):
+                        raise TypeError(
+                            f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+                        )

-
-
-
-
-
+                    # Calculate the actual size of this original chunk
+                    is_last_chunk = orig_idx == max(chunks_by_original.keys())
+                    original_chunk_size = total_original_size - orig_idx * chunk_size
+                    if not is_last_chunk:
+                        original_chunk_size = min(chunk_size, original_chunk_size)
+
+                    # Recombine the sub-blocks
+                    reconstructed_chunk = b""
+                    total_bytes = 0
+                    for sub_block in reconstructed_data:
+                        bytes_to_take = min(
+                            len(sub_block), original_chunk_size - total_bytes
+                        )
+                        if bytes_to_take <= 0:
+                            break

-
-
-                is_last_chunk = orig_idx == max(chunks_by_original.keys())
-                original_chunk_size = total_original_size - orig_idx * chunk_size
-                if not is_last_chunk:
-                    original_chunk_size = min(chunk_size, original_chunk_size)
-
-                # Recombine the sub-blocks, respecting the original chunk size
-                reconstructed_chunk = b""
-                total_bytes = 0
-                for sub_block in reconstructed_data:
-                    # Calculate how many bytes we should take from this sub-block
-                    bytes_to_take = min(
-                        len(sub_block), original_chunk_size - total_bytes
-                    )
-                    if bytes_to_take <= 0:
-                        break
+                        reconstructed_chunk += sub_block[:bytes_to_take]
+                        total_bytes += bytes_to_take

-                    reconstructed_chunk
-                    total_bytes += bytes_to_take
+                    return reconstructed_chunk

-
+            # Create tasks for all original chunks and process them in parallel
+            chunk_tasks = []
+            for orig_idx in sorted(chunks_by_original.keys()):
+                chunk_tasks.append(
+                    process_original_chunk(orig_idx, chunks_by_original[orig_idx])
+                )

-
-
-                progress_pct = (orig_idx + 1) / total_original_chunks * 100
-                print(
-                    f"  Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
-                )
-                if (
-                    orig_idx == 0 or is_last_chunk
-                ):  # Only show debug for first and last chunks to avoid spam
-                    print(f"  Debug info for chunk {orig_idx}:")
-                    print(f"  Original chunk size: {original_chunk_size} bytes")
-                    print(
-                        f"  Reconstructed chunk size: {len(reconstructed_chunk)} bytes"
-                    )
-                    print(f"  Share indexes used: {share_indexes}")
-                    print(f"  Sub-blocks received: {len(reconstructed_data)}")
+            # Wait for all chunks to be reconstructed
+            reconstructed_chunks = await asyncio.gather(*chunk_tasks)

             if verbose:
                 download_time = time.time() - start_time
-                print(
-                    f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
-                )
-                if chunks_failed > 0:
-                    print(
-                        f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
-                    )
+                print(f"Chunk reconstruction completed in {download_time:.2f} seconds")

             # Step 5: Combine the reconstructed chunks into a file
-
-                print("Combining reconstructed chunks...")
+            print("Combining reconstructed chunks...")

             # Process chunks to remove padding correctly
             processed_chunks = []