hippius 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hippius
-Version: 0.2.1
+Version: 0.2.2
 Summary: Python SDK and CLI for Hippius blockchain storage
 Home-page: https://github.com/thenervelab/hippius-sdk
 Author: Dubs
@@ -26,7 +26,7 @@ from hippius_sdk.config import (
 from hippius_sdk.ipfs import IPFSClient
 from hippius_sdk.utils import format_cid, format_size, hex_to_ipfs_cid
 
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 __all__ = [
     "HippiusClient",
     "IPFSClient",
@@ -790,7 +790,7 @@ async def handle_erasure_code(
     chunk_size,
     miner_ids,
     encrypt=None,
-    publish=False,
+    publish=True,
     verbose=True,
 ):
     """Handle the erasure-code command"""
@@ -2086,8 +2086,8 @@ examples:
     # Erasure code a file (Reed-Solomon)
     hippius erasure-code large_file.mp4 --k 3 --m 5
 
-    # Erasure code and publish to global IPFS network
-    hippius erasure-code large_file.avi --publish
+    # Erasure code without publishing to global IPFS network
+    hippius erasure-code large_file.avi --no-publish
 
     # Reconstruct an erasure-coded file
     hippius reconstruct QmMetadataHash reconstructed_file.mp4
@@ -2326,9 +2326,9 @@ examples:
         "--no-encrypt", action="store_true", help="Do not encrypt the file"
     )
     erasure_code_parser.add_argument(
-        "--publish",
+        "--no-publish",
        action="store_true",
-        help="Upload and publish the erasure-coded file to the global IPFS network",
+        help="Do not upload and publish the erasure-coded file to the global IPFS network",
    )
    erasure_code_parser.add_argument(
        "--verbose", action="store_true", help="Enable verbose output", default=True
@@ -2687,7 +2687,7 @@ examples:
             args.chunk_size,
             miner_ids,
             encrypt=args.encrypt,
-            publish=args.publish,
+            publish=not args.no_publish,
             verbose=args.verbose,
         )
 
@@ -1,7 +1,7 @@
 """
 IPFS operations for the Hippius SDK.
 """
-
+import asyncio
 import hashlib
 import json
 import os
@@ -37,6 +37,12 @@ try:
 except ImportError:
     ERASURE_CODING_AVAILABLE = False
 
+# Configuration constants
+PARALLEL_EC_CHUNKS = 20  # Maximum number of concurrent chunk downloads
+PARALLEL_ORIGINAL_CHUNKS = (
+    15  # Maximum number of original chunks to process in parallel
+)
+
 
 class IPFSClient:
     """Client for interacting with IPFS."""
@@ -934,14 +940,19 @@ class IPFSClient:
 
         # Step 4: Upload all chunks to IPFS
         if verbose:
-            print(f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS...")
+            print(
+                f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS in parallel..."
+            )
 
         chunk_uploads = 0
         chunk_data = []
+        batch_size = 20  # Number of concurrent uploads
 
         # Create a temporary directory for the chunks
         with tempfile.TemporaryDirectory() as temp_dir:
-            # Write and upload each encoded chunk
+            # Prepare all chunks for upload
+            all_chunk_info = []
+
             for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
                 for share_idx, share_data in enumerate(encoded_chunks):
                     # Create a name for this chunk that includes needed info
@@ -952,29 +963,48 @@ class IPFSClient:
                     with open(chunk_path, "wb") as f:
                         f.write(share_data)
 
-                    # Upload the chunk to IPFS
-                    try:
-                        chunk_cid = await self.upload_file(
-                            chunk_path, max_retries=max_retries
-                        )
-
-                        # Store info about this chunk
-                        chunk_info = {
+                    # Store info for async upload
+                    all_chunk_info.append(
+                        {
                             "name": chunk_name,
-                            "cid": chunk_cid,
+                            "path": chunk_path,
                             "original_chunk": original_idx,
                             "share_idx": share_idx,
                             "size": len(share_data),
                         }
-                        chunk_data.append(chunk_info)
+                    )
 
+            # Create a semaphore to limit concurrent uploads
+            semaphore = asyncio.Semaphore(batch_size)
+
+            # Define upload task for a single chunk
+            async def upload_chunk(chunk_info):
+                nonlocal chunk_uploads
+
+                async with semaphore:
+                    try:
+                        chunk_cid = await self.upload_file(
+                            chunk_info["path"], max_retries=max_retries
+                        )
+                        chunk_info["cid"] = chunk_cid
                         chunk_uploads += 1
                         if verbose and chunk_uploads % 10 == 0:
                             print(
                                 f"  Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
                             )
+                        return chunk_info
                     except Exception as e:
-                        print(f"Error uploading chunk {chunk_name}: {str(e)}")
+                        print(f"Error uploading chunk {chunk_info['name']}: {str(e)}")
+                        return None
+
+            # Create tasks for all chunk uploads
+            upload_tasks = [upload_chunk(chunk_info) for chunk_info in all_chunk_info]
+
+            # Wait for all uploads to complete
+            completed_uploads = await asyncio.gather(*upload_tasks)
+
+            # Filter out failed uploads
+            chunk_data = [upload for upload in completed_uploads if upload is not None]
 
         # Add all chunk info to metadata
         metadata["chunks"] = chunk_data
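The upload hunks above replace the sequential per-chunk upload with a bounded-concurrency pattern: every chunk is queued first, an asyncio.Semaphore caps how many uploads run at once, asyncio.gather awaits them all, and failed uploads (returned as None) are filtered out. A minimal sketch of that pattern under assumed names, with `fake_upload` standing in for `IPFSClient.upload_file`:

import asyncio

async def fake_upload(path: str) -> str:
    # Stand-in for IPFSClient.upload_file; returns a pretend CID.
    await asyncio.sleep(0.01)
    return f"Qm-{path}"

async def upload_all(paths, batch_size=20):
    # At most `batch_size` uploads are in flight at any moment.
    semaphore = asyncio.Semaphore(batch_size)

    async def upload_one(path):
        async with semaphore:
            try:
                return {"path": path, "cid": await fake_upload(path)}
            except Exception as e:
                print(f"Error uploading {path}: {e}")
                return None  # failed uploads are filtered out below

    results = await asyncio.gather(*(upload_one(p) for p in paths))
    return [r for r in results if r is not None]

if __name__ == "__main__":
    uploaded = asyncio.run(upload_all([f"chunk_{i}" for i in range(50)]))
    print(f"Uploaded {len(uploaded)} chunks")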
@@ -1082,6 +1112,9 @@ class IPFSClient:
                 )
                 if is_encrypted:
                     print("Encrypted: Yes")
+                print(
+                    f"Using parallel download with max {PARALLEL_ORIGINAL_CHUNKS} original chunks and {PARALLEL_EC_CHUNKS} chunk downloads concurrently"
+                )
 
             # Step 3: Group chunks by their original chunk index
             chunks_by_original = {}
@@ -1091,136 +1124,157 @@ class IPFSClient:
                     chunks_by_original[orig_idx] = []
                 chunks_by_original[orig_idx].append(chunk)
 
-            # Step 4: For each original chunk, download at least k shares
+            # Step 4: Process all original chunks in parallel
             if verbose:
                 total_original_chunks = len(chunks_by_original)
-                total_chunks_to_download = total_original_chunks * k
+                total_chunks_needed = total_original_chunks * k
                 print(
-                    f"Downloading and reconstructing {total_chunks_to_download} chunks..."
+                    f"Downloading and reconstructing {total_chunks_needed} chunks in parallel..."
                 )
 
-            reconstructed_chunks = []
-            chunks_downloaded = 0
-            chunks_failed = 0
-
-            for orig_idx in sorted(chunks_by_original.keys()):
-                available_chunks = chunks_by_original[orig_idx]
-
-                if len(available_chunks) < k:
-                    raise ValueError(
-                        f"Not enough chunks available for original chunk {orig_idx}. "
-                        f"Need {k}, but only have {len(available_chunks)}."
-                    )
+            # Create semaphores to limit concurrency
+            encoded_chunks_semaphore = asyncio.Semaphore(PARALLEL_EC_CHUNKS)
+            original_chunks_semaphore = asyncio.Semaphore(PARALLEL_ORIGINAL_CHUNKS)
+
+            # Process a single original chunk and its required downloads
+            async def process_original_chunk(orig_idx, available_chunks):
+                # Limit number of original chunks processing at once
+                async with original_chunks_semaphore:
+                    if verbose:
+                        print(f"Processing original chunk {orig_idx}...")
+
+                    if len(available_chunks) < k:
+                        raise ValueError(
+                            f"Not enough chunks available for original chunk {orig_idx}. "
+                            f"Need {k}, but only have {len(available_chunks)}."
+                        )
 
-                # Try to download all available chunks, but we only need k successful ones
-                downloaded_shares = []
-                share_indexes = []
-                chunks_to_try = available_chunks.copy()
+                    # Try slightly more than k chunks (k+2) to handle some failures
+                    num_to_try = min(k + 2, len(available_chunks))
+                    chunks_to_try = random.sample(available_chunks, num_to_try)
 
-                # Shuffle to get a better variety of chunks
-                random.shuffle(chunks_to_try)
+                    # Track downloaded chunks
+                    download_tasks = []
 
-                for chunk in chunks_to_try:
-                    # Break if we already have k chunks
-                    if len(downloaded_shares) >= k:
-                        break
+                    # Start parallel downloads for chunks
+                    for chunk in chunks_to_try:
+                        chunk_path = os.path.join(temp_dir, f"{chunk['name']}")
 
-                    chunk_path = os.path.join(temp_dir, chunk["name"])
-                    try:
-                        # Extract the CID string from the chunk's cid dictionary
+                        # Extract CID
                         chunk_cid = (
                             chunk["cid"]["cid"]
                             if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
                             else chunk["cid"]
                         )
-                        await self.download_file(
-                            chunk_cid, chunk_path, max_retries=max_retries
-                        )
-                        chunks_downloaded += 1
 
-                        # Read the chunk data
-                        with open(chunk_path, "rb") as f:
-                            share_data = f.read()
-
-                        downloaded_shares.append(share_data)
-                        share_indexes.append(chunk["share_idx"])
-
-                    except Exception as e:
-                        if verbose:
-                            print(f"Error downloading chunk {chunk['name']}: {str(e)}")
-                        chunks_failed += 1
-                        # Continue to the next chunk
+                        # Create download task
+                        async def download_chunk(cid, path, chunk_info):
+                            async with encoded_chunks_semaphore:
+                                try:
+                                    await self.download_file(
+                                        cid, path, max_retries=max_retries
+                                    )
+
+                                    # Read chunk data
+                                    with open(path, "rb") as f:
+                                        share_data = f.read()
+
+                                    return {
+                                        "success": True,
+                                        "data": share_data,
+                                        "share_idx": chunk_info["share_idx"],
+                                        "name": chunk_info["name"],
+                                    }
+                                except Exception as e:
+                                    if verbose:
+                                        print(
+                                            f"Error downloading chunk {chunk_info['name']}: {str(e)}"
+                                        )
+                                    return {
+                                        "success": False,
+                                        "error": str(e),
+                                        "name": chunk_info["name"],
+                                    }
+
+                        # Create task
+                        task = asyncio.create_task(
+                            download_chunk(chunk_cid, chunk_path, chunk)
+                        )
+                        download_tasks.append(task)
+
+                    # Process downloads as they complete
+                    downloaded_shares = []
+                    share_indexes = []
+
+                    for done_task in asyncio.as_completed(download_tasks):
+                        result = await done_task
+
+                        if result["success"]:
+                            downloaded_shares.append(result["data"])
+                            share_indexes.append(result["share_idx"])
+
+                            # Once we have k chunks, cancel remaining downloads
+                            if len(downloaded_shares) >= k:
+                                for task in download_tasks:
+                                    if not task.done():
+                                        task.cancel()
+                                break
+
+                    # Check if we have enough chunks
+                    if len(downloaded_shares) < k:
+                        raise ValueError(
+                            f"Failed to download enough chunks for original chunk {orig_idx}. "
+                            f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                        )
 
-                # If we don't have enough chunks, fail
-                if len(downloaded_shares) < k:
-                    raise ValueError(
-                        f"Failed to download enough chunks for original chunk {orig_idx}. "
-                        f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                    # Reconstruct this chunk
+                    decoder = zfec.Decoder(k, m)
+                    reconstructed_data = decoder.decode(
+                        downloaded_shares, share_indexes
                     )
 
-                # Reconstruct this chunk
-                decoder = zfec.Decoder(k, m)
-                reconstructed_data = decoder.decode(downloaded_shares, share_indexes)
+                    if not isinstance(reconstructed_data, list):
+                        raise TypeError(
+                            f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+                        )
 
-                if not isinstance(reconstructed_data, list):
-                    # Handle unexpected output type
-                    raise TypeError(
-                        f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
-                    )
+                    # Calculate the actual size of this original chunk
+                    is_last_chunk = orig_idx == max(chunks_by_original.keys())
+                    original_chunk_size = total_original_size - orig_idx * chunk_size
+                    if not is_last_chunk:
+                        original_chunk_size = min(chunk_size, original_chunk_size)
+
+                    # Recombine the sub-blocks
+                    reconstructed_chunk = b""
+                    total_bytes = 0
+                    for sub_block in reconstructed_data:
+                        bytes_to_take = min(
+                            len(sub_block), original_chunk_size - total_bytes
+                        )
+                        if bytes_to_take <= 0:
+                            break
 
-                # Calculate the actual size of this original chunk
-                # For all chunks except possibly the last one, it should be chunk_size
-                is_last_chunk = orig_idx == max(chunks_by_original.keys())
-                original_chunk_size = total_original_size - orig_idx * chunk_size
-                if not is_last_chunk:
-                    original_chunk_size = min(chunk_size, original_chunk_size)
-
-                # Recombine the sub-blocks, respecting the original chunk size
-                reconstructed_chunk = b""
-                total_bytes = 0
-                for sub_block in reconstructed_data:
-                    # Calculate how many bytes we should take from this sub-block
-                    bytes_to_take = min(
-                        len(sub_block), original_chunk_size - total_bytes
-                    )
-                    if bytes_to_take <= 0:
-                        break
+                        reconstructed_chunk += sub_block[:bytes_to_take]
+                        total_bytes += bytes_to_take
 
-                    reconstructed_chunk += sub_block[:bytes_to_take]
-                    total_bytes += bytes_to_take
+                    return reconstructed_chunk
 
-                reconstructed_chunks.append(reconstructed_chunk)
+            # Create tasks for all original chunks and process them in parallel
+            chunk_tasks = []
+            for orig_idx in sorted(chunks_by_original.keys()):
+                chunk_tasks.append(
+                    process_original_chunk(orig_idx, chunks_by_original[orig_idx])
+                )
 
-                # Add debugging information if verbose
-                if verbose:
-                    progress_pct = (orig_idx + 1) / total_original_chunks * 100
-                    print(
-                        f"  Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
-                    )
-                    if (
-                        orig_idx == 0 or is_last_chunk
-                    ):  # Only show debug for first and last chunks to avoid spam
-                        print(f"  Debug info for chunk {orig_idx}:")
-                        print(f"    Original chunk size: {original_chunk_size} bytes")
-                        print(
-                            f"    Reconstructed chunk size: {len(reconstructed_chunk)} bytes"
-                        )
-                        print(f"    Share indexes used: {share_indexes}")
-                        print(f"    Sub-blocks received: {len(reconstructed_data)}")
+            # Wait for all chunks to be reconstructed
+            reconstructed_chunks = await asyncio.gather(*chunk_tasks)
 
             if verbose:
                 download_time = time.time() - start_time
-                print(
-                    f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
-                )
-                if chunks_failed > 0:
-                    print(
-                        f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
-                    )
+                print(f"Chunk reconstruction completed in {download_time:.2f} seconds")
 
             # Step 5: Combine the reconstructed chunks into a file
-            if verbose:
-                print("Combining reconstructed chunks...")
+            print("Combining reconstructed chunks...")
 
             # Process chunks to remove padding correctly
             processed_chunks = []
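The reconstruction hunk above downloads shares for each original chunk concurrently and stops as soon as k shares have arrived: k+2 candidate chunks are sampled, downloads are started as tasks behind a semaphore, results are consumed with asyncio.as_completed, and the remaining tasks are cancelled once k successes are in hand. A minimal sketch of that early-exit pattern with a simulated `fetch_share` in place of `IPFSClient.download_file`:

import asyncio
import random

async def fetch_share(idx: int) -> bytes:
    # Stand-in for downloading one erasure-coded share from IPFS.
    await asyncio.sleep(random.random() / 10)
    return bytes([idx])

async def first_k_shares(candidate_indexes, k, max_parallel=20):
    # Cap concurrent downloads, mirroring PARALLEL_EC_CHUNKS above.
    semaphore = asyncio.Semaphore(max_parallel)

    async def bounded_fetch(idx):
        async with semaphore:
            return await fetch_share(idx)

    tasks = [asyncio.create_task(bounded_fetch(i)) for i in candidate_indexes]
    shares = []
    for done in asyncio.as_completed(tasks):
        shares.append(await done)
        if len(shares) >= k:
            # k shares are enough to decode; cancel the stragglers.
            for task in tasks:
                if not task.done():
                    task.cancel()
            break
    return shares

if __name__ == "__main__":
    print(asyncio.run(first_k_shares(range(5), k=3)))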
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "hippius"
-version = "0.2.1"
+version = "0.2.2"
 description = "Python SDK and CLI for Hippius blockchain storage"
 authors = ["Dubs <dubs@dubs.rs>"]
 readme = "README.md"