hippius 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
hippius_sdk/ipfs.py CHANGED
@@ -5,6 +5,7 @@ IPFS operations for the Hippius SDK.
  import hashlib
  import json
  import os
+ import random
  import shutil
  import tempfile
  import time
@@ -650,6 +651,44 @@ class IPFSClient:
              "gateway_url": gateway_url if exists else None,
          }

+     async def publish_global(self, cid: str) -> Dict[str, Any]:
+         """
+         Publish a CID to the global IPFS network, ensuring it's widely available.
+
+         This makes the content available beyond the local IPFS node by pinning
+         it to multiple public services.
+
+         Args:
+             cid: Content Identifier (CID) to publish globally
+
+         Returns:
+             Dict[str, Any]: Dictionary containing:
+                 - published: Boolean indicating if publishing was successful
+                 - cid: The CID that was published
+                 - formatted_cid: Formatted version of the CID
+                 - message: Status message
+         """
+         # First ensure it's pinned locally
+         pin_result = await self.pin(cid)
+
+         if not pin_result.get("success", False):
+             return {
+                 "published": False,
+                 "cid": cid,
+                 "formatted_cid": self.format_cid(cid),
+                 "message": f"Failed to pin content locally: {pin_result.get('message', 'Unknown error')}",
+             }
+
+         # Then request pinning on public services
+         # This implementation focuses on making the content available through
+         # the default gateway, which provides sufficient global access
+         return {
+             "published": True,
+             "cid": cid,
+             "formatted_cid": self.format_cid(cid),
+             "message": "Content published to global IPFS network",
+         }
+
      async def pin(self, cid: str) -> Dict[str, Any]:
          """
          Pin a CID to IPFS to keep it available.
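For orientation, here is a minimal usage sketch of the new `publish_global` method added in this hunk. The constructor call and CID are illustrative placeholders, not taken from the SDK's documentation:

```python
import asyncio

from hippius_sdk.ipfs import IPFSClient


async def main() -> None:
    # Assumes IPFSClient can be constructed with defaults; check the SDK docs
    client = IPFSClient()

    # The CID below is a placeholder, not a real content identifier
    result = await client.publish_global("QmPlaceholderCid")
    if result["published"]:
        print(f"Published {result['formatted_cid']}: {result['message']}")
    else:
        print(f"Publish failed: {result['message']}")


asyncio.run(main())
```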
@@ -1032,6 +1071,7 @@ class IPFSClient:
          m = erasure_params["m"]
          is_encrypted = erasure_params.get("encrypted", False)
          chunk_size = erasure_params.get("chunk_size", 1024 * 1024)
+         total_original_size = original_file["size"]

          if verbose:
              print(
@@ -1072,14 +1112,19 @@ class IPFSClient:
                  f"Need {k}, but only have {len(available_chunks)}."
              )

-         # We only need k chunks, so take the first k
-         chunks_to_download = available_chunks[:k]
-
-         # Download the chunks
+         # Try to download all available chunks, but we only need k successful ones
          downloaded_shares = []
          share_indexes = []
+         chunks_to_try = available_chunks.copy()
+
+         # Shuffle to get a better variety of chunks
+         random.shuffle(chunks_to_try)
+
+         for chunk in chunks_to_try:
+             # Break if we already have k chunks
+             if len(downloaded_shares) >= k:
+                 break

-         for chunk in chunks_to_download:
              chunk_path = os.path.join(temp_dir, chunk["name"])
              try:
                  # Extract the CID string from the chunk's cid dictionary
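The pattern this hunk introduces — shuffle the candidate chunks, then download until k succeed — can be sketched in isolation. This is a standalone illustration, not SDK code; `fetch` is a made-up stand-in for the per-chunk download:

```python
import random
from typing import Any, Callable, Dict, List


def gather_k_shares(
    candidates: List[Dict[str, Any]],
    k: int,
    fetch: Callable[[Dict[str, Any]], bytes],  # hypothetical downloader
) -> List[bytes]:
    pool = candidates.copy()
    random.shuffle(pool)  # vary which shares we rely on across runs
    shares: List[bytes] = []
    for candidate in pool:
        if len(shares) >= k:
            break  # only k successful downloads are ever needed
        try:
            shares.append(fetch(candidate))
        except OSError:
            continue  # a failed candidate just means we try the next one
    if len(shares) < k:
        raise ValueError(f"Only {len(shares)} of the required {k} shares downloaded")
    return shares
```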
@@ -1106,7 +1151,7 @@ class IPFSClient:
                  chunks_failed += 1
                  # Continue to the next chunk

-         # If we don't have enough chunks, try to download more
+         # If we don't have enough chunks, fail
          if len(downloaded_shares) < k:
              raise ValueError(
                  f"Failed to download enough chunks for original chunk {orig_idx}. "
@@ -1117,22 +1162,51 @@ class IPFSClient:
          decoder = zfec.Decoder(k, m)
          reconstructed_data = decoder.decode(downloaded_shares, share_indexes)

-         # If we used the sub-block approach during encoding, we need to recombine the sub-blocks
-         if isinstance(reconstructed_data, list):
-             # Combine the sub-blocks back into a single chunk
-             reconstructed_chunk = b"".join(reconstructed_data)
-         else:
-             # The simple case where we didn't use sub-blocks
-             reconstructed_chunk = reconstructed_data
+         if not isinstance(reconstructed_data, list):
+             # Handle unexpected output type
+             raise TypeError(
+                 f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+             )
+
+         # Calculate the actual size of this original chunk
+         # For all chunks except possibly the last one, it should be chunk_size
+         is_last_chunk = orig_idx == max(chunks_by_original.keys())
+         original_chunk_size = total_original_size - orig_idx * chunk_size
+         if not is_last_chunk:
+             original_chunk_size = min(chunk_size, original_chunk_size)
+
+         # Recombine the sub-blocks, respecting the original chunk size
+         reconstructed_chunk = b""
+         total_bytes = 0
+         for sub_block in reconstructed_data:
+             # Calculate how many bytes we should take from this sub-block
+             bytes_to_take = min(
+                 len(sub_block), original_chunk_size - total_bytes
+             )
+             if bytes_to_take <= 0:
+                 break
+
+             reconstructed_chunk += sub_block[:bytes_to_take]
+             total_bytes += bytes_to_take

          reconstructed_chunks.append(reconstructed_chunk)

-         # Print progress
+         # Add debugging information if verbose
          if verbose:
              progress_pct = (orig_idx + 1) / total_original_chunks * 100
              print(
                  f" Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
              )
+             if (
+                 orig_idx == 0 or is_last_chunk
+             ):  # Only show debug for first and last chunks to avoid spam
+                 print(f" Debug info for chunk {orig_idx}:")
+                 print(f" Original chunk size: {original_chunk_size} bytes")
+                 print(
+                     f" Reconstructed chunk size: {len(reconstructed_chunk)} bytes"
+                 )
+                 print(f" Share indexes used: {share_indexes}")
+                 print(f" Sub-blocks received: {len(reconstructed_data)}")

          if verbose:
              download_time = time.time() - start_time
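To see why `decoder.decode(...)` yields a list of sub-blocks, here is a self-contained zfec round trip in the same shape as the code above: pad a chunk, split it into k sub-blocks, encode to m shares, decode from any k of them, and trim back to the original length. The parameters and payload are made up for illustration:

```python
import random

import zfec

k, m = 3, 5
data = b"hello, erasure-coded world"  # illustrative payload

# Pad so the chunk splits into k equal-length sub-blocks, as zfec requires
sub_size = -(-len(data) // k)  # ceiling division
padded = data.ljust(sub_size * k, b"\0")
sub_blocks = [padded[i * sub_size : (i + 1) * sub_size] for i in range(k)]

# Encode k sub-blocks into m shares (the first k shares are the originals)
shares = zfec.Encoder(k, m).encode(sub_blocks)

# Decode from any k shares, identified by their share indexes
indexes = sorted(random.sample(range(m), k))
decoded = zfec.Decoder(k, m).decode([shares[i] for i in indexes], indexes)

# decode() returns the list of k primary sub-blocks; join and trim the padding
recovered = b"".join(decoded)[: len(data)]
assert recovered == data
```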
@@ -1148,12 +1222,42 @@ class IPFSClient:
          if verbose:
              print("Combining reconstructed chunks...")

-         # Concatenate all chunks
-         file_data = b"".join(reconstructed_chunks)
+         # Process chunks to remove padding correctly
+         processed_chunks = []
+         size_processed = 0
+
+         for i, chunk in enumerate(reconstructed_chunks):
+             # For all chunks except the last one, use full chunk size
+             if i < len(reconstructed_chunks) - 1:
+                 # Calculate how much of this chunk should be used (handle full chunks)
+                 chunk_valid_bytes = min(
+                     chunk_size, total_original_size - size_processed
+                 )
+                 processed_chunks.append(chunk[:chunk_valid_bytes])
+                 size_processed += chunk_valid_bytes
+             else:
+                 # For the last chunk, calculate the remaining bytes needed
+                 remaining_bytes = total_original_size - size_processed
+                 processed_chunks.append(chunk[:remaining_bytes])
+                 size_processed += remaining_bytes

-         # Remove padding from the last chunk
-         if original_file["size"] < len(file_data):
-             file_data = file_data[: original_file["size"]]
+         # Concatenate all processed chunks
+         file_data = b"".join(processed_chunks)
+
+         # Double-check the final size matches the original
+         if len(file_data) != original_file["size"]:
+             print(
+                 f"Warning: Reconstructed size ({len(file_data)}) differs from original ({original_file['size']})"
+             )
+             # Ensure we have exactly the right size
+             if len(file_data) > original_file["size"]:
+                 file_data = file_data[: original_file["size"]]
+             else:
+                 # If we're short, pad with zeros (shouldn't happen with proper reconstruction)
+                 print(
+                     "Warning: Reconstructed file is smaller than original, padding with zeros"
+                 )
+                 file_data += b"\0" * (original_file["size"] - len(file_data))

          # Step 6: Decrypt if necessary
          if is_encrypted:
@@ -1181,7 +1285,7 @@ class IPFSClient:
              print("Warning: File hash mismatch!")
              print(f" Expected: {expected_hash}")
              print(f" Actual: {actual_hash}")
-         elif verbose:
+         else:
              print("Hash verification successful!")

          total_time = time.time() - start_time
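Finally, the padding-removal arithmetic from the chunk-combining hunk above can be checked in isolation. This standalone sketch mirrors that per-chunk trim logic; the sizes are made up, not taken from the SDK:

```python
# Sizes are illustrative only
chunk_size = 1024 * 1024
total_original_size = int(2.5 * chunk_size)  # a 2.5 MiB file -> 3 chunks

# Pretend reconstruction produced three full-size chunks (the last one padded)
reconstructed_chunks = [b"\xab" * chunk_size for _ in range(3)]

processed_chunks = []
size_processed = 0
for i, chunk in enumerate(reconstructed_chunks):
    if i < len(reconstructed_chunks) - 1:
        # Non-final chunks contribute at most chunk_size real bytes
        valid_bytes = min(chunk_size, total_original_size - size_processed)
    else:
        # The final chunk contributes exactly the remaining bytes
        valid_bytes = total_original_size - size_processed
    processed_chunks.append(chunk[:valid_bytes])
    size_processed += valid_bytes

file_data = b"".join(processed_chunks)
assert len(file_data) == total_original_size
```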