hippius 0.1.0__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hippius_sdk/ipfs.py CHANGED
@@ -8,9 +8,15 @@ import requests
  import base64
  import time
  import tempfile
- from typing import Dict, Any, Optional, Union, List
+ import hashlib
+ import uuid
+ from typing import Dict, Any, Optional, Union, List, Tuple
  import ipfshttpclient
  from dotenv import load_dotenv
+ from hippius_sdk.config import (
+     get_config_value,
+     get_encryption_key,
+ )

  # Import PyNaCl for encryption
  try:
@@ -21,14 +27,22 @@ try:
  except ImportError:
      ENCRYPTION_AVAILABLE = False

+ # Import zfec for erasure coding
+ try:
+     import zfec
+
+     ERASURE_CODING_AVAILABLE = True
+ except ImportError:
+     ERASURE_CODING_AVAILABLE = False
+

  class IPFSClient:
      """Client for interacting with IPFS."""

      def __init__(
          self,
-         gateway: str = "https://ipfs.io",
-         api_url: Optional[str] = "https://relay-fr.hippius.network",
+         gateway: Optional[str] = None,
+         api_url: Optional[str] = None,
          encrypt_by_default: Optional[bool] = None,
          encryption_key: Optional[bytes] = None,
      ):
@@ -36,12 +50,25 @@ class IPFSClient:
          Initialize the IPFS client.

          Args:
-             gateway: IPFS gateway URL for downloading content
-             api_url: IPFS API URL for uploading content. Defaults to Hippius relay node.
+             gateway: IPFS gateway URL for downloading content (from config if None)
+             api_url: IPFS API URL for uploading content (from config if None)
                  Set to None to try to connect to a local IPFS daemon.
-             encrypt_by_default: Whether to encrypt files by default (from .env if None)
-             encryption_key: Encryption key for NaCl secretbox (from .env if None)
+             encrypt_by_default: Whether to encrypt files by default (from config if None)
+             encryption_key: Encryption key for NaCl secretbox (from config if None)
          """
+         # Load configuration values if not explicitly provided
+         if gateway is None:
+             gateway = get_config_value("ipfs", "gateway", "https://ipfs.io")
+
+         if api_url is None:
+             api_url = get_config_value(
+                 "ipfs", "api_url", "https://relay-fr.hippius.network"
+             )
+
+         # Check if local IPFS is enabled in config
+         if get_config_value("ipfs", "local_ipfs", False):
+             api_url = "http://localhost:5001"
+
          self.gateway = gateway.rstrip("/")
          self.api_url = api_url
          self.client = None
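
With this change, explicit constructor arguments still take precedence; only values left as None fall back to the SDK configuration. A minimal usage sketch, assuming IPFSClient is imported directly from the hippius_sdk.ipfs module shown in this diff:

    from hippius_sdk.ipfs import IPFSClient

    # Explicit arguments win over the config file.
    client = IPFSClient(gateway="https://ipfs.io", api_url="http://localhost:5001")

    # With no arguments, gateway and api_url are read from the SDK config,
    # falling back to https://ipfs.io and https://relay-fr.hippius.network.
    default_client = IPFSClient()
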
@@ -82,7 +109,7 @@ class IPFSClient:
      def _initialize_encryption(
          self, encrypt_by_default: Optional[bool], encryption_key: Optional[bytes]
      ):
-         """Initialize encryption settings from parameters or .env file."""
+         """Initialize encryption settings from parameters or configuration."""
          # Check if encryption is available
          if not ENCRYPTION_AVAILABLE:
              self.encryption_available = False
@@ -90,31 +117,19 @@ class IPFSClient:
              self.encryption_key = None
              return

-         # Load environment variables
-         load_dotenv()
-
-         # Set up encryption default from parameter or .env
-         self.encrypt_by_default = encrypt_by_default
-         if self.encrypt_by_default is None:
-             env_default = os.getenv("HIPPIUS_ENCRYPT_BY_DEFAULT", "false").lower()
-             self.encrypt_by_default = env_default in ("true", "1", "yes")
+         # Set up encryption default from parameter or config
+         if encrypt_by_default is None:
+             self.encrypt_by_default = get_config_value(
+                 "encryption", "encrypt_by_default", False
+             )
+         else:
+             self.encrypt_by_default = encrypt_by_default

-         # Set up encryption key from parameter or .env
-         self.encryption_key = encryption_key
-         if self.encryption_key is None:
-             env_key = os.getenv("HIPPIUS_ENCRYPTION_KEY")
-             if env_key:
-                 try:
-                     self.encryption_key = base64.b64decode(env_key)
-                     # Validate key length
-                     if len(self.encryption_key) != nacl.secret.SecretBox.KEY_SIZE:
-                         print(
-                             f"Warning: Encryption key from .env has incorrect length. Expected {nacl.secret.SecretBox.KEY_SIZE} bytes, got {len(self.encryption_key)} bytes."
-                         )
-                         self.encryption_key = None
-                 except Exception as e:
-                     print(f"Warning: Failed to decode encryption key from .env: {e}")
-                     self.encryption_key = None
+         # Set up encryption key from parameter or config
+         if encryption_key is None:
+             self.encryption_key = get_encryption_key()
+         else:
+             self.encryption_key = encryption_key

          # Check if we have a valid key and can encrypt
          self.encryption_available = (
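
The encryption key now comes from get_encryption_key() rather than the HIPPIUS_ENCRYPTION_KEY entry in .env when no key argument is given. A hedged sketch of supplying a key explicitly, using the PyNaCl calls already referenced above (how the config stores the key is not shown in this diff):

    import base64

    import nacl.secret
    import nacl.utils

    from hippius_sdk.ipfs import IPFSClient

    # Generate a 32-byte secretbox key and hand it straight to the client;
    # base64-encode it if you want to keep it in a config file instead.
    key = nacl.utils.random(nacl.secret.SecretBox.KEY_SIZE)
    print("base64 key:", base64.b64encode(key).decode())

    client = IPFSClient(encryption_key=key, encrypt_by_default=True)
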
@@ -288,6 +303,7 @@ class IPFSClient:
          file_path: str,
          include_formatted_size: bool = True,
          encrypt: Optional[bool] = None,
+         max_retries: int = 3,
      ) -> Dict[str, Any]:
          """
          Upload a file to IPFS with optional encryption.
@@ -296,6 +312,7 @@ class IPFSClient:
              file_path: Path to the file to upload
              include_formatted_size: Whether to include formatted size in the result (default: True)
              encrypt: Whether to encrypt the file (overrides default)
+             max_retries: Maximum number of retry attempts (default: 3)

          Returns:
              Dict[str, Any]: Dictionary containing:
@@ -355,7 +372,7 @@ class IPFSClient:
              cid = result["Hash"]
          elif self.base_url:
              # Fallback to using HTTP API
-             cid = self._upload_via_http_api(upload_path)
+             cid = self._upload_via_http_api(upload_path, max_retries=max_retries)
          else:
              # No connection or API URL available
              raise ConnectionError(
@@ -714,7 +731,11 @@ class IPFSClient:
          return cid

      def download_file(
-         self, cid: str, output_path: str, decrypt: Optional[bool] = None
+         self,
+         cid: str,
+         output_path: str,
+         decrypt: Optional[bool] = None,
+         max_retries: int = 3,
      ) -> Dict[str, Any]:
          """
          Download a file from IPFS with optional decryption.
@@ -723,6 +744,7 @@ class IPFSClient:
              cid: Content Identifier (CID) of the file to download
              output_path: Path where the downloaded file will be saved
              decrypt: Whether to decrypt the file (overrides default)
+             max_retries: Maximum number of retry attempts (default: 3)

          Returns:
              Dict[str, Any]: Dictionary containing download results:
@@ -760,16 +782,41 @@ class IPFSClient:
          else:
              download_path = output_path

-         # Download the file
-         url = f"{self.gateway}/ipfs/{cid}"
-         response = requests.get(url, stream=True)
-         response.raise_for_status()
+         # Download the file with retry logic
+         retries = 0
+         last_error = None
+
+         while retries < max_retries:
+             try:
+                 # Download the file
+                 url = f"{self.gateway}/ipfs/{cid}"
+                 response = requests.get(url, stream=True)
+                 response.raise_for_status()
+
+                 os.makedirs(
+                     os.path.dirname(os.path.abspath(download_path)), exist_ok=True
+                 )
+
+                 with open(download_path, "wb") as f:
+                     for chunk in response.iter_content(chunk_size=8192):
+                         f.write(chunk)

-         os.makedirs(os.path.dirname(os.path.abspath(download_path)), exist_ok=True)
+                 # If we reach here, download was successful
+                 break

-         with open(download_path, "wb") as f:
-             for chunk in response.iter_content(chunk_size=8192):
-                 f.write(chunk)
+             except (requests.exceptions.RequestException, IOError) as e:
+                 # Save the error and retry
+                 last_error = e
+                 retries += 1
+
+                 if retries < max_retries:
+                     wait_time = 2**retries  # Exponential backoff: 2, 4, 8 seconds
+                     print(f"Download attempt {retries} failed: {str(e)}")
+                     print(f"Retrying in {wait_time} seconds...")
+                     time.sleep(wait_time)
+                 else:
+                     # Raise the last error if we've exhausted all retries
+                     raise

          # Decrypt if needed
          if should_decrypt:
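
Each failed attempt now backs off for 2**retries seconds (2, 4, 8, ...) and the last requests/IO error is re-raised once max_retries is exhausted. A small sketch of the new parameter (the CID and paths below are placeholders):

    from hippius_sdk.ipfs import IPFSClient

    client = IPFSClient()
    result = client.download_file(
        cid="Qm...",                      # placeholder CID
        output_path="downloads/file.bin",
        max_retries=5,                    # default is 3
    )
    print(result)
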
@@ -983,3 +1030,633 @@ class IPFSClient:
              "formatted_cid": formatted_cid,
              "message": message,
          }
+
+     def erasure_code_file(
+         self,
+         file_path: str,
+         k: int = 3,
+         m: int = 5,
+         chunk_size: int = 1024 * 1024,  # 1MB chunks
+         encrypt: Optional[bool] = None,
+         max_retries: int = 3,
+         verbose: bool = True,
+     ) -> Dict[str, Any]:
+         """
+         Split a file using erasure coding, then upload the chunks to IPFS.
+
+         This implements an (m, k) Reed-Solomon code where:
+         - m = total number of chunks
+         - k = minimum chunks needed to reconstruct the file (k <= m)
+         - The file can be reconstructed from any k of the m chunks
+
+         Args:
+             file_path: Path to the file to upload
+             k: Number of data chunks (minimum required to reconstruct)
+             m: Total number of chunks (k + redundancy)
+             chunk_size: Size of each chunk in bytes before encoding
+             encrypt: Whether to encrypt the file before encoding (defaults to self.encrypt_by_default)
+             max_retries: Maximum number of retry attempts for IPFS uploads
+             verbose: Whether to print progress information
+
+         Returns:
+             dict: Metadata including the original file info and chunk information
+
+         Raises:
+             ValueError: If erasure coding is not available or parameters are invalid
+             RuntimeError: If chunk uploads fail
+         """
+         if not ERASURE_CODING_AVAILABLE:
+             raise ValueError(
+                 "Erasure coding is not available. Install zfec: pip install zfec"
+             )
+
+         if k >= m:
+             raise ValueError(
+                 f"Invalid erasure coding parameters: k ({k}) must be less than m ({m})"
+             )
+
+         # Get original file info
+         file_name = os.path.basename(file_path)
+         file_size = os.path.getsize(file_path)
+         file_extension = os.path.splitext(file_name)[1]
+
+         # Determine if encryption should be used
+         should_encrypt = self.encrypt_by_default if encrypt is None else encrypt
+
+         if should_encrypt and not self.encryption_available:
+             raise ValueError(
+                 "Encryption requested but not available. Install PyNaCl and configure an encryption key."
+             )
+
+         # Generate a unique ID for this file
+         file_id = str(uuid.uuid4())
+
+         if verbose:
+             print(f"Processing file: {file_name} ({file_size/1024/1024:.2f} MB)")
+             print(
+                 f"Erasure coding parameters: k={k}, m={m} (need {k}/{m} chunks to reconstruct)"
+             )
+             if should_encrypt:
+                 print("Encryption: Enabled")
+
+         # Step 1: Read and potentially encrypt the file
+         with open(file_path, "rb") as f:
+             file_data = f.read()
+
+         # Calculate original file hash
+         original_file_hash = hashlib.sha256(file_data).hexdigest()
+
+         # Encrypt if requested
+         if should_encrypt:
+             if verbose:
+                 print("Encrypting file data...")
+             file_data = self.encrypt_data(file_data)
+
+         # Step 2: Split the file into chunks for erasure coding
+         chunks = []
+         chunk_positions = []
+         for i in range(0, len(file_data), chunk_size):
+             chunk = file_data[i : i + chunk_size]
+             chunks.append(chunk)
+             chunk_positions.append(i)
+
+         # Pad the last chunk if necessary
+         if chunks and len(chunks[-1]) < chunk_size:
+             pad_size = chunk_size - len(chunks[-1])
+             chunks[-1] = chunks[-1] + b"\0" * pad_size
+
+         # If we don't have enough chunks for the requested parameters, adjust
+         if len(chunks) < k:
+             if verbose:
+                 print(
+                     f"Warning: File has fewer chunks ({len(chunks)}) than k={k}. Adjusting parameters."
+                 )
+
+             # If we have a very small file, we'll just use a single chunk
+             # but will still split it into k sub-blocks during encoding
+             if len(chunks) == 1:
+                 if verbose:
+                     print(
+                         f"Small file (single chunk): will split into {k} sub-blocks for encoding"
+                     )
+             else:
+                 # If we have multiple chunks but fewer than k, adjust k to match
+                 old_k = k
+                 k = max(1, len(chunks))
+                 if verbose:
+                     print(f"Adjusting k from {old_k} to {k} to match available chunks")
+
+                 # Ensure m is greater than k for redundancy
+                 if m <= k:
+                     old_m = m
+                     m = k + 2  # Ensure we have at least 2 redundant chunks
+                     if verbose:
+                         print(f"Adjusting m from {old_m} to {m} to ensure redundancy")
+
+                 if verbose:
+                     print(f"New parameters: k={k}, m={m}")
+
+         # Ensure we have at least one chunk to process
+         if not chunks:
+             raise ValueError("File is empty or too small to process")
+
+         # For k=1 case, ensure we have proper sized input for zfec
+         if k == 1 and len(chunks) == 1:
+             # zfec expects the input to be exactly chunk_size for k=1
+             # So we need to pad if shorter or truncate if longer
+             if len(chunks[0]) != chunk_size:
+                 chunks[0] = chunks[0].ljust(chunk_size, b"\0")[:chunk_size]
+
+         # Create metadata
+         metadata = {
+             "original_file": {
+                 "name": file_name,
+                 "size": file_size,
+                 "hash": original_file_hash,
+                 "extension": file_extension,
+             },
+             "erasure_coding": {
+                 "k": k,
+                 "m": m,
+                 "chunk_size": chunk_size,
+                 "encrypted": should_encrypt,
+                 "file_id": file_id,
+             },
+             "chunks": [],
+         }
+
+         # Step 3: Apply erasure coding to each chunk
+         if verbose:
+             print(f"Applying erasure coding to {len(chunks)} chunks...")
+
+         all_encoded_chunks = []
+         for i, chunk in enumerate(chunks):
+             try:
+                 # For zfec encoder.encode(), we must provide exactly k blocks
+
+                 # Calculate how many bytes each sub-block should have
+                 sub_block_size = (
+                     len(chunk) + k - 1
+                 ) // k  # ceiling division for even distribution
+
+                 # Split the chunk into exactly k sub-blocks of equal size (padding as needed)
+                 sub_blocks = []
+                 for j in range(k):
+                     start = j * sub_block_size
+                     end = min(start + sub_block_size, len(chunk))
+                     sub_block = chunk[start:end]
+
+                     # Pad if needed to make all sub-blocks the same size
+                     if len(sub_block) < sub_block_size:
+                         sub_block = sub_block.ljust(sub_block_size, b"\0")
+
+                     sub_blocks.append(sub_block)
+
+                 # Verify we have exactly k sub-blocks
+                 if len(sub_blocks) != k:
+                     raise ValueError(
+                         f"Expected {k} sub-blocks but got {len(sub_blocks)}"
+                     )
+
+                 # Encode the k sub-blocks to create m encoded blocks
+                 encoder = zfec.Encoder(k, m)
+                 encoded_chunks = encoder.encode(sub_blocks)
+
+                 # Add to our collection
+                 all_encoded_chunks.append(encoded_chunks)
+
+                 if verbose and (i + 1) % 10 == 0:
+                     print(f"  Encoded {i+1}/{len(chunks)} chunks")
+             except Exception as e:
+                 # If encoding fails, provide more helpful error message
+                 error_msg = f"Error encoding chunk {i}: {str(e)}"
+                 print(f"Error details: chunk size={len(chunk)}, k={k}, m={m}")
+                 print(
+                     f"Sub-blocks created: {len(sub_blocks) if 'sub_blocks' in locals() else 'None'}"
+                 )
+                 raise RuntimeError(f"{error_msg}")
+
+         # Step 4: Upload all chunks to IPFS
+         if verbose:
+             print(f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS...")
+
+         chunk_uploads = 0
+         chunk_data = []
+
+         # Create a temporary directory for the chunks
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # Write and upload each encoded chunk
+             for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
+                 for share_idx, share_data in enumerate(encoded_chunks):
+                     # Create a name for this chunk that includes needed info
+                     chunk_name = f"{file_id}_chunk_{original_idx}_{share_idx}.ec"
+                     chunk_path = os.path.join(temp_dir, chunk_name)
+
+                     # Write the chunk to a temp file
+                     with open(chunk_path, "wb") as f:
+                         f.write(share_data)
+
+                     # Upload the chunk to IPFS
+                     try:
+                         chunk_cid = self.upload_file(
+                             chunk_path, max_retries=max_retries
+                         )
+
+                         # Store info about this chunk
+                         chunk_info = {
+                             "name": chunk_name,
+                             "cid": chunk_cid,
+                             "original_chunk": original_idx,
+                             "share_idx": share_idx,
+                             "size": len(share_data),
+                         }
+                         chunk_data.append(chunk_info)
+
+                         chunk_uploads += 1
+                         if verbose and chunk_uploads % 10 == 0:
+                             print(
+                                 f"  Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
+                             )
+                     except Exception as e:
+                         print(f"Error uploading chunk {chunk_name}: {str(e)}")
+
+             # Add all chunk info to metadata
+             metadata["chunks"] = chunk_data
+
+             # Step 5: Create and upload the metadata file
+             metadata_path = os.path.join(temp_dir, f"{file_id}_metadata.json")
+             with open(metadata_path, "w") as f:
+                 json.dump(metadata, f, indent=2)
+
+             if verbose:
+                 print(f"Uploading metadata file...")
+
+             # Upload the metadata file to IPFS
+             metadata_cid_result = self.upload_file(
+                 metadata_path, max_retries=max_retries
+             )
+
+             # Extract just the CID string from the result dictionary
+             metadata_cid = metadata_cid_result["cid"]
+             metadata["metadata_cid"] = metadata_cid
+
+             if verbose:
+                 print(f"Erasure coding complete!")
+                 print(f"Metadata CID: {metadata_cid}")
+                 print(f"Original file size: {file_size/1024/1024:.2f} MB")
+                 print(f"Total chunks: {len(chunks) * m}")
+                 print(f"Minimum chunks needed: {k * len(chunks)}")
+
+             return metadata
+
+     def reconstruct_from_erasure_code(
+         self,
+         metadata_cid: str,
+         output_file: str,
+         temp_dir: str = None,
+         max_retries: int = 3,
+         verbose: bool = True,
+     ) -> str:
+         """
+         Reconstruct a file from erasure-coded chunks using its metadata.
+
+         Args:
+             metadata_cid: IPFS CID of the metadata file
+             output_file: Path where the reconstructed file should be saved
+             temp_dir: Directory to use for temporary files (default: system temp)
+             max_retries: Maximum number of retry attempts for IPFS downloads
+             verbose: Whether to print progress information
+
+         Returns:
+             str: Path to the reconstructed file
+
+         Raises:
+             ValueError: If reconstruction fails
+             RuntimeError: If not enough chunks can be downloaded
+         """
+         if not ERASURE_CODING_AVAILABLE:
+             raise ValueError(
+                 "Erasure coding is not available. Install zfec: pip install zfec"
+             )
+
+         # Start timing the reconstruction process
+         start_time = time.time()
+
+         # Create a temporary directory if not provided
+         if temp_dir is None:
+             temp_dir_obj = tempfile.TemporaryDirectory()
+             temp_dir = temp_dir_obj.name
+         else:
+             temp_dir_obj = None
+
+         try:
+             # Step 1: Download and parse the metadata file
+             if verbose:
+                 print(f"Downloading metadata file (CID: {metadata_cid})...")
+
+             metadata_path = os.path.join(temp_dir, "metadata.json")
+             self.download_file(metadata_cid, metadata_path, max_retries=max_retries)
+
+             if verbose:
+                 metadata_download_time = time.time() - start_time
+                 print(f"Metadata downloaded in {metadata_download_time:.2f} seconds")
+
+             with open(metadata_path, "r") as f:
+                 metadata = json.load(f)
+
+             # Step 2: Extract key information
+             original_file = metadata["original_file"]
+             erasure_params = metadata["erasure_coding"]
+             chunks_info = metadata["chunks"]
+
+             k = erasure_params["k"]
+             m = erasure_params["m"]
+             is_encrypted = erasure_params.get("encrypted", False)
+             chunk_size = erasure_params.get("chunk_size", 1024 * 1024)
+
+             if verbose:
+                 print(
+                     f"File: {original_file['name']} ({original_file['size']/1024/1024:.2f} MB)"
+                 )
+                 print(
+                     f"Erasure coding parameters: k={k}, m={m} (need {k} of {m} chunks to reconstruct)"
+                 )
+                 if is_encrypted:
+                     print(f"Encrypted: Yes")
+
+             # Step 3: Group chunks by their original chunk index
+             chunks_by_original = {}
+             for chunk in chunks_info:
+                 orig_idx = chunk["original_chunk"]
+                 if orig_idx not in chunks_by_original:
+                     chunks_by_original[orig_idx] = []
+                 chunks_by_original[orig_idx].append(chunk)
+
+             # Step 4: For each original chunk, download at least k shares
+             if verbose:
+                 total_original_chunks = len(chunks_by_original)
+                 total_chunks_to_download = total_original_chunks * k
+                 print(
+                     f"Downloading and reconstructing {total_chunks_to_download} chunks..."
+                 )
+
+             reconstructed_chunks = []
+             chunks_downloaded = 0
+             chunks_failed = 0
+
+             for orig_idx in sorted(chunks_by_original.keys()):
+                 available_chunks = chunks_by_original[orig_idx]
+
+                 if len(available_chunks) < k:
+                     raise ValueError(
+                         f"Not enough chunks available for original chunk {orig_idx}. "
+                         f"Need {k}, but only have {len(available_chunks)}."
+                     )
+
+                 # We only need k chunks, so take the first k
+                 chunks_to_download = available_chunks[:k]
+
+                 # Download the chunks
+                 downloaded_shares = []
+                 share_indexes = []
+
+                 for chunk in chunks_to_download:
+                     chunk_path = os.path.join(temp_dir, chunk["name"])
+                     try:
+                         # Extract the CID string from the chunk's cid dictionary
+                         chunk_cid = (
+                             chunk["cid"]["cid"]
+                             if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
+                             else chunk["cid"]
+                         )
+                         self.download_file(
+                             chunk_cid, chunk_path, max_retries=max_retries
+                         )
+                         chunks_downloaded += 1
+
+                         # Read the chunk data
+                         with open(chunk_path, "rb") as f:
+                             share_data = f.read()
+
+                         downloaded_shares.append(share_data)
+                         share_indexes.append(chunk["share_idx"])
+
+                     except Exception as e:
+                         if verbose:
+                             print(f"Error downloading chunk {chunk['name']}: {str(e)}")
+                         chunks_failed += 1
+                         # Continue to the next chunk
+
+                 # If we don't have enough chunks, try to download more
+                 if len(downloaded_shares) < k:
+                     raise ValueError(
+                         f"Failed to download enough chunks for original chunk {orig_idx}. "
+                         f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                     )
+
+                 # Reconstruct this chunk
+                 decoder = zfec.Decoder(k, m)
+                 reconstructed_data = decoder.decode(downloaded_shares, share_indexes)
+
+                 # If we used the sub-block approach during encoding, we need to recombine the sub-blocks
+                 if isinstance(reconstructed_data, list):
+                     # Combine the sub-blocks back into a single chunk
+                     reconstructed_chunk = b"".join(reconstructed_data)
+                 else:
+                     # The simple case where we didn't use sub-blocks
+                     reconstructed_chunk = reconstructed_data
+
+                 reconstructed_chunks.append(reconstructed_chunk)
+
+                 # Print progress
+                 if verbose:
+                     progress_pct = (orig_idx + 1) / total_original_chunks * 100
+                     print(
+                         f"  Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
+                     )
+
+             if verbose:
+                 download_time = time.time() - start_time
+                 print(
+                     f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
+                 )
+                 if chunks_failed > 0:
+                     print(
+                         f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
+                     )
+
+             # Step 5: Combine the reconstructed chunks into a file
+             if verbose:
+                 print(f"Combining reconstructed chunks...")
+
+             # Concatenate all chunks
+             file_data = b"".join(reconstructed_chunks)
+
+             # Remove padding from the last chunk
+             if original_file["size"] < len(file_data):
+                 file_data = file_data[: original_file["size"]]
+
+             # Step 6: Decrypt if necessary
+             if is_encrypted:
+                 if not self.encryption_available:
+                     raise ValueError(
+                         "File is encrypted but encryption is not available. "
+                         "Install PyNaCl and configure an encryption key."
+                     )
+
+                 if verbose:
+                     print(f"Decrypting file data...")
+
+                 file_data = self.decrypt_data(file_data)
+
+             # Step 7: Write to the output file
+             with open(output_file, "wb") as f:
+                 f.write(file_data)
+
+             # Step 8: Verify hash if available
+             if "hash" in original_file:
+                 actual_hash = hashlib.sha256(file_data).hexdigest()
+                 expected_hash = original_file["hash"]
+
+                 if actual_hash != expected_hash:
+                     print(f"Warning: File hash mismatch!")
+                     print(f"  Expected: {expected_hash}")
+                     print(f"  Actual: {actual_hash}")
+                 elif verbose:
+                     print(f"Hash verification successful!")
+
+             total_time = time.time() - start_time
+             if verbose:
+                 print(f"Reconstruction complete in {total_time:.2f} seconds!")
+                 print(f"File saved to: {output_file}")
+
+             return output_file
+
+         finally:
+             # Clean up temporary directory if we created it
+             if temp_dir_obj is not None:
+                 temp_dir_obj.cleanup()
+
+     def store_erasure_coded_file(
+         self,
+         file_path: str,
+         k: int = 3,
+         m: int = 5,
+         chunk_size: int = 1024 * 1024,  # 1MB chunks
+         encrypt: Optional[bool] = None,
+         miner_ids: List[str] = None,
+         substrate_client=None,
+         max_retries: int = 3,
+         verbose: bool = True,
+     ) -> Dict[str, Any]:
+         """
+         Erasure code a file, upload the chunks to IPFS, and store in the Hippius marketplace.
+
+         This is a convenience method that combines erasure_code_file with storage_request.
+
+         Args:
+             file_path: Path to the file to upload
+             k: Number of data chunks (minimum required to reconstruct)
+             m: Total number of chunks (k + redundancy)
+             chunk_size: Size of each chunk in bytes before encoding
+             encrypt: Whether to encrypt the file before encoding
+             miner_ids: List of specific miner IDs to use for storage
+             substrate_client: SubstrateClient to use (or None to create one)
+             max_retries: Maximum number of retry attempts
+             verbose: Whether to print progress information
+
+         Returns:
+             dict: Result including metadata CID and transaction hash
+
+         Raises:
+             ValueError: If parameters are invalid
+             RuntimeError: If processing fails
+         """
+         # Step 1: Erasure code the file and upload chunks
+         metadata = self.erasure_code_file(
+             file_path=file_path,
+             k=k,
+             m=m,
+             chunk_size=chunk_size,
+             encrypt=encrypt,
+             max_retries=max_retries,
+             verbose=verbose,
+         )
+
+         # Step 2: Import substrate client if we need it
+         if substrate_client is None:
+             from hippius_sdk.substrate import SubstrateClient, FileInput
+
+             substrate_client = SubstrateClient()
+         else:
+             # Just get the FileInput class
+             from hippius_sdk.substrate import FileInput
+
+         original_file = metadata["original_file"]
+         metadata_cid = metadata["metadata_cid"]
+
+         # Create a list to hold all the file inputs (metadata + all chunks)
+         all_file_inputs = []
+
+         # Step 3: Prepare metadata file for storage
+         if verbose:
+             print(
+                 f"Preparing to store metadata and {len(metadata['chunks'])} chunks in the Hippius marketplace..."
+             )
+
+         # Create a file input for the metadata file
+         metadata_file_input = FileInput(
+             file_hash=metadata_cid, file_name=f"{original_file['name']}.ec_metadata"
+         )
+         all_file_inputs.append(metadata_file_input)
+
+         # Step 4: Add all chunks to the storage request
+         if verbose:
+             print(f"Adding all chunks to storage request...")
+
+         for i, chunk in enumerate(metadata["chunks"]):
+             # Extract the CID string from the chunk's cid dictionary
+             chunk_cid = (
+                 chunk["cid"]["cid"]
+                 if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
+                 else chunk["cid"]
+             )
+             chunk_file_input = FileInput(file_hash=chunk_cid, file_name=chunk["name"])
+             all_file_inputs.append(chunk_file_input)
+
+             # Print progress for large numbers of chunks
+             if verbose and (i + 1) % 50 == 0:
+                 print(
+                     f"  Prepared {i + 1}/{len(metadata['chunks'])} chunks for storage"
+                 )
+
+         # Step 5: Submit the storage request for all files
+         try:
+             if verbose:
+                 print(
+                     f"Submitting storage request for 1 metadata file and {len(metadata['chunks'])} chunks..."
+                 )
+
+             tx_hash = substrate_client.storage_request(
+                 files=all_file_inputs, miner_ids=miner_ids
+             )
+
+             if verbose:
+                 print(f"Successfully stored all files in marketplace!")
+                 print(f"Transaction hash: {tx_hash}")
+                 print(f"Metadata CID: {metadata_cid}")
+                 print(
+                     f"Total files stored: {len(all_file_inputs)} (1 metadata + {len(metadata['chunks'])} chunks)"
+                 )
+
+             return {
+                 "metadata": metadata,
+                 "metadata_cid": metadata_cid,
+                 "transaction_hash": tx_hash,
+                 "total_files_stored": len(all_file_inputs),
+             }
+
+         except Exception as e:
+             print(f"Error storing files in marketplace: {str(e)}")
+             # Return the metadata even if storage fails
+             return {"metadata": metadata, "metadata_cid": metadata_cid, "error": str(e)}