hippius 0.1.0__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between versions exactly as they appear in the public registry.
- {hippius-0.1.0.dist-info → hippius-0.1.7.dist-info}/METADATA +386 -4
- hippius-0.1.7.dist-info/RECORD +10 -0
- hippius_sdk/__init__.py +45 -1
- hippius_sdk/cli.py +1509 -35
- hippius_sdk/client.py +187 -22
- hippius_sdk/config.py +744 -0
- hippius_sdk/ipfs.py +719 -42
- hippius_sdk/substrate.py +130 -68
- hippius-0.1.0.dist-info/RECORD +0 -9
- {hippius-0.1.0.dist-info → hippius-0.1.7.dist-info}/WHEEL +0 -0
- {hippius-0.1.0.dist-info → hippius-0.1.7.dist-info}/entry_points.txt +0 -0
hippius_sdk/ipfs.py
CHANGED
@@ -8,9 +8,15 @@ import requests
 import base64
 import time
 import tempfile
-
+import hashlib
+import uuid
+from typing import Dict, Any, Optional, Union, List, Tuple
 import ipfshttpclient
 from dotenv import load_dotenv
+from hippius_sdk.config import (
+    get_config_value,
+    get_encryption_key,
+)
 
 # Import PyNaCl for encryption
 try:
@@ -21,14 +27,22 @@ try:
 except ImportError:
     ENCRYPTION_AVAILABLE = False
 
+# Import zfec for erasure coding
+try:
+    import zfec
+
+    ERASURE_CODING_AVAILABLE = True
+except ImportError:
+    ERASURE_CODING_AVAILABLE = False
+
 
 class IPFSClient:
     """Client for interacting with IPFS."""
 
     def __init__(
         self,
-        gateway: str =
-        api_url: Optional[str] =
+        gateway: Optional[str] = None,
+        api_url: Optional[str] = None,
         encrypt_by_default: Optional[bool] = None,
         encryption_key: Optional[bytes] = None,
     ):
@@ -36,12 +50,25 @@ class IPFSClient:
         Initialize the IPFS client.
 
         Args:
-            gateway: IPFS gateway URL for downloading content
-            api_url: IPFS API URL for uploading content
+            gateway: IPFS gateway URL for downloading content (from config if None)
+            api_url: IPFS API URL for uploading content (from config if None)
                 Set to None to try to connect to a local IPFS daemon.
-            encrypt_by_default: Whether to encrypt files by default (from
-            encryption_key: Encryption key for NaCl secretbox (from
+            encrypt_by_default: Whether to encrypt files by default (from config if None)
+            encryption_key: Encryption key for NaCl secretbox (from config if None)
         """
+        # Load configuration values if not explicitly provided
+        if gateway is None:
+            gateway = get_config_value("ipfs", "gateway", "https://ipfs.io")
+
+        if api_url is None:
+            api_url = get_config_value(
+                "ipfs", "api_url", "https://relay-fr.hippius.network"
+            )
+
+        # Check if local IPFS is enabled in config
+        if get_config_value("ipfs", "local_ipfs", False):
+            api_url = "http://localhost:5001"
+
         self.gateway = gateway.rstrip("/")
         self.api_url = api_url
         self.client = None
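The constructor now resolves its connection settings from the SDK configuration when arguments are omitted. A minimal usage sketch based only on the calls visible in the hunk above (exact config file location and format are not shown here and should be treated as assumptions):

```python
from hippius_sdk.ipfs import IPFSClient

# Values omitted here fall back to get_config_value(...) lookups:
#   gateway -> ("ipfs", "gateway"), default "https://ipfs.io"
#   api_url -> ("ipfs", "api_url"), default "https://relay-fr.hippius.network"
#   a truthy ("ipfs", "local_ipfs") switches api_url to http://localhost:5001
client = IPFSClient()

# Explicit arguments still take precedence over the config file
local_client = IPFSClient(api_url="http://localhost:5001")
```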
@@ -82,7 +109,7 @@ class IPFSClient:
     def _initialize_encryption(
         self, encrypt_by_default: Optional[bool], encryption_key: Optional[bytes]
     ):
-        """Initialize encryption settings from parameters or .
+        """Initialize encryption settings from parameters or configuration."""
         # Check if encryption is available
         if not ENCRYPTION_AVAILABLE:
             self.encryption_available = False
@@ -90,31 +117,19 @@ class IPFSClient:
             self.encryption_key = None
             return
 
-        #
-
-
-
-
-
-
-            self.encrypt_by_default = env_default in ("true", "1", "yes")
+        # Set up encryption default from parameter or config
+        if encrypt_by_default is None:
+            self.encrypt_by_default = get_config_value(
+                "encryption", "encrypt_by_default", False
+            )
+        else:
+            self.encrypt_by_default = encrypt_by_default
 
-        # Set up encryption key from parameter or
-
-
-
-
-        try:
-            self.encryption_key = base64.b64decode(env_key)
-            # Validate key length
-            if len(self.encryption_key) != nacl.secret.SecretBox.KEY_SIZE:
-                print(
-                    f"Warning: Encryption key from .env has incorrect length. Expected {nacl.secret.SecretBox.KEY_SIZE} bytes, got {len(self.encryption_key)} bytes."
-                )
-                self.encryption_key = None
-        except Exception as e:
-            print(f"Warning: Failed to decode encryption key from .env: {e}")
-            self.encryption_key = None
+        # Set up encryption key from parameter or config
+        if encryption_key is None:
+            self.encryption_key = get_encryption_key()
+        else:
+            self.encryption_key = encryption_key
 
         # Check if we have a valid key and can encrypt
         self.encryption_available = (
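With this change the encryption settings come from constructor parameters or from hippius_sdk.config (get_encryption_key) rather than from a .env file. A small sketch of the two paths; everything beyond the parameter names shown in the hunk is an assumption:

```python
import nacl.secret
import nacl.utils
from hippius_sdk.ipfs import IPFSClient

# Path 1: rely on the SDK config; the key is resolved via get_encryption_key()
client = IPFSClient(encrypt_by_default=True)

# Path 2: supply a 32-byte NaCl secretbox key explicitly (generated here
# purely for illustration -- how the key is stored is up to the caller)
key = nacl.utils.random(nacl.secret.SecretBox.KEY_SIZE)
client_with_key = IPFSClient(encrypt_by_default=True, encryption_key=key)
```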
@@ -288,6 +303,7 @@ class IPFSClient:
         file_path: str,
         include_formatted_size: bool = True,
         encrypt: Optional[bool] = None,
+        max_retries: int = 3,
     ) -> Dict[str, Any]:
         """
         Upload a file to IPFS with optional encryption.
@@ -296,6 +312,7 @@ class IPFSClient:
             file_path: Path to the file to upload
             include_formatted_size: Whether to include formatted size in the result (default: True)
             encrypt: Whether to encrypt the file (overrides default)
+            max_retries: Maximum number of retry attempts (default: 3)
 
         Returns:
             Dict[str, Any]: Dictionary containing:
@@ -355,7 +372,7 @@ class IPFSClient:
             cid = result["Hash"]
         elif self.base_url:
             # Fallback to using HTTP API
-            cid = self._upload_via_http_api(upload_path)
+            cid = self._upload_via_http_api(upload_path, max_retries=max_retries)
         else:
             # No connection or API URL available
             raise ConnectionError(
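The new max_retries argument is threaded from upload_file down into the HTTP-API fallback. A usage sketch (the file name is a placeholder; the shape of the returned dictionary beyond the "cid" key is inferred from other parts of this diff):

```python
from hippius_sdk.ipfs import IPFSClient

client = IPFSClient()

# Retry the upload up to five times before giving up
result = client.upload_file("example.bin", max_retries=5)
print(result["cid"])  # elsewhere in this diff the CID is read back this way
```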
@@ -714,7 +731,11 @@ class IPFSClient:
         return cid
 
     def download_file(
-        self,
+        self,
+        cid: str,
+        output_path: str,
+        decrypt: Optional[bool] = None,
+        max_retries: int = 3,
     ) -> Dict[str, Any]:
         """
         Download a file from IPFS with optional decryption.
@@ -723,6 +744,7 @@ class IPFSClient:
             cid: Content Identifier (CID) of the file to download
             output_path: Path where the downloaded file will be saved
             decrypt: Whether to decrypt the file (overrides default)
+            max_retries: Maximum number of retry attempts (default: 3)
 
         Returns:
             Dict[str, Any]: Dictionary containing download results:
@@ -760,16 +782,41 @@ class IPFSClient:
         else:
             download_path = output_path
 
-        # Download the file
-
-
-
+        # Download the file with retry logic
+        retries = 0
+        last_error = None
+
+        while retries < max_retries:
+            try:
+                # Download the file
+                url = f"{self.gateway}/ipfs/{cid}"
+                response = requests.get(url, stream=True)
+                response.raise_for_status()
+
+                os.makedirs(
+                    os.path.dirname(os.path.abspath(download_path)), exist_ok=True
+                )
+
+                with open(download_path, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
 
-
+                # If we reach here, download was successful
+                break
 
-
-
-
+            except (requests.exceptions.RequestException, IOError) as e:
+                # Save the error and retry
+                last_error = e
+                retries += 1
+
+                if retries < max_retries:
+                    wait_time = 2**retries  # Exponential backoff: 2, 4, 8 seconds
+                    print(f"Download attempt {retries} failed: {str(e)}")
+                    print(f"Retrying in {wait_time} seconds...")
+                    time.sleep(wait_time)
+                else:
+                    # Raise the last error if we've exhausted all retries
+                    raise
 
         # Decrypt if needed
         if should_decrypt:
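Downloads now retry transient gateway failures with exponential backoff before re-raising the last error. A brief usage sketch (the CID and file name are placeholders):

```python
from hippius_sdk.ipfs import IPFSClient

client = IPFSClient()

# With max_retries=4, a failing fetch is retried after 2s, 4s, and 8s waits;
# the fourth failure's exception is re-raised to the caller.
client.download_file(
    cid="QmExampleCid",          # placeholder CID
    output_path="restored.bin",
    max_retries=4,
)
```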
@@ -983,3 +1030,633 @@ class IPFSClient:
             "formatted_cid": formatted_cid,
             "message": message,
         }
+
+    def erasure_code_file(
+        self,
+        file_path: str,
+        k: int = 3,
+        m: int = 5,
+        chunk_size: int = 1024 * 1024,  # 1MB chunks
+        encrypt: Optional[bool] = None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Split a file using erasure coding, then upload the chunks to IPFS.
+
+        This implements an (m, k) Reed-Solomon code where:
+        - m = total number of chunks
+        - k = minimum chunks needed to reconstruct the file (k <= m)
+        - The file can be reconstructed from any k of the m chunks
+
+        Args:
+            file_path: Path to the file to upload
+            k: Number of data chunks (minimum required to reconstruct)
+            m: Total number of chunks (k + redundancy)
+            chunk_size: Size of each chunk in bytes before encoding
+            encrypt: Whether to encrypt the file before encoding (defaults to self.encrypt_by_default)
+            max_retries: Maximum number of retry attempts for IPFS uploads
+            verbose: Whether to print progress information
+
+        Returns:
+            dict: Metadata including the original file info and chunk information
+
+        Raises:
+            ValueError: If erasure coding is not available or parameters are invalid
+            RuntimeError: If chunk uploads fail
+        """
+        if not ERASURE_CODING_AVAILABLE:
+            raise ValueError(
+                "Erasure coding is not available. Install zfec: pip install zfec"
+            )
+
+        if k >= m:
+            raise ValueError(
+                f"Invalid erasure coding parameters: k ({k}) must be less than m ({m})"
+            )
+
+        # Get original file info
+        file_name = os.path.basename(file_path)
+        file_size = os.path.getsize(file_path)
+        file_extension = os.path.splitext(file_name)[1]
+
+        # Determine if encryption should be used
+        should_encrypt = self.encrypt_by_default if encrypt is None else encrypt
+
+        if should_encrypt and not self.encryption_available:
+            raise ValueError(
+                "Encryption requested but not available. Install PyNaCl and configure an encryption key."
+            )
+
+        # Generate a unique ID for this file
+        file_id = str(uuid.uuid4())
+
+        if verbose:
+            print(f"Processing file: {file_name} ({file_size/1024/1024:.2f} MB)")
+            print(
+                f"Erasure coding parameters: k={k}, m={m} (need {k}/{m} chunks to reconstruct)"
+            )
+            if should_encrypt:
+                print("Encryption: Enabled")
+
+        # Step 1: Read and potentially encrypt the file
+        with open(file_path, "rb") as f:
+            file_data = f.read()
+
+        # Calculate original file hash
+        original_file_hash = hashlib.sha256(file_data).hexdigest()
+
+        # Encrypt if requested
+        if should_encrypt:
+            if verbose:
+                print("Encrypting file data...")
+            file_data = self.encrypt_data(file_data)
+
+        # Step 2: Split the file into chunks for erasure coding
+        chunks = []
+        chunk_positions = []
+        for i in range(0, len(file_data), chunk_size):
+            chunk = file_data[i : i + chunk_size]
+            chunks.append(chunk)
+            chunk_positions.append(i)
+
+        # Pad the last chunk if necessary
+        if chunks and len(chunks[-1]) < chunk_size:
+            pad_size = chunk_size - len(chunks[-1])
+            chunks[-1] = chunks[-1] + b"\0" * pad_size
+
+        # If we don't have enough chunks for the requested parameters, adjust
+        if len(chunks) < k:
+            if verbose:
+                print(
+                    f"Warning: File has fewer chunks ({len(chunks)}) than k={k}. Adjusting parameters."
+                )
+
+            # If we have a very small file, we'll just use a single chunk
+            # but will still split it into k sub-blocks during encoding
+            if len(chunks) == 1:
+                if verbose:
+                    print(
+                        f"Small file (single chunk): will split into {k} sub-blocks for encoding"
+                    )
+            else:
+                # If we have multiple chunks but fewer than k, adjust k to match
+                old_k = k
+                k = max(1, len(chunks))
+                if verbose:
+                    print(f"Adjusting k from {old_k} to {k} to match available chunks")
+
+            # Ensure m is greater than k for redundancy
+            if m <= k:
+                old_m = m
+                m = k + 2  # Ensure we have at least 2 redundant chunks
+                if verbose:
+                    print(f"Adjusting m from {old_m} to {m} to ensure redundancy")
+
+            if verbose:
+                print(f"New parameters: k={k}, m={m}")
+
+        # Ensure we have at least one chunk to process
+        if not chunks:
+            raise ValueError("File is empty or too small to process")
+
+        # For k=1 case, ensure we have proper sized input for zfec
+        if k == 1 and len(chunks) == 1:
+            # zfec expects the input to be exactly chunk_size for k=1
+            # So we need to pad if shorter or truncate if longer
+            if len(chunks[0]) != chunk_size:
+                chunks[0] = chunks[0].ljust(chunk_size, b"\0")[:chunk_size]
+
+        # Create metadata
+        metadata = {
+            "original_file": {
+                "name": file_name,
+                "size": file_size,
+                "hash": original_file_hash,
+                "extension": file_extension,
+            },
+            "erasure_coding": {
+                "k": k,
+                "m": m,
+                "chunk_size": chunk_size,
+                "encrypted": should_encrypt,
+                "file_id": file_id,
+            },
+            "chunks": [],
+        }
+
+        # Step 3: Apply erasure coding to each chunk
+        if verbose:
+            print(f"Applying erasure coding to {len(chunks)} chunks...")
+
+        all_encoded_chunks = []
+        for i, chunk in enumerate(chunks):
+            try:
+                # For zfec encoder.encode(), we must provide exactly k blocks
+
+                # Calculate how many bytes each sub-block should have
+                sub_block_size = (
+                    len(chunk) + k - 1
+                ) // k  # ceiling division for even distribution
+
+                # Split the chunk into exactly k sub-blocks of equal size (padding as needed)
+                sub_blocks = []
+                for j in range(k):
+                    start = j * sub_block_size
+                    end = min(start + sub_block_size, len(chunk))
+                    sub_block = chunk[start:end]
+
+                    # Pad if needed to make all sub-blocks the same size
+                    if len(sub_block) < sub_block_size:
+                        sub_block = sub_block.ljust(sub_block_size, b"\0")
+
+                    sub_blocks.append(sub_block)
+
+                # Verify we have exactly k sub-blocks
+                if len(sub_blocks) != k:
+                    raise ValueError(
+                        f"Expected {k} sub-blocks but got {len(sub_blocks)}"
+                    )
+
+                # Encode the k sub-blocks to create m encoded blocks
+                encoder = zfec.Encoder(k, m)
+                encoded_chunks = encoder.encode(sub_blocks)
+
+                # Add to our collection
+                all_encoded_chunks.append(encoded_chunks)
+
+                if verbose and (i + 1) % 10 == 0:
+                    print(f"  Encoded {i+1}/{len(chunks)} chunks")
+            except Exception as e:
+                # If encoding fails, provide more helpful error message
+                error_msg = f"Error encoding chunk {i}: {str(e)}"
+                print(f"Error details: chunk size={len(chunk)}, k={k}, m={m}")
+                print(
+                    f"Sub-blocks created: {len(sub_blocks) if 'sub_blocks' in locals() else 'None'}"
+                )
+                raise RuntimeError(f"{error_msg}")
+
+        # Step 4: Upload all chunks to IPFS
+        if verbose:
+            print(f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS...")
+
+        chunk_uploads = 0
+        chunk_data = []
+
+        # Create a temporary directory for the chunks
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Write and upload each encoded chunk
+            for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
+                for share_idx, share_data in enumerate(encoded_chunks):
+                    # Create a name for this chunk that includes needed info
+                    chunk_name = f"{file_id}_chunk_{original_idx}_{share_idx}.ec"
+                    chunk_path = os.path.join(temp_dir, chunk_name)
+
+                    # Write the chunk to a temp file
+                    with open(chunk_path, "wb") as f:
+                        f.write(share_data)
+
+                    # Upload the chunk to IPFS
+                    try:
+                        chunk_cid = self.upload_file(
+                            chunk_path, max_retries=max_retries
+                        )
+
+                        # Store info about this chunk
+                        chunk_info = {
+                            "name": chunk_name,
+                            "cid": chunk_cid,
+                            "original_chunk": original_idx,
+                            "share_idx": share_idx,
+                            "size": len(share_data),
+                        }
+                        chunk_data.append(chunk_info)
+
+                        chunk_uploads += 1
+                        if verbose and chunk_uploads % 10 == 0:
+                            print(
+                                f"  Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
+                            )
+                    except Exception as e:
+                        print(f"Error uploading chunk {chunk_name}: {str(e)}")
+
+            # Add all chunk info to metadata
+            metadata["chunks"] = chunk_data
+
+            # Step 5: Create and upload the metadata file
+            metadata_path = os.path.join(temp_dir, f"{file_id}_metadata.json")
+            with open(metadata_path, "w") as f:
+                json.dump(metadata, f, indent=2)
+
+            if verbose:
+                print(f"Uploading metadata file...")
+
+            # Upload the metadata file to IPFS
+            metadata_cid_result = self.upload_file(
+                metadata_path, max_retries=max_retries
+            )
+
+            # Extract just the CID string from the result dictionary
+            metadata_cid = metadata_cid_result["cid"]
+            metadata["metadata_cid"] = metadata_cid
+
+            if verbose:
+                print(f"Erasure coding complete!")
+                print(f"Metadata CID: {metadata_cid}")
+                print(f"Original file size: {file_size/1024/1024:.2f} MB")
+                print(f"Total chunks: {len(chunks) * m}")
+                print(f"Minimum chunks needed: {k * len(chunks)}")
+
+        return metadata
+
+    def reconstruct_from_erasure_code(
+        self,
+        metadata_cid: str,
+        output_file: str,
+        temp_dir: str = None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> str:
+        """
+        Reconstruct a file from erasure-coded chunks using its metadata.
+
+        Args:
+            metadata_cid: IPFS CID of the metadata file
+            output_file: Path where the reconstructed file should be saved
+            temp_dir: Directory to use for temporary files (default: system temp)
+            max_retries: Maximum number of retry attempts for IPFS downloads
+            verbose: Whether to print progress information
+
+        Returns:
+            str: Path to the reconstructed file
+
+        Raises:
+            ValueError: If reconstruction fails
+            RuntimeError: If not enough chunks can be downloaded
+        """
+        if not ERASURE_CODING_AVAILABLE:
+            raise ValueError(
+                "Erasure coding is not available. Install zfec: pip install zfec"
+            )
+
+        # Start timing the reconstruction process
+        start_time = time.time()
+
+        # Create a temporary directory if not provided
+        if temp_dir is None:
+            temp_dir_obj = tempfile.TemporaryDirectory()
+            temp_dir = temp_dir_obj.name
+        else:
+            temp_dir_obj = None
+
+        try:
+            # Step 1: Download and parse the metadata file
+            if verbose:
+                print(f"Downloading metadata file (CID: {metadata_cid})...")
+
+            metadata_path = os.path.join(temp_dir, "metadata.json")
+            self.download_file(metadata_cid, metadata_path, max_retries=max_retries)
+
+            if verbose:
+                metadata_download_time = time.time() - start_time
+                print(f"Metadata downloaded in {metadata_download_time:.2f} seconds")
+
+            with open(metadata_path, "r") as f:
+                metadata = json.load(f)
+
+            # Step 2: Extract key information
+            original_file = metadata["original_file"]
+            erasure_params = metadata["erasure_coding"]
+            chunks_info = metadata["chunks"]
+
+            k = erasure_params["k"]
+            m = erasure_params["m"]
+            is_encrypted = erasure_params.get("encrypted", False)
+            chunk_size = erasure_params.get("chunk_size", 1024 * 1024)
+
+            if verbose:
+                print(
+                    f"File: {original_file['name']} ({original_file['size']/1024/1024:.2f} MB)"
+                )
+                print(
+                    f"Erasure coding parameters: k={k}, m={m} (need {k} of {m} chunks to reconstruct)"
+                )
+                if is_encrypted:
+                    print(f"Encrypted: Yes")
+
+            # Step 3: Group chunks by their original chunk index
+            chunks_by_original = {}
+            for chunk in chunks_info:
+                orig_idx = chunk["original_chunk"]
+                if orig_idx not in chunks_by_original:
+                    chunks_by_original[orig_idx] = []
+                chunks_by_original[orig_idx].append(chunk)
+
+            # Step 4: For each original chunk, download at least k shares
+            if verbose:
+                total_original_chunks = len(chunks_by_original)
+                total_chunks_to_download = total_original_chunks * k
+                print(
+                    f"Downloading and reconstructing {total_chunks_to_download} chunks..."
+                )
+
+            reconstructed_chunks = []
+            chunks_downloaded = 0
+            chunks_failed = 0
+
+            for orig_idx in sorted(chunks_by_original.keys()):
+                available_chunks = chunks_by_original[orig_idx]
+
+                if len(available_chunks) < k:
+                    raise ValueError(
+                        f"Not enough chunks available for original chunk {orig_idx}. "
+                        f"Need {k}, but only have {len(available_chunks)}."
+                    )
+
+                # We only need k chunks, so take the first k
+                chunks_to_download = available_chunks[:k]
+
+                # Download the chunks
+                downloaded_shares = []
+                share_indexes = []
+
+                for chunk in chunks_to_download:
+                    chunk_path = os.path.join(temp_dir, chunk["name"])
+                    try:
+                        # Extract the CID string from the chunk's cid dictionary
+                        chunk_cid = (
+                            chunk["cid"]["cid"]
+                            if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
+                            else chunk["cid"]
+                        )
+                        self.download_file(
+                            chunk_cid, chunk_path, max_retries=max_retries
+                        )
+                        chunks_downloaded += 1
+
+                        # Read the chunk data
+                        with open(chunk_path, "rb") as f:
+                            share_data = f.read()
+
+                        downloaded_shares.append(share_data)
+                        share_indexes.append(chunk["share_idx"])
+
+                    except Exception as e:
+                        if verbose:
+                            print(f"Error downloading chunk {chunk['name']}: {str(e)}")
+                        chunks_failed += 1
+                        # Continue to the next chunk
+
+                # If we don't have enough chunks, try to download more
+                if len(downloaded_shares) < k:
+                    raise ValueError(
+                        f"Failed to download enough chunks for original chunk {orig_idx}. "
+                        f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                    )
+
+                # Reconstruct this chunk
+                decoder = zfec.Decoder(k, m)
+                reconstructed_data = decoder.decode(downloaded_shares, share_indexes)
+
+                # If we used the sub-block approach during encoding, we need to recombine the sub-blocks
+                if isinstance(reconstructed_data, list):
+                    # Combine the sub-blocks back into a single chunk
+                    reconstructed_chunk = b"".join(reconstructed_data)
+                else:
+                    # The simple case where we didn't use sub-blocks
+                    reconstructed_chunk = reconstructed_data
+
+                reconstructed_chunks.append(reconstructed_chunk)
+
+                # Print progress
+                if verbose:
+                    progress_pct = (orig_idx + 1) / total_original_chunks * 100
+                    print(
+                        f"  Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
+                    )
+
+            if verbose:
+                download_time = time.time() - start_time
+                print(
+                    f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
+                )
+                if chunks_failed > 0:
+                    print(
+                        f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
+                    )
+
+            # Step 5: Combine the reconstructed chunks into a file
+            if verbose:
+                print(f"Combining reconstructed chunks...")
+
+            # Concatenate all chunks
+            file_data = b"".join(reconstructed_chunks)
+
+            # Remove padding from the last chunk
+            if original_file["size"] < len(file_data):
+                file_data = file_data[: original_file["size"]]
+
+            # Step 6: Decrypt if necessary
+            if is_encrypted:
+                if not self.encryption_available:
+                    raise ValueError(
+                        "File is encrypted but encryption is not available. "
+                        "Install PyNaCl and configure an encryption key."
+                    )
+
+                if verbose:
+                    print(f"Decrypting file data...")
+
+                file_data = self.decrypt_data(file_data)
+
+            # Step 7: Write to the output file
+            with open(output_file, "wb") as f:
+                f.write(file_data)
+
+            # Step 8: Verify hash if available
+            if "hash" in original_file:
+                actual_hash = hashlib.sha256(file_data).hexdigest()
+                expected_hash = original_file["hash"]
+
+                if actual_hash != expected_hash:
+                    print(f"Warning: File hash mismatch!")
+                    print(f"  Expected: {expected_hash}")
+                    print(f"  Actual: {actual_hash}")
+                elif verbose:
+                    print(f"Hash verification successful!")
+
+            total_time = time.time() - start_time
+            if verbose:
+                print(f"Reconstruction complete in {total_time:.2f} seconds!")
+                print(f"File saved to: {output_file}")
+
+            return output_file
+
+        finally:
+            # Clean up temporary directory if we created it
+            if temp_dir_obj is not None:
+                temp_dir_obj.cleanup()
+
+    def store_erasure_coded_file(
+        self,
+        file_path: str,
+        k: int = 3,
+        m: int = 5,
+        chunk_size: int = 1024 * 1024,  # 1MB chunks
+        encrypt: Optional[bool] = None,
+        miner_ids: List[str] = None,
+        substrate_client=None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Erasure code a file, upload the chunks to IPFS, and store in the Hippius marketplace.
+
+        This is a convenience method that combines erasure_code_file with storage_request.
+
+        Args:
+            file_path: Path to the file to upload
+            k: Number of data chunks (minimum required to reconstruct)
+            m: Total number of chunks (k + redundancy)
+            chunk_size: Size of each chunk in bytes before encoding
+            encrypt: Whether to encrypt the file before encoding
+            miner_ids: List of specific miner IDs to use for storage
+            substrate_client: SubstrateClient to use (or None to create one)
+            max_retries: Maximum number of retry attempts
+            verbose: Whether to print progress information
+
+        Returns:
+            dict: Result including metadata CID and transaction hash
+
+        Raises:
+            ValueError: If parameters are invalid
+            RuntimeError: If processing fails
+        """
+        # Step 1: Erasure code the file and upload chunks
+        metadata = self.erasure_code_file(
+            file_path=file_path,
+            k=k,
+            m=m,
+            chunk_size=chunk_size,
+            encrypt=encrypt,
+            max_retries=max_retries,
+            verbose=verbose,
+        )
+
+        # Step 2: Import substrate client if we need it
+        if substrate_client is None:
+            from hippius_sdk.substrate import SubstrateClient, FileInput
+
+            substrate_client = SubstrateClient()
+        else:
+            # Just get the FileInput class
+            from hippius_sdk.substrate import FileInput
+
+        original_file = metadata["original_file"]
+        metadata_cid = metadata["metadata_cid"]
+
+        # Create a list to hold all the file inputs (metadata + all chunks)
+        all_file_inputs = []
+
+        # Step 3: Prepare metadata file for storage
+        if verbose:
+            print(
+                f"Preparing to store metadata and {len(metadata['chunks'])} chunks in the Hippius marketplace..."
+            )
+
+        # Create a file input for the metadata file
+        metadata_file_input = FileInput(
+            file_hash=metadata_cid, file_name=f"{original_file['name']}.ec_metadata"
+        )
+        all_file_inputs.append(metadata_file_input)
+
+        # Step 4: Add all chunks to the storage request
+        if verbose:
+            print(f"Adding all chunks to storage request...")
+
+        for i, chunk in enumerate(metadata["chunks"]):
+            # Extract the CID string from the chunk's cid dictionary
+            chunk_cid = (
+                chunk["cid"]["cid"]
+                if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
+                else chunk["cid"]
+            )
+            chunk_file_input = FileInput(file_hash=chunk_cid, file_name=chunk["name"])
+            all_file_inputs.append(chunk_file_input)
+
+            # Print progress for large numbers of chunks
+            if verbose and (i + 1) % 50 == 0:
+                print(
+                    f"  Prepared {i + 1}/{len(metadata['chunks'])} chunks for storage"
+                )
+
+        # Step 5: Submit the storage request for all files
+        try:
+            if verbose:
+                print(
+                    f"Submitting storage request for 1 metadata file and {len(metadata['chunks'])} chunks..."
+                )
+
+            tx_hash = substrate_client.storage_request(
+                files=all_file_inputs, miner_ids=miner_ids
+            )
+
+            if verbose:
+                print(f"Successfully stored all files in marketplace!")
+                print(f"Transaction hash: {tx_hash}")
+                print(f"Metadata CID: {metadata_cid}")
+                print(
+                    f"Total files stored: {len(all_file_inputs)} (1 metadata + {len(metadata['chunks'])} chunks)"
+                )
+
+            return {
+                "metadata": metadata,
+                "metadata_cid": metadata_cid,
+                "transaction_hash": tx_hash,
+                "total_files_stored": len(all_file_inputs),
+            }
+
+        except Exception as e:
+            print(f"Error storing files in marketplace: {str(e)}")
+            # Return the metadata even if storage fails
+            return {"metadata": metadata, "metadata_cid": metadata_cid, "error": str(e)}
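Taken together, the new methods give a store/restore round trip for erasure-coded files. A hedged end-to-end sketch; the file names and miner ID are placeholders, and note that with the defaults k=3, m=5 each chunk is encoded into 5 shares of which any 3 suffice, i.e. roughly 5/3 ≈ 1.67x storage overhead:

```python
from hippius_sdk.ipfs import IPFSClient

client = IPFSClient()

# Encode the file, upload the shares, and register everything in the marketplace
result = client.store_erasure_coded_file(
    "video.mp4",            # placeholder input file
    k=3,                    # any 3 shares per chunk can rebuild it
    m=5,                    # 5 shares per chunk are stored
    miner_ids=["miner-1"],  # placeholder miner ID
)
print("metadata CID:", result["metadata_cid"])
print("tx hash:", result.get("transaction_hash"))  # absent if the storage request failed

# Later (or on another machine with the same config and keys), rebuild the file
client.reconstruct_from_erasure_code(
    metadata_cid=result["metadata_cid"],
    output_file="video_restored.mp4",
)
```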