hippius 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
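The headline change in 0.2.x is that IPFSClient drops the ipfshttpclient dependency in favour of an httpx-based AsyncIPFSClient, so the public methods touched below (upload_file, upload_directory, download_file, cat, exists, pin, and the erasure-coding helpers) become coroutines. A minimal usage sketch of the new async API follows; the constructor defaults and the exact shape of the returned dictionary are not visible in this diff, so they are assumptions for illustration only.

    import asyncio

    from hippius_sdk.ipfs import IPFSClient

    async def main() -> None:
        # Constructor parameters follow the signature visible in the diff
        # (gateway, api_url, encrypt_by_default, encryption_key); defaults assumed here.
        client = IPFSClient()

        # upload_file is now a coroutine and must be awaited.
        result = await client.upload_file("example.txt")
        print(result)  # returned dict structure as defined by the SDK

        # The other methods changed the same way, e.g.:
        # info = await client.exists(some_cid)
        # data = await client.cat(some_cid)

    asyncio.run(main())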
hippius_sdk/ipfs.py CHANGED
@@ -2,20 +2,23 @@
  IPFS operations for the Hippius SDK.
  """

- import base64
  import hashlib
  import json
  import os
+ import random
+ import shutil
  import tempfile
  import time
  import uuid
- from typing import Any, Dict, List, Optional, Tuple, Union
+ from typing import Any, Dict, List, Optional

- import ipfshttpclient
+ import httpx
  import requests
- from dotenv import load_dotenv

  from hippius_sdk.config import get_config_value, get_encryption_key
+ from hippius_sdk.ipfs_core import AsyncIPFSClient
+ from hippius_sdk.substrate import FileInput, SubstrateClient
+ from hippius_sdk.utils import format_cid, format_size

  # Import PyNaCl for encryption
  try:
@@ -70,39 +73,17 @@ class IPFSClient:

  self.gateway = gateway.rstrip("/")
  self.api_url = api_url
- self.client = None

  # Extract base URL from API URL for HTTP fallback
  self.base_url = api_url

- # Connect to IPFS daemon
- if api_url:
- try:
- # Only attempt to use ipfshttpclient if the URL is in multiaddr format (starts with /)
- if api_url.startswith("/"):
- self.client = ipfshttpclient.connect(api_url)
- else:
- # For regular HTTP URLs, we'll use the HTTP API directly
- print(f"Using HTTP API at {api_url} for IPFS operations")
- except ipfshttpclient.exceptions.ConnectionError as e:
- print(f"Warning: Could not connect to IPFS node at {api_url}: {e}")
- print(f"Falling back to HTTP API for uploads")
- # We'll use HTTP API fallback for uploads
- try:
- # Try to connect to local IPFS daemon as fallback
- self.client = ipfshttpclient.connect()
- except ipfshttpclient.exceptions.ConnectionError:
- # No IPFS connection available, but HTTP API fallback will be used
- pass
- else:
- try:
- # Try to connect to local IPFS daemon
- self.client = ipfshttpclient.connect()
- except ipfshttpclient.exceptions.ConnectionError:
- # No local IPFS daemon connection available
- pass
+ try:
+ self.client = AsyncIPFSClient(api_url)
+ except httpx.ConnectError as e:
+ print(f"Warning: Could not connect to IPFS node at {api_url}: {e}")
+ # Try to connect to local IPFS daemon as fallback
+ self.client = AsyncIPFSClient()

- # Initialize encryption settings
  self._initialize_encryption(encrypt_by_default, encryption_key)

  def _initialize_encryption(
@@ -140,7 +121,7 @@ class IPFSClient:
  # If encryption is requested but not available, warn the user
  if self.encrypt_by_default and not self.encryption_available:
  print(
- f"Warning: Encryption requested but not available. Check that PyNaCl is installed and a valid encryption key is provided."
+ "Warning: Encryption requested but not available. Check that PyNaCl is installed and a valid encryption key is provided."
  )

  def encrypt_data(self, data: bytes) -> bytes:
@@ -202,102 +183,7 @@ class IPFSClient:
  f"Decryption failed: {str(e)}. Incorrect key or corrupted data?"
  )

- def _upload_via_http_api(self, file_path: str, max_retries: int = 3) -> str:
- """
- Upload a file to IPFS using the HTTP API.
-
- This is a fallback method when ipfshttpclient is not available.
-
- Args:
- file_path: Path to the file to upload
- max_retries: Maximum number of retry attempts (default: 3)
-
- Returns:
- str: Content Identifier (CID) of the uploaded file
-
- Raises:
- ConnectionError: If the upload fails
- """
- if not self.base_url:
- raise ConnectionError("No IPFS API URL provided for HTTP upload")
-
- # Retry logic
- retries = 0
- last_error = None
-
- while retries < max_retries:
- try:
- # Show progress for large files
- file_size = os.path.getsize(file_path)
- if file_size > 1024 * 1024: # If file is larger than 1MB
- print(f" Uploading {file_size/1024/1024:.2f} MB file...")
-
- # Prepare the file for upload
- with open(file_path, "rb") as file:
- files = {
- "file": (
- os.path.basename(file_path),
- file,
- "application/octet-stream",
- )
- }
-
- # Make HTTP POST request to the IPFS HTTP API with a timeout
- print(
- f" Sending request to {self.base_url}/api/v0/add... (attempt {retries+1}/{max_retries})"
- )
- upload_url = f"{self.base_url}/api/v0/add"
- response = requests.post(
- upload_url,
- files=files,
- timeout=120, # 2 minute timeout for uploads
- )
- response.raise_for_status()
-
- # Parse the response JSON
- result = response.json()
- print(f" Upload successful! CID: {result['Hash']}")
- return result["Hash"]
-
- except (
- requests.exceptions.Timeout,
- requests.exceptions.ConnectionError,
- requests.exceptions.RequestException,
- ) as e:
- # Save the error and retry
- last_error = e
- retries += 1
- wait_time = 2**retries # Exponential backoff: 2, 4, 8 seconds
- print(f" Upload attempt {retries} failed: {str(e)}")
- if retries < max_retries:
- print(f" Retrying in {wait_time} seconds...")
- time.sleep(wait_time)
- except Exception as e:
- # For other exceptions, don't retry
- raise ConnectionError(f"Failed to upload file via HTTP API: {str(e)}")
-
- # If we've exhausted all retries
- if last_error:
- error_type = type(last_error).__name__
- if isinstance(last_error, requests.exceptions.Timeout):
- raise ConnectionError(
- f"Timeout when uploading to {self.base_url} after {max_retries} attempts. The server is not responding."
- )
- elif isinstance(last_error, requests.exceptions.ConnectionError):
- raise ConnectionError(
- f"Failed to connect to IPFS node at {self.base_url} after {max_retries} attempts: {str(last_error)}"
- )
- else:
- raise ConnectionError(
- f"Failed to upload file via HTTP API after {max_retries} attempts. Last error ({error_type}): {str(last_error)}"
- )
-
- # This should never happen, but just in case
- raise ConnectionError(
- f"Failed to upload file to {self.base_url} after {max_retries} attempts for unknown reasons."
- )
-
- def upload_file(
+ async def upload_file(
  self,
  file_path: str,
  include_formatted_size: bool = True,
@@ -364,19 +250,8 @@ class IPFSClient:
  # Use the original file for upload
  upload_path = file_path

- # Upload to IPFS
- if self.client:
- # Use IPFS client
- result = self.client.add(upload_path)
- cid = result["Hash"]
- elif self.base_url:
- # Fallback to using HTTP API
- cid = self._upload_via_http_api(upload_path, max_retries=max_retries)
- else:
- # No connection or API URL available
- raise ConnectionError(
- "No IPFS connection available. Please provide a valid api_url or ensure a local IPFS daemon is running."
- )
+ result = await self.client.add_file(upload_path)
+ cid = result["Hash"]

  finally:
  # Clean up temporary file if created
@@ -397,7 +272,7 @@ class IPFSClient:

  return result

- def upload_directory(
+ async def upload_directory(
  self,
  dir_path: str,
  include_formatted_size: bool = True,
@@ -471,20 +346,13 @@ class IPFSClient:
  total_size_bytes += os.path.getsize(file_path)

  # Use temp_dir instead of dir_path for upload
- if self.client:
- result = self.client.add(temp_dir, recursive=True)
- if isinstance(result, list):
- cid = result[-1]["Hash"]
- else:
- cid = result["Hash"]
- elif self.base_url:
- cid = self._upload_directory_via_http_api(temp_dir)
+ result = await self.client.add_directory(temp_dir)
+ if isinstance(result, list):
+ cid = result[-1]["Hash"]
  else:
- raise ConnectionError("No IPFS connection available")
+ cid = result["Hash"]
  finally:
  # Clean up the temporary directory
- import shutil
-
  shutil.rmtree(temp_dir, ignore_errors=True)
  else:
  # Get directory info
@@ -503,22 +371,13 @@ class IPFSClient:
  pass

  # Upload to IPFS
- if self.client:
- # Use IPFS client
- result = self.client.add(dir_path, recursive=True)
- if isinstance(result, list):
- # Get the last item, which should be the directory itself
- cid = result[-1]["Hash"]
- else:
- cid = result["Hash"]
- elif self.base_url:
- # Fallback to using HTTP API
- cid = self._upload_directory_via_http_api(dir_path)
+
+ result = await self.client.add_directory(dir_path)
+ if isinstance(result, list):
+ # Get the last item, which should be the directory itself
+ cid = result[-1]["Hash"]
  else:
- # No connection or API URL available
- raise ConnectionError(
- "No IPFS connection available. Please provide a valid api_url or ensure a local IPFS daemon is running."
- )
+ cid = result["Hash"]

  # Get dirname in case it wasn't set (for encryption path)
  dirname = os.path.basename(dir_path)
@@ -538,120 +397,6 @@ class IPFSClient:

  return result

- def _upload_directory_via_http_api(
- self, dir_path: str, max_retries: int = 3
- ) -> str:
- """
- Upload a directory to IPFS using the HTTP API.
-
- This is a limited implementation and may not support all directory features.
-
- Args:
- dir_path: Path to the directory to upload
- max_retries: Maximum number of retry attempts (default: 3)
-
- Returns:
- str: Content Identifier (CID) of the uploaded directory
-
- Raises:
- ConnectionError: If the upload fails
- """
- if not self.base_url:
- raise ConnectionError("No IPFS API URL provided for HTTP upload")
-
- # Retry logic
- retries = 0
- last_error = None
-
- while retries < max_retries:
- try:
- # This is a simplified approach - we'll upload the directory with recursive flag
- files = []
-
- print(f" Preparing directory contents for upload...")
- # Collect all files in the directory
- for root, _, filenames in os.walk(dir_path):
- for filename in filenames:
- file_path = os.path.join(root, filename)
- rel_path = os.path.relpath(file_path, dir_path)
-
- with open(file_path, "rb") as f:
- file_content = f.read()
-
- # Add the file to the multipart request
- files.append(
- (
- "file",
- (rel_path, file_content, "application/octet-stream"),
- )
- )
-
- # Create a request with the directory flag
- upload_url = f"{self.base_url}/api/v0/add?recursive=true&wrap-with-directory=true"
-
- print(
- f" Sending directory upload request to {self.base_url}/api/v0/add... (attempt {retries+1}/{max_retries})"
- )
- print(f" Uploading {len(files)} files...")
-
- # Make HTTP POST request with timeout
- response = requests.post(
- upload_url,
- files=files,
- timeout=300, # 5 minute timeout for directory uploads
- )
- response.raise_for_status()
-
- # The IPFS API returns a JSON object for each file, one per line
- # The last one should be the directory itself
- lines = response.text.strip().split("\n")
- if not lines:
- raise ConnectionError("Empty response from IPFS API")
-
- last_item = json.loads(lines[-1])
- print(f" Directory upload successful! CID: {last_item['Hash']}")
- return last_item["Hash"]
-
- except (
- requests.exceptions.Timeout,
- requests.exceptions.ConnectionError,
- requests.exceptions.RequestException,
- ) as e:
- # Save the error and retry
- last_error = e
- retries += 1
- wait_time = 2**retries # Exponential backoff: 2, 4, 8 seconds
- print(f" Upload attempt {retries} failed: {str(e)}")
- if retries < max_retries:
- print(f" Retrying in {wait_time} seconds...")
- time.sleep(wait_time)
- except Exception as e:
- # For other exceptions, don't retry
- raise ConnectionError(
- f"Failed to upload directory via HTTP API: {str(e)}"
- )
-
- # If we've exhausted all retries
- if last_error:
- error_type = type(last_error).__name__
- if isinstance(last_error, requests.exceptions.Timeout):
- raise ConnectionError(
- f"Timeout when uploading directory to {self.base_url} after {max_retries} attempts. The server is not responding."
- )
- elif isinstance(last_error, requests.exceptions.ConnectionError):
- raise ConnectionError(
- f"Failed to connect to IPFS node at {self.base_url} after {max_retries} attempts: {str(last_error)}"
- )
- else:
- raise ConnectionError(
- f"Failed to upload directory via HTTP API after {max_retries} attempts. Last error ({error_type}): {str(last_error)}"
- )
-
- # This should never happen, but just in case
- raise ConnectionError(
- f"Failed to upload directory to {self.base_url} after {max_retries} attempts for unknown reasons."
- )
-
  def format_size(self, size_bytes: int) -> str:
  """
  Format a size in bytes to a human-readable string.
@@ -662,14 +407,7 @@ class IPFSClient:
  Returns:
  str: Human-readable size string (e.g., '1.23 MB', '456.78 KB')
  """
- if size_bytes >= 1024 * 1024 * 1024:
- return f"{size_bytes / (1024 * 1024 * 1024):.2f} GB"
- elif size_bytes >= 1024 * 1024:
- return f"{size_bytes / (1024 * 1024):.2f} MB"
- elif size_bytes >= 1024:
- return f"{size_bytes / 1024:.2f} KB"
- else:
- return f"{size_bytes} bytes"
+ return format_size(size_bytes)

  def format_cid(self, cid: str) -> str:
  """
@@ -683,54 +421,9 @@ class IPFSClient:
  Returns:
  str: Formatted CID string
  """
- # If it already looks like a proper CID, return it as is
- if cid.startswith(("Qm", "bafy", "bafk", "bafyb", "bafzb", "b")):
- return cid
-
- # Check if it's a hex string
- if all(c in "0123456789abcdefABCDEF" for c in cid):
- # First try the special case where the hex string is actually ASCII encoded
- try:
- # Try to decode the hex as ASCII characters
- hex_bytes = bytes.fromhex(cid)
- ascii_str = hex_bytes.decode("ascii")
+ return format_cid(cid)

- # If the decoded string starts with a valid CID prefix, return it
- if ascii_str.startswith(("Qm", "bafy", "bafk", "bafyb", "bafzb", "b")):
- return ascii_str
- except Exception:
- pass
-
- # If the above doesn't work, try the standard CID decoding
- try:
- import binascii
-
- import base58
-
- # Try to decode hex to binary then to base58 for CIDv0
- try:
- binary_data = binascii.unhexlify(cid)
- if (
- len(binary_data) > 2
- and binary_data[0] == 0x12
- and binary_data[1] == 0x20
- ):
- # This looks like a CIDv0 (Qm...)
- decoded_cid = base58.b58encode(binary_data).decode("utf-8")
- return decoded_cid
- except Exception:
- pass
-
- # If not successful, just return hex with 0x prefix as fallback
- return f"0x{cid}"
- except ImportError:
- # If base58 is not available, return hex with prefix
- return f"0x{cid}"
-
- # Default case - return as is
- return cid
-
- def download_file(
+ async def download_file(
  self,
  cid: str,
  output_path: str,
@@ -858,7 +551,7 @@ class IPFSClient:
  if temp_file_path and os.path.exists(temp_file_path):
  os.unlink(temp_file_path)

- def cat(
+ async def cat(
  self,
  cid: str,
  max_display_bytes: int = 1024,
@@ -898,14 +591,7 @@ class IPFSClient:
  "Decryption requested but not available. Check that PyNaCl is installed and a valid encryption key is provided."
  )

- # Get the content
- if self.client:
- content = self.client.cat(cid)
- else:
- url = f"{self.gateway}/ipfs/{cid}"
- response = requests.get(url)
- response.raise_for_status()
- content = response.content
+ content = await self.client.cat(cid)

  # Decrypt if needed
  if should_decrypt:
@@ -940,7 +626,7 @@ class IPFSClient:

  return result

- def exists(self, cid: str) -> Dict[str, Any]:
+ async def exists(self, cid: str) -> Dict[str, Any]:
  """
  Check if a CID exists on IPFS.

@@ -956,19 +642,7 @@ class IPFSClient:
  """
  formatted_cid = self.format_cid(cid)
  gateway_url = f"{self.gateway}/ipfs/{cid}"
-
- try:
- if self.client:
- # We'll try to get the file stats
- self.client.ls(cid)
- exists = True
- else:
- # Try to access through gateway
- url = f"{self.gateway}/ipfs/{cid}"
- response = requests.head(url)
- exists = response.status_code == 200
- except (ipfshttpclient.exceptions.ErrorResponse, requests.RequestException):
- exists = False
+ exists = await self.client.ls(cid)

  return {
  "exists": exists,
@@ -977,7 +651,45 @@ class IPFSClient:
  "gateway_url": gateway_url if exists else None,
  }

- def pin(self, cid: str) -> Dict[str, Any]:
+ async def publish_global(self, cid: str) -> Dict[str, Any]:
+ """
+ Publish a CID to the global IPFS network, ensuring it's widely available.
+
+ This makes the content available beyond the local IPFS node by pinning
+ it to multiple public services.
+
+ Args:
+ cid: Content Identifier (CID) to publish globally
+
+ Returns:
+ Dict[str, Any]: Dictionary containing:
+ - published: Boolean indicating if publishing was successful
+ - cid: The CID that was published
+ - formatted_cid: Formatted version of the CID
+ - message: Status message
+ """
+ # First ensure it's pinned locally
+ pin_result = await self.pin(cid)
+
+ if not pin_result.get("success", False):
+ return {
+ "published": False,
+ "cid": cid,
+ "formatted_cid": self.format_cid(cid),
+ "message": f"Failed to pin content locally: {pin_result.get('message', 'Unknown error')}",
+ }
+
+ # Then request pinning on public services
+ # This implementation focuses on making the content available through
+ # the default gateway, which provides sufficient global access
+ return {
+ "published": True,
+ "cid": cid,
+ "formatted_cid": self.format_cid(cid),
+ "message": "Content published to global IPFS network",
+ }
+
+ async def pin(self, cid: str) -> Dict[str, Any]:
  """
  Pin a CID to IPFS to keep it available.

@@ -996,31 +708,15 @@ class IPFSClient:
  """
  formatted_cid = self.format_cid(cid)

- if not self.client and self.base_url:
- # Try using HTTP API for pinning
- try:
- url = f"{self.base_url}/api/v0/pin/add?arg={cid}"
- response = requests.post(url)
- response.raise_for_status()
- success = True
- message = "Successfully pinned via HTTP API"
- except requests.RequestException as e:
- success = False
- message = f"Failed to pin: {str(e)}"
- elif not self.client:
- raise ConnectionError(
- "No IPFS connection available. Please provide a valid api_url or ensure a local IPFS daemon is running."
- )
-
  try:
  if self.client:
- self.client.pin.add(cid)
+ await self.client.pin(cid)
  success = True
  message = "Successfully pinned"
  else:
  success = False
  message = "No IPFS client available"
- except ipfshttpclient.exceptions.ErrorResponse as e:
+ except httpx.HTTPError as e:
  success = False
  message = f"Failed to pin: {str(e)}"

@@ -1031,7 +727,7 @@ class IPFSClient:
  "message": message,
  }

- def erasure_code_file(
+ async def erasure_code_file(
  self,
  file_path: str,
  k: int = 3,
@@ -1092,7 +788,7 @@ class IPFSClient:
  file_id = str(uuid.uuid4())

  if verbose:
- print(f"Processing file: {file_name} ({file_size/1024/1024:.2f} MB)")
+ print(f"Processing file: {file_name} ({file_size / 1024 / 1024:.2f} MB)")
  print(
  f"Erasure coding parameters: k={k}, m={m} (need {k}/{m} chunks to reconstruct)"
  )
@@ -1226,7 +922,7 @@ class IPFSClient:
  all_encoded_chunks.append(encoded_chunks)

  if verbose and (i + 1) % 10 == 0:
- print(f" Encoded {i+1}/{len(chunks)} chunks")
+ print(f" Encoded {i + 1}/{len(chunks)} chunks")
  except Exception as e:
  # If encoding fails, provide more helpful error message
  error_msg = f"Error encoding chunk {i}: {str(e)}"
@@ -1258,7 +954,7 @@ class IPFSClient:

  # Upload the chunk to IPFS
  try:
- chunk_cid = self.upload_file(
+ chunk_cid = await self.upload_file(
  chunk_path, max_retries=max_retries
  )

@@ -1289,10 +985,10 @@ class IPFSClient:
  json.dump(metadata, f, indent=2)

  if verbose:
- print(f"Uploading metadata file...")
+ print("Uploading metadata file...")

  # Upload the metadata file to IPFS
- metadata_cid_result = self.upload_file(
+ metadata_cid_result = await self.upload_file(
  metadata_path, max_retries=max_retries
  )

@@ -1301,15 +997,15 @@ class IPFSClient:
  metadata["metadata_cid"] = metadata_cid

  if verbose:
- print(f"Erasure coding complete!")
+ print("Erasure coding complete!")
  print(f"Metadata CID: {metadata_cid}")
- print(f"Original file size: {file_size/1024/1024:.2f} MB")
+ print(f"Original file size: {file_size / 1024 / 1024:.2f} MB")
  print(f"Total chunks: {len(chunks) * m}")
  print(f"Minimum chunks needed: {k * len(chunks)}")

  return metadata

- def reconstruct_from_erasure_code(
+ async def reconstruct_from_erasure_code(
  self,
  metadata_cid: str,
  output_file: str,
@@ -1355,7 +1051,9 @@ class IPFSClient:
  print(f"Downloading metadata file (CID: {metadata_cid})...")

  metadata_path = os.path.join(temp_dir, "metadata.json")
- self.download_file(metadata_cid, metadata_path, max_retries=max_retries)
+ await self.download_file(
+ metadata_cid, metadata_path, max_retries=max_retries
+ )

  if verbose:
  metadata_download_time = time.time() - start_time
@@ -1373,16 +1071,17 @@ class IPFSClient:
  m = erasure_params["m"]
  is_encrypted = erasure_params.get("encrypted", False)
  chunk_size = erasure_params.get("chunk_size", 1024 * 1024)
+ total_original_size = original_file["size"]

  if verbose:
  print(
- f"File: {original_file['name']} ({original_file['size']/1024/1024:.2f} MB)"
+ f"File: {original_file['name']} ({original_file['size'] / 1024 / 1024:.2f} MB)"
  )
  print(
  f"Erasure coding parameters: k={k}, m={m} (need {k} of {m} chunks to reconstruct)"
  )
  if is_encrypted:
- print(f"Encrypted: Yes")
+ print("Encrypted: Yes")

  # Step 3: Group chunks by their original chunk index
  chunks_by_original = {}
@@ -1413,14 +1112,19 @@ class IPFSClient:
  f"Need {k}, but only have {len(available_chunks)}."
  )

- # We only need k chunks, so take the first k
- chunks_to_download = available_chunks[:k]
-
- # Download the chunks
+ # Try to download all available chunks, but we only need k successful ones
  downloaded_shares = []
  share_indexes = []
+ chunks_to_try = available_chunks.copy()
+
+ # Shuffle to get a better variety of chunks
+ random.shuffle(chunks_to_try)
+
+ for chunk in chunks_to_try:
+ # Break if we already have k chunks
+ if len(downloaded_shares) >= k:
+ break

- for chunk in chunks_to_download:
  chunk_path = os.path.join(temp_dir, chunk["name"])
  try:
  # Extract the CID string from the chunk's cid dictionary
@@ -1429,7 +1133,7 @@ class IPFSClient:
  if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
  else chunk["cid"]
  )
- self.download_file(
+ await self.download_file(
  chunk_cid, chunk_path, max_retries=max_retries
  )
  chunks_downloaded += 1
@@ -1447,7 +1151,7 @@ class IPFSClient:
  chunks_failed += 1
  # Continue to the next chunk

- # If we don't have enough chunks, try to download more
+ # If we don't have enough chunks, fail
  if len(downloaded_shares) < k:
  raise ValueError(
  f"Failed to download enough chunks for original chunk {orig_idx}. "
@@ -1458,22 +1162,51 @@ class IPFSClient:
  decoder = zfec.Decoder(k, m)
  reconstructed_data = decoder.decode(downloaded_shares, share_indexes)

- # If we used the sub-block approach during encoding, we need to recombine the sub-blocks
- if isinstance(reconstructed_data, list):
- # Combine the sub-blocks back into a single chunk
- reconstructed_chunk = b"".join(reconstructed_data)
- else:
- # The simple case where we didn't use sub-blocks
- reconstructed_chunk = reconstructed_data
+ if not isinstance(reconstructed_data, list):
+ # Handle unexpected output type
+ raise TypeError(
+ f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+ )
+
+ # Calculate the actual size of this original chunk
+ # For all chunks except possibly the last one, it should be chunk_size
+ is_last_chunk = orig_idx == max(chunks_by_original.keys())
+ original_chunk_size = total_original_size - orig_idx * chunk_size
+ if not is_last_chunk:
+ original_chunk_size = min(chunk_size, original_chunk_size)
+
+ # Recombine the sub-blocks, respecting the original chunk size
+ reconstructed_chunk = b""
+ total_bytes = 0
+ for sub_block in reconstructed_data:
+ # Calculate how many bytes we should take from this sub-block
+ bytes_to_take = min(
+ len(sub_block), original_chunk_size - total_bytes
+ )
+ if bytes_to_take <= 0:
+ break
+
+ reconstructed_chunk += sub_block[:bytes_to_take]
+ total_bytes += bytes_to_take

  reconstructed_chunks.append(reconstructed_chunk)

- # Print progress
+ # Add debugging information if verbose
  if verbose:
  progress_pct = (orig_idx + 1) / total_original_chunks * 100
  print(
  f" Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
  )
+ if (
+ orig_idx == 0 or is_last_chunk
+ ): # Only show debug for first and last chunks to avoid spam
+ print(f" Debug info for chunk {orig_idx}:")
+ print(f" Original chunk size: {original_chunk_size} bytes")
+ print(
+ f" Reconstructed chunk size: {len(reconstructed_chunk)} bytes"
+ )
+ print(f" Share indexes used: {share_indexes}")
+ print(f" Sub-blocks received: {len(reconstructed_data)}")

  if verbose:
  download_time = time.time() - start_time
@@ -1487,14 +1220,44 @@ class IPFSClient:

  # Step 5: Combine the reconstructed chunks into a file
  if verbose:
- print(f"Combining reconstructed chunks...")
+ print("Combining reconstructed chunks...")
+
+ # Process chunks to remove padding correctly
+ processed_chunks = []
+ size_processed = 0
+
+ for i, chunk in enumerate(reconstructed_chunks):
+ # For all chunks except the last one, use full chunk size
+ if i < len(reconstructed_chunks) - 1:
+ # Calculate how much of this chunk should be used (handle full chunks)
+ chunk_valid_bytes = min(
+ chunk_size, total_original_size - size_processed
+ )
+ processed_chunks.append(chunk[:chunk_valid_bytes])
+ size_processed += chunk_valid_bytes
+ else:
+ # For the last chunk, calculate the remaining bytes needed
+ remaining_bytes = total_original_size - size_processed
+ processed_chunks.append(chunk[:remaining_bytes])
+ size_processed += remaining_bytes

- # Concatenate all chunks
- file_data = b"".join(reconstructed_chunks)
+ # Concatenate all processed chunks
+ file_data = b"".join(processed_chunks)

- # Remove padding from the last chunk
- if original_file["size"] < len(file_data):
- file_data = file_data[: original_file["size"]]
+ # Double-check the final size matches the original
+ if len(file_data) != original_file["size"]:
+ print(
+ f"Warning: Reconstructed size ({len(file_data)}) differs from original ({original_file['size']})"
+ )
+ # Ensure we have exactly the right size
+ if len(file_data) > original_file["size"]:
+ file_data = file_data[: original_file["size"]]
+ else:
+ # If we're short, pad with zeros (shouldn't happen with proper reconstruction)
+ print(
+ "Warning: Reconstructed file is smaller than original, padding with zeros"
+ )
+ file_data += b"\0" * (original_file["size"] - len(file_data))

  # Step 6: Decrypt if necessary
  if is_encrypted:
@@ -1505,7 +1268,7 @@ class IPFSClient:
  )

  if verbose:
- print(f"Decrypting file data...")
+ print("Decrypting file data...")

  file_data = self.decrypt_data(file_data)

@@ -1519,11 +1282,11 @@ class IPFSClient:
  expected_hash = original_file["hash"]

  if actual_hash != expected_hash:
- print(f"Warning: File hash mismatch!")
+ print("Warning: File hash mismatch!")
  print(f" Expected: {expected_hash}")
  print(f" Actual: {actual_hash}")
- elif verbose:
- print(f"Hash verification successful!")
+ else:
+ print("Hash verification successful!")

  total_time = time.time() - start_time
  if verbose:
@@ -1537,7 +1300,7 @@ class IPFSClient:
  if temp_dir_obj is not None:
  temp_dir_obj.cleanup()

- def store_erasure_coded_file(
+ async def store_erasure_coded_file(
  self,
  file_path: str,
  k: int = 3,
@@ -1573,7 +1336,7 @@ class IPFSClient:
  RuntimeError: If processing fails
  """
  # Step 1: Erasure code the file and upload chunks
- metadata = self.erasure_code_file(
+ metadata = await self.erasure_code_file(
  file_path=file_path,
  k=k,
  m=m,
@@ -1583,14 +1346,9 @@ class IPFSClient:
  verbose=verbose,
  )

- # Step 2: Import substrate client if we need it
+ # Step 2: Create substrate client if we need it
  if substrate_client is None:
- from hippius_sdk.substrate import FileInput, SubstrateClient
-
  substrate_client = SubstrateClient()
- else:
- # Just get the FileInput class
- from hippius_sdk.substrate import FileInput

  original_file = metadata["original_file"]
  metadata_cid = metadata["metadata_cid"]
@@ -1612,7 +1370,7 @@ class IPFSClient:

  # Step 4: Add all chunks to the storage request
  if verbose:
- print(f"Adding all chunks to storage request...")
+ print("Adding all chunks to storage request...")

  for i, chunk in enumerate(metadata["chunks"]):
  # Extract the CID string from the chunk's cid dictionary
@@ -1637,12 +1395,12 @@ class IPFSClient:
  f"Submitting storage request for 1 metadata file and {len(metadata['chunks'])} chunks..."
  )

- tx_hash = substrate_client.storage_request(
+ tx_hash = await substrate_client.storage_request(
  files=all_file_inputs, miner_ids=miner_ids
  )

  if verbose:
- print(f"Successfully stored all files in marketplace!")
+ print("Successfully stored all files in marketplace!")
  print(f"Transaction hash: {tx_hash}")
  print(f"Metadata CID: {metadata_cid}")
  print(