hippius 0.1.0__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hippius-0.1.0.dist-info → hippius-0.1.6.dist-info}/METADATA +113 -1
- hippius-0.1.6.dist-info/RECORD +9 -0
- hippius_sdk/cli.py +252 -11
- hippius_sdk/client.py +133 -9
- hippius_sdk/ipfs.py +588 -2
- hippius-0.1.0.dist-info/RECORD +0 -9
- {hippius-0.1.0.dist-info → hippius-0.1.6.dist-info}/WHEEL +0 -0
- {hippius-0.1.0.dist-info → hippius-0.1.6.dist-info}/entry_points.txt +0 -0
{hippius-0.1.0.dist-info → hippius-0.1.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hippius
-Version: 0.1.0
+Version: 0.1.6
 Summary: Python SDK and CLI for Hippius blockchain storage
 Home-page: https://github.com/thenervelab/hippius-sdk
 Author: Dubs
@@ -23,6 +23,7 @@ Requires-Dist: pyperclip (>=1.8.2,<2.0.0) ; extra == "clipboard"
 Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
 Requires-Dist: requests (>=2.28.1,<3.0.0)
 Requires-Dist: substrate-interface (>=1.4.2,<2.0.0)
+Requires-Dist: zfec (>=1.5.3,<2.0.0)
 Project-URL: Documentation, https://github.com/thenervelab/hippius-sdk/docs
 Project-URL: Repository, https://github.com/thenervelab/hippius-sdk
 Description-Content-Type: text/markdown
@@ -172,6 +173,104 @@ raw_result = client.download_file(encrypted_result['cid'], "still_encrypted.txt"
 content = client.cat(encrypted_result['cid'], decrypt=True)
 ```
 
+## Erasure Coding
+
+Hippius SDK supports Reed-Solomon erasure coding for reliable and resilient file storage. This allows files to be split into chunks with added redundancy, so that the original file can be reconstructed even if some chunks are lost.
+
+### Erasure Coding Concepts
+
+- **k**: The number of data chunks needed to reconstruct the original file
+- **m**: The total number of chunks created (m > k)
+- The file can be reconstructed from any k chunks out of m total chunks
+- Higher redundancy (m-k) provides better protection against chunk loss
+
+### Using Erasure Coding
+
+```python
+from hippius_sdk import HippiusClient
+
+client = HippiusClient()
+
+# Erasure code a file with default parameters (k=3, m=5)
+result = client.erasure_code_file("large_file.mp4")
+metadata_cid = result["metadata_cid"]
+
+# Use custom parameters for more redundancy
+result = client.erasure_code_file(
+    file_path="important_data.zip",
+    k=4,  # Need 4 chunks to reconstruct
+    m=10,  # Create 10 chunks total (6 redundant)
+    chunk_size=2097152,  # 2MB chunks
+    encrypt=True  # Encrypt before splitting
+)
+
+# Store erasure-coded file in Hippius marketplace
+result = client.store_erasure_coded_file(
+    file_path="critical_backup.tar",
+    k=3,
+    m=5,
+    encrypt=True,
+    miner_ids=["miner1", "miner2", "miner3"]
+)
+
+# Reconstruct a file from its metadata
+reconstructed_path = client.reconstruct_from_erasure_code(
+    metadata_cid=metadata_cid,
+    output_file="reconstructed_file.mp4"
+)
+```
+
+### When to Use Erasure Coding
+
+Erasure coding is particularly useful for:
+
+- Large files where reliability is critical
+- Long-term archival storage
+- Data that must survive partial network failures
+- Situations where higher redundancy is needed without full replication
+
+### Advanced Features
+
+#### Small File Handling
+
+The SDK automatically adjusts parameters for small files:
+
+- If a file is too small to be split into `k` chunks, the SDK will adjust the chunk size
+- For very small files, the content is split into exactly `k` sub-blocks
+- Parameters are always optimized to provide the requested level of redundancy
+
+#### Robust Storage in Marketplace
+
+When using `store_erasure_coded_file`, the SDK now:
+
+- Stores both the metadata file AND all encoded chunks in the marketplace
+- Ensures miners can access all necessary data for redundancy and retrieval
+- Reports total number of files stored for verification
+
+#### CLI Commands
+
+The CLI provides powerful commands for erasure coding:
+
+```bash
+# Basic usage with automatic parameter adjustment
+hippius erasure-code myfile.txt
+
+# Specify custom parameters
+hippius erasure-code large_video.mp4 --k 4 --m 8 --chunk-size 4194304
+
+# For smaller files, using smaller parameters
+hippius erasure-code small_doc.txt --k 2 --m 5 --chunk-size 4096
+
+# Reconstruct a file from its metadata CID
+hippius reconstruct QmMetadataCID reconstructed_file.mp4
+```
+
+The CLI provides detailed output during the process, including:
+- Automatic parameter adjustments for optimal encoding
+- Progress of chunk creation and upload
+- Storage confirmation in the marketplace
+- Instructions for reconstruction
+
 ## Command Line Interface
 
 The Hippius SDK includes a powerful command-line interface (CLI) that provides access to all major features of the SDK directly from your terminal.
@@ -240,6 +339,19 @@ hippius store my_file.txt --encrypt
 hippius download QmCID123 output_file.txt --decrypt
 ```
 
+### Erasure Coding
+
+```bash
+# Erasure code a file with default parameters (k=3, m=5)
+hippius erasure-code large_file.mp4
+
+# Erasure code with custom parameters
+hippius erasure-code important_data.zip --k 4 --m 10 --chunk-size 2097152 --encrypt
+
+# Reconstruct a file from its metadata
+hippius reconstruct QmMetadataCID reconstructed_file.mp4
+```
+
 ### Using Environment Variables
 
 The CLI automatically reads from your `.env` file for common settings:
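The (k, m) trade-off documented in the new README section above reduces to simple arithmetic: storage overhead is m/k, and up to m − k shares can be lost before reconstruction fails. A minimal sketch of that math (plain Python, independent of the SDK; the per-chunk figure mirrors the sub-block sizing used in `ipfs.py` further down):

```python
def redundancy_profile(k: int, m: int, chunk_size: int) -> dict:
    """Summarize an (m, k) erasure-coding configuration."""
    assert 0 < k < m, "k must be less than m"
    sub_block = (chunk_size + k - 1) // k  # ceiling division, as in ipfs.py
    return {
        "storage_overhead": m / k,      # bytes stored per byte of data
        "tolerable_losses": m - k,      # shares that may disappear
        "stored_bytes_per_chunk": m * sub_block,
    }

# README defaults (k=3, m=5): ~1.67x overhead, survives any 2 lost shares.
print(redundancy_profile(3, 5, 1024 * 1024))
```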
hippius-0.1.6.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+hippius_sdk/__init__.py,sha256=SwOREu9EJZ9ZRM-rSPX0o1hhsOUIADuP3CxoF4Mp_qI,288
+hippius_sdk/cli.py,sha256=WfjU9nuUBXN6Tu25PnOpLHftClP_umh6Zl9t4BOzAfo,30576
+hippius_sdk/client.py,sha256=bHsoadw2WMMZDU7D0r02nHeU82PAa4cvmblieDzBw54,13305
+hippius_sdk/ipfs.py,sha256=C9oMTBefCIfWFUsUBxhUkivz5rIUkhHKJtqdVIkMAbc,61475
+hippius_sdk/substrate.py,sha256=mfDxbKn9HdtcK1xEnj_BnnreRw8ITZswtDoBhtliidM,27278
+hippius-0.1.6.dist-info/METADATA,sha256=295Uv9mZq1G0pypT4PibEmTDVNRr7gM_ScFNVPZTfdo,16580
+hippius-0.1.6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+hippius-0.1.6.dist-info/entry_points.txt,sha256=b1lo60zRXmv1ud-c5BC-cJcAfGE5FD4qM_nia6XeQtM,98
+hippius-0.1.6.dist-info/RECORD,,
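For readers checking the RECORD entries above: each row is `path,sha256=<digest>,<size>`, and per the wheel spec the digest is the file's SHA-256 in URL-safe base64 with trailing padding stripped. A small sketch for verifying a file against its RECORD row (the path is illustrative):

```python
import base64
import hashlib

def record_digest(path: str) -> str:
    """Compute the sha256= value a wheel RECORD carries for a file."""
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# For the 0.1.6 wheel, record_digest("hippius_sdk/ipfs.py") should match
# the "C9oMTBefCIfWFUsUBxhUkivz5rIUkhHKJtqdVIkMAbc" entry above.
```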
hippius_sdk/cli.py
CHANGED
@@ -435,25 +435,202 @@ def handle_files(client, account_address, debug=False, show_all_miners=False):
     return 0
 
 
+def handle_erasure_code(
+    client, file_path, k, m, chunk_size, miner_ids, encrypt=None, verbose=True
+):
+    """Handle the erasure-code command"""
+    if not os.path.exists(file_path):
+        print(f"Error: File {file_path} not found")
+        return 1
+
+    # Check if zfec is installed
+    try:
+        import zfec
+    except ImportError:
+        print(
+            "Error: zfec is required for erasure coding. Install it with: pip install zfec"
+        )
+        print("Then update your environment: poetry add zfec")
+        return 1
+
+    # Parse miner IDs if provided
+    miner_id_list = None
+    if miner_ids:
+        miner_id_list = [m.strip() for m in miner_ids.split(",") if m.strip()]
+        if verbose:
+            print(f"Targeting {len(miner_id_list)} miners: {', '.join(miner_id_list)}")
+
+    # Get the file size and adjust parameters if needed
+    file_size = os.path.getsize(file_path)
+    file_size_mb = file_size / (1024 * 1024)
+
+    print(f"Processing {file_path} ({file_size_mb:.2f} MB) with erasure coding...")
+
+    # Check if the file is too small for the current chunk size and k value
+    original_k = k
+    original_m = m
+    original_chunk_size = chunk_size
+
+    # Calculate how many chunks we would get with current settings
+    potential_chunks = max(1, file_size // chunk_size)
+
+    # If we can't get at least k chunks, adjust the chunk size
+    if potential_chunks < k:
+        # Calculate a new chunk size that would give us exactly k chunks
+        new_chunk_size = max(1024, file_size // k)  # Ensure at least 1KB chunks
+
+        print(f"Warning: File is too small for the requested parameters.")
+        print(f"Original parameters: k={k}, m={m}, chunk size={chunk_size/1024/1024:.2f} MB")
+        print(f"Would create only {potential_chunks} chunks, which is less than k={k}")
+        print(f"Automatically adjusting chunk size to {new_chunk_size/1024/1024:.6f} MB to create at least {k} chunks")
+
+        chunk_size = new_chunk_size
+
+    print(f"Final parameters: k={k}, m={m} (need {k} of {m} chunks to reconstruct)")
+    print(f"Chunk size: {chunk_size/1024/1024:.6f} MB")
+
+    if encrypt:
+        print("Encryption: Enabled")
+
+    start_time = time.time()
+
+    try:
+        # Use the store_erasure_coded_file method directly from HippiusClient
+        result = client.store_erasure_coded_file(
+            file_path=file_path,
+            k=k,
+            m=m,
+            chunk_size=chunk_size,
+            encrypt=encrypt,
+            miner_ids=miner_id_list,
+            max_retries=3,
+            verbose=verbose,
+        )
+
+        elapsed_time = time.time() - start_time
+
+        print(f"\nErasure coding and storage completed in {elapsed_time:.2f} seconds!")
+
+        # Display metadata
+        metadata = result.get("metadata", {})
+        metadata_cid = result.get("metadata_cid", "unknown")
+        total_files_stored = result.get("total_files_stored", 0)
+
+        original_file = metadata.get("original_file", {})
+        erasure_coding = metadata.get("erasure_coding", {})
+
+        print("\nErasure Coding Summary:")
+        print(
+            f"  Original file: {original_file.get('name')} ({original_file.get('size', 0)/1024/1024:.2f} MB)"
+        )
+        print(f"  File ID: {erasure_coding.get('file_id')}")
+        print(f"  Parameters: k={erasure_coding.get('k')}, m={erasure_coding.get('m')}")
+        print(f"  Total chunks: {len(metadata.get('chunks', []))}")
+        print(f"  Total files stored in marketplace: {total_files_stored}")
+        print(f"  Metadata CID: {metadata_cid}")
+
+        # If we stored in the marketplace
+        if "transaction_hash" in result:
+            print(
+                f"\nStored in marketplace. Transaction hash: {result['transaction_hash']}"
+            )
+
+        # Instructions for reconstruction
+        print("\nTo reconstruct this file, you will need:")
+        print(f"  1. The metadata CID: {metadata_cid}")
+        print("  2. Access to at least k chunks for each original chunk")
+        print("\nReconstruction command:")
+        print(
+            f"  hippius reconstruct {metadata_cid} reconstructed_{original_file.get('name')}"
+        )
+
+        return 0
+
+    except Exception as e:
+        print(f"Error during erasure coding: {e}")
+
+        # Provide helpful advice based on the error
+        if "Wrong length" in str(e) and "input blocks" in str(e):
+            print("\nThis error typically occurs with very small files.")
+            print("Suggestions:")
+            print("  1. Try using a smaller chunk size: --chunk-size 4096")
+            print("  2. Try using a smaller k value: --k 2")
+            print("  3. For very small files, consider using regular storage instead of erasure coding.")
+
+        return 1
+
+
+def handle_reconstruct(client, metadata_cid, output_file, verbose=True):
+    """Handle the reconstruct command for erasure-coded files"""
+    # Check if zfec is installed
+    try:
+        import zfec
+    except ImportError:
+        print(
+            "Error: zfec is required for erasure coding. Install it with: pip install zfec"
+        )
+        print("Then update your environment: poetry add zfec")
+        return 1
+
+    print(f"Reconstructing file from metadata CID: {metadata_cid}")
+    print(f"Output file: {output_file}")
+
+    start_time = time.time()
+
+    try:
+        # Use the reconstruct_from_erasure_code method
+        result = client.reconstruct_from_erasure_code(
+            metadata_cid=metadata_cid, output_file=output_file, verbose=verbose
+        )
+
+        elapsed_time = time.time() - start_time
+        print(f"\nFile reconstruction completed in {elapsed_time:.2f} seconds!")
+        print(f"Reconstructed file saved to: {result}")
+
+        return 0
+
+    except Exception as e:
+        print(f"Error during file reconstruction: {e}")
+        return 1
+
+
 def main():
-    """Main entry point for
+    """Main CLI entry point for hippius command."""
+    # Set up the argument parser
     parser = argparse.ArgumentParser(
         description="Hippius SDK Command Line Interface",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
-
-
-  hippius
-
-
-  hippius store-dir ./
+examples:
+  # Store a file
+  hippius store example.txt
+
+  # Store a directory
+  hippius store-dir ./my_directory
+
+  # Download a file
+  hippius download QmHash output.txt
+
+  # Check if a CID exists
+  hippius exists QmHash
+
+  # View the content of a CID
+  hippius cat QmHash
+
+  # View your available credits
   hippius credits
-
+
+  # View your stored files
   hippius files
-
+
+  # View all miners for stored files
   hippius files --all-miners
-
-
+
+  # Erasure code a file (Reed-Solomon)
+  hippius erasure-code large_file.mp4 --k 3 --m 5
+
+  # Reconstruct an erasure-coded file
+  hippius reconstruct QmMetadataHash reconstructed_file.mp4
 """,
     )
 
@@ -588,6 +765,53 @@ Examples:
         "--copy", action="store_true", help="Copy the generated key to the clipboard"
     )
 
+    # Erasure code command
+    erasure_code_parser = subparsers.add_parser(
+        "erasure-code", help="Erasure code a file"
+    )
+    erasure_code_parser.add_argument("file_path", help="Path to file to erasure code")
+    erasure_code_parser.add_argument(
+        "--k",
+        type=int,
+        default=3,
+        help="Number of data chunks needed to reconstruct (default: 3)",
+    )
+    erasure_code_parser.add_argument(
+        "--m", type=int, default=5, help="Total number of chunks to create (default: 5)"
+    )
+    erasure_code_parser.add_argument(
+        "--chunk-size",
+        type=int,
+        default=1048576,
+        help="Chunk size in bytes (default: 1MB)",
+    )
+    erasure_code_parser.add_argument(
+        "--miner-ids", help="Comma-separated list of miner IDs"
+    )
+    erasure_code_parser.add_argument(
+        "--encrypt", action="store_true", help="Encrypt the file"
+    )
+    erasure_code_parser.add_argument(
+        "--no-encrypt", action="store_true", help="Do not encrypt the file"
+    )
+    erasure_code_parser.add_argument(
+        "--verbose", action="store_true", help="Enable verbose output", default=True
+    )
+
+    # Reconstruct command
+    reconstruct_parser = subparsers.add_parser(
+        "reconstruct", help="Reconstruct an erasure-coded file"
+    )
+    reconstruct_parser.add_argument(
+        "metadata_cid", help="Metadata CID of the erasure-coded file"
+    )
+    reconstruct_parser.add_argument(
+        "output_file", help="Path to save reconstructed file"
+    )
+    reconstruct_parser.add_argument(
+        "--verbose", action="store_true", help="Enable verbose output", default=True
+    )
+
     args = parser.parse_args()
 
     if not args.command:
@@ -647,6 +871,23 @@ Examples:
                 else False,
             )
 
+        elif args.command == "erasure-code":
+            return handle_erasure_code(
+                client,
+                args.file_path,
+                args.k,
+                args.m,
+                args.chunk_size,
+                miner_ids,
+                encrypt=args.encrypt,
+                verbose=args.verbose,
+            )
+
+        elif args.command == "reconstruct":
+            return handle_reconstruct(
+                client, args.metadata_cid, args.output_file, verbose=args.verbose
+            )
+
     except Exception as e:
         print(f"Error: {e}")
         return 1
hippius_sdk/client.py
CHANGED
@@ -35,7 +35,7 @@ class HippiusClient:
             encrypt_by_default: Whether to encrypt files by default (from .env if None)
             encryption_key: Encryption key for NaCl secretbox (from .env if None)
         """
-        self.
+        self.ipfs_client = IPFSClient(
             gateway=ipfs_gateway,
             api_url=ipfs_api_url,
             encrypt_by_default=encrypt_by_default,
@@ -75,7 +75,7 @@ class HippiusClient:
             ValueError: If encryption is requested but not available
         """
         # Use the enhanced IPFSClient method directly with encryption parameter
-        return self.
+        return self.ipfs_client.upload_file(file_path, encrypt=encrypt)
 
     def upload_directory(
         self, dir_path: str, encrypt: Optional[bool] = None
@@ -102,7 +102,7 @@ class HippiusClient:
             ValueError: If encryption is requested but not available
         """
         # Use the enhanced IPFSClient method directly with encryption parameter
-        return self.
+        return self.ipfs_client.upload_directory(dir_path, encrypt=encrypt)
 
     def download_file(
         self, cid: str, output_path: str, decrypt: Optional[bool] = None
@@ -128,7 +128,7 @@ class HippiusClient:
             requests.RequestException: If the download fails
             ValueError: If decryption is requested but fails
         """
-        return self.
+        return self.ipfs_client.download_file(cid, output_path, decrypt=decrypt)
 
     def cat(
         self,
@@ -155,7 +155,9 @@ class HippiusClient:
             - text_preview/hex_preview: Preview of the content
             - decrypted: Whether the file was decrypted
         """
-        return self.
+        return self.ipfs_client.cat(
+            cid, max_display_bytes, format_output, decrypt=decrypt
+        )
 
     def exists(self, cid: str) -> Dict[str, Any]:
         """
@@ -171,7 +173,7 @@ class HippiusClient:
             - formatted_cid: Formatted version of the CID
             - gateway_url: URL to access the content if it exists
         """
-        return self.
+        return self.ipfs_client.exists(cid)
 
     def pin(self, cid: str) -> Dict[str, Any]:
         """
@@ -187,7 +189,7 @@ class HippiusClient:
             - formatted_cid: Formatted version of the CID
             - message: Status message
         """
-        return self.
+        return self.ipfs_client.pin(cid)
 
     def format_cid(self, cid: str) -> str:
         """
@@ -201,7 +203,7 @@ class HippiusClient:
         Returns:
             str: Formatted CID string
         """
-        return self.
+        return self.ipfs_client.format_cid(cid)
 
     def format_size(self, size_bytes: int) -> str:
         """
@@ -215,7 +217,7 @@ class HippiusClient:
         Returns:
             str: Human-readable size string (e.g., '1.23 MB', '456.78 KB')
         """
-        return self.
+        return self.ipfs_client.format_size(size_bytes)
 
     def generate_encryption_key(self) -> str:
         """
@@ -244,3 +246,125 @@ class HippiusClient:
             raise ImportError(
                 "PyNaCl is required for encryption. Install it with: pip install pynacl"
             )
+
+    def erasure_code_file(
+        self,
+        file_path: str,
+        k: int = 3,
+        m: int = 5,
+        chunk_size: int = 1024 * 1024,  # 1MB chunks
+        encrypt: Optional[bool] = None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Split a file using erasure coding, then upload the chunks to IPFS.
+
+        This implements an (m, k) Reed-Solomon code where:
+        - m = total number of chunks
+        - k = minimum chunks needed to reconstruct the file (k <= m)
+        - The file can be reconstructed from any k of the m chunks
+
+        Args:
+            file_path: Path to the file to upload
+            k: Number of data chunks (minimum required to reconstruct)
+            m: Total number of chunks (k + redundancy)
+            chunk_size: Size of each chunk in bytes before encoding
+            encrypt: Whether to encrypt the file before encoding (defaults to self.encrypt_by_default)
+            max_retries: Maximum number of retry attempts for IPFS uploads
+            verbose: Whether to print progress information
+
+        Returns:
+            dict: Metadata including the original file info and chunk information
+
+        Raises:
+            ValueError: If erasure coding is not available or parameters are invalid
+            RuntimeError: If chunk uploads fail
+        """
+        return self.ipfs_client.erasure_code_file(
+            file_path=file_path,
+            k=k,
+            m=m,
+            chunk_size=chunk_size,
+            encrypt=encrypt,
+            max_retries=max_retries,
+            verbose=verbose,
+        )
+
+    def reconstruct_from_erasure_code(
+        self,
+        metadata_cid: str,
+        output_file: str,
+        temp_dir: str = None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> str:
+        """
+        Reconstruct a file from erasure-coded chunks using its metadata.
+
+        Args:
+            metadata_cid: IPFS CID of the metadata file
+            output_file: Path where the reconstructed file should be saved
+            temp_dir: Directory to use for temporary files (default: system temp)
+            max_retries: Maximum number of retry attempts for IPFS downloads
+            verbose: Whether to print progress information
+
+        Returns:
+            str: Path to the reconstructed file
+
+        Raises:
+            ValueError: If reconstruction fails
+            RuntimeError: If not enough chunks can be downloaded
+        """
+        return self.ipfs_client.reconstruct_from_erasure_code(
+            metadata_cid=metadata_cid,
+            output_file=output_file,
+            temp_dir=temp_dir,
+            max_retries=max_retries,
+            verbose=verbose,
+        )
+
+    def store_erasure_coded_file(
+        self,
+        file_path: str,
+        k: int = 3,
+        m: int = 5,
+        chunk_size: int = 1024 * 1024,  # 1MB chunks
+        encrypt: Optional[bool] = None,
+        miner_ids: List[str] = None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Erasure code a file, upload the chunks to IPFS, and store in the Hippius marketplace.
+
+        This is a convenience method that combines erasure_code_file with storage_request.
+
+        Args:
+            file_path: Path to the file to upload
+            k: Number of data chunks (minimum required to reconstruct)
+            m: Total number of chunks (k + redundancy)
+            chunk_size: Size of each chunk in bytes before encoding
+            encrypt: Whether to encrypt the file before encoding
+            miner_ids: List of specific miner IDs to use for storage
+            max_retries: Maximum number of retry attempts
+            verbose: Whether to print progress information
+
+        Returns:
+            dict: Result including metadata CID and transaction hash
+
+        Raises:
+            ValueError: If parameters are invalid
+            RuntimeError: If processing fails
+        """
+        return self.ipfs_client.store_erasure_coded_file(
+            file_path=file_path,
+            k=k,
+            m=m,
+            chunk_size=chunk_size,
+            encrypt=encrypt,
+            miner_ids=miner_ids,
+            substrate_client=self.substrate_client,
+            max_retries=max_retries,
+            verbose=verbose,
+        )
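On the client side these three methods are thin delegation: `HippiusClient` forwards everything to `IPFSClient` and supplies its own `substrate_client` for marketplace storage. One behavioral detail, visible in the `ipfs.py` implementation that follows: `store_erasure_coded_file` returns an `"error"` key instead of raising if the marketplace submission fails after the chunks are already uploaded. A short usage sketch (file names are placeholders):

```python
from hippius_sdk import HippiusClient

client = HippiusClient()
stored = client.store_erasure_coded_file("backup.tar", k=3, m=5)

if "transaction_hash" in stored:
    print("Stored on-chain:", stored["transaction_hash"])
else:
    # Chunks are already on IPFS; only the marketplace call failed
    print("Marketplace submission failed:", stored.get("error"))

# Reconstruction needs only the metadata CID
client.reconstruct_from_erasure_code(
    metadata_cid=stored["metadata_cid"],
    output_file="backup.restored.tar",
)
```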
hippius_sdk/ipfs.py
CHANGED
@@ -8,7 +8,9 @@ import requests
 import base64
 import time
 import tempfile
-
+import hashlib
+import uuid
+from typing import Dict, Any, Optional, Union, List, Tuple
 import ipfshttpclient
 from dotenv import load_dotenv
 
@@ -21,6 +23,14 @@ try:
 except ImportError:
     ENCRYPTION_AVAILABLE = False
 
+# Import zfec for erasure coding
+try:
+    import zfec
+
+    ERASURE_CODING_AVAILABLE = True
+except ImportError:
+    ERASURE_CODING_AVAILABLE = False
+
 
 class IPFSClient:
     """Client for interacting with IPFS."""
@@ -288,6 +298,7 @@ class IPFSClient:
         file_path: str,
         include_formatted_size: bool = True,
         encrypt: Optional[bool] = None,
+        max_retries: int = 3,
     ) -> Dict[str, Any]:
         """
         Upload a file to IPFS with optional encryption.
@@ -296,6 +307,7 @@ class IPFSClient:
             file_path: Path to the file to upload
             include_formatted_size: Whether to include formatted size in the result (default: True)
             encrypt: Whether to encrypt the file (overrides default)
+            max_retries: Maximum number of retry attempts (default: 3)
 
         Returns:
             Dict[str, Any]: Dictionary containing:
@@ -355,7 +367,7 @@ class IPFSClient:
             cid = result["Hash"]
         elif self.base_url:
             # Fallback to using HTTP API
-            cid = self._upload_via_http_api(upload_path)
+            cid = self._upload_via_http_api(upload_path, max_retries=max_retries)
         else:
             # No connection or API URL available
             raise ConnectionError(
@@ -983,3 +995,577 @@ class IPFSClient:
             "formatted_cid": formatted_cid,
             "message": message,
         }
+
+    def erasure_code_file(
+        self,
+        file_path: str,
+        k: int = 3,
+        m: int = 5,
+        chunk_size: int = 1024 * 1024,  # 1MB chunks
+        encrypt: Optional[bool] = None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Split a file using erasure coding, then upload the chunks to IPFS.
+
+        This implements an (m, k) Reed-Solomon code where:
+        - m = total number of chunks
+        - k = minimum chunks needed to reconstruct the file (k <= m)
+        - The file can be reconstructed from any k of the m chunks
+
+        Args:
+            file_path: Path to the file to upload
+            k: Number of data chunks (minimum required to reconstruct)
+            m: Total number of chunks (k + redundancy)
+            chunk_size: Size of each chunk in bytes before encoding
+            encrypt: Whether to encrypt the file before encoding (defaults to self.encrypt_by_default)
+            max_retries: Maximum number of retry attempts for IPFS uploads
+            verbose: Whether to print progress information
+
+        Returns:
+            dict: Metadata including the original file info and chunk information
+
+        Raises:
+            ValueError: If erasure coding is not available or parameters are invalid
+            RuntimeError: If chunk uploads fail
+        """
+        if not ERASURE_CODING_AVAILABLE:
+            raise ValueError(
+                "Erasure coding is not available. Install zfec: pip install zfec"
+            )
+
+        if k >= m:
+            raise ValueError(
+                f"Invalid erasure coding parameters: k ({k}) must be less than m ({m})"
+            )
+
+        # Get original file info
+        file_name = os.path.basename(file_path)
+        file_size = os.path.getsize(file_path)
+        file_extension = os.path.splitext(file_name)[1]
+
+        # Determine if encryption should be used
+        should_encrypt = self.encrypt_by_default if encrypt is None else encrypt
+
+        if should_encrypt and not self.encryption_available:
+            raise ValueError(
+                "Encryption requested but not available. Install PyNaCl and configure an encryption key."
+            )
+
+        # Generate a unique ID for this file
+        file_id = str(uuid.uuid4())
+
+        if verbose:
+            print(f"Processing file: {file_name} ({file_size/1024/1024:.2f} MB)")
+            print(
+                f"Erasure coding parameters: k={k}, m={m} (need {k}/{m} chunks to reconstruct)"
+            )
+            if should_encrypt:
+                print("Encryption: Enabled")
+
+        # Step 1: Read and potentially encrypt the file
+        with open(file_path, "rb") as f:
+            file_data = f.read()
+
+        # Calculate original file hash
+        original_file_hash = hashlib.sha256(file_data).hexdigest()
+
+        # Encrypt if requested
+        if should_encrypt:
+            if verbose:
+                print("Encrypting file data...")
+            file_data = self.encrypt_data(file_data)
+
+        # Step 2: Split the file into chunks for erasure coding
+        chunks = []
+        chunk_positions = []
+        for i in range(0, len(file_data), chunk_size):
+            chunk = file_data[i : i + chunk_size]
+            chunks.append(chunk)
+            chunk_positions.append(i)
+
+        # Pad the last chunk if necessary
+        if chunks and len(chunks[-1]) < chunk_size:
+            pad_size = chunk_size - len(chunks[-1])
+            chunks[-1] = chunks[-1] + b"\0" * pad_size
+
+        # If we don't have enough chunks for the requested parameters, adjust
+        if len(chunks) < k:
+            if verbose:
+                print(
+                    f"Warning: File has fewer chunks ({len(chunks)}) than k={k}. Adjusting parameters."
+                )
+
+            # If we have a very small file, we'll just use a single chunk
+            # but will still split it into k sub-blocks during encoding
+            if len(chunks) == 1:
+                if verbose:
+                    print(f"Small file (single chunk): will split into {k} sub-blocks for encoding")
+            else:
+                # If we have multiple chunks but fewer than k, adjust k to match
+                old_k = k
+                k = max(1, len(chunks))
+                if verbose:
+                    print(f"Adjusting k from {old_k} to {k} to match available chunks")
+
+            # Ensure m is greater than k for redundancy
+            if m <= k:
+                old_m = m
+                m = k + 2  # Ensure we have at least 2 redundant chunks
+                if verbose:
+                    print(f"Adjusting m from {old_m} to {m} to ensure redundancy")
+
+            if verbose:
+                print(f"New parameters: k={k}, m={m}")
+
+        # Ensure we have at least one chunk to process
+        if not chunks:
+            raise ValueError("File is empty or too small to process")
+
+        # For k=1 case, ensure we have proper sized input for zfec
+        if k == 1 and len(chunks) == 1:
+            # zfec expects the input to be exactly chunk_size for k=1
+            # So we need to pad if shorter or truncate if longer
+            if len(chunks[0]) != chunk_size:
+                chunks[0] = chunks[0].ljust(chunk_size, b'\0')[:chunk_size]
+
+        # Create metadata
+        metadata = {
+            "original_file": {
+                "name": file_name,
+                "size": file_size,
+                "hash": original_file_hash,
+                "extension": file_extension,
+            },
+            "erasure_coding": {
+                "k": k,
+                "m": m,
+                "chunk_size": chunk_size,
+                "encrypted": should_encrypt,
+                "file_id": file_id,
+            },
+            "chunks": [],
+        }
+
+        # Step 3: Apply erasure coding to each chunk
+        if verbose:
+            print(f"Applying erasure coding to {len(chunks)} chunks...")
+
+        all_encoded_chunks = []
+        for i, chunk in enumerate(chunks):
+            try:
+                # For zfec encoder.encode(), we must provide exactly k blocks
+
+                # Calculate how many bytes each sub-block should have
+                sub_block_size = (len(chunk) + k - 1) // k  # ceiling division for even distribution
+
+                # Split the chunk into exactly k sub-blocks of equal size (padding as needed)
+                sub_blocks = []
+                for j in range(k):
+                    start = j * sub_block_size
+                    end = min(start + sub_block_size, len(chunk))
+                    sub_block = chunk[start:end]
+
+                    # Pad if needed to make all sub-blocks the same size
+                    if len(sub_block) < sub_block_size:
+                        sub_block = sub_block.ljust(sub_block_size, b'\0')
+
+                    sub_blocks.append(sub_block)
+
+                # Verify we have exactly k sub-blocks
+                if len(sub_blocks) != k:
+                    raise ValueError(f"Expected {k} sub-blocks but got {len(sub_blocks)}")
+
+                # Encode the k sub-blocks to create m encoded blocks
+                encoder = zfec.Encoder(k, m)
+                encoded_chunks = encoder.encode(sub_blocks)
+
+                # Add to our collection
+                all_encoded_chunks.append(encoded_chunks)
+
+                if verbose and (i + 1) % 10 == 0:
+                    print(f"  Encoded {i+1}/{len(chunks)} chunks")
+            except Exception as e:
+                # If encoding fails, provide more helpful error message
+                error_msg = f"Error encoding chunk {i}: {str(e)}"
+                print(f"Error details: chunk size={len(chunk)}, k={k}, m={m}")
+                print(f"Sub-blocks created: {len(sub_blocks) if 'sub_blocks' in locals() else 'None'}")
+                raise RuntimeError(f"{error_msg}")
+
+        # Step 4: Upload all chunks to IPFS
+        if verbose:
+            print(f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS...")
+
+        chunk_uploads = 0
+        chunk_data = []
+
+        # Create a temporary directory for the chunks
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Write and upload each encoded chunk
+            for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
+                for share_idx, share_data in enumerate(encoded_chunks):
+                    # Create a name for this chunk that includes needed info
+                    chunk_name = f"{file_id}_chunk_{original_idx}_{share_idx}.ec"
+                    chunk_path = os.path.join(temp_dir, chunk_name)
+
+                    # Write the chunk to a temp file
+                    with open(chunk_path, "wb") as f:
+                        f.write(share_data)
+
+                    # Upload the chunk to IPFS
+                    try:
+                        chunk_cid = self.upload_file(
+                            chunk_path, max_retries=max_retries
+                        )
+
+                        # Store info about this chunk
+                        chunk_info = {
+                            "name": chunk_name,
+                            "cid": chunk_cid,
+                            "original_chunk": original_idx,
+                            "share_idx": share_idx,
+                            "size": len(share_data),
+                        }
+                        chunk_data.append(chunk_info)
+
+                        chunk_uploads += 1
+                        if verbose and chunk_uploads % 10 == 0:
+                            print(
+                                f"  Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
+                            )
+                    except Exception as e:
+                        print(f"Error uploading chunk {chunk_name}: {str(e)}")
+
+            # Add all chunk info to metadata
+            metadata["chunks"] = chunk_data
+
+            # Step 5: Create and upload the metadata file
+            metadata_path = os.path.join(temp_dir, f"{file_id}_metadata.json")
+            with open(metadata_path, "w") as f:
+                json.dump(metadata, f, indent=2)
+
+            if verbose:
+                print(f"Uploading metadata file...")
+
+            # Upload the metadata file to IPFS
+            metadata_cid_result = self.upload_file(metadata_path, max_retries=max_retries)
+
+            # Extract just the CID string from the result dictionary
+            metadata_cid = metadata_cid_result['cid']
+            metadata["metadata_cid"] = metadata_cid
+
+        if verbose:
+            print(f"Erasure coding complete!")
+            print(f"Metadata CID: {metadata_cid}")
+            print(f"Original file size: {file_size/1024/1024:.2f} MB")
+            print(f"Total chunks: {len(chunks) * m}")
+            print(f"Minimum chunks needed: {k * len(chunks)}")
+
+        return metadata
+
+    def reconstruct_from_erasure_code(
+        self,
+        metadata_cid: str,
+        output_file: str,
+        temp_dir: str = None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> str:
+        """
+        Reconstruct a file from erasure-coded chunks using its metadata.
+
+        Args:
+            metadata_cid: IPFS CID of the metadata file
+            output_file: Path where the reconstructed file should be saved
+            temp_dir: Directory to use for temporary files (default: system temp)
+            max_retries: Maximum number of retry attempts for IPFS downloads
+            verbose: Whether to print progress information
+
+        Returns:
+            str: Path to the reconstructed file
+
+        Raises:
+            ValueError: If reconstruction fails
+            RuntimeError: If not enough chunks can be downloaded
+        """
+        if not ERASURE_CODING_AVAILABLE:
+            raise ValueError(
+                "Erasure coding is not available. Install zfec: pip install zfec"
+            )
+
+        # Create a temporary directory if not provided
+        if temp_dir is None:
+            temp_dir_obj = tempfile.TemporaryDirectory()
+            temp_dir = temp_dir_obj.name
+        else:
+            temp_dir_obj = None
+
+        try:
+            # Step 1: Download and parse the metadata file
+            if verbose:
+                print(f"Downloading metadata file (CID: {metadata_cid})...")
+
+            metadata_path = os.path.join(temp_dir, "metadata.json")
+            self.download_file(metadata_cid, metadata_path, max_retries=max_retries)
+
+            with open(metadata_path, "r") as f:
+                metadata = json.load(f)
+
+            # Step 2: Extract key information
+            original_file = metadata["original_file"]
+            erasure_params = metadata["erasure_coding"]
+            chunks_info = metadata["chunks"]
+
+            k = erasure_params["k"]
+            m = erasure_params["m"]
+            is_encrypted = erasure_params.get("encrypted", False)
+            chunk_size = erasure_params.get("chunk_size", 1024 * 1024)
+
+            if verbose:
+                print(
+                    f"File: {original_file['name']} ({original_file['size']/1024/1024:.2f} MB)"
+                )
+                print(f"Erasure coding parameters: k={k}, m={m}")
+                print(f"Encrypted: {is_encrypted}")
+
+            # Step 3: Group chunks by their original chunk index
+            chunks_by_original = {}
+            for chunk in chunks_info:
+                orig_idx = chunk["original_chunk"]
+                if orig_idx not in chunks_by_original:
+                    chunks_by_original[orig_idx] = []
+                chunks_by_original[orig_idx].append(chunk)
+
+            # Step 4: For each original chunk, download at least k shares
+            if verbose:
+                print(f"Downloading and reconstructing chunks...")
+
+            reconstructed_chunks = []
+
+            for orig_idx in sorted(chunks_by_original.keys()):
+                available_chunks = chunks_by_original[orig_idx]
+
+                if len(available_chunks) < k:
+                    raise ValueError(
+                        f"Not enough chunks available for original chunk {orig_idx}. "
+                        f"Need {k}, but only have {len(available_chunks)}."
+                    )
+
+                # We only need k chunks, so take the first k
+                chunks_to_download = available_chunks[:k]
+
+                # Download the chunks
+                downloaded_shares = []
+                share_indexes = []
+
+                for chunk in chunks_to_download:
+                    chunk_path = os.path.join(temp_dir, chunk["name"])
+                    try:
+                        # Extract the CID string from the chunk's cid dictionary
+                        chunk_cid = chunk["cid"]["cid"] if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"] else chunk["cid"]
+                        self.download_file(
+                            chunk_cid, chunk_path, max_retries=max_retries
+                        )
+
+                        # Read the chunk data
+                        with open(chunk_path, "rb") as f:
+                            share_data = f.read()
+
+                        downloaded_shares.append(share_data)
+                        share_indexes.append(chunk["share_idx"])
+
+                    except Exception as e:
+                        if verbose:
+                            print(f"Error downloading chunk {chunk['name']}: {str(e)}")
+                        # Continue to the next chunk
+
+                # If we don't have enough chunks, try to download more
+                if len(downloaded_shares) < k:
+                    raise ValueError(
+                        f"Failed to download enough chunks for original chunk {orig_idx}. "
+                        f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                    )
+
+                # Reconstruct this chunk
+                decoder = zfec.Decoder(k, m)
+                reconstructed_data = decoder.decode(downloaded_shares, share_indexes)
+
+                # If we used the sub-block approach during encoding, we need to recombine the sub-blocks
+                if isinstance(reconstructed_data, list):
+                    # Combine the sub-blocks back into a single chunk
+                    reconstructed_chunk = b''.join(reconstructed_data)
+                else:
+                    # The simple case where we didn't use sub-blocks
+                    reconstructed_chunk = reconstructed_data
+
+                reconstructed_chunks.append(reconstructed_chunk)
+
+                if verbose and (orig_idx + 1) % 10 == 0:
+                    print(
+                        f"  Reconstructed {orig_idx + 1}/{len(chunks_by_original)} chunks"
+                    )
+
+            # Step 5: Combine the reconstructed chunks into a file
+            if verbose:
+                print(f"Combining reconstructed chunks...")
+
+            # Concatenate all chunks
+            file_data = b"".join(reconstructed_chunks)
+
+            # Remove padding from the last chunk
+            if original_file["size"] < len(file_data):
+                file_data = file_data[: original_file["size"]]
+
+            # Step 6: Decrypt if necessary
+            if is_encrypted:
+                if not self.encryption_available:
+                    raise ValueError(
+                        "File is encrypted but encryption is not available. "
+                        "Install PyNaCl and configure an encryption key."
+                    )
+
+                if verbose:
+                    print(f"Decrypting file data...")
+
+                file_data = self.decrypt_data(file_data)
+
+            # Step 7: Write to the output file
+            with open(output_file, "wb") as f:
+                f.write(file_data)
+
+            # Step 8: Verify hash if available
+            if "hash" in original_file:
+                actual_hash = hashlib.sha256(file_data).hexdigest()
+                expected_hash = original_file["hash"]
+
+                if actual_hash != expected_hash:
+                    print(f"Warning: File hash mismatch!")
+                    print(f"  Expected: {expected_hash}")
+                    print(f"  Actual: {actual_hash}")
+
+            if verbose:
+                print(f"Reconstruction complete!")
+                print(f"File saved to: {output_file}")
+
+            return output_file
+
+        finally:
+            # Clean up temporary directory if we created it
+            if temp_dir_obj is not None:
+                temp_dir_obj.close()
+
+    def store_erasure_coded_file(
+        self,
+        file_path: str,
+        k: int = 3,
+        m: int = 5,
+        chunk_size: int = 1024 * 1024,  # 1MB chunks
+        encrypt: Optional[bool] = None,
+        miner_ids: List[str] = None,
+        substrate_client=None,
+        max_retries: int = 3,
+        verbose: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Erasure code a file, upload the chunks to IPFS, and store in the Hippius marketplace.
+
+        This is a convenience method that combines erasure_code_file with storage_request.
+
+        Args:
+            file_path: Path to the file to upload
+            k: Number of data chunks (minimum required to reconstruct)
+            m: Total number of chunks (k + redundancy)
+            chunk_size: Size of each chunk in bytes before encoding
+            encrypt: Whether to encrypt the file before encoding
+            miner_ids: List of specific miner IDs to use for storage
+            substrate_client: SubstrateClient to use (or None to create one)
+            max_retries: Maximum number of retry attempts
+            verbose: Whether to print progress information
+
+        Returns:
+            dict: Result including metadata CID and transaction hash
+
+        Raises:
+            ValueError: If parameters are invalid
+            RuntimeError: If processing fails
+        """
+        # Step 1: Erasure code the file and upload chunks
+        metadata = self.erasure_code_file(
+            file_path=file_path,
+            k=k,
+            m=m,
+            chunk_size=chunk_size,
+            encrypt=encrypt,
+            max_retries=max_retries,
+            verbose=verbose,
+        )
+
+        # Step 2: Import substrate client if we need it
+        if substrate_client is None:
+            from hippius_sdk.substrate import SubstrateClient, FileInput
+
+            substrate_client = SubstrateClient()
+        else:
+            # Just get the FileInput class
+            from hippius_sdk.substrate import FileInput
+
+        original_file = metadata["original_file"]
+        metadata_cid = metadata["metadata_cid"]
+
+        # Create a list to hold all the file inputs (metadata + all chunks)
+        all_file_inputs = []
+
+        # Step 3: Prepare metadata file for storage
+        if verbose:
+            print(f"Preparing to store metadata and {len(metadata['chunks'])} chunks in the Hippius marketplace...")
+
+        # Create a file input for the metadata file
+        metadata_file_input = FileInput(
+            file_hash=metadata_cid, file_name=f"{original_file['name']}.ec_metadata"
+        )
+        all_file_inputs.append(metadata_file_input)
+
+        # Step 4: Add all chunks to the storage request
+        if verbose:
+            print(f"Adding all chunks to storage request...")
+
+        for i, chunk in enumerate(metadata["chunks"]):
+            # Extract the CID string from the chunk's cid dictionary
+            chunk_cid = chunk["cid"]["cid"] if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"] else chunk["cid"]
+            chunk_file_input = FileInput(
+                file_hash=chunk_cid,
+                file_name=chunk["name"]
+            )
+            all_file_inputs.append(chunk_file_input)
+
+            # Print progress for large numbers of chunks
+            if verbose and (i + 1) % 50 == 0:
+                print(f"  Prepared {i + 1}/{len(metadata['chunks'])} chunks for storage")
+
+        # Step 5: Submit the storage request for all files
+        try:
+            if verbose:
+                print(f"Submitting storage request for 1 metadata file and {len(metadata['chunks'])} chunks...")
+
+            tx_hash = substrate_client.storage_request(
+                files=all_file_inputs, miner_ids=miner_ids
+            )
+
+            if verbose:
+                print(f"Successfully stored all files in marketplace!")
+                print(f"Transaction hash: {tx_hash}")
+                print(f"Metadata CID: {metadata_cid}")
+                print(f"Total files stored: {len(all_file_inputs)} (1 metadata + {len(metadata['chunks'])} chunks)")
+
+            return {
+                "metadata": metadata,
+                "metadata_cid": metadata_cid,
+                "transaction_hash": tx_hash,
+                "total_files_stored": len(all_file_inputs)
+            }
+
+        except Exception as e:
+            print(f"Error storing files in marketplace: {str(e)}")
+            # Return the metadata even if storage fails
+            return {"metadata": metadata, "metadata_cid": metadata_cid, "error": str(e)}
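The small-file arithmetic used in both cli.py and ipfs.py above is easy to check by hand. A minimal sketch that mirrors those two computations (the function name is illustrative):

```python
def adjusted_parameters(file_size: int, k: int, chunk_size: int) -> tuple:
    """Mirror the small-file adjustments from handle_erasure_code/erasure_code_file."""
    potential_chunks = max(1, file_size // chunk_size)
    if potential_chunks < k:
        chunk_size = max(1024, file_size // k)  # keep chunks at least 1 KB
    sub_block_size = (chunk_size + k - 1) // k  # ceiling division per sub-block
    return chunk_size, sub_block_size

# A 10 KB file with the defaults (k=3, 1 MB chunks) drops to 3413-byte chunks,
# each encoded as k sub-blocks of 1138 bytes (then expanded to m=5 shares).
print(adjusted_parameters(10 * 1024, 3, 1024 * 1024))
```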
hippius-0.1.0.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-hippius_sdk/__init__.py,sha256=SwOREu9EJZ9ZRM-rSPX0o1hhsOUIADuP3CxoF4Mp_qI,288
-hippius_sdk/cli.py,sha256=ctg-dfe3uoXBx6McPenZmWE-5AZTLZ39Pro3xMRbAD8,22274
-hippius_sdk/client.py,sha256=Etj6u4Q0Y5KN4QxixOc8uy-zSuIsixx4TGLHXqGiHno,8888
-hippius_sdk/ipfs.py,sha256=IcPtC99I9CmBA3-sSfbnc0RMZ3d3Z0CRtCmRmf1hzR0,37905
-hippius_sdk/substrate.py,sha256=mfDxbKn9HdtcK1xEnj_BnnreRw8ITZswtDoBhtliidM,27278
-hippius-0.1.0.dist-info/METADATA,sha256=RHf-CbtSTQLKeIsfMnOjzOnpWlkmWKszd8JeoYwUCMM,13047
-hippius-0.1.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-hippius-0.1.0.dist-info/entry_points.txt,sha256=b1lo60zRXmv1ud-c5BC-cJcAfGE5FD4qM_nia6XeQtM,98
-hippius-0.1.0.dist-info/RECORD,,
{hippius-0.1.0.dist-info → hippius-0.1.6.dist-info}/WHEEL
File without changes

{hippius-0.1.0.dist-info → hippius-0.1.6.dist-info}/entry_points.txt
File without changes