hippius 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hippius-0.2.1 → hippius-0.2.2}/PKG-INFO +1 -1
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/__init__.py +1 -1
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/cli.py +6 -6
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/ipfs.py +170 -116
- {hippius-0.2.1 → hippius-0.2.2}/pyproject.toml +1 -1
- {hippius-0.2.1 → hippius-0.2.2}/README.md +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/client.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/config.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/ipfs_core.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/substrate.py +0 -0
- {hippius-0.2.1 → hippius-0.2.2}/hippius_sdk/utils.py +0 -0
hippius_sdk/cli.py

@@ -790,7 +790,7 @@ async def handle_erasure_code(
     chunk_size,
     miner_ids,
     encrypt=None,
-    publish=
+    publish=True,
     verbose=True,
 ):
     """Handle the erasure-code command"""
@@ -2086,8 +2086,8 @@ examples:
   # Erasure code a file (Reed-Solomon)
   hippius erasure-code large_file.mp4 --k 3 --m 5

-  # Erasure code
-  hippius erasure-code large_file.avi --publish
+  # Erasure code without publishing to global IPFS network
+  hippius erasure-code large_file.avi --no-publish

   # Reconstruct an erasure-coded file
   hippius reconstruct QmMetadataHash reconstructed_file.mp4
@@ -2326,9 +2326,9 @@ examples:
     "--no-encrypt", action="store_true", help="Do not encrypt the file"
 )
 erasure_code_parser.add_argument(
-    "--publish",
+    "--no-publish",
     action="store_true",
-    help="
+    help="Do not upload and publish the erasure-coded file to the global IPFS network",
 )
 erasure_code_parser.add_argument(
     "--verbose", action="store_true", help="Enable verbose output", default=True
@@ -2687,7 +2687,7 @@ examples:
         args.chunk_size,
         miner_ids,
         encrypt=args.encrypt,
-        publish=args.
+        publish=not args.no_publish,
         verbose=args.verbose,
     )

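
The cli.py changes above invert the flag: publishing is now the default, and --no-publish opts out, wired through publish=not args.no_publish. A minimal standalone sketch of this standard argparse inversion pattern (the parser below is illustrative, not SDK code):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--no-publish",
    action="store_true",  # defaults to False, so publish defaults to True
    help="Do not upload and publish the erasure-coded file to the global IPFS network",
)

args = parser.parse_args([])              # flag omitted
assert (not args.no_publish) is True      # publish=True by default

args = parser.parse_args(["--no-publish"])
assert (not args.no_publish) is False     # publishing disabled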
hippius_sdk/ipfs.py

@@ -1,7 +1,7 @@
 """
 IPFS operations for the Hippius SDK.
 """
-
+import asyncio
 import hashlib
 import json
 import os
@@ -37,6 +37,12 @@ try:
 except ImportError:
     ERASURE_CODING_AVAILABLE = False

+# Configuration constants
+PARALLEL_EC_CHUNKS = 20  # Maximum number of concurrent chunk downloads
+PARALLEL_ORIGINAL_CHUNKS = (
+    15  # Maximum number of original chunks to process in parallel
+)
+

 class IPFSClient:
     """Client for interacting with IPFS."""
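
For context on what these chunks are: the SDK erasure-codes each original chunk into m shares with zfec, and any k of them reconstruct it. A minimal round-trip sketch using the zfec API this module relies on (k=3, m=5 here; the payload and the dropped-share indices are illustrative, and real code must pad data to a multiple of k):

import zfec

k, m = 3, 5
data = b"0123456789AB"  # 12 bytes, already divisible by k
block_size = len(data) // k
blocks = [data[i * block_size : (i + 1) * block_size] for i in range(k)]

# Encode k equal-sized blocks into m shares; any k shares suffice
shares = zfec.Encoder(k, m).encode(blocks)

# Simulate losing two shares; keep the indices of the survivors
survivors = [shares[0], shares[2], shares[4]]
indexes = [0, 2, 4]

# Decode returns the k primary blocks, which concatenate to the original
recovered = zfec.Decoder(k, m).decode(survivors, indexes)
assert b"".join(recovered) == data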
@@ -934,14 +940,19 @@ class IPFSClient:

         # Step 4: Upload all chunks to IPFS
         if verbose:
-            print(
+            print(
+                f"Uploading {len(chunks) * m} erasure-coded chunks to IPFS in parallel..."
+            )

         chunk_uploads = 0
         chunk_data = []
+        batch_size = 20  # Number of concurrent uploads

         # Create a temporary directory for the chunks
         with tempfile.TemporaryDirectory() as temp_dir:
-            #
+            # Prepare all chunks for upload
+            all_chunk_info = []
+
             for original_idx, encoded_chunks in enumerate(all_encoded_chunks):
                 for share_idx, share_data in enumerate(encoded_chunks):
                     # Create a name for this chunk that includes needed info
@@ -952,29 +963,48 @@ class IPFSClient:
                     with open(chunk_path, "wb") as f:
                         f.write(share_data)

-                    #
-
-
-                        chunk_path, max_retries=max_retries
-                    )
-
-                    # Store info about this chunk
-                    chunk_info = {
+                    # Store info for async upload
+                    all_chunk_info.append(
+                        {
                             "name": chunk_name,
-                            "
+                            "path": chunk_path,
                             "original_chunk": original_idx,
                             "share_idx": share_idx,
                             "size": len(share_data),
                         }
-
+                    )

+            # Create a semaphore to limit concurrent uploads
+            semaphore = asyncio.Semaphore(batch_size)
+
+            # Define upload task for a single chunk
+            async def upload_chunk(chunk_info):
+                nonlocal chunk_uploads
+
+                async with semaphore:
+                    try:
+                        chunk_cid = await self.upload_file(
+                            chunk_info["path"], max_retries=max_retries
+                        )
+                        chunk_info["cid"] = chunk_cid
                         chunk_uploads += 1
                         if verbose and chunk_uploads % 10 == 0:
                             print(
                                 f"  Uploaded {chunk_uploads}/{len(chunks) * m} chunks"
                             )
+                        return chunk_info
                     except Exception as e:
-                        print(f"Error uploading chunk {
+                        print(f"Error uploading chunk {chunk_info['name']}: {str(e)}")
+                        return None
+
+            # Create tasks for all chunk uploads
+            upload_tasks = [upload_chunk(chunk_info) for chunk_info in all_chunk_info]
+
+            # Wait for all uploads to complete
+            completed_uploads = await asyncio.gather(*upload_tasks)
+
+            # Filter out failed uploads
+            chunk_data = [upload for upload in completed_uploads if upload is not None]

             # Add all chunk info to metadata
             metadata["chunks"] = chunk_data
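
The new upload path above is the semaphore-plus-gather pattern: every chunk gets its own task, a semaphore caps how many run at once, and failures return None so they can be filtered out afterwards. A self-contained sketch of the same idea; fake_upload stands in for self.upload_file and is not part of the SDK:

import asyncio
from typing import Optional

async def fake_upload(path: str) -> str:
    await asyncio.sleep(0.01)  # pretend network I/O
    return f"cid-for-{path}"

async def upload_all(paths, batch_size: int = 20):
    semaphore = asyncio.Semaphore(batch_size)

    async def upload_one(path: str) -> Optional[str]:
        async with semaphore:  # at most batch_size uploads in flight
            try:
                return await fake_upload(path)
            except Exception:
                return None  # mirror the SDK: drop failed uploads

    results = await asyncio.gather(*(upload_one(p) for p in paths))
    return [cid for cid in results if cid is not None]

cids = asyncio.run(upload_all([f"chunk_{i}" for i in range(50)]))
print(len(cids), "uploads succeeded")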
@@ -1082,6 +1112,9 @@ class IPFSClient:
             )
             if is_encrypted:
                 print("Encrypted: Yes")
+            print(
+                f"Using parallel download with max {PARALLEL_ORIGINAL_CHUNKS} original chunks and {PARALLEL_EC_CHUNKS} chunk downloads concurrently"
+            )

         # Step 3: Group chunks by their original chunk index
         chunks_by_original = {}
@@ -1091,136 +1124,157 @@
                     chunks_by_original[orig_idx] = []
                 chunks_by_original[orig_idx].append(chunk)

-            # Step 4:
+            # Step 4: Process all original chunks in parallel
             if verbose:
                 total_original_chunks = len(chunks_by_original)
-
+                total_chunks_needed = total_original_chunks * k
                 print(
-                    f"Downloading and reconstructing {
+                    f"Downloading and reconstructing {total_chunks_needed} chunks in parallel..."
                 )

-
-
-
-
-
-
-
-
-
-                    f"
-
-                )
+            # Create semaphores to limit concurrency
+            encoded_chunks_semaphore = asyncio.Semaphore(PARALLEL_EC_CHUNKS)
+            original_chunks_semaphore = asyncio.Semaphore(PARALLEL_ORIGINAL_CHUNKS)
+
+            # Process a single original chunk and its required downloads
+            async def process_original_chunk(orig_idx, available_chunks):
+                # Limit number of original chunks processing at once
+                async with original_chunks_semaphore:
+                    if verbose:
+                        print(f"Processing original chunk {orig_idx}...")
+
+                    if len(available_chunks) < k:
+                        raise ValueError(
+                            f"Not enough chunks available for original chunk {orig_idx}. "
+                            f"Need {k}, but only have {len(available_chunks)}."
+                        )

-
-
-
-                chunks_to_try = available_chunks.copy()
+                    # Try slightly more than k chunks (k+2) to handle some failures
+                    num_to_try = min(k + 2, len(available_chunks))
+                    chunks_to_try = random.sample(available_chunks, num_to_try)

-
-
+                    # Track downloaded chunks
+                    download_tasks = []

-
-
-
-                        break
+                    # Start parallel downloads for chunks
+                    for chunk in chunks_to_try:
+                        chunk_path = os.path.join(temp_dir, f"{chunk['name']}")

-
-                try:
-                    # Extract the CID string from the chunk's cid dictionary
+                        # Extract CID
                         chunk_cid = (
                             chunk["cid"]["cid"]
                             if isinstance(chunk["cid"], dict) and "cid" in chunk["cid"]
                             else chunk["cid"]
                         )
-                    await self.download_file(
-                        chunk_cid, chunk_path, max_retries=max_retries
-                    )
-                    chunks_downloaded += 1

-                    #
-
-
-
-
-
-
-
-
-
-
+                        # Create download task
+                        async def download_chunk(cid, path, chunk_info):
+                            async with encoded_chunks_semaphore:
+                                try:
+                                    await self.download_file(
+                                        cid, path, max_retries=max_retries
+                                    )
+
+                                    # Read chunk data
+                                    with open(path, "rb") as f:
+                                        share_data = f.read()
+
+                                    return {
+                                        "success": True,
+                                        "data": share_data,
+                                        "share_idx": chunk_info["share_idx"],
+                                        "name": chunk_info["name"],
+                                    }
+                                except Exception as e:
+                                    if verbose:
+                                        print(
+                                            f"Error downloading chunk {chunk_info['name']}: {str(e)}"
+                                        )
+                                    return {
+                                        "success": False,
+                                        "error": str(e),
+                                        "name": chunk_info["name"],
+                                    }
+
+                        # Create task
+                        task = asyncio.create_task(
+                            download_chunk(chunk_cid, chunk_path, chunk)
+                        )
+                        download_tasks.append(task)
+
+                    # Process downloads as they complete
+                    downloaded_shares = []
+                    share_indexes = []
+
+                    for done_task in asyncio.as_completed(download_tasks):
+                        result = await done_task
+
+                        if result["success"]:
+                            downloaded_shares.append(result["data"])
+                            share_indexes.append(result["share_idx"])
+
+                            # Once we have k chunks, cancel remaining downloads
+                            if len(downloaded_shares) >= k:
+                                for task in download_tasks:
+                                    if not task.done():
+                                        task.cancel()
+                                break
+
+                    # Check if we have enough chunks
+                    if len(downloaded_shares) < k:
+                        raise ValueError(
+                            f"Failed to download enough chunks for original chunk {orig_idx}. "
+                            f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                        )

-
-
-
-
-                    f"Need {k}, but only downloaded {len(downloaded_shares)}."
+                    # Reconstruct this chunk
+                    decoder = zfec.Decoder(k, m)
+                    reconstructed_data = decoder.decode(
+                        downloaded_shares, share_indexes
                     )

-
-
-
+                    if not isinstance(reconstructed_data, list):
+                        raise TypeError(
+                            f"Unexpected type from decoder: {type(reconstructed_data)}. Expected list of bytes."
+                        )

-
-
-
-
-
+                    # Calculate the actual size of this original chunk
+                    is_last_chunk = orig_idx == max(chunks_by_original.keys())
+                    original_chunk_size = total_original_size - orig_idx * chunk_size
+                    if not is_last_chunk:
+                        original_chunk_size = min(chunk_size, original_chunk_size)
+
+                    # Recombine the sub-blocks
+                    reconstructed_chunk = b""
+                    total_bytes = 0
+                    for sub_block in reconstructed_data:
+                        bytes_to_take = min(
+                            len(sub_block), original_chunk_size - total_bytes
+                        )
+                        if bytes_to_take <= 0:
+                            break

-
-
-                is_last_chunk = orig_idx == max(chunks_by_original.keys())
-                original_chunk_size = total_original_size - orig_idx * chunk_size
-                if not is_last_chunk:
-                    original_chunk_size = min(chunk_size, original_chunk_size)
-
-                # Recombine the sub-blocks, respecting the original chunk size
-                reconstructed_chunk = b""
-                total_bytes = 0
-                for sub_block in reconstructed_data:
-                    # Calculate how many bytes we should take from this sub-block
-                    bytes_to_take = min(
-                        len(sub_block), original_chunk_size - total_bytes
-                    )
-                    if bytes_to_take <= 0:
-                        break
+                        reconstructed_chunk += sub_block[:bytes_to_take]
+                        total_bytes += bytes_to_take

-                    reconstructed_chunk
-                    total_bytes += bytes_to_take
+                    return reconstructed_chunk

-
+            # Create tasks for all original chunks and process them in parallel
+            chunk_tasks = []
+            for orig_idx in sorted(chunks_by_original.keys()):
+                chunk_tasks.append(
+                    process_original_chunk(orig_idx, chunks_by_original[orig_idx])
+                )

-
-
-                progress_pct = (orig_idx + 1) / total_original_chunks * 100
-                print(
-                    f"  Progress: {orig_idx + 1}/{total_original_chunks} chunks ({progress_pct:.1f}%)"
-                )
-                if (
-                    orig_idx == 0 or is_last_chunk
-                ):  # Only show debug for first and last chunks to avoid spam
-                    print(f"  Debug info for chunk {orig_idx}:")
-                    print(f"  Original chunk size: {original_chunk_size} bytes")
-                    print(
-                        f"  Reconstructed chunk size: {len(reconstructed_chunk)} bytes"
-                    )
-                    print(f"  Share indexes used: {share_indexes}")
-                    print(f"  Sub-blocks received: {len(reconstructed_data)}")
+            # Wait for all chunks to be reconstructed
+            reconstructed_chunks = await asyncio.gather(*chunk_tasks)

             if verbose:
                 download_time = time.time() - start_time
-                print(
-                    f"Downloaded {chunks_downloaded} chunks in {download_time:.2f} seconds"
-                )
-                if chunks_failed > 0:
-                    print(
-                        f"Failed to download {chunks_failed} chunks (not needed for reconstruction)"
-                    )
+                print(f"Chunk reconstruction completed in {download_time:.2f} seconds")

             # Step 5: Combine the reconstructed chunks into a file
-
-                print("Combining reconstructed chunks...")
+            print("Combining reconstructed chunks...")

             # Process chunks to remove padding correctly
             processed_chunks = []