PyPI - poma - Versions diffs - 0.2.2__tar.gz → 0.3.2__tar.gz - Mend

poma 0.2.2.tar.gz → 0.3.2.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (24) hide show

{poma-0.2.2/poma.egg-info → poma-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: poma
-Version: 0.2.2
+Version: 0.3.2
 Summary: Official Python SDK for the Poma document-processing API
 Author-email: "POMA AI GmbH, Berlin" <sdk@poma-ai.com>
 License-Expression: MPL-2.0
@@ -10,20 +10,24 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: httpx==0.28.1
 Requires-Dist: pydantic==2.11.7
-Provides-Extra: integrations
-Requires-Dist: langchain==0.3.27; extra == "integrations"
-Requires-Dist: langchain-text-splitters==0.3.9; extra == "integrations"
-Requires-Dist: llama-index==0.13.0; extra == "integrations"
-Provides-Extra: integration-examples
-Requires-Dist: langchain==0.3.27; extra == "integration-examples"
-Requires-Dist: langchain-text-splitters==0.3.9; extra == "integration-examples"
-Requires-Dist: llama-index==0.13.0; extra == "integration-examples"
-Requires-Dist: llama-index-vector-stores-faiss==0.5.0; extra == "integration-examples"
-Requires-Dist: faiss-cpu==1.10.0; extra == "integration-examples"
-Requires-Dist: langchain_openai==0.3.28; extra == "integration-examples"
-Requires-Dist: langchain_community==0.3.27; extra == "integration-examples"
-Requires-Dist: llama-index-embeddings-langchain==0.4.0; extra == "integration-examples"
-Requires-Dist: dotenv; extra == "integration-examples"
+Provides-Extra: langchain
+Requires-Dist: langchain==0.3.27; extra == "langchain"
+Requires-Dist: langchain-text-splitters==0.3.9; extra == "langchain"
+Provides-Extra: llamaindex
+Requires-Dist: llama-index==0.13.0; extra == "llamaindex"
+Provides-Extra: qdrant
+Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "qdrant"
+Provides-Extra: all
+Requires-Dist: langchain==0.3.27; extra == "all"
+Requires-Dist: langchain-text-splitters==0.3.9; extra == "all"
+Requires-Dist: llama-index==0.13.0; extra == "all"
+Requires-Dist: llama-index-vector-stores-faiss==0.5.0; extra == "all"
+Requires-Dist: faiss-cpu==1.10.0; extra == "all"
+Requires-Dist: langchain_openai==0.3.28; extra == "all"
+Requires-Dist: langchain_community==0.3.27; extra == "all"
+Requires-Dist: llama-index-embeddings-langchain==0.4.0; extra == "all"
+Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "all"
+Requires-Dist: dotenv; extra == "all"
 Dynamic: license-file
 ![POMA AI Logo](https://raw.githubusercontent.com/poma-ai/.github/main/assets/POMA_AI_Logo_Pink.svg)
@@ -38,11 +42,14 @@ Requires Python 3.10+. Install the core packages:
 pip install poma
 ```
-For integrations into LangChain and LlamaIndex:
+For different integrations:
 ```bash
-pip install 'poma[integrations]'
-# Or LangChain/LlamaIndex including example extras:
-pip install 'poma[integration-examples]'
+pip install 'poma[langchain]'
+pip install 'poma[llamaindex]'
+pip install 'poma[qdrant]'
+# Or LangChain/LlamaIndex/Qdrant including example extras:
+pip install 'poma[all]'
 ```
@@ -53,10 +60,11 @@ pip install 'poma[integration-examples]'
 ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)
-We provide four example implementations to help you get started with POMA AI:
+We provide example implementations to help you get started with POMA AI:
 - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
 - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
 - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
+-
 *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*

{poma-0.2.2 → poma-0.3.2}/README.md RENAMED Viewed

@@ -10,11 +10,14 @@ Requires Python 3.10+. Install the core packages:
 pip install poma
 ```
-For integrations into LangChain and LlamaIndex:
+For different integrations:
 ```bash
-pip install 'poma[integrations]'
-# Or LangChain/LlamaIndex including example extras:
-pip install 'poma[integration-examples]'
+pip install 'poma[langchain]'
+pip install 'poma[llamaindex]'
+pip install 'poma[qdrant]'
+# Or LangChain/LlamaIndex/Qdrant including example extras:
+pip install 'poma[all]'
 ```
@@ -25,10 +28,11 @@ pip install 'poma[integration-examples]'
 ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)
-We provide four example implementations to help you get started with POMA AI:
+We provide example implementations to help you get started with POMA AI:
 - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
 - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
 - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
+-
 *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*

{poma-0.2.2 → poma-0.3.2}/poma/client.py RENAMED Viewed

@@ -21,6 +21,38 @@ USER_AGENT = "poma-ai-sdk/0.1.0"
 API_BASE_URL = "https://api.poma-ai.com/api/v1"
+def extract_chunks_and_chunksets_from_poma_archive(
+    poma_archive_data: bytes | None = None,
+    poma_archive_path: str | os.PathLike[str] | None = None,
+) -> dict[str, Any]:
+    """
+    Extract chunks and chunksets from a POMA archive file.
+    POMA archive is a zip file containing chunks.json and chunksets.json.
+    Args:
+        poma_archive_data: The POMA archive as bytes.
+        poma_archive_path: Path to the POMA archive file.
+    Returns:
+        dict: A dictionary with ``chunks`` and ``chunksets`` keys.
+    """
+    chunks = None
+    chunksets = None
+    if poma_archive_path:
+        with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
+            chunks = zip_ref.read("chunks.json")
+            chunksets = zip_ref.read("chunksets.json")
+    elif poma_archive_data:
+        with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
+            chunks = zip_ref.read("chunks.json")
+            chunksets = zip_ref.read("chunksets.json")
+    else:
+        raise ValueError(
+            "Either poma_archive_data or poma_archive_path must be provided."
+        )
+    if not chunks or not chunksets:
+        raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
+    return {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
 class Poma:
     """
     Client for interacting with the POMA API.
@@ -61,23 +93,25 @@ class Poma:
     def start_chunk_file(
         self,
-        file_path: os.PathLike[str],
+        file_path: str | os.PathLike[str],
         *,
         base_url: str | None = None,
     ) -> dict[str, Any]:
         """
         Submit a file with text to POMA for chunking.
         Args:
-            file_path (os.PathLike[str]):
-                Path to the input file. Must have an allowed file extension.
+            file_path (str | os.PathLike[str]):
+                Path to the input file (string or path-like). Must have an allowed file extension.
             base_url (str, optional):
                 Optional base URL to resolve relative links within the file.
         Returns:
             A dictionary containing a unique job identifier for the submitted job.
         """
-        if not file_path or not isinstance(file_path, os.PathLike):
-            raise ValueError("file_path must be a non-empty os.PathLike.")
+        if file_path is None or (isinstance(file_path, str) and not file_path.strip()):
+            raise ValueError("file_path must be a non-empty string or path-like.")
+        path = Path(file_path)
         payload = {}
+        payload["is_sdk"] = True
         if base_url:
             payload["base_url"] = base_url
         try:
@@ -85,7 +119,7 @@ class Poma:
                 f"{self.base_api_url}/ingest",
                 data=payload,
                 files={
-                    "file": (Path(file_path).name, Path(file_path).read_bytes()),
+                    "file": (path.name, path.read_bytes()),
                 },
             )
             response.raise_for_status()
@@ -93,10 +127,10 @@ class Poma:
             status = error.response.status_code
             if status in (401, 403):
                 raise AuthenticationError(
-                    f"Failed to submit file '{file_path}': authentication error"
+                    f"Failed to submit file '{path}': authentication error"
                 ) from error
             raise RemoteServerError(
-                f"Failed to submit file '{file_path}': {status}"
+                f"Failed to submit file '{path}': {status}"
             ) from error
         try:
             data = response.json()
@@ -115,6 +149,7 @@ class Poma:
         max_interval: float = 15.0,
         show_progress: bool = False,
         download_dir: str | os.PathLike[str] | None = None,
+        filename: str | None = None,
     ) -> dict[str, Any]:
         """
         Poll POMA for the result of a chunking job until completion.
@@ -130,15 +165,22 @@ class Poma:
             show_progress (bool, default=False):
                 If True, logs progress messages during polling.
             download_dir (str | os.PathLike[str], optional):
-                Directory to save the downloaded file in. Required if return_bytes=False.
-            return_bytes (bool, default=False):
-                If True, returns the file content as bytes instead of saving to disk.
+                Directory to save the downloaded file in. If neither download_dir nor
+                filename is set, the result is returned in memory (no file saved). If
+                filename is set but download_dir is not, the file is saved in the
+                current directory.
+            filename (str, optional):
+                Name for the saved .poma file. If it does not end with ``.poma``, that
+                suffix is appended. If not set when saving to disk, uses the server
+                filename when provided, otherwise ``{job_id}.poma``.
         Returns:
             The JSON result containing at least the keys `chunks` and `chunksets`.
         """
         time.sleep(initial_delay)
         current_interval = poll_interval
+        last_status = None
         while True:
             time.sleep(current_interval)
             try:
@@ -154,17 +196,26 @@ class Poma:
                             "Failed to receive download URL from server."
                         )
-                    if download_dir is None:
+                    if download_dir is None and filename is None:
                         # Return bytes content instead of saving to file
                         file_bytes = self.download_bytes(download_url)
                         return self.extract_chunks_and_chunksets_from_poma_archive(
                             poma_archive_data=file_bytes
                         )
                     else:
-                        # Save downloaded file to directory
-                        filename = download.get("filename", "downloaded_file.poma")
+                        # Save downloaded file (to download_dir or current dir if only filename set)
+                        save_filename = (
+                            filename or download.get("filename") or f"{job_id}.poma"
+                        )
+                        if not save_filename.endswith(".poma"):
+                            save_filename = f"{save_filename}.poma"
+                        save_dir = (
+                            download_dir
+                            if download_dir not in (None, "")
+                            else "."
+                        )
                         downloaded_file_path = self.download_file(
-                            download_url, filename, save_directory=download_dir
+                            download_url, save_filename, save_directory=save_dir
                         )
                         return self.extract_chunks_and_chunksets_from_poma_archive(
                             poma_archive_path=downloaded_file_path
@@ -181,9 +232,18 @@ class Poma:
                 elif status == "processing":
                     if show_progress:
                         print(f"Job {job_id} is still processing...")
+                    if last_status == "pending":
+                        current_interval = poll_interval
+                    current_interval = min(current_interval * 1.5, max_interval)
+                elif status == "pending":
+                    if show_progress:
+                        print(
+                            f"Job {job_id} is pending (queued due to rate limiting, sequential processing - common on demo accounts)..."
+                        )
                     current_interval = min(current_interval * 1.5, max_interval)
                 else:
                     raise InvalidResponseError(f"Unexpected job status: {status}")
+                last_status = status
             except httpx.HTTPStatusError as error:
                 raise RemoteServerError(
                     f"HTTP error: {error.response.status_code} {error.response.text}"
@@ -197,37 +257,18 @@ class Poma:
         poma_archive_path: str | os.PathLike[str] | None = None,
     ) -> dict[str, Any]:
         """
-        Extract POMA archive file.
-        POMA archive file is a zip file containing the chunks.json and chunksets.json files.
+        Extract chunks and chunksets from a POMA archive; delegates to module-level function.
+        POMA archive is a zip file containing chunks.json and chunksets.json.
         Args:
-            poma_archive (bytes): The POMA archive file.
+            poma_archive_data: The POMA archive as bytes.
+            poma_archive_path: Path to the POMA archive file.
         Returns:
-            dict: A dictionary containing the chunks and chunksets.
+            dict: A dictionary with ``chunks`` and ``chunksets`` keys.
         """
-        # Load the chunks and chunksets from POMA archive
-        chunks = None
-        chunksets = None
-        if poma_archive_path:
-            with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
-                chunks = zip_ref.read("chunks.json")
-                chunksets = zip_ref.read("chunksets.json")
-        elif poma_archive_data:
-            with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
-                chunks = zip_ref.read("chunks.json")
-                chunksets = zip_ref.read("chunksets.json")
-        else:
-            raise ValueError(
-                "Either poma_archive_data or poma_archive_path must be provided."
-            )
-        # Sanity check
-        if not chunks or not chunksets:
-            raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
-        # Load the chunks and chunksets
-        json_result = {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
-        return json_result
+        return extract_chunks_and_chunksets_from_poma_archive(
+            poma_archive_data=poma_archive_data,
+            poma_archive_path=poma_archive_path,
+        )
     def create_cheatsheet(
         self,
@@ -301,14 +342,16 @@ class Poma:
         if not filename:
             filename = Path(download_url).name or "downloaded_file"
-        # Determine save directory
+        # Determine save directory (default current directory so path has a parent)
         if save_directory:
             save_path = Path(save_directory) / filename
         else:
-            save_path = Path(filename)
+            save_path = Path(".") / filename
-        # Create the directory if it doesn't exist
-        os.makedirs(os.path.dirname(save_path), exist_ok=True)
+        # Create the directory if it doesn't exist (skip when file is in cwd)
+        parent = os.path.dirname(save_path)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
         # Download the file data
         content = self.download_bytes(download_url)

poma-0.3.2/poma/integrations/__init__.py ADDED Viewed

File without changes

poma-0.3.2/poma/integrations/langchain/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+from .langchain_poma import (
+    PomaFileLoader,
+    PomaChunksetSplitter,
+    PomaCheatsheetRetrieverLC,
+)
+__all__ = [
+    "PomaFileLoader",
+    "PomaChunksetSplitter",
+    "PomaCheatsheetRetrieverLC",
+]

{poma-0.2.2/poma/integrations → poma-0.3.2/poma/integrations/llamaindex}/__init__.py RENAMED Viewed

@@ -1,9 +1,3 @@
-from .langchain_poma import (
-    PomaFileLoader,
-    PomaChunksetSplitter,
-    PomaCheatsheetRetrieverLC,
-)
 from .llamaindex_poma import (
     PomaFileReader,
     PomaChunksetNodeParser,
@@ -11,9 +5,6 @@ from .llamaindex_poma import (
 )
 __all__ = [
-    "PomaFileLoader",
-    "PomaChunksetSplitter",
-    "PomaCheatsheetRetrieverLC",
     "PomaFileReader",
     "PomaChunksetNodeParser",
     "PomaCheatsheetRetrieverLI",

poma-0.3.2/poma/integrations/qdrant/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+from .qdrant_poma import (
+    PomaQdrant,
+    QdrantConfig,
+    VectorConfig,
+    InferenceConfig,
+    QdrantResponseError,
+    SearchResult,
+    chunk_uuid_string,
+    DenseEmbedSync,
+    SparseEmbedSync,
+)
+__all__ = [
+    "PomaQdrant",
+    "QdrantConfig",
+    "VectorConfig",
+    "InferenceConfig",
+    "QdrantResponseError",
+    "SearchResult",
+    "chunk_uuid_string",
+    "DenseEmbedSync",
+    "SparseEmbedSync",
+]

poma 0.2.2__tar.gz → 0.3.2__tar.gz

poma 0.2.2.tar.gz → 0.3.2.tar.gz