PyPI - morphik - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

morphik 0.1.3py3-none-any.whl → 0.1.4py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

morphik/__init__.py +1 -1
morphik/_internal.py +1 -1
morphik/async_.py +43 -16
morphik/sync.py +46 -12
morphik/tests/README.md +41 -0
morphik/tests/__init__.py +0 -0
morphik/tests/example_usage.py +280 -0
morphik/tests/test_async.py +300 -0
morphik/tests/test_docs/sample1.txt +11 -0
morphik/tests/test_docs/sample2.txt +15 -0
morphik/tests/test_docs/sample3.txt +17 -0
morphik/tests/test_sync.py +293 -0
morphik-0.1.4.dist-info/METADATA +153 -0
morphik-0.1.4.dist-info/RECORD +18 -0
morphik-0.1.3.dist-info/METADATA +0 -47
morphik-0.1.3.dist-info/RECORD +0 -10
{morphik-0.1.3.dist-info → morphik-0.1.4.dist-info}/WHEEL +0 -0

morphik/tests/test_async.py ADDED Viewed

@@ -0,0 +1,300 @@
+import os
+import pytest
+import asyncio
+import uuid
+from pathlib import Path
+from morphik.async_ import AsyncMorphik, AsyncFolder, AsyncUserScope
+from morphik.models import Document, CompletionResponse
+# Set to your local Morphik server - use localhost by default
+# Default client connects to localhost:8000 automatically
+# Skip these tests if the SKIP_LIVE_TESTS environment variable is set
+pytestmark = pytest.mark.skipif(
+    os.environ.get("SKIP_LIVE_TESTS") == "1",
+    reason="Skip tests that require a running Morphik server"
+)
+# Get the test files directory
+TEST_DOCS_DIR = Path(__file__).parent / "test_docs"
+class TestAsyncMorphik:
+    """
+    Tests for the asynchronous Morphik SDK client with a live server.
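+    To run these tests, start a local Morphik server and then run:
+    MORPHIK_TEST_URL=http://localhost:8000 pytest morphik/tests/test_async.py -v
+    Note: this module never reads MORPHIK_TEST_URL itself; the ``db`` fixture builds
+    the client with no arguments, so the variable only matters if the client honors it.
+    Set SKIP_LIVE_TESTS=1 to skip every test here when no server is available.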
+    """
+    @pytest.fixture
+    async def db(self):
+        """Create an AsyncMorphik client for testing"""
+        client = AsyncMorphik()  # Connects to localhost:8000 by default
+        yield client
+        await client.close()
+    @pytest.mark.asyncio
+    async def test_ingest_text(self, db):
+        """Test ingesting a text document"""
+        # Generate a unique filename to avoid conflicts
+        filename = f"test_{uuid.uuid4().hex[:8]}.txt"
+        # Test basic text ingestion
+        doc = await db.ingest_text(
+            content="This is a test document for the Morphik SDK.",
+            filename=filename,
+            metadata={"test_id": "async_text_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        assert doc.filename == filename
+        assert "test_id" in doc.metadata
+        assert doc.metadata["test_id"] == "async_text_test"
+        # Clean up
+        await db.delete_document(doc.external_id)
+    @pytest.mark.asyncio
+    async def test_ingest_file(self, db):
+        """Test ingesting a file from disk"""
+        # Use one of our test documents
+        file_path = TEST_DOCS_DIR / "sample1.txt"
+        # Test file ingestion
+        doc = await db.ingest_file(
+            file=file_path,
+            metadata={"test_id": "async_file_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        assert doc.filename == "sample1.txt"
+        assert "test_id" in doc.metadata
+        assert doc.metadata["test_id"] == "async_file_test"
+        # Clean up
+        await db.delete_document(doc.external_id)
+    @pytest.mark.asyncio
+    async def test_retrieve_chunks(self, db):
+        """Test retrieving chunks with a query"""
+        # First ingest a document
+        doc = await db.ingest_text(
+            content="Artificial intelligence and machine learning are transforming industries worldwide.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "async_retrieval_test", "category": "test"}
+        )
+        # Wait for processing to complete
+        max_retries = 10
+        for _ in range(max_retries):
+            try:
+                status = await db.get_document_status(doc.external_id)
+                if status.get("status") == "completed":
+                    break
+                await asyncio.sleep(2)  # Wait before checking again
+            except Exception:
+                await asyncio.sleep(2)
+        # Test retrieval
+        chunks = await db.retrieve_chunks(
+            query="What is artificial intelligence?",
+            filters={"test_id": "async_retrieval_test"}
+        )
+        # Verify results (may be empty if processing is slow)
+        if len(chunks) > 0:
+            assert chunks[0].document_id == doc.external_id
+            assert chunks[0].score > 0
+        # Clean up
+        await db.delete_document(doc.external_id)
+    @pytest.mark.asyncio
+    async def test_folder_operations(self, db):
+        """Test folder operations"""
+        # Create a unique folder name
+        folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"
+        # Create a folder
+        folder = await db.create_folder(
+            name=folder_name,
+            description="Test folder for SDK tests"
+        )
+        # Verify folder was created
+        assert folder.name == folder_name
+        assert folder.id is not None
+        # Test ingesting a document into the folder
+        doc = await folder.ingest_text(
+            content="This is a test document in a folder.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "async_folder_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        # List documents in the folder
+        docs = await folder.list_documents()
+        # There should be at least our test document
+        doc_ids = [d.external_id for d in docs]
+        assert doc.external_id in doc_ids
+        # Clean up - first delete the document
+        await db.delete_document(doc.external_id)
+        # TODO: Add folder deletion when API supports it
+    @pytest.mark.asyncio
+    async def test_user_scope(self, db):
+        """Test user scoped operations"""
+        # Create a unique user ID
+        user_id = f"test_user_{uuid.uuid4().hex[:8]}"
+        # Create a user scope
+        user_scope = db.signin(user_id)
+        # Verify user scope
+        assert user_scope.end_user_id == user_id
+        # Test ingesting a document as the user
+        doc = await user_scope.ingest_text(
+            content="This is a test document from a specific user.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "async_user_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        assert "test_id" in doc.metadata
+        assert doc.metadata["test_id"] == "async_user_test"
+        # List documents for this user
+        docs = await user_scope.list_documents()
+        # There should be at least our test document
+        doc_ids = [d.external_id for d in docs]
+        assert doc.external_id in doc_ids
+        # Clean up
+        await db.delete_document(doc.external_id)
+    @pytest.mark.asyncio
+    async def test_batch_operations(self, db):
+        """Test batch operations"""
+        # Ingest multiple files
+        files = [
+            TEST_DOCS_DIR / "sample1.txt",
+            TEST_DOCS_DIR / "sample2.txt",
+            TEST_DOCS_DIR / "sample3.txt"
+        ]
+        # Test batch ingestion
+        docs = await db.ingest_files(
+            files=files,
+            metadata={"test_id": "async_batch_test", "category": "test"},
+            parallel=True
+        )
+        # Verify documents were created
+        assert len(docs) == 3
+        file_names = [doc.filename for doc in docs]
+        assert "sample1.txt" in file_names
+        assert "sample2.txt" in file_names
+        assert "sample3.txt" in file_names
+        # Get documents in batch
+        doc_ids = [doc.external_id for doc in docs]
+        batch_docs = await db.batch_get_documents(doc_ids)
+        # Verify batch retrieval
+        assert len(batch_docs) == len(doc_ids)
+        retrieved_ids = [doc.external_id for doc in batch_docs]
+        for doc_id in doc_ids:
+            assert doc_id in retrieved_ids
+        # Clean up
+        for doc_id in doc_ids:
+            await db.delete_document(doc_id)
+    @pytest.mark.asyncio
+    async def test_folder_with_user_scope(self, db):
+        """Test combination of folder and user scope"""
+        # Create unique names
+        folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"
+        user_id = f"test_user_{uuid.uuid4().hex[:8]}"
+        # Create a folder
+        folder = await db.create_folder(name=folder_name)
+        # Create a user scope within the folder
+        user_scope = folder.signin(user_id)
+        # Verify scopes
+        assert user_scope.folder_name == folder_name
+        assert user_scope.end_user_id == user_id
+        # Test ingestion in this combined scope
+        doc = await user_scope.ingest_text(
+            content="This is a test document in a folder from a specific user.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "async_folder_user_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        # List documents in this scope
+        docs = await user_scope.list_documents()
+        # There should be at least our test document
+        doc_ids = [d.external_id for d in docs]
+        assert doc.external_id in doc_ids
+        # Clean up
+        await db.delete_document(doc.external_id)
+    @pytest.mark.asyncio
+    async def test_query_endpoint(self, db):
+        """Test the query endpoint for RAG capabilities"""
+        # First ingest a document
+        doc = await db.ingest_text(
+            content="Artificial intelligence and machine learning are transforming industries worldwide. "
+                   "AI systems can now process natural language, recognize images, and make complex decisions.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "async_query_test", "category": "test"}
+        )
+        try:
+            # Wait for processing to complete
+            for _ in range(10):
+                status = await db.get_document_status(doc.external_id)
+                if status.get("status") == "completed":
+                    break
+                await asyncio.sleep(2)
+            # Only proceed with test if document is processed
+            if status.get("status") == "completed":
+                # Test the query endpoint
+                response = await db.query(
+                    query="What can AI systems do?",
+                    filters={"test_id": "async_query_test"},
+                    k=1,
+                    temperature=0.7
+                )
+                # Verify response
+                assert response.completion is not None
+                assert len(response.completion) > 0
+                assert len(response.sources) > 0
+                assert response.sources[0].document_id == doc.external_id
+        finally:
+            # Clean up
+            await db.delete_document(doc.external_id)

morphik/tests/test_docs/sample1.txt ADDED Viewed

@@ -0,0 +1,11 @@
+This is a sample text file for testing the Morphik SDK.
+It contains information about artificial intelligence and machine learning.
+Large language models like Claude can process and understand natural language.
+These models are trained on vast amounts of text data and can generate
+human-like responses to various prompts and questions.
+The field of AI has seen rapid advancement in recent years, with models
+becoming increasingly capable of understanding context and generating
+coherent, relevant text.

morphik/tests/test_docs/sample2.txt ADDED Viewed

@@ -0,0 +1,15 @@
+Vector databases are specialized databases designed for storing and retrieving
+high-dimensional vectors. They are becoming increasingly important in the
+world of machine learning and AI applications.
+Unlike traditional databases that work with structured data, vector databases
+are optimized for similarity search operations on embedding vectors.
+Key features of vector databases include:
+1. Efficient similarity search algorithms
+2. Support for high-dimensional vectors
+3. Specialized indexing for fast retrieval
+4. Scalability for large vector collections
+Common applications include semantic search, recommendation systems,
+image retrieval, and natural language processing tasks.

morphik/tests/test_docs/sample3.txt ADDED Viewed

@@ -0,0 +1,17 @@
+Retrieval Augmented Generation (RAG) combines the power of large language
+models with external knowledge retrieval systems.
+In a RAG system, when a query is received:
+1. Relevant documents are retrieved from a knowledge base
+2. These documents are provided as context to the language model
+3. The model generates a response informed by both its trained knowledge
+   and the retrieved information
+RAG has several advantages:
+- More accurate responses with up-to-date information
+- Ability to cite sources for generated content
+- Reduced hallucinations compared to standalone LLMs
+- More controllable knowledge base
+This approach is now widely used in enterprise AI applications where
+accuracy and source attribution are critical.

morphik/tests/test_sync.py ADDED Viewed

@@ -0,0 +1,293 @@
+import os
+import pytest
+import time
+import uuid
+from pathlib import Path
+from morphik.sync import Morphik, Folder, UserScope
+from morphik.models import Document, CompletionResponse
+# Set to your local Morphik server - use localhost by default
+# Default client connects to localhost:8000 automatically
+# Skip these tests if the SKIP_LIVE_TESTS environment variable is set
+pytestmark = pytest.mark.skipif(
+    os.environ.get("SKIP_LIVE_TESTS") == "1",
+    reason="Skip tests that require a running Morphik server"
+)
+# Get the test files directory
+TEST_DOCS_DIR = Path(__file__).parent / "test_docs"
+class TestMorphik:
+    """
+    Tests for the synchronous Morphik SDK client with a live server.
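+    To run these tests, start a local Morphik server and then run:
+    MORPHIK_TEST_URL=http://localhost:8000 pytest morphik/tests/test_sync.py -v
+    Note: this module never reads MORPHIK_TEST_URL itself; the ``db`` fixture builds
+    the client with no arguments, so the variable only matters if the client honors it.
+    Set SKIP_LIVE_TESTS=1 to skip every test here when no server is available.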
+    """
+    @pytest.fixture
+    def db(self):
+        """Create a Morphik client for testing"""
+        client = Morphik()  # Connects to localhost:8000 by default
+        yield client
+        client.close()
+    def test_ingest_text(self, db):
+        """Test ingesting a text document"""
+        # Generate a unique filename to avoid conflicts
+        filename = f"test_{uuid.uuid4().hex[:8]}.txt"
+        # Test basic text ingestion
+        doc = db.ingest_text(
+            content="This is a test document for the Morphik SDK.",
+            filename=filename,
+            metadata={"test_id": "sync_text_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        assert doc.filename == filename
+        assert "test_id" in doc.metadata
+        assert doc.metadata["test_id"] == "sync_text_test"
+        # Clean up
+        db.delete_document(doc.external_id)
+    def test_ingest_file(self, db):
+        """Test ingesting a file from disk"""
+        # Use one of our test documents
+        file_path = TEST_DOCS_DIR / "sample1.txt"
+        # Test file ingestion
+        doc = db.ingest_file(
+            file=file_path,
+            metadata={"test_id": "sync_file_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        assert doc.filename == "sample1.txt"
+        assert "test_id" in doc.metadata
+        assert doc.metadata["test_id"] == "sync_file_test"
+        # Clean up
+        db.delete_document(doc.external_id)
+    def test_retrieve_chunks(self, db):
+        """Test retrieving chunks with a query"""
+        # First ingest a document
+        doc = db.ingest_text(
+            content="Artificial intelligence and machine learning are transforming industries worldwide.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "sync_retrieval_test", "category": "test"}
+        )
+        # Wait for processing to complete
+        processed_doc = doc
+        max_retries = 10
+        for _ in range(max_retries):
+            try:
+                status = db.get_document_status(doc.external_id)
+                if status.get("status") == "completed":
+                    break
+                time.sleep(2)  # Wait before checking again
+            except Exception:
+                time.sleep(2)
+        # Test retrieval
+        chunks = db.retrieve_chunks(
+            query="What is artificial intelligence?",
+            filters={"test_id": "sync_retrieval_test"}
+        )
+        # Verify results (may be empty if processing is slow)
+        if len(chunks) > 0:
+            assert chunks[0].document_id == doc.external_id
+            assert chunks[0].score > 0
+        # Clean up
+        db.delete_document(doc.external_id)
+    def test_folder_operations(self, db):
+        """Test folder operations"""
+        # Create a unique folder name
+        folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"
+        # Create a folder
+        folder = db.create_folder(
+            name=folder_name,
+            description="Test folder for SDK tests"
+        )
+        # Verify folder was created
+        assert folder.name == folder_name
+        assert folder.id is not None
+        # Test ingesting a document into the folder
+        doc = folder.ingest_text(
+            content="This is a test document in a folder.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "sync_folder_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        # List documents in the folder
+        docs = folder.list_documents()
+        # There should be at least our test document
+        doc_ids = [d.external_id for d in docs]
+        assert doc.external_id in doc_ids
+        # Clean up - first delete the document
+        db.delete_document(doc.external_id)
+        # TODO: Add folder deletion when API supports it
+    def test_user_scope(self, db):
+        """Test user scoped operations"""
+        # Create a unique user ID
+        user_id = f"test_user_{uuid.uuid4().hex[:8]}"
+        # Create a user scope
+        user_scope = db.signin(user_id)
+        # Verify user scope
+        assert user_scope.end_user_id == user_id
+        # Test ingesting a document as the user
+        doc = user_scope.ingest_text(
+            content="This is a test document from a specific user.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "sync_user_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        assert "test_id" in doc.metadata
+        assert doc.metadata["test_id"] == "sync_user_test"
+        # List documents for this user
+        docs = user_scope.list_documents()
+        # There should be at least our test document
+        doc_ids = [d.external_id for d in docs]
+        assert doc.external_id in doc_ids
+        # Clean up
+        db.delete_document(doc.external_id)
+    def test_batch_operations(self, db):
+        """Test batch operations"""
+        # Ingest multiple files
+        files = [
+            TEST_DOCS_DIR / "sample1.txt",
+            TEST_DOCS_DIR / "sample2.txt",
+            TEST_DOCS_DIR / "sample3.txt"
+        ]
+        # Test batch ingestion
+        docs = db.ingest_files(
+            files=files,
+            metadata={"test_id": "sync_batch_test", "category": "test"},
+            parallel=True
+        )
+        # Verify documents were created
+        assert len(docs) == 3
+        file_names = [doc.filename for doc in docs]
+        assert "sample1.txt" in file_names
+        assert "sample2.txt" in file_names
+        assert "sample3.txt" in file_names
+        # Get documents in batch
+        doc_ids = [doc.external_id for doc in docs]
+        batch_docs = db.batch_get_documents(doc_ids)
+        # Verify batch retrieval
+        assert len(batch_docs) == len(doc_ids)
+        retrieved_ids = [doc.external_id for doc in batch_docs]
+        for doc_id in doc_ids:
+            assert doc_id in retrieved_ids
+        # Clean up
+        for doc_id in doc_ids:
+            db.delete_document(doc_id)
+    def test_folder_with_user_scope(self, db):
+        """Test combination of folder and user scope"""
+        # Create unique names
+        folder_name = f"test_folder_{uuid.uuid4().hex[:8]}"
+        user_id = f"test_user_{uuid.uuid4().hex[:8]}"
+        # Create a folder
+        folder = db.create_folder(name=folder_name)
+        # Create a user scope within the folder
+        user_scope = folder.signin(user_id)
+        # Verify scopes
+        assert user_scope.folder_name == folder_name
+        assert user_scope.end_user_id == user_id
+        # Test ingestion in this combined scope
+        doc = user_scope.ingest_text(
+            content="This is a test document in a folder from a specific user.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "sync_folder_user_test", "category": "test"}
+        )
+        # Verify the document was created
+        assert doc.external_id is not None
+        # List documents in this scope
+        docs = user_scope.list_documents()
+        # There should be at least our test document
+        doc_ids = [d.external_id for d in docs]
+        assert doc.external_id in doc_ids
+        # Clean up
+        db.delete_document(doc.external_id)
+    def test_query_endpoint(self, db):
+        """Test the query endpoint for RAG capabilities"""
+        # First ingest a document
+        doc = db.ingest_text(
+            content="Artificial intelligence and machine learning are transforming industries worldwide. "
+                   "AI systems can now process natural language, recognize images, and make complex decisions.",
+            filename=f"test_{uuid.uuid4().hex[:8]}.txt",
+            metadata={"test_id": "sync_query_test", "category": "test"}
+        )
+        try:
+            # Wait for processing to complete
+            for _ in range(10):
+                status = db.get_document_status(doc.external_id)
+                if status.get("status") == "completed":
+                    break
+                time.sleep(2)
+            # Only proceed with test if document is processed
+            if status.get("status") == "completed":
+                # Test the query endpoint
+                response = db.query(
+                    query="What can AI systems do?",
+                    filters={"test_id": "sync_query_test"},
+                    k=1,
+                    temperature=0.7
+                )
+                # Verify response
+                assert response.completion is not None
+                assert len(response.completion) > 0
+                assert len(response.sources) > 0
+                assert response.sources[0].document_id == doc.external_id
+        finally:
+            # Clean up
+            db.delete_document(doc.external_id)

morphik 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

morphik 0.1.3py3-none-any.whl → 0.1.4py3-none-any.whl