morphik 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,7 +33,10 @@ offload/*
33
33
  test.pdf
34
34
 
35
35
  experiments/*
36
- ui-component/package-lock.json
36
+ ee/ui-component/package-lock.json/*
37
+ ee/ui-component/node-modules/*
38
+ ee/ui-component/.next
37
39
 
38
40
 
39
41
  ui-component/notebook-storage/notebooks.json
42
+ ee/ui-component/package-lock.json
morphik-0.1.5/PKG-INFO ADDED
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: morphik
3
+ Version: 0.1.5
4
+ Summary: Morphik Python Client
5
+ Author-email: Morphik <founders@morphik.ai>
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: httpx>=0.24.0
8
+ Requires-Dist: pillow==10.4.0
9
+ Requires-Dist: pydantic==2.10.3
10
+ Requires-Dist: pyjwt>=2.0.0
11
+ Requires-Dist: requests>=2.32.3
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Morphik
15
+
16
+ A Python client for the Morphik API that enables document ingestion, semantic search, and retrieval-augmented generation (RAG).
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install morphik
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ The SDK provides both synchronous and asynchronous clients:
27
+
28
+ ### Synchronous Usage
29
+
30
+ ```python
31
+ from morphik import Morphik
32
+
33
+ # Initialize client - connects to localhost:8000 by default
34
+ db = Morphik()
35
+
36
+ # Or with authentication URI (for production)
37
+ # db = Morphik("morphik://owner_id:token@api.morphik.ai")
38
+
39
+ # Ingest a text document
40
+ doc = db.ingest_text(
41
+ content="Your document content",
42
+ metadata={"title": "Example Document"}
43
+ )
44
+
45
+ # Ingest a file
46
+ doc = db.ingest_file(
47
+ file="path/to/document.pdf",
48
+ metadata={"category": "reports"}
49
+ )
50
+
51
+ # Retrieve relevant chunks
52
+ chunks = db.retrieve_chunks(
53
+ query="Your search query",
54
+ filters={"category": "reports"}
55
+ )
56
+
57
+ # Query with RAG
58
+ response = db.query(
59
+ query="Summarize the key points in the document",
60
+ filters={"category": "reports"}
61
+ )
62
+
63
+ print(response.completion)
64
+ ```
65
+
66
+ ### Asynchronous Usage
67
+
68
+ ```python
69
+ import asyncio
70
+ from morphik.async_ import AsyncMorphik
71
+
72
+ async def main():
73
+ # Initialize async client - connects to localhost:8000 by default
74
+ async with AsyncMorphik() as db:
75
+
76
+ # Or with authentication URI (for production)
77
+ # async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
78
+ # Ingest a text document
79
+ doc = await db.ingest_text(
80
+ content="Your document content",
81
+ metadata={"title": "Example Document"}
82
+ )
83
+
84
+ # Query with RAG
85
+ response = await db.query(
86
+ query="Summarize the key points in the document",
87
+ )
88
+
89
+ print(response.completion)
90
+
91
+ # Run the async function
92
+ asyncio.run(main())
93
+ ```
94
+
95
+ ## Features
96
+
97
+ - Document ingestion (text, files, directories)
98
+ - Semantic search and retrieval
99
+ - Retrieval-augmented generation (RAG)
100
+ - Knowledge graph creation and querying
101
+ - Multi-user and multi-folder scoping
102
+ - Metadata filtering
103
+ - Document management
104
+
105
+ ## Development
106
+
107
+ ### Running Tests
108
+
109
+ To run the tests, first install the development dependencies:
110
+
111
+ ```bash
112
+ pip install -r test_requirements.txt
113
+ ```
114
+
115
+ Then run the tests:
116
+
117
+ ```bash
118
+ # Run all tests (requires a running Morphik server)
119
+ pytest morphik/tests/ -v
120
+
121
+ # Run specific test modules
122
+ pytest morphik/tests/test_sync.py -v
123
+ pytest morphik/tests/test_async.py -v
124
+
125
+ # Skip tests if you don't have a running server
126
+ SKIP_LIVE_TESTS=1 pytest morphik/tests/ -v
127
+
128
+ # Specify a custom server URL for tests
129
+ MORPHIK_TEST_URL=http://custom-server:8000 pytest morphik/tests/ -v
130
+ ```
131
+
132
+ ### Example Usage Script
133
+
134
+ The SDK comes with an example script that demonstrates basic usage:
135
+
136
+ ```bash
137
+ # Run synchronous example
138
+ python -m morphik.tests.example_usage
139
+
140
+ # Run asynchronous example
141
+ python -m morphik.tests.example_usage --async
142
+ ```
143
+
144
+ The example script demonstrates:
145
+ - Text and file ingestion
146
+ - Creating folders and user scopes
147
+ - Retrieving chunks and documents
148
+ - Generating completions using RAG
149
+ - Batch operations and cleanup
@@ -0,0 +1,136 @@
1
+ # Morphik
2
+
3
+ A Python client for the Morphik API that enables document ingestion, semantic search, and retrieval-augmented generation (RAG).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install morphik
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ The SDK provides both synchronous and asynchronous clients:
14
+
15
+ ### Synchronous Usage
16
+
17
+ ```python
18
+ from morphik import Morphik
19
+
20
+ # Initialize client - connects to localhost:8000 by default
21
+ db = Morphik()
22
+
23
+ # Or with authentication URI (for production)
24
+ # db = Morphik("morphik://owner_id:token@api.morphik.ai")
25
+
26
+ # Ingest a text document
27
+ doc = db.ingest_text(
28
+ content="Your document content",
29
+ metadata={"title": "Example Document"}
30
+ )
31
+
32
+ # Ingest a file
33
+ doc = db.ingest_file(
34
+ file="path/to/document.pdf",
35
+ metadata={"category": "reports"}
36
+ )
37
+
38
+ # Retrieve relevant chunks
39
+ chunks = db.retrieve_chunks(
40
+ query="Your search query",
41
+ filters={"category": "reports"}
42
+ )
43
+
44
+ # Query with RAG
45
+ response = db.query(
46
+ query="Summarize the key points in the document",
47
+ filters={"category": "reports"}
48
+ )
49
+
50
+ print(response.completion)
51
+ ```
52
+
53
+ ### Asynchronous Usage
54
+
55
+ ```python
56
+ import asyncio
57
+ from morphik.async_ import AsyncMorphik
58
+
59
+ async def main():
60
+ # Initialize async client - connects to localhost:8000 by default
61
+ async with AsyncMorphik() as db:
62
+
63
+ # Or with authentication URI (for production)
64
+ # async with AsyncMorphik("morphik://owner_id:token@api.morphik.ai") as db:
65
+ # Ingest a text document
66
+ doc = await db.ingest_text(
67
+ content="Your document content",
68
+ metadata={"title": "Example Document"}
69
+ )
70
+
71
+ # Query with RAG
72
+ response = await db.query(
73
+ query="Summarize the key points in the document",
74
+ )
75
+
76
+ print(response.completion)
77
+
78
+ # Run the async function
79
+ asyncio.run(main())
80
+ ```
81
+
82
+ ## Features
83
+
84
+ - Document ingestion (text, files, directories)
85
+ - Semantic search and retrieval
86
+ - Retrieval-augmented generation (RAG)
87
+ - Knowledge graph creation and querying
88
+ - Multi-user and multi-folder scoping
89
+ - Metadata filtering
90
+ - Document management
91
+
92
+ ## Development
93
+
94
+ ### Running Tests
95
+
96
+ To run the tests, first install the development dependencies:
97
+
98
+ ```bash
99
+ pip install -r test_requirements.txt
100
+ ```
101
+
102
+ Then run the tests:
103
+
104
+ ```bash
105
+ # Run all tests (requires a running Morphik server)
106
+ pytest morphik/tests/ -v
107
+
108
+ # Run specific test modules
109
+ pytest morphik/tests/test_sync.py -v
110
+ pytest morphik/tests/test_async.py -v
111
+
112
+ # Skip tests if you don't have a running server
113
+ SKIP_LIVE_TESTS=1 pytest morphik/tests/ -v
114
+
115
+ # Specify a custom server URL for tests
116
+ MORPHIK_TEST_URL=http://custom-server:8000 pytest morphik/tests/ -v
117
+ ```
118
+
119
+ ### Example Usage Script
120
+
121
+ The SDK comes with an example script that demonstrates basic usage:
122
+
123
+ ```bash
124
+ # Run synchronous example
125
+ python -m morphik.tests.example_usage
126
+
127
+ # Run asynchronous example
128
+ python -m morphik.tests.example_usage --async
129
+ ```
130
+
131
+ The example script demonstrates:
132
+ - Text and file ingestion
133
+ - Creating folders and user scopes
134
+ - Retrieving chunks and documents
135
+ - Generating completions using RAG
136
+ - Batch operations and cleanup
@@ -2,9 +2,9 @@
2
2
  Morphik Python SDK for document ingestion and querying.
3
3
  """
4
4
 
5
- from .sync import Morphik
6
5
  from .async_ import AsyncMorphik
7
6
  from .models import Document
7
+ from .sync import Morphik
8
8
 
9
9
  __all__ = [
10
10
  "Morphik",
@@ -12,4 +12,4 @@ __all__ = [
12
12
  "Document",
13
13
  ]
14
14
 
15
- __version__ = "0.1.3"
15
+ __version__ = "0.1.4"
@@ -1,26 +1,25 @@
1
1
  import base64
2
2
  import io
3
3
  import json
4
- from io import BytesIO, IOBase
5
- from PIL import Image
6
- from PIL.Image import Image as PILImage
4
+ from io import BytesIO
7
5
  from pathlib import Path
8
- from typing import Dict, Any, List, Optional, Union, Tuple, BinaryIO
6
+ from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Type, Union
9
7
  from urllib.parse import urlparse
10
8
 
11
9
  import jwt
10
+ from PIL import Image
11
+ from PIL.Image import Image as PILImage
12
12
  from pydantic import BaseModel, Field
13
13
 
14
14
  from .models import (
15
- Document,
16
15
  ChunkResult,
17
- DocumentResult,
16
+ ChunkSource, # Prompt override models
18
17
  CompletionResponse,
19
- IngestTextRequest,
20
- ChunkSource,
18
+ Document,
19
+ DocumentResult,
21
20
  Graph,
22
- # Prompt override models
23
21
  GraphPromptOverrides,
22
+ IngestTextRequest,
24
23
  )
25
24
  from .rules import Rule
26
25
 
@@ -199,9 +198,7 @@ class _MorphikClientLogic:
199
198
  if rules:
200
199
  if all(isinstance(r, list) for r in rules):
201
200
  # List of lists - per-file rules
202
- converted_rules = [
203
- [self._convert_rule(r) for r in rule_list] for rule_list in rules
204
- ]
201
+ converted_rules = [[self._convert_rule(r) for r in rule_list] for rule_list in rules]
205
202
  else:
206
203
  # Flat list - shared rules for all files
207
204
  converted_rules = [self._convert_rule(r) for r in rules]
@@ -211,7 +208,7 @@ class _MorphikClientLogic:
211
208
  data = {
212
209
  "metadata": json.dumps(metadata or {}),
213
210
  "rules": json.dumps(converted_rules),
214
- "use_colpali": str(use_colpali).lower() if use_colpali is not None else None,
211
+ # use_colpali is a query parameter, not a form field
215
212
  "parallel": str(parallel).lower(),
216
213
  }
217
214
 
@@ -237,6 +234,7 @@ class _MorphikClientLogic:
237
234
  prompt_overrides: Optional[Dict],
238
235
  folder_name: Optional[str],
239
236
  end_user_id: Optional[str],
237
+ schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
240
238
  ) -> Dict[str, Any]:
241
239
  """Prepare request for query endpoint"""
242
240
  payload = {
@@ -256,6 +254,20 @@ class _MorphikClientLogic:
256
254
  payload["folder_name"] = folder_name
257
255
  if end_user_id:
258
256
  payload["end_user_id"] = end_user_id
257
+
258
+ # Add schema to payload if provided
259
+ if schema:
260
+ # If schema is a Pydantic model class, serialize it to a JSON schema dict
261
+ if isinstance(schema, type) and issubclass(schema, BaseModel):
262
+ payload["schema"] = schema.model_json_schema()
263
+ elif isinstance(schema, dict):
264
+ # Basic check if it looks like a JSON schema (has 'properties' or 'type')
265
+ if "properties" not in schema and "type" not in schema:
266
+ raise ValueError("Provided schema dictionary does not look like a valid JSON schema")
267
+ payload["schema"] = schema
268
+ else:
269
+ raise TypeError("schema must be a Pydantic model type or a dictionary representing a JSON schema")
270
+
259
271
  # Filter out None values before sending
260
272
  return {k_p: v_p for k_p, v_p in payload.items() if v_p is not None}
261
273
 
@@ -361,7 +373,8 @@ class _MorphikClientLogic:
361
373
  if end_user_id:
362
374
  request["end_user_id"] = end_user_id
363
375
  return request
364
- return source_dicts # Return just sources list if no scoping is needed
376
+ # Return the dictionary structure { "sources": [...] } consistently.
377
+ return {"sources": source_dicts}
365
378
 
366
379
  def _prepare_create_graph_request(
367
380
  self,
@@ -454,15 +467,11 @@ class _MorphikClientLogic:
454
467
  docs = [Document(**doc) for doc in response_json]
455
468
  return docs
456
469
 
457
- def _parse_document_result_list_response(
458
- self, response_json: List[Dict[str, Any]]
459
- ) -> List[DocumentResult]:
470
+ def _parse_document_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[DocumentResult]:
460
471
  """Parse document result list response"""
461
472
  return [DocumentResult(**r) for r in response_json]
462
473
 
463
- def _parse_chunk_result_list_response(
464
- self, response_json: List[Dict[str, Any]]
465
- ) -> List[FinalChunkResult]:
474
+ def _parse_chunk_result_list_response(self, response_json: List[Dict[str, Any]]) -> List[FinalChunkResult]:
466
475
  """Parse chunk result list response"""
467
476
  chunks = [ChunkResult(**r) for r in response_json]
468
477