morphik 1.2.2__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {morphik-1.2.2 → morphik-1.2.3}/PKG-INFO +6 -1
- {morphik-1.2.2 → morphik-1.2.3}/README.md +5 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/__init__.py +1 -1
- {morphik-1.2.2 → morphik-1.2.3}/morphik/_internal.py +10 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/_scoped_ops.py +2 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/async_.py +16 -3
- {morphik-1.2.2 → morphik-1.2.3}/morphik/sync.py +15 -2
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_scoped_ops_unit.py +33 -0
- {morphik-1.2.2 → morphik-1.2.3}/pyproject.toml +1 -1
- {morphik-1.2.2 → morphik-1.2.3}/.gitignore +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/_shared.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/exceptions.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/models.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/README.md +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/__init__.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/example_usage.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_app_ops.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_async.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_docs/sample1.txt +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_docs/sample2.txt +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_docs/sample3.txt +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_shared_helpers.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_sync.py +0 -0
- {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_update_document_metadata_rename.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: morphik
|
|
3
|
-
Version: 1.2.2
|
|
3
|
+
Version: 1.2.3
|
|
4
4
|
Summary: Morphik Python Client
|
|
5
5
|
Author-email: Morphik <founders@morphik.ai>
|
|
6
6
|
Requires-Python: >=3.8
|
|
@@ -106,6 +106,11 @@ renamed = moved.rename("specs-v2")
|
|
|
106
106
|
# Scope queries to a path and include descendants with folder_depth=-1
|
|
107
107
|
chunks = folder.retrieve_chunks(query="design notes", folder_depth=-1)
|
|
108
108
|
docs = db.list_documents(folder_name="/projects/alpha", folder_depth=-1)
|
|
109
|
+
|
|
110
|
+
# List only the fields you need. The server reads and returns just those columns, so
|
|
111
|
+
# the full document text is never downloaded — fast for large corpora.
|
|
112
|
+
for doc in db.list_documents(fields=["metadata"]).documents:
|
|
113
|
+
print(doc.external_id, doc.metadata)
|
|
109
114
|
```
|
|
110
115
|
|
|
111
116
|
`Folder.full_path` is exposed on folder objects, and `Document.folder_path` mirrors server responses for tracing scope.
|
|
@@ -93,6 +93,11 @@ renamed = moved.rename("specs-v2")
|
|
|
93
93
|
# Scope queries to a path and include descendants with folder_depth=-1
|
|
94
94
|
chunks = folder.retrieve_chunks(query="design notes", folder_depth=-1)
|
|
95
95
|
docs = db.list_documents(folder_name="/projects/alpha", folder_depth=-1)
|
|
96
|
+
|
|
97
|
+
# List only the fields you need. The server reads and returns just those columns, so
|
|
98
|
+
# the full document text is never downloaded — fast for large corpora.
|
|
99
|
+
for doc in db.list_documents(fields=["metadata"]).documents:
|
|
100
|
+
print(doc.external_id, doc.metadata)
|
|
96
101
|
```
|
|
97
102
|
|
|
98
103
|
`Folder.full_path` is exposed on folder objects, and `Document.folder_path` mirrors server responses for tracing scope.
|
|
@@ -428,6 +428,7 @@ class _MorphikClientLogic:
|
|
|
428
428
|
completed_only: bool,
|
|
429
429
|
sort_by: Optional[str],
|
|
430
430
|
sort_direction: str,
|
|
431
|
+
fields: Optional[List[str]] = None,
|
|
431
432
|
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
|
432
433
|
"""Prepare request for list_docs endpoint"""
|
|
433
434
|
params = {}
|
|
@@ -450,6 +451,15 @@ class _MorphikClientLogic:
|
|
|
450
451
|
"sort_by": sort_by,
|
|
451
452
|
"sort_direction": sort_direction,
|
|
452
453
|
}
|
|
454
|
+
if fields:
|
|
455
|
+
# Always include the fields required to reconstruct a Document client-side, so
|
|
456
|
+
# projected responses still parse into Document objects. When any metadata field
|
|
457
|
+
# is requested, also pull metadata_types so typed values (datetime/date/decimal)
|
|
458
|
+
# are reconstructed instead of returned as raw strings.
|
|
459
|
+
projected = ["external_id", "content_type", *fields]
|
|
460
|
+
if any(field.split(".", 1)[0] == "metadata" for field in fields):
|
|
461
|
+
projected.append("metadata_types")
|
|
462
|
+
data["fields"] = list(dict.fromkeys(projected))
|
|
453
463
|
return params, data
|
|
454
464
|
|
|
455
465
|
def _prepare_batch_get_documents_request(
|
|
@@ -277,6 +277,7 @@ class _ScopedOperationsMixin:
|
|
|
277
277
|
completed_only: bool,
|
|
278
278
|
sort_by: Optional[str],
|
|
279
279
|
sort_direction: str,
|
|
280
|
+
fields: Optional[List[str]] = None,
|
|
280
281
|
):
|
|
281
282
|
params, data = self._logic._prepare_list_documents_request(
|
|
282
283
|
skip,
|
|
@@ -291,6 +292,7 @@ class _ScopedOperationsMixin:
|
|
|
291
292
|
completed_only,
|
|
292
293
|
sort_by,
|
|
293
294
|
sort_direction,
|
|
295
|
+
fields,
|
|
294
296
|
)
|
|
295
297
|
|
|
296
298
|
return self._execute_scoped_operation(
|
|
@@ -12,11 +12,11 @@ from pydantic import BaseModel
|
|
|
12
12
|
from ._internal import FinalChunkResult, _MorphikClientLogic
|
|
13
13
|
from ._scoped_ops import _ScopedOperationsMixin
|
|
14
14
|
from ._shared import (
|
|
15
|
+
build_create_app_payload,
|
|
16
|
+
build_document_by_filename_params,
|
|
15
17
|
build_folder_endpoint_identifier,
|
|
16
18
|
build_folder_move_payload,
|
|
17
19
|
build_folder_rename_path,
|
|
18
|
-
build_create_app_payload,
|
|
19
|
-
build_document_by_filename_params,
|
|
20
20
|
build_list_apps_params,
|
|
21
21
|
build_logs_params,
|
|
22
22
|
build_rename_app_params,
|
|
@@ -267,8 +267,15 @@ class _AsyncScopedClientOps:
|
|
|
267
267
|
completed_only: bool = False,
|
|
268
268
|
sort_by: Optional[str] = "updated_at",
|
|
269
269
|
sort_direction: str = "desc",
|
|
270
|
+
fields: Optional[List[str]] = None,
|
|
270
271
|
) -> ListDocsResponse:
|
|
271
|
-
"""List documents within this scope (async)."""
|
|
272
|
+
"""List documents within this scope (async).
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
fields: Optional list of fields to return for each document (e.g. ["metadata"]).
|
|
276
|
+
Only those fields are read and returned, so the full document text is never
|
|
277
|
+
downloaded. external_id and content_type are always included.
|
|
278
|
+
"""
|
|
272
279
|
effective_folder = self._merge_folders(additional_folders)
|
|
273
280
|
return await self._client._scoped_list_documents(
|
|
274
281
|
skip=skip,
|
|
@@ -283,6 +290,7 @@ class _AsyncScopedClientOps:
|
|
|
283
290
|
completed_only=completed_only,
|
|
284
291
|
sort_by=sort_by,
|
|
285
292
|
sort_direction=sort_direction,
|
|
293
|
+
fields=fields,
|
|
286
294
|
)
|
|
287
295
|
|
|
288
296
|
async def batch_get_documents(
|
|
@@ -1231,6 +1239,7 @@ class AsyncMorphik(_ScopedOperationsMixin):
|
|
|
1231
1239
|
completed_only: bool = False,
|
|
1232
1240
|
sort_by: Optional[str] = "updated_at",
|
|
1233
1241
|
sort_direction: str = "desc",
|
|
1242
|
+
fields: Optional[List[str]] = None,
|
|
1234
1243
|
) -> ListDocsResponse:
|
|
1235
1244
|
"""
|
|
1236
1245
|
List accessible documents.
|
|
@@ -1247,6 +1256,9 @@ class AsyncMorphik(_ScopedOperationsMixin):
|
|
|
1247
1256
|
completed_only: Only return completed documents
|
|
1248
1257
|
sort_by: Field to sort by (created_at, updated_at, filename, external_id)
|
|
1249
1258
|
sort_direction: Sort direction (asc, desc)
|
|
1259
|
+
fields: Optional list of fields to return for each document (e.g. ["metadata"]).
|
|
1260
|
+
Only those fields are read and returned, so the full document text is never
|
|
1261
|
+
downloaded. external_id and content_type are always included.
|
|
1250
1262
|
Returns:
|
|
1251
1263
|
ListDocsResponse: Response with documents and metadata
|
|
1252
1264
|
|
|
@@ -1264,6 +1276,7 @@ class AsyncMorphik(_ScopedOperationsMixin):
|
|
|
1264
1276
|
completed_only=completed_only,
|
|
1265
1277
|
sort_by=sort_by,
|
|
1266
1278
|
sort_direction=sort_direction,
|
|
1279
|
+
fields=fields,
|
|
1267
1280
|
)
|
|
1268
1281
|
|
|
1269
1282
|
async def get_document(self, document_id: str) -> Document:
|
|
@@ -12,11 +12,11 @@ from pydantic import BaseModel
|
|
|
12
12
|
from ._internal import FinalChunkResult, _MorphikClientLogic
|
|
13
13
|
from ._scoped_ops import _ScopedOperationsMixin
|
|
14
14
|
from ._shared import (
|
|
15
|
+
build_create_app_payload,
|
|
16
|
+
build_document_by_filename_params,
|
|
15
17
|
build_folder_endpoint_identifier,
|
|
16
18
|
build_folder_move_payload,
|
|
17
19
|
build_folder_rename_path,
|
|
18
|
-
build_create_app_payload,
|
|
19
|
-
build_document_by_filename_params,
|
|
20
20
|
build_list_apps_params,
|
|
21
21
|
build_logs_params,
|
|
22
22
|
build_rename_app_params,
|
|
@@ -283,9 +283,16 @@ class _ScopedClientOps:
|
|
|
283
283
|
completed_only: bool = False,
|
|
284
284
|
sort_by: Optional[str] = "updated_at",
|
|
285
285
|
sort_direction: str = "desc",
|
|
286
|
+
fields: Optional[List[str]] = None,
|
|
286
287
|
) -> ListDocsResponse:
|
|
287
288
|
"""
|
|
288
289
|
List documents within this scope.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
fields: Optional list of fields to return for each document (e.g.
|
|
293
|
+
["metadata"]). Only those fields are read and returned, so the full
|
|
294
|
+
document text is never downloaded. external_id and content_type are
|
|
295
|
+
always included.
|
|
289
296
|
"""
|
|
290
297
|
effective_folder = self._merge_folders(additional_folders)
|
|
291
298
|
return self._client._scoped_list_documents(
|
|
@@ -301,6 +308,7 @@ class _ScopedClientOps:
|
|
|
301
308
|
completed_only=completed_only,
|
|
302
309
|
sort_by=sort_by,
|
|
303
310
|
sort_direction=sort_direction,
|
|
311
|
+
fields=fields,
|
|
304
312
|
)
|
|
305
313
|
|
|
306
314
|
def batch_get_documents(
|
|
@@ -1270,6 +1278,7 @@ class Morphik(_ScopedOperationsMixin):
|
|
|
1270
1278
|
completed_only: bool = False,
|
|
1271
1279
|
sort_by: Optional[str] = "updated_at",
|
|
1272
1280
|
sort_direction: str = "desc",
|
|
1281
|
+
fields: Optional[List[str]] = None,
|
|
1273
1282
|
) -> ListDocsResponse:
|
|
1274
1283
|
"""
|
|
1275
1284
|
List accessible documents.
|
|
@@ -1286,6 +1295,9 @@ class Morphik(_ScopedOperationsMixin):
|
|
|
1286
1295
|
completed_only: Only return completed documents
|
|
1287
1296
|
sort_by: Field to sort by (created_at, updated_at, filename, external_id)
|
|
1288
1297
|
sort_direction: Sort direction (asc, desc)
|
|
1298
|
+
fields: Optional list of fields to return for each document (e.g. ["metadata"]).
|
|
1299
|
+
Only those fields are read and returned, so the full document text is never
|
|
1300
|
+
downloaded. external_id and content_type are always included.
|
|
1289
1301
|
Returns:
|
|
1290
1302
|
ListDocsResponse: Response with documents and metadata
|
|
1291
1303
|
|
|
@@ -1303,6 +1315,7 @@ class Morphik(_ScopedOperationsMixin):
|
|
|
1303
1315
|
completed_only=completed_only,
|
|
1304
1316
|
sort_by=sort_by,
|
|
1305
1317
|
sort_direction=sort_direction,
|
|
1318
|
+
fields=fields,
|
|
1306
1319
|
)
|
|
1307
1320
|
|
|
1308
1321
|
def get_document(self, document_id: str) -> Document:
|
|
@@ -117,6 +117,39 @@ def test_sync_list_documents_payloads_across_scopes():
|
|
|
117
117
|
client.close()
|
|
118
118
|
|
|
119
119
|
|
|
120
|
+
def test_sync_list_documents_fields_projection():
|
|
121
|
+
client, calls = _make_sync_client()
|
|
122
|
+
try:
|
|
123
|
+
# external_id + content_type are always added so the response parses into a Document;
|
|
124
|
+
# metadata_types is added so typed metadata values are reconstructed, not left as strings.
|
|
125
|
+
client.list_documents(fields=["metadata"])
|
|
126
|
+
assert calls.pop()["data"]["fields"] == ["external_id", "content_type", "metadata", "metadata_types"]
|
|
127
|
+
|
|
128
|
+
# Already-included required fields are not duplicated; order is preserved.
|
|
129
|
+
client.list_documents(fields=["external_id", "filename", "metadata"])
|
|
130
|
+
assert calls.pop()["data"]["fields"] == [
|
|
131
|
+
"external_id",
|
|
132
|
+
"content_type",
|
|
133
|
+
"filename",
|
|
134
|
+
"metadata",
|
|
135
|
+
"metadata_types",
|
|
136
|
+
]
|
|
137
|
+
|
|
138
|
+
# Nested metadata paths also trigger metadata_types.
|
|
139
|
+
client.list_documents(fields=["metadata.client"])
|
|
140
|
+
assert calls.pop()["data"]["fields"] == ["external_id", "content_type", "metadata.client", "metadata_types"]
|
|
141
|
+
|
|
142
|
+
# Non-metadata projection does not pull metadata_types.
|
|
143
|
+
client.list_documents(fields=["filename"])
|
|
144
|
+
assert calls.pop()["data"]["fields"] == ["external_id", "content_type", "filename"]
|
|
145
|
+
|
|
146
|
+
# No fields -> no projection requested (full documents).
|
|
147
|
+
client.list_documents()
|
|
148
|
+
assert "fields" not in calls.pop()["data"]
|
|
149
|
+
finally:
|
|
150
|
+
client.close()
|
|
151
|
+
|
|
152
|
+
|
|
120
153
|
def test_async_client_http2_toggle(monkeypatch):
|
|
121
154
|
captured = []
|
|
122
155
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|