morphik 1.2.2__tar.gz → 1.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {morphik-1.2.2 → morphik-1.2.3}/PKG-INFO +6 -1
  2. {morphik-1.2.2 → morphik-1.2.3}/README.md +5 -0
  3. {morphik-1.2.2 → morphik-1.2.3}/morphik/__init__.py +1 -1
  4. {morphik-1.2.2 → morphik-1.2.3}/morphik/_internal.py +10 -0
  5. {morphik-1.2.2 → morphik-1.2.3}/morphik/_scoped_ops.py +2 -0
  6. {morphik-1.2.2 → morphik-1.2.3}/morphik/async_.py +16 -3
  7. {morphik-1.2.2 → morphik-1.2.3}/morphik/sync.py +15 -2
  8. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_scoped_ops_unit.py +33 -0
  9. {morphik-1.2.2 → morphik-1.2.3}/pyproject.toml +1 -1
  10. {morphik-1.2.2 → morphik-1.2.3}/.gitignore +0 -0
  11. {morphik-1.2.2 → morphik-1.2.3}/morphik/_shared.py +0 -0
  12. {morphik-1.2.2 → morphik-1.2.3}/morphik/exceptions.py +0 -0
  13. {morphik-1.2.2 → morphik-1.2.3}/morphik/models.py +0 -0
  14. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/README.md +0 -0
  15. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/__init__.py +0 -0
  16. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/example_usage.py +0 -0
  17. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_app_ops.py +0 -0
  18. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_async.py +0 -0
  19. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_docs/sample1.txt +0 -0
  20. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_docs/sample2.txt +0 -0
  21. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_docs/sample3.txt +0 -0
  22. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_shared_helpers.py +0 -0
  23. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_sync.py +0 -0
  24. {morphik-1.2.2 → morphik-1.2.3}/morphik/tests/test_update_document_metadata_rename.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: morphik
3
- Version: 1.2.2
3
+ Version: 1.2.3
4
4
  Summary: Morphik Python Client
5
5
  Author-email: Morphik <founders@morphik.ai>
6
6
  Requires-Python: >=3.8
@@ -106,6 +106,11 @@ renamed = moved.rename("specs-v2")
106
106
  # Scope queries to a path and include descendants with folder_depth=-1
107
107
  chunks = folder.retrieve_chunks(query="design notes", folder_depth=-1)
108
108
  docs = db.list_documents(folder_name="/projects/alpha", folder_depth=-1)
109
+
110
+ # List only the fields you need. The server reads and returns just those columns, so
111
+ # the full document text is never downloaded — fast for large corpora.
112
+ for doc in db.list_documents(fields=["metadata"]).documents:
113
+ print(doc.external_id, doc.metadata)
109
114
  ```
110
115
 
111
116
  `Folder.full_path` is exposed on folder objects, and `Document.folder_path` mirrors server responses for tracing scope.
@@ -93,6 +93,11 @@ renamed = moved.rename("specs-v2")
93
93
  # Scope queries to a path and include descendants with folder_depth=-1
94
94
  chunks = folder.retrieve_chunks(query="design notes", folder_depth=-1)
95
95
  docs = db.list_documents(folder_name="/projects/alpha", folder_depth=-1)
96
+
97
+ # List only the fields you need. The server reads and returns just those columns, so
98
+ # the full document text is never downloaded — fast for large corpora.
99
+ for doc in db.list_documents(fields=["metadata"]).documents:
100
+ print(doc.external_id, doc.metadata)
96
101
  ```
97
102
 
98
103
  `Folder.full_path` is exposed on folder objects, and `Document.folder_path` mirrors server responses for tracing scope.
@@ -14,4 +14,4 @@ __all__ = [
14
14
  "DocumentQueryResponse",
15
15
  ]
16
16
 
17
- __version__ = "1.2.2"
17
+ __version__ = "1.2.3"
@@ -428,6 +428,7 @@ class _MorphikClientLogic:
428
428
  completed_only: bool,
429
429
  sort_by: Optional[str],
430
430
  sort_direction: str,
431
+ fields: Optional[List[str]] = None,
431
432
  ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
432
433
  """Prepare request for list_docs endpoint"""
433
434
  params = {}
@@ -450,6 +451,15 @@ class _MorphikClientLogic:
450
451
  "sort_by": sort_by,
451
452
  "sort_direction": sort_direction,
452
453
  }
454
+ if fields:
455
+ # Always include the fields required to reconstruct a Document client-side, so
456
+ # projected responses still parse into Document objects. When any metadata field
457
+ # is requested, also pull metadata_types so typed values (datetime/date/decimal)
458
+ # are reconstructed instead of returned as raw strings.
459
+ projected = ["external_id", "content_type", *fields]
460
+ if any(field.split(".", 1)[0] == "metadata" for field in fields):
461
+ projected.append("metadata_types")
462
+ data["fields"] = list(dict.fromkeys(projected))
453
463
  return params, data
454
464
 
455
465
  def _prepare_batch_get_documents_request(
@@ -277,6 +277,7 @@ class _ScopedOperationsMixin:
277
277
  completed_only: bool,
278
278
  sort_by: Optional[str],
279
279
  sort_direction: str,
280
+ fields: Optional[List[str]] = None,
280
281
  ):
281
282
  params, data = self._logic._prepare_list_documents_request(
282
283
  skip,
@@ -291,6 +292,7 @@ class _ScopedOperationsMixin:
291
292
  completed_only,
292
293
  sort_by,
293
294
  sort_direction,
295
+ fields,
294
296
  )
295
297
 
296
298
  return self._execute_scoped_operation(
@@ -12,11 +12,11 @@ from pydantic import BaseModel
12
12
  from ._internal import FinalChunkResult, _MorphikClientLogic
13
13
  from ._scoped_ops import _ScopedOperationsMixin
14
14
  from ._shared import (
15
+ build_create_app_payload,
16
+ build_document_by_filename_params,
15
17
  build_folder_endpoint_identifier,
16
18
  build_folder_move_payload,
17
19
  build_folder_rename_path,
18
- build_create_app_payload,
19
- build_document_by_filename_params,
20
20
  build_list_apps_params,
21
21
  build_logs_params,
22
22
  build_rename_app_params,
@@ -267,8 +267,15 @@ class _AsyncScopedClientOps:
267
267
  completed_only: bool = False,
268
268
  sort_by: Optional[str] = "updated_at",
269
269
  sort_direction: str = "desc",
270
+ fields: Optional[List[str]] = None,
270
271
  ) -> ListDocsResponse:
271
- """List documents within this scope (async)."""
272
+ """List documents within this scope (async).
273
+
274
+ Args:
275
+ fields: Optional list of fields to return for each document (e.g. ["metadata"]).
276
+ Only those fields are read and returned, so the full document text is never
277
+ downloaded. external_id and content_type are always included.
278
+ """
272
279
  effective_folder = self._merge_folders(additional_folders)
273
280
  return await self._client._scoped_list_documents(
274
281
  skip=skip,
@@ -283,6 +290,7 @@ class _AsyncScopedClientOps:
283
290
  completed_only=completed_only,
284
291
  sort_by=sort_by,
285
292
  sort_direction=sort_direction,
293
+ fields=fields,
286
294
  )
287
295
 
288
296
  async def batch_get_documents(
@@ -1231,6 +1239,7 @@ class AsyncMorphik(_ScopedOperationsMixin):
1231
1239
  completed_only: bool = False,
1232
1240
  sort_by: Optional[str] = "updated_at",
1233
1241
  sort_direction: str = "desc",
1242
+ fields: Optional[List[str]] = None,
1234
1243
  ) -> ListDocsResponse:
1235
1244
  """
1236
1245
  List accessible documents.
@@ -1247,6 +1256,9 @@ class AsyncMorphik(_ScopedOperationsMixin):
1247
1256
  completed_only: Only return completed documents
1248
1257
  sort_by: Field to sort by (created_at, updated_at, filename, external_id)
1249
1258
  sort_direction: Sort direction (asc, desc)
1259
+ fields: Optional list of fields to return for each document (e.g. ["metadata"]).
1260
+ Only those fields are read and returned, so the full document text is never
1261
+ downloaded. external_id and content_type are always included.
1250
1262
  Returns:
1251
1263
  ListDocsResponse: Response with documents and metadata
1252
1264
 
@@ -1264,6 +1276,7 @@ class AsyncMorphik(_ScopedOperationsMixin):
1264
1276
  completed_only=completed_only,
1265
1277
  sort_by=sort_by,
1266
1278
  sort_direction=sort_direction,
1279
+ fields=fields,
1267
1280
  )
1268
1281
 
1269
1282
  async def get_document(self, document_id: str) -> Document:
@@ -12,11 +12,11 @@ from pydantic import BaseModel
12
12
  from ._internal import FinalChunkResult, _MorphikClientLogic
13
13
  from ._scoped_ops import _ScopedOperationsMixin
14
14
  from ._shared import (
15
+ build_create_app_payload,
16
+ build_document_by_filename_params,
15
17
  build_folder_endpoint_identifier,
16
18
  build_folder_move_payload,
17
19
  build_folder_rename_path,
18
- build_create_app_payload,
19
- build_document_by_filename_params,
20
20
  build_list_apps_params,
21
21
  build_logs_params,
22
22
  build_rename_app_params,
@@ -283,9 +283,16 @@ class _ScopedClientOps:
283
283
  completed_only: bool = False,
284
284
  sort_by: Optional[str] = "updated_at",
285
285
  sort_direction: str = "desc",
286
+ fields: Optional[List[str]] = None,
286
287
  ) -> ListDocsResponse:
287
288
  """
288
289
  List documents within this scope.
290
+
291
+ Args:
292
+ fields: Optional list of fields to return for each document (e.g.
293
+ ["metadata"]). Only those fields are read and returned, so the full
294
+ document text is never downloaded. external_id and content_type are
295
+ always included.
289
296
  """
290
297
  effective_folder = self._merge_folders(additional_folders)
291
298
  return self._client._scoped_list_documents(
@@ -301,6 +308,7 @@ class _ScopedClientOps:
301
308
  completed_only=completed_only,
302
309
  sort_by=sort_by,
303
310
  sort_direction=sort_direction,
311
+ fields=fields,
304
312
  )
305
313
 
306
314
  def batch_get_documents(
@@ -1270,6 +1278,7 @@ class Morphik(_ScopedOperationsMixin):
1270
1278
  completed_only: bool = False,
1271
1279
  sort_by: Optional[str] = "updated_at",
1272
1280
  sort_direction: str = "desc",
1281
+ fields: Optional[List[str]] = None,
1273
1282
  ) -> ListDocsResponse:
1274
1283
  """
1275
1284
  List accessible documents.
@@ -1286,6 +1295,9 @@ class Morphik(_ScopedOperationsMixin):
1286
1295
  completed_only: Only return completed documents
1287
1296
  sort_by: Field to sort by (created_at, updated_at, filename, external_id)
1288
1297
  sort_direction: Sort direction (asc, desc)
1298
+ fields: Optional list of fields to return for each document (e.g. ["metadata"]).
1299
+ Only those fields are read and returned, so the full document text is never
1300
+ downloaded. external_id and content_type are always included.
1289
1301
  Returns:
1290
1302
  ListDocsResponse: Response with documents and metadata
1291
1303
 
@@ -1303,6 +1315,7 @@ class Morphik(_ScopedOperationsMixin):
1303
1315
  completed_only=completed_only,
1304
1316
  sort_by=sort_by,
1305
1317
  sort_direction=sort_direction,
1318
+ fields=fields,
1306
1319
  )
1307
1320
 
1308
1321
  def get_document(self, document_id: str) -> Document:
@@ -117,6 +117,39 @@ def test_sync_list_documents_payloads_across_scopes():
117
117
  client.close()
118
118
 
119
119
 
120
+ def test_sync_list_documents_fields_projection():
121
+ client, calls = _make_sync_client()
122
+ try:
123
+ # external_id + content_type are always added so the response parses into a Document;
124
+ # metadata_types is added so typed metadata values are reconstructed, not left as strings.
125
+ client.list_documents(fields=["metadata"])
126
+ assert calls.pop()["data"]["fields"] == ["external_id", "content_type", "metadata", "metadata_types"]
127
+
128
+ # Already-included required fields are not duplicated; order is preserved.
129
+ client.list_documents(fields=["external_id", "filename", "metadata"])
130
+ assert calls.pop()["data"]["fields"] == [
131
+ "external_id",
132
+ "content_type",
133
+ "filename",
134
+ "metadata",
135
+ "metadata_types",
136
+ ]
137
+
138
+ # Nested metadata paths also trigger metadata_types.
139
+ client.list_documents(fields=["metadata.client"])
140
+ assert calls.pop()["data"]["fields"] == ["external_id", "content_type", "metadata.client", "metadata_types"]
141
+
142
+ # Non-metadata projection does not pull metadata_types.
143
+ client.list_documents(fields=["filename"])
144
+ assert calls.pop()["data"]["fields"] == ["external_id", "content_type", "filename"]
145
+
146
+ # No fields -> no projection requested (full documents).
147
+ client.list_documents()
148
+ assert "fields" not in calls.pop()["data"]
149
+ finally:
150
+ client.close()
151
+
152
+
120
153
  def test_async_client_http2_toggle(monkeypatch):
121
154
  captured = []
122
155
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "morphik"
7
- version = "1.2.2"
7
+ version = "1.2.3"
8
8
  authors = [
9
9
  { name = "Morphik", email = "founders@morphik.ai" },
10
10
  ]
File without changes
File without changes
File without changes
File without changes
File without changes