morphik 1.1.2__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {morphik-1.1.2 → morphik-1.2.0}/.gitignore +2 -2
  2. {morphik-1.1.2 → morphik-1.2.0}/PKG-INFO +2 -3
  3. {morphik-1.1.2 → morphik-1.2.0}/README.md +1 -2
  4. {morphik-1.1.2 → morphik-1.2.0}/morphik/__init__.py +1 -1
  5. {morphik-1.1.2 → morphik-1.2.0}/morphik/_internal.py +0 -97
  6. {morphik-1.1.2 → morphik-1.2.0}/morphik/_scoped_ops.py +1 -13
  7. morphik-1.2.0/morphik/_shared.py +182 -0
  8. {morphik-1.1.2 → morphik-1.2.0}/morphik/async_.py +455 -1264
  9. {morphik-1.1.2 → morphik-1.2.0}/morphik/models.py +67 -143
  10. {morphik-1.1.2 → morphik-1.2.0}/morphik/sync.py +492 -1323
  11. morphik-1.2.0/morphik/tests/test_app_ops.py +170 -0
  12. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/test_async.py +5 -5
  13. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/test_scoped_ops_unit.py +299 -0
  14. morphik-1.2.0/morphik/tests/test_shared_helpers.py +135 -0
  15. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/test_sync.py +6 -6
  16. morphik-1.2.0/morphik/tests/test_update_document_metadata_rename.py +128 -0
  17. {morphik-1.1.2 → morphik-1.2.0}/pyproject.toml +1 -1
  18. morphik-1.1.2/morphik/rules.py +0 -85
  19. {morphik-1.1.2 → morphik-1.2.0}/morphik/exceptions.py +0 -0
  20. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/README.md +0 -0
  21. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/__init__.py +0 -0
  22. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/example_usage.py +0 -0
  23. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/test_docs/sample1.txt +0 -0
  24. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/test_docs/sample2.txt +0 -0
  25. {morphik-1.1.2 → morphik-1.2.0}/morphik/tests/test_docs/sample3.txt +0 -0
@@ -58,5 +58,5 @@ multi_vector_embeddings_*.json
58
58
  # Rust build artifacts
59
59
  morphik_rust/target/
60
60
  morphik_rust/Cargo.lock
61
- testkb_morphik/*
62
- DOCS_BY_FACILITIES/*
61
+ DOCS_BY_FACILITIES/
62
+ eval_freshworks/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: morphik
3
- Version: 1.1.2
3
+ Version: 1.2.0
4
4
  Summary: Morphik Python Client
5
5
  Author-email: Morphik <founders@morphik.ai>
6
6
  Requires-Python: >=3.8
@@ -101,7 +101,7 @@ chunks = folder.retrieve_chunks(query="design notes", folder_depth=-1)
101
101
  docs = db.list_documents(folder_name="/projects/alpha", folder_depth=-1)
102
102
  ```
103
103
 
104
- `Folder.full_path` is exposed on folder objects, and `Document.folder_path` / `Graph.folder_path` mirror server responses for tracing scope.
104
+ `Folder.full_path` is exposed on folder objects, and `Document.folder_path` mirrors server responses for tracing scope.
105
105
 
106
106
  ### Asynchronous Usage
107
107
 
@@ -145,7 +145,6 @@ asyncio.run(main())
145
145
  - Semantic search and retrieval
146
146
  - Retrieval-augmented generation (RAG)
147
147
  - Morphik On-the-Fly document querying with optional ingestion follow-up
148
- - Knowledge graph creation and querying
149
148
  - Multi-user and multi-folder scoping
150
149
  - Metadata filtering
151
150
  - Document management
@@ -88,7 +88,7 @@ chunks = folder.retrieve_chunks(query="design notes", folder_depth=-1)
88
88
  docs = db.list_documents(folder_name="/projects/alpha", folder_depth=-1)
89
89
  ```
90
90
 
91
- `Folder.full_path` is exposed on folder objects, and `Document.folder_path` / `Graph.folder_path` mirror server responses for tracing scope.
91
+ `Folder.full_path` is exposed on folder objects, and `Document.folder_path` mirrors server responses for tracing scope.
92
92
 
93
93
  ### Asynchronous Usage
94
94
 
@@ -132,7 +132,6 @@ asyncio.run(main())
132
132
  - Semantic search and retrieval
133
133
  - Retrieval-augmented generation (RAG)
134
134
  - Morphik On-the-Fly document querying with optional ingestion follow-up
135
- - Knowledge graph creation and querying
136
135
  - Multi-user and multi-folder scoping
137
136
  - Metadata filtering
138
137
  - Document management
@@ -14,4 +14,4 @@ __all__ = [
14
14
  "DocumentQueryResponse",
15
15
  ]
16
16
 
17
- __version__ = "1.1.2"
17
+ __version__ = "1.2.0"
@@ -1,7 +1,6 @@
1
1
  import base64
2
2
  import io
3
3
  import json
4
- import warnings
5
4
  from datetime import date, datetime
6
5
  from decimal import Decimal
7
6
  from io import BytesIO
@@ -21,15 +20,9 @@ from .models import (
21
20
  Document,
22
21
  DocumentQueryResponse,
23
22
  DocumentResult,
24
- Graph,
25
- GraphPromptOverrides,
26
23
  IngestTextRequest,
27
24
  QueryPromptOverrides,
28
25
  )
29
- from .rules import Rule
30
-
31
- # Type alias for rules
32
- RuleOrDict = Union[Rule, Dict[str, Any]]
33
26
 
34
27
 
35
28
  class FinalChunkResult(BaseModel):
@@ -81,21 +74,6 @@ class _MorphikClientLogic:
81
74
  # Basic token validation
82
75
  jwt.decode(self._auth_token, options={"verify_signature": False})
83
76
 
84
- def _convert_rule(self, rule: RuleOrDict) -> Dict[str, Any]:
85
- """Convert a rule to a dictionary format"""
86
- if hasattr(rule, "to_dict"):
87
- return rule.to_dict()
88
- return rule
89
-
90
- def _warn_legacy_rules(self, rules: Optional[List[RuleOrDict]], context: str) -> None:
91
- """Emit a deprecation warning when legacy rules are supplied."""
92
- if rules:
93
- warnings.warn(
94
- f"'rules' support has been removed; payload supplied to {context} is ignored.",
95
- DeprecationWarning,
96
- stacklevel=3,
97
- )
98
-
99
77
  def _get_url(self, endpoint: str) -> str:
100
78
  """Get the full URL for an API endpoint"""
101
79
  return f"{self._base_url}/{endpoint.lstrip('/')}"
@@ -112,13 +90,11 @@ class _MorphikClientLogic:
112
90
  content: str,
113
91
  filename: Optional[str],
114
92
  metadata: Optional[Dict[str, Any]],
115
- rules: Optional[List[RuleOrDict]],
116
93
  use_colpali: bool,
117
94
  folder_name: Optional[str],
118
95
  end_user_id: Optional[str],
119
96
  ) -> Dict[str, Any]:
120
97
  """Prepare request for ingest_text endpoint"""
121
- self._warn_legacy_rules(rules, "ingest/text")
122
98
  serialized_metadata, metadata_types_map = self._serialize_metadata_map(metadata)
123
99
  payload = {
124
100
  "content": content,
@@ -185,7 +161,6 @@ class _MorphikClientLogic:
185
161
  def _prepare_ingest_file_form_data(
186
162
  self,
187
163
  metadata: Optional[Dict[str, Any]],
188
- rules: Optional[List[RuleOrDict]],
189
164
  folder_name: Optional[str],
190
165
  end_user_id: Optional[str],
191
166
  use_colpali: Optional[bool] = None,
@@ -196,7 +171,6 @@ class _MorphikClientLogic:
196
171
  never relies on query-string values. *use_colpali* is therefore always
197
172
  embedded here when provided.
198
173
  """
199
- self._warn_legacy_rules(rules, "ingest/file")
200
174
  serialized_metadata, metadata_types_map = self._serialize_metadata_map(metadata)
201
175
  form_data = {
202
176
  "metadata": json.dumps(serialized_metadata),
@@ -219,15 +193,12 @@ class _MorphikClientLogic:
219
193
  def _prepare_ingest_files_form_data(
220
194
  self,
221
195
  metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]],
222
- rules: Optional[List[RuleOrDict]],
223
196
  use_colpali: bool,
224
197
  parallel: bool,
225
198
  folder_name: Optional[str],
226
199
  end_user_id: Optional[str],
227
200
  ) -> Dict[str, Any]:
228
201
  """Prepare form data for ingest_files endpoint"""
229
- self._warn_legacy_rules(rules, "ingest/files")
230
-
231
202
  serialized_metadata, metadata_types_payload = self._serialize_metadata_collection(metadata)
232
203
 
233
204
  data = {
@@ -290,9 +261,6 @@ class _MorphikClientLogic:
290
261
  max_tokens: Optional[int],
291
262
  temperature: Optional[float],
292
263
  use_colpali: bool,
293
- graph_name: Optional[str],
294
- hop_depth: int,
295
- include_paths: bool,
296
264
  prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]],
297
265
  folder_name: Optional[Union[str, List[str]]],
298
266
  folder_depth: Optional[int],
@@ -317,9 +285,6 @@ class _MorphikClientLogic:
317
285
  "temperature": temperature,
318
286
  "use_colpali": use_colpali,
319
287
  "use_reranking": use_reranking, # Add to payload
320
- "graph_name": graph_name,
321
- "hop_depth": hop_depth,
322
- "include_paths": include_paths,
323
288
  "prompt_overrides": prompt_overrides,
324
289
  }
325
290
  if folder_name:
@@ -506,69 +471,15 @@ class _MorphikClientLogic:
506
471
  request["output_format"] = output_format
507
472
  return request
508
473
 
509
- def _prepare_create_graph_request(
510
- self,
511
- name: str,
512
- filters: Optional[Dict[str, Any]],
513
- documents: Optional[List[str]],
514
- prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]],
515
- folder_name: Optional[Union[str, List[str]]],
516
- end_user_id: Optional[str],
517
- ) -> Dict[str, Any]:
518
- """Prepare request for create_graph endpoint"""
519
- # Convert prompt_overrides to dict if it's a model
520
- if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
521
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
522
-
523
- request = {
524
- "name": name,
525
- "filters": filters,
526
- "documents": documents,
527
- "prompt_overrides": prompt_overrides,
528
- }
529
- if folder_name:
530
- request["folder_name"] = folder_name
531
- if end_user_id:
532
- request["end_user_id"] = end_user_id
533
- return request
534
-
535
- def _prepare_update_graph_request(
536
- self,
537
- name: str,
538
- additional_filters: Optional[Dict[str, Any]],
539
- additional_documents: Optional[List[str]],
540
- prompt_overrides: Optional[Union[GraphPromptOverrides, Dict[str, Any]]],
541
- folder_name: Optional[Union[str, List[str]]],
542
- end_user_id: Optional[str],
543
- ) -> Dict[str, Any]:
544
- """Prepare request for update_graph endpoint"""
545
- # Convert prompt_overrides to dict if it's a model
546
- if prompt_overrides and isinstance(prompt_overrides, GraphPromptOverrides):
547
- prompt_overrides = prompt_overrides.model_dump(exclude_none=True)
548
-
549
- request = {
550
- "additional_filters": additional_filters,
551
- "additional_documents": additional_documents,
552
- "prompt_overrides": prompt_overrides,
553
- }
554
- if folder_name:
555
- request["folder_name"] = folder_name
556
- if end_user_id:
557
- request["end_user_id"] = end_user_id
558
- return request
559
-
560
474
  def _prepare_update_document_with_text_request(
561
475
  self,
562
476
  document_id: str,
563
477
  content: str,
564
478
  filename: Optional[str],
565
479
  metadata: Optional[Dict[str, Any]],
566
- rules: Optional[List],
567
480
  use_colpali: Optional[bool],
568
481
  ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
569
482
  """Prepare request for update_document_with_text endpoint"""
570
- self._warn_legacy_rules(rules, "documents/update_text")
571
-
572
483
  serialized_metadata, metadata_types_map = self._serialize_metadata_map(metadata)
573
484
  request = IngestTextRequest(
574
485
  content=content,
@@ -721,14 +632,6 @@ class _MorphikClientLogic:
721
632
 
722
633
  return final_chunks
723
634
 
724
- def _parse_graph_response(self, response_json: Dict[str, Any]) -> Graph:
725
- """Parse graph response"""
726
- return Graph(**response_json)
727
-
728
- def _parse_graph_list_response(self, response_json: List[Dict[str, Any]]) -> List[Graph]:
729
- """Parse graph list response"""
730
- return [Graph(**graph) for graph in response_json]
731
-
732
635
  def _parse_document_query_response(self, response_json: Dict[str, Any]) -> DocumentQueryResponse:
733
636
  """Parse document query response."""
734
637
  payload = dict(response_json)
@@ -5,7 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Type, TypeVar, Union
5
5
 
6
6
  from pydantic import BaseModel
7
7
 
8
- from ._internal import FinalChunkResult, RuleOrDict
8
+ from ._internal import FinalChunkResult
9
9
 
10
10
  T = TypeVar("T")
11
11
 
@@ -39,7 +39,6 @@ class _ScopedOperationsMixin:
39
39
  content: str,
40
40
  filename: Optional[str],
41
41
  metadata: Optional[Dict[str, Any]],
42
- rules: Optional[List[RuleOrDict]],
43
42
  use_colpali: bool,
44
43
  folder_name: Optional[str],
45
44
  end_user_id: Optional[str],
@@ -48,7 +47,6 @@ class _ScopedOperationsMixin:
48
47
  content,
49
48
  filename,
50
49
  metadata,
51
- rules,
52
50
  use_colpali,
53
51
  folder_name,
54
52
  end_user_id,
@@ -67,7 +65,6 @@ class _ScopedOperationsMixin:
67
65
  file: Union[str, bytes, BytesIO, IOBase, Path],
68
66
  filename: Optional[str],
69
67
  metadata: Optional[Dict[str, Any]],
70
- rules: Optional[List[RuleOrDict]],
71
68
  use_colpali: bool,
72
69
  folder_name: Optional[str],
73
70
  end_user_id: Optional[str],
@@ -80,7 +77,6 @@ class _ScopedOperationsMixin:
80
77
 
81
78
  form_data = self._logic._prepare_ingest_file_form_data(
82
79
  metadata,
83
- rules,
84
80
  folder_name,
85
81
  end_user_id,
86
82
  use_colpali,
@@ -100,7 +96,6 @@ class _ScopedOperationsMixin:
100
96
  *,
101
97
  files: List[Union[str, bytes, BytesIO, IOBase, Path]],
102
98
  metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]],
103
- rules: Optional[List[RuleOrDict]],
104
99
  use_colpali: bool,
105
100
  parallel: bool,
106
101
  folder_name: Optional[str],
@@ -115,7 +110,6 @@ class _ScopedOperationsMixin:
115
110
 
116
111
  form_data = self._logic._prepare_ingest_files_form_data(
117
112
  metadata,
118
- rules,
119
113
  use_colpali,
120
114
  parallel,
121
115
  folder_name,
@@ -222,9 +216,6 @@ class _ScopedOperationsMixin:
222
216
  max_tokens: Optional[int],
223
217
  temperature: Optional[float],
224
218
  use_colpali: bool,
225
- graph_name: Optional[str],
226
- hop_depth: int,
227
- include_paths: bool,
228
219
  prompt_overrides: Optional[Dict[str, Any]],
229
220
  folder_name: Optional[Union[str, List[str]]],
230
221
  folder_depth: Optional[int],
@@ -243,9 +234,6 @@ class _ScopedOperationsMixin:
243
234
  max_tokens,
244
235
  temperature,
245
236
  use_colpali,
246
- graph_name,
247
- hop_depth,
248
- include_paths,
249
237
  prompt_overrides,
250
238
  folder_name,
251
239
  folder_depth,
@@ -0,0 +1,182 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Iterable, List, Optional, Union
6
+
7
+ from pydantic import BaseModel
8
+
9
+ MAX_LIMIT = 500
10
+ MIN_LOG_HOURS = 0.1
11
+ MAX_LOG_HOURS = 168.0
12
+
13
+
14
+ def merge_folders(
15
+ base: Optional[Union[str, List[str]]],
16
+ additional: Optional[List[str]],
17
+ ) -> Optional[Union[str, List[str]]]:
18
+ if not additional:
19
+ return base
20
+ if base:
21
+ if isinstance(base, list):
22
+ return base + additional
23
+ return [base] + additional
24
+ return additional
25
+
26
+
27
+ def collect_directory_files(directory: Union[str, Path], recursive: bool, pattern: str) -> List[Path]:
28
+ dir_path = Path(directory)
29
+ if not dir_path.is_dir():
30
+ raise ValueError(f"Directory not found: {dir_path}")
31
+
32
+ files = list(dir_path.rglob(pattern) if recursive else dir_path.glob(pattern))
33
+ return [f for f in files if f.is_file()]
34
+
35
+
36
+ def normalize_limit_offset(limit: int, offset: int) -> Dict[str, int]:
37
+ return {
38
+ "limit": max(1, min(limit, MAX_LIMIT)),
39
+ "offset": max(0, offset),
40
+ }
41
+
42
+
43
+ def normalize_filter_param(value: Optional[Union[str, Dict[str, Any], List[Any]]]) -> Optional[str]:
44
+ if value is None:
45
+ return None
46
+ if isinstance(value, str):
47
+ return value
48
+ return json.dumps(value)
49
+
50
+
51
+ def build_list_apps_params(
52
+ *,
53
+ org_id: Optional[str],
54
+ user_id: Optional[str],
55
+ app_id_filter: Optional[Union[str, Dict[str, Any], List[Any]]],
56
+ app_name_filter: Optional[Union[str, Dict[str, Any], List[Any]]],
57
+ limit: int,
58
+ offset: int,
59
+ ) -> Dict[str, Any]:
60
+ params: Dict[str, Any] = normalize_limit_offset(limit, offset)
61
+ if org_id:
62
+ params["org_id"] = org_id
63
+ if user_id:
64
+ params["user_id"] = user_id
65
+ if app_id_filter is not None:
66
+ params["app_id_filter"] = normalize_filter_param(app_id_filter)
67
+ if app_name_filter is not None:
68
+ params["app_name_filter"] = normalize_filter_param(app_name_filter)
69
+ return params
70
+
71
+
72
+ def build_rename_app_params(*, new_name: str, app_id: Optional[str], app_name: Optional[str]) -> Dict[str, Any]:
73
+ if not app_id and not app_name:
74
+ raise ValueError("app_id or app_name is required to rename an app")
75
+ params: Dict[str, Any] = {"new_name": new_name}
76
+ if app_id:
77
+ params["app_id"] = app_id
78
+ if app_name:
79
+ params["app_name"] = app_name
80
+ return params
81
+
82
+
83
+ def build_rotate_app_params(
84
+ *, app_id: Optional[str], app_name: Optional[str], expiry_days: Optional[int]
85
+ ) -> Dict[str, Any]:
86
+ if not app_id and not app_name:
87
+ raise ValueError("app_id or app_name is required to rotate an app token")
88
+ params: Dict[str, Any] = {}
89
+ if app_id:
90
+ params["app_id"] = app_id
91
+ if app_name:
92
+ params["app_name"] = app_name
93
+ if expiry_days is not None:
94
+ params["expiry_days"] = expiry_days
95
+ return params
96
+
97
+
98
+ def build_create_app_payload(
99
+ *,
100
+ name: str,
101
+ app_id: Optional[str],
102
+ user_id: Optional[str],
103
+ expiry_days: Optional[int],
104
+ org_id: Optional[str],
105
+ created_by_user_id: Optional[str],
106
+ ) -> Dict[str, Any]:
107
+ payload: Dict[str, Any] = {"name": name}
108
+ if app_id is not None:
109
+ payload["app_id"] = app_id
110
+ if user_id is not None:
111
+ payload["user_id"] = user_id
112
+ if expiry_days is not None:
113
+ payload["expiry_days"] = expiry_days
114
+ if org_id is not None:
115
+ payload["org_id"] = org_id
116
+ if created_by_user_id is not None:
117
+ payload["created_by_user_id"] = created_by_user_id
118
+ return payload
119
+
120
+
121
+ def build_requeue_payload(
122
+ *,
123
+ jobs: Optional[Iterable[Union[BaseModel, Dict[str, Any]]]],
124
+ include_all: bool,
125
+ statuses: Optional[List[str]],
126
+ limit: Optional[int],
127
+ ) -> Dict[str, Any]:
128
+ jobs_list = list(jobs) if jobs is not None else None
129
+ if not include_all and not jobs_list:
130
+ raise ValueError("jobs or include_all must be provided for requeue")
131
+ payload: Dict[str, Any] = {}
132
+ if jobs_list:
133
+ payload["jobs"] = [job.model_dump() if isinstance(job, BaseModel) else job for job in jobs_list]
134
+ if include_all:
135
+ payload["include_all"] = True
136
+ if statuses is not None:
137
+ payload["statuses"] = statuses
138
+ if limit is not None:
139
+ payload["limit"] = limit
140
+ return payload
141
+
142
+
143
+ def build_logs_params(
144
+ *, limit: int, hours: float, op_type: Optional[str], status: Optional[str]
145
+ ) -> Dict[str, Any]:
146
+ params: Dict[str, Any] = {
147
+ "limit": max(1, min(limit, MAX_LIMIT)),
148
+ "hours": max(MIN_LOG_HOURS, min(hours, MAX_LOG_HOURS)),
149
+ }
150
+ if op_type is not None:
151
+ params["op_type"] = op_type
152
+ if status is not None:
153
+ params["status"] = status
154
+ return params
155
+
156
+
157
+ def build_document_by_filename_params(
158
+ *, folder_name: Optional[Union[str, List[str]]], folder_depth: Optional[int], end_user_id: Optional[str]
159
+ ) -> Dict[str, Any]:
160
+ params: Dict[str, Any] = {}
161
+ if folder_name is not None:
162
+ params["folder_name"] = folder_name
163
+ if folder_depth is not None:
164
+ params["folder_depth"] = folder_depth
165
+ if end_user_id is not None:
166
+ params["end_user_id"] = end_user_id
167
+ return params
168
+
169
+
170
+ def normalize_additional_folders(
171
+ additional_folders: Optional[List[str]],
172
+ folder_name: Optional[Union[str, List[str]]],
173
+ ) -> Optional[List[str]]:
174
+ if folder_name is None:
175
+ return additional_folders
176
+ if isinstance(folder_name, str):
177
+ folder_list = [folder_name]
178
+ else:
179
+ folder_list = list(folder_name)
180
+ if additional_folders:
181
+ return list(additional_folders) + folder_list
182
+ return folder_list