morphik 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- morphik/__init__.py +1 -1
- morphik/_internal.py +507 -0
- morphik/async_.py +1174 -402
- morphik/models.py +38 -25
- morphik/sync.py +1259 -371
- {morphik-0.1.0.dist-info → morphik-0.1.2.dist-info}/METADATA +1 -1
- morphik-0.1.2.dist-info/RECORD +10 -0
- morphik-0.1.0.dist-info/RECORD +0 -9
- {morphik-0.1.0.dist-info → morphik-0.1.2.dist-info}/WHEEL +0 -0
morphik/models.py
CHANGED
@@ -21,10 +21,10 @@ class Document(BaseModel):
|
|
21
21
|
default_factory=dict, description="Access control information"
|
22
22
|
)
|
23
23
|
chunk_ids: List[str] = Field(default_factory=list, description="IDs of document chunks")
|
24
|
-
|
24
|
+
|
25
25
|
# Client reference for update methods
|
26
26
|
_client = None
|
27
|
-
|
27
|
+
|
28
28
|
def update_with_text(
|
29
29
|
self,
|
30
30
|
content: str,
|
@@ -36,7 +36,7 @@ class Document(BaseModel):
|
|
36
36
|
) -> "Document":
|
37
37
|
"""
|
38
38
|
Update this document with new text content using the specified strategy.
|
39
|
-
|
39
|
+
|
40
40
|
Args:
|
41
41
|
content: The new content to add
|
42
42
|
filename: Optional new filename for the document
|
@@ -44,13 +44,15 @@ class Document(BaseModel):
|
|
44
44
|
rules: Optional list of rules to apply to the content
|
45
45
|
update_strategy: Strategy for updating the document (currently only 'add' is supported)
|
46
46
|
use_colpali: Whether to use multi-vector embedding
|
47
|
-
|
47
|
+
|
48
48
|
Returns:
|
49
49
|
Document: Updated document metadata
|
50
50
|
"""
|
51
51
|
if self._client is None:
|
52
|
-
raise ValueError(
|
53
|
-
|
52
|
+
raise ValueError(
|
53
|
+
"Document instance not connected to a client. Use a document returned from a Morphik client method."
|
54
|
+
)
|
55
|
+
|
54
56
|
return self._client.update_document_with_text(
|
55
57
|
document_id=self.external_id,
|
56
58
|
content=content,
|
@@ -58,9 +60,9 @@ class Document(BaseModel):
|
|
58
60
|
metadata=metadata,
|
59
61
|
rules=rules,
|
60
62
|
update_strategy=update_strategy,
|
61
|
-
use_colpali=use_colpali
|
63
|
+
use_colpali=use_colpali,
|
62
64
|
)
|
63
|
-
|
65
|
+
|
64
66
|
def update_with_file(
|
65
67
|
self,
|
66
68
|
file: "Union[str, bytes, BinaryIO, Path]",
|
@@ -72,7 +74,7 @@ class Document(BaseModel):
|
|
72
74
|
) -> "Document":
|
73
75
|
"""
|
74
76
|
Update this document with content from a file using the specified strategy.
|
75
|
-
|
77
|
+
|
76
78
|
Args:
|
77
79
|
file: File to add (path string, bytes, file object, or Path)
|
78
80
|
filename: Name of the file
|
@@ -80,13 +82,15 @@ class Document(BaseModel):
|
|
80
82
|
rules: Optional list of rules to apply to the content
|
81
83
|
update_strategy: Strategy for updating the document (currently only 'add' is supported)
|
82
84
|
use_colpali: Whether to use multi-vector embedding
|
83
|
-
|
85
|
+
|
84
86
|
Returns:
|
85
87
|
Document: Updated document metadata
|
86
88
|
"""
|
87
89
|
if self._client is None:
|
88
|
-
raise ValueError(
|
89
|
-
|
90
|
+
raise ValueError(
|
91
|
+
"Document instance not connected to a client. Use a document returned from a Morphik client method."
|
92
|
+
)
|
93
|
+
|
90
94
|
return self._client.update_document_with_file(
|
91
95
|
document_id=self.external_id,
|
92
96
|
file=file,
|
@@ -94,28 +98,29 @@ class Document(BaseModel):
|
|
94
98
|
metadata=metadata,
|
95
99
|
rules=rules,
|
96
100
|
update_strategy=update_strategy,
|
97
|
-
use_colpali=use_colpali
|
101
|
+
use_colpali=use_colpali,
|
98
102
|
)
|
99
|
-
|
103
|
+
|
100
104
|
def update_metadata(
|
101
105
|
self,
|
102
106
|
metadata: Dict[str, Any],
|
103
107
|
) -> "Document":
|
104
108
|
"""
|
105
109
|
Update this document's metadata only.
|
106
|
-
|
110
|
+
|
107
111
|
Args:
|
108
112
|
metadata: Metadata to update
|
109
|
-
|
113
|
+
|
110
114
|
Returns:
|
111
115
|
Document: Updated document metadata
|
112
116
|
"""
|
113
117
|
if self._client is None:
|
114
|
-
raise ValueError(
|
115
|
-
|
118
|
+
raise ValueError(
|
119
|
+
"Document instance not connected to a client. Use a document returned from a Morphik client method."
|
120
|
+
)
|
121
|
+
|
116
122
|
return self._client.update_document_metadata(
|
117
|
-
document_id=self.external_id,
|
118
|
-
metadata=metadata
|
123
|
+
document_id=self.external_id, metadata=metadata
|
119
124
|
)
|
120
125
|
|
121
126
|
|
@@ -159,7 +164,7 @@ class DocumentResult(BaseModel):
|
|
159
164
|
|
160
165
|
class ChunkSource(BaseModel):
|
161
166
|
"""Source information for a chunk used in completion"""
|
162
|
-
|
167
|
+
|
163
168
|
document_id: str = Field(..., description="ID of the source document")
|
164
169
|
chunk_number: int = Field(..., description="Chunk number within the document")
|
165
170
|
score: Optional[float] = Field(None, description="Relevance score")
|
@@ -194,7 +199,9 @@ class Entity(BaseModel):
|
|
194
199
|
type: str = Field(..., description="Entity type")
|
195
200
|
properties: Dict[str, Any] = Field(default_factory=dict, description="Entity properties")
|
196
201
|
document_ids: List[str] = Field(default_factory=list, description="Source document IDs")
|
197
|
-
chunk_sources: Dict[str, List[int]] = Field(
|
202
|
+
chunk_sources: Dict[str, List[int]] = Field(
|
203
|
+
default_factory=dict, description="Source chunk numbers by document ID"
|
204
|
+
)
|
198
205
|
|
199
206
|
def __hash__(self):
|
200
207
|
return hash(self.id)
|
@@ -213,7 +220,9 @@ class Relationship(BaseModel):
|
|
213
220
|
target_id: str = Field(..., description="Target entity ID")
|
214
221
|
type: str = Field(..., description="Relationship type")
|
215
222
|
document_ids: List[str] = Field(default_factory=list, description="Source document IDs")
|
216
|
-
chunk_sources: Dict[str, List[int]] = Field(
|
223
|
+
chunk_sources: Dict[str, List[int]] = Field(
|
224
|
+
default_factory=dict, description="Source chunk numbers by document ID"
|
225
|
+
)
|
217
226
|
|
218
227
|
def __hash__(self):
|
219
228
|
return hash(self.id)
|
@@ -230,10 +239,14 @@ class Graph(BaseModel):
|
|
230
239
|
id: str = Field(..., description="Unique graph identifier")
|
231
240
|
name: str = Field(..., description="Graph name")
|
232
241
|
entities: List[Entity] = Field(default_factory=list, description="Entities in the graph")
|
233
|
-
relationships: List[Relationship] = Field(
|
242
|
+
relationships: List[Relationship] = Field(
|
243
|
+
default_factory=list, description="Relationships in the graph"
|
244
|
+
)
|
234
245
|
metadata: Dict[str, Any] = Field(default_factory=dict, description="Graph metadata")
|
235
246
|
document_ids: List[str] = Field(default_factory=list, description="Source document IDs")
|
236
|
-
filters: Optional[Dict[str, Any]] = Field(
|
247
|
+
filters: Optional[Dict[str, Any]] = Field(
|
248
|
+
None, description="Document filters used to create the graph"
|
249
|
+
)
|
237
250
|
created_at: datetime = Field(..., description="Creation timestamp")
|
238
251
|
updated_at: datetime = Field(..., description="Last update timestamp")
|
239
252
|
owner: Dict[str, str] = Field(default_factory=dict, description="Graph owner information")
|