graphiti-core 0.30.0rc1__py3-none-any.whl → 0.30.0rc3__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/prompts/dedupe_nodes.py +42 -26
- graphiti_core/utils/maintenance/node_operations.py +33 -6
- {graphiti_core-0.30.0rc1.dist-info → graphiti_core-0.30.0rc3.dist-info}/METADATA +1 -1
- {graphiti_core-0.30.0rc1.dist-info → graphiti_core-0.30.0rc3.dist-info}/RECORD +6 -6
- {graphiti_core-0.30.0rc1.dist-info → graphiti_core-0.30.0rc3.dist-info}/WHEEL +0 -0
- {graphiti_core-0.30.0rc1.dist-info → graphiti_core-0.30.0rc3.dist-info}/licenses/LICENSE +0 -0
|
@@ -92,12 +92,23 @@ def node(context: dict[str, Any]) -> list[Message]:
|
|
|
92
92
|
|
|
93
93
|
TASK:
|
|
94
94
|
1. Compare `new_entity` against each item in `existing_entities`.
|
|
95
|
-
2. If it refers to the same real
|
|
96
|
-
3. Let `duplicate_idx` = the
|
|
97
|
-
4. Let `duplicates` = the list of
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
2. If it refers to the same real-world object or concept, collect its index.
|
|
96
|
+
3. Let `duplicate_idx` = the smallest collected index, or -1 if none.
|
|
97
|
+
4. Let `duplicates` = the sorted list of all collected indices (empty list if none).
|
|
98
|
+
|
|
99
|
+
Respond with a JSON object containing an "entity_resolutions" array with a single entry:
|
|
100
|
+
{{
|
|
101
|
+
"entity_resolutions": [
|
|
102
|
+
{{
|
|
103
|
+
"id": integer id from NEW ENTITY,
|
|
104
|
+
"name": the best full name for the entity,
|
|
105
|
+
"duplicate_idx": integer index of the best duplicate in EXISTING ENTITIES, or -1 if none,
|
|
106
|
+
"duplicates": sorted list of all duplicate indices you collected (deduplicate the list, use [] when none)
|
|
107
|
+
}}
|
|
108
|
+
]
|
|
109
|
+
}}
|
|
110
|
+
|
|
111
|
+
Only reference indices that appear in EXISTING ENTITIES, and return [] / -1 when unsure.
|
|
101
112
|
""",
|
|
102
113
|
),
|
|
103
114
|
]
|
|
@@ -126,26 +137,26 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
126
137
|
{{
|
|
127
138
|
id: integer id of the entity,
|
|
128
139
|
name: "name of the entity",
|
|
129
|
-
entity_type: "
|
|
130
|
-
entity_type_description: "Description of what the entity type represents"
|
|
131
|
-
duplication_candidates: [
|
|
132
|
-
{{
|
|
133
|
-
idx: integer index of the candidate entity,
|
|
134
|
-
name: "name of the candidate entity",
|
|
135
|
-
entity_type: "ontological classification of the candidate entity",
|
|
136
|
-
...<additional attributes>
|
|
137
|
-
}}
|
|
138
|
-
]
|
|
140
|
+
entity_type: ["Entity", "<optional additional label>", ...],
|
|
141
|
+
entity_type_description: "Description of what the entity type represents"
|
|
139
142
|
}}
|
|
140
|
-
|
|
143
|
+
|
|
141
144
|
<ENTITIES>
|
|
142
145
|
{to_prompt_json(context['extracted_nodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
|
|
143
146
|
</ENTITIES>
|
|
144
|
-
|
|
147
|
+
|
|
145
148
|
<EXISTING ENTITIES>
|
|
146
149
|
{to_prompt_json(context['existing_nodes'], ensure_ascii=context.get('ensure_ascii', True), indent=2)}
|
|
147
150
|
</EXISTING ENTITIES>
|
|
148
151
|
|
|
152
|
+
Each entry in EXISTING ENTITIES is an object with the following structure:
|
|
153
|
+
{{
|
|
154
|
+
idx: integer index of the candidate entity (use this when referencing a duplicate),
|
|
155
|
+
name: "name of the candidate entity",
|
|
156
|
+
entity_types: ["Entity", "<optional additional label>", ...],
|
|
157
|
+
...<additional attributes such as summaries or metadata>
|
|
158
|
+
}}
|
|
159
|
+
|
|
149
160
|
For each of the above ENTITIES, determine if the entity is a duplicate of any of the EXISTING ENTITIES.
|
|
150
161
|
|
|
151
162
|
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
|
|
@@ -155,14 +166,19 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
155
166
|
- They have similar names or purposes but refer to separate instances or concepts.
|
|
156
167
|
|
|
157
168
|
Task:
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
For
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
169
|
+
Respond with a JSON object that contains an "entity_resolutions" array with one entry for each entity in ENTITIES, ordered by the entity id.
|
|
170
|
+
|
|
171
|
+
For every entity, return an object with the following keys:
|
|
172
|
+
{{
|
|
173
|
+
"id": integer id from ENTITIES,
|
|
174
|
+
"name": the best full name for the entity (preserve the original name unless a duplicate has a more complete name),
|
|
175
|
+
"duplicate_idx": the idx of the EXISTING ENTITY that is the best duplicate match, or -1 if there is no duplicate,
|
|
176
|
+
"duplicates": a sorted list of all idx values from EXISTING ENTITIES that refer to duplicates (deduplicate the list, use [] when none or unsure)
|
|
177
|
+
}}
|
|
178
|
+
|
|
179
|
+
- Only use idx values that appear in EXISTING ENTITIES.
|
|
180
|
+
- Set duplicate_idx to the smallest idx you collected for that entity, or -1 if duplicates is empty.
|
|
181
|
+
- Never fabricate entities or indices.
|
|
166
182
|
""",
|
|
167
183
|
),
|
|
168
184
|
]
|
|
@@ -241,7 +241,11 @@ async def _resolve_with_llm(
|
|
|
241
241
|
previous_episodes: list[EpisodicNode] | None,
|
|
242
242
|
entity_types: dict[str, type[BaseModel]] | None,
|
|
243
243
|
) -> None:
|
|
244
|
-
"""Escalate unresolved nodes to the dedupe prompt so the LLM can select or reject duplicates.
|
|
244
|
+
"""Escalate unresolved nodes to the dedupe prompt so the LLM can select or reject duplicates.
|
|
245
|
+
|
|
246
|
+
The guardrails below defensively ignore malformed or duplicate LLM responses so the
|
|
247
|
+
ingestion workflow remains deterministic even when the model misbehaves.
|
|
248
|
+
"""
|
|
245
249
|
if not state.unresolved_indices:
|
|
246
250
|
return
|
|
247
251
|
|
|
@@ -291,18 +295,41 @@ async def _resolve_with_llm(
|
|
|
291
295
|
|
|
292
296
|
node_resolutions: list[NodeDuplicate] = NodeResolutions(**llm_response).entity_resolutions
|
|
293
297
|
|
|
298
|
+
valid_relative_range = range(len(state.unresolved_indices))
|
|
299
|
+
processed_relative_ids: set[int] = set()
|
|
300
|
+
|
|
294
301
|
for resolution in node_resolutions:
|
|
295
302
|
relative_id: int = resolution.id
|
|
296
303
|
duplicate_idx: int = resolution.duplicate_idx
|
|
297
304
|
|
|
305
|
+
if relative_id not in valid_relative_range:
|
|
306
|
+
logger.warning(
|
|
307
|
+
'Skipping invalid LLM dedupe id %s (unresolved indices: %s)',
|
|
308
|
+
relative_id,
|
|
309
|
+
state.unresolved_indices,
|
|
310
|
+
)
|
|
311
|
+
continue
|
|
312
|
+
|
|
313
|
+
if relative_id in processed_relative_ids:
|
|
314
|
+
logger.warning('Duplicate LLM dedupe id %s received; ignoring.', relative_id)
|
|
315
|
+
continue
|
|
316
|
+
processed_relative_ids.add(relative_id)
|
|
317
|
+
|
|
298
318
|
original_index = state.unresolved_indices[relative_id]
|
|
299
319
|
extracted_node = extracted_nodes[original_index]
|
|
300
320
|
|
|
301
|
-
resolved_node
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
321
|
+
resolved_node: EntityNode
|
|
322
|
+
if duplicate_idx == -1:
|
|
323
|
+
resolved_node = extracted_node
|
|
324
|
+
elif 0 <= duplicate_idx < len(indexes.existing_nodes):
|
|
325
|
+
resolved_node = indexes.existing_nodes[duplicate_idx]
|
|
326
|
+
else:
|
|
327
|
+
logger.warning(
|
|
328
|
+
'Invalid duplicate_idx %s for extracted node %s; treating as no duplicate.',
|
|
329
|
+
duplicate_idx,
|
|
330
|
+
extracted_node.uuid,
|
|
331
|
+
)
|
|
332
|
+
resolved_node = extracted_node
|
|
306
333
|
|
|
307
334
|
state.resolved_nodes[original_index] = resolved_node
|
|
308
335
|
state.uuid_map[extracted_node.uuid] = resolved_node.uuid
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: graphiti-core
|
|
3
|
-
Version: 0.30.0rc1
|
|
3
|
+
Version: 0.30.0rc3
|
|
4
4
|
Summary: A temporal graph building library
|
|
5
5
|
Project-URL: Homepage, https://help.getzep.com/graphiti/graphiti/overview
|
|
6
6
|
Project-URL: Repository, https://github.com/getzep/graphiti
|
|
@@ -44,7 +44,7 @@ graphiti_core/models/nodes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
44
44
|
graphiti_core/models/nodes/node_db_queries.py,sha256=TCHZKG5bQNarV9C5k4hOFFqc-LwTVQ8Pnd6okVVNKbo,12826
|
|
45
45
|
graphiti_core/prompts/__init__.py,sha256=EA-x9xUki9l8wnu2l8ek_oNf75-do5tq5hVq7Zbv8Kw,101
|
|
46
46
|
graphiti_core/prompts/dedupe_edges.py,sha256=WRXQi7JQZdIfKDICWyU7Wbs5WyD_KBblLBSeKdbLyuk,5914
|
|
47
|
-
graphiti_core/prompts/dedupe_nodes.py,sha256=
|
|
47
|
+
graphiti_core/prompts/dedupe_nodes.py,sha256=H4sIzpi1gBwPedTMhdY175jnLj5JtnEeb_WNITitPLU,9171
|
|
48
48
|
graphiti_core/prompts/eval.py,sha256=ijwxbE87G678imdhfPvRujepQMq_JZ3XHX4vOAcVnVI,5507
|
|
49
49
|
graphiti_core/prompts/extract_edge_dates.py,sha256=3Drs3CmvP0gJN5BidWSxrNvLet3HPoTybU3BUIAoc0Y,4218
|
|
50
50
|
graphiti_core/prompts/extract_edges.py,sha256=mnncxb6lyr3ufKajRAh09czmJawiEM54sSPNy9ukiio,6888
|
|
@@ -71,11 +71,11 @@ graphiti_core/utils/maintenance/community_operations.py,sha256=XMiokEemn96GlvjkO
|
|
|
71
71
|
graphiti_core/utils/maintenance/dedup_helpers.py,sha256=B7k6KkB6Sii8PZCWNNTvsNiy4BNTNWpoLeGgrPLq6BE,9220
|
|
72
72
|
graphiti_core/utils/maintenance/edge_operations.py,sha256=fvWKJWzz4_d2Y8bOfZFjJpLnGmsFwnrutFW25LX-S08,21287
|
|
73
73
|
graphiti_core/utils/maintenance/graph_data_operations.py,sha256=42icj3S_ELAJ-NK3jVS_rg_243dmnaZOyUitJj_uJ-M,6085
|
|
74
|
-
graphiti_core/utils/maintenance/node_operations.py,sha256=
|
|
74
|
+
graphiti_core/utils/maintenance/node_operations.py,sha256=TKpXPtnTVxxan8I1xQyVkGn3zyRdb_Q00cgUpLcloig,16860
|
|
75
75
|
graphiti_core/utils/maintenance/temporal_operations.py,sha256=IIaVtShpVkOYe6haxz3a1x3v54-MzaEXG8VsxFUNeoY,3582
|
|
76
76
|
graphiti_core/utils/maintenance/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
graphiti_core/utils/ontology_utils/entity_types_utils.py,sha256=4eVgxLWY6Q8k9cRJ5pW59IYF--U4nXZsZIGOVb_yHfQ,1285
|
|
78
|
-
graphiti_core-0.30.
|
|
79
|
-
graphiti_core-0.30.0rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
80
|
-
graphiti_core-0.30.0rc1.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
|
|
81
|
-
graphiti_core-0.30.0rc1.dist-info/RECORD,,
|
|
78
|
+
graphiti_core-0.30.0rc3.dist-info/METADATA,sha256=y9D976_UrZVxaSYxn8yEVDHl9bTHk1d9fcWoffuDO7Y,26933
|
|
79
|
+
graphiti_core-0.30.0rc3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
80
|
+
graphiti_core-0.30.0rc3.dist-info/licenses/LICENSE,sha256=KCUwCyDXuVEgmDWkozHyniRyWjnWUWjkuDHfU6o3JlA,11325
|
|
81
|
+
graphiti_core-0.30.0rc3.dist-info/RECORD,,
|
|
File without changes: {graphiti_core-0.30.0rc1.dist-info → graphiti_core-0.30.0rc3.dist-info}/WHEEL
|
|
File without changes: {graphiti_core-0.30.0rc1.dist-info → graphiti_core-0.30.0rc3.dist-info}/licenses/LICENSE
|