graphiti-core 0.21.0rc6__tar.gz → 0.30.0rc1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graphiti-core might be problematic. Click here for more details.

Files changed (192)
  1. graphiti_core-0.30.0rc1/AGENTS.md +21 -0
  2. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/Makefile +2 -2
  3. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/PKG-INFO +1 -1
  4. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/docker-compose.test.yml +1 -1
  5. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/bulk_utils.py +126 -60
  6. graphiti_core-0.30.0rc1/graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
  7. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/edge_operations.py +14 -0
  8. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/node_operations.py +141 -61
  9. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/pyproject.toml +1 -1
  10. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_edge_int.py +1 -0
  11. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_node_int.py +2 -0
  12. graphiti_core-0.30.0rc1/tests/utils/maintenance/test_bulk_utils.py +232 -0
  13. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/utils/maintenance/test_edge_operations.py +50 -0
  14. graphiti_core-0.30.0rc1/tests/utils/maintenance/test_node_operations.py +345 -0
  15. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/uv.lock +2 -2
  16. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.env.example +0 -0
  17. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  18. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/dependabot.yml +0 -0
  19. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/pull_request_template.md +0 -0
  20. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/secret_scanning.yml +0 -0
  21. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/ai-moderator.yml +0 -0
  22. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/cla.yml +0 -0
  23. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/claude-code-review.yml +0 -0
  24. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/claude.yml +0 -0
  25. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/codeql.yml +0 -0
  26. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/lint.yml +0 -0
  27. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/mcp-server-docker.yml +0 -0
  28. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/release-graphiti-core.yml +0 -0
  29. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/typecheck.yml +0 -0
  30. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/unit_tests.yml +0 -0
  31. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.gitignore +0 -0
  32. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/CLAUDE.md +0 -0
  33. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/CODE_OF_CONDUCT.md +0 -0
  34. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/CONTRIBUTING.md +0 -0
  35. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/Dockerfile +0 -0
  36. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/LICENSE +0 -0
  37. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/README.md +0 -0
  38. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/SECURITY.md +0 -0
  39. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/Zep-CLA.md +0 -0
  40. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/conftest.py +0 -0
  41. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/depot.json +0 -0
  42. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/docker-compose.yml +0 -0
  43. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/ellipsis.yaml +0 -0
  44. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/data/manybirds_products.json +0 -0
  45. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/ecommerce/runner.ipynb +0 -0
  46. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/ecommerce/runner.py +0 -0
  47. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/langgraph-agent/agent.ipynb +0 -0
  48. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/langgraph-agent/tinybirds-jess.png +0 -0
  49. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/podcast/podcast_runner.py +0 -0
  50. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/podcast/podcast_transcript.txt +0 -0
  51. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/podcast/transcript_parser.py +0 -0
  52. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/README.md +0 -0
  53. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/quickstart_falkordb.py +0 -0
  54. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/quickstart_neo4j.py +0 -0
  55. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/quickstart_neptune.py +0 -0
  56. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/requirements.txt +0 -0
  57. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/wizard_of_oz/parser.py +0 -0
  58. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/wizard_of_oz/runner.py +0 -0
  59. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/wizard_of_oz/woo.txt +0 -0
  60. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/__init__.py +0 -0
  61. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/__init__.py +0 -0
  62. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/bge_reranker_client.py +0 -0
  63. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/client.py +0 -0
  64. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/gemini_reranker_client.py +0 -0
  65. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/openai_reranker_client.py +0 -0
  66. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/__init__.py +0 -0
  67. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/driver.py +0 -0
  68. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/falkordb_driver.py +0 -0
  69. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/kuzu_driver.py +0 -0
  70. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/neo4j_driver.py +0 -0
  71. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/neptune_driver.py +0 -0
  72. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/edges.py +0 -0
  73. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/__init__.py +0 -0
  74. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/azure_openai.py +0 -0
  75. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/client.py +0 -0
  76. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/gemini.py +0 -0
  77. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/openai.py +0 -0
  78. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/voyage.py +0 -0
  79. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/errors.py +0 -0
  80. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/graph_queries.py +0 -0
  81. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/graphiti.py +0 -0
  82. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/graphiti_types.py +0 -0
  83. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/helpers.py +0 -0
  84. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/__init__.py +0 -0
  85. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/anthropic_client.py +0 -0
  86. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/azure_openai_client.py +0 -0
  87. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/client.py +0 -0
  88. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/config.py +0 -0
  89. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/errors.py +0 -0
  90. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/gemini_client.py +0 -0
  91. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/groq_client.py +0 -0
  92. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/openai_base_client.py +0 -0
  93. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/openai_client.py +0 -0
  94. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/openai_generic_client.py +0 -0
  95. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/utils.py +0 -0
  96. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/migrations/__init__.py +0 -0
  97. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/__init__.py +0 -0
  98. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/edges/__init__.py +0 -0
  99. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/edges/edge_db_queries.py +0 -0
  100. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/nodes/__init__.py +0 -0
  101. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/nodes/node_db_queries.py +0 -0
  102. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/nodes.py +0 -0
  103. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/__init__.py +0 -0
  104. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/dedupe_edges.py +0 -0
  105. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/dedupe_nodes.py +0 -0
  106. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/eval.py +0 -0
  107. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/extract_edge_dates.py +0 -0
  108. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/extract_edges.py +0 -0
  109. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/extract_nodes.py +0 -0
  110. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/invalidate_edges.py +0 -0
  111. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/lib.py +0 -0
  112. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/models.py +0 -0
  113. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/prompt_helpers.py +0 -0
  114. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/summarize_nodes.py +0 -0
  115. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/py.typed +0 -0
  116. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/__init__.py +0 -0
  117. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search.py +0 -0
  118. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_config.py +0 -0
  119. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_config_recipes.py +0 -0
  120. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_filters.py +0 -0
  121. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_helpers.py +0 -0
  122. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_utils.py +0 -0
  123. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/telemetry/__init__.py +0 -0
  124. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/telemetry/telemetry.py +0 -0
  125. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/__init__.py +0 -0
  126. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/datetime_utils.py +0 -0
  127. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/__init__.py +0 -0
  128. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/community_operations.py +0 -0
  129. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
  130. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
  131. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/utils.py +0 -0
  132. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/ontology_utils/entity_types_utils.py +0 -0
  133. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/arxiv-screenshot.png +0 -0
  134. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/graphiti-graph-intro.gif +0 -0
  135. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/graphiti-intro-slides-stock-2.gif +0 -0
  136. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/simple_graph.svg +0 -0
  137. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/.env.example +0 -0
  138. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/.python-version +0 -0
  139. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/Dockerfile +0 -0
  140. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/README.md +0 -0
  141. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/cursor_rules.md +0 -0
  142. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/docker-compose.yml +0 -0
  143. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/graphiti_mcp_server.py +0 -0
  144. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/mcp_config_sse_example.json +0 -0
  145. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/mcp_config_stdio_example.json +0 -0
  146. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/pyproject.toml +0 -0
  147. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/uv.lock +0 -0
  148. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/poetry.lock +0 -0
  149. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/py.typed +0 -0
  150. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/pytest.ini +0 -0
  151. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/.env.example +0 -0
  152. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/Makefile +0 -0
  153. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/README.md +0 -0
  154. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/__init__.py +0 -0
  155. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/config.py +0 -0
  156. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/__init__.py +0 -0
  157. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/common.py +0 -0
  158. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/ingest.py +0 -0
  159. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/retrieve.py +0 -0
  160. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/main.py +0 -0
  161. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/routers/__init__.py +0 -0
  162. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/routers/ingest.py +0 -0
  163. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/routers/retrieve.py +0 -0
  164. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/zep_graphiti.py +0 -0
  165. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/pyproject.toml +0 -0
  166. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/uv.lock +0 -0
  167. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/signatures/version1/cla.json +0 -0
  168. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/cross_encoder/test_bge_reranker_client.py +0 -0
  169. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/cross_encoder/test_gemini_reranker_client.py +0 -0
  170. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/driver/__init__.py +0 -0
  171. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/driver/test_falkordb_driver.py +0 -0
  172. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/embedder_fixtures.py +0 -0
  173. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/test_gemini.py +0 -0
  174. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/test_openai.py +0 -0
  175. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/test_voyage.py +0 -0
  176. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/data/longmemeval_data/README.md +0 -0
  177. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/data/longmemeval_data/longmemeval_oracle.json +0 -0
  178. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/eval_cli.py +0 -0
  179. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/eval_e2e_graph_building.py +0 -0
  180. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/pytest.ini +0 -0
  181. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/utils.py +0 -0
  182. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/helpers_test.py +0 -0
  183. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_anthropic_client.py +0 -0
  184. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_anthropic_client_int.py +0 -0
  185. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_client.py +0 -0
  186. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_errors.py +0 -0
  187. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_gemini_client.py +0 -0
  188. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_entity_exclusion_int.py +0 -0
  189. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_graphiti_int.py +0 -0
  190. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_graphiti_mock.py +0 -0
  191. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/utils/maintenance/test_temporal_operations_int.py +0 -0
  192. {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/utils/search/search_utils_test.py +0 -0
@@ -0,0 +1,21 @@
1
+ # Repository Guidelines
2
+
3
+ ## Project Structure & Module Organization
4
+ Graphiti's core library lives under `graphiti_core/`, split into domain modules such as `nodes.py`, `edges.py`, `models/`, and `search/` for retrieval pipelines. Service adapters and API glue reside in `server/graph_service/`, while the MCP integration lives in `mcp_server/`. Shared assets and collateral sit in `images/` and `examples/`. Tests cover the package via `tests/`, with configuration in `conftest.py`, `pytest.ini`, and Docker compose files for optional services. Tooling manifests live at the repo root, including `pyproject.toml`, `Makefile`, and deployment compose files.
5
+
6
+ ## Build, Test, and Development Commands
7
+ - `uv sync --extra dev`: install the dev environment declared in `pyproject.toml`.
8
+ - `make format`: run `ruff` to sort imports and apply the canonical formatter.
9
+ - `make lint`: execute `ruff` plus `pyright` type checks against `graphiti_core`.
10
+ - `make test`: run the full `pytest` suite (`uv run pytest`).
11
+ - `uv run pytest tests/path/test_file.py`: target a specific module or test selection.
12
+ - `docker-compose -f docker-compose.test.yml up`: provision local graph/search dependencies for integration flows.
13
+
14
+ ## Coding Style & Naming Conventions
15
+ Python code uses 4-space indentation, 100-character lines, and prefers single quotes as configured in `pyproject.toml`. Modules, files, and functions stay snake_case; Pydantic models in `graphiti_core/models` use PascalCase with explicit type hints. Keep side-effectful code inside drivers or adapters (`graphiti_core/driver`, `graphiti_core/utils`) and rely on pure helpers elsewhere. Run `make format` before committing to normalize imports and docstring formatting.
16
+
17
+ ## Testing Guidelines
18
+ Author tests alongside features under `tests/`, naming files `test_<feature>.py` and functions `test_<behavior>`. Use `@pytest.mark.integration` for database-reliant scenarios so CI can gate them. Reproduce regressions with a failing test first and validate fixes via `uv run pytest -k "pattern"`. Start required backing services through `docker-compose.test.yml` when running integration suites locally.
19
+
20
+ ## Commit & Pull Request Guidelines
21
+ Commits use an imperative, present-tense summary (for example, `add async cache invalidation`) optionally suffixed with the PR number as seen in history (`(#927)`). Squash fixups and keep unrelated changes isolated. Pull requests should include: a concise description, linked tracking issue, notes about schema or API impacts, and screenshots or logs when behavior changes. Confirm `make lint` and `make test` pass locally, and update docs or examples when public interfaces shift.
@@ -26,7 +26,7 @@ lint:
26
26
 
27
27
  # Run tests
28
28
  test:
29
- $(PYTEST)
29
+ DISABLE_FALKORDB=1 DISABLE_KUZU=1 DISABLE_NEPTUNE=1 $(PYTEST) -m "not integration"
30
30
 
31
31
  # Run format, lint, and test
32
- check: format lint test
32
+ check: format lint test
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphiti-core
3
- Version: 0.21.0rc6
3
+ Version: 0.30.0rc1
4
4
  Summary: A temporal graph building library
5
5
  Project-URL: Homepage, https://help.getzep.com/graphiti/graphiti/overview
6
6
  Project-URL: Repository, https://github.com/getzep/graphiti
@@ -25,7 +25,7 @@ services:
25
25
  - PORT=8000
26
26
 
27
27
  neo4j:
28
- image: neo4j:5.22.0
28
+ image: neo4j:5.26.2
29
29
  ports:
30
30
  - "7474:7474"
31
31
  - "${NEO4J_PORT}:${NEO4J_PORT}"
@@ -43,8 +43,14 @@ from graphiti_core.models.nodes.node_db_queries import (
43
43
  get_entity_node_save_bulk_query,
44
44
  get_episode_node_save_bulk_query,
45
45
  )
46
- from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode, create_entity_node_embeddings
46
+ from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
47
47
  from graphiti_core.utils.datetime_utils import convert_datetimes_to_strings
48
+ from graphiti_core.utils.maintenance.dedup_helpers import (
49
+ DedupResolutionState,
50
+ _build_candidate_indexes,
51
+ _normalize_string_exact,
52
+ _resolve_with_similarity,
53
+ )
48
54
  from graphiti_core.utils.maintenance.edge_operations import (
49
55
  extract_edges,
50
56
  resolve_extracted_edge,
@@ -63,6 +69,38 @@ logger = logging.getLogger(__name__)
63
69
  CHUNK_SIZE = 10
64
70
 
65
71
 
72
+ def _build_directed_uuid_map(pairs: list[tuple[str, str]]) -> dict[str, str]:
73
+ """Collapse alias -> canonical chains while preserving direction.
74
+
75
+ The incoming pairs represent directed mappings discovered during node dedupe. We use a simple
76
+ union-find with iterative path compression to ensure every source UUID resolves to its ultimate
77
+ canonical target, even if aliases appear lexicographically smaller than the canonical UUID.
78
+ """
79
+
80
+ parent: dict[str, str] = {}
81
+
82
+ def find(uuid: str) -> str:
83
+ """Directed union-find lookup using iterative path compression."""
84
+ parent.setdefault(uuid, uuid)
85
+ root = uuid
86
+ while parent[root] != root:
87
+ root = parent[root]
88
+
89
+ while parent[uuid] != root:
90
+ next_uuid = parent[uuid]
91
+ parent[uuid] = root
92
+ uuid = next_uuid
93
+
94
+ return root
95
+
96
+ for source_uuid, target_uuid in pairs:
97
+ parent.setdefault(source_uuid, source_uuid)
98
+ parent.setdefault(target_uuid, target_uuid)
99
+ parent[find(source_uuid)] = find(target_uuid)
100
+
101
+ return {uuid: find(uuid) for uuid in parent}
102
+
103
+
66
104
  class RawEpisode(BaseModel):
67
105
  name: str
68
106
  uuid: str | None = Field(default=None)
@@ -266,83 +304,111 @@ async def dedupe_nodes_bulk(
266
304
  episode_tuples: list[tuple[EpisodicNode, list[EpisodicNode]]],
267
305
  entity_types: dict[str, type[BaseModel]] | None = None,
268
306
  ) -> tuple[dict[str, list[EntityNode]], dict[str, str]]:
269
- embedder = clients.embedder
270
- min_score = 0.8
271
-
272
- # generate embeddings
273
- await semaphore_gather(
274
- *[create_entity_node_embeddings(embedder, nodes) for nodes in extracted_nodes]
275
- )
276
-
277
- # Find similar results
278
- dedupe_tuples: list[tuple[list[EntityNode], list[EntityNode]]] = []
279
- for i, nodes_i in enumerate(extracted_nodes):
280
- existing_nodes: list[EntityNode] = []
281
- for j, nodes_j in enumerate(extracted_nodes):
282
- if i == j:
283
- continue
284
- existing_nodes += nodes_j
285
-
286
- candidates_i: list[EntityNode] = []
287
- for node in nodes_i:
288
- for existing_node in existing_nodes:
289
- # Approximate BM25 by checking for word overlaps (this is faster than creating many in-memory indices)
290
- # This approach will cast a wider net than BM25, which is ideal for this use case
291
- node_words = set(node.name.lower().split())
292
- existing_node_words = set(existing_node.name.lower().split())
293
- has_overlap = not node_words.isdisjoint(existing_node_words)
294
- if has_overlap:
295
- candidates_i.append(existing_node)
296
- continue
307
+ """Resolve entity duplicates across an in-memory batch using a two-pass strategy.
297
308
 
298
- # Check for semantic similarity even if there is no overlap
299
- similarity = np.dot(
300
- normalize_l2(node.name_embedding or []),
301
- normalize_l2(existing_node.name_embedding or []),
302
- )
303
- if similarity >= min_score:
304
- candidates_i.append(existing_node)
305
-
306
- dedupe_tuples.append((nodes_i, candidates_i))
309
+ 1. Run :func:`resolve_extracted_nodes` for every episode in parallel so each batch item is
310
+ reconciled against the live graph just like the non-batch flow.
311
+ 2. Re-run the deterministic similarity heuristics across the union of resolved nodes to catch
312
+ duplicates that only co-occur inside this batch, emitting a canonical UUID map that callers
313
+ can apply to edges and persistence.
314
+ """
307
315
 
308
- # Determine Node Resolutions
309
- bulk_node_resolutions: list[
310
- tuple[list[EntityNode], dict[str, str], list[tuple[EntityNode, EntityNode]]]
311
- ] = await semaphore_gather(
316
+ first_pass_results = await semaphore_gather(
312
317
  *[
313
318
  resolve_extracted_nodes(
314
319
  clients,
315
- dedupe_tuple[0],
320
+ nodes,
316
321
  episode_tuples[i][0],
317
322
  episode_tuples[i][1],
318
323
  entity_types,
319
- existing_nodes_override=dedupe_tuples[i][1],
320
324
  )
321
- for i, dedupe_tuple in enumerate(dedupe_tuples)
325
+ for i, nodes in enumerate(extracted_nodes)
322
326
  ]
323
327
  )
324
328
 
325
- # Collect all duplicate pairs sorted by uuid
329
+ episode_resolutions: list[tuple[str, list[EntityNode]]] = []
330
+ per_episode_uuid_maps: list[dict[str, str]] = []
326
331
  duplicate_pairs: list[tuple[str, str]] = []
327
- for _, _, duplicates in bulk_node_resolutions:
328
- for duplicate in duplicates:
329
- n, m = duplicate
330
- duplicate_pairs.append((n.uuid, m.uuid))
331
332
 
332
- # Now we compress the duplicate_map, so that 3 -> 2 and 2 -> becomes 3 -> 1 (sorted by uuid)
333
- compressed_map: dict[str, str] = compress_uuid_map(duplicate_pairs)
333
+ for (resolved_nodes, uuid_map, duplicates), (episode, _) in zip(
334
+ first_pass_results, episode_tuples, strict=True
335
+ ):
336
+ episode_resolutions.append((episode.uuid, resolved_nodes))
337
+ per_episode_uuid_maps.append(uuid_map)
338
+ duplicate_pairs.extend((source.uuid, target.uuid) for source, target in duplicates)
339
+
340
+ canonical_nodes: dict[str, EntityNode] = {}
341
+ for _, resolved_nodes in episode_resolutions:
342
+ for node in resolved_nodes:
343
+ # NOTE: this loop is O(n^2) in the number of nodes inside the batch because we rebuild
344
+ # the MinHash index for the accumulated canonical pool each time. The LRU-backed
345
+ # shingle cache keeps the constant factors low for typical batch sizes (≤ CHUNK_SIZE),
346
+ # but if batches grow significantly we should switch to an incremental index or chunked
347
+ # processing.
348
+ if not canonical_nodes:
349
+ canonical_nodes[node.uuid] = node
350
+ continue
334
351
 
335
- node_uuid_map: dict[str, EntityNode] = {
336
- node.uuid: node for nodes in extracted_nodes for node in nodes
337
- }
352
+ existing_candidates = list(canonical_nodes.values())
353
+ normalized = _normalize_string_exact(node.name)
354
+ exact_match = next(
355
+ (
356
+ candidate
357
+ for candidate in existing_candidates
358
+ if _normalize_string_exact(candidate.name) == normalized
359
+ ),
360
+ None,
361
+ )
362
+ if exact_match is not None:
363
+ if exact_match.uuid != node.uuid:
364
+ duplicate_pairs.append((node.uuid, exact_match.uuid))
365
+ continue
366
+
367
+ indexes = _build_candidate_indexes(existing_candidates)
368
+ state = DedupResolutionState(
369
+ resolved_nodes=[None],
370
+ uuid_map={},
371
+ unresolved_indices=[],
372
+ )
373
+ _resolve_with_similarity([node], indexes, state)
374
+
375
+ resolved = state.resolved_nodes[0]
376
+ if resolved is None:
377
+ canonical_nodes[node.uuid] = node
378
+ continue
379
+
380
+ canonical_uuid = resolved.uuid
381
+ canonical_nodes.setdefault(canonical_uuid, resolved)
382
+ if canonical_uuid != node.uuid:
383
+ duplicate_pairs.append((node.uuid, canonical_uuid))
384
+
385
+ union_pairs: list[tuple[str, str]] = []
386
+ for uuid_map in per_episode_uuid_maps:
387
+ union_pairs.extend(uuid_map.items())
388
+ union_pairs.extend(duplicate_pairs)
389
+
390
+ compressed_map: dict[str, str] = _build_directed_uuid_map(union_pairs)
338
391
 
339
392
  nodes_by_episode: dict[str, list[EntityNode]] = {}
340
- for i, nodes in enumerate(extracted_nodes):
341
- episode = episode_tuples[i][0]
393
+ for episode_uuid, resolved_nodes in episode_resolutions:
394
+ deduped_nodes: list[EntityNode] = []
395
+ seen: set[str] = set()
396
+ for node in resolved_nodes:
397
+ canonical_uuid = compressed_map.get(node.uuid, node.uuid)
398
+ if canonical_uuid in seen:
399
+ continue
400
+ seen.add(canonical_uuid)
401
+ canonical_node = canonical_nodes.get(canonical_uuid)
402
+ if canonical_node is None:
403
+ logger.error(
404
+ 'Canonical node %s missing during batch dedupe; falling back to %s',
405
+ canonical_uuid,
406
+ node.uuid,
407
+ )
408
+ canonical_node = node
409
+ deduped_nodes.append(canonical_node)
342
410
 
343
- nodes_by_episode[episode.uuid] = [
344
- node_uuid_map[compressed_map.get(node.uuid, node.uuid)] for node in nodes
345
- ]
411
+ nodes_by_episode[episode_uuid] = deduped_nodes
346
412
 
347
413
  return nodes_by_episode, compressed_map
348
414
 
@@ -0,0 +1,262 @@
1
+ """
2
+ Copyright 2024, Zep Software, Inc.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import math
20
+ import re
21
+ from collections import defaultdict
22
+ from collections.abc import Iterable
23
+ from dataclasses import dataclass, field
24
+ from functools import lru_cache
25
+ from hashlib import blake2b
26
+ from typing import TYPE_CHECKING
27
+
28
+ if TYPE_CHECKING:
29
+ from graphiti_core.nodes import EntityNode
30
+
31
# Minimum Shannon entropy (bits per character, see _name_entropy) a normalized name
# must reach for _has_high_entropy to accept it.
_NAME_ENTROPY_THRESHOLD = 1.5
# _has_high_entropy rejects a name outright only when it is BOTH shorter than
# _MIN_NAME_LENGTH characters AND has fewer than _MIN_TOKEN_COUNT whitespace-separated tokens.
_MIN_NAME_LENGTH = 6
_MIN_TOKEN_COUNT = 2
# Minimum Jaccard similarity between shingle sets for a fuzzy match to be accepted.
_FUZZY_JACCARD_THRESHOLD = 0.9
# Number of seeded hash functions, i.e. the length of every MinHash signature.
_MINHASH_PERMUTATIONS = 32
# Signature values per LSH band (32 / 4 = 8 bands per signature).
_MINHASH_BAND_SIZE = 4
37
+
38
+
39
+ def _normalize_string_exact(name: str) -> str:
40
+ """Lowercase text and collapse whitespace so equal names map to the same key."""
41
+ normalized = re.sub(r'[\s]+', ' ', name.lower())
42
+ return normalized.strip()
43
+
44
+
45
def _normalize_name_for_fuzzy(name: str) -> str:
    """Return a looser key for fuzzy matching.

    Starting from the exact-normalized form, every character other than a-z,
    0-9, apostrophe and space is replaced by a space; the result is trimmed and
    whitespace runs are collapsed again.
    """
    exact_key = _normalize_string_exact(name)
    # Punctuation becomes a space (not nothing) so adjacent words stay separated.
    scrubbed = re.sub(r"[^a-z0-9' ]", ' ', exact_key).strip()
    return re.sub(r'[\s]+', ' ', scrubbed)
50
+
51
+
52
+ def _name_entropy(normalized_name: str) -> float:
53
+ """Approximate text specificity using Shannon entropy over characters.
54
+
55
+ We strip spaces, count how often each character appears, and sum
56
+ probability * -log2(probability). Short or repetitive names yield low
57
+ entropy, which signals we should defer resolution to the LLM instead of
58
+ trusting fuzzy similarity.
59
+ """
60
+ if not normalized_name:
61
+ return 0.0
62
+
63
+ counts: dict[str, int] = {}
64
+ for char in normalized_name.replace(' ', ''):
65
+ counts[char] = counts.get(char, 0) + 1
66
+
67
+ total = sum(counts.values())
68
+ if total == 0:
69
+ return 0.0
70
+
71
+ entropy = 0.0
72
+ for count in counts.values():
73
+ probability = count / total
74
+ entropy -= probability * math.log2(probability)
75
+
76
+ return entropy
77
+
78
+
79
def _has_high_entropy(normalized_name: str) -> bool:
    """Tell whether a normalized name is specific enough for fuzzy matching.

    A name is rejected when it is both shorter than ``_MIN_NAME_LENGTH``
    characters and has fewer than ``_MIN_TOKEN_COUNT`` tokens; otherwise it is
    accepted only if its character entropy reaches ``_NAME_ENTROPY_THRESHOLD``.
    """
    too_few_tokens = len(normalized_name.split()) < _MIN_TOKEN_COUNT
    too_short = len(normalized_name) < _MIN_NAME_LENGTH
    if too_short and too_few_tokens:
        return False

    return _name_entropy(normalized_name) >= _NAME_ENTROPY_THRESHOLD
86
+
87
+
88
+ def _shingles(normalized_name: str) -> set[str]:
89
+ """Create 3-gram shingles from the normalized name for MinHash calculations."""
90
+ cleaned = normalized_name.replace(' ', '')
91
+ if len(cleaned) < 2:
92
+ return {cleaned} if cleaned else set()
93
+
94
+ return {cleaned[i : i + 3] for i in range(len(cleaned) - 2)}
95
+
96
+
97
+ def _hash_shingle(shingle: str, seed: int) -> int:
98
+ """Generate a deterministic 64-bit hash for a shingle given the permutation seed."""
99
+ digest = blake2b(f'{seed}:{shingle}'.encode(), digest_size=8)
100
+ return int.from_bytes(digest.digest(), 'big')
101
+
102
+
103
+ def _minhash_signature(shingles: Iterable[str]) -> tuple[int, ...]:
104
+ """Compute the MinHash signature for the shingle set across predefined permutations."""
105
+ if not shingles:
106
+ return tuple()
107
+
108
+ seeds = range(_MINHASH_PERMUTATIONS)
109
+ signature: list[int] = []
110
+ for seed in seeds:
111
+ min_hash = min(_hash_shingle(shingle, seed) for shingle in shingles)
112
+ signature.append(min_hash)
113
+
114
+ return tuple(signature)
115
+
116
+
117
+ def _lsh_bands(signature: Iterable[int]) -> list[tuple[int, ...]]:
118
+ """Split the MinHash signature into fixed-size bands for locality-sensitive hashing."""
119
+ signature_list = list(signature)
120
+ if not signature_list:
121
+ return []
122
+
123
+ bands: list[tuple[int, ...]] = []
124
+ for start in range(0, len(signature_list), _MINHASH_BAND_SIZE):
125
+ band = tuple(signature_list[start : start + _MINHASH_BAND_SIZE])
126
+ if len(band) == _MINHASH_BAND_SIZE:
127
+ bands.append(band)
128
+ return bands
129
+
130
+
131
+ def _jaccard_similarity(a: set[str], b: set[str]) -> float:
132
+ """Return the Jaccard similarity between two shingle sets, handling empty edge cases."""
133
+ if not a and not b:
134
+ return 1.0
135
+ if not a or not b:
136
+ return 0.0
137
+
138
+ intersection = len(a.intersection(b))
139
+ union = len(a.union(b))
140
+ return intersection / union if union else 0.0
141
+
142
+
143
@lru_cache(maxsize=512)
def _cached_shingles(name: str) -> set[str]:
    """Memoized ``_shingles``: an LRU cache of up to 512 names avoids recomputation.

    The returned set is the cached object itself, so callers should treat it
    as read-only.
    """
    shingle_set = _shingles(name)
    return shingle_set
147
+
148
+
149
@dataclass
class DedupCandidateIndexes:
    """Lookup tables over the existing candidate nodes, built once per dedupe run."""

    # The candidate list exactly as it was handed to the index builder.
    existing_nodes: list[EntityNode]
    # Candidate uuid -> candidate node.
    nodes_by_uuid: dict[str, EntityNode]
    # Exact-normalized name -> every candidate carrying that name.
    normalized_existing: defaultdict[str, list[EntityNode]]
    # Candidate uuid -> shingle set of its fuzzy-normalized name.
    shingles_by_candidate: dict[str, set[str]]
    # (band index, band values) -> uuids of candidates whose signature has that band.
    lsh_buckets: defaultdict[tuple[int, tuple[int, ...]], list[str]]
158
+
159
+
160
@dataclass
class DedupResolutionState:
    """Mutable bookkeeping that the resolution passes fill in as they go."""

    # One slot per extracted node (same index); None until a match is assigned.
    resolved_nodes: list[EntityNode | None]
    # Extracted node uuid -> uuid of the node it was resolved to.
    uuid_map: dict[str, str]
    # Indices of extracted nodes that the deterministic pass could not settle.
    unresolved_indices: list[int]
    # (extracted node, matched node) pairs recorded when their uuids differ.
    duplicate_pairs: list[tuple[EntityNode, EntityNode]] = field(default_factory=list)
168
+
169
+
170
def _build_candidate_indexes(existing_nodes: list[EntityNode]) -> DedupCandidateIndexes:
    """Build the exact-name, uuid, shingle and LSH lookup tables for the candidates.

    Each candidate is registered under its exact-normalized name and its uuid,
    its fuzzy-normalized name is shingled, and its uuid is added to one LSH
    bucket per band of its MinHash signature.
    """
    by_exact_name: defaultdict[str, list[EntityNode]] = defaultdict(list)
    by_uuid: dict[str, EntityNode] = {}
    shingle_sets: dict[str, set[str]] = {}
    buckets: defaultdict[tuple[int, tuple[int, ...]], list[str]] = defaultdict(list)

    for node in existing_nodes:
        by_exact_name[_normalize_string_exact(node.name)].append(node)
        by_uuid[node.uuid] = node

        node_shingles = _cached_shingles(_normalize_name_for_fuzzy(node.name))
        shingle_sets[node.uuid] = node_shingles

        # The band position is part of the bucket key, so equal values at
        # different positions of the signature never share a bucket.
        bands = _lsh_bands(_minhash_signature(node_shingles))
        for position, band in enumerate(bands):
            buckets[(position, band)].append(node.uuid)

    return DedupCandidateIndexes(
        existing_nodes=existing_nodes,
        nodes_by_uuid=by_uuid,
        normalized_existing=by_exact_name,
        shingles_by_candidate=shingle_sets,
        lsh_buckets=buckets,
    )
196
+
197
+
198
def _resolve_with_similarity(
    extracted_nodes: list[EntityNode],
    indexes: DedupCandidateIndexes,
    state: DedupResolutionState,
) -> None:
    """Attempt deterministic resolution using exact name hits and fuzzy MinHash comparisons.

    Results are written into ``state`` in place; nothing is returned. For each
    extracted node, in order:

    1. Names that fail ``_has_high_entropy`` are left unresolved.
    2. A single candidate with the same exact-normalized name is taken as the
       match; several such candidates are ambiguous and leave the node unresolved.
    3. Otherwise candidates sharing at least one LSH band are scored by Jaccard
       similarity of their shingle sets, and the best one is accepted if it
       reaches ``_FUZZY_JACCARD_THRESHOLD``.

    A resolved node gets its slot in ``state.resolved_nodes`` and an entry in
    ``state.uuid_map``; when the match has a different uuid the pair is also
    appended to ``state.duplicate_pairs``. Every other index is appended to
    ``state.unresolved_indices``.

    ``state.resolved_nodes`` must already contain one slot per extracted node,
    because matches are assigned by index.
    """
    for idx, node in enumerate(extracted_nodes):
        normalized_exact = _normalize_string_exact(node.name)
        normalized_fuzzy = _normalize_name_for_fuzzy(node.name)

        if not _has_high_entropy(normalized_fuzzy):
            state.unresolved_indices.append(idx)
            continue

        existing_matches = indexes.normalized_existing.get(normalized_exact, [])
        if len(existing_matches) == 1:
            match = existing_matches[0]
            state.resolved_nodes[idx] = match
            state.uuid_map[node.uuid] = match.uuid
            if match.uuid != node.uuid:
                state.duplicate_pairs.append((node, match))
            continue
        if len(existing_matches) > 1:
            # Several candidates share the exact name: ambiguous, so do not guess.
            state.unresolved_indices.append(idx)
            continue

        shingles = _cached_shingles(normalized_fuzzy)
        signature = _minhash_signature(shingles)
        candidate_ids: set[str] = set()
        for band_index, band in enumerate(_lsh_bands(signature)):
            # .get() keeps the defaultdict from growing an empty bucket on a miss.
            candidate_ids.update(indexes.lsh_buckets.get((band_index, band), []))

        best_candidate: EntityNode | None = None
        best_score = 0.0
        # Iterate in sorted order: the comparison below is strict, so on a tie
        # the first candidate seen wins, and raw set order over uuid strings
        # can differ from one process to the next.
        for candidate_id in sorted(candidate_ids):
            candidate_shingles = indexes.shingles_by_candidate.get(candidate_id, set())
            score = _jaccard_similarity(shingles, candidate_shingles)
            if score > best_score:
                best_score = score
                best_candidate = indexes.nodes_by_uuid.get(candidate_id)

        if best_candidate is not None and best_score >= _FUZZY_JACCARD_THRESHOLD:
            state.resolved_nodes[idx] = best_candidate
            state.uuid_map[node.uuid] = best_candidate.uuid
            if best_candidate.uuid != node.uuid:
                state.duplicate_pairs.append((node, best_candidate))
            continue

        state.unresolved_indices.append(idx)
247
+
248
+
249
# Explicit export list. The underscore-prefixed helpers are listed deliberately so that
# sibling modules can import them by name and a star-import of this module picks them up.
# _name_entropy, _shingles and _hash_shingle are not listed here.
__all__ = [
    'DedupCandidateIndexes',
    'DedupResolutionState',
    '_normalize_string_exact',
    '_normalize_name_for_fuzzy',
    '_has_high_entropy',
    '_minhash_signature',
    '_lsh_bands',
    '_jaccard_similarity',
    '_cached_shingles',
    '_FUZZY_JACCARD_THRESHOLD',
    '_build_candidate_indexes',
    '_resolve_with_similarity',
]
@@ -41,6 +41,7 @@ from graphiti_core.search.search_config import SearchResults
41
41
  from graphiti_core.search.search_config_recipes import EDGE_HYBRID_SEARCH_RRF
42
42
  from graphiti_core.search.search_filters import SearchFilters
43
43
  from graphiti_core.utils.datetime_utils import ensure_utc, utc_now
44
+ from graphiti_core.utils.maintenance.dedup_helpers import _normalize_string_exact
44
45
 
45
46
  logger = logging.getLogger(__name__)
46
47
 
@@ -397,6 +398,19 @@ async def resolve_extracted_edge(
397
398
  if len(related_edges) == 0 and len(existing_edges) == 0:
398
399
  return extracted_edge, [], []
399
400
 
401
+ # Fast path: if the fact text and endpoints already exist verbatim, reuse the matching edge.
402
+ normalized_fact = _normalize_string_exact(extracted_edge.fact)
403
+ for edge in related_edges:
404
+ if (
405
+ edge.source_node_uuid == extracted_edge.source_node_uuid
406
+ and edge.target_node_uuid == extracted_edge.target_node_uuid
407
+ and _normalize_string_exact(edge.fact) == normalized_fact
408
+ ):
409
+ resolved = edge
410
+ if episode is not None and episode.uuid not in resolved.episodes:
411
+ resolved.episodes.append(episode.uuid)
412
+ return resolved, [], []
413
+
400
414
  start = time()
401
415
 
402
416
  # Prepare context for LLM