graphiti-core 0.21.0rc6__tar.gz → 0.30.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of graphiti-core has been flagged as potentially problematic by the registry.
- graphiti_core-0.30.0rc1/AGENTS.md +21 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/Makefile +2 -2
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/PKG-INFO +1 -1
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/docker-compose.test.yml +1 -1
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/bulk_utils.py +126 -60
- graphiti_core-0.30.0rc1/graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/edge_operations.py +14 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/node_operations.py +141 -61
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/pyproject.toml +1 -1
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_edge_int.py +1 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_node_int.py +2 -0
- graphiti_core-0.30.0rc1/tests/utils/maintenance/test_bulk_utils.py +232 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/utils/maintenance/test_edge_operations.py +50 -0
- graphiti_core-0.30.0rc1/tests/utils/maintenance/test_node_operations.py +345 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/uv.lock +2 -2
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.env.example +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/dependabot.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/pull_request_template.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/secret_scanning.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/ai-moderator.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/cla.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/claude-code-review.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/claude.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/codeql.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/lint.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/mcp-server-docker.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/release-graphiti-core.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/typecheck.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.github/workflows/unit_tests.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/.gitignore +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/CLAUDE.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/CODE_OF_CONDUCT.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/CONTRIBUTING.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/Dockerfile +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/LICENSE +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/README.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/SECURITY.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/Zep-CLA.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/conftest.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/depot.json +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/docker-compose.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/ellipsis.yaml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/data/manybirds_products.json +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/ecommerce/runner.ipynb +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/ecommerce/runner.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/langgraph-agent/agent.ipynb +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/langgraph-agent/tinybirds-jess.png +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/podcast/podcast_runner.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/podcast/podcast_transcript.txt +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/podcast/transcript_parser.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/README.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/quickstart_falkordb.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/quickstart_neo4j.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/quickstart_neptune.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/quickstart/requirements.txt +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/wizard_of_oz/parser.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/wizard_of_oz/runner.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/examples/wizard_of_oz/woo.txt +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/bge_reranker_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/gemini_reranker_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/cross_encoder/openai_reranker_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/driver.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/falkordb_driver.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/kuzu_driver.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/neo4j_driver.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/driver/neptune_driver.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/edges.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/azure_openai.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/gemini.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/openai.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/embedder/voyage.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/errors.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/graph_queries.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/graphiti.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/graphiti_types.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/helpers.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/anthropic_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/azure_openai_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/config.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/errors.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/gemini_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/groq_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/openai_base_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/openai_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/openai_generic_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/llm_client/utils.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/migrations/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/edges/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/edges/edge_db_queries.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/nodes/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/models/nodes/node_db_queries.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/nodes.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/dedupe_edges.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/dedupe_nodes.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/eval.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/extract_edge_dates.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/extract_edges.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/extract_nodes.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/invalidate_edges.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/lib.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/models.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/prompt_helpers.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/prompts/summarize_nodes.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/py.typed +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_config.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_config_recipes.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_filters.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_helpers.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/search/search_utils.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/telemetry/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/telemetry/telemetry.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/datetime_utils.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/community_operations.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/temporal_operations.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/utils.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/ontology_utils/entity_types_utils.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/arxiv-screenshot.png +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/graphiti-graph-intro.gif +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/graphiti-intro-slides-stock-2.gif +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/images/simple_graph.svg +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/.env.example +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/.python-version +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/Dockerfile +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/README.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/cursor_rules.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/docker-compose.yml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/graphiti_mcp_server.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/mcp_config_sse_example.json +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/mcp_config_stdio_example.json +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/pyproject.toml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/mcp_server/uv.lock +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/poetry.lock +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/py.typed +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/pytest.ini +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/.env.example +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/Makefile +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/README.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/config.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/common.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/ingest.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/dto/retrieve.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/main.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/routers/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/routers/ingest.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/routers/retrieve.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/graph_service/zep_graphiti.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/pyproject.toml +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/server/uv.lock +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/signatures/version1/cla.json +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/cross_encoder/test_bge_reranker_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/cross_encoder/test_gemini_reranker_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/driver/__init__.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/driver/test_falkordb_driver.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/embedder_fixtures.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/test_gemini.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/test_openai.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/embedder/test_voyage.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/data/longmemeval_data/README.md +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/data/longmemeval_data/longmemeval_oracle.json +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/eval_cli.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/eval_e2e_graph_building.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/pytest.ini +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/evals/utils.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/helpers_test.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_anthropic_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_anthropic_client_int.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_errors.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/llm_client/test_gemini_client.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_entity_exclusion_int.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_graphiti_int.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/test_graphiti_mock.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/utils/maintenance/test_temporal_operations_int.py +0 -0
- {graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/tests/utils/search/search_utils_test.py +0 -0
graphiti_core-0.30.0rc1/AGENTS.md (new file)

```diff
@@ -0,0 +1,21 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+Graphiti's core library lives under `graphiti_core/`, split into domain modules such as `nodes.py`, `edges.py`, `models/`, and `search/` for retrieval pipelines. Service adapters and API glue reside in `server/graph_service/`, while the MCP integration lives in `mcp_server/`. Shared assets and collateral sit in `images/` and `examples/`. Tests cover the package via `tests/`, with configuration in `conftest.py`, `pytest.ini`, and Docker compose files for optional services. Tooling manifests live at the repo root, including `pyproject.toml`, `Makefile`, and deployment compose files.
+
+## Build, Test, and Development Commands
+- `uv sync --extra dev`: install the dev environment declared in `pyproject.toml`.
+- `make format`: run `ruff` to sort imports and apply the canonical formatter.
+- `make lint`: execute `ruff` plus `pyright` type checks against `graphiti_core`.
+- `make test`: run the full `pytest` suite (`uv run pytest`).
+- `uv run pytest tests/path/test_file.py`: target a specific module or test selection.
+- `docker-compose -f docker-compose.test.yml up`: provision local graph/search dependencies for integration flows.
+
+## Coding Style & Naming Conventions
+Python code uses 4-space indentation, 100-character lines, and prefers single quotes as configured in `pyproject.toml`. Modules, files, and functions stay snake_case; Pydantic models in `graphiti_core/models` use PascalCase with explicit type hints. Keep side-effectful code inside drivers or adapters (`graphiti_core/driver`, `graphiti_core/utils`) and rely on pure helpers elsewhere. Run `make format` before committing to normalize imports and docstring formatting.
+
+## Testing Guidelines
+Author tests alongside features under `tests/`, naming files `test_<feature>.py` and functions `test_<behavior>`. Use `@pytest.mark.integration` for database-reliant scenarios so CI can gate them. Reproduce regressions with a failing test first and validate fixes via `uv run pytest -k "pattern"`. Start required backing services through `docker-compose.test.yml` when running integration suites locally.
+
+## Commit & Pull Request Guidelines
+Commits use an imperative, present-tense summary (for example, `add async cache invalidation`) optionally suffixed with the PR number as seen in history (`(#927)`). Squash fixups and keep unrelated changes isolated. Pull requests should include: a concise description, linked tracking issue, notes about schema or API impacts, and screenshots or logs when behavior changes. Confirm `make lint` and `make test` pass locally, and update docs or examples when public interfaces shift.
```
{graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: graphiti-core
-Version: 0.21.0rc6
+Version: 0.30.0rc1
 Summary: A temporal graph building library
 Project-URL: Homepage, https://help.getzep.com/graphiti/graphiti/overview
 Project-URL: Repository, https://github.com/getzep/graphiti
```
{graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/bulk_utils.py

```diff
@@ -43,8 +43,14 @@ from graphiti_core.models.nodes.node_db_queries import (
     get_entity_node_save_bulk_query,
     get_episode_node_save_bulk_query,
 )
-from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
+from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
 from graphiti_core.utils.datetime_utils import convert_datetimes_to_strings
+from graphiti_core.utils.maintenance.dedup_helpers import (
+    DedupResolutionState,
+    _build_candidate_indexes,
+    _normalize_string_exact,
+    _resolve_with_similarity,
+)
 from graphiti_core.utils.maintenance.edge_operations import (
     extract_edges,
     resolve_extracted_edge,
@@ -63,6 +69,38 @@ logger = logging.getLogger(__name__)
 CHUNK_SIZE = 10
 
 
+def _build_directed_uuid_map(pairs: list[tuple[str, str]]) -> dict[str, str]:
+    """Collapse alias -> canonical chains while preserving direction.
+
+    The incoming pairs represent directed mappings discovered during node dedupe. We use a simple
+    union-find with iterative path compression to ensure every source UUID resolves to its ultimate
+    canonical target, even if aliases appear lexicographically smaller than the canonical UUID.
+    """
+
+    parent: dict[str, str] = {}
+
+    def find(uuid: str) -> str:
+        """Directed union-find lookup using iterative path compression."""
+        parent.setdefault(uuid, uuid)
+        root = uuid
+        while parent[root] != root:
+            root = parent[root]
+
+        while parent[uuid] != root:
+            next_uuid = parent[uuid]
+            parent[uuid] = root
+            uuid = next_uuid
+
+        return root
+
+    for source_uuid, target_uuid in pairs:
+        parent.setdefault(source_uuid, source_uuid)
+        parent.setdefault(target_uuid, target_uuid)
+        parent[find(source_uuid)] = find(target_uuid)
+
+    return {uuid: find(uuid) for uuid in parent}
+
+
 class RawEpisode(BaseModel):
     name: str
     uuid: str | None = Field(default=None)
```
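For context, a minimal sketch of how the new `_build_directed_uuid_map` helper collapses alias chains. This is illustrative only: the UUID strings are invented, and the helper is a private function of `graphiti_core.utils.bulk_utils` as added in this diff.

```python
# Hypothetical usage of the private helper shown above; the UUID strings are made up.
from graphiti_core.utils.bulk_utils import _build_directed_uuid_map

pairs = [
    ('alias-a', 'alias-b'),    # alias-a was deduped into alias-b ...
    ('alias-b', 'canonical'),  # ... and alias-b into the canonical node
]

uuid_map = _build_directed_uuid_map(pairs)

# Every alias in the chain resolves to the final canonical target.
assert uuid_map['alias-a'] == 'canonical'
assert uuid_map['alias-b'] == 'canonical'
assert uuid_map['canonical'] == 'canonical'
```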
{graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/bulk_utils.py (continued)

```diff
@@ -266,83 +304,111 @@ async def dedupe_nodes_bulk(
     episode_tuples: list[tuple[EpisodicNode, list[EpisodicNode]]],
     entity_types: dict[str, type[BaseModel]] | None = None,
 ) -> tuple[dict[str, list[EntityNode]], dict[str, str]]:
-
-    min_score = 0.8
-
-    # generate embeddings
-    await semaphore_gather(
-        *[create_entity_node_embeddings(embedder, nodes) for nodes in extracted_nodes]
-    )
-
-    # Find similar results
-    dedupe_tuples: list[tuple[list[EntityNode], list[EntityNode]]] = []
-    for i, nodes_i in enumerate(extracted_nodes):
-        existing_nodes: list[EntityNode] = []
-        for j, nodes_j in enumerate(extracted_nodes):
-            if i == j:
-                continue
-            existing_nodes += nodes_j
-
-        candidates_i: list[EntityNode] = []
-        for node in nodes_i:
-            for existing_node in existing_nodes:
-                # Approximate BM25 by checking for word overlaps (this is faster than creating many in-memory indices)
-                # This approach will cast a wider net than BM25, which is ideal for this use case
-                node_words = set(node.name.lower().split())
-                existing_node_words = set(existing_node.name.lower().split())
-                has_overlap = not node_words.isdisjoint(existing_node_words)
-                if has_overlap:
-                    candidates_i.append(existing_node)
-                    continue
+    """Resolve entity duplicates across an in-memory batch using a two-pass strategy.
 
-
-
-
-
-
-
-                    candidates_i.append(existing_node)
-
-        dedupe_tuples.append((nodes_i, candidates_i))
+    1. Run :func:`resolve_extracted_nodes` for every episode in parallel so each batch item is
+       reconciled against the live graph just like the non-batch flow.
+    2. Re-run the deterministic similarity heuristics across the union of resolved nodes to catch
+       duplicates that only co-occur inside this batch, emitting a canonical UUID map that callers
+       can apply to edges and persistence.
+    """
 
-
-    bulk_node_resolutions: list[
-        tuple[list[EntityNode], dict[str, str], list[tuple[EntityNode, EntityNode]]]
-    ] = await semaphore_gather(
+    first_pass_results = await semaphore_gather(
         *[
             resolve_extracted_nodes(
                 clients,
-
+                nodes,
                 episode_tuples[i][0],
                 episode_tuples[i][1],
                 entity_types,
-                existing_nodes_override=dedupe_tuples[i][1],
             )
-            for i,
+            for i, nodes in enumerate(extracted_nodes)
         ]
     )
 
-
+    episode_resolutions: list[tuple[str, list[EntityNode]]] = []
+    per_episode_uuid_maps: list[dict[str, str]] = []
     duplicate_pairs: list[tuple[str, str]] = []
-    for _, _, duplicates in bulk_node_resolutions:
-        for duplicate in duplicates:
-            n, m = duplicate
-            duplicate_pairs.append((n.uuid, m.uuid))
 
-
-
+    for (resolved_nodes, uuid_map, duplicates), (episode, _) in zip(
+        first_pass_results, episode_tuples, strict=True
+    ):
+        episode_resolutions.append((episode.uuid, resolved_nodes))
+        per_episode_uuid_maps.append(uuid_map)
+        duplicate_pairs.extend((source.uuid, target.uuid) for source, target in duplicates)
+
+    canonical_nodes: dict[str, EntityNode] = {}
+    for _, resolved_nodes in episode_resolutions:
+        for node in resolved_nodes:
+            # NOTE: this loop is O(n^2) in the number of nodes inside the batch because we rebuild
+            # the MinHash index for the accumulated canonical pool each time. The LRU-backed
+            # shingle cache keeps the constant factors low for typical batch sizes (≤ CHUNK_SIZE),
+            # but if batches grow significantly we should switch to an incremental index or chunked
+            # processing.
+            if not canonical_nodes:
+                canonical_nodes[node.uuid] = node
+                continue
 
-
-
-
+            existing_candidates = list(canonical_nodes.values())
+            normalized = _normalize_string_exact(node.name)
+            exact_match = next(
+                (
+                    candidate
+                    for candidate in existing_candidates
+                    if _normalize_string_exact(candidate.name) == normalized
+                ),
+                None,
+            )
+            if exact_match is not None:
+                if exact_match.uuid != node.uuid:
+                    duplicate_pairs.append((node.uuid, exact_match.uuid))
+                continue
+
+            indexes = _build_candidate_indexes(existing_candidates)
+            state = DedupResolutionState(
+                resolved_nodes=[None],
+                uuid_map={},
+                unresolved_indices=[],
+            )
+            _resolve_with_similarity([node], indexes, state)
+
+            resolved = state.resolved_nodes[0]
+            if resolved is None:
+                canonical_nodes[node.uuid] = node
+                continue
+
+            canonical_uuid = resolved.uuid
+            canonical_nodes.setdefault(canonical_uuid, resolved)
+            if canonical_uuid != node.uuid:
+                duplicate_pairs.append((node.uuid, canonical_uuid))
+
+    union_pairs: list[tuple[str, str]] = []
+    for uuid_map in per_episode_uuid_maps:
+        union_pairs.extend(uuid_map.items())
+    union_pairs.extend(duplicate_pairs)
+
+    compressed_map: dict[str, str] = _build_directed_uuid_map(union_pairs)
 
     nodes_by_episode: dict[str, list[EntityNode]] = {}
-    for
-
+    for episode_uuid, resolved_nodes in episode_resolutions:
+        deduped_nodes: list[EntityNode] = []
+        seen: set[str] = set()
+        for node in resolved_nodes:
+            canonical_uuid = compressed_map.get(node.uuid, node.uuid)
+            if canonical_uuid in seen:
+                continue
+            seen.add(canonical_uuid)
+            canonical_node = canonical_nodes.get(canonical_uuid)
+            if canonical_node is None:
+                logger.error(
+                    'Canonical node %s missing during batch dedupe; falling back to %s',
+                    canonical_uuid,
+                    node.uuid,
+                )
+                canonical_node = node
+            deduped_nodes.append(canonical_node)
 
-        nodes_by_episode[
-            node_uuid_map[compressed_map.get(node.uuid, node.uuid)] for node in nodes
-        ]
+        nodes_by_episode[episode_uuid] = deduped_nodes
 
     return nodes_by_episode, compressed_map
 
```

(Several removed lines in this hunk were lost by the diff viewer's text extraction and are shown as bare `-` markers; two removed lines are visibly truncated in the source and are kept as-is.)
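To make the second pass concrete, here is an illustrative sketch of how a caller can apply the returned canonical UUID map to edge endpoints before persistence, as the new docstring describes. The map and UUID strings are invented; only the shape mirrors `dedupe_nodes_bulk`'s return value.

```python
# Illustrative only: compressed_map mirrors the {alias_uuid: canonical_uuid} mapping
# returned by dedupe_nodes_bulk, but the UUIDs here are made up for the example.
compressed_map = {'alias-1': 'canonical-1', 'alias-2': 'canonical-1'}

edge_endpoints = [('alias-1', 'other-node'), ('alias-2', 'other-node')]
remapped = [
    (compressed_map.get(source, source), compressed_map.get(target, target))
    for source, target in edge_endpoints
]

# Both edges now point at the single canonical entity.
assert remapped == [('canonical-1', 'other-node'), ('canonical-1', 'other-node')]
```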
graphiti_core-0.30.0rc1/graphiti_core/utils/maintenance/dedup_helpers.py (new file)

```diff
@@ -0,0 +1,262 @@
+"""
+Copyright 2024, Zep Software, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from __future__ import annotations
+
+import math
+import re
+from collections import defaultdict
+from collections.abc import Iterable
+from dataclasses import dataclass, field
+from functools import lru_cache
+from hashlib import blake2b
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from graphiti_core.nodes import EntityNode
+
+_NAME_ENTROPY_THRESHOLD = 1.5
+_MIN_NAME_LENGTH = 6
+_MIN_TOKEN_COUNT = 2
+_FUZZY_JACCARD_THRESHOLD = 0.9
+_MINHASH_PERMUTATIONS = 32
+_MINHASH_BAND_SIZE = 4
+
+
+def _normalize_string_exact(name: str) -> str:
+    """Lowercase text and collapse whitespace so equal names map to the same key."""
+    normalized = re.sub(r'[\s]+', ' ', name.lower())
+    return normalized.strip()
+
+
+def _normalize_name_for_fuzzy(name: str) -> str:
+    """Produce a fuzzier form that keeps alphanumerics and apostrophes for n-gram shingles."""
+    normalized = re.sub(r"[^a-z0-9' ]", ' ', _normalize_string_exact(name))
+    normalized = normalized.strip()
+    return re.sub(r'[\s]+', ' ', normalized)
+
+
+def _name_entropy(normalized_name: str) -> float:
+    """Approximate text specificity using Shannon entropy over characters.
+
+    We strip spaces, count how often each character appears, and sum
+    probability * -log2(probability). Short or repetitive names yield low
+    entropy, which signals we should defer resolution to the LLM instead of
+    trusting fuzzy similarity.
+    """
+    if not normalized_name:
+        return 0.0
+
+    counts: dict[str, int] = {}
+    for char in normalized_name.replace(' ', ''):
+        counts[char] = counts.get(char, 0) + 1
+
+    total = sum(counts.values())
+    if total == 0:
+        return 0.0
+
+    entropy = 0.0
+    for count in counts.values():
+        probability = count / total
+        entropy -= probability * math.log2(probability)
+
+    return entropy
+
+
+def _has_high_entropy(normalized_name: str) -> bool:
+    """Filter out very short or low-entropy names that are unreliable for fuzzy matching."""
+    token_count = len(normalized_name.split())
+    if len(normalized_name) < _MIN_NAME_LENGTH and token_count < _MIN_TOKEN_COUNT:
+        return False
+
+    return _name_entropy(normalized_name) >= _NAME_ENTROPY_THRESHOLD
+
+
+def _shingles(normalized_name: str) -> set[str]:
+    """Create 3-gram shingles from the normalized name for MinHash calculations."""
+    cleaned = normalized_name.replace(' ', '')
+    if len(cleaned) < 2:
+        return {cleaned} if cleaned else set()
+
+    return {cleaned[i : i + 3] for i in range(len(cleaned) - 2)}
+
+
+def _hash_shingle(shingle: str, seed: int) -> int:
+    """Generate a deterministic 64-bit hash for a shingle given the permutation seed."""
+    digest = blake2b(f'{seed}:{shingle}'.encode(), digest_size=8)
+    return int.from_bytes(digest.digest(), 'big')
+
+
+def _minhash_signature(shingles: Iterable[str]) -> tuple[int, ...]:
+    """Compute the MinHash signature for the shingle set across predefined permutations."""
+    if not shingles:
+        return tuple()
+
+    seeds = range(_MINHASH_PERMUTATIONS)
+    signature: list[int] = []
+    for seed in seeds:
+        min_hash = min(_hash_shingle(shingle, seed) for shingle in shingles)
+        signature.append(min_hash)
+
+    return tuple(signature)
+
+
+def _lsh_bands(signature: Iterable[int]) -> list[tuple[int, ...]]:
+    """Split the MinHash signature into fixed-size bands for locality-sensitive hashing."""
+    signature_list = list(signature)
+    if not signature_list:
+        return []
+
+    bands: list[tuple[int, ...]] = []
+    for start in range(0, len(signature_list), _MINHASH_BAND_SIZE):
+        band = tuple(signature_list[start : start + _MINHASH_BAND_SIZE])
+        if len(band) == _MINHASH_BAND_SIZE:
+            bands.append(band)
+    return bands
+
+
+def _jaccard_similarity(a: set[str], b: set[str]) -> float:
+    """Return the Jaccard similarity between two shingle sets, handling empty edge cases."""
+    if not a and not b:
+        return 1.0
+    if not a or not b:
+        return 0.0
+
+    intersection = len(a.intersection(b))
+    union = len(a.union(b))
+    return intersection / union if union else 0.0
+
+
+@lru_cache(maxsize=512)
+def _cached_shingles(name: str) -> set[str]:
+    """Cache shingle sets per normalized name to avoid recomputation within a worker."""
+    return _shingles(name)
+
+
+@dataclass
+class DedupCandidateIndexes:
+    """Precomputed lookup structures that drive entity deduplication heuristics."""
+
+    existing_nodes: list[EntityNode]
+    nodes_by_uuid: dict[str, EntityNode]
+    normalized_existing: defaultdict[str, list[EntityNode]]
+    shingles_by_candidate: dict[str, set[str]]
+    lsh_buckets: defaultdict[tuple[int, tuple[int, ...]], list[str]]
+
+
+@dataclass
+class DedupResolutionState:
+    """Mutable resolution bookkeeping shared across deterministic and LLM passes."""
+
+    resolved_nodes: list[EntityNode | None]
+    uuid_map: dict[str, str]
+    unresolved_indices: list[int]
+    duplicate_pairs: list[tuple[EntityNode, EntityNode]] = field(default_factory=list)
+
+
+def _build_candidate_indexes(existing_nodes: list[EntityNode]) -> DedupCandidateIndexes:
+    """Precompute exact and fuzzy lookup structures once per dedupe run."""
+    normalized_existing: defaultdict[str, list[EntityNode]] = defaultdict(list)
+    nodes_by_uuid: dict[str, EntityNode] = {}
+    shingles_by_candidate: dict[str, set[str]] = {}
+    lsh_buckets: defaultdict[tuple[int, tuple[int, ...]], list[str]] = defaultdict(list)
+
+    for candidate in existing_nodes:
+        normalized = _normalize_string_exact(candidate.name)
+        normalized_existing[normalized].append(candidate)
+        nodes_by_uuid[candidate.uuid] = candidate
+
+        shingles = _cached_shingles(_normalize_name_for_fuzzy(candidate.name))
+        shingles_by_candidate[candidate.uuid] = shingles
+
+        signature = _minhash_signature(shingles)
+        for band_index, band in enumerate(_lsh_bands(signature)):
+            lsh_buckets[(band_index, band)].append(candidate.uuid)
+
+    return DedupCandidateIndexes(
+        existing_nodes=existing_nodes,
+        nodes_by_uuid=nodes_by_uuid,
+        normalized_existing=normalized_existing,
+        shingles_by_candidate=shingles_by_candidate,
+        lsh_buckets=lsh_buckets,
+    )
+
+
+def _resolve_with_similarity(
+    extracted_nodes: list[EntityNode],
+    indexes: DedupCandidateIndexes,
+    state: DedupResolutionState,
+) -> None:
+    """Attempt deterministic resolution using exact name hits and fuzzy MinHash comparisons."""
+    for idx, node in enumerate(extracted_nodes):
+        normalized_exact = _normalize_string_exact(node.name)
+        normalized_fuzzy = _normalize_name_for_fuzzy(node.name)
+
+        if not _has_high_entropy(normalized_fuzzy):
+            state.unresolved_indices.append(idx)
+            continue
+
+        existing_matches = indexes.normalized_existing.get(normalized_exact, [])
+        if len(existing_matches) == 1:
+            match = existing_matches[0]
+            state.resolved_nodes[idx] = match
+            state.uuid_map[node.uuid] = match.uuid
+            if match.uuid != node.uuid:
+                state.duplicate_pairs.append((node, match))
+            continue
+        if len(existing_matches) > 1:
+            state.unresolved_indices.append(idx)
+            continue
+
+        shingles = _cached_shingles(normalized_fuzzy)
+        signature = _minhash_signature(shingles)
+        candidate_ids: set[str] = set()
+        for band_index, band in enumerate(_lsh_bands(signature)):
+            candidate_ids.update(indexes.lsh_buckets.get((band_index, band), []))
+
+        best_candidate: EntityNode | None = None
+        best_score = 0.0
+        for candidate_id in candidate_ids:
+            candidate_shingles = indexes.shingles_by_candidate.get(candidate_id, set())
+            score = _jaccard_similarity(shingles, candidate_shingles)
+            if score > best_score:
+                best_score = score
+                best_candidate = indexes.nodes_by_uuid.get(candidate_id)
+
+        if best_candidate is not None and best_score >= _FUZZY_JACCARD_THRESHOLD:
+            state.resolved_nodes[idx] = best_candidate
+            state.uuid_map[node.uuid] = best_candidate.uuid
+            if best_candidate.uuid != node.uuid:
+                state.duplicate_pairs.append((node, best_candidate))
+            continue
+
+        state.unresolved_indices.append(idx)
+
+
+__all__ = [
+    'DedupCandidateIndexes',
+    'DedupResolutionState',
+    '_normalize_string_exact',
+    '_normalize_name_for_fuzzy',
+    '_has_high_entropy',
+    '_minhash_signature',
+    '_lsh_bands',
+    '_jaccard_similarity',
+    '_cached_shingles',
+    '_FUZZY_JACCARD_THRESHOLD',
+    '_build_candidate_indexes',
+    '_resolve_with_similarity',
+]
```
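A small, illustrative exercise of the string-level helpers added above, showing the normalize → shingle → MinHash/LSH → Jaccard pipeline that `_resolve_with_similarity` relies on. The example names are made up; the thresholds referenced in the comments come from the module constants.

```python
# Illustrative only: the entity names here are invented for the example.
from graphiti_core.utils.maintenance.dedup_helpers import (
    _cached_shingles,
    _has_high_entropy,
    _jaccard_similarity,
    _lsh_bands,
    _minhash_signature,
    _normalize_name_for_fuzzy,
)

a = _cached_shingles(_normalize_name_for_fuzzy('Graphiti Core Library'))
b = _cached_shingles(_normalize_name_for_fuzzy('graphiti   core library'))

# Identical after normalization, so the shingle sets agree and Jaccard similarity is 1.0,
# comfortably above the 0.9 _FUZZY_JACCARD_THRESHOLD.
assert _jaccard_similarity(a, b) == 1.0

# Candidates are only compared when they share at least one LSH band
# (4 consecutive values of the 32-permutation MinHash signature).
assert set(_lsh_bands(_minhash_signature(a))) & set(_lsh_bands(_minhash_signature(b)))

# Very short, low-entropy names are never resolved fuzzily; they fall through to the LLM pass.
assert not _has_high_entropy(_normalize_name_for_fuzzy('Bob'))
```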
{graphiti_core-0.21.0rc6 → graphiti_core-0.30.0rc1}/graphiti_core/utils/maintenance/edge_operations.py

```diff
@@ -41,6 +41,7 @@ from graphiti_core.search.search_config import SearchResults
 from graphiti_core.search.search_config_recipes import EDGE_HYBRID_SEARCH_RRF
 from graphiti_core.search.search_filters import SearchFilters
 from graphiti_core.utils.datetime_utils import ensure_utc, utc_now
+from graphiti_core.utils.maintenance.dedup_helpers import _normalize_string_exact
 
 logger = logging.getLogger(__name__)
 
@@ -397,6 +398,19 @@ async def resolve_extracted_edge(
     if len(related_edges) == 0 and len(existing_edges) == 0:
         return extracted_edge, [], []
 
+    # Fast path: if the fact text and endpoints already exist verbatim, reuse the matching edge.
+    normalized_fact = _normalize_string_exact(extracted_edge.fact)
+    for edge in related_edges:
+        if (
+            edge.source_node_uuid == extracted_edge.source_node_uuid
+            and edge.target_node_uuid == extracted_edge.target_node_uuid
+            and _normalize_string_exact(edge.fact) == normalized_fact
+        ):
+            resolved = edge
+            if episode is not None and episode.uuid not in resolved.episodes:
+                resolved.episodes.append(episode.uuid)
+            return resolved, [], []
+
     start = time()
 
     # Prepare context for LLM
```
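The fast path hinges on `_normalize_string_exact` treating casing and whitespace differences as equal before the endpoint comparison. A minimal illustration, with invented fact strings:

```python
# Illustrative only: the exact-match fast path compares normalized fact text plus edge endpoints.
from graphiti_core.utils.maintenance.dedup_helpers import _normalize_string_exact

existing_fact = 'Alice   works at ACME'
extracted_fact = 'alice works at acme'

# Casing and repeated whitespace are ignored, so these count as the same fact text.
assert _normalize_string_exact(existing_fact) == _normalize_string_exact(extracted_fact)
```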