PyPI - graphiti-core - Versions diffs - 0.9.6__tar.gz → 0.10.0__tar.gz - Mend

graphiti-core 0.9.6.tar.gz → 0.10.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of graphiti-core might be problematic. Click here for more details.

Files changed (65) hide show

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: graphiti-core
-Version: 0.9.6
+Version: 0.10.0
 Summary: A temporal graph building library
 License: Apache-2.0
 Author: Paul Paliychuk
@@ -126,6 +126,11 @@ Requirements:
 - Neo4j 5.26 or higher (serves as the embeddings storage backend)
 - OpenAI API key (for LLM inference and embedding)
+> [!IMPORTANT]
+> Graphiti works best with LLM services that support Structured Output (such as OpenAI and Gemini).
+> Using other services may result in incorrect output schemas and ingestion failures. This is particularly
+> problematic when using smaller models.
 Optional:
 - Google Gemini, Anthropic, or Groq API key (for alternative LLM providers)

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/README.md RENAMED Viewed

@@ -94,6 +94,11 @@ Requirements:
 - Neo4j 5.26 or higher (serves as the embeddings storage backend)
 - OpenAI API key (for LLM inference and embedding)
+> [!IMPORTANT]
+> Graphiti works best with LLM services that support Structured Output (such as OpenAI and Gemini).
+> Using other services may result in incorrect output schemas and ingestion failures. This is particularly
+> problematic when using smaller models.
 Optional:
 - Google Gemini, Anthropic, or Groq API key (for alternative LLM providers)

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/cross_encoder/openai_reranker_client.py RENAMED Viewed

@@ -17,9 +17,9 @@ limitations under the License.
 import logging
 from typing import Any
+import numpy as np
 import openai
 from openai import AsyncAzureOpenAI, AsyncOpenAI
-from pydantic import BaseModel
 from ..helpers import semaphore_gather
 from ..llm_client import LLMConfig, RateLimitError
@@ -28,11 +28,7 @@ from .client import CrossEncoderClient
 logger = logging.getLogger(__name__)
-DEFAULT_MODEL = 'gpt-4o-mini'
-class BooleanClassifier(BaseModel):
-    isTrue: bool
+DEFAULT_MODEL = 'gpt-4.1-nano'
 class OpenAIRerankerClient(CrossEncoderClient):
@@ -107,11 +103,15 @@ class OpenAIRerankerClient(CrossEncoderClient):
             ]
             scores: list[float] = []
             for top_logprobs in responses_top_logprobs:
-                for logprob in top_logprobs:
-                    if bool(logprob.token):
-                        scores.append(logprob.logprob)
-            results = [(passage, score) for passage, score in zip(passages, scores, strict=False)]
+                if len(top_logprobs) == 0:
+                    continue
+                norm_logprobs = np.exp(top_logprobs[0].logprob)
+                if bool(top_logprobs[0].token):
+                    scores.append(norm_logprobs)
+                else:
+                    scores.append(1 - norm_logprobs)
+            results = [(passage, score) for passage, score in zip(passages, scores, strict=True)]
             results.sort(reverse=True, key=lambda x: x[1])
             return results
         except openai.RateLimitError as e:

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/graphiti.py RENAMED Viewed

@@ -750,7 +750,7 @@ class Graphiti:
         nodes = await get_mentioned_nodes(self.driver, episodes)
-        return SearchResults(edges=edges, nodes=nodes, communities=[])
+        return SearchResults(edges=edges, nodes=nodes, episodes=[], communities=[])
     async def add_triplet(self, source_node: EntityNode, edge: EntityEdge, target_node: EntityNode):
         if source_node.name_embedding is None:

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/llm_client/client.py RENAMED Viewed

@@ -32,6 +32,10 @@ from .errors import RateLimitError
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'
+MULTILINGUAL_EXTRACTION_RESPONSES = (
+    '\n\nAny extracted information should be returned in the same language as it was written in.'
+)
 logger = logging.getLogger(__name__)
@@ -133,6 +137,9 @@ class LLMClient(ABC):
                 f'\n\nRespond with a JSON object in the following format:\n\n{serialized_model}'
             )
+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/llm_client/config.py RENAMED Viewed

@@ -43,7 +43,7 @@ class LLMConfig:
                                                 This is required for making authorized requests.
                 model (str, optional): The specific LLM model to use for generating responses.
-                                                                Defaults to "gpt-4o-mini", which appears to be a custom model name.
+                                                                Defaults to "gpt-4.1-mini", which appears to be a custom model name.
                                                                 Common values might include "gpt-3.5-turbo" or "gpt-4".
                 base_url (str, optional): The base URL of the LLM API service.

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/llm_client/openai_client.py RENAMED Viewed

@@ -24,13 +24,13 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 from ..prompts.models import Message
-from .client import LLMClient
+from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
 from .config import DEFAULT_MAX_TOKENS, LLMConfig
 from .errors import RateLimitError, RefusalError
 logger = logging.getLogger(__name__)
-DEFAULT_MODEL = 'gpt-4o-mini'
+DEFAULT_MODEL = 'gpt-4.1-mini'
 class OpenAIClient(LLMClient):
@@ -136,6 +136,9 @@ class OpenAIClient(LLMClient):
         retry_count = 0
         last_error = None
+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
         while retry_count <= self.MAX_RETRIES:
             try:
                 response = await self._generate_response(messages, response_model, max_tokens)

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/llm_client/openai_generic_client.py RENAMED Viewed

@@ -25,13 +25,13 @@ from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 from ..prompts.models import Message
-from .client import LLMClient
+from .client import MULTILINGUAL_EXTRACTION_RESPONSES, LLMClient
 from .config import DEFAULT_MAX_TOKENS, LLMConfig
 from .errors import RateLimitError, RefusalError
 logger = logging.getLogger(__name__)
-DEFAULT_MODEL = 'gpt-4o-mini'
+DEFAULT_MODEL = 'gpt-4.1-mini'
 class OpenAIGenericClient(LLMClient):
@@ -130,6 +130,9 @@ class OpenAIGenericClient(LLMClient):
                 f'\n\nRespond with a JSON object in the following format:\n\n{serialized_model}'
             )
+        # Add multilingual extraction instructions
+        messages[0].content += MULTILINGUAL_EXTRACTION_RESPONSES
         while retry_count <= self.MAX_RETRIES:
             try:
                 response = await self._generate_response(

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/nodes.py RENAMED Viewed

@@ -251,6 +251,31 @@ class EpisodicNode(Node):
         return episodes
+    @classmethod
+    async def get_by_entity_node_uuid(cls, driver: AsyncDriver, entity_node_uuid: str):
+        records, _, _ = await driver.execute_query(
+            """
+        MATCH (e:Episodic)-[r:MENTIONS]->(n:Entity {uuid: $entity_node_uuid})
+            RETURN DISTINCT
+            e.content AS content,
+            e.created_at AS created_at,
+            e.valid_at AS valid_at,
+            e.uuid AS uuid,
+            e.name AS name,
+            e.group_id AS group_id,
+            e.source_description AS source_description,
+            e.source AS source,
+            e.entity_edges AS entity_edges
+        """,
+            entity_node_uuid=entity_node_uuid,
+            database_=DEFAULT_DATABASE,
+            routing_='r',
+        )
+        episodes = [get_episodic_node_from_record(record) for record in records]
+        return episodes
 class EntityNode(Node):
     name_embedding: list[float] | None = Field(default=None, description='embedding of the name')

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/prompts/eval.py RENAMED Viewed

@@ -37,16 +37,28 @@ class EvalResponse(BaseModel):
     )
+class EvalAddEpisodeResults(BaseModel):
+    candidate_is_worse: bool = Field(
+        ...,
+        description='boolean if the baseline extraction is higher quality than the candidate extraction.',
+    )
+    reasoning: str = Field(
+        ..., description='why you determined the response was correct or incorrect'
+    )
 class Prompt(Protocol):
     qa_prompt: PromptVersion
     eval_prompt: PromptVersion
     query_expansion: PromptVersion
+    eval_add_episode_results: PromptVersion
 class Versions(TypedDict):
     qa_prompt: PromptFunction
     eval_prompt: PromptFunction
     query_expansion: PromptFunction
+    eval_add_episode_results: PromptFunction
 def query_expansion(context: dict[str, Any]) -> list[Message]:
@@ -112,8 +124,41 @@ def eval_prompt(context: dict[str, Any]) -> list[Message]:
     ]
+def eval_add_episode_results(context: dict[str, Any]) -> list[Message]:
+    sys_prompt = """You are a judge that determines whether a baseline graph building result from a list of messages is better
+        than a candidate graph building result based on the same messages."""
+    user_prompt = f"""
+    Given the following PREVIOUS MESSAGES and MESSAGE, determine if the BASELINE graph data extracted from the
+    conversation is higher quality than the CANDIDATE graph data extracted from the conversation.
+    Return False if the BASELINE extraction is better, and True otherwise. If the CANDIDATE extraction and
+    BASELINE extraction are nearly identical in quality, return True. Add your reasoning for your decision to the reasoning field
+    <PREVIOUS MESSAGES>
+    {context['previous_messages']}
+    </PREVIOUS MESSAGES>
+    <MESSAGE>
+    {context['message']}
+    </MESSAGE>
+    <BASELINE>
+    {context['baseline']}
+    </BASELINE>
+    <CANDIDATE>
+    {context['candidate']}
+    </CANDIDATE>
+    """
+    return [
+        Message(role='system', content=sys_prompt),
+        Message(role='user', content=user_prompt),
+    ]
 versions: Versions = {
     'qa_prompt': qa_prompt,
     'eval_prompt': eval_prompt,
     'query_expansion': query_expansion,
+    'eval_add_episode_results': eval_add_episode_results,
 }

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/search/search.py RENAMED Viewed

@@ -25,7 +25,7 @@ from graphiti_core.edges import EntityEdge
 from graphiti_core.embedder import EmbedderClient
 from graphiti_core.errors import SearchRerankerError
 from graphiti_core.helpers import semaphore_gather
-from graphiti_core.nodes import CommunityNode, EntityNode
+from graphiti_core.nodes import CommunityNode, EntityNode, EpisodicNode
 from graphiti_core.search.search_config import (
     DEFAULT_SEARCH_LIMIT,
     CommunityReranker,
@@ -33,6 +33,8 @@ from graphiti_core.search.search_config import (
     EdgeReranker,
     EdgeSearchConfig,
     EdgeSearchMethod,
+    EpisodeReranker,
+    EpisodeSearchConfig,
     NodeReranker,
     NodeSearchConfig,
     NodeSearchMethod,
@@ -46,6 +48,7 @@ from graphiti_core.search.search_utils import (
     edge_bfs_search,
     edge_fulltext_search,
     edge_similarity_search,
+    episode_fulltext_search,
     episode_mentions_reranker,
     maximal_marginal_relevance,
     node_bfs_search,
@@ -74,13 +77,14 @@ async def search(
         return SearchResults(
             edges=[],
             nodes=[],
+            episodes=[],
             communities=[],
         )
     query_vector = await embedder.create(input_data=[query.replace('\n', ' ')])
     # if group_ids is empty, set it to None
     group_ids = group_ids if group_ids else None
-    edges, nodes, communities = await semaphore_gather(
+    edges, nodes, episodes, communities = await semaphore_gather(
         edge_search(
             driver,
             cross_encoder,
@@ -92,6 +96,7 @@ async def search(
             center_node_uuid,
             bfs_origin_node_uuids,
             config.limit,
+            config.reranker_min_score,
         ),
         node_search(
             driver,
@@ -104,6 +109,18 @@ async def search(
             center_node_uuid,
             bfs_origin_node_uuids,
             config.limit,
+            config.reranker_min_score,
+        ),
+        episode_search(
+            driver,
+            cross_encoder,
+            query,
+            query_vector,
+            group_ids,
+            config.episode_config,
+            search_filter,
+            config.limit,
+            config.reranker_min_score,
         ),
         community_search(
             driver,
@@ -112,14 +129,15 @@ async def search(
             query_vector,
             group_ids,
             config.community_config,
-            bfs_origin_node_uuids,
             config.limit,
+            config.reranker_min_score,
         ),
     )
     results = SearchResults(
         edges=edges,
         nodes=nodes,
+        episodes=episodes,
         communities=communities,
     )
@@ -141,6 +159,7 @@ async def edge_search(
     center_node_uuid: str | None = None,
     bfs_origin_node_uuids: list[str] | None = None,
     limit=DEFAULT_SEARCH_LIMIT,
+    reranker_min_score: float = 0,
 ) -> list[EntityEdge]:
     if config is None:
         return []
@@ -180,7 +199,7 @@ async def edge_search(
     if config.reranker == EdgeReranker.rrf or config.reranker == EdgeReranker.episode_mentions:
         search_result_uuids = [[edge.uuid for edge in result] for result in search_results]
-        reranked_uuids = rrf(search_result_uuids)
+        reranked_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
     elif config.reranker == EdgeReranker.mmr:
         search_result_uuids_and_vectors = [
             (edge.uuid, edge.fact_embedding if edge.fact_embedding is not None else [0.0] * 1024)
@@ -188,23 +207,31 @@ async def edge_search(
             for edge in result
         ]
         reranked_uuids = maximal_marginal_relevance(
-            query_vector, search_result_uuids_and_vectors, config.mmr_lambda
+            query_vector,
+            search_result_uuids_and_vectors,
+            config.mmr_lambda,
+            min_score=reranker_min_score,
         )
     elif config.reranker == EdgeReranker.cross_encoder:
         search_result_uuids = [[edge.uuid for edge in result] for result in search_results]
-        rrf_result_uuids = rrf(search_result_uuids)
+        rrf_result_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
         rrf_edges = [edge_uuid_map[uuid] for uuid in rrf_result_uuids][:limit]
         fact_to_uuid_map = {edge.fact: edge.uuid for edge in rrf_edges}
         reranked_facts = await cross_encoder.rank(query, list(fact_to_uuid_map.keys()))
-        reranked_uuids = [fact_to_uuid_map[fact] for fact, _ in reranked_facts]
+        reranked_uuids = [
+            fact_to_uuid_map[fact] for fact, score in reranked_facts if score >= reranker_min_score
+        ]
     elif config.reranker == EdgeReranker.node_distance:
         if center_node_uuid is None:
             raise SearchRerankerError('No center node provided for Node Distance reranker')
         # use rrf as a preliminary sort
-        sorted_result_uuids = rrf([[edge.uuid for edge in result] for result in search_results])
+        sorted_result_uuids = rrf(
+            [[edge.uuid for edge in result] for result in search_results],
+            min_score=reranker_min_score,
+        )
         sorted_results = [edge_uuid_map[uuid] for uuid in sorted_result_uuids]
         # node distance reranking
@@ -214,7 +241,9 @@ async def edge_search(
         source_uuids = [source_node_uuid for source_node_uuid in source_to_edge_uuid_map]
-        reranked_node_uuids = await node_distance_reranker(driver, source_uuids, center_node_uuid)
+        reranked_node_uuids = await node_distance_reranker(
+            driver, source_uuids, center_node_uuid, min_score=reranker_min_score
+        )
         for node_uuid in reranked_node_uuids:
             reranked_uuids.extend(source_to_edge_uuid_map[node_uuid])
@@ -238,6 +267,7 @@ async def node_search(
     center_node_uuid: str | None = None,
     bfs_origin_node_uuids: list[str] | None = None,
     limit=DEFAULT_SEARCH_LIMIT,
+    reranker_min_score: float = 0,
 ) -> list[EntityNode]:
     if config is None:
         return []
@@ -269,7 +299,7 @@ async def node_search(
     reranked_uuids: list[str] = []
     if config.reranker == NodeReranker.rrf:
-        reranked_uuids = rrf(search_result_uuids)
+        reranked_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
     elif config.reranker == NodeReranker.mmr:
         search_result_uuids_and_vectors = [
             (node.uuid, node.name_embedding if node.name_embedding is not None else [0.0] * 1024)
@@ -277,24 +307,36 @@ async def node_search(
             for node in result
         ]
         reranked_uuids = maximal_marginal_relevance(
-            query_vector, search_result_uuids_and_vectors, config.mmr_lambda
+            query_vector,
+            search_result_uuids_and_vectors,
+            config.mmr_lambda,
+            min_score=reranker_min_score,
         )
     elif config.reranker == NodeReranker.cross_encoder:
         # use rrf as a preliminary reranker
-        rrf_result_uuids = rrf(search_result_uuids)
+        rrf_result_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
         rrf_results = [node_uuid_map[uuid] for uuid in rrf_result_uuids][:limit]
         summary_to_uuid_map = {node.summary: node.uuid for node in rrf_results}
         reranked_summaries = await cross_encoder.rank(query, list(summary_to_uuid_map.keys()))
-        reranked_uuids = [summary_to_uuid_map[fact] for fact, _ in reranked_summaries]
+        reranked_uuids = [
+            summary_to_uuid_map[fact]
+            for fact, score in reranked_summaries
+            if score >= reranker_min_score
+        ]
     elif config.reranker == NodeReranker.episode_mentions:
-        reranked_uuids = await episode_mentions_reranker(driver, search_result_uuids)
+        reranked_uuids = await episode_mentions_reranker(
+            driver, search_result_uuids, min_score=reranker_min_score
+        )
     elif config.reranker == NodeReranker.node_distance:
         if center_node_uuid is None:
             raise SearchRerankerError('No center node provided for Node Distance reranker')
         reranked_uuids = await node_distance_reranker(
-            driver, rrf(search_result_uuids), center_node_uuid
+            driver,
+            rrf(search_result_uuids, min_score=reranker_min_score),
+            center_node_uuid,
+            min_score=reranker_min_score,
         )
     reranked_nodes = [node_uuid_map[uuid] for uuid in reranked_uuids]
@@ -302,6 +344,54 @@ async def node_search(
     return reranked_nodes[:limit]
+async def episode_search(
+    driver: AsyncDriver,
+    cross_encoder: CrossEncoderClient,
+    query: str,
+    _query_vector: list[float],
+    group_ids: list[str] | None,
+    config: EpisodeSearchConfig | None,
+    search_filter: SearchFilters,
+    limit=DEFAULT_SEARCH_LIMIT,
+    reranker_min_score: float = 0,
+) -> list[EpisodicNode]:
+    if config is None:
+        return []
+    search_results: list[list[EpisodicNode]] = list(
+        await semaphore_gather(
+            *[
+                episode_fulltext_search(driver, query, search_filter, group_ids, 2 * limit),
+            ]
+        )
+    )
+    search_result_uuids = [[episode.uuid for episode in result] for result in search_results]
+    episode_uuid_map = {episode.uuid: episode for result in search_results for episode in result}
+    reranked_uuids: list[str] = []
+    if config.reranker == EpisodeReranker.rrf:
+        reranked_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
+    elif config.reranker == EpisodeReranker.cross_encoder:
+        # use rrf as a preliminary reranker
+        rrf_result_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
+        rrf_results = [episode_uuid_map[uuid] for uuid in rrf_result_uuids][:limit]
+        content_to_uuid_map = {episode.content: episode.uuid for episode in rrf_results}
+        reranked_contents = await cross_encoder.rank(query, list(content_to_uuid_map.keys()))
+        reranked_uuids = [
+            content_to_uuid_map[content]
+            for content, score in reranked_contents
+            if score >= reranker_min_score
+        ]
+    reranked_episodes = [episode_uuid_map[uuid] for uuid in reranked_uuids]
+    return reranked_episodes[:limit]
 async def community_search(
     driver: AsyncDriver,
     cross_encoder: CrossEncoderClient,
@@ -309,8 +399,8 @@ async def community_search(
     query_vector: list[float],
     group_ids: list[str] | None,
     config: CommunitySearchConfig | None,
-    bfs_origin_node_uuids: list[str] | None = None,
     limit=DEFAULT_SEARCH_LIMIT,
+    reranker_min_score: float = 0,
 ) -> list[CommunityNode]:
     if config is None:
         return []
@@ -333,7 +423,7 @@ async def community_search(
     reranked_uuids: list[str] = []
     if config.reranker == CommunityReranker.rrf:
-        reranked_uuids = rrf(search_result_uuids)
+        reranked_uuids = rrf(search_result_uuids, min_score=reranker_min_score)
     elif config.reranker == CommunityReranker.mmr:
         search_result_uuids_and_vectors = [
             (
@@ -344,14 +434,21 @@ async def community_search(
             for community in result
         ]
         reranked_uuids = maximal_marginal_relevance(
-            query_vector, search_result_uuids_and_vectors, config.mmr_lambda
+            query_vector,
+            search_result_uuids_and_vectors,
+            config.mmr_lambda,
+            min_score=reranker_min_score,
         )
     elif config.reranker == CommunityReranker.cross_encoder:
         summary_to_uuid_map = {
             node.summary: node.uuid for result in search_results for node in result
         }
         reranked_summaries = await cross_encoder.rank(query, list(summary_to_uuid_map.keys()))
-        reranked_uuids = [summary_to_uuid_map[fact] for fact, _ in reranked_summaries]
+        reranked_uuids = [
+            summary_to_uuid_map[fact]
+            for fact, score in reranked_summaries
+            if score >= reranker_min_score
+        ]
     reranked_communities = [community_uuid_map[uuid] for uuid in reranked_uuids]

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/search/search_config.py RENAMED Viewed

@@ -19,7 +19,7 @@ from enum import Enum
 from pydantic import BaseModel, Field
 from graphiti_core.edges import EntityEdge
-from graphiti_core.nodes import CommunityNode, EntityNode
+from graphiti_core.nodes import CommunityNode, EntityNode, EpisodicNode
 from graphiti_core.search.search_utils import (
     DEFAULT_MIN_SCORE,
     DEFAULT_MMR_LAMBDA,
@@ -41,6 +41,10 @@ class NodeSearchMethod(Enum):
     bfs = 'breadth_first_search'
+class EpisodeSearchMethod(Enum):
+    bm25 = 'bm25'
 class CommunitySearchMethod(Enum):
     cosine_similarity = 'cosine_similarity'
     bm25 = 'bm25'
@@ -62,6 +66,11 @@ class NodeReranker(Enum):
     cross_encoder = 'cross_encoder'
+class EpisodeReranker(Enum):
+    rrf = 'reciprocal_rank_fusion'
+    cross_encoder = 'cross_encoder'
 class CommunityReranker(Enum):
     rrf = 'reciprocal_rank_fusion'
     mmr = 'mmr'
@@ -84,6 +93,14 @@ class NodeSearchConfig(BaseModel):
     bfs_max_depth: int = Field(default=MAX_SEARCH_DEPTH)
+class EpisodeSearchConfig(BaseModel):
+    search_methods: list[EpisodeSearchMethod]
+    reranker: EpisodeReranker = Field(default=EpisodeReranker.rrf)
+    sim_min_score: float = Field(default=DEFAULT_MIN_SCORE)
+    mmr_lambda: float = Field(default=DEFAULT_MMR_LAMBDA)
+    bfs_max_depth: int = Field(default=MAX_SEARCH_DEPTH)
 class CommunitySearchConfig(BaseModel):
     search_methods: list[CommunitySearchMethod]
     reranker: CommunityReranker = Field(default=CommunityReranker.rrf)
@@ -95,11 +112,14 @@ class CommunitySearchConfig(BaseModel):
 class SearchConfig(BaseModel):
     edge_config: EdgeSearchConfig | None = Field(default=None)
     node_config: NodeSearchConfig | None = Field(default=None)
+    episode_config: EpisodeSearchConfig | None = Field(default=None)
     community_config: CommunitySearchConfig | None = Field(default=None)
     limit: int = Field(default=DEFAULT_SEARCH_LIMIT)
+    reranker_min_score: float = Field(default=0)
 class SearchResults(BaseModel):
     edges: list[EntityEdge]
     nodes: list[EntityNode]
+    episodes: list[EpisodicNode]
     communities: list[CommunityNode]

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/search/search_config_recipes.py RENAMED Viewed

@@ -21,6 +21,9 @@ from graphiti_core.search.search_config import (
     EdgeReranker,
     EdgeSearchConfig,
     EdgeSearchMethod,
+    EpisodeReranker,
+    EpisodeSearchConfig,
+    EpisodeSearchMethod,
     NodeReranker,
     NodeSearchConfig,
     NodeSearchMethod,
@@ -37,6 +40,12 @@ COMBINED_HYBRID_SEARCH_RRF = SearchConfig(
         search_methods=[NodeSearchMethod.bm25, NodeSearchMethod.cosine_similarity],
         reranker=NodeReranker.rrf,
     ),
+    episode_config=EpisodeSearchConfig(
+        search_methods=[
+            EpisodeSearchMethod.bm25,
+        ],
+        reranker=EpisodeReranker.rrf,
+    ),
     community_config=CommunitySearchConfig(
         search_methods=[CommunitySearchMethod.bm25, CommunitySearchMethod.cosine_similarity],
         reranker=CommunityReranker.rrf,
@@ -55,6 +64,12 @@ COMBINED_HYBRID_SEARCH_MMR = SearchConfig(
         reranker=NodeReranker.mmr,
         mmr_lambda=1,
     ),
+    episode_config=EpisodeSearchConfig(
+        search_methods=[
+            EpisodeSearchMethod.bm25,
+        ],
+        reranker=EpisodeReranker.rrf,
+    ),
     community_config=CommunitySearchConfig(
         search_methods=[CommunitySearchMethod.bm25, CommunitySearchMethod.cosine_similarity],
         reranker=CommunityReranker.mmr,
@@ -80,6 +95,12 @@ COMBINED_HYBRID_SEARCH_CROSS_ENCODER = SearchConfig(
         ],
         reranker=NodeReranker.cross_encoder,
     ),
+    episode_config=EpisodeSearchConfig(
+        search_methods=[
+            EpisodeSearchMethod.bm25,
+        ],
+        reranker=EpisodeReranker.cross_encoder,
+    ),
     community_config=CommunitySearchConfig(
         search_methods=[CommunitySearchMethod.bm25, CommunitySearchMethod.cosine_similarity],
         reranker=CommunityReranker.cross_encoder,

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/search/search_helpers.py RENAMED Viewed

@@ -38,6 +38,13 @@ def search_results_to_context_string(search_results: SearchResults) -> str:
     entity_json = [
         {'entity_name': node.name, 'summary': node.summary} for node in search_results.nodes
     ]
+    episode_json = [
+        {
+            'source_description': episode.source_description,
+            'content': episode.content,
+        }
+        for episode in search_results.episodes
+    ]
     community_json = [
         {'community_name': community.name, 'summary': community.summary}
         for community in search_results.communities
@@ -55,6 +62,9 @@ def search_results_to_context_string(search_results: SearchResults) -> str:
     <ENTITIES>
     {json.dumps(entity_json, indent=12)}
     </ENTITIES>
+    <EPISODES>
+    {json.dumps(episode_json, indent=12)}
+    </EPISODES>
     <COMMUNITIES>
     {json.dumps(community_json, indent=12)}
     </COMMUNITIES>

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/search/search_utils.py RENAMED Viewed

@@ -37,6 +37,7 @@ from graphiti_core.nodes import (
     EpisodicNode,
     get_community_node_from_record,
     get_entity_node_from_record,
+    get_episodic_node_from_record,
 )
 from graphiti_core.search.search_filters import (
     SearchFilters,
@@ -229,8 +230,8 @@ async def edge_similarity_search(
     query: LiteralString = (
         """
-                                                                                                MATCH (n:Entity)-[r:RELATES_TO]->(m:Entity)
-                                                                                                """
+                                                                                                                    MATCH (n:Entity)-[r:RELATES_TO]->(m:Entity)
+                                                                                                                    """
         + group_filter_query
         + filter_query
         + """\nWITH DISTINCT r, vector.similarity.cosine(r.fact_embedding, $search_vector) AS score
@@ -475,6 +476,48 @@ async def node_bfs_search(
     return nodes
+async def episode_fulltext_search(
+    driver: AsyncDriver,
+    query: str,
+    _search_filter: SearchFilters,
+    group_ids: list[str] | None = None,
+    limit=RELEVANT_SCHEMA_LIMIT,
+) -> list[EpisodicNode]:
+    # BM25 search to get top episodes
+    fuzzy_query = fulltext_query(query, group_ids)
+    if fuzzy_query == '':
+        return []
+    records, _, _ = await driver.execute_query(
+        """
+        CALL db.index.fulltext.queryNodes("episode_content", $query, {limit: $limit})
+        YIELD node AS episode, score
+        MATCH (e:Episodic)
+        WHERE e.uuid = episode.uuid
+        RETURN
+            e.content AS content,
+            e.created_at AS created_at,
+            e.valid_at AS valid_at,
+            e.uuid AS uuid,
+            e.name AS name,
+            e.group_id AS group_id,
+            e.source_description AS source_description,
+            e.source AS source,
+            e.entity_edges AS entity_edges
+        ORDER BY score DESC
+        LIMIT $limit
+        """,
+        query=fuzzy_query,
+        group_ids=group_ids,
+        limit=limit,
+        database_=DEFAULT_DATABASE,
+        routing_='r',
+    )
+    episodes = [get_episodic_node_from_record(record) for record in records]
+    return episodes
 async def community_fulltext_search(
     driver: AsyncDriver,
     query: str,
@@ -718,7 +761,7 @@ async def get_relevant_edges(
 # takes in a list of rankings of uuids
-def rrf(results: list[list[str]], rank_const=1) -> list[str]:
+def rrf(results: list[list[str]], rank_const=1, min_score: float = 0) -> list[str]:
     scores: dict[str, float] = defaultdict(float)
     for result in results:
         for i, uuid in enumerate(result):
@@ -729,11 +772,14 @@ def rrf(results: list[list[str]], rank_const=1) -> list[str]:
     sorted_uuids = [term[0] for term in scored_uuids]
-    return sorted_uuids
+    return [uuid for uuid in sorted_uuids if scores[uuid] >= min_score]
 async def node_distance_reranker(
-    driver: AsyncDriver, node_uuids: list[str], center_node_uuid: str
+    driver: AsyncDriver,
+    node_uuids: list[str],
+    center_node_uuid: str,
+    min_score: float = 0,
 ) -> list[str]:
     # filter out node_uuid center node node uuid
     filtered_uuids = list(filter(lambda node_uuid: node_uuid != center_node_uuid, node_uuids))
@@ -767,12 +813,15 @@ async def node_distance_reranker(
     # add back in filtered center uuid if it was filtered out
     if center_node_uuid in node_uuids:
+        scores[center_node_uuid] = 0.1
         filtered_uuids = [center_node_uuid] + filtered_uuids
-    return filtered_uuids
+    return [uuid for uuid in filtered_uuids if (1 / scores[uuid]) >= min_score]
-async def episode_mentions_reranker(driver: AsyncDriver, node_uuids: list[list[str]]) -> list[str]:
+async def episode_mentions_reranker(
+    driver: AsyncDriver, node_uuids: list[list[str]], min_score: float = 0
+) -> list[str]:
     # use rrf as a preliminary ranker
     sorted_uuids = rrf(node_uuids)
     scores: dict[str, float] = {}
@@ -796,13 +845,14 @@ async def episode_mentions_reranker(driver: AsyncDriver, node_uuids: list[list[s
     # rerank on shortest distance
     sorted_uuids.sort(key=lambda cur_uuid: scores[cur_uuid])
-    return sorted_uuids
+    return [uuid for uuid in sorted_uuids if scores[uuid] >= min_score]
 def maximal_marginal_relevance(
     query_vector: list[float],
     candidates: list[tuple[str, list[float]]],
     mmr_lambda: float = DEFAULT_MMR_LAMBDA,
+    min_score: float = 0,
 ):
     candidates_with_mmr: list[tuple[str, float]] = []
     for candidate in candidates:
@@ -812,4 +862,6 @@ def maximal_marginal_relevance(
     candidates_with_mmr.sort(reverse=True, key=lambda c: c[1])
-    return list(set([candidate[0] for candidate in candidates_with_mmr]))
+    return list(
+        set([candidate[0] for candidate in candidates_with_mmr if candidate[1] >= min_score])
+    )

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/graphiti_core/utils/maintenance/graph_data_operations.py RENAMED Viewed

@@ -71,6 +71,8 @@ async def build_indices_and_constraints(driver: AsyncDriver, delete_existing: bo
     ]
     fulltext_indices: list[LiteralString] = [
+        """CREATE FULLTEXT INDEX episode_content IF NOT EXISTS
+        FOR (e:Episodic) ON EACH [e.content, e.source, e.group_id]""",
         """CREATE FULLTEXT INDEX node_name_and_summary IF NOT EXISTS
         FOR (n:Entity) ON EACH [n.name, n.summary, n.group_id]""",
         """CREATE FULLTEXT INDEX community_name IF NOT EXISTS

{graphiti_core-0.9.6 → graphiti_core-0.10.0}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "graphiti-core"
 description = "A temporal graph building library"
-version = "0.9.6"
+version = "0.10.0"
 authors = [
     { "name" = "Paul Paliychuk", "email" = "paul@getzep.com" },
     { "name" = "Preston Rasmussen", "email" = "preston@getzep.com" },