cognee 0.2.3.dev1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -6
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -107
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -1
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -0
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/methods/search.py +46 -5
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +6 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +33 -3
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +2 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/RECORD +120 -83
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/cli/exceptions.py
ADDED
@@ -0,0 +1,23 @@
+from typing import Optional
+
+
+class CliCommandException(Exception):
+    """Exception raised by CLI commands with additional context"""
+
+    def __init__(
+        self,
+        message: str,
+        error_code: int = -1,
+        docs_url: Optional[str] = None,
+        raiseable_exception: Optional[Exception] = None,
+    ) -> None:
+        super().__init__(message)
+        self.error_code = error_code
+        self.docs_url = docs_url
+        self.raiseable_exception = raiseable_exception
+
+
+class CliCommandInnerException(Exception):
+    """Inner exception for wrapping other exceptions in CLI context"""
+
+    pass
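CliCommandException lets a command carry an exit code, a docs link, and the underlying exception alongside its message. A minimal sketch of how a command might raise it (the helper and URL below are hypothetical, not part of this release):

    import argparse

    from cognee.cli.exceptions import CliCommandException

    def read_corpus(path: str) -> str:
        # Hypothetical helper, defined here only to keep the sketch self-contained.
        with open(path, encoding="utf-8") as f:
            return f.read()

    def execute(args: argparse.Namespace) -> None:
        try:
            read_corpus(args.path)
        except OSError as error:
            # Attach a non-zero exit code, a docs pointer, and the original
            # exception so the CLI can re-raise it under --debug.
            raise CliCommandException(
                f"Cannot read {args.path}",
                error_code=2,
                docs_url="https://docs.cognee.ai",  # hypothetical URL
                raiseable_exception=error,
            )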
cognee/cli/minimal_cli.py
ADDED
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+"""
+Minimal CLI entry point for cognee that avoids early initialization
+"""
+
+import sys
+import os
+from typing import Any, Sequence
+
+# CRITICAL: Prevent verbose logging initialization for CLI-only usage
+# This must be set before any cognee imports to be effective
+os.environ["COGNEE_MINIMAL_LOGGING"] = "true"
+os.environ["COGNEE_CLI_MODE"] = "true"
+
+
+def get_version() -> str:
+    """Get cognee version without importing the main package"""
+    try:
+        # Try to get version from pyproject.toml first (for development)
+        from pathlib import Path
+
+        pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
+        if pyproject_path.exists():
+            with open(pyproject_path, encoding="utf-8") as f:
+                for line in f:
+                    if line.startswith("version"):
+                        version = line.split("=")[1].strip("'\"\n ")
+                        return f"{version}-local"
+
+        # Fallback to installed package version
+        import importlib.metadata
+
+        return importlib.metadata.version("cognee")
+    except Exception:
+        return "unknown"
+
+
+def get_command_info() -> dict:
+    """Get command information without importing cognee"""
+    return {
+        "add": "Add data to Cognee for knowledge graph processing",
+        "search": "Search and query the knowledge graph for insights, information, and connections",
+        "cognify": "Transform ingested data into a structured knowledge graph",
+        "delete": "Delete data from cognee knowledge base",
+        "config": "Manage cognee configuration settings",
+    }
+
+
+def print_help() -> None:
+    """Print help message with dynamic command descriptions"""
+    commands = get_command_info()
+    command_list = "\n".join(f"  {cmd:<12} {desc}" for cmd, desc in commands.items())
+
+    print(f"""
+usage: cognee [-h] [--version] [--debug] {{{"|".join(commands.keys())}}} ...
+
+Cognee CLI - Manage your knowledge graphs and cognitive processing pipelines.
+
+options:
+  -h, --help     show this help message and exit
+  --version      show program's version number and exit
+  --debug        Enable debug mode to show full stack traces on exceptions
+
+Available commands:
+  {{{",".join(commands.keys())}}}
+{command_list}
+
+For more information on each command, use: cognee <command> --help
+""")
+
+
+def main() -> int:
+    """Minimal CLI main function"""
+    # Handle help and version without any imports - purely static
+    if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["-h", "--help"]):
+        print_help()
+        return 0
+
+    if len(sys.argv) == 2 and sys.argv[1] == "--version":
+        print(f"cognee {get_version()}")
+        return 0
+
+    # For actual commands, import the full CLI with minimal logging
+    try:
+        from cognee.cli._cognee import main as full_main
+
+        return full_main()
+    except Exception as e:
+        if "--debug" in sys.argv:
+            raise
+        print(f"Error: {e}")
+        print("Use --debug for full stack trace")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
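Because help and version are handled before any cognee import, the entry point stays fast even in large environments. A quick smoke test of the behavior above, simulating the argv the console script would receive:

    import sys

    from cognee.cli.minimal_cli import main

    sys.argv = ["cognee", "--version"]  # simulate `cognee --version`
    exit_code = main()  # prints the version without importing the full package
    assert exit_code == 0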
cognee/cli/reference.py
ADDED
@@ -0,0 +1,26 @@
+from abc import abstractmethod
+from typing import Protocol, Optional
+import argparse
+
+
+class SupportsCliCommand(Protocol):
+    """Protocol for defining one cognee cli command"""
+
+    command_string: str
+    """name of the command"""
+    help_string: str
+    """the help string for argparse"""
+    description: Optional[str]
+    """the more detailed description for argparse, may include markdown for the docs"""
+    docs_url: Optional[str]
+    """the default docs url to be printed in case of an exception"""
+
+    @abstractmethod
+    def configure_parser(self, parser: argparse.ArgumentParser) -> None:
+        """Configures the parser for the given argument"""
+        ...
+
+    @abstractmethod
+    def execute(self, args: argparse.Namespace) -> None:
+        """Executes the command with the given arguments"""
+        ...
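Commands plug into the CLI by satisfying this Protocol; structural typing means no base class is needed. A sketch of a conforming command (the `version` command itself is illustrative, not shipped in 0.2.4):

    import argparse
    import importlib.metadata

    class VersionCommand:
        command_string = "version"
        help_string = "Print the installed cognee version"
        description = "Prints the installed cognee version."
        docs_url = None

        def configure_parser(self, parser: argparse.ArgumentParser) -> None:
            parser.add_argument("--short", action="store_true", help="print only the number")

        def execute(self, args: argparse.Namespace) -> None:
            version = importlib.metadata.version("cognee")
            print(version if args.short else f"cognee {version}")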
cognee/cli/suppress_logging.py
ADDED
@@ -0,0 +1,12 @@
+"""
+Module to suppress verbose logging before any cognee imports.
+This must be imported before any other cognee modules.
+"""
+
+import os
+
+# Set CLI mode to suppress verbose logging
+os.environ["COGNEE_CLI_MODE"] = "true"
+
+# Also set log level to ERROR for extra safety
+os.environ["LOG_LEVEL"] = "ERROR"
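The module works purely through import side effects, so import order matters. A sketch of the intended usage:

    # Importing the suppression module first sets COGNEE_CLI_MODE and LOG_LEVEL
    # before cognee's logging is configured; later imports then stay quiet.
    import cognee.cli.suppress_logging  # noqa: F401
    import cognee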
cognee/eval_framework/corpus_builder/corpus_builder_executor.py
@@ -5,7 +5,7 @@ from typing import Optional, Tuple, List, Dict, Union, Any, Callable, Awaitable
 from cognee.eval_framework.benchmark_adapters.benchmark_adapters import BenchmarkAdapter
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.pipelines.tasks.task import Task
-from cognee.modules.pipelines import
+from cognee.modules.pipelines import run_pipeline

 logger = get_logger(level=ERROR)

@@ -61,7 +61,7 @@ class CorpusBuilderExecutor:
         await cognee.add(self.raw_corpus)

         tasks = await self.task_getter(chunk_size=chunk_size, chunker=chunker)
-        pipeline_run =
+        pipeline_run = run_pipeline(tasks=tasks)

         async for run_info in pipeline_run:
             print(run_info)
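The executor now consumes `run_pipeline` as an async generator in place of the removed helper. A hedged sketch of the same pattern outside the executor (the task body is invented; only the `tasks=` keyword is confirmed by the diff):

    import asyncio

    from cognee.modules.pipelines import run_pipeline
    from cognee.modules.pipelines.tasks.task import Task

    async def uppercase(text: str) -> str:
        # Trivial stand-in task payload for the sketch.
        return text.upper()

    async def demo() -> None:
        pipeline_run = run_pipeline(tasks=[Task(uppercase)])
        async for run_info in pipeline_run:
            print(run_info)  # per-run status, as printed by CorpusBuilderExecutor

    asyncio.run(demo())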
cognee/infrastructure/databases/graph/get_graph_engine.py
@@ -21,10 +21,6 @@ async def get_graph_engine() -> GraphDBInterface:
     if hasattr(graph_client, "initialize"):
         await graph_client.initialize()

-    # Handle loading of graph for NetworkX
-    if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
-        await graph_client.load_graph_from_file()
-
     return graph_client


@@ -181,8 +177,7 @@ def create_graph_engine(
             graph_id=graph_identifier,
         )

-
-
-
-
-    return graph_client
+    raise EnvironmentError(
+        f"Unsupported graph database provider: {graph_database_provider}. "
+        f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'falkordb', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
+    )
cognee/infrastructure/databases/graph/kuzu/adapter.py
@@ -138,8 +138,9 @@ class KuzuAdapter(GraphDBInterface):

         s3_file_storage = S3FileStorage("")

-
-        self.
+        if self.connection:
+            async with self.KUZU_ASYNC_LOCK:
+                self.connection.execute("CHECKPOINT;")

         s3_file_storage.s3.put(self.temp_graph_file, self.db_path, recursive=True)

@@ -1631,3 +1632,64 @@ class KuzuAdapter(GraphDBInterface):
         """
         result = await self.query(query)
         return [record[0] for record in result] if result else []
+
+    async def get_last_user_interaction_ids(self, limit: int) -> List[str]:
+        """
+        Retrieve the IDs of the most recent CogneeUserInteraction nodes.
+        Parameters:
+        -----------
+        - limit (int): The maximum number of interaction IDs to return.
+        Returns:
+        --------
+        - List[str]: A list of interaction IDs, sorted by created_at descending.
+        """
+
+        query = """
+            MATCH (n)
+            WHERE n.type = 'CogneeUserInteraction'
+            RETURN n.id as id
+            ORDER BY n.created_at DESC
+            LIMIT $limit
+        """
+        rows = await self.query(query, {"limit": limit})
+
+        id_list = [row[0] for row in rows]
+        return id_list
+
+    async def apply_feedback_weight(
+        self,
+        node_ids: List[str],
+        weight: float,
+    ) -> None:
+        """
+        Increment `feedback_weight` inside r.properties JSON for edges where
+        relationship_name = 'used_graph_element_to_answer'.
+
+        """
+        # Step 1: fetch matching edges
+        query = """
+            MATCH (n:Node)-[r:EDGE]->()
+            WHERE n.id IN $node_ids AND r.relationship_name = 'used_graph_element_to_answer'
+            RETURN r.properties, n.id
+        """
+        results = await self.query(query, {"node_ids": node_ids})
+
+        # Step 2: update JSON client-side
+        updates = []
+        for props_json, source_id in results:
+            try:
+                props = json.loads(props_json) if props_json else {}
+            except json.JSONDecodeError:
+                props = {}
+
+            props["feedback_weight"] = props.get("feedback_weight", 0) + weight
+            updates.append((source_id, json.dumps(props)))
+
+        # Step 3: write back
+        for node_id, new_props in updates:
+            update_query = """
+                MATCH (n:Node)-[r:EDGE]->()
+                WHERE n.id = $node_id AND r.relationship_name = 'used_graph_element_to_answer'
+                SET r.properties = $props
+            """
+            await self.query(update_query, {"node_id": node_id, "props": new_props})
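Kuzu stores edge properties as a JSON string, so the adapter does a read-modify-write round trip rather than a single in-database update. The merge step in isolation (a standalone re-creation of the logic above, runnable without a database):

    import json

    def bump_feedback_weight(props_json, weight: float) -> str:
        # Mirrors steps 1-2 of apply_feedback_weight: tolerate missing or
        # malformed JSON, then add `weight` to the stored feedback_weight.
        try:
            props = json.loads(props_json) if props_json else {}
        except json.JSONDecodeError:
            props = {}
        props["feedback_weight"] = props.get("feedback_weight", 0) + weight
        return json.dumps(props)

    print(bump_feedback_weight(None, 1.0))                       # {"feedback_weight": 1.0}
    print(bump_feedback_weight('{"feedback_weight": 2}', -0.5))  # {"feedback_weight": 1.5}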
cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@@ -1322,3 +1322,52 @@ class Neo4jAdapter(GraphDBInterface):
         """
         result = await self.query(query)
         return [record["n"] for record in result] if result else []
+
+    async def get_last_user_interaction_ids(self, limit: int) -> List[str]:
+        """
+        Retrieve the IDs of the most recent CogneeUserInteraction nodes.
+        Parameters:
+        -----------
+        - limit (int): The maximum number of interaction IDs to return.
+        Returns:
+        --------
+        - List[str]: A list of interaction IDs, sorted by created_at descending.
+        """
+
+        query = """
+            MATCH (n)
+            WHERE n.type = 'CogneeUserInteraction'
+            RETURN n.id as id
+            ORDER BY n.created_at DESC
+            LIMIT $limit
+        """
+        rows = await self.query(query, {"limit": limit})
+
+        id_list = [row["id"] for row in rows if "id" in row]
+        return id_list
+
+    async def apply_feedback_weight(
+        self,
+        node_ids: List[str],
+        weight: float,
+    ) -> None:
+        """
+        Increment `feedback_weight` on relationships `:used_graph_element_to_answer`
+        outgoing from nodes whose `id` is in `node_ids`.
+
+        Args:
+            node_ids: List of node IDs to match.
+            weight: Amount to add to `r.feedback_weight` (can be negative).
+
+        Side effects:
+            Updates relationship property `feedback_weight`, defaulting missing values to 0.
+        """
+        query = """
+            MATCH (n)-[r]->()
+            WHERE n.id IN $node_ids AND r.relationship_name = 'used_graph_element_to_answer'
+            SET r.feedback_weight = coalesce(r.feedback_weight, 0) + $weight
+        """
+        await self.query(
+            query,
+            params={"weight": float(weight), "node_ids": list(node_ids)},
+        )
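Unlike Kuzu, the Neo4j variant can do the whole update in one Cypher `SET` with `coalesce`, since relationship properties are first-class there. Combined usage of the two new methods (a sketch; assumes an already initialized adapter instance):

    async def downweight_recent_answers(adapter, limit: int = 5) -> None:
        # Fetch the newest CogneeUserInteraction nodes, then push negative
        # feedback onto their 'used_graph_element_to_answer' edges.
        interaction_ids = await adapter.get_last_user_interaction_ids(limit)
        await adapter.apply_feedback_weight(interaction_ids, weight=-1.0)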
cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py
@@ -41,11 +41,11 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
         self,
         model: Optional[str] = "openai/text-embedding-3-large",
         dimensions: Optional[int] = 3072,
-
+        max_completion_tokens: int = 512,
     ):
         self.model = model
         self.dimensions = dimensions
-        self.
+        self.max_completion_tokens = max_completion_tokens
         self.tokenizer = self.get_tokenizer()
         # self.retry_count = 0
         self.embedding_model = TextEmbedding(model_name=model)
@@ -112,7 +112,9 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
         """
         logger.debug("Loading tokenizer for FastembedEmbeddingEngine...")

-        tokenizer = TikTokenTokenizer(
+        tokenizer = TikTokenTokenizer(
+            model="gpt-4o", max_completion_tokens=self.max_completion_tokens
+        )

         logger.debug("Tokenizer loaded for for FastembedEmbeddingEngine")
         return tokenizer
cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
@@ -57,7 +57,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         api_key: str = None,
         endpoint: str = None,
         api_version: str = None,
-
+        max_completion_tokens: int = 512,
     ):
         self.api_key = api_key
         self.endpoint = endpoint
@@ -65,7 +65,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         self.provider = provider
         self.model = model
         self.dimensions = dimensions
-        self.
+        self.max_completion_tokens = max_completion_tokens
         self.tokenizer = self.get_tokenizer()
         self.retry_count = 0

@@ -179,20 +179,29 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         model = self.model.split("/")[-1]

         if "openai" in self.provider.lower():
-            tokenizer = TikTokenTokenizer(
+            tokenizer = TikTokenTokenizer(
+                model=model, max_completion_tokens=self.max_completion_tokens
+            )
         elif "gemini" in self.provider.lower():
-            tokenizer = GeminiTokenizer(
+            tokenizer = GeminiTokenizer(
+                model=model, max_completion_tokens=self.max_completion_tokens
+            )
         elif "mistral" in self.provider.lower():
-            tokenizer = MistralTokenizer(
+            tokenizer = MistralTokenizer(
+                model=model, max_completion_tokens=self.max_completion_tokens
+            )
         else:
             try:
                 tokenizer = HuggingFaceTokenizer(
-                    model=self.model.replace("hosted_vllm/", ""),
+                    model=self.model.replace("hosted_vllm/", ""),
+                    max_completion_tokens=self.max_completion_tokens,
                 )
             except Exception as e:
                 logger.warning(f"Could not get tokenizer from HuggingFace due to: {e}")
                 logger.info("Switching to TikToken default tokenizer.")
-                tokenizer = TikTokenTokenizer(
+                tokenizer = TikTokenTokenizer(
+                    model=None, max_completion_tokens=self.max_completion_tokens
+                )

         logger.debug(f"Tokenizer loaded for model: {self.model}")
         return tokenizer
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py
@@ -30,7 +30,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
     Instance variables:
     - model
     - dimensions
-    -
+    - max_completion_tokens
     - endpoint
     - mock
    - huggingface_tokenizer_name
@@ -39,7 +39,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):

     model: str
     dimensions: int
-
+    max_completion_tokens: int
     endpoint: str
     mock: bool
     huggingface_tokenizer_name: str
@@ -50,13 +50,13 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         self,
         model: Optional[str] = "avr/sfr-embedding-mistral:latest",
         dimensions: Optional[int] = 1024,
-
+        max_completion_tokens: int = 512,
         endpoint: Optional[str] = "http://localhost:11434/api/embeddings",
         huggingface_tokenizer: str = "Salesforce/SFR-Embedding-Mistral",
     ):
         self.model = model
         self.dimensions = dimensions
-        self.
+        self.max_completion_tokens = max_completion_tokens
         self.endpoint = endpoint
         self.huggingface_tokenizer_name = huggingface_tokenizer
         self.tokenizer = self.get_tokenizer()
@@ -132,7 +132,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         """
         logger.debug("Loading HuggingfaceTokenizer for OllamaEmbeddingEngine...")
         tokenizer = HuggingFaceTokenizer(
-            model=self.huggingface_tokenizer_name,
+            model=self.huggingface_tokenizer_name, max_completion_tokens=self.max_completion_tokens
         )
         logger.debug("Tokenizer loaded for OllamaEmbeddingEngine")
         return tokenizer
cognee/infrastructure/databases/vector/embeddings/config.py
@@ -18,7 +18,7 @@ class EmbeddingConfig(BaseSettings):
     embedding_endpoint: Optional[str] = None
     embedding_api_key: Optional[str] = None
     embedding_api_version: Optional[str] = None
-
+    embedding_max_completion_tokens: Optional[int] = 8191
     huggingface_tokenizer: Optional[str] = None
     model_config = SettingsConfigDict(env_file=".env", extra="allow")

@@ -38,7 +38,7 @@ class EmbeddingConfig(BaseSettings):
             "embedding_endpoint": self.embedding_endpoint,
             "embedding_api_key": self.embedding_api_key,
             "embedding_api_version": self.embedding_api_version,
-            "
+            "embedding_max_completion_tokens": self.embedding_max_completion_tokens,
             "huggingface_tokenizer": self.huggingface_tokenizer,
         }

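Since `EmbeddingConfig` is a pydantic `BaseSettings`, the renamed field should be settable from the environment or the `.env` file by its field name (an assumption based on pydantic-settings' default mapping, not shown in the diff):

    import os

    os.environ["EMBEDDING_MAX_COMPLETION_TOKENS"] = "4096"

    from cognee.infrastructure.databases.vector.embeddings.config import EmbeddingConfig

    # Instantiated directly for the sketch; cognee normally reads this through
    # its own accessor, which may cache an earlier instance.
    print(EmbeddingConfig().embedding_max_completion_tokens)  # 4096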
cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py
@@ -27,7 +27,7 @@ def get_embedding_engine() -> EmbeddingEngine:
         config.embedding_provider,
         config.embedding_model,
         config.embedding_dimensions,
-        config.
+        config.embedding_max_completion_tokens,
         config.embedding_endpoint,
         config.embedding_api_key,
         config.embedding_api_version,
@@ -41,7 +41,7 @@ def create_embedding_engine(
     embedding_provider,
     embedding_model,
     embedding_dimensions,
-
+    embedding_max_completion_tokens,
     embedding_endpoint,
     embedding_api_key,
     embedding_api_version,
@@ -58,7 +58,7 @@ def create_embedding_engine(
           'ollama', or another supported provider.
         - embedding_model: The model to be used for the embedding engine.
         - embedding_dimensions: The number of dimensions for the embeddings.
-        -
+        - embedding_max_completion_tokens: The maximum number of tokens for the embeddings.
         - embedding_endpoint: The endpoint for the embedding service, relevant for certain
           providers.
         - embedding_api_key: API key to authenticate with the embedding service, if
@@ -81,7 +81,7 @@ def create_embedding_engine(
         return FastembedEmbeddingEngine(
             model=embedding_model,
             dimensions=embedding_dimensions,
-
+            max_completion_tokens=embedding_max_completion_tokens,
         )

    if embedding_provider == "ollama":
@@ -90,7 +90,7 @@ def create_embedding_engine(
        return OllamaEmbeddingEngine(
            model=embedding_model,
            dimensions=embedding_dimensions,
-
+            max_completion_tokens=embedding_max_completion_tokens,
            endpoint=embedding_endpoint,
            huggingface_tokenizer=huggingface_tokenizer,
        )
@@ -104,5 +104,5 @@ def create_embedding_engine(
        api_version=embedding_api_version,
        model=embedding_model,
        dimensions=embedding_dimensions,
-
+        max_completion_tokens=embedding_max_completion_tokens,
    )
cognee/infrastructure/files/utils/get_data_file_path.py
@@ -5,19 +5,24 @@ from urllib.parse import urlparse
 def get_data_file_path(file_path: str):
     # Check if this is a file URI BEFORE normalizing (which corrupts URIs)
     if file_path.startswith("file://"):
+        # Remove first occurrence of file:// prefix
+        pure_file_path = file_path.replace("file://", "", 1)
         # Normalize the file URI for Windows - replace backslashes with forward slashes
-        normalized_file_uri = os.path.normpath(
+        normalized_file_uri = os.path.normpath(pure_file_path)

-
-
-        # Convert URI path to file system path
+        # Convert path to proper file system path
         if os.name == "nt":  # Windows
             # Handle Windows drive letters correctly
-            fs_path =
-            if
-                fs_path
-
-
+            fs_path = normalized_file_uri
+            if (
+                (fs_path.startswith("/") or fs_path.startswith("\\"))
+                and len(fs_path) > 1
+                and fs_path[2] == ":"
+            ):
+                fs_path = fs_path[1:]
+        else:
+            # Unix - like systems
+            fs_path = normalized_file_uri

         # Now split the actual filesystem path
         actual_fs_path = os.path.normpath(fs_path)
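The new branch first strips the `file://` scheme, then undoes the leading slash that URI paths put before Windows drive letters. A standalone re-creation of that logic, so it runs on any OS (the real helper goes on to split the resulting path):

    import os

    def strip_file_uri(file_path: str) -> str:
        # Same steps as the diff: drop the scheme, normalize, then fix "/C:/...".
        pure_file_path = file_path.replace("file://", "", 1)
        fs_path = os.path.normpath(pure_file_path)
        if (
            (fs_path.startswith("/") or fs_path.startswith("\\"))
            and len(fs_path) > 1
            and fs_path[2] == ":"
        ):
            fs_path = fs_path[1:]
        return fs_path

    print(strip_file_uri("file:///C:/data/notes.txt"))  # "C:/data/notes.txt" on POSIX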
cognee/infrastructure/files/utils/get_file_metadata.py
@@ -1,6 +1,7 @@
 import io
 import os.path
 from typing import BinaryIO, TypedDict
+from pathlib import Path

 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
@@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
     file_type = guess_file_type(file)

     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
-    file_name =
+    file_name = Path(file_path).stem if file_path else None

     # Get file size
     pos = file.tell()  # remember current pointer
cognee/infrastructure/llm/LLMGateway.py
@@ -1,6 +1,5 @@
-from typing import Type
+from typing import Type, Optional, Coroutine
 from pydantic import BaseModel
-from typing import Coroutine
 from cognee.infrastructure.llm import get_llm_config


@@ -79,7 +78,10 @@ class LLMGateway:

     @staticmethod
     def extract_content_graph(
-        content: str,
+        content: str,
+        response_model: Type[BaseModel],
+        mode: str = "simple",
+        custom_prompt: Optional[str] = None,
     ) -> Coroutine:
         llm_config = get_llm_config()
         if llm_config.structured_output_framework.upper() == "BAML":
@@ -87,13 +89,20 @@ class LLMGateway:
                 extract_content_graph,
             )

-            return extract_content_graph(
+            return extract_content_graph(
+                content=content,
+                response_model=response_model,
+                mode=mode,
+                custom_prompt=custom_prompt,
+            )
         else:
             from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
                 extract_content_graph,
             )

-            return extract_content_graph(
+            return extract_content_graph(
+                content=content, response_model=response_model, custom_prompt=custom_prompt
+            )

     @staticmethod
     def extract_categories(content: str, response_model: Type[BaseModel]) -> Coroutine:
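`extract_content_graph` now takes the response model plus `mode` and `custom_prompt` and forwards them to whichever structured-output backend is configured. A hedged call sketch (the response model and prompt text are invented; a configured LLM provider is assumed):

    import asyncio
    from typing import List

    from pydantic import BaseModel

    from cognee.infrastructure.llm.LLMGateway import LLMGateway

    class MiniGraph(BaseModel):
        # Stand-in response model for the sketch.
        nodes: List[str]
        edges: List[str]

    async def run() -> None:
        graph = await LLMGateway.extract_content_graph(
            content="Ada Lovelace worked with Charles Babbage.",
            response_model=MiniGraph,
            mode="simple",  # new parameter in this release
            custom_prompt="Extract people and their collaborations.",  # new parameter
        )
        print(graph)

    asyncio.run(run())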
cognee/infrastructure/llm/config.py
@@ -18,7 +18,7 @@ class LLMConfig(BaseSettings):
     - llm_api_version
     - llm_temperature
     - llm_streaming
-    -
+    - llm_max_completion_tokens
     - transcription_model
     - graph_prompt_path
     - llm_rate_limit_enabled
@@ -35,16 +35,16 @@ class LLMConfig(BaseSettings):

     structured_output_framework: str = "instructor"
     llm_provider: str = "openai"
-    llm_model: str = "gpt-
+    llm_model: str = "gpt-5-mini"
     llm_endpoint: str = ""
     llm_api_key: Optional[str] = None
     llm_api_version: Optional[str] = None
     llm_temperature: float = 0.0
     llm_streaming: bool = False
-
+    llm_max_completion_tokens: int = 16384

     baml_llm_provider: str = "openai"
-    baml_llm_model: str = "gpt-
+    baml_llm_model: str = "gpt-5-mini"
     baml_llm_endpoint: str = ""
     baml_llm_api_key: Optional[str] = None
     baml_llm_temperature: float = 0.0
@@ -171,7 +171,7 @@ class LLMConfig(BaseSettings):
             "api_version": self.llm_api_version,
             "temperature": self.llm_temperature,
             "streaming": self.llm_streaming,
-            "
+            "max_completion_tokens": self.llm_max_completion_tokens,
             "transcription_model": self.transcription_model,
             "graph_prompt_path": self.graph_prompt_path,
             "rate_limit_enabled": self.llm_rate_limit_enabled,