cognee 0.5.0.dev0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +1 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +12 -37
- cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
- cognee/api/v1/search/search.py +8 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/context_global_variables.py +61 -16
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/graph/config.py +3 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +2 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +35 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_retriever.py +10 -0
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +4 -0
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +46 -18
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +15 -3
- cognee/shared/logging_utils.py +4 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/test_cognee_server_start.py +2 -4
- cognee/tests/test_conversation_history.py +23 -1
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_search_db.py +37 -1
- cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/search/test_search.py +100 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/METADATA +76 -89
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/RECORD +119 -97
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/WHEEL +1 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- cognee/tests/test_delete_bmw_example.py +0 -60
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,158 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
async def get_non_py_files(repo_path):
    """
    Get the paths of non-Python files in a repository that are worth processing.

    Walk `repo_path` top-down and collect every file that does not end in
    ".py", whose extension is in the allowed set, and that is neither named
    like an ignored pattern nor located below a directory that is. Ignored
    patterns are shell-style globs matched against individual directory and
    file names, so "*.egg-info" skips "pkg.egg-info/" while ".git" does not
    affect ".github/".

    Parameters:
    -----------

        - repo_path: The file system path to the repository to scan for non-Python files.

    Returns:
    --------

        A list of file paths (each joined onto `repo_path`) that are not Python
        files and meet the criteria above. The list is empty when `repo_path`
        does not exist.
    """
    import fnmatch

    if not os.path.exists(repo_path):
        return []

    # Glob patterns applied to single path components (directory or file names).
    IGNORED_PATTERNS = (
        ".git",
        "__pycache__",
        "*.pyc",
        "*.pyo",
        "*.pyd",
        "node_modules",
        "*.egg-info",
    )

    ALLOWED_EXTENSIONS = frozenset(
        {
            # Text, data, markup and configuration
            ".txt",
            ".md",
            ".csv",
            ".json",
            ".json5",
            ".xml",
            ".yaml",
            ".yml",
            ".html",
            ".css",
            ".scss",
            ".less",
            ".sql",
            ".log",
            ".ini",
            ".toml",
            ".properties",
            ".sh",
            ".bash",
            ".dockerfile",
            ".gitignore",
            ".gitattributes",
            ".makefile",
            ".pyproject",
            ".requirements",
            ".env",
            # Documents
            ".pdf",
            ".doc",
            ".docx",
            ".dot",
            ".dotx",
            ".rtf",
            ".wps",
            ".wpd",
            ".odt",
            ".ott",
            ".ottx",
            ".wp",
            ".sdw",
            ".sdx",
            ".docm",
            ".dotm",
            # Additional extensions for other programming languages
            ".js",
            ".ts",
            ".jsx",
            ".tsx",
            ".svelte",
            ".java",
            ".c",
            ".cpp",
            ".h",
            ".cs",
            ".go",
            ".php",
            ".rb",
            ".swift",
            ".pl",
            ".lua",
            ".rs",
            ".scala",
            ".kt",
            ".v",
            ".asm",
            ".pas",
            ".d",
            ".ml",
            ".clj",
            ".cljs",
            ".erl",
            ".ex",
            ".exs",
            ".f",
            ".fs",
            ".r",
            ".pyi",
            ".pdb",
            ".ipynb",
            ".rmd",
            ".cabal",
            ".hs",
            ".nim",
            ".vhdl",
            ".verilog",
        }
    )

    def is_ignored(name):
        """Return True if a single path component matches an ignored pattern."""
        return any(fnmatch.fnmatchcase(name, pattern) for pattern in IGNORED_PATTERNS)

    def should_process(file_name):
        """Return True if a file name has an allowed extension and is not ignored."""
        if file_name.endswith(".py"):
            return False
        _, ext = os.path.splitext(file_name)
        return ext in ALLOWED_EXTENSIONS and not is_ignored(file_name)

    non_py_files_paths = []
    for root, dirs, files in os.walk(repo_path):
        # Prune in place so os.walk never descends into ignored directories.
        dirs[:] = [name for name in dirs if not is_ignored(name)]
        non_py_files_paths.extend(
            os.path.join(root, file_name) for file_name in files if should_process(file_name)
        )
    return non_py_files_paths
|
|
@@ -1,243 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import math
|
|
3
|
-
import os
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Set
|
|
6
|
-
from typing import AsyncGenerator, Optional, List
|
|
7
|
-
from uuid import NAMESPACE_OID, uuid5
|
|
8
|
-
|
|
9
|
-
from cognee.infrastructure.engine import DataPoint
|
|
10
|
-
from cognee.shared.CodeGraphEntities import CodeFile, Repository
|
|
11
|
-
|
|
12
|
-
# constant, declared only once
|
|
13
|
-
EXCLUDED_DIRS: Set[str] = {
|
|
14
|
-
".venv",
|
|
15
|
-
"venv",
|
|
16
|
-
"env",
|
|
17
|
-
".env",
|
|
18
|
-
"site-packages",
|
|
19
|
-
"node_modules",
|
|
20
|
-
"dist",
|
|
21
|
-
"build",
|
|
22
|
-
".git",
|
|
23
|
-
"tests",
|
|
24
|
-
"test",
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
async def get_source_code_files(
|
|
29
|
-
repo_path,
|
|
30
|
-
language_config: dict[str, list[str]] | None = None,
|
|
31
|
-
excluded_paths: Optional[List[str]] = None,
|
|
32
|
-
):
|
|
33
|
-
"""
|
|
34
|
-
Retrieve Python source code files from the specified repository path.
|
|
35
|
-
|
|
36
|
-
This function scans the given repository path for files that have the .py extension
|
|
37
|
-
while excluding test files and files within a virtual environment. It returns a list of
|
|
38
|
-
absolute paths to the source code files that are not empty.
|
|
39
|
-
|
|
40
|
-
Parameters:
|
|
41
|
-
-----------
|
|
42
|
-
- repo_path: Root path of the repository to search
|
|
43
|
-
- language_config: dict mapping language names to file extensions, e.g.,
|
|
44
|
-
{'python': ['.py'], 'javascript': ['.js', '.jsx'], ...}
|
|
45
|
-
- excluded_paths: Optional list of path fragments or glob patterns to exclude
|
|
46
|
-
|
|
47
|
-
Returns:
|
|
48
|
-
--------
|
|
49
|
-
A list of (absolute_path, language) tuples for source code files.
|
|
50
|
-
"""
|
|
51
|
-
|
|
52
|
-
def _get_language_from_extension(file, language_config):
|
|
53
|
-
for lang, exts in language_config.items():
|
|
54
|
-
for ext in exts:
|
|
55
|
-
if file.endswith(ext):
|
|
56
|
-
return lang
|
|
57
|
-
return None
|
|
58
|
-
|
|
59
|
-
# Default config if not provided
|
|
60
|
-
if language_config is None:
|
|
61
|
-
language_config = {
|
|
62
|
-
"python": [".py"],
|
|
63
|
-
"javascript": [".js", ".jsx"],
|
|
64
|
-
"typescript": [".ts", ".tsx"],
|
|
65
|
-
"java": [".java"],
|
|
66
|
-
"csharp": [".cs"],
|
|
67
|
-
"go": [".go"],
|
|
68
|
-
"rust": [".rs"],
|
|
69
|
-
"cpp": [".cpp", ".c", ".h", ".hpp"],
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
if not os.path.exists(repo_path):
|
|
73
|
-
return []
|
|
74
|
-
|
|
75
|
-
source_code_files = set()
|
|
76
|
-
for root, _, files in os.walk(repo_path):
|
|
77
|
-
for file in files:
|
|
78
|
-
lang = _get_language_from_extension(file, language_config)
|
|
79
|
-
if lang is None:
|
|
80
|
-
continue
|
|
81
|
-
# Exclude tests, common build/venv directories and files provided in exclude_paths
|
|
82
|
-
excluded_dirs = EXCLUDED_DIRS
|
|
83
|
-
excluded_paths = {Path(p).resolve() for p in (excluded_paths or [])} # full paths
|
|
84
|
-
|
|
85
|
-
root_path = Path(root).resolve()
|
|
86
|
-
root_parts = set(root_path.parts) # same as before
|
|
87
|
-
base_name, _ext = os.path.splitext(file)
|
|
88
|
-
if (
|
|
89
|
-
base_name.startswith("test_")
|
|
90
|
-
or base_name.endswith("_test")
|
|
91
|
-
or ".test." in file
|
|
92
|
-
or ".spec." in file
|
|
93
|
-
or (excluded_dirs & root_parts) # name match
|
|
94
|
-
or any(
|
|
95
|
-
root_path.is_relative_to(p) # full-path match
|
|
96
|
-
for p in excluded_paths
|
|
97
|
-
)
|
|
98
|
-
):
|
|
99
|
-
continue
|
|
100
|
-
file_path = os.path.abspath(os.path.join(root, file))
|
|
101
|
-
if os.path.getsize(file_path) == 0:
|
|
102
|
-
continue
|
|
103
|
-
source_code_files.add((file_path, lang))
|
|
104
|
-
|
|
105
|
-
return sorted(list(source_code_files))
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def run_coroutine(coroutine_func, *args, **kwargs):
    """
    Run a coroutine function until it completes.

    This function creates a new asyncio event loop, sets it as the current loop, and
    executes the given coroutine function with the provided arguments. The loop is
    closed afterwards whether the coroutine returns or raises. Intended for use in
    environments where an existing event loop is not available or desirable.

    Parameters:
    -----------

        - coroutine_func: The coroutine function to be run.
        - *args: Positional arguments to pass to the coroutine function.
        - **kwargs: Keyword arguments to pass to the coroutine function.

    Returns:
    --------

        The result returned by the coroutine after completion. Any exception
        raised by the coroutine propagates to the caller.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(coroutine_func(*args, **kwargs))
    finally:
        # Close the loop even on failure so its resources are not leaked.
        loop.close()
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
async def get_repo_file_dependencies(
    repo_path: str,
    detailed_extraction: bool = False,
    supported_languages: list = None,
    excluded_paths: Optional[List[str]] = None,
) -> AsyncGenerator[DataPoint, None]:
    """
    Yield a repository node followed by one code-file node per source file.

    The first item yielded is a `Repository` for `repo_path`. After that the
    source files found by `get_source_code_files` are processed in batches of
    100: Python files go through `get_local_script_dependencies`, files of any
    other language are read from disk and wrapped in a minimal `CodeFile`.
    Every yielded file has its `part_of` attribute set to the repository.

    Parameters:
    -----------

        - repo_path (str): The file path to the repository to process. A list holding
          exactly one path is also accepted and unwrapped.
        - detailed_extraction (bool): Forwarded to `get_local_script_dependencies`
          for Python files.
        - supported_languages (list | None): Subset of languages to include; if None, use defaults.
        - excluded_paths (list[str] | None): Forwarded to `get_source_code_files`.

    Raises:
    -------

        FileNotFoundError: If `repo_path` does not exist.
    """
    single_item_list = isinstance(repo_path, list) and len(repo_path) == 1
    if single_item_list:
        repo_path = repo_path[0]

    if not os.path.exists(repo_path):
        raise FileNotFoundError(f"Repository path {repo_path} does not exist.")

    # Extensions recognised for each language known to this task.
    extensions_by_language = {
        "python": [".py"],
        "javascript": [".js", ".jsx"],
        "typescript": [".ts", ".tsx"],
        "java": [".java"],
        "csharp": [".cs"],
        "go": [".go"],
        "rust": [".rs"],
        "cpp": [".cpp", ".c", ".h", ".hpp"],
        "c": [".c", ".h"],
    }
    if supported_languages is None:
        language_config = extensions_by_language
    else:
        language_config = {}
        for language, extensions in extensions_by_language.items():
            if language in supported_languages:
                language_config[language] = extensions

    source_code_files = await get_source_code_files(
        repo_path, language_config=language_config, excluded_paths=excluded_paths
    )

    repo = Repository(id=uuid5(NAMESPACE_OID, repo_path), path=repo_path)
    yield repo

    # Import dependency extractors for each language (Python for now, extend later)
    from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
    import aiofiles
    # TODO: Add other language extractors here

    async def read_as_code_file(file_path, lang):
        """Placeholder for non-Python languages: wrap the raw file text in a CodeFile."""
        async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as handle:
            source = await handle.read()
        return CodeFile(
            id=uuid5(NAMESPACE_OID, file_path),
            name=os.path.relpath(file_path, repo_path),
            file_path=file_path,
            language=lang,
            source_code=source,
        )

    batch_size = 100
    for batch_start in range(0, len(source_code_files), batch_size):
        batch = source_code_files[batch_start : batch_start + batch_size]

        # For now, only Python is supported; extend with other languages
        pending = [
            get_local_script_dependencies(repo_path, file_path, detailed_extraction)
            if lang == "python"
            else read_as_code_file(file_path, lang)
            for file_path, lang in batch
        ]

        results: list[CodeFile] = await asyncio.gather(*pending)

        for code_file in results:
            code_file.part_of = repo
            language_missing = getattr(code_file, "language", None) is None
            if language_missing and code_file.file_path.endswith(".py"):
                code_file.language = "python"
            yield code_file
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import pathlib
|
|
3
|
-
from uuid import UUID
|
|
4
|
-
|
|
5
|
-
import cognee
|
|
6
|
-
|
|
7
|
-
from cognee.api.v1.datasets import datasets
|
|
8
|
-
from cognee.api.v1.visualize.visualize import visualize_graph
|
|
9
|
-
from cognee.context_global_variables import set_database_global_context_variables
|
|
10
|
-
from cognee.modules.engine.operations.setup import setup
|
|
11
|
-
from cognee.modules.users.methods import get_default_user
|
|
12
|
-
|
|
13
|
-
# from cognee.modules.engine.operations.setup import setup
|
|
14
|
-
from cognee.shared.logging_utils import get_logger
|
|
15
|
-
|
|
16
|
-
logger = get_logger()
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
async def main():
    """
    Render the graph, delete one hard-coded data item, and render the graph again.

    Data and system directories are pointed at folders next to this file. The
    two visualisations are written to "artifacts/graph-before.html" and
    "artifacts/graph-after.html" under the data directory.
    """
    tests_dir = pathlib.Path(__file__).parent

    data_directory_path = os.path.join(tests_dir, ".data_storage/test_delete_bmw_example")
    cognee.config.data_root_directory(data_directory_path)

    cognee_directory_path = os.path.join(tests_dir, ".cognee_system/test_delete_bmw_example")
    cognee.config.system_root_directory(cognee_directory_path)

    # NOTE(review): the disabled steps below look like the one-off seeding that
    # produced the stored data this script deletes from - confirm before removing.
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)
    # await setup()

    # add_result = await cognee.add("Bmw is a german carmanufacturer")
    # add_result = await cognee.add("Germany is located next to the netherlands")
    # data_id = add_result.data_ingestion_info[0]["data_id"]

    # cognify_result: dict = await cognee.cognify()
    # dataset_id = list(cognify_result.keys())[0]

    user = await get_default_user()
    await set_database_global_context_variables("main_dataset", user.id)

    before_path = os.path.join(data_directory_path, "artifacts/graph-before.html")
    await visualize_graph(before_path)

    # Hard-coded identifiers; presumably taken from a previous seeding run.
    first_id = UUID("b52be2e1-9fdb-5be0-a317-d3a56e9a34c6")
    second_id = UUID("fdae2cbd-61e1-5e99-93ca-4f3e32ed0d02")
    await datasets.delete_data(first_id, second_id, user)

    after_path = os.path.join(data_directory_path, "artifacts/graph-after.html")
    await visualize_graph(after_path)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|