cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import argparse
import asyncio

from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Get local script dependencies.")

    # Suggested path: .../cognee/examples/python/simple_example.py
    parser.add_argument("script_path", type=str, help="Absolute path to the Python script file")

    # Suggested path: .../cognee
    parser.add_argument("repo_path", type=str, help="Absolute path to the repository root")

    args = parser.parse_args()

    # get_local_script_dependencies is declared as (repo_path, script_path, ...);
    # the previous positional call passed (script_path, repo_path) and silently
    # swapped the two paths. Pass by keyword so the CLI order cannot reintroduce
    # the swap.
    dependencies = asyncio.run(
        get_local_script_dependencies(repo_path=args.repo_path, script_path=args.script_path)
    )

    print("Dependencies:")
    # NOTE(review): the current implementation returns a CodeFile object rather
    # than a plain list of dependencies — confirm that iterating it yields the
    # entries this loop expects.
    for dependency in dependencies:
        print(dependency)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import os
import asyncio
import argparse
from cognee.tasks.repo_processor.get_repo_file_dependencies import get_repo_file_dependencies


def main():
    """
    Build and print the file-dependency graph of a repository.

    Reads a single positional command line argument (the repository path),
    prints an error message and returns when that path does not exist, and
    otherwise prints every node of the dependency graph together with its
    outgoing edges and their relation labels.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("repo_path", help="Path to the repository")
    repo_path = arg_parser.parse_args().repo_path

    # Bail out early on a bad path instead of letting the pipeline fail later.
    if not os.path.exists(repo_path):
        print(f"Error: The provided repository path does not exist: {repo_path}")
        return

    dependency_graph = asyncio.run(get_repo_file_dependencies(repo_path))

    for source in dependency_graph.nodes:
        print(f"Node: {source}")
        for _, destination, edge_data in dependency_graph.edges(source, data=True):
            print(f"  Edge to {destination}, Relation: {edge_data.get('relation')}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from cognee.modules.data.processing.document_types import Document
|
|
2
|
+
from cognee.modules.users.permissions.methods import check_permission_on_dataset
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
async def check_permissions_on_dataset(
    documents: List[Document], context: dict, user, permissions
) -> List[Document]:
    """
    Verify that a user holds every requested permission on the dataset.

    Each permission is checked sequentially against the dataset carried in
    ``context``; ``check_permission_on_dataset`` is expected to raise when a
    check fails, so reaching the end of the loop means every permission was
    granted. The documents are returned unchanged so the task can be chained
    in a pipeline.

    Notes:
    - Ensure that the ``Document`` and ``user`` objects conform to the
      expected structure and interfaces.
    """
    # TODO: pass dataset through argument instead of context
    dataset_id = context["dataset"].id

    for required_permission in permissions:
        await check_permission_on_dataset(user, required_permission, dataset_id)

    return documents
|
|
@@ -2,7 +2,9 @@ import asyncio
|
|
|
2
2
|
from typing import Type, List, Optional
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
5
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
5
6
|
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
|
|
7
|
+
from cognee.tasks.storage import index_graph_edges
|
|
6
8
|
from cognee.tasks.storage.add_data_points import add_data_points
|
|
7
9
|
from cognee.modules.ontology.ontology_config import Config
|
|
8
10
|
from cognee.modules.ontology.get_default_ontology_resolver import (
|
|
@@ -23,7 +25,6 @@ from cognee.tasks.graph.exceptions import (
|
|
|
23
25
|
InvalidChunkGraphInputError,
|
|
24
26
|
InvalidOntologyAdapterError,
|
|
25
27
|
)
|
|
26
|
-
from cognee.modules.cognify.config import get_cognify_config
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
async def integrate_chunk_graphs(
|
|
@@ -66,6 +67,8 @@ async def integrate_chunk_graphs(
|
|
|
66
67
|
type(ontology_resolver).__name__ if ontology_resolver else "None"
|
|
67
68
|
)
|
|
68
69
|
|
|
70
|
+
graph_engine = await get_graph_engine()
|
|
71
|
+
|
|
69
72
|
if graph_model is not KnowledgeGraph:
|
|
70
73
|
for chunk_index, chunk_graph in enumerate(chunk_graphs):
|
|
71
74
|
data_chunks[chunk_index].contains = chunk_graph
|
|
@@ -81,13 +84,12 @@ async def integrate_chunk_graphs(
|
|
|
81
84
|
data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
|
|
82
85
|
)
|
|
83
86
|
|
|
84
|
-
cognify_config = get_cognify_config()
|
|
85
|
-
embed_triplets = cognify_config.triplet_embedding
|
|
86
|
-
|
|
87
87
|
if len(graph_nodes) > 0:
|
|
88
|
-
await add_data_points(
|
|
89
|
-
|
|
90
|
-
|
|
88
|
+
await add_data_points(graph_nodes)
|
|
89
|
+
|
|
90
|
+
if len(graph_edges) > 0:
|
|
91
|
+
await graph_engine.add_edges(graph_edges)
|
|
92
|
+
await index_graph_edges(graph_edges)
|
|
91
93
|
|
|
92
94
|
return data_chunks
|
|
93
95
|
|
|
@@ -97,7 +99,6 @@ async def extract_graph_from_data(
|
|
|
97
99
|
graph_model: Type[BaseModel],
|
|
98
100
|
config: Config = None,
|
|
99
101
|
custom_prompt: Optional[str] = None,
|
|
100
|
-
**kwargs,
|
|
101
102
|
) -> List[DocumentChunk]:
|
|
102
103
|
"""
|
|
103
104
|
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
|
|
@@ -112,7 +113,7 @@ async def extract_graph_from_data(
|
|
|
112
113
|
|
|
113
114
|
chunk_graphs = await asyncio.gather(
|
|
114
115
|
*[
|
|
115
|
-
extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt
|
|
116
|
+
extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
|
|
116
117
|
for chunk in data_chunks
|
|
117
118
|
]
|
|
118
119
|
)
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import aiofiles
|
|
3
|
+
import importlib
|
|
4
|
+
from typing import AsyncGenerator, Optional
|
|
5
|
+
from uuid import NAMESPACE_OID, uuid5
|
|
6
|
+
import tree_sitter_python as tspython
|
|
7
|
+
from tree_sitter import Language, Node, Parser, Tree
|
|
8
|
+
from cognee.shared.logging_utils import get_logger
|
|
9
|
+
|
|
10
|
+
from cognee.low_level import DataPoint
|
|
11
|
+
from cognee.shared.CodeGraphEntities import (
|
|
12
|
+
CodeFile,
|
|
13
|
+
ImportStatement,
|
|
14
|
+
FunctionDefinition,
|
|
15
|
+
ClassDefinition,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Module-level logger shared by all helpers in this module.
logger = get_logger()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FileParser:
    """
    Parses Python files into source code plus a tree-sitter syntax tree,
    caching results per file path so each file is read and parsed only once.

    Public methods:

    - parse_file: Parses a file and returns its source code and syntax tree representation.
    """

    def __init__(self):
        # Cache: file_path -> (source_code, syntax_tree)
        self.parsed_files = {}

    async def parse_file(self, file_path: str) -> tuple[str, Tree]:
        """
        Parse a file and return its source code along with its syntax tree representation.

        If the file has already been parsed, retrieve the result from memory instead
        of reading the file again.

        Parameters:
        -----------

        - file_path (str): The path of the file to parse.

        Returns:
        --------

        - tuple[str, Tree]: A tuple containing the source code of the file and its
          corresponding syntax tree representation.
        """
        if file_path not in self.parsed_files:
            # Build the Language/Parser only on a cache miss — the previous code
            # reconstructed them on every call, which was wasted work for paths
            # that were already cached.
            source_code_parser = Parser(Language(tspython.language()))

            # NOTE(review): get_source_code returns None on read errors, in which
            # case bytes(None, ...) below raises TypeError — confirm callers only
            # pass readable files.
            source_code = await get_source_code(file_path)
            source_code_tree = source_code_parser.parse(bytes(source_code, "utf-8"))
            self.parsed_files[file_path] = (source_code, source_code_tree)

        return self.parsed_files[file_path]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
async def get_source_code(file_path: str):
    """
    Asynchronously read and return the contents of a file.

    Any failure while opening or reading the file is logged and swallowed, in
    which case None is returned instead of the file contents.

    Parameters:
    -----------

    - file_path (str): The path to the file from which to read the source code.

    Returns:
    --------

    The contents of the file as a string if successful, or None if an error
    occurs.
    """
    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8") as source_file:
            return await source_file.read()
    except Exception as error:
        logger.error(f"Error reading file {file_path}: {str(error)}")
        return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def resolve_module_path(module_name):
    """
    Find the file path of a module.

    Return the file path of the specified module if found, or return None if the
    module does not exist or cannot be located.

    Parameters:
    -----------

    - module_name: The name of the module whose file path is to be resolved.

    Returns:
    --------

    The module's ``spec.origin`` as a string (a file path for file-backed
    modules; built-in modules report e.g. "built-in"), or None if the module
    is not found.
    """
    # importlib.util is a submodule and is NOT guaranteed to be bound after a
    # plain `import importlib`; import it explicitly so attribute access is safe.
    import importlib.util

    try:
        spec = importlib.util.find_spec(module_name)
    except (ModuleNotFoundError, ValueError):
        # find_spec raises ModuleNotFoundError when a parent package is missing
        # and ValueError when a module's __spec__ is None.
        return None

    if spec and spec.origin:
        return spec.origin
    return None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def find_function_location(
    module_path: str, function_name: str, parser: FileParser
) -> Optional[tuple[str, str]]:
    """
    Find the location of a top-level function definition in a specified module.

    Parameters:
    -----------

    - module_path (str): The path to the module where the function is defined.
    - function_name (str): The name of the function whose location is to be found.
    - parser (FileParser): An instance of FileParser used to parse the module's
      source code.

    Returns:
    --------

    - Optional[tuple[str, str]]: A tuple of the module path and the matching
      function node's start_point if found; otherwise None. Only direct
      children of the module root are inspected, so nested/method definitions
      are not found.
    """
    if not module_path or not os.path.exists(module_path):
        return None

    # NOTE(review): FileParser.parse_file is an async coroutine; calling it
    # without `await` returns a coroutine object, so this tuple unpack raises
    # TypeError at runtime. Fixing it requires making this function async (an
    # interface change) — confirm whether this helper is actually exercised.
    source_code, tree = parser.parse_file(module_path)
    root_node: Node = tree.root_node

    for node in root_node.children:
        if node.type == "function_definition":
            func_name_node = node.child_by_field_name("name")

            if func_name_node and func_name_node.text.decode() == function_name:
                return (module_path, node.start_point)  # (line, column)

    return None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
async def get_local_script_dependencies(
    repo_path: str, script_path: str, detailed_extraction: bool = False
) -> CodeFile:
    """
    Retrieve local script dependencies and create a CodeFile object.

    Parameters:
    -----------

    - repo_path (str): The path to the repository that contains the script.
    - script_path (str): The path of the script for which dependencies are being
      extracted.
    - detailed_extraction (bool): When False, return a CodeFile carrying only the
      raw source code; when True, walk the syntax tree and attach import
      statements, function definitions and class definitions instead.

    Returns:
    --------

    - CodeFile: A CodeFile object containing information about the script,
      including its dependencies and definitions.
    """
    file_parser = FileParser()
    source_code, source_code_tree = await file_parser.parse_file(script_path)

    # Script path relative to the repo root (also strips the path separator).
    relative_file_path = script_path[len(repo_path) + 1 :]

    # In the detailed variant the source lives on the extracted parts, so the
    # file node itself carries no source text.
    code_file_node = CodeFile(
        id=uuid5(NAMESPACE_OID, script_path),
        name=relative_file_path,
        source_code=None if detailed_extraction else source_code,
        file_path=script_path,
        language="python",
    )

    if not detailed_extraction:
        return code_file_node

    async for code_part in extract_code_parts(source_code_tree.root_node, script_path=script_path):
        code_part.file_path = script_path

        if isinstance(code_part, FunctionDefinition):
            code_file_node.provides_function_definition.append(code_part)
        if isinstance(code_part, ClassDefinition):
            code_file_node.provides_class_definition.append(code_part)
        if isinstance(code_part, ImportStatement):
            code_file_node.depends_on.append(code_part)

    return code_file_node
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def find_node(nodes: list[Node], condition: callable) -> Node:
    """
    Return the first node in *nodes* that satisfies *condition*.

    Parameters:
    -----------

    - nodes (list[Node]): A list of Node objects to search through.
    - condition (callable): Predicate invoked with each Node; the first node for
      which it returns a truthy value is returned.

    Returns:
    --------

    - Node: The first matching Node, or None if no node matches.
    """
    return next((candidate for candidate in nodes if condition(candidate)), None)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
async def extract_code_parts(
    tree_root: Node, script_path: str, existing_nodes: Optional[dict] = None
) -> AsyncGenerator[DataPoint, None]:
    """
    Extract code parts from a given AST node tree asynchronously.

    Iterates the direct children of ``tree_root`` and yields DataPoint nodes
    for import statements, function definitions, and class definitions.
    ``existing_nodes`` maps a part's key to its already-built DataPoint so
    duplicates within a traversal are yielded from cache instead of rebuilt.

    Parameters:
    -----------

    - tree_root (Node): The root node of the AST tree containing code parts to extract.
    - script_path (str): The file path of the script from which the AST was generated.
    - existing_nodes (Optional[dict]): Cache of already-extracted DataPoint
      nodes, keyed by name. Defaults to a fresh dict per call. (The previous
      ``= {}`` default was a shared mutable default — and mis-annotated as a
      list — so dedup state silently leaked between unrelated calls/files.)

    Returns:
    --------

    Yields DataPoint nodes representing imported modules, functions, and classes.
    """
    if existing_nodes is None:
        existing_nodes = {}

    for child_node in tree_root.children:
        if child_node.type == "import_statement" or child_node.type == "import_from_statement":
            parts = child_node.text.decode("utf-8").split()

            if parts[0] == "import":
                module_name = parts[1]
                function_name = None
            elif parts[0] == "from":
                module_name = parts[1]
                function_name = parts[3]

                # NOTE(review): `parts` comes from a whitespace split, so a single
                # token can never contain " as "; these alias-stripping branches
                # appear unreachable — confirm the original intent.
                if " as " in function_name:
                    function_name = function_name.split(" as ")[0]

                if " as " in module_name:
                    module_name = module_name.split(" as ")[0]

            # "from X import Y" — record the imported name, keyed with an
            # "import " prefix so it cannot collide with a module-name key.
            if function_name and "import " + function_name not in existing_nodes:
                import_statement_node = ImportStatement(
                    name=function_name,
                    module=module_name,
                    start_point=child_node.start_point,
                    end_point=child_node.end_point,
                    file_path=script_path,
                    source_code=child_node.text,
                )
                existing_nodes["import " + function_name] = import_statement_node

            if function_name:
                yield existing_nodes["import " + function_name]

            # The module itself is recorded for both "import X" and "from X ...".
            if module_name not in existing_nodes:
                import_statement_node = ImportStatement(
                    name=module_name,
                    module=module_name,
                    start_point=child_node.start_point,
                    end_point=child_node.end_point,
                    file_path=script_path,
                    source_code=child_node.text,
                )
                existing_nodes[module_name] = import_statement_node

            yield existing_nodes[module_name]

        if child_node.type == "function_definition":
            function_node = find_node(child_node.children, lambda node: node.type == "identifier")
            function_node_name = function_node.text

            if function_node_name not in existing_nodes:
                function_definition_node = FunctionDefinition(
                    name=function_node_name,
                    start_point=child_node.start_point,
                    end_point=child_node.end_point,
                    file_path=script_path,
                    source_code=child_node.text,
                )
                existing_nodes[function_node_name] = function_definition_node

            yield existing_nodes[function_node_name]

        if child_node.type == "class_definition":
            class_name_node = find_node(child_node.children, lambda node: node.type == "identifier")
            class_name_node_name = class_name_node.text

            if class_name_node_name not in existing_nodes:
                class_definition_node = ClassDefinition(
                    name=class_name_node_name,
                    start_point=child_node.start_point,
                    end_point=child_node.end_point,
                    file_path=script_path,
                    source_code=child_node.text,
                )
                existing_nodes[class_name_node_name] = class_definition_node

            yield existing_nodes[class_name_node_name]
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
async def get_non_py_files(repo_path):
    """
    Get files that are not .py files.

    Check if the specified repository path exists and if so, traverse the
    directory, collecting the paths of files that do not have a .py extension
    and meet the criteria set in the allowed and ignored patterns.

    Parameters:
    -----------

    - repo_path: The file system path to the repository to scan for non-Python files.

    Returns:
    --------

    A list of file paths that are not Python files and meet the specified
    criteria. An empty list is returned when ``repo_path`` does not exist
    (the previous implementation returned an empty dict here, breaking the
    list contract promised to callers).
    """
    if not os.path.exists(repo_path):
        return []

    # Path fragments that disqualify a file wherever they appear in its path.
    IGNORED_PATTERNS = {
        ".git",
        "__pycache__",
        "*.pyc",
        "*.pyo",
        "*.pyd",
        "node_modules",
        "*.egg-info",
    }

    # Extensions worth ingesting (duplicates from the original literal removed;
    # the resulting set is identical). Matching is case-sensitive.
    ALLOWED_EXTENSIONS = {
        ".txt",
        ".md",
        ".csv",
        ".json",
        ".xml",
        ".yaml",
        ".yml",
        ".html",
        ".css",
        ".js",
        ".ts",
        ".jsx",
        ".tsx",
        ".sql",
        ".log",
        ".ini",
        ".toml",
        ".properties",
        ".sh",
        ".bash",
        ".dockerfile",
        ".gitignore",
        ".gitattributes",
        ".makefile",
        ".pyproject",
        ".requirements",
        ".env",
        ".pdf",
        ".doc",
        ".docx",
        ".dot",
        ".dotx",
        ".rtf",
        ".wps",
        ".wpd",
        ".odt",
        ".ott",
        ".ottx",
        ".wp",
        ".sdw",
        ".sdx",
        ".docm",
        ".dotm",
        # Additional extensions for other programming languages
        ".java",
        ".c",
        ".cpp",
        ".h",
        ".cs",
        ".go",
        ".php",
        ".rb",
        ".swift",
        ".pl",
        ".lua",
        ".rs",
        ".scala",
        ".kt",
        ".v",
        ".asm",
        ".pas",
        ".d",
        ".ml",
        ".clj",
        ".cljs",
        ".erl",
        ".ex",
        ".exs",
        ".f",
        ".fs",
        ".r",
        ".pyi",
        ".pdb",
        ".ipynb",
        ".rmd",
        ".cabal",
        ".hs",
        ".nim",
        ".vhdl",
        ".verilog",
        ".svelte",
        ".scss",
        ".less",
        ".json5",
    }

    def should_process(path):
        """
        Return True when the file's extension is allowed and its path contains
        none of the ignored patterns.
        """
        _, ext = os.path.splitext(path)
        return ext in ALLOWED_EXTENSIONS and not any(
            pattern in path for pattern in IGNORED_PATTERNS
        )

    non_py_files_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(repo_path)
        for file in files
        if not file.endswith(".py") and should_process(os.path.join(root, file))
    ]
    return non_py_files_paths
|