cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  166. cognee/tests/test_add_docling_document.py +2 -2
  167. cognee/tests/test_cognee_server_start.py +84 -3
  168. cognee/tests/test_conversation_history.py +68 -5
  169. cognee/tests/test_data/example_with_header.csv +3 -0
  170. cognee/tests/test_dataset_database_handler.py +137 -0
  171. cognee/tests/test_dataset_delete.py +76 -0
  172. cognee/tests/test_edge_centered_payload.py +170 -0
  173. cognee/tests/test_edge_ingestion.py +27 -0
  174. cognee/tests/test_feedback_enrichment.py +1 -1
  175. cognee/tests/test_library.py +6 -4
  176. cognee/tests/test_load.py +62 -0
  177. cognee/tests/test_multi_tenancy.py +165 -0
  178. cognee/tests/test_parallel_databases.py +2 -0
  179. cognee/tests/test_pipeline_cache.py +164 -0
  180. cognee/tests/test_relational_db_migration.py +54 -2
  181. cognee/tests/test_search_db.py +44 -2
  182. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  183. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  184. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  185. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  186. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  187. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  188. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  189. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  190. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  191. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  192. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  193. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  194. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  195. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  196. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  197. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  198. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  199. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  200. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  201. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  202. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  203. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  204. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  205. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  206. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  207. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
  208. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
  209. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  210. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  211. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  212. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  213. cognee/modules/retrieval/code_retriever.py +0 -232
  214. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  215. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  216. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  217. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  218. cognee/tasks/repo_processor/__init__.py +0 -2
  219. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  220. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  221. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  222. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
  223. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  224. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,158 +0,0 @@
1
- import os
2
-
3
-
4
- async def get_non_py_files(repo_path):
5
- """
6
- Get files that are not .py files and their contents.
7
-
8
- Check if the specified repository path exists and if so, traverse the directory,
9
- collecting the paths of files that do not have a .py extension and meet the
10
- criteria set in the allowed and ignored patterns. Return a list of paths to
11
- those files.
12
-
13
- Parameters:
14
- -----------
15
-
16
- - repo_path: The file system path to the repository to scan for non-Python files.
17
-
18
- Returns:
19
- --------
20
-
21
- A list of file paths that are not Python files and meet the specified criteria.
22
- """
23
- if not os.path.exists(repo_path):
24
- return {}
25
-
26
- IGNORED_PATTERNS = {
27
- ".git",
28
- "__pycache__",
29
- "*.pyc",
30
- "*.pyo",
31
- "*.pyd",
32
- "node_modules",
33
- "*.egg-info",
34
- }
35
-
36
- ALLOWED_EXTENSIONS = {
37
- ".txt",
38
- ".md",
39
- ".csv",
40
- ".json",
41
- ".xml",
42
- ".yaml",
43
- ".yml",
44
- ".html",
45
- ".css",
46
- ".js",
47
- ".ts",
48
- ".jsx",
49
- ".tsx",
50
- ".sql",
51
- ".log",
52
- ".ini",
53
- ".toml",
54
- ".properties",
55
- ".sh",
56
- ".bash",
57
- ".dockerfile",
58
- ".gitignore",
59
- ".gitattributes",
60
- ".makefile",
61
- ".pyproject",
62
- ".requirements",
63
- ".env",
64
- ".pdf",
65
- ".doc",
66
- ".docx",
67
- ".dot",
68
- ".dotx",
69
- ".rtf",
70
- ".wps",
71
- ".wpd",
72
- ".odt",
73
- ".ott",
74
- ".ottx",
75
- ".txt",
76
- ".wp",
77
- ".sdw",
78
- ".sdx",
79
- ".docm",
80
- ".dotm",
81
- # Additional extensions for other programming languages
82
- ".java",
83
- ".c",
84
- ".cpp",
85
- ".h",
86
- ".cs",
87
- ".go",
88
- ".php",
89
- ".rb",
90
- ".swift",
91
- ".pl",
92
- ".lua",
93
- ".rs",
94
- ".scala",
95
- ".kt",
96
- ".sh",
97
- ".sql",
98
- ".v",
99
- ".asm",
100
- ".pas",
101
- ".d",
102
- ".ml",
103
- ".clj",
104
- ".cljs",
105
- ".erl",
106
- ".ex",
107
- ".exs",
108
- ".f",
109
- ".fs",
110
- ".r",
111
- ".pyi",
112
- ".pdb",
113
- ".ipynb",
114
- ".rmd",
115
- ".cabal",
116
- ".hs",
117
- ".nim",
118
- ".vhdl",
119
- ".verilog",
120
- ".svelte",
121
- ".html",
122
- ".css",
123
- ".scss",
124
- ".less",
125
- ".json5",
126
- ".yaml",
127
- ".yml",
128
- }
129
-
130
- def should_process(path):
131
- """
132
- Determine if a file should be processed based on its extension and path patterns.
133
-
134
- This function checks if the file extension is in the allowed list and ensures that none
135
- of the ignored patterns are present in the provided file path.
136
-
137
- Parameters:
138
- -----------
139
-
140
- - path: The file path to check for processing eligibility.
141
-
142
- Returns:
143
- --------
144
-
145
- Returns True if the file should be processed; otherwise, False.
146
- """
147
- _, ext = os.path.splitext(path)
148
- return ext in ALLOWED_EXTENSIONS and not any(
149
- pattern in path for pattern in IGNORED_PATTERNS
150
- )
151
-
152
- non_py_files_paths = [
153
- os.path.join(root, file)
154
- for root, _, files in os.walk(repo_path)
155
- for file in files
156
- if not file.endswith(".py") and should_process(os.path.join(root, file))
157
- ]
158
- return non_py_files_paths
@@ -1,243 +0,0 @@
1
- import asyncio
2
- import math
3
- import os
4
- from pathlib import Path
5
- from typing import Set
6
- from typing import AsyncGenerator, Optional, List
7
- from uuid import NAMESPACE_OID, uuid5
8
-
9
- from cognee.infrastructure.engine import DataPoint
10
- from cognee.shared.CodeGraphEntities import CodeFile, Repository
11
-
12
- # constant, declared only once
13
- EXCLUDED_DIRS: Set[str] = {
14
- ".venv",
15
- "venv",
16
- "env",
17
- ".env",
18
- "site-packages",
19
- "node_modules",
20
- "dist",
21
- "build",
22
- ".git",
23
- "tests",
24
- "test",
25
- }
26
-
27
-
28
- async def get_source_code_files(
29
- repo_path,
30
- language_config: dict[str, list[str]] | None = None,
31
- excluded_paths: Optional[List[str]] = None,
32
- ):
33
- """
34
- Retrieve Python source code files from the specified repository path.
35
-
36
- This function scans the given repository path for files that have the .py extension
37
- while excluding test files and files within a virtual environment. It returns a list of
38
- absolute paths to the source code files that are not empty.
39
-
40
- Parameters:
41
- -----------
42
- - repo_path: Root path of the repository to search
43
- - language_config: dict mapping language names to file extensions, e.g.,
44
- {'python': ['.py'], 'javascript': ['.js', '.jsx'], ...}
45
- - excluded_paths: Optional list of path fragments or glob patterns to exclude
46
-
47
- Returns:
48
- --------
49
- A list of (absolute_path, language) tuples for source code files.
50
- """
51
-
52
- def _get_language_from_extension(file, language_config):
53
- for lang, exts in language_config.items():
54
- for ext in exts:
55
- if file.endswith(ext):
56
- return lang
57
- return None
58
-
59
- # Default config if not provided
60
- if language_config is None:
61
- language_config = {
62
- "python": [".py"],
63
- "javascript": [".js", ".jsx"],
64
- "typescript": [".ts", ".tsx"],
65
- "java": [".java"],
66
- "csharp": [".cs"],
67
- "go": [".go"],
68
- "rust": [".rs"],
69
- "cpp": [".cpp", ".c", ".h", ".hpp"],
70
- }
71
-
72
- if not os.path.exists(repo_path):
73
- return []
74
-
75
- source_code_files = set()
76
- for root, _, files in os.walk(repo_path):
77
- for file in files:
78
- lang = _get_language_from_extension(file, language_config)
79
- if lang is None:
80
- continue
81
- # Exclude tests, common build/venv directories and files provided in exclude_paths
82
- excluded_dirs = EXCLUDED_DIRS
83
- excluded_paths = {Path(p).resolve() for p in (excluded_paths or [])} # full paths
84
-
85
- root_path = Path(root).resolve()
86
- root_parts = set(root_path.parts) # same as before
87
- base_name, _ext = os.path.splitext(file)
88
- if (
89
- base_name.startswith("test_")
90
- or base_name.endswith("_test")
91
- or ".test." in file
92
- or ".spec." in file
93
- or (excluded_dirs & root_parts) # name match
94
- or any(
95
- root_path.is_relative_to(p) # full-path match
96
- for p in excluded_paths
97
- )
98
- ):
99
- continue
100
- file_path = os.path.abspath(os.path.join(root, file))
101
- if os.path.getsize(file_path) == 0:
102
- continue
103
- source_code_files.add((file_path, lang))
104
-
105
- return sorted(list(source_code_files))
106
-
107
-
108
- def run_coroutine(coroutine_func, *args, **kwargs):
109
- """
110
- Run a coroutine function until it completes.
111
-
112
- This function creates a new asyncio event loop, sets it as the current loop, and
113
- executes the given coroutine function with the provided arguments. Once the coroutine
114
- completes, the loop is closed. Intended for use in environments where an existing event
115
- loop is not available or desirable.
116
-
117
- Parameters:
118
- -----------
119
-
120
- - coroutine_func: The coroutine function to be run.
121
- - *args: Positional arguments to pass to the coroutine function.
122
- - **kwargs: Keyword arguments to pass to the coroutine function.
123
-
124
- Returns:
125
- --------
126
-
127
- The result returned by the coroutine after completion.
128
- """
129
- loop = asyncio.new_event_loop()
130
- asyncio.set_event_loop(loop)
131
- result = loop.run_until_complete(coroutine_func(*args, **kwargs))
132
- loop.close()
133
- return result
134
-
135
-
136
- async def get_repo_file_dependencies(
137
- repo_path: str,
138
- detailed_extraction: bool = False,
139
- supported_languages: list = None,
140
- excluded_paths: Optional[List[str]] = None,
141
- ) -> AsyncGenerator[DataPoint, None]:
142
- """
143
- Generate a dependency graph for source files (multi-language) in the given repository path.
144
-
145
- Check the validity of the repository path and yield a repository object followed by the
146
- dependencies of source files within that repository. Raise a FileNotFoundError if the
147
- provided path does not exist. The extraction of detailed dependencies can be controlled
148
- via the `detailed_extraction` argument. Languages considered can be restricted via
149
- the `supported_languages` argument.
150
-
151
- Parameters:
152
- -----------
153
-
154
- - repo_path (str): The file path to the repository to process.
155
- - detailed_extraction (bool): Whether to perform a detailed extraction of code parts.
156
- - supported_languages (list | None): Subset of languages to include; if None, use defaults.
157
- """
158
-
159
- if isinstance(repo_path, list) and len(repo_path) == 1:
160
- repo_path = repo_path[0]
161
-
162
- if not os.path.exists(repo_path):
163
- raise FileNotFoundError(f"Repository path {repo_path} does not exist.")
164
-
165
- # Build language config from supported_languages
166
- default_language_config = {
167
- "python": [".py"],
168
- "javascript": [".js", ".jsx"],
169
- "typescript": [".ts", ".tsx"],
170
- "java": [".java"],
171
- "csharp": [".cs"],
172
- "go": [".go"],
173
- "rust": [".rs"],
174
- "cpp": [".cpp", ".c", ".h", ".hpp"],
175
- "c": [".c", ".h"],
176
- }
177
- if supported_languages is not None:
178
- language_config = {
179
- k: v for k, v in default_language_config.items() if k in supported_languages
180
- }
181
- else:
182
- language_config = default_language_config
183
-
184
- source_code_files = await get_source_code_files(
185
- repo_path, language_config=language_config, excluded_paths=excluded_paths
186
- )
187
-
188
- repo = Repository(
189
- id=uuid5(NAMESPACE_OID, repo_path),
190
- path=repo_path,
191
- )
192
-
193
- yield repo
194
-
195
- chunk_size = 100
196
- number_of_chunks = math.ceil(len(source_code_files) / chunk_size)
197
- chunk_ranges = [
198
- (
199
- chunk_number * chunk_size,
200
- min((chunk_number + 1) * chunk_size, len(source_code_files)) - 1,
201
- )
202
- for chunk_number in range(number_of_chunks)
203
- ]
204
-
205
- # Import dependency extractors for each language (Python for now, extend later)
206
- from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
207
- import aiofiles
208
- # TODO: Add other language extractors here
209
-
210
- for start_range, end_range in chunk_ranges:
211
- tasks = []
212
- for file_path, lang in source_code_files[start_range : end_range + 1]:
213
- # For now, only Python is supported; extend with other languages
214
- if lang == "python":
215
- tasks.append(
216
- get_local_script_dependencies(repo_path, file_path, detailed_extraction)
217
- )
218
- else:
219
- # Placeholder: create a minimal CodeFile for other languages
220
- async def make_codefile_stub(file_path=file_path, lang=lang):
221
- async with aiofiles.open(
222
- file_path, "r", encoding="utf-8", errors="replace"
223
- ) as f:
224
- source = await f.read()
225
- return CodeFile(
226
- id=uuid5(NAMESPACE_OID, file_path),
227
- name=os.path.relpath(file_path, repo_path),
228
- file_path=file_path,
229
- language=lang,
230
- source_code=source,
231
- )
232
-
233
- tasks.append(make_codefile_stub())
234
-
235
- results: list[CodeFile] = await asyncio.gather(*tasks)
236
-
237
- for source_code_file in results:
238
- source_code_file.part_of = repo
239
- if getattr(
240
- source_code_file, "language", None
241
- ) is None and source_code_file.file_path.endswith(".py"):
242
- source_code_file.language = "python"
243
- yield source_code_file
File without changes