cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  166. cognee/tests/test_add_docling_document.py +2 -2
  167. cognee/tests/test_cognee_server_start.py +84 -3
  168. cognee/tests/test_conversation_history.py +68 -5
  169. cognee/tests/test_data/example_with_header.csv +3 -0
  170. cognee/tests/test_dataset_database_handler.py +137 -0
  171. cognee/tests/test_dataset_delete.py +76 -0
  172. cognee/tests/test_edge_centered_payload.py +170 -0
  173. cognee/tests/test_edge_ingestion.py +27 -0
  174. cognee/tests/test_feedback_enrichment.py +1 -1
  175. cognee/tests/test_library.py +6 -4
  176. cognee/tests/test_load.py +62 -0
  177. cognee/tests/test_multi_tenancy.py +165 -0
  178. cognee/tests/test_parallel_databases.py +2 -0
  179. cognee/tests/test_pipeline_cache.py +164 -0
  180. cognee/tests/test_relational_db_migration.py +54 -2
  181. cognee/tests/test_search_db.py +44 -2
  182. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  183. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  184. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  185. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  186. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  187. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  188. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  189. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  190. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  191. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  192. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  193. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  194. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  195. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  196. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  197. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  198. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  199. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  200. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  201. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  202. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  203. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  204. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  205. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  206. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  207. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
  208. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
  209. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  210. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  211. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  212. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  213. cognee/modules/retrieval/code_retriever.py +0 -232
  214. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  215. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  216. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  217. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  218. cognee/tasks/repo_processor/__init__.py +0 -2
  219. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  220. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  221. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  222. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
  223. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  224. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,360 @@
1
+ import os
2
+ import platform
3
+ import subprocess
4
+ import tempfile
5
+ from pathlib import Path
6
+
7
+ import requests
8
+
9
+ from cognee.shared.logging_utils import get_logger
10
+
11
+ logger = get_logger()
12
+
13
+
14
def get_nvm_dir() -> Path:
    """
    Get the nvm directory path following standard nvm installation logic.

    Resolution order:
    1. ``NVM_DIR`` when set and non-empty (an explicit install location
       chosen by the user takes priority).
    2. ``$XDG_CONFIG_HOME/nvm`` when ``XDG_CONFIG_HOME`` is set and non-empty.
    3. ``~/.nvm`` otherwise.

    Returns:
        The directory in which nvm is (or would be) installed. The path is
        not checked for existence.
    """
    # An explicit NVM_DIR wins; without this a custom install location would
    # be reported as "not installed" by every caller that checks the path.
    explicit_nvm_dir = os.environ.get("NVM_DIR")
    if explicit_nvm_dir:
        return Path(explicit_nvm_dir)

    xdg_config_home = os.environ.get("XDG_CONFIG_HOME")
    if xdg_config_home:
        return Path(xdg_config_home) / "nvm"

    return Path.home() / ".nvm"
23
+
24
+
25
def get_nvm_sh_path() -> Path:
    """
    Return the location of the ``nvm.sh`` script inside the nvm directory.

    The directory itself is resolved by ``get_nvm_dir()``; the returned path
    is not checked for existence.
    """
    nvm_directory = get_nvm_dir()
    return nvm_directory / "nvm.sh"
30
+
31
+
32
def check_nvm_installed() -> bool:
    """
    Check if nvm (Node Version Manager) is installed.

    On Windows the ``nvm version`` command of nvm-windows is executed through
    the shell. On other systems nvm is a shell function, so ``nvm.sh`` is
    sourced inside a bash subprocess before ``nvm --version`` is run.

    Returns:
        True when the version command exits with status 0. False when
        ``nvm.sh`` is missing, the command fails, or any exception (for
        example a timeout or a missing shell) occurs; failures are logged at
        debug level only.
    """
    import shlex  # local import: only needed to quote the nvm.sh path for bash

    try:
        if platform.system() == "Windows":
            # nvm-windows is a regular executable resolved via the shell.
            result = subprocess.run(
                ["nvm", "version"],
                capture_output=True,
                text=True,
                timeout=10,
                shell=True,
            )
        else:
            nvm_path = get_nvm_sh_path()
            if not nvm_path.exists():
                logger.debug(f"nvm.sh not found at {nvm_path}")
                return False

            # Quote the path so directories containing spaces or shell
            # metacharacters do not break (or alter) the command.
            source_and_check = f"source {shlex.quote(str(nvm_path))} && nvm --version"
            result = subprocess.run(
                ["bash", "-c", source_and_check],
                capture_output=True,
                text=True,
                timeout=10,
            )

        if result.returncode != 0:
            # Log the error to help diagnose configuration issues
            if result.stderr:
                logger.debug(f"nvm check failed: {result.stderr.strip()}")
            return False

        return True
    except Exception as e:
        # Best-effort probe: any failure simply means "not installed".
        logger.debug(f"Exception checking nvm: {str(e)}")
        return False
74
+
75
+
76
def install_nvm() -> bool:
    """
    Install nvm (Node Version Manager) on Unix-like systems.

    Downloads the pinned nvm install script, stores it in a temporary file,
    runs it with bash and finally checks that the nvm directory reported by
    ``get_nvm_dir()`` exists.

    Returns:
        True when the installer exits with status 0 and the nvm directory
        exists afterwards. False on Windows (manual nvm-windows installation
        is required), on download errors, on installer failure, or on any
        other exception; the reason is logged.
    """
    if platform.system() == "Windows":
        logger.error("nvm installation on Windows requires nvm-windows.")
        logger.error(
            "Please install nvm-windows manually from: https://github.com/coreybutler/nvm-windows"
        )
        return False

    logger.info("Installing nvm (Node Version Manager)...")

    try:
        # Download and install nvm
        nvm_install_script = "https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh"
        logger.info(f"Downloading nvm installer from {nvm_install_script}...")

        response = requests.get(nvm_install_script, timeout=60)
        response.raise_for_status()

        # Write the raw bytes: text mode would re-encode the script with the
        # locale's encoding and can fail on non-ASCII characters.
        script_file = tempfile.NamedTemporaryFile(mode="wb", suffix=".sh", delete=False)
        install_script_path = script_file.name

        try:
            with script_file:
                script_file.write(response.content)

            # The script is passed to bash explicitly, so it does not need to
            # be made executable.
            result = subprocess.run(
                ["bash", install_script_path],
                capture_output=True,
                text=True,
                timeout=120,
            )

            if result.returncode != 0:
                logger.error(f"nvm installation failed: {result.stderr}")
                return False

            logger.info("✓ nvm installed successfully")

            # Confirm the installer actually created the nvm directory.
            nvm_dir = get_nvm_dir()
            if nvm_dir.exists():
                return True

            logger.warning(f"nvm installation completed but nvm directory not found at {nvm_dir}")
            return False
        finally:
            # Clean up temporary script; a failure here must not mask the
            # installation outcome, so it is only logged.
            try:
                os.unlink(install_script_path)
            except OSError as cleanup_error:
                logger.debug(f"Could not remove temporary nvm installer: {cleanup_error}")

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to download nvm installer: {str(e)}")
        return False
    except Exception as e:
        logger.error(f"Failed to install nvm: {str(e)}")
        return False
139
+
140
+
141
def _node_version_sort_key(version_dir: Path) -> tuple:
    """
    Build a sort key for an nvm version directory such as ``v22.11.0``.

    Numeric components are compared as integers so that ``v22.x`` ranks above
    ``v9.x``. Names that are not purely numeric get an empty numeric part and
    therefore rank lowest; the directory name is used as a tie-breaker.
    """
    components = version_dir.name.lstrip("v").split(".")
    try:
        numeric = tuple(int(component) for component in components)
    except ValueError:
        numeric = ()
    return (numeric, version_dir.name)


def _latest_nvm_node_bin(nvm_dir: Path) -> "Path | None":
    """
    Return the ``bin`` directory of the highest Node.js version found under
    ``<nvm_dir>/versions/node``, or None when no such directory exists.
    """
    versions_root = nvm_dir / "versions" / "node"
    if not versions_root.exists():
        return None

    version_dirs = list(versions_root.iterdir())
    if not version_dirs:
        return None

    candidate = max(version_dirs, key=_node_version_sort_key) / "bin"
    return candidate if candidate.exists() else None


def install_node_with_nvm() -> bool:
    """
    Install the latest Node.js version using nvm.

    Sources ``nvm.sh`` in a bash subprocess, runs ``nvm install node``, sets
    the installed version as nvm's default alias and prepends the newest
    installed Node.js ``bin`` directory to this process's ``PATH`` so that
    subsequent ``node``/``npm`` invocations can find it.

    Returns True if installation succeeds, False otherwise (Windows, missing
    ``nvm.sh``, installer failure, timeout or any other exception).
    """
    import shlex  # local import: only needed to quote the nvm.sh path for bash

    if platform.system() == "Windows":
        logger.error("Node.js installation via nvm on Windows requires nvm-windows.")
        logger.error("Please install Node.js manually from: https://nodejs.org/")
        return False

    logger.info("Installing latest Node.js version using nvm...")

    try:
        # Source nvm and install latest Node.js
        nvm_path = get_nvm_sh_path()
        if not nvm_path.exists():
            logger.error(f"nvm.sh not found at {nvm_path}. nvm may not be properly installed.")
            return False

        # Quote the path so directories with spaces or shell metacharacters
        # are passed to bash intact.
        nvm_source_cmd = f"source {shlex.quote(str(nvm_path))}"
        install_cmd = f"{nvm_source_cmd} && nvm install node"

        result = subprocess.run(
            ["bash", "-c", install_cmd],
            capture_output=True,
            text=True,
            timeout=300,  # 5 minutes timeout for Node.js installation
        )

        if result.returncode != 0:
            logger.error(f"Failed to install Node.js: {result.stderr}")
            return False

        logger.info("✓ Node.js installed successfully via nvm")

        # Set as default version; a failure here is not fatal for the install.
        use_cmd = f"{nvm_source_cmd} && nvm alias default node"
        alias_result = subprocess.run(
            ["bash", "-c", use_cmd],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if alias_result.returncode != 0 and alias_result.stderr:
            logger.debug(f"Failed to set default Node.js alias: {alias_result.stderr.strip()}")

        # Make node/npm available to subsequent commands of this process.
        nvm_dir = get_nvm_dir()
        if nvm_dir.exists():
            latest_node_bin = _latest_nvm_node_bin(nvm_dir)
            if latest_node_bin is not None:
                current_path = os.environ.get("PATH", "")
                # Avoid a trailing separator: an empty PATH entry means the
                # current directory on POSIX systems.
                os.environ["PATH"] = (
                    f"{latest_node_bin}{os.pathsep}{current_path}"
                    if current_path
                    else str(latest_node_bin)
                )

        return True

    except subprocess.TimeoutExpired:
        logger.error("Timeout installing Node.js (this can take several minutes)")
        return False
    except Exception as e:
        logger.error(f"Error installing Node.js: {str(e)}")
        return False
208
+
209
+
210
def _run_version_direct(tool: str, use_shell: bool = False) -> "subprocess.CompletedProcess | None":
    """
    Run ``<tool> --version`` directly.

    Returns the completed process, or None when the executable cannot be
    found (``subprocess.run`` raises ``FileNotFoundError`` in that case
    instead of reporting a non-zero exit status).
    """
    try:
        return subprocess.run(
            [tool, "--version"],
            capture_output=True,
            text=True,
            timeout=10,
            shell=use_shell,
        )
    except FileNotFoundError:
        logger.debug(f"{tool} executable not found in PATH")
        return None


def _run_version_via_nvm(tool: str) -> "subprocess.CompletedProcess | None":
    """
    Run ``<tool> --version`` inside bash with ``nvm.sh`` sourced.

    Returns the completed process, or None when ``nvm.sh`` does not exist or
    bash itself cannot be started.
    """
    import shlex  # local import: only needed to quote the nvm.sh path for bash

    nvm_path = get_nvm_sh_path()
    if not nvm_path.exists():
        return None

    try:
        result = subprocess.run(
            ["bash", "-c", f"source {shlex.quote(str(nvm_path))} && {tool} --version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except FileNotFoundError:
        logger.debug("bash not found; cannot source nvm")
        return None

    if result.returncode != 0 and result.stderr:
        logger.debug(f"Failed to source nvm or run {tool}: {result.stderr.strip()}")
    return result


def _version_check_succeeded(result: "subprocess.CompletedProcess | None") -> bool:
    """Return True when a version probe ran and exited with status 0."""
    return result is not None and result.returncode == 0


def check_node_npm() -> tuple[bool, str]:  # (is_available, error_message)
    """
    Check if Node.js and npm are available.
    If not available, attempts to install nvm and Node.js automatically.

    Node.js is probed directly first and then through a bash shell with nvm
    sourced; only when both probes fail is an automatic installation (nvm
    first if needed, then Node.js) attempted. npm is probed the same way,
    except that on Windows it is run through the shell and no nvm fallback is
    used.

    Returns:
        ``(True, "Node.js <version>, npm <version>")`` when both tools
        respond, otherwise ``(False, <reason>)``.
    """
    try:
        # Check Node.js - try direct command first, then with nvm if needed.
        # A missing executable is treated like a failed command so that the
        # nvm fallback is actually reached before any installation is tried.
        node_result = _run_version_direct("node")
        if not _version_check_succeeded(node_result):
            nvm_node_result = _run_version_via_nvm("node")
            if nvm_node_result is not None:
                node_result = nvm_node_result

        if not _version_check_succeeded(node_result):
            # Node.js is not installed, try to install it
            logger.info("Node.js is not installed. Attempting to install automatically...")

            # Check if nvm is installed
            if not check_nvm_installed():
                logger.info("nvm is not installed. Installing nvm first...")
                if not install_nvm():
                    return (
                        False,
                        "Failed to install nvm. Please install Node.js manually from https://nodejs.org/",
                    )

            # Install Node.js using nvm
            if not install_node_with_nvm():
                return (
                    False,
                    "Failed to install Node.js. Please install Node.js manually from https://nodejs.org/",
                )

            # Verify installation after automatic setup: prefer the
            # nvm-sourced shell, fall back to a direct call.
            node_result = _run_version_via_nvm("node")
            if node_result is None:
                node_result = _run_version_direct("node")

            if not _version_check_succeeded(node_result):
                nvm_path = get_nvm_sh_path()
                return (
                    False,
                    f"Node.js installation completed but node command is not available. Please restart your terminal or source {nvm_path}",
                )

        node_version = node_result.stdout.strip()
        logger.debug(f"Found Node.js version: {node_version}")

        # Check npm - handle Windows PowerShell scripts
        if platform.system() == "Windows":
            # On Windows, npm might be a PowerShell script, so we need to use shell=True
            npm_result = _run_version_direct("npm", use_shell=True)
        else:
            # On Unix-like systems npm may only be reachable once nvm is sourced.
            npm_result = _run_version_direct("npm")
            if not _version_check_succeeded(npm_result):
                nvm_npm_result = _run_version_via_nvm("npm")
                if nvm_npm_result is not None:
                    npm_result = nvm_npm_result

        if not _version_check_succeeded(npm_result):
            return False, "npm is not installed or not in PATH"

        npm_version = npm_result.stdout.strip()
        logger.debug(f"Found npm version: {npm_version}")

        return True, f"Node.js {node_version}, npm {npm_version}"

    except subprocess.TimeoutExpired:
        return False, "Timeout checking Node.js/npm installation"
    except Exception as e:
        return False, f"Error checking Node.js/npm: {str(e)}"
@@ -0,0 +1,50 @@
1
+ import platform
2
+ import subprocess
3
+ from pathlib import Path
4
+ from typing import List
5
+
6
+ from cognee.shared.logging_utils import get_logger
7
+ from .node_setup import get_nvm_sh_path
8
+
9
+ logger = get_logger()
10
+
11
+
12
def run_npm_command(cmd: List[str], cwd: Path, timeout: int = 300) -> subprocess.CompletedProcess:
    """
    Run an npm command, ensuring nvm is sourced if needed (Unix-like systems only).

    On Windows the command is run once through the shell. On other systems
    the command is run directly first; when that fails (non-zero exit status
    or the executable is not found) and ``nvm.sh`` exists, it is retried in a
    bash shell with nvm sourced.

    Args:
        cmd: The command and its arguments, e.g. ``["npm", "install"]``.
        cwd: Working directory for the command.
        timeout: Per-attempt timeout in seconds.

    Returns:
        The CompletedProcess of the last attempt that was executed.

    Raises:
        FileNotFoundError: When the executable cannot be found and no nvm
            fallback is available.
        subprocess.TimeoutExpired: When an attempt exceeds ``timeout``.
    """
    import shlex  # local import: only needed to build the bash command line

    if platform.system() == "Windows":
        # On Windows, use shell=True for npm commands
        return subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
            shell=True,
        )

    # On Unix-like systems, try direct command first
    result = None
    missing_executable = None
    try:
        result = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except FileNotFoundError as error:
        # npm is not in PATH; remember the error so it can be re-raised when
        # there is no nvm installation to fall back to.
        missing_executable = error

    if result is not None and result.returncode == 0:
        return result

    # If it fails and nvm might be installed, try with nvm sourced
    nvm_path = get_nvm_sh_path()
    if nvm_path.exists():
        # Quote both the script path and every argument so the shell sees
        # exactly the same argv as the direct invocation.
        nvm_cmd = f"source {shlex.quote(str(nvm_path))} && {shlex.join(cmd)}"
        result = subprocess.run(
            ["bash", "-c", nvm_cmd],
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        if result.returncode != 0 and result.stderr:
            logger.debug(f"npm command failed with nvm: {result.stderr.strip()}")
        return result

    if result is None:
        raise missing_executable
    return result
cognee/api/v1/ui/ui.py CHANGED
@@ -15,6 +15,8 @@ import shutil
15
15
 
16
16
  from cognee.shared.logging_utils import get_logger
17
17
  from cognee.version import get_cognee_version
18
+ from .node_setup import check_node_npm, get_nvm_dir, get_nvm_sh_path
19
+ from .npm_utils import run_npm_command
18
20
 
19
21
  logger = get_logger()
20
22
 
@@ -285,48 +287,6 @@ def find_frontend_path() -> Optional[Path]:
285
287
  return None
286
288
 
287
289
 
288
- def check_node_npm() -> tuple[bool, str]:
289
- """
290
- Check if Node.js and npm are available.
291
- Returns (is_available, error_message)
292
- """
293
-
294
- try:
295
- # Check Node.js
296
- result = subprocess.run(["node", "--version"], capture_output=True, text=True, timeout=10)
297
- if result.returncode != 0:
298
- return False, "Node.js is not installed or not in PATH"
299
-
300
- node_version = result.stdout.strip()
301
- logger.debug(f"Found Node.js version: {node_version}")
302
-
303
- # Check npm - handle Windows PowerShell scripts
304
- if platform.system() == "Windows":
305
- # On Windows, npm might be a PowerShell script, so we need to use shell=True
306
- result = subprocess.run(
307
- ["npm", "--version"], capture_output=True, text=True, timeout=10, shell=True
308
- )
309
- else:
310
- result = subprocess.run(
311
- ["npm", "--version"], capture_output=True, text=True, timeout=10
312
- )
313
-
314
- if result.returncode != 0:
315
- return False, "npm is not installed or not in PATH"
316
-
317
- npm_version = result.stdout.strip()
318
- logger.debug(f"Found npm version: {npm_version}")
319
-
320
- return True, f"Node.js {node_version}, npm {npm_version}"
321
-
322
- except subprocess.TimeoutExpired:
323
- return False, "Timeout checking Node.js/npm installation"
324
- except FileNotFoundError:
325
- return False, "Node.js/npm not found. Please install Node.js from https://nodejs.org/"
326
- except Exception as e:
327
- return False, f"Error checking Node.js/npm: {str(e)}"
328
-
329
-
330
290
  def install_frontend_dependencies(frontend_path: Path) -> bool:
331
291
  """
332
292
  Install frontend dependencies if node_modules doesn't exist.
@@ -341,24 +301,7 @@ def install_frontend_dependencies(frontend_path: Path) -> bool:
341
301
  logger.info("Installing frontend dependencies (this may take a few minutes)...")
342
302
 
343
303
  try:
344
- # Use shell=True on Windows for npm commands
345
- if platform.system() == "Windows":
346
- result = subprocess.run(
347
- ["npm", "install"],
348
- cwd=frontend_path,
349
- capture_output=True,
350
- text=True,
351
- timeout=300, # 5 minutes timeout
352
- shell=True,
353
- )
354
- else:
355
- result = subprocess.run(
356
- ["npm", "install"],
357
- cwd=frontend_path,
358
- capture_output=True,
359
- text=True,
360
- timeout=300, # 5 minutes timeout
361
- )
304
+ result = run_npm_command(["npm", "install"], frontend_path, timeout=300)
362
305
 
363
306
  if result.returncode == 0:
364
307
  logger.info("Frontend dependencies installed successfully")
@@ -642,6 +585,21 @@ def start_ui(
642
585
  env["HOST"] = "localhost"
643
586
  env["PORT"] = str(port)
644
587
 
588
+ # If nvm is installed, ensure it's available in the environment
589
+ nvm_path = get_nvm_sh_path()
590
+ if platform.system() != "Windows" and nvm_path.exists():
591
+ # Add nvm to PATH for the subprocess
592
+ nvm_dir = get_nvm_dir()
593
+ # Find the latest Node.js version installed via nvm
594
+ nvm_versions = nvm_dir / "versions" / "node"
595
+ if nvm_versions.exists():
596
+ versions = sorted(nvm_versions.iterdir(), reverse=True)
597
+ if versions:
598
+ latest_node_bin = versions[0] / "bin"
599
+ if latest_node_bin.exists():
600
+ current_path = env.get("PATH", "")
601
+ env["PATH"] = f"{latest_node_bin}:{current_path}"
602
+
645
603
  # Start the development server
646
604
  logger.info(f"Starting frontend server at http://localhost:{port}")
647
605
  logger.info("This may take a moment to compile and start...")
@@ -659,14 +617,26 @@ def start_ui(
659
617
  shell=True,
660
618
  )
661
619
  else:
662
- process = subprocess.Popen(
663
- ["npm", "run", "dev"],
664
- cwd=frontend_path,
665
- env=env,
666
- stdout=subprocess.PIPE,
667
- stderr=subprocess.PIPE,
668
- preexec_fn=os.setsid if hasattr(os, "setsid") else None,
669
- )
620
+ # On Unix-like systems, use bash with nvm sourced if available
621
+ if nvm_path.exists():
622
+ # Use bash to source nvm and run npm
623
+ process = subprocess.Popen(
624
+ ["bash", "-c", f"source {nvm_path} && npm run dev"],
625
+ cwd=frontend_path,
626
+ env=env,
627
+ stdout=subprocess.PIPE,
628
+ stderr=subprocess.PIPE,
629
+ preexec_fn=os.setsid if hasattr(os, "setsid") else None,
630
+ )
631
+ else:
632
+ process = subprocess.Popen(
633
+ ["npm", "run", "dev"],
634
+ cwd=frontend_path,
635
+ env=env,
636
+ stdout=subprocess.PIPE,
637
+ stderr=subprocess.PIPE,
638
+ preexec_fn=os.setsid if hasattr(os, "setsid") else None,
639
+ )
670
640
 
671
641
  # Start threads to stream frontend output with prefix
672
642
  _stream_process_output(process, "stdout", "[FRONTEND]", "\033[33m") # Yellow
@@ -22,7 +22,7 @@ relationships, and creates semantic connections for enhanced search and reasonin
22
22
 
23
23
  Processing Pipeline:
24
24
  1. **Document Classification**: Identifies document types and structures
25
- 2. **Permission Validation**: Ensures user has processing rights
25
+ 2. **Permission Validation**: Ensures user has processing rights
26
26
  3. **Text Chunking**: Breaks content into semantically meaningful segments
27
27
  4. **Entity Extraction**: Identifies key concepts, people, places, organizations
28
28
  5. **Relationship Detection**: Discovers connections between entities
@@ -97,6 +97,13 @@ After successful cognify processing, use `cognee search` to query the knowledge
97
97
  chunker_class = LangchainChunker
98
98
  except ImportError:
99
99
  fmt.warning("LangchainChunker not available, using TextChunker")
100
+ elif args.chunker == "CsvChunker":
101
+ try:
102
+ from cognee.modules.chunking.CsvChunker import CsvChunker
103
+
104
+ chunker_class = CsvChunker
105
+ except ImportError:
106
+ fmt.warning("CsvChunker not available, using TextChunker")
100
107
 
101
108
  result = await cognee.cognify(
102
109
  datasets=datasets,
cognee/cli/config.py CHANGED
@@ -26,7 +26,7 @@ SEARCH_TYPE_CHOICES = [
26
26
  ]
27
27
 
28
28
  # Chunker choices
29
- CHUNKER_CHOICES = ["TextChunker", "LangchainChunker"]
29
+ CHUNKER_CHOICES = ["TextChunker", "LangchainChunker", "CsvChunker"]
30
30
 
31
31
  # Output format choices
32
32
  OUTPUT_FORMAT_CHOICES = ["json", "pretty", "simple"]