cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,3 @@
1
+ # Run cognify to process the data and create a knowledge graph
2
+
3
+ await cognee.cognify(datasets=["cognee-basics"])
@@ -0,0 +1,9 @@
1
+ # And after the cognification, search the knowledge graph
2
+
3
+ result = await cognee.search(
4
+ "Which characters belong to Gryffindor?",
5
+ datasets=["cognee-basics"],
6
+ )
7
+
8
+ # Print the result so you can see it in the notebook output
9
+ print(result)
@@ -0,0 +1,17 @@
1
+ # Run multiple searches and print the results
2
+
3
+ result_1 = await cognee.search(
4
+ "Who taught Potions at Hogwarts at time Albus Dumbledore was the headmaster?",
5
+ datasets=["cognee-basics"],
6
+ )
7
+
8
+ # Print the result so you can see it in the notebook output
9
+ print(result_1)
10
+
11
+
12
+ result_2 = await cognee.search(
13
+ "How to defeat Voldemort?",
14
+ datasets=["cognee-basics"],
15
+ )
16
+
17
+ print(result_2)
@@ -0,0 +1,4 @@
1
+ {
2
+ "name": "Cognee Basics - tutorial 🧠",
3
+ "deletable": false
4
+ }
@@ -0,0 +1,3 @@
1
+ # Using Cognee with Python Development Data
2
+
3
+ Unite authoritative Python practice (Guido van Rossum's own contributions!), normative guidance (Zen/PEP 8), and your lived context (rules + conversations) into one AI memory that produces answers that are relevant, explainable, and consistent.
@@ -0,0 +1,3 @@
1
+ You'll see that cognee has connected your Python development challenges with Guido's approaches, revealing patterns like:
2
+ - "Type hint implementation failed due to circular imports - similar to issue Guido solved in mypy PR #1234"
3
+ - "Performance bottleneck in list comprehension matches pattern Guido optimized in CPython commit abc123"
@@ -0,0 +1,3 @@
1
+ # Memify
2
+
3
+ Let's now introduce the memory functions. These algorithms run on top of your semantic layer, connecting the dots and improving the reasoning in search.
@@ -0,0 +1,3 @@
1
+ memify_result = await cognee.memify(dataset="python-development-with-cognee")
2
+
3
+ print(memify_result)
@@ -0,0 +1,7 @@
1
+ # What Memify Does in This Case
2
+
3
+ This advanced function uses AI to:
4
+ - Infer rule patterns from your code (e.g., "When implementing iterators, always follow the protocol Guido established")
5
+ - Connect design philosophy to practice (e.g., linking "explicit is better than implicit" to your type hinting decisions)
6
+
7
+ Now let's see how the system has connected your Python development patterns with established best practices:
@@ -0,0 +1,6 @@
1
+ # Search for connections between your async patterns and Python philosophy
2
+ results = await cognee.search(
3
+ query_text="How does my AsyncWebScraper implementation align with Python's design principles?",
4
+ datasets=["python-development-with-cognee"],
5
+ )
6
+ print(results)
@@ -0,0 +1,3 @@
1
+ # Temporal Graphs
2
+
3
+ Because we used the `temporal_cognify` option for each cognification, we can ask time-related questions, for example:
@@ -0,0 +1,7 @@
1
+ result = await cognee.search(
2
+ query_text="What can we learn from Guido's contributions in 2025?",
3
+ query_type=cognee.SearchType.TEMPORAL,
4
+ datasets=["python-development-with-cognee"],
5
+ )
6
+
7
+ print(result)
@@ -0,0 +1,9 @@
1
+ # What You'll Learn in This Tutorial
2
+
3
+ In this comprehensive tutorial, you'll discover how to transform scattered development data into an intelligent knowledge system that enhances your coding workflow. By the end, you'll have:
4
+ - Connected disparate data sources (Guido's CPython contributions, mypy development, PEP discussions, your Python projects) into a unified AI memory graph
5
+ - Built a memory layer that understands Python design philosophy, best practice coding patterns, and your preferences and experience
6
+ - Learned how to use intelligent search capabilities that combine the diverse context
7
+ - Integrated everything with your coding environment through MCP (Model Context Protocol)
8
+
9
+ This tutorial demonstrates the power of knowledge graphs and retrieval-augmented generation (RAG) for software development, showing you how to build systems that learn from Python's creator and improve your own Python development.
@@ -0,0 +1,7 @@
1
+ # Cognee and Its Core Operations
2
+
3
+ Before we dive in, let's understand the core Cognee operations we'll be working with:
4
+ - `cognee.add()` - Ingests raw data (files, text, APIs) into the system
5
+ - `cognee.cognify()` - Processes and structures data into a knowledge graph using AI
6
+ - `cognee.search()` - Queries the knowledge graph with natural language or Cypher
7
+ - `cognee.memify()` - Cognee's "secret sauce" that infers implicit connections and rules from your data
@@ -0,0 +1,9 @@
1
+ # Data Used in This Tutorial
2
+
3
+ Cognee can ingest many types of sources. In this tutorial, we use a small, concrete set of files that cover different perspectives:
4
+
5
+ - `guido_contributions.json` — Authoritative exemplars. Real PRs and commits from Guido van Rossum (mypy, CPython). These show how Python’s creator solved problems and provide concrete anchors for patterns.
6
+ - `pep_style_guide.md` — Norms. Encodes community style and typing conventions (PEP 8 and related). Ensures that search results and inferred rules align with widely accepted standards.
7
+ - `zen_principles.md` — Philosophy. The Zen of Python. Grounds design trade‑offs (simplicity, explicitness, readability) beyond syntax or mechanics.
8
+ - `my_developer_rules.md` — Local constraints. Your house rules, conventions, and project‑specific requirements (scope, privacy, Spec.md). Keeps recommendations relevant to your actual workflow.
9
+ - `copilot_conversations.json` — Personal history. Transcripts of real assistant conversations, including your questions, code snippets, and discussion topics. Captures “how you code” and connects it to “how Guido codes.”
@@ -0,0 +1,5 @@
1
+ # First Data Ingestion: Exploring Guido's Python Contributions
2
+
3
+ We'll begin with a document that contains detailed PRs and commits from Guido van Rossum's work on mypy and CPython, showing real-world examples of Python's creator solving type system and language design challenges.
4
+
5
+ We'll use Cognee's `add()` and `cognify()` functions to ingest this data and build a knowledge graph that connects Guido's development patterns with Python best practices.
@@ -0,0 +1,13 @@
1
+ # Add Guido's contributions to data
2
+ await cognee.add(
3
+ "d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/guido_contributions.json",
4
+ node_set=["guido_data"],
5
+ dataset_name="python-development-with-cognee",
6
+ )
7
+
8
+ # Cognify added data into a knowledge graph
9
+ await cognee.cognify(datasets=["python-development-with-cognee"], temporal_cognify=True)
10
+
11
+ # Search the knowledge graph
12
+ results = await cognee.search("Show me commits", datasets=["python-development-with-cognee"])
13
+ print(results)
@@ -0,0 +1,3 @@
1
+ # What Just Happened?
2
+
3
+ The `search()` function uses natural language to query a knowledge graph containing Guido's development history. Unlike traditional databases, Cognee understands the relationships between commits, language features, design decisions, and evolution over time.
@@ -0,0 +1,3 @@
1
+ # Ingesting More Data
2
+
3
+ Now we'll add the remaining data and see how the connections emerge between Guido's contributions, Python best practices, and user conversations.
@@ -0,0 +1,31 @@
1
+ await cognee.add(
2
+ "d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/copilot_conversations.json",
3
+ node_set=["developer_data"],
4
+ dataset_name="python-development-with-cognee",
5
+ )
6
+
7
+ await cognee.add(
8
+ "d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/my_developer_rules.md",
9
+ node_set=["developer_data"],
10
+ dataset_name="python-development-with-cognee",
11
+ )
12
+
13
+ await cognee.add(
14
+ "d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/zen_principles.md",
15
+ node_set=["principles_data"],
16
+ dataset_name="python-development-with-cognee",
17
+ )
18
+
19
+ await cognee.add(
20
+ "d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/pep_style_guide.md",
21
+ node_set=["principles_data"],
22
+ dataset_name="python-development-with-cognee",
23
+ )
24
+
25
+ await cognee.cognify(datasets=["python-development-with-cognee"], temporal_cognify=True)
26
+
27
+ results = await cognee.search(
28
+ "What Python type hinting challenges did I face, and how does Guido approach similar problems in mypy?",
29
+ datasets=["python-development-with-cognee"],
30
+ )
31
+ print(results)
@@ -0,0 +1,4 @@
1
+ {
2
+ "name": "Python Development with Cognee - tutorial 🧠",
3
+ "deletable": false
4
+ }
@@ -0,0 +1,107 @@
1
+ {
2
+ "conversations": [
3
+ {
4
+ "id": "conv_001",
5
+ "timestamp": "2024-01-15T10:30:00Z",
6
+ "topic": "async/await patterns",
7
+ "user_query": "I'm building a web scraper that needs to handle thousands of URLs concurrently. What's the best way to structure this with asyncio?",
8
+ "assistant_response": "For high-concurrency web scraping, you'll want to use asyncio with aiohttp and implement proper semaphore-based rate limiting. Here's a robust pattern:\n\n```python\nimport asyncio\nimport aiohttp\nfrom typing import List, Dict, Any\n\nclass AsyncWebScraper:\n def __init__(self, max_concurrent: int = 100):\n self.semaphore = asyncio.Semaphore(max_concurrent)\n self.session = None\n \n async def __aenter__(self):\n self.session = aiohttp.ClientSession(\n timeout=aiohttp.ClientTimeout(total=30),\n connector=aiohttp.TCPConnector(limit=100)\n )\n return self\n \n async def __aexit__(self, exc_type, exc_val, exc_tb):\n if self.session:\n await self.session.close()\n \n async def fetch_url(self, url: str) -> Dict[str, Any]:\n async with self.semaphore:\n try:\n async with self.session.get(url) as response:\n content = await response.text()\n return {\n 'url': url,\n 'status': response.status,\n 'content': content,\n 'headers': dict(response.headers)\n }\n except Exception as e:\n return {'url': url, 'error': str(e)}\n \n async def scrape_urls(self, urls: List[str]) -> List[Dict[str, Any]]:\n tasks = [self.fetch_url(url) for url in urls]\n return await asyncio.gather(*tasks, return_exceptions=True)\n\n# Usage\nasync def main():\n urls = ['http://example.com'] * 1000 # Your URLs here\n \n async with AsyncWebScraper(max_concurrent=50) as scraper:\n results = await scraper.scrape_urls(urls)\n \n successful = [r for r in results if 'error' not in r]\n print(f'Successfully scraped {len(successful)} URLs')\n\nif __name__ == '__main__':\n asyncio.run(main())\n```\n\nKey points:\n- Semaphore controls concurrency to avoid overwhelming servers\n- Context manager ensures proper cleanup\n- TCPConnector with connection pooling\n- Proper exception handling for failed requests",
9
+ "code_context": {
10
+ "file": "scraper.py",
11
+ "language": "python",
12
+ "patterns_discussed": ["async/await", "context_managers", "semaphores", "aiohttp", "error_handling"]
13
+ },
14
+ "follow_up_questions": [
15
+ "How do I add retry logic for failed requests?",
16
+ "What's the best way to parse the scraped HTML content?"
17
+ ]
18
+ },
19
+ {
20
+ "id": "conv_002",
21
+ "timestamp": "2024-01-16T14:20:00Z",
22
+ "topic": "dataclass vs pydantic",
23
+ "user_query": "When should I use dataclasses vs Pydantic models? I'm building an API and need to handle user input validation.",
24
+ "assistant_response": "Great question! For API development with input validation, here's when to use each:\n\n**Use Pydantic when:**\n- Building APIs (FastAPI, Django REST)\n- Need runtime validation of external data\n- Want automatic JSON serialization/deserialization\n- Need complex validation rules\n- Working with configuration files\n\n**Use dataclasses when:**\n- Internal data structures\n- Simple data containers\n- Want to stay in stdlib\n- Performance is critical (dataclasses are faster)\n\nFor your API, I'd recommend Pydantic:\n\n```python\nfrom pydantic import BaseModel, validator, Field\nfrom typing import Optional, List\nfrom datetime import datetime\nfrom enum import Enum\n\nclass UserRole(str, Enum):\n ADMIN = 'admin'\n USER = 'user'\n GUEST = 'guest'\n\nclass CreateUserRequest(BaseModel):\n username: str = Field(..., min_length=3, max_length=50, regex=r'^[a-zA-Z0-9_]+$')\n email: str = Field(..., regex=r'^[\\w\\.-]+@[\\w\\.-]+\\.\\w+$')\n password: str = Field(..., min_length=8)\n role: UserRole = UserRole.USER\n tags: Optional[List[str]] = None\n \n @validator('password')\n def validate_password(cls, v):\n if not any(c.isupper() for c in v):\n raise ValueError('Password must contain uppercase letter')\n if not any(c.isdigit() for c in v):\n raise ValueError('Password must contain digit')\n return v\n \n @validator('tags')\n def validate_tags(cls, v):\n if v and len(v) > 10:\n raise ValueError('Too many tags')\n return v\n\nclass User(BaseModel):\n id: int\n username: str\n email: str\n role: UserRole\n created_at: datetime\n is_active: bool = True\n \n class Config:\n orm_mode = True\n json_encoders = { datetime: lambda v: v.isoformat() }\n\n# FastAPI usage\nfrom fastapi import FastAPI, HTTPException\n\napp = FastAPI()\n\n@app.post('/users/', response_model=User)\nasync def create_user(user_data: CreateUserRequest):\n try:\n new_user = await create_user_in_db(user_data)\n return User.from_orm(new_user)\n except Exception as e:\n raise 
HTTPException(status_code=400, detail=str(e))\n```\n\nPydantic gives you:\n- Automatic request validation\n- Clear error messages\n- Type coercion\n- JSON Schema generation\n- IDE support with type hints",
25
+ "code_context": {
26
+ "file": "models.py",
27
+ "language": "python",
28
+ "patterns_discussed": ["pydantic", "dataclasses", "validation", "fastapi", "type_hints", "enums"]
29
+ },
30
+ "follow_up_questions": [
31
+ "How do I handle nested validation with Pydantic?",
32
+ "Can I use Pydantic with SQLAlchemy models?"
33
+ ]
34
+ },
35
+ {
36
+ "id": "conv_003",
37
+ "timestamp": "2024-01-17T09:15:00Z",
38
+ "topic": "testing patterns",
39
+ "user_query": "I'm struggling with testing async code and database interactions. What's the best approach for pytest with async functions?",
40
+ "assistant_response": "Testing async code can be tricky! Here's a comprehensive approach using pytest-asyncio and proper test isolation...",
41
+ "code_context": {
42
+ "file": "test_users.py",
43
+ "language": "python",
44
+ "patterns_discussed": ["pytest", "async_testing", "fixtures", "mocking", "database_testing", "fastapi_testing"]
45
+ },
46
+ "follow_up_questions": [
47
+ "How do I test WebSocket connections?",
48
+ "What's the best way to test database migrations?"
49
+ ]
50
+ },
51
+ {
52
+ "id": "conv_004",
53
+ "timestamp": "2024-01-18T16:45:00Z",
54
+ "topic": "performance optimization",
55
+ "user_query": "My FastAPI app is getting slow with large datasets. How can I optimize database queries and response times?",
56
+ "assistant_response": "Performance optimization in FastAPI apps typically involves several layers...",
57
+ "code_context": {
58
+ "file": "optimizations.py",
59
+ "language": "python",
60
+ "patterns_discussed": ["performance_optimization", "caching", "database_optimization", "async_patterns", "monitoring"]
61
+ },
62
+ "follow_up_questions": [
63
+ "How do I implement database connection pooling properly?",
64
+ "What's the best way to handle memory usage with large datasets?"
65
+ ]
66
+ },
67
+ {
68
+ "id": "conv_005",
69
+ "timestamp": "2024-01-19T11:30:00Z",
70
+ "topic": "error handling and logging",
71
+ "user_query": "I need to implement proper error handling and logging across my Python application. What's the best approach for production-ready error management?",
72
+ "assistant_response": "Excellent question! Proper error handling and logging are crucial for production applications...",
73
+ "code_context": {
74
+ "file": "error_handling.py",
75
+ "language": "python",
76
+ "patterns_discussed": ["error_handling", "logging", "exceptions", "middleware", "decorators", "fastapi"]
77
+ },
78
+ "follow_up_questions": [
79
+ "How do I integrate this with external monitoring tools like Sentry?",
80
+ "What's the best way to handle errors in background tasks?"
81
+ ]
82
+ }
83
+ ],
84
+ "metadata": {
85
+ "total_conversations": 5,
86
+ "date_range": "2024-01-15 to 2024-01-19",
87
+ "topics_covered": [
88
+ "async/await patterns",
89
+ "dataclass vs pydantic",
90
+ "testing patterns",
91
+ "performance optimization",
92
+ "error handling and logging"
93
+ ],
94
+ "code_patterns_discussed": [
95
+ "asyncio", "aiohttp", "semaphores", "context_managers",
96
+ "pydantic", "fastapi", "type_hints", "validation",
97
+ "pytest", "async_testing", "fixtures", "mocking",
98
+ "performance_optimization", "caching", "database_optimization",
99
+ "error_handling", "logging", "exceptions", "middleware"
100
+ ],
101
+ "difficulty_levels": {
102
+ "beginner": 1,
103
+ "intermediate": 2,
104
+ "advanced": 2
105
+ }
106
+ }
107
+ }