cognee 0.3.2__py3-none-any.whl → 0.3.4.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
  2. cognee/api/v1/search/search.py +1 -1
  3. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
  4. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
  5. cognee/infrastructure/databases/vector/config.py +1 -1
  6. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +2 -4
  7. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
  8. cognee/infrastructure/utils/run_async.py +9 -4
  9. cognee/infrastructure/utils/run_sync.py +4 -3
  10. cognee/modules/notebooks/methods/create_tutorial_notebook.py +87 -0
  11. cognee/modules/notebooks/methods/get_notebook.py +2 -2
  12. cognee/modules/notebooks/methods/update_notebook.py +0 -1
  13. cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
  14. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
  15. cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
  16. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  17. cognee/modules/retrieval/temporal_retriever.py +1 -1
  18. cognee/modules/retrieval/user_qa_feedback.py +1 -1
  19. cognee/modules/search/methods/search.py +12 -13
  20. cognee/modules/search/utils/prepare_search_result.py +31 -9
  21. cognee/modules/search/utils/transform_context_to_graph.py +1 -1
  22. cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
  23. cognee/tasks/temporal_graph/models.py +11 -6
  24. cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
  25. cognee/tests/test_temporal_graph.py +6 -34
  26. {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/METADATA +5 -5
  27. {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/RECORD +31 -31
  28. cognee-0.3.4.dev0.dist-info/entry_points.txt +2 -0
  29. cognee/api/v1/save/save.py +0 -335
  30. cognee/tests/test_save_export_path.py +0 -116
  31. cognee-0.3.2.dist-info/entry_points.txt +0 -2
  32. {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/WHEEL +0 -0
  33. {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/licenses/LICENSE +0 -0
  34. {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognee
3
- Version: 0.3.2
3
+ Version: 0.3.4.dev0
4
4
  Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
5
5
  Project-URL: Homepage, https://www.cognee.ai
6
6
  Project-URL: Repository, https://github.com/topoteretes/cognee
@@ -57,7 +57,7 @@ Requires-Dist: structlog<26,>=25.2.0
57
57
  Requires-Dist: tiktoken<1.0.0,>=0.8.0
58
58
  Requires-Dist: typing-extensions<5.0.0,>=4.12.2
59
59
  Provides-Extra: anthropic
60
- Requires-Dist: anthropic<0.27,>=0.26.1; extra == 'anthropic'
60
+ Requires-Dist: anthropic>=0.27; extra == 'anthropic'
61
61
  Provides-Extra: api
62
62
  Requires-Dist: gunicorn<24,>=20.1.0; extra == 'api'
63
63
  Requires-Dist: uvicorn<1.0.0,>=0.34.0; extra == 'api'
@@ -65,8 +65,8 @@ Requires-Dist: websockets<16.0.0,>=15.0.1; extra == 'api'
65
65
  Provides-Extra: aws
66
66
  Requires-Dist: s3fs[boto3]==2025.3.2; extra == 'aws'
67
67
  Provides-Extra: chromadb
68
- Requires-Dist: chromadb<0.7,>=0.3.0; extra == 'chromadb'
69
- Requires-Dist: pypika==0.48.8; extra == 'chromadb'
68
+ Requires-Dist: chromadb<0.7,>=0.6; extra == 'chromadb'
69
+ Requires-Dist: pypika==0.48.9; extra == 'chromadb'
70
70
  Provides-Extra: codegraph
71
71
  Requires-Dist: fastembed<=0.6.0; (python_version < '3.13') and extra == 'codegraph'
72
72
  Requires-Dist: transformers<5,>=4.46.3; extra == 'codegraph'
@@ -316,7 +316,7 @@ You can also cognify your files and query using cognee UI.
316
316
 
317
317
  <img src="assets/cognee-new-ui.webp" width="100%" alt="Cognee UI 2"></a>
318
318
 
319
- Try cognee UI by runnning ``` cognee -ui ``` command on your terminal.
319
+ Try cognee UI by runnning ``` cognee-cli -ui ``` command on your terminal.
320
320
 
321
321
  ## Understand our architecture
322
322
 
@@ -41,7 +41,7 @@ cognee/api/v1/memify/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
41
41
  cognee/api/v1/memify/routers/__init__.py,sha256=Uv25PVGhfjnNi1NYWOmOLIlzaeTlyMYF9m7BEfdu45Q,49
42
42
  cognee/api/v1/memify/routers/get_memify_router.py,sha256=C1Cjt9D5TxhqBPmXZGNrCS4lJqPVXIJYgxZFtWVjZNs,4599
43
43
  cognee/api/v1/notebooks/routers/__init__.py,sha256=TvQz6caluaMoXNvjbE1p_C8savypgs8rAyP5lQ8jlpc,55
44
- cognee/api/v1/notebooks/routers/get_notebooks_router.py,sha256=YFxvs3WR5RCjB-Rk4uJ8yZtlLU0vl0AcOuRNFrb-i6U,3420
44
+ cognee/api/v1/notebooks/routers/get_notebooks_router.py,sha256=m8OH3Kw1UHF8aTP4yNuSpv7gNThE4HxmLIrUnvECYGA,3484
45
45
  cognee/api/v1/permissions/routers/__init__.py,sha256=ljE3YnrzlMcVfThmkR5GSIxkm7sQVyibaLNtYQL4HO0,59
46
46
  cognee/api/v1/permissions/routers/get_permissions_router.py,sha256=tqd-J__UBlstTWnQocesdjVM9JnYO5rtJhhFj-Zv1_o,8316
47
47
  cognee/api/v1/prune/__init__.py,sha256=FEr5tTlX7wf3X4aFff6NPlVhNrPyqx7RBoJ71bJN1cY,25
@@ -53,9 +53,8 @@ cognee/api/v1/responses/models.py,sha256=MylzSnK-QB0kXe7nS-Mu4XRKZa-uBw8qP7Ke9On
53
53
  cognee/api/v1/responses/routers/__init__.py,sha256=X2qishwGRVFXawnvkZ5bv420PuPRLvknaFO2jdfiR10,122
54
54
  cognee/api/v1/responses/routers/default_tools.py,sha256=9qqzEZhrt3_YMKzUA06ke8P-2WeLXhYpKgVW6mLHlzw,3004
55
55
  cognee/api/v1/responses/routers/get_responses_router.py,sha256=ggbLhY9IXaInCgIs5TUuOCkFW64xmTKZQsc2ENq2Ocs,5979
56
- cognee/api/v1/save/save.py,sha256=xRthVNANIsrVJlLa5QKrdSiwCSckr7HBLmoeVJ_gEdE,12639
57
56
  cognee/api/v1/search/__init__.py,sha256=Sqw60DcOj4Bnvt-EWFknT31sPcvROIRKCWLr5pbkFr4,39
58
- cognee/api/v1/search/search.py,sha256=YQicNVi9q4FteAmt_EtY75I_EuNZ9ZjGE73wg-NcDwY,8824
57
+ cognee/api/v1/search/search.py,sha256=WhBtj90nW9ulas_dm8lX72VYGMmWVdcrC7nAfxcQgso,8821
59
58
  cognee/api/v1/search/routers/__init__.py,sha256=6RebeLX_2NTRxIMPH_mGuLztPxnGnMJK1y_O93CtRm8,49
60
59
  cognee/api/v1/search/routers/get_search_router.py,sha256=-5GLgHipflEblYAwl3uiPAZ2i3TgrLEjDuiO_cCqcB8,6252
61
60
  cognee/api/v1/settings/routers/__init__.py,sha256=wj_UYAXNMPCkn6Mo1YB01dCBiV9DQwTIf6OWjnGRpf8,53
@@ -182,24 +181,24 @@ cognee/infrastructure/databases/relational/get_async_session.py,sha256=qfiXSsTAA
182
181
  cognee/infrastructure/databases/relational/get_migration_relational_engine.py,sha256=5RtH281iIQo3vqgwmKT0nuiJp9jNd7vw6xRUjc5xIDM,1070
183
182
  cognee/infrastructure/databases/relational/get_relational_engine.py,sha256=De51ieg9eFhRLX08k9oNc-oszvt_9J5DHebqI1qI8_U,741
184
183
  cognee/infrastructure/databases/relational/with_async_session.py,sha256=UgQeJOvgeM6yhyNDwWdGULtTjZosTnjDlr267Losnfs,803
185
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py,sha256=uuX2tFPWueX0etooLFFr1PvaPQSdFtZRyCnRYSau20Q,27539
184
+ cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py,sha256=j4mnqNJAO-U-Qfveam6NgjIH5lt7WjSMLVlemBrdpYU,27540
186
185
  cognee/infrastructure/databases/relational/sqlalchemy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
187
186
  cognee/infrastructure/databases/utils/__init__.py,sha256=4C0ncZG-O6bOFJpKgscCHu6D5vodLWRIKpe-WT4Ijbs,75
188
187
  cognee/infrastructure/databases/utils/get_or_create_dataset_database.py,sha256=wn7pRgeX-BU0L191_6pgT9P54uhVQlGMPqxQdvIlv4Y,2101
189
188
  cognee/infrastructure/databases/vector/__init__.py,sha256=7MdGJ3Mxdh2RyDq39rcjD99liIa-yGXxDUzq--1qQZs,291
190
- cognee/infrastructure/databases/vector/config.py,sha256=cY833pGsse4_dBmacNXmsdNZJQrSWPevKcGW1f_klYU,2927
189
+ cognee/infrastructure/databases/vector/config.py,sha256=4HOmqZOEfVNmAhjxRNePMU9haTVeR35R2XbhPTcMqFg,2952
191
190
  cognee/infrastructure/databases/vector/create_vector_engine.py,sha256=ECtICkIW5QM_lX9465ZTxVXC5MCRo_h219q3GyFXxpc,4716
192
191
  cognee/infrastructure/databases/vector/get_vector_engine.py,sha256=y4TMWJ6B6DxwKF9PMfjB6WqujPnVhf0oR2j35Q-KhvA,272
193
192
  cognee/infrastructure/databases/vector/supported_databases.py,sha256=0UIYcQ15p7-rq5y_2A-E9ydcXyP6frdg8T5e5ECDDMI,25
194
193
  cognee/infrastructure/databases/vector/use_vector_adapter.py,sha256=ab2x6-sxVDu_tf4zWChN_ngqv8LaLYk2VCtBjZEyjaM,174
195
194
  cognee/infrastructure/databases/vector/utils.py,sha256=WHPSMFsN2XK72uURvCl_jlzQa-N3XKPhrDnB6GKmBtM,1224
196
195
  cognee/infrastructure/databases/vector/vector_db_interface.py,sha256=EUpRVyMyS0MOQwFEgxwRa_9MY1vYotCyO6CONM81r94,7118
197
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py,sha256=IC2F8EGUrERDJdzPl0pZGgdCiTptQRCDsxzF-xLzSAs,18951
196
+ cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py,sha256=c8oREW4EcX_TL2i-JdCRsi5EOtPxrtxpYkaUzc8IolU,18775
198
197
  cognee/infrastructure/databases/vector/chromadb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
198
  cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py,sha256=boNJ55dxJQ_ImW1_DDjToQa0Hos9mkeRYwfCI7UPLn0,983
200
199
  cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py,sha256=_R3yIuDaMN2lz9JhMy6SNpZeeCRZxHA9hmSB3gOxKkA,3823
201
200
  cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py,sha256=XUZnVftE57qWlAebr99aOEg-FynMKB7IS-kmBBT8E5Y,7544
202
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py,sha256=SczVlBpz7faocouJnDkt7pDrd7DEDkclGn0F96bmAKE,4190
201
+ cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py,sha256=uR9ItOYN0ySsnPrmHGaoLGjiKJcFF-88KMwbtH6j0DU,4173
203
202
  cognee/infrastructure/databases/vector/embeddings/__init__.py,sha256=Akv-ShdXjHw-BE00Gw55GgGxIMr0SZ9FHi3RlpsJmiE,55
204
203
  cognee/infrastructure/databases/vector/embeddings/config.py,sha256=s9acnhn1DLFggCNJMVcN9AxruMf3J00O_R--JVGqMNs,2221
205
204
  cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py,sha256=TyCoo_SipQ6JNy5eqXY2shrZnhb2JVjt9xOsJltOCdw,17598
@@ -331,7 +330,7 @@ cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get
331
330
  cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py,sha256=126jfQhTEAbmsVsc4wyf20dK-C2AFJQ0sVmNPZFEet0,2194
332
331
  cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/rate_limiter.py,sha256=ie_zMYnUzMcW4okP4P41mEC31EML2ztdU7bEQQdg99U,16763
333
332
  cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
334
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py,sha256=ReVmaGNEsuHN5nLxEcWuj2cihqimfKpVB-Wobqbh0nU,3151
333
+ cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py,sha256=8KTFmFm9uLagIDTSsZMYjuyhXtmFkbm-YMWVDhrn7qw,3249
335
334
  cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
335
  cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py,sha256=maSHU7nEZiR68ZeZW896LhXPm9b1f0rmEYQ6kB4CZMM,5089
337
336
  cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -366,8 +365,8 @@ cognee/infrastructure/loaders/external/pypdf_loader.py,sha256=nFa_h3LURBPoguRIID
366
365
  cognee/infrastructure/loaders/external/unstructured_loader.py,sha256=XCRVHwpM5XmcjRmL4Pr9ELzBU_qYDPhX_Ahn5K8w0AU,4603
367
366
  cognee/infrastructure/loaders/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
368
367
  cognee/infrastructure/utils/calculate_backoff.py,sha256=O6h4MCe357BKaECmLZPLGYpffrMol65LwQCklBj4sh4,935
369
- cognee/infrastructure/utils/run_async.py,sha256=3J0OGzh3HLO6wHQN-rjEnGitVD_mbs4AO6VFgZ47eQE,393
370
- cognee/infrastructure/utils/run_sync.py,sha256=wLhXUdopsEaIRU7CrzcfPdj1KiRBjCf83HjqvSsace8,726
368
+ cognee/infrastructure/utils/run_async.py,sha256=gZY8ZLG_86O9YVK8hciduIoDONHaEEnGOILh3EeD9LA,510
369
+ cognee/infrastructure/utils/run_sync.py,sha256=9pAXc-EmjtV03exnUMOVSC-IJq_KCslX05z62MHQjlQ,800
371
370
  cognee/modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
372
371
  cognee/modules/chunking/Chunker.py,sha256=KezN4WBiV0KNJtx6daMg4g1-a-_oJxn_l_iQT94T1lQ,343
373
372
  cognee/modules/chunking/LangchainChunker.py,sha256=Yo9Jza-t3x3V8I8PWbxUu48vlVVdvJKxwzL2gManwDc,2351
@@ -483,14 +482,15 @@ cognee/modules/metrics/operations/__init__.py,sha256=MZ3xbVdfEKqfLct8WnbyFVyZmkB
483
482
  cognee/modules/metrics/operations/get_pipeline_run_metrics.py,sha256=upIWnzKeJT1_XbL_ABdGxW-Ai7mO3AqMK35BNmItIQQ,2434
484
483
  cognee/modules/notebooks/methods/__init__.py,sha256=IhY4fUVPJbuvS83QESsWzjZRC6oC1I-kJi5gr3kPTLk,215
485
484
  cognee/modules/notebooks/methods/create_notebook.py,sha256=S41H3Rha0pj9dEKFy1nBG9atTGHhUdOmDZgr0ckUA6M,633
485
+ cognee/modules/notebooks/methods/create_tutorial_notebook.py,sha256=ZoGilQU993M0j3fFjBicOSsF5TFEq_k8tjbD_90sI7g,4269
486
486
  cognee/modules/notebooks/methods/delete_notebook.py,sha256=BKxoRlPzkwXvTYh5WcF-zo_iVmaXqEiptS42JwB0KQU,309
487
- cognee/modules/notebooks/methods/get_notebook.py,sha256=O-iWX4sElOn_5EpI9_WCwdvbfPRgVQVGBev1U4tI8AA,545
487
+ cognee/modules/notebooks/methods/get_notebook.py,sha256=IP4imsdt9X6GYd6i6WF6PlVhotGNH0i7XZpPqbtqMwo,554
488
488
  cognee/modules/notebooks/methods/get_notebooks.py,sha256=ee40ALHvebVORuwZVkQ271qAj260rrYy6eVGxAmfo8c,483
489
- cognee/modules/notebooks/methods/update_notebook.py,sha256=L-WgIxEr_uPClRZQZtnBEV9iT2C7aWqs0FuSW-F5qqk,410
489
+ cognee/modules/notebooks/methods/update_notebook.py,sha256=MnZbfh-WfEfH3ImNvyQNhDeNwpYeS7p8FPVwnmBvZVg,361
490
490
  cognee/modules/notebooks/models/Notebook.py,sha256=Jth47QxJQ2-VGPyIcS0ul3bS8bgGrk9vCGoJVagxanw,1690
491
491
  cognee/modules/notebooks/models/__init__.py,sha256=jldsDjwRvFMreGpe4wxxr5TlFXTZuU7rbsRkGQvTO5s,45
492
492
  cognee/modules/notebooks/operations/__init__.py,sha256=VR_2w_d0lEiJ5Xw7_mboo2qWUv0umrR_Bp58MaMoE6w,55
493
- cognee/modules/notebooks/operations/run_in_local_sandbox.py,sha256=0Au8-bDy7S-c1eNLKInQI5HV7u3bhl7Lvvtt79c5J4Q,1186
493
+ cognee/modules/notebooks/operations/run_in_local_sandbox.py,sha256=17hMEQC3LZTfPvbRUrPN9SzDeJPWSTq_BAhtwRZiqT8,1338
494
494
  cognee/modules/observability/get_observe.py,sha256=chRw4jmpmrwEvDecF9sgApm23IOzVgCbwkKEAyz1_AI,264
495
495
  cognee/modules/observability/observers.py,sha256=SKQSWWyGDG0QY2_bqsFgfpLUb7OUL4WFf8tDZYe5JMM,157
496
496
  cognee/modules/ontology/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -550,15 +550,15 @@ cognee/modules/retrieval/code_retriever.py,sha256=cnOjgfCATzz0-XZGFrIIkuVZLc6HBh
550
550
  cognee/modules/retrieval/coding_rules_retriever.py,sha256=3GU259jTbGLqmp_A8sUdE4fyf0td06SKuxBJVW-npIQ,1134
551
551
  cognee/modules/retrieval/completion_retriever.py,sha256=Lw5sxN_UrtmWSOtcSS7Yj50Gw9p4nNBmW3dr2kV9JJ0,3754
552
552
  cognee/modules/retrieval/cypher_search_retriever.py,sha256=_3rZJ23hSZpDa8kVyOSWN3fwjMI_aLF2m5p-FtBek8k,2440
553
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py,sha256=PUJRR13MZ6eAjOH3HeQRRl0rEElHEBh4IKahgrDUXPo,4526
554
- cognee/modules/retrieval/graph_completion_cot_retriever.py,sha256=quJYusaUNTvy7A3V_PAIbDLBMrFDuX8_wT0NnTcE5x8,6134
555
- cognee/modules/retrieval/graph_completion_retriever.py,sha256=XHuu1kvANGNMz-j6UkW2mpUpb1sf3hXcmMhu4TVM03c,8816
553
+ cognee/modules/retrieval/graph_completion_context_extension_retriever.py,sha256=-6yN8gpRlDue8d28rk-Ly-gq0T8BW-i1-Jgbp1x-Zsg,4532
554
+ cognee/modules/retrieval/graph_completion_cot_retriever.py,sha256=JU-FkikaU68v8fT8VAmG6jojwhwroKYW2RUxdlJ1R-k,6140
555
+ cognee/modules/retrieval/graph_completion_retriever.py,sha256=VnrFD4xUQewIO83mfmIUcPLA_HBGdUlDVRyA2Pm4ARo,8822
556
556
  cognee/modules/retrieval/graph_summary_completion_retriever.py,sha256=3AMisk3fObk2Vh1heY4veHkDjLsHgSSUc_ChZseJUYw,2456
557
557
  cognee/modules/retrieval/insights_retriever.py,sha256=1pcYd34EfKk85MSPFQ8b-ZbSARmnauks8TxXfNOxvOw,4953
558
558
  cognee/modules/retrieval/natural_language_retriever.py,sha256=zJz35zRmBP8-pRlkoxxSxn3-jtG2lUW0xcu58bq9Ebs,5761
559
559
  cognee/modules/retrieval/summaries_retriever.py,sha256=joXYphypACm2JiCjbC8nBS61m1q2oYkzyIt9bdgALNw,3384
560
- cognee/modules/retrieval/temporal_retriever.py,sha256=mYik14shVjK24fNEtYzjte5ovwwTdROn5Kxy5FrOE10,5679
561
- cognee/modules/retrieval/user_qa_feedback.py,sha256=WSMPg6WjteR-XgK0vK9f_bkZ_o0JMPb4XZ9OAcFyz9E,3371
560
+ cognee/modules/retrieval/temporal_retriever.py,sha256=EUEYN94LpoWfbPjsToe_pC3rFsUUTIPA5K6wNjv8Nds,5685
561
+ cognee/modules/retrieval/user_qa_feedback.py,sha256=-VEOsE_t0FiTy00OpOMWAYv12YSLPieAcMsu82vm7h4,3366
562
562
  cognee/modules/retrieval/context_providers/DummyContextProvider.py,sha256=9GsvINc7ekRyRWO5IefFGyytRYqsSlhpwAOw6Q691cA,419
563
563
  cognee/modules/retrieval/context_providers/SummarizedTripletSearchContextProvider.py,sha256=ypO6yWLxvmRsj_5dyYdvXTbztJmB_ioLrgyG6bF5WGA,894
564
564
  cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py,sha256=8PzksHAtRw7tZarP3nZuxhi0cd1EYEDHOT4Q74mNEvc,3656
@@ -579,7 +579,7 @@ cognee/modules/search/exceptions/exceptions.py,sha256=Zc5Y0M-r-UnSSlpKzHKBplfjZ-
579
579
  cognee/modules/search/methods/__init__.py,sha256=jGfRvNwM5yIzj025gaVhcx7nCupRSXbUUnFjYVjL_Js,27
580
580
  cognee/modules/search/methods/get_search_type_tools.py,sha256=wXxOZx3uEnMhRhUO2HGswQ5iVbWvjUj17UT_qdJg6Oo,6837
581
581
  cognee/modules/search/methods/no_access_control_search.py,sha256=R08aMgaB8AkD0_XVaX15qLyC9KJ3fSVFv9zeZwuyez4,1566
582
- cognee/modules/search/methods/search.py,sha256=Akqf4a913_nG56TMxTKU65kOwL0tWURDLHEXlwcgV1c,12459
582
+ cognee/modules/search/methods/search.py,sha256=JjB9Nhxt_AIDF24z81FWGm7VVJFW90RCXRAU9VhMG34,12430
583
583
  cognee/modules/search/models/Query.py,sha256=9WcF5Z1oCFtA4O-7An37eNAPX3iyygO4B5NSwhx7iIg,558
584
584
  cognee/modules/search/models/Result.py,sha256=U7QtoNzAtZnUDwGWhjVfcalHQd4daKtYYvJz2BeWQ4w,564
585
585
  cognee/modules/search/operations/__init__.py,sha256=AwJl6v9BTpocoefEZLk-flo1EtydYb46NSUoNFHkhX0,156
@@ -593,8 +593,9 @@ cognee/modules/search/types/SearchResult.py,sha256=blEean6PRFKcDRQugsojZPfH-Wohx
593
593
  cognee/modules/search/types/SearchType.py,sha256=-lT4bLKKunV4cL4FfF3tjNbdN7X4AsRMLpTkReNwXZM,594
594
594
  cognee/modules/search/types/__init__.py,sha256=8k6OjVrL70W1Jh-ClTbG2ETYIhOtSk3tfqjzYgEdPzA,117
595
595
  cognee/modules/search/utils/__init__.py,sha256=86mRtCN-B5-2NNChdQoU5x8_8hqTczGZjBoKVE9O7hA,124
596
- cognee/modules/search/utils/prepare_search_result.py,sha256=nfK8aqR2tRL_SYHqtkK1ssG8Ws_oflEDZZAEvQmu5F4,1293
597
- cognee/modules/search/utils/transform_context_to_graph.py,sha256=rUQeEH-Z-GqAzAZTCetRVpwgrOHlNe3mUBRLwRb0478,1238
596
+ cognee/modules/search/utils/prepare_search_result.py,sha256=I_NrC6G549mEm1f0JZYJLCxAYQbKXBIzTJB4kv_3538,2334
597
+ cognee/modules/search/utils/transform_context_to_graph.py,sha256=Wl0kZR6YqyBxY-vBNNIy2pPIZaJVCigcRveJWjSX8BA,1238
598
+ cognee/modules/search/utils/transform_insights_to_graph.py,sha256=_ID5-37Ppl7jHbxNkUioZyH_I8SGXnhbfeLHgfEYec8,925
598
599
  cognee/modules/settings/__init__.py,sha256=_SZQgCQnnnIHLJuKOMO9uWzXNBQxwYHHMUSBp0qa2uQ,210
599
600
  cognee/modules/settings/get_current_settings.py,sha256=R2lOusG5Q2PMa2-2vDndh3Lm7nXyZVkdzTV7vQHT81Y,1642
600
601
  cognee/modules/settings/get_settings.py,sha256=qkpNB_-IRexSzaiVvSS7NXG3S3fpbhDb6BQIPGAKET4,4221
@@ -762,7 +763,7 @@ cognee/tasks/temporal_graph/add_entities_to_event.py,sha256=wH4TlJfGN5_tjouuSFKK
762
763
  cognee/tasks/temporal_graph/enrich_events.py,sha256=aLwGKzKLdUXbdn4WGN1uK5vOBk8nPzGM6bJ-7lWkt6s,1097
763
764
  cognee/tasks/temporal_graph/extract_events_and_entities.py,sha256=iL0ppf5zmTey67yncLPkDY0Fd2GL4CqDGV4v1L0VmoA,1301
764
765
  cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py,sha256=biDjIOnL_6ZSifFokwAlhVqNUixuzoFdYUmPzAT9d1Y,1440
765
- cognee/tasks/temporal_graph/models.py,sha256=R8MuYyqmix2RQ2YwFM1zavdVQpj-SF3CBTo1z5EhVtU,1096
766
+ cognee/tasks/temporal_graph/models.py,sha256=2fBZWqfZfLNh5BHqU8RbW60R1_IZU3PgY8MZJHlF0S0,1390
766
767
  cognee/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
767
768
  cognee/tests/test_chromadb.py,sha256=D9JEN0xbFxNLgp8UJTVAjpwob9S-LOQC-hSaMVvYhR8,9240
768
769
  cognee/tests/test_cognee_server_start.py,sha256=kcIbzu72ZZUlPZ51c_DpSCCwx3X9mNvYZrVcxHfZaJs,4226
@@ -789,17 +790,16 @@ cognee/tests/test_remote_kuzu.py,sha256=2GG05MtGuhOo6ST82OxjdVDetBS0GWHvKKmmmEtQ
789
790
  cognee/tests/test_remote_kuzu_stress.py,sha256=5vgnu4Uz_NoKKqFZJeVceHwb2zNhvdTVBgpN3NjhfAE,5304
790
791
  cognee/tests/test_s3.py,sha256=rY2UDK15cdyywlyVrR8N2DRtVXWYIW5REaaz99gaQeE,2694
791
792
  cognee/tests/test_s3_file_storage.py,sha256=62tvIFyh_uTP0TFF9Ck4Y-sxWPW-cwJKYEJUJI1atPI,5654
792
- cognee/tests/test_save_export_path.py,sha256=z07oQao82INzldg2mesS3ZGt7fl7rcjKx15JwoGT5tI,3898
793
793
  cognee/tests/test_search_db.py,sha256=4GpLx8ZJoMjkp-XqQ-LCrkf3NhAM4j_rMmlOFgmDO-A,13420
794
794
  cognee/tests/test_starter_pipelines.py,sha256=X1J8RDD0bFMKnRETyi5nyaF4TYdmUIu0EuD3WQwShNs,2475
795
795
  cognee/tests/test_telemetry.py,sha256=FIneuVofSKWFYqxNC88sT_P5GPzgfjVyqDCf2TYBE2E,4130
796
- cognee/tests/test_temporal_graph.py,sha256=G0PyzuvIYylwFT-3eZSzjtBik9O1g75sGLj3QK9RYTA,12624
796
+ cognee/tests/test_temporal_graph.py,sha256=GRYS2FsFybYOuoQvmG711UTVAHgvGvapgMEzW4sclZg,11551
797
797
  cognee/tests/cli_tests/cli_integration_tests/__init__.py,sha256=xYkvpZkxv_HRWmX71pGM3NUw2KKkDQIM-V6Ehxu-f0I,39
798
798
  cognee/tests/cli_tests/cli_integration_tests/test_cli_integration.py,sha256=3hdz1DoGeidJInqbCy1YQte6J0QeQG1_WKGs9utjAFg,11560
799
799
  cognee/tests/cli_tests/cli_unit_tests/__init__.py,sha256=U069aFvdwfKPd6YsR_FJML5LRphHHF5wx9mwug1hRh4,32
800
800
  cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py,sha256=5a3vPiSFmKumq6sTfdfMyeUpJGjbZ6_5zX4TUcV0ZJQ,17625
801
801
  cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py,sha256=PyFCnClvbXG1GaiS16qwcuyXXDJ4sRyBCKV5WHrOUxk,23501
802
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py,sha256=Gsj2zYlVL80iU9EjRj4Q4QzgsYuIngUvDbA9suV99oA,6098
802
+ cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py,sha256=6tx2A4us8uyZ7Zk4wZXplqLn5MtAejxOrG5ZxZpbFvQ,6143
803
803
  cognee/tests/cli_tests/cli_unit_tests/test_cli_runner.py,sha256=WZ8oZIlc_JintDq_cnEg9tmLEMZMGFPQGhU7Y_7sfgs,1497
804
804
  cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py,sha256=Flej8LNYRXNkWd2tq8elMm8MkqbhCUb8RtXaPzfNYm4,4323
805
805
  cognee/tests/integration/documents/AudioDocument_test.py,sha256=0mJnlWRc7gWqOxAUfdSSIxntcUrzkPXhlsd-MFsiRoM,2790
@@ -890,9 +890,9 @@ distributed/tasks/queued_add_edges.py,sha256=kz1DHE05y-kNHORQJjYWHUi6Q1QWUp_v3Dl
890
890
  distributed/tasks/queued_add_nodes.py,sha256=aqK4Ij--ADwUWknxYpiwbYrpa6CcvFfqHWbUZW4Kh3A,452
891
891
  distributed/workers/data_point_saving_worker.py,sha256=jFmA0-P_0Ru2IUDrSug0wML-5goAKrGtlBm5BA5Ryw4,3229
892
892
  distributed/workers/graph_saving_worker.py,sha256=oUYl99CdhlrPAIsUOHbHnS3d4XhGoV0_OIbCO8wYzRg,3648
893
- cognee-0.3.2.dist-info/METADATA,sha256=psFYLZRaPVkg3zbOTWugIyH0H_3RL0hxAQHqYNXCz54,14753
894
- cognee-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
895
- cognee-0.3.2.dist-info/entry_points.txt,sha256=4Fe5PRV0e3j5MFUo7kYyRFa3MhMNbOu69pGBazTxPps,51
896
- cognee-0.3.2.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
897
- cognee-0.3.2.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
898
- cognee-0.3.2.dist-info/RECORD,,
893
+ cognee-0.3.4.dev0.dist-info/METADATA,sha256=T3Pt0L4t3GKyziXuy4n1Kdlh3OUfPt4pPh4VDrjVkPY,14752
894
+ cognee-0.3.4.dev0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
895
+ cognee-0.3.4.dev0.dist-info/entry_points.txt,sha256=GCCTsNg8gzOJkolq7dR7OK1VlIAO202dGDnMI8nm8oQ,55
896
+ cognee-0.3.4.dev0.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
897
+ cognee-0.3.4.dev0.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
898
+ cognee-0.3.4.dev0.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cognee-cli = cognee.cli._cognee:main
@@ -1,335 +0,0 @@
1
- import os
2
- import asyncio
3
- import json
4
- from typing import Optional, Union, List, Dict
5
- from uuid import UUID
6
-
7
- from pydantic import BaseModel
8
-
9
- from cognee.base_config import get_base_config
10
- from cognee.modules.users.models import User
11
- from cognee.modules.users.methods import get_default_user
12
- from cognee.modules.data.methods import get_authorized_existing_datasets, get_dataset_data
13
- from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
14
- from cognee.infrastructure.llm.LLMGateway import LLMGateway
15
- from cognee.shared.logging_utils import get_logger
16
- from cognee.api.v1.search import search
17
- from cognee.modules.search.types import SearchType
18
-
19
-
20
- logger = get_logger("save")
21
-
22
-
23
- class QuestionsModel(BaseModel):
24
- questions: List[str]
25
-
26
-
27
- def _sanitize_filename(name: str) -> str:
28
- safe = "".join(c if c.isalnum() or c in ("-", "_", ".", " ") else "_" for c in name)
29
- return safe.strip().replace(" ", "_")
30
-
31
-
32
- def _dataset_dir_name(dataset) -> str:
33
- # Prefer readable dataset name when available, fallback to id
34
- if getattr(dataset, "name", None):
35
- return _sanitize_filename(str(dataset.name))
36
- return str(dataset.id)
37
-
38
-
39
- def _file_markdown_name(data_item, used_names: set[str]) -> str:
40
- # Use original file name if present, else data.name
41
- name = getattr(data_item, "name", None) or "file"
42
- base = _sanitize_filename(str(name))
43
- filename = f"{base}.md"
44
- if filename in used_names:
45
- short_id = str(getattr(data_item, "id", ""))[:8]
46
- filename = f"{base}__{short_id}.md"
47
- used_names.add(filename)
48
- return filename
49
-
50
-
51
- def _ascii_path_tree(path_str: str) -> str:
52
- if not path_str:
53
- return "(no path)"
54
-
55
- # Normalize special schemes but keep segments readable
56
- try:
57
- normalized = get_data_file_path(path_str)
58
- except Exception:
59
- normalized = path_str
60
-
61
- # Keep the path compact – show last 5 segments
62
- parts = [p for p in normalized.replace("\\", "/").split("/") if p]
63
- if len(parts) > 6:
64
- display = ["…"] + parts[-5:]
65
- else:
66
- display = parts
67
-
68
- # Render a single-branch tree
69
- lines = []
70
- for idx, seg in enumerate(display):
71
- prefix = "└── " if idx == 0 else (" " * idx + "└── ")
72
- lines.append(f"{prefix}{seg}")
73
- return "\n".join(lines)
74
-
75
-
76
- async def _get_summary_via_summaries(query_text: str, dataset_id: UUID, top_k: int) -> str:
77
- try:
78
- results = await search(
79
- query_text=query_text,
80
- query_type=SearchType.SUMMARIES,
81
- dataset_ids=[dataset_id],
82
- top_k=top_k,
83
- )
84
- if not results:
85
- return ""
86
- texts: List[str] = []
87
- for r in results[:top_k]:
88
- texts.append(str(r))
89
- return "\n\n".join(texts)
90
- except Exception as e:
91
- logger.error(
92
- "SUMMARIES search failed for '%s' in dataset %s: %s",
93
- query_text,
94
- str(dataset_id),
95
- str(e),
96
- )
97
- return ""
98
-
99
-
100
- async def _generate_questions(file_name: str, summary_text: str) -> List[str]:
101
- prompt = (
102
- "You are an expert analyst. Given a file and its summary, propose 10 diverse, high-signal "
103
- "questions to further explore the file's content, implications, relationships, and gaps. "
104
- "Avoid duplicates; vary depth and angle (overview, details, cross-references, temporal, quality).\n\n"
105
- f"File: {file_name}\n\nSummary:\n{summary_text[:4000]}"
106
- )
107
-
108
- model = await LLMGateway.acreate_structured_output(
109
- text_input=prompt,
110
- system_prompt="Return strictly a JSON with key 'questions' and value as an array of 10 concise strings.",
111
- response_model=QuestionsModel,
112
- )
113
-
114
- # model can be either pydantic model or dict-like, normalize
115
- try:
116
- questions = list(getattr(model, "questions", []))
117
- except Exception:
118
- questions = []
119
-
120
- # Fallback if the tool returned a dict-like
121
- if not questions and isinstance(model, dict):
122
- questions = list(model.get("questions", []) or [])
123
-
124
- # Enforce 10 max
125
- return questions[:10]
126
-
127
-
128
- async def _run_searches_for_question(
129
- question: str, dataset_id: UUID, search_types: List[SearchType], top_k: int
130
- ) -> Dict[str, Union[str, List[dict], List[str]]]:
131
- async def run_one(st: SearchType):
132
- try:
133
- result = await search(
134
- query_text=question,
135
- query_type=st,
136
- dataset_ids=[dataset_id],
137
- top_k=top_k,
138
- )
139
- return st.value, result
140
- except Exception as e:
141
- logger.error("Search failed for type %s: %s", st.value, str(e))
142
- return st.value, [f"Error: {str(e)}"]
143
-
144
- pairs = await asyncio.gather(*[run_one(st) for st in search_types])
145
- return {k: v for k, v in pairs}
146
-
147
-
148
- def _format_results_md(results: Dict[str, Union[str, List[dict], List[str]]]) -> str:
149
- lines: List[str] = []
150
- for st, payload in results.items():
151
- lines.append(f"#### {st}")
152
- if isinstance(payload, list):
153
- # Printed as bullet items; stringify dicts
154
- for item in payload[:5]:
155
- if isinstance(item, dict):
156
- # compact representation
157
- snippet = json.dumps(item, ensure_ascii=False)[:800]
158
- lines.append(f"- {snippet}")
159
- else:
160
- text = str(item)
161
- lines.append(f"- {text[:800]}")
162
- else:
163
- lines.append(str(payload))
164
- lines.append("")
165
- return "\n".join(lines)
166
-
167
-
168
async def save(
    datasets: Optional[Union[List[str], List[UUID]]] = None,
    export_root_directory: Optional[str] = None,
    user: Optional[User] = None,
    # Configurable knobs
    max_questions: int = 10,
    search_types: Optional[List[Union[str, SearchType]]] = None,
    top_k: int = 5,
    include_summary: bool = True,
    include_ascii_tree: bool = True,
    concurrency: int = 4,
    timeout: Optional[float] = None,
) -> Dict[str, str]:
    """
    Export per-dataset markdown summaries and search insights for each ingested file.

    For every dataset the user can read:
    - Create a folder under export_root_directory (or data_root_directory/memory_export)
    - For each data item (file), create a .md containing:
      - Summary of the file (obtained through a SUMMARIES search helper)
      - A small ASCII path tree showing its folder position
      - Up to N LLM-generated question ideas (configurable)
      - Results of configured Cognee searches per question
    Also creates an index.md per dataset with links to files and an optional dataset summary.

    Args:
        datasets: Dataset names or ids to export; forwarded unchanged to the
            authorization lookup together with the "read" permission.
        export_root_directory: Root folder for the export. When falsy, the
            "memory_export" folder under the configured data root is used.
        user: User whose read permissions apply. The default user is fetched
            when None.
        max_questions: Cap on question ideas per file. 0 skips question
            generation entirely; None or a negative value leaves the generated
            list uncapped.
        search_types: Search types to run per question, as SearchType members
            or member names. Unknown names are logged and skipped. When empty
            or None, GRAPH_COMPLETION, INSIGHTS and CHUNKS are used.
        top_k: Result limit passed to the summary and search helpers, and the
            number of summaries listed in index.md.
        include_summary: Whether to add the "Summary" section to each file.
        include_ascii_tree: Whether to add the "Location" section to each file.
        concurrency: Maximum number of files processed at once per dataset
            (clamped to at least 1). Searches for the questions of a single
            file still run concurrently with each other.
        timeout: Time budget passed to asyncio.wait_for for each dataset.
            None or a non-positive value disables the limit.

    Returns a mapping of dataset_id -> export_directory path.
    """
    base_config = get_base_config()
    export_root = export_root_directory or os.path.join(
        base_config.data_root_directory, "memory_export"
    )
    os.makedirs(export_root, exist_ok=True)

    if user is None:
        user = await get_default_user()

    datasets_list = await get_authorized_existing_datasets(datasets, "read", user)
    results: Dict[str, str] = {}

    # Normalize search types once: the result does not depend on the dataset,
    # and doing it here avoids repeating the same warning for every dataset.
    if not search_types:
        effective_search_types = [
            SearchType.GRAPH_COMPLETION,
            SearchType.INSIGHTS,
            SearchType.CHUNKS,
        ]
    else:
        effective_search_types = []
        for st in search_types:
            if isinstance(st, SearchType):
                effective_search_types.append(st)
                continue
            try:
                effective_search_types.append(SearchType[str(st)])
            except Exception:
                # Best effort: an unknown name must not abort the whole export.
                logger.warning("Unknown search type '%s', skipping", str(st))

    for dataset in datasets_list:
        ds_dir = os.path.join(export_root, _dataset_dir_name(dataset))
        os.makedirs(ds_dir, exist_ok=True)
        results[str(dataset.id)] = ds_dir

        data_items = await get_dataset_data(dataset.id)

        sem = asyncio.Semaphore(max(1, int(concurrency)))
        used_names: set[str] = set()
        index_entries: List[tuple[str, str]] = []

        # NOTE: the closures below read `dataset`, `ds_dir`, `sem`, `used_names`
        # and `index_entries` from this iteration. That is safe because every
        # task created here has finished before the loop advances.
        async def process_one(data_item):
            async with sem:
                file_label = getattr(data_item, "name", str(data_item.id))
                original_path = getattr(data_item, "original_data_location", None)

                ascii_tree = (
                    _ascii_path_tree(original_path or file_label) if include_ascii_tree else ""
                )

                summary_text = ""
                if include_summary:
                    # Use SUMMARIES search scoped to dataset to derive file summary
                    file_query = getattr(data_item, "name", str(data_item.id)) or "file"
                    summary_text = await _get_summary_via_summaries(file_query, dataset.id, top_k)
                    if not summary_text:
                        summary_text = "Summary not available."

                if max_questions == 0:
                    questions = []
                else:
                    questions = await _generate_questions(file_label, summary_text)
                    if max_questions is not None and max_questions >= 0:
                        questions = questions[:max_questions]

                async def searches_for_question(q: str):
                    return await _run_searches_for_question(
                        q, dataset.id, effective_search_types, top_k
                    )

                # Run per-question searches concurrently
                per_q_results = await asyncio.gather(*[searches_for_question(q) for q in questions])

                # Build markdown content
                md_lines = [f"# {file_label}", ""]
                if include_ascii_tree:
                    md_lines.extend(["## Location", "", "```", ascii_tree, "```", ""])
                if include_summary:
                    md_lines.extend(["## Summary", "", summary_text, ""])

                md_lines.append("## Question ideas")
                for idx, q in enumerate(questions, start=1):
                    md_lines.append(f"- {idx}. {q}")
                md_lines.append("")

                md_lines.append("## Searches")
                md_lines.append("")
                for q, per_type in zip(questions, per_q_results):
                    md_lines.append(f"### Q: {q}")
                    md_lines.append(_format_results_md(per_type))
                    md_lines.append("")

                # Write to file (collision-safe); the temp file + os.replace
                # keeps a half-written file from appearing under the final name.
                md_filename = _file_markdown_name(data_item, used_names)
                export_path = os.path.join(ds_dir, md_filename)
                tmp_path = export_path + ".tmp"
                with open(tmp_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(md_lines))
                os.replace(tmp_path, export_path)

                index_entries.append((file_label, md_filename))

        # Keep each item next to its task so failures can be reported per file.
        item_tasks = [(item, asyncio.create_task(process_one(item))) for item in data_items]
        all_done = asyncio.gather(*[task for _, task in item_tasks], return_exceptions=True)

        if timeout and timeout > 0:
            try:
                await asyncio.wait_for(all_done, timeout)
            except asyncio.TimeoutError:
                logger.error("Save timed out for dataset %s", str(dataset.id))
        else:
            await all_done

        # return_exceptions=True keeps one failing file from aborting the rest,
        # but it also hides the failure; surface each one in the log instead of
        # silently dropping the file from the export.
        for item, task in item_tasks:
            if not task.done() or task.cancelled():
                # Cancelled by the timeout above, which has already been logged.
                continue
            error = task.exception()
            if error is not None:
                logger.error(
                    "Failed to export data item %s from dataset %s: %s",
                    str(getattr(item, "id", item)),
                    str(dataset.id),
                    str(error),
                )

        # Build dataset index.md with TOC and optional dataset summary via SUMMARIES
        try:
            index_lines = [f"# Dataset: {_dataset_dir_name(dataset)}", "", "## Files", ""]
            for display, fname in sorted(index_entries, key=lambda x: x[0].lower()):
                index_lines.append(f"- [{display}]({fname})")

            # Dataset summary section
            try:
                summaries = await search(
                    query_text="dataset overview",
                    query_type=SearchType.SUMMARIES,
                    dataset_ids=[dataset.id],
                    top_k=top_k,
                )
            except Exception as e:
                logger.error("Dataset summary search failed: %s", str(e))
                summaries = []

            if summaries:
                index_lines.extend(["", "## Dataset summary (top summaries)", ""])
                for s in summaries[:top_k]:
                    index_lines.append(f"- {str(s)[:800]}")

            # Same temp-file + replace scheme as the per-file exports, so a
            # failed write cannot leave a truncated index.md behind.
            index_path = os.path.join(ds_dir, "index.md")
            index_tmp_path = index_path + ".tmp"
            with open(index_tmp_path, "w", encoding="utf-8") as f:
                f.write("\n".join(index_lines))
            os.replace(index_tmp_path, index_path)
        except Exception as e:
            logger.error("Failed to write dataset index for %s: %s", str(dataset.id), str(e))

    return results