cognee 0.3.2__py3-none-any.whl → 0.3.4.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
- cognee/api/v1/search/search.py +1 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/config.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +2 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/infrastructure/utils/run_async.py +9 -4
- cognee/infrastructure/utils/run_sync.py +4 -3
- cognee/modules/notebooks/methods/create_tutorial_notebook.py +87 -0
- cognee/modules/notebooks/methods/get_notebook.py +2 -2
- cognee/modules/notebooks/methods/update_notebook.py +0 -1
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +1 -1
- cognee/modules/retrieval/user_qa_feedback.py +1 -1
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +31 -9
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_temporal_graph.py +6 -34
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/METADATA +5 -5
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/RECORD +31 -31
- cognee-0.3.4.dev0.dist-info/entry_points.txt +2 -0
- cognee/api/v1/save/save.py +0 -335
- cognee/tests/test_save_export_path.py +0 -116
- cognee-0.3.2.dist-info/entry_points.txt +0 -2
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cognee
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.4.dev0
|
|
4
4
|
Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
|
|
5
5
|
Project-URL: Homepage, https://www.cognee.ai
|
|
6
6
|
Project-URL: Repository, https://github.com/topoteretes/cognee
|
|
@@ -57,7 +57,7 @@ Requires-Dist: structlog<26,>=25.2.0
|
|
|
57
57
|
Requires-Dist: tiktoken<1.0.0,>=0.8.0
|
|
58
58
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
59
59
|
Provides-Extra: anthropic
|
|
60
|
-
Requires-Dist: anthropic
|
|
60
|
+
Requires-Dist: anthropic>=0.27; extra == 'anthropic'
|
|
61
61
|
Provides-Extra: api
|
|
62
62
|
Requires-Dist: gunicorn<24,>=20.1.0; extra == 'api'
|
|
63
63
|
Requires-Dist: uvicorn<1.0.0,>=0.34.0; extra == 'api'
|
|
@@ -65,8 +65,8 @@ Requires-Dist: websockets<16.0.0,>=15.0.1; extra == 'api'
|
|
|
65
65
|
Provides-Extra: aws
|
|
66
66
|
Requires-Dist: s3fs[boto3]==2025.3.2; extra == 'aws'
|
|
67
67
|
Provides-Extra: chromadb
|
|
68
|
-
Requires-Dist: chromadb<0.7,>=0.
|
|
69
|
-
Requires-Dist: pypika==0.48.
|
|
68
|
+
Requires-Dist: chromadb<0.7,>=0.6; extra == 'chromadb'
|
|
69
|
+
Requires-Dist: pypika==0.48.9; extra == 'chromadb'
|
|
70
70
|
Provides-Extra: codegraph
|
|
71
71
|
Requires-Dist: fastembed<=0.6.0; (python_version < '3.13') and extra == 'codegraph'
|
|
72
72
|
Requires-Dist: transformers<5,>=4.46.3; extra == 'codegraph'
|
|
@@ -316,7 +316,7 @@ You can also cognify your files and query using cognee UI.
|
|
|
316
316
|
|
|
317
317
|
<img src="assets/cognee-new-ui.webp" width="100%" alt="Cognee UI 2"></a>
|
|
318
318
|
|
|
319
|
-
Try cognee UI by runnning ``` cognee -ui ``` command on your terminal.
|
|
319
|
+
Try cognee UI by runnning ``` cognee-cli -ui ``` command on your terminal.
|
|
320
320
|
|
|
321
321
|
## Understand our architecture
|
|
322
322
|
|
|
@@ -41,7 +41,7 @@ cognee/api/v1/memify/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
|
41
41
|
cognee/api/v1/memify/routers/__init__.py,sha256=Uv25PVGhfjnNi1NYWOmOLIlzaeTlyMYF9m7BEfdu45Q,49
|
|
42
42
|
cognee/api/v1/memify/routers/get_memify_router.py,sha256=C1Cjt9D5TxhqBPmXZGNrCS4lJqPVXIJYgxZFtWVjZNs,4599
|
|
43
43
|
cognee/api/v1/notebooks/routers/__init__.py,sha256=TvQz6caluaMoXNvjbE1p_C8savypgs8rAyP5lQ8jlpc,55
|
|
44
|
-
cognee/api/v1/notebooks/routers/get_notebooks_router.py,sha256=
|
|
44
|
+
cognee/api/v1/notebooks/routers/get_notebooks_router.py,sha256=m8OH3Kw1UHF8aTP4yNuSpv7gNThE4HxmLIrUnvECYGA,3484
|
|
45
45
|
cognee/api/v1/permissions/routers/__init__.py,sha256=ljE3YnrzlMcVfThmkR5GSIxkm7sQVyibaLNtYQL4HO0,59
|
|
46
46
|
cognee/api/v1/permissions/routers/get_permissions_router.py,sha256=tqd-J__UBlstTWnQocesdjVM9JnYO5rtJhhFj-Zv1_o,8316
|
|
47
47
|
cognee/api/v1/prune/__init__.py,sha256=FEr5tTlX7wf3X4aFff6NPlVhNrPyqx7RBoJ71bJN1cY,25
|
|
@@ -53,9 +53,8 @@ cognee/api/v1/responses/models.py,sha256=MylzSnK-QB0kXe7nS-Mu4XRKZa-uBw8qP7Ke9On
|
|
|
53
53
|
cognee/api/v1/responses/routers/__init__.py,sha256=X2qishwGRVFXawnvkZ5bv420PuPRLvknaFO2jdfiR10,122
|
|
54
54
|
cognee/api/v1/responses/routers/default_tools.py,sha256=9qqzEZhrt3_YMKzUA06ke8P-2WeLXhYpKgVW6mLHlzw,3004
|
|
55
55
|
cognee/api/v1/responses/routers/get_responses_router.py,sha256=ggbLhY9IXaInCgIs5TUuOCkFW64xmTKZQsc2ENq2Ocs,5979
|
|
56
|
-
cognee/api/v1/save/save.py,sha256=xRthVNANIsrVJlLa5QKrdSiwCSckr7HBLmoeVJ_gEdE,12639
|
|
57
56
|
cognee/api/v1/search/__init__.py,sha256=Sqw60DcOj4Bnvt-EWFknT31sPcvROIRKCWLr5pbkFr4,39
|
|
58
|
-
cognee/api/v1/search/search.py,sha256=
|
|
57
|
+
cognee/api/v1/search/search.py,sha256=WhBtj90nW9ulas_dm8lX72VYGMmWVdcrC7nAfxcQgso,8821
|
|
59
58
|
cognee/api/v1/search/routers/__init__.py,sha256=6RebeLX_2NTRxIMPH_mGuLztPxnGnMJK1y_O93CtRm8,49
|
|
60
59
|
cognee/api/v1/search/routers/get_search_router.py,sha256=-5GLgHipflEblYAwl3uiPAZ2i3TgrLEjDuiO_cCqcB8,6252
|
|
61
60
|
cognee/api/v1/settings/routers/__init__.py,sha256=wj_UYAXNMPCkn6Mo1YB01dCBiV9DQwTIf6OWjnGRpf8,53
|
|
@@ -182,24 +181,24 @@ cognee/infrastructure/databases/relational/get_async_session.py,sha256=qfiXSsTAA
|
|
|
182
181
|
cognee/infrastructure/databases/relational/get_migration_relational_engine.py,sha256=5RtH281iIQo3vqgwmKT0nuiJp9jNd7vw6xRUjc5xIDM,1070
|
|
183
182
|
cognee/infrastructure/databases/relational/get_relational_engine.py,sha256=De51ieg9eFhRLX08k9oNc-oszvt_9J5DHebqI1qI8_U,741
|
|
184
183
|
cognee/infrastructure/databases/relational/with_async_session.py,sha256=UgQeJOvgeM6yhyNDwWdGULtTjZosTnjDlr267Losnfs,803
|
|
185
|
-
cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py,sha256=
|
|
184
|
+
cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py,sha256=j4mnqNJAO-U-Qfveam6NgjIH5lt7WjSMLVlemBrdpYU,27540
|
|
186
185
|
cognee/infrastructure/databases/relational/sqlalchemy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
187
186
|
cognee/infrastructure/databases/utils/__init__.py,sha256=4C0ncZG-O6bOFJpKgscCHu6D5vodLWRIKpe-WT4Ijbs,75
|
|
188
187
|
cognee/infrastructure/databases/utils/get_or_create_dataset_database.py,sha256=wn7pRgeX-BU0L191_6pgT9P54uhVQlGMPqxQdvIlv4Y,2101
|
|
189
188
|
cognee/infrastructure/databases/vector/__init__.py,sha256=7MdGJ3Mxdh2RyDq39rcjD99liIa-yGXxDUzq--1qQZs,291
|
|
190
|
-
cognee/infrastructure/databases/vector/config.py,sha256=
|
|
189
|
+
cognee/infrastructure/databases/vector/config.py,sha256=4HOmqZOEfVNmAhjxRNePMU9haTVeR35R2XbhPTcMqFg,2952
|
|
191
190
|
cognee/infrastructure/databases/vector/create_vector_engine.py,sha256=ECtICkIW5QM_lX9465ZTxVXC5MCRo_h219q3GyFXxpc,4716
|
|
192
191
|
cognee/infrastructure/databases/vector/get_vector_engine.py,sha256=y4TMWJ6B6DxwKF9PMfjB6WqujPnVhf0oR2j35Q-KhvA,272
|
|
193
192
|
cognee/infrastructure/databases/vector/supported_databases.py,sha256=0UIYcQ15p7-rq5y_2A-E9ydcXyP6frdg8T5e5ECDDMI,25
|
|
194
193
|
cognee/infrastructure/databases/vector/use_vector_adapter.py,sha256=ab2x6-sxVDu_tf4zWChN_ngqv8LaLYk2VCtBjZEyjaM,174
|
|
195
194
|
cognee/infrastructure/databases/vector/utils.py,sha256=WHPSMFsN2XK72uURvCl_jlzQa-N3XKPhrDnB6GKmBtM,1224
|
|
196
195
|
cognee/infrastructure/databases/vector/vector_db_interface.py,sha256=EUpRVyMyS0MOQwFEgxwRa_9MY1vYotCyO6CONM81r94,7118
|
|
197
|
-
cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py,sha256=
|
|
196
|
+
cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py,sha256=c8oREW4EcX_TL2i-JdCRsi5EOtPxrtxpYkaUzc8IolU,18775
|
|
198
197
|
cognee/infrastructure/databases/vector/chromadb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
199
198
|
cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py,sha256=boNJ55dxJQ_ImW1_DDjToQa0Hos9mkeRYwfCI7UPLn0,983
|
|
200
199
|
cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py,sha256=_R3yIuDaMN2lz9JhMy6SNpZeeCRZxHA9hmSB3gOxKkA,3823
|
|
201
200
|
cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py,sha256=XUZnVftE57qWlAebr99aOEg-FynMKB7IS-kmBBT8E5Y,7544
|
|
202
|
-
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py,sha256=
|
|
201
|
+
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py,sha256=uR9ItOYN0ySsnPrmHGaoLGjiKJcFF-88KMwbtH6j0DU,4173
|
|
203
202
|
cognee/infrastructure/databases/vector/embeddings/__init__.py,sha256=Akv-ShdXjHw-BE00Gw55GgGxIMr0SZ9FHi3RlpsJmiE,55
|
|
204
203
|
cognee/infrastructure/databases/vector/embeddings/config.py,sha256=s9acnhn1DLFggCNJMVcN9AxruMf3J00O_R--JVGqMNs,2221
|
|
205
204
|
cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py,sha256=TyCoo_SipQ6JNy5eqXY2shrZnhb2JVjt9xOsJltOCdw,17598
|
|
@@ -331,7 +330,7 @@ cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get
|
|
|
331
330
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py,sha256=126jfQhTEAbmsVsc4wyf20dK-C2AFJQ0sVmNPZFEet0,2194
|
|
332
331
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/rate_limiter.py,sha256=ie_zMYnUzMcW4okP4P41mEC31EML2ztdU7bEQQdg99U,16763
|
|
333
332
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
334
|
-
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py,sha256=
|
|
333
|
+
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py,sha256=8KTFmFm9uLagIDTSsZMYjuyhXtmFkbm-YMWVDhrn7qw,3249
|
|
335
334
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
335
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py,sha256=maSHU7nEZiR68ZeZW896LhXPm9b1f0rmEYQ6kB4CZMM,5089
|
|
337
336
|
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -366,8 +365,8 @@ cognee/infrastructure/loaders/external/pypdf_loader.py,sha256=nFa_h3LURBPoguRIID
|
|
|
366
365
|
cognee/infrastructure/loaders/external/unstructured_loader.py,sha256=XCRVHwpM5XmcjRmL4Pr9ELzBU_qYDPhX_Ahn5K8w0AU,4603
|
|
367
366
|
cognee/infrastructure/loaders/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
368
367
|
cognee/infrastructure/utils/calculate_backoff.py,sha256=O6h4MCe357BKaECmLZPLGYpffrMol65LwQCklBj4sh4,935
|
|
369
|
-
cognee/infrastructure/utils/run_async.py,sha256=
|
|
370
|
-
cognee/infrastructure/utils/run_sync.py,sha256=
|
|
368
|
+
cognee/infrastructure/utils/run_async.py,sha256=gZY8ZLG_86O9YVK8hciduIoDONHaEEnGOILh3EeD9LA,510
|
|
369
|
+
cognee/infrastructure/utils/run_sync.py,sha256=9pAXc-EmjtV03exnUMOVSC-IJq_KCslX05z62MHQjlQ,800
|
|
371
370
|
cognee/modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
372
371
|
cognee/modules/chunking/Chunker.py,sha256=KezN4WBiV0KNJtx6daMg4g1-a-_oJxn_l_iQT94T1lQ,343
|
|
373
372
|
cognee/modules/chunking/LangchainChunker.py,sha256=Yo9Jza-t3x3V8I8PWbxUu48vlVVdvJKxwzL2gManwDc,2351
|
|
@@ -483,14 +482,15 @@ cognee/modules/metrics/operations/__init__.py,sha256=MZ3xbVdfEKqfLct8WnbyFVyZmkB
|
|
|
483
482
|
cognee/modules/metrics/operations/get_pipeline_run_metrics.py,sha256=upIWnzKeJT1_XbL_ABdGxW-Ai7mO3AqMK35BNmItIQQ,2434
|
|
484
483
|
cognee/modules/notebooks/methods/__init__.py,sha256=IhY4fUVPJbuvS83QESsWzjZRC6oC1I-kJi5gr3kPTLk,215
|
|
485
484
|
cognee/modules/notebooks/methods/create_notebook.py,sha256=S41H3Rha0pj9dEKFy1nBG9atTGHhUdOmDZgr0ckUA6M,633
|
|
485
|
+
cognee/modules/notebooks/methods/create_tutorial_notebook.py,sha256=ZoGilQU993M0j3fFjBicOSsF5TFEq_k8tjbD_90sI7g,4269
|
|
486
486
|
cognee/modules/notebooks/methods/delete_notebook.py,sha256=BKxoRlPzkwXvTYh5WcF-zo_iVmaXqEiptS42JwB0KQU,309
|
|
487
|
-
cognee/modules/notebooks/methods/get_notebook.py,sha256=
|
|
487
|
+
cognee/modules/notebooks/methods/get_notebook.py,sha256=IP4imsdt9X6GYd6i6WF6PlVhotGNH0i7XZpPqbtqMwo,554
|
|
488
488
|
cognee/modules/notebooks/methods/get_notebooks.py,sha256=ee40ALHvebVORuwZVkQ271qAj260rrYy6eVGxAmfo8c,483
|
|
489
|
-
cognee/modules/notebooks/methods/update_notebook.py,sha256=
|
|
489
|
+
cognee/modules/notebooks/methods/update_notebook.py,sha256=MnZbfh-WfEfH3ImNvyQNhDeNwpYeS7p8FPVwnmBvZVg,361
|
|
490
490
|
cognee/modules/notebooks/models/Notebook.py,sha256=Jth47QxJQ2-VGPyIcS0ul3bS8bgGrk9vCGoJVagxanw,1690
|
|
491
491
|
cognee/modules/notebooks/models/__init__.py,sha256=jldsDjwRvFMreGpe4wxxr5TlFXTZuU7rbsRkGQvTO5s,45
|
|
492
492
|
cognee/modules/notebooks/operations/__init__.py,sha256=VR_2w_d0lEiJ5Xw7_mboo2qWUv0umrR_Bp58MaMoE6w,55
|
|
493
|
-
cognee/modules/notebooks/operations/run_in_local_sandbox.py,sha256=
|
|
493
|
+
cognee/modules/notebooks/operations/run_in_local_sandbox.py,sha256=17hMEQC3LZTfPvbRUrPN9SzDeJPWSTq_BAhtwRZiqT8,1338
|
|
494
494
|
cognee/modules/observability/get_observe.py,sha256=chRw4jmpmrwEvDecF9sgApm23IOzVgCbwkKEAyz1_AI,264
|
|
495
495
|
cognee/modules/observability/observers.py,sha256=SKQSWWyGDG0QY2_bqsFgfpLUb7OUL4WFf8tDZYe5JMM,157
|
|
496
496
|
cognee/modules/ontology/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -550,15 +550,15 @@ cognee/modules/retrieval/code_retriever.py,sha256=cnOjgfCATzz0-XZGFrIIkuVZLc6HBh
|
|
|
550
550
|
cognee/modules/retrieval/coding_rules_retriever.py,sha256=3GU259jTbGLqmp_A8sUdE4fyf0td06SKuxBJVW-npIQ,1134
|
|
551
551
|
cognee/modules/retrieval/completion_retriever.py,sha256=Lw5sxN_UrtmWSOtcSS7Yj50Gw9p4nNBmW3dr2kV9JJ0,3754
|
|
552
552
|
cognee/modules/retrieval/cypher_search_retriever.py,sha256=_3rZJ23hSZpDa8kVyOSWN3fwjMI_aLF2m5p-FtBek8k,2440
|
|
553
|
-
cognee/modules/retrieval/graph_completion_context_extension_retriever.py,sha256
|
|
554
|
-
cognee/modules/retrieval/graph_completion_cot_retriever.py,sha256=
|
|
555
|
-
cognee/modules/retrieval/graph_completion_retriever.py,sha256=
|
|
553
|
+
cognee/modules/retrieval/graph_completion_context_extension_retriever.py,sha256=-6yN8gpRlDue8d28rk-Ly-gq0T8BW-i1-Jgbp1x-Zsg,4532
|
|
554
|
+
cognee/modules/retrieval/graph_completion_cot_retriever.py,sha256=JU-FkikaU68v8fT8VAmG6jojwhwroKYW2RUxdlJ1R-k,6140
|
|
555
|
+
cognee/modules/retrieval/graph_completion_retriever.py,sha256=VnrFD4xUQewIO83mfmIUcPLA_HBGdUlDVRyA2Pm4ARo,8822
|
|
556
556
|
cognee/modules/retrieval/graph_summary_completion_retriever.py,sha256=3AMisk3fObk2Vh1heY4veHkDjLsHgSSUc_ChZseJUYw,2456
|
|
557
557
|
cognee/modules/retrieval/insights_retriever.py,sha256=1pcYd34EfKk85MSPFQ8b-ZbSARmnauks8TxXfNOxvOw,4953
|
|
558
558
|
cognee/modules/retrieval/natural_language_retriever.py,sha256=zJz35zRmBP8-pRlkoxxSxn3-jtG2lUW0xcu58bq9Ebs,5761
|
|
559
559
|
cognee/modules/retrieval/summaries_retriever.py,sha256=joXYphypACm2JiCjbC8nBS61m1q2oYkzyIt9bdgALNw,3384
|
|
560
|
-
cognee/modules/retrieval/temporal_retriever.py,sha256=
|
|
561
|
-
cognee/modules/retrieval/user_qa_feedback.py,sha256
|
|
560
|
+
cognee/modules/retrieval/temporal_retriever.py,sha256=EUEYN94LpoWfbPjsToe_pC3rFsUUTIPA5K6wNjv8Nds,5685
|
|
561
|
+
cognee/modules/retrieval/user_qa_feedback.py,sha256=-VEOsE_t0FiTy00OpOMWAYv12YSLPieAcMsu82vm7h4,3366
|
|
562
562
|
cognee/modules/retrieval/context_providers/DummyContextProvider.py,sha256=9GsvINc7ekRyRWO5IefFGyytRYqsSlhpwAOw6Q691cA,419
|
|
563
563
|
cognee/modules/retrieval/context_providers/SummarizedTripletSearchContextProvider.py,sha256=ypO6yWLxvmRsj_5dyYdvXTbztJmB_ioLrgyG6bF5WGA,894
|
|
564
564
|
cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py,sha256=8PzksHAtRw7tZarP3nZuxhi0cd1EYEDHOT4Q74mNEvc,3656
|
|
@@ -579,7 +579,7 @@ cognee/modules/search/exceptions/exceptions.py,sha256=Zc5Y0M-r-UnSSlpKzHKBplfjZ-
|
|
|
579
579
|
cognee/modules/search/methods/__init__.py,sha256=jGfRvNwM5yIzj025gaVhcx7nCupRSXbUUnFjYVjL_Js,27
|
|
580
580
|
cognee/modules/search/methods/get_search_type_tools.py,sha256=wXxOZx3uEnMhRhUO2HGswQ5iVbWvjUj17UT_qdJg6Oo,6837
|
|
581
581
|
cognee/modules/search/methods/no_access_control_search.py,sha256=R08aMgaB8AkD0_XVaX15qLyC9KJ3fSVFv9zeZwuyez4,1566
|
|
582
|
-
cognee/modules/search/methods/search.py,sha256=
|
|
582
|
+
cognee/modules/search/methods/search.py,sha256=JjB9Nhxt_AIDF24z81FWGm7VVJFW90RCXRAU9VhMG34,12430
|
|
583
583
|
cognee/modules/search/models/Query.py,sha256=9WcF5Z1oCFtA4O-7An37eNAPX3iyygO4B5NSwhx7iIg,558
|
|
584
584
|
cognee/modules/search/models/Result.py,sha256=U7QtoNzAtZnUDwGWhjVfcalHQd4daKtYYvJz2BeWQ4w,564
|
|
585
585
|
cognee/modules/search/operations/__init__.py,sha256=AwJl6v9BTpocoefEZLk-flo1EtydYb46NSUoNFHkhX0,156
|
|
@@ -593,8 +593,9 @@ cognee/modules/search/types/SearchResult.py,sha256=blEean6PRFKcDRQugsojZPfH-Wohx
|
|
|
593
593
|
cognee/modules/search/types/SearchType.py,sha256=-lT4bLKKunV4cL4FfF3tjNbdN7X4AsRMLpTkReNwXZM,594
|
|
594
594
|
cognee/modules/search/types/__init__.py,sha256=8k6OjVrL70W1Jh-ClTbG2ETYIhOtSk3tfqjzYgEdPzA,117
|
|
595
595
|
cognee/modules/search/utils/__init__.py,sha256=86mRtCN-B5-2NNChdQoU5x8_8hqTczGZjBoKVE9O7hA,124
|
|
596
|
-
cognee/modules/search/utils/prepare_search_result.py,sha256=
|
|
597
|
-
cognee/modules/search/utils/transform_context_to_graph.py,sha256=
|
|
596
|
+
cognee/modules/search/utils/prepare_search_result.py,sha256=I_NrC6G549mEm1f0JZYJLCxAYQbKXBIzTJB4kv_3538,2334
|
|
597
|
+
cognee/modules/search/utils/transform_context_to_graph.py,sha256=Wl0kZR6YqyBxY-vBNNIy2pPIZaJVCigcRveJWjSX8BA,1238
|
|
598
|
+
cognee/modules/search/utils/transform_insights_to_graph.py,sha256=_ID5-37Ppl7jHbxNkUioZyH_I8SGXnhbfeLHgfEYec8,925
|
|
598
599
|
cognee/modules/settings/__init__.py,sha256=_SZQgCQnnnIHLJuKOMO9uWzXNBQxwYHHMUSBp0qa2uQ,210
|
|
599
600
|
cognee/modules/settings/get_current_settings.py,sha256=R2lOusG5Q2PMa2-2vDndh3Lm7nXyZVkdzTV7vQHT81Y,1642
|
|
600
601
|
cognee/modules/settings/get_settings.py,sha256=qkpNB_-IRexSzaiVvSS7NXG3S3fpbhDb6BQIPGAKET4,4221
|
|
@@ -762,7 +763,7 @@ cognee/tasks/temporal_graph/add_entities_to_event.py,sha256=wH4TlJfGN5_tjouuSFKK
|
|
|
762
763
|
cognee/tasks/temporal_graph/enrich_events.py,sha256=aLwGKzKLdUXbdn4WGN1uK5vOBk8nPzGM6bJ-7lWkt6s,1097
|
|
763
764
|
cognee/tasks/temporal_graph/extract_events_and_entities.py,sha256=iL0ppf5zmTey67yncLPkDY0Fd2GL4CqDGV4v1L0VmoA,1301
|
|
764
765
|
cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py,sha256=biDjIOnL_6ZSifFokwAlhVqNUixuzoFdYUmPzAT9d1Y,1440
|
|
765
|
-
cognee/tasks/temporal_graph/models.py,sha256=
|
|
766
|
+
cognee/tasks/temporal_graph/models.py,sha256=2fBZWqfZfLNh5BHqU8RbW60R1_IZU3PgY8MZJHlF0S0,1390
|
|
766
767
|
cognee/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
767
768
|
cognee/tests/test_chromadb.py,sha256=D9JEN0xbFxNLgp8UJTVAjpwob9S-LOQC-hSaMVvYhR8,9240
|
|
768
769
|
cognee/tests/test_cognee_server_start.py,sha256=kcIbzu72ZZUlPZ51c_DpSCCwx3X9mNvYZrVcxHfZaJs,4226
|
|
@@ -789,17 +790,16 @@ cognee/tests/test_remote_kuzu.py,sha256=2GG05MtGuhOo6ST82OxjdVDetBS0GWHvKKmmmEtQ
|
|
|
789
790
|
cognee/tests/test_remote_kuzu_stress.py,sha256=5vgnu4Uz_NoKKqFZJeVceHwb2zNhvdTVBgpN3NjhfAE,5304
|
|
790
791
|
cognee/tests/test_s3.py,sha256=rY2UDK15cdyywlyVrR8N2DRtVXWYIW5REaaz99gaQeE,2694
|
|
791
792
|
cognee/tests/test_s3_file_storage.py,sha256=62tvIFyh_uTP0TFF9Ck4Y-sxWPW-cwJKYEJUJI1atPI,5654
|
|
792
|
-
cognee/tests/test_save_export_path.py,sha256=z07oQao82INzldg2mesS3ZGt7fl7rcjKx15JwoGT5tI,3898
|
|
793
793
|
cognee/tests/test_search_db.py,sha256=4GpLx8ZJoMjkp-XqQ-LCrkf3NhAM4j_rMmlOFgmDO-A,13420
|
|
794
794
|
cognee/tests/test_starter_pipelines.py,sha256=X1J8RDD0bFMKnRETyi5nyaF4TYdmUIu0EuD3WQwShNs,2475
|
|
795
795
|
cognee/tests/test_telemetry.py,sha256=FIneuVofSKWFYqxNC88sT_P5GPzgfjVyqDCf2TYBE2E,4130
|
|
796
|
-
cognee/tests/test_temporal_graph.py,sha256=
|
|
796
|
+
cognee/tests/test_temporal_graph.py,sha256=GRYS2FsFybYOuoQvmG711UTVAHgvGvapgMEzW4sclZg,11551
|
|
797
797
|
cognee/tests/cli_tests/cli_integration_tests/__init__.py,sha256=xYkvpZkxv_HRWmX71pGM3NUw2KKkDQIM-V6Ehxu-f0I,39
|
|
798
798
|
cognee/tests/cli_tests/cli_integration_tests/test_cli_integration.py,sha256=3hdz1DoGeidJInqbCy1YQte6J0QeQG1_WKGs9utjAFg,11560
|
|
799
799
|
cognee/tests/cli_tests/cli_unit_tests/__init__.py,sha256=U069aFvdwfKPd6YsR_FJML5LRphHHF5wx9mwug1hRh4,32
|
|
800
800
|
cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py,sha256=5a3vPiSFmKumq6sTfdfMyeUpJGjbZ6_5zX4TUcV0ZJQ,17625
|
|
801
801
|
cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py,sha256=PyFCnClvbXG1GaiS16qwcuyXXDJ4sRyBCKV5WHrOUxk,23501
|
|
802
|
-
cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py,sha256=
|
|
802
|
+
cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py,sha256=6tx2A4us8uyZ7Zk4wZXplqLn5MtAejxOrG5ZxZpbFvQ,6143
|
|
803
803
|
cognee/tests/cli_tests/cli_unit_tests/test_cli_runner.py,sha256=WZ8oZIlc_JintDq_cnEg9tmLEMZMGFPQGhU7Y_7sfgs,1497
|
|
804
804
|
cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py,sha256=Flej8LNYRXNkWd2tq8elMm8MkqbhCUb8RtXaPzfNYm4,4323
|
|
805
805
|
cognee/tests/integration/documents/AudioDocument_test.py,sha256=0mJnlWRc7gWqOxAUfdSSIxntcUrzkPXhlsd-MFsiRoM,2790
|
|
@@ -890,9 +890,9 @@ distributed/tasks/queued_add_edges.py,sha256=kz1DHE05y-kNHORQJjYWHUi6Q1QWUp_v3Dl
|
|
|
890
890
|
distributed/tasks/queued_add_nodes.py,sha256=aqK4Ij--ADwUWknxYpiwbYrpa6CcvFfqHWbUZW4Kh3A,452
|
|
891
891
|
distributed/workers/data_point_saving_worker.py,sha256=jFmA0-P_0Ru2IUDrSug0wML-5goAKrGtlBm5BA5Ryw4,3229
|
|
892
892
|
distributed/workers/graph_saving_worker.py,sha256=oUYl99CdhlrPAIsUOHbHnS3d4XhGoV0_OIbCO8wYzRg,3648
|
|
893
|
-
cognee-0.3.
|
|
894
|
-
cognee-0.3.
|
|
895
|
-
cognee-0.3.
|
|
896
|
-
cognee-0.3.
|
|
897
|
-
cognee-0.3.
|
|
898
|
-
cognee-0.3.
|
|
893
|
+
cognee-0.3.4.dev0.dist-info/METADATA,sha256=T3Pt0L4t3GKyziXuy4n1Kdlh3OUfPt4pPh4VDrjVkPY,14752
|
|
894
|
+
cognee-0.3.4.dev0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
895
|
+
cognee-0.3.4.dev0.dist-info/entry_points.txt,sha256=GCCTsNg8gzOJkolq7dR7OK1VlIAO202dGDnMI8nm8oQ,55
|
|
896
|
+
cognee-0.3.4.dev0.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
|
|
897
|
+
cognee-0.3.4.dev0.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
|
|
898
|
+
cognee-0.3.4.dev0.dist-info/RECORD,,
|
cognee/api/v1/save/save.py
DELETED
|
@@ -1,335 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import asyncio
|
|
3
|
-
import json
|
|
4
|
-
from typing import Optional, Union, List, Dict
|
|
5
|
-
from uuid import UUID
|
|
6
|
-
|
|
7
|
-
from pydantic import BaseModel
|
|
8
|
-
|
|
9
|
-
from cognee.base_config import get_base_config
|
|
10
|
-
from cognee.modules.users.models import User
|
|
11
|
-
from cognee.modules.users.methods import get_default_user
|
|
12
|
-
from cognee.modules.data.methods import get_authorized_existing_datasets, get_dataset_data
|
|
13
|
-
from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
|
|
14
|
-
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
15
|
-
from cognee.shared.logging_utils import get_logger
|
|
16
|
-
from cognee.api.v1.search import search
|
|
17
|
-
from cognee.modules.search.types import SearchType
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
logger = get_logger("save")
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class QuestionsModel(BaseModel):
|
|
24
|
-
questions: List[str]
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def _sanitize_filename(name: str) -> str:
|
|
28
|
-
safe = "".join(c if c.isalnum() or c in ("-", "_", ".", " ") else "_" for c in name)
|
|
29
|
-
return safe.strip().replace(" ", "_")
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def _dataset_dir_name(dataset) -> str:
|
|
33
|
-
# Prefer readable dataset name when available, fallback to id
|
|
34
|
-
if getattr(dataset, "name", None):
|
|
35
|
-
return _sanitize_filename(str(dataset.name))
|
|
36
|
-
return str(dataset.id)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def _file_markdown_name(data_item, used_names: set[str]) -> str:
|
|
40
|
-
# Use original file name if present, else data.name
|
|
41
|
-
name = getattr(data_item, "name", None) or "file"
|
|
42
|
-
base = _sanitize_filename(str(name))
|
|
43
|
-
filename = f"{base}.md"
|
|
44
|
-
if filename in used_names:
|
|
45
|
-
short_id = str(getattr(data_item, "id", ""))[:8]
|
|
46
|
-
filename = f"{base}__{short_id}.md"
|
|
47
|
-
used_names.add(filename)
|
|
48
|
-
return filename
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def _ascii_path_tree(path_str: str) -> str:
|
|
52
|
-
if not path_str:
|
|
53
|
-
return "(no path)"
|
|
54
|
-
|
|
55
|
-
# Normalize special schemes but keep segments readable
|
|
56
|
-
try:
|
|
57
|
-
normalized = get_data_file_path(path_str)
|
|
58
|
-
except Exception:
|
|
59
|
-
normalized = path_str
|
|
60
|
-
|
|
61
|
-
# Keep the path compact – show last 5 segments
|
|
62
|
-
parts = [p for p in normalized.replace("\\", "/").split("/") if p]
|
|
63
|
-
if len(parts) > 6:
|
|
64
|
-
display = ["…"] + parts[-5:]
|
|
65
|
-
else:
|
|
66
|
-
display = parts
|
|
67
|
-
|
|
68
|
-
# Render a single-branch tree
|
|
69
|
-
lines = []
|
|
70
|
-
for idx, seg in enumerate(display):
|
|
71
|
-
prefix = "└── " if idx == 0 else (" " * idx + "└── ")
|
|
72
|
-
lines.append(f"{prefix}{seg}")
|
|
73
|
-
return "\n".join(lines)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
async def _get_summary_via_summaries(query_text: str, dataset_id: UUID, top_k: int) -> str:
|
|
77
|
-
try:
|
|
78
|
-
results = await search(
|
|
79
|
-
query_text=query_text,
|
|
80
|
-
query_type=SearchType.SUMMARIES,
|
|
81
|
-
dataset_ids=[dataset_id],
|
|
82
|
-
top_k=top_k,
|
|
83
|
-
)
|
|
84
|
-
if not results:
|
|
85
|
-
return ""
|
|
86
|
-
texts: List[str] = []
|
|
87
|
-
for r in results[:top_k]:
|
|
88
|
-
texts.append(str(r))
|
|
89
|
-
return "\n\n".join(texts)
|
|
90
|
-
except Exception as e:
|
|
91
|
-
logger.error(
|
|
92
|
-
"SUMMARIES search failed for '%s' in dataset %s: %s",
|
|
93
|
-
query_text,
|
|
94
|
-
str(dataset_id),
|
|
95
|
-
str(e),
|
|
96
|
-
)
|
|
97
|
-
return ""
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
async def _generate_questions(file_name: str, summary_text: str) -> List[str]:
|
|
101
|
-
prompt = (
|
|
102
|
-
"You are an expert analyst. Given a file and its summary, propose 10 diverse, high-signal "
|
|
103
|
-
"questions to further explore the file's content, implications, relationships, and gaps. "
|
|
104
|
-
"Avoid duplicates; vary depth and angle (overview, details, cross-references, temporal, quality).\n\n"
|
|
105
|
-
f"File: {file_name}\n\nSummary:\n{summary_text[:4000]}"
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
model = await LLMGateway.acreate_structured_output(
|
|
109
|
-
text_input=prompt,
|
|
110
|
-
system_prompt="Return strictly a JSON with key 'questions' and value as an array of 10 concise strings.",
|
|
111
|
-
response_model=QuestionsModel,
|
|
112
|
-
)
|
|
113
|
-
|
|
114
|
-
# model can be either pydantic model or dict-like, normalize
|
|
115
|
-
try:
|
|
116
|
-
questions = list(getattr(model, "questions", []))
|
|
117
|
-
except Exception:
|
|
118
|
-
questions = []
|
|
119
|
-
|
|
120
|
-
# Fallback if the tool returned a dict-like
|
|
121
|
-
if not questions and isinstance(model, dict):
|
|
122
|
-
questions = list(model.get("questions", []) or [])
|
|
123
|
-
|
|
124
|
-
# Enforce 10 max
|
|
125
|
-
return questions[:10]
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
async def _run_searches_for_question(
|
|
129
|
-
question: str, dataset_id: UUID, search_types: List[SearchType], top_k: int
|
|
130
|
-
) -> Dict[str, Union[str, List[dict], List[str]]]:
|
|
131
|
-
async def run_one(st: SearchType):
|
|
132
|
-
try:
|
|
133
|
-
result = await search(
|
|
134
|
-
query_text=question,
|
|
135
|
-
query_type=st,
|
|
136
|
-
dataset_ids=[dataset_id],
|
|
137
|
-
top_k=top_k,
|
|
138
|
-
)
|
|
139
|
-
return st.value, result
|
|
140
|
-
except Exception as e:
|
|
141
|
-
logger.error("Search failed for type %s: %s", st.value, str(e))
|
|
142
|
-
return st.value, [f"Error: {str(e)}"]
|
|
143
|
-
|
|
144
|
-
pairs = await asyncio.gather(*[run_one(st) for st in search_types])
|
|
145
|
-
return {k: v for k, v in pairs}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def _format_results_md(results: Dict[str, Union[str, List[dict], List[str]]]) -> str:
|
|
149
|
-
lines: List[str] = []
|
|
150
|
-
for st, payload in results.items():
|
|
151
|
-
lines.append(f"#### {st}")
|
|
152
|
-
if isinstance(payload, list):
|
|
153
|
-
# Printed as bullet items; stringify dicts
|
|
154
|
-
for item in payload[:5]:
|
|
155
|
-
if isinstance(item, dict):
|
|
156
|
-
# compact representation
|
|
157
|
-
snippet = json.dumps(item, ensure_ascii=False)[:800]
|
|
158
|
-
lines.append(f"- {snippet}")
|
|
159
|
-
else:
|
|
160
|
-
text = str(item)
|
|
161
|
-
lines.append(f"- {text[:800]}")
|
|
162
|
-
else:
|
|
163
|
-
lines.append(str(payload))
|
|
164
|
-
lines.append("")
|
|
165
|
-
return "\n".join(lines)
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
async def save(
    datasets: Optional[Union[List[str], List[UUID]]] = None,
    export_root_directory: Optional[str] = None,
    user: Optional[User] = None,
    # Configurable knobs
    max_questions: int = 10,
    search_types: Optional[List[Union[str, SearchType]]] = None,
    top_k: int = 5,
    include_summary: bool = True,
    include_ascii_tree: bool = True,
    concurrency: int = 4,
    timeout: Optional[float] = None,
) -> Dict[str, str]:
    """
    Export per-dataset markdown summaries and search insights for each ingested file.

    For every dataset the user can read:
      - Create a folder under export_root_directory
        (or data_root_directory/memory_export when it is not given)
      - For each data item (file), create a .md containing:
        - A small ASCII path tree showing its folder position (optional)
        - A summary of the file obtained through a SUMMARIES search (optional)
        - Up to ``max_questions`` LLM-generated question ideas
        - Results of the configured Cognee searches per question
    Also creates an index.md per dataset with links to the files and, when the
    SUMMARIES search returns anything, a dataset summary section.

    Args:
        datasets: Dataset names or ids to export; passed to the authorization lookup.
        export_root_directory: Root folder for the export; see the default above.
        user: Acting user; the default user is used when omitted.
        max_questions: Cap on questions per file. ``0`` skips question generation;
            ``None`` or a negative value leaves the generated list untruncated.
        search_types: Search types to run per question, as ``SearchType`` members
            or member names. Unknown names are skipped with a warning. When empty
            or omitted, GRAPH_COMPLETION, INSIGHTS and CHUNKS are used.
        top_k: Forwarded to every search; also caps the dataset summary entries.
        include_summary: Whether to include the "Summary" section.
        include_ascii_tree: Whether to include the "Location" section.
        concurrency: Maximum number of data items processed at once (at least 1).
        timeout: Optional per-dataset time limit in seconds for processing its
            data items; on expiry the error is logged and the export continues.

    Returns a mapping of dataset_id -> export_directory path.
    """
    base_config = get_base_config()
    export_root = export_root_directory or os.path.join(
        base_config.data_root_directory, "memory_export"
    )
    os.makedirs(export_root, exist_ok=True)

    if user is None:
        user = await get_default_user()

    datasets_list = await get_authorized_existing_datasets(datasets, "read", user)
    results: Dict[str, str] = {}

    # Normalize search types once: the outcome does not depend on the dataset,
    # and doing it here avoids repeating the "unknown type" warning per dataset.
    if not search_types:
        effective_search_types = [
            SearchType.GRAPH_COMPLETION,
            SearchType.INSIGHTS,
            SearchType.CHUNKS,
        ]
    else:
        effective_search_types = []
        for st in search_types:
            if isinstance(st, SearchType):
                effective_search_types.append(st)
            else:
                try:
                    effective_search_types.append(SearchType[str(st)])
                except Exception:
                    logger.warning("Unknown search type '%s', skipping", str(st))

    for dataset in datasets_list:
        ds_dir = os.path.join(export_root, _dataset_dir_name(dataset))
        os.makedirs(ds_dir, exist_ok=True)
        results[str(dataset.id)] = ds_dir

        data_items = await get_dataset_data(dataset.id)

        sem = asyncio.Semaphore(max(1, int(concurrency)))
        used_names: set[str] = set()
        index_entries: List[tuple[str, str]] = []

        async def process_one(data_item):
            async with sem:
                # Fall back to the id when the name is missing or empty, so the
                # heading and the index sort key below are always strings.
                file_label = getattr(data_item, "name", None) or str(data_item.id)
                original_path = getattr(data_item, "original_data_location", None)

                ascii_tree = (
                    _ascii_path_tree(original_path or file_label) if include_ascii_tree else ""
                )

                summary_text = ""
                if include_summary:
                    # Use SUMMARIES search scoped to dataset to derive file summary
                    file_query = getattr(data_item, "name", str(data_item.id)) or "file"
                    summary_text = await _get_summary_via_summaries(file_query, dataset.id, top_k)
                    if not summary_text:
                        summary_text = "Summary not available."

                if max_questions == 0:
                    questions = []
                else:
                    questions = await _generate_questions(file_label, summary_text)
                    if max_questions is not None and max_questions >= 0:
                        questions = questions[:max_questions]

                async def searches_for_question(q: str):
                    return await _run_searches_for_question(
                        q, dataset.id, effective_search_types, top_k
                    )

                # Run per-question searches concurrently
                per_q_results = await asyncio.gather(*[searches_for_question(q) for q in questions])

                # Build markdown content
                md_lines = [f"# {file_label}", ""]
                if include_ascii_tree:
                    md_lines.extend(["## Location", "", "```", ascii_tree, "```", ""])
                if include_summary:
                    md_lines.extend(["## Summary", "", summary_text, ""])

                md_lines.append("## Question ideas")
                for idx, q in enumerate(questions, start=1):
                    md_lines.append(f"- {idx}. {q}")
                md_lines.append("")

                md_lines.append("## Searches")
                md_lines.append("")
                for q, per_type in zip(questions, per_q_results):
                    md_lines.append(f"### Q: {q}")
                    md_lines.append(_format_results_md(per_type))
                    md_lines.append("")

                # Write to a temp file first, then rename, so a partially
                # written export never appears under the final name.
                md_filename = _file_markdown_name(data_item, used_names)
                export_path = os.path.join(ds_dir, md_filename)
                tmp_path = export_path + ".tmp"
                with open(tmp_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(md_lines))
                os.replace(tmp_path, export_path)

                index_entries.append((file_label, md_filename))

        jobs = [(item, asyncio.create_task(process_one(item))) for item in data_items]
        gathered = asyncio.gather(*(task for _, task in jobs), return_exceptions=True)

        outcomes = []
        if timeout and timeout > 0:
            try:
                outcomes = await asyncio.wait_for(gathered, timeout)
            except asyncio.TimeoutError:
                logger.error("Save timed out for dataset %s", str(dataset.id))
        else:
            outcomes = await gathered

        # return_exceptions=True keeps one failing item from aborting the rest,
        # but the failures must still be reported instead of silently dropped.
        for (item, _), outcome in zip(jobs, outcomes):
            if isinstance(outcome, BaseException):
                logger.error(
                    "Failed to export data item %s in dataset %s: %s",
                    str(getattr(item, "id", None)),
                    str(dataset.id),
                    str(outcome),
                )

        # Build dataset index.md with TOC and optional dataset summary via SUMMARIES
        try:
            index_lines = [f"# Dataset: {_dataset_dir_name(dataset)}", "", "## Files", ""]
            for display, fname in sorted(index_entries, key=lambda x: x[0].lower()):
                index_lines.append(f"- [{display}]({fname})")

            # Dataset summary section
            try:
                summaries = await search(
                    query_text="dataset overview",
                    query_type=SearchType.SUMMARIES,
                    dataset_ids=[dataset.id],
                    top_k=top_k,
                )
            except Exception as e:
                logger.error("Dataset summary search failed: %s", str(e))
                summaries = []

            if summaries:
                index_lines.extend(["", "## Dataset summary (top summaries)", ""])
                for s in summaries[:top_k]:
                    index_lines.append(f"- {str(s)[:800]}")

            # Same temp-file-then-rename approach as the per-file exports.
            index_path = os.path.join(ds_dir, "index.md")
            tmp_index_path = index_path + ".tmp"
            with open(tmp_index_path, "w", encoding="utf-8") as f:
                f.write("\n".join(index_lines))
            os.replace(tmp_index_path, index_path)
        except Exception as e:
            logger.error("Failed to write dataset index for %s: %s", str(dataset.id), str(e))

    return results
|