letta-nightly 0.4.1.dev20241014104152__tar.gz → 0.5.0.dev20241015104156__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (194)
  1. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/PKG-INFO +1 -1
  2. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/__init__.py +2 -2
  3. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/agent_store/db.py +18 -7
  4. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/agent_store/lancedb.py +2 -2
  5. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/agent_store/milvus.py +1 -1
  6. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/agent_store/qdrant.py +1 -1
  7. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/agent_store/storage.py +12 -10
  8. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/cli/cli_load.py +1 -1
  9. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/client/client.py +51 -0
  10. letta_nightly-0.5.0.dev20241015104156/letta/data_sources/connectors.py +247 -0
  11. letta_nightly-0.5.0.dev20241015104156/letta/data_sources/connectors_helper.py +97 -0
  12. letta_nightly-0.5.0.dev20241015104156/letta/llm_api/mistral.py +47 -0
  13. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/metadata.py +58 -0
  14. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/providers.py +44 -0
  15. letta_nightly-0.5.0.dev20241015104156/letta/schemas/file.py +31 -0
  16. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/job.py +1 -1
  17. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/letta_request.py +3 -3
  18. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/llm_config.py +1 -0
  19. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/message.py +6 -2
  20. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/passage.py +3 -3
  21. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/source.py +2 -2
  22. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/agents.py +10 -16
  23. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/jobs.py +17 -1
  24. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/sources.py +7 -9
  25. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/server.py +86 -13
  26. letta_nightly-0.4.1.dev20241014104152/letta/server/static_files/assets/index-9a9c449b.js → letta_nightly-0.5.0.dev20241015104156/letta/server/static_files/assets/index-dc228d4a.js +4 -4
  27. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/static_files/index.html +1 -1
  28. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/pyproject.toml +1 -1
  29. letta_nightly-0.4.1.dev20241014104152/letta/data_sources/connectors.py +0 -247
  30. letta_nightly-0.4.1.dev20241014104152/letta/schemas/document.py +0 -21
  31. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/LICENSE +0 -0
  32. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/README.md +0 -0
  33. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/__main__.py +0 -0
  34. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/agent.py +0 -0
  35. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/agent_store/chroma.py +0 -0
  36. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/base.py +0 -0
  37. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/benchmark/benchmark.py +0 -0
  38. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/benchmark/constants.py +0 -0
  39. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/cli/cli.py +0 -0
  40. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/cli/cli_config.py +0 -0
  41. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/client/__init__.py +0 -0
  42. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/client/admin.py +0 -0
  43. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/client/streaming.py +0 -0
  44. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/client/utils.py +0 -0
  45. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/config.py +0 -0
  46. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/constants.py +0 -0
  47. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/credentials.py +0 -0
  48. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/embeddings.py +0 -0
  49. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/errors.py +0 -0
  50. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/functions/__init__.py +0 -0
  51. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/functions/function_sets/base.py +0 -0
  52. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/functions/function_sets/extras.py +0 -0
  53. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/functions/functions.py +0 -0
  54. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/functions/helpers.py +0 -0
  55. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/functions/schema_generator.py +0 -0
  56. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/humans/__init__.py +0 -0
  57. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/humans/examples/basic.txt +0 -0
  58. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/humans/examples/cs_phd.txt +0 -0
  59. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/interface.py +0 -0
  60. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/__init__.py +0 -0
  61. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/anthropic.py +0 -0
  62. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/azure_openai.py +0 -0
  63. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/azure_openai_constants.py +0 -0
  64. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/cohere.py +0 -0
  65. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/google_ai.py +0 -0
  66. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/helpers.py +0 -0
  67. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/llm_api_tools.py +0 -0
  68. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/llm_api/openai.py +0 -0
  69. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/README.md +0 -0
  70. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/__init__.py +0 -0
  71. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/chat_completion_proxy.py +0 -0
  72. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/constants.py +0 -0
  73. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/function_parser.py +0 -0
  74. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/grammars/__init__.py +0 -0
  75. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/grammars/gbnf_grammar_generator.py +0 -0
  76. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/grammars/json.gbnf +0 -0
  77. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +0 -0
  78. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/json_parser.py +0 -0
  79. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/koboldcpp/api.py +0 -0
  80. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/koboldcpp/settings.py +0 -0
  81. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llamacpp/api.py +0 -0
  82. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llamacpp/settings.py +0 -0
  83. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  84. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/airoboros.py +0 -0
  85. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/chatml.py +0 -0
  86. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +0 -0
  87. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/dolphin.py +0 -0
  88. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/llama3.py +0 -0
  89. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +0 -0
  90. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +0 -0
  91. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/llm_chat_completion_wrappers/zephyr.py +0 -0
  92. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/lmstudio/api.py +0 -0
  93. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/lmstudio/settings.py +0 -0
  94. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/ollama/api.py +0 -0
  95. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/ollama/settings.py +0 -0
  96. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/settings/__init__.py +0 -0
  97. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/settings/deterministic_mirostat.py +0 -0
  98. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/settings/settings.py +0 -0
  99. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/settings/simple.py +0 -0
  100. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/utils.py +0 -0
  101. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/vllm/api.py +0 -0
  102. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/webui/api.py +0 -0
  103. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/webui/legacy_api.py +0 -0
  104. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/webui/legacy_settings.py +0 -0
  105. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/local_llm/webui/settings.py +0 -0
  106. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/log.py +0 -0
  107. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/main.py +0 -0
  108. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/memory.py +0 -0
  109. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/openai_backcompat/__init__.py +0 -0
  110. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/openai_backcompat/openai_object.py +0 -0
  111. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/persistence_manager.py +0 -0
  112. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/__init__.py +0 -0
  113. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/anna_pa.txt +0 -0
  114. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/google_search_persona.txt +0 -0
  115. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/memgpt_doc.txt +0 -0
  116. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/memgpt_starter.txt +0 -0
  117. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/sam.txt +0 -0
  118. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/sam_pov.txt +0 -0
  119. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/sam_simple_pov_gpt35.txt +0 -0
  120. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/personas/examples/sqldb/test.db +0 -0
  121. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/__init__.py +0 -0
  122. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/gpt_summarize.py +0 -0
  123. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/gpt_system.py +0 -0
  124. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_base.txt +0 -0
  125. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_chat.txt +0 -0
  126. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_chat_compressed.txt +0 -0
  127. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_chat_fstring.txt +0 -0
  128. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_doc.txt +0 -0
  129. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_gpt35_extralong.txt +0 -0
  130. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_intuitive_knowledge.txt +0 -0
  131. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/prompts/system/memgpt_modified_chat.txt +0 -0
  132. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/pytest.ini +0 -0
  133. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/agent.py +0 -0
  134. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/api_key.py +0 -0
  135. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/block.py +0 -0
  136. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/embedding_config.py +0 -0
  137. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/enums.py +0 -0
  138. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/health.py +0 -0
  139. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/letta_base.py +0 -0
  140. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/letta_message.py +0 -0
  141. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/letta_response.py +0 -0
  142. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/memory.py +0 -0
  143. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/openai/chat_completion_request.py +0 -0
  144. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/openai/chat_completion_response.py +0 -0
  145. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/openai/chat_completions.py +0 -0
  146. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/openai/embedding_response.py +0 -0
  147. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/openai/openai.py +0 -0
  148. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/organization.py +0 -0
  149. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/tool.py +0 -0
  150. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/usage.py +0 -0
  151. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/schemas/user.py +0 -0
  152. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/__init__.py +0 -0
  153. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/constants.py +0 -0
  154. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/__init__.py +0 -0
  155. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/admin/__init__.py +0 -0
  156. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/admin/agents.py +0 -0
  157. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/admin/tools.py +0 -0
  158. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/admin/users.py +0 -0
  159. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/app.py +0 -0
  160. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/auth/__init__.py +0 -0
  161. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/auth/index.py +0 -0
  162. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/auth_token.py +0 -0
  163. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/interface.py +0 -0
  164. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/__init__.py +0 -0
  165. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/openai/__init__.py +0 -0
  166. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  167. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/openai/assistants/assistants.py +0 -0
  168. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/openai/assistants/schemas.py +0 -0
  169. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/openai/assistants/threads.py +0 -0
  170. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  171. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -0
  172. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/__init__.py +0 -0
  173. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/blocks.py +0 -0
  174. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/health.py +0 -0
  175. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/llms.py +0 -0
  176. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/organizations.py +0 -0
  177. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/tools.py +0 -0
  178. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/routers/v1/users.py +0 -0
  179. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/static_files.py +0 -0
  180. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/rest_api/utils.py +0 -0
  181. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/startup.sh +0 -0
  182. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/static_files/assets/index-3ab03d5b.css +0 -0
  183. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/static_files/favicon.ico +0 -0
  184. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/static_files/memgpt_logo_transparent.png +0 -0
  185. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/utils.py +0 -0
  186. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/ws_api/__init__.py +0 -0
  187. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/ws_api/example_client.py +0 -0
  188. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/ws_api/interface.py +0 -0
  189. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/ws_api/protocol.py +0 -0
  190. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/server/ws_api/server.py +0 -0
  191. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/settings.py +0 -0
  192. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/streaming_interface.py +0 -0
  193. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/system.py +0 -0
  194. {letta_nightly-0.4.1.dev20241014104152 → letta_nightly-0.5.0.dev20241015104156}/letta/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: letta-nightly
3
- Version: 0.4.1.dev20241014104152
3
+ Version: 0.5.0.dev20241015104156
4
4
  Summary: Create LLM agents with long-term memory and custom tools
5
5
  License: Apache License
6
6
  Author: Letta Team
@@ -1,4 +1,4 @@
1
- __version__ = "0.4.1"
1
+ __version__ = "0.5.0"
2
2
 
3
3
  # import clients
4
4
  from letta.client.admin import Admin
@@ -7,9 +7,9 @@ from letta.client.client import LocalClient, RESTClient, create_client
7
7
  # imports for easier access
8
8
  from letta.schemas.agent import AgentState
9
9
  from letta.schemas.block import Block
10
- from letta.schemas.document import Document
11
10
  from letta.schemas.embedding_config import EmbeddingConfig
12
11
  from letta.schemas.enums import JobStatus
12
+ from letta.schemas.file import FileMetadata
13
13
  from letta.schemas.job import Job
14
14
  from letta.schemas.letta_message import LettaMessage
15
15
  from letta.schemas.llm_config import LLMConfig
@@ -28,7 +28,7 @@ from letta.agent_store.storage import StorageConnector, TableType
28
28
  from letta.base import Base
29
29
  from letta.config import LettaConfig
30
30
  from letta.constants import MAX_EMBEDDING_DIM
31
- from letta.metadata import EmbeddingConfigColumn, ToolCallColumn
31
+ from letta.metadata import EmbeddingConfigColumn, FileMetadataModel, ToolCallColumn
32
32
 
33
33
  # from letta.schemas.message import Message, Passage, Record, RecordType, ToolCall
34
34
  from letta.schemas.message import Message
@@ -141,7 +141,7 @@ class PassageModel(Base):
141
141
  id = Column(String, primary_key=True)
142
142
  user_id = Column(String, nullable=False)
143
143
  text = Column(String)
144
- doc_id = Column(String)
144
+ file_id = Column(String)
145
145
  agent_id = Column(String)
146
146
  source_id = Column(String)
147
147
 
@@ -160,7 +160,7 @@ class PassageModel(Base):
160
160
  # Add a datetime column, with default value as the current time
161
161
  created_at = Column(DateTime(timezone=True))
162
162
 
163
- Index("passage_idx_user", user_id, agent_id, doc_id),
163
+ Index("passage_idx_user", user_id, agent_id, file_id),
164
164
 
165
165
  def __repr__(self):
166
166
  return f"<Passage(passage_id='{self.id}', text='{self.text}', embedding='{self.embedding})>"
@@ -170,7 +170,7 @@ class PassageModel(Base):
170
170
  text=self.text,
171
171
  embedding=self.embedding,
172
172
  embedding_config=self.embedding_config,
173
- doc_id=self.doc_id,
173
+ file_id=self.file_id,
174
174
  user_id=self.user_id,
175
175
  id=self.id,
176
176
  source_id=self.source_id,
@@ -365,12 +365,17 @@ class PostgresStorageConnector(SQLStorageConnector):
365
365
  self.uri = self.config.archival_storage_uri
366
366
  self.db_model = PassageModel
367
367
  if self.config.archival_storage_uri is None:
368
- raise ValueError(f"Must specifiy archival_storage_uri in config {self.config.config_path}")
368
+ raise ValueError(f"Must specify archival_storage_uri in config {self.config.config_path}")
369
369
  elif table_type == TableType.RECALL_MEMORY:
370
370
  self.uri = self.config.recall_storage_uri
371
371
  self.db_model = MessageModel
372
372
  if self.config.recall_storage_uri is None:
373
- raise ValueError(f"Must specifiy recall_storage_uri in config {self.config.config_path}")
373
+ raise ValueError(f"Must specify recall_storage_uri in config {self.config.config_path}")
374
+ elif table_type == TableType.FILES:
375
+ self.uri = self.config.metadata_storage_uri
376
+ self.db_model = FileMetadataModel
377
+ if self.config.metadata_storage_uri is None:
378
+ raise ValueError(f"Must specify metadata_storage_uri in config {self.config.config_path}")
374
379
  else:
375
380
  raise ValueError(f"Table type {table_type} not implemented")
376
381
 
@@ -487,8 +492,14 @@ class SQLLiteStorageConnector(SQLStorageConnector):
487
492
  # TODO: eventually implement URI option
488
493
  self.path = self.config.recall_storage_path
489
494
  if self.path is None:
490
- raise ValueError(f"Must specifiy recall_storage_path in config {self.config.recall_storage_path}")
495
+ raise ValueError(f"Must specify recall_storage_path in config.")
491
496
  self.db_model = MessageModel
497
+ elif table_type == TableType.FILES:
498
+ self.path = self.config.metadata_storage_path
499
+ if self.path is None:
500
+ raise ValueError(f"Must specify metadata_storage_path in config.")
501
+ self.db_model = FileMetadataModel
502
+
492
503
  else:
493
504
  raise ValueError(f"Table type {table_type} not implemented")
494
505
 
@@ -24,7 +24,7 @@ def get_db_model(table_name: str, table_type: TableType):
24
24
  id: uuid.UUID
25
25
  user_id: str
26
26
  text: str
27
- doc_id: str
27
+ file_id: str
28
28
  agent_id: str
29
29
  data_source: str
30
30
  embedding: Vector(config.default_embedding_config.embedding_dim)
@@ -37,7 +37,7 @@ def get_db_model(table_name: str, table_type: TableType):
37
37
  return Passage(
38
38
  text=self.text,
39
39
  embedding=self.embedding,
40
- doc_id=self.doc_id,
40
+ file_id=self.file_id,
41
41
  user_id=self.user_id,
42
42
  id=self.id,
43
43
  data_source=self.data_source,
@@ -26,7 +26,7 @@ class MilvusStorageConnector(StorageConnector):
26
26
  raise ValueError("Please set `archival_storage_uri` in the config file when using Milvus.")
27
27
 
28
28
  # need to be converted to strings
29
- self.uuid_fields = ["id", "user_id", "agent_id", "source_id", "doc_id"]
29
+ self.uuid_fields = ["id", "user_id", "agent_id", "source_id", "file_id"]
30
30
 
31
31
  def _create_collection(self):
32
32
  schema = MilvusClient.create_schema(
@@ -38,7 +38,7 @@ class QdrantStorageConnector(StorageConnector):
38
38
  distance=models.Distance.COSINE,
39
39
  ),
40
40
  )
41
- self.uuid_fields = ["id", "user_id", "agent_id", "source_id", "doc_id"]
41
+ self.uuid_fields = ["id", "user_id", "agent_id", "source_id", "file_id"]
42
42
 
43
43
  def get_all_paginated(self, filters: Optional[Dict] = {}, page_size: int = 10) -> Iterator[List[RecordType]]:
44
44
  from qdrant_client import grpc
@@ -10,7 +10,7 @@ from typing import Dict, List, Optional, Tuple, Type, Union
10
10
  from pydantic import BaseModel
11
11
 
12
12
  from letta.config import LettaConfig
13
- from letta.schemas.document import Document
13
+ from letta.schemas.file import FileMetadata
14
14
  from letta.schemas.message import Message
15
15
  from letta.schemas.passage import Passage
16
16
  from letta.utils import printd
@@ -22,7 +22,7 @@ class TableType:
22
22
  ARCHIVAL_MEMORY = "archival_memory" # recall memory table: letta_agent_{agent_id}
23
23
  RECALL_MEMORY = "recall_memory" # archival memory table: letta_agent_recall_{agent_id}
24
24
  PASSAGES = "passages" # TODO
25
- DOCUMENTS = "documents" # TODO
25
+ FILES = "files"
26
26
 
27
27
 
28
28
  # table names used by Letta
@@ -33,17 +33,17 @@ ARCHIVAL_TABLE_NAME = "letta_archival_memory_agent" # agent memory
33
33
 
34
34
  # external data source tables
35
35
  PASSAGE_TABLE_NAME = "letta_passages" # chunked/embedded passages (from source)
36
- DOCUMENT_TABLE_NAME = "letta_documents" # original documents (from source)
36
+ FILE_TABLE_NAME = "letta_files" # original files (from source)
37
37
 
38
38
 
39
39
  class StorageConnector:
40
- """Defines a DB connection that is user-specific to access data: Documents, Passages, Archival/Recall Memory"""
40
+ """Defines a DB connection that is user-specific to access data: files, Passages, Archival/Recall Memory"""
41
41
 
42
42
  type: Type[BaseModel]
43
43
 
44
44
  def __init__(
45
45
  self,
46
- table_type: Union[TableType.ARCHIVAL_MEMORY, TableType.RECALL_MEMORY, TableType.PASSAGES, TableType.DOCUMENTS],
46
+ table_type: Union[TableType.ARCHIVAL_MEMORY, TableType.RECALL_MEMORY, TableType.PASSAGES, TableType.FILES],
47
47
  config: LettaConfig,
48
48
  user_id,
49
49
  agent_id=None,
@@ -59,9 +59,9 @@ class StorageConnector:
59
59
  elif table_type == TableType.RECALL_MEMORY:
60
60
  self.type = Message
61
61
  self.table_name = RECALL_TABLE_NAME
62
- elif table_type == TableType.DOCUMENTS:
63
- self.type = Document
64
- self.table_name == DOCUMENT_TABLE_NAME
62
+ elif table_type == TableType.FILES:
63
+ self.type = FileMetadata
64
+ self.table_name = FILE_TABLE_NAME
65
65
  elif table_type == TableType.PASSAGES:
66
66
  self.type = Passage
67
67
  self.table_name = PASSAGE_TABLE_NAME
@@ -74,7 +74,7 @@ class StorageConnector:
74
74
  # agent-specific table
75
75
  assert agent_id is not None, "Agent ID must be provided for agent-specific tables"
76
76
  self.filters = {"user_id": self.user_id, "agent_id": self.agent_id}
77
- elif self.table_type == TableType.PASSAGES or self.table_type == TableType.DOCUMENTS:
77
+ elif self.table_type == TableType.PASSAGES or self.table_type == TableType.FILES:
78
78
  # setup base filters for user-specific tables
79
79
  assert agent_id is None, "Agent ID must not be provided for user-specific tables"
80
80
  self.filters = {"user_id": self.user_id}
@@ -83,7 +83,7 @@ class StorageConnector:
83
83
 
84
84
  @staticmethod
85
85
  def get_storage_connector(
86
- table_type: Union[TableType.ARCHIVAL_MEMORY, TableType.RECALL_MEMORY, TableType.PASSAGES, TableType.DOCUMENTS],
86
+ table_type: Union[TableType.ARCHIVAL_MEMORY, TableType.RECALL_MEMORY, TableType.PASSAGES, TableType.FILES],
87
87
  config: LettaConfig,
88
88
  user_id,
89
89
  agent_id=None,
@@ -92,6 +92,8 @@ class StorageConnector:
92
92
  storage_type = config.archival_storage_type
93
93
  elif table_type == TableType.RECALL_MEMORY:
94
94
  storage_type = config.recall_storage_type
95
+ elif table_type == TableType.FILES:
96
+ storage_type = config.metadata_storage_type
95
97
  else:
96
98
  raise ValueError(f"Table type {table_type} not implemented")
97
99
 
@@ -106,7 +106,7 @@ def load_vector_database(
106
106
  # document_store=None,
107
107
  # passage_store=passage_storage,
108
108
  # )
109
- # print(f"Loaded {num_passages} passages and {num_documents} documents from {name}")
109
+ # print(f"Loaded {num_passages} passages and {num_documents} files from {name}")
110
110
  # except Exception as e:
111
111
  # typer.secho(f"Failed to load data from provided information.\n{e}", fg=typer.colors.RED)
112
112
  # ms.delete_source(source_id=source.id)
@@ -25,6 +25,7 @@ from letta.schemas.embedding_config import EmbeddingConfig
25
25
 
26
26
  # new schemas
27
27
  from letta.schemas.enums import JobStatus, MessageRole
28
+ from letta.schemas.file import FileMetadata
28
29
  from letta.schemas.job import Job
29
30
  from letta.schemas.letta_request import LettaRequest
30
31
  from letta.schemas.letta_response import LettaResponse, LettaStreamingResponse
@@ -232,6 +233,9 @@ class AbstractClient(object):
232
233
  def list_attached_sources(self, agent_id: str) -> List[Source]:
233
234
  raise NotImplementedError
234
235
 
236
+ def list_files_from_source(self, source_id: str, limit: int = 1000, cursor: Optional[str] = None) -> List[FileMetadata]:
237
+ raise NotImplementedError
238
+
235
239
  def update_source(self, source_id: str, name: Optional[str] = None) -> Source:
236
240
  raise NotImplementedError
237
241
 
@@ -1016,6 +1020,12 @@ class RESTClient(AbstractClient):
1016
1020
  raise ValueError(f"Failed to get job: {response.text}")
1017
1021
  return Job(**response.json())
1018
1022
 
1023
+ def delete_job(self, job_id: str) -> Job:
1024
+ response = requests.delete(f"{self.base_url}/{self.api_prefix}/jobs/{job_id}", headers=self.headers)
1025
+ if response.status_code != 200:
1026
+ raise ValueError(f"Failed to delete job: {response.text}")
1027
+ return Job(**response.json())
1028
+
1019
1029
  def list_jobs(self):
1020
1030
  response = requests.get(f"{self.base_url}/{self.api_prefix}/jobs", headers=self.headers)
1021
1031
  return [Job(**job) for job in response.json()]
@@ -1088,6 +1098,30 @@ class RESTClient(AbstractClient):
1088
1098
  raise ValueError(f"Failed to list attached sources: {response.text}")
1089
1099
  return [Source(**source) for source in response.json()]
1090
1100
 
1101
+ def list_files_from_source(self, source_id: str, limit: int = 1000, cursor: Optional[str] = None) -> List[FileMetadata]:
1102
+ """
1103
+ List files from source with pagination support.
1104
+
1105
+ Args:
1106
+ source_id (str): ID of the source
1107
+ limit (int): Number of files to return
1108
+ cursor (Optional[str]): Pagination cursor for fetching the next page
1109
+
1110
+ Returns:
1111
+ List[FileMetadata]: List of files
1112
+ """
1113
+ # Prepare query parameters for pagination
1114
+ params = {"limit": limit, "cursor": cursor}
1115
+
1116
+ # Make the request to the FastAPI endpoint
1117
+ response = requests.get(f"{self.base_url}/{self.api_prefix}/sources/{source_id}/files", headers=self.headers, params=params)
1118
+
1119
+ if response.status_code != 200:
1120
+ raise ValueError(f"Failed to list files with source id {source_id}: [{response.status_code}] {response.text}")
1121
+
1122
+ # Parse the JSON response
1123
+ return [FileMetadata(**metadata) for metadata in response.json()]
1124
+
1091
1125
  def update_source(self, source_id: str, name: Optional[str] = None) -> Source:
1092
1126
  """
1093
1127
  Update a source
@@ -2162,6 +2196,9 @@ class LocalClient(AbstractClient):
2162
2196
  def get_job(self, job_id: str):
2163
2197
  return self.server.get_job(job_id=job_id)
2164
2198
 
2199
+ def delete_job(self, job_id: str):
2200
+ return self.server.delete_job(job_id)
2201
+
2165
2202
  def list_jobs(self):
2166
2203
  return self.server.list_jobs(user_id=self.user_id)
2167
2204
 
@@ -2261,6 +2298,20 @@ class LocalClient(AbstractClient):
2261
2298
  """
2262
2299
  return self.server.list_attached_sources(agent_id=agent_id)
2263
2300
 
2301
+ def list_files_from_source(self, source_id: str, limit: int = 1000, cursor: Optional[str] = None) -> List[FileMetadata]:
2302
+ """
2303
+ List files from source.
2304
+
2305
+ Args:
2306
+ source_id (str): ID of the source
2307
+ limit (int): The # of items to return
2308
+ cursor (str): The cursor for fetching the next page
2309
+
2310
+ Returns:
2311
+ files (List[FileMetadata]): List of files
2312
+ """
2313
+ return self.server.list_files_from_source(source_id=source_id, limit=limit, cursor=cursor)
2314
+
2264
2315
  def update_source(self, source_id: str, name: Optional[str] = None) -> Source:
2265
2316
  """
2266
2317
  Update a source
@@ -0,0 +1,247 @@
1
+ from typing import Dict, Iterator, List, Tuple
2
+
3
+ import typer
4
+
5
+ from letta.agent_store.storage import StorageConnector
6
+ from letta.data_sources.connectors_helper import (
7
+ assert_all_files_exist_locally,
8
+ extract_metadata_from_files,
9
+ get_filenames_in_dir,
10
+ )
11
+ from letta.embeddings import embedding_model
12
+ from letta.schemas.file import FileMetadata
13
+ from letta.schemas.passage import Passage
14
+ from letta.schemas.source import Source
15
+ from letta.utils import create_uuid_from_string
16
+
17
+
18
class DataConnector:
    """
    Interface for data connectors: a subclass enumerates the files of a custom data source and splits each file into passages.

    Both methods here have no body beyond their docstring and therefore return None; subclasses are expected to override them.
    """

    def find_files(self, source: Source) -> Iterator[FileMetadata]:
        """
        Enumerate the files available in a data source.

        Args:
            source (Source): The source that the discovered files belong to.

        Returns:
            files (Iterator[FileMetadata]): One metadata record per file found.
        """

    def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
        """
        Split a single file into passages.

        Args:
            file (FileMetadata): The file to generate passages from.
            chunk_size (int, optional): Chunk size used when splitting the file into passages. Defaults to 1024.

        Returns:
            passages (Iterator[Tuple[str, Dict]]): A (passage text, metadata dictionary) pair for each passage.
        """
42
+
43
+
44
def load_data(
    connector: DataConnector,
    source: Source,
    passage_store: StorageConnector,
    file_metadata_store: StorageConnector,
):
    """Load data from a connector (generates files and passages) into the given source, associated with the source's user.

    Every file found by the connector is inserted into ``file_metadata_store``. Each file is
    then split into passages, every passage is embedded with the source's embedding config,
    and the resulting passages are written to ``passage_store`` in batches.

    Args:
        connector (DataConnector): Connector that yields the files and their passages.
        source (Source): Source that the files and passages are attached to; its
            ``embedding_config`` selects the embedding model and chunk size.
        passage_store (StorageConnector): Store that receives the embedded passages.
        file_metadata_store (StorageConnector): Store that receives the file metadata.

    Returns:
        Tuple of (passage_count, file_count): the number of passages inserted and the number of files found.
    """
    # number of passages buffered before they are written to the passage store
    batch_size = 100

    embedding_config = source.embedding_config

    # embedding model
    embed_model = embedding_model(embedding_config)

    # insert passages/file
    pending_passages = []
    embedding_to_document_name = {}
    passage_count = 0
    file_count = 0
    for file_metadata in connector.find_files(source):
        file_count += 1
        file_metadata_store.insert(file_metadata)
        file_name = file_metadata.file_name

        # generate passages
        for passage_text, passage_metadata in connector.generate_passages(file_metadata, chunk_size=embedding_config.embedding_chunk_size):
            # for some reason, llama index parsers sometimes return empty strings
            # (a falsy check also skips a None text instead of crashing on len(None))
            if not passage_text:
                typer.secho(
                    f"Warning: Llama index parser returned empty string, skipping insert of passage with metadata '{passage_metadata}' into VectorDB. You can usually ignore this warning.",
                    fg=typer.colors.YELLOW,
                )
                continue

            # get embedding; a failure for one passage is reported and must not abort the whole load
            try:
                embedding = embed_model.get_text_embedding(passage_text)
            except Exception as e:
                typer.secho(
                    f"Warning: Failed to get embedding for {passage_text} (error: {str(e)}), skipping insert into VectorDB.",
                    fg=typer.colors.YELLOW,
                )
                continue

            passage = Passage(
                id=create_uuid_from_string(f"{str(source.id)}_{passage_text}"),
                text=passage_text,
                file_id=file_metadata.id,
                source_id=source.id,
                metadata_=passage_metadata,
                user_id=source.user_id,
                embedding_config=source.embedding_config,
                embedding=embedding,
            )

            # identical embeddings are treated as duplicate passages; only the first one is kept
            hashable_embedding = tuple(passage.embedding)
            if hashable_embedding in embedding_to_document_name:
                typer.secho(
                    f"Warning: Duplicate embedding found for passage in {file_name} (already exists in {embedding_to_document_name[hashable_embedding]}), skipping insert into VectorDB.",
                    fg=typer.colors.YELLOW,
                )
                continue

            pending_passages.append(passage)
            embedding_to_document_name[hashable_embedding] = file_name
            if len(pending_passages) >= batch_size:
                # insert passages into passage store
                passage_store.insert_many(pending_passages)
                passage_count += len(pending_passages)
                pending_passages = []

    # flush whatever is left over from the last (partial) batch
    if pending_passages:
        passage_store.insert_many(pending_passages)
        passage_count += len(pending_passages)

    return passage_count, file_count
120
+
121
+
122
class DirectoryConnector(DataConnector):
    """Data connector that reads text files from an explicit list of paths or from a directory."""

    def __init__(self, input_files: List[str] = None, input_directory: str = None, recursive: bool = False, extensions: List[str] = None):
        """
        Connector for reading text data from a directory of files.

        Args:
            input_files (List[str], optional): List of file paths to read. Defaults to None.
            input_directory (str, optional): Directory to read files from. Defaults to None.
            recursive (bool, optional): Whether to read files recursively from the input directory. Defaults to False.
            extensions (List[str], optional): File extensions to read, given either as a list or as a
                single comma-separated string. Defaults to None (no extension filter).
        """
        self.connector_type = "directory"
        self.input_files = input_files
        self.input_directory = input_directory
        self.recursive = recursive
        self.extensions = extensions

        if self.recursive:
            assert self.input_directory is not None, "Must provide input directory if recursive is True."

    @staticmethod
    def _normalize_extensions(extensions) -> List[str]:
        """Turn None, a comma-separated string, or a list of extensions into a clean list without dots or padding."""
        if extensions is None:
            # no filter requested: an empty list lets the directory scan match every extension
            return []
        if isinstance(extensions, str):
            raw_parts = extensions.split(",")
        else:
            raw_parts = list(extensions)
        # the directory scan compares against suffixes without the leading dot
        cleaned = [str(part).strip().lstrip(".") for part in raw_parts]
        return [part for part in cleaned if part]

    def find_files(self, source: Source) -> Iterator[FileMetadata]:
        """
        Yield file metadata for every file selected by this connector.

        When an input directory is configured it is scanned (image files are excluded);
        otherwise the explicit list of input files is used.

        Args:
            source (Source): Source whose id and user_id are stamped on every yielded record.

        Returns:
            files (Iterator[FileMetadata]): One metadata record per selected file.
        """
        if self.input_directory is not None:
            files = get_filenames_in_dir(
                input_dir=self.input_directory,
                recursive=self.recursive,
                required_exts=self._normalize_extensions(self.extensions),
                exclude=["*png", "*jpg", "*jpeg"],
            )
        else:
            # no directory and no file list means there is simply nothing to load
            files = self.input_files or []

        # Check that file paths are valid
        assert_all_files_exist_locally(files)

        for metadata in extract_metadata_from_files(files):
            yield FileMetadata(
                user_id=source.user_id,
                source_id=source.id,
                file_name=metadata.get("file_name"),
                file_path=metadata.get("file_path"),
                file_type=metadata.get("file_type"),
                file_size=metadata.get("file_size"),
                file_creation_date=metadata.get("file_creation_date"),
                file_last_modified_date=metadata.get("file_last_modified_date"),
            )

    def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:
        """
        Read one file from disk and yield its text split into token-based chunks.

        Args:
            file (FileMetadata): The file to read; only its file_path is used.
            chunk_size (int, optional): Chunk size passed to the token splitter. Defaults to 1024.

        Returns:
            passages (Iterator[Tuple[str, Dict]]): A (chunk text, None) pair per chunk; no per-passage metadata is produced.
        """
        # imported lazily so llama_index is only needed when passages are actually generated
        from llama_index.core import SimpleDirectoryReader
        from llama_index.core.node_parser import TokenTextSplitter

        parser = TokenTextSplitter(chunk_size=chunk_size)
        documents = SimpleDirectoryReader(input_files=[file.file_path]).load_data()
        nodes = parser.get_nodes_from_documents(documents)
        for node in nodes:
            yield node.text, None
177
+
178
+
179
+ """
180
+ The below isn't used anywhere, it isn't tested, and pretty much should be deleted.
181
+ - Matt
182
+ """
183
+ # class WebConnector(DirectoryConnector):
184
+ # def __init__(self, urls: List[str] = None, html_to_text: bool = True):
185
+ # self.urls = urls
186
+ # self.html_to_text = html_to_text
187
+ #
188
+ # def generate_files(self) -> Iterator[Tuple[str, Dict]]: # -> Iterator[Document]:
189
+ # from llama_index.readers.web import SimpleWebPageReader
190
+ #
191
+ # files = SimpleWebPageReader(html_to_text=self.html_to_text).load_data(self.urls)
192
+ # for document in files:
193
+ # yield document.text, {"url": document.id_}
194
+ #
195
+ #
196
+ # class VectorDBConnector(DataConnector):
197
+ # # NOTE: this class has not been properly tested, so is unlikely to work
198
+ # # TODO: allow loading multiple tables (1:1 mapping between FileMetadata and Table)
199
+ #
200
+ # def __init__(
201
+ # self,
202
+ # name: str,
203
+ # uri: str,
204
+ # table_name: str,
205
+ # text_column: str,
206
+ # embedding_column: str,
207
+ # embedding_dim: int,
208
+ # ):
209
+ # self.name = name
210
+ # self.uri = uri
211
+ # self.table_name = table_name
212
+ # self.text_column = text_column
213
+ # self.embedding_column = embedding_column
214
+ # self.embedding_dim = embedding_dim
215
+ #
216
+ # # connect to db table
217
+ # from sqlalchemy import create_engine
218
+ #
219
+ # self.engine = create_engine(uri)
220
+ #
221
+ # def generate_files(self) -> Iterator[Tuple[str, Dict]]: # -> Iterator[Document]:
222
+ # yield self.table_name, None
223
+ #
224
+ # def generate_passages(self, file_text: str, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]: # -> Iterator[Passage]:
225
+ # from pgvector.sqlalchemy import Vector
226
+ # from sqlalchemy import Inspector, MetaData, Table, select
227
+ #
228
+ # metadata = MetaData()
229
+ # # Create an inspector to inspect the database
230
+ # inspector = Inspector.from_engine(self.engine)
231
+ # table_names = inspector.get_table_names()
232
+ # assert self.table_name in table_names, f"Table {self.table_name} not found in database: tables that exist {table_names}."
233
+ #
234
+ # table = Table(self.table_name, metadata, autoload_with=self.engine)
235
+ #
236
+ # # Prepare a select statement
237
+ # select_statement = select(table.c[self.text_column], table.c[self.embedding_column].cast(Vector(self.embedding_dim)))
238
+ #
239
+ # # Execute the query and fetch the results
240
+ # # TODO: paginate results
241
+ # with self.engine.connect() as connection:
242
+ # result = connection.execute(select_statement).fetchall()
243
+ #
244
+ # for text, embedding in result:
245
+ # # assume that embeddings are the same model as in config
246
+ # # TODO: don't re-compute embedding
247
+ # yield text, {"embedding": embedding}
@@ -0,0 +1,97 @@
1
+ import mimetypes
2
+ import os
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+
8
def extract_file_metadata(file_path) -> dict:
    """Build a metadata dictionary (name, path, type, size, dates) for one file on disk.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(file_path)

    # one stat call supplies the size and both timestamps
    stat_result = os.stat(file_path)
    guessed_type, _ = mimetypes.guess_type(file_path)
    date_format = "%Y-%m-%d"
    created_on = datetime.fromtimestamp(stat_result.st_ctime).strftime(date_format)
    modified_on = datetime.fromtimestamp(stat_result.st_mtime).strftime(date_format)

    return {
        "file_name": os.path.basename(file_path),
        "file_path": file_path,
        "file_type": guessed_type or "unknown",
        "file_size": stat_result.st_size,
        "file_creation_date": created_on,
        "file_last_modified_date": modified_on,
    }
22
+
23
+
24
def extract_metadata_from_files(file_list):
    """Return the metadata dictionary of every file in ``file_list``, in order, dropping any empty result."""
    extracted = (extract_file_metadata(path) for path in file_list)
    return [entry for entry in extracted if entry]
32
+
33
+
34
def get_filenames_in_dir(
    input_dir: str, recursive: bool = True, required_exts: Optional[List[str]] = None, exclude: Optional[List[str]] = None
):
    """
    Scans a directory (optionally recursively) for files, applying required_exts and exclude filters.
    Rejects inputs where the same string appears in both required_exts and exclude.

    Args:
        input_dir (str): The directory to scan for files.
        recursive (bool): Whether to scan directories recursively.
        required_exts (list): List of file extensions to include (e.g., ['pdf', 'txt']).
                              A leading dot or surrounding whitespace is ignored ('.pdf' == 'pdf').
                              If None or empty, matches any file extension.
        exclude (list): List of file patterns to exclude (e.g., ['*png', '*jpg']).

    Returns:
        list: A list of matching file paths (pathlib.Path objects).

    Raises:
        ValueError: If an entry is present in both required_exts and exclude.
    """
    required_exts = required_exts or []
    exclude = exclude or []

    # Ensure required_exts and exclude do not overlap
    overlap = set(required_exts) & set(exclude)
    if overlap:
        raise ValueError(f"Extensions in required_exts and exclude overlap: {overlap}")

    # suffixes are compared without their dot, so normalise the requested extensions the same way
    wanted_exts = {ext.strip().lstrip(".") for ext in required_exts}

    search_pattern = "**/*" if recursive else "*"
    matches = []
    for candidate in Path(input_dir).glob(search_pattern):
        if not candidate.is_file():
            continue
        # exclusion patterns are matched against the bare file name
        if any(Path(candidate.name).match(pattern) for pattern in exclude):
            continue
        # If required_exts is empty, match any file
        if not required_exts or candidate.suffix.lstrip(".") in wanted_exts:
            matches.append(candidate)

    return matches
79
+
80
+
81
def assert_all_files_exist_locally(file_paths: List[str]) -> bool:
    """
    Verify that every path in the given list exists on the local filesystem.

    Args:
        file_paths (List[str]): List of file paths to check.

    Returns:
        bool: True when every path exists.

    Raises:
        FileNotFoundError: If at least one path is missing; the exception carries the list of missing paths.
    """
    missing_files = []
    for candidate in file_paths:
        if Path(candidate).exists():
            continue
        missing_files.append(candidate)

    if not missing_files:
        return True

    raise FileNotFoundError(missing_files)