openrag 0.4.0.dev7__tar.gz → 0.4.0.dev9__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (165)
  1. {openrag-0.4.0.dev7/src/openrag.egg-info → openrag-0.4.0.dev9}/PKG-INFO +1 -1
  2. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/pyproject.toml +1 -1
  3. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/settings.py +83 -40
  4. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/main.py +29 -17
  5. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/models/processors.py +3 -2
  6. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9/src/openrag.egg-info}/PKG-INFO +1 -1
  7. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/openrag.egg-info/SOURCES.txt +0 -1
  8. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/task_service.py +33 -24
  9. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/ingestion_flow.json +1 -1
  10. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/openrag_agent.json +1 -1
  11. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/openrag_nudges.json +1 -1
  12. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/openrag_url_mcp.json +1 -1
  13. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/main.py +83 -30
  14. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/screens/monitor.py +25 -6
  15. openrag-0.4.0.dev7/src/utils/opensearch_filter_merge.py +0 -188
  16. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/LICENSE +0 -0
  17. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/MANIFEST.in +0 -0
  18. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/README.md +0 -0
  19. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/setup.cfg +0 -0
  20. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/agent.py +0 -0
  21. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/__init__.py +0 -0
  22. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/auth.py +0 -0
  23. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/chat.py +0 -0
  24. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/connector_router.py +0 -0
  25. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/connectors.py +0 -0
  26. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/docling.py +0 -0
  27. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/documents.py +0 -0
  28. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/flows.py +0 -0
  29. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/keys.py +0 -0
  30. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/knowledge_filter.py +0 -0
  31. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/langflow_files.py +0 -0
  32. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/models.py +0 -0
  33. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/nudges.py +0 -0
  34. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/oidc.py +0 -0
  35. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/provider_health.py +0 -0
  36. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/provider_validation.py +0 -0
  37. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/router.py +0 -0
  38. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/search.py +0 -0
  39. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/tasks.py +0 -0
  40. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/upload.py +0 -0
  41. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/v1/__init__.py +0 -0
  42. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/v1/chat.py +0 -0
  43. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/v1/documents.py +0 -0
  44. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/v1/knowledge_filters.py +0 -0
  45. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/v1/models.py +0 -0
  46. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/v1/search.py +0 -0
  47. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/api/v1/settings.py +0 -0
  48. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/auth_context.py +0 -0
  49. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/config/__init__.py +0 -0
  50. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/config/config_manager.py +0 -0
  51. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/config/model_constants.py +0 -0
  52. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/config/settings.py +0 -0
  53. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/__init__.py +0 -0
  54. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/aws_s3/__init__.py +0 -0
  55. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/aws_s3/api.py +0 -0
  56. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/aws_s3/auth.py +0 -0
  57. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/aws_s3/connector.py +0 -0
  58. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/aws_s3/models.py +0 -0
  59. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/aws_s3/support.py +0 -0
  60. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/base.py +0 -0
  61. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/connection_manager.py +0 -0
  62. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/google_drive/__init__.py +0 -0
  63. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/google_drive/connector.py +0 -0
  64. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/google_drive/oauth.py +0 -0
  65. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/ibm_cos/__init__.py +0 -0
  66. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/ibm_cos/api.py +0 -0
  67. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/ibm_cos/auth.py +0 -0
  68. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/ibm_cos/connector.py +0 -0
  69. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/ibm_cos/models.py +0 -0
  70. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/ibm_cos/support.py +0 -0
  71. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/langflow_connector_service.py +0 -0
  72. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/onedrive/__init__.py +0 -0
  73. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/onedrive/connector.py +0 -0
  74. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/onedrive/oauth.py +0 -0
  75. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/service.py +0 -0
  76. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/sharepoint/__init__.py +0 -0
  77. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/sharepoint/connector.py +0 -0
  78. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/sharepoint/oauth.py +0 -0
  79. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/connectors/sharepoint/utils.py +0 -0
  80. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/dependencies.py +0 -0
  81. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/models/__init__.py +0 -0
  82. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/models/tasks.py +0 -0
  83. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/models/url.py +0 -0
  84. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/openrag.egg-info/dependency_links.txt +0 -0
  85. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/openrag.egg-info/entry_points.txt +0 -0
  86. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/openrag.egg-info/requires.txt +0 -0
  87. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/openrag.egg-info/top_level.txt +0 -0
  88. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/__init__.py +0 -0
  89. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/api_key_service.py +0 -0
  90. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/auth_service.py +0 -0
  91. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/chat_service.py +0 -0
  92. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/conversation_persistence_service.py +0 -0
  93. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/document_service.py +0 -0
  94. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/flows_service.py +0 -0
  95. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/knowledge_filter_service.py +0 -0
  96. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/langflow_file_service.py +0 -0
  97. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/langflow_history_service.py +0 -0
  98. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/langflow_mcp_service.py +0 -0
  99. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/models_service.py +0 -0
  100. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/monitor_service.py +0 -0
  101. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/search_service.py +0 -0
  102. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/services/session_ownership_service.py +0 -0
  103. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/session_manager.py +0 -0
  104. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/__init__.py +0 -0
  105. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/docker-compose.gpu.yml +0 -0
  106. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/docker-compose.yml +0 -0
  107. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/components/ollama_embedding.json +0 -0
  108. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/components/ollama_llm.json +0 -0
  109. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/components/ollama_llm_text.json +0 -0
  110. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/components/watsonx_embedding.json +0 -0
  111. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/components/watsonx_llm.json +0 -0
  112. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/flows/components/watsonx_llm_text.json +0 -0
  113. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/openrag-documents/docling.pdf +0 -0
  114. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/openrag-documents/ibm_anthropic.pdf +0 -0
  115. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/openrag-documents/openrag-documentation.pdf +0 -0
  116. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/_assets/openrag-documents/warmup_ocr.pdf +0 -0
  117. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/cli.py +0 -0
  118. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/config_fields.py +0 -0
  119. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/managers/__init__.py +0 -0
  120. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/managers/container_manager.py +0 -0
  121. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/managers/docling_manager.py +0 -0
  122. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/managers/env_manager.py +0 -0
  123. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/screens/__init__.py +0 -0
  124. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/screens/config.py +0 -0
  125. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/screens/diagnostics.py +0 -0
  126. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/screens/logs.py +0 -0
  127. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/screens/welcome.py +0 -0
  128. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/utils/__init__.py +0 -0
  129. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/utils/clipboard.py +0 -0
  130. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/utils/platform.py +0 -0
  131. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/utils/startup_checks.py +0 -0
  132. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/utils/validation.py +0 -0
  133. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/utils/version_check.py +0 -0
  134. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/__init__.py +0 -0
  135. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/command_modal.py +0 -0
  136. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/diagnostics_notification.py +0 -0
  137. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/error_notification.py +0 -0
  138. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/factory_reset_warning_modal.py +0 -0
  139. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/flow_backup_warning_modal.py +0 -0
  140. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/prune_options_modal.py +0 -0
  141. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/upgrade_instructions_modal.py +0 -0
  142. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/version_mismatch_warning_modal.py +0 -0
  143. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/tui/widgets/waves.py +0 -0
  144. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/__init__.py +0 -0
  145. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/acl_utils.py +0 -0
  146. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/container_utils.py +0 -0
  147. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/docling_client.py +0 -0
  148. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/document_processing.py +0 -0
  149. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/embedding_fields.py +0 -0
  150. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/embeddings.py +0 -0
  151. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/env_utils.py +0 -0
  152. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/file_utils.py +0 -0
  153. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/gpu_detection.py +0 -0
  154. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/hash_utils.py +0 -0
  155. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/langflow_headers.py +0 -0
  156. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/langflow_utils.py +0 -0
  157. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/logging_config.py +0 -0
  158. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/opensearch_queries.py +0 -0
  159. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/opensearch_utils.py +0 -0
  160. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/paths.py +0 -0
  161. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/telemetry/__init__.py +0 -0
  162. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/telemetry/category.py +0 -0
  163. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/telemetry/client.py +0 -0
  164. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/telemetry/message_id.py +0 -0
  165. {openrag-0.4.0.dev7 → openrag-0.4.0.dev9}/src/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrag
3
- Version: 0.4.0.dev7
3
+ Version: 0.4.0.dev9
4
4
  Summary: OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations.
5
5
  Classifier: Development Status :: 4 - Beta
6
6
  Classifier: Environment :: Console
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "openrag"
7
- version = "0.4.0.dev7"
7
+ version = "0.4.0.dev9"
8
8
  description = "OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.13"
@@ -211,6 +211,9 @@ class RollbackResponse(BaseModel):
211
211
  cancelled_tasks: int
212
212
  deleted_files: int
213
213
 
214
+ class RollbackBody(BaseModel):
215
+ embedding_only: bool = False
216
+
214
217
 
215
218
  # Docling preset configurations
216
219
  def get_docling_preset_configs(
@@ -1638,8 +1641,10 @@ async def reapply_all_settings(session_manager = None):
1638
1641
 
1639
1642
  async def rollback_onboarding(
1640
1643
  request: Request,
1644
+ body: Optional[RollbackBody] = None,
1641
1645
  session_manager=Depends(get_session_manager),
1642
1646
  task_service=Depends(get_task_service),
1647
+ knowledge_filter_service=Depends(get_knowledge_filter_service),
1643
1648
  user: User = Depends(get_current_user),
1644
1649
  ) -> RollbackResponse:
1645
1650
  """Rollback onboarding configuration when sample data files fail.
@@ -1669,7 +1674,36 @@ async def rollback_onboarding(
1669
1674
  cancelled_tasks = []
1670
1675
  deleted_files = []
1671
1676
 
1677
+ # Delete knowledge filters created during onboarding
1678
+ try:
1679
+ async def remove_filter(filter_id: Optional[str]):
1680
+ if filter_id and knowledge_filter_service:
1681
+ try:
1682
+ result = await knowledge_filter_service.delete_knowledge_filter(
1683
+ filter_id, user.user_id, user.jwt_token
1684
+ )
1685
+ if result and result.get("success"):
1686
+ logger.info(f"Deleted knowledge filter {filter_id}")
1687
+ else:
1688
+ error_msg = result.get("error") if result else "Unknown error"
1689
+ logger.warning(f"Could not delete knowledge filter {filter_id}: {error_msg}")
1690
+ except Exception as e:
1691
+ logger.warning(f"Exception deleting knowledge filter {filter_id}: {str(e)}")
1692
+
1693
+ if getattr(current_config.onboarding, 'openrag_docs_filter_id', None):
1694
+ await remove_filter(current_config.onboarding.openrag_docs_filter_id)
1695
+ current_config.onboarding.openrag_docs_filter_id = None
1696
+
1697
+ if getattr(current_config.onboarding, 'user_doc_filter_id', None):
1698
+ await remove_filter(current_config.onboarding.user_doc_filter_id)
1699
+ current_config.onboarding.user_doc_filter_id = None
1700
+ except Exception as e:
1701
+ logger.error(f"Error while cleaning up knowledge filters: {e}")
1702
+
1672
1703
  # Cancel all active tasks and collect successfully ingested files
1704
+ from session_manager import AnonymousUser
1705
+ anonymous_user_id = AnonymousUser().user_id
1706
+
1673
1707
  for task_data in all_tasks:
1674
1708
  task_id = task_data.get("task_id")
1675
1709
  task_status = task_data.get("status")
@@ -1684,41 +1718,40 @@ async def rollback_onboarding(
1684
1718
  except Exception as e:
1685
1719
  logger.error(f"Failed to cancel task {task_id}: {str(e)}")
1686
1720
 
1687
- # For completed tasks, find successfully ingested files and delete them
1688
- elif task_status == "completed":
1689
- files = task_data.get("files", {})
1690
- if isinstance(files, dict):
1691
- for file_path, file_info in files.items():
1692
- # Check if file was successfully ingested
1693
- if isinstance(file_info, dict):
1694
- file_status = file_info.get("status")
1695
- filename = file_info.get("filename") or file_path.split("/")[-1]
1696
-
1697
- if file_status == "completed" and filename:
1698
- try:
1699
- # Get user's OpenSearch client
1700
- opensearch_client = session_manager.get_user_opensearch_client(
1701
- user.user_id, jwt_token
1702
- )
1703
-
1704
- # Delete documents by filename
1705
- from utils.opensearch_queries import build_filename_delete_body
1706
- from config.settings import get_index_name
1707
-
1708
- delete_query = build_filename_delete_body(filename)
1709
-
1710
- result = await opensearch_client.delete_by_query(
1711
- index=get_index_name(),
1712
- body=delete_query,
1713
- conflicts="proceed"
1714
- )
1715
-
1716
- deleted_count = result.get("deleted", 0)
1717
- if deleted_count > 0:
1718
- deleted_files.append(filename)
1719
- logger.info(f"Deleted {deleted_count} chunks for filename {filename}")
1720
- except Exception as e:
1721
- logger.error(f"Failed to delete documents for {filename}: {str(e)}")
1721
+ # Delete all files associated with any task, regardless of whether
1722
+ # the task failed or completed, to ensure no partial chunks remain in OpenSearch.
1723
+ files = task_data.get("files", {})
1724
+ if isinstance(files, dict):
1725
+ for file_path, file_info in files.items():
1726
+ if isinstance(file_info, dict):
1727
+ filename = file_info.get("filename") or file_path.split("/")[-1]
1728
+ if filename:
1729
+ try:
1730
+ opensearch_client = session_manager.get_user_opensearch_client(
1731
+ user.user_id, jwt_token
1732
+ )
1733
+ from utils.opensearch_queries import build_filename_delete_body
1734
+ from config.settings import get_index_name
1735
+
1736
+ delete_query = build_filename_delete_body(filename)
1737
+ result = await opensearch_client.delete_by_query(
1738
+ index=get_index_name(),
1739
+ body=delete_query,
1740
+ conflicts="proceed"
1741
+ )
1742
+ deleted_count = result.get("deleted", 0)
1743
+ if deleted_count > 0:
1744
+ deleted_files.append(filename)
1745
+ logger.info(f"Deleted {deleted_count} chunks for filename {filename}")
1746
+ except Exception as e:
1747
+ logger.error(f"Failed to delete documents for {filename}: {str(e)}")
1748
+
1749
+ # Wipe the task completely from memory so the frontend doesn't see it anymore
1750
+ for check_user_id in [user.user_id, anonymous_user_id]:
1751
+ if check_user_id in task_service.task_store and task_id in task_service.task_store[check_user_id]:
1752
+ task_service._task_locks.pop(task_id, None)
1753
+ task_service.task_store[check_user_id].pop(task_id, None)
1754
+ logger.info(f"Purged task {task_id} completely from task_store for user {check_user_id}")
1722
1755
 
1723
1756
  # Clear embedding provider and model settings
1724
1757
  current_config.knowledge.embedding_provider = "openai" # Reset to default
@@ -1726,11 +1759,21 @@ async def rollback_onboarding(
1726
1759
  current_config.onboarding.openrag_docs_ingested_version = None
1727
1760
  current_config.onboarding.openrag_docs_remote_signature = None
1728
1761
 
1762
+ embedding_only = body.embedding_only if body else False
1763
+
1729
1764
  # Mark config as not edited so user can go through onboarding again
1730
- current_config.edited = False
1731
- current_config.onboarding.current_step = 0
1765
+ if not embedding_only:
1766
+ current_config.edited = False
1767
+ current_config.onboarding.current_step = 0
1768
+ # Also clear LLM provider and model settings when doing a full rollback
1769
+ current_config.agent.llm_provider = "openai" # Reset to default
1770
+ current_config.agent.llm_model = ""
1771
+ else:
1772
+ # When rolling back embedding only, we keep edited=True
1773
+ # and set current_step to 1 (which is the embedding step)
1774
+ current_config.onboarding.current_step = 1
1732
1775
 
1733
- # Save the rolled back configuration manually to avoid save_config_file setting edited=True
1776
+ # Save the rolled back configuration manually
1734
1777
  try:
1735
1778
  import yaml
1736
1779
  config_file = config_manager.config_file
@@ -1738,14 +1781,14 @@ async def rollback_onboarding(
1738
1781
  # Ensure directory exists
1739
1782
  config_file.parent.mkdir(parents=True, exist_ok=True)
1740
1783
 
1741
- # Save config with edited=False
1784
+ # Save config with current edited state
1742
1785
  with open(config_file, "w") as f:
1743
1786
  yaml.dump(current_config.to_dict(), f, default_flow_style=False, indent=2)
1744
1787
 
1745
1788
  # Update cached config
1746
1789
  config_manager._config = current_config
1747
1790
 
1748
- logger.info("Successfully saved rolled back configuration with edited=False")
1791
+ logger.info(f"Successfully saved rolled back configuration with edited={current_config.edited}")
1749
1792
  except Exception as e:
1750
1793
  logger.error(f"Failed to save rolled back configuration: {e}")
1751
1794
  return JSONResponse(
@@ -410,13 +410,14 @@ async def ingest_openrag_docs_when_ready(
410
410
  ):
411
411
  """Ingest OpenRAG docs during onboarding."""
412
412
  use_url_ingest = _should_use_url_default_docs_ingest()
413
+ task_id = None
413
414
  if use_url_ingest:
414
415
  try:
415
416
  await TelemetryClient.send_event(
416
417
  Category.DOCUMENT_INGESTION, MessageId.ORB_DOC_DEFAULT_URL_START
417
418
  )
418
419
  if DISABLE_INGEST_WITH_LANGFLOW:
419
- await _ingest_default_documents_url(
420
+ task_id = await _ingest_default_documents_url(
420
421
  document_service=document_service,
421
422
  docs_url=DEFAULT_DOCS_URL,
422
423
  crawl_depth=DEFAULT_DOCS_CRAWL_DEPTH,
@@ -426,7 +427,7 @@ async def ingest_openrag_docs_when_ready(
426
427
  "Ingesting default documents using Langflow",
427
428
  docs_url=DEFAULT_DOCS_URL,
428
429
  )
429
- await _ingest_default_documents_url_langflow(
430
+ task_id = await _ingest_default_documents_url_langflow(
430
431
  langflow_file_service=langflow_file_service,
431
432
  session_manager=session_manager,
432
433
  task_service=task_service,
@@ -441,6 +442,7 @@ async def ingest_openrag_docs_when_ready(
441
442
  await TelemetryClient.send_event(
442
443
  Category.DOCUMENT_INGESTION, MessageId.ORB_DOC_DEFAULT_URL_FAILED
443
444
  )
445
+ return task_id
444
446
 
445
447
 
446
448
  async def ingest_default_documents_when_ready(
@@ -456,9 +458,11 @@ async def ingest_default_documents_when_ready(
456
458
  await TelemetryClient.send_event(
457
459
  Category.DOCUMENT_INGESTION, MessageId.ORB_DOC_DEFAULT_START
458
460
  )
459
- await ingest_openrag_docs_when_ready(
460
- document_service, task_service, langflow_file_service, session_manager
461
- )
461
+ task_id = None
462
+ if _should_use_url_default_docs_ingest():
463
+ task_id = await ingest_openrag_docs_when_ready(
464
+ document_service, task_service, langflow_file_service, session_manager
465
+ )
462
466
 
463
467
  base_dir = _get_documents_dir()
464
468
  if not os.path.isdir(base_dir):
@@ -469,8 +473,7 @@ async def ingest_default_documents_when_ready(
469
473
  excluded_files = set(EXCLUDED_INGESTION_FILES)
470
474
  if _should_use_url_default_docs_ingest():
471
475
  excluded_files.update(URL_INGEST_EXCLUDED_INGESTION_FILES)
472
-
473
- # Collect files recursively, excluding warmup files and URL-ingested docs
476
+
474
477
  file_paths = [
475
478
  os.path.join(root, fn)
476
479
  for root, _, files in os.walk(base_dir)
@@ -482,18 +485,22 @@ async def ingest_default_documents_when_ready(
482
485
  raise FileNotFoundError(f"No default documents found in {base_dir}")
483
486
 
484
487
  if DISABLE_INGEST_WITH_LANGFLOW:
485
- await _ingest_default_documents_openrag(
486
- document_service, task_service, file_paths
488
+ new_task_id = await _ingest_default_documents_openrag(
489
+ document_service, task_service, file_paths, existing_task_id=task_id, connector_type="local"
487
490
  )
491
+ task_id = new_task_id or task_id
488
492
  else:
489
- await _ingest_default_documents_langflow(
490
- langflow_file_service, session_manager, task_service, file_paths
493
+ new_task_id = await _ingest_default_documents_langflow(
494
+ langflow_file_service, session_manager, task_service, file_paths, existing_task_id=task_id, connector_type="local"
491
495
  )
496
+ task_id = new_task_id or task_id
492
497
 
493
498
  await TelemetryClient.send_event(
494
499
  Category.DOCUMENT_INGESTION, MessageId.ORB_DOC_DEFAULT_COMPLETE
495
500
  )
496
501
 
502
+ return task_id
503
+
497
504
  except Exception as e:
498
505
  logger.error("Default documents ingestion failed", error=str(e))
499
506
  await TelemetryClient.send_event(
@@ -503,7 +510,7 @@ async def ingest_default_documents_when_ready(
503
510
 
504
511
 
505
512
  async def _ingest_default_documents_langflow(
506
- langflow_file_service, session_manager, task_service, file_paths
513
+ langflow_file_service, session_manager, task_service, file_paths, existing_task_id: str = None, connector_type: str = "openrag_docs"
507
514
  ):
508
515
  """Ingest default documents using Langflow upload-ingest-delete pipeline."""
509
516
 
@@ -538,7 +545,7 @@ async def _ingest_default_documents_langflow(
538
545
  {"key": "owner", "value": None},
539
546
  {"key": "owner_name", "value": anonymous_user.name},
540
547
  {"key": "owner_email", "value": anonymous_user.email},
541
- {"key": "connector_type", "value": "system_default"},
548
+ {"key": "connector_type", "value": "openrag_docs"},
542
549
  {"key": "is_sample_data", "value": "true"},
543
550
  ]
544
551
  }
@@ -558,6 +565,8 @@ async def _ingest_default_documents_langflow(
558
565
  settings=None, # Use default ingestion settings
559
566
  delete_after_ingest=True, # Clean up after ingestion
560
567
  replace_duplicates=True,
568
+ connector_type=connector_type,
569
+ existing_task_id=existing_task_id,
561
570
  )
562
571
 
563
572
  logger.info(
@@ -565,6 +574,7 @@ async def _ingest_default_documents_langflow(
565
574
  task_id=task_id,
566
575
  file_count=len(file_paths),
567
576
  )
577
+ return task_id
568
578
 
569
579
 
570
580
  async def _ingest_default_documents_url_langflow(
@@ -626,6 +636,7 @@ async def _ingest_default_documents_url_langflow(
626
636
  task_id=task_id,
627
637
  docs_url=docs_url,
628
638
  )
639
+ return task_id
629
640
 
630
641
 
631
642
  async def _ingest_default_documents_url(
@@ -657,7 +668,7 @@ async def _ingest_default_documents_url(
657
668
  owner_name=None,
658
669
  owner_email=None,
659
670
  is_sample_data=True,
660
- connector_type="system_default",
671
+ connector_type="openrag_docs",
661
672
  )
662
673
  await processor.process_document_standard(
663
674
  file_path=temp_file_path,
@@ -668,7 +679,7 @@ async def _ingest_default_documents_url(
668
679
  owner_name=None,
669
680
  owner_email=None,
670
681
  file_size=os.path.getsize(temp_file_path),
671
- connector_type="system_default",
682
+ connector_type="openrag_docs",
672
683
  is_sample_data=True,
673
684
  )
674
685
  finally:
@@ -1035,7 +1046,7 @@ async def opensearch_health_ready(request):
1035
1046
 
1036
1047
 
1037
1048
  async def _ingest_default_documents_openrag(
1038
- document_service, task_service, file_paths, connector_type: str = "local"
1049
+ document_service, task_service, file_paths, existing_task_id: str = None, connector_type: str = "openrag_docs"
1039
1050
  ):
1040
1051
  """Ingest default documents using traditional OpenRAG processor."""
1041
1052
  logger.info(
@@ -1056,12 +1067,13 @@ async def _ingest_default_documents_openrag(
1056
1067
  connector_type=connector_type,
1057
1068
  )
1058
1069
 
1059
- task_id = await task_service.create_custom_task("anonymous", file_paths, processor)
1070
+ task_id = await task_service.create_custom_task("anonymous", file_paths, processor, existing_task_id=existing_task_id)
1060
1071
  logger.info(
1061
1072
  "Started traditional OpenRAG ingestion task",
1062
1073
  task_id=task_id,
1063
1074
  file_count=len(file_paths),
1064
1075
  )
1076
+ return task_id
1065
1077
 
1066
1078
 
1067
1079
  async def _update_mcp_servers_with_provider_credentials(services):
@@ -730,6 +730,7 @@ class LangflowFileProcessor(TaskProcessor):
730
730
  settings: dict = None,
731
731
  delete_after_ingest: bool = True,
732
732
  replace_duplicates: bool = False,
733
+ connector_type: str = "local",
733
734
  ):
734
735
  super().__init__()
735
736
  self.langflow_file_service = langflow_file_service
@@ -743,6 +744,7 @@ class LangflowFileProcessor(TaskProcessor):
743
744
  self.settings = settings
744
745
  self.delete_after_ingest = delete_after_ingest
745
746
  self.replace_duplicates = replace_duplicates
747
+ self.connector_type = connector_type
746
748
 
747
749
  async def process_item(
748
750
  self, upload_task: UploadTask, item: str, file_task: FileTask
@@ -828,8 +830,7 @@ class LangflowFileProcessor(TaskProcessor):
828
830
  owner=self.owner_user_id,
829
831
  owner_name=self.owner_name,
830
832
  owner_email=self.owner_email,
831
- connector_type="local",
832
-
833
+ connector_type=self.connector_type,
833
834
  )
834
835
 
835
836
  # Update task with success
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrag
3
- Version: 0.4.0.dev7
3
+ Version: 0.4.0.dev9
4
4
  Summary: OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations.
5
5
  Classifier: Development Status :: 4 - Beta
6
6
  Classifier: Environment :: Console
@@ -152,7 +152,6 @@ src/utils/hash_utils.py
152
152
  src/utils/langflow_headers.py
153
153
  src/utils/langflow_utils.py
154
154
  src/utils/logging_config.py
155
- src/utils/opensearch_filter_merge.py
156
155
  src/utils/opensearch_queries.py
157
156
  src/utils/opensearch_utils.py
158
157
  src/utils/paths.py
@@ -137,6 +137,8 @@ class TaskService:
137
137
  settings: dict = None,
138
138
  delete_after_ingest: bool = True,
139
139
  replace_duplicates: bool = False,
140
+ connector_type: str = "local",
141
+ existing_task_id: str = None,
140
142
  ) -> str:
141
143
  """Create a new upload task for Langflow file processing with upload and ingest"""
142
144
  # Use LangflowFileProcessor with user context
@@ -154,8 +156,9 @@ class TaskService:
154
156
  settings=settings,
155
157
  delete_after_ingest=delete_after_ingest,
156
158
  replace_duplicates=replace_duplicates,
159
+ connector_type=connector_type,
157
160
  )
158
- return await self.create_custom_task(user_id, file_paths, processor, original_filenames)
161
+ return await self.create_custom_task(user_id, file_paths, processor, original_filenames, existing_task_id=existing_task_id)
159
162
 
160
163
  async def create_langflow_url_upload_task(
161
164
  self,
@@ -170,6 +173,7 @@ class TaskService:
170
173
  connector_type: str = "openrag_docs",
171
174
  prevent_outside: bool = True,
172
175
  tweaks: dict = None,
176
+ existing_task_id: str = None,
173
177
  ) -> str:
174
178
  """Create a new upload task for Langflow URL ingestion."""
175
179
  from models.url import LangflowUrlProcessor
@@ -187,14 +191,14 @@ class TaskService:
187
191
  prevent_outside=prevent_outside,
188
192
  tweaks=tweaks,
189
193
  )
190
- return await self.create_custom_task(owner_user_id, [docs_url], processor)
194
+ return await self.create_custom_task(owner_user_id, [docs_url], processor, existing_task_id=existing_task_id)
191
195
 
192
- async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str:
196
+ async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None, existing_task_id: str = None) -> str:
193
197
  """Create a new task with custom processor for any type of items"""
194
198
  import os
195
199
  # Store anonymous tasks under a stable key so they can be retrieved later
196
200
  store_user_id = user_id or AnonymousUser().user_id
197
- task_id = str(uuid.uuid4())
201
+ task_id = existing_task_id or str(uuid.uuid4())
198
202
 
199
203
  # Create file tasks with original filenames if provided
200
204
  normalized_originals = (
@@ -210,28 +214,32 @@ class TaskService:
210
214
  for item in items
211
215
  }
212
216
 
213
- upload_task = UploadTask(
214
- task_id=task_id,
215
- total_files=len(items),
216
- file_tasks=file_tasks,
217
- )
218
-
219
- # Attach the custom processor to the task
220
- upload_task.processor = processor
221
-
222
- if store_user_id not in self.task_store:
223
- self.task_store[store_user_id] = {}
224
- self.task_store[store_user_id][task_id] = upload_task
217
+ if existing_task_id and store_user_id in self.task_store and existing_task_id in self.task_store[store_user_id]:
218
+ upload_task = self.task_store[store_user_id][existing_task_id]
219
+ upload_task.file_tasks.update(file_tasks)
220
+ upload_task.total_files += len(items)
221
+ upload_task.status = TaskStatus.RUNNING
222
+ else:
223
+ upload_task = UploadTask(
224
+ task_id=task_id,
225
+ total_files=len(items),
226
+ file_tasks=file_tasks,
227
+ )
228
+ upload_task.processor = processor
229
+ if store_user_id not in self.task_store:
230
+ self.task_store[store_user_id] = {}
231
+ self.task_store[store_user_id][task_id] = upload_task
225
232
 
226
233
  # Start background processing
227
234
  background_task = asyncio.create_task(
228
- self.background_custom_processor(store_user_id, task_id, items)
235
+ self.background_custom_processor(store_user_id, task_id, items, processor)
229
236
  )
230
237
  self.background_tasks.add(background_task)
231
238
  background_task.add_done_callback(self.background_tasks.discard)
232
239
 
233
- # Store reference to background task for cancellation
234
- upload_task.background_task = background_task
240
+ # Store reference to background task for cancellation if newly created
241
+ if not existing_task_id:
242
+ upload_task.background_task = background_task
235
243
 
236
244
  # Send telemetry event for task creation with metadata
237
245
  asyncio.create_task(
@@ -282,7 +290,7 @@ class TaskService:
282
290
  return f"{hours}h {mins}m {secs}s"
283
291
 
284
292
  async def background_custom_processor(
285
- self, user_id: str, task_id: str, items: list
293
+ self, user_id: str, task_id: str, items: list, processor=None
286
294
  ) -> None:
287
295
  """Background task to process items using custom processor"""
288
296
  try:
@@ -290,7 +298,7 @@ class TaskService:
290
298
  upload_task.status = TaskStatus.RUNNING
291
299
  upload_task.updated_at = time.time()
292
300
 
293
- processor = upload_task.processor
301
+ processor = processor or upload_task.processor
294
302
 
295
303
  logger.info(
296
304
  "Upload / ingestion task started",
@@ -401,9 +409,10 @@ class TaskService:
401
409
 
402
410
  await asyncio.gather(*tasks, return_exceptions=True)
403
411
 
404
- # Mark task as completed
405
- upload_task.status = TaskStatus.COMPLETED
406
- upload_task.updated_at = time.time()
412
+ # Mark task as completed if all files (including appended ones) are done
413
+ if upload_task.processed_files >= upload_task.total_files:
414
+ upload_task.status = TaskStatus.COMPLETED
415
+ upload_task.updated_at = time.time()
407
416
 
408
417
  status: str = "FAILED"
409
418