openrag 0.4.0.dev1__tar.gz → 0.4.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. {openrag-0.4.0.dev1/src/openrag.egg-info → openrag-0.4.0.dev3}/PKG-INFO +1 -1
  2. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/pyproject.toml +1 -1
  3. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/documents.py +15 -8
  4. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/settings.py +55 -21
  5. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/models/processors.py +49 -21
  6. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3/src/openrag.egg-info}/PKG-INFO +1 -1
  7. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/search_service.py +70 -7
  8. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/file_utils.py +29 -1
  9. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/LICENSE +0 -0
  10. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/MANIFEST.in +0 -0
  11. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/README.md +0 -0
  12. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/setup.cfg +0 -0
  13. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/agent.py +0 -0
  14. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/__init__.py +0 -0
  15. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/auth.py +0 -0
  16. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/chat.py +0 -0
  17. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/connector_router.py +0 -0
  18. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/connectors.py +0 -0
  19. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/docling.py +0 -0
  20. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/flows.py +0 -0
  21. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/keys.py +0 -0
  22. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/knowledge_filter.py +0 -0
  23. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/langflow_files.py +0 -0
  24. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/models.py +0 -0
  25. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/nudges.py +0 -0
  26. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/oidc.py +0 -0
  27. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/provider_health.py +0 -0
  28. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/provider_validation.py +0 -0
  29. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/router.py +0 -0
  30. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/search.py +0 -0
  31. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/tasks.py +0 -0
  32. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/upload.py +0 -0
  33. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/v1/__init__.py +0 -0
  34. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/v1/chat.py +0 -0
  35. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/v1/documents.py +0 -0
  36. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/v1/knowledge_filters.py +0 -0
  37. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/v1/models.py +0 -0
  38. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/v1/search.py +0 -0
  39. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/api/v1/settings.py +0 -0
  40. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/auth_context.py +0 -0
  41. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/config/__init__.py +0 -0
  42. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/config/config_manager.py +0 -0
  43. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/config/model_constants.py +0 -0
  44. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/config/settings.py +0 -0
  45. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/__init__.py +0 -0
  46. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/aws_s3/__init__.py +0 -0
  47. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/aws_s3/api.py +0 -0
  48. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/aws_s3/auth.py +0 -0
  49. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/aws_s3/connector.py +0 -0
  50. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/aws_s3/models.py +0 -0
  51. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/aws_s3/support.py +0 -0
  52. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/base.py +0 -0
  53. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/connection_manager.py +0 -0
  54. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/google_drive/__init__.py +0 -0
  55. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/google_drive/connector.py +0 -0
  56. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/google_drive/oauth.py +0 -0
  57. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/ibm_cos/__init__.py +0 -0
  58. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/ibm_cos/api.py +0 -0
  59. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/ibm_cos/auth.py +0 -0
  60. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/ibm_cos/connector.py +0 -0
  61. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/ibm_cos/models.py +0 -0
  62. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/ibm_cos/support.py +0 -0
  63. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/langflow_connector_service.py +0 -0
  64. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/onedrive/__init__.py +0 -0
  65. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/onedrive/connector.py +0 -0
  66. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/onedrive/oauth.py +0 -0
  67. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/service.py +0 -0
  68. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/sharepoint/__init__.py +0 -0
  69. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/sharepoint/connector.py +0 -0
  70. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/sharepoint/oauth.py +0 -0
  71. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/connectors/sharepoint/utils.py +0 -0
  72. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/dependencies.py +0 -0
  73. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/main.py +0 -0
  74. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/models/__init__.py +0 -0
  75. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/models/tasks.py +0 -0
  76. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/models/url.py +0 -0
  77. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/openrag.egg-info/SOURCES.txt +0 -0
  78. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/openrag.egg-info/dependency_links.txt +0 -0
  79. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/openrag.egg-info/entry_points.txt +0 -0
  80. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/openrag.egg-info/requires.txt +0 -0
  81. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/openrag.egg-info/top_level.txt +0 -0
  82. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/__init__.py +0 -0
  83. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/api_key_service.py +0 -0
  84. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/auth_service.py +0 -0
  85. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/chat_service.py +0 -0
  86. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/conversation_persistence_service.py +0 -0
  87. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/document_service.py +0 -0
  88. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/flows_service.py +0 -0
  89. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/knowledge_filter_service.py +0 -0
  90. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/langflow_file_service.py +0 -0
  91. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/langflow_history_service.py +0 -0
  92. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/langflow_mcp_service.py +0 -0
  93. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/models_service.py +0 -0
  94. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/monitor_service.py +0 -0
  95. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/session_ownership_service.py +0 -0
  96. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/services/task_service.py +0 -0
  97. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/session_manager.py +0 -0
  98. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/__init__.py +0 -0
  99. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/docker-compose.gpu.yml +0 -0
  100. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/docker-compose.yml +0 -0
  101. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/components/ollama_embedding.json +0 -0
  102. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/components/ollama_llm.json +0 -0
  103. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/components/ollama_llm_text.json +0 -0
  104. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/components/watsonx_embedding.json +0 -0
  105. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/components/watsonx_llm.json +0 -0
  106. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/components/watsonx_llm_text.json +0 -0
  107. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/ingestion_flow.json +0 -0
  108. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/openrag_agent.json +0 -0
  109. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/openrag_nudges.json +0 -0
  110. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/flows/openrag_url_mcp.json +0 -0
  111. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/openrag-documents/docling.pdf +0 -0
  112. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/openrag-documents/ibm_anthropic.pdf +0 -0
  113. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/openrag-documents/openrag-documentation.pdf +0 -0
  114. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/_assets/openrag-documents/warmup_ocr.pdf +0 -0
  115. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/cli.py +0 -0
  116. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/config_fields.py +0 -0
  117. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/main.py +0 -0
  118. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/managers/__init__.py +0 -0
  119. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/managers/container_manager.py +0 -0
  120. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/managers/docling_manager.py +0 -0
  121. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/managers/env_manager.py +0 -0
  122. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/screens/__init__.py +0 -0
  123. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/screens/config.py +0 -0
  124. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/screens/diagnostics.py +0 -0
  125. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/screens/logs.py +0 -0
  126. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/screens/monitor.py +0 -0
  127. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/screens/welcome.py +0 -0
  128. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/utils/__init__.py +0 -0
  129. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/utils/clipboard.py +0 -0
  130. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/utils/platform.py +0 -0
  131. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/utils/startup_checks.py +0 -0
  132. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/utils/validation.py +0 -0
  133. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/utils/version_check.py +0 -0
  134. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/__init__.py +0 -0
  135. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/command_modal.py +0 -0
  136. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/diagnostics_notification.py +0 -0
  137. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/error_notification.py +0 -0
  138. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/factory_reset_warning_modal.py +0 -0
  139. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/flow_backup_warning_modal.py +0 -0
  140. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/prune_options_modal.py +0 -0
  141. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/upgrade_instructions_modal.py +0 -0
  142. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/version_mismatch_warning_modal.py +0 -0
  143. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/tui/widgets/waves.py +0 -0
  144. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/__init__.py +0 -0
  145. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/acl_utils.py +0 -0
  146. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/container_utils.py +0 -0
  147. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/docling_client.py +0 -0
  148. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/document_processing.py +0 -0
  149. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/embedding_fields.py +0 -0
  150. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/embeddings.py +0 -0
  151. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/env_utils.py +0 -0
  152. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/gpu_detection.py +0 -0
  153. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/hash_utils.py +0 -0
  154. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/langflow_headers.py +0 -0
  155. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/langflow_utils.py +0 -0
  156. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/logging_config.py +0 -0
  157. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/opensearch_queries.py +0 -0
  158. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/opensearch_utils.py +0 -0
  159. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/paths.py +0 -0
  160. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/telemetry/__init__.py +0 -0
  161. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/telemetry/category.py +0 -0
  162. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/telemetry/client.py +0 -0
  163. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/telemetry/message_id.py +0 -0
  164. {openrag-0.4.0.dev1 → openrag-0.4.0.dev3}/src/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrag
3
- Version: 0.4.0.dev1
3
+ Version: 0.4.0.dev3
4
4
  Summary: OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations.
5
5
  Classifier: Development Status :: 4 - Beta
6
6
  Classifier: Environment :: Console
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "openrag"
7
- version = "0.4.0.dev1"
7
+ version = "0.4.0.dev3"
8
8
  description = "OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.13"
@@ -121,16 +121,26 @@ async def check_filename_exists(
121
121
  )
122
122
 
123
123
  from utils.opensearch_queries import build_filename_search_body
124
+ from utils.file_utils import get_filename_aliases
124
125
 
125
- search_body = build_filename_search_body(filename, size=1, source=["filename"])
126
+ candidate_filenames = get_filename_aliases(filename)
127
+ if not candidate_filenames:
128
+ return JSONResponse({"exists": False, "filename": filename}, status_code=200)
126
129
 
127
130
  logger.debug("Checking filename existence", filename=filename, index_name=get_index_name())
131
+ exists = False
128
132
 
129
133
  try:
130
- response = await opensearch_client.search(
131
- index=get_index_name(),
132
- body=search_body
133
- )
134
+ for candidate in candidate_filenames:
135
+ search_body = build_filename_search_body(candidate, size=1, source=["filename"])
136
+ response = await opensearch_client.search(
137
+ index=get_index_name(),
138
+ body=search_body
139
+ )
140
+ hits = response.get("hits", {}).get("hits", [])
141
+ if hits:
142
+ exists = True
143
+ break
134
144
  except Exception as search_err:
135
145
  if "index_not_found_exception" in str(search_err):
136
146
  logger.info("Index does not exist, creating it now before upload")
@@ -138,9 +148,6 @@ async def check_filename_exists(
138
148
  return JSONResponse({"exists": False, "filename": filename}, status_code=200)
139
149
  raise
140
150
 
141
- hits = response.get("hits", {}).get("hits", [])
142
- exists = len(hits) > 0
143
-
144
151
  return JSONResponse({"exists": exists, "filename": filename}, status_code=200)
145
152
 
146
153
  except Exception as e:
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import json
2
3
  import platform
3
4
  from fastapi import Depends, Request, HTTPException
@@ -36,6 +37,7 @@ from dependencies import (
36
37
  from session_manager import User
37
38
 
38
39
  logger = get_logger(__name__)
40
+ _background_tasks: set[asyncio.Task] = set()
39
41
 
40
42
 
41
43
  class SettingsUpdateBody(BaseModel):
@@ -815,30 +817,31 @@ async def update_settings(
815
817
  {"error": "Failed to save configuration"}, status_code=500
816
818
  )
817
819
 
818
- # Update Langflow global variables and model values if provider settings changed
820
+ # Refresh patched client immediately so subsequent requests pick up latest config.
819
821
  await clients.refresh_patched_client()
820
822
 
823
+ # Run expensive Langflow sync in the background to keep settings updates responsive.
821
824
  if should_validate or provider_updated:
822
- try:
823
- flows_service = _get_flows_service()
824
-
825
- # Update global variables
826
- await _update_langflow_global_variables(current_config, flows_service=flows_service)
827
-
828
- # Update LLM client credentials when embedding selection changes
829
- if body.embedding_provider is not None or body.embedding_model is not None:
830
- await _update_mcp_servers_with_provider_credentials(
831
- current_config, session_manager, flows_service=flows_service
832
- )
833
-
834
- # Update model values if provider or model changed (including removals that trigger fallback)
835
- if body.llm_provider is not None or body.llm_model is not None or body.embedding_provider is not None or body.embedding_model is not None or provider_updated:
836
- await _update_langflow_model_values(current_config, flows_service)
837
-
838
- except Exception as e:
839
- logger.error(f"Failed to update Langflow settings: {str(e)}")
840
- # Don't fail the entire settings update if Langflow update fails
841
- # The config was still saved
825
+ task = asyncio.create_task(
826
+ _run_async_post_save_langflow_updates(
827
+ session_manager=session_manager,
828
+ update_mcp_servers=(
829
+ body.embedding_provider is not None
830
+ or body.embedding_model is not None
831
+ or provider_updated
832
+ ),
833
+ update_model_values=(
834
+ body.llm_provider is not None
835
+ or body.llm_model is not None
836
+ or body.embedding_provider is not None
837
+ or body.embedding_model is not None
838
+ or provider_updated
839
+ ),
840
+ )
841
+ )
842
+ # Keep a strong reference until completion to avoid premature GC cancellation.
843
+ _background_tasks.add(task)
844
+ task.add_done_callback(_background_tasks.discard)
842
845
 
843
846
 
844
847
  set_fields = [k for k, v in body.model_dump().items() if v is not None]
@@ -1380,6 +1383,37 @@ async def _update_langflow_global_variables(config, flows_service=None):
1380
1383
  raise
1381
1384
 
1382
1385
 
1386
+ async def _run_async_post_save_langflow_updates(
1387
+ session_manager,
1388
+ update_mcp_servers: bool,
1389
+ update_model_values: bool,
1390
+ ) -> None:
1391
+ """Apply post-save Langflow synchronization asynchronously."""
1392
+ try:
1393
+ current_config = get_openrag_config()
1394
+ flows_service = _get_flows_service()
1395
+
1396
+ # Update global variables
1397
+ await _update_langflow_global_variables(
1398
+ current_config, flows_service=flows_service
1399
+ )
1400
+
1401
+ # Update LLM client credentials when embedding selection changes
1402
+ if update_mcp_servers:
1403
+ await _update_mcp_servers_with_provider_credentials(
1404
+ current_config, session_manager, flows_service=flows_service
1405
+ )
1406
+
1407
+ # Update model values if provider/model changed (including removals/fallbacks)
1408
+ if update_model_values:
1409
+ await _update_langflow_model_values(current_config, flows_service)
1410
+
1411
+ logger.info("Completed asynchronous Langflow post-save sync")
1412
+ except Exception as e:
1413
+ # Do not fail user request if async sync fails; keep parity with existing behavior.
1414
+ logger.error(f"Failed to update Langflow settings asynchronously: {str(e)}")
1415
+
1416
+
1383
1417
  async def _update_mcp_servers_with_provider_credentials(config, session_manager = None, flows_service=None):
1384
1418
  # Update MCP servers with provider credentials
1385
1419
  try:
@@ -1,7 +1,11 @@
1
1
  from typing import Any
2
2
  from .tasks import UploadTask, FileTask
3
3
  from utils.logging_config import get_logger
4
- from utils.file_utils import get_file_extension, clean_connector_filename
4
+ from utils.file_utils import (
5
+ get_file_extension,
6
+ clean_connector_filename,
7
+ get_filename_aliases,
8
+ )
5
9
 
6
10
  logger = get_logger(__name__)
7
11
 
@@ -72,19 +76,37 @@ class TaskProcessor:
72
76
  max_retries = 3
73
77
  retry_delay = 1.0
74
78
 
79
+ candidate_filenames = get_filename_aliases(filename)
80
+ if not candidate_filenames:
81
+ return False
82
+ # Keep track of aliases that still need checking across retries.
83
+ # If one alias was already checked successfully with no hits, we avoid
84
+ # re-querying it when another alias fails transiently.
85
+ pending_candidates = list(candidate_filenames)
86
+ # Retry strategy: only retry aliases that have not completed successfully.
87
+ # This avoids re-querying aliases already checked with no hits when a later
88
+ # alias fails transiently (e.g., timeout).
89
+
75
90
  for attempt in range(max_retries):
76
91
  try:
77
- # Search for any document with this exact filename
78
- search_body = build_filename_search_body(filename, size=1, source=False)
79
-
80
- response = await opensearch_client.search(
81
- index=get_index_name(),
82
- body=search_body
83
- )
84
-
85
- # Check if any hits were found
86
- hits = response.get("hits", {}).get("hits", [])
87
- return len(hits) > 0
92
+ i = 0
93
+ while i < len(pending_candidates):
94
+ candidate = pending_candidates[i]
95
+ search_body = build_filename_search_body(
96
+ candidate, size=1, source=False
97
+ )
98
+ response = await opensearch_client.search(
99
+ index=get_index_name(),
100
+ body=search_body
101
+ )
102
+ hits = response.get("hits", {}).get("hits", [])
103
+ if hits:
104
+ return True
105
+ # Successfully checked this alias with no hits; don't
106
+ # re-query it on future retries.
107
+ pending_candidates.pop(i)
108
+ continue
109
+ return False
88
110
 
89
111
  except (asyncio.TimeoutError, Exception) as e:
90
112
  if attempt == max_retries - 1:
@@ -123,15 +145,21 @@ class TaskProcessor:
123
145
  from utils.opensearch_queries import build_filename_delete_body
124
146
 
125
147
  try:
126
- # Delete all documents with this filename
127
- delete_body = build_filename_delete_body(filename)
128
-
129
- response = await opensearch_client.delete_by_query(
130
- index=get_index_name(),
131
- body=delete_body
132
- )
133
-
134
- deleted_count = response.get("deleted", 0)
148
+ deleted_count = 0
149
+ candidate_filenames = get_filename_aliases(filename)
150
+ if not candidate_filenames:
151
+ logger.info(
152
+ "Skipped delete_by_filename due to empty filename input",
153
+ filename=filename,
154
+ )
155
+ return
156
+ for candidate in candidate_filenames:
157
+ delete_body = build_filename_delete_body(candidate)
158
+ response = await opensearch_client.delete_by_query(
159
+ index=get_index_name(),
160
+ body=delete_body
161
+ )
162
+ deleted_count += response.get("deleted", 0)
135
163
  logger.info(
136
164
  "Deleted existing document chunks",
137
165
  filename=filename,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrag
3
- Version: 0.4.0.dev1
3
+ Version: 0.4.0.dev3
4
4
  Summary: OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations.
5
5
  Classifier: Development Status :: 4 - Beta
6
6
  Classifier: Environment :: Console
@@ -1,5 +1,6 @@
1
1
  import copy
2
2
  import json
3
+ from collections import Counter
3
4
  from typing import Any, Dict
4
5
  from agentd.tool_decorator import tool
5
6
  from config.settings import EMBED_MODEL, clients, get_embedding_model, get_index_name, WATSONX_EMBEDDING_DIMENSIONS
@@ -317,10 +318,25 @@ class SearchService:
317
318
  "query": query,
318
319
  "fields": ["text^2", "filename^1.5"],
319
320
  "type": "best_fields",
320
- "fuzziness": "AUTO",
321
+ "operator": "or",
322
+ "fuzziness": "AUTO:4,7",
321
323
  "boost": 0.3, # 30% weight for keyword search
322
324
  }
323
325
  },
326
+ {
327
+ # Prefix fallback for partial input (e.g. "vita" -> "vitamin").
328
+ # Avoid bool_prefix here because our current mappings are:
329
+ # - text: standard "text" (not search_as_you_type / edge-ngram)
330
+ # - filename: "keyword"
331
+ # match_phrase_prefix with a bounded expansion is safer.
332
+ "match_phrase_prefix": {
333
+ "text": {
334
+ "query": query,
335
+ "max_expansions": 50,
336
+ "boost": 0.25,
337
+ }
338
+ }
339
+ },
324
340
  ],
325
341
  "minimum_should_match": 1,
326
342
  "filter": all_filters,
@@ -481,15 +497,62 @@ class SearchService:
481
497
  }
482
498
  )
483
499
 
500
+ # If query text appears verbatim in one subset of files, prefer those files
501
+ # to avoid broad semantic spillover for unique lookups.
502
+ normalized_query = query.strip().lower()
503
+ aggregations = results.get("aggregations", {})
504
+ if (
505
+ normalized_query
506
+ and not is_wildcard_match_all
507
+ and len(normalized_query) >= 4
508
+ ):
509
+ exact_files = {
510
+ filename
511
+ for chunk in chunks
512
+ for filename in [chunk.get("filename")]
513
+ if isinstance(filename, str)
514
+ and (
515
+ normalized_query in filename.lower()
516
+ or (
517
+ isinstance(chunk.get("text"), str)
518
+ and normalized_query in chunk.get("text", "").lower()
519
+ )
520
+ )
521
+ }
522
+ if exact_files:
523
+ chunks = [chunk for chunk in chunks if chunk.get("filename") in exact_files]
524
+
525
+ def _build_terms_agg(field: str) -> Dict[str, Any]:
526
+ counts = Counter(
527
+ value
528
+ for chunk in chunks
529
+ for value in [chunk.get(field)]
530
+ if isinstance(value, str) and value
531
+ )
532
+ return {
533
+ "doc_count_error_upper_bound": 0,
534
+ "sum_other_doc_count": 0,
535
+ "buckets": [
536
+ {"key": key, "doc_count": count}
537
+ for key, count in counts.most_common()
538
+ ],
539
+ }
540
+
541
+ # Keep aggregations consistent with the post-filtered result set.
542
+ aggregations = {
543
+ **aggregations,
544
+ "data_sources": _build_terms_agg("filename"),
545
+ "document_types": _build_terms_agg("mimetype"),
546
+ "owners": _build_terms_agg("owner"),
547
+ "connector_types": _build_terms_agg("connector_type"),
548
+ "embedding_models": _build_terms_agg("embedding_model"),
549
+ }
550
+
484
551
  # Return both transformed results and aggregations
485
552
  return {
486
553
  "results": chunks,
487
- "aggregations": results.get("aggregations", {}),
488
- "total": (
489
- results.get("hits", {}).get("total", {}).get("value")
490
- if isinstance(results.get("hits", {}).get("total"), dict)
491
- else results.get("hits", {}).get("total")
492
- ),
554
+ "aggregations": aggregations,
555
+ "total": len(chunks),
493
556
  }
494
557
 
495
558
  async def search(
@@ -98,4 +98,32 @@ def clean_connector_filename(filename: str, mimetype: str) -> str:
98
98
  return clean_name
99
99
  if not clean_name.lower().endswith(suffix.lower()):
100
100
  return clean_name + suffix
101
- return clean_name
101
+ return clean_name
102
+
103
+
104
+ def get_filename_aliases(filename: str) -> list[str]:
105
+ """Return equivalent filename variants used by ingestion/indexing.
106
+
107
+ Legacy Langflow ingest indexes `.txt` uploads as `.md` (see
108
+ `LangflowFileProcessor`). The alias always uses a lowercase extension
109
+ to match the rename behavior:
110
+ `original_filename[:-4] + ".md"`
111
+ So `"FOO.TXT"` aliases to `"FOO.md"`, not `"FOO.MD"`.
112
+
113
+ This helper keeps duplicate detection/deletion consistent by checking
114
+ both `.txt` and `.md` forms.
115
+ """
116
+ normalized = (filename or "").strip()
117
+ if not normalized:
118
+ return []
119
+
120
+ aliases = [normalized]
121
+ lower_name = normalized.lower()
122
+
123
+ if lower_name.endswith(".txt"):
124
+ aliases.append(normalized[:-4] + ".md")
125
+ elif lower_name.endswith(".md"):
126
+ aliases.append(normalized[:-3] + ".txt")
127
+
128
+ # Keep order stable while removing duplicates.
129
+ return list(dict.fromkeys(aliases))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes