openrag 0.3.2.dev31__tar.gz → 0.3.2.dev41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. {openrag-0.3.2.dev31/src/openrag.egg-info → openrag-0.3.2.dev41}/PKG-INFO +2 -1
  2. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/pyproject.toml +2 -1
  3. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/connectors.py +57 -5
  4. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/settings.py +2 -0
  5. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/__init__.py +4 -0
  6. openrag-0.3.2.dev41/src/connectors/aws_s3/__init__.py +19 -0
  7. openrag-0.3.2.dev41/src/connectors/aws_s3/api.py +175 -0
  8. openrag-0.3.2.dev41/src/connectors/aws_s3/auth.py +90 -0
  9. openrag-0.3.2.dev41/src/connectors/aws_s3/connector.py +277 -0
  10. openrag-0.3.2.dev41/src/connectors/aws_s3/models.py +13 -0
  11. openrag-0.3.2.dev41/src/connectors/aws_s3/support.py +51 -0
  12. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/connection_manager.py +57 -9
  13. openrag-0.3.2.dev41/src/connectors/ibm_cos/__init__.py +17 -0
  14. openrag-0.3.2.dev41/src/connectors/ibm_cos/api.py +202 -0
  15. openrag-0.3.2.dev41/src/connectors/ibm_cos/auth.py +183 -0
  16. openrag-0.3.2.dev41/src/connectors/ibm_cos/connector.py +375 -0
  17. openrag-0.3.2.dev41/src/connectors/ibm_cos/models.py +20 -0
  18. openrag-0.3.2.dev41/src/connectors/ibm_cos/support.py +68 -0
  19. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/langflow_connector_service.py +30 -29
  20. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/main.py +29 -39
  21. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41/src/openrag.egg-info}/PKG-INFO +2 -1
  22. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/SOURCES.txt +12 -0
  23. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/requires.txt +1 -0
  24. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/auth_service.py +69 -4
  25. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/docker-compose.yml +8 -0
  26. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/config_fields.py +41 -0
  27. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/env_manager.py +26 -0
  28. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/file_utils.py +18 -4
  29. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/LICENSE +0 -0
  30. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/MANIFEST.in +0 -0
  31. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/README.md +0 -0
  32. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/setup.cfg +0 -0
  33. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/agent.py +0 -0
  34. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/__init__.py +0 -0
  35. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/auth.py +0 -0
  36. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/chat.py +0 -0
  37. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/connector_router.py +0 -0
  38. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/docling.py +0 -0
  39. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/documents.py +0 -0
  40. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/flows.py +0 -0
  41. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/keys.py +0 -0
  42. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/knowledge_filter.py +0 -0
  43. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/langflow_files.py +0 -0
  44. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/models.py +0 -0
  45. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/nudges.py +0 -0
  46. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/oidc.py +0 -0
  47. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/provider_health.py +0 -0
  48. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/provider_validation.py +0 -0
  49. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/router.py +0 -0
  50. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/search.py +0 -0
  51. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/settings.py +0 -0
  52. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/tasks.py +0 -0
  53. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/upload.py +0 -0
  54. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/__init__.py +0 -0
  55. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/chat.py +0 -0
  56. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/documents.py +0 -0
  57. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/knowledge_filters.py +0 -0
  58. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/models.py +0 -0
  59. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/search.py +0 -0
  60. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/settings.py +0 -0
  61. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/auth_context.py +0 -0
  62. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/__init__.py +0 -0
  63. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/config_manager.py +0 -0
  64. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/model_constants.py +0 -0
  65. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/base.py +0 -0
  66. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/google_drive/__init__.py +0 -0
  67. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/google_drive/connector.py +0 -0
  68. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/google_drive/oauth.py +0 -0
  69. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/onedrive/__init__.py +0 -0
  70. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/onedrive/connector.py +0 -0
  71. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/onedrive/oauth.py +0 -0
  72. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/service.py +0 -0
  73. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/__init__.py +0 -0
  74. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/connector.py +0 -0
  75. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/oauth.py +0 -0
  76. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/utils.py +0 -0
  77. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/dependencies.py +0 -0
  78. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/__init__.py +0 -0
  79. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/processors.py +0 -0
  80. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/tasks.py +0 -0
  81. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/url.py +0 -0
  82. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/dependency_links.txt +0 -0
  83. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/entry_points.txt +0 -0
  84. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/top_level.txt +0 -0
  85. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/__init__.py +0 -0
  86. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/api_key_service.py +0 -0
  87. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/chat_service.py +0 -0
  88. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/conversation_persistence_service.py +0 -0
  89. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/document_service.py +0 -0
  90. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/flows_service.py +0 -0
  91. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/knowledge_filter_service.py +0 -0
  92. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/langflow_file_service.py +0 -0
  93. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/langflow_history_service.py +0 -0
  94. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/langflow_mcp_service.py +0 -0
  95. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/models_service.py +0 -0
  96. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/monitor_service.py +0 -0
  97. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/search_service.py +0 -0
  98. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/session_ownership_service.py +0 -0
  99. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/task_service.py +0 -0
  100. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/session_manager.py +0 -0
  101. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/__init__.py +0 -0
  102. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/docker-compose.gpu.yml +0 -0
  103. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/ollama_embedding.json +0 -0
  104. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/ollama_llm.json +0 -0
  105. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/ollama_llm_text.json +0 -0
  106. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/watsonx_embedding.json +0 -0
  107. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/watsonx_llm.json +0 -0
  108. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/watsonx_llm_text.json +0 -0
  109. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/ingestion_flow.json +0 -0
  110. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/openrag_agent.json +0 -0
  111. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/openrag_nudges.json +0 -0
  112. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/openrag_url_mcp.json +0 -0
  113. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/docling.pdf +0 -0
  114. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/ibm_anthropic.pdf +0 -0
  115. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/openrag-documentation.pdf +0 -0
  116. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/warmup_ocr.pdf +0 -0
  117. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/cli.py +0 -0
  118. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/main.py +0 -0
  119. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/__init__.py +0 -0
  120. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/container_manager.py +0 -0
  121. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/docling_manager.py +0 -0
  122. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/__init__.py +0 -0
  123. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/config.py +0 -0
  124. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/diagnostics.py +0 -0
  125. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/logs.py +0 -0
  126. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/monitor.py +0 -0
  127. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/welcome.py +0 -0
  128. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/__init__.py +0 -0
  129. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/clipboard.py +0 -0
  130. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/platform.py +0 -0
  131. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/startup_checks.py +0 -0
  132. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/validation.py +0 -0
  133. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/version_check.py +0 -0
  134. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/__init__.py +0 -0
  135. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/command_modal.py +0 -0
  136. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/diagnostics_notification.py +0 -0
  137. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/error_notification.py +0 -0
  138. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/factory_reset_warning_modal.py +0 -0
  139. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/flow_backup_warning_modal.py +0 -0
  140. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/prune_options_modal.py +0 -0
  141. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/upgrade_instructions_modal.py +0 -0
  142. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/version_mismatch_warning_modal.py +0 -0
  143. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/waves.py +0 -0
  144. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/__init__.py +0 -0
  145. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/acl_utils.py +0 -0
  146. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/container_utils.py +0 -0
  147. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/docling_client.py +0 -0
  148. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/document_processing.py +0 -0
  149. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/embedding_fields.py +0 -0
  150. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/embeddings.py +0 -0
  151. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/env_utils.py +0 -0
  152. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/gpu_detection.py +0 -0
  153. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/hash_utils.py +0 -0
  154. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/langflow_headers.py +0 -0
  155. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/langflow_utils.py +0 -0
  156. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/logging_config.py +0 -0
  157. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/opensearch_queries.py +0 -0
  158. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/opensearch_utils.py +0 -0
  159. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/paths.py +0 -0
  160. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/__init__.py +0 -0
  161. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/category.py +0 -0
  162. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/client.py +0 -0
  163. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/message_id.py +0 -0
  164. {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrag
3
- Version: 0.3.2.dev31
3
+ Version: 0.3.2.dev41
4
4
  Summary: OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations.
5
5
  Classifier: Development Status :: 4 - Beta
6
6
  Classifier: Environment :: Console
@@ -29,6 +29,7 @@ Requires-Dist: python-multipart>=0.0.20
29
29
  Requires-Dist: fastapi>=0.115.0
30
30
  Requires-Dist: uvicorn>=0.35.0
31
31
  Requires-Dist: boto3>=1.35.0
32
+ Requires-Dist: ibm-cos-sdk>=2.13.0
32
33
  Requires-Dist: psutil>=7.0.0
33
34
  Requires-Dist: rich>=13.0.0
34
35
  Requires-Dist: textual>=0.45.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "openrag"
7
- version = "0.3.2.dev31"
7
+ version = "0.3.2.dev41"
8
8
  description = "OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.13"
@@ -35,6 +35,7 @@ dependencies = [
35
35
  "fastapi>=0.115.0",
36
36
  "uvicorn>=0.35.0",
37
37
  "boto3>=1.35.0",
38
+ "ibm-cos-sdk>=2.13.0",
38
39
  "psutil>=7.0.0",
39
40
  "rich>=13.0.0",
40
41
  "textual>=0.45.0",
@@ -88,6 +88,11 @@ async def get_synced_file_ids_for_connector(
88
88
  class ConnectorSyncBody(BaseModel):
89
89
  max_files: Optional[int] = None
90
90
  selected_files: Optional[List[Any]] = None
91
+ # When True, ingest ALL files from the connector (bypasses the existing-files gate).
92
+ # Used by direct-sync providers like IBM COS on initial ingest.
93
+ sync_all: bool = False
94
+ # When set, only ingest files from these buckets (IBM COS specific).
95
+ bucket_filter: Optional[List[str]] = None
91
96
 
92
97
 
93
98
  async def list_connectors(
@@ -96,8 +101,8 @@ async def list_connectors(
96
101
  ):
97
102
  """List available connector types with metadata"""
98
103
  try:
99
- connector_types = (
100
- connector_service.connection_manager.get_available_connector_types()
104
+ connector_types = connector_service.connection_manager.get_available_connector_types(
105
+ user_id=user.user_id
101
106
  )
102
107
  return JSONResponse({"connectors": connector_types})
103
108
  except Exception as e:
@@ -200,6 +205,51 @@ async def connector_sync(
200
205
  jwt_token=jwt_token,
201
206
  file_infos=file_infos,
202
207
  )
208
+ elif body.sync_all or body.bucket_filter:
209
+ # Full ingest: discover and ingest all files (or files from specific buckets).
210
+ # Used by direct-sync providers (IBM COS) on initial ingest or per-bucket sync.
211
+ logger.info(
212
+ "Full connector ingest requested",
213
+ connector_type=connector_type,
214
+ bucket_filter=body.bucket_filter,
215
+ )
216
+ connector = await connector_service.get_connector(working_connection.connection_id)
217
+ if body.bucket_filter:
218
+ # List only files from the requested buckets, then sync_specific_files
219
+ original_buckets = connector.bucket_names
220
+ connector.bucket_names = body.bucket_filter
221
+ try:
222
+ all_file_ids = []
223
+ page_token = None
224
+ while True:
225
+ result = await connector.list_files(page_token=page_token)
226
+ for f in result.get("files", []):
227
+ all_file_ids.append(f["id"])
228
+ page_token = result.get("next_page_token")
229
+ if not page_token:
230
+ break
231
+ finally:
232
+ connector.bucket_names = original_buckets
233
+
234
+ if not all_file_ids:
235
+ return JSONResponse(
236
+ {"status": "no_files", "message": "No files found in the selected buckets."},
237
+ status_code=200,
238
+ )
239
+ task_id = await connector_service.sync_specific_files(
240
+ working_connection.connection_id,
241
+ user.user_id,
242
+ all_file_ids,
243
+ jwt_token=jwt_token,
244
+ )
245
+ else:
246
+ # sync_all: ingest everything the connector can see
247
+ task_id = await connector_service.sync_connector_files(
248
+ working_connection.connection_id,
249
+ user.user_id,
250
+ max_files=max_files,
251
+ jwt_token=jwt_token,
252
+ )
203
253
  else:
204
254
  # No files specified - sync only files already in OpenSearch for this connector
205
255
  # This ensures deleted files stay deleted
@@ -209,7 +259,7 @@ async def connector_sync(
209
259
  session_manager=session_manager,
210
260
  jwt_token=jwt_token,
211
261
  )
212
-
262
+
213
263
  if not existing_file_ids and not existing_filenames:
214
264
  return JSONResponse(
215
265
  {
@@ -218,7 +268,7 @@ async def connector_sync(
218
268
  },
219
269
  status_code=200,
220
270
  )
221
-
271
+
222
272
  # If we have document_ids (connector file IDs), use sync_specific_files
223
273
  # Otherwise, use filename filtering with sync_connector_files
224
274
  if existing_file_ids:
@@ -602,6 +652,8 @@ async def connector_disconnect(
602
652
  )
603
653
 
604
654
 
655
+ # ---------------------------------------------------------------------------
656
+
605
657
  async def sync_all_connectors(
606
658
  connector_service=Depends(get_connector_service),
607
659
  session_manager=Depends(get_session_manager),
@@ -615,7 +667,7 @@ async def sync_all_connectors(
615
667
  jwt_token = user.jwt_token
616
668
 
617
669
  # Cloud connector types to sync
618
- cloud_connector_types = ["google_drive", "onedrive", "sharepoint"]
670
+ cloud_connector_types = ["google_drive", "onedrive", "sharepoint", "ibm_cos", "aws_s3"]
619
671
 
620
672
  all_task_ids = []
621
673
  synced_connectors = []
@@ -51,6 +51,8 @@ GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
51
51
  GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
52
52
  DOCLING_OCR_ENGINE = os.getenv("DOCLING_OCR_ENGINE")
53
53
 
54
+ IBM_AUTH_ENABLED = os.getenv("IBM_AUTH_ENABLED", "false").lower() in ("true", "1", "yes")
55
+
54
56
  # Ingestion configuration
55
57
  DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
56
58
  "DISABLE_INGEST_WITH_LANGFLOW", "false"
@@ -2,10 +2,14 @@ from .base import BaseConnector
2
2
  from .google_drive import GoogleDriveConnector
3
3
  from .sharepoint import SharePointConnector
4
4
  from .onedrive import OneDriveConnector
5
+ from .ibm_cos import IBMCOSConnector
6
+ from .aws_s3 import S3Connector
5
7
 
6
8
  __all__ = [
7
9
  "BaseConnector",
8
10
  "GoogleDriveConnector",
9
11
  "SharePointConnector",
10
12
  "OneDriveConnector",
13
+ "IBMCOSConnector",
14
+ "S3Connector",
11
15
  ]
@@ -0,0 +1,19 @@
1
+ """Amazon S3 / S3-compatible connector for OpenRAG."""
2
+
3
+ from .connector import S3Connector
4
+ from .models import S3ConfigureBody
5
+ from .api import (
6
+ s3_defaults,
7
+ s3_configure,
8
+ s3_list_buckets,
9
+ s3_bucket_status,
10
+ )
11
+
12
+ __all__ = [
13
+ "S3Connector",
14
+ "S3ConfigureBody",
15
+ "s3_defaults",
16
+ "s3_configure",
17
+ "s3_list_buckets",
18
+ "s3_bucket_status",
19
+ ]
@@ -0,0 +1,175 @@
1
+ """FastAPI route handlers for AWS S3-specific endpoints."""
2
+
3
+ import os
4
+
5
+ from fastapi import Depends
6
+ from fastapi.responses import JSONResponse
7
+
8
+ from config.settings import get_index_name
9
+ from dependencies import get_connector_service, get_session_manager, get_current_user
10
+ from session_manager import User
11
+ from utils.logging_config import get_logger
12
+
13
+ from .auth import create_s3_resource
14
+ from .models import S3ConfigureBody
15
+ from .support import build_s3_config
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
async def s3_defaults(
    connector_service=Depends(get_connector_service),
    user: User = Depends(get_current_user),
):
    """Return the values used to pre-fill the S3 configuration dialog.

    Values come from the user's first stored ``aws_s3`` connection when one
    exists, falling back to the AWS_* environment variables. The access key and
    secret key themselves are never returned; only booleans saying whether each
    one is available from either source.
    """
    # Snapshot the environment defaults before touching the connection store.
    env_access_key = os.getenv("AWS_ACCESS_KEY_ID", "")
    env_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
    env_endpoint = os.getenv("AWS_S3_ENDPOINT", "")
    env_region = os.getenv("AWS_REGION", "")

    manager = connector_service.connection_manager
    connections = await manager.list_connections(
        user_id=user.user_id, connector_type="aws_s3"
    )

    # Only the first connection of this type is considered.
    if connections:
        stored = connections[0].config or {}
    else:
        stored = {}

    payload = {
        "access_key_set": bool(env_access_key or stored.get("access_key")),
        "secret_key_set": bool(env_secret_key or stored.get("secret_key")),
        # Stored connection values win over environment defaults.
        "endpoint": stored.get("endpoint_url") or env_endpoint,
        "region": stored.get("region") or env_region,
        "bucket_names": stored.get("bucket_names", []),
        "connection_id": connections[0].connection_id if connections else None,
    }
    return JSONResponse(payload)
49
+
50
+
51
async def s3_configure(
    body: S3ConfigureBody,
    connector_service=Depends(get_connector_service),
    user: User = Depends(get_current_user),
):
    """Create or update an S3 connection with explicit credentials.

    Builds the connection config from the request body and the user's first
    existing ``aws_s3`` connection (if any), tests the credentials by listing
    buckets, then persists the connection.

    Returns:
        JSONResponse with ``connection_id`` and ``status`` set to "connected"
        on success, or a 400 response carrying an ``error`` message when
        ``build_s3_config`` rejects the input or the bucket listing fails.
    """
    existing_connections = await connector_service.connection_manager.list_connections(
        user_id=user.user_id, connector_type="aws_s3"
    )
    # A stored connection may carry no config; normalise to an empty dict so
    # build_s3_config always receives a mapping (same guard as s3_defaults).
    existing_config = (existing_connections[0].config or {}) if existing_connections else {}

    conn_config, error = build_s3_config(body, existing_config)
    if error:
        return JSONResponse({"error": error}, status_code=400)

    # Test credentials
    # NOTE(review): this boto3 call is made directly inside the async handler
    # without being awaited or offloaded; confirm that is acceptable here.
    try:
        s3 = create_s3_resource(conn_config)
        list(s3.buckets.all())
    except Exception:
        logger.exception("Failed to connect to S3 during credential test.")
        return JSONResponse(
            {"error": "Could not connect to S3 with the provided configuration."},
            status_code=400,
        )

    # Persist: update existing connection or create a new one
    if body.connection_id:
        existing = await connector_service.connection_manager.get_connection(body.connection_id)
        if existing and existing.user_id == user.user_id:
            await connector_service.connection_manager.update_connection(
                connection_id=body.connection_id,
                config=conn_config,
            )
            # Drop any cached connector instance so the next use picks up the new config.
            connector_service.connection_manager.active_connectors.pop(body.connection_id, None)
            return JSONResponse({"connection_id": body.connection_id, "status": "connected"})
        # connection_id is unknown or belongs to another user: fall through and
        # create a fresh connection for the current user instead.

    connection_id = await connector_service.connection_manager.create_connection(
        connector_type="aws_s3",
        name="Amazon S3",
        config=conn_config,
        user_id=user.user_id,
    )
    return JSONResponse({"connection_id": connection_id, "status": "connected"})
98
+
99
+
100
async def s3_list_buckets(
    connection_id: str,
    connector_service=Depends(get_connector_service),
    user: User = Depends(get_current_user),
):
    """Return the names of every bucket reachable with a stored S3 connection.

    Responds with 404 when the connection does not exist or belongs to another
    user, 400 when it is not an ``aws_s3`` connection, and 500 when the bucket
    listing itself fails.
    """
    manager = connector_service.connection_manager
    connection = await manager.get_connection(connection_id)

    # Missing and foreign connections are both reported as "not found".
    if not (connection and connection.user_id == user.user_id):
        return JSONResponse({"error": "Connection not found"}, status_code=404)
    if connection.connector_type != "aws_s3":
        return JSONResponse({"error": "Not an S3 connection"}, status_code=400)

    try:
        resource = create_s3_resource(connection.config)
        names = []
        for bucket in resource.buckets.all():
            names.append(bucket.name)
        return JSONResponse({"buckets": names})
    except Exception:
        logger.exception("Failed to list S3 buckets for connection %s", connection_id)
        return JSONResponse({"error": "Failed to list buckets"}, status_code=500)
119
+
120
+
121
async def s3_bucket_status(
    connection_id: str,
    connector_service=Depends(get_connector_service),
    session_manager=Depends(get_session_manager),
    user: User = Depends(get_current_user),
):
    """Return all buckets for an S3 connection with their ingestion status.

    Each entry carries the bucket ``name``, an ``ingested_count`` of distinct
    indexed document IDs attributed to that bucket, and ``is_synced`` (true when
    the count is non-zero). Counting is best-effort: if the OpenSearch query
    fails the error is logged and every bucket is reported with a zero count.

    Responds with 404 when the connection does not exist or belongs to another
    user, 400 when it is not an ``aws_s3`` connection, and 500 when the bucket
    listing fails.
    """
    import inspect

    connection = await connector_service.connection_manager.get_connection(connection_id)
    if not connection or connection.user_id != user.user_id:
        return JSONResponse({"error": "Connection not found"}, status_code=404)
    if connection.connector_type != "aws_s3":
        return JSONResponse({"error": "Not an S3 connection"}, status_code=400)

    # 1. List all buckets from S3
    try:
        s3 = create_s3_resource(connection.config)
        all_buckets = [b.name for b in s3.buckets.all()]
    except Exception:
        logger.exception("Failed to list buckets from S3 for connection %s", connection_id)
        return JSONResponse({"error": "Failed to list buckets"}, status_code=500)

    # 2. Count indexed documents per bucket from OpenSearch
    ingested_counts: dict = {}
    try:
        opensearch_client = session_manager.get_user_opensearch_client(
            user.user_id, user.jwt_token
        )
        query_body = {
            "size": 0,
            "query": {"term": {"connector_type": "aws_s3"}},
            "aggs": {
                "doc_ids": {
                    # At most 50000 distinct document IDs are returned by this aggregation.
                    "terms": {"field": "document_id", "size": 50000}
                }
            },
        }
        index_name = get_index_name()
        os_resp = opensearch_client.search(index=index_name, body=query_body)
        # NOTE(review): the client type is not visible here. If it is an async
        # client, search() returns an awaitable that must be awaited before the
        # response can be read; a synchronous client's result is used as-is.
        if inspect.isawaitable(os_resp):
            os_resp = await os_resp
        for bucket_entry in os_resp.get("aggregations", {}).get("doc_ids", {}).get("buckets", []):
            # Document IDs containing "::" are treated as "<bucket>::<rest>";
            # IDs without the separator are not attributed to any bucket.
            bucket_name, separator, _ = bucket_entry["key"].partition("::")
            if separator:
                ingested_counts[bucket_name] = ingested_counts.get(bucket_name, 0) + 1
    except Exception:
        # Best-effort: keep serving the bucket list (with whatever counts were
        # gathered), but record the failure instead of hiding it.
        logger.exception(
            "Failed to count ingested S3 documents for connection %s", connection_id
        )

    result = [
        {
            "name": bucket,
            "ingested_count": ingested_counts.get(bucket, 0),
            "is_synced": ingested_counts.get(bucket, 0) > 0,
        }
        for bucket in all_buckets
    ]
    return JSONResponse({"buckets": result})
@@ -0,0 +1,90 @@
1
+ """Amazon S3 / S3-compatible storage authentication and client factory."""
2
+
3
+ import os
4
+ from typing import Any, Dict, Optional
5
+
6
+ from utils.logging_config import get_logger
7
+
8
+ logger = get_logger(__name__)
9
+
10
+ _DEFAULT_REGION = "us-east-1"
11
+
12
+
13
def _resolve_credentials(config: Dict[str, Any]) -> Dict[str, Any]:
    """Resolve S3 credentials from config dict with environment variable fallback.

    Resolution order for each value: config dict → environment variable → default.
    Empty or missing config values are treated as "not set". A ``None`` config
    is accepted and treated as an empty one, so resolution falls back entirely
    to the environment.

    Args:
        config: Connector config; may contain ``access_key``, ``secret_key``,
            ``endpoint_url`` and ``region``.

    Returns:
        Dict with keys ``access_key``, ``secret_key``, ``endpoint_url``
        (``None`` when not configured) and ``region``.

    Raises:
        ValueError: If access_key or secret_key cannot be resolved.
    """
    # Stored connections may carry no config at all; treat that as empty.
    config = config or {}

    access_key: Optional[str] = config.get("access_key") or os.getenv("AWS_ACCESS_KEY_ID")
    secret_key: Optional[str] = config.get("secret_key") or os.getenv("AWS_SECRET_ACCESS_KEY")

    if not access_key or not secret_key:
        raise ValueError(
            "S3 credentials are required. Provide 'access_key' and 'secret_key' in the "
            "connector config, or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars."
        )

    # endpoint_url is optional — only inject when non-empty (real AWS users don't set it)
    endpoint_url: Optional[str] = config.get("endpoint_url") or os.getenv("AWS_S3_ENDPOINT") or None

    region: str = config.get("region") or os.getenv("AWS_REGION") or _DEFAULT_REGION

    return {
        "access_key": access_key,
        "secret_key": secret_key,
        "endpoint_url": endpoint_url,
        "region": region,
    }
41
+
42
+
43
def _build_boto3_kwargs(creds: Dict[str, Any]) -> Dict[str, Any]:
    """Build the keyword arguments for boto3.resource / boto3.client.

    Args:
        creds: Resolved credentials with ``access_key``, ``secret_key`` and
            ``region``; ``endpoint_url`` is optional.

    Returns:
        Dict with ``aws_access_key_id``, ``aws_secret_access_key`` and
        ``region_name``, plus ``endpoint_url`` only when a non-empty one is given.
    """
    kwargs: Dict[str, Any] = {
        "aws_access_key_id": creds["access_key"],
        "aws_secret_access_key": creds["secret_key"],
        "region_name": creds["region"],
    }
    # endpoint_url is optional: tolerate it being absent as well as empty/None.
    endpoint_url = creds.get("endpoint_url")
    if endpoint_url:
        kwargs["endpoint_url"] = endpoint_url
    return kwargs
53
+
54
+
55
def create_s3_resource(config: Dict[str, Any]):
    """Build a boto3 S3 resource (high-level API) from a connector config.

    Credentials, region and the optional custom endpoint are resolved from
    ``config`` with environment-variable fallback, so the same factory serves
    AWS S3 and S3-compatible services reachable through a custom endpoint.

    Raises:
        ImportError: If boto3 is not installed.
    """
    # Imported lazily so a missing boto3 produces an actionable message.
    try:
        import boto3
    except ImportError as exc:
        message = "boto3 is required for the S3 connector. Install it with: pip install boto3"
        raise ImportError(message) from exc

    boto3_kwargs = _build_boto3_kwargs(_resolve_credentials(config))
    logger.debug("Creating S3 resource with HMAC authentication (boto3)")
    return boto3.resource("s3", **boto3_kwargs)
72
+
73
+
74
def create_s3_client(config: Dict[str, Any]):
    """Build a boto3 S3 low-level client from a connector config.

    Intended for client-level operations such as list_buckets() and
    get_object_acl(). Credentials, region and the optional custom endpoint are
    resolved from ``config`` with environment-variable fallback.

    Raises:
        ImportError: If boto3 is not installed.
    """
    # Imported lazily so a missing boto3 produces an actionable message.
    try:
        import boto3
    except ImportError as exc:
        message = "boto3 is required for the S3 connector. Install it with: pip install boto3"
        raise ImportError(message) from exc

    boto3_kwargs = _build_boto3_kwargs(_resolve_credentials(config))
    logger.debug("Creating S3 client with HMAC authentication (boto3)")
    return boto3.client("s3", **boto3_kwargs)