openrag 0.3.2.dev31__tar.gz → 0.3.2.dev41__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openrag-0.3.2.dev31/src/openrag.egg-info → openrag-0.3.2.dev41}/PKG-INFO +2 -1
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/pyproject.toml +2 -1
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/connectors.py +57 -5
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/settings.py +2 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/__init__.py +4 -0
- openrag-0.3.2.dev41/src/connectors/aws_s3/__init__.py +19 -0
- openrag-0.3.2.dev41/src/connectors/aws_s3/api.py +175 -0
- openrag-0.3.2.dev41/src/connectors/aws_s3/auth.py +90 -0
- openrag-0.3.2.dev41/src/connectors/aws_s3/connector.py +277 -0
- openrag-0.3.2.dev41/src/connectors/aws_s3/models.py +13 -0
- openrag-0.3.2.dev41/src/connectors/aws_s3/support.py +51 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/connection_manager.py +57 -9
- openrag-0.3.2.dev41/src/connectors/ibm_cos/__init__.py +17 -0
- openrag-0.3.2.dev41/src/connectors/ibm_cos/api.py +202 -0
- openrag-0.3.2.dev41/src/connectors/ibm_cos/auth.py +183 -0
- openrag-0.3.2.dev41/src/connectors/ibm_cos/connector.py +375 -0
- openrag-0.3.2.dev41/src/connectors/ibm_cos/models.py +20 -0
- openrag-0.3.2.dev41/src/connectors/ibm_cos/support.py +68 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/langflow_connector_service.py +30 -29
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/main.py +29 -39
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41/src/openrag.egg-info}/PKG-INFO +2 -1
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/SOURCES.txt +12 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/requires.txt +1 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/auth_service.py +69 -4
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/docker-compose.yml +8 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/config_fields.py +41 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/env_manager.py +26 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/file_utils.py +18 -4
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/LICENSE +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/MANIFEST.in +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/README.md +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/setup.cfg +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/agent.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/auth.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/chat.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/connector_router.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/docling.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/documents.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/flows.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/keys.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/knowledge_filter.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/langflow_files.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/models.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/nudges.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/oidc.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/provider_health.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/provider_validation.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/router.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/search.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/settings.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/tasks.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/upload.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/chat.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/documents.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/knowledge_filters.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/models.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/search.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/api/v1/settings.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/auth_context.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/config_manager.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/config/model_constants.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/base.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/google_drive/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/google_drive/connector.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/google_drive/oauth.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/onedrive/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/onedrive/connector.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/onedrive/oauth.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/connector.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/oauth.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/connectors/sharepoint/utils.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/dependencies.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/processors.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/tasks.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/models/url.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/dependency_links.txt +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/entry_points.txt +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/openrag.egg-info/top_level.txt +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/api_key_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/chat_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/conversation_persistence_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/document_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/flows_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/knowledge_filter_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/langflow_file_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/langflow_history_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/langflow_mcp_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/models_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/monitor_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/search_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/session_ownership_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/services/task_service.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/session_manager.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/docker-compose.gpu.yml +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/ollama_embedding.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/ollama_llm.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/ollama_llm_text.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/watsonx_embedding.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/watsonx_llm.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/components/watsonx_llm_text.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/ingestion_flow.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/openrag_agent.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/openrag_nudges.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/flows/openrag_url_mcp.json +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/docling.pdf +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/ibm_anthropic.pdf +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/openrag-documentation.pdf +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/_assets/openrag-documents/warmup_ocr.pdf +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/cli.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/main.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/container_manager.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/managers/docling_manager.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/config.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/diagnostics.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/logs.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/monitor.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/screens/welcome.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/clipboard.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/platform.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/startup_checks.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/validation.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/utils/version_check.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/command_modal.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/diagnostics_notification.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/error_notification.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/factory_reset_warning_modal.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/flow_backup_warning_modal.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/prune_options_modal.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/upgrade_instructions_modal.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/version_mismatch_warning_modal.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/tui/widgets/waves.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/acl_utils.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/container_utils.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/docling_client.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/document_processing.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/embedding_fields.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/embeddings.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/env_utils.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/gpu_detection.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/hash_utils.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/langflow_headers.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/langflow_utils.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/logging_config.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/opensearch_queries.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/opensearch_utils.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/paths.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/__init__.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/category.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/client.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/telemetry/message_id.py +0 -0
- {openrag-0.3.2.dev31 → openrag-0.3.2.dev41}/src/utils/version_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openrag
|
|
3
|
-
Version: 0.3.2.dev31
|
|
3
|
+
Version: 0.3.2.dev41
|
|
4
4
|
Summary: OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations.
|
|
5
5
|
Classifier: Development Status :: 4 - Beta
|
|
6
6
|
Classifier: Environment :: Console
|
|
@@ -29,6 +29,7 @@ Requires-Dist: python-multipart>=0.0.20
|
|
|
29
29
|
Requires-Dist: fastapi>=0.115.0
|
|
30
30
|
Requires-Dist: uvicorn>=0.35.0
|
|
31
31
|
Requires-Dist: boto3>=1.35.0
|
|
32
|
+
Requires-Dist: ibm-cos-sdk>=2.13.0
|
|
32
33
|
Requires-Dist: psutil>=7.0.0
|
|
33
34
|
Requires-Dist: rich>=13.0.0
|
|
34
35
|
Requires-Dist: textual>=0.45.0
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "openrag"
|
|
7
|
-
version = "0.3.2.dev31"
|
|
7
|
+
version = "0.3.2.dev41"
|
|
8
8
|
description = "OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.13"
|
|
@@ -35,6 +35,7 @@ dependencies = [
|
|
|
35
35
|
"fastapi>=0.115.0",
|
|
36
36
|
"uvicorn>=0.35.0",
|
|
37
37
|
"boto3>=1.35.0",
|
|
38
|
+
"ibm-cos-sdk>=2.13.0",
|
|
38
39
|
"psutil>=7.0.0",
|
|
39
40
|
"rich>=13.0.0",
|
|
40
41
|
"textual>=0.45.0",
|
|
@@ -88,6 +88,11 @@ async def get_synced_file_ids_for_connector(
|
|
|
88
88
|
class ConnectorSyncBody(BaseModel):
|
|
89
89
|
max_files: Optional[int] = None
|
|
90
90
|
selected_files: Optional[List[Any]] = None
|
|
91
|
+
# When True, ingest ALL files from the connector (bypasses the existing-files gate).
|
|
92
|
+
# Used by direct-sync providers like IBM COS on initial ingest.
|
|
93
|
+
sync_all: bool = False
|
|
94
|
+
# When set, only ingest files from these buckets (IBM COS specific).
|
|
95
|
+
bucket_filter: Optional[List[str]] = None
|
|
91
96
|
|
|
92
97
|
|
|
93
98
|
async def list_connectors(
|
|
@@ -96,8 +101,8 @@ async def list_connectors(
|
|
|
96
101
|
):
|
|
97
102
|
"""List available connector types with metadata"""
|
|
98
103
|
try:
|
|
99
|
-
connector_types = (
|
|
100
|
-
|
|
104
|
+
connector_types = connector_service.connection_manager.get_available_connector_types(
|
|
105
|
+
user_id=user.user_id
|
|
101
106
|
)
|
|
102
107
|
return JSONResponse({"connectors": connector_types})
|
|
103
108
|
except Exception as e:
|
|
@@ -200,6 +205,51 @@ async def connector_sync(
|
|
|
200
205
|
jwt_token=jwt_token,
|
|
201
206
|
file_infos=file_infos,
|
|
202
207
|
)
|
|
208
|
+
elif body.sync_all or body.bucket_filter:
|
|
209
|
+
# Full ingest: discover and ingest all files (or files from specific buckets).
|
|
210
|
+
# Used by direct-sync providers (IBM COS) on initial ingest or per-bucket sync.
|
|
211
|
+
logger.info(
|
|
212
|
+
"Full connector ingest requested",
|
|
213
|
+
connector_type=connector_type,
|
|
214
|
+
bucket_filter=body.bucket_filter,
|
|
215
|
+
)
|
|
216
|
+
connector = await connector_service.get_connector(working_connection.connection_id)
|
|
217
|
+
if body.bucket_filter:
|
|
218
|
+
# List only files from the requested buckets, then sync_specific_files
|
|
219
|
+
original_buckets = connector.bucket_names
|
|
220
|
+
connector.bucket_names = body.bucket_filter
|
|
221
|
+
try:
|
|
222
|
+
all_file_ids = []
|
|
223
|
+
page_token = None
|
|
224
|
+
while True:
|
|
225
|
+
result = await connector.list_files(page_token=page_token)
|
|
226
|
+
for f in result.get("files", []):
|
|
227
|
+
all_file_ids.append(f["id"])
|
|
228
|
+
page_token = result.get("next_page_token")
|
|
229
|
+
if not page_token:
|
|
230
|
+
break
|
|
231
|
+
finally:
|
|
232
|
+
connector.bucket_names = original_buckets
|
|
233
|
+
|
|
234
|
+
if not all_file_ids:
|
|
235
|
+
return JSONResponse(
|
|
236
|
+
{"status": "no_files", "message": "No files found in the selected buckets."},
|
|
237
|
+
status_code=200,
|
|
238
|
+
)
|
|
239
|
+
task_id = await connector_service.sync_specific_files(
|
|
240
|
+
working_connection.connection_id,
|
|
241
|
+
user.user_id,
|
|
242
|
+
all_file_ids,
|
|
243
|
+
jwt_token=jwt_token,
|
|
244
|
+
)
|
|
245
|
+
else:
|
|
246
|
+
# sync_all: ingest everything the connector can see
|
|
247
|
+
task_id = await connector_service.sync_connector_files(
|
|
248
|
+
working_connection.connection_id,
|
|
249
|
+
user.user_id,
|
|
250
|
+
max_files=max_files,
|
|
251
|
+
jwt_token=jwt_token,
|
|
252
|
+
)
|
|
203
253
|
else:
|
|
204
254
|
# No files specified - sync only files already in OpenSearch for this connector
|
|
205
255
|
# This ensures deleted files stay deleted
|
|
@@ -209,7 +259,7 @@ async def connector_sync(
|
|
|
209
259
|
session_manager=session_manager,
|
|
210
260
|
jwt_token=jwt_token,
|
|
211
261
|
)
|
|
212
|
-
|
|
262
|
+
|
|
213
263
|
if not existing_file_ids and not existing_filenames:
|
|
214
264
|
return JSONResponse(
|
|
215
265
|
{
|
|
@@ -218,7 +268,7 @@ async def connector_sync(
|
|
|
218
268
|
},
|
|
219
269
|
status_code=200,
|
|
220
270
|
)
|
|
221
|
-
|
|
271
|
+
|
|
222
272
|
# If we have document_ids (connector file IDs), use sync_specific_files
|
|
223
273
|
# Otherwise, use filename filtering with sync_connector_files
|
|
224
274
|
if existing_file_ids:
|
|
@@ -602,6 +652,8 @@ async def connector_disconnect(
|
|
|
602
652
|
)
|
|
603
653
|
|
|
604
654
|
|
|
655
|
+
# ---------------------------------------------------------------------------
|
|
656
|
+
|
|
605
657
|
async def sync_all_connectors(
|
|
606
658
|
connector_service=Depends(get_connector_service),
|
|
607
659
|
session_manager=Depends(get_session_manager),
|
|
@@ -615,7 +667,7 @@ async def sync_all_connectors(
|
|
|
615
667
|
jwt_token = user.jwt_token
|
|
616
668
|
|
|
617
669
|
# Cloud connector types to sync
|
|
618
|
-
cloud_connector_types = ["google_drive", "onedrive", "sharepoint"]
|
|
670
|
+
cloud_connector_types = ["google_drive", "onedrive", "sharepoint", "ibm_cos", "aws_s3"]
|
|
619
671
|
|
|
620
672
|
all_task_ids = []
|
|
621
673
|
synced_connectors = []
|
|
@@ -51,6 +51,8 @@ GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
|
|
|
51
51
|
GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
|
|
52
52
|
DOCLING_OCR_ENGINE = os.getenv("DOCLING_OCR_ENGINE")
|
|
53
53
|
|
|
54
|
+
IBM_AUTH_ENABLED = os.getenv("IBM_AUTH_ENABLED", "false").lower() in ("true", "1", "yes")
|
|
55
|
+
|
|
54
56
|
# Ingestion configuration
|
|
55
57
|
DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
|
|
56
58
|
"DISABLE_INGEST_WITH_LANGFLOW", "false"
|
|
@@ -2,10 +2,14 @@ from .base import BaseConnector
|
|
|
2
2
|
from .google_drive import GoogleDriveConnector
|
|
3
3
|
from .sharepoint import SharePointConnector
|
|
4
4
|
from .onedrive import OneDriveConnector
|
|
5
|
+
from .ibm_cos import IBMCOSConnector
|
|
6
|
+
from .aws_s3 import S3Connector
|
|
5
7
|
|
|
6
8
|
__all__ = [
|
|
7
9
|
"BaseConnector",
|
|
8
10
|
"GoogleDriveConnector",
|
|
9
11
|
"SharePointConnector",
|
|
10
12
|
"OneDriveConnector",
|
|
13
|
+
"IBMCOSConnector",
|
|
14
|
+
"S3Connector",
|
|
11
15
|
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Amazon S3 / S3-compatible connector for OpenRAG."""
|
|
2
|
+
|
|
3
|
+
from .connector import S3Connector
|
|
4
|
+
from .models import S3ConfigureBody
|
|
5
|
+
from .api import (
|
|
6
|
+
s3_defaults,
|
|
7
|
+
s3_configure,
|
|
8
|
+
s3_list_buckets,
|
|
9
|
+
s3_bucket_status,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"S3Connector",
|
|
14
|
+
"S3ConfigureBody",
|
|
15
|
+
"s3_defaults",
|
|
16
|
+
"s3_configure",
|
|
17
|
+
"s3_list_buckets",
|
|
18
|
+
"s3_bucket_status",
|
|
19
|
+
]
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""FastAPI route handlers for AWS S3-specific endpoints."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from fastapi import Depends
|
|
6
|
+
from fastapi.responses import JSONResponse
|
|
7
|
+
|
|
8
|
+
from config.settings import get_index_name
|
|
9
|
+
from dependencies import get_connector_service, get_session_manager, get_current_user
|
|
10
|
+
from session_manager import User
|
|
11
|
+
from utils.logging_config import get_logger
|
|
12
|
+
|
|
13
|
+
from .auth import create_s3_resource
|
|
14
|
+
from .models import S3ConfigureBody
|
|
15
|
+
from .support import build_s3_config
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
async def s3_defaults(
|
|
21
|
+
connector_service=Depends(get_connector_service),
|
|
22
|
+
user: User = Depends(get_current_user),
|
|
23
|
+
):
|
|
24
|
+
"""Return current S3 env-var defaults for pre-filling the config dialog.
|
|
25
|
+
|
|
26
|
+
Sensitive values (secret key) are masked — only whether they are set is returned.
|
|
27
|
+
"""
|
|
28
|
+
access_key = os.getenv("AWS_ACCESS_KEY_ID", "")
|
|
29
|
+
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
|
|
30
|
+
endpoint_url = os.getenv("AWS_S3_ENDPOINT", "")
|
|
31
|
+
region = os.getenv("AWS_REGION", "")
|
|
32
|
+
|
|
33
|
+
connections = await connector_service.connection_manager.list_connections(
|
|
34
|
+
user_id=user.user_id, connector_type="aws_s3"
|
|
35
|
+
)
|
|
36
|
+
conn_config = connections[0].config or {} if connections else {}
|
|
37
|
+
|
|
38
|
+
def _pick(conn_key, env_val):
|
|
39
|
+
return conn_config.get(conn_key) or env_val
|
|
40
|
+
|
|
41
|
+
return JSONResponse({
|
|
42
|
+
"access_key_set": bool(access_key or conn_config.get("access_key")),
|
|
43
|
+
"secret_key_set": bool(secret_key or conn_config.get("secret_key")),
|
|
44
|
+
"endpoint": _pick("endpoint_url", endpoint_url),
|
|
45
|
+
"region": _pick("region", region),
|
|
46
|
+
"bucket_names": conn_config.get("bucket_names", []),
|
|
47
|
+
"connection_id": connections[0].connection_id if connections else None,
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
async def s3_configure(
|
|
52
|
+
body: S3ConfigureBody,
|
|
53
|
+
connector_service=Depends(get_connector_service),
|
|
54
|
+
user: User = Depends(get_current_user),
|
|
55
|
+
):
|
|
56
|
+
"""Create or update an S3 connection with explicit credentials.
|
|
57
|
+
|
|
58
|
+
Tests the credentials by listing buckets, then persists the connection.
|
|
59
|
+
"""
|
|
60
|
+
existing_connections = await connector_service.connection_manager.list_connections(
|
|
61
|
+
user_id=user.user_id, connector_type="aws_s3"
|
|
62
|
+
)
|
|
63
|
+
existing_config = existing_connections[0].config if existing_connections else {}
|
|
64
|
+
|
|
65
|
+
conn_config, error = build_s3_config(body, existing_config)
|
|
66
|
+
if error:
|
|
67
|
+
return JSONResponse({"error": error}, status_code=400)
|
|
68
|
+
|
|
69
|
+
# Test credentials
|
|
70
|
+
try:
|
|
71
|
+
s3 = create_s3_resource(conn_config)
|
|
72
|
+
list(s3.buckets.all())
|
|
73
|
+
except Exception:
|
|
74
|
+
logger.exception("Failed to connect to S3 during credential test.")
|
|
75
|
+
return JSONResponse(
|
|
76
|
+
{"error": "Could not connect to S3 with the provided configuration."},
|
|
77
|
+
status_code=400,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# Persist: update existing connection or create a new one
|
|
81
|
+
if body.connection_id:
|
|
82
|
+
existing = await connector_service.connection_manager.get_connection(body.connection_id)
|
|
83
|
+
if existing and existing.user_id == user.user_id:
|
|
84
|
+
await connector_service.connection_manager.update_connection(
|
|
85
|
+
connection_id=body.connection_id,
|
|
86
|
+
config=conn_config,
|
|
87
|
+
)
|
|
88
|
+
connector_service.connection_manager.active_connectors.pop(body.connection_id, None)
|
|
89
|
+
return JSONResponse({"connection_id": body.connection_id, "status": "connected"})
|
|
90
|
+
|
|
91
|
+
connection_id = await connector_service.connection_manager.create_connection(
|
|
92
|
+
connector_type="aws_s3",
|
|
93
|
+
name="Amazon S3",
|
|
94
|
+
config=conn_config,
|
|
95
|
+
user_id=user.user_id,
|
|
96
|
+
)
|
|
97
|
+
return JSONResponse({"connection_id": connection_id, "status": "connected"})
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
async def s3_list_buckets(
|
|
101
|
+
connection_id: str,
|
|
102
|
+
connector_service=Depends(get_connector_service),
|
|
103
|
+
user: User = Depends(get_current_user),
|
|
104
|
+
):
|
|
105
|
+
"""List all buckets accessible with the stored S3 credentials."""
|
|
106
|
+
connection = await connector_service.connection_manager.get_connection(connection_id)
|
|
107
|
+
if not connection or connection.user_id != user.user_id:
|
|
108
|
+
return JSONResponse({"error": "Connection not found"}, status_code=404)
|
|
109
|
+
if connection.connector_type != "aws_s3":
|
|
110
|
+
return JSONResponse({"error": "Not an S3 connection"}, status_code=400)
|
|
111
|
+
|
|
112
|
+
try:
|
|
113
|
+
s3 = create_s3_resource(connection.config)
|
|
114
|
+
buckets = [b.name for b in s3.buckets.all()]
|
|
115
|
+
return JSONResponse({"buckets": buckets})
|
|
116
|
+
except Exception:
|
|
117
|
+
logger.exception("Failed to list S3 buckets for connection %s", connection_id)
|
|
118
|
+
return JSONResponse({"error": "Failed to list buckets"}, status_code=500)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
async def s3_bucket_status(
|
|
122
|
+
connection_id: str,
|
|
123
|
+
connector_service=Depends(get_connector_service),
|
|
124
|
+
session_manager=Depends(get_session_manager),
|
|
125
|
+
user: User = Depends(get_current_user),
|
|
126
|
+
):
|
|
127
|
+
"""Return all buckets for an S3 connection with their ingestion status."""
|
|
128
|
+
connection = await connector_service.connection_manager.get_connection(connection_id)
|
|
129
|
+
if not connection or connection.user_id != user.user_id:
|
|
130
|
+
return JSONResponse({"error": "Connection not found"}, status_code=404)
|
|
131
|
+
if connection.connector_type != "aws_s3":
|
|
132
|
+
return JSONResponse({"error": "Not an S3 connection"}, status_code=400)
|
|
133
|
+
|
|
134
|
+
# 1. List all buckets from S3
|
|
135
|
+
try:
|
|
136
|
+
s3 = create_s3_resource(connection.config)
|
|
137
|
+
all_buckets = [b.name for b in s3.buckets.all()]
|
|
138
|
+
except Exception as exc:
|
|
139
|
+
logger.exception("Failed to list buckets from S3 for connection %s", connection_id)
|
|
140
|
+
return JSONResponse({"error": "Failed to list buckets"}, status_code=500)
|
|
141
|
+
|
|
142
|
+
# 2. Count indexed documents per bucket from OpenSearch
|
|
143
|
+
ingested_counts: dict = {}
|
|
144
|
+
try:
|
|
145
|
+
opensearch_client = session_manager.get_user_opensearch_client(
|
|
146
|
+
user.user_id, user.jwt_token
|
|
147
|
+
)
|
|
148
|
+
query_body = {
|
|
149
|
+
"size": 0,
|
|
150
|
+
"query": {"term": {"connector_type": "aws_s3"}},
|
|
151
|
+
"aggs": {
|
|
152
|
+
"doc_ids": {
|
|
153
|
+
"terms": {"field": "document_id", "size": 50000}
|
|
154
|
+
}
|
|
155
|
+
},
|
|
156
|
+
}
|
|
157
|
+
index_name = get_index_name()
|
|
158
|
+
os_resp = opensearch_client.search(index=index_name, body=query_body)
|
|
159
|
+
for bucket_entry in os_resp.get("aggregations", {}).get("doc_ids", {}).get("buckets", []):
|
|
160
|
+
doc_id = bucket_entry["key"]
|
|
161
|
+
if "::" in doc_id:
|
|
162
|
+
bucket_name = doc_id.split("::")[0]
|
|
163
|
+
ingested_counts[bucket_name] = ingested_counts.get(bucket_name, 0) + 1
|
|
164
|
+
except Exception:
|
|
165
|
+
pass # OpenSearch unavailable — show zero counts
|
|
166
|
+
|
|
167
|
+
result = [
|
|
168
|
+
{
|
|
169
|
+
"name": bucket,
|
|
170
|
+
"ingested_count": ingested_counts.get(bucket, 0),
|
|
171
|
+
"is_synced": ingested_counts.get(bucket, 0) > 0,
|
|
172
|
+
}
|
|
173
|
+
for bucket in all_buckets
|
|
174
|
+
]
|
|
175
|
+
return JSONResponse({"buckets": result})
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Amazon S3 / S3-compatible storage authentication and client factory."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
from utils.logging_config import get_logger
|
|
7
|
+
|
|
8
|
+
logger = get_logger(__name__)
|
|
9
|
+
|
|
10
|
+
_DEFAULT_REGION = "us-east-1"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _resolve_credentials(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
14
|
+
"""Resolve S3 credentials from config dict with environment variable fallback.
|
|
15
|
+
|
|
16
|
+
Resolution order for each value: config dict → environment variable → default.
|
|
17
|
+
|
|
18
|
+
Raises:
|
|
19
|
+
ValueError: If access_key or secret_key cannot be resolved.
|
|
20
|
+
"""
|
|
21
|
+
access_key: Optional[str] = config.get("access_key") or os.getenv("AWS_ACCESS_KEY_ID")
|
|
22
|
+
secret_key: Optional[str] = config.get("secret_key") or os.getenv("AWS_SECRET_ACCESS_KEY")
|
|
23
|
+
|
|
24
|
+
if not access_key or not secret_key:
|
|
25
|
+
raise ValueError(
|
|
26
|
+
"S3 credentials are required. Provide 'access_key' and 'secret_key' in the "
|
|
27
|
+
"connector config, or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars."
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# endpoint_url is optional — only inject when non-empty (real AWS users don't set it)
|
|
31
|
+
endpoint_url: Optional[str] = config.get("endpoint_url") or os.getenv("AWS_S3_ENDPOINT") or None
|
|
32
|
+
|
|
33
|
+
region: str = config.get("region") or os.getenv("AWS_REGION") or _DEFAULT_REGION
|
|
34
|
+
|
|
35
|
+
return {
|
|
36
|
+
"access_key": access_key,
|
|
37
|
+
"secret_key": secret_key,
|
|
38
|
+
"endpoint_url": endpoint_url,
|
|
39
|
+
"region": region,
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _build_boto3_kwargs(creds: Dict[str, Any]) -> Dict[str, Any]:
|
|
44
|
+
"""Build the keyword arguments for boto3.resource / boto3.client."""
|
|
45
|
+
kwargs: Dict[str, Any] = {
|
|
46
|
+
"aws_access_key_id": creds["access_key"],
|
|
47
|
+
"aws_secret_access_key": creds["secret_key"],
|
|
48
|
+
"region_name": creds["region"],
|
|
49
|
+
}
|
|
50
|
+
if creds["endpoint_url"]:
|
|
51
|
+
kwargs["endpoint_url"] = creds["endpoint_url"]
|
|
52
|
+
return kwargs
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def create_s3_resource(config: Dict[str, Any]):
|
|
56
|
+
"""Return a boto3 S3 resource (high-level API) for bucket/object access.
|
|
57
|
+
|
|
58
|
+
Works with AWS S3, MinIO, Cloudflare R2, and any S3-compatible service.
|
|
59
|
+
"""
|
|
60
|
+
try:
|
|
61
|
+
import boto3
|
|
62
|
+
except ImportError as exc:
|
|
63
|
+
raise ImportError(
|
|
64
|
+
"boto3 is required for the S3 connector. "
|
|
65
|
+
"Install it with: pip install boto3"
|
|
66
|
+
) from exc
|
|
67
|
+
|
|
68
|
+
creds = _resolve_credentials(config)
|
|
69
|
+
kwargs = _build_boto3_kwargs(creds)
|
|
70
|
+
logger.debug("Creating S3 resource with HMAC authentication (boto3)")
|
|
71
|
+
return boto3.resource("s3", **kwargs)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def create_s3_client(config: Dict[str, Any]):
|
|
75
|
+
"""Return a boto3 S3 low-level client.
|
|
76
|
+
|
|
77
|
+
Used for operations such as list_buckets() and get_object_acl().
|
|
78
|
+
"""
|
|
79
|
+
try:
|
|
80
|
+
import boto3
|
|
81
|
+
except ImportError as exc:
|
|
82
|
+
raise ImportError(
|
|
83
|
+
"boto3 is required for the S3 connector. "
|
|
84
|
+
"Install it with: pip install boto3"
|
|
85
|
+
) from exc
|
|
86
|
+
|
|
87
|
+
creds = _resolve_credentials(config)
|
|
88
|
+
kwargs = _build_boto3_kwargs(creds)
|
|
89
|
+
logger.debug("Creating S3 client with HMAC authentication (boto3)")
|
|
90
|
+
return boto3.client("s3", **kwargs)
|