openrag 0.5.0.dev30__tar.gz → 0.5.0.dev32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openrag-0.5.0.dev30/src/openrag.egg-info → openrag-0.5.0.dev32}/PKG-INFO +1 -1
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/pyproject.toml +1 -1
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/settings.py +4 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/google_drive/connector.py +144 -148
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/langflow_connector_service.py +5 -1
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/service.py +20 -7
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/models/processors.py +15 -7
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32/src/openrag.egg-info}/PKG-INFO +1 -1
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/auth_service.py +1 -3
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/screens/monitor.py +85 -93
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/LICENSE +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/MANIFEST.in +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/README.md +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/setup.cfg +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/agent.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/auth.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/chat.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/connector_router.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/connectors.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/docling.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/documents.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/flows.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/keys.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/knowledge_filter.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/langflow_files.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/models.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/nudges.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/oidc.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/provider_health.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/provider_validation.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/router.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/search.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/tasks.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/upload.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/v1/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/v1/chat.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/v1/documents.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/v1/knowledge_filters.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/v1/models.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/v1/search.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/api/v1/settings.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/auth/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/auth/ibm_auth.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/auth_context.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/bootstrap.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/config/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/config/config_manager.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/config/embedding_constants.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/config/model_constants.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/config/paths.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/config/settings.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/aws_s3/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/aws_s3/api.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/aws_s3/auth.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/aws_s3/connector.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/aws_s3/models.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/aws_s3/support.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/base.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/connection_manager.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/google_drive/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/google_drive/oauth.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/ibm_cos/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/ibm_cos/api.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/ibm_cos/auth.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/ibm_cos/connector.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/ibm_cos/models.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/ibm_cos/support.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/onedrive/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/onedrive/connector.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/onedrive/oauth.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/sharepoint/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/sharepoint/connector.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/sharepoint/oauth.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/connectors/sharepoint/utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/dependencies.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/main.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/mcp_http/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/mcp_http/server.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/models/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/models/tasks.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/models/url.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/openrag.egg-info/SOURCES.txt +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/openrag.egg-info/dependency_links.txt +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/openrag.egg-info/entry_points.txt +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/openrag.egg-info/requires.txt +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/openrag.egg-info/top_level.txt +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/api_key_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/chat_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/conversation_persistence_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/document_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/flows_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/knowledge_filter_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/langflow_file_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/langflow_history_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/langflow_mcp_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/models_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/monitor_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/search_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/session_ownership_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/services/task_service.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/session_manager.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/docker-compose.gpu.yml +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/docker-compose.yml +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/components/ollama_embedding.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/components/ollama_llm.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/components/ollama_llm_text.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/components/watsonx_embedding.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/components/watsonx_llm.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/components/watsonx_llm_text.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/ingestion_flow.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/openrag_agent.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/openrag_nudges.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/flows/openrag_url_mcp.json +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/openrag-documents/docling.pdf +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/openrag-documents/ibm_anthropic.pdf +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/_assets/openrag-documents/warmup_ocr.pdf +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/cli.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/config_fields.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/main.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/managers/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/managers/container_manager.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/managers/docling_manager.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/managers/env_manager.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/screens/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/screens/config.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/screens/diagnostics.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/screens/logs.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/screens/welcome.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/utils/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/utils/clipboard.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/utils/platform.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/utils/startup_checks.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/utils/validation.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/utils/version_check.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/command_modal.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/diagnostics_notification.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/error_notification.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/factory_reset_warning_modal.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/flow_backup_warning_modal.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/prune_options_modal.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/upgrade_instructions_modal.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/version_mismatch_warning_modal.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/tui/widgets/waves.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/acl_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/container_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/docling_client.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/document_processing.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/embedding_fields.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/embeddings.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/encryption.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/env_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/file_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/gpu_detection.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/hash_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/langflow_headers.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/langflow_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/logging_config.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/opensearch_queries.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/opensearch_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/paths.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/run_mode_utils.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/telemetry/__init__.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/telemetry/category.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/telemetry/client.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/telemetry/message_id.py +0 -0
- {openrag-0.5.0.dev30 → openrag-0.5.0.dev32}/src/utils/version_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openrag
|
|
3
|
-
Version: 0.5.0.dev30
|
|
3
|
+
Version: 0.5.0.dev32
|
|
4
4
|
Summary: OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations.
|
|
5
5
|
Classifier: Development Status :: 4 - Beta
|
|
6
6
|
Classifier: Environment :: Console
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "openrag"
|
|
7
|
-
version = "0.5.0.dev30"
|
|
7
|
+
version = "0.5.0.dev32"
|
|
8
8
|
description = "OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.13"
|
|
@@ -1940,6 +1940,10 @@ async def rollback_onboarding(
|
|
|
1940
1940
|
current_config.knowledge.embedding_model = ""
|
|
1941
1941
|
current_config.onboarding.openrag_docs_ingested_version = None
|
|
1942
1942
|
current_config.onboarding.openrag_docs_remote_signature = None
|
|
1943
|
+
current_config.onboarding.assistant_message = None
|
|
1944
|
+
current_config.onboarding.selected_nudge = None
|
|
1945
|
+
current_config.onboarding.card_steps = None
|
|
1946
|
+
current_config.onboarding.upload_steps = None
|
|
1943
1947
|
|
|
1944
1948
|
embedding_only = body.embedding_only if body else False
|
|
1945
1949
|
|
|
@@ -3,9 +3,10 @@ import io
|
|
|
3
3
|
import os
|
|
4
4
|
import time
|
|
5
5
|
from collections import deque
|
|
6
|
+
from collections.abc import Iterable
|
|
6
7
|
from dataclasses import dataclass
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
9
10
|
|
|
10
11
|
from googleapiclient.errors import HttpError
|
|
11
12
|
from googleapiclient.http import MediaIoBaseDownload
|
|
@@ -17,6 +18,7 @@ from .oauth import GoogleDriveOAuth
|
|
|
17
18
|
|
|
18
19
|
logger = get_logger(__name__)
|
|
19
20
|
|
|
21
|
+
|
|
20
22
|
# -------------------------
|
|
21
23
|
# Config model
|
|
22
24
|
# -------------------------
|
|
@@ -27,26 +29,26 @@ class GoogleDriveConfig:
|
|
|
27
29
|
token_file: str
|
|
28
30
|
|
|
29
31
|
# Selective sync
|
|
30
|
-
file_ids:
|
|
31
|
-
folder_ids:
|
|
32
|
+
file_ids: list[str] | None = None
|
|
33
|
+
folder_ids: list[str] | None = None
|
|
32
34
|
recursive: bool = True
|
|
33
35
|
|
|
34
36
|
# Shared Drives control
|
|
35
|
-
drive_id:
|
|
36
|
-
corpora:
|
|
37
|
+
drive_id: str | None = None # when set, we use corpora='drive'
|
|
38
|
+
corpora: str | None = None # 'user' | 'drive' | 'domain'; auto-picked if None
|
|
37
39
|
|
|
38
40
|
# Optional filtering
|
|
39
|
-
include_mime_types:
|
|
40
|
-
exclude_mime_types:
|
|
41
|
+
include_mime_types: list[str] | None = None
|
|
42
|
+
exclude_mime_types: list[str] | None = None
|
|
41
43
|
|
|
42
44
|
# Export overrides for Google-native types
|
|
43
|
-
export_format_overrides:
|
|
45
|
+
export_format_overrides: dict[str, str] | None = None # mime -> export-mime
|
|
44
46
|
|
|
45
47
|
# Changes API state persistence (store these in your DB/kv if needed)
|
|
46
|
-
changes_page_token:
|
|
48
|
+
changes_page_token: str | None = None
|
|
47
49
|
|
|
48
50
|
# Optional: resource_id for webhook cleanup
|
|
49
|
-
resource_id:
|
|
51
|
+
resource_id: str | None = None
|
|
50
52
|
|
|
51
53
|
|
|
52
54
|
# -------------------------
|
|
@@ -89,7 +91,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
89
91
|
# Otherwise, implement your custom logic here.
|
|
90
92
|
logger.debug(f"Emitting document: {doc.id} ({doc.filename})")
|
|
91
93
|
|
|
92
|
-
def __init__(self, config:
|
|
94
|
+
def __init__(self, config: dict[str, Any]) -> None:
|
|
93
95
|
# Read from config OR env (backend env, not NEXT_PUBLIC_*):
|
|
94
96
|
env_client_id = os.getenv(self.CLIENT_ID_ENV_VAR)
|
|
95
97
|
env_client_secret = os.getenv(self.CLIENT_SECRET_ENV_VAR)
|
|
@@ -99,6 +101,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
99
101
|
|
|
100
102
|
# Token file default - use data directory for persistence
|
|
101
103
|
from config.paths import get_data_file
|
|
104
|
+
|
|
102
105
|
token_file = config.get("token_file") or get_data_file("google_drive_token.json")
|
|
103
106
|
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
|
|
104
107
|
|
|
@@ -114,9 +117,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
114
117
|
)
|
|
115
118
|
|
|
116
119
|
# Normalize incoming IDs from any of the supported alias keys
|
|
117
|
-
def _first_present_list(
|
|
118
|
-
cfg: Dict[str, Any], keys: Iterable[str]
|
|
119
|
-
) -> Optional[List[str]]:
|
|
120
|
+
def _first_present_list(cfg: dict[str, Any], keys: Iterable[str]) -> list[str] | None:
|
|
120
121
|
for k in keys:
|
|
121
122
|
v = cfg.get(k)
|
|
122
123
|
if v: # accept non-empty list
|
|
@@ -153,11 +154,15 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
153
154
|
# Drive client is built in authenticate()
|
|
154
155
|
from google.oauth2.credentials import Credentials
|
|
155
156
|
|
|
156
|
-
self.creds:
|
|
157
|
+
self.creds: Credentials | None = None
|
|
157
158
|
self.service: Any = None
|
|
158
159
|
|
|
159
160
|
# cache of resolved shortcutId -> target file metadata
|
|
160
|
-
self._shortcut_cache:
|
|
161
|
+
self._shortcut_cache: dict[str, dict[str, Any]] = {}
|
|
162
|
+
|
|
163
|
+
import threading
|
|
164
|
+
|
|
165
|
+
self._lock = threading.Lock()
|
|
161
166
|
|
|
162
167
|
# Authentication state
|
|
163
168
|
self._authenticated: bool = False
|
|
@@ -170,20 +175,20 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
170
175
|
self._shortcut_cache.clear()
|
|
171
176
|
|
|
172
177
|
@property
|
|
173
|
-
def _drives_get_flags(self) ->
|
|
178
|
+
def _drives_get_flags(self) -> dict[str, Any]:
|
|
174
179
|
"""
|
|
175
180
|
Flags valid for GET-like calls (files.get, changes.getStartPageToken).
|
|
176
181
|
"""
|
|
177
182
|
return {"supportsAllDrives": True}
|
|
178
183
|
|
|
179
184
|
@property
|
|
180
|
-
def _drives_list_flags(self) ->
|
|
185
|
+
def _drives_list_flags(self) -> dict[str, Any]:
|
|
181
186
|
"""
|
|
182
187
|
Flags valid for LIST-like calls (files.list, changes.list).
|
|
183
188
|
"""
|
|
184
189
|
return {"supportsAllDrives": True, "includeItemsFromAllDrives": True}
|
|
185
190
|
|
|
186
|
-
def _pick_corpora_args(self) ->
|
|
191
|
+
def _pick_corpora_args(self) -> dict[str, Any]:
|
|
187
192
|
"""
|
|
188
193
|
Decide corpora/driveId based on config.
|
|
189
194
|
|
|
@@ -197,7 +202,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
197
202
|
# Default to allDrives so Picker selections from Shared Drives work without explicit drive_id
|
|
198
203
|
return {"corpora": "allDrives"}
|
|
199
204
|
|
|
200
|
-
def _resolve_shortcut(self, file_obj:
|
|
205
|
+
def _resolve_shortcut(self, file_obj: dict[str, Any]) -> dict[str, Any]:
|
|
201
206
|
"""
|
|
202
207
|
If a file is a shortcut, fetch and return the real target metadata.
|
|
203
208
|
"""
|
|
@@ -234,7 +239,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
234
239
|
# shortcut target not accessible
|
|
235
240
|
return file_obj
|
|
236
241
|
|
|
237
|
-
def _list_children(self, folder_id: str) ->
|
|
242
|
+
def _list_children(self, folder_id: str) -> list[dict[str, Any]]:
|
|
238
243
|
"""
|
|
239
244
|
List immediate children of a folder.
|
|
240
245
|
"""
|
|
@@ -245,7 +250,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
245
250
|
|
|
246
251
|
query = f"'{folder_id}' in parents and trashed = false"
|
|
247
252
|
page_token = None
|
|
248
|
-
results:
|
|
253
|
+
results: list[dict[str, Any]] = []
|
|
249
254
|
|
|
250
255
|
while True:
|
|
251
256
|
resp = (
|
|
@@ -272,13 +277,13 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
272
277
|
|
|
273
278
|
return results
|
|
274
279
|
|
|
275
|
-
def _bfs_expand_folders(self, folder_ids: Iterable[str]) ->
|
|
280
|
+
def _bfs_expand_folders(self, folder_ids: Iterable[str]) -> list[dict[str, Any]]:
|
|
276
281
|
"""
|
|
277
282
|
Breadth-first traversal to expand folders to all descendant files (if recursive),
|
|
278
283
|
or just immediate children (if not recursive). Folders themselves are returned
|
|
279
284
|
as items too, but filtered later.
|
|
280
285
|
"""
|
|
281
|
-
out:
|
|
286
|
+
out: list[dict[str, Any]] = []
|
|
282
287
|
queue = deque(folder_ids)
|
|
283
288
|
|
|
284
289
|
while queue:
|
|
@@ -295,7 +300,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
295
300
|
|
|
296
301
|
return out
|
|
297
302
|
|
|
298
|
-
def _get_file_meta_by_id(self, file_id: str) ->
|
|
303
|
+
def _get_file_meta_by_id(self, file_id: str) -> dict[str, Any] | None:
|
|
299
304
|
"""
|
|
300
305
|
Fetch metadata for a file by ID (resolving shortcuts).
|
|
301
306
|
"""
|
|
@@ -304,30 +309,31 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
304
309
|
"Google Drive service is not initialized. Please authenticate first."
|
|
305
310
|
)
|
|
306
311
|
try:
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
312
|
+
with self._lock:
|
|
313
|
+
meta = (
|
|
314
|
+
self.service.files()
|
|
315
|
+
.get(
|
|
316
|
+
fileId=file_id,
|
|
317
|
+
fields=(
|
|
318
|
+
"id, name, mimeType, modifiedTime, createdTime, size, "
|
|
319
|
+
"webViewLink, parents, shortcutDetails, driveId"
|
|
320
|
+
),
|
|
321
|
+
**self._drives_get_flags,
|
|
322
|
+
)
|
|
323
|
+
.execute()
|
|
316
324
|
)
|
|
317
|
-
.execute()
|
|
318
|
-
)
|
|
319
325
|
return self._resolve_shortcut(meta)
|
|
320
326
|
except HttpError:
|
|
321
327
|
return None
|
|
322
328
|
|
|
323
|
-
def _filter_by_mime(self, items: Iterable[
|
|
329
|
+
def _filter_by_mime(self, items: Iterable[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
324
330
|
"""
|
|
325
331
|
Apply include/exclude mime filters if configured.
|
|
326
332
|
"""
|
|
327
333
|
include = set(self.cfg.include_mime_types or [])
|
|
328
334
|
exclude = set(self.cfg.exclude_mime_types or [])
|
|
329
335
|
|
|
330
|
-
def keep(m:
|
|
336
|
+
def keep(m: dict[str, Any]) -> bool:
|
|
331
337
|
mt = m.get("mimeType")
|
|
332
338
|
if exclude and mt in exclude:
|
|
333
339
|
return False
|
|
@@ -337,7 +343,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
337
343
|
|
|
338
344
|
return [m for m in items if keep(m)]
|
|
339
345
|
|
|
340
|
-
def _iter_selected_items(self) ->
|
|
346
|
+
def _iter_selected_items(self) -> list[dict[str, Any]]:
|
|
341
347
|
"""
|
|
342
348
|
Return a de-duplicated list of file metadata for the selected scope:
|
|
343
349
|
- explicit file_ids (automatically expands folders to their contents)
|
|
@@ -351,16 +357,21 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
351
357
|
)
|
|
352
358
|
self._clear_shortcut_cache()
|
|
353
359
|
|
|
354
|
-
seen:
|
|
355
|
-
items:
|
|
356
|
-
folders_to_expand:
|
|
360
|
+
seen: set[str] = set()
|
|
361
|
+
items: list[dict[str, Any]] = []
|
|
362
|
+
folders_to_expand: list[str] = []
|
|
357
363
|
|
|
358
364
|
if self.cfg.file_ids:
|
|
359
|
-
logger.debug(
|
|
365
|
+
logger.debug(
|
|
366
|
+
"[GoogleDrive] _iter_selected_items: processing %d file_id(s)",
|
|
367
|
+
len(self.cfg.file_ids),
|
|
368
|
+
)
|
|
360
369
|
for fid in self.cfg.file_ids:
|
|
361
370
|
meta = self._get_file_meta_by_id(fid)
|
|
362
371
|
if not meta:
|
|
363
|
-
logger.debug(
|
|
372
|
+
logger.debug(
|
|
373
|
+
"[GoogleDrive] _iter_selected_items: no metadata for file_id=%s", fid
|
|
374
|
+
)
|
|
364
375
|
continue
|
|
365
376
|
|
|
366
377
|
if meta.get("mimeType") == "application/vnd.google-apps.folder":
|
|
@@ -378,7 +389,9 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
378
389
|
folders_to_expand.extend(self.cfg.folder_ids)
|
|
379
390
|
|
|
380
391
|
if folders_to_expand:
|
|
381
|
-
logger.debug(
|
|
392
|
+
logger.debug(
|
|
393
|
+
"[GoogleDrive] _iter_selected_items: expanding %d folder(s)", len(folders_to_expand)
|
|
394
|
+
)
|
|
382
395
|
folder_children = self._bfs_expand_folders(folders_to_expand)
|
|
383
396
|
for meta in folder_children:
|
|
384
397
|
meta = self._resolve_shortcut(meta)
|
|
@@ -394,11 +407,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
394
407
|
return []
|
|
395
408
|
|
|
396
409
|
items = self._filter_by_mime(items)
|
|
397
|
-
items = [
|
|
398
|
-
m
|
|
399
|
-
for m in items
|
|
400
|
-
if m.get("mimeType") != "application/vnd.google-apps.folder"
|
|
401
|
-
]
|
|
410
|
+
items = [m for m in items if m.get("mimeType") != "application/vnd.google-apps.folder"]
|
|
402
411
|
|
|
403
412
|
if not items and (self.cfg.file_ids or self.cfg.folder_ids):
|
|
404
413
|
logger.warning(
|
|
@@ -414,7 +423,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
414
423
|
# -------------------------
|
|
415
424
|
# Download logic
|
|
416
425
|
# -------------------------
|
|
417
|
-
def _pick_export_mime(self, source_mime: str) ->
|
|
426
|
+
def _pick_export_mime(self, source_mime: str) -> str | None:
|
|
418
427
|
"""
|
|
419
428
|
Choose export mime for Google-native docs if needed.
|
|
420
429
|
"""
|
|
@@ -437,7 +446,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
437
446
|
# Return None for non-Google-native or unsupported types
|
|
438
447
|
return overrides.get(source_mime)
|
|
439
448
|
|
|
440
|
-
def _download_file_bytes(self, file_meta:
|
|
449
|
+
def _download_file_bytes(self, file_meta: dict[str, Any]) -> bytes:
|
|
441
450
|
"""
|
|
442
451
|
Download bytes for a given file (exporting if Google-native).
|
|
443
452
|
Raises ValueError if the item is a folder (folders cannot be downloaded).
|
|
@@ -478,57 +487,56 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
478
487
|
"application/vnd.google-apps.drawing", # Google Drawings
|
|
479
488
|
}
|
|
480
489
|
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
490
|
+
with self._lock:
|
|
491
|
+
if mime_type in exportable_types:
|
|
492
|
+
# This is an exportable Google Workspace file - must use export_media
|
|
493
|
+
export_mime = self._pick_export_mime(mime_type)
|
|
494
|
+
if not export_mime:
|
|
495
|
+
# Default fallback for unsupported Google native types
|
|
496
|
+
export_mime = "application/pdf"
|
|
487
497
|
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
)
|
|
493
|
-
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
|
|
494
|
-
request = self.service.files().export_media(
|
|
495
|
-
fileId=file_id, mimeType=export_mime
|
|
496
|
-
)
|
|
497
|
-
else:
|
|
498
|
-
# This is a regular uploaded file (PDF, image, video, etc.) - use get_media
|
|
499
|
-
# Also handles non-exportable Google Apps files (Forms, Sites, Maps, etc.)
|
|
500
|
-
logger.debug("[GoogleDrive] _download_file_bytes: using get_media (%s)", mime_type)
|
|
501
|
-
# Binary download (get_media also doesn't accept the Drive flags)
|
|
502
|
-
request = self.service.files().get_media(fileId=file_id)
|
|
503
|
-
|
|
504
|
-
# Download the file with error handling for misclassified Google Docs
|
|
505
|
-
fh = io.BytesIO()
|
|
506
|
-
downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
|
|
507
|
-
done = False
|
|
508
|
-
|
|
509
|
-
try:
|
|
510
|
-
while not done:
|
|
511
|
-
status, done = downloader.next_chunk()
|
|
512
|
-
# Optional: you can log progress via status.progress()
|
|
513
|
-
except HttpError as e:
|
|
514
|
-
# If download fails with "fileNotDownloadable", it's a Docs Editor file
|
|
515
|
-
# that wasn't properly detected. Retry with export_media.
|
|
516
|
-
if "fileNotDownloadable" in str(e) and mime_type not in exportable_types:
|
|
517
|
-
logger.warning(
|
|
518
|
-
f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. "
|
|
519
|
-
f"Retrying with export_media (file might be a Google Doc)"
|
|
520
|
-
)
|
|
521
|
-
export_mime = "application/pdf"
|
|
522
|
-
request = self.service.files().export_media(
|
|
523
|
-
fileId=file_id, mimeType=export_mime
|
|
498
|
+
logger.debug(
|
|
499
|
+
"[GoogleDrive] _download_file_bytes: using export_media (%s -> %s)",
|
|
500
|
+
mime_type,
|
|
501
|
+
export_mime,
|
|
524
502
|
)
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
503
|
+
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
|
|
504
|
+
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
|
|
505
|
+
else:
|
|
506
|
+
# This is a regular uploaded file (PDF, image, video, etc.) - use get_media
|
|
507
|
+
# Also handles non-exportable Google Apps files (Forms, Sites, Maps, etc.)
|
|
508
|
+
logger.debug("[GoogleDrive] _download_file_bytes: using get_media (%s)", mime_type)
|
|
509
|
+
# Binary download (get_media also doesn't accept the Drive flags)
|
|
510
|
+
request = self.service.files().get_media(fileId=file_id)
|
|
511
|
+
|
|
512
|
+
# Download the file with error handling for misclassified Google Docs
|
|
513
|
+
fh = io.BytesIO()
|
|
514
|
+
downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
|
|
515
|
+
done = False
|
|
516
|
+
|
|
517
|
+
try:
|
|
528
518
|
while not done:
|
|
529
519
|
status, done = downloader.next_chunk()
|
|
530
|
-
|
|
531
|
-
|
|
520
|
+
# Optional: you can log progress via status.progress()
|
|
521
|
+
except HttpError as e:
|
|
522
|
+
# If download fails with "fileNotDownloadable", it's a Docs Editor file
|
|
523
|
+
# that wasn't properly detected. Retry with export_media.
|
|
524
|
+
if "fileNotDownloadable" in str(e) and mime_type not in exportable_types:
|
|
525
|
+
logger.warning(
|
|
526
|
+
f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. "
|
|
527
|
+
f"Retrying with export_media (file might be a Google Doc)"
|
|
528
|
+
)
|
|
529
|
+
export_mime = "application/pdf"
|
|
530
|
+
request = self.service.files().export_media(
|
|
531
|
+
fileId=file_id, mimeType=export_mime
|
|
532
|
+
)
|
|
533
|
+
fh = io.BytesIO()
|
|
534
|
+
downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
|
|
535
|
+
done = False
|
|
536
|
+
while not done:
|
|
537
|
+
status, done = downloader.next_chunk()
|
|
538
|
+
else:
|
|
539
|
+
raise
|
|
532
540
|
|
|
533
541
|
data = fh.getvalue()
|
|
534
542
|
logger.debug("[GoogleDrive] _download_file_bytes: done, %d bytes", len(data))
|
|
@@ -572,10 +580,10 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
572
580
|
|
|
573
581
|
async def list_files(
|
|
574
582
|
self,
|
|
575
|
-
page_token:
|
|
576
|
-
max_files:
|
|
583
|
+
page_token: str | None = None,
|
|
584
|
+
max_files: int | None = None,
|
|
577
585
|
**kwargs,
|
|
578
|
-
) ->
|
|
586
|
+
) -> dict[str, Any]:
|
|
579
587
|
"""
|
|
580
588
|
List files in the currently selected scope (file_ids/folder_ids/recursive).
|
|
581
589
|
Returns a dict with 'files' and 'next_page_token'.
|
|
@@ -589,7 +597,9 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
589
597
|
"Google Drive service is not initialized. Please authenticate first."
|
|
590
598
|
)
|
|
591
599
|
|
|
592
|
-
logger.debug(
|
|
600
|
+
logger.debug(
|
|
601
|
+
"[GoogleDrive] list_files: entry (page_token=%s, max_files=%s)", page_token, max_files
|
|
602
|
+
)
|
|
593
603
|
|
|
594
604
|
try:
|
|
595
605
|
items = await asyncio.to_thread(self._iter_selected_items)
|
|
@@ -609,7 +619,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
609
619
|
logger.error("[GoogleDrive] list_files failed: %s", e, exc_info=True)
|
|
610
620
|
raise
|
|
611
621
|
|
|
612
|
-
def _extract_google_drive_acl(self, file_meta:
|
|
622
|
+
def _extract_google_drive_acl(self, file_meta: dict) -> DocumentACL:
|
|
613
623
|
"""
|
|
614
624
|
Extract ACL from Google Drive file metadata.
|
|
615
625
|
|
|
@@ -624,10 +634,15 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
624
634
|
"""
|
|
625
635
|
try:
|
|
626
636
|
# Fetch permissions (requires additional API call)
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
637
|
+
with self._lock:
|
|
638
|
+
permissions_list = (
|
|
639
|
+
self.service.permissions()
|
|
640
|
+
.list(
|
|
641
|
+
fileId=file_meta["id"],
|
|
642
|
+
fields="permissions(emailAddress,role,type,deleted,displayName)",
|
|
643
|
+
)
|
|
644
|
+
.execute()
|
|
645
|
+
)
|
|
631
646
|
|
|
632
647
|
allowed_users = []
|
|
633
648
|
allowed_groups = []
|
|
@@ -730,12 +745,12 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
730
745
|
metadata={
|
|
731
746
|
"parents": meta.get("parents"),
|
|
732
747
|
"driveId": meta.get("driveId"),
|
|
733
|
-
"size": int(meta.get("size", 0))
|
|
734
|
-
if str(meta.get("size", "")).isdigit()
|
|
735
|
-
else None,
|
|
748
|
+
"size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
|
|
736
749
|
},
|
|
737
750
|
)
|
|
738
|
-
logger.debug(
|
|
751
|
+
logger.debug(
|
|
752
|
+
"[GoogleDrive] get_file_content: done for file_id=%s (%d bytes)", file_id, len(blob)
|
|
753
|
+
)
|
|
739
754
|
return doc
|
|
740
755
|
|
|
741
756
|
async def setup_subscription(self) -> str:
|
|
@@ -752,9 +767,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
752
767
|
# 1) Ensure we are authenticated and have a live Drive service
|
|
753
768
|
ok = await self.authenticate()
|
|
754
769
|
if not ok:
|
|
755
|
-
raise RuntimeError(
|
|
756
|
-
"GoogleDriveConnector.setup_subscription: not authenticated"
|
|
757
|
-
)
|
|
770
|
+
raise RuntimeError("GoogleDriveConnector.setup_subscription: not authenticated")
|
|
758
771
|
|
|
759
772
|
# 2) Resolve webhook address (no param in ABC, so pull from config/env)
|
|
760
773
|
webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv(
|
|
@@ -810,9 +823,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
810
823
|
}
|
|
811
824
|
|
|
812
825
|
if not isinstance(channel_id, str) or not channel_id:
|
|
813
|
-
raise RuntimeError(
|
|
814
|
-
f"Drive watch returned invalid channel id: {channel_id!r}"
|
|
815
|
-
)
|
|
826
|
+
raise RuntimeError(f"Drive watch returned invalid channel id: {channel_id!r}")
|
|
816
827
|
|
|
817
828
|
return channel_id
|
|
818
829
|
|
|
@@ -850,13 +861,13 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
850
861
|
|
|
851
862
|
# Single-channel memory
|
|
852
863
|
if getattr(self, "_active_channel", None):
|
|
853
|
-
ch =
|
|
864
|
+
ch = self._active_channel
|
|
854
865
|
if isinstance(ch, dict) and ch.get("channel_id") == subscription_id:
|
|
855
866
|
resource_id = ch.get("resource_id")
|
|
856
867
|
|
|
857
868
|
# Multi-channel memory
|
|
858
869
|
if resource_id is None and hasattr(self, "_subscriptions"):
|
|
859
|
-
subs =
|
|
870
|
+
subs = self._subscriptions
|
|
860
871
|
if isinstance(subs, dict):
|
|
861
872
|
entry = subs.get(subscription_id)
|
|
862
873
|
if isinstance(entry, dict):
|
|
@@ -888,9 +899,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
888
899
|
):
|
|
889
900
|
self._active_channel = {}
|
|
890
901
|
|
|
891
|
-
if hasattr(self, "_subscriptions") and isinstance(
|
|
892
|
-
self._subscriptions, dict
|
|
893
|
-
):
|
|
902
|
+
if hasattr(self, "_subscriptions") and isinstance(self._subscriptions, dict):
|
|
894
903
|
self._subscriptions.pop(subscription_id, None)
|
|
895
904
|
|
|
896
905
|
return True
|
|
@@ -902,7 +911,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
902
911
|
pass
|
|
903
912
|
return False
|
|
904
913
|
|
|
905
|
-
async def handle_webhook(self, payload:
|
|
914
|
+
async def handle_webhook(self, payload: dict[str, Any]) -> list[str]:
|
|
906
915
|
"""
|
|
907
916
|
Process a Google Drive Changes webhook.
|
|
908
917
|
Drive push notifications do NOT include the changed files themselves; they merely tell us
|
|
@@ -915,7 +924,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
915
924
|
Returns:
|
|
916
925
|
List[str]: unique list of affected file IDs (filtered to our selected scope).
|
|
917
926
|
"""
|
|
918
|
-
affected:
|
|
927
|
+
affected: list[str] = []
|
|
919
928
|
try:
|
|
920
929
|
# 1) Ensure we're authenticated / service ready
|
|
921
930
|
ok = await self.authenticate()
|
|
@@ -941,9 +950,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
941
950
|
except Exception as e:
|
|
942
951
|
selected_ids = set()
|
|
943
952
|
try:
|
|
944
|
-
logger.error(
|
|
945
|
-
f"handle_webhook: scope build failed, proceeding unfiltered: {e}"
|
|
946
|
-
)
|
|
953
|
+
logger.error(f"handle_webhook: scope build failed, proceeding unfiltered: {e}")
|
|
947
954
|
except Exception:
|
|
948
955
|
pass
|
|
949
956
|
|
|
@@ -980,11 +987,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
980
987
|
# Filter to our selected scope if we have one; otherwise accept all
|
|
981
988
|
if selected_ids and (rid not in selected_ids):
|
|
982
989
|
# Shortcut target might be in scope even if the shortcut isn't
|
|
983
|
-
tgt = (
|
|
984
|
-
fobj.get("shortcutDetails", {}).get("targetId")
|
|
985
|
-
if fobj
|
|
986
|
-
else None
|
|
987
|
-
)
|
|
990
|
+
tgt = fobj.get("shortcutDetails", {}).get("targetId") if fobj else None
|
|
988
991
|
if not (tgt and tgt in selected_ids):
|
|
989
992
|
continue
|
|
990
993
|
|
|
@@ -1007,7 +1010,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
1007
1010
|
|
|
1008
1011
|
# Deduplicate while preserving order
|
|
1009
1012
|
seen = set()
|
|
1010
|
-
deduped:
|
|
1013
|
+
deduped: list[str] = []
|
|
1011
1014
|
for x in affected:
|
|
1012
1015
|
if x not in seen:
|
|
1013
1016
|
seen.add(x)
|
|
@@ -1033,9 +1036,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
1033
1036
|
blob = self._download_file_bytes(meta)
|
|
1034
1037
|
except HttpError as e:
|
|
1035
1038
|
# Skip/record failures
|
|
1036
|
-
logger.error(
|
|
1037
|
-
f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}"
|
|
1038
|
-
)
|
|
1039
|
+
logger.error(f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}")
|
|
1039
1040
|
continue
|
|
1040
1041
|
|
|
1041
1042
|
from datetime import datetime
|
|
@@ -1081,12 +1082,10 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
1081
1082
|
# -------------------------
|
|
1082
1083
|
def get_start_page_token(self) -> str:
|
|
1083
1084
|
# getStartPageToken accepts supportsAllDrives (not includeItemsFromAllDrives)
|
|
1084
|
-
resp = (
|
|
1085
|
-
self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
|
|
1086
|
-
)
|
|
1085
|
+
resp = self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
|
|
1087
1086
|
return resp["startPageToken"]
|
|
1088
1087
|
|
|
1089
|
-
def poll_changes_and_sync(self) ->
|
|
1088
|
+
def poll_changes_and_sync(self) -> str | None:
|
|
1090
1089
|
"""
|
|
1091
1090
|
Incrementally process changes since the last page token in cfg.changes_page_token.
|
|
1092
1091
|
|
|
@@ -1122,10 +1121,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
1122
1121
|
# Match scope
|
|
1123
1122
|
if fid not in selected_ids:
|
|
1124
1123
|
# also consider shortcut target
|
|
1125
|
-
if (
|
|
1126
|
-
file_obj.get("mimeType")
|
|
1127
|
-
== "application/vnd.google-apps.shortcut"
|
|
1128
|
-
):
|
|
1124
|
+
if file_obj.get("mimeType") == "application/vnd.google-apps.shortcut":
|
|
1129
1125
|
tgt = file_obj.get("shortcutDetails", {}).get("targetId")
|
|
1130
1126
|
if tgt and tgt in selected_ids:
|
|
1131
1127
|
pass
|
|
@@ -1189,8 +1185,8 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
1189
1185
|
# Optional: webhook stubs
|
|
1190
1186
|
# -------------------------
|
|
1191
1187
|
def build_watch_body(
|
|
1192
|
-
self, webhook_address: str, channel_id:
|
|
1193
|
-
) ->
|
|
1188
|
+
self, webhook_address: str, channel_id: str | None = None
|
|
1189
|
+
) -> dict[str, Any]:
|
|
1194
1190
|
"""
|
|
1195
1191
|
Prepare the request body for changes.watch if you use webhooks.
|
|
1196
1192
|
"""
|
|
@@ -1200,7 +1196,7 @@ class GoogleDriveConnector(BaseConnector):
|
|
|
1200
1196
|
"address": webhook_address,
|
|
1201
1197
|
}
|
|
1202
1198
|
|
|
1203
|
-
def start_watch(self, webhook_address: str) ->
|
|
1199
|
+
def start_watch(self, webhook_address: str) -> dict[str, Any]:
|
|
1204
1200
|
"""
|
|
1205
1201
|
Start a webhook watch on changes using the current page token.
|
|
1206
1202
|
Persist the returned resourceId/expiration on your side.
|
|
@@ -53,9 +53,13 @@ class LangflowConnectorService:
|
|
|
53
53
|
filename=document.filename,
|
|
54
54
|
)
|
|
55
55
|
|
|
56
|
+
import os
|
|
57
|
+
|
|
56
58
|
from utils.file_utils import auto_cleanup_tempfile
|
|
57
59
|
|
|
58
|
-
suffix =
|
|
60
|
+
suffix = os.path.splitext(document.filename)[1]
|
|
61
|
+
if not suffix:
|
|
62
|
+
suffix = get_file_extension(document.mimetype)
|
|
59
63
|
|
|
60
64
|
# Create temporary file from document content
|
|
61
65
|
with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
|