octostar-python-client 0.1.759__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- octostar/__init__.py +9 -0
- octostar/api/__init__.py +1 -0
- octostar/api/apps/__init__.py +0 -0
- octostar/api/apps/deploy_app.py +210 -0
- octostar/api/apps/execute_app_job.py +188 -0
- octostar/api/apps/get_app_logs.py +210 -0
- octostar/api/apps/get_apps_url.py +188 -0
- octostar/api/apps/get_job_logs.py +210 -0
- octostar/api/apps/get_job_progress.py +162 -0
- octostar/api/apps/kill_job.py +160 -0
- octostar/api/apps/list_app_jobs.py +276 -0
- octostar/api/apps/list_apps.py +251 -0
- octostar/api/apps/set_job_progress.py +216 -0
- octostar/api/apps/undeploy_app.py +160 -0
- octostar/api/metadata/__init__.py +0 -0
- octostar/api/metadata/get_version.py +232 -0
- octostar/api/metadata/get_whoami.py +232 -0
- octostar/api/notifications/__init__.py +0 -0
- octostar/api/notifications/delete_stream.py +222 -0
- octostar/api/notifications/get_subscriptions.py +240 -0
- octostar/api/notifications/publish_notification.py +275 -0
- octostar/api/notifications/pull_events_from_stream.py +282 -0
- octostar/api/notifications/push_event_to_stream.py +265 -0
- octostar/api/notifications/toast.py +264 -0
- octostar/api/ontology/__init__.py +0 -0
- octostar/api/ontology/fetch_ontology_data.py +275 -0
- octostar/api/ontology/get_ontologies.py +237 -0
- octostar/api/ontology/multi_query.py +297 -0
- octostar/api/ontology/query.py +276 -0
- octostar/api/pipeline/__init__.py +1 -0
- octostar/api/pipeline/get_processing_status.py +185 -0
- octostar/api/pipeline/update_processing_status.py +164 -0
- octostar/api/search/__init__.py +0 -0
- octostar/api/search/get_annotations.py +153 -0
- octostar/api/workspace_data/__init__.py +0 -0
- octostar/api/workspace_data/delete_blob.py +212 -0
- octostar/api/workspace_data/delete_entities.py +326 -0
- octostar/api/workspace_data/download_blob.py +235 -0
- octostar/api/workspace_data/get_attachment.py +336 -0
- octostar/api/workspace_data/get_files_tree.py +397 -0
- octostar/api/workspace_data/upload_blob.py +235 -0
- octostar/api/workspace_data/upsert_entities.py +284 -0
- octostar/api/workspace_permissions/__init__.py +0 -0
- octostar/api/workspace_permissions/get_permissions.py +325 -0
- octostar/api/workspace_tags/__init__.py +0 -0
- octostar/api/workspace_tags/delete_tag_from_entities.py +141 -0
- octostar/api/workspace_tags/tag_entities.py +180 -0
- octostar/client.py +492 -0
- octostar/errors.py +50 -0
- octostar/models/__init__.py +249 -0
- octostar/models/acknowledgement.py +74 -0
- octostar/models/acknowledgement_with_data.py +82 -0
- octostar/models/app_status.py +239 -0
- octostar/models/app_status_annotations.py +66 -0
- octostar/models/app_status_labels.py +69 -0
- octostar/models/app_with_url.py +82 -0
- octostar/models/child_processing_status.py +118 -0
- octostar/models/delete_entities_response_401.py +74 -0
- octostar/models/delete_entities_response_409.py +82 -0
- octostar/models/delete_entities_response_500.py +82 -0
- octostar/models/delete_stream_response_401.py +74 -0
- octostar/models/delete_tag_from_entities_response_401.py +74 -0
- octostar/models/deploy_app_json_body.py +90 -0
- octostar/models/deploy_app_json_body_secrets.py +65 -0
- octostar/models/deploy_app_response_200.py +98 -0
- octostar/models/deploy_app_response_200_data.py +60 -0
- octostar/models/deploy_app_response_400.py +82 -0
- octostar/models/deploy_app_response_403.py +82 -0
- octostar/models/deploy_app_response_404.py +82 -0
- octostar/models/deploy_app_response_409.py +82 -0
- octostar/models/deploy_app_response_500.py +82 -0
- octostar/models/entity.py +80 -0
- octostar/models/entity_response.py +99 -0
- octostar/models/entity_response_s3_urls.py +93 -0
- octostar/models/entity_response_s3_urls_additional_property.py +105 -0
- octostar/models/entity_response_s3_urls_additional_property_fields.py +114 -0
- octostar/models/execute_app_job_json_body.py +151 -0
- octostar/models/execute_app_job_json_body_annotation.py +65 -0
- octostar/models/execute_app_job_response_401.py +74 -0
- octostar/models/fetch_ontology_data_response_200.py +60 -0
- octostar/models/fetch_ontology_data_response_401.py +74 -0
- octostar/models/fetch_ontology_data_response_500.py +82 -0
- octostar/models/get_app_logs_response_401.py +74 -0
- octostar/models/get_app_logs_response_404.py +74 -0
- octostar/models/get_app_logs_response_500.py +82 -0
- octostar/models/get_apps_url_json_body.py +76 -0
- octostar/models/get_apps_url_response_401.py +74 -0
- octostar/models/get_apps_url_response_500.py +82 -0
- octostar/models/get_attachment_response_200.py +74 -0
- octostar/models/get_attachment_response_401.py +74 -0
- octostar/models/get_files_tree_response_200.py +106 -0
- octostar/models/get_files_tree_response_200_status.py +8 -0
- octostar/models/get_files_tree_response_400.py +111 -0
- octostar/models/get_files_tree_response_400_data.py +60 -0
- octostar/models/get_files_tree_response_400_status.py +8 -0
- octostar/models/get_files_tree_response_401.py +74 -0
- octostar/models/get_files_tree_response_500.py +111 -0
- octostar/models/get_files_tree_response_500_data.py +60 -0
- octostar/models/get_files_tree_response_500_status.py +8 -0
- octostar/models/get_job_logs_response_401.py +74 -0
- octostar/models/get_job_logs_response_404.py +74 -0
- octostar/models/get_job_logs_response_500.py +82 -0
- octostar/models/get_job_progress_response_401.py +74 -0
- octostar/models/get_object_response_401.py +74 -0
- octostar/models/get_ontologies_response_401.py +74 -0
- octostar/models/get_ontologies_response_500.py +81 -0
- octostar/models/get_permissions_response_200.py +98 -0
- octostar/models/get_permissions_response_400.py +82 -0
- octostar/models/get_permissions_response_401.py +74 -0
- octostar/models/get_permissions_response_500.py +82 -0
- octostar/models/get_processing_status_response_200.py +104 -0
- octostar/models/get_processing_status_response_200_data.py +87 -0
- octostar/models/get_processing_status_response_400.py +82 -0
- octostar/models/get_processing_status_response_500.py +82 -0
- octostar/models/get_subscriptions_response_200_item.py +74 -0
- octostar/models/get_version_response_200.py +74 -0
- octostar/models/get_version_response_404.py +74 -0
- octostar/models/get_whoami_response_200.py +129 -0
- octostar/models/get_whoami_response_401.py +74 -0
- octostar/models/insert_entity.py +114 -0
- octostar/models/insert_entity_base.py +266 -0
- octostar/models/insert_entity_relationships_item.py +107 -0
- octostar/models/insert_entity_request.py +94 -0
- octostar/models/internal_server_error.py +82 -0
- octostar/models/job_execution_result.py +146 -0
- octostar/models/job_status.py +196 -0
- octostar/models/job_status_labels.py +60 -0
- octostar/models/job_with_url.py +82 -0
- octostar/models/kill_job_response_401.py +74 -0
- octostar/models/list_app_jobs_response_401.py +74 -0
- octostar/models/list_app_jobs_response_500.py +82 -0
- octostar/models/list_apps_response_401.py +74 -0
- octostar/models/list_apps_response_500.py +82 -0
- octostar/models/multi_query_json_body.py +100 -0
- octostar/models/multi_query_json_body_queries_item.py +80 -0
- octostar/models/multi_query_response_400.py +82 -0
- octostar/models/multi_query_response_401.py +74 -0
- octostar/models/not_found_error.py +74 -0
- octostar/models/octostar_event.py +96 -0
- octostar/models/octostar_event_octostar_payload.py +100 -0
- octostar/models/octostar_event_octostar_payload_level.py +11 -0
- octostar/models/os_notification.py +122 -0
- octostar/models/processing_status.py +262 -0
- octostar/models/processing_status_code.py +14 -0
- octostar/models/progress_request.py +73 -0
- octostar/models/publish_notification_response_401.py +74 -0
- octostar/models/pull_events_from_stream_response_401.py +74 -0
- octostar/models/push_event_to_stream_response_401.py +74 -0
- octostar/models/query_json_body.py +101 -0
- octostar/models/query_json_body_params.py +60 -0
- octostar/models/query_response_400.py +82 -0
- octostar/models/query_response_401.py +74 -0
- octostar/models/set_job_progress_response_401.py +74 -0
- octostar/models/string_to_value_label_map.py +99 -0
- octostar/models/string_to_value_label_map_data.py +89 -0
- octostar/models/string_to_value_label_map_data_additional_property.py +80 -0
- octostar/models/successful_get_tags.py +103 -0
- octostar/models/successful_insertion.py +98 -0
- octostar/models/tag_entities_response_401.py +74 -0
- octostar/models/toast_level.py +11 -0
- octostar/models/toast_response_401.py +74 -0
- octostar/models/undeploy_app_response_401.py +74 -0
- octostar/models/update_processing_status_response_200.py +82 -0
- octostar/models/update_processing_status_response_400.py +82 -0
- octostar/models/update_processing_status_response_500.py +82 -0
- octostar/models/upsert_entities_response_401.py +74 -0
- octostar/models/upsert_entity.py +114 -0
- octostar/models/upsert_entity_base.py +266 -0
- octostar/models/upsert_entity_relationships_item.py +107 -0
- octostar/py.typed +1 -0
- octostar/types.py +54 -0
- octostar/utils/__init__.py +15 -0
- octostar/utils/chat/__init__.py +0 -0
- octostar/utils/chat/chat.py +513 -0
- octostar/utils/chat/detokenize.py +105 -0
- octostar/utils/chat/get_default_model.py +50 -0
- octostar/utils/chat/list_models.py +91 -0
- octostar/utils/chat/tokenize.py +105 -0
- octostar/utils/commons.py +226 -0
- octostar/utils/exceptions.py +134 -0
- octostar/utils/jobs/__init__.py +0 -0
- octostar/utils/jobs/apps/__init__.py +0 -0
- octostar/utils/jobs/apps/deploy_app.py +81 -0
- octostar/utils/jobs/apps/execute_app_job.py +114 -0
- octostar/utils/jobs/apps/get_app_logs.py +113 -0
- octostar/utils/jobs/apps/get_app_secret.py +102 -0
- octostar/utils/jobs/apps/get_apps_url.py +73 -0
- octostar/utils/jobs/apps/list_app_jobs.py +62 -0
- octostar/utils/jobs/apps/list_apps.py +126 -0
- octostar/utils/jobs/apps/undeploy_app.py +48 -0
- octostar/utils/jobs/get_job_logs.py +113 -0
- octostar/utils/jobs/get_job_progress.py +76 -0
- octostar/utils/jobs/kill_job.py +47 -0
- octostar/utils/jobs/set_job_progress.py +67 -0
- octostar/utils/meta/__init__.py +0 -0
- octostar/utils/meta/get_version.py +30 -0
- octostar/utils/meta/get_whoami.py +30 -0
- octostar/utils/notifications/__init__.py +0 -0
- octostar/utils/notifications/delete_stream.py +58 -0
- octostar/utils/notifications/get_my_subscriptions.py +49 -0
- octostar/utils/notifications/publish_notification.py +73 -0
- octostar/utils/notifications/pull_event_from_stream.py +63 -0
- octostar/utils/notifications/pull_events_from_stream.py +64 -0
- octostar/utils/notifications/push_event_to_stream.py +109 -0
- octostar/utils/notifications/push_events_to_stream.py +137 -0
- octostar/utils/notifications/toast.py +92 -0
- octostar/utils/ontology/__init__.py +10 -0
- octostar/utils/ontology/fetch_ontology_data.py +141 -0
- octostar/utils/ontology/get_ontologies.py +55 -0
- octostar/utils/ontology/multiquery_ontology.py +287 -0
- octostar/utils/ontology/query_ontology.py +186 -0
- octostar/utils/pipeline/__init__.py +1 -0
- octostar/utils/pipeline/get_processing_status.py +230 -0
- octostar/utils/pipeline/update_processing_status.py +286 -0
- octostar/utils/search/__init__.py +11 -0
- octostar/utils/search/bulk_update.py +138 -0
- octostar/utils/search/count.py +117 -0
- octostar/utils/search/get_entity_annotations.py +304 -0
- octostar/utils/search/get_index_definition.py +111 -0
- octostar/utils/search/multi_search.py +129 -0
- octostar/utils/workspace/__init__.py +0 -0
- octostar/utils/workspace/delete_entities.py +247 -0
- octostar/utils/workspace/delete_entity.py +81 -0
- octostar/utils/workspace/delete_relationship.py +78 -0
- octostar/utils/workspace/delete_relationships.py +85 -0
- octostar/utils/workspace/delete_temporary_blob.py +85 -0
- octostar/utils/workspace/extract_entities.py +140 -0
- octostar/utils/workspace/get_filepath_from_item.py +85 -0
- octostar/utils/workspace/get_filepaths_from_items.py +100 -0
- octostar/utils/workspace/get_files_tree.py +102 -0
- octostar/utils/workspace/get_item_from_filepath.py +102 -0
- octostar/utils/workspace/get_items_from_filepaths.py +108 -0
- octostar/utils/workspace/linkcharts/__init__.py +0 -0
- octostar/utils/workspace/linkcharts/create_linkchart.py +241 -0
- octostar/utils/workspace/permissions/PermissionLevel.py +8 -0
- octostar/utils/workspace/permissions/__init__.py +1 -0
- octostar/utils/workspace/permissions/get_permissions.py +81 -0
- octostar/utils/workspace/read_attachment.py +284 -0
- octostar/utils/workspace/read_file.py +113 -0
- octostar/utils/workspace/read_temporary_blob.py +428 -0
- octostar/utils/workspace/saved_searches/__init__.py +0 -0
- octostar/utils/workspace/saved_searches/create_saved_search.py +183 -0
- octostar/utils/workspace/tags/__init__.py +0 -0
- octostar/utils/workspace/tags/delete_tag_from_entities.py +96 -0
- octostar/utils/workspace/tags/tag_entities.py +175 -0
- octostar/utils/workspace/upsert_entities.py +268 -0
- octostar/utils/workspace/upsert_entity.py +110 -0
- octostar/utils/workspace/upsert_relationship.py +128 -0
- octostar/utils/workspace/upsert_relationships.py +194 -0
- octostar/utils/workspace/write_attachment.py +263 -0
- octostar/utils/workspace/write_file.py +335 -0
- octostar/utils/workspace/write_temporary_blob.py +218 -0
- octostar_python_client-0.1.759.dist-info/METADATA +159 -0
- octostar_python_client-0.1.759.dist-info/RECORD +257 -0
- octostar_python_client-0.1.759.dist-info/WHEEL +5 -0
- octostar_python_client-0.1.759.dist-info/licenses/LICENSE +21 -0
- octostar_python_client-0.1.759.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional, Tuple
|
|
3
|
+
|
|
4
|
+
_logger = logging.getLogger(__name__)
|
|
5
|
+
|
|
6
|
+
from ...client import Client
|
|
7
|
+
from . import read_attachment
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def sync(
    os_workspace: str,
    os_entity_uid: str,
    decode: bool = True,
    stream: bool = False,
    stream_lines: bool = False,
    stream_chunk_size: int = 65_536,
    byte_range: Optional[Tuple[int, Optional[int]]] = None,
    headers_only: bool = False,
    client: Client = None,
):
    """
    # Read a file's content, addressed by workspace ID and object ID

    Thin synchronous wrapper: every argument is forwarded unchanged, by keyword,
    to `read_attachment.sync`, and whatever that call returns is returned as-is.

    ## Arguments
    - `os_workspace`: ID of the workspace the object belongs to
    - `os_entity_uid`: ID of the object
    - `decode`: Whether the contents should be decoded to UTF-8
    - `stream`: Whether the contents should be returned in chunks. When False,
        `stream_lines` and `stream_chunk_size` have no effect
    - `stream_lines`: Whether chunks should be split per line. When set, it takes
        precedence over `stream_chunk_size`
    - `stream_chunk_size`: Size of each chunk, in bytes
    - `byte_range`: Optional `(start, end)` tuple for Range requests; `end` may be
        None for an open-ended range. Example: `(0, 1023)` fetches the first 1024
        bytes, `(1024, None)` fetches everything from byte 1024 onwards
    - `headers_only`: If True, a HEAD request is sent and the response headers are
        returned as a dict instead of the body (handy for content-type,
        content-length, etag or last-modified checks)
    - `client`: The Client used to talk to Octostar. If None, the default one is used

    ## Returns
    - With `headers_only=True`: a ``dict[str, str]`` of response headers.
    - Otherwise: the file contents as a string or bytes, or a generator when streaming.

    ## Raises
    - `ApiConnectionError`: If the operation was unsuccessful on the server
    - `ValueError`: If the object has no associated file
    """
    # Collect the pass-through arguments once so the delegation is a single call.
    forwarded = {
        "os_workspace": os_workspace,
        "os_entity_uid": os_entity_uid,
        "decode": decode,
        "stream": stream,
        "stream_lines": stream_lines,
        "stream_chunk_size": stream_chunk_size,
        "byte_range": byte_range,
        "headers_only": headers_only,
        "client": client,
    }
    return read_attachment.sync(**forwarded)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
async def asyncio(
    os_workspace: str,
    os_entity_uid: str,
    decode: bool = True,
    stream: bool = False,
    stream_lines: bool = False,
    stream_chunk_size: int = 65_536,
    byte_range: Optional[Tuple[int, Optional[int]]] = None,
    headers_only: bool = False,
    client: Client = None,
):
    """
    # Read a file's content, addressed by workspace ID and object ID (async)

    Thin asynchronous wrapper: every argument is forwarded unchanged, by keyword,
    to `read_attachment.asyncio`, whose awaited result is returned as-is.

    ## Arguments
    - `os_workspace`: ID of the workspace the object belongs to
    - `os_entity_uid`: ID of the object
    - `decode`: Whether the contents should be decoded to UTF-8
    - `stream`: Whether the contents should be returned in chunks. When False,
        `stream_lines` and `stream_chunk_size` have no effect
    - `stream_lines`: Whether chunks should be split per line. When set, it takes
        precedence over `stream_chunk_size`
    - `stream_chunk_size`: Size of each chunk, in bytes
    - `byte_range`: Optional `(start, end)` tuple for Range requests; `end` may be
        None for an open-ended range. Example: `(0, 1023)` fetches the first 1024
        bytes, `(1024, None)` fetches everything from byte 1024 onwards
    - `headers_only`: If True, a HEAD request is sent and the response headers are
        returned as a dict instead of the body (handy for content-type,
        content-length, etag or last-modified checks)
    - `client`: The Client used to talk to Octostar. If None, the default one is used

    ## Returns
    - With `headers_only=True`: a ``dict[str, str]`` of response headers.
    - Otherwise: the file contents as a string or bytes, or an async generator
        when streaming.

    ## Raises
    - `ApiConnectionError`: If the operation was unsuccessful on the server
    - `ValueError`: If the object has no associated file
    """
    # Collect the pass-through arguments once so the delegation is a single call.
    forwarded = {
        "os_workspace": os_workspace,
        "os_entity_uid": os_entity_uid,
        "decode": decode,
        "stream": stream,
        "stream_lines": stream_lines,
        "stream_chunk_size": stream_chunk_size,
        "byte_range": byte_range,
        "headers_only": headers_only,
        "client": client,
    }
    return await read_attachment.asyncio(**forwarded)
|
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import codecs
|
|
3
|
+
import os
|
|
4
|
+
import httpx
|
|
5
|
+
from typing import Dict, Optional, Tuple
|
|
6
|
+
from urllib import parse as urllib_parse
|
|
7
|
+
|
|
8
|
+
_logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
from ...client import Client, get_default_client
|
|
11
|
+
from ..commons import network_retry_strategy
|
|
12
|
+
from ..exceptions import ApiConnectionError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Timeout handed to every httpx client created in this module
# (httpx interprets a bare number as seconds).
DEFAULT_TIMEOUT = 120
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _is_dev_mode() -> bool:
    """Report whether the ``OS_DEV_MODE`` environment variable spells "true".

    The comparison is case-insensitive; an unset variable counts as False.
    """
    raw_flag = os.environ.get("OS_DEV_MODE")
    # str() keeps the unset case (None -> "None") a plain non-match.
    return str(raw_flag).lower() == "true"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _blob_api_url(client: "Client", filename: str) -> str:
    """Build the blob download API endpoint URL for *filename*.

    The filename is percent-encoded before it is placed in the URL path, so
    characters such as ``?``, ``#``, ``%`` or spaces remain part of the blob
    name instead of being read as a query string, a fragment or an escape
    sequence. ``/`` is left untouched so nested blob keys keep their structure.

    NOTE(review): assumes callers pass the raw (not already percent-encoded)
    blob name — confirm against callers.
    """
    base = client.get_base_url_v1()
    encoded_name = urllib_parse.quote(filename, safe="/")
    return f"{base}/api/v1/files/download-blob/{encoded_name}"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _resolve_url(client: "Client", path: str, use_external: bool) -> str:
    """Return the presigned URL to download from.

    When *use_external* is False the value is returned untouched. Otherwise it
    is joined onto the client's v1 base URL with ``urljoin``.
    """
    if not use_external:
        return path
    base_url = client.get_base_url_v1()
    return urllib_parse.urljoin(base_url, path)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _head_sync(url: str, headers: dict, client: Client) -> Dict[str, str]:
    """Issue a HEAD request against *url* and return the response headers as a dict.

    Every attempt produced by `network_retry_strategy()` opens a fresh
    `httpx.Client`, sends the request and checks the status. Any exception that
    escapes the retry loop is replaced by an `ApiConnectionError` carrying the
    last response received (None if no response was ever obtained).
    """
    last_response = None
    try:
        for attempt in network_retry_strategy():
            with attempt, httpx.Client(timeout=DEFAULT_TIMEOUT) as session:
                last_response = session.head(url, headers=headers)
                last_response.raise_for_status()
    except Exception:
        raise ApiConnectionError("read_temporary_blob", last_response, client)
    else:
        return dict(last_response.headers)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def _head_async(url: str, headers: dict, client: Client) -> Dict[str, str]:
    """Issue a HEAD request against *url* and return the response headers as a dict (async).

    Every attempt produced by `network_retry_strategy()` opens a fresh
    `httpx.AsyncClient`, awaits the request and checks the status. Any exception
    that escapes the retry loop is replaced by an `ApiConnectionError` carrying
    the last response received (None if no response was ever obtained).

    NOTE(review): the retry loop itself is a synchronous ``for``/``with``; if
    `network_retry_strategy()` sleeps between attempts it would presumably block
    the event loop — confirm.
    """
    last_response = None
    try:
        for attempt in network_retry_strategy():
            with attempt:
                async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as session:
                    last_response = await session.head(url, headers=headers)
                    last_response.raise_for_status()
    except Exception:
        raise ApiConnectionError("read_temporary_blob", last_response, client)
    else:
        return dict(last_response.headers)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _get_download_url_sync(
    api_url: str, headers: dict, filename: str, client: Client
) -> str:
    """Ask the blob API for a presigned download URL and return it resolved.

    The ``external_url`` query parameter mirrors `_is_dev_mode()`. The request
    runs under `network_retry_strategy()`; any exception escaping the retry
    loop becomes an `ApiConnectionError`. A JSON body without a truthy ``url``
    entry raises `ValueError`. *filename* is only used in that error message.
    """
    external = _is_dev_mode()
    last_response = None
    try:
        for attempt in network_retry_strategy():
            with attempt, httpx.Client(timeout=DEFAULT_TIMEOUT) as session:
                last_response = session.get(
                    api_url,
                    headers=headers,
                    params={"external_url": external},
                )
                last_response.raise_for_status()
    except Exception:
        raise ApiConnectionError("read_temporary_blob", last_response, client)

    presigned = last_response.json().get("url")
    if presigned:
        return _resolve_url(client, presigned, external)
    raise ValueError(
        f"read_temporary_blob: no download url returned for {filename}"
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
async def _get_download_url_async(
    api_url: str, headers: dict, filename: str, client: Client
) -> str:
    """Ask the blob API for a presigned download URL and return it resolved (async).

    The ``external_url`` query parameter mirrors `_is_dev_mode()`. The request
    runs under `network_retry_strategy()`; any exception escaping the retry
    loop becomes an `ApiConnectionError`. A JSON body without a truthy ``url``
    entry raises `ValueError`. *filename* is only used in that error message.

    NOTE(review): the retry loop itself is a synchronous ``for``/``with``; if
    `network_retry_strategy()` sleeps between attempts it would presumably block
    the event loop — confirm.
    """
    external = _is_dev_mode()
    last_response = None
    try:
        for attempt in network_retry_strategy():
            with attempt:
                async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as session:
                    last_response = await session.get(
                        api_url,
                        headers=headers,
                        params={"external_url": external},
                    )
                    last_response.raise_for_status()
    except Exception:
        raise ApiConnectionError("read_temporary_blob", last_response, client)

    presigned = last_response.json().get("url")
    if presigned:
        return _resolve_url(client, presigned, external)
    raise ValueError(
        f"read_temporary_blob: no download url returned for {filename}"
    )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _decode_iter(chunks, encoding="utf-8", errors="strict"):
    """Lazily decode an iterable of byte chunks into text pieces.

    An incremental decoder is used, so a multi-byte character split across two
    chunks is decoded once its last byte arrives. A non-empty chunk that yields
    no text yet is skipped; an empty input chunk is passed through as ``""``.
    After the input is exhausted the decoder is flushed, and any remaining text
    is yielded.
    """
    incremental = codecs.getincrementaldecoder(encoding)(errors=errors)
    for raw in chunks:
        piece = incremental.decode(raw)
        if raw and not piece:
            # Bytes were consumed but no complete character is available yet.
            continue
        yield piece
    remainder = incremental.decode(b"", final=True)
    if remainder:
        yield remainder
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
async def _decode_iter_async(chunks, encoding="utf-8", errors="strict"):
    """Lazily decode an async iterable of byte chunks into text pieces.

    An incremental decoder is used, so a multi-byte character split across two
    chunks is decoded once its last byte arrives. A non-empty chunk that yields
    no text yet is skipped; an empty input chunk is passed through as ``""``.
    After the input is exhausted the decoder is flushed, and any remaining text
    is yielded.
    """
    incremental = codecs.getincrementaldecoder(encoding)(errors=errors)
    async for raw in chunks:
        piece = incremental.decode(raw)
        if raw and not piece:
            # Bytes were consumed but no complete character is available yet.
            continue
        yield piece
    remainder = incremental.decode(b"", final=True)
    if remainder:
        yield remainder
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _iter_lines(chunks_iter):
    """Re-chunk an iterable of byte chunks into individual lines.

    Each yielded line has its trailing ``\\r`` / ``\\n`` bytes removed. A
    trailing piece that does not end in ``\\n`` is held back and prepended to
    the next chunk, so lines spanning chunk boundaries come out whole. Whatever
    is still held back when the input ends is yielded as the final line.
    """
    carry = b""
    for piece in chunks_iter:
        segments = (carry + piece).splitlines(keepends=True)
        carry = b""
        if segments and not segments[-1].endswith(b"\n"):
            # Possibly incomplete line: wait for more data.
            carry = segments.pop()
        for segment in segments:
            yield segment.rstrip(b"\r\n")
    if carry:
        yield carry.rstrip(b"\r\n")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
async def _iter_lines_async(chunks_iter):
    """Re-chunk an async iterable of byte chunks into individual lines.

    Each yielded line has its trailing ``\\r`` / ``\\n`` bytes removed. A
    trailing piece that does not end in ``\\n`` is held back and prepended to
    the next chunk, so lines spanning chunk boundaries come out whole. Whatever
    is still held back when the input ends is yielded as the final line.
    """
    carry = b""
    async for piece in chunks_iter:
        segments = (carry + piece).splitlines(keepends=True)
        carry = b""
        if segments and not segments[-1].endswith(b"\n"):
            # Possibly incomplete line: wait for more data.
            carry = segments.pop()
        for segment in segments:
            yield segment.rstrip(b"\r\n")
    if carry:
        yield carry.rstrip(b"\r\n")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def sync(
    filename: str,
    decode: bool = True,
    stream: bool = False,
    stream_lines: bool = False,
    stream_chunk_size: int = 65_536,
    byte_range: Optional[Tuple[int, Optional[int]]] = None,
    headers_only: bool = False,
    client: Client = None,
):
    """
    # Read a temporary blob from the user's temp bucket

    Downloads a file from the authenticated user's temporary S3 bucket.
    This is useful for retrieving temporary files that were uploaded for
    processing or export operations.

    The blob API is first asked for a presigned download URL; the content is
    then fetched from that URL, either in a single request or, when streaming,
    through successive Range requests of `stream_chunk_size` bytes.

    ## Arguments
    - `filename`: The name of the file in the temp bucket
    - `decode`: Whether to decode the contents to UTF-8 (default: True). If False, returns raw bytes
    - `stream`: Whether to return the contents in chunks
    - `stream_lines`: Whether to chunk the contents per line
    - `stream_chunk_size`: How many bytes each chunk should be
    - `byte_range`: Optional (start, end) tuple for Range requests. end can be None for open-ended ranges.
        Example: (0, 1023) fetches the first 1024 bytes; (1024, None) fetches from byte 1024 onwards.
    - `headers_only`: If True, send a HEAD request and return the response headers as a dict
        instead of downloading the body. Useful for inspecting content-type, content-length,
        etag, or last-modified without fetching the full blob.
    - `client`: The Client with which to connect to Octostar. If None, the default one is used

    ## Returns
    - When `headers_only=True`: a ``dict[str, str]`` of response headers.
    - Otherwise: a string (if decode=True) or bytes (if decode=False) of the file contents,
        or a generator when streaming.

    ## Raises
    - `ApiConnectionError`: If requesting the presigned download URL or HEAD request failed
    - `ConnectionError`: If the download from S3 failed, including when a streamed
        download keeps answering 403 after the presigned URL has been refreshed
        several times in a row
    """
    if not client:
        client = get_default_client()

    api_url = _blob_api_url(client, filename)
    auth_headers = dict(client.get_headers())

    if headers_only:
        return _head_sync(api_url, auth_headers, client)

    download_url = _get_download_url_sync(api_url, auth_headers, filename, client)

    s3_headers = {}
    if byte_range is not None:
        start, end = byte_range
        if end is not None:
            s3_headers["Range"] = f"bytes={start}-{end}"
        else:
            s3_headers["Range"] = f"bytes={start}-"

    if not stream:
        response = None
        try:
            for attempt in network_retry_strategy():
                with attempt:
                    with httpx.Client(timeout=DEFAULT_TIMEOUT) as http_client:
                        response = http_client.get(download_url, headers=s3_headers)
                        response.raise_for_status()
        except Exception as exc:
            # Only mention status/body when a response was actually received.
            detail = ""
            if response is not None:
                detail = f" — {response.status_code}: {response.text}"
            raise ConnectionError(
                f"read_temporary_blob: download failed for {filename}" + detail
            ) from exc
        content = response.content
        if decode:
            content = content.decode()
        return content

    # A 403 on a chunk request is treated as "presigned URL expired" and answered
    # with a refresh. Cap consecutive refreshes so a real permission denial
    # surfaces as an error instead of looping forever.
    max_url_refreshes = 3

    def _read_chunk(url, start, increment, http_client):
        """Fetch one byte range; return (content, next_start, url_expired)."""
        headers = {"Range": f"bytes={start}-{start + increment - 1}"}
        response = http_client.get(url, headers=headers)
        if response.status_code == 206:
            return response.content, start + increment, False
        elif response.status_code == 416:
            # Requested range starts past the end of the blob: nothing left to read.
            return None, start, False
        elif response.status_code == 403:
            return None, start, True
        else:
            raise ConnectionError(
                f"read_temporary_blob: download failed for {filename} — {response.status_code}: {response.text}"
            )

    def _chunk_generator():
        """Yield the blob as successive byte chunks fetched via Range requests."""
        start = byte_range[0] if byte_range is not None else 0
        end = byte_range[1] if byte_range is not None else None
        url = download_url
        consecutive_refreshes = 0
        with httpx.Client(timeout=DEFAULT_TIMEOUT) as http_client:
            while True:
                if end is not None:
                    remaining = end - start + 1
                    if remaining <= 0:
                        break
                    increment = min(stream_chunk_size, remaining)
                else:
                    increment = stream_chunk_size
                chunk, start, url_expired = _read_chunk(
                    url, start, increment, http_client
                )
                if url_expired:
                    consecutive_refreshes += 1
                    if consecutive_refreshes > max_url_refreshes:
                        raise ConnectionError(
                            f"read_temporary_blob: download failed for {filename} — "
                            f"403 persisted after {max_url_refreshes} presigned URL refreshes"
                        )
                    _logger.info(
                        "Presigned URL expired at byte %s, refreshing...", start
                    )
                    url = _get_download_url_sync(
                        api_url, auth_headers, filename, client
                    )
                    continue
                # Any non-403 answer means the current URL is usable again.
                consecutive_refreshes = 0
                if chunk is None:
                    break
                yield chunk

    if stream_lines:
        result = _iter_lines(_chunk_generator())
    else:
        result = _chunk_generator()

    if decode:
        result = _decode_iter(result)
    return result
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
async def asyncio(
    filename: str,
    decode: bool = True,
    stream: bool = False,
    stream_lines: bool = False,
    stream_chunk_size: int = 65_536,
    byte_range: Optional[Tuple[int, Optional[int]]] = None,
    headers_only: bool = False,
    client: Client = None,
):
    """
    # Read a temporary blob from the user's temp bucket (async)

    Downloads a file from the authenticated user's temporary S3 bucket.
    This is useful for retrieving temporary files that were uploaded for
    processing or export operations.

    ## Arguments
    - `filename`: The name of the file in the temp bucket
    - `decode`: Whether to decode the contents to UTF-8 (default: True). If False, returns raw bytes
    - `stream`: Whether to return the contents in chunks
    - `stream_lines`: Whether to chunk the contents per line. Only consulted when `stream=True`
    - `stream_chunk_size`: How many bytes each chunk should be. Only consulted when `stream=True`
    - `byte_range`: Optional (start, end) tuple for Range requests. end can be None for open-ended ranges.
      Example: (0, 1023) fetches the first 1024 bytes; (1024, None) fetches from byte 1024 onwards.
    - `headers_only`: If True, send a HEAD request and return the response headers as a dict
      instead of downloading the body.
    - `client`: The Client with which to connect to Octostar. If None, the default one is used

    ## Returns
    - When `headers_only=True`: a ``dict[str, str]`` of response headers.
    - Otherwise: a string (if decode=True) or bytes (if decode=False) of the file contents,
      or an async generator when streaming.

    ## Raises
    - `ApiConnectionError`: If requesting the presigned download URL or HEAD request failed
    - `ConnectionError`: If the download from S3 failed, including the case where a
      streaming download keeps getting 403 after the presigned URL was refreshed
    """
    if not client:
        client = get_default_client()

    api_url = _blob_api_url(client, filename)
    auth_headers = dict(client.get_headers())

    if headers_only:
        return await _head_async(api_url, auth_headers, client)

    download_url = await _get_download_url_async(
        api_url, auth_headers, filename, client
    )

    # Range header for the one-shot download; the streaming path builds its
    # own per-chunk Range headers instead.
    s3_headers = {}
    if byte_range is not None:
        start, end = byte_range
        if end is not None:
            s3_headers["Range"] = f"bytes={start}-{end}"
        else:
            s3_headers["Range"] = f"bytes={start}-"

    if not stream:
        response = None
        try:
            for attempt in network_retry_strategy():
                with attempt:
                    async with httpx.AsyncClient(
                        timeout=DEFAULT_TIMEOUT
                    ) as http_client:
                        response = await http_client.get(
                            download_url, headers=s3_headers
                        )
                        response.raise_for_status()
        except Exception as exc:
            # Keep the original failure attached as __cause__ so the real
            # reason (timeout, HTTP status, ...) is visible in tracebacks.
            raise ConnectionError(
                f"read_temporary_blob: download failed for {filename}"
                + (
                    f" — {response.status_code}: {response.text}"
                    if response is not None
                    else ""
                )
            ) from exc
        content = response.content
        if decode:
            content = content.decode()
        return content

    async def _read_chunk_async(url, start, increment, http_client):
        # Returns (chunk, next_start, url_expired) for one Range request.
        headers = {"Range": f"bytes={start}-{start + increment - 1}"}
        response = await http_client.get(url, headers=headers)
        if response.status_code == 206:
            return response.content, start + increment, False
        elif response.status_code == 416:
            # No data for this range: signals the end of the stream.
            return None, start, False
        elif response.status_code == 403:
            # Reported to the caller as an expired presigned URL.
            return None, start, True
        else:
            raise ConnectionError(
                f"read_temporary_blob: download failed for {filename} — {response.status_code}: {response.text}"
            )

    async def _chunk_generator():
        # A freshly issued URL that keeps answering 403 means access is
        # really denied; without a cap the refresh/retry cycle would never
        # terminate.
        max_consecutive_refreshes = 3

        start = byte_range[0] if byte_range is not None else 0
        end = byte_range[1] if byte_range is not None else None
        url = download_url
        consecutive_refreshes = 0
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as http_client:
            while True:
                if end is not None:
                    # ``end`` is inclusive, hence the +1.
                    remaining = end - start + 1
                    if remaining <= 0:
                        break
                    increment = min(stream_chunk_size, remaining)
                else:
                    increment = stream_chunk_size
                chunk, start, url_expired = await _read_chunk_async(
                    url, start, increment, http_client
                )
                if url_expired:
                    if consecutive_refreshes >= max_consecutive_refreshes:
                        raise ConnectionError(
                            f"read_temporary_blob: download failed for {filename} — "
                            f"403: presigned URL still rejected after "
                            f"{max_consecutive_refreshes} refreshes"
                        )
                    consecutive_refreshes += 1
                    _logger.info(
                        f"Presigned URL expired at byte {start}, refreshing..."
                    )
                    url = await _get_download_url_async(
                        api_url, auth_headers, filename, client
                    )
                    continue
                if chunk is None:
                    break
                # Progress was made, so the refreshed URL works.
                consecutive_refreshes = 0
                yield chunk

    if stream_lines:
        result = _iter_lines_async(_chunk_generator())
    else:
        result = _chunk_generator()

    if decode:
        result = _decode_iter_async(result)
    return result
|
|
File without changes
|