hirundo 0.1.8__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {hirundo-0.1.8 → hirundo-0.1.16}/PKG-INFO +83 -43
  2. {hirundo-0.1.8 → hirundo-0.1.16}/README.md +68 -38
  3. hirundo-0.1.16/hirundo/__init__.py +50 -0
  4. hirundo-0.1.16/hirundo/_constraints.py +53 -0
  5. hirundo-0.1.16/hirundo/_dataframe.py +43 -0
  6. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_env.py +2 -2
  7. hirundo-0.1.16/hirundo/_headers.py +29 -0
  8. hirundo-0.1.16/hirundo/_http.py +19 -0
  9. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_iter_sse_retrying.py +61 -17
  10. hirundo-0.1.16/hirundo/_timeouts.py +3 -0
  11. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/cli.py +52 -0
  12. hirundo-0.1.16/hirundo/dataset_enum.py +23 -0
  13. hirundo-0.1.16/hirundo/dataset_optimization.py +839 -0
  14. hirundo-0.1.16/hirundo/dataset_optimization_results.py +42 -0
  15. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/git.py +93 -35
  16. hirundo-0.1.16/hirundo/storage.py +463 -0
  17. hirundo-0.1.16/hirundo/unzip.py +247 -0
  18. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/PKG-INFO +83 -43
  19. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/SOURCES.txt +4 -1
  20. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/requires.txt +14 -3
  21. {hirundo-0.1.8 → hirundo-0.1.16}/pyproject.toml +21 -4
  22. hirundo-0.1.8/hirundo/__init__.py +0 -35
  23. hirundo-0.1.8/hirundo/_constraints.py +0 -21
  24. hirundo-0.1.8/hirundo/_headers.py +0 -13
  25. hirundo-0.1.8/hirundo/_http.py +0 -14
  26. hirundo-0.1.8/hirundo/_timeouts.py +0 -2
  27. hirundo-0.1.8/hirundo/dataset_optimization.py +0 -576
  28. hirundo-0.1.8/hirundo/enum.py +0 -20
  29. hirundo-0.1.8/hirundo/storage.py +0 -295
  30. {hirundo-0.1.8 → hirundo-0.1.16}/LICENSE +0 -0
  31. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/__main__.py +0 -0
  32. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/logger.py +0 -0
  33. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/dependency_links.txt +0 -0
  34. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/entry_points.txt +0 -0
  35. {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/top_level.txt +0 -0
  36. {hirundo-0.1.8 → hirundo-0.1.16}/setup.cfg +0 -0
{hirundo-0.1.8 → hirundo-0.1.16}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: hirundo
- Version: 0.1.8
+ Version: 0.1.16
  Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
  Author-email: Hirundo <dev@hirundo.io>
  License: MIT License
@@ -31,7 +31,6 @@ Requires-Dist: typer>=0.12.3
  Requires-Dist: httpx>=0.27.0
  Requires-Dist: stamina>=24.2.0
  Requires-Dist: httpx-sse>=0.4.0
- Requires-Dist: pandas>=2.2.2
  Requires-Dist: tqdm>=4.66.5
  Provides-Extra: dev
  Requires-Dist: pyyaml>=6.0.1; extra == "dev"
@@ -47,10 +46,13 @@ Requires-Dist: stamina>=24.2.0; extra == "dev"
  Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
  Requires-Dist: pytest>=8.2.0; extra == "dev"
  Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
- Requires-Dist: uv; extra == "dev"
+ Requires-Dist: uv>=0.5.8; extra == "dev"
  Requires-Dist: pre-commit>=3.7.1; extra == "dev"
- Requires-Dist: ruff==0.6.5; extra == "dev"
+ Requires-Dist: virtualenv>=20.6.6; extra == "dev"
+ Requires-Dist: ruff>=0.11.6; extra == "dev"
  Requires-Dist: bumpver; extra == "dev"
+ Requires-Dist: platformdirs>=4.3.6; extra == "dev"
+ Requires-Dist: safety>=3.2.13; extra == "dev"
  Provides-Extra: docs
  Requires-Dist: sphinx>=7.4.7; extra == "docs"
  Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
@@ -58,6 +60,14 @@ Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
  Requires-Dist: autodoc_pydantic>=2.2.0; extra == "docs"
  Requires-Dist: furo; extra == "docs"
  Requires-Dist: sphinx-multiversion; extra == "docs"
+ Requires-Dist: esbonio; extra == "docs"
+ Requires-Dist: starlette>0.40.0; extra == "docs"
+ Requires-Dist: markupsafe>=3.0.2; extra == "docs"
+ Provides-Extra: pandas
+ Requires-Dist: pandas>=2.2.2; extra == "pandas"
+ Provides-Extra: polars
+ Requires-Dist: polars>=1.0.0; extra == "polars"
+ Dynamic: license-file

  # Hirundo

@@ -66,7 +76,7 @@ This package exposes access to Hirundo APIs for dataset optimization for Machine
  Dataset optimization is currently available for datasets labelled for classification and object detection.


- Support dataset storage integrations include:
+ Support dataset storage configs include:
  - Google Cloud (GCP) Storage
  - Amazon Web Services (AWS) S3
  - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
@@ -107,27 +117,33 @@ You can install the codebase with a simple `pip install hirundo` to install the
  ## Usage

  Classification example:
- ```
- from hirundo.dataset_optimization import OptimizationDataset
- from hirundo.enum import LabellingType
- from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
+ ```python
+ from hirundo import (
+     HirundoCSV,
+     LabelingType,
+     OptimizationDataset,
+     StorageGCP,
+     StorageConfig,
+     StorageTypes,
+ )

+ gcp_bucket = StorageGCP(
+     bucket_name="cifar100bucket",
+     project="Hirundo-global",
+     credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
+ )
  test_dataset = OptimizationDataset(
      name="TEST-GCP cifar 100 classification dataset",
-     labelling_type=LabellingType.SingleLabelClassification,
-     dataset_storage=StorageLink(
-         storage_integration=StorageIntegration(
-             name="cifar100bucket",
-             type=StorageTypes.GCP,
-             gcp=StorageGCP(
-                 bucket_name="cifar100bucket",
-                 project="Hirundo-global",
-                 credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
-             ),
-         ),
-         path="/pytorch-cifar/data",
+     labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
+     storage_config=StorageConfig(
+         name="cifar100bucket",
+         type=StorageTypes.GCP,
+         gcp=gcp_bucket,
+     ),
+     data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
+     labeling_info=HirundoCSV(
+         csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
      ),
-     dataset_metadata_path="cifar100.csv",
      classes=cifar100_classes,
  )

@@ -139,29 +155,53 @@ print(results)

  Object detection example:

- ```
- from hirundo.dataset_optimization import OptimizationDataset
- from hirundo.enum import LabellingType
- from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
+ ```python
+ from hirundo import (
+     GitRepo,
+     HirundoCSV,
+     LabelingType,
+     OptimizationDataset,
+     StorageGit,
+     StorageConfig,
+     StorageTypes,
+ )

+ git_storage = StorageGit(
+     repo=GitRepo(
+         name="BDD-100k-validation-dataset",
+         repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
+     ),
+     branch="main",
+ )
  test_dataset = OptimizationDataset(
-     name=f"TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset{unique_id}",
-     labelling_type=LabellingType.ObjectDetection,
-     dataset_storage=StorageLink(
-         storage_integration=StorageIntegration(
-             name=f"BDD-100k-validation-dataset{unique_id}",
-             type=StorageTypes.GIT,
-             git=StorageGit(
-                 repo=GitRepo(
-                     name=f"BDD-100k-validation-dataset{unique_id}",
-                     repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only",
-                 ),
-                 branch="main",
-             ),
+     name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
+     labeling_type=LabelingType.OBJECT_DETECTION,
+     storage_config=StorageConfig(
+         name="BDD-100k-validation-dataset",
+         type=StorageTypes.GIT,
+         git=git_storage,
+     ),
+     data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
+     labeling_info=HirundoCSV(
+         csv_url=git_storage.get_url(
+             path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
          ),
-         path="/BDD100K Val from Hirundo.zip/bdd100k",
      ),
-     dataset_metadata_path="bdd100k.csv",
+     classes=[
+         "traffic light",
+         "traffic sign",
+         "car",
+         "pedestrian",
+         "bus",
+         "truck",
+         "rider",
+         "bicycle",
+         "motorcycle",
+         "train",
+         "other vehicle",
+         "other person",
+         "trailer",
+     ],
  )

  test_dataset.run_optimization()
@@ -173,4 +213,4 @@ Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyP

  ## Further documentation

- To learn about mroe how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
+ To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
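
Note on the dependency changes above: `pandas` moves from a hard requirement to an optional extra, and a new `polars` extra appears. A minimal sketch (standard library only; not part of the package) of how downstream code can check which optional backend is present after a plain `pip install hirundo`:

```python
# Minimal sketch: detect which optional dataframe backend is installed.
# Backend names come from the new "pandas" and "polars" extras above.
import importlib.util

for backend in ("pandas", "polars"):
    found = importlib.util.find_spec(backend) is not None
    print(f"{backend}: {'installed' if found else 'not installed'}")
```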
{hirundo-0.1.8 → hirundo-0.1.16}/README.md
@@ -5,7 +5,7 @@ This package exposes access to Hirundo APIs for dataset optimization for Machine
  Dataset optimization is currently available for datasets labelled for classification and object detection.


- Support dataset storage integrations include:
+ Support dataset storage configs include:
  - Google Cloud (GCP) Storage
  - Amazon Web Services (AWS) S3
  - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
@@ -46,27 +46,33 @@ You can install the codebase with a simple `pip install hirundo` to install the
  ## Usage

  Classification example:
- ```
- from hirundo.dataset_optimization import OptimizationDataset
- from hirundo.enum import LabellingType
- from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
+ ```python
+ from hirundo import (
+     HirundoCSV,
+     LabelingType,
+     OptimizationDataset,
+     StorageGCP,
+     StorageConfig,
+     StorageTypes,
+ )

+ gcp_bucket = StorageGCP(
+     bucket_name="cifar100bucket",
+     project="Hirundo-global",
+     credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
+ )
  test_dataset = OptimizationDataset(
      name="TEST-GCP cifar 100 classification dataset",
-     labelling_type=LabellingType.SingleLabelClassification,
-     dataset_storage=StorageLink(
-         storage_integration=StorageIntegration(
-             name="cifar100bucket",
-             type=StorageTypes.GCP,
-             gcp=StorageGCP(
-                 bucket_name="cifar100bucket",
-                 project="Hirundo-global",
-                 credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
-             ),
-         ),
-         path="/pytorch-cifar/data",
+     labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
+     storage_config=StorageConfig(
+         name="cifar100bucket",
+         type=StorageTypes.GCP,
+         gcp=gcp_bucket,
+     ),
+     data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
+     labeling_info=HirundoCSV(
+         csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
      ),
-     dataset_metadata_path="cifar100.csv",
      classes=cifar100_classes,
  )

@@ -78,29 +84,53 @@ print(results)

  Object detection example:

- ```
- from hirundo.dataset_optimization import OptimizationDataset
- from hirundo.enum import LabellingType
- from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
+ ```python
+ from hirundo import (
+     GitRepo,
+     HirundoCSV,
+     LabelingType,
+     OptimizationDataset,
+     StorageGit,
+     StorageConfig,
+     StorageTypes,
+ )

+ git_storage = StorageGit(
+     repo=GitRepo(
+         name="BDD-100k-validation-dataset",
+         repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
+     ),
+     branch="main",
+ )
  test_dataset = OptimizationDataset(
-     name=f"TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset{unique_id}",
-     labelling_type=LabellingType.ObjectDetection,
-     dataset_storage=StorageLink(
-         storage_integration=StorageIntegration(
-             name=f"BDD-100k-validation-dataset{unique_id}",
-             type=StorageTypes.GIT,
-             git=StorageGit(
-                 repo=GitRepo(
-                     name=f"BDD-100k-validation-dataset{unique_id}",
-                     repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only",
-                 ),
-                 branch="main",
-             ),
+     name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
+     labeling_type=LabelingType.OBJECT_DETECTION,
+     storage_config=StorageConfig(
+         name="BDD-100k-validation-dataset",
+         type=StorageTypes.GIT,
+         git=git_storage,
+     ),
+     data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
+     labeling_info=HirundoCSV(
+         csv_url=git_storage.get_url(
+             path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
          ),
-         path="/BDD100K Val from Hirundo.zip/bdd100k",
      ),
-     dataset_metadata_path="bdd100k.csv",
+     classes=[
+         "traffic light",
+         "traffic sign",
+         "car",
+         "pedestrian",
+         "bus",
+         "truck",
+         "rider",
+         "bicycle",
+         "motorcycle",
+         "train",
+         "other vehicle",
+         "other person",
+         "trailer",
+     ],
  )

  test_dataset.run_optimization()
@@ -112,4 +142,4 @@ Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyP

  ## Further documentation

- To learn about mroe how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
+ To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
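
Taken together, the README changes imply a rename map for code moving from 0.1.8 to 0.1.16 (note the examples above still assume `import json`, `import os`, and a `cifar100_classes` list defined elsewhere). The summary below is derived solely from the diff above, not from an official migration guide:

```python
# Rename map implied by the README diff (hedged; derived from the diff only).
RENAMES_0_1_8_TO_0_1_16 = {
    "hirundo.enum.LabellingType": "hirundo.LabelingType",
    "LabellingType.SingleLabelClassification": "LabelingType.SINGLE_LABEL_CLASSIFICATION",
    "LabellingType.ObjectDetection": "LabelingType.OBJECT_DETECTION",
    "StorageIntegration(...)": "StorageConfig(...)",
    "StorageLink(path=...)": "data_root_url=<storage>.get_url(path=...)",
    "dataset_metadata_path=...": "labeling_info=HirundoCSV(csv_url=...)",
}
```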
hirundo-0.1.16/hirundo/__init__.py
@@ -0,0 +1,50 @@
+ from .dataset_enum import (
+     DatasetMetadataType,
+     LabelingType,
+ )
+ from .dataset_optimization import (
+     COCO,
+     YOLO,
+     HirundoCSV,
+     HirundoError,
+     OptimizationDataset,
+     RunArgs,
+     VisionRunArgs,
+ )
+ from .dataset_optimization_results import DatasetOptimizationResults
+ from .git import GitPlainAuth, GitRepo, GitSSHAuth
+ from .storage import (
+     StorageConfig,
+     StorageGCP,
+     # StorageAzure, TODO: Azure storage is coming soon
+     StorageGit,
+     StorageS3,
+     StorageTypes,
+ )
+ from .unzip import load_df, load_from_zip
+
+ __all__ = [
+     "COCO",
+     "YOLO",
+     "HirundoCSV",
+     "HirundoError",
+     "OptimizationDataset",
+     "RunArgs",
+     "VisionRunArgs",
+     "LabelingType",
+     "DatasetMetadataType",
+     "GitPlainAuth",
+     "GitRepo",
+     "GitSSHAuth",
+     "StorageTypes",
+     "StorageS3",
+     "StorageGCP",
+     # "StorageAzure", TODO: Azure storage is coming soon
+     "StorageGit",
+     "StorageConfig",
+     "DatasetOptimizationResults",
+     "load_df",
+     "load_from_zip",
+ ]
+
+ __version__ = "0.1.16"
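
The new `__init__.py` flattens the public API: everything in `__all__` is re-exported from the package root. A short usage sketch (assumes `hirundo` 0.1.16 is installed):

```python
# In 0.1.16 the public names import straight from the package root,
# replacing the 0.1.8-style submodule imports.
import hirundo
from hirundo import LabelingType, OptimizationDataset, StorageConfig

print(hirundo.__version__)  # "0.1.16"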
hirundo-0.1.16/hirundo/_constraints.py
@@ -0,0 +1,53 @@
+ from typing import Annotated
+
+ from pydantic import StringConstraints, UrlConstraints
+ from pydantic_core import Url
+
+ S3BucketUrl = Annotated[
+     str,
+     StringConstraints(
+         min_length=8,
+         max_length=1023,
+         pattern=r"s3?://[a-z0-9.-]{3,64}[/]?",  # Only allow real S3 bucket URLs
+     ),
+ ]
+
+ StorageConfigName = Annotated[
+     str,
+     StringConstraints(
+         min_length=1,
+         max_length=255,
+         pattern=r"^[a-zA-Z0-9-_]+$",
+     ),
+ ]
+
+ S3_MIN_LENGTH = 8
+ S3_MAX_LENGTH = 1023
+ S3_PATTERN = r"s3://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
+ GCP_MIN_LENGTH = 8
+ GCP_MAX_LENGTH = 1023
+ GCP_PATTERN = r"gs://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
+
+ RepoUrl = Annotated[
+     Url,
+     UrlConstraints(
+         allowed_schemes=[
+             "ssh",
+             "https",
+             "http",
+         ]
+     ),
+ ]
+ HirundoUrl = Annotated[
+     Url,
+     UrlConstraints(
+         allowed_schemes=[
+             "file",
+             "https",
+             "http",
+             "s3",
+             "gs",
+             "ssh",
+         ]
+     ),
+ ]
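
These `Annotated` types are pydantic v2 constrained types; they validate wherever pydantic sees them, as model fields or via `TypeAdapter`. A self-contained sketch (assumes pydantic v2 is installed; the bucket URL is illustrative):

```python
# Validating a value against an Annotated constraint type with pydantic v2.
from typing import Annotated

from pydantic import StringConstraints, TypeAdapter

S3BucketUrl = Annotated[  # same shape as the constraint defined above
    str,
    StringConstraints(min_length=8, max_length=1023, pattern=r"s3?://[a-z0-9.-]{3,64}[/]?"),
]

adapter = TypeAdapter(S3BucketUrl)
print(adapter.validate_python("s3://my-data-bucket/"))  # passes, returns the string
# adapter.validate_python("not a bucket")  # would raise pydantic.ValidationError
```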
hirundo-0.1.16/hirundo/_dataframe.py
@@ -0,0 +1,43 @@
+ has_pandas = False
+ has_polars = False
+
+ pd = None
+ pl = None
+ int32 = type[None]
+ float32 = type[None]
+ string = type[None]
+ # ⬆️ These are just placeholders for the int32, float32 and string types
+ # for when neither pandas nor polars are available
+
+ try:
+     import numpy as np
+     import pandas as pd
+
+     has_pandas = True
+     int32 = np.int32
+     float32 = np.float32
+     string = str
+ except ImportError:
+     pass
+
+ try:
+     import polars as pl
+     import polars.datatypes as pl_datatypes
+
+     has_polars = True
+     int32 = pl_datatypes.Int32
+     float32 = pl_datatypes.Float32
+     string = pl_datatypes.String
+ except ImportError:
+     pass
+
+
+ __all__ = [
+     "has_polars",
+     "has_pandas",
+     "pd",
+     "pl",
+     "int32",
+     "float32",
+     "string",
+ ]
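
`_dataframe.py` is an optional-dependency shim matching the new `pandas`/`polars` extras: it imports whichever backend is available and exposes `has_pandas`/`has_polars` flags plus shared dtype aliases. A sketch of how a caller might branch on it (assumes `hirundo` is installed; the rows are illustrative):

```python
# Branching on the shim's flags; note the dtype aliases prefer polars when
# both backends are installed, since its try-block runs last in _dataframe.py.
from hirundo._dataframe import has_pandas, has_polars, pd, pl

rows = [{"image": "a.png", "label": "cat"}, {"image": "b.png", "label": "dog"}]

if has_polars:
    df = pl.DataFrame(rows)
elif has_pandas:
    df = pd.DataFrame(rows)
else:
    df = rows  # fall back to plain Python data structures
print(type(df))
```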
{hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_env.py
@@ -2,11 +2,11 @@ import enum
  import os
  from pathlib import Path

- from dotenv import load_dotenv
+ from dotenv import find_dotenv, load_dotenv


  class EnvLocation(enum.Enum):
-     DOTENV = Path.cwd() / ".env"
+     DOTENV = find_dotenv(".env")
      HOME = Path.home() / ".hirundo.conf"

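The `_env.py` change swaps the hard-coded `Path.cwd() / ".env"` for python-dotenv's `find_dotenv`, which walks up parent directories until it finds a `.env` file. A sketch of the behavioural difference (assumes python-dotenv is installed):

```python
# find_dotenv walks parent directories, so a project-root .env is found even
# when the script runs from a subdirectory; Path.cwd()/".env" only saw the cwd.
from dotenv import find_dotenv, load_dotenv

dotenv_path = find_dotenv(".env")  # returns "" when no .env exists up the tree
if dotenv_path:
    load_dotenv(dotenv_path)
```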
hirundo-0.1.16/hirundo/_headers.py
@@ -0,0 +1,29 @@
+ from hirundo._env import API_KEY, check_api_key
+
+ HIRUNDO_API_VERSION = "0.2"
+
+ _json_headers = {
+     "Content-Type": "application/json",
+     "Accept": "application/json",
+ }
+
+
+ def _get_auth_headers():
+     check_api_key()
+     return {
+         "Authorization": f"Bearer {API_KEY}",
+     }
+
+
+ def _get_api_version_header():
+     return {
+         "HIRUNDO-API-VERSION": HIRUNDO_API_VERSION,
+     }
+
+
+ def get_headers():
+     return {
+         **_json_headers,
+         **_get_auth_headers(),
+         **_get_api_version_header(),
+     }
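
`get_headers` composes the JSON content headers, the bearer-token auth header, and the pinned `HIRUNDO-API-VERSION` header. A usage sketch (assumes a valid API key is configured via hirundo's environment handling; the endpoint URL is hypothetical, not a documented API route):

```python
# Usage sketch; the URL below is a placeholder, not a documented endpoint.
import httpx

from hirundo._headers import get_headers

response = httpx.get(
    "https://api.hirundo.io/example",  # hypothetical endpoint
    headers=get_headers(),  # Content-Type/Accept + Authorization + HIRUNDO-API-VERSION
)
```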
hirundo-0.1.16/hirundo/_http.py
@@ -0,0 +1,19 @@
+ from requests import Response
+
+ import hirundo.logger
+
+ logger = hirundo.logger.get_logger(__name__)
+
+ MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
+
+
+ def raise_for_status_with_reason(response: Response):
+     try:
+         if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
+             response.reason = response.json().get("reason", None)
+             if response.reason is None:
+                 response.reason = response.json().get("detail", None)
+     except Exception as e:
+         logger.debug("Could not parse response as JSON: %s", e)
+
+     response.raise_for_status()
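
`raise_for_status_with_reason` patches the response's `reason` from the JSON body's `reason` (or `detail`) field before raising, so the server's explanation lands in the exception message. A usage sketch (the URL is just a public demo endpoint that returns a 404):

```python
# Usage sketch: any server-provided "reason"/"detail" is surfaced in the error.
import requests

from hirundo._http import raise_for_status_with_reason

response = requests.get("https://httpbin.org/status/404", timeout=10)
try:
    raise_for_status_with_reason(response)
except requests.HTTPError as err:
    print(err)  # includes the patched reason when the body carried one
```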
{hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_iter_sse_retrying.py
@@ -1,12 +1,20 @@
  import asyncio
  import time
  import typing
+ import uuid
  from collections.abc import AsyncGenerator, Generator

  import httpx
- from httpx_sse import ServerSentEvent, aconnect_sse, connect_sse
+ import requests
+ import urllib3
+ from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
  from stamina import retry

+ from hirundo._timeouts import READ_TIMEOUT
+ from hirundo.logger import get_logger
+
+ logger = get_logger(__name__)
+

  # Credit: https://github.com/florimondmanca/httpx-sse/blob/master/README.md#handling-reconnections
  def iter_sse_retrying(
@@ -28,7 +36,13 @@ def iter_sse_retrying(
      # This may happen when the server is overloaded and closes the connection or
      # when Kubernetes restarts / replaces a pod.
      # Likewise, this will likely be temporary, hence the retries.
-     @retry(on=(httpx.ReadError, httpx.RemoteProtocolError))
+     @retry(
+         on=(
+             httpx.ReadError,
+             httpx.RemoteProtocolError,
+             urllib3.exceptions.ReadTimeoutError,
+         )
+     )
      def _iter_sse():
          nonlocal last_event_id, reconnection_delay

@@ -44,13 +58,27 @@ def iter_sse_retrying(
              connect_headers["Last-Event-ID"] = last_event_id

          with connect_sse(client, method, url, headers=connect_headers) as event_source:
-             for sse in event_source.iter_sse():
-                 last_event_id = sse.id
-
-                 if sse.retry is not None:
-                     reconnection_delay = sse.retry / 1000
-
-                 yield sse
+             try:
+                 for sse in event_source.iter_sse():
+                     last_event_id = sse.id
+
+                     if sse.retry is not None:
+                         reconnection_delay = sse.retry / 1000
+
+                     yield sse
+             except SSEError:
+                 logger.error("SSE error occurred. Trying regular request")
+                 response = requests.get(
+                     url,
+                     headers=connect_headers,
+                     timeout=READ_TIMEOUT,
+                 )
+                 yield ServerSentEvent(
+                     event="",
+                     data=response.text,
+                     id=uuid.uuid4().hex,
+                     retry=None,
+                 )

      return _iter_sse()

@@ -72,7 +100,13 @@ async def aiter_sse_retrying(
      # This may happen when the server is overloaded and closes the connection or
      # when Kubernetes restarts / replaces a pod.
      # Likewise, this will likely be temporary, hence the retries.
-     @retry(on=(httpx.ReadError, httpx.RemoteProtocolError))
+     @retry(
+         on=(
+             httpx.ReadError,
+             httpx.RemoteProtocolError,
+             urllib3.exceptions.ReadTimeoutError,
+         )
+     )
      async def _iter_sse() -> AsyncGenerator[ServerSentEvent, None]:
          nonlocal last_event_id, reconnection_delay

@@ -86,12 +120,22 @@ async def aiter_sse_retrying(
          async with aconnect_sse(
              client, method, url, headers=connect_headers
          ) as event_source:
-             async for sse in event_source.aiter_sse():
-                 last_event_id = sse.id
-
-                 if sse.retry is not None:
-                     reconnection_delay = sse.retry / 1000
-
-                 yield sse
+             try:
+                 async for sse in event_source.aiter_sse():
+                     last_event_id = sse.id
+
+                     if sse.retry is not None:
+                         reconnection_delay = sse.retry / 1000
+
+                     yield sse
+             except SSEError:
+                 logger.error("SSE error occurred. Trying regular request")
+                 response = await client.get(url, headers=connect_headers)
+                 yield ServerSentEvent(
+                     event="",
+                     data=response.text,
+                     id=uuid.uuid4().hex,
+                     retry=None,
+                 )

      return _iter_sse()
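
For context, a usage sketch of the retrying SSE iterator; the `(client, method, url, headers)` signature is inferred from the call sites visible in this diff, and the URL is hypothetical:

```python
# Sketch (inferred signature): iterate server-sent events with automatic
# reconnection; on a non-SSE response the helper falls back to a plain GET.
import httpx

from hirundo._iter_sse_retrying import iter_sse_retrying

with httpx.Client(timeout=None) as client:
    for sse in iter_sse_retrying(client, "GET", "https://example.com/events", headers={}):
        print(sse.event, sse.data)
```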
hirundo-0.1.16/hirundo/_timeouts.py
@@ -0,0 +1,3 @@
+ READ_TIMEOUT = 30.0
+ MODIFY_TIMEOUT = 60.0
+ DOWNLOAD_READ_TIMEOUT = 600.0  # 10 minutes
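
These module-level constants centralize HTTP timeout values. A sketch of how such constants typically parameterize `httpx` (illustrative only; the diff does not show the actual call sites):

```python
# Illustrative only: building httpx timeout objects from the constants.
import httpx

from hirundo._timeouts import DOWNLOAD_READ_TIMEOUT, READ_TIMEOUT

default_timeout = httpx.Timeout(READ_TIMEOUT)  # 30 s for ordinary reads
download_timeout = httpx.Timeout(10.0, read=DOWNLOAD_READ_TIMEOUT)  # 10 min read window
```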