hirundo 0.1.8__tar.gz → 0.1.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hirundo-0.1.8 → hirundo-0.1.16}/PKG-INFO +83 -43
- {hirundo-0.1.8 → hirundo-0.1.16}/README.md +68 -38
- hirundo-0.1.16/hirundo/__init__.py +50 -0
- hirundo-0.1.16/hirundo/_constraints.py +53 -0
- hirundo-0.1.16/hirundo/_dataframe.py +43 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_env.py +2 -2
- hirundo-0.1.16/hirundo/_headers.py +29 -0
- hirundo-0.1.16/hirundo/_http.py +19 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_iter_sse_retrying.py +61 -17
- hirundo-0.1.16/hirundo/_timeouts.py +3 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/cli.py +52 -0
- hirundo-0.1.16/hirundo/dataset_enum.py +23 -0
- hirundo-0.1.16/hirundo/dataset_optimization.py +839 -0
- hirundo-0.1.16/hirundo/dataset_optimization_results.py +42 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/git.py +93 -35
- hirundo-0.1.16/hirundo/storage.py +463 -0
- hirundo-0.1.16/hirundo/unzip.py +247 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/PKG-INFO +83 -43
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/SOURCES.txt +4 -1
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/requires.txt +14 -3
- {hirundo-0.1.8 → hirundo-0.1.16}/pyproject.toml +21 -4
- hirundo-0.1.8/hirundo/__init__.py +0 -35
- hirundo-0.1.8/hirundo/_constraints.py +0 -21
- hirundo-0.1.8/hirundo/_headers.py +0 -13
- hirundo-0.1.8/hirundo/_http.py +0 -14
- hirundo-0.1.8/hirundo/_timeouts.py +0 -2
- hirundo-0.1.8/hirundo/dataset_optimization.py +0 -576
- hirundo-0.1.8/hirundo/enum.py +0 -20
- hirundo-0.1.8/hirundo/storage.py +0 -295
- {hirundo-0.1.8 → hirundo-0.1.16}/LICENSE +0 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/__main__.py +0 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo/logger.py +0 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/dependency_links.txt +0 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/entry_points.txt +0 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/hirundo.egg-info/top_level.txt +0 -0
- {hirundo-0.1.8 → hirundo-0.1.16}/setup.cfg +0 -0
{hirundo-0.1.8 → hirundo-0.1.16}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: hirundo
-Version: 0.1.8
+Version: 0.1.16
 Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
 Author-email: Hirundo <dev@hirundo.io>
 License: MIT License
@@ -31,7 +31,6 @@ Requires-Dist: typer>=0.12.3
 Requires-Dist: httpx>=0.27.0
 Requires-Dist: stamina>=24.2.0
 Requires-Dist: httpx-sse>=0.4.0
-Requires-Dist: pandas>=2.2.2
 Requires-Dist: tqdm>=4.66.5
 Provides-Extra: dev
 Requires-Dist: pyyaml>=6.0.1; extra == "dev"
@@ -47,10 +46,13 @@ Requires-Dist: stamina>=24.2.0; extra == "dev"
 Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
 Requires-Dist: pytest>=8.2.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
-Requires-Dist: uv; extra == "dev"
+Requires-Dist: uv>=0.5.8; extra == "dev"
 Requires-Dist: pre-commit>=3.7.1; extra == "dev"
-Requires-Dist:
+Requires-Dist: virtualenv>=20.6.6; extra == "dev"
+Requires-Dist: ruff>=0.11.6; extra == "dev"
 Requires-Dist: bumpver; extra == "dev"
+Requires-Dist: platformdirs>=4.3.6; extra == "dev"
+Requires-Dist: safety>=3.2.13; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=7.4.7; extra == "docs"
 Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
@@ -58,6 +60,14 @@ Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
 Requires-Dist: autodoc_pydantic>=2.2.0; extra == "docs"
 Requires-Dist: furo; extra == "docs"
 Requires-Dist: sphinx-multiversion; extra == "docs"
+Requires-Dist: esbonio; extra == "docs"
+Requires-Dist: starlette>0.40.0; extra == "docs"
+Requires-Dist: markupsafe>=3.0.2; extra == "docs"
+Provides-Extra: pandas
+Requires-Dist: pandas>=2.2.2; extra == "pandas"
+Provides-Extra: polars
+Requires-Dist: polars>=1.0.0; extra == "polars"
+Dynamic: license-file
 
 # Hirundo
 
@@ -66,7 +76,7 @@ This package exposes access to Hirundo APIs for dataset optimization for Machine
 Dataset optimization is currently available for datasets labelled for classification and object detection.
 
 
-Support dataset storage
+Support dataset storage configs include:
 - Google Cloud (GCP) Storage
 - Amazon Web Services (AWS) S3
 - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
@@ -107,27 +117,33 @@ You can install the codebase with a simple `pip install hirundo` to install the
 ## Usage
 
 Classification example:
-```
-from hirundo
-
-
+```python
+from hirundo import (
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGCP,
+    StorageConfig,
+    StorageTypes,
+)
 
+gcp_bucket = StorageGCP(
+    bucket_name="cifar100bucket",
+    project="Hirundo-global",
+    credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
+)
 test_dataset = OptimizationDataset(
     name="TEST-GCP cifar 100 classification dataset",
-
-
-
-
-
-
-
-
-
-    ),
-    ),
-    path="/pytorch-cifar/data",
+    labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
+    storage_config=StorageConfig(
+        name="cifar100bucket",
+        type=StorageTypes.GCP,
+        gcp=gcp_bucket,
+    ),
+    data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
+    labeling_info=HirundoCSV(
+        csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
     ),
-    dataset_metadata_path="cifar100.csv",
     classes=cifar100_classes,
 )
@@ -139,29 +155,53 @@ print(results)
 
 Object detection example:
 
-```
-from hirundo
-
-
+```python
+from hirundo import (
+    GitRepo,
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGit,
+    StorageConfig,
+    StorageTypes,
+)
 
+git_storage = StorageGit(
+    repo=GitRepo(
+        name="BDD-100k-validation-dataset",
+        repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
+    ),
+    branch="main",
+)
 test_dataset = OptimizationDataset(
-    name=
-
-
-
-
-
-
-
-
-
-    branch="main",
-    ),
+    name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
+    labeling_type=LabelingType.OBJECT_DETECTION,
+    storage_config=StorageConfig(
+        name="BDD-100k-validation-dataset",
+        type=StorageTypes.GIT,
+        git=git_storage,
+    ),
+    data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
+    labeling_info=HirundoCSV(
+        csv_url=git_storage.get_url(
+            path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
+        ),
     ),
-    path="/BDD100K Val from Hirundo.zip/bdd100k",
     ),
-
+    classes=[
+        "traffic light",
+        "traffic sign",
+        "car",
+        "pedestrian",
+        "bus",
+        "truck",
+        "rider",
+        "bicycle",
+        "motorcycle",
+        "train",
+        "other vehicle",
+        "other person",
+        "trailer",
+    ],
 )
 
 test_dataset.run_optimization()
@@ -173,4 +213,4 @@ Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyP
 
 ## Further documentation
 
-To learn about
+To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
````
{hirundo-0.1.8 → hirundo-0.1.16}/README.md

````diff
@@ -5,7 +5,7 @@ This package exposes access to Hirundo APIs for dataset optimization for Machine
 Dataset optimization is currently available for datasets labelled for classification and object detection.
 
 
-Support dataset storage
+Support dataset storage configs include:
 - Google Cloud (GCP) Storage
 - Amazon Web Services (AWS) S3
 - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
@@ -46,27 +46,33 @@ You can install the codebase with a simple `pip install hirundo` to install the
 ## Usage
 
 Classification example:
-```
-from hirundo
-
-
+```python
+from hirundo import (
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGCP,
+    StorageConfig,
+    StorageTypes,
+)
 
+gcp_bucket = StorageGCP(
+    bucket_name="cifar100bucket",
+    project="Hirundo-global",
+    credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
+)
 test_dataset = OptimizationDataset(
     name="TEST-GCP cifar 100 classification dataset",
-
-
-
-
-
-
-
-
-
-    ),
-    ),
-    path="/pytorch-cifar/data",
+    labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
+    storage_config=StorageConfig(
+        name="cifar100bucket",
+        type=StorageTypes.GCP,
+        gcp=gcp_bucket,
+    ),
+    data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
+    labeling_info=HirundoCSV(
+        csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
     ),
-    dataset_metadata_path="cifar100.csv",
     classes=cifar100_classes,
 )
@@ -78,29 +84,53 @@ print(results)
 
 Object detection example:
 
-```
-from hirundo
-
-
+```python
+from hirundo import (
+    GitRepo,
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGit,
+    StorageConfig,
+    StorageTypes,
+)
 
+git_storage = StorageGit(
+    repo=GitRepo(
+        name="BDD-100k-validation-dataset",
+        repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
+    ),
+    branch="main",
+)
 test_dataset = OptimizationDataset(
-    name=
-
-
-
-
-
-
-
-
-
-    branch="main",
-    ),
+    name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
+    labeling_type=LabelingType.OBJECT_DETECTION,
+    storage_config=StorageConfig(
+        name="BDD-100k-validation-dataset",
+        type=StorageTypes.GIT,
+        git=git_storage,
+    ),
+    data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
+    labeling_info=HirundoCSV(
+        csv_url=git_storage.get_url(
+            path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
+        ),
     ),
-    path="/BDD100K Val from Hirundo.zip/bdd100k",
     ),
-
+    classes=[
+        "traffic light",
+        "traffic sign",
+        "car",
+        "pedestrian",
+        "bus",
+        "truck",
+        "rider",
+        "bicycle",
+        "motorcycle",
+        "train",
+        "other vehicle",
+        "other person",
+        "trailer",
+    ],
 )
 
 test_dataset.run_optimization()
@@ -112,4 +142,4 @@ Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyP
 
 ## Further documentation
 
-To learn about
+To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
````
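Note that the README's classification snippet calls `json.loads(os.environ["GCP_CREDENTIALS"])` and passes a `cifar100_classes` list without defining either. A minimal, hypothetical preamble that would make it self-contained (the two-entry list is a placeholder for the full 100 CIFAR-100 label names):

```python
import json
import os

# Placeholder: the real list holds all 100 CIFAR-100 class names.
cifar100_classes = ["apple", "aquarium_fish"]

# The GCP service-account credentials are read from an environment variable,
# so they never need to be committed to the repository.
gcp_credentials = json.loads(os.environ["GCP_CREDENTIALS"])
```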
hirundo-0.1.16/hirundo/__init__.py (new file)

```diff
@@ -0,0 +1,50 @@
+from .dataset_enum import (
+    DatasetMetadataType,
+    LabelingType,
+)
+from .dataset_optimization import (
+    COCO,
+    YOLO,
+    HirundoCSV,
+    HirundoError,
+    OptimizationDataset,
+    RunArgs,
+    VisionRunArgs,
+)
+from .dataset_optimization_results import DatasetOptimizationResults
+from .git import GitPlainAuth, GitRepo, GitSSHAuth
+from .storage import (
+    StorageConfig,
+    StorageGCP,
+    # StorageAzure, TODO: Azure storage is coming soon
+    StorageGit,
+    StorageS3,
+    StorageTypes,
+)
+from .unzip import load_df, load_from_zip
+
+__all__ = [
+    "COCO",
+    "YOLO",
+    "HirundoCSV",
+    "HirundoError",
+    "OptimizationDataset",
+    "RunArgs",
+    "VisionRunArgs",
+    "LabelingType",
+    "DatasetMetadataType",
+    "GitPlainAuth",
+    "GitRepo",
+    "GitSSHAuth",
+    "StorageTypes",
+    "StorageS3",
+    "StorageGCP",
+    # "StorageAzure", TODO: Azure storage is coming soon
+    "StorageGit",
+    "StorageConfig",
+    "DatasetOptimizationResults",
+    "load_df",
+    "load_from_zip",
+]
+
+__version__ = "0.1.16"
```
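A short sketch of what this re-export surface gives consumers: everything is importable from the package root, and the version is exposed as `__version__`. The names below come from the `__all__` list above; iterating `StorageTypes` assumes it is a standard `enum.Enum`, as its usage in the README diff suggests.

```python
import hirundo
from hirundo import LabelingType, OptimizationDataset, StorageTypes

print(hirundo.__version__)  # "0.1.16"
print(list(StorageTypes))   # enumerate the supported storage backends
```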
hirundo-0.1.16/hirundo/_constraints.py (new file)

```diff
@@ -0,0 +1,53 @@
+from typing import Annotated
+
+from pydantic import StringConstraints, UrlConstraints
+from pydantic_core import Url
+
+S3BucketUrl = Annotated[
+    str,
+    StringConstraints(
+        min_length=8,
+        max_length=1023,
+        pattern=r"s3?://[a-z0-9.-]{3,64}[/]?",  # Only allow real S3 bucket URLs
+    ),
+]
+
+StorageConfigName = Annotated[
+    str,
+    StringConstraints(
+        min_length=1,
+        max_length=255,
+        pattern=r"^[a-zA-Z0-9-_]+$",
+    ),
+]
+
+S3_MIN_LENGTH = 8
+S3_MAX_LENGTH = 1023
+S3_PATTERN = r"s3://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
+GCP_MIN_LENGTH = 8
+GCP_MAX_LENGTH = 1023
+GCP_PATTERN = r"gs://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
+
+RepoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "ssh",
+            "https",
+            "http",
+        ]
+    ),
+]
+HirundoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "file",
+            "https",
+            "http",
+            "s3",
+            "gs",
+            "ssh",
+        ]
+    ),
+]
```
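These `Annotated` aliases carry pydantic v2 metadata, so they can be validated standalone with a `TypeAdapter`. A minimal sketch (not part of the package) exercising `HirundoUrl`'s scheme allow-list:

```python
from pydantic import TypeAdapter, ValidationError

from hirundo._constraints import HirundoUrl

adapter = TypeAdapter(HirundoUrl)

# "gs" is in allowed_schemes, so this parses into a pydantic-core Url.
url = adapter.validate_python("gs://cifar100bucket/pytorch-cifar/data")
print(url.scheme, url.host)  # gs cifar100bucket

# "ftp" is not allowed, so validation fails.
try:
    adapter.validate_python("ftp://example.com/data")
except ValidationError as exc:
    print(exc.error_count(), "validation error")
```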
hirundo-0.1.16/hirundo/_dataframe.py (new file)

```diff
@@ -0,0 +1,43 @@
+has_pandas = False
+has_polars = False
+
+pd = None
+pl = None
+int32 = type[None]
+float32 = type[None]
+string = type[None]
+# ⬆️ These are just placeholders for the int32, float32 and string types
+# for when neither pandas nor polars are available
+
+try:
+    import numpy as np
+    import pandas as pd
+
+    has_pandas = True
+    int32 = np.int32
+    float32 = np.float32
+    string = str
+except ImportError:
+    pass
+
+try:
+    import polars as pl
+    import polars.datatypes as pl_datatypes
+
+    has_polars = True
+    int32 = pl_datatypes.Int32
+    float32 = pl_datatypes.Float32
+    string = pl_datatypes.String
+except ImportError:
+    pass
+
+
+__all__ = [
+    "has_polars",
+    "has_pandas",
+    "pd",
+    "pl",
+    "int32",
+    "float32",
+    "string",
+]
```
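This module makes pandas and polars optional at import time, matching the new `pandas`/`polars` extras in the metadata diff above. A hypothetical caller would branch on the `has_*` flags rather than importing either library directly:

```python
from hirundo._dataframe import has_pandas, has_polars, pd, pl


def row_count(df) -> int:
    # Dispatch on whichever dataframe library was importable; pd/pl are None
    # when the corresponding extra is not installed.
    if has_pandas and isinstance(df, pd.DataFrame):
        return len(df)
    if has_polars and isinstance(df, pl.DataFrame):
        return df.height
    raise TypeError("install hirundo[pandas] or hirundo[polars] to pass dataframes")
```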
{hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_env.py

```diff
@@ -2,11 +2,11 @@ import enum
 import os
 from pathlib import Path
 
-from dotenv import load_dotenv
+from dotenv import find_dotenv, load_dotenv
 
 
 class EnvLocation(enum.Enum):
-    DOTENV =
+    DOTENV = find_dotenv(".env")
     HOME = Path.home() / ".hirundo.conf"
 
 
```
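For context on the `find_dotenv` change: unlike a hard-coded path, `find_dotenv(".env")` searches up the directory tree and returns an empty string when no file exists, so the enum member degrades gracefully. A small illustration using python-dotenv directly:

```python
from dotenv import find_dotenv, load_dotenv

# Returns the path of the nearest ".env" up the directory tree, or "" if none.
dotenv_path = find_dotenv(".env")
load_dotenv(dotenv_path)  # tolerates an empty path instead of raising
```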
hirundo-0.1.16/hirundo/_headers.py (new file)

```diff
@@ -0,0 +1,29 @@
+from hirundo._env import API_KEY, check_api_key
+
+HIRUNDO_API_VERSION = "0.2"
+
+_json_headers = {
+    "Content-Type": "application/json",
+    "Accept": "application/json",
+}
+
+
+def _get_auth_headers():
+    check_api_key()
+    return {
+        "Authorization": f"Bearer {API_KEY}",
+    }
+
+
+def _get_api_version_header():
+    return {
+        "HIRUNDO-API-VERSION": HIRUNDO_API_VERSION,
+    }
+
+
+def get_headers():
+    return {
+        **_json_headers,
+        **_get_auth_headers(),
+        **_get_api_version_header(),
+    }
```
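A sketch of how these headers would be attached to an API call, assuming an API key configured through the package's env handling; the endpoint URL below is hypothetical, since the real base URL is not shown in this diff:

```python
import httpx

from hirundo._headers import get_headers

# get_headers() merges the JSON content headers, the Bearer token from the
# configured API key, and the HIRUNDO-API-VERSION header.
response = httpx.get(
    "https://example.invalid/api/datasets",  # hypothetical endpoint
    headers=get_headers(),
)
print(response.status_code)
```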
hirundo-0.1.16/hirundo/_http.py (new file)

```diff
@@ -0,0 +1,19 @@
+from requests import Response
+
+import hirundo.logger
+
+logger = hirundo.logger.get_logger(__name__)
+
+MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
+
+
+def raise_for_status_with_reason(response: Response):
+    try:
+        if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
+            response.reason = response.json().get("reason", None)
+            if response.reason is None:
+                response.reason = response.json().get("detail", None)
+    except Exception as e:
+        logger.debug("Could not parse response as JSON: %s", e)
+
+    response.raise_for_status()
```
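A usage sketch, assuming a `requests` response from some endpoint: the helper copies the server's JSON `reason` (or `detail`) field onto `response.reason` before delegating to `raise_for_status()`, so the raised `HTTPError` message carries the server-supplied explanation:

```python
import requests

from hirundo._http import raise_for_status_with_reason

response = requests.get("https://example.invalid/api/resource", timeout=30)
try:
    raise_for_status_with_reason(response)
except requests.HTTPError as exc:
    # The error string now includes the server's "reason"/"detail",
    # not just the bare status code.
    print(exc)
```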
{hirundo-0.1.8 → hirundo-0.1.16}/hirundo/_iter_sse_retrying.py

```diff
@@ -1,12 +1,20 @@
 import asyncio
 import time
 import typing
+import uuid
 from collections.abc import AsyncGenerator, Generator
 
 import httpx
-
+import requests
+import urllib3
+from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
 from stamina import retry
 
+from hirundo._timeouts import READ_TIMEOUT
+from hirundo.logger import get_logger
+
+logger = get_logger(__name__)
+
 
 # Credit: https://github.com/florimondmanca/httpx-sse/blob/master/README.md#handling-reconnections
 def iter_sse_retrying(
@@ -28,7 +36,13 @@ def iter_sse_retrying(
     # This may happen when the server is overloaded and closes the connection or
     # when Kubernetes restarts / replaces a pod.
     # Likewise, this will likely be temporary, hence the retries.
-    @retry(
+    @retry(
+        on=(
+            httpx.ReadError,
+            httpx.RemoteProtocolError,
+            urllib3.exceptions.ReadTimeoutError,
+        )
+    )
     def _iter_sse():
         nonlocal last_event_id, reconnection_delay
 
@@ -44,13 +58,27 @@ def iter_sse_retrying(
             connect_headers["Last-Event-ID"] = last_event_id
 
         with connect_sse(client, method, url, headers=connect_headers) as event_source:
-
-
-
-
-
-
-
+            try:
+                for sse in event_source.iter_sse():
+                    last_event_id = sse.id
+
+                    if sse.retry is not None:
+                        reconnection_delay = sse.retry / 1000
+
+                    yield sse
+            except SSEError:
+                logger.error("SSE error occurred. Trying regular request")
+                response = requests.get(
+                    url,
+                    headers=connect_headers,
+                    timeout=READ_TIMEOUT,
+                )
+                yield ServerSentEvent(
+                    event="",
+                    data=response.text,
+                    id=uuid.uuid4().hex,
+                    retry=None,
+                )
 
     return _iter_sse()
 
@@ -72,7 +100,13 @@ async def aiter_sse_retrying(
     # This may happen when the server is overloaded and closes the connection or
     # when Kubernetes restarts / replaces a pod.
     # Likewise, this will likely be temporary, hence the retries.
-    @retry(
+    @retry(
+        on=(
+            httpx.ReadError,
+            httpx.RemoteProtocolError,
+            urllib3.exceptions.ReadTimeoutError,
+        )
+    )
     async def _iter_sse() -> AsyncGenerator[ServerSentEvent, None]:
         nonlocal last_event_id, reconnection_delay
 
@@ -86,12 +120,22 @@ async def aiter_sse_retrying(
         async with aconnect_sse(
             client, method, url, headers=connect_headers
         ) as event_source:
-
-
-
-
-
-
-
+            try:
+                async for sse in event_source.aiter_sse():
+                    last_event_id = sse.id
+
+                    if sse.retry is not None:
+                        reconnection_delay = sse.retry / 1000
+
+                    yield sse
+            except SSEError:
+                logger.error("SSE error occurred. Trying regular request")
+                response = await client.get(url, headers=connect_headers)
+                yield ServerSentEvent(
+                    event="",
+                    data=response.text,
+                    id=uuid.uuid4().hex,
+                    retry=None,
+                )
 
     return _iter_sse()
```
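For orientation, a consumption sketch of the synchronous generator. The `(client, method, url)` calling convention is inferred from the `connect_sse(client, method, url, ...)` call in the hunk above, and the endpoint is hypothetical:

```python
import httpx

from hirundo._iter_sse_retrying import iter_sse_retrying

with httpx.Client(timeout=None) as client:
    # Reconnects on dropped connections via stamina's @retry, and falls back
    # to a plain GET (yielded as a single synthetic event) on an SSEError.
    for sse in iter_sse_retrying(client, "GET", "https://example.invalid/events"):
        print(sse.event, sse.data)
```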