hirundo 0.1.18__tar.gz → 0.2.3.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {hirundo-0.1.18 → hirundo-0.2.3.post1}/PKG-INFO +59 -20
  2. {hirundo-0.1.18 → hirundo-0.2.3.post1}/README.md +36 -12
  3. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/__init__.py +28 -8
  4. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/_constraints.py +3 -4
  5. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/_headers.py +1 -1
  6. hirundo-0.2.3.post1/hirundo/_http.py +72 -0
  7. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/_iter_sse_retrying.py +8 -5
  8. hirundo-0.2.3.post1/hirundo/_llm_pipeline.py +153 -0
  9. hirundo-0.2.3.post1/hirundo/_run_checking.py +283 -0
  10. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/_urls.py +1 -0
  11. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/cli.py +8 -11
  12. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/dataset_enum.py +2 -0
  13. hirundo-0.1.18/hirundo/dataset_optimization.py → hirundo-0.2.3.post1/hirundo/dataset_qa.py +213 -256
  14. hirundo-0.1.18/hirundo/dataset_optimization_results.py → hirundo-0.2.3.post1/hirundo/dataset_qa_results.py +7 -7
  15. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/git.py +8 -10
  16. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/labeling.py +22 -19
  17. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/storage.py +26 -26
  18. hirundo-0.2.3.post1/hirundo/unlearning_llm.py +599 -0
  19. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/unzip.py +12 -13
  20. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo.egg-info/PKG-INFO +59 -20
  21. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo.egg-info/SOURCES.txt +7 -3
  22. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo.egg-info/requires.txt +21 -5
  23. {hirundo-0.1.18 → hirundo-0.2.3.post1}/pyproject.toml +42 -14
  24. hirundo-0.2.3.post1/tests/testing_utils.py +7 -0
  25. hirundo-0.1.18/hirundo/_http.py +0 -19
  26. {hirundo-0.1.18 → hirundo-0.2.3.post1}/LICENSE +0 -0
  27. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/__main__.py +0 -0
  28. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/_dataframe.py +0 -0
  29. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/_env.py +0 -0
  30. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/_timeouts.py +0 -0
  31. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo/logger.py +0 -0
  32. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo.egg-info/dependency_links.txt +0 -0
  33. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo.egg-info/entry_points.txt +0 -0
  34. {hirundo-0.1.18 → hirundo-0.2.3.post1}/hirundo.egg-info/top_level.txt +0 -0
  35. {hirundo-0.1.18 → hirundo-0.2.3.post1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hirundo
3
- Version: 0.1.18
3
+ Version: 0.2.3.post1
4
4
  Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
5
5
  Author-email: Hirundo <dev@hirundo.io>
6
6
  License: MIT License
@@ -13,12 +13,12 @@ License: MIT License
13
13
 
14
14
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15
15
 
16
- Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-client
16
+ Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-python-sdk
17
17
  Keywords: dataset,machine learning,data science,data engineering
18
18
  Classifier: License :: OSI Approved :: MIT License
19
19
  Classifier: Programming Language :: Python
20
20
  Classifier: Programming Language :: Python :: 3
21
- Requires-Python: >=3.9
21
+ Requires-Python: >=3.10
22
22
  Description-Content-Type: text/markdown
23
23
  License-File: LICENSE
24
24
  Requires-Dist: pyyaml>=6.0.1
@@ -32,6 +32,11 @@ Requires-Dist: httpx>=0.27.0
32
32
  Requires-Dist: stamina>=24.2.0
33
33
  Requires-Dist: httpx-sse>=0.4.0
34
34
  Requires-Dist: tqdm>=4.66.5
35
+ Requires-Dist: h11>=0.16.0
36
+ Requires-Dist: requests>=2.32.4
37
+ Requires-Dist: urllib3>=2.6.3
38
+ Requires-Dist: setuptools>=78.1.1
39
+ Requires-Dist: docutils<0.22.0
35
40
  Provides-Extra: dev
36
41
  Requires-Dist: pyyaml>=6.0.1; extra == "dev"
37
42
  Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
@@ -46,13 +51,18 @@ Requires-Dist: stamina>=24.2.0; extra == "dev"
46
51
  Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
47
52
  Requires-Dist: pytest>=8.2.0; extra == "dev"
48
53
  Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
49
- Requires-Dist: uv>=0.5.8; extra == "dev"
54
+ Requires-Dist: uv>=0.9.6; extra == "dev"
50
55
  Requires-Dist: pre-commit>=3.7.1; extra == "dev"
56
+ Requires-Dist: basedpyright==1.37.1; extra == "dev"
51
57
  Requires-Dist: virtualenv>=20.6.6; extra == "dev"
52
- Requires-Dist: ruff>=0.11.6; extra == "dev"
53
- Requires-Dist: bumpver; extra == "dev"
58
+ Requires-Dist: authlib>=1.6.6; extra == "dev"
59
+ Requires-Dist: ruff>=0.12.0; extra == "dev"
60
+ Requires-Dist: bumpver>=2025.1131; extra == "dev"
54
61
  Requires-Dist: platformdirs>=4.3.6; extra == "dev"
55
- Requires-Dist: safety>=3.2.13; extra == "dev"
62
+ Requires-Dist: cryptography>=44.0.1; extra == "dev"
63
+ Requires-Dist: jinja2>=3.1.6; extra == "dev"
64
+ Requires-Dist: filelock>=3.20.1; extra == "dev"
65
+ Requires-Dist: marshmallow>=3.26.2; extra == "dev"
56
66
  Provides-Extra: docs
57
67
  Requires-Dist: sphinx>=7.4.7; extra == "docs"
58
68
  Requires-Dist: sphinx-autobuild>=2024.9.3; extra == "docs"
@@ -61,19 +71,24 @@ Requires-Dist: autodoc_pydantic>=2.2.0; extra == "docs"
61
71
  Requires-Dist: furo; extra == "docs"
62
72
  Requires-Dist: sphinx-multiversion; extra == "docs"
63
73
  Requires-Dist: esbonio; extra == "docs"
64
- Requires-Dist: starlette>0.40.0; extra == "docs"
74
+ Requires-Dist: starlette>=0.49.1; extra == "docs"
65
75
  Requires-Dist: markupsafe>=3.0.2; extra == "docs"
76
+ Requires-Dist: jinja2>=3.1.6; extra == "docs"
66
77
  Provides-Extra: pandas
67
78
  Requires-Dist: pandas>=2.2.3; extra == "pandas"
68
79
  Provides-Extra: polars
69
80
  Requires-Dist: polars>=1.0.0; extra == "polars"
81
+ Provides-Extra: transformers
82
+ Requires-Dist: transformers>=4.57.3; extra == "transformers"
83
+ Requires-Dist: peft>=0.18.1; extra == "transformers"
84
+ Requires-Dist: accelerate>=1.12.0; extra == "transformers"
70
85
  Dynamic: license-file
71
86
 
72
87
  # Hirundo
73
88
 
74
- This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
89
+ This package exposes access to Hirundo APIs for dataset QA for Machine Learning.
75
90
 
76
- Dataset optimization is currently available for datasets labelled for classification and object detection.
91
+ Dataset QA is currently available for datasets labelled for classification and object detection.
77
92
 
78
93
  Supported dataset storage configs include:
79
94
 
@@ -138,13 +153,37 @@ You can install the codebase with a simple `pip install hirundo` to install the
138
153
 
139
154
  ## Usage
140
155
 
141
- Classification example:
156
+ ### Unlearning LLM behavior
157
+
158
+ Make sure to install the `transformers` extra, i.e. `pip install hirundo[transformers]` or, if you have `uv` installed (which is much faster than `pip`), `uv pip install hirundo[transformers]`.
159
+
160
+ ```python
161
+ llm = LlmModel(
162
+ model_name="Nemotron-Flash-1B",
163
+ model_source=HuggingFaceTransformersModel(
164
+ model_name="nvidia/Nemotron-Flash-1B",
165
+ ),
166
+ )
167
+ llm_id = llm.create()
168
+ run_info = BiasRunInfo(
169
+ bias_type=BiasType.ALL,
170
+ )
171
+ run_id = LlmUnlearningRun.launch(
172
+ llm_id,
173
+ run_info,
174
+ )
175
+ new_adapter = llm.get_hf_pipeline_for_run(run_id)
176
+ ```
177
+
178
+ ### Dataset QA
179
+
180
+ #### Classification example:
142
181
 
143
182
  ```python
144
183
  from hirundo import (
145
184
  HirundoCSV,
146
185
  LabelingType,
147
- OptimizationDataset,
186
+ QADataset,
148
187
  StorageGCP,
149
188
  StorageConfig,
150
189
  StorageTypes,
@@ -155,7 +194,7 @@ gcp_bucket = StorageGCP(
155
194
  project="Hirundo-global",
156
195
  credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
157
196
  )
158
- test_dataset = OptimizationDataset(
197
+ test_dataset = QADataset(
159
198
  name="TEST-GCP cifar 100 classification dataset",
160
199
  labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
161
200
  storage_config=StorageConfig(
@@ -170,19 +209,19 @@ test_dataset = OptimizationDataset(
170
209
  classes=cifar100_classes,
171
210
  )
172
211
 
173
- test_dataset.run_optimization()
212
+ test_dataset.run_qa()
174
213
  results = test_dataset.check_run()
175
214
  print(results)
176
215
  ```
177
216
 
178
- Object detection example:
217
+ #### Object detection example:
179
218
 
180
219
  ```python
181
220
  from hirundo import (
182
221
  GitRepo,
183
222
  HirundoCSV,
184
223
  LabelingType,
185
- OptimizationDataset,
224
+ QADataset,
186
225
  StorageGit,
187
226
  StorageConfig,
188
227
  StorageTypes,
@@ -195,7 +234,7 @@ git_storage = StorageGit(
195
234
  ),
196
235
  branch="main",
197
236
  )
198
- test_dataset = OptimizationDataset(
237
+ test_dataset = QADataset(
199
238
  name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
200
239
  labeling_type=LabelingType.OBJECT_DETECTION,
201
240
  storage_config=StorageConfig(
@@ -211,13 +250,13 @@ test_dataset = OptimizationDataset(
211
250
  ),
212
251
  )
213
252
 
214
- test_dataset.run_optimization()
253
+ test_dataset.run_qa()
215
254
  results = test_dataset.check_run()
216
255
  print(results)
217
256
  ```
218
257
 
219
- Note: Currently we only support the main CPython release 3.9, 3.10, 3.11, 3.12 & 3.13. PyPy support may be introduced in the future.
258
+ Note: Currently we only support the main CPython releases 3.10, 3.11, 3.12 & 3.13. PyPy support may be introduced in the future.
220
259
 
221
260
  ## Further documentation
222
261
 
223
- To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-client/tree/main/notebooks).
262
+ To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-python-sdk/tree/main/notebooks).
@@ -1,8 +1,8 @@
1
1
  # Hirundo
2
2
 
3
- This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
3
+ This package exposes access to Hirundo APIs for dataset QA for Machine Learning.
4
4
 
5
- Dataset optimization is currently available for datasets labelled for classification and object detection.
5
+ Dataset QA is currently available for datasets labelled for classification and object detection.
6
6
 
7
7
  Supported dataset storage configs include:
8
8
 
@@ -67,13 +67,37 @@ You can install the codebase with a simple `pip install hirundo` to install the
67
67
 
68
68
  ## Usage
69
69
 
70
- Classification example:
70
+ ### Unlearning LLM behavior
71
+
72
+ Make sure to install the `transformers` extra, i.e. `pip install hirundo[transformers]` or, if you have `uv` installed (which is much faster than `pip`), `uv pip install hirundo[transformers]`.
73
+
74
+ ```python
75
+ llm = LlmModel(
76
+ model_name="Nemotron-Flash-1B",
77
+ model_source=HuggingFaceTransformersModel(
78
+ model_name="nvidia/Nemotron-Flash-1B",
79
+ ),
80
+ )
81
+ llm_id = llm.create()
82
+ run_info = BiasRunInfo(
83
+ bias_type=BiasType.ALL,
84
+ )
85
+ run_id = LlmUnlearningRun.launch(
86
+ llm_id,
87
+ run_info,
88
+ )
89
+ new_adapter = llm.get_hf_pipeline_for_run(run_id)
90
+ ```
91
+
92
+ ### Dataset QA
93
+
94
+ #### Classification example:
71
95
 
72
96
  ```python
73
97
  from hirundo import (
74
98
  HirundoCSV,
75
99
  LabelingType,
76
- OptimizationDataset,
100
+ QADataset,
77
101
  StorageGCP,
78
102
  StorageConfig,
79
103
  StorageTypes,
@@ -84,7 +108,7 @@ gcp_bucket = StorageGCP(
84
108
  project="Hirundo-global",
85
109
  credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
86
110
  )
87
- test_dataset = OptimizationDataset(
111
+ test_dataset = QADataset(
88
112
  name="TEST-GCP cifar 100 classification dataset",
89
113
  labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
90
114
  storage_config=StorageConfig(
@@ -99,19 +123,19 @@ test_dataset = OptimizationDataset(
99
123
  classes=cifar100_classes,
100
124
  )
101
125
 
102
- test_dataset.run_optimization()
126
+ test_dataset.run_qa()
103
127
  results = test_dataset.check_run()
104
128
  print(results)
105
129
  ```
106
130
 
107
- Object detection example:
131
+ #### Object detection example:
108
132
 
109
133
  ```python
110
134
  from hirundo import (
111
135
  GitRepo,
112
136
  HirundoCSV,
113
137
  LabelingType,
114
- OptimizationDataset,
138
+ QADataset,
115
139
  StorageGit,
116
140
  StorageConfig,
117
141
  StorageTypes,
@@ -124,7 +148,7 @@ git_storage = StorageGit(
124
148
  ),
125
149
  branch="main",
126
150
  )
127
- test_dataset = OptimizationDataset(
151
+ test_dataset = QADataset(
128
152
  name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
129
153
  labeling_type=LabelingType.OBJECT_DETECTION,
130
154
  storage_config=StorageConfig(
@@ -140,13 +164,13 @@ test_dataset = OptimizationDataset(
140
164
  ),
141
165
  )
142
166
 
143
- test_dataset.run_optimization()
167
+ test_dataset.run_qa()
144
168
  results = test_dataset.check_run()
145
169
  print(results)
146
170
  ```
147
171
 
148
- Note: Currently we only support the main CPython release 3.9, 3.10, 3.11, 3.12 & 3.13. PyPy support may be introduced in the future.
172
+ Note: Currently we only support the main CPython releases 3.10, 3.11, 3.12 & 3.13. PyPy support may be introduced in the future.
149
173
 
150
174
  ## Further documentation
151
175
 
152
- To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-client/tree/main/notebooks).
176
+ To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-python-sdk/tree/main/notebooks).
@@ -3,13 +3,15 @@ from .dataset_enum import (
3
3
  LabelingType,
4
4
  StorageTypes,
5
5
  )
6
- from .dataset_optimization import (
6
+ from .dataset_qa import (
7
+ ClassificationRunArgs,
7
8
  HirundoError,
8
- OptimizationDataset,
9
+ ModalityType,
10
+ ObjectDetectionRunArgs,
11
+ QADataset,
9
12
  RunArgs,
10
- VisionRunArgs,
11
13
  )
12
- from .dataset_optimization_results import DatasetOptimizationResults
14
+ from .dataset_qa_results import DatasetQAResults
13
15
  from .git import GitPlainAuth, GitRepo, GitSSHAuth
14
16
  from .labeling import (
15
17
  COCO,
@@ -28,6 +30,15 @@ from .storage import (
28
30
  StorageGit,
29
31
  StorageS3,
30
32
  )
33
+ from .unlearning_llm import (
34
+ BiasRunInfo,
35
+ BiasType,
36
+ HuggingFaceTransformersModel,
37
+ LlmModel,
38
+ LlmSources,
39
+ LlmUnlearningRun,
40
+ LocalTransformersModel,
41
+ )
31
42
  from .unzip import load_df, load_from_zip
32
43
 
33
44
  __all__ = [
@@ -40,9 +51,11 @@ __all__ = [
40
51
  "KeylabsObjDetVideo",
41
52
  "KeylabsObjSegImages",
42
53
  "KeylabsObjSegVideo",
43
- "OptimizationDataset",
54
+ "QADataset",
55
+ "ModalityType",
44
56
  "RunArgs",
45
- "VisionRunArgs",
57
+ "ClassificationRunArgs",
58
+ "ObjectDetectionRunArgs",
46
59
  "DatasetMetadataType",
47
60
  "LabelingType",
48
61
  "GitPlainAuth",
@@ -54,9 +67,16 @@ __all__ = [
54
67
  # "StorageAzure", TODO: Azure storage is coming soon
55
68
  "StorageGit",
56
69
  "StorageConfig",
57
- "DatasetOptimizationResults",
70
+ "DatasetQAResults",
71
+ "BiasRunInfo",
72
+ "BiasType",
73
+ "HuggingFaceTransformersModel",
74
+ "LlmModel",
75
+ "LlmSources",
76
+ "LlmUnlearningRun",
77
+ "LocalTransformersModel",
58
78
  "load_df",
59
79
  "load_from_zip",
60
80
  ]
61
81
 
62
- __version__ = "0.1.18"
82
+ __version__ = "0.2.3.post1"
@@ -1,5 +1,4 @@
1
1
  import re
2
- import typing
3
2
  from typing import TYPE_CHECKING
4
3
 
5
4
  from hirundo._urls import (
@@ -11,7 +10,7 @@ from hirundo.labeling import COCO, YOLO, HirundoCSV, Keylabs
11
10
 
12
11
  if TYPE_CHECKING:
13
12
  from hirundo._urls import HirundoUrl
14
- from hirundo.dataset_optimization import LabelingInfo
13
+ from hirundo.dataset_qa import LabelingInfo
15
14
  from hirundo.storage import (
16
15
  ResponseStorageConfig,
17
16
  StorageConfig,
@@ -135,8 +134,8 @@ def validate_labeling_type(
135
134
 
136
135
  def validate_labeling_info(
137
136
  labeling_type: "LabelingType",
138
- labeling_info: "typing.Union[LabelingInfo, list[LabelingInfo]]",
139
- storage_config: "typing.Union[StorageConfig, ResponseStorageConfig]",
137
+ labeling_info: "LabelingInfo | list[LabelingInfo]",
138
+ storage_config: "StorageConfig | ResponseStorageConfig",
140
139
  ) -> None:
141
140
  """
142
141
  Validate the labeling info for a dataset
@@ -1,6 +1,6 @@
1
1
  from hirundo._env import API_KEY, check_api_key
2
2
 
3
- HIRUNDO_API_VERSION = "0.2"
3
+ HIRUNDO_API_VERSION = "0.3"
4
4
 
5
5
  _json_headers = {
6
6
  "Content-Type": "application/json",
@@ -0,0 +1,72 @@
1
+ import requests as _requests
2
+ from requests import Response
3
+ from requests.adapters import HTTPAdapter
4
+ from urllib3.util.retry import Retry
5
+
6
+ import hirundo.logger
7
+
8
+ logger = hirundo.logger.get_logger(__name__)
9
+
10
+ MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
11
+
12
+
13
def _build_retrying_session() -> _requests.Session:
    """Create a ``requests`` session whose HTTP(S) adapters retry on HTTP 429.

    Returns:
        A new session with one shared retrying adapter mounted for both the
        ``http://`` and ``https://`` prefixes.
    """
    # urllib3's ``Retry.total`` counts retries rather than attempts, so 9
    # retries caps a request at 10 tries including the initial one.
    retry_policy = Retry(
        total=9,
        backoff_factor=1.0,
        status_forcelist=(429,),
        allowed_methods=("HEAD", "GET", "PUT", "POST", "PATCH", "DELETE", "OPTIONS"),
        respect_retry_after_header=True,
        raise_on_status=False,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    session = _requests.Session()
    for url_prefix in ("http://", "https://"):
        session.mount(url_prefix, retrying_adapter)
    return session
29
+
30
+
31
+ _SESSION = _build_retrying_session()
32
+
33
+
34
class _RequestsShim:
    """Stand-in for the ``requests`` module that sends everything via ``_SESSION``.

    Only a subset of the ``requests`` API is exposed: the ``HTTPError`` and
    ``Response`` names plus the request helpers defined below. Every helper
    forwards its keyword arguments unchanged to the shared session.
    """

    # Re-exported so callers can keep writing ``requests.HTTPError`` and
    # ``requests.Response`` against the shim.
    Response = _requests.Response
    HTTPError = _requests.HTTPError

    def request(self, method: str, url: str, **kwargs) -> Response:
        """Send a request with an arbitrary HTTP method through the shared session."""
        return _SESSION.request(method=method, url=url, **kwargs)

    def get(self, url: str, **kwargs) -> Response:
        """Send a GET request through the shared session."""
        return _SESSION.get(url, **kwargs)

    def post(self, url: str, **kwargs) -> Response:
        """Send a POST request through the shared session."""
        return _SESSION.post(url, **kwargs)

    def put(self, url: str, **kwargs) -> Response:
        """Send a PUT request through the shared session."""
        return _SESSION.put(url, **kwargs)

    def patch(self, url: str, **kwargs) -> Response:
        """Send a PATCH request through the shared session."""
        return _SESSION.patch(url, **kwargs)

    def delete(self, url: str, **kwargs) -> Response:
        """Send a DELETE request through the shared session."""
        return _SESSION.delete(url, **kwargs)
57
+
58
+
59
+ # Public shim to be imported by modules instead of the raw requests package
60
+ requests = _RequestsShim()
61
+
62
+
63
def raise_for_status_with_reason(response: Response):
    """Raise for an error response, preferring the server-provided reason.

    For responses with a status code of ``MINIMUM_CLIENT_SERVER_ERROR_CODE``
    or above, the JSON body is inspected for a ``"reason"`` key and, failing
    that, a ``"detail"`` key. When one is present it replaces
    ``response.reason`` so that it appears in the raised error message. When
    neither is present, or the body is not a JSON object, the original reason
    phrase is left untouched.

    Args:
        response: The response to check.

    Raises:
        requests.HTTPError: Raised by ``response.raise_for_status()`` when the
            response carries an error status code.
    """
    if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
        try:
            payload = response.json()
        except Exception as e:
            # Best effort only: an unparsable body must never mask the HTTP error itself.
            logger.debug("Could not parse response as JSON: %s", e)
        else:
            if isinstance(payload, dict):
                reason = payload.get("reason")
                if reason is None:
                    reason = payload.get("detail")
                # Only override when the server actually supplied something;
                # otherwise keep the standard HTTP reason phrase.
                if reason is not None:
                    response.reason = reason

    response.raise_for_status()
@@ -1,27 +1,28 @@
1
1
  import asyncio
2
2
  import time
3
- import typing
4
3
  import uuid
5
4
  from collections.abc import AsyncGenerator, Generator
6
5
 
7
6
  import httpx
8
- import requests
9
7
  import urllib3
10
8
  from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
11
9
  from stamina import retry
12
10
 
11
+ from hirundo._http import requests
13
12
  from hirundo._timeouts import READ_TIMEOUT
14
13
  from hirundo.logger import get_logger
15
14
 
16
15
  logger = get_logger(__name__)
17
16
 
17
+ MAX_RETRIES = 50
18
+
18
19
 
19
20
  # Credit: https://github.com/florimondmanca/httpx-sse/blob/master/README.md#handling-reconnections
20
21
  def iter_sse_retrying(
21
22
  client: httpx.Client,
22
23
  method: str,
23
24
  url: str,
24
- headers: typing.Optional[dict[str, str]] = None,
25
+ headers: dict[str, str] | None = None,
25
26
  ) -> Generator[ServerSentEvent, None, None]:
26
27
  if headers is None:
27
28
  headers = {}
@@ -41,7 +42,8 @@ def iter_sse_retrying(
41
42
  httpx.ReadError,
42
43
  httpx.RemoteProtocolError,
43
44
  urllib3.exceptions.ReadTimeoutError,
44
- )
45
+ ),
46
+ attempts=MAX_RETRIES,
45
47
  )
46
48
  def _iter_sse():
47
49
  nonlocal last_event_id, reconnection_delay
@@ -105,7 +107,8 @@ async def aiter_sse_retrying(
105
107
  httpx.ReadError,
106
108
  httpx.RemoteProtocolError,
107
109
  urllib3.exceptions.ReadTimeoutError,
108
- )
110
+ ),
111
+ attempts=MAX_RETRIES,
109
112
  )
110
113
  async def _iter_sse() -> AsyncGenerator[ServerSentEvent, None]:
111
114
  nonlocal last_event_id, reconnection_delay
@@ -0,0 +1,153 @@
1
+ import importlib.util
2
+ import tempfile
3
+ import zipfile
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING, cast
6
+
7
+ from hirundo import HirundoError
8
+ from hirundo._http import requests
9
+ from hirundo._timeouts import DOWNLOAD_READ_TIMEOUT
10
+ from hirundo.logger import get_logger
11
+
12
+ if TYPE_CHECKING:
13
+ from torch import device as torch_device
14
+ from transformers.configuration_utils import PretrainedConfig
15
+ from transformers.modeling_utils import PreTrainedModel
16
+ from transformers.pipelines.base import Pipeline
17
+
18
+ from hirundo.unlearning_llm import LlmModel, LlmModelOut
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
+ ZIP_FILE_CHUNK_SIZE = 50 * 1024 * 1024 # 50 MB
24
+ REQUIRED_PACKAGES_FOR_PIPELINE = ["peft", "transformers", "accelerate"]
25
+
26
+
27
def get_hf_pipeline_for_run_given_model(
    llm: "LlmModel | LlmModelOut",
    run_id: str,
    config: "PretrainedConfig | None" = None,
    device: "str | int | torch_device | None" = None,
    device_map: str | dict[str, int | str] | None = None,
    trust_remote_code: bool = False,
    token: str | None = None,
) -> "Pipeline":
    """Build a text-generation pipeline from the adapter produced by an unlearning run.

    The run's result archive is downloaded and extracted into a temporary
    directory, the base model named by ``llm.model_source`` is loaded, and the
    adapter found in the archive's ``unlearned_model_folder`` is applied on top
    of it. The temporary directory, including the downloaded archive, is
    removed before the function returns.

    Args:
        llm: The model whose ``model_source`` identifies the base model.
        run_id: ID of the unlearning run whose result should be loaded.
        config: Optional model configuration. When omitted, it is loaded from
            the base model with ``AutoConfig.from_pretrained``.
        device: Forwarded to ``transformers.pipeline``.
        device_map: Forwarded to ``transformers.pipeline``.
        trust_remote_code: Forwarded to the tokenizer, config and model loaders.
        token: Access token used when the model source does not carry its own.

    Returns:
        A ``text-generation`` pipeline wrapping the adapted model.

    Raises:
        HirundoError: If a required package is missing, the run has no
            results, or the results do not contain a download URL.
        requests.HTTPError: If downloading the result archive fails.
    """
    for package in REQUIRED_PACKAGES_FOR_PIPELINE:
        if importlib.util.find_spec(package) is None:
            raise HirundoError(
                f'{package} is not installed. Please install transformers extra with pip install "hirundo[transformers]"'
            )
    # Imported lazily so the optional `transformers` extra is only needed here.
    from peft import PeftModel
    from transformers.models.auto.configuration_auto import AutoConfig
    from transformers.models.auto.modeling_auto import (
        MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES,
        AutoModelForCausalLM,
        AutoModelForImageTextToText,
    )
    from transformers.models.auto.tokenization_auto import AutoTokenizer
    from transformers.pipelines import pipeline

    from hirundo.unlearning_llm import (
        HuggingFaceTransformersModel,
        HuggingFaceTransformersModelOutput,
        LlmUnlearningRun,
    )

    run_results = LlmUnlearningRun.check_run_by_id(run_id)
    if run_results is None:
        raise HirundoError("No run results found")
    # The download URL may be the result itself or nested under "result" keys.
    result_payload = (
        run_results.get("result", run_results)
        if isinstance(run_results, dict)
        else run_results
    )
    if isinstance(result_payload, dict):
        result_url = result_payload.get("result")
    else:
        result_url = result_payload
    if not isinstance(result_url, str):
        raise HirundoError("Run results did not include a download URL")

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir)
        # Keep the archive inside the temporary directory so it is deleted
        # together with the extracted files instead of being left on disk.
        zip_file_path = temp_dir_path / "run_result.zip"
        extract_dir_path = temp_dir_path / "extracted"
        extract_dir_path.mkdir()

        # Stream the zip file download
        with requests.get(
            result_url,
            timeout=DOWNLOAD_READ_TIMEOUT,
            stream=True,
        ) as r:
            r.raise_for_status()
            with open(zip_file_path, "wb") as zip_file:
                for chunk in r.iter_content(chunk_size=ZIP_FILE_CHUNK_SIZE):
                    zip_file.write(chunk)
        logger.info(
            "Successfully downloaded the result zip file for run ID %s to %s",
            run_id,
            zip_file_path,
        )

        with zipfile.ZipFile(zip_file_path, "r") as zip_file:
            zip_file.extractall(extract_dir_path)

        model_source = llm.model_source
        base_model_name = (
            model_source.model_name
            if isinstance(
                model_source,
                HuggingFaceTransformersModel | HuggingFaceTransformersModelOutput,
            )
            else model_source.local_path
        )
        # A token stored on the model source wins; the explicit argument is
        # the fallback rather than being discarded.
        if (
            isinstance(model_source, HuggingFaceTransformersModel)
            and model_source.token is not None
        ):
            token = model_source.token

        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name,
            token=token,
            trust_remote_code=trust_remote_code,
        )
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Respect a caller-supplied config; only load one when none was given.
        if config is None:
            config = AutoConfig.from_pretrained(
                base_model_name,
                token=token,
                trust_remote_code=trust_remote_code,
            )
        config_dict = config.to_dict() if hasattr(config, "to_dict") else config
        is_multimodal = (
            config_dict.get("model_type") in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
        )
        base_model_class = (
            AutoModelForImageTextToText if is_multimodal else AutoModelForCausalLM
        )
        base_model = base_model_class.from_pretrained(
            base_model_name,
            token=token,
            trust_remote_code=trust_remote_code,
        )
        model = cast(
            "PreTrainedModel",
            PeftModel.from_pretrained(
                base_model, str(extract_dir_path / "unlearned_model_folder")
            ),
        )

        return pipeline(
            task="text-generation",
            model=model,
            tokenizer=tokenizer,
            config=config,
            device=device,
            device_map=device_map,
        )