elaunira-airflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
elaunira/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Elaunira namespace package."""
@@ -0,0 +1,5 @@
1
+ """Elaunira Airflow operators and hooks for R2Index."""
2
+
3
+ from importlib.metadata import version
4
+
5
+ __version__ = version("elaunira-airflow")
@@ -0,0 +1,5 @@
1
+ """Elaunira Airflow TaskFlow decorators."""
2
+
3
+ from elaunira.airflow.decorators.r2index import r2index_download, r2index_upload
4
+
5
+ __all__ = ["r2index_upload", "r2index_download"]
@@ -0,0 +1,139 @@
1
+ """R2Index TaskFlow decorators."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Any, Callable, TypeVar
6
+
7
+ from airflow.decorators.base import task_decorator_factory
8
+
9
+ from elaunira.airflow.operators.r2index import (
10
+ DownloadItem,
11
+ R2IndexDownloadOperator,
12
+ R2IndexUploadOperator,
13
+ UploadItem,
14
+ )
15
+
16
+ if TYPE_CHECKING:
17
+ from airflow.decorators.base import TaskDecorator
18
+
19
+ F = TypeVar("F", bound=Callable[..., Any])
20
+
21
+
22
def r2index_upload(
    bucket: str,
    r2index_conn_id: str = "r2index_default",
    **kwargs: Any,
) -> TaskDecorator:
    """
    Build the ``@task.r2index_upload`` decorator.

    The function being decorated is expected to return either a single
    ``UploadItem`` or a list of ``UploadItem`` objects describing what to
    upload.

    Example:
        @task.r2index_upload(bucket="my-bucket")
        def prepare_upload() -> UploadItem:
            return UploadItem(
                source="/tmp/data.csv",
                category="acme",
                entity="acme-data",
                extension="csv",
                media_type="text/csv",
                destination_path="acme/data",
                destination_filename="data.csv",
                destination_version="{{ ds }}",
            )

    NOTE(review): the item is produced by the function at execute time;
    confirm whether Jinja expressions in returned values get rendered.

    :param bucket: R2 bucket name.
    :param r2index_conn_id: Airflow connection ID for R2Index.
    :param kwargs: Extra keyword arguments forwarded unchanged to
        ``task_decorator_factory``.
    """
    # Collect everything destined for the operator in one mapping, then hand
    # it to Airflow's decorator factory in a single call.
    operator_kwargs: dict[str, Any] = {
        "bucket": bucket,
        "r2index_conn_id": r2index_conn_id,
        **kwargs,
    }
    return task_decorator_factory(
        decorated_operator_class=_R2IndexUploadDecoratedOperator,
        **operator_kwargs,
    )
55
+
56
+
57
def r2index_download(
    bucket: str,
    r2index_conn_id: str = "r2index_default",
    **kwargs: Any,
) -> TaskDecorator:
    """
    Build the ``@task.r2index_download`` decorator.

    The function being decorated is expected to return either a single
    ``DownloadItem`` or a list of ``DownloadItem`` objects describing what to
    download.

    Example:
        @task.r2index_download(bucket="my-bucket")
        def prepare_download() -> DownloadItem:
            return DownloadItem(
                source_path="acme/data",
                source_filename="data.csv",
                source_version="{{ ds }}",
                destination="/tmp/downloaded.csv",
            )

    NOTE(review): the item is produced by the function at execute time;
    confirm whether Jinja expressions in returned values get rendered.

    :param bucket: R2 bucket name.
    :param r2index_conn_id: Airflow connection ID for R2Index.
    :param kwargs: Extra keyword arguments forwarded unchanged to
        ``task_decorator_factory``.
    """
    # Collect everything destined for the operator in one mapping, then hand
    # it to Airflow's decorator factory in a single call.
    operator_kwargs: dict[str, Any] = {
        "bucket": bucket,
        "r2index_conn_id": r2index_conn_id,
        **kwargs,
    }
    return task_decorator_factory(
        decorated_operator_class=_R2IndexDownloadDecoratedOperator,
        **operator_kwargs,
    )
86
+
87
+
88
class _R2IndexUploadDecoratedOperator(R2IndexUploadOperator):
    """
    Operator backing ``@task.r2index_upload``.

    Rather than receiving ``items`` at construction time, it calls the
    decorated function inside :meth:`execute` and uploads whatever that
    function returns.

    :param python_callable: Decorated function; must return an ``UploadItem``
        or a list of ``UploadItem``.
    :param op_args: Positional arguments forwarded to ``python_callable``.
    :param op_kwargs: Keyword arguments forwarded to ``python_callable``.
    :param kwargs: Remaining arguments passed on to ``R2IndexUploadOperator``.
    """

    custom_operator_name = "@task.r2index_upload"

    # Airflow resolves XComArgs / Jinja (and derives upstream dependencies)
    # only for attributes named in ``template_fields``. Listing the call
    # arguments here lets outputs of other tasks be passed to the function.
    template_fields = (*R2IndexUploadOperator.template_fields, "op_args", "op_kwargs")

    def __init__(
        self,
        *,
        python_callable: Callable[..., UploadItem | list[UploadItem]],
        op_args: list[Any] | None = None,
        op_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        self.python_callable = python_callable
        self.op_args = op_args or []
        self.op_kwargs = op_kwargs or {}
        # The parent requires ``items``; the real value is only known once the
        # decorated function has run, so start with an empty placeholder.
        kwargs["items"] = []
        super().__init__(**kwargs)

    def execute(self, context: Any) -> list[dict[str, Any]]:
        """
        Run the decorated function and upload the item(s) it returns.

        :param context: Airflow task context, passed through to the parent.
        :return: Whatever ``R2IndexUploadOperator.execute`` returns.
        :raises TypeError: If the decorated function returns ``None``.
        """
        produced = self.python_callable(*self.op_args, **self.op_kwargs)
        if produced is None:
            # Fail with an actionable message instead of the parent's later
            # "'NoneType' object is not iterable".
            func_name = getattr(self.python_callable, "__name__", repr(self.python_callable))
            raise TypeError(
                f"{func_name} returned None; expected an UploadItem or a list of UploadItems"
            )
        # Accept a single item or any iterable of items (list, tuple, generator).
        self.items = [produced] if isinstance(produced, UploadItem) else list(produced)
        return super().execute(context)
113
+
114
+
115
class _R2IndexDownloadDecoratedOperator(R2IndexDownloadOperator):
    """
    Operator backing ``@task.r2index_download``.

    Rather than receiving ``items`` at construction time, it calls the
    decorated function inside :meth:`execute` and downloads whatever that
    function returns.

    :param python_callable: Decorated function; must return a ``DownloadItem``
        or a list of ``DownloadItem``.
    :param op_args: Positional arguments forwarded to ``python_callable``.
    :param op_kwargs: Keyword arguments forwarded to ``python_callable``.
    :param kwargs: Remaining arguments passed on to ``R2IndexDownloadOperator``.
    """

    custom_operator_name = "@task.r2index_download"

    # Airflow resolves XComArgs / Jinja (and derives upstream dependencies)
    # only for attributes named in ``template_fields``. Listing the call
    # arguments here lets outputs of other tasks be passed to the function.
    template_fields = (*R2IndexDownloadOperator.template_fields, "op_args", "op_kwargs")

    def __init__(
        self,
        *,
        python_callable: Callable[..., DownloadItem | list[DownloadItem]],
        op_args: list[Any] | None = None,
        op_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        self.python_callable = python_callable
        self.op_args = op_args or []
        self.op_kwargs = op_kwargs or {}
        # The parent requires ``items``; the real value is only known once the
        # decorated function has run, so start with an empty placeholder.
        kwargs["items"] = []
        super().__init__(**kwargs)

    def execute(self, context: Any) -> list[dict[str, Any]]:
        """
        Run the decorated function and download the item(s) it returns.

        :param context: Airflow task context, passed through to the parent.
        :return: Whatever ``R2IndexDownloadOperator.execute`` returns.
        :raises TypeError: If the decorated function returns ``None``.
        """
        produced = self.python_callable(*self.op_args, **self.op_kwargs)
        if produced is None:
            # Fail with an actionable message instead of the parent's later
            # "'NoneType' object is not iterable".
            func_name = getattr(self.python_callable, "__name__", repr(self.python_callable))
            raise TypeError(
                f"{func_name} returned None; expected a DownloadItem or a list of DownloadItems"
            )
        # Accept a single item or any iterable of items (list, tuple, generator).
        self.items = [produced] if isinstance(produced, DownloadItem) else list(produced)
        return super().execute(context)
@@ -0,0 +1,5 @@
1
+ """Elaunira Airflow hooks."""
2
+
3
+ from elaunira.airflow.provider.r2index.hooks import R2IndexHook
4
+
5
+ __all__ = ["R2IndexHook"]
@@ -0,0 +1,5 @@
1
+ """Elaunira Airflow operator extra links."""
2
+
3
+ from elaunira.airflow.links.r2index import R2IndexFileLink
4
+
5
+ __all__ = ["R2IndexFileLink"]
@@ -0,0 +1,37 @@
1
+ """R2Index operator extra links."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ from airflow.models import BaseOperatorLink
8
+
9
+ if TYPE_CHECKING:
10
+ from airflow.models.taskinstancekey import TaskInstanceKey
11
+
12
+
13
class R2IndexFileLink(BaseOperatorLink):
    """
    Link to the R2Index file details.

    The file ID is read from the task's XCom return value and turned into a
    URL under ``https://r2index.elaunira.com/files/``. When the XCom value is
    a list (one entry per transferred file), the link points at the first
    entry.
    """

    name = "R2Index File"

    def get_link(
        self,
        operator: Any,
        *,
        ti_key: TaskInstanceKey,
    ) -> str:
        """
        Return the R2Index URL for the task's file, or ``""`` if unavailable.

        :param operator: Operator the link is attached to (unused).
        :param ti_key: Key of the task instance whose XCom value is inspected.
        :return: URL of the file page, or an empty string when no file ID can
            be found in the XCom value.
        """
        # Imported lazily, as in the original implementation.
        from airflow.models import XCom

        result = XCom.get_value(ti_key=ti_key)

        # The R2Index operators in this package return a list of records, so
        # unwrap the first one; a bare dict is still accepted as before.
        if isinstance(result, (list, tuple)):
            result = result[0] if result else None
        if not isinstance(result, dict):
            return ""

        # Upload results carry the record fields directly; download results
        # nest them under "file_record".
        # NOTE(review): assumes the record exposes an "id" key -- confirm.
        file_id = result.get("id")
        if not file_id:
            nested = result.get("file_record")
            # "file_record" may be missing or None; only dig into a real dict.
            file_id = nested.get("id") if isinstance(nested, dict) else None

        if not file_id:
            return ""
        return f"https://r2index.elaunira.com/files/{file_id}"
@@ -0,0 +1,15 @@
1
+ """Elaunira Airflow operators."""
2
+
3
+ from elaunira.airflow.operators.r2index import (
4
+ DownloadItem,
5
+ R2IndexDownloadOperator,
6
+ R2IndexUploadOperator,
7
+ UploadItem,
8
+ )
9
+
10
+ __all__ = [
11
+ "R2IndexUploadOperator",
12
+ "R2IndexDownloadOperator",
13
+ "UploadItem",
14
+ "DownloadItem",
15
+ ]
@@ -0,0 +1,232 @@
1
+ """R2Index operators for Airflow."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections.abc import Sequence
7
+ from dataclasses import dataclass
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ from airflow.exceptions import AirflowException
11
+ from airflow.models import BaseOperator
12
+
13
+ from elaunira.airflow.hooks.r2index import R2IndexHook
14
+ from elaunira.airflow.links.r2index import R2IndexFileLink
15
+ from elaunira.r2index import AsyncR2IndexClient
16
+
17
+ if TYPE_CHECKING:
18
+ from airflow.utils.context import Context
19
+
20
+
21
+ @dataclass
22
+ class UploadItem:
23
+ """Defines a single file to upload."""
24
+
25
+ source: str
26
+ category: str
27
+ entity: str
28
+ extension: str
29
+ media_type: str
30
+ destination_path: str
31
+ destination_filename: str
32
+ destination_version: str
33
+ bucket: str | None = None
34
+ name: str | None = None
35
+ tags: list[str] | None = None
36
+ extra: dict[str, Any] | None = None
37
+ create_checksum_files: bool = False
38
+ r2index_conn_id: str | None = None
39
+
40
+
41
+ @dataclass
42
+ class DownloadItem:
43
+ """Defines a single file to download."""
44
+
45
+ source_path: str
46
+ source_filename: str
47
+ source_version: str
48
+ destination: str
49
+ bucket: str | None = None
50
+ verify_checksum: bool = True
51
+ r2index_conn_id: str | None = None
52
+
53
+
54
class R2IndexUploadOperator(BaseOperator):
    """
    Upload one or more files to R2 in parallel.

    When multiple items are provided, all uploads are scheduled together with
    ``asyncio.gather`` inside a single ``asyncio.run`` call in the worker
    process.

    :param bucket: Default R2 bucket name (can be overridden per item).
    :param items: List of UploadItem or single UploadItem defining files to upload.
    :param r2index_conn_id: Default Airflow connection ID (can be overridden per item).
    """

    template_fields: Sequence[str] = ("items",)
    template_ext: Sequence[str] = ()
    ui_color = "#e4f0e8"
    operator_extra_links = (R2IndexFileLink(),)

    def __init__(
        self,
        *,
        bucket: str,
        items: list[UploadItem] | UploadItem,
        r2index_conn_id: str = "r2index_default",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.bucket = bucket
        # Normalise a lone item to a one-element list.
        self.items = [items] if isinstance(items, UploadItem) else items
        self.r2index_conn_id = r2index_conn_id

    def _get_client_config(self, conn_id: str) -> dict[str, Any]:
        """
        Build the client keyword arguments for ``conn_id``.

        The Airflow connection is tried first; if it yields nothing or lacks
        ``index_api_url``, the hook's environment-based configuration is used.

        :param conn_id: Airflow connection ID to resolve.
        :return: Keyword arguments for ``AsyncR2IndexClient`` (possibly empty).
        """
        hook = R2IndexHook(r2index_conn_id=conn_id)
        config = hook._get_config_from_connection()
        if config is None or not config.get("index_api_url"):
            config = hook._get_config_from_env()
        return config or {}

    def execute(self, context: Context) -> list[dict[str, Any]]:
        """
        Upload every item and return the resulting file records.

        :param context: Airflow task context (unused).
        :return: One ``file_record.model_dump()`` dict per item, in item order.
        :raises AirflowException: If at least one upload failed.
        """
        # Resolved client configs, cached per connection ID so each connection
        # is looked up once. A separate client is still opened for every item.
        conn_configs: dict[str, dict[str, Any]] = {}

        def config_for(conn_id: str) -> dict[str, Any]:
            if conn_id not in conn_configs:
                conn_configs[conn_id] = self._get_client_config(conn_id)
            return conn_configs[conn_id]

        async def upload_one(item: UploadItem) -> dict[str, Any]:
            # Per-item overrides win over the operator-level defaults.
            conn_id = item.r2index_conn_id or self.r2index_conn_id
            bucket = item.bucket or self.bucket
            config = config_for(conn_id)

            try:
                async with AsyncR2IndexClient(**config) as client:
                    file_record = await client.upload(
                        bucket=bucket,
                        source=item.source,
                        category=item.category,
                        entity=item.entity,
                        extension=item.extension,
                        media_type=item.media_type,
                        destination_path=item.destination_path,
                        destination_filename=item.destination_filename,
                        destination_version=item.destination_version,
                        name=item.name,
                        tags=item.tags,
                        extra=item.extra,
                        create_checksum_files=item.create_checksum_files,
                    )
                    return {"status": "success", "file_record": file_record.model_dump()}
            except Exception as e:
                # Failures are collected rather than raised so the remaining
                # uploads can finish; log here so the traceback is not lost.
                self.log.exception("Upload of %s failed", item.source)
                return {"status": "error", "message": str(e), "source": item.source}

        async def upload_all() -> list[dict[str, Any]]:
            return await asyncio.gather(*(upload_one(item) for item in self.items))

        results = asyncio.run(upload_all())

        # Fail the task if any single upload failed.
        errors = [r for r in results if r.get("status") == "error"]
        if errors:
            self.log.error("Failed uploads: %s", errors)
            raise AirflowException(f"{len(errors)}/{len(results)} uploads failed")

        self.log.info("Uploaded %d files in parallel", len(results))
        return [r["file_record"] for r in results]
142
+
143
+
144
class R2IndexDownloadOperator(BaseOperator):
    """
    Download one or more files from R2 in parallel.

    When multiple items are provided, all downloads are scheduled together
    with ``asyncio.gather`` inside a single ``asyncio.run`` call in the worker
    process.

    :param bucket: Default R2 bucket name (can be overridden per item).
    :param items: List of DownloadItem or single DownloadItem defining files to download.
    :param r2index_conn_id: Default Airflow connection ID (can be overridden per item).
    """

    template_fields: Sequence[str] = ("items",)
    template_ext: Sequence[str] = ()
    ui_color = "#f0e4e8"
    operator_extra_links = (R2IndexFileLink(),)

    def __init__(
        self,
        *,
        bucket: str,
        items: list[DownloadItem] | DownloadItem,
        r2index_conn_id: str = "r2index_default",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.bucket = bucket
        # Normalise a lone item to a one-element list.
        self.items = [items] if isinstance(items, DownloadItem) else items
        self.r2index_conn_id = r2index_conn_id

    def _get_client_config(self, conn_id: str) -> dict[str, Any]:
        """
        Build the client keyword arguments for ``conn_id``.

        The Airflow connection is tried first; if it yields nothing or lacks
        ``index_api_url``, the hook's environment-based configuration is used.

        :param conn_id: Airflow connection ID to resolve.
        :return: Keyword arguments for ``AsyncR2IndexClient`` (possibly empty).
        """
        hook = R2IndexHook(r2index_conn_id=conn_id)
        config = hook._get_config_from_connection()
        if config is None or not config.get("index_api_url"):
            config = hook._get_config_from_env()
        return config or {}

    def execute(self, context: Context) -> list[dict[str, Any]]:
        """
        Download every item and return the local paths and file records.

        :param context: Airflow task context (unused).
        :return: One ``{"path": ..., "file_record": ...}`` dict per item, in
            item order.
        :raises AirflowException: If at least one download failed.
        """
        # Resolved client configs, cached per connection ID so each connection
        # is looked up once. A separate client is still opened for every item.
        conn_configs: dict[str, dict[str, Any]] = {}

        def config_for(conn_id: str) -> dict[str, Any]:
            if conn_id not in conn_configs:
                conn_configs[conn_id] = self._get_client_config(conn_id)
            return conn_configs[conn_id]

        async def download_one(item: DownloadItem) -> dict[str, Any]:
            # Per-item overrides win over the operator-level defaults.
            conn_id = item.r2index_conn_id or self.r2index_conn_id
            bucket = item.bucket or self.bucket
            config = config_for(conn_id)

            try:
                async with AsyncR2IndexClient(**config) as client:
                    downloaded_path, file_record = await client.download(
                        bucket=bucket,
                        source_path=item.source_path,
                        source_filename=item.source_filename,
                        source_version=item.source_version,
                        destination=item.destination,
                        verify_checksum=item.verify_checksum,
                    )
                    return {
                        "status": "success",
                        "path": str(downloaded_path),
                        "file_record": file_record.model_dump(),
                    }
            except Exception as e:
                # Failures are collected rather than raised so the remaining
                # downloads can finish; log here so the traceback is not lost.
                self.log.exception(
                    "Download of %s/%s failed", item.source_path, item.source_filename
                )
                return {
                    "status": "error",
                    "message": str(e),
                    "source_path": item.source_path,
                }

        async def download_all() -> list[dict[str, Any]]:
            return await asyncio.gather(*(download_one(item) for item in self.items))

        results = asyncio.run(download_all())

        # Fail the task if any single download failed.
        errors = [r for r in results if r.get("status") == "error"]
        if errors:
            self.log.error("Failed downloads: %s", errors)
            raise AirflowException(f"{len(errors)}/{len(results)} downloads failed")

        self.log.info("Downloaded %d files in parallel", len(results))
        return [{"path": r["path"], "file_record": r["file_record"]} for r in results]
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: elaunira-airflow
3
+ Version: 0.1.0
4
+ Summary: Elaunira Airflow operators and hooks for R2Index
5
+ Project-URL: Repository, https://github.com/elaunira/elaunira-airflow
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: apache-airflow-providers-edge3
9
+ Requires-Dist: apache-airflow-providers-git
10
+ Requires-Dist: apache-airflow>=3.0.0
11
+ Requires-Dist: cryptography
12
+ Requires-Dist: elaunira-airflow-provider-r2index
13
+ Requires-Dist: elaunira-r2index
14
+ Requires-Dist: pyjwt
@@ -0,0 +1,13 @@
1
+ elaunira/__init__.py,sha256=qaXVGBU6uIJyveNTEbWux5EcfVSM186PvDwjyxiXLw4,34
2
+ elaunira/airflow/__init__.py,sha256=7pO55pTaG_1iZrfvDW8A26V_yBgwK7MbxiD7g22s2_4,139
3
+ elaunira/airflow/decorators/__init__.py,sha256=YP5PKFqMAAN0F-T-0imzyix2Ld_FWQISilc33XNmeE0,176
4
+ elaunira/airflow/decorators/r2index.py,sha256=tXAyiJpg3t271xYXuab0A6E87mBd9UsezY7sHgAj0fs,4294
5
+ elaunira/airflow/hooks/__init__.py,sha256=N17rG-6soMLH4WBvx-495-n7UXF3RDdLuEX7icxqjxI,122
6
+ elaunira/airflow/links/__init__.py,sha256=PYO9UsgLrLMO6tyOjNDfFK8ZvA5WejIpYVVaqCIe4nQ,136
7
+ elaunira/airflow/links/r2index.py,sha256=YSYrTZVxyQ-6bXFTKJcZOPlnQFhoZJZ_ZDlqMiCXnD8,990
8
+ elaunira/airflow/operators/__init__.py,sha256=xZuu18w7Vh5Azmxq9lYz_oNGjH7E26n4ogdfeVPrPBw,289
9
+ elaunira/airflow/operators/r2index.py,sha256=s1RONuoEk1xnOtlLVCjyw1bXCuFHqRREXOxIicp8lso,8670
10
+ elaunira_airflow-0.1.0.dist-info/METADATA,sha256=Q0Xiqxq3b0Cnx_YDxdqz0c8oFT17wCpedTz3ofX011I,493
11
+ elaunira_airflow-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
+ elaunira_airflow-0.1.0.dist-info/entry_points.txt,sha256=A8fFcVohG1m1_0pzCIJcqoZJIFoexzjZdrpvgIEn_JY,166
13
+ elaunira_airflow-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [airflow.task_decorators]
2
+ r2index_download = elaunira.airflow.decorators.r2index:r2index_download
3
+ r2index_upload = elaunira.airflow.decorators.r2index:r2index_upload