elaunira-airflow-provider-r2index 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elaunira/__init__.py +1 -0
- elaunira/airflow/__init__.py +1 -0
- elaunira/airflow/provider/__init__.py +1 -0
- elaunira/airflow/provider/r2index/__init__.py +21 -0
- elaunira/airflow/provider/r2index/hooks/__init__.py +5 -0
- elaunira/airflow/provider/r2index/hooks/r2index.py +342 -0
- elaunira_airflow_provider_r2index-0.1.0.dist-info/METADATA +9 -0
- elaunira_airflow_provider_r2index-0.1.0.dist-info/RECORD +10 -0
- elaunira_airflow_provider_r2index-0.1.0.dist-info/WHEEL +4 -0
- elaunira_airflow_provider_r2index-0.1.0.dist-info/entry_points.txt +2 -0
elaunira/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Elaunira namespace package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Elaunira Airflow namespace package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Elaunira Airflow provider namespace package."""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Elaunira R2Index Airflow provider."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("elaunira-airflow-provider-r2index")
except PackageNotFoundError:
    # Distribution metadata is unavailable (for example when the package is
    # imported from a source checkout that was never installed). Fall back to
    # a placeholder so importing the provider does not fail outright.
    __version__ = "0.0.0"


def get_provider_info():
    """Return provider metadata for Airflow.

    Returns:
        A dict with the provider's package name, display name, description,
        the connection type it registers (mapped to the dotted path of
        ``R2IndexHook``), and a one-element ``versions`` list holding
        ``__version__``.
    """
    return {
        "package-name": "elaunira-airflow-provider-r2index",
        "name": "Elaunira R2Index",
        "description": "Airflow provider for Elaunira R2Index connections",
        "connection-types": [
            {
                "connection-type": "elaunira_r2index",
                "hook-class-name": "elaunira.airflow.provider.r2index.hooks.r2index.R2IndexHook",
            }
        ],
        "versions": [__version__],
    }
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
"""R2Index hook for Airflow."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
from airflow.hooks.base import BaseHook
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from elaunira.r2index import R2IndexClient
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class R2IndexHook(BaseHook):
    """
    Hook for interacting with R2Index API and R2 storage.

    Configuration priority:
    1. Airflow connection with Vault/OpenBao reference (fetches secrets dynamically)
    2. Airflow connection with direct credentials
    3. Environment variables (fallback)

    The environment fallback is used whenever the Airflow connection cannot be
    loaded or does not yield an R2Index API URL. Failures while reading the
    connection or Vault are logged (never raised) before falling back.

    Airflow connection with Vault/OpenBao reference (extra JSON):
        {
            "vault_conn_id": "openbao_default",
            "vault_namespace": "elaunira/production",
            "vault_secrets": {
                "r2index_api_url": "cloudflare/r2index#api-url",
                "r2index_api_token": "cloudflare/r2index#api-token",
                "r2_access_key_id": "cloudflare/r2/airflow#access-key-id",
                "r2_secret_access_key": "cloudflare/r2/airflow#secret-access-key",
                "r2_endpoint_url": "cloudflare/r2/airflow#endpoint-url"
            }
        }

    The vault_conn_id references an Airflow HashiCorp Vault connection
    configured with AppRole or other auth method.

    vault_secrets format: "path#key" or "path" (uses config key as secret key)
    Required keys:
        - r2index_api_url
        - r2index_api_token
        - r2_access_key_id
        - r2_secret_access_key
        - r2_endpoint_url

    Airflow connection with direct credentials:
        - host: R2Index API URL
        - password: R2Index API token
        - extra.r2_access_key_id: R2 access key ID
        - extra.r2_secret_access_key: R2 secret access key
        - extra.r2_endpoint_url: R2 endpoint URL

    Environment variables (fallback):
        - R2INDEX_API_URL
        - R2INDEX_API_TOKEN
        - R2_ACCESS_KEY_ID
        - R2_SECRET_ACCESS_KEY
        - R2_ENDPOINT_URL
    """

    conn_name_attr = "r2index_conn_id"
    default_conn_name = "r2index_default"
    conn_type = "elaunira_r2index"
    hook_name = "Elaunira R2Index"

    # Keys accepted in the ``vault_secrets`` mapping of the connection extra.
    CONFIG_KEYS = [
        "r2index_api_url",
        "r2index_api_token",
        "r2_access_key_id",
        "r2_secret_access_key",
        "r2_endpoint_url",
    ]

    @classmethod
    def get_ui_field_behaviour(cls) -> dict[str, Any]:
        """Customize connection form UI.

        Returns:
            A dict with the standard fields to hide, the relabelled ``host``
            and ``password`` fields, and placeholder text for every field.
        """
        return {
            "hidden_fields": ["port", "schema", "login", "extra"],
            "relabeling": {
                "host": "R2Index API URL (direct mode only)",
                "password": "R2Index API Token (direct mode only)",
            },
            "placeholders": {
                "host": "https://r2index.example.com",
                "password": "API token for direct connection",
                "vault_conn_id": "openbao-myservice",
                "vault_namespace": "myservice/production",
                "vault_secrets": '{"r2index_api_url": "cloudflare/r2index#api-url", ...}',
                "r2_access_key_id": "Direct mode: R2 access key ID",
                "r2_secret_access_key": "Direct mode: R2 secret access key",
                "r2_endpoint_url": "https://account.r2.cloudflarestorage.com",
            },
        }

    @classmethod
    def get_connection_form_widgets(cls) -> dict[str, Any]:
        """Define custom connection form widgets.

        The form libraries are imported lazily so that importing this module
        does not require them.

        Returns:
            A dict mapping each extra field name to its form field.
        """
        from flask_appbuilder.fieldwidgets import BS3PasswordFieldWidget, BS3TextFieldWidget
        from flask_babel import lazy_gettext
        from wtforms import PasswordField, StringField

        return {
            "vault_conn_id": StringField(
                lazy_gettext("Vault Connection ID"),
                widget=BS3TextFieldWidget(),
                description="Airflow Vault connection ID (e.g., openbao-ipregistry)",
            ),
            "vault_namespace": StringField(
                lazy_gettext("Vault Namespace"),
                widget=BS3TextFieldWidget(),
                description="OpenBao namespace (e.g., ipregistry/production)",
            ),
            "vault_secrets": StringField(
                lazy_gettext("Vault Secrets (JSON)"),
                widget=BS3TextFieldWidget(),
                description="JSON mapping of config keys to secret paths",
            ),
            "r2_access_key_id": StringField(
                lazy_gettext("R2 Access Key ID"),
                widget=BS3TextFieldWidget(),
                description="Direct mode: Cloudflare R2 access key ID",
            ),
            "r2_secret_access_key": PasswordField(
                lazy_gettext("R2 Secret Access Key"),
                widget=BS3PasswordFieldWidget(),
                description="Direct mode: Cloudflare R2 secret access key",
            ),
            "r2_endpoint_url": StringField(
                lazy_gettext("R2 Endpoint URL"),
                widget=BS3TextFieldWidget(),
                description="Direct mode: Cloudflare R2 endpoint URL",
            ),
        }

    def __init__(self, r2index_conn_id: str = default_conn_name) -> None:
        """Create the hook.

        Args:
            r2index_conn_id: Airflow connection ID to read configuration from.
        """
        super().__init__()
        self.r2index_conn_id = r2index_conn_id
        # Created lazily by get_conn() and reused afterwards.
        self._client: R2IndexClient | None = None

    def _parse_secret_ref(self, secret_ref: str, default_key: str) -> tuple[str, str]:
        """Parse a secret reference into (path, key).

        Format: "path#key" or just "path" (uses default_key). When several
        ``#`` characters are present, the split happens on the last one.
        """
        if "#" in secret_ref:
            path, key = secret_ref.rsplit("#", 1)
            return path, key
        return secret_ref, default_key

    def _get_config_from_env(self) -> dict[str, str | None]:
        """Get configuration from environment variables.

        Returns:
            The client configuration dict; a value is ``None`` when the
            corresponding environment variable is not set.
        """
        return {
            "index_api_url": os.environ.get("R2INDEX_API_URL"),
            "index_api_token": os.environ.get("R2INDEX_API_TOKEN"),
            "r2_access_key_id": os.environ.get("R2_ACCESS_KEY_ID"),
            "r2_secret_access_key": os.environ.get("R2_SECRET_ACCESS_KEY"),
            "r2_endpoint_url": os.environ.get("R2_ENDPOINT_URL"),
        }

    def _get_config_from_vault(
        self,
        vault_conn_id: str,
        secrets: dict[str, str],
        namespace: str | None = None,
    ) -> dict[str, str | None] | None:
        """Get configuration from Vault/OpenBao using Airflow's VaultHook.

        Args:
            vault_conn_id: Airflow connection ID for Vault/OpenBao
            secrets: Mapping of config key to secret reference (path#key format)
            namespace: OpenBao namespace to use

        Returns:
            The client configuration dict (values are ``None`` for config keys
            that have no reference or whose secret lacks the requested key),
            or ``None`` when reading from Vault failed. Failures are logged.
        """
        from airflow.providers.hashicorp.hooks.vault import VaultHook

        try:
            vault_hook = VaultHook(vault_conn_id=vault_conn_id, vault_namespace=namespace)
            # Several config keys usually live under the same secret path;
            # cache each path so it is fetched only once.
            secret_cache: dict[str, dict[str, Any]] = {}

            def get_secret_value(config_key: str) -> str | None:
                secret_ref = secrets.get(config_key)
                if not secret_ref:
                    return None

                path, key = self._parse_secret_ref(secret_ref, config_key)

                if path not in secret_cache:
                    data = vault_hook.get_secret(secret_path=path, secret_version=None)
                    secret_cache[path] = data or {}

                return secret_cache[path].get(key)

            return {
                "index_api_url": get_secret_value("r2index_api_url"),
                "index_api_token": get_secret_value("r2index_api_token"),
                "r2_access_key_id": get_secret_value("r2_access_key_id"),
                "r2_secret_access_key": get_secret_value("r2_secret_access_key"),
                "r2_endpoint_url": get_secret_value("r2_endpoint_url"),
            }
        except Exception:
            # Best effort: the caller falls back to another configuration
            # source, but the failure must be visible in the logs.
            self.log.warning(
                "Failed to read R2Index configuration from Vault connection %r",
                vault_conn_id,
                exc_info=True,
            )
            return None

    def _get_config_from_connection(self) -> dict[str, str | None] | None:
        """Get configuration from Airflow connection.

        If connection has vault_conn_id, fetches from Vault/OpenBao.
        Otherwise uses direct credentials from connection fields.

        Returns:
            The client configuration dict, or ``None`` when the connection
            could not be loaded or its Vault settings are unusable. The
            reason is logged in every ``None`` case.
        """
        try:
            conn = self.get_connection(self.r2index_conn_id)
            extra = conn.extra_dejson

            vault_conn_id = extra.get("vault_conn_id")
            if vault_conn_id:
                secrets_raw = extra.get("vault_secrets")
                if not secrets_raw:
                    self.log.warning(
                        "Connection %r sets vault_conn_id but has no vault_secrets",
                        self.r2index_conn_id,
                    )
                    return None
                # The connection form stores the mapping as a JSON string,
                # while a hand-written extra may already hold a dict.
                if isinstance(secrets_raw, str):
                    secrets = json.loads(secrets_raw)
                else:
                    secrets = secrets_raw
                if not isinstance(secrets, dict):
                    self.log.warning(
                        "vault_secrets of connection %r must be a JSON object, got %s",
                        self.r2index_conn_id,
                        type(secrets).__name__,
                    )
                    return None
                return self._get_config_from_vault(
                    vault_conn_id=vault_conn_id,
                    secrets=secrets,
                    namespace=extra.get("vault_namespace"),
                )

            return {
                "index_api_url": conn.host,
                "index_api_token": conn.password,
                "r2_access_key_id": extra.get("r2_access_key_id"),
                "r2_secret_access_key": extra.get("r2_secret_access_key"),
                "r2_endpoint_url": extra.get("r2_endpoint_url"),
            }
        except Exception as exc:
            # Best effort: a missing or broken connection is not fatal because
            # get_conn() falls back to environment variables. Log the reason
            # so the fallback is not silent.
            self.log.warning(
                "Could not load R2Index configuration from connection %r (%s: %s)",
                self.r2index_conn_id,
                type(exc).__name__,
                exc,
            )
            return None

    def get_conn(self) -> R2IndexClient:
        """Get the R2IndexClient.

        The client is built on first use and cached on the hook. The
        configuration comes from the Airflow connection; environment
        variables are used instead when the connection yields no API URL.
        """
        if self._client is not None:
            return self._client

        from elaunira.r2index import R2IndexClient

        config = self._get_config_from_connection()
        if config is None or not config.get("index_api_url"):
            config = self._get_config_from_env()

        # Report unresolved settings by name only (never values) so that a
        # later client error can be traced back to configuration.
        missing = sorted(key for key, value in config.items() if not value)
        if missing:
            self.log.warning(
                "R2Index configuration has no value for: %s", ", ".join(missing)
            )

        self._client = R2IndexClient(
            index_api_url=config["index_api_url"],
            index_api_token=config["index_api_token"],
            r2_access_key_id=config["r2_access_key_id"],
            r2_secret_access_key=config["r2_secret_access_key"],
            r2_endpoint_url=config["r2_endpoint_url"],
        )
        return self._client

    def upload(
        self,
        bucket: str,
        source: str,
        category: str,
        entity: str,
        extension: str,
        media_type: str,
        destination_path: str,
        destination_filename: str,
        destination_version: str,
        name: str | None = None,
        tags: list[str] | None = None,
        extra: dict[str, Any] | None = None,
        create_checksum_files: bool = False,
    ) -> dict[str, Any]:
        """Upload a file to R2 and register it with R2Index.

        All arguments are forwarded unchanged to ``R2IndexClient.upload``.

        Returns:
            The resulting file record, converted with ``model_dump()``.
        """
        client = self.get_conn()
        file_record = client.upload(
            bucket=bucket,
            source=source,
            category=category,
            entity=entity,
            extension=extension,
            media_type=media_type,
            destination_path=destination_path,
            destination_filename=destination_filename,
            destination_version=destination_version,
            name=name,
            tags=tags,
            extra=extra,
            create_checksum_files=create_checksum_files,
        )
        return file_record.model_dump()

    def download(
        self,
        bucket: str,
        source_path: str,
        source_filename: str,
        source_version: str,
        destination: str,
        verify_checksum: bool = True,
    ) -> dict[str, Any]:
        """Download a file from R2.

        All arguments are forwarded unchanged to ``R2IndexClient.download``.

        Returns:
            A dict with ``path`` (the downloaded path as a string) and
            ``file_record`` (the record converted with ``model_dump()``).
        """
        client = self.get_conn()
        downloaded_path, file_record = client.download(
            bucket=bucket,
            source_path=source_path,
            source_filename=source_filename,
            source_version=source_version,
            destination=destination,
            verify_checksum=verify_checksum,
        )
        return {
            "path": str(downloaded_path),
            "file_record": file_record.model_dump(),
        }

    def get_file(self, file_id: str) -> dict[str, Any]:
        """Get a file record by ID, converted with ``model_dump()``."""
        client = self.get_conn()
        return client.get(file_id).model_dump()

    def list_files(
        self,
        bucket: str | None = None,
        category: str | None = None,
        entity: str | None = None,
        extension: str | None = None,
        tags: list[str] | None = None,
        limit: int | None = None,
    ) -> dict[str, Any]:
        """List files with optional filters.

        All arguments are forwarded unchanged to ``R2IndexClient.list_files``.

        Returns:
            The client's response, converted with ``model_dump()``.
        """
        client = self.get_conn()
        response = client.list_files(
            bucket=bucket,
            category=category,
            entity=entity,
            extension=extension,
            tags=tags,
            limit=limit,
        )
        return response.model_dump()
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: elaunira-airflow-provider-r2index
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Airflow provider for Elaunira R2Index connections
|
|
5
|
+
Project-URL: Repository, https://github.com/elaunira/elaunira-airflow-provider-r2index
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: apache-airflow-providers-hashicorp
|
|
9
|
+
Requires-Dist: apache-airflow>=3.0.0
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
elaunira/__init__.py,sha256=qaXVGBU6uIJyveNTEbWux5EcfVSM186PvDwjyxiXLw4,34
|
|
2
|
+
elaunira/airflow/__init__.py,sha256=joy3454dE9nG4_TL4Mh2gw-m2pm-3PijpWpfpTR6nmQ,42
|
|
3
|
+
elaunira/airflow/provider/__init__.py,sha256=zapSJV1rJS7wCpkwklwtIgm-H65A93dulwXGgmArN3M,51
|
|
4
|
+
elaunira/airflow/provider/r2index/__init__.py,sha256=MamrRK2euHjlAJjLmXE-66Ewj7fdVGGCvce5FWOILpE,665
|
|
5
|
+
elaunira/airflow/provider/r2index/hooks/__init__.py,sha256=fLCw13d7Z61tf2qhKdNiJN3KdhwE4PDF3SyEZaf2PfU,130
|
|
6
|
+
elaunira/airflow/provider/r2index/hooks/r2index.py,sha256=B-rWEKOOFHbJwb5ppIEyCqIf8ozhO6ZJlMAKWySlPQs,12467
|
|
7
|
+
elaunira_airflow_provider_r2index-0.1.0.dist-info/METADATA,sha256=LDKeboMZS1jW8jFBeLFjpnCL-WI64ZkeyQtG5qMnDTM,358
|
|
8
|
+
elaunira_airflow_provider_r2index-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
9
|
+
elaunira_airflow_provider_r2index-0.1.0.dist-info/entry_points.txt,sha256=LHYlCOo_WbImlAdr1JvsogvEQsKPJwRe68cCA_Ne9A0,94
|
|
10
|
+
elaunira_airflow_provider_r2index-0.1.0.dist-info/RECORD,,
|