elaunira-airflow-provider-r2index 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: elaunira-airflow-provider-r2index
3
+ Version: 0.1.0
4
+ Summary: Airflow provider for Elaunira R2Index connections
5
+ Project-URL: Repository, https://github.com/elaunira/elaunira-airflow-provider-r2index
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: apache-airflow-providers-hashicorp
9
+ Requires-Dist: apache-airflow>=3.0.0
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "elaunira-airflow-provider-r2index"
7
+ version = "0.1.0"
8
+ description = "Airflow provider for Elaunira R2Index connections"
9
+ requires-python = ">=3.12"
10
+ license = "MIT"
11
+ dependencies = [
12
+ "apache-airflow>=3.0.0",
13
+ "apache-airflow-providers-hashicorp",
14
+ ]
15
+
16
+ [project.urls]
17
+ Repository = "https://github.com/elaunira/elaunira-airflow-provider-r2index"
18
+
19
+ [project.entry-points."apache_airflow_provider"]
20
+ provider_info = "elaunira.airflow.provider.r2index:get_provider_info"
21
+
22
+ [tool.hatch.build.targets.wheel]
23
+ packages = ["src/elaunira"]
@@ -0,0 +1 @@
1
+ """Elaunira namespace package."""
@@ -0,0 +1 @@
1
+ """Elaunira Airflow namespace package."""
@@ -0,0 +1 @@
1
+ """Elaunira Airflow provider namespace package."""
@@ -0,0 +1,21 @@
1
+ """Elaunira R2Index Airflow provider."""
2
+
3
+ from importlib.metadata import version
4
+
5
+ __version__ = version("elaunira-airflow-provider-r2index")
6
+
7
+
8
def get_provider_info():
    """Build the provider metadata dictionary exposed to Airflow.

    This function is the target of the ``apache_airflow_provider`` entry
    point declared in the package's ``pyproject.toml``.

    Returns:
        A dict with the package name, display name, description, the single
        registered connection type (with the dotted path of its hook class)
        and a one-element ``versions`` list holding the module-level
        ``__version__``.
    """
    # The only connection type this provider registers.
    r2index_connection = {
        "connection-type": "elaunira_r2index",
        "hook-class-name": "elaunira.airflow.provider.r2index.hooks.r2index.R2IndexHook",
    }

    # Assembled key by key; insertion order matches the original literal.
    provider_info = {}
    provider_info["package-name"] = "elaunira-airflow-provider-r2index"
    provider_info["name"] = "Elaunira R2Index"
    provider_info["description"] = "Airflow provider for Elaunira R2Index connections"
    provider_info["connection-types"] = [r2index_connection]
    provider_info["versions"] = [__version__]
    return provider_info
@@ -0,0 +1,5 @@
1
+ """Elaunira R2Index hooks."""
2
+
3
+ from elaunira.airflow.provider.r2index.hooks.r2index import R2IndexHook
4
+
5
+ __all__ = ["R2IndexHook"]
@@ -0,0 +1,342 @@
1
+ """R2Index hook for Airflow."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ from airflow.hooks.base import BaseHook
10
+
11
+ if TYPE_CHECKING:
12
+ from elaunira.r2index import R2IndexClient
13
+
14
+
15
class R2IndexHook(BaseHook):
    """
    Hook for interacting with R2Index API and R2 storage.

    Configuration priority:
    1. Airflow connection with Vault/OpenBao reference (fetches secrets dynamically)
    2. Airflow connection with direct credentials
    3. Environment variables (fallback)

    The fallback is all-or-nothing: environment variables are used only when
    the connection could not be resolved at all, or resolved without an
    R2Index API URL. Individual missing keys are NOT filled in from the
    environment.

    Airflow connection with Vault/OpenBao reference (extra JSON):
        {
            "vault_conn_id": "openbao_default",
            "vault_namespace": "elaunira/production",
            "vault_secrets": {
                "r2index_api_url": "cloudflare/r2index#api-url",
                "r2index_api_token": "cloudflare/r2index#api-token",
                "r2_access_key_id": "cloudflare/r2/airflow#access-key-id",
                "r2_secret_access_key": "cloudflare/r2/airflow#secret-access-key",
                "r2_endpoint_url": "cloudflare/r2/airflow#endpoint-url"
            }
        }

    The vault_conn_id references an Airflow HashiCorp Vault connection
    configured with AppRole or other auth method.

    vault_secrets format: "path#key" or "path" (uses config key as secret key)
    Required keys:
        - r2index_api_url
        - r2index_api_token
        - r2_access_key_id
        - r2_secret_access_key
        - r2_endpoint_url

    Airflow connection with direct credentials:
        - host: R2Index API URL
        - password: R2Index API token
        - extra.r2_access_key_id: R2 access key ID
        - extra.r2_secret_access_key: R2 secret access key
        - extra.r2_endpoint_url: R2 endpoint URL

    Environment variables (fallback):
        - R2INDEX_API_URL
        - R2INDEX_API_TOKEN
        - R2_ACCESS_KEY_ID
        - R2_SECRET_ACCESS_KEY
        - R2_ENDPOINT_URL
    """

    conn_name_attr = "r2index_conn_id"
    default_conn_name = "r2index_default"
    conn_type = "elaunira_r2index"
    hook_name = "Elaunira R2Index"

    # Keys accepted in the ``vault_secrets`` mapping of the connection extra.
    CONFIG_KEYS = [
        "r2index_api_url",
        "r2index_api_token",
        "r2_access_key_id",
        "r2_secret_access_key",
        "r2_endpoint_url",
    ]

    @classmethod
    def get_ui_field_behaviour(cls) -> dict[str, Any]:
        """Customize connection form UI.

        Returns:
            A dict with the standard fields to hide, the relabeling of
            ``host`` and ``password``, and placeholder text for the standard
            and custom fields.
        """
        return {
            "hidden_fields": ["port", "schema", "login", "extra"],
            "relabeling": {
                "host": "R2Index API URL (direct mode only)",
                "password": "R2Index API Token (direct mode only)",
            },
            "placeholders": {
                "host": "https://r2index.example.com",
                "password": "API token for direct connection",
                "vault_conn_id": "openbao-myservice",
                "vault_namespace": "myservice/production",
                "vault_secrets": '{"r2index_api_url": "cloudflare/r2index#api-url", ...}',
                "r2_access_key_id": "Direct mode: R2 access key ID",
                "r2_secret_access_key": "Direct mode: R2 secret access key",
                "r2_endpoint_url": "https://account.r2.cloudflarestorage.com",
            },
        }

    @classmethod
    def get_connection_form_widgets(cls) -> dict[str, Any]:
        """Define custom connection form widgets.

        Returns:
            A mapping of extra-field name to the form field used to edit it
            (three Vault-mode fields and three direct-mode fields).
        """
        # Imported lazily so that importing the hook does not require the
        # web-UI stack.
        # NOTE(review): these packages are not declared in the project's
        # dependencies — confirm they are available in the target Airflow
        # deployment.
        from flask_appbuilder.fieldwidgets import BS3PasswordFieldWidget, BS3TextFieldWidget
        from flask_babel import lazy_gettext
        from wtforms import PasswordField, StringField

        return {
            "vault_conn_id": StringField(
                lazy_gettext("Vault Connection ID"),
                widget=BS3TextFieldWidget(),
                description="Airflow Vault connection ID (e.g., openbao-ipregistry)",
            ),
            "vault_namespace": StringField(
                lazy_gettext("Vault Namespace"),
                widget=BS3TextFieldWidget(),
                description="OpenBao namespace (e.g., ipregistry/production)",
            ),
            "vault_secrets": StringField(
                lazy_gettext("Vault Secrets (JSON)"),
                widget=BS3TextFieldWidget(),
                description="JSON mapping of config keys to secret paths",
            ),
            "r2_access_key_id": StringField(
                lazy_gettext("R2 Access Key ID"),
                widget=BS3TextFieldWidget(),
                description="Direct mode: Cloudflare R2 access key ID",
            ),
            "r2_secret_access_key": PasswordField(
                lazy_gettext("R2 Secret Access Key"),
                widget=BS3PasswordFieldWidget(),
                description="Direct mode: Cloudflare R2 secret access key",
            ),
            "r2_endpoint_url": StringField(
                lazy_gettext("R2 Endpoint URL"),
                widget=BS3TextFieldWidget(),
                description="Direct mode: Cloudflare R2 endpoint URL",
            ),
        }

    def __init__(self, r2index_conn_id: str = default_conn_name) -> None:
        """Create the hook.

        Args:
            r2index_conn_id: ID of the Airflow connection to read the
                configuration from.
        """
        super().__init__()
        self.r2index_conn_id = r2index_conn_id
        # Created lazily by get_conn() and reused afterwards.
        self._client: R2IndexClient | None = None

    def _parse_secret_ref(self, secret_ref: str, default_key: str) -> tuple[str, str]:
        """Parse a secret reference into (path, key).

        Format: "path#key" or just "path" (uses default_key). The split is
        done on the LAST "#", so the path itself may contain "#". A reference
        with a trailing "#" and no key (``"path#"``) also uses default_key
        rather than an empty key that could never match.

        Args:
            secret_ref: Reference string taken from ``vault_secrets``.
            default_key: Key to use when the reference does not name one.

        Returns:
            A ``(path, key)`` tuple.
        """
        path, separator, key = secret_ref.rpartition("#")
        if not separator:
            # No "#" at all: the whole reference is the path.
            return secret_ref, default_key
        return path, key or default_key

    def _get_config_from_env(self) -> dict[str, str | None]:
        """Get configuration from environment variables.

        Returns:
            The client settings read from the process environment; a value
            is None when its variable is not set.
        """
        return {
            "index_api_url": os.environ.get("R2INDEX_API_URL"),
            "index_api_token": os.environ.get("R2INDEX_API_TOKEN"),
            "r2_access_key_id": os.environ.get("R2_ACCESS_KEY_ID"),
            "r2_secret_access_key": os.environ.get("R2_SECRET_ACCESS_KEY"),
            "r2_endpoint_url": os.environ.get("R2_ENDPOINT_URL"),
        }

    def _get_config_from_vault(
        self,
        vault_conn_id: str,
        secrets: dict[str, str],
        namespace: str | None = None,
    ) -> dict[str, str | None] | None:
        """Get configuration from Vault/OpenBao using Airflow's VaultHook.

        Each distinct secret path is fetched at most once per call, even if
        several config keys point into it.

        Args:
            vault_conn_id: Airflow connection ID for Vault/OpenBao
            secrets: Mapping of config key to secret reference (path#key format)
            namespace: OpenBao namespace to use

        Returns:
            The client settings (a value is None when its key has no
            reference or the secret lacks that key), or None when anything
            fails while talking to Vault. Failures are logged, not raised.
        """
        from airflow.providers.hashicorp.hooks.vault import VaultHook

        try:
            # NOTE(review): confirm that VaultHook forwards a
            # ``vault_namespace`` keyword; if it is rejected, the failure is
            # now logged below instead of silently falling back.
            vault_hook = VaultHook(vault_conn_id=vault_conn_id, vault_namespace=namespace)
            secret_cache: dict[str, dict[str, Any]] = {}

            def get_secret_value(config_key: str) -> str | None:
                secret_ref = secrets.get(config_key)
                if not secret_ref:
                    return None

                path, key = self._parse_secret_ref(secret_ref, config_key)

                if path not in secret_cache:
                    data = vault_hook.get_secret(secret_path=path, secret_version=None)
                    # A missing secret is cached as empty so it is not re-fetched.
                    secret_cache[path] = data or {}

                return secret_cache[path].get(key)

            return {
                "index_api_url": get_secret_value("r2index_api_url"),
                "index_api_token": get_secret_value("r2index_api_token"),
                "r2_access_key_id": get_secret_value("r2_access_key_id"),
                "r2_secret_access_key": get_secret_value("r2_secret_access_key"),
                "r2_endpoint_url": get_secret_value("r2_endpoint_url"),
            }
        except Exception:
            # Best effort by design: the caller falls back to environment
            # variables. Log so a broken Vault setup is diagnosable.
            self.log.warning(
                "Could not read R2Index configuration from Vault connection %r",
                vault_conn_id,
                exc_info=True,
            )
            return None

    def _get_config_from_connection(self) -> dict[str, str | None] | None:
        """Get configuration from Airflow connection.

        If connection has vault_conn_id, fetches from Vault/OpenBao.
        Otherwise uses direct credentials from connection fields.

        Returns:
            The client settings, or None when the connection cannot be
            loaded, ``vault_secrets`` is missing or unusable, or the Vault
            lookup fails. Failures are logged, not raised.
        """
        try:
            try:
                conn = self.get_connection(self.r2index_conn_id)
            except Exception as exc:
                # Expected when only environment variables are configured,
                # so keep this quiet (no traceback).
                self.log.info(
                    "Airflow connection %r is not available (%s)",
                    self.r2index_conn_id,
                    exc,
                )
                return None

            extra = conn.extra_dejson

            vault_conn_id = extra.get("vault_conn_id")
            if vault_conn_id:
                secrets_raw = extra.get("vault_secrets")
                if not secrets_raw:
                    self.log.warning(
                        "Connection %r sets vault_conn_id but has no vault_secrets",
                        self.r2index_conn_id,
                    )
                    return None
                # The form widget stores the mapping as a JSON string; a
                # hand-written extra may already hold a dict.
                if isinstance(secrets_raw, str):
                    secrets = json.loads(secrets_raw)
                else:
                    secrets = secrets_raw
                if not isinstance(secrets, dict):
                    self.log.warning(
                        "vault_secrets of connection %r must be a JSON object",
                        self.r2index_conn_id,
                    )
                    return None
                return self._get_config_from_vault(
                    vault_conn_id=vault_conn_id,
                    secrets=secrets,
                    namespace=extra.get("vault_namespace"),
                )

            return {
                "index_api_url": conn.host,
                "index_api_token": conn.password,
                "r2_access_key_id": extra.get("r2_access_key_id"),
                "r2_secret_access_key": extra.get("r2_secret_access_key"),
                "r2_endpoint_url": extra.get("r2_endpoint_url"),
            }
        except Exception:
            # Covers malformed vault_secrets JSON and any other unexpected
            # error; the caller falls back to environment variables.
            self.log.warning(
                "Could not read R2Index configuration from connection %r",
                self.r2index_conn_id,
                exc_info=True,
            )
            return None

    def get_conn(self) -> R2IndexClient:
        """Get the R2IndexClient.

        The client is built on first use and cached on the hook instance.
        Configuration comes from the Airflow connection; environment
        variables are used instead when that yields nothing or no API URL.

        Returns:
            The cached or newly created client.
        """
        if self._client is not None:
            return self._client

        from elaunira.r2index import R2IndexClient

        config = self._get_config_from_connection()
        if config is None or not config.get("index_api_url"):
            self.log.info(
                "No usable configuration from connection %r; using environment variables",
                self.r2index_conn_id,
            )
            config = self._get_config_from_env()

        # Names only (never values) so that nothing secret reaches the logs.
        missing = [name for name, value in config.items() if not value]
        if missing:
            self.log.warning("R2Index settings without a value: %s", ", ".join(missing))

        self._client = R2IndexClient(
            index_api_url=config["index_api_url"],
            index_api_token=config["index_api_token"],
            r2_access_key_id=config["r2_access_key_id"],
            r2_secret_access_key=config["r2_secret_access_key"],
            r2_endpoint_url=config["r2_endpoint_url"],
        )
        return self._client

    def upload(
        self,
        bucket: str,
        source: str,
        category: str,
        entity: str,
        extension: str,
        media_type: str,
        destination_path: str,
        destination_filename: str,
        destination_version: str,
        name: str | None = None,
        tags: list[str] | None = None,
        extra: dict[str, Any] | None = None,
        create_checksum_files: bool = False,
    ) -> dict[str, Any]:
        """Upload a file to R2 and register it with R2Index.

        All arguments are forwarded unchanged, by keyword, to the client's
        ``upload`` method.

        Returns:
            The ``model_dump()`` of the file record returned by the client.
        """
        client = self.get_conn()
        file_record = client.upload(
            bucket=bucket,
            source=source,
            category=category,
            entity=entity,
            extension=extension,
            media_type=media_type,
            destination_path=destination_path,
            destination_filename=destination_filename,
            destination_version=destination_version,
            name=name,
            tags=tags,
            extra=extra,
            create_checksum_files=create_checksum_files,
        )
        return file_record.model_dump()

    def download(
        self,
        bucket: str,
        source_path: str,
        source_filename: str,
        source_version: str,
        destination: str,
        verify_checksum: bool = True,
    ) -> dict[str, Any]:
        """Download a file from R2.

        All arguments are forwarded unchanged, by keyword, to the client's
        ``download`` method.

        Returns:
            A dict with ``"path"`` (the downloaded path as a string) and
            ``"file_record"`` (the ``model_dump()`` of the record returned
            by the client).
        """
        client = self.get_conn()
        downloaded_path, file_record = client.download(
            bucket=bucket,
            source_path=source_path,
            source_filename=source_filename,
            source_version=source_version,
            destination=destination,
            verify_checksum=verify_checksum,
        )
        return {
            "path": str(downloaded_path),
            "file_record": file_record.model_dump(),
        }

    def get_file(self, file_id: str) -> dict[str, Any]:
        """Get a file record by ID.

        Args:
            file_id: Identifier passed to the client's ``get`` method.

        Returns:
            The ``model_dump()`` of the record returned by the client.
        """
        client = self.get_conn()
        return client.get(file_id).model_dump()

    def list_files(
        self,
        bucket: str | None = None,
        category: str | None = None,
        entity: str | None = None,
        extension: str | None = None,
        tags: list[str] | None = None,
        limit: int | None = None,
    ) -> dict[str, Any]:
        """List files with optional filters.

        All arguments are forwarded unchanged, by keyword, to the client's
        ``list_files`` method.

        Returns:
            The ``model_dump()`` of the response returned by the client.
        """
        client = self.get_conn()
        response = client.list_files(
            bucket=bucket,
            category=category,
            entity=entity,
            extension=extension,
            tags=tags,
            limit=limit,
        )
        return response.model_dump()