elaunira-r2index 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
- """Asynchronous R2 uploader using aioboto3."""
+ """Asynchronous R2 storage operations using aioboto3."""

  from collections.abc import Callable
  from pathlib import Path
@@ -6,20 +6,16 @@ from pathlib import Path
  import aioboto3
  from aiobotocore.config import AioConfig

- from .exceptions import UploadError
- from .uploader import R2Config
+ from .exceptions import DownloadError, UploadError
+ from .storage import R2Config, R2TransferConfig

- # 100MB threshold and part size for multipart uploads
- MULTIPART_THRESHOLD = 100 * 1024 * 1024
- MULTIPART_PART_SIZE = 100 * 1024 * 1024

-
- class AsyncR2Uploader:
- """Asynchronous R2 uploader using aioboto3."""
+ class AsyncR2Storage:
+ """Asynchronous R2 storage client using aioboto3."""

  def __init__(self, config: R2Config) -> None:
  """
- Initialize the async R2 uploader.
+ Initialize the async R2 storage client.

  Args:
  config: R2 configuration with credentials and endpoint.
@@ -30,20 +26,24 @@ class AsyncR2Uploader:
  async def upload_file(
  self,
  file_path: str | Path,
+ bucket: str,
  object_key: str,
  content_type: str | None = None,
  progress_callback: Callable[[int], None] | None = None,
+ transfer_config: R2TransferConfig | None = None,
  ) -> str:
  """
  Upload a file to R2 asynchronously.

- Uses multipart upload for files larger than 100MB.
+ Uses multipart upload for files larger than the configured threshold.

  Args:
  file_path: Path to the file to upload.
+ bucket: The R2 bucket name.
  object_key: The key (path) to store the object under in R2.
  content_type: Optional content type for the object.
  progress_callback: Optional callback called with bytes uploaded so far.
+ transfer_config: Optional transfer configuration for multipart/threading.

  Returns:
  The object key of the uploaded file.
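For illustration only (not part of the released package): a minimal sketch of calling the renamed AsyncR2Storage.upload_file with its new per-call bucket argument. Import paths, credentials, bucket and key names are assumptions; only the call signature comes from the hunk above.

    import asyncio

    # Import paths below are assumptions; the diff only shows relative imports.
    from elaunira_r2index.storage import R2Config, R2TransferConfig
    from elaunira_r2index.async_storage import AsyncR2Storage


    async def main() -> None:
        config = R2Config(
            access_key_id="placeholder-key-id",       # placeholder credentials
            secret_access_key="placeholder-secret",
            endpoint_url="https://<account-id>.r2.cloudflarestorage.com",
            # region omitted; assumed to have a default on R2Config
        )
        storage = AsyncR2Storage(config)
        # The bucket is now passed per call instead of living on R2Config.
        key = await storage.upload_file(
            "dist/myapp.zip",
            bucket="releases",
            object_key="releases/myapp/v1/myapp.zip",
            content_type="application/zip",
            transfer_config=R2TransferConfig(),  # defaults; only max_concurrency appears in this diff
        )
        print(key)


    asyncio.run(main())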
@@ -56,8 +56,9 @@ class AsyncR2Uploader:
  if not file_path.exists():
  raise UploadError(f"File not found: {file_path}")

- transfer_config = AioConfig(
- max_pool_connections=10,
+ tc = transfer_config or R2TransferConfig()
+ aio_config = AioConfig(
+ max_pool_connections=tc.max_concurrency,
  )

  extra_args = {}
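A sketch of tuning the transfer configuration introduced above; only max_concurrency is exercised in this diff, so the constructor keyword and any other fields are assumptions.

    from elaunira_r2index.storage import R2TransferConfig  # import path assumed

    # Hypothetical: widen the connection pool that ends up in
    # AioConfig(max_pool_connections=...) for a large multipart upload.
    tc = R2TransferConfig(max_concurrency=16)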
@@ -71,7 +72,7 @@ class AsyncR2Uploader:
  aws_secret_access_key=self.config.secret_access_key,
  endpoint_url=self.config.endpoint_url,
  region_name=self.config.region,
- config=transfer_config,
+ config=aio_config,
  ) as client:
  callback = None
  if progress_callback:
@@ -79,7 +80,7 @@

  await client.upload_file(
  str(file_path),
- self.config.bucket,
+ bucket,
  object_key,
  ExtraArgs=extra_args if extra_args else None,
  Callback=callback,
@@ -89,11 +90,12 @@

  return object_key

- async def delete_object(self, object_key: str) -> None:
+ async def delete_object(self, bucket: str, object_key: str) -> None:
  """
  Delete an object from R2 asynchronously.

  Args:
+ bucket: The R2 bucket name.
  object_key: The key of the object to delete.

  Raises:
@@ -107,15 +109,16 @@
  endpoint_url=self.config.endpoint_url,
  region_name=self.config.region,
  ) as client:
- await client.delete_object(Bucket=self.config.bucket, Key=object_key)
+ await client.delete_object(Bucket=bucket, Key=object_key)
  except Exception as e:
  raise UploadError(f"Failed to delete object from R2: {e}") from e

- async def object_exists(self, object_key: str) -> bool:
+ async def object_exists(self, bucket: str, object_key: str) -> bool:
  """
  Check if an object exists in R2 asynchronously.

  Args:
+ bucket: The R2 bucket name.
  object_key: The key of the object to check.

  Returns:
@@ -129,13 +132,71 @@
  endpoint_url=self.config.endpoint_url,
  region_name=self.config.region,
  ) as client:
- await client.head_object(Bucket=self.config.bucket, Key=object_key)
+ await client.head_object(Bucket=bucket, Key=object_key)
  return True
  except client.exceptions.ClientError as e:
  if e.response["Error"]["Code"] == "404":
  return False
  raise UploadError(f"Failed to check object existence: {e}") from e

+ async def download_file(
+ self,
+ bucket: str,
+ object_key: str,
+ file_path: str | Path,
+ progress_callback: Callable[[int], None] | None = None,
+ transfer_config: R2TransferConfig | None = None,
+ ) -> Path:
+ """
+ Download a file from R2 asynchronously.
+
+ Args:
+ bucket: The R2 bucket name.
+ object_key: The key (path) of the object in R2.
+ file_path: Local path where the file will be saved.
+ progress_callback: Optional callback called with bytes downloaded so far.
+ transfer_config: Optional transfer configuration for multipart/threading.
+
+ Returns:
+ The path to the downloaded file.
+
+ Raises:
+ DownloadError: If the download fails.
+ """
+ file_path = Path(file_path)
+
+ # Ensure parent directory exists
+ file_path.parent.mkdir(parents=True, exist_ok=True)
+
+ tc = transfer_config or R2TransferConfig()
+ aio_config = AioConfig(
+ max_pool_connections=tc.max_concurrency,
+ )
+
+ try:
+ async with self._session.client(
+ "s3",
+ aws_access_key_id=self.config.access_key_id,
+ aws_secret_access_key=self.config.secret_access_key,
+ endpoint_url=self.config.endpoint_url,
+ region_name=self.config.region,
+ config=aio_config,
+ ) as client:
+ callback = None
+ if progress_callback:
+ callback = _AsyncProgressCallback(progress_callback)
+
+ await client.download_file(
+ bucket,
+ object_key,
+ str(file_path),
+ Callback=callback,
+ )
+ except Exception as e:
+ raise DownloadError(f"Failed to download file from R2: {e}") from e
+
+ return file_path
+

  class _AsyncProgressCallback:
  """Wrapper to track cumulative progress for aioboto3 callback."""
@@ -31,7 +31,48 @@ from .models import (
  TimeseriesResponse,
  UserAgentsResponse,
  )
- from .uploader import R2Config, R2Uploader
+ from . import __version__
+ from .storage import R2Config, R2Storage, R2TransferConfig
+
+ CHECKIP_URL = "https://checkip.amazonaws.com"
+ DEFAULT_USER_AGENT = f"elaunira-r2index/{__version__}"
+
+
+ def _parse_object_id(object_id: str, bucket: str) -> RemoteTuple:
+ """
+ Parse an object_id into remote_path, remote_version, and remote_filename.
+
+ Format: /path/to/object/version/filename.ext
+ - remote_filename: last component (filename.ext)
+ - remote_version: second-to-last component (version)
+ - remote_path: everything before that (/path/to/object)
+
+ Args:
+ object_id: Full object path like /releases/myapp/v1/myapp.zip
+ bucket: The S3/R2 bucket name.
+
+ Returns:
+ RemoteTuple with parsed components including bucket.
+
+ Raises:
+ ValueError: If object_id doesn't have enough components.
+ """
+ parts = object_id.strip("/").split("/")
+ if len(parts) < 3:
+ raise ValueError(
+ f"object_id must have at least 3 components (path/version/filename), got: {object_id}"
+ )
+
+ remote_filename = parts[-1]
+ remote_version = parts[-2]
+ remote_path = "/" + "/".join(parts[:-2])
+
+ return RemoteTuple(
+ bucket=bucket,
+ remote_path=remote_path,
+ remote_filename=remote_filename,
+ remote_version=remote_version,
+ )


  class R2IndexClient:
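A worked example of the split performed by _parse_object_id, written standalone so it does not rely on importing the private helper:

    object_id = "/releases/myapp/v1/myapp.zip"
    parts = object_id.strip("/").split("/")      # ['releases', 'myapp', 'v1', 'myapp.zip']

    remote_filename = parts[-1]                  # 'myapp.zip'
    remote_version = parts[-2]                   # 'v1'
    remote_path = "/" + "/".join(parts[:-2])     # '/releases/myapp'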
@@ -39,29 +80,42 @@ class R2IndexClient:

  def __init__(
  self,
- api_url: str,
- api_token: str,
- r2_config: R2Config | None = None,
+ index_api_url: str,
+ index_api_token: str,
+ r2_access_key_id: str | None = None,
+ r2_secret_access_key: str | None = None,
+ r2_endpoint_url: str | None = None,
  timeout: float = 30.0,
  ) -> None:
  """
  Initialize the R2Index client.

  Args:
- api_url: Base URL of the r2index API.
- api_token: Bearer token for authentication.
- r2_config: Optional R2 configuration for upload operations.
+ index_api_url: Base URL of the r2index API.
+ index_api_token: Bearer token for authentication.
+ r2_access_key_id: R2 access key ID for storage operations.
+ r2_secret_access_key: R2 secret access key for storage operations.
+ r2_endpoint_url: R2 endpoint URL for storage operations.
  timeout: Request timeout in seconds.
  """
- self.api_url = api_url.rstrip("/")
- self._token = api_token
+ self.api_url = index_api_url.rstrip("/")
+ self._token = index_api_token
  self._timeout = timeout
- self._r2_config = r2_config
- self._uploader: R2Uploader | None = None
+ self._storage: R2Storage | None = None
+
+ # Build R2 config if credentials provided
+ if r2_access_key_id and r2_secret_access_key and r2_endpoint_url:
+ self._r2_config: R2Config | None = R2Config(
+ access_key_id=r2_access_key_id,
+ secret_access_key=r2_secret_access_key,
+ endpoint_url=r2_endpoint_url,
+ )
+ else:
+ self._r2_config = None

  self._client = httpx.Client(
  base_url=self.api_url,
- headers={"Authorization": f"Bearer {api_token}"},
+ headers={"Authorization": f"Bearer {index_api_token}"},
  timeout=timeout,
  )
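A construction sketch for the new flat constructor, which replaces the single r2_config parameter with individual R2 credential keywords. Values are placeholders and the top-level import is an assumption.

    from elaunira_r2index import R2IndexClient  # top-level export assumed

    client = R2IndexClient(
        index_api_url="https://r2index.example.com",
        index_api_token="api-token-placeholder",
        # The three R2 keywords are optional; omit them for index-only use
        # (storage operations then raise R2IndexError).
        r2_access_key_id="key-id-placeholder",
        r2_secret_access_key="secret-placeholder",
        r2_endpoint_url="https://<account-id>.r2.cloudflarestorage.com",
        timeout=60.0,
    )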

@@ -75,13 +129,13 @@ class R2IndexClient:
  """Close the HTTP client."""
  self._client.close()

- def _get_uploader(self) -> R2Uploader:
+ def _get_storage(self) -> R2Storage:
  """Get or create the R2 uploader."""
  if self._r2_config is None:
  raise R2IndexError("R2 configuration required for upload operations")
- if self._uploader is None:
- self._uploader = R2Uploader(self._r2_config)
- return self._uploader
+ if self._storage is None:
+ self._storage = R2Storage(self._r2_config)
+ return self._storage

  def _handle_response(self, response: httpx.Response) -> Any:
  """Handle API response and raise appropriate exceptions."""
@@ -108,8 +162,9 @@

  # File Operations

- def list_files(
+ def list(
  self,
+ bucket: str | None = None,
  category: str | None = None,
  entity: str | None = None,
  tags: list[str] | None = None,
@@ -120,6 +175,7 @@
  List files with optional filters.

  Args:
+ bucket: Filter by bucket.
  category: Filter by category.
  entity: Filter by entity.
  tags: Filter by tags.
@@ -130,6 +186,8 @@
  FileListResponse with files and pagination info.
  """
  params: dict[str, Any] = {}
+ if bucket:
+ params["bucket"] = bucket
  if category:
  params["category"] = category
  if entity:
@@ -145,7 +203,7 @@
  data = self._handle_response(response)
  return FileListResponse.model_validate(data)

- def create_file(self, data: FileCreateRequest) -> FileRecord:
+ def create(self, data: FileCreateRequest) -> FileRecord:
  """
  Create or upsert a file record.

@@ -159,7 +217,7 @@
  result = self._handle_response(response)
  return FileRecord.model_validate(result)

- def get_file(self, file_id: str) -> FileRecord:
+ def get(self, file_id: str) -> FileRecord:
  """
  Get a file by ID.

@@ -176,7 +234,7 @@
  data = self._handle_response(response)
  return FileRecord.model_validate(data)

- def update_file(self, file_id: str, data: FileUpdateRequest) -> FileRecord:
+ def update(self, file_id: str, data: FileUpdateRequest) -> FileRecord:
  """
  Update a file record.

@@ -194,7 +252,7 @@
  result = self._handle_response(response)
  return FileRecord.model_validate(result)

- def delete_file(self, file_id: str) -> None:
+ def delete(self, file_id: str) -> None:
  """
  Delete a file by ID.

@@ -207,17 +265,18 @@
  response = self._client.delete(f"/files/{file_id}")
  self._handle_response(response)

- def delete_file_by_tuple(self, remote_tuple: RemoteTuple) -> None:
+ def delete_by_tuple(self, remote_tuple: RemoteTuple) -> None:
  """
  Delete a file by remote tuple.

  Args:
- remote_tuple: The remote path, filename, and version.
+ remote_tuple: The bucket, remote path, filename, and version.

  Raises:
  NotFoundError: If the file is not found.
  """
  params = {
+ "bucket": remote_tuple.bucket,
  "remotePath": remote_tuple.remote_path,
  "remoteFilename": remote_tuple.remote_filename,
  "remoteVersion": remote_tuple.remote_version,
@@ -225,8 +284,32 @@
  response = self._client.delete("/files", params=params)
  self._handle_response(response)

- def get_index(
+ def get_by_tuple(self, remote_tuple: RemoteTuple) -> FileRecord:
+ """
+ Get a file by remote tuple.
+
+ Args:
+ remote_tuple: The bucket, remote path, filename, and version.
+
+ Returns:
+ The FileRecord.
+
+ Raises:
+ NotFoundError: If the file is not found.
+ """
+ params = {
+ "bucket": remote_tuple.bucket,
+ "remotePath": remote_tuple.remote_path,
+ "remoteFilename": remote_tuple.remote_filename,
+ "remoteVersion": remote_tuple.remote_version,
+ }
+ response = self._client.get("/files/by-tuple", params=params)
+ data = self._handle_response(response)
+ return FileRecord.model_validate(data)
+
+ def index(
  self,
+ bucket: str | None = None,
  category: str | None = None,
  entity: str | None = None,
  tags: list[str] | None = None,
@@ -235,6 +318,7 @@
  Get file index (lightweight listing).

  Args:
+ bucket: Filter by bucket.
  category: Filter by category.
  entity: Filter by entity.
  tags: Filter by tags.
@@ -243,6 +327,8 @@
  List of IndexEntry objects.
  """
  params: dict[str, Any] = {}
+ if bucket:
+ params["bucket"] = bucket
  if category:
  params["category"] = category
  if entity:
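A quick map of the renamed file-operation methods and the new bucket filter, reusing the client from the constructor sketch earlier; filter values are illustrative.

    # 0.1.0 -> 0.3.0 renames shown in the hunks above:
    #   list_files -> list, create_file -> create, get_file -> get,
    #   update_file -> update, delete_file -> delete,
    #   delete_file_by_tuple -> delete_by_tuple, get_index -> index
    # get_by_tuple is new in 0.3.0.

    files = client.list(bucket="releases", category="builds", tags=["stable"])
    entries = client.index(bucket="releases")
    record = client.get("file-id-placeholder")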
@@ -423,9 +509,10 @@

  # High-Level Pipeline

- def upload_and_register(
+ def upload(
  self,
- file_path: str | Path,
+ bucket: str,
+ local_path: str | Path,
  category: str,
  entity: str,
  remote_path: str,
@@ -446,7 +533,8 @@
  3. Register with r2index API

  Args:
- file_path: Path to the file to upload.
+ bucket: The S3/R2 bucket name.
+ local_path: Local path to the file to upload.
  category: File category.
  entity: File entity.
  remote_path: Remote path in R2 (e.g., "/data/files").
@@ -465,18 +553,19 @@
  R2IndexError: If R2 config is not provided.
  UploadError: If upload fails.
  """
- file_path = Path(file_path)
- uploader = self._get_uploader()
+ local_path = Path(local_path)
+ uploader = self._get_storage()

  # Step 1: Compute checksums
- checksums = compute_checksums(file_path)
+ checksums = compute_checksums(local_path)

  # Step 2: Build R2 object key
  object_key = f"{remote_path.strip('/')}/{remote_filename}"

  # Step 3: Upload to R2
  uploader.upload_file(
- file_path,
+ local_path,
+ bucket,
  object_key,
  content_type=content_type,
  progress_callback=progress_callback,
@@ -484,6 +573,7 @@

  # Step 4: Register with API
  create_request = FileCreateRequest(
+ bucket=bucket,
  category=category,
  entity=entity,
  remote_path=remote_path,
@@ -499,4 +589,85 @@
  sha512=checksums.sha512,
  )

- return self.create_file(create_request)
+ return self.create(create_request)
+
+ def _get_public_ip(self) -> str:
+ """Fetch public IP address from checkip.amazonaws.com."""
+ response = httpx.get(CHECKIP_URL, timeout=10.0)
+ return response.text.strip()
+
+ def download(
+ self,
+ bucket: str,
+ object_id: str,
+ destination: str | Path,
+ ip_address: str | None = None,
+ user_agent: str | None = None,
+ progress_callback: Callable[[int], None] | None = None,
+ transfer_config: R2TransferConfig | None = None,
+ ) -> tuple[Path, FileRecord]:
+ """
+ Download a file from R2 and record the download in the index.
+
+ This is a convenience method that performs:
+ 1. Parse object_id into remote_path, remote_version, remote_filename
+ 2. Fetch file record from the API using these components
+ 3. Download the file from R2
+ 4. Record the download in the index for analytics
+
+ Args:
+ bucket: The S3/R2 bucket name.
+ object_id: Full S3 object path in format: /path/to/object/version/filename
+ Example: /releases/myapp/v1/myapp.zip
+ - remote_path: /releases/myapp
+ - remote_version: v1
+ - remote_filename: myapp.zip
+ destination: Local path where the file will be saved.
+ ip_address: IP address of the downloader. If not provided, fetched
+ from checkip.amazonaws.com.
+ user_agent: User agent string. Defaults to "elaunira-r2index/<version>".
+ progress_callback: Optional callback for download progress.
+ transfer_config: Optional transfer configuration for multipart/threading.
+
+ Returns:
+ A tuple of (downloaded file path, file record).
+
+ Raises:
+ R2IndexError: If R2 config is not provided.
+ ValueError: If object_id format is invalid.
+ NotFoundError: If the file is not found in the index.
+ DownloadError: If download fails.
+ """
+ storage = self._get_storage()
+
+ # Resolve defaults
+ if ip_address is None:
+ ip_address = self._get_public_ip()
+ if user_agent is None:
+ user_agent = DEFAULT_USER_AGENT
+
+ # Step 1: Parse object_id into components
+ remote_tuple = _parse_object_id(object_id, bucket)
+
+ # Step 2: Get file record by tuple
+ file_record = self.get_by_tuple(remote_tuple)
+
+ # Step 3: Build R2 object key and download
+ object_key = object_id.strip("/")
+ downloaded_path = storage.download_file(
+ bucket,
+ object_key,
+ destination,
+ progress_callback=progress_callback,
+ transfer_config=transfer_config,
+ )
+
+ # Step 4: Record the download
+ download_request = DownloadRecordRequest(
+ file_id=file_record.id,
+ ip_address=ip_address,
+ user_agent=user_agent,
+ )
+ self.record_download(download_request)
+
+ return downloaded_path, file_record
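An end-to-end sketch of the new download pipeline, again reusing the client constructed earlier; bucket, object_id, and destination are illustrative.

    # object_id is parsed into remote_path='/releases/myapp', remote_version='v1',
    # remote_filename='myapp.zip'; the matching FileRecord is looked up, the object
    # is downloaded, and the download is recorded for analytics.
    path, record = client.download(
        bucket="releases",
        object_id="/releases/myapp/v1/myapp.zip",
        destination="downloads/myapp.zip",
        # ip_address defaults to a checkip.amazonaws.com lookup;
        # user_agent defaults to "elaunira-r2index/<version>".
    )
    print(path, record.id)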
@@ -38,3 +38,9 @@ class UploadError(R2IndexError):
  """Raised for R2 upload failures."""

  pass
+
+
+ class DownloadError(R2IndexError):
+ """Raised for R2 download failures."""
+
+ pass
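An error-handling sketch for the new exception; the .exceptions module location comes from the import hunk at the top of this diff, while the installed package's import name is assumed.

    from elaunira_r2index.exceptions import DownloadError, R2IndexError

    try:
        path, record = client.download(
            bucket="releases",
            object_id="/releases/myapp/v1/myapp.zip",
            destination="downloads/myapp.zip",
        )
    except DownloadError as exc:   # new in 0.3.0; subclasses R2IndexError
        print(f"download failed: {exc}")
    except R2IndexError as exc:
        print(f"r2index error: {exc}")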
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
  class RemoteTuple(BaseModel):
  """Remote file identifier tuple."""

+ bucket: str
  remote_path: str
  remote_filename: str
  remote_version: str
@@ -17,6 +18,7 @@ class RemoteTuple(BaseModel):
  class FileCreateRequest(BaseModel):
  """Request payload for creating/upserting a file record."""

+ bucket: str
  category: str
  entity: str
  remote_path: str
@@ -35,6 +37,7 @@ class FileCreateRequest(BaseModel):
  class FileUpdateRequest(BaseModel):
  """Request payload for updating a file record."""

+ bucket: str | None = None
  category: str | None = None
  entity: str | None = None
  remote_path: str | None = None
@@ -54,6 +57,7 @@ class FileRecord(BaseModel):
  """File record as returned by the API."""

  id: str
+ bucket: str
  category: str
  entity: str
  remote_path: str
@@ -86,6 +90,7 @@ class IndexEntry(BaseModel):
  """Single entry in the index response."""

  id: str
+ bucket: str
  category: str
  entity: str
  remote_path: str
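Finally, a sketch of the now bucket-qualified RemoteTuple as used by the tuple-based client methods; only the four fields visible above are used, and the models import path is an assumption.

    from elaunira_r2index.models import RemoteTuple

    rt = RemoteTuple(
        bucket="releases",
        remote_path="/releases/myapp",
        remote_filename="myapp.zip",
        remote_version="v1",
    )
    record = client.get_by_tuple(rt)    # or client.delete_by_tuple(rt)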