atdata 0.1.3b4__py3-none-any.whl → 0.2.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,393 @@
1
+ """ATProto client wrapper for atdata.
2
+
3
+ This module provides the ``AtmosphereClient`` class which wraps the atproto SDK
4
+ client with atdata-specific helpers for publishing and querying records.
5
+ """
6
+
7
+ from typing import Optional, Any
8
+
9
+ from ._types import AtUri, LEXICON_NAMESPACE
10
+
11
# Cache slot for the atproto ``Client`` class. It stays ``None`` until the
# first call to ``_get_atproto_client_class`` so that the atproto package is
# only needed when atmosphere features are actually used.
_atproto_client_class: Optional[type] = None


def _get_atproto_client_class():
    """Return the atproto ``Client`` class, importing it on first use.

    The imported class is stored in the module-level
    ``_atproto_client_class`` so later calls skip the import.

    Returns:
        The ``atproto.Client`` class.

    Raises:
        ImportError: If the ``atproto`` package cannot be imported.
    """
    global _atproto_client_class

    # Fast path: a previous call already resolved the class.
    if _atproto_client_class is not None:
        return _atproto_client_class

    try:
        from atproto import Client
    except ImportError as e:
        raise ImportError(
            "The 'atproto' package is required for ATProto integration. "
            "Install it with: pip install atproto"
        ) from e

    _atproto_client_class = Client
    return _atproto_client_class
28
+
29
+
30
class AtmosphereClient:
    """ATProto client wrapper for atdata operations.

    This class wraps the atproto SDK client and provides higher-level methods
    for working with atdata records (schemas, datasets, lenses).

    Example:
        >>> client = AtmosphereClient()
        >>> client.login("alice.bsky.social", "app-password")
        >>> print(client.did)
        'did:plc:...'

    Note:
        The password should be an app-specific password, not your main account
        password. Create app passwords in your Bluesky account settings.
    """

    # Largest page size requested per ``listRecords`` call by the convenience
    # listing helpers. The com.atproto.repo.listRecords lexicon caps ``limit``
    # at 100, so larger requests have to be split across pages.
    _MAX_PAGE_SIZE = 100

    def __init__(
        self,
        base_url: Optional[str] = None,
        *,
        _client: Optional[Any] = None,
    ):
        """Initialize the ATProto client.

        Args:
            base_url: Optional PDS base URL. Defaults to bsky.social.
            _client: Optional pre-configured atproto Client for testing.
                When given, the atproto package is not imported.
        """
        if _client is not None:
            self._client = _client
        else:
            Client = _get_atproto_client_class()
            self._client = Client(base_url=base_url) if base_url else Client()

        # Populated by login()/login_with_session() with "did" and "handle".
        self._session: Optional[dict] = None

    def login(self, handle: str, password: str) -> None:
        """Authenticate with the ATProto PDS.

        Args:
            handle: Your Bluesky handle (e.g., 'alice.bsky.social').
            password: App-specific password (not your main password).

        Raises:
            atproto.exceptions.AtProtocolError: If authentication fails.
        """
        profile = self._client.login(handle, password)
        self._session = {
            "did": profile.did,
            "handle": profile.handle,
        }

    def login_with_session(self, session_string: str) -> None:
        """Authenticate using an exported session string.

        This allows reusing a session without re-authenticating, which helps
        avoid rate limits on session creation.

        Args:
            session_string: Session string from ``export_session()``.
        """
        self._client.login(session_string=session_string)
        # The identity is read back from the underlying client after login.
        self._session = {
            "did": self._client.me.did,
            "handle": self._client.me.handle,
        }

    def export_session(self) -> str:
        """Export the current session for later reuse.

        Returns:
            Session string that can be passed to ``login_with_session()``.

        Raises:
            ValueError: If not authenticated.
        """
        if not self.is_authenticated:
            raise ValueError("Not authenticated")
        return self._client.export_session_string()

    @property
    def is_authenticated(self) -> bool:
        """Check if the client has a valid session."""
        return self._session is not None

    @property
    def did(self) -> str:
        """Get the DID of the authenticated user.

        Returns:
            The DID string (e.g., 'did:plc:...').

        Raises:
            ValueError: If not authenticated.
        """
        if not self._session:
            raise ValueError("Not authenticated")
        return self._session["did"]

    @property
    def handle(self) -> str:
        """Get the handle of the authenticated user.

        Returns:
            The handle string (e.g., 'alice.bsky.social').

        Raises:
            ValueError: If not authenticated.
        """
        if not self._session:
            raise ValueError("Not authenticated")
        return self._session["handle"]

    def _ensure_authenticated(self) -> None:
        """Raise ``ValueError`` if no login has been performed."""
        if not self.is_authenticated:
            raise ValueError("Client must be authenticated to perform this operation")

    # Low-level record operations

    def create_record(
        self,
        collection: str,
        record: dict,
        *,
        rkey: Optional[str] = None,
        validate: bool = False,
    ) -> AtUri:
        """Create a record in the user's repository.

        Args:
            collection: The NSID of the record collection
                (e.g., 'ac.foundation.dataset.sampleSchema').
            record: The record data. Must include a '$type' field.
            rkey: Optional explicit record key. If not provided, a TID is generated.
            validate: Whether to validate against the Lexicon schema. Set to False
                for custom lexicons that the PDS doesn't know about.

        Returns:
            The AT URI of the created record.

        Raises:
            ValueError: If not authenticated.
            atproto.exceptions.AtProtocolError: If record creation fails.
        """
        self._ensure_authenticated()

        response = self._client.com.atproto.repo.create_record(
            data={
                "repo": self.did,
                "collection": collection,
                "record": record,
                "rkey": rkey,
                "validate": validate,
            }
        )

        return AtUri.parse(response.uri)

    def put_record(
        self,
        collection: str,
        rkey: str,
        record: dict,
        *,
        validate: bool = False,
        swap_commit: Optional[str] = None,
    ) -> AtUri:
        """Create or update a record at a specific key.

        Args:
            collection: The NSID of the record collection.
            rkey: The record key.
            record: The record data. Must include a '$type' field.
            validate: Whether to validate against the Lexicon schema.
            swap_commit: Optional CID for compare-and-swap update.

        Returns:
            The AT URI of the record.

        Raises:
            ValueError: If not authenticated.
            atproto.exceptions.AtProtocolError: If operation fails.
        """
        self._ensure_authenticated()

        data: dict[str, Any] = {
            "repo": self.did,
            "collection": collection,
            "rkey": rkey,
            "record": record,
            "validate": validate,
        }
        # Only send swapCommit when the caller supplied one.
        if swap_commit:
            data["swapCommit"] = swap_commit

        response = self._client.com.atproto.repo.put_record(data=data)

        return AtUri.parse(response.uri)

    def get_record(
        self,
        uri: str | AtUri,
    ) -> dict:
        """Fetch a record by AT URI.

        No authentication check is performed by this method; the repository
        queried is the authority component of ``uri``.

        Args:
            uri: The AT URI of the record.

        Returns:
            The record data as a dictionary.

        Raises:
            atproto.exceptions.AtProtocolError: If record not found.
        """
        if isinstance(uri, str):
            uri = AtUri.parse(uri)

        response = self._client.com.atproto.repo.get_record(
            params={
                "repo": uri.authority,
                "collection": uri.collection,
                "rkey": uri.rkey,
            }
        )

        return response.value

    def delete_record(
        self,
        uri: str | AtUri,
        *,
        swap_commit: Optional[str] = None,
    ) -> None:
        """Delete a record.

        The record is always removed from the authenticated user's
        repository: only the collection and record key of ``uri`` are used,
        its authority component is not consulted.

        Args:
            uri: The AT URI of the record to delete.
            swap_commit: Optional CID for compare-and-swap delete.

        Raises:
            ValueError: If not authenticated.
            atproto.exceptions.AtProtocolError: If deletion fails.
        """
        self._ensure_authenticated()

        if isinstance(uri, str):
            uri = AtUri.parse(uri)

        # NOTE(review): "repo" is self.did, not uri.authority, so a URI that
        # names another repository targets the same collection/rkey in the
        # caller's own repository. Confirm this is intended.
        data: dict[str, Any] = {
            "repo": self.did,
            "collection": uri.collection,
            "rkey": uri.rkey,
        }
        if swap_commit:
            data["swapCommit"] = swap_commit

        self._client.com.atproto.repo.delete_record(data=data)

    def list_records(
        self,
        collection: str,
        *,
        repo: Optional[str] = None,
        limit: int = 100,
        cursor: Optional[str] = None,
    ) -> tuple[list[dict], Optional[str]]:
        """List a single page of records in a collection.

        Args:
            collection: The NSID of the record collection.
            repo: The DID of the repository to query. Defaults to the
                authenticated user's repository.
            limit: Maximum number of records to return (default 100). The
                value is forwarded unchanged; the listRecords lexicon allows
                1-100, so larger values are expected to be rejected.
            cursor: Pagination cursor from a previous call.

        Returns:
            A tuple of (records, next_cursor). The cursor is None if there
            are no more records.

        Raises:
            ValueError: If repo is None and not authenticated.
        """
        if repo is None:
            self._ensure_authenticated()
            repo = self.did

        response = self._client.com.atproto.repo.list_records(
            params={
                "repo": repo,
                "collection": collection,
                "limit": limit,
                "cursor": cursor,
            }
        )

        records = [r.value for r in response.records]
        return records, response.cursor

    def _list_up_to(
        self,
        collection: str,
        repo: Optional[str],
        limit: int,
    ) -> list[dict]:
        """Collect up to ``limit`` records, following cursors across pages.

        Each request asks for at most ``_MAX_PAGE_SIZE`` records. For
        ``limit`` values within that size the first request is identical to a
        direct ``list_records(collection, repo=repo, limit=limit)`` call.
        """
        collected: list[dict] = []
        cursor: Optional[str] = None

        while True:
            page_size = min(limit - len(collected), self._MAX_PAGE_SIZE)
            page, next_cursor = self.list_records(
                collection,
                repo=repo,
                limit=page_size,
                cursor=cursor,
            )
            collected.extend(page)

            # Stop on an empty page, a missing cursor, or a cursor that did
            # not advance (guards against looping forever).
            exhausted = not page or not next_cursor or next_cursor == cursor
            if exhausted or len(collected) >= limit:
                return collected
            cursor = next_cursor

    # Convenience methods for atdata collections

    def list_schemas(
        self,
        repo: Optional[str] = None,
        limit: int = 100,
    ) -> list[dict]:
        """List schema records.

        Args:
            repo: The DID to query. Defaults to authenticated user.
            limit: Maximum number to return. Values above 100 are fetched
                across multiple pages.

        Returns:
            List of schema records.
        """
        return self._list_up_to(f"{LEXICON_NAMESPACE}.sampleSchema", repo, limit)

    def list_datasets(
        self,
        repo: Optional[str] = None,
        limit: int = 100,
    ) -> list[dict]:
        """List dataset records.

        Args:
            repo: The DID to query. Defaults to authenticated user.
            limit: Maximum number to return. Values above 100 are fetched
                across multiple pages.

        Returns:
            List of dataset records.
        """
        return self._list_up_to(f"{LEXICON_NAMESPACE}.record", repo, limit)

    def list_lenses(
        self,
        repo: Optional[str] = None,
        limit: int = 100,
    ) -> list[dict]:
        """List lens records.

        Args:
            repo: The DID to query. Defaults to authenticated user.
            limit: Maximum number to return. Values above 100 are fetched
                across multiple pages.

        Returns:
            List of lens records.
        """
        return self._list_up_to(f"{LEXICON_NAMESPACE}.lens", repo, limit)
@@ -0,0 +1,280 @@
1
+ """Lens transformation publishing for ATProto.
2
+
3
+ This module provides classes for publishing Lens transformation records to
4
+ ATProto. Lenses are published as ``ac.foundation.dataset.lens`` records.
5
+
6
+ Note:
7
+ For security reasons, lens code is stored as references to git repositories
8
+ rather than inline code. Users must manually install and import lens
9
+ implementations.
10
+ """
11
+
12
+ from typing import Optional, Callable
13
+
14
+ from .client import AtmosphereClient
15
+ from ._types import (
16
+ AtUri,
17
+ LensRecord,
18
+ CodeReference,
19
+ LEXICON_NAMESPACE,
20
+ )
21
+
22
+ # Import for type checking only
23
+ from typing import TYPE_CHECKING
24
+ if TYPE_CHECKING:
25
+ from ..lens import Lens
26
+
27
+
28
class LensPublisher:
    """Publishes Lens transformation records to ATProto.

    This class creates lens records that reference source and target schemas
    and point to the transformation code in a git repository.

    Example:
        >>> @atdata.lens
        ... def my_lens(source: SourceType) -> TargetType:
        ...     return TargetType(field=source.other_field)
        >>>
        >>> client = AtmosphereClient()
        >>> client.login("handle", "password")
        >>>
        >>> publisher = LensPublisher(client)
        >>> uri = publisher.publish(
        ...     name="my_lens",
        ...     source_schema_uri="at://did:plc:abc/ac.foundation.dataset.sampleSchema/source",
        ...     target_schema_uri="at://did:plc:abc/ac.foundation.dataset.sampleSchema/target",
        ...     code_repository="https://github.com/user/repo",
        ...     code_commit="abc123def456",
        ...     getter_path="mymodule.lenses:my_lens",
        ...     putter_path="mymodule.lenses:my_lens_putter",
        ... )

    Security Note:
        Lens code is stored as references to git repositories rather than
        inline code. This prevents arbitrary code execution from ATProto
        records. Users must manually install and trust lens implementations.
    """

    def __init__(self, client: AtmosphereClient):
        """Initialize the lens publisher.

        Args:
            client: Authenticated AtmosphereClient instance.
        """
        self.client = client

    def publish(
        self,
        *,
        name: str,
        source_schema_uri: str,
        target_schema_uri: str,
        description: Optional[str] = None,
        code_repository: Optional[str] = None,
        code_commit: Optional[str] = None,
        getter_path: Optional[str] = None,
        putter_path: Optional[str] = None,
        rkey: Optional[str] = None,
    ) -> AtUri:
        """Publish a lens transformation record to ATProto.

        Args:
            name: Human-readable lens name.
            source_schema_uri: AT URI of the source schema.
            target_schema_uri: AT URI of the target schema.
            description: What this transformation does.
            code_repository: Git repository URL containing the lens code.
            code_commit: Git commit hash for reproducibility.
            getter_path: Module path to the getter function
                (e.g., 'mymodule.lenses:my_getter').
            putter_path: Module path to the putter function
                (e.g., 'mymodule.lenses:my_putter').
            rkey: Optional explicit record key.

        Returns:
            The AT URI of the created lens record.

        Raises:
            ValueError: If code references are incomplete, i.e. a getter or
                putter path is given without both ``code_repository`` and
                ``code_commit``.
        """
        # A path without repository + commit cannot form a code reference.
        # Fail loudly instead of publishing a record that silently lacks it.
        wants_code_refs = bool(getter_path or putter_path)
        if wants_code_refs and not (code_repository and code_commit):
            raise ValueError(
                "Incomplete code reference: getter_path/putter_path require "
                "both code_repository and code_commit"
            )

        getter_code: Optional[CodeReference] = None
        putter_code: Optional[CodeReference] = None

        if getter_path:
            getter_code = CodeReference(
                repository=code_repository,
                commit=code_commit,
                path=getter_path,
            )
        if putter_path:
            putter_code = CodeReference(
                repository=code_repository,
                commit=code_commit,
                path=putter_path,
            )

        lens_record = LensRecord(
            name=name,
            source_schema=source_schema_uri,
            target_schema=target_schema_uri,
            description=description,
            getter_code=getter_code,
            putter_code=putter_code,
        )

        # Lexicon validation is disabled for this custom collection.
        return self.client.create_record(
            collection=f"{LEXICON_NAMESPACE}.lens",
            record=lens_record.to_record(),
            rkey=rkey,
            validate=False,
        )

    def publish_from_lens(
        self,
        lens_obj: "Lens",
        *,
        name: str,
        source_schema_uri: str,
        target_schema_uri: str,
        code_repository: str,
        code_commit: str,
        description: Optional[str] = None,
        rkey: Optional[str] = None,
    ) -> AtUri:
        """Publish a lens record from an existing Lens object.

        This method extracts the getter and putter function names from
        the Lens object and publishes a record referencing them. Each path
        is ``"<module>:<name>"``, or just the name when the function reports
        no module.

        Args:
            lens_obj: The Lens object to publish.
            name: Human-readable lens name.
            source_schema_uri: AT URI of the source schema.
            target_schema_uri: AT URI of the target schema.
            code_repository: Git repository URL.
            code_commit: Git commit hash.
            description: What this transformation does.
            rkey: Optional explicit record key.

        Returns:
            The AT URI of the created lens record.

        Raises:
            ValueError: If ``code_repository`` or ``code_commit`` is empty
                (raised by ``publish``).
        """
        # Extract function names from the lens
        getter_name = lens_obj._getter.__name__
        putter_name = lens_obj._putter.__name__

        # Get module info if available
        getter_module = getattr(lens_obj._getter, "__module__", "")
        putter_module = getattr(lens_obj._putter, "__module__", "")

        getter_path = f"{getter_module}:{getter_name}" if getter_module else getter_name
        putter_path = f"{putter_module}:{putter_name}" if putter_module else putter_name

        return self.publish(
            name=name,
            source_schema_uri=source_schema_uri,
            target_schema_uri=target_schema_uri,
            description=description,
            code_repository=code_repository,
            code_commit=code_commit,
            getter_path=getter_path,
            putter_path=putter_path,
            rkey=rkey,
        )
187
+
188
+
189
class LensLoader:
    """Loads lens records from ATProto.

    This class fetches lens transformation records. Note that actually
    using a lens requires installing the referenced code and importing
    it manually.

    Example:
        >>> client = AtmosphereClient()
        >>> loader = LensLoader(client)
        >>>
        >>> record = loader.get("at://did:plc:abc/ac.foundation.dataset.lens/xyz")
        >>> print(record["name"])
        >>> print(record["sourceSchema"])
        >>> print(record.get("getterCode", {}).get("repository"))
    """

    def __init__(self, client: AtmosphereClient):
        """Initialize the lens loader.

        Args:
            client: AtmosphereClient instance.
        """
        self.client = client

    def get(self, uri: str | AtUri) -> dict:
        """Fetch a lens record by AT URI.

        Args:
            uri: The AT URI of the lens record.

        Returns:
            The lens record as a dictionary.

        Raises:
            ValueError: If the record is not a lens record.
        """
        record = self.client.get_record(uri)

        # Reject records whose $type is not the lens collection NSID.
        expected_type = f"{LEXICON_NAMESPACE}.lens"
        if record.get("$type") != expected_type:
            raise ValueError(
                f"Record at {uri} is not a lens record. "
                f"Expected $type='{expected_type}', got '{record.get('$type')}'"
            )

        return record

    def list_all(
        self,
        repo: Optional[str] = None,
        limit: int = 100,
    ) -> list[dict]:
        """List lens records from a repository.

        Args:
            repo: The DID of the repository. Defaults to authenticated user.
            limit: Maximum number of records to return.

        Returns:
            List of lens records.
        """
        return self.client.list_lenses(repo=repo, limit=limit)

    def find_by_schemas(
        self,
        source_schema_uri: str,
        target_schema_uri: Optional[str] = None,
        repo: Optional[str] = None,
    ) -> list[dict]:
        """Find lenses that transform between specific schemas.

        The lens collection is scanned page by page, 100 records per
        request, because the listRecords lexicon does not accept a larger
        ``limit``. Matching is an exact string comparison on the
        ``sourceSchema`` / ``targetSchema`` fields.

        Args:
            source_schema_uri: AT URI of the source schema.
            target_schema_uri: Optional AT URI of the target schema.
                If not provided, returns all lenses from the source.
            repo: The DID of the repository to search.

        Returns:
            List of matching lens records.
        """
        collection = f"{LEXICON_NAMESPACE}.lens"
        page_size = 100  # maximum page size allowed by listRecords

        matches: list[dict] = []
        cursor: Optional[str] = None

        while True:
            page, next_cursor = self.client.list_records(
                collection,
                repo=repo,
                limit=page_size,
                cursor=cursor,
            )

            matches.extend(
                lens_record
                for lens_record in page
                if lens_record.get("sourceSchema") == source_schema_uri
                and (
                    target_schema_uri is None
                    or lens_record.get("targetSchema") == target_schema_uri
                )
            )

            # Stop on an empty page, a missing cursor, or a cursor that did
            # not advance (guards against looping forever).
            if not page or not next_cursor or next_cursor == cursor:
                return matches
            cursor = next_cursor