atdata 0.3.1b1__py3-none-any.whl → 0.3.2b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,22 @@
1
1
  """Type definitions for ATProto record structures.
2
2
 
3
- This module defines the data structures used to represent ATProto records
4
- for schemas, datasets, and lenses. These types map to the Lexicon definitions
5
- in the ``ac.foundation.dataset.*`` namespace.
3
+ This module provides the ``AtUri`` utility class and the ``LEXICON_NAMESPACE``
4
+ constant. Lexicon-mirror record types (``LexSchemaRecord``, ``LexDatasetRecord``,
5
+ ``LexLensRecord``, etc.) have moved to ``atdata.atmosphere._lexicon_types``.
6
+
7
+ The old type names (``SchemaRecord``, ``DatasetRecord``, ``LensRecord``,
8
+ ``StorageLocation``, ``FieldType``, ``FieldDef``, ``CodeReference``) are
9
+ re-exported here as deprecated aliases (or lightweight shims) for backward compatibility.
6
10
  """
7
11
 
8
- from dataclasses import dataclass, field
9
- from datetime import datetime, timezone
10
- from typing import Optional, Literal, Any
12
+ from __future__ import annotations
13
+
14
+ import warnings
15
+ from dataclasses import dataclass
16
+ from typing import Any, Literal, Optional
11
17
 
12
- # Lexicon namespace for atdata records
18
+ # Canonical constant also defined in _lexicon_types but kept here as the
19
+ # historically authoritative location so existing imports continue to work.
13
20
  LEXICON_NAMESPACE = "ac.foundation.dataset"
14
21
 
15
22
 
@@ -39,7 +46,7 @@ class AtUri:
39
46
  """The record key within the collection."""
40
47
 
41
48
  @classmethod
42
- def parse(cls, uri: str) -> "AtUri":
49
+ def parse(cls, uri: str) -> AtUri:
43
50
  """Parse an AT URI string into components.
44
51
 
45
52
  Args:
@@ -71,261 +78,82 @@ class AtUri:
71
78
  return f"at://{self.authority}/{self.collection}/{self.rkey}"
72
79
 
73
80
 
74
- @dataclass
75
- class FieldType:
76
- """Schema field type definition.
81
+ # ---------------------------------------------------------------------------
82
+ # Deprecated re-exports (will be removed in a future version)
83
+ # ---------------------------------------------------------------------------
84
+ # These names existed in this module before the lexicon-mirror types were
85
+ # split into _lexicon_types.py. They are re-exported here so that existing
86
+ # imports like ``from atdata.atmosphere._types import SchemaRecord`` continue
87
+ # to work during the migration period.
88
+
89
+
90
+ def __getattr__(name: str) -> Any:
91
+ _DEPRECATED_ALIASES: dict[str, tuple[str, str]] = {
92
+ # old name → (replacement attribute name, module to import it from)
93
+ "FieldType": ("FieldType", "atdata.atmosphere._lexicon_types"),
94
+ "FieldDef": ("FieldDef", "atdata.atmosphere._lexicon_types"),
95
+ "SchemaRecord": ("LexSchemaRecord", "atdata.atmosphere._lexicon_types"),
96
+ "DatasetRecord": ("LexDatasetRecord", "atdata.atmosphere._lexicon_types"),
97
+ "LensRecord": ("LexLensRecord", "atdata.atmosphere._lexicon_types"),
98
+ "StorageLocation": ("StorageLocation", "atdata.atmosphere._lexicon_types"),
99
+ "CodeReference": ("LexCodeReference", "atdata.atmosphere._lexicon_types"),
100
+ }
101
+ if name in _DEPRECATED_ALIASES:
102
+ new_name, mod_path = _DEPRECATED_ALIASES[name]
103
+ warnings.warn(
104
+ f"{name} has been moved. Import {new_name} from {mod_path} instead.",
105
+ DeprecationWarning,
106
+ stacklevel=2,
107
+ )
108
+ from . import _lexicon_types
77
109
 
78
- Represents a type in the schema type system, supporting primitives,
79
- ndarrays, and references to other schemas.
80
- """
110
+ # For StorageLocation, provide a lightweight shim
111
+ if name == "StorageLocation":
112
+ return _StorageLocationShim
113
+ # FieldType / FieldDef don't exist in _lexicon_types; they were
114
+ # internal-only types used by the old SchemaRecord, so return the shim
115
+ # classes defined below (NOTE: the warning above still names _lexicon_types).
116
+ if name in ("FieldType", "FieldDef"):
117
+ return _FIELD_SHIMS[name]
118
+ return getattr(_lexicon_types, new_name)
119
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
81
120
 
82
- kind: Literal["primitive", "ndarray", "ref", "array"]
83
- """The category of type."""
84
121
 
85
- primitive: Optional[str] = None
86
- """For kind='primitive': one of 'str', 'int', 'float', 'bool', 'bytes'."""
122
+ # Lightweight shims for types that have no direct equivalent in _lexicon_types
87
123
 
88
- dtype: Optional[str] = None
89
- """For kind='ndarray': numpy dtype string (e.g., 'float32')."""
90
124
 
91
- shape: Optional[list[int | None]] = None
92
- """For kind='ndarray': shape constraints (None for any dimension)."""
125
+ @dataclass
126
+ class _FieldTypeShim:
127
+ """Deprecated: schema field type used by the old SchemaRecord."""
93
128
 
129
+ kind: Literal["primitive", "ndarray", "ref", "array"]
130
+ primitive: Optional[str] = None
131
+ dtype: Optional[str] = None
132
+ shape: Optional[list[int | None]] = None
94
133
  ref: Optional[str] = None
95
- """For kind='ref': AT URI of referenced schema."""
96
-
97
- items: Optional["FieldType"] = None
98
- """For kind='array': type of array elements."""
134
+ items: Optional[_FieldTypeShim] = None
99
135
 
100
136
 
101
137
  @dataclass
102
- class FieldDef:
103
- """Schema field definition."""
138
+ class _FieldDefShim:
139
+ """Deprecated: schema field definition used by the old SchemaRecord."""
104
140
 
105
141
  name: str
106
- """Field name."""
107
-
108
- field_type: FieldType
109
- """Type of this field."""
110
-
142
+ field_type: _FieldTypeShim
111
143
  optional: bool = False
112
- """Whether this field can be None."""
113
-
114
- description: Optional[str] = None
115
- """Human-readable description."""
116
-
117
-
118
- @dataclass
119
- class SchemaRecord:
120
- """ATProto record for a PackableSample schema.
121
-
122
- Maps to the ``ac.foundation.dataset.schema`` Lexicon.
123
- """
124
-
125
- name: str
126
- """Human-readable schema name."""
127
-
128
- version: str
129
- """Semantic version string (e.g., '1.0.0')."""
130
-
131
- fields: list[FieldDef]
132
- """List of field definitions."""
133
-
134
144
  description: Optional[str] = None
135
- """Human-readable description."""
136
-
137
- created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
138
- """When this record was created."""
139
-
140
- metadata: Optional[dict] = None
141
- """Arbitrary metadata as msgpack-encoded bytes."""
142
-
143
- def to_record(self) -> dict:
144
- """Convert to ATProto record dict for publishing."""
145
- record = {
146
- "$type": f"{LEXICON_NAMESPACE}.schema",
147
- "name": self.name,
148
- "version": self.version,
149
- "fields": [self._field_to_dict(f) for f in self.fields],
150
- "createdAt": self.created_at.isoformat(),
151
- }
152
- if self.description:
153
- record["description"] = self.description
154
- if self.metadata:
155
- record["metadata"] = self.metadata
156
- return record
157
-
158
- def _field_to_dict(self, field_def: FieldDef) -> dict:
159
- """Convert a field definition to dict."""
160
- result = {
161
- "name": field_def.name,
162
- "fieldType": self._type_to_dict(field_def.field_type),
163
- "optional": field_def.optional,
164
- }
165
- if field_def.description:
166
- result["description"] = field_def.description
167
- return result
168
-
169
- def _type_to_dict(self, field_type: FieldType) -> dict:
170
- """Convert a field type to dict."""
171
- result: dict = {"$type": f"{LEXICON_NAMESPACE}.schemaType#{field_type.kind}"}
172
-
173
- if field_type.kind == "primitive":
174
- result["primitive"] = field_type.primitive
175
- elif field_type.kind == "ndarray":
176
- result["dtype"] = field_type.dtype
177
- if field_type.shape:
178
- result["shape"] = field_type.shape
179
- elif field_type.kind == "ref":
180
- result["ref"] = field_type.ref
181
- elif field_type.kind == "array":
182
- if field_type.items:
183
- result["items"] = self._type_to_dict(field_type.items)
184
-
185
- return result
186
145
 
187
146
 
188
147
  @dataclass
189
- class StorageLocation:
190
- """Dataset storage location specification."""
148
+ class _StorageLocationShim:
149
+ """Deprecated: use StorageHttp / StorageS3 / StorageBlobs instead."""
191
150
 
192
151
  kind: Literal["external", "blobs"]
193
- """Storage type: external URLs or ATProto blobs."""
194
-
195
152
  urls: Optional[list[str]] = None
196
- """For kind='external': WebDataset URLs with brace notation."""
197
-
198
153
  blob_refs: Optional[list[dict]] = None
199
- """For kind='blobs': ATProto blob references."""
200
-
201
-
202
- @dataclass
203
- class DatasetRecord:
204
- """ATProto record for a dataset index.
205
-
206
- Maps to the ``ac.foundation.dataset.record`` Lexicon.
207
- """
208
-
209
- name: str
210
- """Human-readable dataset name."""
211
-
212
- schema_ref: str
213
- """AT URI of the schema record."""
214
-
215
- storage: StorageLocation
216
- """Where the dataset data is stored."""
217
-
218
- description: Optional[str] = None
219
- """Human-readable description."""
220
-
221
- tags: list[str] = field(default_factory=list)
222
- """Searchable tags."""
223
-
224
- license: Optional[str] = None
225
- """SPDX license identifier."""
226
-
227
- created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
228
- """When this record was created."""
229
-
230
- metadata: Optional[bytes] = None
231
- """Arbitrary metadata as msgpack-encoded bytes."""
232
-
233
- def to_record(self) -> dict:
234
- """Convert to ATProto record dict for publishing."""
235
- record = {
236
- "$type": f"{LEXICON_NAMESPACE}.record",
237
- "name": self.name,
238
- "schemaRef": self.schema_ref,
239
- "storage": self._storage_to_dict(),
240
- "createdAt": self.created_at.isoformat(),
241
- }
242
- if self.description:
243
- record["description"] = self.description
244
- if self.tags:
245
- record["tags"] = self.tags
246
- if self.license:
247
- record["license"] = self.license
248
- if self.metadata:
249
- record["metadata"] = self.metadata
250
- return record
251
-
252
- def _storage_to_dict(self) -> dict:
253
- """Convert storage location to dict."""
254
- if self.storage.kind == "external":
255
- return {
256
- "$type": f"{LEXICON_NAMESPACE}.storageExternal",
257
- "urls": self.storage.urls or [],
258
- }
259
- else:
260
- return {
261
- "$type": f"{LEXICON_NAMESPACE}.storageBlobs",
262
- "blobs": self.storage.blob_refs or [],
263
- }
264
-
265
154
 
266
- @dataclass
267
- class CodeReference:
268
- """Reference to lens code in a git repository."""
269
-
270
- repository: str
271
- """Git repository URL."""
272
-
273
- commit: str
274
- """Git commit hash."""
275
-
276
- path: str
277
- """Path to the code file/function."""
278
-
279
-
280
- @dataclass
281
- class LensRecord:
282
- """ATProto record for a lens transformation.
283
155
 
284
- Maps to the ``ac.foundation.dataset.lens`` Lexicon.
285
- """
286
-
287
- name: str
288
- """Human-readable lens name."""
289
-
290
- source_schema: str
291
- """AT URI of the source schema."""
292
-
293
- target_schema: str
294
- """AT URI of the target schema."""
295
-
296
- description: Optional[str] = None
297
- """What this transformation does."""
298
-
299
- getter_code: Optional[CodeReference] = None
300
- """Reference to getter function code."""
301
-
302
- putter_code: Optional[CodeReference] = None
303
- """Reference to putter function code."""
304
-
305
- created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
306
- """When this record was created."""
307
-
308
- def to_record(self) -> dict:
309
- """Convert to ATProto record dict for publishing."""
310
- record: dict[str, Any] = {
311
- "$type": f"{LEXICON_NAMESPACE}.lens",
312
- "name": self.name,
313
- "sourceSchema": self.source_schema,
314
- "targetSchema": self.target_schema,
315
- "createdAt": self.created_at.isoformat(),
316
- }
317
- if self.description:
318
- record["description"] = self.description
319
- if self.getter_code:
320
- record["getterCode"] = {
321
- "repository": self.getter_code.repository,
322
- "commit": self.getter_code.commit,
323
- "path": self.getter_code.path,
324
- }
325
- if self.putter_code:
326
- record["putterCode"] = {
327
- "repository": self.putter_code.repository,
328
- "commit": self.putter_code.commit,
329
- "path": self.putter_code.path,
330
- }
331
- return record
156
+ _FIELD_SHIMS: dict[str, type] = {
157
+ "FieldType": _FieldTypeShim,
158
+ "FieldDef": _FieldDefShim,
159
+ }
atdata/atmosphere/lens.py CHANGED
@@ -9,18 +9,11 @@ Note:
9
9
  implementations.
10
10
  """
11
11
 
12
- from typing import Optional
12
+ from typing import Optional, TYPE_CHECKING
13
13
 
14
14
  from .client import Atmosphere
15
- from ._types import (
16
- AtUri,
17
- LensRecord,
18
- CodeReference,
19
- LEXICON_NAMESPACE,
20
- )
21
-
22
- # Import for type checking only
23
- from typing import TYPE_CHECKING
15
+ from ._types import AtUri, LEXICON_NAMESPACE
16
+ from ._lexicon_types import LexLensRecord, LexCodeReference
24
17
 
25
18
  if TYPE_CHECKING:
26
19
  from ..lens import Lens
@@ -70,59 +63,58 @@ class LensPublisher:
70
63
  name: str,
71
64
  source_schema_uri: str,
72
65
  target_schema_uri: str,
66
+ code_repository: str,
67
+ code_commit: str,
68
+ getter_path: str,
69
+ putter_path: str,
73
70
  description: Optional[str] = None,
74
- code_repository: Optional[str] = None,
75
- code_commit: Optional[str] = None,
76
- getter_path: Optional[str] = None,
77
- putter_path: Optional[str] = None,
71
+ language: Optional[str] = None,
72
+ metadata: Optional[dict] = None,
78
73
  rkey: Optional[str] = None,
79
74
  ) -> AtUri:
80
75
  """Publish a lens transformation record to ATProto.
81
76
 
77
+ Code references are required by the ATProto lexicon. Each lens must
78
+ point to a getter and putter implementation in a git repository.
79
+
82
80
  Args:
83
81
  name: Human-readable lens name.
84
82
  source_schema_uri: AT URI of the source schema.
85
83
  target_schema_uri: AT URI of the target schema.
86
- description: What this transformation does.
87
84
  code_repository: Git repository URL containing the lens code.
88
85
  code_commit: Git commit hash for reproducibility.
89
86
  getter_path: Module path to the getter function
90
87
  (e.g., 'mymodule.lenses:my_getter').
91
88
  putter_path: Module path to the putter function
92
89
  (e.g., 'mymodule.lenses:my_putter').
90
+ description: What this transformation does.
91
+ language: Programming language (e.g., 'python').
92
+ metadata: Arbitrary metadata dictionary.
93
93
  rkey: Optional explicit record key.
94
94
 
95
95
  Returns:
96
96
  The AT URI of the created lens record.
97
-
98
- Raises:
99
- ValueError: If code references are incomplete.
100
97
  """
101
- # Build code references if provided
102
- getter_code: Optional[CodeReference] = None
103
- putter_code: Optional[CodeReference] = None
104
-
105
- if code_repository and code_commit:
106
- if getter_path:
107
- getter_code = CodeReference(
108
- repository=code_repository,
109
- commit=code_commit,
110
- path=getter_path,
111
- )
112
- if putter_path:
113
- putter_code = CodeReference(
114
- repository=code_repository,
115
- commit=code_commit,
116
- path=putter_path,
117
- )
118
-
119
- lens_record = LensRecord(
98
+ getter_code = LexCodeReference(
99
+ repository=code_repository,
100
+ commit=code_commit,
101
+ path=getter_path,
102
+ )
103
+ putter_code = LexCodeReference(
104
+ repository=code_repository,
105
+ commit=code_commit,
106
+ path=putter_path,
107
+ )
108
+
109
+ lens_record = LexLensRecord(
120
110
  name=name,
121
111
  source_schema=source_schema_uri,
122
112
  target_schema=target_schema_uri,
123
- description=description,
124
113
  getter_code=getter_code,
125
114
  putter_code=putter_code,
115
+ description=description,
116
+ language=language,
117
+ metadata=metadata,
126
118
  )
127
119
 
128
120
  return self.client.create_record(
@@ -142,6 +134,8 @@ class LensPublisher:
142
134
  code_repository: str,
143
135
  code_commit: str,
144
136
  description: Optional[str] = None,
137
+ language: Optional[str] = None,
138
+ metadata: Optional[dict] = None,
145
139
  rkey: Optional[str] = None,
146
140
  ) -> AtUri:
147
141
  """Publish a lens record from an existing Lens object.
@@ -157,16 +151,16 @@ class LensPublisher:
157
151
  code_repository: Git repository URL.
158
152
  code_commit: Git commit hash.
159
153
  description: What this transformation does.
154
+ language: Programming language (e.g., 'python').
155
+ metadata: Arbitrary metadata dictionary.
160
156
  rkey: Optional explicit record key.
161
157
 
162
158
  Returns:
163
159
  The AT URI of the created lens record.
164
160
  """
165
- # Extract function names from the lens
166
161
  getter_name = lens_obj._getter.__name__
167
162
  putter_name = lens_obj._putter.__name__
168
163
 
169
- # Get module info if available
170
164
  getter_module = getattr(lens_obj._getter, "__module__", "")
171
165
  putter_module = getattr(lens_obj._putter, "__module__", "")
172
166
 
@@ -177,11 +171,13 @@ class LensPublisher:
177
171
  name=name,
178
172
  source_schema_uri=source_schema_uri,
179
173
  target_schema_uri=target_schema_uri,
180
- description=description,
181
174
  code_repository=code_repository,
182
175
  code_commit=code_commit,
183
176
  getter_path=getter_path,
184
177
  putter_path=putter_path,
178
+ description=description,
179
+ language=language,
180
+ metadata=metadata,
185
181
  rkey=rkey,
186
182
  )
187
183
 
@@ -234,6 +230,18 @@ class LensLoader:
234
230
 
235
231
  return record
236
232
 
233
+ def get_typed(self, uri: str | AtUri) -> LexLensRecord:
234
+ """Fetch a lens record and return it as a typed object.
235
+
236
+ Args:
237
+ uri: The AT URI of the lens record.
238
+
239
+ Returns:
240
+ LexLensRecord instance.
241
+ """
242
+ record = self.get(uri)
243
+ return LexLensRecord.from_record(record)
244
+
237
245
  def list_all(
238
246
  self,
239
247
  repo: Optional[str] = None,