atdata 0.3.1b1__py3-none-any.whl → 0.3.2b1__py3-none-any.whl

@@ -16,10 +16,14 @@ Lexicons:
      Extensible token for schema format identifiers.
  ac.foundation.dataset.arrayFormat
      Extensible token for array serialization formats.
- ac.foundation.dataset.storageExternal
-     External URL-based storage (S3, HTTP, IPFS).
+ ac.foundation.dataset.storageHttp
+     HTTP/HTTPS URL-based storage with per-shard checksums.
+ ac.foundation.dataset.storageS3
+     S3/S3-compatible object storage with per-shard checksums.
  ac.foundation.dataset.storageBlobs
      ATProto PDS blob-based storage.
+ ac.foundation.dataset.storageExternal
+     (Deprecated) External URL-based storage.
  ac.foundation.dataset.getLatestSchema
      XRPC query for fetching the latest schema version.

@@ -47,8 +51,10 @@ LEXICON_IDS = (
      f"{NAMESPACE}.lens",
      f"{NAMESPACE}.schemaType",
      f"{NAMESPACE}.arrayFormat",
-     f"{NAMESPACE}.storageExternal",
+     f"{NAMESPACE}.storageHttp",
+     f"{NAMESPACE}.storageS3",
      f"{NAMESPACE}.storageBlobs",
+     f"{NAMESPACE}.storageExternal",  # deprecated
      f"{NAMESPACE}.getLatestSchema",
  )

@@ -24,11 +24,13 @@
      },
      "sourceSchema": {
        "type": "string",
+       "format": "at-uri",
        "description": "AT-URI reference to source schema",
        "maxLength": 500
      },
      "targetSchema": {
        "type": "string",
+       "format": "at-uri",
        "description": "AT-URI reference to target schema",
        "maxLength": 500
      },
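With the added `"format": "at-uri"` constraint, lens records must carry syntactically valid AT-URIs in these fields rather than arbitrary strings. A hypothetical value that would pass (the DID and record key below are made up for illustration):

```python
# Hypothetical sourceSchema value; any syntactically valid AT-URI
# within the 500-char maxLength satisfies the new constraint.
source_schema = "at://did:plc:ab12cd34ef56/ac.foundation.dataset.schema/3k2akqmlg5t2a"
```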
@@ -30,7 +30,8 @@
        "type": "union",
        "description": "Storage location for dataset files (WebDataset tar archives)",
        "refs": [
-         "ac.foundation.dataset.storageExternal",
+         "ac.foundation.dataset.storageHttp",
+         "ac.foundation.dataset.storageS3",
          "ac.foundation.dataset.storageBlobs"
        ]
      },
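In ATProto record data, open unions are discriminated by a `$type` field, so a dataset record picks exactly one storage variant. A minimal sketch of what the widened union could look like in record JSON (URL and digest are placeholders, not real data):

```python
# Illustrative storage value for ac.foundation.dataset.record.
storage = {
    "$type": "ac.foundation.dataset.storageHttp",
    "shards": [
        {
            "url": "https://example.com/data-000000.tar",
            "checksum": {"algorithm": "sha256", "digest": "ab12..."},  # placeholder digest
        }
    ],
}
```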
@@ -71,6 +72,26 @@
          }
        }
      },
+     "shardChecksum": {
+       "type": "object",
+       "description": "Content hash for shard integrity verification. Algorithm is flexible to allow SHA-256, BLAKE3, or other hash functions.",
+       "required": [
+         "algorithm",
+         "digest"
+       ],
+       "properties": {
+         "algorithm": {
+           "type": "string",
+           "description": "Hash algorithm identifier (e.g., 'sha256', 'blake3')",
+           "maxLength": 20
+         },
+         "digest": {
+           "type": "string",
+           "description": "Hex-encoded hash digest",
+           "maxLength": 128
+         }
+       }
+     },
      "datasetSize": {
        "type": "object",
        "description": "Information about dataset size",
@@ -4,21 +4,43 @@
    "defs": {
      "main": {
        "type": "object",
-       "description": "Storage via ATProto PDS blobs for WebDataset tar archives. Each blob contains one or more tar files. Used in ac.foundation.dataset.record storage union for maximum decentralization.",
+       "description": "Storage via ATProto PDS blobs for WebDataset tar archives. Used in ac.foundation.dataset.record storage union for maximum decentralization.",
        "required": [
          "blobs"
        ],
        "properties": {
          "blobs": {
            "type": "array",
-           "description": "Array of blob references for WebDataset tar files",
+           "description": "Array of blob entries for WebDataset tar files",
            "items": {
-             "type": "blob",
-             "description": "Blob reference to a WebDataset tar archive"
+             "type": "ref",
+             "ref": "#blobEntry"
            },
            "minLength": 1
          }
        }
+     },
+     "blobEntry": {
+       "type": "object",
+       "description": "A single PDS blob shard with optional integrity checksum",
+       "required": [
+         "blob"
+       ],
+       "properties": {
+         "blob": {
+           "type": "blob",
+           "accept": [
+             "application/x-tar"
+           ],
+           "maxSize": 52428800,
+           "description": "Blob reference to a WebDataset tar archive"
+         },
+         "checksum": {
+           "type": "ref",
+           "ref": "ac.foundation.dataset.record#shardChecksum",
+           "description": "Content hash for integrity verification (optional since PDS blobs have built-in CID integrity)"
+         }
+       }
      }
    }
  }
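The shardChecksum object introduced in record.json above is deliberately algorithm-agnostic. A minimal sketch of producing one for a tar shard with Python's standard library, assuming the 'sha256' identifier (the path is illustrative):

```python
import hashlib

def shard_checksum(path: str) -> dict:
    # Stream the shard so large tar files never need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    # A sha256 hex digest is 64 chars, well under the 128-char maxLength.
    return {"algorithm": "sha256", "digest": h.hexdigest()}
```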
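Since lexicon blobs serialize to JSON as `$type: "blob"` objects, a populated blobEntry would look roughly like this (the CID, size, and digest are placeholders):

```python
# Illustrative blobEntry; the checksum is optional because the blob's
# CID already commits to its content.
blob_entry = {
    "blob": {
        "$type": "blob",
        "ref": {"$link": "bafkreihdplaceholder"},  # placeholder CID
        "mimeType": "application/x-tar",
        "size": 1048576,  # must not exceed maxSize 52428800 (50 MiB)
    },
    "checksum": {"algorithm": "sha256", "digest": "ab12..."},  # placeholder digest
}
```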
@@ -4,7 +4,7 @@
    "defs": {
      "main": {
        "type": "object",
-       "description": "External storage via URLs (S3, HTTP, IPFS, etc.) for WebDataset tar archives. URLs support brace notation for sharding (e.g., 'data-{000000..000099}.tar'). Used in ac.foundation.dataset.record storage union.",
+       "description": "(Deprecated: use storageHttp or storageS3 instead.) External storage via URLs for WebDataset tar archives. URLs support brace notation for sharding (e.g., 'data-{000000..000099}.tar').",
        "required": [
          "urls"
        ],
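For reference, the brace notation mentioned in the description expands a single URL into a numbered shard list. A minimal sketch of the expansion, assuming a single `{lo..hi}` range (WebDataset ships its own full expander; this helper is illustrative only):

```python
import re

def expand_braces(url: str) -> list[str]:
    # Expand one '{lo..hi}' numeric range, preserving zero padding.
    m = re.search(r"\{(\d+)\.\.(\d+)\}", url)
    if m is None:
        return [url]
    lo, hi = m.group(1), m.group(2)
    return [
        url[: m.start()] + str(i).zfill(len(lo)) + url[m.end():]
        for i in range(int(lo), int(hi) + 1)
    ]

expand_braces("data-{000000..000002}.tar")
# -> ['data-000000.tar', 'data-000001.tar', 'data-000002.tar']
```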
@@ -0,0 +1,45 @@
+ {
+   "lexicon": 1,
+   "id": "ac.foundation.dataset.storageHttp",
+   "defs": {
+     "main": {
+       "type": "object",
+       "description": "HTTP/HTTPS storage for WebDataset tar archives. Each shard is listed individually with a checksum for integrity verification. Consumers build brace-expansion patterns on the fly when needed.",
+       "required": [
+         "shards"
+       ],
+       "properties": {
+         "shards": {
+           "type": "array",
+           "description": "Array of shard entries with URL and integrity checksum",
+           "items": {
+             "type": "ref",
+             "ref": "#shardEntry"
+           },
+           "minLength": 1
+         }
+       }
+     },
+     "shardEntry": {
+       "type": "object",
+       "description": "A single HTTP-accessible shard with integrity checksum",
+       "required": [
+         "url",
+         "checksum"
+       ],
+       "properties": {
+         "url": {
+           "type": "string",
+           "format": "uri",
+           "description": "HTTP/HTTPS URL for this WebDataset tar shard",
+           "maxLength": 2000
+         },
+         "checksum": {
+           "type": "ref",
+           "ref": "ac.foundation.dataset.record#shardChecksum",
+           "description": "Content hash for integrity verification"
+         }
+       }
+     }
+   }
+ }
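The description's claim that consumers can rebuild brace patterns is straightforward when shard names share a zero-padded numeric suffix. A hedged sketch of that direction (assumes contiguous, uniformly named shards ending in 'NNNNNN.tar'; the helper name is hypothetical):

```python
import re

def to_brace_pattern(urls: list[str]) -> str:
    # Collapse uniformly numbered shard URLs into WebDataset brace notation.
    urls = sorted(urls)
    nums = [re.search(r"(\d+)\.tar$", u).group(1) for u in urls]
    prefix = urls[0][: -len(nums[0] + ".tar")]
    return f"{prefix}{{{nums[0]}..{nums[-1]}}}.tar"

to_brace_pattern([
    "https://example.com/data-000000.tar",
    "https://example.com/data-000001.tar",
])
# -> 'https://example.com/data-{000000..000001}.tar'
```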
@@ -0,0 +1,61 @@
+ {
+   "lexicon": 1,
+   "id": "ac.foundation.dataset.storageS3",
+   "defs": {
+     "main": {
+       "type": "object",
+       "description": "S3 or S3-compatible storage for WebDataset tar archives. Supports custom endpoints for MinIO, Cloudflare R2, and other S3-compatible services.",
+       "required": [
+         "bucket",
+         "shards"
+       ],
+       "properties": {
+         "bucket": {
+           "type": "string",
+           "description": "S3 bucket name",
+           "maxLength": 255
+         },
+         "region": {
+           "type": "string",
+           "description": "AWS region (e.g., 'us-east-1'). Optional for S3-compatible services.",
+           "maxLength": 50
+         },
+         "endpoint": {
+           "type": "string",
+           "format": "uri",
+           "description": "Custom S3-compatible endpoint URL (e.g., for MinIO, Cloudflare R2). Omit for standard AWS S3.",
+           "maxLength": 500
+         },
+         "shards": {
+           "type": "array",
+           "description": "Array of shard entries with object key and integrity checksum",
+           "items": {
+             "type": "ref",
+             "ref": "#shardEntry"
+           },
+           "minLength": 1
+         }
+       }
+     },
+     "shardEntry": {
+       "type": "object",
+       "description": "A single S3 object shard with integrity checksum",
+       "required": [
+         "key",
+         "checksum"
+       ],
+       "properties": {
+         "key": {
+           "type": "string",
+           "description": "S3 object key for this WebDataset tar shard",
+           "maxLength": 1024
+         },
+         "checksum": {
+           "type": "ref",
+           "ref": "ac.foundation.dataset.record#shardChecksum",
+           "description": "Content hash for integrity verification"
+         }
+       }
+     }
+   }
+ }
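A populated storageS3 object for an S3-compatible service might look like this (bucket, endpoint, key, and digest are all made up for illustration):

```python
# Illustrative storageS3 union member; omit "endpoint" for standard AWS S3.
storage = {
    "$type": "ac.foundation.dataset.storageS3",
    "bucket": "example-datasets",
    "endpoint": "https://example-account.r2.cloudflarestorage.com",
    "shards": [
        {
            "key": "demo/data-000000.tar",
            "checksum": {"algorithm": "blake3", "digest": "9f86..."},  # placeholder digest
        }
    ],
}
```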
@@ -26,3 +26,7 @@ from ._manifest import MANIFEST_FORMAT_VERSION as MANIFEST_FORMAT_VERSION
  from ._writer import ManifestWriter as ManifestWriter
  from ._query import QueryExecutor as QueryExecutor
  from ._query import SampleLocation as SampleLocation
+ from ._proxy import FieldProxy as FieldProxy
+ from ._proxy import Predicate as Predicate
+ from ._proxy import query_fields as query_fields
+ from ._proxy import F as F
@@ -0,0 +1,321 @@
+ """Typed proxy DSL for manifest queries.
+
+ Provides ``FieldProxy`` and ``Predicate`` classes that build pandas
+ filter expressions with IDE autocomplete and type safety.
+
+ Components:
+
+ - ``FieldProxy``: Wraps a field name; comparison operators return ``Predicate``
+ - ``Predicate``: Composable boolean expression tree; compiles to pandas ops
+ - ``query_fields()``: Factory that creates a typed proxy from a sample type
+ - ``F``: Untyped convenience proxy (Django-style F expressions)
+
+ Examples:
+     >>> Q = query_fields(MySample)
+     >>> pred = (Q.confidence > 0.9) & (Q.label == "dog")
+
+     >>> from atdata.manifest import F
+     >>> pred = (F.confidence > 0.9)
+ """
+
+ from __future__ import annotations
+
+ import functools
+ from typing import Any, Callable, Sequence, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import pandas as pd
+
+ from ._fields import resolve_manifest_fields
+
+
+ class Predicate:
+     """A composable boolean predicate over manifest fields.
+
+     Constructed by comparison operators on ``FieldProxy`` objects.
+     Supports ``&`` (AND), ``|`` (OR), and ``~`` (NOT).
+
+     Call the predicate directly on a DataFrame to evaluate it,
+     or pass it as the ``where`` argument to ``QueryExecutor.query()``.
+
+     Examples:
+         >>> from atdata.manifest import F
+         >>> pred = (F.confidence > 0.9) & (F.label == "dog")
+         >>> pred = (F.score >= 0.5) | (F.label.isin(["cat", "dog"]))
+     """
+
+     __slots__ = ("_kind", "_field", "_op", "_value", "_children", "_child", "_compiled")
+     __hash__ = None  # type: ignore[assignment]
+
+     def __init__(
+         self,
+         kind: str,
+         *,
+         field: str | None = None,
+         op: str | None = None,
+         value: Any = None,
+         children: list[Predicate] | None = None,
+         child: Predicate | None = None,
+     ) -> None:
+         self._kind = kind
+         self._field = field
+         self._op = op
+         self._value = value
+         self._children = children
+         self._child = child
+         self._compiled: Callable[[pd.DataFrame], pd.Series] | None = None
+
+     def __and__(self, other: Predicate) -> Predicate:
+         if not isinstance(other, Predicate):
+             return NotImplemented
+         # Flatten nested ANDs for a cleaner tree
+         left = self._children if self._kind == "and" else [self]
+         right = other._children if other._kind == "and" else [other]
+         return Predicate("and", children=[*left, *right])
+
+     def __or__(self, other: Predicate) -> Predicate:
+         if not isinstance(other, Predicate):
+             return NotImplemented
+         left = self._children if self._kind == "or" else [self]
+         right = other._children if other._kind == "or" else [other]
+         return Predicate("or", children=[*left, *right])
+
+     def __invert__(self) -> Predicate:
+         return Predicate("not", child=self)
+
+     def compile(self) -> Callable[[pd.DataFrame], pd.Series]:
+         """Compile this predicate tree into a callable DataFrame filter.
+
+         Returns:
+             A callable that accepts a ``pd.DataFrame`` and returns a
+             boolean ``pd.Series``.
+         """
+         if self._compiled is not None:
+             return self._compiled
+
+         self._compiled = self._build()
+         return self._compiled
+
+     def _build(self) -> Callable[[pd.DataFrame], pd.Series]:
+         """Recursively build the pandas filter closure."""
+
+         if self._kind == "comparison":
+             field = self._field
+             op = self._op
+             value = self._value
+             return _make_comparison(field, op, value)
+
+         if self._kind == "and":
+             compiled_children = [c.compile() for c in self._children]  # type: ignore[union-attr]
+             return _make_and(compiled_children)
+
+         if self._kind == "or":
+             compiled_children = [c.compile() for c in self._children]  # type: ignore[union-attr]
+             return _make_or(compiled_children)
+
+         if self._kind == "not":
+             compiled_child = self._child.compile()  # type: ignore[union-attr]
+             return _make_not(compiled_child)
+
+         raise ValueError(f"Unknown predicate kind: {self._kind!r}")
+
+     def __call__(self, df: pd.DataFrame) -> pd.Series:
+         """Evaluate this predicate against a DataFrame.
+
+         This makes ``Predicate`` directly usable as a ``where`` argument
+         to ``QueryExecutor.query()`` without any adapter code.
+         """
+         return self.compile()(df)
+
+     def __repr__(self) -> str:
+         if self._kind == "comparison":
+             return f"Predicate({self._field!r} {self._op} {self._value!r})"
+         if self._kind == "not":
+             return f"~{self._child!r}"
+         sep = " & " if self._kind == "and" else " | "
+         parts = sep.join(repr(c) for c in self._children)  # type: ignore[union-attr]
+         return f"({parts})"
+
+
+ def _make_comparison(
+     field: str | None, op: str | None, value: Any
+ ) -> Callable[[pd.DataFrame], pd.Series]:
+     """Create a closure for a single comparison operation."""
+     if op == "gt":
+         return lambda df: df[field] > value
+     if op == "lt":
+         return lambda df: df[field] < value
+     if op == "ge":
+         return lambda df: df[field] >= value
+     if op == "le":
+         return lambda df: df[field] <= value
+     if op == "eq":
+         return lambda df: df[field] == value
+     if op == "ne":
+         return lambda df: df[field] != value
+     if op == "isin":
+         return lambda df: df[field].isin(value)
+     raise ValueError(f"Unknown operator: {op!r}")
+
+
+ def _make_and(
+     children: list[Callable[[pd.DataFrame], pd.Series]],
+ ) -> Callable[[pd.DataFrame], pd.Series]:
+     """Create a closure that ANDs multiple child predicates."""
+
+     def _and(df: pd.DataFrame) -> pd.Series:
+         return functools.reduce(lambda a, b: a & b, (c(df) for c in children))
+
+     return _and
+
+
+ def _make_or(
+     children: list[Callable[[pd.DataFrame], pd.Series]],
+ ) -> Callable[[pd.DataFrame], pd.Series]:
+     """Create a closure that ORs multiple child predicates."""
+
+     def _or(df: pd.DataFrame) -> pd.Series:
+         return functools.reduce(lambda a, b: a | b, (c(df) for c in children))
+
+     return _or
+
+
+ def _make_not(
+     child: Callable[[pd.DataFrame], pd.Series],
+ ) -> Callable[[pd.DataFrame], pd.Series]:
+     """Create a closure that negates a child predicate."""
+     return lambda df: ~child(df)
+
+
+ class FieldProxy:
+     """Proxy for a single manifest field.
+
+     Comparison operators return ``Predicate`` objects for composable queries.
+
+     Args:
+         name: The manifest field name (column name in the parquet DataFrame).
+
+     Examples:
+         >>> from atdata.manifest import F
+         >>> pred = F.confidence > 0.9
+         >>> pred = F.label.isin(["dog", "cat"])
+     """
+
+     __slots__ = ("_name",)
+
+     def __init__(self, name: str) -> None:
+         self._name = name
+
+     def __gt__(self, value: Any) -> Predicate:
+         return Predicate("comparison", field=self._name, op="gt", value=value)
+
+     def __lt__(self, value: Any) -> Predicate:
+         return Predicate("comparison", field=self._name, op="lt", value=value)
+
+     def __ge__(self, value: Any) -> Predicate:
+         return Predicate("comparison", field=self._name, op="ge", value=value)
+
+     def __le__(self, value: Any) -> Predicate:
+         return Predicate("comparison", field=self._name, op="le", value=value)
+
+     def __eq__(self, value: Any) -> Predicate:  # type: ignore[override]
+         return Predicate("comparison", field=self._name, op="eq", value=value)
+
+     def __ne__(self, value: Any) -> Predicate:  # type: ignore[override]
+         return Predicate("comparison", field=self._name, op="ne", value=value)
+
+     def isin(self, values: Sequence[Any]) -> Predicate:
+         """Check membership in a set of values.
+
+         Args:
+             values: Collection of values to test membership against.
+
+         Returns:
+             A ``Predicate`` that filters for rows where this field's
+             value is in *values*.
+
+         Examples:
+             >>> pred = F.label.isin(["dog", "cat", "bird"])
+         """
+         return Predicate("comparison", field=self._name, op="isin", value=values)
+
+     def between(self, low: Any, high: Any) -> Predicate:
+         """Check that the field value is within a closed range.
+
+         Shorthand for ``(field >= low) & (field <= high)``.
+
+         Args:
+             low: Lower bound (inclusive).
+             high: Upper bound (inclusive).
+
+         Returns:
+             A ``Predicate`` that filters for rows where this field's
+             value is between *low* and *high* inclusive.
+
+         Examples:
+             >>> pred = F.confidence.between(0.5, 0.9)
+         """
+         return (self >= low) & (self <= high)
+
+     def __repr__(self) -> str:
+         return f"FieldProxy({self._name!r})"
+
+
+ def query_fields(sample_type: type) -> Any:
+     """Create a typed field proxy for querying a sample type.
+
+     Returns an object whose attributes are ``FieldProxy`` instances for
+     each manifest-eligible field of *sample_type*. Provides IDE
+     autocomplete when the return type is inferred.
+
+     Args:
+         sample_type: A ``@packable`` or ``PackableSample`` subclass.
+
+     Returns:
+         A proxy object with one ``FieldProxy`` attribute per manifest field.
+
+     Raises:
+         TypeError: If *sample_type* is not a dataclass.
+
+     Examples:
+         >>> Q = query_fields(MySample)
+         >>> pred = (Q.confidence > 0.9) & (Q.label == "dog")
+     """
+     fields = resolve_manifest_fields(sample_type)
+     attrs: dict[str, Any] = {}
+     annotations: dict[str, type] = {}
+     for name in fields:
+         attrs[name] = FieldProxy(name)
+         annotations[name] = FieldProxy
+     attrs["__annotations__"] = annotations
+     attrs["__slots__"] = ()
+     attrs["__repr__"] = (
+         lambda self: f"{sample_type.__name__}Fields({', '.join(annotations)})"
+     )
+
+     proxy_cls = type(f"{sample_type.__name__}Fields", (), attrs)
+     return proxy_cls()
+
+
+ class _UntypedFieldProxy:
+     """Untyped convenience proxy for quick field access.
+
+     Attribute access returns a ``FieldProxy`` for any name, without
+     requiring a sample type. Useful for ad-hoc queries where IDE
+     autocomplete is not needed.
+
+     Examples:
+         >>> from atdata.manifest import F
+         >>> pred = (F.confidence > 0.9) & (F.label == "dog")
+     """
+
+     def __getattr__(self, name: str) -> FieldProxy:
+         if name.startswith("_"):
+             raise AttributeError(name)
+         return FieldProxy(name)
+
+     def __repr__(self) -> str:
+         return "F"
+
+
+ F = _UntypedFieldProxy()
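End to end, the DSL composes and evaluates like this (a toy DataFrame stands in for a real manifest; pandas assumed available at runtime, mirroring the docstring examples above):

```python
import pandas as pd
from atdata.manifest import F

df = pd.DataFrame({"confidence": [0.95, 0.40], "label": ["dog", "cat"]})
pred = (F.confidence > 0.9) & (F.label == "dog")

# Predicate.__call__ compiles the expression tree once, then evaluates it.
mask = pred(df)
print(df[mask])  # keeps only the first row
```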
atdata/repository.py CHANGED
@@ -210,14 +210,26 @@ class _AtmosphereBackend:
          *,
          name: str,
          schema_ref: str | None = None,
+         data_urls: list[str] | None = None,
+         blob_refs: list[dict] | None = None,
          **kwargs: Any,
      ) -> Any:
          """Insert a dataset into ATProto.

+         When *blob_refs* is provided the record uses ``storageBlobs`` with
+         embedded blob reference objects so the PDS retains the uploaded blobs.
+
+         When *data_urls* is provided (without *blob_refs*) the record uses
+         ``storageExternal`` with those URLs.
+
          Args:
              ds: The Dataset to publish.
              name: Human-readable name.
              schema_ref: Optional schema AT URI. If None, auto-publishes schema.
+             data_urls: Explicit shard URLs to store in the record. When
+                 provided, these replace whatever ``ds.url`` contains.
+             blob_refs: Pre-uploaded blob reference dicts from
+                 ``PDSBlobStore``. Takes precedence over *data_urls*.
              **kwargs: Additional options (description, tags, license).

          Returns:
@@ -226,15 +238,53 @@ class _AtmosphereBackend:
          self._ensure_loaders()
          from .atmosphere import AtmosphereIndexEntry

-         uri = self._dataset_publisher.publish(
-             ds,
-             name=name,
-             schema_uri=schema_ref,
-             description=kwargs.get("description"),
-             tags=kwargs.get("tags"),
-             license=kwargs.get("license"),
-             auto_publish_schema=(schema_ref is None),
-         )
+         if blob_refs is not None or data_urls is not None:
+             # Ensure schema is published first
+             if schema_ref is None:
+                 from .atmosphere import SchemaPublisher
+
+                 sp = SchemaPublisher(self.client)
+                 schema_uri_obj = sp.publish(
+                     ds.sample_type,
+                     version=kwargs.get("schema_version", "1.0.0"),
+                 )
+                 schema_ref = str(schema_uri_obj)
+
+             metadata = kwargs.get("metadata")
+             if metadata is None and hasattr(ds, "_metadata"):
+                 metadata = ds._metadata
+
+             if blob_refs is not None:
+                 uri = self._dataset_publisher.publish_with_blob_refs(
+                     blob_refs=blob_refs,
+                     schema_uri=schema_ref,
+                     name=name,
+                     description=kwargs.get("description"),
+                     tags=kwargs.get("tags"),
+                     license=kwargs.get("license"),
+                     metadata=metadata,
+                 )
+             else:
+                 uri = self._dataset_publisher.publish_with_urls(
+                     urls=data_urls,
+                     schema_uri=schema_ref,
+                     name=name,
+                     description=kwargs.get("description"),
+                     tags=kwargs.get("tags"),
+                     license=kwargs.get("license"),
+                     metadata=metadata,
+                 )
+         else:
+             uri = self._dataset_publisher.publish(
+                 ds,
+                 name=name,
+                 schema_uri=schema_ref,
+                 description=kwargs.get("description"),
+                 tags=kwargs.get("tags"),
+                 license=kwargs.get("license"),
+                 auto_publish_schema=(schema_ref is None),
+             )
+
          record = self._dataset_loader.get(uri)
          return AtmosphereIndexEntry(str(uri), record)
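A hedged sketch of how the two new keyword paths could be exercised; `backend`, `ds`, and `uploaded_refs` are hypothetical stand-ins for objects built elsewhere with atdata's client and PDSBlobStore:

```python
# URL path: the record is written with the (deprecated) storageExternal member.
entry = backend.insert_dataset(
    ds,
    name="demo dataset",
    data_urls=["https://example.com/data-000000.tar"],  # illustrative URL
)

# Blob path: pre-uploaded PDS blob refs win over data_urls if both are given.
entry = backend.insert_dataset(ds, name="demo dataset", blob_refs=uploaded_refs)
```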