corvic-engine 0.3.0rc46__cp38-abi3-win_amd64.whl → 0.3.0rc48__cp38-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
corvic/engine/_native.pyd CHANGED
Binary file
corvic/model/_agent.py CHANGED
@@ -3,7 +3,8 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import copy
6
- from collections.abc import Sequence
6
+ import datetime
7
+ from collections.abc import Iterable, Sequence
7
8
  from typing import TypeAlias
8
9
 
9
10
  from sqlalchemy import orm as sa_orm
@@ -95,6 +96,39 @@ class Agent(BaseModel[AgentID, models_pb2.Agent, orm.Agent]):
95
96
  ),
96
97
  )
97
98
 
99
+ @classmethod
100
+ def from_proto(
101
+ cls, proto: models_pb2.Agent, client: system.Client | None = None
102
+ ) -> Agent:
103
+ client = client or Defaults.get_default_client()
104
+ return cls(client, proto)
105
+
106
+ @classmethod
107
+ def list(
108
+ cls,
109
+ *,
110
+ limit: int | None = None,
111
+ room_id: RoomID | None = None,
112
+ created_before: datetime.datetime | None = None,
113
+ client: system.Client | None = None,
114
+ ids: Iterable[AgentID] | None = None,
115
+ existing_session: sa_orm.Session | None = None,
116
+ ) -> Ok[list[Agent]] | NotFoundError | InvalidArgumentError:
117
+ """List agent models."""
118
+ client = client or Defaults.get_default_client()
119
+ match cls.list_as_proto(
120
+ client,
121
+ limit=limit,
122
+ room_id=room_id,
123
+ created_before=created_before,
124
+ ids=ids,
125
+ existing_session=existing_session,
126
+ ):
127
+ case NotFoundError() | InvalidArgumentError() as err:
128
+ return err
129
+ case Ok(protos):
130
+ return Ok([cls.from_proto(proto, client) for proto in protos])
131
+
98
132
  @property
99
133
  def name(self) -> str:
100
134
  return self.proto_self.name
@@ -206,7 +206,8 @@ class BaseModel(Generic[ID, _ProtoObj, OrmObj], UsesOrmID[ID, _ProtoObj]):
206
206
  try:
207
207
  _ = self.proto_to_orm(self.proto_self, session).unwrap_or_raise()
208
208
  session.flush()
209
- except sa.exc.DatabaseError as err:
209
+ # TODO(thunt): Possibly separate out DatabaseError into a precondition error
210
+ except (sa.exc.DatabaseError, sa.exc.StatementError) as err:
210
211
  return InvalidArgumentError.from_(err)
211
212
  return Ok(None)
212
213
 
@@ -340,7 +340,9 @@ class Relationship:
340
340
 
341
341
  result = self.joined_table().select(result_columns)
342
342
 
343
- for batch in result.to_polars().unwrap_or_raise():
343
+ for batch in result.to_polars(
344
+ room_id=self.start_source.room_id
345
+ ).unwrap_or_raise():
344
346
  for row in batch.rows(named=True):
345
347
  yield (row[result_columns[0]], row[result_columns[1]])
346
348
 
@@ -386,6 +386,37 @@ def source_proto_to_orm(
386
386
  return Ok(_add_orm_to_session(orm_obj, proto_obj.org_id, session))
387
387
 
388
388
 
389
+ def _update_agent_associations(
390
+ agent: orm.Agent,
391
+ session: sa_orm.Session,
392
+ ) -> list[orm.SpaceID]:
393
+ associated_space_ids = list[orm.SpaceID]()
394
+ agent_id = agent.id
395
+ agent_parameters = agent.parameters
396
+ if not agent_id or not agent_parameters:
397
+ return associated_space_ids
398
+ for (
399
+ raw_space_id,
400
+ instruction,
401
+ ) in agent_parameters.orchestrator_parameters.space_instructions.items():
402
+ space_id = orm.SpaceID(raw_space_id)
403
+ space_run_id = (
404
+ orm.SpaceRunID(instruction.space_run_id)
405
+ if instruction.space_run_id
406
+ else None
407
+ )
408
+ associated_space_ids.append(space_id)
409
+ session.merge(
410
+ orm.AgentSpaceAssociation(
411
+ room_id=agent.room_id,
412
+ agent_id=agent_id,
413
+ space_id=space_id,
414
+ space_run_id=space_run_id,
415
+ )
416
+ )
417
+ return associated_space_ids
418
+
419
+
389
420
  def agent_proto_to_orm(
390
421
  proto_obj: models_pb2.Agent, session: sa_orm.Session
391
422
  ) -> Ok[orm.Agent] | orm.InvalidORMIdentifierError | InvalidArgumentError:
@@ -403,7 +434,18 @@ def agent_proto_to_orm(
403
434
  parameters=proto_obj.agent_parameters,
404
435
  room_id=room_id,
405
436
  )
406
- return Ok(_add_orm_to_session(orm_obj, proto_obj.org_id, session))
437
+ agent = _add_orm_to_session(orm_obj, proto_obj.org_id, session)
438
+ session.flush()
439
+
440
+ if not agent.id:
441
+ return InvalidArgumentError("failed to add agent to session")
442
+ associated_space_ids = _update_agent_associations(agent, session)
443
+ session.execute(
444
+ sa.delete(orm.AgentSpaceAssociation)
445
+ .where(orm.AgentSpaceAssociation.agent_id == agent.id)
446
+ .where(orm.AgentSpaceAssociation.space_id.not_in(associated_space_ids))
447
+ )
448
+ return Ok(agent)
407
449
 
408
450
 
409
451
  def space_proto_to_orm(
@@ -650,6 +692,11 @@ def resource_delete_orms(
650
692
  def agent_delete_orms(
651
693
  ids: Sequence[orm.AgentID], session: orm.Session
652
694
  ) -> Ok[None] | InvalidArgumentError:
695
+ session.execute(
696
+ sa.delete(orm.AgentSpaceAssociation).where(
697
+ orm.AgentSpaceAssociation.agent_id.in_(ids)
698
+ )
699
+ )
653
700
  session.execute(sa.delete(orm.Agent).where(orm.Agent.id.in_(ids)))
654
701
  return Ok(None)
655
702
 
@@ -718,12 +765,14 @@ def room_delete_orms(
718
765
  ids: Sequence[orm.RoomID], session: orm.Session
719
766
  ) -> Ok[None] | InvalidArgumentError:
720
767
  source_refs = list(
721
- session.scalars(sa.select(orm.Source).where(orm.Source.room_id == ids).limit(1))
768
+ session.scalars(
769
+ sa.select(orm.Source).where(orm.Source.room_id.in_(ids)).limit(1)
770
+ )
722
771
  )
723
772
  if source_refs:
724
773
  return InvalidArgumentError("cannot delete a room that still has sources")
725
774
 
726
- session.execute(sa.delete(orm.Room).where(orm.Room.id == ids))
775
+ session.execute(sa.delete(orm.Room).where(orm.Room.id.in_(ids)))
727
776
  return Ok(None)
728
777
 
729
778
 
corvic/model/_resource.py CHANGED
@@ -4,7 +4,6 @@ from __future__ import annotations
4
4
 
5
5
  import copy
6
6
  import datetime
7
- import uuid
8
7
  from collections.abc import Iterable, Sequence
9
8
  from typing import TypeAlias
10
9
 
@@ -219,8 +218,8 @@ class Resource(BaseModel[ResourceID, models_pb2.Resource, orm.Resource]):
219
218
  ) -> Self:
220
219
  client = client or Defaults.get_default_client()
221
220
  room_id = room_id or Defaults.get_default_room_id(client)
222
- blob = client.storage_manager.tabular.blob(f"polars_dataframe/{uuid.uuid4()}")
223
221
 
222
+ blob = client.storage_manager.make_tabular_blob(room_id, "anonymous_tables")
224
223
  with blob.open(mode="wb") as stream:
225
224
  data_frame.write_parquet(stream)
226
225
 
corvic/orm/__init__.py CHANGED
@@ -286,14 +286,6 @@ class Space(BelongsToOrgMixin, BelongsToRoomMixin, Base):
286
286
  viewonly=True,
287
287
  )
288
288
 
289
- agent_associations: sa_orm.Mapped[list[AgentSpaceAssociation]] = (
290
- sa_orm.relationship(
291
- init=False,
292
- viewonly=True,
293
- default_factory=list,
294
- )
295
- )
296
-
297
289
  @property
298
290
  def space_key(self):
299
291
  return self.name
@@ -359,15 +351,15 @@ class Agent(SoftDeleteMixin, BelongsToOrgMixin, BelongsToRoomMixin, Base):
359
351
  class AgentSpaceAssociation(BelongsToOrgMixin, BelongsToRoomMixin, Base):
360
352
  __tablename__ = "agent_space_association"
361
353
 
354
+ space_run_id: sa_orm.Mapped[SpaceRunID | None] = sa_orm.mapped_column(
355
+ ForeignKey(SpaceRun).make()
356
+ )
362
357
  agent_id: sa_orm.Mapped[AgentID] = primary_key_foreign_column(
363
- ForeignKey(Agent).make()
358
+ ForeignKey(Agent).make(ondelete="CASCADE")
364
359
  )
365
360
  space_id: sa_orm.Mapped[SpaceID] = primary_key_foreign_column(
366
361
  ForeignKey(Space).make()
367
362
  )
368
- space: sa_orm.Mapped[Space] = sa_orm.relationship(
369
- back_populates="agent_associations", init=False, viewonly=True
370
- )
371
363
 
372
364
 
373
365
  class CompletionModel(SoftDeleteMixin, BelongsToOrgMixin, Base):
@@ -1,11 +1,13 @@
1
1
  import base64
2
2
  import datetime
3
3
  import decimal
4
+ import math
4
5
  from collections.abc import Callable, Iterable, Mapping
5
- from typing import Final, TypeAlias, cast
6
+ from typing import Any, Final, TypeAlias, cast
6
7
 
7
8
  import pyarrow as pa
8
9
  from google.protobuf import struct_pb2
10
+ from pyarrow.lib import ArrowException
9
11
 
10
12
  from corvic.pa_scalar._temporal import (
11
13
  MonthDayNanoFrac,
@@ -118,11 +120,17 @@ def _visit_uint64(
118
120
  return struct_pb2.Value(string_value=str(value))
119
121
 
120
122
 
123
+ def _is_invalid_float(value: float):
124
+ return math.isnan(value) or math.isinf(value)
125
+
126
+
121
127
  def _visit_float16(
122
128
  value: _PyValue,
123
129
  dtype: pa.DataType,
124
130
  ) -> struct_pb2.Value:
125
131
  value = cast(float, value)
132
+ if _is_invalid_float(value):
133
+ return _visit_null(value, pa.null())
126
134
  return struct_pb2.Value(number_value=value)
127
135
 
128
136
 
@@ -131,6 +139,8 @@ def _visit_float32(
131
139
  dtype: pa.DataType,
132
140
  ) -> struct_pb2.Value:
133
141
  value = cast(float, value)
142
+ if _is_invalid_float(value):
143
+ return _visit_null(value, pa.null())
134
144
  return struct_pb2.Value(number_value=value)
135
145
 
136
146
 
@@ -139,6 +149,8 @@ def _visit_float64(
139
149
  dtype: pa.DataType,
140
150
  ) -> struct_pb2.Value:
141
151
  value = cast(float, value)
152
+ if _is_invalid_float(value):
153
+ return _visit_null(value, pa.null())
142
154
  return struct_pb2.Value(number_value=value)
143
155
 
144
156
 
@@ -412,6 +424,8 @@ def to_value(
412
424
  case int():
413
425
  return struct_pb2.Value(string_value=str(scalar))
414
426
  case float():
427
+ if _is_invalid_float(scalar):
428
+ return to_value(None)
415
429
  return struct_pb2.Value(number_value=scalar)
416
430
  case str():
417
431
  return struct_pb2.Value(string_value=scalar)
@@ -424,6 +438,36 @@ def to_value(
424
438
  return _visit(value, scalar.type)
425
439
 
426
440
 
441
+ def _to_py(scalar: Scalar) -> Any:
442
+ try:
443
+ # This scalar might have malformed binary data that's incorrectly typed
444
+ # as string (e.g. non-utf-8 strings), so skip it
445
+ scalar.validate(full=True)
446
+ return scalar.as_py()
447
+ except ArrowException:
448
+ # To create examples that hit this code path, one needs to create a
449
+ # pyarrow.BinaryArray and then cast it to a StringArray but also supply
450
+ # a CastOptions with allow_invalid_utf8 set to True.
451
+ # TODO(aneesh): also check pa.types.is_dictionary
452
+ if (
453
+ pa.types.is_list(scalar.type)
454
+ or pa.types.is_large_list(scalar.type)
455
+ or pa.types.is_fixed_size_list(scalar.type)
456
+ ):
457
+ lscalar = cast(pa.ListScalar[Any], scalar)
458
+ return [_to_py(v) for v in lscalar.values] # noqa: PD011 # type: ignore[unknownMemberType]
459
+ if pa.types.is_map(scalar.type):
460
+ mscalar = cast(pa.MapScalar[Any, Any], scalar)
461
+ return {
462
+ res["key"]: res["value"]
463
+ for res in [_to_py(v) for v in mscalar.values] # noqa: PD011 # type: ignore[unknownMemberType]
464
+ }
465
+ if pa.types.is_struct(scalar.type):
466
+ sscalar = cast(pa.StructScalar, scalar)
467
+ return {k: _to_py(sscalar[k]) for k in sscalar}
468
+ return None
469
+
470
+
427
471
  def batch_to_structs(
428
472
  batch: pa.RecordBatch,
429
473
  ) -> list[struct_pb2.Struct]:
@@ -432,7 +476,8 @@ def batch_to_structs(
432
476
  field_by_name = {field.name: field for field in schema}
433
477
 
434
478
  ret: list[struct_pb2.Struct] = []
435
- for row in batch.to_pylist():
479
+ for pa_row in batch.to_struct_array():
480
+ row = _to_py(cast(Scalar, pa_row))
436
481
  fields = {
437
482
  col_name: _visit(v, field_by_name[col_name].type)
438
483
  for col_name, v in row.items()
@@ -39,3 +39,5 @@ class Scalar(Protocol):
39
39
  def equals(self, other: Any) -> bool: ...
40
40
 
41
41
  def as_py(self) -> Any: ...
42
+
43
+ def validate(self, *, full: bool = False) -> bool | None: ...
corvic/system/__init__.py CHANGED
@@ -48,7 +48,6 @@ from corvic.system.storage import (
48
48
  Blob,
49
49
  BlobClient,
50
50
  Bucket,
51
- DataKindManager,
52
51
  DataMisplacedError,
53
52
  StorageManager,
54
53
  )
@@ -71,7 +70,6 @@ __all__ = [
71
70
  "Clip",
72
71
  "ClipText",
73
72
  "DEFAULT_VECTOR_COLUMN_NAMES_TO_SIZES",
74
- "DataKindManager",
75
73
  "DataMisplacedError",
76
74
  "DimensionReducer",
77
75
  "EmbedTextContext",
corvic/system/storage.py CHANGED
@@ -6,11 +6,12 @@ import contextlib
6
6
  import io
7
7
  import uuid
8
8
  from collections.abc import Iterator
9
- from typing import Any, Literal
9
+ from typing import Any, Final, Literal
10
10
 
11
11
  from typing_extensions import Protocol
12
12
 
13
- from corvic.result import Error, Ok
13
+ from corvic import orm
14
+ from corvic.result import Error
14
15
 
15
16
 
16
17
  class DataMisplacedError(Error):
@@ -105,60 +106,16 @@ class Bucket(Protocol):
105
106
  def list_blobs(self, prefix: str | None = None) -> Iterator[Blob]: ...
106
107
 
107
108
 
108
- class DataKindManager:
109
- """Manages the names of blobs that corvic stores for a particular data kind.
110
-
111
- Kinds are managed by the NamespaceManager
112
- """
113
-
114
- def __init__(self, storage_manager: StorageManager, prefix: str):
115
- if prefix.endswith("/"):
116
- raise ValueError("prefix should not end with a path separator (/)")
117
- self._namespace_manager = storage_manager
118
- self._prefix = prefix
119
-
120
- @property
121
- def prefix(self):
122
- return self._prefix
123
-
124
- def make_anonymous_table_url(self):
125
- return self.blob(f"anonymous_tables/{uuid.uuid4()}.parquet").url
126
-
127
- def blob(self, blob_name: str):
128
- return self._namespace_manager.bucket.blob(f"{self.prefix}/{blob_name}")
129
-
130
- def _blob_from_url(self, url: str) -> Ok[Blob] | DataMisplacedError:
131
- blob = self._namespace_manager.blob_from_url(url)
132
- if self._namespace_manager.bucket.name != blob.bucket.name:
133
- return DataMisplacedError(
134
- "data stored at a different bucket than expected",
135
- url=url,
136
- expected_bucket=self._namespace_manager.bucket.name,
137
- )
138
- if not blob.name.startswith(self._prefix):
139
- return DataMisplacedError(
140
- "data stored at a different prefix than expected",
141
- url=url,
142
- expected_prefix=self._prefix,
143
- )
144
- return Ok(blob)
145
-
146
- def blob_name_from_url(self, url: str) -> Ok[str] | DataMisplacedError:
147
- def _to_name(blob: Blob) -> str:
148
- return blob.name.removeprefix(self._prefix + "/")
149
-
150
- return self._blob_from_url(url=url).map(_to_name)
151
-
152
-
153
109
  class StorageManager:
154
110
  """Manages the names of blobs that corvic stores."""
155
111
 
156
112
  _blob_client: BlobClient
157
113
  _bucket_name: str
158
- _unstructured_manager: DataKindManager
159
- _tabular_manager: DataKindManager
160
- _space_run_manager: DataKindManager
161
- _vector_manager: DataKindManager
114
+
115
+ unstructured_prefix: Final[str]
116
+ tabular_prefix: Final[str]
117
+ vector_prefix: Final[str]
118
+ bucket: Final[Bucket]
162
119
 
163
120
  def __init__(
164
121
  self,
@@ -167,36 +124,55 @@ class StorageManager:
167
124
  bucket_name: str,
168
125
  unstructured_prefix: str,
169
126
  tabular_prefix: str,
170
- space_run_prefix: str,
171
127
  vector_prefix: str,
172
128
  ):
173
129
  self._blob_client = blob_client
174
130
  self._bucket_name = bucket_name
175
131
 
176
- self._unstructured_manager = DataKindManager(self, unstructured_prefix)
177
- self._tabular_manager = DataKindManager(self, tabular_prefix)
178
- self._space_run_manager = DataKindManager(self, space_run_prefix)
179
- self._vector_manager = DataKindManager(self, vector_prefix)
180
-
181
- def blob_from_url(self, url: str):
182
- return self._blob_client.blob_from_url(url)
132
+ if unstructured_prefix.endswith("/") or tabular_prefix.endswith("/"):
133
+ raise ValueError("prefix should not end with a path separator (/)")
183
134
 
184
- @property
185
- def bucket(self) -> Bucket:
186
- return self._blob_client.bucket(self._bucket_name)
135
+ self.unstructured_prefix = unstructured_prefix
136
+ self.tabular_prefix = tabular_prefix
137
+ self.vector_prefix = vector_prefix
187
138
 
188
- @property
189
- def tabular(self):
190
- return self._tabular_manager
139
+ self.bucket = self._blob_client.bucket(self._bucket_name)
191
140
 
192
- @property
193
- def unstructured(self):
194
- return self._unstructured_manager
141
+ @staticmethod
142
+ def _render_room_id(room_id: orm.RoomID) -> str:
143
+ return f"{int(str(room_id)):016}"
195
144
 
196
- @property
197
- def space_run(self):
198
- return self._space_run_manager
145
+ def blob_from_url(self, url: str):
146
+ return self._blob_client.blob_from_url(url)
199
147
 
200
- @property
201
- def vector(self):
202
- return self._vector_manager
148
+ def make_tabular_blob(self, room_id: orm.RoomID, suffix: str | None = None) -> Blob:
149
+ if suffix:
150
+ name = f"{self._render_room_id(room_id)}/{uuid.uuid4()}/{suffix}"
151
+ else:
152
+ name = f"{self._render_room_id(room_id)}/{uuid.uuid4()}"
153
+ return self.bucket.blob(f"{self.tabular_prefix}/{name}")
154
+
155
+ def make_unstructured_blob(
156
+ self, room_id: orm.RoomID, suffix: str | None = None
157
+ ) -> Blob:
158
+ if suffix:
159
+ name = f"{self._render_room_id(room_id)}/{uuid.uuid4()}/{suffix}"
160
+ else:
161
+ name = f"{self._render_room_id(room_id)}/{uuid.uuid4()}"
162
+ return self.bucket.blob(f"{self.unstructured_prefix}/{name}")
163
+
164
+ def make_vector_blob(self, room_id: orm.RoomID, suffix: str | None = None) -> Blob:
165
+ if suffix:
166
+ name = f"{self._render_room_id(room_id)}/{uuid.uuid4()}/{suffix}"
167
+ else:
168
+ name = f"{self._render_room_id(room_id)}/{uuid.uuid4()}"
169
+ return self.bucket.blob(f"{self.vector_prefix}/{name}")
170
+
171
+ def get_tabular_blob_from_blob_name(self, blob_name: str) -> Blob:
172
+ return self.bucket.blob(f"{self.tabular_prefix}/{blob_name}")
173
+
174
+ def get_unstructured_blob_from_blob_name(self, blob_name: str) -> Blob:
175
+ return self.bucket.blob(f"{self.unstructured_prefix}/{blob_name}")
176
+
177
+ def get_vector_blob_from_blob_name(self, blob_name: str) -> Blob:
178
+ return self.bucket.blob(f"{self.vector_prefix}/{blob_name}")
@@ -135,7 +135,6 @@ class Client(corvic.system.Client):
135
135
  bucket_name=bucket.name,
136
136
  unstructured_prefix="unstructured_data",
137
137
  tabular_prefix="tabular_data",
138
- space_run_prefix="experiment_run_data",
139
138
  vector_prefix="vectors",
140
139
  )
141
140
  corvic.orm.Base.metadata.create_all(self._sa_engine)
@@ -15,7 +15,6 @@ import sqlglot.expressions
15
15
  from corvic.op_graph import Schema
16
16
  from corvic.result import InternalError, Ok
17
17
  from corvic.system import (
18
- DataKindManager,
19
18
  StagingDB,
20
19
  StorageManager,
21
20
  TableSliceArgs,
@@ -255,17 +254,18 @@ class DuckDBStaging(StagingDB):
255
254
 
256
255
  def _update_blobs_for_prefix(
257
256
  self,
258
- manager: DataKindManager,
259
257
  next_count: dict[str, int | None],
258
+ prefix: str,
260
259
  transform_table: Callable[[str, pa.Table], None] | None = None,
261
260
  ):
262
261
  bucket = self._storage_manager.bucket
263
262
  blobs = bucket.list_blobs()
264
- prefix = manager.prefix
265
263
  table_blobs = [
266
264
  (
267
265
  blob,
268
- manager.blob_name_from_url(url=blob.url).unwrap_or_raise(),
266
+ self._storage_manager.blob_from_url(blob.url).name.removeprefix(
267
+ prefix + "/"
268
+ ),
269
269
  )
270
270
  for blob in blobs
271
271
  if blob.name.startswith(prefix)
@@ -289,9 +289,14 @@ class DuckDBStaging(StagingDB):
289
289
  As a side-effect, update _table_counts.
290
290
  """
291
291
  next_count: dict[str, int | None] = {}
292
- self._update_blobs_for_prefix(self._storage_manager.tabular, next_count)
293
292
  self._update_blobs_for_prefix(
294
- self._storage_manager.vector, next_count, self._add_vector_indexes
293
+ next_count,
294
+ self._storage_manager.tabular_prefix,
295
+ )
296
+ self._update_blobs_for_prefix(
297
+ next_count,
298
+ self._storage_manager.vector_prefix,
299
+ self._add_vector_indexes,
295
300
  )
296
301
  self._table_counts = next_count
297
302
 
corvic/table/table.py CHANGED
@@ -23,7 +23,7 @@ import structlog
23
23
  from google.protobuf import struct_pb2
24
24
  from typing_extensions import Self
25
25
 
26
- from corvic import op_graph
26
+ from corvic import op_graph, orm
27
27
  from corvic.op_graph import Encoder, Schema
28
28
  from corvic.result import (
29
29
  InternalError,
@@ -341,17 +341,19 @@ class Table:
341
341
  if len(null_columns) > 0:
342
342
  _logger.warning("dropped null columns", columns=null_columns)
343
343
 
344
- match client.storage_manager.tabular.blob_name_from_url(url).and_then(
345
- lambda t: op_graph.from_staging(
346
- blob_names=[t],
347
- arrow_schema=schema.to_arrow(),
348
- feature_types=[field.ftype for field in schema],
349
- expected_rows=num_rows,
350
- )
344
+ blob_name = client.storage_manager.blob_from_url(blob.url).name.removeprefix(
345
+ client.storage_manager.tabular_prefix + "/"
346
+ )
347
+
348
+ match op_graph.from_staging(
349
+ blob_names=[blob_name],
350
+ arrow_schema=schema.to_arrow(),
351
+ feature_types=[field.ftype for field in schema],
352
+ expected_rows=num_rows,
351
353
  ):
352
354
  case Ok(op):
353
355
  pass
354
- case DataMisplacedError() | InvalidArgumentError() as error:
356
+ case InvalidArgumentError() as error:
355
357
  return error
356
358
  if len(null_columns) > 0:
357
359
  match op.select_columns(kept_columns):
@@ -366,7 +368,7 @@ class Table:
366
368
  return self.op_graph.to_bytes()
367
369
 
368
370
  def to_polars(
369
- self, *, flatten_single_field: bool = False
371
+ self, room_id: orm.RoomID, *, flatten_single_field: bool = False
370
372
  ) -> (
371
373
  Ok[Iterable[pl.DataFrame]]
372
374
  | InvalidArgumentError
@@ -374,7 +376,7 @@ class Table:
374
376
  | ResourceExhaustedError
375
377
  ):
376
378
  """Stream over the view as a series of Polars DataFrames."""
377
- match self.to_batches():
379
+ match self.to_batches(room_id=room_id):
378
380
  case Ok(batch_reader):
379
381
  pass
380
382
  case err:
@@ -438,7 +440,7 @@ class Table:
438
440
  )
439
441
 
440
442
  def to_batches(
441
- self,
443
+ self, room_id: orm.RoomID
442
444
  ) -> (
443
445
  Ok[pa.RecordBatchReader]
444
446
  | InvalidArgumentError
@@ -450,7 +452,9 @@ class Table:
450
452
  tables_to_compute=[
451
453
  TableComputeContext(
452
454
  self.op_graph,
453
- output_url_prefix=self.client.storage_manager.space_run.make_anonymous_table_url(),
455
+ output_url_prefix=self.client.storage_manager.make_tabular_blob(
456
+ room_id=room_id, suffix="anonymous_tables"
457
+ ).url,
454
458
  )
455
459
  ],
456
460
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: corvic-engine
3
- Version: 0.3.0rc46
3
+ Version: 0.3.0rc48
4
4
  Classifier: Environment :: Console
5
5
  Classifier: License :: Other/Proprietary License
6
6
  Classifier: Programming Language :: Python :: Implementation :: CPython
@@ -1,6 +1,6 @@
1
- corvic_engine-0.3.0rc46.dist-info/METADATA,sha256=_LLt2PwaSBy5Jy3CuT9BHywURG_SkNOW0g4yJ9yahC8,1876
2
- corvic_engine-0.3.0rc46.dist-info/WHEEL,sha256=_g1M2QM3kt1Ssm_sHOg_3TUY7GxNE2Ueyslb9ZDtPwk,94
3
- corvic_engine-0.3.0rc46.dist-info/licenses/LICENSE,sha256=DSS1OD0oIgssKOmAzkMRBv5jvvVuZQbrIv8lpl9DXY8,1035
1
+ corvic_engine-0.3.0rc48.dist-info/METADATA,sha256=jNKQ7u3Z2DNztauiBcBXWm27Z92rkw0GEXrFSxfPZWE,1876
2
+ corvic_engine-0.3.0rc48.dist-info/WHEEL,sha256=_g1M2QM3kt1Ssm_sHOg_3TUY7GxNE2Ueyslb9ZDtPwk,94
3
+ corvic_engine-0.3.0rc48.dist-info/licenses/LICENSE,sha256=DSS1OD0oIgssKOmAzkMRBv5jvvVuZQbrIv8lpl9DXY8,1035
4
4
  corvic/context/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  corvic/context/__init__.py,sha256=zBnPiP-tStGSVMG_0-G_0ay6-yIX2aerW_oYRzAex74,1702
6
6
  corvic/embed/node2vec.py,sha256=JnYb8f2g4XhF6LL2TjpMxLfKhn_Yp1AzptsWwrKQWgc,11146
@@ -13,16 +13,16 @@ corvic/engine/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  corvic/engine/_native.pyi,sha256=KYMPtvXqHZ-jMgZohLf4se3rr-rBpCihmjANcr6s8ag,1390
14
14
  corvic/engine/__init__.py,sha256=XL4Vg7rNcBi29ccVelpeFizR9oJtGYXDn84W9zok9d4,975
15
15
  corvic/model/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- corvic/model/_agent.py,sha256=WGdxu0oLHU7EEgBOMQ0UOnH2AIIUGfeH6VSQZgC0kEk,3621
17
- corvic/model/_base_model.py,sha256=m4JkZu8QAoQEeHqzpMUlH08PGOjVZbqld4Rm0uW73QQ,8225
16
+ corvic/model/_agent.py,sha256=8qmi1x1pBQCXgz7Zr2YdrWRHRZkbp63cyJw7j9OvqQY,4818
17
+ corvic/model/_base_model.py,sha256=qRTZBBHdkqMH6xqX1FwtTm4cWFC4aPDktJeGyxyxtpk,8336
18
18
  corvic/model/_completion_model.py,sha256=uoqF7hwxzGXXqSPZT_CIcNBSDmYhcxMotpGPucWH6Q0,6656
19
19
  corvic/model/_defaults.py,sha256=yoKPPSmYJCE5YAD5jLTEmT4XNf_zXoggNK-uyG8MfVs,1524
20
20
  corvic/model/_errors.py,sha256=Ctlq04SDwHzJPvLaL1rzqzwVqf2b50EILfW3cH4vnh8,261
21
21
  corvic/model/_feature_type.py,sha256=Y-_-wa9fv7XaCAkxfjjoCLxxK2Ftfba-PMefD7bNXzs,917
22
- corvic/model/_feature_view.py,sha256=kM8kMzb1oxAgIOMVmv6twZ-JLeP2NopdsKHCiKjuNIE,49700
22
+ corvic/model/_feature_view.py,sha256=YThcU0T4pK_W6IOJ8uQUUsc3NP7JMBWqYjU_37UjN2o,49757
23
23
  corvic/model/_pipeline.py,sha256=A_q_nWm6UBN-AKlbQkhWNMG2r-uW0IR6vGJbhYv7z3k,17578
24
- corvic/model/_proto_orm_convert.py,sha256=6CWe_b-L7JBS4Pb_SjUeuiuczWh-nPw0jElQIMOl_UA,25029
25
- corvic/model/_resource.py,sha256=O93gy2r4HjsZzVUahrGbLGmxI7sfSVQCvwvCLN3nSXQ,7097
24
+ corvic/model/_proto_orm_convert.py,sha256=rq4SJYsTCHyOkZswI9FYfj1Ne9Heed117ii89ChjPvc,26607
25
+ corvic/model/_resource.py,sha256=O6fV0reyFpS_qUIn3XUyQ2aVC1sT_DQAuovxDv1TnBo,7082
26
26
  corvic/model/_room.py,sha256=57MiBfj8hZcmUfq2PeECrOWDpBZAOSjnVqNUIGXOy2Q,2898
27
27
  corvic/model/_source.py,sha256=JBCk1I6u_rUKPiB4Fvtl7uVm0Jx0LF1oWNd1-Wn_sbI,9412
28
28
  corvic/model/_space.py,sha256=_qXYefPwwL6jGY3zUBYWW9X3ZE4FEuiOksPoCuG_O1Q,33928
@@ -50,13 +50,13 @@ corvic/orm/keys.py,sha256=Ag6Xbpvxev-VByT1KJ8ChUn9vKVEzkkMXxrjvtADCtY,2182
50
50
  corvic/orm/mixins.py,sha256=HfmzJ7LblHtddbbkDmv7nNWURL87Bnj8NeOnNbfmSN4,17794
51
51
  corvic/orm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  corvic/orm/_proto_columns.py,sha256=tcOu92UjFJFYZLasS6sWJQBDRK26yrnmpTii_LDY4iw,913
53
- corvic/orm/__init__.py,sha256=3HLMICNB1iMyMS70tlvaap8WDrAfxqSxp7W1O-cnHc4,14656
53
+ corvic/orm/__init__.py,sha256=rOYy3hi3fVbAkXIoJ5NZLDz3uHdw3Bko2ZpRDtb3Fkg,14449
54
54
  corvic/pa_scalar/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
55
  corvic/pa_scalar/_const.py,sha256=1nk6w3Y7crd3J5jSCq7DRVa1lcGk4H1RUr1l4NjnlzE,868
56
56
  corvic/pa_scalar/_from_value.py,sha256=fS3TNPcPI3jAKGmcUIhn8rdqdQEAwgTLEneVxFUeK6M,27531
57
57
  corvic/pa_scalar/_temporal.py,sha256=HPL1z5k4frIiGWM469YGj9j0hCPXNHtI_X1jk1y1c1Q,7842
58
- corvic/pa_scalar/_to_value.py,sha256=F11uEikSft9AahBqA7aHmvHObwsTqoZN90KC4ucbem0,11574
59
- corvic/pa_scalar/_types.py,sha256=73dusgldts4o8j-FgoKjER9malSlMbGVrD4hP3mflWE,1062
58
+ corvic/pa_scalar/_to_value.py,sha256=C1l-BVsS4Rs_KL8hVPllpzjP0NVVwfaOb0WM2pjZhoA,13476
59
+ corvic/pa_scalar/_types.py,sha256=shbytO0ji-H2rBOX_1fooVOshb22wwkVU1W99VBKz1A,1131
60
60
  corvic/pa_scalar/__init__.py,sha256=1nfc0MFGpw78RQEI13VE5hpHuyw_DoE7sJbmzqx5pws,1063
61
61
  corvic/proto_wrapper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
62
  corvic/proto_wrapper/_errors.py,sha256=0HFmBK9EGYi4lXJ3RFsktHoPZn2m3cMKroI0Eir0JcM,114
@@ -72,21 +72,21 @@ corvic/system/in_memory_executor.py,sha256=dYgcxbA_O0mM1pI19t2OXs8q5B4TX-NFacR7T
72
72
  corvic/system/op_graph_executor.py,sha256=gXFnVkemS5EwNegJdU-xVAfMLPULqMFPF7d3EG3AD_U,3482
73
73
  corvic/system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  corvic/system/staging.py,sha256=K5P5moiuAMfPx7lxK4mArxeURBwKoyB6x9HGu9JJ16E,1846
75
- corvic/system/storage.py,sha256=N4lXnoCFPg8JbUaI9fbpPY0nt3CgXFUc8sZteki_nRk,5671
75
+ corvic/system/storage.py,sha256=35zVWxzU8xLLWBtZKauns08kea9-0Z24KLSHxBaModo,5201
76
76
  corvic/system/_dimension_reduction.py,sha256=vyD8wOs0vE-hlVnCrBTjStTowAPWYREqnQ_bVuGYvis,2907
77
77
  corvic/system/_embedder.py,sha256=0WO24IKi8VC8jsFdvNuzDsgNejyacQf6r9Q34jxhHc4,3844
78
78
  corvic/system/_image_embedder.py,sha256=iQc3KlLcqrhP6K84hncHutThAN8Qd6K7K5dceHyU1TU,8373
79
79
  corvic/system/_planner.py,sha256=HUf6UjCy1iHRrXfhU25w19TG4Ik3zVHhtzVcor0eTQY,7888
80
80
  corvic/system/_text_embedder.py,sha256=LH79_4RxhvssySHpkeEoZFgM4Sa5XAYSjoytdsuwWK4,3269
81
- corvic/system/__init__.py,sha256=DBzMQ1QCO0OHo3w2LoibfgN1X3CLGBUxXjiExN2kyi0,2838
82
- corvic/system_sqlite/client.py,sha256=ntDJtCDG1SDGUKI-BhCju4sdqd59Y8RucGZ8VmRvHuI,7142
81
+ corvic/system/__init__.py,sha256=U28LyDwpirtG0WDXqE6dzkx8PipvY2rtZSex03C7xlY,2792
82
+ corvic/system_sqlite/client.py,sha256=zKsrlXnwrb7us8VHEh5BdTV8SQpQhjPJ2FtRfXv4kxk,7089
83
83
  corvic/system_sqlite/fs_blob_store.py,sha256=pYTMPiWYC6AUIdcgmRj8lvL7Chg82rf5dac6bKGaqL0,8461
84
84
  corvic/system_sqlite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  corvic/system_sqlite/rdbms_blob_store.py,sha256=gTP_tQfTVb3wzZkzo8ys1zaz0rSrERzb57rqMHVpuBA,10563
86
- corvic/system_sqlite/staging.py,sha256=9chn6iXGmKAsurdiBzo0FSTyOmeDn7oCIq8fekbK67c,16878
86
+ corvic/system_sqlite/staging.py,sha256=P6XdWhjpgcpOZkYxKEjpsTxaAdBKOeSVfARjqt4_xJA,16948
87
87
  corvic/system_sqlite/__init__.py,sha256=MQCUux08NcxFLghpWcZ95lt5AMabr-6sJRH-eSsTnqI,229
88
88
  corvic/table/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
- corvic/table/table.py,sha256=OjO6gU7iazq5F-T1T8JC514Psu-20196Le8s_-2Jx9Y,24814
89
+ corvic/table/table.py,sha256=suyJyC-omXGozUnqImKCIXvPr3LgLfnZ1bfcd_sP3Bk,24975
90
90
  corvic/table/__init__.py,sha256=Gj0IR8BQF5PZK92Us7PP0ZigMsVyrfWJupzH8TgzRQk,588
91
91
  corvic/version/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
92
  corvic/version/__init__.py,sha256=JlkRLvKXsu3zIxhdynO_0Ub5NfQOvGjfwCRkNnaOu9U,1125
@@ -244,5 +244,5 @@ corvic_generated/status/v1/event_pb2.pyi,sha256=eU-ibrYpvEAJSIDlSa62-bC96AQU1ykF
244
244
  corvic_generated/status/v1/event_pb2_grpc.pyi,sha256=H9-ADaiKR9iyVZvmnXutZqWwRRCDxjUIktkfJrJFIHg,417
245
245
  corvic_generated/status/v1/service_pb2.pyi,sha256=iXLR2FOKQJpBgvBzpD2kVwcYOCksP2aRwK4JYaI9CBw,558
246
246
  corvic_generated/status/v1/service_pb2_grpc.pyi,sha256=OoAnaZ64FD0UTzPoRhYvQU8ecoilhHj3ySjSfHbVDaU,1501
247
- corvic/engine/_native.pyd,sha256=6ts0z2Hw7EtkPpwjGCBvZqq9Pg282NoTZk5EwiiLZtc,438272
248
- corvic_engine-0.3.0rc46.dist-info/RECORD,,
247
+ corvic/engine/_native.pyd,sha256=2b_C9IsaSHkV5uAC0fI5WKklUtiuA1ODLJocoS_pJSY,438272
248
+ corvic_engine-0.3.0rc48.dist-info/RECORD,,