corvic-engine 0.3.0rc43__cp38-abi3-win_amd64.whl → 0.3.0rc45__cp38-abi3-win_amd64.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
corvic/model/_space.py CHANGED
@@ -3,15 +3,13 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import abc
6
- import copy
7
- import dataclasses
8
6
  import datetime
9
7
  import uuid
10
8
  from collections.abc import Iterable, Mapping, Sequence
11
9
  from typing import Final, Literal, TypeAlias
12
10
 
13
11
  import pyarrow as pa
14
- from more_itertools import first
12
+ import sqlalchemy as sa
15
13
  from sqlalchemy import orm as sa_orm
16
14
  from typing_extensions import Self
17
15
 
@@ -24,12 +22,10 @@ from corvic.model._proto_orm_convert import (
24
22
  space_orm_to_proto,
25
23
  space_proto_to_orm,
26
24
  )
27
- from corvic.model._source import Source
28
25
  from corvic.result import InvalidArgumentError, NotFoundError, Ok
29
26
  from corvic.table import Table
30
27
  from corvic_generated.algorithm.graph.v1 import graph_pb2
31
28
  from corvic_generated.embedding.v1 import models_pb2 as embedding_models_pb2
32
- from corvic_generated.feature.v2 import space_pb2 as feature_space_pb2
33
29
  from corvic_generated.model.v1alpha import models_pb2
34
30
  from corvic_generated.orm.v1 import space_pb2
35
31
 
@@ -72,17 +68,6 @@ class Space(BaseModel[SpaceID, models_pb2.Space, orm.Space]):
72
68
  >>> space = Space.node2vec(feature_view, dim=10, walk_length=10, window=10)
73
69
  """
74
70
 
75
- _feature_view: FeatureView | None
76
-
77
- def __init__(
78
- self,
79
- client: system.Client,
80
- proto_self: models_pb2.Space,
81
- feature_view: FeatureView | None = None,
82
- ):
83
- super().__init__(client, proto_self)
84
- self._feature_view = feature_view
85
-
86
71
  @classmethod
87
72
  def orm_class(cls):
88
73
  return orm.Space
@@ -107,43 +92,14 @@ class Space(BaseModel[SpaceID, models_pb2.Space, orm.Space]):
107
92
  ) -> Ok[None] | InvalidArgumentError:
108
93
  return space_delete_orms(ids, session)
109
94
 
110
- @property
111
- def column_embedding_parameters(
112
- self,
113
- ) -> embedding_models_pb2.ColumnEmbeddingParameters | None:
114
- if self.proto_self.space_parameters.HasField("column_embedding_parameters"):
115
- return self.proto_self.space_parameters.column_embedding_parameters
116
- return None
117
-
118
- @property
119
- def node2vec_parameters(self) -> graph_pb2.Node2VecParameters | None:
120
- if self.proto_self.space_parameters.HasField("node2vec_parameters"):
121
- return self.proto_self.space_parameters.node2vec_parameters
122
- return None
123
-
124
- @property
125
- def concat_and_embed_parameters(
126
- self,
127
- ) -> embedding_models_pb2.ConcatAndEmbedParameters | None:
128
- if self.proto_self.space_parameters.HasField("concat_and_embed_parameters"):
129
- return self.proto_self.space_parameters.concat_and_embed_parameters
130
- return None
131
-
132
- @property
133
- def embed_and_concat_parameters(
134
- self,
135
- ) -> embedding_models_pb2.EmbedAndConcatParameters | None:
136
- if self.proto_self.space_parameters.HasField("embed_and_concat_parameters"):
137
- return self.proto_self.space_parameters.embed_and_concat_parameters
138
- return None
139
-
140
- @property
141
- def embed_image_parameters(
142
- self,
143
- ) -> embedding_models_pb2.EmbedImageParameters | None:
144
- if self.proto_self.space_parameters.HasField("embed_image_parameters"):
145
- return self.proto_self.space_parameters.embed_image_parameters
146
- return None
95
+ @classmethod
96
+ def orm_load_options(cls) -> list[sa.LoaderOption]:
97
+ return [
98
+ sa_orm.selectinload(orm.Space.feature_view)
99
+ .selectinload(orm.FeatureView.feature_view_sources)
100
+ .selectinload(orm.FeatureViewSource.source)
101
+ .selectinload(orm.Source.pipeline_ref)
102
+ ]
147
103
 
148
104
  @property
149
105
  def name(self):
@@ -158,46 +114,79 @@ class Space(BaseModel[SpaceID, models_pb2.Space, orm.Space]):
158
114
  return self.proto_self.description
159
115
 
160
116
  @property
161
- def parameters(self):
162
- return self.proto_self.space_parameters
163
-
164
- @property
165
- def feature_view_id(self):
166
- return FeatureViewID(self.proto_self.feature_view_id)
117
+ def feature_view(self) -> FeatureView:
118
+ return FeatureView.from_proto(self.proto_self.feature_view, self.client)
167
119
 
168
120
  @property
169
121
  def auto_sync(self):
170
122
  return self.proto_self.auto_sync
171
123
 
172
- @property
173
- def space_type(self):
174
- if self.node2vec_parameters:
175
- return feature_space_pb2.SPACE_TYPE_RELATIONAL
176
- if self.concat_and_embed_parameters:
177
- return feature_space_pb2.SPACE_TYPE_SEMANTIC
178
- if self.embed_and_concat_parameters:
179
- return feature_space_pb2.SPACE_TYPE_TABULAR
180
- if self.embed_image_parameters:
181
- return feature_space_pb2.SPACE_TYPE_IMAGE
182
- return feature_space_pb2.SPACE_TYPE_UNSPECIFIED
183
-
184
124
  def with_auto_sync(self, *, auto_sync: bool):
185
125
  self.proto_self.auto_sync = auto_sync
186
126
  return self
187
127
 
188
- def feature_view(self):
189
- if self._feature_view:
190
- return Ok(self._feature_view)
191
- match FeatureView.load_proto_for(self.feature_view_id, self.client):
192
- case Ok(feature_view):
193
- return Ok(FeatureView.from_proto(feature_view, self.client))
194
- case NotFoundError() as err:
195
- return err
196
-
197
128
  @abc.abstractmethod
198
129
  def embeddings_tables(self) -> Ok[Mapping[str, Table]] | InvalidArgumentError:
199
130
  """Generate per-output-source embeddings tables for this space."""
200
131
 
132
+ @classmethod
133
+ def create_specific(
134
+ cls,
135
+ name: str,
136
+ description: str,
137
+ feature_view: FeatureView,
138
+ parameters: SpecificSpaceParameters,
139
+ client: system.Client | None = None,
140
+ room_id: RoomID | None = None,
141
+ *,
142
+ auto_sync: bool = False,
143
+ ) -> Ok[SpecificSpace] | InvalidArgumentError:
144
+ client = client or feature_view.client
145
+ room_id = room_id or feature_view.room_id
146
+ if room_id != feature_view.room_id:
147
+ return InvalidArgumentError("room id must match feature_view room id")
148
+ match parameters:
149
+ case Node2VecParameters():
150
+ return RelationalSpace.create(
151
+ name,
152
+ description,
153
+ feature_view,
154
+ parameters,
155
+ client,
156
+ room_id,
157
+ auto_sync=auto_sync,
158
+ )
159
+ case ConcatAndEmbedParameters():
160
+ return SemanticSpace.create(
161
+ name,
162
+ description,
163
+ feature_view,
164
+ parameters,
165
+ client,
166
+ room_id,
167
+ auto_sync=auto_sync,
168
+ )
169
+ case EmbedAndConcatParameters():
170
+ return TabularSpace.create(
171
+ name,
172
+ description,
173
+ feature_view,
174
+ parameters,
175
+ client,
176
+ room_id,
177
+ auto_sync=auto_sync,
178
+ )
179
+ case EmbedImageParameters():
180
+ return ImageSpace.create(
181
+ name,
182
+ description,
183
+ feature_view,
184
+ parameters,
185
+ client,
186
+ room_id,
187
+ auto_sync=auto_sync,
188
+ )
189
+
201
190
  @classmethod
202
191
  def from_id(
203
192
  cls,
@@ -258,46 +247,109 @@ class Space(BaseModel[SpaceID, models_pb2.Space, orm.Space]):
258
247
  class UnknownSpace(Space):
259
248
  """A space that this version of the code doesn't know what to do with."""
260
249
 
250
+ @classmethod
251
+ def create(cls, feature_view: FeatureView, client: system.Client | None = None):
252
+ client = client or feature_view.client
253
+ return cls(
254
+ client,
255
+ models_pb2.Space(
256
+ feature_view=feature_view.proto_self,
257
+ ),
258
+ )
259
+
261
260
  def embeddings_tables(self) -> Ok[Mapping[str, Table]] | InvalidArgumentError:
262
261
  """Generate per-ouput-source embeddings tables for this space."""
263
262
  return Ok({})
264
263
 
265
264
 
266
- @dataclasses.dataclass(frozen=True)
267
265
  class Node2VecParameters:
268
- dim: int = 10
269
- walk_length: int = 10
270
- window: int = 10
271
- p: float = 1.0
272
- q: float = 1.0
273
- alpha: float = 0.025
274
- min_alpha: float = 0.0001
275
- negative: int = 5
276
- epochs: int = 10
277
-
278
- def to_proto(self) -> graph_pb2.Node2VecParameters:
279
- return graph_pb2.Node2VecParameters(
280
- ndim=self.dim,
281
- walk_length=self.walk_length,
282
- window=self.window,
283
- p=self.p,
284
- q=self.q,
285
- alpha=self.alpha,
286
- min_alpha=self.min_alpha,
287
- negative=self.negative,
288
- epochs=self.epochs,
266
+ proto_self: Final[graph_pb2.Node2VecParameters]
267
+
268
+ def __init__(self, proto_self: graph_pb2.Node2VecParameters):
269
+ self.proto_self = proto_self
270
+
271
+ @classmethod
272
+ def create( # noqa: PLR0913
273
+ cls,
274
+ dim: int = 10,
275
+ walk_length: int = 10,
276
+ window: int = 10,
277
+ p: float = 1.0,
278
+ q: float = 1.0,
279
+ alpha: float = 0.025,
280
+ min_alpha: float = 0.0001,
281
+ negative: int = 5,
282
+ epochs: int = 10,
283
+ ):
284
+ return cls(
285
+ graph_pb2.Node2VecParameters(
286
+ ndim=dim,
287
+ walk_length=walk_length,
288
+ window=window,
289
+ p=p,
290
+ q=q,
291
+ alpha=alpha,
292
+ min_alpha=min_alpha,
293
+ negative=negative,
294
+ epochs=epochs,
295
+ )
289
296
  )
290
297
 
298
+ @property
299
+ def dim(self) -> int:
300
+ return self.proto_self.ndim
301
+
302
+ @property
303
+ def walk_length(self) -> int:
304
+ return self.proto_self.walk_length
305
+
306
+ @property
307
+ def window(self) -> int:
308
+ return self.proto_self.window
309
+
310
+ @property
311
+ def p(self) -> float:
312
+ return self.proto_self.p
313
+
314
+ @property
315
+ def q(self) -> float:
316
+ return self.proto_self.q
317
+
318
+ @property
319
+ def alpha(self) -> float:
320
+ return self.proto_self.alpha
321
+
322
+ @property
323
+ def min_alpha(self) -> float:
324
+ return self.proto_self.min_alpha
325
+
326
+ @property
327
+ def negative(self) -> int:
328
+ return self.proto_self.negative
329
+
330
+ @property
331
+ def epochs(self) -> int:
332
+ return self.proto_self.epochs
333
+
291
334
 
292
335
  class RelationalSpace(Space):
293
336
  """Spaces for embeddings that encode relationships."""
294
337
 
338
+ @property
339
+ def parameters(self) -> Node2VecParameters:
340
+ return Node2VecParameters(self.proto_self.space_parameters.node2vec_parameters)
341
+
295
342
  @classmethod
296
343
  def create(
297
344
  cls,
345
+ name: str,
346
+ description: str,
298
347
  feature_view: FeatureView,
348
+ parameters: Node2VecParameters,
299
349
  client: system.Client | None = None,
300
350
  room_id: orm.RoomID | None = None,
351
+ *,
352
+ auto_sync: bool = False,
301
353
  ) -> Ok[RelationalSpace] | InvalidArgumentError:
302
354
  if not feature_view.relationships:
303
355
  return InvalidArgumentError(
@@ -310,34 +362,25 @@ class RelationalSpace(Space):
310
362
  client = client or feature_view.client
311
363
  room_id = room_id or Defaults.get_default_room_id(client)
312
364
  proto_self = models_pb2.Space(
313
- feature_view_id=str(feature_view.id), room_id=str(room_id)
365
+ name=name,
366
+ description=description,
367
+ auto_sync=auto_sync,
368
+ feature_view=feature_view.proto_self,
369
+ room_id=str(room_id),
370
+ space_parameters=space_pb2.SpaceParameters(
371
+ node2vec_parameters=parameters.proto_self
372
+ ),
314
373
  )
315
374
 
316
375
  return Ok(
317
376
  RelationalSpace(
318
377
  client,
319
378
  proto_self,
320
- feature_view,
321
379
  )
322
380
  )
323
381
 
324
- def with_node2vec(self, params: Node2VecParameters):
325
- proto_self = copy.deepcopy(self.proto_self)
326
- proto_self.space_parameters.CopyFrom(
327
- space_pb2.SpaceParameters(node2vec_parameters=params.to_proto())
328
- )
329
- return RelationalSpace(
330
- self.client, proto_self, self.feature_view().unwrap_or_raise()
331
- )
332
-
333
382
  def legacy_embeddings_table(self) -> Ok[Table] | InvalidArgumentError:
334
- if not self.node2vec_parameters:
335
- return InvalidArgumentError("space was not configured")
336
- match self.feature_view():
337
- case Ok(feature_view):
338
- pass
339
- case NotFoundError() as err:
340
- return InvalidArgumentError.from_(err)
383
+ feature_view = self.feature_view
341
384
 
342
385
  def gen_edge_list_tables():
343
386
  for edge_table in feature_view.output_edge_tables():
@@ -361,7 +404,7 @@ class RelationalSpace(Space):
361
404
 
362
405
  return op_graph.op.embed_node2vec_from_edge_lists(
363
406
  edge_list_tables=edge_list_tables,
364
- params=self.node2vec_parameters,
407
+ params=self.parameters.proto_self,
365
408
  ).map(
366
409
  lambda t: Table(
367
410
  self.client,
@@ -377,11 +420,7 @@ class RelationalSpace(Space):
377
420
  return err
378
421
  case Ok(embeddings_table):
379
422
  pass
380
- match self.feature_view():
381
- case Ok(feature_view):
382
- pass
383
- case NotFoundError() as err:
384
- return InvalidArgumentError.from_(err)
423
+ feature_view = self.feature_view
385
424
  id_fields = [
386
425
  field
387
426
  for field in embeddings_table.schema
@@ -392,8 +431,7 @@ class RelationalSpace(Space):
392
431
  dtype_to_id_field = {field.dtype: field.name for field in id_fields[:-1]}
393
432
 
394
433
  tables: Mapping[str, Table] = {}
395
- for source_id in feature_view.output_sources:
396
- source = feature_view.source_id_to_feature_view_source[source_id].source
434
+ for source in feature_view.output_sources:
397
435
  primary_key_field = source.table.schema.get_primary_key()
398
436
  if primary_key_field is None:
399
437
  return InvalidArgumentError(
@@ -416,7 +454,7 @@ class RelationalSpace(Space):
416
454
  )
417
455
  )
418
456
  .and_then(
419
- lambda t, source_id=source_id: t.add_literal_column(
457
+ lambda t, source_id=source.id: t.add_literal_column(
420
458
  "source_id",
421
459
  str(source_id),
422
460
  pa.string(),
@@ -442,126 +480,93 @@ class RelationalSpace(Space):
442
480
  )
443
481
 
444
482
 
445
- @dataclasses.dataclass
446
483
  class ConcatAndEmbedParameters:
447
- column_names: list[str]
448
- model_name: str
449
- tokenizer_name: str
450
- expected_vector_length: int
484
+ proto_self: Final[embedding_models_pb2.ConcatAndEmbedParameters]
451
485
 
486
+ def __init__(self, proto_self: embedding_models_pb2.ConcatAndEmbedParameters):
487
+ self.proto_self = proto_self
452
488
 
453
- class SemanticSpace(Space):
454
- """Spaces for embedding source properties."""
489
+ @classmethod
490
+ def create(
491
+ cls, column_names: Sequence[str], model_name: str, expected_vector_length: int
492
+ ):
493
+ return cls(
494
+ embedding_models_pb2.ConcatAndEmbedParameters(
495
+ column_names=column_names,
496
+ model_parameters=embedding_models_pb2.Parameters(
497
+ model=name_to_proto_embedding_model.get(
498
+ model_name, embedding_models_pb2.MODEL_UNSPECIFIED
499
+ ),
500
+ ndim=expected_vector_length,
501
+ ),
502
+ )
503
+ )
455
504
 
456
- _output_sources: list[Source] | None
505
+ @property
506
+ def model_name(self) -> str:
507
+ return embedding_model_proto_to_name[self.proto_self.model_parameters.model]
457
508
 
458
- def __init__(
459
- self,
460
- client: system.Client,
461
- proto_self: models_pb2.Space,
462
- feature_view: FeatureView | None = None,
463
- output_sources: list[Source] | None = None,
464
- ):
465
- super().__init__(client, proto_self, feature_view)
466
- self._output_sources = output_sources
467
-
468
- def output_sources(self):
469
- if self._output_sources is not None:
470
- return Ok(self._output_sources)
471
- match self.feature_view():
472
- case Ok(feature_view):
473
- return Ok(
474
- [
475
- feature_view.source_id_to_feature_view_source[
476
- output_source
477
- ].source
478
- for output_source in feature_view.output_sources
479
- ]
480
- )
481
- case NotFoundError() as err:
482
- return err
509
+ @property
510
+ def column_names(self) -> Sequence[str]:
511
+ return self.proto_self.column_names
483
512
 
484
513
  @property
485
- def expected_coordinate_bitwidth(self) -> Literal[32]:
486
- return 32
514
+ def expected_vector_length(self) -> int:
515
+ return self.proto_self.model_parameters.ndim
516
+
517
+
518
+ class SemanticSpace(Space):
519
+ """Spaces for embedding source properties."""
487
520
 
488
521
  @property
489
- def tokenizer_name(self) -> Literal[""]:
490
- return ""
522
+ def parameters(self) -> ConcatAndEmbedParameters:
523
+ return ConcatAndEmbedParameters(
524
+ self.proto_self.space_parameters.concat_and_embed_parameters
525
+ )
491
526
 
492
527
  @property
493
- def model_name(self) -> str | None:
494
- if self.concat_and_embed_parameters:
495
- return embedding_model_proto_to_name[
496
- self.concat_and_embed_parameters.model_parameters.model
497
- ]
498
- return None
528
+ def expected_coordinate_bitwidth(self) -> Literal[32]:
529
+ return 32
499
530
 
500
531
  @classmethod
501
532
  def create(
502
533
  cls,
534
+ name: str,
535
+ description: str,
503
536
  feature_view: FeatureView,
537
+ parameters: ConcatAndEmbedParameters,
504
538
  client: system.Client | None = None,
505
539
  room_id: orm.RoomID | None = None,
540
+ *,
541
+ auto_sync: bool = False,
506
542
  ) -> Ok[SemanticSpace] | InvalidArgumentError:
507
543
  client = client or feature_view.client
508
544
  if len(feature_view.output_sources) == 0:
509
545
  return InvalidArgumentError(
510
546
  "feature view must have at least one output source"
511
547
  )
512
- sources = [
513
- feature_view.source_id_to_feature_view_source[output_source].source
514
- for output_source in feature_view.output_sources
515
- ]
516
-
517
548
  room_id = room_id or Defaults.get_default_room_id(client)
518
549
  proto_self = models_pb2.Space(
519
- feature_view_id=str(feature_view.id), room_id=str(room_id)
550
+ name=name,
551
+ description=description,
552
+ auto_sync=auto_sync,
553
+ feature_view=feature_view.proto_self,
554
+ room_id=str(room_id),
555
+ space_parameters=space_pb2.SpaceParameters(
556
+ concat_and_embed_parameters=parameters.proto_self
557
+ ),
520
558
  )
521
559
  return Ok(
522
560
  SemanticSpace(
523
561
  client,
524
562
  proto_self,
525
- feature_view,
526
- sources,
527
563
  )
528
564
  )
529
565
 
530
- def with_concat_and_embed(self, params: ConcatAndEmbedParameters) -> SemanticSpace:
531
- proto_self = copy.deepcopy(self.proto_self)
532
- proto_self.space_parameters.CopyFrom(
533
- space_pb2.SpaceParameters(
534
- concat_and_embed_parameters=embedding_models_pb2.ConcatAndEmbedParameters(
535
- column_names=params.column_names,
536
- model_parameters=embedding_models_pb2.Parameters(
537
- model=name_to_proto_embedding_model.get(
538
- params.model_name, embedding_models_pb2.MODEL_UNSPECIFIED
539
- ),
540
- ndim=params.expected_vector_length,
541
- ),
542
- )
543
- )
544
- )
545
- return SemanticSpace(
546
- self.client,
547
- proto_self,
548
- self.feature_view().unwrap_or_raise(),
549
- output_sources=self.output_sources().unwrap_or_raise(),
550
- )
551
-
552
566
  def embeddings_tables(self) -> Ok[Mapping[str, Table]] | InvalidArgumentError:
553
- model_name = self.model_name
554
- if not self.concat_and_embed_parameters or model_name is None:
555
- return InvalidArgumentError(
556
- "space was not configured to produce embeddings"
557
- )
558
- match self.output_sources():
559
- case Ok(output_sources):
560
- pass
561
- case NotFoundError() as err:
562
- return InvalidArgumentError.from_(err)
563
-
564
- params = self.concat_and_embed_parameters
567
+ params = self.parameters
568
+ model_name = params.model_name
569
+ output_sources = self.feature_view.output_sources
565
570
  combined_column_tmp_name = f"__concat-{uuid.uuid4()}"
566
571
  embedding_column_tmp_name = f"__embed-{uuid.uuid4()}"
567
572
 
@@ -590,8 +595,8 @@ class SemanticSpace(Space):
590
595
  combined_column_tmp_name,
591
596
  embedding_column_tmp_name,
592
597
  model_name,
593
- self.tokenizer_name,
594
- params.model_parameters.ndim,
598
+ "",
599
+ params.expected_vector_length,
595
600
  self.expected_coordinate_bitwidth,
596
601
  )
597
602
  )
@@ -632,99 +637,68 @@ class SemanticSpace(Space):
632
637
  return Ok(tables)
633
638
 
634
639
 
635
- @dataclasses.dataclass
636
640
  class EmbedAndConcatParameters:
637
- expected_vector_length: int
641
+ proto_self: Final[embedding_models_pb2.EmbedAndConcatParameters]
642
+
643
+ def __init__(self, proto_self: embedding_models_pb2.EmbedAndConcatParameters):
644
+ self.proto_self = proto_self
645
+
646
+ @classmethod
647
+ def create(cls, expected_vector_length: int):
648
+ return cls(
649
+ embedding_models_pb2.EmbedAndConcatParameters(ndim=expected_vector_length)
650
+ )
651
+
652
+ @property
653
+ def expected_vector_length(self) -> int:
654
+ return self.proto_self.ndim
638
655
 
639
656
 
640
657
  class TabularSpace(Space):
641
658
  """Spaces for embedding source properties."""
642
659
 
643
- _output_sources: list[Source] | None
644
-
645
- def __init__(
646
- self,
647
- client: system.Client,
648
- proto_self: models_pb2.Space,
649
- feature_view: FeatureView | None = None,
650
- output_sources: list[Source] | None = None,
651
- ):
652
- super().__init__(client, proto_self, feature_view)
653
- self._output_sources = output_sources
654
-
655
- def output_sources(self):
656
- if self._output_sources is not None:
657
- return Ok(self._output_sources)
658
- match self.feature_view():
659
- case Ok(feature_view):
660
- return Ok(
661
- [
662
- feature_view.source_id_to_feature_view_source[
663
- output_source
664
- ].source
665
- for output_source in feature_view.output_sources
666
- ]
667
- )
668
- case NotFoundError() as err:
669
- return err
660
+ @property
661
+ def parameters(self) -> EmbedAndConcatParameters:
662
+ return EmbedAndConcatParameters(
663
+ self.proto_self.space_parameters.embed_and_concat_parameters
664
+ )
670
665
 
671
666
  @classmethod
672
667
  def create(
673
668
  cls,
669
+ name: str,
670
+ description: str,
674
671
  feature_view: FeatureView,
672
+ parameters: EmbedAndConcatParameters,
675
673
  client: system.Client | None = None,
676
674
  room_id: orm.RoomID | None = None,
675
+ *,
676
+ auto_sync: bool = False,
677
677
  ) -> Ok[Self] | InvalidArgumentError:
678
678
  client = client or feature_view.client
679
679
  if len(feature_view.output_sources) == 0:
680
680
  return InvalidArgumentError(
681
681
  "feature view must have at least one output source"
682
682
  )
683
- sources = [
684
- feature_view.source_id_to_feature_view_source[output_source].source
685
- for output_source in feature_view.output_sources
686
- ]
687
683
 
688
684
  room_id = room_id or Defaults.get_default_room_id(client)
689
685
  proto_self = models_pb2.Space(
690
- feature_view_id=str(feature_view.id), room_id=str(room_id)
691
- )
692
- return Ok(
693
- cls(
694
- client,
695
- proto_self,
696
- feature_view,
697
- sources,
698
- )
699
- )
700
-
701
- def with_embed_and_concat(self, params: EmbedAndConcatParameters) -> TabularSpace:
702
- proto_self = copy.deepcopy(self.proto_self)
703
- proto_self.space_parameters.CopyFrom(
704
- space_pb2.SpaceParameters(
705
- embed_and_concat_parameters=embedding_models_pb2.EmbedAndConcatParameters(
706
- ndim=params.expected_vector_length,
707
- )
708
- )
709
- )
710
- return TabularSpace(
711
- self.client,
712
- proto_self,
713
- self.feature_view().unwrap_or_raise(),
714
- output_sources=self.output_sources().unwrap_or_raise(),
686
+ name=name,
687
+ description=description,
688
+ auto_sync=auto_sync,
689
+ feature_view=feature_view.proto_self,
690
+ room_id=str(room_id),
691
+ space_parameters=space_pb2.SpaceParameters(
692
+ embed_and_concat_parameters=parameters.proto_self
693
+ ),
715
694
  )
695
+ return Ok(cls(client, proto_self))
716
696
 
717
- def embeddings_tables(self) -> Ok[Mapping[str, Table]] | InvalidArgumentError: # noqa: C901
718
- if not self.embed_and_concat_parameters:
719
- return InvalidArgumentError(
720
- "space was not configured to produce embeddings"
721
- )
722
- match self.output_sources():
723
- case Ok(output_sources):
724
- pass
725
- case NotFoundError() as err:
726
- return InvalidArgumentError.from_(err)
727
- parameters = self.embed_and_concat_parameters
697
+ def embeddings_tables( # noqa: C901
698
+ self,
699
+ ) -> Ok[Mapping[str, Table]] | InvalidArgumentError:
700
+ output_sources = self.feature_view.output_sources
701
+ parameters = self.parameters
728
702
 
729
703
  tables: Mapping[str, Table] = {}
730
704
  first_schema = output_sources[0].table.schema
@@ -783,7 +757,9 @@ class TabularSpace(Space):
783
757
  embedding_column_tmp_name = f"__embed-{uuid.uuid4()}"
784
758
 
785
759
  # Avoid 0 padding for spaces with small numbers of columns
786
- target_list_length = min(parameters.ndim, len(embedding_column_tmp_names))
760
+ target_list_length = min(
761
+ parameters.expected_vector_length, len(embedding_column_tmp_names)
762
+ )
787
763
 
788
764
  def reduce_dimension(
789
765
  op: op_graph.Op,
@@ -856,40 +832,58 @@ class TabularSpace(Space):
856
832
  return Ok(tables)
857
833
 
858
834
 
859
- @dataclasses.dataclass
860
835
  class EmbedImageParameters:
861
- column_name: str
862
- model_name: str
863
- expected_vector_length: int
836
+ proto_self: Final[embedding_models_pb2.EmbedImageParameters]
837
+
838
+ def __init__(self, proto_self: embedding_models_pb2.EmbedImageParameters):
839
+ self.proto_self = proto_self
840
+
841
+ @classmethod
842
+ def create(
843
+ cls, column_name: str, model_name: str, expected_vector_length: int
844
+ ) -> Self:
845
+ return cls(
846
+ embedding_models_pb2.EmbedImageParameters(
847
+ column_name=column_name,
848
+ model_parameters=embedding_models_pb2.ImageModelParameters(
849
+ model=name_to_proto_image_model.get(
850
+ model_name,
851
+ embedding_models_pb2.IMAGE_MODEL_UNSPECIFIED,
852
+ ),
853
+ ndim=expected_vector_length,
854
+ ),
855
+ )
856
+ )
857
+
858
+ @property
859
+ def column_name(self) -> str:
860
+ return self.proto_self.column_name
861
+
862
+ @property
863
+ def model_name(self) -> str:
864
+ return image_model_proto_to_name[self.proto_self.model_parameters.model]
865
+
866
+ @property
867
+ def model(self) -> embedding_models_pb2.ImageModel:
868
+ return self.proto_self.model_parameters.model
869
+
870
+ @property
871
+ def expected_vector_length(self) -> int:
872
+ return self.proto_self.model_parameters.ndim
864
873
 
865
874
 
866
875
  class ImageSpace(Space):
867
876
  """Spaces for embedding images."""
868
877
 
869
- _output_source: Source | None = None
870
-
871
- def __init__(
872
- self,
873
- client: system.Client,
874
- proto_self: models_pb2.Space,
875
- feature_view: FeatureView | None = None,
876
- output_source: Source | None = None,
877
- ):
878
- super().__init__(client, proto_self, feature_view)
879
- self._output_source = output_source
878
+ @property
879
+ def parameters(self) -> EmbedImageParameters:
880
+ return EmbedImageParameters(
881
+ self.proto_self.space_parameters.embed_image_parameters
882
+ )
880
883
 
884
+ @property
881
885
  def output_source(self):
882
- if self._output_source is not None:
883
- return Ok(self._output_source)
884
- match self.feature_view():
885
- case Ok(feature_view):
886
- return Ok(
887
- feature_view.source_id_to_feature_view_source[
888
- first(iter(feature_view.output_sources))
889
- ].source
890
- )
891
- case NotFoundError() as err:
892
- return err
886
+ return self.feature_view.output_sources[0]
893
887
 
894
888
  def _sub_orm_objects(self, orm_object: orm.Space) -> Iterable[orm.Base]:
895
889
  return []
@@ -898,78 +892,45 @@ class ImageSpace(Space):
898
892
  def expected_coordinate_bitwidth(self) -> Literal[32]:
899
893
  return 32
900
894
 
901
- @property
902
- def model_name(self) -> str | None:
903
- if self.embed_image_parameters:
904
- return image_model_proto_to_name[
905
- self.embed_image_parameters.model_parameters.model
906
- ]
907
- return None
908
-
909
895
  @classmethod
910
896
  def create(
911
897
  cls,
898
+ name: str,
899
+ description: str,
912
900
  feature_view: FeatureView,
901
+ parameters: EmbedImageParameters,
913
902
  client: system.Client | None = None,
914
903
  room_id: orm.RoomID | None = None,
904
+ *,
905
+ auto_sync: bool = False,
915
906
  ) -> Ok[Self] | InvalidArgumentError:
916
907
  client = client or feature_view.client
917
908
  if len(feature_view.output_sources) != 1:
918
909
  return InvalidArgumentError(
919
910
  "feature view must have exactly one output source"
920
911
  )
921
- source = feature_view.source_id_to_feature_view_source[
922
- first(iter(feature_view.output_sources))
923
- ].source
924
-
925
912
  room_id = room_id or Defaults.get_default_room_id(client)
926
913
  proto_self = models_pb2.Space(
927
- feature_view_id=str(feature_view.id), room_id=str(room_id)
914
+ name=name,
915
+ description=description,
916
+ auto_sync=auto_sync,
917
+ feature_view=feature_view.proto_self,
918
+ room_id=str(room_id),
919
+ space_parameters=space_pb2.SpaceParameters(
920
+ embed_image_parameters=parameters.proto_self
921
+ ),
928
922
  )
929
923
  return Ok(
930
924
  cls(
931
925
  client,
932
926
  proto_self,
933
- feature_view,
934
- source,
935
927
  )
936
928
  )
937
929
 
938
- def with_embed_and_image(self, params: EmbedImageParameters) -> ImageSpace:
939
- proto_self = copy.deepcopy(self.proto_self)
940
- proto_self.space_parameters.CopyFrom(
941
- space_pb2.SpaceParameters(
942
- embed_image_parameters=embedding_models_pb2.EmbedImageParameters(
943
- column_name=params.column_name,
944
- model_parameters=embedding_models_pb2.ImageModelParameters(
945
- model=name_to_proto_image_model.get(
946
- params.model_name,
947
- embedding_models_pb2.IMAGE_MODEL_UNSPECIFIED,
948
- ),
949
- ndim=params.expected_vector_length,
950
- ),
951
- )
952
- )
953
- )
954
- return ImageSpace(
955
- self.client,
956
- proto_self,
957
- self.feature_view().unwrap_or_raise(),
958
- output_source=self.output_source().unwrap_or_raise(),
959
- )
960
-
961
930
  def embeddings_tables(self) -> Ok[Mapping[str, Table]] | InvalidArgumentError:
962
- model_name = self.model_name
963
- if not self.embed_image_parameters or model_name is None:
964
- return InvalidArgumentError(
965
- "space was not configured to produce embeddings"
966
- )
967
- match self.output_source():
968
- case Ok(output_source):
969
- pass
970
- case NotFoundError() as err:
971
- return InvalidArgumentError.from_(err)
972
- params = self.embed_image_parameters
931
+ params = self.parameters
932
+ model_name = params.model_name
933
+ output_source = self.output_source
973
934
  pk_field = output_source.table.schema.get_primary_key()
974
935
  if not pk_field:
975
936
  return InvalidArgumentError("output source must have a primary key")
@@ -981,7 +942,7 @@ class ImageSpace(Space):
981
942
  column_name=params.column_name,
982
943
  embedding_column_name=embedding_column_tmp_name,
983
944
  model_name=model_name,
984
- expected_vector_length=params.model_parameters.ndim,
945
+ expected_vector_length=params.expected_vector_length,
985
946
  expected_coordinate_bitwidth=self.expected_coordinate_bitwidth,
986
947
  )
987
948
  .and_then(
@@ -1006,3 +967,10 @@ class ImageSpace(Space):
1006
967
  SpecificSpace: TypeAlias = (
1007
968
  RelationalSpace | SemanticSpace | TabularSpace | ImageSpace | UnknownSpace
1008
969
  )
970
+
971
+ SpecificSpaceParameters: TypeAlias = (
972
+ Node2VecParameters
973
+ | ConcatAndEmbedParameters
974
+ | EmbedAndConcatParameters
975
+ | EmbedImageParameters
976
+ )