corvic-engine 0.3.0rc61-cp38-abi3-win_amd64.whl → 0.3.0rc63-cp38-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
corvic/embed/node2vec.py CHANGED
@@ -39,6 +39,8 @@ class KeyedVectors:
         index_to_key: mapping of index to key struct
         key_field_order: order of key struct fields used for index operations
         """
+        if dim <= 0:
+            raise InvalidArgumentError("number of dimensions must be positive")
         self.dim = dim
         self._index_to_key = index_to_key
         self._key_field_order = key_field_order
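
The new guard rejects non-positive embedding dimensions at construction time instead of letting a bad `dim` propagate. A minimal sketch of what callers now see; the constructor signature is inferred from the assignments above, so treat it as an assumption:

    # Hypothetical usage; argument names inferred from the diff.
    from corvic.embed.node2vec import KeyedVectors
    from corvic.result import InvalidArgumentError

    try:
        KeyedVectors(dim=0, index_to_key=[], key_field_order=[])
    except InvalidArgumentError:
        print("rejected: dim must be positive")  # raised by the new check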
corvic/engine/_native.pyd CHANGED
Binary file
@@ -73,7 +73,7 @@ def _replace_concat_op_source(
     return op.concat(new_tables, concat_op.how)
 
 
-def replace_op_source(  # noqa: C901
+def replace_op_source(  # noqa: C901, PLR0915
     root_op: op.Op, source_to_replace: op.Op, new_source: op.Op
 ) -> Ok[op.Op] | InvalidArgumentError:
     for source in root_op.sources():
@@ -98,6 +98,8 @@ def replace_op_source(  # noqa: C901
             return new_source.select_columns(root_op.columns)
         case op.LimitRows():
             return new_source.limit_rows(root_op.num_rows)
+        case op.OffsetRows():
+            return new_source.offset_rows(root_op.num_rows)
         case op.OrderBy():
             return new_source.order_by(root_op.columns, desc=root_op.desc)
         case op.FilterRows():
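
The added arm mirrors the existing LimitRows arm: when the op whose source is being swapped is an OffsetRows, the same offset is re-applied on top of the new source. A standalone sketch of the rewrite pattern using toy classes (not the corvic API):

    import dataclasses


    @dataclasses.dataclass
    class OffsetRows:
        source: object
        num_rows: int


    def replace_source(op: OffsetRows, new_source: object) -> OffsetRows:
        # Rebuild the op so it points at new_source while preserving
        # its parameters, as the new match arm does.
        return OffsetRows(source=new_source, num_rows=op.num_rows)


    print(replace_source(OffsetRows("old_table", 10), "new_table"))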
corvic/op_graph/ops.py CHANGED
@@ -71,6 +71,7 @@ ProtoOp = (
     | table_pb2.JoinOp
     | table_pb2.SelectColumnsOp
     | table_pb2.LimitRowsOp
+    | table_pb2.OffsetRowsOp
     | table_pb2.OrderByOp
     | table_pb2.FilterRowsOp
     | table_pb2.DistinctRowsOp
@@ -162,6 +163,15 @@ def from_proto(
 ) -> LimitRows: ...
 
 
+@overload
+def from_proto(
+    proto: table_pb2.OffsetRowsOp,
+    parent_ops: list[Op] | None = None,
+    *,
+    skip_validate: bool = False,
+) -> OffsetRows: ...
+
+
 @overload
 def from_proto(
     proto: table_pb2.OrderByOp,
@@ -490,6 +500,8 @@ def from_proto(  # noqa: C901, PLR0915
             proto = table_pb2.TableComputeOp(select_columns=proto)
         case table_pb2.LimitRowsOp():
             proto = table_pb2.TableComputeOp(limit_rows=proto)
+        case table_pb2.OffsetRowsOp():
+            proto = table_pb2.TableComputeOp(offset_rows=proto)
         case table_pb2.OrderByOp():
             proto = table_pb2.TableComputeOp(order_by=proto)
         case table_pb2.FilterRowsOp():
@@ -992,6 +1004,12 @@ class _Base(OneofProtoWrapper[table_pb2.TableComputeOp], ABC):
         proto = table_pb2.LimitRowsOp(source=self._proto, num_rows=num_rows)
         return Ok(from_proto(proto, skip_validate=True))
 
+    def offset_rows(self, num_rows: int) -> InvalidArgumentError | Ok[OffsetRows]:
+        if num_rows <= 0:
+            return InvalidArgumentError("num_rows must be positive")
+        proto = table_pb2.OffsetRowsOp(source=self._proto, num_rows=num_rows)
+        return Ok(from_proto(proto, skip_validate=True))
+
     def order_by(
         self, columns: Sequence[str], *, desc: bool
     ) -> InvalidArgumentError | Ok[OrderBy]:
@@ -1903,6 +1921,23 @@ class LimitRows(_Base):
         return [self.source]
 
 
+class OffsetRows(_Base):
+    """Skip the first num_rows rows of a table."""
+
+    @property
+    def num_rows(self) -> int:
+        return self._proto.offset_rows.num_rows
+
+    @property
+    def source(self) -> Op:
+        if self._parents:
+            return self._parents[0]
+        return from_proto(self._proto.offset_rows.source, skip_validate=True)
+
+    def sources(self):
+        return [self.source]
+
+
 class OrderBy(_Base):
     """Order the rows in a table."""
 
@@ -2756,6 +2791,7 @@ Op = (
     | Join
     | SelectColumns
     | LimitRows
+    | OffsetRows
     | OrderBy
     | FilterRows
     | DistinctRows
@@ -2798,6 +2834,7 @@ _COMPUTE_OP_FIELD_NAME_TO_OP: Final = {
     "join": Join,
     "select_columns": SelectColumns,
     "limit_rows": LimitRows,
+    "offset_rows": OffsetRows,
     "order_by": OrderBy,
     "filter_rows": FilterRows,
     "distinct_rows": DistinctRows,
@@ -3323,6 +3360,7 @@ class Schema(Sequence[Field]):
 
             case (
                 LimitRows()
+                | OffsetRows()
                 | OrderBy()
                 | FilterRows()
                 | DistinctRows()
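
Taken together, these hunks wire OffsetRows into the proto union, the from_proto dispatch, the builder API, and schema derivation, so it composes with LimitRows for pagination. A hedged sketch (assumes an op `table_op` is already in hand, and that Ok destructures in a match statement, which this diff does not show):

    from corvic.result import InvalidArgumentError, Ok


    def paginate(table_op, page: int, page_size: int):
        # offset_rows rejects num_rows <= 0, so page 0 applies only the limit.
        if page == 0:
            return table_op.limit_rows(page_size)
        match table_op.offset_rows(page * page_size):
            case Ok(offset_op):
                return offset_op.limit_rows(page_size)
            case InvalidArgumentError() as err:
                return err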
corvic/sql/parse_ops.py CHANGED
@@ -24,6 +24,7 @@ _SqlComputableOp = (
     | op_graph.op.Join
     | op_graph.op.SelectColumns
     | op_graph.op.LimitRows
+    | op_graph.op.OffsetRows
     | op_graph.op.OrderBy
     | op_graph.op.FilterRows
     | op_graph.op.DistinctRows
@@ -74,6 +75,7 @@ def can_be_sql_computed(
         | op_graph.op.Join()
         | op_graph.op.SelectColumns()
         | op_graph.op.LimitRows()
+        | op_graph.op.OffsetRows()
         | op_graph.op.OrderBy()
        | op_graph.op.FilterRows()
         | op_graph.op.DistinctRows()
@@ -362,6 +364,12 @@ class _OpLogParser:
     ) -> Ok[sqlglot.exp.Query] | InvalidArgumentError | NoRowsError:
         return self.parse(op.source).map(lambda query: query.limit(op.num_rows))
 
+    def _offset_rows_to_sql(
+        self,
+        op: op_graph.op.OffsetRows,
+    ) -> Ok[sqlglot.exp.Query] | InvalidArgumentError | NoRowsError:
+        return self.parse(op.source).map(lambda query: query.offset(op.num_rows))
+
     def _order_by_to_sql(
         self,
         op: op_graph.op.OrderBy,
@@ -715,6 +723,8 @@ class _OpLogParser:
                 return self._select_columns_to_sql(op)
             case op_graph.op.LimitRows():
                 return self._limit_rows_to_sql(op)
+            case op_graph.op.OffsetRows():
+                return self._offset_rows_to_sql(op)
             case op_graph.op.OrderBy():
                 return self._order_by_to_sql(op)
             case op_graph.op.FilterRows():
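
The new `_offset_rows_to_sql` leans on sqlglot's query builder, which already knows how to render OFFSET next to LIMIT. A quick standalone illustration:

    import sqlglot

    query = sqlglot.parse_one("SELECT a FROM t")
    print(query.limit(10).offset(20).sql())
    # SELECT a FROM t LIMIT 10 OFFSET 20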
@@ -1,6 +1,6 @@
 import dataclasses
 from collections.abc import Sequence
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
 import polars as pl
@@ -9,6 +9,12 @@ from typing_extensions import Protocol
 from corvic import orm
 from corvic.result import InternalError, InvalidArgumentError, Ok
 
+if TYPE_CHECKING:
+    from transformers import (
+        CLIPModel,
+        CLIPProcessor,
+    )
+
 
 @dataclasses.dataclass
 class EmbedTextContext:
@@ -64,6 +70,12 @@ class ImageEmbedder(Protocol):
     ) -> Ok[EmbedImageResult] | InvalidArgumentError | InternalError: ...
 
 
+@dataclasses.dataclass
+class ClipModels:
+    model: "CLIPModel"
+    processor: "CLIPProcessor"
+
+
 class ClipText(TextEmbedder):
     """Clip Text embedder.
 
@@ -76,28 +88,39 @@ class ClipText(TextEmbedder):
     overcoming several major challenges in computer vision.
     """
 
-    def embed(
-        self, context: EmbedTextContext
-    ) -> Ok[EmbedTextResult] | InvalidArgumentError | InternalError:
-        import torch
+    def _load_models(self):
         from transformers import (
             CLIPModel,
             CLIPProcessor,
         )
 
         model: CLIPModel = CLIPModel.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
-            "openai/clip-vit-base-patch32"
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
         )
         processor: CLIPProcessor = CLIPProcessor.from_pretrained(  # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
-            "openai/clip-vit-base-patch32"
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+            use_fast=False,
         )
-        model.eval()
+        return ClipModels(model=model, processor=processor)
+
+    def embed(
+        self, context: EmbedTextContext
+    ) -> Ok[EmbedTextResult] | InvalidArgumentError | InternalError:
         match context.expected_coordinate_bitwidth:
             case 64:
                 coord_dtype = pl.Float64()
             case 32:
                 coord_dtype = pl.Float32()
 
+        models = self._load_models()
+        model = models.model
+        processor = models.processor
+        model.eval()
+
+        import torch
+
         with torch.no_grad():
             inputs: dict[str, torch.Tensor] = processor(  # pyright: ignore[reportAssignmentType]
                 text=context.inputs,
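
Two patterns are at work in this refactor: the transformers import moves under TYPE_CHECKING so the annotations stay checkable without importing transformers at runtime, and from_pretrained is pinned to a commit via revision. A condensed sketch of the same shape:

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen only by type checkers; no transformers import at module load.
        from transformers import CLIPModel


    def load_model() -> "CLIPModel":
        from transformers import CLIPModel  # paid only on first use

        return CLIPModel.from_pretrained(
            "openai/clip-vit-base-patch32",
            # Pinning to a commit hash makes the download reproducible.
            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
        )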
@@ -1,3 +1,4 @@
+import dataclasses
 from io import BytesIO
 from typing import TYPE_CHECKING, Any
 
@@ -13,6 +14,10 @@ from corvic.system._embedder import (
 if TYPE_CHECKING:
     from PIL import Image
+    from transformers import (
+        CLIPModel,
+        CLIPProcessor,
+    )
 
 
 class RandomImageEmbedder(ImageEmbedder):
@@ -58,6 +63,12 @@ def image_from_bytes(
     return InvalidArgumentError("invalid image format")
 
 
+@dataclasses.dataclass
+class ClipModels:
+    model: "CLIPModel"
+    processor: "CLIPProcessor"
+
+
 class Clip(ImageEmbedder):
     """Clip image embedder.
 
@@ -70,6 +81,23 @@ class Clip(ImageEmbedder):
     overcoming several major challenges in computer vision.
     """
 
+    def _load_models(self):
+        from transformers import (
+            CLIPModel,
+            CLIPProcessor,
+        )
+
+        model: CLIPModel = CLIPModel.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+        )
+        processor: CLIPProcessor = CLIPProcessor.from_pretrained(  # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+            use_fast=False,
+        )
+        return ClipModels(model=model, processor=processor)
+
     def embed(
         self, context: EmbedImageContext
     ) -> Ok[EmbedImageResult] | InvalidArgumentError | InternalError:
@@ -99,20 +127,13 @@ class Clip(ImageEmbedder):
             )
         )
 
-        import torch
-        from transformers import (
-            CLIPModel,
-            CLIPProcessor,
-        )
-
-        model: CLIPModel = CLIPModel.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
-            "openai/clip-vit-base-patch32"
-        )
-        processor: CLIPProcessor = CLIPProcessor.from_pretrained(  # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
-            "openai/clip-vit-base-patch32"
-        )
+        models = self._load_models()
+        model = models.model
+        processor = models.processor
         model.eval()
 
+        import torch
+
         with torch.no_grad():
             inputs: dict[str, torch.FloatTensor] = processor(  # pyright: ignore[reportAssignmentType]
                 images=images, return_tensors="pt"
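
After the refactor, embed() holds only the inference path. A generic sketch of that CLIP image-inference shape, standalone and with a blank test image (not corvic's exact types):

    import torch
    from PIL import Image
    from transformers import CLIPModel, CLIPProcessor

    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    model.eval()

    images = [Image.new("RGB", (224, 224))]
    with torch.no_grad():  # no gradients needed for embeddings
        inputs = processor(images=images, return_tensors="pt")
        features = model.get_image_features(**inputs)
    print(features.shape)  # (1, 512) for clip-vit-base-patch32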
corvic/system/_planner.py CHANGED
@@ -166,6 +166,9 @@ class OpGraphPlanner:
             case op_graph.op.LimitRows() | op_graph.op.SampleRows():
                 source_rows = cls.count_rows_upperbound(op.source)
                 num_rows = min(op.num_rows, source_rows)
+            case op_graph.op.OffsetRows():
+                source_rows = cls.count_rows_upperbound(op.source)
+                num_rows = max(source_rows - op.num_rows, 0)
             case op_graph.op.Empty():
                 num_rows = 0
             case op_graph.op.AggregateColumns():
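
The planner's new bound follows from the arithmetic: skipping num_rows rows leaves at most source_rows - num_rows rows, and never fewer than zero. A tiny standalone check:

    def offset_rows_upperbound(source_rows: int, offset: int) -> int:
        # max(...) clamps the bound at zero when the offset
        # skips past the end of the source.
        return max(source_rows - offset, 0)

    assert offset_rows_upperbound(100, 30) == 70
    assert offset_rows_upperbound(100, 150) == 0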