corvic-engine 0.3.0rc61-cp38-abi3-win_amd64.whl → 0.3.0rc63-cp38-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- corvic/embed/node2vec.py +2 -0
- corvic/engine/_native.pyd +0 -0
- corvic/op_graph/_transformations.py +3 -1
- corvic/op_graph/ops.py +38 -0
- corvic/sql/parse_ops.py +10 -0
- corvic/system/_embedder.py +31 -8
- corvic/system/_image_embedder.py +33 -12
- corvic/system/_planner.py +3 -0
- corvic/system/in_memory_executor.py +299 -326
- corvic/system_sqlite/staging.py +17 -9
- corvic/table/table.py +11 -1
- {corvic_engine-0.3.0rc61.dist-info → corvic_engine-0.3.0rc63.dist-info}/METADATA +1 -1
- {corvic_engine-0.3.0rc61.dist-info → corvic_engine-0.3.0rc63.dist-info}/RECORD +23 -23
- corvic_generated/ingest/v2/pipeline_pb2.py +24 -22
- corvic_generated/ingest/v2/pipeline_pb2.pyi +4 -2
- corvic_generated/ingest/v2/room_pb2.py +31 -31
- corvic_generated/ingest/v2/room_pb2.pyi +4 -2
- corvic_generated/orm/v1/agent_pb2.py +2 -2
- corvic_generated/orm/v1/agent_pb2.pyi +6 -0
- corvic_generated/orm/v1/table_pb2.py +198 -196
- corvic_generated/orm/v1/table_pb2.pyi +12 -2
- {corvic_engine-0.3.0rc61.dist-info → corvic_engine-0.3.0rc63.dist-info}/WHEEL +0 -0
- {corvic_engine-0.3.0rc61.dist-info → corvic_engine-0.3.0rc63.dist-info}/licenses/LICENSE +0 -0
corvic/embed/node2vec.py
CHANGED
@@ -39,6 +39,8 @@ class KeyedVectors:
             index_to_key: mapping of index to key struct
             key_field_order: order of key struct fields used for index operations
         """
+        if dim <= 0:
+            raise InvalidArgumentError("number of dimensions must be positive")
         self.dim = dim
         self._index_to_key = index_to_key
         self._key_field_order = key_field_order
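For context, the new guard makes KeyedVectors fail fast on a non-positive dimension instead of constructing an unusable instance. A minimal sketch of the expected behaviour, assuming a constructor that takes dim, index_to_key, and key_field_order (the exact signature is not shown in this diff):

# Hedged usage sketch; constructor arguments other than `dim` are assumptions.
from corvic.embed.node2vec import KeyedVectors
from corvic.result import InvalidArgumentError

try:
    KeyedVectors(dim=0, index_to_key=[], key_field_order=[])
except InvalidArgumentError:
    print("rejected: number of dimensions must be positive")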
corvic/engine/_native.pyd
CHANGED
Binary file
corvic/op_graph/_transformations.py
CHANGED
@@ -73,7 +73,7 @@ def _replace_concat_op_source(
     return op.concat(new_tables, concat_op.how)
 
 
-def replace_op_source(  # noqa: C901
+def replace_op_source(  # noqa: C901, PLR0915
     root_op: op.Op, source_to_replace: op.Op, new_source: op.Op
 ) -> Ok[op.Op] | InvalidArgumentError:
     for source in root_op.sources():
@@ -98,6 +98,8 @@ def replace_op_source(  # noqa: C901
             return new_source.select_columns(root_op.columns)
         case op.LimitRows():
             return new_source.limit_rows(root_op.num_rows)
+        case op.OffsetRows():
+            return new_source.offset_rows(root_op.num_rows)
         case op.OrderBy():
             return new_source.order_by(root_op.columns, desc=root_op.desc)
         case op.FilterRows():
corvic/op_graph/ops.py
CHANGED
@@ -71,6 +71,7 @@ ProtoOp = (
     | table_pb2.JoinOp
     | table_pb2.SelectColumnsOp
     | table_pb2.LimitRowsOp
+    | table_pb2.OffsetRowsOp
     | table_pb2.OrderByOp
     | table_pb2.FilterRowsOp
     | table_pb2.DistinctRowsOp
@@ -162,6 +163,15 @@ def from_proto(
 ) -> LimitRows: ...
 
 
+@overload
+def from_proto(
+    proto: table_pb2.OffsetRowsOp,
+    parent_ops: list[Op] | None = None,
+    *,
+    skip_validate: bool = False,
+) -> OffsetRows: ...
+
+
 @overload
 def from_proto(
     proto: table_pb2.OrderByOp,
@@ -490,6 +500,8 @@ def from_proto(  # noqa: C901, PLR0915
             proto = table_pb2.TableComputeOp(select_columns=proto)
         case table_pb2.LimitRowsOp():
             proto = table_pb2.TableComputeOp(limit_rows=proto)
+        case table_pb2.OffsetRowsOp():
+            proto = table_pb2.TableComputeOp(offset_rows=proto)
         case table_pb2.OrderByOp():
             proto = table_pb2.TableComputeOp(order_by=proto)
         case table_pb2.FilterRowsOp():
@@ -992,6 +1004,12 @@ class _Base(OneofProtoWrapper[table_pb2.TableComputeOp], ABC):
         proto = table_pb2.LimitRowsOp(source=self._proto, num_rows=num_rows)
         return Ok(from_proto(proto, skip_validate=True))
 
+    def offset_rows(self, num_rows: int) -> InvalidArgumentError | Ok[OffsetRows]:
+        if num_rows <= 0:
+            return InvalidArgumentError("num_rows must be positive")
+        proto = table_pb2.OffsetRowsOp(source=self._proto, num_rows=num_rows)
+        return Ok(from_proto(proto, skip_validate=True))
+
     def order_by(
         self, columns: Sequence[str], *, desc: bool
     ) -> InvalidArgumentError | Ok[OrderBy]:
@@ -1903,6 +1921,23 @@ class LimitRows(_Base):
         return [self.source]
 
 
+class OffsetRows(_Base):
+    """Limit the number of rows in a table."""
+
+    @property
+    def num_rows(self) -> int:
+        return self._proto.offset_rows.num_rows
+
+    @property
+    def source(self) -> Op:
+        if self._parents:
+            return self._parents[0]
+        return from_proto(self._proto.offset_rows.source, skip_validate=True)
+
+    def sources(self):
+        return [self.source]
+
+
 class OrderBy(_Base):
     """Order the rows in a table."""
 
@@ -2756,6 +2791,7 @@ Op = (
     | Join
    | SelectColumns
    | LimitRows
+    | OffsetRows
    | OrderBy
    | FilterRows
    | DistinctRows
@@ -2798,6 +2834,7 @@ _COMPUTE_OP_FIELD_NAME_TO_OP: Final = {
    "join": Join,
    "select_columns": SelectColumns,
    "limit_rows": LimitRows,
+    "offset_rows": OffsetRows,
    "order_by": OrderBy,
    "filter_rows": FilterRows,
    "distinct_rows": DistinctRows,
@@ -3323,6 +3360,7 @@ class Schema(Sequence[Field]):
 
            case (
                LimitRows()
+                | OffsetRows()
                | OrderBy()
                | FilterRows()
                | DistinctRows()
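The new OffsetRows op mirrors LimitRows: a single-source op with proto-backed properties, registered in the ProtoOp and Op unions and in the compute-op field-name table. A rough sketch of building one node directly from its proto, following the pattern visible in _Base.offset_rows above (the import path and the source_proto placeholder are assumptions, not corvic-documented API):

# Hedged sketch; `source_proto` stands in for an existing TableComputeOp proto.
from corvic.op_graph import ops
from corvic_generated.orm.v1 import table_pb2

proto = table_pb2.OffsetRowsOp(source=source_proto, num_rows=10)
offset_op = ops.from_proto(proto)      # typed as OffsetRows via the new overload
assert offset_op.num_rows == 10
assert len(offset_op.sources()) == 1   # one upstream source, just like LimitRows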
corvic/sql/parse_ops.py
CHANGED
@@ -24,6 +24,7 @@ _SqlComputableOp = (
     | op_graph.op.Join
     | op_graph.op.SelectColumns
     | op_graph.op.LimitRows
+    | op_graph.op.OffsetRows
     | op_graph.op.OrderBy
     | op_graph.op.FilterRows
     | op_graph.op.DistinctRows
@@ -74,6 +75,7 @@ def can_be_sql_computed(
         | op_graph.op.Join()
         | op_graph.op.SelectColumns()
         | op_graph.op.LimitRows()
+        | op_graph.op.OffsetRows()
         | op_graph.op.OrderBy()
         | op_graph.op.FilterRows()
         | op_graph.op.DistinctRows()
@@ -362,6 +364,12 @@ class _OpLogParser:
     ) -> Ok[sqlglot.exp.Query] | InvalidArgumentError | NoRowsError:
         return self.parse(op.source).map(lambda query: query.limit(op.num_rows))
 
+    def _offset_rows_to_sql(
+        self,
+        op: op_graph.op.OffsetRows,
+    ) -> Ok[sqlglot.exp.Query] | InvalidArgumentError | NoRowsError:
+        return self.parse(op.source).map(lambda query: query.offset(op.num_rows))
+
     def _order_by_to_sql(
         self,
         op: op_graph.op.OrderBy,
@@ -715,6 +723,8 @@ class _OpLogParser:
                 return self._select_columns_to_sql(op)
             case op_graph.op.LimitRows():
                 return self._limit_rows_to_sql(op)
+            case op_graph.op.OffsetRows():
+                return self._offset_rows_to_sql(op)
             case op_graph.op.OrderBy():
                 return self._order_by_to_sql(op)
             case op_graph.op.FilterRows():
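_offset_rows_to_sql leans on sqlglot's query builder, where .offset(n) appends a standard OFFSET clause just as .limit(n) appends LIMIT. A standalone sqlglot sketch (not corvic code) of the clause it emits:

# Standalone sqlglot example showing the LIMIT/OFFSET composition relied on above.
import sqlglot

query = sqlglot.parse_one("SELECT id FROM events")
print(query.limit(50).offset(100).sql())
# roughly: SELECT id FROM events LIMIT 50 OFFSET 100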
corvic/system/_embedder.py
CHANGED
@@ -1,6 +1,6 @@
 import dataclasses
 from collections.abc import Sequence
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
 import polars as pl
@@ -9,6 +9,12 @@ from typing_extensions import Protocol
 from corvic import orm
 from corvic.result import InternalError, InvalidArgumentError, Ok
 
+if TYPE_CHECKING:
+    from transformers import (
+        CLIPModel,
+        CLIPProcessor,
+    )
+
 
 @dataclasses.dataclass
 class EmbedTextContext:
@@ -64,6 +70,12 @@ class ImageEmbedder(Protocol):
     ) -> Ok[EmbedImageResult] | InvalidArgumentError | InternalError: ...
 
 
+@dataclasses.dataclass
+class ClipModels:
+    model: "CLIPModel"
+    processor: "CLIPProcessor"
+
+
 class ClipText(TextEmbedder):
     """Clip Text embedder.
 
@@ -76,28 +88,39 @@ class ClipText(TextEmbedder):
     overcoming several major challenges in computer vision.
     """
 
-    def embed(
-        self, context: EmbedTextContext
-    ) -> Ok[EmbedTextResult] | InvalidArgumentError | InternalError:
-        import torch
+    def _load_models(self):
         from transformers import (
             CLIPModel,
             CLIPProcessor,
         )
 
         model: CLIPModel = CLIPModel.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
-            "openai/clip-vit-base-patch32"
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
         )
         processor: CLIPProcessor = CLIPProcessor.from_pretrained(  # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
-            "openai/clip-vit-base-patch32"
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+            use_fast=False,
         )
-        model.eval()
+        return ClipModels(model=model, processor=processor)
+
+    def embed(
+        self, context: EmbedTextContext
+    ) -> Ok[EmbedTextResult] | InvalidArgumentError | InternalError:
         match context.expected_coordinate_bitwidth:
             case 64:
                 coord_dtype = pl.Float64()
             case 32:
                 coord_dtype = pl.Float32()
 
+        models = self._load_models()
+        model = models.model
+        processor = models.processor
+        model.eval()
+
+        import torch
+
         with torch.no_grad():
             inputs: dict[str, torch.Tensor] = processor(  # pyright: ignore[reportAssignmentType]
                 text=context.inputs,
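The refactor pins the Hugging Face revision of openai/clip-vit-base-patch32, moves model construction into _load_models, and defers the heavy torch/transformers imports until embed actually runs. A trimmed, illustrative sketch of that lazy-load-with-pinned-revision pattern outside corvic (the function name here is not a corvic API):

# Illustrative sketch of the pattern used by ClipText._load_models.
def load_clip_pair():
    # Deferred import keeps importing the enclosing module cheap when CLIP is unused.
    from transformers import CLIPModel, CLIPProcessor

    model = CLIPModel.from_pretrained(
        pretrained_model_name_or_path="openai/clip-vit-base-patch32",
        # Pinning a revision keeps downloads reproducible across upstream updates.
        revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
    )
    processor = CLIPProcessor.from_pretrained(
        pretrained_model_name_or_path="openai/clip-vit-base-patch32",
        revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
        use_fast=False,
    )
    return model, processor

As written in the diff, each embed call constructs the models anew; callers embedding many batches may want to cache the returned ClipModels themselves.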
corvic/system/_image_embedder.py
CHANGED
@@ -1,3 +1,4 @@
+import dataclasses
 from io import BytesIO
 from typing import TYPE_CHECKING, Any
 
@@ -13,6 +14,10 @@ from corvic.system._embedder import (
 
 if TYPE_CHECKING:
     from PIL import Image
+    from transformers import (
+        CLIPModel,
+        CLIPProcessor,
+    )
 
 
 class RandomImageEmbedder(ImageEmbedder):
@@ -58,6 +63,12 @@ def image_from_bytes(
         return InvalidArgumentError("invalid image format")
 
 
+@dataclasses.dataclass
+class ClipModels:
+    model: "CLIPModel"
+    processor: "CLIPProcessor"
+
+
 class Clip(ImageEmbedder):
     """Clip image embedder.
 
@@ -70,6 +81,23 @@ class Clip(ImageEmbedder):
     overcoming several major challenges in computer vision.
     """
 
+    def _load_models(self):
+        from transformers import (
+            CLIPModel,
+            CLIPProcessor,
+        )
+
+        model: CLIPModel = CLIPModel.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+        )
+        processor: CLIPProcessor = CLIPProcessor.from_pretrained(  # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
+            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+            use_fast=False,
+        )
+        return ClipModels(model=model, processor=processor)
+
     def embed(
         self, context: EmbedImageContext
     ) -> Ok[EmbedImageResult] | InvalidArgumentError | InternalError:
@@ -99,20 +127,13 @@ class Clip(ImageEmbedder):
             )
         )
 
-        import torch
-        from transformers import (
-            CLIPModel,
-            CLIPProcessor,
-        )
-
-        model: CLIPModel = CLIPModel.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
-            "openai/clip-vit-base-patch32"
-        )
-        processor: CLIPProcessor = CLIPProcessor.from_pretrained(  # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
-            "openai/clip-vit-base-patch32"
-        )
+        models = self._load_models()
+        model = models.model
+        processor = models.processor
         model.eval()
 
+        import torch
+
         with torch.no_grad():
             inputs: dict[str, torch.FloatTensor] = processor(  # pyright: ignore[reportAssignmentType]
                 images=images, return_tensors="pt"
corvic/system/_planner.py
CHANGED
@@ -166,6 +166,9 @@ class OpGraphPlanner:
             case op_graph.op.LimitRows() | op_graph.op.SampleRows():
                 source_rows = cls.count_rows_upperbound(op.source)
                 num_rows = min(op.num_rows, source_rows)
+            case op_graph.op.OffsetRows():
+                source_rows = cls.count_rows_upperbound(op.source)
+                num_rows = max(source_rows - op.num_rows, 0)
             case op_graph.op.Empty():
                 num_rows = 0
             case op_graph.op.AggregateColumns():