corvic-engine 0.3.0rc62-cp38-abi3-win_amd64.whl → 0.3.0rc63-cp38-abi3-win_amd64.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in the public registry.
corvic/engine/_native.pyd CHANGED
Binary file
@@ -1,6 +1,6 @@
  import dataclasses
  from collections.abc import Sequence
- from typing import Any, Literal
+ from typing import TYPE_CHECKING, Any, Literal

  import numpy as np
  import polars as pl
@@ -9,6 +9,12 @@ from typing_extensions import Protocol
  from corvic import orm
  from corvic.result import InternalError, InvalidArgumentError, Ok

+ if TYPE_CHECKING:
+     from transformers import (
+         CLIPModel,
+         CLIPProcessor,
+     )
+

  @dataclasses.dataclass
  class EmbedTextContext:
@@ -64,6 +70,12 @@ class ImageEmbedder(Protocol):
      ) -> Ok[EmbedImageResult] | InvalidArgumentError | InternalError: ...


+ @dataclasses.dataclass
+ class ClipModels:
+     model: "CLIPModel"
+     processor: "CLIPProcessor"
+
+
  class ClipText(TextEmbedder):
      """Clip Text embedder.

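The hunks above add imports that are needed only for type annotations plus a small container for the loaded CLIP objects. A minimal standalone sketch of that idiom (illustrative, not corvic's actual module): the transformers import runs only under a static type checker, and the annotations stay strings at runtime, so importing the module neither requires nor pays for transformers.

    import dataclasses
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # evaluated by pyright/mypy only, never at runtime
        from transformers import CLIPModel, CLIPProcessor

    @dataclasses.dataclass
    class ClipModels:
        # string annotations are never resolved at runtime, so this dataclass can be
        # defined and instantiated even when transformers is not importable
        model: "CLIPModel"
        processor: "CLIPProcessor"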
@@ -76,28 +88,39 @@ class ClipText(TextEmbedder):
      overcoming several major challenges in computer vision.
      """

-     def embed(
-         self, context: EmbedTextContext
-     ) -> Ok[EmbedTextResult] | InvalidArgumentError | InternalError:
-         import torch
+     def _load_models(self):
          from transformers import (
              CLIPModel,
              CLIPProcessor,
          )

          model: CLIPModel = CLIPModel.from_pretrained( # pyright: ignore[reportUnknownMemberType]
-             "openai/clip-vit-base-patch32"
+             pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+             revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
          )
          processor: CLIPProcessor = CLIPProcessor.from_pretrained( # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
-             "openai/clip-vit-base-patch32"
+             pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+             revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+             use_fast=False,
          )
-         model.eval()
+         return ClipModels(model=model, processor=processor)
+
+     def embed(
+         self, context: EmbedTextContext
+     ) -> Ok[EmbedTextResult] | InvalidArgumentError | InternalError:
          match context.expected_coordinate_bitwidth:
              case 64:
                  coord_dtype = pl.Float64()
              case 32:
                  coord_dtype = pl.Float32()

+         models = self._load_models()
+         model = models.model
+         processor = models.processor
+         model.eval()
+
+         import torch
+
          with torch.no_grad():
              inputs: dict[str, torch.Tensor] = processor( # pyright: ignore[reportAssignmentType]
                  text=context.inputs,
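Taken together, these hunks pull model construction out of embed() into a _load_models() helper that pins the Hugging Face revision, and defer the heavy torch import until embedding actually runs. A self-contained sketch of the same flow, assuming transformers and torch are installed (embed_texts is a hypothetical helper, not corvic's API; the CLIPModel/CLIPProcessor calls are standard transformers methods):

    def embed_texts(texts: list[str]):
        # Lazy import: callers that never embed do not need transformers loaded.
        from transformers import CLIPModel, CLIPProcessor

        # Pinning revision to a specific commit makes the weights download
        # reproducible and guards against upstream changes to the model repo.
        model = CLIPModel.from_pretrained(
            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
        )
        processor = CLIPProcessor.from_pretrained(
            pretrained_model_name_or_path="openai/clip-vit-base-patch32",
            revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
            use_fast=False,  # as in the diff: stick with the slow (Python) tokenizer
        )
        model.eval()  # inference mode: disables dropout

        import torch  # deferred, mirroring the new embed()

        with torch.no_grad():
            inputs = processor(
                text=texts, return_tensors="pt", padding=True, truncation=True
            )
            return model.get_text_features(**inputs)  # (len(texts), 512) tensor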
@@ -1,3 +1,4 @@
+ import dataclasses
  from io import BytesIO
  from typing import TYPE_CHECKING, Any

@@ -13,6 +14,10 @@ from corvic.system._embedder import (

  if TYPE_CHECKING:
      from PIL import Image
+     from transformers import (
+         CLIPModel,
+         CLIPProcessor,
+     )


  class RandomImageEmbedder(ImageEmbedder):
@@ -58,6 +63,12 @@ def image_from_bytes(
          return InvalidArgumentError("invalid image format")


+ @dataclasses.dataclass
+ class ClipModels:
+     model: "CLIPModel"
+     processor: "CLIPProcessor"
+
+
  class Clip(ImageEmbedder):
      """Clip image embedder.

@@ -70,6 +81,23 @@ class Clip(ImageEmbedder):
      overcoming several major challenges in computer vision.
      """

+     def _load_models(self):
+         from transformers import (
+             CLIPModel,
+             CLIPProcessor,
+         )
+
+         model: CLIPModel = CLIPModel.from_pretrained( # pyright: ignore[reportUnknownMemberType]
+             pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+             revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+         )
+         processor: CLIPProcessor = CLIPProcessor.from_pretrained( # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
+             pretrained_model_name_or_path="openai/clip-vit-base-patch32",
+             revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
+             use_fast=False,
+         )
+         return ClipModels(model=model, processor=processor)
+
      def embed(
          self, context: EmbedImageContext
      ) -> Ok[EmbedImageResult] | InvalidArgumentError | InternalError:
@@ -99,20 +127,13 @@ class Clip(ImageEmbedder):
                  )
              )

-         import torch
-         from transformers import (
-             CLIPModel,
-             CLIPProcessor,
-         )
-
-         model: CLIPModel = CLIPModel.from_pretrained( # pyright: ignore[reportUnknownMemberType]
-             "openai/clip-vit-base-patch32"
-         )
-         processor: CLIPProcessor = CLIPProcessor.from_pretrained( # pyright: ignore[reportUnknownMemberType, reportAssignmentType]
-             "openai/clip-vit-base-patch32"
-         )
+         models = self._load_models()
+         model = models.model
+         processor = models.processor
          model.eval()

+         import torch
+
          with torch.no_grad():
              inputs: dict[str, torch.FloatTensor] = processor( # pyright: ignore[reportAssignmentType]
                  images=images, return_tensors="pt"
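The image embedder gets the same reshaping: loading sits behind _load_models() with the pinned revision, the loaded pair travels in a ClipModels bundle, and torch is imported only inside embed(). A hedged end-to-end sketch of that shape (LoadedClip, load_clip, and embed_images are hypothetical names; only the transformers calls mirror the diff):

    import dataclasses
    from typing import Any

    @dataclasses.dataclass
    class LoadedClip:  # plays the role of the ClipModels bundle added above
        model: Any
        processor: Any

    def load_clip() -> LoadedClip:
        from transformers import CLIPModel, CLIPProcessor

        return LoadedClip(
            model=CLIPModel.from_pretrained(
                pretrained_model_name_or_path="openai/clip-vit-base-patch32",
                revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
            ),
            processor=CLIPProcessor.from_pretrained(
                pretrained_model_name_or_path="openai/clip-vit-base-patch32",
                revision="5812e510083bb2d23fa43778a39ac065d205ed4d",
                use_fast=False,
            ),
        )

    def embed_images(bundle: LoadedClip, images):
        # images: a sequence of PIL.Image.Image objects, e.g. decoded from bytes
        bundle.model.eval()

        import torch  # heavy import deferred to embedding time, as in the diff

        with torch.no_grad():
            inputs = bundle.processor(images=images, return_tensors="pt")
            return bundle.model.get_image_features(**inputs)  # (len(images), 512)

Loading once and passing the bundle around is what the ClipModels dataclass enables; each embed() call then only pays for inference rather than reconstructing the model and processor.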