gimlet-api 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gml/tensor.py CHANGED
@@ -19,6 +19,7 @@ from typing import List, Literal, Optional, Tuple
19
19
 
20
20
  import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
21
21
  import google.protobuf.wrappers_pb2 as wrapperspb
22
+ import numpy as np
22
23
 
23
24
 
24
25
  def box_format_str_to_proto(box_format: str):
@@ -122,17 +123,21 @@ class DetectionNumCandidatesDimension(DimensionSemantics):
122
123
  class DetectionOutputDimension(DimensionSemantics):
123
124
  def __init__(
124
125
  self,
125
- coordinates_start_index: int,
126
- box_format: BoundingBoxFormat,
126
+ coordinates_start_index: Optional[int] = None,
127
+ box_format: Optional[BoundingBoxFormat] = None,
127
128
  box_confidence_index: Optional[int] = None,
128
129
  class_index: Optional[int] = None,
129
130
  scores_range: Optional[Tuple[int, int]] = None,
131
+ scores_are_logits: bool = False,
130
132
  ):
131
- self.coordinates_range = (coordinates_start_index, 4)
133
+ self.coordinates_range = None
134
+ if coordinates_start_index is not None:
135
+ self.coordinates_range = (coordinates_start_index, 4)
132
136
  self.box_format = box_format
133
137
  self.box_confidence_index = box_confidence_index
134
138
  self.class_index = class_index
135
139
  self.scores_range = scores_range
140
+ self.scores_are_logits = scores_are_logits
136
141
 
137
142
  def to_proto(self) -> modelexecpb.DimensionSemantics:
138
143
  scores_range = None
@@ -143,20 +148,29 @@ class DetectionOutputDimension(DimensionSemantics):
143
148
  size=self.scores_range[1],
144
149
  )
145
150
  )
146
- box_confidence_index = -1
151
+ box_confidence_index = np.iinfo(np.int32).min
147
152
  if self.box_confidence_index is not None:
148
153
  box_confidence_index = self.box_confidence_index
154
+ box_format_proto = None
155
+ if self.box_format is not None:
156
+ box_format_proto = self.box_format.to_proto()
157
+ box_coordinate_range_proto = None
158
+ if self.coordinates_range is not None:
159
+ box_coordinate_range_proto = (
160
+ modelexecpb.DimensionSemantics.DetectionOutputParams.IndexRange(
161
+ start=self.coordinates_range[0],
162
+ size=self.coordinates_range[1],
163
+ )
164
+ )
149
165
  return modelexecpb.DimensionSemantics(
150
166
  kind=modelexecpb.DimensionSemantics.DIMENSION_SEMANTICS_KIND_DETECTION_OUTPUT,
151
167
  detection_output_params=modelexecpb.DimensionSemantics.DetectionOutputParams(
152
- box_coordinate_range=modelexecpb.DimensionSemantics.DetectionOutputParams.IndexRange(
153
- start=self.coordinates_range[0],
154
- size=self.coordinates_range[1],
155
- ),
156
- box_format=self.box_format.to_proto(),
168
+ box_coordinate_range=box_coordinate_range_proto,
169
+ box_format=box_format_proto,
157
170
  box_confidence_index=box_confidence_index,
158
171
  class_index=self.class_index,
159
172
  scores_range=scores_range,
173
+ scores_are_logits=self.scores_are_logits,
160
174
  ),
161
175
  )
162
176
 
@@ -171,16 +185,23 @@ def _segmentation_mask_kind_to_proto(kind: str):
171
185
  return (
172
186
  modelexecpb.DimensionSemantics.SegmentationMaskParams.SEGMENTATION_MASK_KIND_CLASS_LABEL
173
187
  )
174
- case "score_masks":
188
+ case "score_mask":
175
189
  return (
176
190
  modelexecpb.DimensionSemantics.SegmentationMaskParams.SEGMENTATION_MASK_KIND_SCORE
177
191
  )
192
+ case "logits_mask":
193
+ return (
194
+ modelexecpb.DimensionSemantics.SegmentationMaskParams.SEGMENTATION_MASK_KIND_LOGITS
195
+ )
178
196
  case _:
179
197
  raise ValueError("Invalid segmentation mask kind: {}".format(kind))
180
198
 
181
199
 
182
200
  class SegmentationMaskChannel(DimensionSemantics):
183
- def __init__(self, kind: Literal["bool_masks", "int_label_masks", "score_masks"]):
201
+ def __init__(
202
+ self,
203
+ kind: Literal["bool_masks", "int_label_masks", "score_mask", "logits_mask"],
204
+ ):
184
205
  self.kind = _segmentation_mask_kind_to_proto(kind)
185
206
 
186
207
  def to_proto(self) -> modelexecpb.DimensionSemantics:
@@ -224,13 +245,63 @@ class RegressionValueDimension(DimensionSemantics):
224
245
  )
225
246
 
226
247
 
248
+ class TokensDimension(DimensionSemantics):
249
+ def to_proto(self) -> modelexecpb.DimensionSemantics:
250
+ return modelexecpb.DimensionSemantics(
251
+ kind=modelexecpb.DimensionSemantics.DIMENSION_SEMANTICS_KIND_TOKENS,
252
+ )
253
+
254
+
255
+ class AttentionMaskDimension(DimensionSemantics):
256
+ def to_proto(self) -> modelexecpb.DimensionSemantics:
257
+ return modelexecpb.DimensionSemantics(
258
+ kind=modelexecpb.DimensionSemantics.DIMENSION_SEMANTICS_KIND_ATTENTION_MASK,
259
+ )
260
+
261
+
262
+ class VocabLogitsDimension(DimensionSemantics):
263
+ def to_proto(self) -> modelexecpb.DimensionSemantics:
264
+ return modelexecpb.DimensionSemantics(
265
+ kind=modelexecpb.DimensionSemantics.DIMENSION_SEMANTICS_KIND_VOCAB_LOGITS,
266
+ )
267
+
268
+
269
+ class EmbeddingDimension(DimensionSemantics):
270
+ def to_proto(self) -> modelexecpb.DimensionSemantics:
271
+ return modelexecpb.DimensionSemantics(
272
+ kind=modelexecpb.DimensionSemantics.DIMENSION_SEMANTICS_KIND_EMBEDDING,
273
+ )
274
+
275
+
227
276
  class TensorSemantics:
228
- def __init__(self, dimensions: List[DimensionSemantics]):
277
+ def __init__(
278
+ self,
279
+ dimensions: List[DimensionSemantics],
280
+ kind: modelexecpb.TensorSemantics.TensorSemanticsKind = modelexecpb.TensorSemantics.TENSOR_SEMANTICS_KIND_DIMENSION,
281
+ ):
229
282
  self.dimensions = dimensions
283
+ self.kind = kind
230
284
 
231
285
  def to_proto(self) -> modelexecpb.TensorSemantics:
232
286
  return modelexecpb.TensorSemantics(
233
287
  dimensions=[dim.to_proto() for dim in self.dimensions],
288
+ kind=self.kind,
289
+ )
290
+
291
+
292
+ class UnusedTensorSemantics(TensorSemantics):
293
+ def __init__(self):
294
+ super().__init__(
295
+ dimensions=[],
296
+ kind=modelexecpb.TensorSemantics.TENSOR_SEMANTICS_KIND_UNUSED,
297
+ )
298
+
299
+
300
+ class AttentionKeyValueCacheTensorSemantics(TensorSemantics):
301
+ def __init__(self):
302
+ super().__init__(
303
+ dimensions=[],
304
+ kind=modelexecpb.TensorSemantics.TENSOR_SEMANTICS_KIND_ATTENTION_KEY_VALUE_CACHE,
234
305
  )
235
306
 
236
307
 
@@ -297,37 +368,47 @@ class BinarySegmentationMasks(TensorSemantics):
297
368
  class YOLOOutput(TensorSemantics):
298
369
  """YOLOOutput represents a detection output from a YOLO model.
299
370
 
300
- The YOLO model should output a tensor of shape [B, N_BOXES, (4 or 5) + NUM_CLASSES].
371
+ If `version="v5"` then the YOLO model should output a tensor of shape [B, NUM_BOXES, 5 + NUM_CLASSES].
372
+ If `version="v8"` then it should output a tensor of shape [B, 4 + NUM_CLASSES, NUM_BOXES].
301
373
  """
302
374
 
303
- def __init__(self, has_box_conf=True):
375
+ def __init__(self, version="v5"):
376
+ if version != "v5" and version != "v8":
377
+ raise ValueError(
378
+ "gml.tensor.YOLOOutput alias currently only supports YOLO versions v5 and v8"
379
+ )
304
380
  dimensions = [
305
381
  BatchDimension(),
306
- DetectionNumCandidatesDimension(is_nms=False),
307
382
  ]
308
383
 
309
- if has_box_conf:
310
- dimensions.append(
311
- DetectionOutputDimension(
312
- coordinates_start_index=0,
313
- box_format=BoundingBoxFormat(
314
- box_format="cxcywh",
315
- is_normalized=False,
384
+ if version == "v5":
385
+ dimensions.extend(
386
+ [
387
+ DetectionNumCandidatesDimension(is_nms=False),
388
+ DetectionOutputDimension(
389
+ coordinates_start_index=0,
390
+ box_format=BoundingBoxFormat(
391
+ box_format="cxcywh",
392
+ is_normalized=False,
393
+ ),
394
+ box_confidence_index=4,
395
+ scores_range=(5, -1),
316
396
  ),
317
- box_confidence_index=4,
318
- scores_range=(5, -1),
319
- )
397
+ ]
320
398
  )
321
- else:
322
- dimensions.append(
323
- DetectionOutputDimension(
324
- coordinates_start_index=0,
325
- box_format=BoundingBoxFormat(
326
- box_format="cxcywh",
327
- is_normalized=False,
399
+ elif version == "v8":
400
+ dimensions.extend(
401
+ [
402
+ DetectionOutputDimension(
403
+ coordinates_start_index=0,
404
+ box_format=BoundingBoxFormat(
405
+ box_format="cxcywh",
406
+ is_normalized=False,
407
+ ),
408
+ scores_range=(4, -1),
328
409
  ),
329
- scores_range=(4, -1),
330
- )
410
+ DetectionNumCandidatesDimension(is_nms=False),
411
+ ]
331
412
  )
332
413
 
333
414
  super().__init__(dimensions)