gimlet-api 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
- {gimlet_api-0.0.7.dist-info → gimlet_api-0.0.9.dist-info}/METADATA +1 -1
- {gimlet_api-0.0.7.dist-info → gimlet_api-0.0.9.dist-info}/RECORD +19 -18
- gml/client.py +9 -8
- gml/compile.py +13 -93
- gml/device.py +5 -7
- gml/hf.py +302 -32
- gml/model.py +2 -1
- gml/pipelines.py +146 -7
- gml/preprocessing.py +2 -1
- gml/proto/src/api/corepb/v1/controlplane_pb2.py +40 -20
- gml/proto/src/api/corepb/v1/cp_edge_pb2.py +43 -49
- gml/proto/src/api/corepb/v1/device_info_pb2.py +19 -7
- gml/proto/src/api/corepb/v1/gem_config_pb2.py +24 -15
- gml/proto/src/api/corepb/v1/mediastream_pb2.py +23 -19
- gml/proto/src/api/corepb/v1/model_exec_pb2.py +131 -112
- gml/proto/src/controlplane/compiler/cpb/v1/cpb_pb2.py +10 -11
- gml/register_submodules.py +134 -0
- gml/tensor.py +6 -1
- {gimlet_api-0.0.7.dist-info → gimlet_api-0.0.9.dist-info}/WHEEL +0 -0
gml/hf.py
CHANGED
@@ -15,14 +15,23 @@
 # SPDX-License-Identifier: Apache-2.0

 import glob
+import math
 import tempfile
+import warnings
 from collections.abc import Iterable
 from pathlib import Path
 from typing import Any, BinaryIO, Dict, List, Optional, TextIO, Tuple

-import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
 import torch
 import transformers
+from transformers import (
+    BaseImageProcessor,
+    Pipeline,
+    PreTrainedModel,
+    PreTrainedTokenizer,
+)
+
+import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
 from gml.asset_manager import AssetManager
 from gml.model import GenerationConfig, Model, TorchModel
 from gml.preprocessing import (
@@ -34,10 +43,12 @@ from gml.preprocessing import (
 )
 from gml.tensor import (
     AttentionKeyValueCacheTensorSemantics,
+    AttentionMaskDimension,
     BatchDimension,
     BoundingBoxFormat,
     DetectionNumCandidatesDimension,
     DetectionOutputDimension,
+    DimensionSemantics,
     ImageChannelDimension,
     ImageHeightDimension,
     ImageWidthDimension,
@@ -46,12 +57,8 @@ from gml.tensor import (
     TokensDimension,
     VocabLogitsDimension,
 )
-
-
-    Pipeline,
-    PreTrainedModel,
-    PreTrainedTokenizer,
-)
+
+FALLBACK_RESIZE_SIZE = 512


 class HuggingFaceTokenizer(Model):
@@ -77,7 +84,6 @@ class HuggingFaceTokenizer(Model):


 class HuggingFaceGenerationConfig(GenerationConfig):
-
     def __init__(self, model: PreTrainedModel):
         config = model.generation_config
         eos_tokens = config.eos_token_id
@@ -242,25 +248,34 @@ class HuggingFaceTextGenerationPipeline:


 class HuggingFaceImageProcessor:
-
     def __init__(
         self,
         model: PreTrainedModel,
         processor: BaseImageProcessor,
+        image_size_override: Optional[Tuple[int, int]] = None,
     ):
         self.model = model
         self.processor = processor
+        self.image_size_override = image_size_override

     def input_spec(self) -> Dict[str, Any]:
         target_size = None
         image_preprocessing_steps = []
-
-            hasattr(self.processor, "do_resize")
-
-
-
+        has_do_resize = (
+            hasattr(self.processor, "do_resize") and self.processor.do_resize
+        )
+        has_do_pad = hasattr(self.processor, "do_pad") and self.processor.do_pad
+        # NOTE: it is possible for both do_resize and do_pad to be set, in which case we only use do_resize.
+        if has_do_resize:
             target_size, preprocessing_step = self._convert_resize()
             image_preprocessing_steps.append(preprocessing_step)
+        elif has_do_pad:
+            target_size, preprocessing_step = self._convert_pad()
+            image_preprocessing_steps.append(preprocessing_step)
+        else:
+            raise ValueError(
+                "could not determine target size for resize from model config"
+            )

         if (
             hasattr(self.processor, "do_rescale")
@@ -291,7 +306,7 @@ class HuggingFaceImageProcessor:
         # TODO(james): figure out if this is specified anywhere in the huggingface pipeline.
         channel_format = "rgb"

-        dimensions = [
+        dimensions: list[DimensionSemantics] = [
             BatchDimension(),
         ]
         input_shape = [1]
@@ -319,6 +334,14 @@ class HuggingFaceImageProcessor:
             raise NotImplementedError(
                 "only semantic segmentation is currently supported"
             )
+        # TODO(philkuz): Support panoptic segmentation models. Multiple outputs come from panoptic segmentation models.
+        # We need to decide whether we should invest in converting the panoptic segmentation output to semantic segmentation
+        # format or if we should directly support panoptic segmentation output.
+        if hasattr(self.processor, "post_process_panoptic_segmentation"):
+            raise NotImplementedError(
+                "panoptic segmentation models are not supported yet"
+            )
+
         dimensions = [
             BatchDimension(),
             # TODO(james): verify all semantic segmentation in hugging face output a logits mask.
@@ -342,21 +365,38 @@ class HuggingFaceImageProcessor:
             "class_labels": labels,
         }

-    def
+    def output_spec_depth(self) -> Dict[str, Any]:
+        dimensions = [
+            BatchDimension(),
+            ImageHeightDimension(),
+            ImageWidthDimension(),
+        ]
+        output_tensor_semantics = [
+            TensorSemantics(dimensions),
+        ]
+        return {
+            "output_tensor_semantics": output_tensor_semantics,
+        }
+
+    def output_spec_object_detection(self, zero_shot=False) -> Dict[str, Any]:
         if not hasattr(self.processor, "post_process_object_detection"):
             raise NotImplementedError(
                 "processor must have post_process_object_detection set"
             )

-
-
-
-
-
-
-
-
-
+        if zero_shot:
+            num_classes = -1
+            labels = []
+        else:
+            id_to_label = self.model.config.id2label
+            max_id = max(id_to_label)
+            labels = []
+            for i in range(max_id):
+                if i not in id_to_label:
+                    labels.append("")
+                    continue
+                labels.append(id_to_label[i])
+            num_classes = max_id + 1

         # TODO(james): verify assumptions made here apply broadly.
         output_tensor_semantics = []
@@ -366,7 +406,7 @@ class HuggingFaceImageProcessor:
             DetectionNumCandidatesDimension(is_nms=False),
             DetectionOutputDimension(
                 scores_range=(0, num_classes),
-                scores_are_logits=
+                scores_are_logits=not zero_shot,
             ),
         ]
         output_tensor_semantics.append(TensorSemantics(logits_dimensions))
@@ -385,12 +425,45 @@ class HuggingFaceImageProcessor:
             "class_labels": labels,
         }

+    def _get_size(self) -> Dict[str, int]:
+        size = None
+        if self.image_size_override:
+            size = {
+                "height": self.image_size_override[0],
+                "width": self.image_size_override[1],
+            }
+        elif hasattr(self.processor, "size") and self.processor.size is not None:
+            size = self.processor.size
+        elif (
+            hasattr(self.model.config, "image_size")
+            and self.model.config.image_size is not None
+        ):
+            size = {
+                "height": self.model.config.image_size,
+                "width": self.model.config.image_size,
+            }
+        else:
+            warnings.warn(
+                f"using fallback resize size of {FALLBACK_RESIZE_SIZE} for model",
+                stacklevel=1,
+            )
+            size = {
+                "width": FALLBACK_RESIZE_SIZE,
+                "height": FALLBACK_RESIZE_SIZE,
+            }
+        return size
+
     def _convert_resize(self) -> Tuple[Tuple[int, int], ImagePreprocessingStep]:
-        size = self.
+        size = self._get_size()
+        size_divisor: int | None = None
+        if hasattr(self.processor, "size_divisor"):
+            size_divisor = self.processor.size_divisor
+
         target_size = None
         preprocess_step = None
+
         if "height" in size and "width" in size:
-            target_size =
+            target_size = (size["height"], size["width"])
             preprocess_step = ResizeImage()
         elif (
             "shortest_edge" in size
@@ -410,12 +483,55 @@ class HuggingFaceImageProcessor:
                 if not min_size or edge_size < min_size:
                     min_size = edge_size

-
+            if min_size is None:
+                raise ValueError(
+                    "could not determine target size for resize from model config"
+                )
+            target_size = (min_size, min_size)
             preprocess_step = LetterboxImage()
         else:
             raise ValueError(
                 "could not determine target size for resize from model config"
             )
+        if size_divisor:
+            target_size = (
+                math.ceil(target_size[0] / size_divisor) * size_divisor,
+                math.ceil(target_size[1] / size_divisor) * size_divisor,
+            )
+        return target_size, preprocess_step
+
+    def _convert_pad(self) -> Tuple[Tuple[int, int], ImagePreprocessingStep]:
+        # NOTE: There is a wide variety of ways that huggingface pads images.
+        # We found at least 3 different ways to pad images in the codebase:
+        # 1. Center pad (pad top, left, bottom, right) to match target size
+        # https://github.com/huggingface/transformers/blob/70b07d97cf2c5f61fff55700b65528a1b6845cd2/src/transformers/models/dpt/image_processing_dpt.py#L231
+        # 2. Right/Top pad (pad top, and right) to match target size
+        # https://github.com/huggingface/transformers/blob/174890280b340b89c5bfa092f6b4fb0e2dc2d7fc/src/transformers/models/conditional_detr/image_processing_conditional_detr.py#L846
+        # 3. Pad to nearest multiple of size_divisor
+        # https://github.com/huggingface/transformers/blob/70b07d97cf2c5f61fff55700b65528a1b6845cd2/src/transformers/models/llava_onevision/image_processing_llava_onevision.py#L177-179
+        #
+        # We decided to simply implement padding with LetterboxImage(),
+        # because we assume the models won't be that sensitive to the type of padding,
+        # but this may need to be revisited in the future.
+        size = self._get_size()
+        size_divisor: int | None = None
+        if hasattr(self.processor, "size_divisor"):
+            size_divisor = self.processor.size_divisor
+
+        target_size = None
+        preprocess_step = None
+        if "height" in size and "width" in size:
+            target_size = (size["height"], size["width"])
+            preprocess_step = LetterboxImage()
+        else:
+            raise ValueError(
+                "could not determine target size for resize from model config"
+            )
+        if size_divisor:
+            target_size = (
+                math.ceil(target_size[0] / size_divisor) * size_divisor,
+                math.ceil(target_size[1] / size_divisor) * size_divisor,
+            )
         return target_size, preprocess_step


@@ -424,11 +540,13 @@ class HuggingFaceImageSegmentationPipeline:
         self,
         pipeline: Pipeline,
         name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
     ):
         self.pipeline = pipeline
         if name is None:
             name = pipeline.model.name_or_path

+        self.image_size_override = image_size_override
         self.model = TorchModel(
             name,
             torch_module=self.pipeline.model,
@@ -446,7 +564,9 @@ class HuggingFaceImageSegmentationPipeline:
         )

         image_processor = HuggingFaceImageProcessor(
-            self.pipeline.model,
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
         )
         spec = image_processor.input_spec()
         spec.update(image_processor.output_spec_segmentation())
@@ -471,11 +591,13 @@ class HuggingFaceObjectDetectionPipeline:
         self,
         pipeline: Pipeline,
         name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
     ):
         self.pipeline = pipeline
         if name is None:
             name = pipeline.model.name_or_path

+        self.image_size_override = image_size_override
         self.model = TorchModel(
             name,
             torch_module=ObjectDetectionWrapper(self.pipeline.model),
@@ -493,7 +615,9 @@ class HuggingFaceObjectDetectionPipeline:
         )

         image_processor = HuggingFaceImageProcessor(
-            self.pipeline.model,
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
         )
         spec = image_processor.input_spec()
         spec.update(image_processor.output_spec_object_detection())
@@ -503,6 +627,141 @@ class HuggingFaceObjectDetectionPipeline:
         return [self.model]


+class ZeroShotObjectDetectionWrapper(torch.nn.Module):
+    def __init__(self, model: PreTrainedModel):
+        super().__init__()
+        self.model = model
+
+    def forward(self, image, tokens, attention_mask):
+        outputs = self.model(
+            input_ids=tokens, pixel_values=image, attention_mask=attention_mask
+        )
+        return torch.sigmoid(outputs.logits), outputs.pred_boxes
+
+
+class HuggingFaceZeroShotObjectDetectionPipeline:
+    def __init__(
+        self,
+        pipeline: Pipeline,
+        name: Optional[str] = None,
+        tokenizer_name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
+    ):
+        self.pipeline = pipeline
+        if name is None:
+            name = pipeline.model.name_or_path
+
+        self.tokenizer_model = HuggingFaceTokenizer(
+            self.pipeline.tokenizer, tokenizer_name
+        )
+
+        self.image_size_override = image_size_override
+        self.detection_model = TorchModel(
+            name,
+            torch_module=ZeroShotObjectDetectionWrapper(self.pipeline.model),
+            **self._guess_model_spec(),
+        )
+
+    def _add_zero_shot_inputs(self, spec: Dict):
+        example_inputs = spec["example_inputs"]
+        if "dynamic_shapes" not in spec:
+            spec["dynamic_shapes"] = [{} for _ in example_inputs]
+
+        max_length = self.pipeline.model.config.text_config.max_length
+        example_inputs.extend(
+            [
+                torch.randint(200, [2, max_length]).to(torch.int32),
+                torch.ones([2, max_length]).to(torch.int32),
+            ]
+        )
+
+        input_tensor_semantics = spec["input_tensor_semantics"]
+        input_tensor_semantics.extend(
+            [
+                TensorSemantics(
+                    [
+                        BatchDimension(),
+                        TokensDimension(),
+                    ]
+                ),
+                TensorSemantics(
+                    [
+                        BatchDimension(),
+                        AttentionMaskDimension(),
+                    ]
+                ),
+            ]
+        )
+
+        spec["dynamic_shapes"].extend(
+            [
+                {0: "num_labels"},
+                {0: "num_labels"},
+            ]
+        )
+
+    def _guess_model_spec(self) -> Dict:
+        if self.pipeline.image_processor is None:
+            raise ValueError(
+                "Could not determine image preprocessing for pipeline with image_processor=None"
+            )
+
+        image_processor = HuggingFaceImageProcessor(
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
+        )
+        spec = image_processor.input_spec()
+        self._add_zero_shot_inputs(spec)
+        spec.update(image_processor.output_spec_object_detection(zero_shot=True))
+        return spec
+
+    def models(self) -> List[Model]:
+        return [self.detection_model, self.tokenizer_model]
+
+
+class HuggingFaceDepthEstimationPipeline:
+    def __init__(
+        self,
+        pipeline: Pipeline,
+        name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
+    ):
+        self.pipeline = pipeline
+        if name is None:
+            name = pipeline.model.name_or_path
+
+        self.image_size_override = image_size_override
+
+        self.model = TorchModel(
+            name,
+            torch_module=self.pipeline.model,
+            **self._guess_model_spec(),
+        )
+
+    def _guess_model_spec(self) -> Dict:
+        if self.pipeline.image_processor is None:
+            raise ValueError(
+                "Could not determine image preprocessing for pipeline with image_processor=None"
+            )
+        if self.pipeline.tokenizer is not None:
+            raise NotImplementedError(
+                "HuggingFaceDepthEstimationPipeline does not yet support token inputs"
+            )
+
+        image_processor = HuggingFaceImageProcessor(
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
+        )
+        spec = image_processor.input_spec()
+        spec.update(image_processor.output_spec_depth())
+        return spec
+
+    def models(self) -> List[Model]:
+        return [self.model]
+
+
 def import_huggingface_pipeline(pipeline: Pipeline, **kwargs) -> List[Model]:
     if pipeline.framework != "pt":
         raise ValueError(
@@ -517,8 +776,19 @@ def import_huggingface_pipeline(pipeline: Pipeline, **kwargs) -> List[Model]:
         return HuggingFaceImageSegmentationPipeline(pipeline, **kwargs).models()
     elif pipeline.task == "image-segmentation" is False else "object-detection":
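The new task branches in import_huggingface_pipeline mean that depth-estimation and zero-shot-object-detection transformers pipelines can now be converted alongside the previously supported tasks. Below is a minimal usage sketch under stated assumptions: the checkpoint names are illustrative only, the pipeline must use the PyTorch ("pt") framework, and image_size_override is simply forwarded through **kwargs to the new pipeline wrappers and on to HuggingFaceImageProcessor.

import transformers

from gml.hf import import_huggingface_pipeline

# Depth estimation: returns a single TorchModel wrapping the pipeline's model.
# The checkpoint name is illustrative; any PyTorch depth-estimation pipeline
# should follow the same code path.
depth_pipe = transformers.pipeline("depth-estimation", model="Intel/dpt-large")
models = import_huggingface_pipeline(depth_pipe, image_size_override=(384, 384))

# Zero-shot object detection: returns the exported detection model followed by
# its tokenizer model, matching HuggingFaceZeroShotObjectDetectionPipeline.models().
zs_pipe = transformers.pipeline(
    "zero-shot-object-detection", model="google/owlvit-base-patch32"
)
detection_model, tokenizer_model = import_huggingface_pipeline(zs_pipe)

Note that when the processor defines a size_divisor, both _convert_resize and _convert_pad round the target size up to the nearest multiple, so a 518-pixel edge with size_divisor=32 becomes 544.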
gml/model.py
CHANGED
@@ -21,8 +21,9 @@ import io
 from pathlib import Path
 from typing import BinaryIO, Dict, List, Literal, Optional, Sequence, TextIO, Tuple

-import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
 import torch
+
+import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
 from gml.asset_manager import AssetManager, TempFileAssetManager
 from gml.compile import to_torch_mlir
 from gml.preprocessing import ImagePreprocessingStep