gimlet-api 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gimlet_api-0.0.6.dist-info → gimlet_api-0.0.8.dist-info}/METADATA +3 -2
- {gimlet_api-0.0.6.dist-info → gimlet_api-0.0.8.dist-info}/RECORD +20 -16
- gml/asset_manager.py +75 -0
- gml/client.py +7 -6
- gml/compile.py +148 -84
- gml/device.py +15 -16
- gml/hf.py +299 -34
- gml/model.py +28 -12
- gml/pipelines.py +120 -40
- gml/preprocessing.py +2 -1
- gml/proto/src/api/corepb/v1/controlplane_pb2.py +37 -18
- gml/proto/src/api/corepb/v1/cp_edge_pb2.py +67 -77
- gml/proto/src/api/corepb/v1/device_info_pb2.py +51 -0
- gml/proto/src/api/corepb/v1/gem_config_pb2.py +45 -0
- gml/proto/src/api/corepb/v1/mediastream_pb2.py +23 -19
- gml/proto/src/api/corepb/v1/model_exec_pb2.py +127 -112
- gml/proto/src/controlplane/compiler/cpb/v1/cpb_pb2.py +7 -11
- gml/register_submodules.py +134 -0
- gml/tensor.py +2 -1
- {gimlet_api-0.0.6.dist-info → gimlet_api-0.0.8.dist-info}/WHEEL +0 -0
gml/hf.py
CHANGED
@@ -15,14 +15,24 @@
 # SPDX-License-Identifier: Apache-2.0

 import glob
+import math
 import tempfile
+import warnings
 from collections.abc import Iterable
 from pathlib import Path
 from typing import Any, BinaryIO, Dict, List, Optional, TextIO, Tuple

-import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
 import torch
 import transformers
+from transformers import (
+    BaseImageProcessor,
+    Pipeline,
+    PreTrainedModel,
+    PreTrainedTokenizer,
+)
+
+import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
+from gml.asset_manager import AssetManager
 from gml.model import GenerationConfig, Model, TorchModel
 from gml.preprocessing import (
     ImagePreprocessingStep,
@@ -33,10 +43,12 @@ from gml.preprocessing import (
 )
 from gml.tensor import (
     AttentionKeyValueCacheTensorSemantics,
+    AttentionMaskDimension,
     BatchDimension,
     BoundingBoxFormat,
     DetectionNumCandidatesDimension,
     DetectionOutputDimension,
+    DimensionSemantics,
     ImageChannelDimension,
     ImageHeightDimension,
     ImageWidthDimension,
@@ -45,12 +57,8 @@ from gml.tensor import (
     TokensDimension,
     VocabLogitsDimension,
 )
-
-
-    Pipeline,
-    PreTrainedModel,
-    PreTrainedTokenizer,
-)
+
+FALLBACK_RESIZE_SIZE = 512


 class HuggingFaceTokenizer(Model):
@@ -66,7 +74,9 @@ class HuggingFaceTokenizer(Model):
         )
         self.tokenizer = tokenizer

-    def _collect_assets(
+    def _collect_assets(
+        self, weight_manager: Optional[AssetManager] = None
+    ) -> Dict[str, TextIO | BinaryIO | Path]:
         with tempfile.TemporaryDirectory() as tmpdir:
             self.tokenizer.save_pretrained(tmpdir)
             paths = [Path(f) for f in glob.glob(tmpdir + "/*")]
@@ -74,7 +84,6 @@ class HuggingFaceTokenizer(Model):


 class HuggingFaceGenerationConfig(GenerationConfig):
-
     def __init__(self, model: PreTrainedModel):
         config = model.generation_config
         eos_tokens = config.eos_token_id
@@ -239,25 +248,34 @@ class HuggingFaceTextGenerationPipeline:


 class HuggingFaceImageProcessor:
-
     def __init__(
         self,
         model: PreTrainedModel,
         processor: BaseImageProcessor,
+        image_size_override: Optional[Tuple[int, int]] = None,
     ):
         self.model = model
         self.processor = processor
+        self.image_size_override = image_size_override

     def input_spec(self) -> Dict[str, Any]:
         target_size = None
         image_preprocessing_steps = []
-
-            hasattr(self.processor, "do_resize")
-
-
-
+        has_do_resize = (
+            hasattr(self.processor, "do_resize") and self.processor.do_resize
+        )
+        has_do_pad = hasattr(self.processor, "do_pad") and self.processor.do_pad
+        # NOTE: it is possible for both do_resize and do_pad to be set, in which case we only use do_resize.
+        if has_do_resize:
             target_size, preprocessing_step = self._convert_resize()
             image_preprocessing_steps.append(preprocessing_step)
+        elif has_do_pad:
+            target_size, preprocessing_step = self._convert_pad()
+            image_preprocessing_steps.append(preprocessing_step)
+        else:
+            raise ValueError(
+                "could not determine target size for resize from model config"
+            )

         if (
             hasattr(self.processor, "do_rescale")
@@ -288,7 +306,7 @@ class HuggingFaceImageProcessor:
         # TODO(james): figure out if this is specified anywhere in the huggingface pipeline.
         channel_format = "rgb"

-        dimensions = [
+        dimensions: list[DimensionSemantics] = [
             BatchDimension(),
         ]
         input_shape = [1]
@@ -339,21 +357,38 @@ class HuggingFaceImageProcessor:
             "class_labels": labels,
         }

-    def
+    def output_spec_depth(self) -> Dict[str, Any]:
+        dimensions = [
+            BatchDimension(),
+            ImageHeightDimension(),
+            ImageWidthDimension(),
+        ]
+        output_tensor_semantics = [
+            TensorSemantics(dimensions),
+        ]
+        return {
+            "output_tensor_semantics": output_tensor_semantics,
+        }
+
+    def output_spec_object_detection(self, zero_shot=False) -> Dict[str, Any]:
         if not hasattr(self.processor, "post_process_object_detection"):
             raise NotImplementedError(
-                "
+                "processor must have post_process_object_detection set"
             )

-
-
-
-
-
-
-
-
-
+        if zero_shot:
+            num_classes = -1
+            labels = []
+        else:
+            id_to_label = self.model.config.id2label
+            max_id = max(id_to_label)
+            labels = []
+            for i in range(max_id):
+                if i not in id_to_label:
+                    labels.append("")
+                    continue
+                labels.append(id_to_label[i])
+            num_classes = max_id + 1

         # TODO(james): verify assumptions made here apply broadly.
         output_tensor_semantics = []
@@ -363,7 +398,7 @@ class HuggingFaceImageProcessor:
             DetectionNumCandidatesDimension(is_nms=False),
             DetectionOutputDimension(
                 scores_range=(0, num_classes),
-                scores_are_logits=
+                scores_are_logits=not zero_shot,
             ),
         ]
         output_tensor_semantics.append(TensorSemantics(logits_dimensions))
@@ -382,12 +417,45 @@ class HuggingFaceImageProcessor:
             "class_labels": labels,
         }

+    def _get_size(self) -> Dict[str, int]:
+        size = None
+        if self.image_size_override:
+            size = {
+                "height": self.image_size_override[0],
+                "width": self.image_size_override[1],
+            }
+        elif hasattr(self.processor, "size") and self.processor.size is not None:
+            size = self.processor.size
+        elif (
+            hasattr(self.model.config, "image_size")
+            and self.model.config.image_size is not None
+        ):
+            size = {
+                "height": self.model.config.image_size,
+                "width": self.model.config.image_size,
+            }
+        else:
+            warnings.warn(
+                f"using fallback resize size of {FALLBACK_RESIZE_SIZE} for model",
+                stacklevel=1,
+            )
+            size = {
+                "width": FALLBACK_RESIZE_SIZE,
+                "height": FALLBACK_RESIZE_SIZE,
+            }
+        return size
+
     def _convert_resize(self) -> Tuple[Tuple[int, int], ImagePreprocessingStep]:
-        size = self.
+        size = self._get_size()
+        size_divisor: int | None = None
+        if hasattr(self.processor, "size_divisor"):
+            size_divisor = self.processor.size_divisor
+
         target_size = None
         preprocess_step = None
+
         if "height" in size and "width" in size:
-            target_size =
+            target_size = (size["height"], size["width"])
             preprocess_step = ResizeImage()
         elif (
             "shortest_edge" in size
@@ -407,12 +475,55 @@ class HuggingFaceImageProcessor:
                 if not min_size or edge_size < min_size:
                     min_size = edge_size

-
+            if min_size is None:
+                raise ValueError(
+                    "could not determine target size for resize from model config"
+                )
+            target_size = (min_size, min_size)
+            preprocess_step = LetterboxImage()
+        else:
+            raise ValueError(
+                "could not determine target size for resize from model config"
+            )
+        if size_divisor:
+            target_size = (
+                math.ceil(target_size[0] / size_divisor) * size_divisor,
+                math.ceil(target_size[1] / size_divisor) * size_divisor,
+            )
+        return target_size, preprocess_step
+
+    def _convert_pad(self) -> Tuple[Tuple[int, int], ImagePreprocessingStep]:
+        # NOTE: There is a wide variety of ways that huggingface pads images.
+        # We found at least 3 different ways to pad images in the codebase:
+        # 1. Center pad (pad top, left, bottom, right) to match target size
+        #    https://github.com/huggingface/transformers/blob/70b07d97cf2c5f61fff55700b65528a1b6845cd2/src/transformers/models/dpt/image_processing_dpt.py#L231
+        # 2. Right/Top pad (pad top, and right) to match target size
+        #    https://github.com/huggingface/transformers/blob/174890280b340b89c5bfa092f6b4fb0e2dc2d7fc/src/transformers/models/conditional_detr/image_processing_conditional_detr.py#L846
+        # 3. Pad to nearest multiple of size_divisor
+        #    https://github.com/huggingface/transformers/blob/70b07d97cf2c5f61fff55700b65528a1b6845cd2/src/transformers/models/llava_onevision/image_processing_llava_onevision.py#L177-179
+        #
+        # We decided to simply implement padding with LetterBoxImage(),
+        # because we assume the models won't be that sensitive to the type of padding,
+        # but this may need to be revisited in the future.
+        size = self._get_size()
+        size_divisor: int | None = None
+        if hasattr(self.processor, "size_divisor"):
+            size_divisor = self.processor.size_divisor
+
+        target_size = None
+        preprocess_step = None
+        if "height" in size and "width" in size:
+            target_size = (size["height"], size["width"])
             preprocess_step = LetterboxImage()
         else:
             raise ValueError(
                 "could not determine target size for resize from model config"
             )
+        if size_divisor:
+            target_size = (
+                math.ceil(target_size[0] / size_divisor) * size_divisor,
+                math.ceil(target_size[1] / size_divisor) * size_divisor,
+            )
         return target_size, preprocess_step


@@ -421,11 +532,13 @@ class HuggingFaceImageSegmentationPipeline:
         self,
         pipeline: Pipeline,
         name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
     ):
         self.pipeline = pipeline
         if name is None:
             name = pipeline.model.name_or_path

+        self.image_size_override = image_size_override
         self.model = TorchModel(
             name,
             torch_module=self.pipeline.model,
@@ -443,7 +556,9 @@ class HuggingFaceImageSegmentationPipeline:
         )

         image_processor = HuggingFaceImageProcessor(
-            self.pipeline.model,
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
         )
         spec = image_processor.input_spec()
         spec.update(image_processor.output_spec_segmentation())
@@ -468,11 +583,13 @@ class HuggingFaceObjectDetectionPipeline:
         self,
         pipeline: Pipeline,
         name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
     ):
         self.pipeline = pipeline
         if name is None:
             name = pipeline.model.name_or_path

+        self.image_size_override = image_size_override
         self.model = TorchModel(
             name,
             torch_module=ObjectDetectionWrapper(self.pipeline.model),
@@ -490,7 +607,9 @@ class HuggingFaceObjectDetectionPipeline:
         )

         image_processor = HuggingFaceImageProcessor(
-            self.pipeline.model,
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
         )
         spec = image_processor.input_spec()
         spec.update(image_processor.output_spec_object_detection())
@@ -500,6 +619,141 @@ class HuggingFaceObjectDetectionPipeline:
         return [self.model]


+class ZeroShotObjectDetectionWrapper(torch.nn.Module):
+    def __init__(self, model: PreTrainedModel):
+        super().__init__()
+        self.model = model
+
+    def forward(self, image, tokens, attention_mask):
+        outputs = self.model(
+            input_ids=tokens, pixel_values=image, attention_mask=attention_mask
+        )
+        return torch.sigmoid(outputs.logits), outputs.pred_boxes
+
+
+class HuggingFaceZeroShotObjectDetectionPipeline:
+    def __init__(
+        self,
+        pipeline: Pipeline,
+        name: Optional[str] = None,
+        tokenizer_name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
+    ):
+        self.pipeline = pipeline
+        if name is None:
+            name = pipeline.model.name_or_path
+
+        self.tokenizer_model = HuggingFaceTokenizer(
+            self.pipeline.tokenizer, tokenizer_name
+        )
+
+        self.image_size_override = image_size_override
+        self.detection_model = TorchModel(
+            name,
+            torch_module=ZeroShotObjectDetectionWrapper(self.pipeline.model),
+            **self._guess_model_spec(),
+        )
+
+    def _add_zero_shot_inputs(self, spec: Dict):
+        example_inputs = spec["example_inputs"]
+        if "dynamic_shapes" not in spec:
+            spec["dynamic_shapes"] = [{} for _ in example_inputs]
+
+        max_length = self.pipeline.model.config.text_config.max_length
+        example_inputs.extend(
+            [
+                torch.randint(200, [2, max_length]).to(torch.int32),
+                torch.ones([2, max_length]).to(torch.int32),
+            ]
+        )
+
+        input_tensor_semantics = spec["input_tensor_semantics"]
+        input_tensor_semantics.extend(
+            [
+                TensorSemantics(
+                    [
+                        BatchDimension(),
+                        TokensDimension(),
+                    ]
+                ),
+                TensorSemantics(
+                    [
+                        BatchDimension(),
+                        AttentionMaskDimension(),
+                    ]
+                ),
+            ]
+        )
+
+        spec["dynamic_shapes"].extend(
+            [
+                {0: "num_labels"},
+                {0: "num_labels"},
+            ]
+        )
+
+    def _guess_model_spec(self) -> Dict:
+        if self.pipeline.image_processor is None:
+            raise ValueError(
+                "Could not determine image preprocessing for pipeline with image_processor=None"
+            )
+
+        image_processor = HuggingFaceImageProcessor(
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
+        )
+        spec = image_processor.input_spec()
+        self._add_zero_shot_inputs(spec)
+        spec.update(image_processor.output_spec_object_detection(zero_shot=True))
+        return spec
+
+    def models(self) -> List[Model]:
+        return [self.detection_model, self.tokenizer_model]
+
+
+class HuggingFaceDepthEstimationPipeline:
+    def __init__(
+        self,
+        pipeline: Pipeline,
+        name: Optional[str] = None,
+        image_size_override: Optional[Tuple[int, int]] = None,
+    ):
+        self.pipeline = pipeline
+        if name is None:
+            name = pipeline.model.name_or_path
+
+        self.image_size_override = image_size_override
+
+        self.model = TorchModel(
+            name,
+            torch_module=self.pipeline.model,
+            **self._guess_model_spec(),
+        )
+
+    def _guess_model_spec(self) -> Dict:
+        if self.pipeline.image_processor is None:
+            raise ValueError(
+                "Could not determine image preprocessing for pipeline with image_processor=None"
+            )
+        if self.pipeline.tokenizer is not None:
+            raise NotImplementedError(
+                "HuggingFaceDepthEstimationPipeline does not yet support token inputs"
+            )
+
+        image_processor = HuggingFaceImageProcessor(
+            self.pipeline.model,
+            self.pipeline.image_processor,
+            image_size_override=self.image_size_override,
+        )
+        spec = image_processor.input_spec()
+        spec.update(image_processor.output_spec_depth())
+        return spec
+
+    def models(self) -> List[Model]:
+        return [self.model]
+
+
 def import_huggingface_pipeline(pipeline: Pipeline, **kwargs) -> List[Model]:
     if pipeline.framework != "pt":
         raise ValueError(
@@ -514,8 +768,19 @@ def import_huggingface_pipeline(pipeline: Pipeline, **kwargs) -> List[Model]:
         return HuggingFaceImageSegmentationPipeline(pipeline, **kwargs).models()
     elif pipeline.task == "object-detection":
         return HuggingFaceObjectDetectionPipeline(pipeline, **kwargs).models()
+    elif pipeline.task == "zero-shot-object-detection":
+        return HuggingFaceZeroShotObjectDetectionPipeline(pipeline, **kwargs).models()
+    elif pipeline.task == "depth-estimation":
+        return HuggingFaceDepthEstimationPipeline(pipeline, **kwargs).models()
     raise ValueError(
         "unimplemented: hugging face pipeline task: {} (supported tasks: [{}])".format(
-            pipeline.task,
+            pipeline.task,
+            [
+                "text-generation",
+                "image-segmentation",
+                "object-detection",
+                "zero-shot-object-detection",
+                "depth-estimation",
+            ],
         )
     )
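Taken together, the gml/hf.py changes add two new pipeline tasks (zero-shot-object-detection and depth-estimation) and an image_size_override option on the image pipelines. A minimal usage sketch of the updated entry point follows, assuming import_huggingface_pipeline is importable from gml.hf as shown above; the checkpoint name is a placeholder, not something referenced by this diff:

import transformers

from gml.hf import import_huggingface_pipeline

# Placeholder checkpoint; any PyTorch depth-estimation pipeline that exposes an
# image_processor should exercise the new HuggingFaceDepthEstimationPipeline path.
pipe = transformers.pipeline("depth-estimation", model="<your-depth-model>")

# image_size_override is forwarded via **kwargs to the task-specific pipeline class.
models = import_huggingface_pipeline(pipe, image_size_override=(384, 384))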
gml/model.py
CHANGED
@@ -21,9 +21,11 @@ import io
 from pathlib import Path
 from typing import BinaryIO, Dict, List, Literal, Optional, Sequence, TextIO, Tuple

-import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
 import torch
-
+
+import gml.proto.src.api.corepb.v1.model_exec_pb2 as modelexecpb
+from gml.asset_manager import AssetManager, TempFileAssetManager
+from gml.compile import to_torch_mlir
 from gml.preprocessing import ImagePreprocessingStep
 from gml.tensor import TensorSemantics

@@ -90,12 +92,14 @@ class Model(abc.ABC):
         )

     @abc.abstractmethod
-    def _collect_assets(
+    def _collect_assets(
+        self, weight_manager: Optional[AssetManager] = None
+    ) -> Dict[str, TextIO | BinaryIO | Path]:
         pass

     @contextlib.contextmanager
-    def collect_assets(self):
-        yield from self._collect_assets()
+    def collect_assets(self, weight_manager: Optional[AssetManager] = None):
+        yield from self._collect_assets(weight_manager)


 class TorchModel(Model):
@@ -111,7 +115,7 @@ class TorchModel(Model):
     ):
         super().__init__(
             name,
-
+            modelexecpb.ModelInfo.MODEL_KIND_TORCH,
             modelexecpb.ModelInfo.MODEL_STORAGE_FORMAT_MLIR_TEXT,
             **kwargs,
         )
@@ -130,17 +134,27 @@ class TorchModel(Model):
             for shape, dtype in zip(self.input_shapes, self.input_dtypes)
         ]

-    def _convert_to_torch_mlir(self):
+    def _convert_to_torch_mlir(self, weight_manager: Optional[AssetManager] = None):
         return to_torch_mlir(
             self.torch_module,
             self.example_inputs,
             self.dynamic_shapes,
+            weight_manager=weight_manager,
         )

-    def _collect_assets(
-
-
-
+    def _collect_assets(
+        self, weight_manager: Optional[AssetManager] = None
+    ) -> Dict[str, TextIO | BinaryIO | Path]:
+        if weight_manager is None:
+            # If the user does not provide a weight manager, use temp files.
+            weight_manager = TempFileAssetManager()
+
+        with weight_manager as weight_mgr:
+            compiled = self._convert_to_torch_mlir(weight_mgr)
+            file = io.BytesIO(str(compiled).encode("utf-8"))
+            assets = {"": file}
+            assets.update(weight_mgr.assets())
+            yield assets


 def _kind_str_to_kind_format_protos(
@@ -178,5 +192,7 @@ class ModelFromFiles(Model):
         super().__init__(name=name, kind=kind, storage_format=storage_format, **kwargs)
         self.files = files

-    def _collect_assets(
+    def _collect_assets(
+        self, weight_manager: Optional[AssetManager] = None
+    ) -> Dict[str, TextIO | BinaryIO | Path]:
         yield self.files