kmodels 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kmodels-0.2.2 → kmodels-0.2.4}/PKG-INFO +8 -2
- {kmodels-0.2.2 → kmodels-0.2.4}/README.md +7 -1
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/__init__.py +1 -1
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/__init__.py +3 -0
- kmodels-0.2.4/kmodels/models/deeplabv3/__init__.py +8 -0
- kmodels-0.2.4/kmodels/models/deeplabv3/deeplabv3_image_processor.py +186 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/detr/detr_image_processor.py +8 -5
- kmodels-0.2.4/kmodels/models/dfine/__init__.py +12 -0
- kmodels-0.2.4/kmodels/models/dfine/config.py +105 -0
- kmodels-0.2.4/kmodels/models/dfine/convert_dfine_hf_to_keras.py +509 -0
- kmodels-0.2.4/kmodels/models/dfine/dfine_image_processor.py +242 -0
- kmodels-0.2.4/kmodels/models/dfine/dfine_layers.py +692 -0
- kmodels-0.2.4/kmodels/models/dfine/dfine_model.py +1679 -0
- kmodels-0.2.4/kmodels/models/maxvit/__init__.py +7 -0
- kmodels-0.2.4/kmodels/models/maxvit/config.py +108 -0
- kmodels-0.2.4/kmodels/models/maxvit/convert_maxvit_timm_to_keras.py +173 -0
- kmodels-0.2.4/kmodels/models/maxvit/maxvit_layers.py +503 -0
- kmodels-0.2.4/kmodels/models/maxvit/maxvit_model.py +711 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rf_detr/rf_detr_image_processor.py +7 -4
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rt_detr/rt_detr_image_processor.py +7 -4
- kmodels-0.2.4/kmodels/models/sam3/__init__.py +6 -0
- kmodels-0.2.4/kmodels/models/sam3/config.py +44 -0
- kmodels-0.2.4/kmodels/models/sam3/convert_sam3_hf_to_keras.py +366 -0
- kmodels-0.2.4/kmodels/models/sam3/sam3_clip_tokenizer.py +205 -0
- kmodels-0.2.4/kmodels/models/sam3/sam3_downstream.py +549 -0
- kmodels-0.2.4/kmodels/models/sam3/sam3_layers.py +1249 -0
- kmodels-0.2.4/kmodels/models/sam3/sam3_model.py +1510 -0
- kmodels-0.2.4/kmodels/models/sam3/sam3_processor.py +557 -0
- kmodels-0.2.4/kmodels/models/sam3/sam3_utils.py +384 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/segformer/__init__.py +3 -0
- kmodels-0.2.4/kmodels/models/segformer/segformer_image_preprocessor.py +363 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/swin/swin_model.py +4 -3
- kmodels-0.2.4/kmodels/models/swinv2/__init__.py +10 -0
- kmodels-0.2.4/kmodels/models/swinv2/config.py +117 -0
- kmodels-0.2.4/kmodels/models/swinv2/convert_swinv2_torch_to_keras.py +268 -0
- kmodels-0.2.4/kmodels/models/swinv2/swinv2_layers.py +636 -0
- kmodels-0.2.4/kmodels/models/swinv2/swinv2_model.py +889 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/utils/model_equivalence_tester.py +5 -5
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/version.py +1 -1
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels.egg-info/PKG-INFO +8 -2
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels.egg-info/SOURCES.txt +26 -0
- kmodels-0.2.2/kmodels/models/deeplabv3/__init__.py +0 -4
- kmodels-0.2.2/kmodels/models/segformer/segformer_image_preprocessor.py +0 -121
- {kmodels-0.2.2 → kmodels-0.2.4}/LICENSE +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/_test_runner.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/layers/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/layers/image_normalization.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/layers/layer_scale.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/layers/stochastic_depth.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/model_registry.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/cait/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/cait/cait_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/cait/cait_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/cait/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/cait/convert_cait_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/clip_image_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/clip_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/clip_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/clip_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/clip_tokenizer.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/clip/convert_clip_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convmixer/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convmixer/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convmixer/convert_convmixer_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convmixer/convmixer_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnext/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnext/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnext/convert_convnext_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnext/convnext_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnext/convnext_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnextv2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnextv2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnextv2/convert_convnextv2_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/convnextv2/convnextv2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/deeplabv3/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/deeplabv3/convert_deeplabv3_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/deeplabv3/deeplabv3_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/deit/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/deit/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/deit/convert_deit_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/deit/deit_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/densenet/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/densenet/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/densenet/convert_densenet_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/densenet/densenet_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/detr/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/detr/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/detr/convert_detr_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/detr/detr_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/detr/detr_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientformer/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientformer/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientformer/convert_efficientformer_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientformer/efficientformer_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientformer/efficientformer_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet/convert_efficientnet_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet/efficientnet_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet_lite/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet_lite/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet_lite/convert_efficientnet_lite_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnet_lite/efficientnet_lite_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnetv2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnetv2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnetv2/convert_efficientnetv2_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/efficientnetv2/efficientnetv2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/eomt/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/eomt/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/eomt/convert_eomt_hf_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/eomt/eomt_image_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/eomt/eomt_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/eomt/eomt_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/flexivit/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/flexivit/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/flexivit/convert_flexivit_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/flexivit/flexivit_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_next/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_next/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_next/convert_inception_next_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_next/inception_next_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_resnetv2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_resnetv2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_resnetv2/convert_inceptionresnetv2_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inception_resnetv2/inceptionresnetv2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv3/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv3/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv3/convert_inceptionv3_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv3/inceptionv3_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv4/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv4/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv4/convert_inceptionv4_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/inceptionv4/inceptionv4_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mit/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mit/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mit/convert_mit_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mit/mit_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mit/mit_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mlp_mixer/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mlp_mixer/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mlp_mixer/convert_mlpmixer_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mlp_mixer/mlp_mixer_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv2/convert_mobilenetv2_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv2/mobilenetv2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv3/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv3/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv3/convert_mobilenetv3_keras_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilenetv3/mobilenetv3_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevit/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevit/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevit/convert_mobilevit_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevit/mobilevit_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevit/mobilevit_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevitv2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevitv2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevitv2/convert_mobilevitv2_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/mobilevitv2/mobilevitv2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/nextvit/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/nextvit/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/nextvit/convert_nextvit_timm_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/nextvit/nextvit_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/nextvit/nextvit_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/pit/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/pit/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/pit/convert_pit_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/pit/pit_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/poolformer/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/poolformer/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/poolformer/convert_poolformer_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/poolformer/poolformer_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/res2net/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/res2net/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/res2net/convert_res2net_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/res2net/res2net_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resmlp/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resmlp/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resmlp/convert_resmlp_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resmlp/resmlp_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resmlp/resmlp_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnet/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnet/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnet/convert_resnet_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnet/resnet_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnetv2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnetv2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnetv2/convert_resnetv2_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnetv2/resnetv2_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnetv2/resnetv2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnext/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnext/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnext/convert_resnext_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/resnext/resnext_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rf_detr/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rf_detr/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rf_detr/convert_rf_detr_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rf_detr/rf_detr_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rf_detr/rf_detr_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rt_detr/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rt_detr/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rt_detr/convert_rt_detr_hf_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rt_detr/rt_detr_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/rt_detr/rt_detr_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam/convert_sam_hf_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam/sam_image_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam/sam_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam/sam_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam2/convert_sam2_hf_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam2/sam2_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/sam2/sam2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/segformer/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/segformer/convert_segformer_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/segformer/segformer_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/senet/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/senet/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/senet/convert_senet_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/senet/senet_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/convert_siglip_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/siglip_image_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/siglip_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/siglip_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/siglip_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip/siglip_tokenizer.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip2/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip2/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip2/convert_siglip2_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip2/siglip2_image_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip2/siglip2_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip2/siglip2_processor.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/siglip2/siglip2_tokenizer.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/swin/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/swin/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/swin/convert_swin_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/swin/swin_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vgg/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vgg/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vgg/convert_vgg_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vgg/vgg_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vit/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vit/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vit/convert_vit_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vit/vit_layers.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/vit/vit_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/xception/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/xception/config.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/xception/convert_xception_org_keras_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/xception/xception_model.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/utils/__init__.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/utils/custom_exception.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/utils/file_downloader.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/utils/model_weights_util.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/utils/weight_split_torch_and_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels/utils/weight_transfer_torch_to_keras.py +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels.egg-info/dependency_links.txt +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels.egg-info/entry_points.txt +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels.egg-info/requires.txt +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/kmodels.egg-info/top_level.txt +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/pyproject.toml +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/setup.cfg +0 -0
- {kmodels-0.2.2 → kmodels-0.2.4}/tests/test_modelling.py +0 -0
{kmodels-0.2.2 → kmodels-0.2.4}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kmodels
-Version: 0.2.2
+Version: 0.2.4
 Summary: Pretrained keras 3 vision models
 Author-email: Gitesh Chawda <gitesh.ch.0912@gmail.com>
 License: Apache License 2.0
@@ -39,7 +39,7 @@ Dynamic: license-file
 
 ## 📖 Introduction
 
-Keras Models (kmodels) is a collection of models with pretrained weights, built entirely with Keras 3. It supports a range of tasks, including classification, object detection (DETR, RT-DETR, RF-DETR), segmentation (SAM, SAM2, SegFormer, DeepLabV3, EoMT), vision-language modeling (CLIP, SigLIP, SigLIP2), and more. kmodels includes custom layers and backbone support, providing flexibility and efficiency across various applications. For backbones, there are various weight variants like `in1k`, `in21k`, `fb_dist_in1k`, `ms_in22k`, `fb_in22k_ft_in1k`, `ns_jft_in1k`, `aa_in1k`, `cvnets_in1k`, `augreg_in21k_ft_in1k`, `augreg_in21k`, and many more.
+Keras Models (kmodels) is a collection of models with pretrained weights, built entirely with Keras 3. It supports a range of tasks, including classification, object detection (DETR, RT-DETR, RF-DETR, D-FINE), segmentation (SAM, SAM2, SAM3, SegFormer, DeepLabV3, EoMT), vision-language modeling (CLIP, SigLIP, SigLIP2), and more. It includes hybrid architectures like MaxViT alongside traditional CNNs and pure transformers. kmodels includes custom layers and backbone support, providing flexibility and efficiency across various applications. For backbones, there are various weight variants like `in1k`, `in21k`, `fb_dist_in1k`, `ms_in22k`, `fb_in22k_ft_in1k`, `ns_jft_in1k`, `aa_in1k`, `cvnets_in1k`, `augreg_in21k_ft_in1k`, `augreg_in21k`, and many more.
 
 ## ⚡ Installation
 
@@ -67,6 +67,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 |-------|-------------|
 | [SAM](docs/sam.md) | Segment Anything Model — promptable segmentation with points, boxes, or masks (ViT-B/L/H) |
 | [SAM2](docs/sam2.md) | Segment Anything Model 2 — next generation of promptable visual segmentation (Hiera Tiny/Small/Base+/Large) |
+| [SAM3](docs/sam3.md) | Segment Anything Model 3 — open-vocabulary detection + segmentation with CLIP text encoder (ViT-L/14) |
 | [SegFormer](docs/segformer.md) | Transformer-based semantic segmentation with MLP decoder, Cityscapes & ADE20K weights |
 | [DeepLabV3](docs/deeplabv3.md) | Atrous convolution-based semantic segmentation |
 | [EoMT](docs/eomt.md) | Encoder-only Mask Transformer for panoptic segmentation |
@@ -78,6 +79,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | [DETR](docs/detr.md) | End-to-end object detection with Transformers (ResNet-50/101 backbones) |
 | [RT-DETR](docs/rt_detr.md) | Real-time DETR with ResNet-vd backbone and hybrid encoder (ResNet-18/34/50/101 variants) |
 | [RF-DETR](docs/rf_detr.md) | Real-time detection transformer (Nano, Small, Medium, Base, Large variants) |
+| [D-FINE](docs/dfine.md) | Fine-grained distribution refinement detector with HGNetV2 backbone (Nano/Small/Medium/Large/XLarge) |
 
 **Vision-Language Models**
 
@@ -107,6 +109,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | Inception-ResNet-v2 | [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) | `timm` |
 | Inception-v3 | [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | `timm` |
 | Inception-v4 | [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) | `timm` |
+| MaxViT | [MaxViT: Multi-Axis Vision Transformer](https://arxiv.org/abs/2204.01697) | `timm` |
 | MiT | [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) | `transformers` |
 | MLP-Mixer | [MLP-Mixer: An all-MLP Architecture for Vision](https://arxiv.org/abs/2105.01601) | `timm` |
 | MobileNetV2 | [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) | `timm` |
@@ -123,6 +126,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | ResNeXt | [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) | `timm` |
 | SENet | [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) | `timm` |
 | Swin Transformer | [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) | `timm` |
+| Swin Transformer V2 | [Swin Transformer V2: Scaling Up Capacity and Resolution](https://arxiv.org/abs/2111.09883) | `timm` |
 | VGG | [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) | `timm` |
 | ViT | [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) | `timm` |
 | Xception | [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357) | `keras` |
@@ -133,6 +137,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 
 | 🏷️ Model Name | 📜 Reference Paper | 📦 Source of Weights |
 |---------------|-------------------|---------------------|
+| D-FINE | [D-FINE: Redefine Regression Task of DETRs as Fine-grained Distribution Refinement](https://arxiv.org/abs/2410.13842) | `transformers` |
 | DETR | [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) | `transformers`|
 | RT-DETR | [DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069) | `transformers` |
 | RF-DETR | [RF-DETR: Real-Time Detection Transformer](https://arxiv.org/abs/2502.18860) | `rfdetr` |
@@ -147,6 +152,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | EoMT | [Encoder-only Mask Transformer for Panoptic Segmentation](https://arxiv.org/abs/2504.07957) | `transformers` |
 | SAM | [Segment Anything](https://arxiv.org/abs/2304.02643) | `transformers` |
 | SAM2 | [SAM 2: Segment Anything in Images and Videos](https://arxiv.org/abs/2408.00714) | `transformers` |
+| SAM3 | [SAM 3](https://arxiv.org/abs/2506.09011) | `transformers` |
 | SegFormer | [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) | `transformers`|
 
 <br>
```
{kmodels-0.2.2 → kmodels-0.2.4}/README.md

```diff
@@ -6,7 +6,7 @@
 
 ## 📖 Introduction
 
-Keras Models (kmodels) is a collection of models with pretrained weights, built entirely with Keras 3. It supports a range of tasks, including classification, object detection (DETR, RT-DETR, RF-DETR), segmentation (SAM, SAM2, SegFormer, DeepLabV3, EoMT), vision-language modeling (CLIP, SigLIP, SigLIP2), and more. kmodels includes custom layers and backbone support, providing flexibility and efficiency across various applications. For backbones, there are various weight variants like `in1k`, `in21k`, `fb_dist_in1k`, `ms_in22k`, `fb_in22k_ft_in1k`, `ns_jft_in1k`, `aa_in1k`, `cvnets_in1k`, `augreg_in21k_ft_in1k`, `augreg_in21k`, and many more.
+Keras Models (kmodels) is a collection of models with pretrained weights, built entirely with Keras 3. It supports a range of tasks, including classification, object detection (DETR, RT-DETR, RF-DETR, D-FINE), segmentation (SAM, SAM2, SAM3, SegFormer, DeepLabV3, EoMT), vision-language modeling (CLIP, SigLIP, SigLIP2), and more. It includes hybrid architectures like MaxViT alongside traditional CNNs and pure transformers. kmodels includes custom layers and backbone support, providing flexibility and efficiency across various applications. For backbones, there are various weight variants like `in1k`, `in21k`, `fb_dist_in1k`, `ms_in22k`, `fb_in22k_ft_in1k`, `ns_jft_in1k`, `aa_in1k`, `cvnets_in1k`, `augreg_in21k_ft_in1k`, `augreg_in21k`, and many more.
 
 ## ⚡ Installation
 
@@ -34,6 +34,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 |-------|-------------|
 | [SAM](docs/sam.md) | Segment Anything Model — promptable segmentation with points, boxes, or masks (ViT-B/L/H) |
 | [SAM2](docs/sam2.md) | Segment Anything Model 2 — next generation of promptable visual segmentation (Hiera Tiny/Small/Base+/Large) |
+| [SAM3](docs/sam3.md) | Segment Anything Model 3 — open-vocabulary detection + segmentation with CLIP text encoder (ViT-L/14) |
 | [SegFormer](docs/segformer.md) | Transformer-based semantic segmentation with MLP decoder, Cityscapes & ADE20K weights |
 | [DeepLabV3](docs/deeplabv3.md) | Atrous convolution-based semantic segmentation |
 | [EoMT](docs/eomt.md) | Encoder-only Mask Transformer for panoptic segmentation |
@@ -45,6 +46,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | [DETR](docs/detr.md) | End-to-end object detection with Transformers (ResNet-50/101 backbones) |
 | [RT-DETR](docs/rt_detr.md) | Real-time DETR with ResNet-vd backbone and hybrid encoder (ResNet-18/34/50/101 variants) |
 | [RF-DETR](docs/rf_detr.md) | Real-time detection transformer (Nano, Small, Medium, Base, Large variants) |
+| [D-FINE](docs/dfine.md) | Fine-grained distribution refinement detector with HGNetV2 backbone (Nano/Small/Medium/Large/XLarge) |
 
 **Vision-Language Models**
 
@@ -74,6 +76,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | Inception-ResNet-v2 | [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) | `timm` |
 | Inception-v3 | [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | `timm` |
 | Inception-v4 | [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) | `timm` |
+| MaxViT | [MaxViT: Multi-Axis Vision Transformer](https://arxiv.org/abs/2204.01697) | `timm` |
 | MiT | [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) | `transformers` |
 | MLP-Mixer | [MLP-Mixer: An all-MLP Architecture for Vision](https://arxiv.org/abs/2105.01601) | `timm` |
 | MobileNetV2 | [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) | `timm` |
@@ -90,6 +93,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | ResNeXt | [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) | `timm` |
 | SENet | [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) | `timm` |
 | Swin Transformer | [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) | `timm` |
+| Swin Transformer V2 | [Swin Transformer V2: Scaling Up Capacity and Resolution](https://arxiv.org/abs/2111.09883) | `timm` |
 | VGG | [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) | `timm` |
 | ViT | [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) | `timm` |
 | Xception | [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357) | `keras` |
@@ -100,6 +104,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 
 | 🏷️ Model Name | 📜 Reference Paper | 📦 Source of Weights |
 |---------------|-------------------|---------------------|
+| D-FINE | [D-FINE: Redefine Regression Task of DETRs as Fine-grained Distribution Refinement](https://arxiv.org/abs/2410.13842) | `transformers` |
 | DETR | [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) | `transformers`|
 | RT-DETR | [DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069) | `transformers` |
 | RF-DETR | [RF-DETR: Real-Time Detection Transformer](https://arxiv.org/abs/2502.18860) | `rfdetr` |
@@ -114,6 +119,7 @@ pip install -U git+https://github.com/IMvision12/keras-models
 | EoMT | [Encoder-only Mask Transformer for Panoptic Segmentation](https://arxiv.org/abs/2504.07957) | `transformers` |
 | SAM | [Segment Anything](https://arxiv.org/abs/2304.02643) | `transformers` |
 | SAM2 | [SAM 2: Segment Anything in Images and Videos](https://arxiv.org/abs/2408.00714) | `transformers` |
+| SAM3 | [SAM 3](https://arxiv.org/abs/2506.09011) | `transformers` |
 | SegFormer | [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) | `transformers`|
 
 <br>
```
{kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/__init__.py

```diff
@@ -8,6 +8,7 @@ from kmodels.models import (
     deit,
     densenet,
     detr,
+    dfine,
     efficientformer,
     efficientnet,
     efficientnet_lite,
@@ -18,6 +19,7 @@ from kmodels.models import (
     inception_resnetv2,
     inceptionv3,
     inceptionv4,
+    maxvit,
     mit,
     mlp_mixer,
     mobilenetv2,
@@ -41,6 +43,7 @@ from kmodels.models import (
     siglip,
     siglip2,
     swin,
+    swinv2,
     vgg,
     vit,
     xception,
```
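The three additions above register the new architectures as top-level modules. A minimal sanity check (assuming kmodels 0.2.4 is installed):

```python
# Verify that the newly exported modules are importable (kmodels >= 0.2.4).
from kmodels.models import dfine, maxvit, swinv2
```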
kmodels-0.2.4/kmodels/models/deeplabv3/deeplabv3_image_processor.py (new file)

````diff
@@ -0,0 +1,186 @@
+"""Preprocessing and postprocessing for DeepLabV3 semantic segmentation."""
+
+from typing import Dict, List, Optional, Tuple, Union
+
+import keras
+import numpy as np
+from PIL import Image
+
+VOC_CLASSES = [
+    "background",
+    "aeroplane",
+    "bicycle",
+    "bird",
+    "boat",
+    "bottle",
+    "bus",
+    "car",
+    "cat",
+    "chair",
+    "cow",
+    "dining table",
+    "dog",
+    "horse",
+    "motorbike",
+    "person",
+    "potted plant",
+    "sheep",
+    "sofa",
+    "train",
+    "tv/monitor",
+]
+
+
+def DeepLabV3ImageProcessor(
+    image: Union[str, np.ndarray, "Image.Image"],
+    size: Optional[Dict[str, int]] = None,
+    resample: str = "bilinear",
+    do_rescale: bool = True,
+    rescale_factor: float = 1 / 255,
+    do_normalize: bool = True,
+    image_mean: Optional[Tuple[float, ...]] = None,
+    image_std: Optional[Tuple[float, ...]] = None,
+    return_tensor: bool = True,
+) -> Union["keras.KerasTensor", np.ndarray]:
+    """Preprocess an image for DeepLabV3 inference.
+
+    Handles loading, resizing, rescaling, and ImageNet normalization to match
+    the preprocessing used during DeepLabV3 training (torchvision convention).
+
+    Args:
+        image: Input image as a file path, numpy array, or PIL Image.
+        size: Target size as ``{"height": H, "width": W}``.
+            Default: ``{"height": 520, "width": 520}``.
+        resample: Interpolation method (``"nearest"``, ``"bilinear"``,
+            or ``"bicubic"``).
+        do_rescale: Whether to divide pixel values by 255.
+        rescale_factor: Rescale factor (default ``1/255``).
+        do_normalize: Whether to apply ImageNet normalization.
+        image_mean: Per-channel mean for normalization.
+            Default: ``(0.485, 0.456, 0.406)``.
+        image_std: Per-channel std for normalization.
+            Default: ``(0.229, 0.224, 0.225)``.
+        return_tensor: If True return a Keras tensor, otherwise numpy array.
+
+    Returns:
+        Preprocessed image with shape ``(1, H, W, 3)`` ready for model input.
+
+    Example:
+    ```python
+    from kmodels.models.deeplabv3 import DeepLabV3ImageProcessor, DeepLabV3ResNet50
+
+    model = DeepLabV3ResNet50(weights="voc")
+    img = DeepLabV3ImageProcessor("photo.jpg")
+    output = model(img, training=False)
+    ```
+    """
+    if size is None:
+        size = {"height": 520, "width": 520}
+    if image_mean is None:
+        image_mean = (0.485, 0.456, 0.406)
+    if image_std is None:
+        image_std = (0.229, 0.224, 0.225)
+
+    if isinstance(image, str):
+        image = Image.open(image).convert("RGB")
+        image = np.array(image, dtype=np.float32)
+    elif isinstance(image, Image.Image):
+        image = np.array(image.convert("RGB"), dtype=np.float32)
+    elif isinstance(image, np.ndarray):
+        image = image.astype(np.float32)
+        if image.ndim == 4:
+            image = image[0]
+    else:
+        raise TypeError("Input must be a file path (str), numpy array, or PIL Image.")
+
+    if image.ndim != 3 or image.shape[-1] != 3:
+        raise ValueError(f"Expected image shape (H, W, 3), got {image.shape}")
+
+    image = keras.ops.convert_to_tensor(image, dtype="float32")
+    image = keras.ops.expand_dims(image, axis=0)
+
+    target_size = (size["height"], size["width"])
+    image = keras.ops.image.resize(image, size=target_size, interpolation=resample)
+
+    if do_rescale:
+        image = image * rescale_factor
+
+    if do_normalize:
+        mean = keras.ops.reshape(
+            keras.ops.convert_to_tensor(image_mean, dtype="float32"), (1, 1, 1, 3)
+        )
+        std = keras.ops.reshape(
+            keras.ops.convert_to_tensor(image_std, dtype="float32"), (1, 1, 1, 3)
+        )
+        image = (image - mean) / std
+
+    if not return_tensor:
+        image = keras.ops.convert_to_numpy(image)
+
+    return image
+
+
+def DeepLabV3PostProcessor(
+    outputs: "keras.KerasTensor",
+    target_size: Optional[Tuple[int, int]] = None,
+    label_names: Optional[List[str]] = None,
+) -> Dict:
+    """Post-process raw DeepLabV3 outputs into semantic segmentation results.
+
+    Takes the raw logits from DeepLabV3, computes the argmax class map,
+    optionally resizes to the original image size, and maps class indices
+    to human-readable names.
+
+    Args:
+        outputs: Raw model output tensor of shape ``(1, H, W, num_classes)``.
+        target_size: Original image ``(height, width)`` for resizing the
+            prediction mask. If ``None``, the mask is returned at model
+            output resolution.
+        label_names: Custom class name list for mapping label indices to
+            names. If ``None``, defaults to Pascal VOC class names (21
+            classes). Provide this when using a model fine-tuned on a
+            custom dataset.
+
+    Returns:
+        Dict with:
+            - ``"segmentation"``: Integer array of shape ``(H, W)`` with
+              class indices.
+            - ``"class_names"``: List of unique class names detected in the
+              image.
+            - ``"unique_classes"``: Array of unique class indices.
+
+    Example:
+    ```python
+    from kmodels.models.deeplabv3 import (
+        DeepLabV3ResNet50, DeepLabV3ImageProcessor, DeepLabV3PostProcessor,
+    )
+
+    model = DeepLabV3ResNet50(weights="voc")
+    img = DeepLabV3ImageProcessor("photo.jpg")
+    output = model(img, training=False)
+    result = DeepLabV3PostProcessor(output, target_size=(orig_h, orig_w))
+    print(result["class_names"])
+    ```
+    """
+    _names = label_names if label_names is not None else VOC_CLASSES
+
+    logits = keras.ops.convert_to_numpy(outputs)
+    pred_mask = np.argmax(logits[0], axis=-1)  # (H, W)
+
+    if target_size is not None:
+        pred_mask = np.array(
+            Image.fromarray(pred_mask.astype(np.uint8)).resize(
+                (target_size[1], target_size[0]), Image.NEAREST
+            )
+        )
+
+    unique_classes = np.unique(pred_mask)
+    class_names = [
+        _names[c] if c < len(_names) else f"class_{c}" for c in unique_classes
+    ]
+
+    return {
+        "segmentation": pred_mask,
+        "class_names": class_names,
+        "unique_classes": unique_classes,
+    }
```
````
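Putting the two new helpers together, an end-to-end inference sketch assembled from the docstring examples embedded in the file above (`photo.jpg` is a placeholder input; `DeepLabV3ResNet50` and the `"voc"` weights are the names those docstrings reference):

```python
# End-to-end DeepLabV3 inference, following the docstring examples above.
from PIL import Image

from kmodels.models.deeplabv3 import (
    DeepLabV3ImageProcessor,
    DeepLabV3PostProcessor,
    DeepLabV3ResNet50,
)

orig_w, orig_h = Image.open("photo.jpg").size    # PIL returns (width, height)

model = DeepLabV3ResNet50(weights="voc")
img = DeepLabV3ImageProcessor("photo.jpg")       # (1, 520, 520, 3), normalized
output = model(img, training=False)              # (1, H, W, num_classes) logits
result = DeepLabV3PostProcessor(output, target_size=(orig_h, orig_w))

print(result["class_names"])                     # e.g. ["background", "dog"]
```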
{kmodels-0.2.2 → kmodels-0.2.4}/kmodels/models/detr/detr_image_processor.py

```diff
@@ -202,6 +202,7 @@ def DETRPostProcessor(
     outputs: Dict[str, keras.KerasTensor],
     threshold: float = 0.7,
     target_sizes: Optional[List[Tuple[int, int]]] = None,
+    label_names: Optional[List[str]] = None,
 ) -> List[Dict[str, np.ndarray]]:
     """Post-process raw DETR outputs into usable detections.
 
@@ -217,6 +218,9 @@ def DETRPostProcessor(
         target_sizes: List of ``(height, width)`` tuples for each image in
             the batch. Used to convert normalized boxes to pixel coordinates.
             If None, boxes are returned in normalized ``[0, 1]`` coordinates.
+        label_names: Custom class name list for mapping label indices to
+            names. If ``None``, defaults to COCO class names. Provide this
+            when using a model fine-tuned on a custom dataset.
 
     Returns:
         List of dicts (one per image in the batch), each containing:
@@ -278,16 +282,15 @@ def DETRPostProcessor(
         scale = np.array([img_w, img_h, img_w, img_h], dtype=np.float32)
         xyxy_boxes = xyxy_boxes * scale
 
-        # Map label indices to
-
-
-        ]
+        # Map label indices to class names
+        _names = label_names if label_names is not None else COCO_CLASSES
+        mapped_names = [_names[l] if l < len(_names) else f"class_{l}" for l in labels]
 
         results.append(
             {
                 "scores": scores,
                 "labels": labels,
-                "label_names":
+                "label_names": mapped_names,
                 "boxes": xyxy_boxes,
             }
         )
```
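The new `label_names` argument lets the post-processor label detections from a fine-tuned head instead of assuming COCO. A hedged sketch (the two class names are invented for illustration; `outputs`, `orig_h`, and `orig_w` come from a prior DETR forward pass):

```python
# Override the COCO_CLASSES default with a custom label list.
from kmodels.models.detr.detr_image_processor import DETRPostProcessor

results = DETRPostProcessor(
    outputs,                               # dict from a DETR forward pass
    threshold=0.7,
    target_sizes=[(orig_h, orig_w)],
    label_names=["helmet", "no-helmet"],   # custom fine-tuned classes
)
print(results[0]["label_names"])           # names mapped via the custom list
```

The same `label_names` override was added to the RT-DETR and RF-DETR post-processors in this release, per the file list above.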
kmodels-0.2.4/kmodels/models/dfine/config.py (new file)

```diff
@@ -0,0 +1,105 @@
+DFINE_MODEL_CONFIG = {
+    "DFineNano": {
+        "stem_channels": [3, 16, 16],
+        "stage_in_channels": [16, 64, 256, 512],
+        "stage_mid_channels": [16, 32, 64, 128],
+        "stage_out_channels": [64, 256, 512, 1024],
+        "stage_num_blocks": [1, 1, 2, 1],
+        "stage_numb_of_layers": [3, 3, 3, 3],
+        "use_lab": True,
+        "encoder_in_channels": [512, 1024],
+        "encoder_hidden_dim": 128,
+        "d_model": 128,
+        "decoder_layers": 3,
+        "decoder_n_points": [6, 6],
+        "hidden_expansion": 0.34,
+        "ccfm_num_blocks": 2,
+        "num_feature_levels": 2,
+        "feat_strides": [16, 32],
+        "encode_proj_layers": [1],
+        "encoder_ffn_dim": 512,
+        "decoder_ffn_dim": 512,
+    },
+    "DFineSmall": {
+        "stem_channels": [3, 16, 16],
+        "stage_in_channels": [16, 64, 256, 512],
+        "stage_mid_channels": [16, 32, 64, 128],
+        "stage_out_channels": [64, 256, 512, 1024],
+        "stage_num_blocks": [1, 1, 2, 1],
+        "stage_numb_of_layers": [3, 3, 3, 3],
+        "use_lab": True,
+        "encoder_in_channels": [256, 512, 1024],
+        "decoder_layers": 3,
+        "decoder_n_points": [3, 6, 3],
+        "hidden_expansion": 0.5,
+    },
+    "DFineMedium": {
+        "stem_channels": [3, 24, 32],
+        "stage_in_channels": [32, 96, 384, 768],
+        "stage_mid_channels": [32, 64, 128, 256],
+        "stage_out_channels": [96, 384, 768, 1536],
+        "stage_num_blocks": [1, 1, 3, 1],
+        "stage_numb_of_layers": [4, 4, 4, 4],
+        "use_lab": True,
+        "encoder_in_channels": [384, 768, 1536],
+        "ccfm_num_blocks": 2,
+        "decoder_layers": 4,
+        "decoder_n_points": [3, 6, 3],
+    },
+    "DFineLarge": {
+        "stem_channels": [3, 32, 48],
+        "stage_in_channels": [48, 128, 512, 1024],
+        "stage_mid_channels": [48, 96, 192, 384],
+        "stage_out_channels": [128, 512, 1024, 2048],
+        "stage_num_blocks": [1, 1, 3, 1],
+        "stage_numb_of_layers": [6, 6, 6, 6],
+        "use_lab": False,
+        "encoder_in_channels": [512, 1024, 2048],
+        "ccfm_num_blocks": 3,
+        "decoder_layers": 6,
+        "decoder_n_points": [3, 6, 3],
+    },
+    "DFineXLarge": {
+        "stem_channels": [3, 32, 64],
+        "stage_in_channels": [64, 128, 512, 1024],
+        "stage_mid_channels": [64, 128, 256, 512],
+        "stage_out_channels": [128, 512, 1024, 2048],
+        "stage_num_blocks": [1, 2, 5, 2],
+        "stage_numb_of_layers": [6, 6, 6, 6],
+        "use_lab": False,
+        "encoder_in_channels": [512, 1024, 2048],
+        "encoder_hidden_dim": 384,
+        "ccfm_num_blocks": 3,
+        "decoder_layers": 6,
+        "decoder_n_points": [3, 6, 3],
+        "encoder_ffn_dim": 2048,
+    },
+}
+
+DFINE_WEIGHTS_CONFIG = {
+    "DFineNano": {
+        "coco": {
+            "url": "https://github.com/IMvision12/keras-models/releases/download/D-FINE/dfine_nano_coco.weights.h5",
+        },
+    },
+    "DFineSmall": {
+        "coco": {
+            "url": "https://github.com/IMvision12/keras-models/releases/download/D-FINE/dfine_small_coco.weights.h5",
+        },
+    },
+    "DFineMedium": {
+        "coco": {
+            "url": "https://github.com/IMvision12/keras-models/releases/download/D-FINE/dfine_medium_coco.weights.h5",
+        },
+    },
+    "DFineLarge": {
+        "coco": {
+            "url": "https://github.com/IMvision12/keras-models/releases/download/D-FINE/dfine_large_coco.weights.h5",
+        },
+    },
+    "DFineXLarge": {
+        "coco": {
+            "url": "https://github.com/IMvision12/keras-models/releases/download/D-FINE/dfine_xlarge_coco.weights.h5",
+        },
+    },
+}
```
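The variant dictionaries share a schema, with per-variant overrides such as `encoder_hidden_dim`, so the D-FINE scaling ladder can be read straight out of the config. A small sketch, assuming the module path `kmodels.models.dfine.config` matches the file location above:

```python
# Inspect the D-FINE scaling ladder from the shipped configs (module path
# assumed from the file location above).
from kmodels.models.dfine.config import DFINE_MODEL_CONFIG, DFINE_WEIGHTS_CONFIG

for name, cfg in DFINE_MODEL_CONFIG.items():
    print(
        f"{name}: decoder_layers={cfg['decoder_layers']}, "
        f"stage_out_channels={cfg['stage_out_channels']}"
    )

# Every variant currently ships a single COCO checkpoint:
print(DFINE_WEIGHTS_CONFIG["DFineNano"]["coco"]["url"])
```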