@huggingface/tasks 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +169 -85
- package/dist/index.d.ts +27 -2
- package/dist/index.js +169 -85
- package/package.json +1 -1
- package/src/model-libraries-snippets.ts +14 -0
- package/src/model-libraries.ts +23 -0
- package/src/tasks/feature-extraction/about.md +2 -9
- package/src/tasks/feature-extraction/data.ts +1 -2
- package/src/tasks/image-feature-extraction/about.md +23 -0
- package/src/tasks/image-feature-extraction/data.ts +51 -0
- package/src/tasks/index.ts +2 -1
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
## Use Cases
|
|
2
|
+
|
|
3
|
+
### Transfer Learning
|
|
4
|
+
|
|
5
|
+
Models trained on a specific dataset can learn features about the data. For instance, a model trained on a car classification dataset learns to recognize generic features such as edges and curves at a low level, and car-specific features at a high level. This information can be transferred to a new model that is going to be trained on classifying trucks. This process of extracting features and transferring to another model is called transfer learning.
|
|
6
|
+
|
|
7
|
+
### Similarity
|
|
8
|
+
|
|
9
|
+
Features extracted from models contain semantically meaningful information about the world. These features can be used to detect the similarity between two images. Assume there are two images: a photo of a stray cat in a street setting and a photo of a cat at home. These images both contain cats, and the features will contain the information that there's a cat in the image. Thus, comparing the features of a stray cat photo to the features of a domestic cat photo will result in higher similarity compared to any other image that doesn't contain any cats.
|
|
10
|
+
|
|
11
|
+
## Inference
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import torch
|
|
15
|
+
from transformers import pipeline
|
|
16
|
+
|
|
17
|
+
pipe = pipeline(task="image-feature-extraction", model="google/vit-base-patch16-384", framework="pt", pool=True)
|
|
18
|
+
pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png")
|
|
19
|
+
|
|
20
|
+
# The pipeline returns the pooled features for the image as a nested list:
|
|
21
|
+
|
|
22
|
+
'[[[0.21236686408519745, 1.0919708013534546, 0.8512550592422485, ...]]]'
|
|
23
|
+
```
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { TaskDataCustom } from "..";
|
|
2
|
+
|
|
3
|
+
const taskData: TaskDataCustom = {
|
|
4
|
+
datasets: [
|
|
5
|
+
{
|
|
6
|
+
description:
|
|
7
|
+
"ImageNet-1K is a image classification dataset in which images are used to train image-feature-extraction models.",
|
|
8
|
+
id: "imagenet-1k",
|
|
9
|
+
},
|
|
10
|
+
],
|
|
11
|
+
demo: {
|
|
12
|
+
inputs: [
|
|
13
|
+
{
|
|
14
|
+
filename: "mask-generation-input.png",
|
|
15
|
+
type: "img",
|
|
16
|
+
},
|
|
17
|
+
],
|
|
18
|
+
outputs: [
|
|
19
|
+
{
|
|
20
|
+
table: [
|
|
21
|
+
["Dimension 1", "Dimension 2", "Dimension 3"],
|
|
22
|
+
["0.21236686408519745", "1.0919708013534546", "0.8512550592422485"],
|
|
23
|
+
["0.809657871723175", "-0.18544459342956543", "-0.7851548194885254"],
|
|
24
|
+
["1.3103108406066895", "-0.2479034662246704", "-0.9107287526130676"],
|
|
25
|
+
["1.8536205291748047", "-0.36419737339019775", "0.09717650711536407"],
|
|
26
|
+
],
|
|
27
|
+
type: "tabular",
|
|
28
|
+
},
|
|
29
|
+
],
|
|
30
|
+
},
|
|
31
|
+
metrics: [],
|
|
32
|
+
models: [
|
|
33
|
+
{
|
|
34
|
+
description: "A powerful image feature extraction model.",
|
|
35
|
+
id: "timm/vit_large_patch14_dinov2.lvd142m",
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
description: "A strong image feature extraction model.",
|
|
39
|
+
id: "google/vit-base-patch16-224-in21k",
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
description: "A robust image feature extraction models.",
|
|
43
|
+
id: "facebook/dino-vitb16",
|
|
44
|
+
},
|
|
45
|
+
],
|
|
46
|
+
spaces: [],
|
|
47
|
+
summary: "Image feature extraction is the task of extracting features learnt in a computer vision model.",
|
|
48
|
+
widgetModels: [],
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
export default taskData;
|
package/src/tasks/index.ts
CHANGED
|
@@ -8,6 +8,7 @@ import documentQuestionAnswering from "./document-question-answering/data";
|
|
|
8
8
|
import featureExtraction from "./feature-extraction/data";
|
|
9
9
|
import fillMask from "./fill-mask/data";
|
|
10
10
|
import imageClassification from "./image-classification/data";
|
|
11
|
+
import imageFeatureExtraction from "./image-feature-extraction/data";
|
|
11
12
|
import imageToImage from "./image-to-image/data";
|
|
12
13
|
import imageToText from "./image-to-text/data";
|
|
13
14
|
import imageSegmentation from "./image-segmentation/data";
|
|
@@ -200,6 +201,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
|
200
201
|
"fill-mask": getData("fill-mask", fillMask),
|
|
201
202
|
"graph-ml": undefined,
|
|
202
203
|
"image-classification": getData("image-classification", imageClassification),
|
|
204
|
+
"image-feature-extraction": getData("image-feature-extraction", imageFeatureExtraction),
|
|
203
205
|
"image-segmentation": getData("image-segmentation", imageSegmentation),
|
|
204
206
|
"image-text-to-text": undefined,
|
|
205
207
|
"image-to-image": getData("image-to-image", imageToImage),
|
|
@@ -239,7 +241,6 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
|
239
241
|
"zero-shot-object-detection": getData("zero-shot-object-detection", zeroShotObjectDetection),
|
|
240
242
|
"text-to-3d": getData("text-to-3d", placeholder),
|
|
241
243
|
"image-to-3d": getData("image-to-3d", placeholder),
|
|
242
|
-
"image-feature-extraction": getData("image-feature-extraction", placeholder),
|
|
243
244
|
} as const;
|
|
244
245
|
|
|
245
246
|
export interface ExampleRepo {
|