@huggingface/tasks 0.13.1-test → 0.13.1-test2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/package.json +4 -2
  2. package/src/dataset-libraries.ts +89 -0
  3. package/src/default-widget-inputs.ts +718 -0
  4. package/src/gguf.ts +40 -0
  5. package/src/hardware.ts +482 -0
  6. package/src/index.ts +59 -0
  7. package/src/library-to-tasks.ts +76 -0
  8. package/src/local-apps.ts +412 -0
  9. package/src/model-data.ts +149 -0
  10. package/src/model-libraries-downloads.ts +18 -0
  11. package/src/model-libraries-snippets.ts +1128 -0
  12. package/src/model-libraries.ts +820 -0
  13. package/src/pipelines.ts +698 -0
  14. package/src/snippets/common.ts +39 -0
  15. package/src/snippets/curl.spec.ts +94 -0
  16. package/src/snippets/curl.ts +120 -0
  17. package/src/snippets/index.ts +7 -0
  18. package/src/snippets/inputs.ts +167 -0
  19. package/src/snippets/js.spec.ts +148 -0
  20. package/src/snippets/js.ts +305 -0
  21. package/src/snippets/python.spec.ts +144 -0
  22. package/src/snippets/python.ts +321 -0
  23. package/src/snippets/types.ts +16 -0
  24. package/src/tasks/audio-classification/about.md +86 -0
  25. package/src/tasks/audio-classification/data.ts +81 -0
  26. package/src/tasks/audio-classification/inference.ts +52 -0
  27. package/src/tasks/audio-classification/spec/input.json +35 -0
  28. package/src/tasks/audio-classification/spec/output.json +11 -0
  29. package/src/tasks/audio-to-audio/about.md +56 -0
  30. package/src/tasks/audio-to-audio/data.ts +70 -0
  31. package/src/tasks/automatic-speech-recognition/about.md +90 -0
  32. package/src/tasks/automatic-speech-recognition/data.ts +82 -0
  33. package/src/tasks/automatic-speech-recognition/inference.ts +160 -0
  34. package/src/tasks/automatic-speech-recognition/spec/input.json +35 -0
  35. package/src/tasks/automatic-speech-recognition/spec/output.json +38 -0
  36. package/src/tasks/chat-completion/inference.ts +322 -0
  37. package/src/tasks/chat-completion/spec/input.json +350 -0
  38. package/src/tasks/chat-completion/spec/output.json +206 -0
  39. package/src/tasks/chat-completion/spec/stream_output.json +213 -0
  40. package/src/tasks/common-definitions.json +100 -0
  41. package/src/tasks/depth-estimation/about.md +45 -0
  42. package/src/tasks/depth-estimation/data.ts +70 -0
  43. package/src/tasks/depth-estimation/inference.ts +35 -0
  44. package/src/tasks/depth-estimation/spec/input.json +25 -0
  45. package/src/tasks/depth-estimation/spec/output.json +16 -0
  46. package/src/tasks/document-question-answering/about.md +53 -0
  47. package/src/tasks/document-question-answering/data.ts +85 -0
  48. package/src/tasks/document-question-answering/inference.ts +110 -0
  49. package/src/tasks/document-question-answering/spec/input.json +85 -0
  50. package/src/tasks/document-question-answering/spec/output.json +36 -0
  51. package/src/tasks/feature-extraction/about.md +72 -0
  52. package/src/tasks/feature-extraction/data.ts +57 -0
  53. package/src/tasks/feature-extraction/inference.ts +40 -0
  54. package/src/tasks/feature-extraction/spec/input.json +47 -0
  55. package/src/tasks/feature-extraction/spec/output.json +15 -0
  56. package/src/tasks/fill-mask/about.md +51 -0
  57. package/src/tasks/fill-mask/data.ts +79 -0
  58. package/src/tasks/fill-mask/inference.ts +62 -0
  59. package/src/tasks/fill-mask/spec/input.json +38 -0
  60. package/src/tasks/fill-mask/spec/output.json +29 -0
  61. package/src/tasks/image-classification/about.md +50 -0
  62. package/src/tasks/image-classification/data.ts +88 -0
  63. package/src/tasks/image-classification/inference.ts +52 -0
  64. package/src/tasks/image-classification/spec/input.json +35 -0
  65. package/src/tasks/image-classification/spec/output.json +11 -0
  66. package/src/tasks/image-feature-extraction/about.md +23 -0
  67. package/src/tasks/image-feature-extraction/data.ts +59 -0
  68. package/src/tasks/image-segmentation/about.md +63 -0
  69. package/src/tasks/image-segmentation/data.ts +99 -0
  70. package/src/tasks/image-segmentation/inference.ts +69 -0
  71. package/src/tasks/image-segmentation/spec/input.json +45 -0
  72. package/src/tasks/image-segmentation/spec/output.json +26 -0
  73. package/src/tasks/image-text-to-text/about.md +76 -0
  74. package/src/tasks/image-text-to-text/data.ts +102 -0
  75. package/src/tasks/image-to-3d/about.md +62 -0
  76. package/src/tasks/image-to-3d/data.ts +75 -0
  77. package/src/tasks/image-to-image/about.md +129 -0
  78. package/src/tasks/image-to-image/data.ts +101 -0
  79. package/src/tasks/image-to-image/inference.ts +68 -0
  80. package/src/tasks/image-to-image/spec/input.json +55 -0
  81. package/src/tasks/image-to-image/spec/output.json +12 -0
  82. package/src/tasks/image-to-text/about.md +61 -0
  83. package/src/tasks/image-to-text/data.ts +82 -0
  84. package/src/tasks/image-to-text/inference.ts +143 -0
  85. package/src/tasks/image-to-text/spec/input.json +34 -0
  86. package/src/tasks/image-to-text/spec/output.json +14 -0
  87. package/src/tasks/index.ts +312 -0
  88. package/src/tasks/keypoint-detection/about.md +57 -0
  89. package/src/tasks/keypoint-detection/data.ts +50 -0
  90. package/src/tasks/mask-generation/about.md +65 -0
  91. package/src/tasks/mask-generation/data.ts +55 -0
  92. package/src/tasks/object-detection/about.md +37 -0
  93. package/src/tasks/object-detection/data.ts +86 -0
  94. package/src/tasks/object-detection/inference.ts +75 -0
  95. package/src/tasks/object-detection/spec/input.json +31 -0
  96. package/src/tasks/object-detection/spec/output.json +50 -0
  97. package/src/tasks/placeholder/about.md +15 -0
  98. package/src/tasks/placeholder/data.ts +21 -0
  99. package/src/tasks/placeholder/spec/input.json +35 -0
  100. package/src/tasks/placeholder/spec/output.json +17 -0
  101. package/src/tasks/question-answering/about.md +56 -0
  102. package/src/tasks/question-answering/data.ts +75 -0
  103. package/src/tasks/question-answering/inference.ts +99 -0
  104. package/src/tasks/question-answering/spec/input.json +67 -0
  105. package/src/tasks/question-answering/spec/output.json +29 -0
  106. package/src/tasks/reinforcement-learning/about.md +167 -0
  107. package/src/tasks/reinforcement-learning/data.ts +75 -0
  108. package/src/tasks/sentence-similarity/about.md +97 -0
  109. package/src/tasks/sentence-similarity/data.ts +101 -0
  110. package/src/tasks/sentence-similarity/inference.ts +32 -0
  111. package/src/tasks/sentence-similarity/spec/input.json +40 -0
  112. package/src/tasks/sentence-similarity/spec/output.json +12 -0
  113. package/src/tasks/summarization/about.md +58 -0
  114. package/src/tasks/summarization/data.ts +76 -0
  115. package/src/tasks/summarization/inference.ts +57 -0
  116. package/src/tasks/summarization/spec/input.json +42 -0
  117. package/src/tasks/summarization/spec/output.json +14 -0
  118. package/src/tasks/table-question-answering/about.md +43 -0
  119. package/src/tasks/table-question-answering/data.ts +59 -0
  120. package/src/tasks/table-question-answering/inference.ts +61 -0
  121. package/src/tasks/table-question-answering/spec/input.json +44 -0
  122. package/src/tasks/table-question-answering/spec/output.json +40 -0
  123. package/src/tasks/tabular-classification/about.md +65 -0
  124. package/src/tasks/tabular-classification/data.ts +68 -0
  125. package/src/tasks/tabular-regression/about.md +87 -0
  126. package/src/tasks/tabular-regression/data.ts +57 -0
  127. package/src/tasks/text-classification/about.md +173 -0
  128. package/src/tasks/text-classification/data.ts +103 -0
  129. package/src/tasks/text-classification/inference.ts +51 -0
  130. package/src/tasks/text-classification/spec/input.json +35 -0
  131. package/src/tasks/text-classification/spec/output.json +11 -0
  132. package/src/tasks/text-generation/about.md +154 -0
  133. package/src/tasks/text-generation/data.ts +114 -0
  134. package/src/tasks/text-generation/inference.ts +200 -0
  135. package/src/tasks/text-generation/spec/input.json +219 -0
  136. package/src/tasks/text-generation/spec/output.json +179 -0
  137. package/src/tasks/text-generation/spec/stream_output.json +103 -0
  138. package/src/tasks/text-to-3d/about.md +62 -0
  139. package/src/tasks/text-to-3d/data.ts +56 -0
  140. package/src/tasks/text-to-audio/inference.ts +143 -0
  141. package/src/tasks/text-to-audio/spec/input.json +31 -0
  142. package/src/tasks/text-to-audio/spec/output.json +17 -0
  143. package/src/tasks/text-to-image/about.md +96 -0
  144. package/src/tasks/text-to-image/data.ts +100 -0
  145. package/src/tasks/text-to-image/inference.ts +75 -0
  146. package/src/tasks/text-to-image/spec/input.json +63 -0
  147. package/src/tasks/text-to-image/spec/output.json +13 -0
  148. package/src/tasks/text-to-speech/about.md +63 -0
  149. package/src/tasks/text-to-speech/data.ts +79 -0
  150. package/src/tasks/text-to-speech/inference.ts +145 -0
  151. package/src/tasks/text-to-speech/spec/input.json +31 -0
  152. package/src/tasks/text-to-speech/spec/output.json +7 -0
  153. package/src/tasks/text-to-video/about.md +41 -0
  154. package/src/tasks/text-to-video/data.ts +102 -0
  155. package/src/tasks/text2text-generation/inference.ts +55 -0
  156. package/src/tasks/text2text-generation/spec/input.json +55 -0
  157. package/src/tasks/text2text-generation/spec/output.json +14 -0
  158. package/src/tasks/token-classification/about.md +76 -0
  159. package/src/tasks/token-classification/data.ts +92 -0
  160. package/src/tasks/token-classification/inference.ts +85 -0
  161. package/src/tasks/token-classification/spec/input.json +65 -0
  162. package/src/tasks/token-classification/spec/output.json +37 -0
  163. package/src/tasks/translation/about.md +65 -0
  164. package/src/tasks/translation/data.ts +70 -0
  165. package/src/tasks/translation/inference.ts +67 -0
  166. package/src/tasks/translation/spec/input.json +50 -0
  167. package/src/tasks/translation/spec/output.json +14 -0
  168. package/src/tasks/unconditional-image-generation/about.md +50 -0
  169. package/src/tasks/unconditional-image-generation/data.ts +72 -0
  170. package/src/tasks/video-classification/about.md +37 -0
  171. package/src/tasks/video-classification/data.ts +84 -0
  172. package/src/tasks/video-classification/inference.ts +59 -0
  173. package/src/tasks/video-classification/spec/input.json +42 -0
  174. package/src/tasks/video-classification/spec/output.json +10 -0
  175. package/src/tasks/video-text-to-text/about.md +98 -0
  176. package/src/tasks/video-text-to-text/data.ts +66 -0
  177. package/src/tasks/visual-question-answering/about.md +48 -0
  178. package/src/tasks/visual-question-answering/data.ts +97 -0
  179. package/src/tasks/visual-question-answering/inference.ts +62 -0
  180. package/src/tasks/visual-question-answering/spec/input.json +41 -0
  181. package/src/tasks/visual-question-answering/spec/output.json +21 -0
  182. package/src/tasks/zero-shot-classification/about.md +40 -0
  183. package/src/tasks/zero-shot-classification/data.ts +70 -0
  184. package/src/tasks/zero-shot-classification/inference.ts +67 -0
  185. package/src/tasks/zero-shot-classification/spec/input.json +50 -0
  186. package/src/tasks/zero-shot-classification/spec/output.json +11 -0
  187. package/src/tasks/zero-shot-image-classification/about.md +75 -0
  188. package/src/tasks/zero-shot-image-classification/data.ts +84 -0
  189. package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
  190. package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
  191. package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
  192. package/src/tasks/zero-shot-object-detection/about.md +45 -0
  193. package/src/tasks/zero-shot-object-detection/data.ts +67 -0
  194. package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
  195. package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
  196. package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
  197. package/src/tokenizer-data.ts +32 -0
  198. package/src/widget-example.ts +125 -0
@@ -0,0 +1,16 @@
1
+ {
2
+ "$id": "/inference/schemas/depth-estimation/output.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Outputs of inference for the Depth Estimation task",
5
+ "title": "DepthEstimationOutput",
6
+
7
+ "type": "object",
8
+ "properties": {
9
+ "predicted_depth": {
10
+ "description": "The predicted depth as a tensor"
11
+ },
12
+ "depth": {
13
+ "description": "The predicted depth as an image"
14
+ }
15
+ }
16
+ }
@@ -0,0 +1,53 @@
1
+ ## Use Cases
2
+
3
+ Document Question Answering models can be used to answer natural language questions about documents. Typically, document QA models consider textual, layout and potentially visual information. This is useful when the question requires some understanding of the visual aspects of the document.
4
+ Nevertheless, certain document QA models can work without document images. Hence the task is not limited to visually-rich documents and allows users to ask questions based on spreadsheets, text PDFs, etc!
5
+
6
+ ### Document Parsing
7
+
8
+ One of the most popular use cases of document question answering models is the parsing of structured documents. For example, you can extract the name, address, and other information from a form. You can also use the model to extract information from a table, or even a resume.
9
+
10
+ ### Invoice Information Extraction
11
+
12
+ Another very popular use case is invoice information extraction. For example, you can extract the invoice number, the invoice date, the total amount, the VAT number, and the invoice recipient.
13
+
14
+ ## Inference
15
+
16
+ You can infer with Document QA models with the 🤗 Transformers library using the [`document-question-answering` pipeline](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.DocumentQuestionAnsweringPipeline). If no model checkpoint is given, the pipeline will be initialized with [`impira/layoutlm-document-qa`](https://huggingface.co/impira/layoutlm-document-qa). This pipeline takes question(s) and document(s) as input, and returns the answer.
17
+ 👉 Note that the question answering task solved here is extractive: the model extracts the answer from a context (the document).
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+ from PIL import Image
22
+
23
+ pipe = pipeline("document-question-answering", model="naver-clova-ix/donut-base-finetuned-docvqa")
24
+
25
+ question = "What is the purchase amount?"
26
+ image = Image.open("your-document.png")
27
+
28
+ pipe(image=image, question=question)
29
+
30
+ ## [{'answer': '20,000$'}]
31
+ ```
32
+
33
+ ## Useful Resources
34
+
35
+ Would you like to learn more about Document QA? Awesome! Here are some curated resources that you may find helpful!
36
+
37
+ - [Document Visual Question Answering (DocVQA) challenge](https://rrc.cvc.uab.es/?ch=17)
38
+ - [DocVQA: A Dataset for Document Visual Question Answering](https://arxiv.org/abs/2007.00398) (Dataset paper)
39
+ - [ICDAR 2021 Competition on Document Visual Question Answering](https://arxiv.org/abs/2111.05547) (Competition report)
40
+ - [HuggingFace's Document Question Answering pipeline](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.DocumentQuestionAnsweringPipeline)
41
+ - [Github repo: DocQuery - Document Query Engine Powered by Large Language Models](https://github.com/impira/docquery)
42
+
43
+ ### Notebooks
44
+
45
+ - [Fine-tuning Donut on DocVQA dataset](https://github.com/NielsRogge/Transformers-Tutorials/tree/0ea77f29d01217587d7e32a848f3691d9c15d6ab/Donut/DocVQA)
46
+ - [Fine-tuning LayoutLMv2 on DocVQA dataset](https://github.com/NielsRogge/Transformers-Tutorials/tree/1b4bad710c41017d07a8f63b46a12523bfd2e835/LayoutLMv2/DocVQA)
47
+ - [Accelerating Document AI](https://huggingface.co/blog/document-ai)
48
+
49
+ ### Documentation
50
+
51
+ - [Document question answering task guide](https://huggingface.co/docs/transformers/tasks/document_question_answering)
52
+
53
+ The contents of this page are contributed by [Eliott Zemour](https://huggingface.co/eliolio) and reviewed by [Kwadwo Agyapon-Ntra](https://huggingface.co/KayO) and [Ankur Goyal](https://huggingface.co/ankrgyl).
@@ -0,0 +1,85 @@
1
+ import type { TaskDataCustom } from "../index.js";
2
+
3
+ const taskData: TaskDataCustom = {
4
+ datasets: [
5
+ {
6
+ description: "Largest document understanding dataset.",
7
+ id: "HuggingFaceM4/Docmatix",
8
+ },
9
+ {
10
+ description:
11
+ "Dataset from the 2020 DocVQA challenge. The documents are taken from the UCSF Industry Documents Library.",
12
+ id: "eliolio/docvqa",
13
+ },
14
+ ],
15
+ demo: {
16
+ inputs: [
17
+ {
18
+ label: "Question",
19
+ content: "What is the idea behind the consumer relations efficiency team?",
20
+ type: "text",
21
+ },
22
+ {
23
+ filename: "document-question-answering-input.png",
24
+ type: "img",
25
+ },
26
+ ],
27
+ outputs: [
28
+ {
29
+ label: "Answer",
30
+ content: "Balance cost efficiency with quality customer service",
31
+ type: "text",
32
+ },
33
+ ],
34
+ },
35
+ metrics: [
36
+ {
37
+ description:
38
+ "The evaluation metric for the DocVQA challenge is the Average Normalized Levenshtein Similarity (ANLS). This metric is flexible to character recognition errors and compares the predicted answer with the ground truth answer.",
39
+ id: "anls",
40
+ },
41
+ {
42
+ description:
43
+ "Exact Match is a metric based on the strict character match of the predicted answer and the ground truth answer. For answers predicted correctly, the Exact Match will be 1. Even if only one character is different, Exact Match will be 0.",
44
+ id: "exact-match",
45
+ },
46
+ ],
47
+ models: [
48
+ {
49
+ description: "A robust document question answering model.",
50
+ id: "impira/layoutlm-document-qa",
51
+ },
52
+ {
53
+ description: "A document question answering model specialized in invoices.",
54
+ id: "impira/layoutlm-invoices",
55
+ },
56
+ {
57
+ description: "A special model for OCR-free document question answering.",
58
+ id: "microsoft/udop-large",
59
+ },
60
+ {
61
+ description: "A powerful model for document question answering.",
62
+ id: "google/pix2struct-docvqa-large",
63
+ },
64
+ ],
65
+ spaces: [
66
+ {
67
+ description: "A robust document question answering application.",
68
+ id: "impira/docquery",
69
+ },
70
+ {
71
+ description: "An application that can answer questions from invoices.",
72
+ id: "impira/invoices",
73
+ },
74
+ {
75
+ description: "An application to compare different document question answering models.",
76
+ id: "merve/compare_docvqa_models",
77
+ },
78
+ ],
79
+ summary:
80
+ "Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
81
+ widgetModels: ["impira/layoutlm-invoices"],
82
+ youtubeId: "",
83
+ };
84
+
85
+ export default taskData;
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Inference code generated from the JSON schema spec in ./spec
3
+ *
4
+ * Using src/scripts/inference-codegen
5
+ */
6
+ /**
7
+ * Inputs for Document Question Answering inference
8
+ */
9
+ export interface DocumentQuestionAnsweringInput {
10
+ /**
11
+ * One (document, question) pair to answer
12
+ */
13
+ inputs: DocumentQuestionAnsweringInputData;
14
+ /**
15
+ * Additional inference parameters
16
+ */
17
+ parameters?: DocumentQuestionAnsweringParameters;
18
+ [property: string]: unknown;
19
+ }
20
+ /**
21
+ * One (document, question) pair to answer
22
+ */
23
+ export interface DocumentQuestionAnsweringInputData {
24
+ /**
25
+ * The image on which the question is asked
26
+ */
27
+ image: unknown;
28
+ /**
29
+ * A question to ask of the document
30
+ */
31
+ question: string;
32
+ [property: string]: unknown;
33
+ }
34
+ /**
35
+ * Additional inference parameters
36
+ *
37
+ * Additional inference parameters for Document Question Answering
38
+ */
39
+ export interface DocumentQuestionAnsweringParameters {
40
+ /**
41
+ * If the words in the document are too long to fit with the question for the model, it will
42
+ * be split in several chunks with some overlap. This argument controls the size of that
43
+ * overlap.
44
+ */
45
+ doc_stride?: number;
46
+ /**
47
+ * Whether to accept impossible as an answer
48
+ */
49
+ handle_impossible_answer?: boolean;
50
+ /**
51
+ * Language to use while running OCR. Defaults to english.
52
+ */
53
+ lang?: string;
54
+ /**
55
+ * The maximum length of predicted answers (e.g., only answers with a shorter length are
56
+ * considered).
57
+ */
58
+ max_answer_len?: number;
59
+ /**
60
+ * The maximum length of the question after tokenization. It will be truncated if needed.
61
+ */
62
+ max_question_len?: number;
63
+ /**
64
+ * The maximum length of the total sentence (context + question) in tokens of each chunk
65
+ * passed to the model. The context will be split in several chunks (using doc_stride as
66
+ * overlap) if needed.
67
+ */
68
+ max_seq_len?: number;
69
+ /**
70
+ * The number of answers to return (will be chosen by order of likelihood). Can return less
71
+ * than top_k answers if there are not enough options available within the context.
72
+ */
73
+ top_k?: number;
74
+ /**
75
+ * A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
76
+ * skip the OCR step and use the provided bounding boxes instead.
77
+ */
78
+ word_boxes?: WordBox[];
79
+ [property: string]: unknown;
80
+ }
81
+ export type WordBox = number[] | string;
82
+ export type DocumentQuestionAnsweringOutput = DocumentQuestionAnsweringOutputElement[];
83
+ /**
84
+ * Outputs of inference for the Document Question Answering task
85
+ */
86
+ export interface DocumentQuestionAnsweringOutputElement {
87
+ /**
88
+ * The answer to the question.
89
+ */
90
+ answer: string;
91
+ /**
92
+ * The end word index of the answer (in the OCR’d version of the input or provided word
93
+ * boxes).
94
+ */
95
+ end: number;
96
+ /**
97
+ * The probability associated to the answer.
98
+ */
99
+ score: number;
100
+ /**
101
+ * The start word index of the answer (in the OCR’d version of the input or provided word
102
+ * boxes).
103
+ */
104
+ start: number;
105
+ /**
106
+ * The index of each word/box pair that is in the answer
107
+ */
108
+ words: number[];
109
+ [property: string]: unknown;
110
+ }
@@ -0,0 +1,85 @@
1
+ {
2
+ "$id": "/inference/schemas/document-question-answering/input.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Inputs for Document Question Answering inference",
5
+ "title": "DocumentQuestionAnsweringInput",
6
+ "type": "object",
7
+ "properties": {
8
+ "inputs": {
9
+ "description": "One (document, question) pair to answer",
10
+ "type": "object",
11
+ "title": "DocumentQuestionAnsweringInputData",
12
+ "properties": {
13
+ "image": {
14
+ "description": "The image on which the question is asked"
15
+ },
16
+ "question": {
17
+ "type": "string",
18
+ "description": "A question to ask of the document"
19
+ }
20
+ },
21
+ "required": ["image", "question"]
22
+ },
23
+ "parameters": {
24
+ "description": "Additional inference parameters",
25
+ "$ref": "#/$defs/DocumentQuestionAnsweringParameters"
26
+ }
27
+ },
28
+ "$defs": {
29
+ "DocumentQuestionAnsweringParameters": {
30
+ "title": "DocumentQuestionAnsweringParameters",
31
+ "description": "Additional inference parameters for Document Question Answering",
32
+ "type": "object",
33
+ "properties": {
34
+ "doc_stride": {
35
+ "type": "integer",
36
+ "description": "If the words in the document are too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
37
+ },
38
+ "handle_impossible_answer": {
39
+ "type": "boolean",
40
+ "description": "Whether to accept impossible as an answer"
41
+ },
42
+ "lang": {
43
+ "type": "string",
44
+ "description": "Language to use while running OCR. Defaults to english."
45
+ },
46
+ "max_answer_len": {
47
+ "type": "integer",
48
+ "description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
49
+ },
50
+ "max_seq_len": {
51
+ "type": "integer",
52
+ "description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using doc_stride as overlap) if needed."
53
+ },
54
+ "max_question_len": {
55
+ "type": "integer",
56
+ "description": "The maximum length of the question after tokenization. It will be truncated if needed."
57
+ },
58
+ "top_k": {
59
+ "type": "integer",
60
+ "description": "The number of answers to return (will be chosen by order of likelihood). Can return less than top_k answers if there are not enough options available within the context."
61
+ },
62
+ "word_boxes": {
63
+ "type": "array",
64
+ "description": "A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR step and use the provided bounding boxes instead.",
65
+ "items": {
66
+ "anyOf": [
67
+ {
68
+ "type": "string"
69
+ },
70
+ {
71
+ "type": "array",
72
+ "items": {
73
+ "type": "number"
74
+ },
75
+ "maxLength": 4,
76
+ "minLength": 4
77
+ }
78
+ ]
79
+ }
80
+ }
81
+ }
82
+ }
83
+ },
84
+ "required": ["inputs"]
85
+ }
@@ -0,0 +1,36 @@
1
+ {
2
+ "$id": "/inference/schemas/document-question-answering/output.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Outputs of inference for the Document Question Answering task",
5
+ "title": "DocumentQuestionAnsweringOutput",
6
+ "type": "array",
7
+ "items": {
8
+ "type": "object",
9
+ "properties": {
10
+ "answer": {
11
+ "type": "string",
12
+ "description": "The answer to the question."
13
+ },
14
+ "score": {
15
+ "type": "number",
16
+ "description": "The probability associated to the answer."
17
+ },
18
+ "start": {
19
+ "type": "integer",
20
+ "description": "The start word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
21
+ },
22
+ "end": {
23
+ "type": "integer",
24
+ "description": "The end word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
25
+ },
26
+ "words": {
27
+ "type": "array",
28
+ "items": {
29
+ "type": "integer"
30
+ },
31
+ "description": "The index of each word/box pair that is in the answer"
32
+ }
33
+ },
34
+ "required": ["answer", "score", "start", "end", "words"]
35
+ }
36
+ }
@@ -0,0 +1,72 @@
1
+ ## Use Cases
2
+
3
+ ### Transfer Learning
4
+
5
+ Models trained on a specific dataset can learn features about the data. For instance, a model trained on an English poetry dataset learns English grammar at a very high level. This information can be transferred to a new model that is going to be trained on tweets. This process of extracting features and transferring to another model is called transfer learning. One can pass their dataset through a feature extraction pipeline and feed the result to a classifier.
6
+
7
+ ### Retrieval and Reranking
8
+
9
+ Retrieval is the process of obtaining relevant documents or information based on a user's search query. In the context of NLP, retrieval systems aim to find relevant text passages or documents from a large corpus of data that match the user's query. The goal is to return a set of results that are likely to be useful to the user. On the other hand, reranking is a technique used to improve the quality of retrieval results by reordering them based on their relevance to the query.
10
+
11
+ ### Retrieval Augmented Generation
12
+
13
+ Retrieval-augmented generation (RAG) is a technique in which user inputs to generative models are first queried through a knowledge base, and the most relevant information from the knowledge base is used to augment the prompt to reduce hallucinations during generation. Feature extraction models (primarily retrieval and reranking models) can be used in RAG to reduce model hallucinations and ground the model.
14
+
15
+ ## Inference
16
+
17
+ You can run inference with feature extraction models using the `pipeline` function of the 🤗 Transformers library.
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+ checkpoint = "facebook/bart-base"
22
+ feature_extractor = pipeline("feature-extraction", framework="pt", model=checkpoint)
23
+ text = "Transformers is an awesome library!"
24
+
25
+ #Reducing along the first dimension to get a 768 dimensional array
26
+ feature_extractor(text,return_tensors = "pt")[0].numpy().mean(axis=0)
27
+
28
+ '''tensor([[[ 2.5834, 2.7571, 0.9024, ..., 1.5036, -0.0435, -0.8603],
29
+ [-1.2850, -1.0094, -2.0826, ..., 1.5993, -0.9017, 0.6426],
30
+ [ 0.9082, 0.3896, -0.6843, ..., 0.7061, 0.6517, 1.0550],
31
+ ...,
32
+ [ 0.6919, -1.1946, 0.2438, ..., 1.3646, -1.8661, -0.1642],
33
+ [-0.1701, -2.0019, -0.4223, ..., 0.3680, -1.9704, -0.0068],
34
+ [ 0.2520, -0.6869, -1.0582, ..., 0.5198, -2.2106, 0.4547]]])'''
35
+ ```
36
+
37
+ A very popular library for training similarity and search models is called `sentence-transformers`. To get started, install the library.
38
+
39
+ ```bash
40
+ pip install -U sentence-transformers
41
+ ```
42
+
43
+ You can infer with `sentence-transformers` models as follows.
44
+
45
+ ```python
46
+ from sentence_transformers import SentenceTransformer
47
+
48
+ model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
49
+ sentences = [
50
+ "The weather is lovely today.",
51
+ "It's so sunny outside!",
52
+ "He drove to the stadium.",
53
+ ]
54
+
55
+ embeddings = model.encode(sentences)
56
+ similarities = model.similarity(embeddings, embeddings)
57
+ print(similarities)
58
+ # tensor([[1.0000, 0.6660, 0.1046],
59
+ # [0.6660, 1.0000, 0.1411],
60
+ # [0.1046, 0.1411, 1.0000]])
61
+ ```
62
+
63
+ ### Text Embedding Inference
64
+
65
+ [Text Embeddings Inference (TEI)](https://github.com/huggingface/text-embeddings-inference) is a toolkit to easily serve feature extraction models using few lines of code.
66
+
67
+ ## Useful resources
68
+
69
+ - [Documentation for feature extraction task in 🤗Transformers](https://huggingface.co/docs/transformers/main_classes/feature_extractor)
70
+ - [Introduction to MTEB Benchmark](https://huggingface.co/blog/mteb)
71
+ - [Cookbook: Simple RAG for GitHub issues using Hugging Face Zephyr and LangChain](https://huggingface.co/learn/cookbook/rag_zephyr_langchain)
72
+ - [sentence-transformers organization on Hugging Face Hub](https://huggingface.co/sentence-transformers)
@@ -0,0 +1,57 @@
1
+ import type { TaskDataCustom } from "../index.js";
2
+
3
+ const taskData: TaskDataCustom = {
4
+ datasets: [
5
+ {
6
+ description:
7
+ "Wikipedia dataset containing cleaned articles of all languages. Can be used to train `feature-extraction` models.",
8
+ id: "wikipedia",
9
+ },
10
+ ],
11
+ demo: {
12
+ inputs: [
13
+ {
14
+ label: "Input",
15
+ content: "India, officially the Republic of India, is a country in South Asia.",
16
+ type: "text",
17
+ },
18
+ ],
19
+ outputs: [
20
+ {
21
+ table: [
22
+ ["Dimension 1", "Dimension 2", "Dimension 3"],
23
+ ["2.583383083343506", "2.757075071334839", "0.9023529887199402"],
24
+ ["8.29393482208252", "1.1071064472198486", "2.03399395942688"],
25
+ ["-0.7754912972450256", "-1.647324562072754", "-0.6113331913948059"],
26
+ ["0.07087723910808563", "1.5942802429199219", "1.4610432386398315"],
27
+ ],
28
+ type: "tabular",
29
+ },
30
+ ],
31
+ },
32
+ metrics: [],
33
+ models: [
34
+ {
35
+ description: "A powerful feature extraction model for natural language processing tasks.",
36
+ id: "thenlper/gte-large",
37
+ },
38
+ {
39
+ description: "A strong feature extraction model for retrieval.",
40
+ id: "Alibaba-NLP/gte-Qwen1.5-7B-instruct",
41
+ },
42
+ ],
43
+ spaces: [
44
+ {
45
+ description: "A leaderboard to rank text feature extraction models based on a benchmark.",
46
+ id: "mteb/leaderboard",
47
+ },
48
+ {
49
+ description: "A leaderboard to rank best feature extraction models based on human feedback.",
50
+ id: "mteb/arena",
51
+ },
52
+ ],
53
+ summary: "Feature extraction is the task of extracting features learnt in a model.",
54
+ widgetModels: ["facebook/bart-base"],
55
+ };
56
+
57
+ export default taskData;
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Inference code generated from the JSON schema spec in ./spec
3
+ *
4
+ * Using src/scripts/inference-codegen
5
+ */
6
+
7
+ export type FeatureExtractionOutput = Array<number[]>;
8
+
9
+ /**
10
+ * Feature Extraction Input.
11
+ *
12
+ * Auto-generated from TEI specs.
13
+ * For more details, check out
14
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.
15
+ */
16
+ export interface FeatureExtractionInput {
17
+ /**
18
+ * The text to embed.
19
+ */
20
+ inputs: string;
21
+ normalize?: boolean;
22
+ /**
23
+ * The name of the prompt that should be used by for encoding. If not set, no prompt
24
+ * will be applied.
25
+ *
26
+ * Must be a key in the `Sentence Transformers` configuration `prompts` dictionary.
27
+ *
28
+ * For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ",
29
+ * ...},
30
+ * then the sentence "What is the capital of France?" will be encoded as
31
+ * "query: What is the capital of France?" because the prompt text will be prepended before
32
+ * any text to encode.
33
+ */
34
+ prompt_name?: string;
35
+ truncate?: boolean;
36
+ truncation_direction?: FeatureExtractionInputTruncationDirection;
37
+ [property: string]: unknown;
38
+ }
39
+
40
+ export type FeatureExtractionInputTruncationDirection = "Left" | "Right";
@@ -0,0 +1,47 @@
1
+ {
2
+ "$id": "/inference/schemas/feature-extraction/input.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Feature Extraction Input.\n\nAuto-generated from TEI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.",
5
+ "title": "FeatureExtractionInput",
6
+ "type": "object",
7
+ "required": ["inputs"],
8
+ "properties": {
9
+ "inputs": {
10
+ "type": "string",
11
+ "description": "The text to embed."
12
+ },
13
+ "normalize": {
14
+ "type": "boolean",
15
+ "default": "true",
16
+ "example": "true"
17
+ },
18
+ "prompt_name": {
19
+ "type": "string",
20
+ "description": "The name of the prompt that should be used by for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `Sentence Transformers` configuration `prompts` dictionary.\n\nFor example if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.",
21
+ "default": "null",
22
+ "example": "null",
23
+ "nullable": true
24
+ },
25
+ "truncate": {
26
+ "type": "boolean",
27
+ "default": "false",
28
+ "example": "false",
29
+ "nullable": true
30
+ },
31
+ "truncation_direction": {
32
+ "allOf": [
33
+ {
34
+ "$ref": "#/$defs/FeatureExtractionInputTruncationDirection"
35
+ }
36
+ ],
37
+ "default": "right"
38
+ }
39
+ },
40
+ "$defs": {
41
+ "FeatureExtractionInputTruncationDirection": {
42
+ "type": "string",
43
+ "enum": ["Left", "Right"],
44
+ "title": "FeatureExtractionInputTruncationDirection"
45
+ }
46
+ }
47
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "$id": "/inference/schemas/feature-extraction/output.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Feature Extraction Output.\n\nAuto-generated from TEI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.",
5
+ "title": "FeatureExtractionOutput",
6
+ "type": "array",
7
+ "$defs": {},
8
+ "items": {
9
+ "type": "array",
10
+ "items": {
11
+ "type": "number",
12
+ "format": "float"
13
+ }
14
+ }
15
+ }