@huggingface/tasks 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/LICENSE +21 -0
  2. package/README.md +20 -0
  3. package/dist/index.d.ts +368 -46
  4. package/dist/index.js +117 -41
  5. package/dist/{index.cjs → index.mjs} +84 -67
  6. package/package.json +43 -33
  7. package/src/Types.ts +49 -43
  8. package/src/audio-classification/about.md +5 -5
  9. package/src/audio-classification/data.ts +11 -11
  10. package/src/audio-to-audio/about.md +4 -3
  11. package/src/audio-to-audio/data.ts +18 -15
  12. package/src/automatic-speech-recognition/about.md +5 -4
  13. package/src/automatic-speech-recognition/data.ts +18 -17
  14. package/src/const.ts +52 -44
  15. package/src/conversational/about.md +9 -9
  16. package/src/conversational/data.ts +22 -18
  17. package/src/depth-estimation/about.md +1 -3
  18. package/src/depth-estimation/data.ts +11 -11
  19. package/src/document-question-answering/about.md +1 -2
  20. package/src/document-question-answering/data.ts +22 -19
  21. package/src/feature-extraction/about.md +2 -3
  22. package/src/feature-extraction/data.ts +12 -15
  23. package/src/fill-mask/about.md +1 -1
  24. package/src/fill-mask/data.ts +16 -14
  25. package/src/image-classification/about.md +5 -3
  26. package/src/image-classification/data.ts +15 -15
  27. package/src/image-segmentation/about.md +4 -4
  28. package/src/image-segmentation/data.ts +26 -23
  29. package/src/image-to-image/about.md +10 -12
  30. package/src/image-to-image/data.ts +31 -27
  31. package/src/image-to-text/about.md +13 -6
  32. package/src/image-to-text/data.ts +20 -21
  33. package/src/index.ts +11 -0
  34. package/src/modelLibraries.ts +43 -0
  35. package/src/object-detection/about.md +2 -1
  36. package/src/object-detection/data.ts +20 -17
  37. package/src/pipelines.ts +619 -0
  38. package/src/placeholder/about.md +3 -3
  39. package/src/placeholder/data.ts +8 -8
  40. package/src/question-answering/about.md +1 -1
  41. package/src/question-answering/data.ts +21 -19
  42. package/src/reinforcement-learning/about.md +167 -176
  43. package/src/reinforcement-learning/data.ts +75 -78
  44. package/src/sentence-similarity/data.ts +29 -28
  45. package/src/summarization/about.md +6 -5
  46. package/src/summarization/data.ts +23 -20
  47. package/src/table-question-answering/about.md +5 -5
  48. package/src/table-question-answering/data.ts +35 -39
  49. package/src/tabular-classification/about.md +4 -6
  50. package/src/tabular-classification/data.ts +11 -12
  51. package/src/tabular-regression/about.md +14 -18
  52. package/src/tabular-regression/data.ts +10 -11
  53. package/src/tasksData.ts +47 -50
  54. package/src/text-classification/about.md +5 -4
  55. package/src/text-classification/data.ts +21 -20
  56. package/src/text-generation/about.md +7 -6
  57. package/src/text-generation/data.ts +36 -34
  58. package/src/text-to-image/about.md +19 -18
  59. package/src/text-to-image/data.ts +32 -26
  60. package/src/text-to-speech/about.md +4 -5
  61. package/src/text-to-speech/data.ts +16 -17
  62. package/src/text-to-video/about.md +41 -36
  63. package/src/text-to-video/data.ts +43 -38
  64. package/src/token-classification/about.md +1 -3
  65. package/src/token-classification/data.ts +26 -25
  66. package/src/translation/about.md +4 -4
  67. package/src/translation/data.ts +21 -21
  68. package/src/unconditional-image-generation/about.md +10 -5
  69. package/src/unconditional-image-generation/data.ts +26 -20
  70. package/src/video-classification/about.md +5 -1
  71. package/src/video-classification/data.ts +14 -14
  72. package/src/visual-question-answering/about.md +8 -3
  73. package/src/visual-question-answering/data.ts +22 -19
  74. package/src/zero-shot-classification/about.md +5 -4
  75. package/src/zero-shot-classification/data.ts +20 -20
  76. package/src/zero-shot-image-classification/about.md +17 -9
  77. package/src/zero-shot-image-classification/data.ts +12 -14
  78. package/tsconfig.json +18 -0
  79. package/assets/audio-classification/audio.wav +0 -0
  80. package/assets/audio-to-audio/input.wav +0 -0
  81. package/assets/audio-to-audio/label-0.wav +0 -0
  82. package/assets/audio-to-audio/label-1.wav +0 -0
  83. package/assets/automatic-speech-recognition/input.flac +0 -0
  84. package/assets/automatic-speech-recognition/wav2vec2.png +0 -0
  85. package/assets/contribution-guide/anatomy.png +0 -0
  86. package/assets/contribution-guide/libraries.png +0 -0
  87. package/assets/depth-estimation/depth-estimation-input.jpg +0 -0
  88. package/assets/depth-estimation/depth-estimation-output.png +0 -0
  89. package/assets/document-question-answering/document-question-answering-input.png +0 -0
  90. package/assets/image-classification/image-classification-input.jpeg +0 -0
  91. package/assets/image-segmentation/image-segmentation-input.jpeg +0 -0
  92. package/assets/image-segmentation/image-segmentation-output.png +0 -0
  93. package/assets/image-to-image/image-to-image-input.jpeg +0 -0
  94. package/assets/image-to-image/image-to-image-output.png +0 -0
  95. package/assets/image-to-image/pix2pix_examples.jpg +0 -0
  96. package/assets/image-to-text/savanna.jpg +0 -0
  97. package/assets/object-detection/object-detection-input.jpg +0 -0
  98. package/assets/object-detection/object-detection-output.jpg +0 -0
  99. package/assets/table-question-answering/tableQA.jpg +0 -0
  100. package/assets/text-to-image/image.jpeg +0 -0
  101. package/assets/text-to-speech/audio.wav +0 -0
  102. package/assets/text-to-video/text-to-video-output.gif +0 -0
  103. package/assets/unconditional-image-generation/unconditional-image-generation-output.jpeg +0 -0
  104. package/assets/video-classification/video-classification-input.gif +0 -0
  105. package/assets/visual-question-answering/elephant.jpeg +0 -0
  106. package/assets/zero-shot-image-classification/image-classification-input.jpeg +0 -0
  107. package/dist/index.d.cts +0 -145
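Most of the hunks below are formatting-only rewraps of the per-task `data.ts` files, which all follow the `TaskDataCustom` shape imported from `src/Types.ts`. As a reading aid, here is a rough sketch of that shape, with field names taken directly from the hunks below; it is an illustrative approximation (the `DemoEntry`/`TaskDataSketch` names are made up here), not the canonical definition in `package/src/Types.ts`, which may differ in detail.

```ts
// Sketch only: approximates the structure the diffed data.ts files follow.
// Field names come from the hunks below; the real types live in src/Types.ts.
type DemoEntry =
  | { label: string; content: string; type: "text" }
  | { table: string[][]; type: "tabular" };

interface TaskDataSketch {
  datasets: { description: string; id: string }[];
  demo: { inputs: DemoEntry[]; outputs: DemoEntry[] };
  metrics: { description: string; id: string }[];
  models: { description: string; id: string }[];
  spaces: { description: string; id: string }[];
  summary: string;
  widgetModels: string[];
  youtubeId?: string;
}

// Example instance mirroring the tabular-regression entry further down
// (only the rows visible in this diff are reproduced).
const tabularRegressionSketch: TaskDataSketch = {
  datasets: [
    { description: "A comprehensive curation of datasets covering all benchmarks.", id: "inria-soda/tabular-benchmark" },
  ],
  demo: {
    inputs: [{ table: [["ford torino", "140", "3,449"], ["amc hornet", "97", "2,774"]], type: "tabular" }],
    outputs: [{ table: [["MPG (miles per gallon)"], ["17"], ["18"]], type: "tabular" }],
  },
  metrics: [{ description: "", id: "mse" }],
  models: [{ description: "Fish weight prediction based on length measurements and species.", id: "scikit-learn/Fish-Weight" }],
  spaces: [{ description: "An application that can predict weight of a fish based on set of attributes.", id: "scikit-learn/fish-weight-prediction" }],
  summary: "Tabular regression is the task of predicting a numerical value given a set of attributes.",
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: "",
};
```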
@@ -4,35 +4,31 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
- id: "ms_marco",
+ id: "ms_marco",
  },
-
  ],
  demo: {
  inputs: [
-
-
  {
- label: "Source sentence",
+ label: "Source sentence",
  content: "Machine learning is so easy.",
- type: "text",
+ type: "text",
  },
  {
- label: "Sentences to compare to",
+ label: "Sentences to compare to",
  content: "Deep learning is so straightforward.",
- type: "text",
+ type: "text",
  },
  {
- label: "",
+ label: "",
  content: "This is so difficult, like rocket science.",
- type: "text",
+ type: "text",
  },
  {
- label: "",
+ label: "",
  content: "I can't believe how much I struggled with this.",
- type: "text",
+ type: "text",
  },
-
  ],
  outputs: [
  {
@@ -56,45 +52,50 @@ const taskData: TaskDataCustom = {
  },
  metrics: [
  {
- description: "Reciprocal Rank is a measure used to rank the relevancy of documents given a set of documents. Reciprocal Rank is the reciprocal of the rank of the document retrieved, meaning, if the rank is 3, the Reciprocal Rank is 0.33. If the rank is 1, the Reciprocal Rank is 1",
- id: "Mean Reciprocal Rank",
+ description:
+ "Reciprocal Rank is a measure used to rank the relevancy of documents given a set of documents. Reciprocal Rank is the reciprocal of the rank of the document retrieved, meaning, if the rank is 3, the Reciprocal Rank is 0.33. If the rank is 1, the Reciprocal Rank is 1",
+ id: "Mean Reciprocal Rank",
  },
  {
- description: "The similarity of the embeddings is evaluated mainly on cosine similarity. It is calculated as the cosine of the angle between two vectors. It is particularly useful when your texts are not the same length",
- id: "Cosine Similarity",
+ description:
+ "The similarity of the embeddings is evaluated mainly on cosine similarity. It is calculated as the cosine of the angle between two vectors. It is particularly useful when your texts are not the same length",
+ id: "Cosine Similarity",
  },
  ],
  models: [
  {
- description: "This model works well for sentences and paragraphs and can be used for clustering/grouping and semantic searches.",
- id: "sentence-transformers/all-mpnet-base-v2",
+ description:
+ "This model works well for sentences and paragraphs and can be used for clustering/grouping and semantic searches.",
+ id: "sentence-transformers/all-mpnet-base-v2",
  },
  {
  description: "A multilingual model trained for FAQ retrieval.",
- id: "clips/mfaq",
+ id: "clips/mfaq",
  },
  ],
- spaces: [
+ spaces: [
  {
  description: "An application that leverages sentence similarity to answer questions from YouTube videos.",
- id: "Gradio-Blocks/Ask_Questions_To_YouTube_Videos",
+ id: "Gradio-Blocks/Ask_Questions_To_YouTube_Videos",
  },
  {
- description: "An application that retrieves relevant PubMed abstracts for a given online article which can be used as further references.",
- id: "Gradio-Blocks/pubmed-abstract-retriever",
+ description:
+ "An application that retrieves relevant PubMed abstracts for a given online article which can be used as further references.",
+ id: "Gradio-Blocks/pubmed-abstract-retriever",
  },
  {
  description: "An application that leverages sentence similarity to summarize text.",
- id: "nickmuchi/article-text-summarizer",
+ id: "nickmuchi/article-text-summarizer",
  },
  {
  description: "A guide that explains how Sentence Transformers can be used for semantic search.",
- id: "sentence-transformers/Sentence_Transformers_for_semantic_search",
+ id: "sentence-transformers/Sentence_Transformers_for_semantic_search",
  },
  ],
- summary: "Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. This task is particularly useful for information retrieval and clustering/grouping.",
+ summary:
+ "Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. This task is particularly useful for information retrieval and clustering/grouping.",
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
- youtubeId: "VCZq5AkbNEU",
+ youtubeId: "VCZq5AkbNEU",
  };

  export default taskData;
@@ -26,12 +26,13 @@ You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to i
  import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
- const inputs = "Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018, in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017."
+ const inputs =
+ "Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018, in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017.";

  await inference.summarization({
- model: 'sshleifer/distilbart-cnn-12-6',
- inputs
- })
+ model: "sshleifer/distilbart-cnn-12-6",
+ inputs,
+ });
  ```

  ## Useful Resources
@@ -54,4 +55,4 @@ Would you like to learn more about the topic? Awesome! Here you can find some cu

  ### Documentation

- - [Summarization task guide](https://huggingface.co/docs/transformers/tasks/summarization)
+ - [Summarization task guide](https://huggingface.co/docs/transformers/tasks/summarization)
@@ -3,70 +3,73 @@ import type { TaskDataCustom } from "../Types";
  const taskData: TaskDataCustom = {
  datasets: [
  {
- description: "News articles in five different languages along with their summaries. Widely used for benchmarking multilingual summarization models.",
- id: "mlsum",
+ description:
+ "News articles in five different languages along with their summaries. Widely used for benchmarking multilingual summarization models.",
+ id: "mlsum",
  },
  {
  description: "English conversations and their summaries. Useful for benchmarking conversational agents.",
- id: "samsum",
+ id: "samsum",
  },
  ],
  demo: {
  inputs: [
  {
- label: "Input",
+ label: "Input",
  content:
- "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. It was the first structure to reach a height of 300 metres. Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
+ "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. It was the first structure to reach a height of 300 metres. Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
  type: "text",
  },
-
  ],
  outputs: [
  {
- label: "Output",
+ label: "Output",
  content:
- "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building. It was the first structure to reach a height of 300 metres.",
+ "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building. It was the first structure to reach a height of 300 metres.",
  type: "text",
  },
  ],
  },
  metrics: [
  {
- description: "The generated sequence is compared against its summary, and the overlap of tokens are counted. ROUGE-N refers to overlap of N subsequent tokens, ROUGE-1 refers to overlap of single tokens and ROUGE-2 is the overlap of two subsequent tokens.",
- id: "rouge",
+ description:
+ "The generated sequence is compared against its summary, and the overlap of tokens are counted. ROUGE-N refers to overlap of N subsequent tokens, ROUGE-1 refers to overlap of single tokens and ROUGE-2 is the overlap of two subsequent tokens.",
+ id: "rouge",
  },
  ],
  models: [
  {
- description: "A strong summarization model trained on English news articles. Excels at generating factual summaries.",
- id: "facebook/bart-large-cnn",
+ description:
+ "A strong summarization model trained on English news articles. Excels at generating factual summaries.",
+ id: "facebook/bart-large-cnn",
  },
  {
  description: "A summarization model trained on medical articles.",
- id: "google/bigbird-pegasus-large-pubmed",
+ id: "google/bigbird-pegasus-large-pubmed",
  },
  ],
- spaces: [
+ spaces: [
  {
  description: "An application that can summarize long paragraphs.",
- id: "pszemraj/summarize-long-text",
+ id: "pszemraj/summarize-long-text",
  },
  {
  description: "A much needed summarization application for terms and conditions.",
- id: "ml6team/distilbart-tos-summarizer-tosdr",
+ id: "ml6team/distilbart-tos-summarizer-tosdr",
  },
  {
  description: "An application that summarizes long documents.",
- id: "pszemraj/document-summarization",
+ id: "pszemraj/document-summarization",
  },
  {
  description: "An application that can detect errors in abstractive summarization.",
- id: "ml6team/post-processing-summarization",
+ id: "ml6team/post-processing-summarization",
  },
  ],
- summary: "Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.",
+ summary:
+ "Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.",
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
- youtubeId: "yHnr5Dk2zCI",
+ youtubeId: "yHnr5Dk2zCI",
  };

  export default taskData;
@@ -2,9 +2,9 @@

  ### SQL execution

- You can use the Table Question Answering models to simulate SQL execution by inputting a table.
+ You can use the Table Question Answering models to simulate SQL execution by inputting a table.

- ### Table Question Answering
+ ### Table Question Answering

  Table Question Answering models are capable of answering questions based on a table.

@@ -12,11 +12,11 @@ Table Question Answering models are capable of answering questions based on a ta

  This place can be filled with variants of this task if there's any.

- ## Inference
+ ## Inference

  You can infer with TableQA models using the 🤗 Transformers library.

- ```python
+ ```python
  from transformers import pipeline
  import pandas as pd

@@ -40,4 +40,4 @@ print(tqa(table=table, query=query)['cells'][0])

  In this area, you can insert useful resources about how to train or use a model for this task.

- This task page is complete thanks to the efforts of [Hao Kim Tieu](https://huggingface.co/haotieu). 🦸
+ This task page is complete thanks to the efforts of [Hao Kim Tieu](https://huggingface.co/haotieu). 🦸
@@ -3,61 +3,57 @@ import type { TaskDataCustom } from "../Types";
  const taskData: TaskDataCustom = {
  datasets: [
  {
- description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
- id: "wikitablequestions",
+ description:
+ "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
+ id: "wikitablequestions",
  },
  {
- description: "WikiSQL is a dataset of 80654 hand-annotated examples of questions and SQL queries distributed across 24241 tables from Wikipedia.",
- id: "wikisql",
+ description:
+ "WikiSQL is a dataset of 80654 hand-annotated examples of questions and SQL queries distributed across 24241 tables from Wikipedia.",
+ id: "wikisql",
  },
  ],
  demo: {
  inputs: [
- { table: [
- ["Rank", "Name", "No.of reigns", "Combined days"],
- ["1", "lou Thesz", "3", "3749"],
- ["2", "Ric Flair", "8", "3103"],
- ["3", "Harley Race", "7", "1799"],
- ],
- type: "tabular" },
-
- { label: "Question",
- content: "What is the number of reigns for Harley Race?",
- type: "text" },
- ],
- outputs: [
- { label: "Result",
- content: "7",
- type: "text" },
- ],
- },
- metrics:
- [
- { description: "Checks whether the predicted answer(s) is the same as the ground-truth answer(s).",
- id: "Denotation Accuracy" },
- ],
- models:
- [
  {
- description: "A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table.",
- id: "microsoft/tapex-base",
+ table: [
+ ["Rank", "Name", "No.of reigns", "Combined days"],
+ ["1", "lou Thesz", "3", "3749"],
+ ["2", "Ric Flair", "8", "3103"],
+ ["3", "Harley Race", "7", "1799"],
+ ],
+ type: "tabular",
  },
- {
- description: "A robust table question answering model.",
- id: "google/tapas-base-finetuned-wtq",

- },
+ { label: "Question", content: "What is the number of reigns for Harley Race?", type: "text" },
  ],
+ outputs: [{ label: "Result", content: "7", type: "text" }],
+ },
+ metrics: [
+ {
+ description: "Checks whether the predicted answer(s) is the same as the ground-truth answer(s).",
+ id: "Denotation Accuracy",
+ },
+ ],
+ models: [
+ {
+ description:
+ "A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table.",
+ id: "microsoft/tapex-base",
+ },
+ {
+ description: "A robust table question answering model.",
+ id: "google/tapas-base-finetuned-wtq",
+ },
+ ],
  spaces: [
  {
  description: "An application that answers questions based on table CSV files.",
- id: "katanaml/table-query",
-
+ id: "katanaml/table-query",
  },
  ],
- summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
+ summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"],
  };

-
  export default taskData;
@@ -15,16 +15,16 @@ Tabular classification models can be used in detecting fraudulent credit card tr
  ### Churn Prediction
  Tabular classification models can be used in predicting customer churn in telecommunication. An example dataset for the task is hosted [here](https://huggingface.co/datasets/scikit-learn/churn-prediction).

+ # Model Hosting and Inference

- # Model Hosting and Inference
+ You can use [skops](https://skops.readthedocs.io/) for model hosting and inference on the Hugging Face Hub. This library is built to improve production workflows of various libraries that are used to train tabular models, including [sklearn](https://scikit-learn.org/stable/) and [xgboost](https://xgboost.readthedocs.io/en/stable/). Using `skops` you can:

- You can use [skops](https://skops.readthedocs.io/) for model hosting and inference on the Hugging Face Hub. This library is built to improve production workflows of various libraries that are used to train tabular models, including [sklearn](https://scikit-learn.org/stable/) and [xgboost](https://xgboost.readthedocs.io/en/stable/). Using `skops` you can:
  - Easily use inference API,
  - Build neat UIs with one line of code,
  - Programmatically create model cards,
  - Securely serialize your scikit-learn model. (See limitations of using pickle [here](https://huggingface.co/docs/hub/security-pickle).)

- You can push your model as follows:
+ You can push your model as follows:

  ```python
  from skops import hub_utils
@@ -52,7 +52,6 @@ import gradio as gr
  gr.Interface.load("huggingface/username/my-awesome-model").launch()
  ```

-
  ## Useful Resources

  - Check out the [scikit-learn organization](https://huggingface.co/scikit-learn) to learn more about different algorithms used for this task.
@@ -61,7 +60,6 @@ gr.Interface.load("huggingface/username/my-awesome-model").launch()
  - [Notebook: Persisting your scikit-learn model using skops](https://www.kaggle.com/code/unofficialmerve/persisting-your-scikit-learn-model-using-skops)
  - Check out [interactive sklearn examples](https://huggingface.co/sklearn-docs) built with ❤️ using Gradio.

-
  ### Training your own model in just a few seconds

- We have built a [baseline trainer](https://huggingface.co/spaces/scikit-learn/baseline-trainer) application to which you can drag and drop your dataset. It will train a baseline and push it to your Hugging Face Hub profile with a model card containing information about the model.
+ We have built a [baseline trainer](https://huggingface.co/spaces/scikit-learn/baseline-trainer) application to which you can drag and drop your dataset. It will train a baseline and push it to your Hugging Face Hub profile with a model card containing information about the model.
@@ -4,7 +4,7 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
- id: "inria-soda/tabular-benchmark",
+ id: "inria-soda/tabular-benchmark",
  },
  ],
  demo: {
@@ -22,48 +22,47 @@ const taskData: TaskDataCustom = {
  outputs: [
  {
  table: [["Diabetes"], ["1"], ["1"], ["0"]],
- type: "tabular",
+ type: "tabular",
  },
  ],
  },
  metrics: [
  {
  description: "",
- id: "accuracy",
+ id: "accuracy",
  },
  {
  description: "",
- id: "recall",
+ id: "recall",
  },
  {
  description: "",
- id: "precision",
+ id: "precision",
  },
  {
  description: "",
- id: "f1",
+ id: "f1",
  },
  ],
  models: [
  {
  description: "Breast cancer prediction model based on decision trees.",
- id: "scikit-learn/cancer-prediction-trees",
+ id: "scikit-learn/cancer-prediction-trees",
  },
  ],
  spaces: [
  {
  description: "An application that can predict defective products on a production line.",
- id: "scikit-learn/tabular-playground",
+ id: "scikit-learn/tabular-playground",
  },
  {
  description: "An application that compares various tabular classification techniques on different datasets.",
- id: "scikit-learn/classification",
+ id: "scikit-learn/classification",
  },
  ],
- summary:
- "Tabular classification is the task of classifying a target category (a group) based on set of attributes.",
+ summary: "Tabular classification is the task of classifying a target category (a group) based on set of attributes.",
  widgetModels: ["scikit-learn/tabular-playground"],
- youtubeId: "",
+ youtubeId: "",
  };

  export default taskData;
@@ -1,16 +1,15 @@
  ## About the Task

- Tabular regression is the task of predicting a numerical value given a set of attributes/features. *Tabular* meaning that data is stored in a table (like an excel sheet), and each sample is contained in its own row. The features used to predict our target can be both numerical and categorical. However, including categorical features often requires additional preprocessing/feature engineering (a few models do accept categorical features directly, like [CatBoost](https://catboost.ai/)). An example of tabular regression would be predicting the weight of a fish given its' species and length.
+ Tabular regression is the task of predicting a numerical value given a set of attributes/features. _Tabular_ meaning that data is stored in a table (like an excel sheet), and each sample is contained in its own row. The features used to predict our target can be both numerical and categorical. However, including categorical features often requires additional preprocessing/feature engineering (a few models do accept categorical features directly, like [CatBoost](https://catboost.ai/)). An example of tabular regression would be predicting the weight of a fish given its' species and length.

  ## Use Cases

- ### Sales Prediction: a Use Case for Predicting a Continuous Target Variable
- Here the objective is to predict a continuous variable based on a set of input variable(s). For example, predicting `sales` of an ice cream shop based on `temperature` of weather and `duration of hours` shop was open. Here we can build a regression model with `temperature` and `duration of hours` as input variable and `sales` as target variable.
-
- ### Missing Value Imputation for Other Tabular Tasks
- In real-world applications, due to human error or other reasons, some of the input values can be missing or there might not be any recorded data. Considering the example above, say the shopkeeper's watch was broken and they forgot to calculate the `hours` for which the shop was open. This will lead to a missing value in their dataset. In this case, missing values could be replaced it with zero, or average hours for which the shop is kept open. Another approach we can try is to use `temperature` and `sales` variables to predict the `hours` variable here.
+ ### Sales Prediction: a Use Case for Predicting a Continuous Target Variable

+ Here the objective is to predict a continuous variable based on a set of input variable(s). For example, predicting `sales` of an ice cream shop based on `temperature` of weather and `duration of hours` shop was open. Here we can build a regression model with `temperature` and `duration of hours` as input variable and `sales` as target variable.

+ ### Missing Value Imputation for Other Tabular Tasks
+ In real-world applications, due to human error or other reasons, some of the input values can be missing or there might not be any recorded data. Considering the example above, say the shopkeeper's watch was broken and they forgot to calculate the `hours` for which the shop was open. This will lead to a missing value in their dataset. In this case, missing values could be replaced it with zero, or average hours for which the shop is kept open. Another approach we can try is to use `temperature` and `sales` variables to predict the `hours` variable here.

  ## Model Training

@@ -27,16 +26,16 @@ model = LinearRegression()
  model.fit(X, y)
  ```

-
  # Model Hosting and Inference

- You can use [skops](https://skops.readthedocs.io/) for model hosting and inference on the Hugging Face Hub. This library is built to improve production workflows of various libraries that are used to train tabular models, including [sklearn](https://scikit-learn.org/stable/) and [xgboost](https://xgboost.readthedocs.io/en/stable/). Using `skops` you can:
+ You can use [skops](https://skops.readthedocs.io/) for model hosting and inference on the Hugging Face Hub. This library is built to improve production workflows of various libraries that are used to train tabular models, including [sklearn](https://scikit-learn.org/stable/) and [xgboost](https://xgboost.readthedocs.io/en/stable/). Using `skops` you can:
+
  - Easily use inference API,
  - Build neat UIs with one line of code,
  - Programmatically create model cards,
  - Securely serialize your models. (See limitations of using pickle [here](https://huggingface.co/docs/hub/security-pickle).)

- You can push your model as follows:
+ You can push your model as follows:

  ```python
  from skops import hub_utils
@@ -71,21 +70,18 @@ gr.Interface.load("huggingface/username/my-awesome-model").launch()
  - Check out [interactive sklearn examples](https://huggingface.co/sklearn-docs) built with ❤️ using Gradio.
  - [Notebook: Persisting your scikit-learn model using skops](https://www.kaggle.com/code/unofficialmerve/persisting-your-scikit-learn-model-using-skops)

-
  - For starting with tabular regression:
- - Doing [Exploratory Data Analysis](https://neptune.ai/blog/exploratory-data-analysis-for-tabular-data) for tabular data.
- - The data considered here consists of details of Olympic athletes and medal results from Athens 1896 to Rio 2016.
- - Here you can learn more about how to explore and analyse the data and visualize them in order to get a better understanding of dataset.
- - Building your [first ML model](https://www.kaggle.com/code/dansbecker/your-first-machine-learning-model).
-
- - Intermediate level tutorials on tabular regression:
- - [A Short Chronology of Deep Learning for Tabular Data](https://sebastianraschka.com/blog/2022/deep-learning-for-tabular-data.html) by Sebastian Raschka.

+ - Doing [Exploratory Data Analysis](https://neptune.ai/blog/exploratory-data-analysis-for-tabular-data) for tabular data.
+ - The data considered here consists of details of Olympic athletes and medal results from Athens 1896 to Rio 2016.
+ - Here you can learn more about how to explore and analyse the data and visualize them in order to get a better understanding of dataset.
+ - Building your [first ML model](https://www.kaggle.com/code/dansbecker/your-first-machine-learning-model).

+ - Intermediate level tutorials on tabular regression:
+ - [A Short Chronology of Deep Learning for Tabular Data](https://sebastianraschka.com/blog/2022/deep-learning-for-tabular-data.html) by Sebastian Raschka.

  ### Training your own model in just a few seconds

  We have built a [baseline trainer](https://huggingface.co/spaces/scikit-learn/baseline-trainer) application to which you can drag and drop your dataset. It will train a baseline and push it to your Hugging Face Hub profile with a model card containing information about the model.

  This page was made possible thanks to efforts of [Brenden Connors](https://huggingface.co/brendenc) and [Ayush Bihani](https://huggingface.co/hsuyab).
-
@@ -4,7 +4,7 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
- id: "inria-soda/tabular-benchmark",
+ id: "inria-soda/tabular-benchmark",
  },
  ],
  demo: {
@@ -15,7 +15,6 @@ const taskData: TaskDataCustom = {
  ["ford torino", "140", "3,449"],
  ["amc hornet", "97", "2,774"],
  ["toyota corolla", "65", "1,773"],
-
  ],
  type: "tabular",
  },
@@ -23,36 +22,36 @@ const taskData: TaskDataCustom = {
  outputs: [
  {
  table: [["MPG (miles per gallon)"], ["17"], ["18"], ["31"]],
- type: "tabular",
+ type: "tabular",
  },
  ],
  },
  metrics: [
  {
  description: "",
- id: "mse",
+ id: "mse",
  },
  {
- description: "Coefficient of determination (or R-squared) is a measure of how well the model fits the data. Higher R-squared is considered a better fit.",
- id: "r-squared",
+ description:
+ "Coefficient of determination (or R-squared) is a measure of how well the model fits the data. Higher R-squared is considered a better fit.",
+ id: "r-squared",
  },
  ],
  models: [
  {
  description: "Fish weight prediction based on length measurements and species.",
- id: "scikit-learn/Fish-Weight",
+ id: "scikit-learn/Fish-Weight",
  },
  ],
  spaces: [
  {
  description: "An application that can predict weight of a fish based on set of attributes.",
- id: "scikit-learn/fish-weight-prediction",
+ id: "scikit-learn/fish-weight-prediction",
  },
  ],
- summary:
- "Tabular regression is the task of predicting a numerical value given a set of attributes.",
+ summary: "Tabular regression is the task of predicting a numerical value given a set of attributes.",
  widgetModels: ["scikit-learn/Fish-Weight"],
- youtubeId: "",
+ youtubeId: "",
  };

  export default taskData;