@llmindset/hf-mcp 0.3.10 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/create-repo.d.ts +47 -0
- package/dist/create-repo.d.ts.map +1 -0
- package/dist/create-repo.js +83 -0
- package/dist/create-repo.js.map +1 -0
- package/dist/create-repo.test.d.ts +2 -0
- package/dist/create-repo.test.d.ts.map +1 -0
- package/dist/create-repo.test.js +155 -0
- package/dist/create-repo.test.js.map +1 -0
- package/dist/dataset-detail.js +1 -1
- package/dist/dataset-detail.js.map +1 -1
- package/dist/dataset-viewer-inspect.d.ts +48 -0
- package/dist/dataset-viewer-inspect.d.ts.map +1 -0
- package/dist/dataset-viewer-inspect.js +660 -0
- package/dist/dataset-viewer-inspect.js.map +1 -0
- package/dist/dataset-viewer-inspect.test.d.ts +2 -0
- package/dist/dataset-viewer-inspect.test.d.ts.map +1 -0
- package/dist/dataset-viewer-inspect.test.js +218 -0
- package/dist/dataset-viewer-inspect.test.js.map +1 -0
- package/dist/gradio-files.d.ts +2 -2
- package/dist/hub-inspect.d.ts +19 -2
- package/dist/hub-inspect.d.ts.map +1 -1
- package/dist/hub-inspect.js +68 -4
- package/dist/hub-inspect.js.map +1 -1
- package/dist/hub-inspect.test.d.ts +2 -0
- package/dist/hub-inspect.test.d.ts.map +1 -0
- package/dist/hub-inspect.test.js +24 -0
- package/dist/hub-inspect.test.js.map +1 -0
- package/dist/index.browser.d.ts +5 -3
- package/dist/index.browser.d.ts.map +1 -1
- package/dist/index.browser.js +16 -1
- package/dist/index.browser.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/jobs/types.d.ts +23 -23
- package/dist/jobs/types.d.ts.map +1 -1
- package/dist/jobs/types.js +7 -2
- package/dist/jobs/types.js.map +1 -1
- package/dist/model-detail.js +1 -1
- package/dist/model-detail.js.map +1 -1
- package/dist/readme-utils.d.ts +1 -1
- package/dist/readme-utils.d.ts.map +1 -1
- package/dist/readme-utils.js +2 -13
- package/dist/readme-utils.js.map +1 -1
- package/dist/repo-search.d.ts +2 -2
- package/dist/tool-ids.d.ts +4 -3
- package/dist/tool-ids.d.ts.map +1 -1
- package/dist/tool-ids.js +4 -1
- package/dist/tool-ids.js.map +1 -1
- package/package.json +1 -1
- package/src/create-repo.test.ts +182 -0
- package/src/create-repo.ts +103 -0
- package/src/dataset-detail.ts +1 -1
- package/src/dataset-viewer-inspect.test.ts +234 -0
- package/src/dataset-viewer-inspect.ts +809 -0
- package/src/hub-inspect.test.ts +28 -0
- package/src/hub-inspect.ts +88 -4
- package/src/index.browser.ts +18 -1
- package/src/index.ts +2 -0
- package/src/jobs/types.ts +10 -2
- package/src/model-detail.ts +1 -1
- package/src/readme-utils.ts +2 -32
- package/src/tool-ids.ts +4 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import { createRepo } from '@huggingface/hub';
|
|
2
|
+
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|
3
|
+
import { CreateRepoTool, formatCreateRepoResult } from './create-repo.js';
|
|
4
|
+
|
|
5
|
+
// Swap the Hub client for a stub module so that `createRepo` is a controllable mock in every test.
vi.mock('@huggingface/hub', () => {
	return { createRepo: vi.fn() };
});
|
|
8
|
+
|
|
9
|
+
// Unit tests for CreateRepoTool and formatCreateRepoResult; the Hub `createRepo` call is mocked throughout.
describe('CreateRepoTool', () => {
	const ACCESS_TOKEN = 'token';
	const hubCreateRepo = vi.mocked(createRepo);
	const authedTool = (): CreateRepoTool => new CreateRepoTool(ACCESS_TOKEN);

	// Each test starts from a freshly reset mock that resolves to a model repository response.
	beforeEach(() => {
		hubCreateRepo.mockReset();
		hubCreateRepo.mockResolvedValue({
			repoUrl: 'https://huggingface.co/alice/example-model',
			id: '0123456789abcdef01234567',
		});
	});

	it('creates a model repository with client-native repo params', async () => {
		const created = await authedTool().create({
			name: 'alice/example-model',
			repo_type: 'model',
		});

		expect(hubCreateRepo).toHaveBeenCalledWith({
			accessToken: ACCESS_TOKEN,
			repo: { name: 'alice/example-model', type: 'model' },
			private: undefined,
		});
		expect(created).toEqual({
			url: 'https://huggingface.co/alice/example-model',
			name: 'alice/example-model',
			repoType: 'model',
			id: '0123456789abcdef01234567',
		});
	});

	it('defaults to model repositories', async () => {
		await authedTool().create({ name: 'alice/default-model' });

		expect(hubCreateRepo).toHaveBeenCalledWith({
			accessToken: ACCESS_TOKEN,
			repo: { name: 'alice/default-model', type: 'model' },
			private: undefined,
		});
	});

	it('creates a dataset repository', async () => {
		await authedTool().create({
			name: 'alice/example-dataset',
			repo_type: 'dataset',
			private: true,
		});

		expect(hubCreateRepo).toHaveBeenCalledWith({
			accessToken: ACCESS_TOKEN,
			repo: { name: 'alice/example-dataset', type: 'dataset' },
			private: true,
		});
	});

	it('creates a bucket repository', async () => {
		hubCreateRepo.mockResolvedValue({
			repoUrl: 'https://huggingface.co/buckets/alice/example-bucket',
			id: 'bucket-id',
		});

		const created = await authedTool().create({
			name: 'alice/example-bucket',
			repo_type: 'bucket',
			private: true,
		});

		expect(hubCreateRepo).toHaveBeenCalledWith({
			accessToken: ACCESS_TOKEN,
			repo: { name: 'alice/example-bucket', type: 'bucket' },
			private: true,
		});
		expect(created).toEqual({
			url: 'https://huggingface.co/buckets/alice/example-bucket',
			name: 'alice/example-bucket',
			repoType: 'bucket',
			id: 'bucket-id',
		});
	});

	it('creates a Space repository with sdk', async () => {
		hubCreateRepo.mockResolvedValue({
			repoUrl: 'https://huggingface.co/spaces/alice/demo',
			id: 'abcdefabcdefabcdefabcdef',
		});

		await authedTool().create({
			name: 'alice/demo',
			repo_type: 'space',
			sdk: 'gradio',
		});

		expect(hubCreateRepo).toHaveBeenCalledWith({
			accessToken: ACCESS_TOKEN,
			repo: { name: 'alice/demo', type: 'space' },
			private: undefined,
			sdk: 'gradio',
		});
	});

	it('creates a Streamlit Space repository', async () => {
		hubCreateRepo.mockResolvedValue({
			repoUrl: 'https://huggingface.co/spaces/alice/streamlit-demo',
			id: 'streamlit-space-id',
		});

		await authedTool().create({
			name: 'alice/streamlit-demo',
			repo_type: 'space',
			sdk: 'streamlit',
		});

		expect(hubCreateRepo).toHaveBeenCalledWith({
			accessToken: ACCESS_TOKEN,
			repo: { name: 'alice/streamlit-demo', type: 'space' },
			private: undefined,
			sdk: 'streamlit',
		});
	});

	it('requires sdk for Space repositories', async () => {
		const attempt = authedTool().create({
			name: 'alice/demo',
			repo_type: 'space',
		});

		await expect(attempt).rejects.toThrow('sdk is required when repo_type is space');
		// Validation must fail before the Hub client is ever reached.
		expect(hubCreateRepo).not.toHaveBeenCalled();
	});

	it('requires fully-qualified repo names', async () => {
		const attempt = authedTool().create({
			name: 'example-model',
			repo_type: 'model',
		});

		await expect(attempt).rejects.toThrow("name must be fully qualified in 'namespace/repo-name' format");
		expect(hubCreateRepo).not.toHaveBeenCalled();
	});

	it('requires an auth token', async () => {
		const anonymousTool = new CreateRepoTool(undefined);
		const attempt = anonymousTool.create({
			name: 'alice/example-model',
			repo_type: 'model',
		});

		await expect(attempt).rejects.toThrow('Requires Authentication');
		expect(hubCreateRepo).not.toHaveBeenCalled();
	});

	it('formats the created repository result', () => {
		const formatted = formatCreateRepoResult({
			url: 'https://huggingface.co/alice/example-model',
			name: 'alice/example-model',
			repoType: 'model',
			id: '0123456789abcdef01234567',
		});
		const expectedLines = [
			'Repository created.',
			'Name: alice/example-model',
			'Type: model',
			'URL: https://huggingface.co/alice/example-model',
			'ID: 0123456789abcdef01234567',
		];

		expect(formatted).toBe(expectedLines.join('\n'));
	});
});
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { createRepo, type RepoType as HubRepoType, type SpaceSdk } from '@huggingface/hub';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { NO_TOKEN_INSTRUCTIONS } from './utilities.js';
|
|
4
|
+
|
|
5
|
+
// Repository kinds accepted by the tool; `satisfies` keeps each entry a valid Hub RepoType.
const REPO_TYPES = [
	'model',
	'dataset',
	'space',
	'bucket',
] as const satisfies readonly HubRepoType[];

// Space SDK identifiers accepted by the tool; `satisfies` keeps each entry a valid Hub SpaceSdk.
const SPACE_SDKS = [
	'streamlit',
	'gradio',
	'docker',
	'static',
] as const satisfies readonly SpaceSdk[];
|
|
7
|
+
|
|
8
|
+
/**
 * Static definition of the `create_repo` tool: its name, input schema, and annotations.
 *
 * `description` is intentionally empty here; `CreateRepoTool.createToolConfig()` fills it in.
 */
export const CREATE_REPO_TOOL_CONFIG = {
	name: 'create_repo',
	description: '',
	schema: z.object({
		name: z.string().min(1).describe("Fully-qualified repository name in 'namespace/repo-name' format."),
		// The schema default must agree with CreateRepoTool.create(), which falls back to 'model'
		// when repo_type is omitted; otherwise schema-parsed input would silently create a bucket.
		repo_type: z.enum(REPO_TYPES).optional().default('model').describe('Repository type. Defaults to model.'),
		private: z.boolean().optional().describe('Whether to create the repository as private.'),
		// Deliberately no default: a Space must name its SDK explicitly, and a schema-level default
		// would make the "sdk is required when repo_type is space" validation unreachable.
		sdk: z.enum(SPACE_SDKS).optional().describe("SDK type - only required for repo_type='space'."),
	}),
	annotations: {
		title: 'Create Hugging Face Repository',
		destructiveHint: false,
		readOnlyHint: false,
		openWorldHint: true,
	},
} as const;
|
|
24
|
+
|
|
25
|
+
/** Caller-facing parameters: the schema's input shape, i.e. before any zod defaults are applied. */
export type CreateRepoParams = z.input<(typeof CREATE_REPO_TOOL_CONFIG)['schema']>;

/** Union of the literal repository kinds listed in REPO_TYPES. */
type RepoType = (typeof REPO_TYPES)[number];

/** Union of the literal SDK identifiers listed in SPACE_SDKS. */
type SupportedSpaceSdk = (typeof SPACE_SDKS)[number];
|
|
28
|
+
|
|
29
|
+
/**
 * Compile-time guard: the type argument must be `never`, so instantiating it with a
 * non-empty union is a type error. At runtime it does nothing.
 */
function assertExhaustiveUnion<T extends never>(_value?: T): void {
	// Reference the parameter so it is not reported as unused.
	return void _value;
}
|
|
32
|
+
|
|
33
|
+
// Local lists must not contain anything the Hub client does not know about...
assertExhaustiveUnion<Exclude<RepoType, HubRepoType>>();
assertExhaustiveUnion<Exclude<SupportedSpaceSdk, SpaceSdk>>();
// ...and must not miss anything the Hub client supports.
assertExhaustiveUnion<Exclude<HubRepoType, RepoType>>();
assertExhaustiveUnion<Exclude<SpaceSdk, SupportedSpaceSdk>>();
|
|
37
|
+
|
|
38
|
+
/** Summary of a successful repository creation, as returned by `CreateRepoTool.create()`. */
export interface CreateRepoResult {
	/** Repository URL reported by the Hub client (`repoUrl`). */
	url: string;
	/** Repository name exactly as supplied by the caller. */
	name: string;
	/** Repository kind that was created. */
	repoType: RepoType;
	/** Identifier reported by the Hub client. */
	id: string;
}
|
|
44
|
+
|
|
45
|
+
/** Creates Hugging Face repositories on behalf of the holder of an access token. */
export class CreateRepoTool {
	/**
	 * @param hfToken Access token passed to the Hub client; `create()` throws when it is missing.
	 */
	constructor(private readonly hfToken?: string) {}

	/** Returns the static tool config with the human-readable description filled in. */
	static createToolConfig(): Omit<typeof CREATE_REPO_TOOL_CONFIG, 'description'> & { description: string } {
		const description = 'Create a Hugging Face model, dataset, space, or bucket repository.';
		return { ...CREATE_REPO_TOOL_CONFIG, description };
	}

	/**
	 * Validates the request and creates the repository through the Hub client.
	 *
	 * @param params Repository name plus optional type, visibility, and Space SDK.
	 * @returns URL, name, type, and id of the created repository.
	 * @throws Error when no token is configured or when validation fails.
	 */
	async create(params: CreateRepoParams): Promise<CreateRepoResult> {
		const accessToken = this.hfToken;
		if (!accessToken) {
			throw new Error(NO_TOKEN_INSTRUCTIONS);
		}

		const { name, repo_type: requestedType, sdk } = params;
		const repoType = requestedType ?? 'model';
		validateParams(params, repoType);

		// `sdk` is only forwarded for Spaces; other repository kinds omit the key entirely.
		const spaceOptions = repoType === 'space' ? { sdk } : {};
		const { repoUrl, id } = await createRepo({
			accessToken,
			repo: { name, type: repoType },
			private: params.private,
			...spaceOptions,
		});

		return { url: repoUrl, name, repoType, id };
	}
}
|
|
79
|
+
|
|
80
|
+
/**
 * Rejects requests that cannot be sent to the Hub.
 *
 * @param params Raw parameters supplied by the caller.
 * @param repoType Effective repository kind (after defaulting).
 * @throws Error when the name is not `namespace/repo-name`, or a Space has no sdk.
 */
function validateParams(params: CreateRepoParams, repoType: RepoType): void {
	const nameIsQualified = isFullyQualifiedRepoName(params.name);
	if (!nameIsQualified) {
		throw new Error("name must be fully qualified in 'namespace/repo-name' format.");
	}

	const sdkMissing = !params.sdk;
	if (sdkMissing && repoType === 'space') {
		throw new Error('sdk is required when repo_type is space.');
	}
}
|
|
89
|
+
|
|
90
|
+
/**
 * Reports whether `name` consists of exactly two non-empty segments separated by one `/`.
 *
 * @param name Candidate repository name.
 * @returns `true` for `namespace/repo-name` shaped input, `false` otherwise.
 */
function isFullyQualifiedRepoName(name: string): boolean {
	const segments = name.split('/');
	if (segments.length !== 2) {
		return false;
	}
	// An empty segment means a leading or trailing slash.
	return !segments.includes('');
}
|
|
94
|
+
|
|
95
|
+
/**
 * Renders a creation result as a five-line plain-text summary.
 *
 * @param result Outcome of a successful repository creation.
 * @returns Newline-separated text: a heading followed by name, type, URL, and id.
 */
export const formatCreateRepoResult = (result: CreateRepoResult): string => {
	const { name, repoType, url, id } = result;
	return `Repository created.\nName: ${name}\nType: ${repoType}\nURL: ${url}\nID: ${id}`;
};
|
package/src/dataset-detail.ts
CHANGED
|
@@ -163,7 +163,7 @@ export class DatasetDetailTool {
|
|
|
163
163
|
|
|
164
164
|
// Fetch and append README content if requested
|
|
165
165
|
if (includeReadme) {
|
|
166
|
-
const readmeContent = await fetchReadmeContent(datasetDetails.name, 'datasets'
|
|
166
|
+
const readmeContent = await fetchReadmeContent(datasetDetails.name, 'datasets');
|
|
167
167
|
if (readmeContent) {
|
|
168
168
|
const result = formatDatasetDetails(datasetDetails);
|
|
169
169
|
result.formatted +=
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { DatasetViewerInspector } from './dataset-viewer-inspect.js';
|
|
3
|
+
|
|
4
|
+
// Canned `/splits` response: one config with two splits, listed validation-first.
const squadSplits = {
	splits: [
		{
			dataset: 'rajpurkar/squad',
			config: 'plain_text',
			split: 'validation',
		},
		{
			dataset: 'rajpurkar/squad',
			config: 'plain_text',
			split: 'train',
		},
	],
	pending: [],
	failed: [],
};
|
|
12
|
+
|
|
13
|
+
// Canned `/size` response with dataset-, config-, and split-level byte and row counts.
const squadSize = {
	size: {
		dataset: {
			dataset: 'rajpurkar/squad',
			num_bytes_original_files: 16_278_203,
			num_bytes_parquet_files: 16_278_203,
			num_bytes_memory: 98_346_470,
			num_rows: 98_169,
			estimated_num_rows: null,
		},
		configs: [
			{
				dataset: 'rajpurkar/squad',
				config: 'plain_text',
				num_bytes_original_files: 16_278_203,
				num_bytes_parquet_files: 16_278_203,
				num_bytes_memory: 98_346_470,
				num_rows: 98_169,
				num_columns: 5,
				estimated_num_rows: null,
			},
		],
		splits: [
			{
				dataset: 'rajpurkar/squad',
				config: 'plain_text',
				split: 'train',
				num_bytes_parquet_files: 14_458_314,
				num_bytes_memory: 89_338_716,
				num_rows: 87_599,
				num_columns: 5,
				estimated_num_rows: null,
			},
			{
				dataset: 'rajpurkar/squad',
				config: 'plain_text',
				split: 'validation',
				num_bytes_parquet_files: 1_819_889,
				num_bytes_memory: 9_007_754,
				num_rows: 10_570,
				num_columns: 5,
				estimated_num_rows: null,
			},
		],
	},
	pending: [],
	failed: [],
	partial: false,
};
|
|
62
|
+
|
|
63
|
+
// Canned `/parquet` response: a single exported parquet file per split.
const squadParquet = {
	parquet_files: [
		{
			dataset: 'rajpurkar/squad',
			config: 'plain_text',
			split: 'train',
			url: 'https://huggingface.co/datasets/rajpurkar/squad/resolve/refs%2Fconvert%2Fparquet/plain_text/train/0000.parquet',
			filename: '0000.parquet',
			size: 14_458_314,
		},
		{
			dataset: 'rajpurkar/squad',
			config: 'plain_text',
			split: 'validation',
			url: 'https://huggingface.co/datasets/rajpurkar/squad/resolve/refs%2Fconvert%2Fparquet/plain_text/validation/0000.parquet',
			filename: '0000.parquet',
			size: 1_819_889,
		},
	],
	pending: [],
	failed: [],
	partial: false,
};
|
|
86
|
+
|
|
87
|
+
// Canned `/rows` response: three feature descriptors (two scalar, one nested) and a single row.
const squadRows = {
	features: [
		{
			feature_idx: 0,
			name: 'id',
			type: { dtype: 'string', _type: 'Value' },
		},
		{
			feature_idx: 1,
			name: 'context',
			type: { dtype: 'string', _type: 'Value' },
		},
		{
			feature_idx: 2,
			name: 'answers',
			type: {
				text: {
					feature: { dtype: 'string', _type: 'Value' },
					_type: 'List',
				},
				answer_start: {
					feature: { dtype: 'int32', _type: 'Value' },
					_type: 'List',
				},
			},
		},
	],
	rows: [
		{
			row_idx: 0,
			row: {
				id: '5733be284776f41900661182',
				context:
					'Architecturally, the school has a Catholic character. Atop the Main Building gold dome is a golden statue of the Virgin Mary.',
				answers: {
					text: ['Saint Bernadette Soubirous'],
					answer_start: [515],
				},
			},
			truncated_cells: [],
		},
	],
	num_rows_total: 87_599,
	num_rows_per_page: 100,
	partial: false,
};
|
|
116
|
+
|
|
117
|
+
/**
 * In-memory stand-in for the Dataset Viewer client.
 *
 * Serves canned responses keyed by endpoint path and records every request in `calls`.
 */
class SimulatedDatasetViewerClient {
	/** Every `getJson` invocation, in call order. */
	readonly calls: Array<{ path: string; params: Record<string, string | number | undefined> }> = [];

	/**
	 * @param responses Map from endpoint path to the value to serve; an `Error` value makes the call reject.
	 */
	constructor(private readonly responses: Record<string, unknown>) {}

	/**
	 * Records the request, then settles with the canned entry registered for `path`.
	 *
	 * @returns A promise rejected with the entry when it is an `Error`, otherwise resolved with it
	 *          (which is `undefined` for paths that were never registered).
	 */
	getJson<T>(path: string, params: Record<string, string | number | undefined>): Promise<T> {
		this.calls.push({ path, params });
		const canned = this.responses[path];
		return canned instanceof Error ? Promise.reject(canned) : Promise.resolve(canned as T);
	}
}
|
|
129
|
+
|
|
130
|
+
/**
 * Builds a DatasetViewerInspector wired to the simulated client and a fixed metadata provider.
 *
 * @param client Simulated Dataset Viewer client that serves the canned endpoint responses.
 * @returns An inspector constructed without a token (first argument is `undefined`).
 */
function createInspector(client: SimulatedDatasetViewerClient): DatasetViewerInspector {
	return new DatasetViewerInspector(undefined, {
		client,
		metadataProvider: {
			// Always resolves to the same Hub metadata, regardless of which dataset is requested.
			getMetadata: () =>
				Promise.resolve({
					name: 'rajpurkar/squad',
					author: 'rajpurkar',
					downloadsAllTime: 123456,
					likes: 42,
					updatedAt: new Date('2025-01-02T00:00:00Z'),
					cardData: { license: 'cc-by-sa-4.0', task_categories: ['question-answering'] },
				}),
		},
	});
}
|
|
147
|
+
|
|
148
|
+
// Exercises getStructure and getPreview against canned Dataset Viewer responses.
describe('DatasetViewerInspector', () => {
	// Fresh objects per call so that no test shares response state with another.
	const singleSplitResponse = () => ({
		splits: [{ dataset: 'x/y', config: 'default', split: 'train' }],
		pending: [],
		failed: [],
	});
	const renamedError = (): Error => new Error('The dataset has been renamed. Please use the current dataset name.');
	const rowsCallOf = (client: SimulatedDatasetViewerClient) => client.calls.find((call) => call.path === '/rows');

	it('formats structure from splits, size, parquet, and schema preview', async () => {
		const client = new SimulatedDatasetViewerClient({
			'/splits': squadSplits,
			'/size': squadSize,
			'/parquet': squadParquet,
			'/rows': squadRows,
		});
		const inspector = createInspector(client);

		const result = await inspector.getStructure('rajpurkar/squad');

		const expectedFragments = [
			'## Dataset Structure',
			'### Hub Metadata',
			'| plain\\_text | train | 87.6K | — | 5 | 14.5 MB |',
			'| plain\\_text | validation | 10.6K | — | 5 | 1.8 MB |',
			'### Parquet Exports',
			'| plain\\_text | train | 1 | 14.5 MB |',
			'Using `plain_text/train`.',
			'| 1 | id | {"dtype":"string","\\_type":"Value"} |',
			'| 3 | answers |',
		];
		for (const fragment of expectedFragments) {
			expect(result).toContain(fragment);
		}
	});

	it('requires config and split for multi-split previews', async () => {
		const client = new SimulatedDatasetViewerClient({ '/splits': squadSplits });
		const inspector = createInspector(client);

		const result = await inspector.getPreview('rajpurkar/squad', {});

		expect(result).toContain('requires `config` and `split`');
		expect(result).toContain('- `plain_text` / `train`');
		expect(result).toContain('- `plain_text` / `validation`');
	});

	it('infers config and split for single-split previews and clamps limit', async () => {
		const client = new SimulatedDatasetViewerClient({
			'/splits': singleSplitResponse(),
			'/rows': squadRows,
		});
		const inspector = createInspector(client);

		const result = await inspector.getPreview('x/y', { limit: 500 });

		expect(result).toContain('- Config: `default`');
		expect(result).toContain('- Split: `train`');
		expect(result).toContain('- Requested limit: `100`');
		// The clamped value must also be what is sent to the rows endpoint.
		expect(rowsCallOf(client)?.params).toMatchObject({ length: 100 });
	});

	it('rejects negative offsets before fetching rows', async () => {
		const client = new SimulatedDatasetViewerClient({
			'/splits': squadSplits,
			'/rows': squadRows,
		});
		const inspector = createInspector(client);

		const result = await inspector.getPreview('rajpurkar/squad', { offset: -1 });

		expect(result).toContain('`offset` must be a non-negative integer');
		expect(rowsCallOf(client)).toBeUndefined();
	});

	it('surfaces Dataset Viewer endpoint failures as section warnings', async () => {
		const client = new SimulatedDatasetViewerClient({
			'/splits': renamedError(),
			'/size': renamedError(),
			'/parquet': renamedError(),
		});
		const inspector = createInspector(client);

		const result = await inspector.getStructure('beans');

		const expectedWarnings = [
			'Could not fetch splits from Dataset Viewer',
			'Could not fetch size information from Dataset Viewer',
			'Could not fetch parquet export information from Dataset Viewer',
			'No config/split target was available for schema preview',
		];
		for (const warning of expectedWarnings) {
			expect(result).toContain(warning);
		}
	});

	it('truncates row preview by cumulative output size rather than per-cell formatting', async () => {
		const oversizedCell = 'x'.repeat(80_000);
		const client = new SimulatedDatasetViewerClient({
			'/splits': singleSplitResponse(),
			'/rows': {
				features: [{ feature_idx: 0, name: 'text', type: { dtype: 'string', _type: 'Value' } }],
				rows: [{ row_idx: 0, row: { text: oversizedCell }, truncated_cells: [] }],
				num_rows_total: 1,
				num_rows_per_page: 100,
				partial: false,
			},
		});
		const inspector = createInspector(client);

		const result = await inspector.getPreview('x/y', {});

		expect(result).toContain('Row preview output was truncated after approximately');
		expect(result.length).toBeLessThan(70_000);
	});
});
|