@llmindset/hf-mcp 0.3.10 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/create-repo.d.ts +47 -0
- package/dist/create-repo.d.ts.map +1 -0
- package/dist/create-repo.js +83 -0
- package/dist/create-repo.js.map +1 -0
- package/dist/create-repo.test.d.ts +2 -0
- package/dist/create-repo.test.d.ts.map +1 -0
- package/dist/create-repo.test.js +155 -0
- package/dist/create-repo.test.js.map +1 -0
- package/dist/dataset-detail.js +1 -1
- package/dist/dataset-detail.js.map +1 -1
- package/dist/dataset-viewer-inspect.d.ts +48 -0
- package/dist/dataset-viewer-inspect.d.ts.map +1 -0
- package/dist/dataset-viewer-inspect.js +660 -0
- package/dist/dataset-viewer-inspect.js.map +1 -0
- package/dist/dataset-viewer-inspect.test.d.ts +2 -0
- package/dist/dataset-viewer-inspect.test.d.ts.map +1 -0
- package/dist/dataset-viewer-inspect.test.js +218 -0
- package/dist/dataset-viewer-inspect.test.js.map +1 -0
- package/dist/gradio-files.d.ts +2 -2
- package/dist/hub-inspect.d.ts +19 -2
- package/dist/hub-inspect.d.ts.map +1 -1
- package/dist/hub-inspect.js +68 -4
- package/dist/hub-inspect.js.map +1 -1
- package/dist/hub-inspect.test.d.ts +2 -0
- package/dist/hub-inspect.test.d.ts.map +1 -0
- package/dist/hub-inspect.test.js +24 -0
- package/dist/hub-inspect.test.js.map +1 -0
- package/dist/index.browser.d.ts +5 -3
- package/dist/index.browser.d.ts.map +1 -1
- package/dist/index.browser.js +16 -1
- package/dist/index.browser.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/jobs/types.d.ts +23 -23
- package/dist/jobs/types.d.ts.map +1 -1
- package/dist/jobs/types.js +7 -2
- package/dist/jobs/types.js.map +1 -1
- package/dist/model-detail.js +1 -1
- package/dist/model-detail.js.map +1 -1
- package/dist/readme-utils.d.ts +1 -1
- package/dist/readme-utils.d.ts.map +1 -1
- package/dist/readme-utils.js +2 -13
- package/dist/readme-utils.js.map +1 -1
- package/dist/repo-search.d.ts +2 -2
- package/dist/tool-ids.d.ts +4 -3
- package/dist/tool-ids.d.ts.map +1 -1
- package/dist/tool-ids.js +4 -1
- package/dist/tool-ids.js.map +1 -1
- package/package.json +1 -1
- package/src/create-repo.test.ts +182 -0
- package/src/create-repo.ts +103 -0
- package/src/dataset-detail.ts +1 -1
- package/src/dataset-viewer-inspect.test.ts +234 -0
- package/src/dataset-viewer-inspect.ts +809 -0
- package/src/hub-inspect.test.ts +28 -0
- package/src/hub-inspect.ts +88 -4
- package/src/index.browser.ts +18 -1
- package/src/index.ts +2 -0
- package/src/jobs/types.ts +10 -2
- package/src/model-detail.ts +1 -1
- package/src/readme-utils.ts +2 -32
- package/src/tool-ids.ts +4 -0
|
@@ -0,0 +1,660 @@
|
|
|
1
|
+
import { datasetInfo, HubApiError } from '@huggingface/hub';
|
|
2
|
+
import { HfApiCall, HfApiError } from './hf-api-call.js';
|
|
3
|
+
import { escapeMarkdown, formatBytes, formatDate, formatNumber } from './utilities.js';
|
|
4
|
+
// Base URL that DatasetViewerClient passes to the HfApiCall constructor.
const DATASET_VIEWER_BASE_URL = 'https://datasets-server.huggingface.co';
|
|
5
|
+
// Row count clampLimit falls back to when the limit is missing or not an integer.
const DEFAULT_PREVIEW_LIMIT = 5;
|
|
6
|
+
// Upper bound clampLimit applies to a requested preview limit.
const MAX_PREVIEW_LIMIT = 100;
|
|
7
|
+
// Maximum number of columns formatRows renders; the rest are reported as omitted.
const MAX_TABLE_COLUMNS = 30;
|
|
8
|
+
// Character budget formatRows enforces on the rendered row table before truncating.
const MAX_ROW_PREVIEW_CHARS = 66_000;
|
|
9
|
+
// formatParquet lists individual parquet URLs only when there are at most this many files.
const MAX_URLS_TO_SHOW = 5;
|
|
10
|
+
/**
 * Client for the Dataset Viewer HTTP API, built on top of HfApiCall.
 */
class DatasetViewerClient extends HfApiCall {
    /**
     * @param {string} [hfToken] - Token forwarded to the HfApiCall constructor.
     */
    constructor(hfToken) {
        super(DATASET_VIEWER_BASE_URL, hfToken);
    }

    /**
     * Resolves `path` against the client's API URL, appends the query
     * parameters, and delegates the request to `fetchFromApi`.
     *
     * @param {string} path - Endpoint path, e.g. `/splits`.
     * @param {Record<string, unknown>} [params] - Query parameters; entries
     *   whose value is `undefined` are left out of the URL. Optional so that
     *   parameterless endpoints can be called without a placeholder object.
     * @returns {Promise<unknown>} Whatever `fetchFromApi` resolves to.
     */
    async getJson(path, params = {}) {
        const url = new URL(path, this.apiUrl);
        for (const [key, value] of Object.entries(params)) {
            if (value !== undefined) {
                url.searchParams.set(key, String(value));
            }
        }
        return this.fetchFromApi(url);
    }
}
|
|
23
|
+
/**
 * Loads dataset metadata from the Hub through `datasetInfo`.
 */
class HubDatasetMetadataProvider {
    hfToken;
    hubUrl;

    /**
     * @param {string} [hfToken] - Access token; sent as credentials when truthy.
     * @param {string} [hubUrl] - Hub URL override; sent only when truthy.
     */
    constructor(hfToken, hubUrl) {
        this.hfToken = hfToken;
        this.hubUrl = hubUrl;
    }

    /**
     * Requests the dataset's info record (with extra fields) and returns the
     * subset of properties the formatter consumes.
     *
     * @param {string} datasetId - Dataset name passed to `datasetInfo`.
     * @returns {Promise<object>} Selected metadata; `cardData` is kept only
     *   when it is a plain record.
     */
    async getMetadata(datasetId) {
        const request = {
            name: datasetId,
            additionalFields: ['author', 'description', 'downloadsAllTime', 'tags', 'cardData', 'sha', 'createdAt'],
        };
        if (this.hfToken) {
            request.credentials = { accessToken: this.hfToken };
        }
        if (this.hubUrl) {
            request.hubUrl = this.hubUrl;
        }
        const info = await datasetInfo(request);
        const cardData = isRecord(info.cardData) ? info.cardData : undefined;
        return {
            name: info.name,
            author: info.author,
            description: info.description,
            downloadsAllTime: info.downloadsAllTime,
            likes: info.likes,
            gated: info.gated,
            private: info.private,
            updatedAt: info.updatedAt,
            createdAt: info.createdAt,
            tags: info.tags,
            sha: info.sha,
            cardData,
        };
    }
}
|
|
62
|
+
/**
 * Builds Markdown reports about a dataset from Hub metadata and the
 * Dataset Viewer API (`/splits`, `/size`, `/parquet`, `/rows`).
 */
export class DatasetViewerInspector {
    client;
    metadataProvider;

    /**
     * @param {string} [hfToken] - Token handed to the default client and metadata provider.
     * @param {object} [options]
     * @param {object} [options.client] - Replaces the default DatasetViewerClient; must expose `getJson(path, params)`.
     * @param {object} [options.metadataProvider] - Replaces the default HubDatasetMetadataProvider; must expose `getMetadata(datasetId)`.
     * @param {string} [options.hubUrl] - Forwarded to the default metadata provider.
     */
    constructor(hfToken, options = {}) {
        this.client = options.client ?? new DatasetViewerClient(hfToken);
        this.metadataProvider = options.metadataProvider ?? new HubDatasetMetadataProvider(hfToken, options.hubUrl);
    }

    /**
     * Renders the "Dataset Structure" report: Hub metadata, the config/split
     * size table, the parquet export summary and a schema preview taken from
     * one row of the selected (or first) config/split. Failures of individual
     * sources are reported inline as blockquote warnings instead of throwing.
     *
     * @param {string} datasetId
     * @param {{config?: string, split?: string}} [options] - Preferred target for the schema preview.
     * @returns {Promise<string>} Markdown text.
     */
    async getStructure(datasetId, options = {}) {
        const lines = ['## Dataset Structure', ''];
        let metadata;
        try {
            metadata = await this.metadataProvider.getMetadata(datasetId);
            lines.push(...formatMetadata(metadata));
        }
        catch (error) {
            lines.push(`> Could not fetch Hub dataset metadata: ${formatErrorMessage(error)}`, '');
        }
        // The three lookups are independent and each fetch* helper converts
        // failures into a `{ warning }` result, so they can run concurrently
        // without one rejection discarding the others.
        const [splitsResult, sizeResult, parquetResult] = await Promise.all([
            this.fetchSplits(datasetId),
            this.fetchSize(datasetId),
            this.fetchParquet(datasetId),
        ]);
        for (const result of [splitsResult, sizeResult, parquetResult]) {
            if (result.warning) {
                lines.push(`> ${result.warning}`, '');
            }
        }
        const splits = splitsResult.data ?? [];
        const size = sizeResult.data;
        if (splits.length > 0 || size) {
            lines.push(...formatSplitsAndSize(splits, size));
        }
        if (parquetResult.data) {
            lines.push(...formatParquet(parquetResult.data));
        }
        const target = resolveStructureTarget(splits, options);
        if (target) {
            // A single row is enough: only the feature list is rendered.
            const rowsResult = await this.fetchRows(datasetId, target.config, target.split, 0, 1);
            if (rowsResult.data) {
                lines.push(...formatSchemaPreview(rowsResult.data, target.config, target.split));
            }
            else if (rowsResult.warning) {
                lines.push('### Schema Preview', '', `> ${rowsResult.warning}`, '');
            }
        }
        else if (splits.length === 0) {
            lines.push('### Schema Preview', '', '> No config/split target was available for schema preview.', '');
        }
        if (metadata && !lines.some((line) => line.startsWith('**Link:**'))) {
            lines.push(`**Link:** [https://hf.co/datasets/${metadata.name}](https://hf.co/datasets/${metadata.name})`);
        }
        return trimBlankLines(lines).join('\n');
    }

    /**
     * Renders the "Dataset Preview" report for one config/split: the feature
     * list followed by a table of rows. Validation and fetch problems are
     * returned as Markdown error text rather than thrown.
     *
     * @param {string} datasetId
     * @param {{config?: string, split?: string, offset?: number, limit?: number}} [options]
     *   Defaults to `{}` (offset 0, default limit), matching `getStructure`.
     * @returns {Promise<string>} Markdown text.
     */
    async getPreview(datasetId, options = {}) {
        const offset = options.offset ?? 0;
        if (!Number.isInteger(offset) || offset < 0) {
            return '## Dataset Preview\n\n- Error: `offset` must be a non-negative integer.';
        }
        const limit = clampLimit(options.limit);
        const splitsResult = await this.fetchSplits(datasetId);
        if (!splitsResult.data) {
            return `## Dataset Preview\n\n- Error: ${splitsResult.warning ?? 'Could not fetch dataset splits.'}`;
        }
        const resolved = resolvePreviewTarget(splitsResult.data, options.config, options.split);
        if (!resolved.ok) {
            return ['## Dataset Preview', '', resolved.message].join('\n');
        }
        const rowsResult = await this.fetchRows(datasetId, resolved.config, resolved.split, offset, limit);
        if (!rowsResult.data) {
            return [
                '## Dataset Preview',
                '',
                `- Dataset: \`${datasetId}\``,
                `- Config: \`${resolved.config}\``,
                `- Split: \`${resolved.split}\``,
                '',
                `> ${rowsResult.warning ?? 'Could not fetch row preview.'}`,
            ].join('\n');
        }
        return formatPreview(datasetId, resolved.config, resolved.split, offset, limit, rowsResult.data).join('\n');
    }

    /**
     * Fetches and parses `/splits`.
     * @returns {Promise<{data?: object[], warning?: string}>} Never rejects.
     */
    async fetchSplits(datasetId) {
        try {
            const raw = await this.client.getJson('/splits', { dataset: datasetId });
            return { data: parseSplits(raw) };
        }
        catch (error) {
            return { warning: `Could not fetch splits from Dataset Viewer: ${formatErrorMessage(error)}` };
        }
    }

    /**
     * Fetches and parses `/size`.
     * @returns {Promise<{data?: object, warning?: string}>} Never rejects.
     */
    async fetchSize(datasetId) {
        try {
            const raw = await this.client.getJson('/size', { dataset: datasetId });
            return { data: parseSize(raw) };
        }
        catch (error) {
            return { warning: `Could not fetch size information from Dataset Viewer: ${formatErrorMessage(error)}` };
        }
    }

    /**
     * Fetches and parses `/parquet`.
     * @returns {Promise<{data?: object, warning?: string}>} Never rejects.
     */
    async fetchParquet(datasetId) {
        try {
            const raw = await this.client.getJson('/parquet', { dataset: datasetId });
            return { data: parseParquet(raw) };
        }
        catch (error) {
            return {
                warning: `Could not fetch parquet export information from Dataset Viewer: ${formatErrorMessage(error)}`,
            };
        }
    }

    /**
     * Fetches and parses `/rows` for one config/split window.
     * @param {number} offset - Index of the first row requested.
     * @param {number} length - Number of rows requested.
     * @returns {Promise<{data?: object, warning?: string}>} Never rejects.
     */
    async fetchRows(datasetId, config, split, offset, length) {
        try {
            const raw = await this.client.getJson('/rows', { dataset: datasetId, config, split, offset, length });
            return { data: parseRows(raw) };
        }
        catch (error) {
            return { warning: `Could not fetch rows from Dataset Viewer: ${formatErrorMessage(error)}` };
        }
    }
}
|
|
181
|
+
/**
 * Extracts the `{ dataset, config, split }` triples from a `/splits` payload.
 * Entries that are not records, or that lack any of the three strings, are
 * dropped; the result is ordered by config, then split.
 *
 * @param {unknown} raw - Decoded response body.
 * @returns {{dataset: string, config: string, split: string}[]}
 * @throws {Error} When `raw` is not a plain object.
 */
function parseSplits(raw) {
    const { splits: candidates } = expectRecord(raw, '/splits response');
    if (!Array.isArray(candidates)) {
        return [];
    }
    const parsed = [];
    for (const candidate of candidates) {
        if (!isRecord(candidate)) {
            continue;
        }
        const dataset = stringValue(candidate.dataset);
        const config = stringValue(candidate.config);
        const split = stringValue(candidate.split);
        if (dataset && config && split) {
            parsed.push({ dataset, config, split });
        }
    }
    return parsed.sort(compareConfigSplit);
}
|
|
200
|
+
/**
 * Normalizes a `/size` payload into dataset-, config- and split-level size
 * entries plus the `pending` / `failed` / `partial` status fields.
 *
 * @param {unknown} raw - Decoded response body.
 * @returns {object} Parsed size summary; split entries are sorted by config, then split.
 * @throws {Error} When `raw` is not a plain object.
 */
function parseSize(raw) {
    const root = expectRecord(raw, '/size response');
    const listOrEmpty = (candidate) => (Array.isArray(candidate) ? candidate : []);
    const sizeRoot = isRecord(root.size) ? root.size : {};
    const splitEntries = parseSizeEntries(sizeRoot.splits);
    splitEntries.sort(compareSizeEntry);
    return {
        dataset: parseSizeEntry(sizeRoot.dataset),
        configs: parseSizeEntries(sizeRoot.configs),
        splits: splitEntries,
        pending: listOrEmpty(root.pending),
        failed: listOrEmpty(root.failed),
        partial: root.partial === true,
    };
}
|
|
212
|
+
/**
 * Parses a list of size entries, discarding anything parseSizeEntry rejects.
 *
 * @param {unknown} raw - Expected to be an array; anything else yields `[]`.
 * @returns {object[]}
 */
function parseSizeEntries(raw) {
    if (!Array.isArray(raw)) {
        return [];
    }
    const parsed = raw.map((item) => parseSizeEntry(item));
    return parsed.filter(isDefined);
}
|
|
215
|
+
/**
 * Converts one snake_case size record from the API into a camelCase entry.
 * Fields of the wrong type become `undefined`; the nullable byte and
 * estimate fields keep an explicit `null`.
 *
 * @param {unknown} raw
 * @returns {object | undefined} `undefined` when `raw` is not a plain object.
 */
function parseSizeEntry(raw) {
    if (!isRecord(raw)) {
        return undefined;
    }
    return {
        dataset: stringValue(raw.dataset),
        config: stringValue(raw.config),
        split: stringValue(raw.split),
        numRows: numberValue(raw.num_rows),
        estimatedNumRows: nullableNumberValue(raw.estimated_num_rows),
        numColumns: numberValue(raw.num_columns),
        numBytesOriginalFiles: nullableNumberValue(raw.num_bytes_original_files),
        numBytesParquetFiles: nullableNumberValue(raw.num_bytes_parquet_files),
        numBytesMemory: nullableNumberValue(raw.num_bytes_memory),
    };
}
|
|
233
|
+
/**
 * Normalizes a `/parquet` payload. A file is kept only when it has string
 * `config`, `split` and `url` values and a finite numeric `size`; files are
 * ordered by config, split, then URL.
 *
 * @param {unknown} raw - Decoded response body.
 * @returns {{files: object[], pending: unknown[], failed: unknown[], partial: boolean}}
 * @throws {Error} When `raw` is not a plain object.
 */
function parseParquet(raw) {
    const root = expectRecord(raw, '/parquet response');
    const files = [];
    if (Array.isArray(root.parquet_files)) {
        for (const item of root.parquet_files) {
            if (!isRecord(item)) {
                continue;
            }
            const config = stringValue(item.config);
            const split = stringValue(item.split);
            const url = stringValue(item.url);
            const size = numberValue(item.size);
            if (!config || !split || !url || size === undefined) {
                continue;
            }
            files.push({
                dataset: stringValue(item.dataset),
                config,
                split,
                url,
                filename: stringValue(item.filename),
                size,
            });
        }
        files.sort((left, right) => compareConfigSplit(left, right) || left.url.localeCompare(right.url));
    }
    const pending = Array.isArray(root.pending) ? root.pending : [];
    const failed = Array.isArray(root.failed) ? root.failed : [];
    return { files, pending, failed, partial: root.partial === true };
}
|
|
265
|
+
/**
 * Normalizes a `/rows` payload into features, rows and paging counters.
 * Features without a string name are skipped, as are rows whose `row` is not
 * a record or whose `row_idx` is not a finite number.
 *
 * @param {unknown} raw - Decoded response body.
 * @returns {{features: object[], rows: object[], numRowsTotal?: number, numRowsPerPage?: number, partial: boolean}}
 * @throws {Error} When `raw` is not a plain object.
 */
function parseRows(raw) {
    const root = expectRecord(raw, '/rows response');

    const features = [];
    for (const item of Array.isArray(root.features) ? root.features : []) {
        if (!isRecord(item)) {
            continue;
        }
        const name = stringValue(item.name);
        if (name) {
            features.push({ name, type: item.type });
        }
    }

    const rows = [];
    for (const item of Array.isArray(root.rows) ? root.rows : []) {
        if (!isRecord(item) || !isRecord(item.row)) {
            continue;
        }
        const rowIdx = numberValue(item.row_idx);
        if (rowIdx === undefined) {
            continue;
        }
        // Only string column names are meaningful in truncated_cells.
        const truncatedCells = Array.isArray(item.truncated_cells)
            ? item.truncated_cells.filter((cell) => typeof cell === 'string')
            : [];
        rows.push({ rowIdx, row: item.row, truncatedCells });
    }

    return {
        features,
        rows,
        numRowsTotal: numberValue(root.num_rows_total),
        numRowsPerPage: numberValue(root.num_rows_per_page),
        partial: root.partial === true,
    };
}
|
|
305
|
+
/**
 * Renders the "Hub Metadata" section: optional description (cut to 500
 * characters), one bullet per available detail, selected card-data fields
 * and up to 20 tags, followed by a blank line.
 *
 * @param {object} metadata - Value produced by the metadata provider.
 * @returns {string[]} Markdown lines.
 */
function formatMetadata(metadata) {
    const out = ['### Hub Metadata'];
    if (metadata.description) {
        out.push(truncateMarkdown(metadata.description, 500));
    }

    if (metadata.author) {
        out.push(`- **Author:** ${metadata.author}`);
    }
    if (metadata.downloadsAllTime !== undefined) {
        out.push(`- **Downloads:** ${formatNumber(metadata.downloadsAllTime)}`);
    }
    if (metadata.likes !== undefined) {
        out.push(`- **Likes:** ${metadata.likes.toString()}`);
    }
    if (metadata.updatedAt) {
        out.push(`- **Updated:** ${formatDate(metadata.updatedAt)}`);
    }
    if (metadata.createdAt) {
        out.push(`- **Created:** ${formatDate(metadata.createdAt)}`);
    }
    if (metadata.gated) {
        out.push('- **Status:** 🔒 Gated');
    }
    if (metadata.private) {
        out.push('- **Status:** 🔐 Private');
    }

    const card = metadata.cardData;
    if (card) {
        const cardFields = [
            ['License', card.license],
            ['Language', card.language],
            ['Task Categories', card.task_categories],
            ['Size Categories', card.size_categories],
            ['Pretty Name', card.pretty_name],
            ['Papers With Code ID', card.paperswithcode_id],
        ];
        for (const [label, value] of cardFields) {
            addCardValue(out, label, value);
        }
    }

    if (metadata.tags?.length) {
        const shownTags = metadata.tags.slice(0, 20).map((tag) => `\`${tag}\``);
        out.push(`- **Tags:** ${shownTags.join(' ')}`);
    }
    out.push('');
    return out;
}
|
|
346
|
+
/**
 * Renders the "Configs and Splits" table. Rows come from `splits`; when that
 * list is empty the split entries of `size` are used instead. Size figures
 * are looked up per config/split and shown as "—" when unavailable.
 *
 * @param {{config: string, split: string}[]} splits
 * @param {object} [size] - Parsed `/size` summary.
 * @returns {string[]} Markdown lines ending in a blank line.
 */
function formatSplitsAndSize(splits, size) {
    // Index size entries by "config\nsplit" for constant-time lookup per row.
    const sizeIndex = new Map();
    if (size != null) {
        for (const entry of size.splits) {
            sizeIndex.set(`${entry.config ?? ''}\n${entry.split ?? ''}`, entry);
        }
    }

    const out = [
        '### Configs and Splits',
        '| Config | Split | Rows | Estimated Rows | Columns | Parquet Size |',
        '|---|---|---:|---:|---:|---:|',
    ];

    let tableRows = splits;
    if (!splits.length) {
        tableRows = (size?.splits ?? []).map((entry) => ({
            dataset: entry.dataset ?? '',
            config: entry.config ?? '',
            split: entry.split ?? '',
        }));
    }

    for (const row of tableRows) {
        const stats = sizeIndex.get(`${row.config}\n${row.split}`);
        const cells = [
            escapeTableCell(row.config),
            escapeTableCell(row.split),
            formatOptionalNumber(stats?.numRows),
            formatOptionalNumber(stats?.estimatedNumRows ?? undefined),
            formatOptionalNumber(stats?.numColumns),
            formatOptionalBytes(stats?.numBytesParquetFiles ?? undefined),
        ];
        out.push(`| ${cells.join(' | ')} |`);
    }

    if (size?.dataset) {
        const overall = size.dataset;
        const estimate = overall.estimatedNumRows ? ` (estimated ${formatNumber(overall.estimatedNumRows)})` : '';
        out.push('', `Total rows: ${formatOptionalNumber(overall.numRows)}${estimate}.`);
    }
    if (size?.partial) {
        out.push('', '> Size information is partial; estimated rows may be shown where exact counts are unavailable.');
    }
    out.push('');
    return out;
}
|
|
379
|
+
/**
 * Renders the "Parquet Exports" section: a per-config/split table of file
 * counts and total sizes, status notes, and either the individual file URLs
 * (when few enough) or a note that they were omitted.
 *
 * @param {{files: object[], pending: unknown[], failed: unknown[], partial: boolean}} parquet
 * @returns {string[]} Markdown lines ending in a blank line.
 */
function formatParquet(parquet) {
    const { files, pending, failed, partial } = parquet;
    const pendingNote = `> Pending parquet jobs: ${pending.length.toString()}.`;
    const failedNote = `> Failed parquet jobs: ${failed.length.toString()}.`;
    const out = ['### Parquet Exports'];

    if (files.length === 0) {
        out.push('', '> No parquet export files were listed.');
        if (pending.length) {
            out.push(pendingNote);
        }
        if (failed.length) {
            out.push(failedNote);
        }
        out.push('');
        return out;
    }

    // Aggregate file count and byte total per config/split pair.
    const totals = new Map();
    for (const { config, split, size } of files) {
        const key = `${config}\n${split}`;
        let bucket = totals.get(key);
        if (bucket == null) {
            bucket = { config, split, files: 0, size: 0 };
            totals.set(key, bucket);
        }
        bucket.files += 1;
        bucket.size += size;
    }

    out.push('| Config | Split | Files | Total Size |', '|---|---|---:|---:|');
    for (const group of [...totals.values()].sort(compareConfigSplit)) {
        out.push(`| ${escapeTableCell(group.config)} | ${escapeTableCell(group.split)} | ${group.files.toString()} | ${formatBytes(group.size)} |`);
    }

    if (partial) {
        out.push('', '> Parquet export information is partial.');
    }
    if (pending.length) {
        out.push('', pendingNote);
    }
    if (failed.length) {
        out.push('', failedNote);
    }

    if (files.length > MAX_URLS_TO_SHOW) {
        out.push('', `Parquet file URLs omitted for brevity (${files.length.toString()} files).`);
    }
    else {
        out.push('', 'Parquet URLs:');
        for (const file of files) {
            out.push(`- \`${file.config}/${file.split}/${file.filename ?? 'file'}\`: ${file.url}`);
        }
    }
    out.push('');
    return out;
}
|
|
421
|
+
/**
 * Renders the "Schema Preview" section for one config/split: a note naming
 * the target, the feature table, and a warning when the response is partial.
 *
 * @param {{features: object[], partial: boolean}} rows - Parsed `/rows` data.
 * @param {string} config
 * @param {string} split
 * @returns {string[]} Markdown lines ending in a blank line.
 */
function formatSchemaPreview(rows, config, split) {
    const partialNote = rows.partial ? ['', '> Row/schema response is partial.'] : [];
    return [
        '### Schema Preview',
        '',
        `Using \`${config}/${split}\`.`,
        '',
        ...formatFeatures(rows.features),
        ...partialNote,
        '',
    ];
}
|
|
429
|
+
/**
 * Renders the full "Dataset Preview" report: a summary list, the feature
 * table and the row table.
 *
 * @param {string} datasetId
 * @param {string} config
 * @param {string} split
 * @param {number} offset - Index of the first requested row.
 * @param {number} limit - Requested row count (after clamping).
 * @param {object} rows - Parsed `/rows` data.
 * @returns {string[]} Markdown lines with leading/trailing blanks removed.
 */
function formatPreview(datasetId, config, split, offset, limit, rows) {
    const returned = rows.rows.length;
    // With no rows the range collapses to "offset-offset".
    const lastIndex = returned ? offset + returned - 1 : offset;
    const totalLine = rows.numRowsTotal !== undefined
        ? [`- Total rows: \`${formatNumber(rows.numRowsTotal)}\``]
        : [];
    const partialNote = rows.partial ? ['', '> Row response is partial.'] : [];
    return trimBlankLines([
        '## Dataset Preview',
        '',
        `- Dataset: \`${datasetId}\``,
        `- Config: \`${config}\``,
        `- Split: \`${split}\``,
        `- Rows: \`${offset.toString()}-${lastIndex.toString()}\``,
        `- Requested limit: \`${limit.toString()}\``,
        ...totalLine,
        '',
        '### Features',
        '',
        ...formatFeatures(rows.features),
        '### Rows',
        '',
        ...formatRows(rows),
        ...partialNote,
    ]);
}
|
|
448
|
+
/**
 * Renders the numbered column/type table. An empty feature list produces a
 * single placeholder row.
 *
 * @param {{name: string, type: unknown}[]} features
 * @returns {string[]} Markdown lines ending in a blank line.
 */
function formatFeatures(features) {
    const out = ['| # | Column | Type |', '|---:|---|---|'];
    if (features.length === 0) {
        out.push('| — | — | — |');
    }
    else {
        features.forEach((feature, position) => {
            const ordinal = (position + 1).toString();
            const column = escapeTableCell(feature.name);
            const type = escapeTableCell(formatFeatureType(feature.type));
            out.push(`| ${ordinal} | ${column} | ${type} |`);
        });
    }
    out.push('');
    return out;
}
|
|
458
|
+
/**
 * Renders the row table for a preview. Only the first MAX_TABLE_COLUMNS
 * columns are shown, and rows stop being added once the table would exceed
 * MAX_ROW_PREVIEW_CHARS characters. Trailing notes explain any column
 * omission, budget truncation, or cells the Dataset Viewer itself truncated.
 *
 * @param {{rows: object[], features: {name: string}[]}} rows - Parsed `/rows` data.
 * @returns {string[]} Markdown lines.
 */
function formatRows(rows) {
    const { rows: records, features } = rows;
    if (records.length === 0) {
        return ['No rows returned.'];
    }

    const allColumns = features.map((feature) => feature.name);
    const shownColumns = allColumns.slice(0, MAX_TABLE_COLUMNS);
    const hiddenCount = allColumns.length - shownColumns.length;

    const header = `| # | ${shownColumns.map((name) => escapeTableCell(name)).join(' | ')} |`;
    const divider = `|---:|${shownColumns.map(() => '---').join('|')}|`;
    const out = [header, divider];

    // Running size of the table text, counting one newline between lines.
    let usedChars = header.length + 1 + divider.length;
    let budgetExceeded = false;
    let viewerTruncated = false;

    for (const record of records) {
        const cells = [];
        for (const column of shownColumns) {
            if (record.truncatedCells.includes(column)) {
                viewerTruncated = true;
            }
            cells.push(escapeTableCell(formatCell(record.row[column])));
        }
        const rendered = `| ${record.rowIdx.toString()} | ${cells.join(' | ')} |`;
        const projected = usedChars + rendered.length + 1;
        if (projected <= MAX_ROW_PREVIEW_CHARS) {
            out.push(rendered);
            usedChars = projected;
            continue;
        }
        budgetExceeded = true;
        // If even the first row does not fit, show a clipped version of it
        // so the table is not empty.
        if (out.length === 2) {
            const remaining = Math.max(0, MAX_ROW_PREVIEW_CHARS - usedChars - 2);
            if (remaining > 0) {
                out.push(`${rendered.slice(0, remaining)}… |`);
            }
        }
        break;
    }

    if (hiddenCount > 0) {
        out.push('', `Showing first ${shownColumns.length.toString()} columns; omitted ${hiddenCount.toString()} wider columns.`);
    }
    if (budgetExceeded) {
        out.push('', `Row preview output was truncated after approximately ${MAX_ROW_PREVIEW_CHARS.toLocaleString()} characters. Use a lower \`limit\`, narrower config/split, or later \`offset\` to inspect more.`);
    }
    if (viewerTruncated) {
        out.push('', 'Dataset Viewer reported truncated cells in the returned rows.');
    }
    return out;
}
|
|
498
|
+
/**
 * Chooses the config/split used for the schema preview. When both `config`
 * and `split` are requested, the matching known split is returned, or a
 * synthetic entry for the request if it is unknown. Otherwise the first
 * known split is used.
 *
 * @param {{dataset: string, config: string, split: string}[]} splits
 * @param {{config?: string, split?: string}} options
 * @returns {{dataset: string, config: string, split: string} | undefined}
 *   `undefined` when nothing was requested and no splits are known.
 */
function resolveStructureTarget(splits, options) {
    const wantedConfig = options.config;
    const wantedSplit = options.split;
    if (!wantedConfig || !wantedSplit) {
        return splits[0];
    }
    const match = splits.find((candidate) => candidate.config === wantedConfig && candidate.split === wantedSplit);
    if (match) {
        return match;
    }
    return { dataset: '', config: wantedConfig, split: wantedSplit };
}
|
|
505
|
+
/**
 * Decides which config/split a preview should read.
 *
 * - No known splits: failure.
 * - Both `config` and `split` given: success only on an exact match.
 * - Exactly one known split: it is used, unless a given `config` or `split`
 *   contradicts it.
 * - Several known splits without a full selection: failure asking for both.
 *
 * @param {{config: string, split: string}[]} splits
 * @param {string} [config]
 * @param {string} [split]
 * @returns {{ok: true, config: string, split: string} | {ok: false, message: string}}
 */
function resolvePreviewTarget(splits, config, split) {
    const unavailable = () => ({
        ok: false,
        message: 'No Dataset Viewer config/split options are available for this dataset.',
    });
    const notFound = (shownConfig, shownSplit) => ({
        ok: false,
        message: [
            `Config/split \`${shownConfig}\` / \`${shownSplit}\` was not found.`,
            '',
            ...formatAvailableOptions(splits),
        ].join('\n'),
    });

    if (splits.length === 0) {
        return unavailable();
    }

    if (config && split) {
        const isKnown = splits.some((entry) => entry.config === config && entry.split === split);
        return isKnown ? { ok: true, config, split } : notFound(config, split);
    }

    if (splits.length !== 1) {
        return {
            ok: false,
            message: [
                '`dataset_preview` requires `config` and `split` because this dataset has multiple choices.',
                '',
                ...formatAvailableOptions(splits),
            ].join('\n'),
        };
    }

    const only = splits[0];
    if (!only) {
        return unavailable();
    }
    const effectiveConfig = config ?? only.config;
    const effectiveSplit = split ?? only.split;
    const contradictsConfig = Boolean(config) && config !== only.config;
    const contradictsSplit = Boolean(split) && split !== only.split;
    if (contradictsConfig || contradictsSplit) {
        return notFound(effectiveConfig, effectiveSplit);
    }
    return { ok: true, config: effectiveConfig, split: effectiveSplit };
}
|
|
543
|
+
/**
 * Lists the known config/split pairs as Markdown bullets under an
 * "Available options:" heading. At most `maxOptions` pairs are listed; when
 * more exist, a final bullet states how many were left out so the reader
 * knows the list is incomplete.
 *
 * @param {{config: string, split: string}[]} splits
 * @param {number} [maxOptions=50] - Maximum number of pairs to list.
 * @returns {string[]} Markdown lines.
 */
function formatAvailableOptions(splits, maxOptions = 50) {
    const listed = splits.slice(0, maxOptions);
    const lines = ['Available options:'];
    for (const entry of listed) {
        lines.push(`- \`${entry.config}\` / \`${entry.split}\``);
    }
    const hidden = splits.length - listed.length;
    if (hidden > 0) {
        lines.push(`- … and ${hidden.toString()} more not shown.`);
    }
    return lines;
}
|
|
546
|
+
/**
 * Normalizes a requested preview limit: missing or non-integer values fall
 * back to DEFAULT_PREVIEW_LIMIT, values below 1 become 1, and anything else
 * is capped at MAX_PREVIEW_LIMIT.
 *
 * @param {number} [value]
 * @returns {number}
 */
function clampLimit(value) {
    const isUsable = value !== undefined && Number.isInteger(value);
    if (!isUsable) {
        return DEFAULT_PREVIEW_LIMIT;
    }
    return value < 1 ? 1 : Math.min(value, MAX_PREVIEW_LIMIT);
}
|
|
555
|
+
/**
 * Renders a feature's type descriptor as compact JSON text.
 *
 * @param {unknown} type
 * @returns {string}
 */
function formatFeatureType(type) {
    const rendered = compactJson(type);
    return rendered;
}
|
|
558
|
+
/**
 * Converts a cell value to display text: strings pass through unchanged,
 * `null`, `undefined`, numbers and booleans use `String()`, and everything
 * else is rendered as compact JSON.
 *
 * @param {unknown} value
 * @returns {string}
 */
function formatCell(value) {
    if (value === null) {
        return String(value);
    }
    switch (typeof value) {
        case 'string':
            return value;
        case 'undefined':
        case 'number':
        case 'boolean':
            return String(value);
        default:
            return compactJson(value);
    }
}
|
|
566
|
+
/**
 * Serializes a value with `JSON.stringify`, falling back to `String(value)`
 * when serialization throws or yields `undefined`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function compactJson(value) {
    let rendered;
    try {
        rendered = JSON.stringify(value);
    }
    catch {
        return String(value);
    }
    return rendered ?? String(value);
}
|
|
575
|
+
/**
 * Appends a `- **Label:** value` bullet to `lines` for a card-data field.
 * Arrays are joined with ", " and skipped when the joined text is empty;
 * strings, numbers and booleans are stringified; `null`, `undefined` and any
 * other type add nothing.
 *
 * @param {string[]} lines - Mutated in place.
 * @param {string} label
 * @param {unknown} value
 * @returns {void}
 */
function addCardValue(lines, label, value) {
    if (value == null) {
        return;
    }
    let text;
    if (Array.isArray(value)) {
        text = value.map((item) => String(item)).join(', ');
        if (!text) {
            return;
        }
    }
    else if (['string', 'number', 'boolean'].includes(typeof value)) {
        text = String(value);
    }
    else {
        return;
    }
    lines.push(`- **${label}:** ${text}`);
}
|
|
588
|
+
/**
 * Produces a short human-readable message for a caught error.
 *
 * - HfApiError: "<status> <statusText>", followed by ": <detail>" when a
 *   detail can be extracted from the response body.
 * - HubApiError: "<statusCode>: <message>".
 * - Other Error instances: their message; anything else: `String(error)`.
 *
 * @param {unknown} error
 * @returns {string}
 */
function formatErrorMessage(error) {
    if (error instanceof HfApiError) {
        const detail = extractApiErrorDetail(error.responseBody);
        const summary = `${error.status.toString()} ${error.statusText}`;
        if (detail) {
            return `${summary}: ${detail}`;
        }
        return summary;
    }
    if (error instanceof HubApiError) {
        return `${error.statusCode.toString()}: ${error.message}`;
    }
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
600
|
+
/**
 * Pulls a readable detail out of an API error body. For a JSON object the
 * first string among `error`, `cause_message` and `message` is returned; for
 * text that is not JSON the body itself is returned, clipped to 200
 * characters.
 *
 * @param {string} [body]
 * @returns {string | undefined} `undefined` for an empty body, a JSON value
 *   that is not an object, or an object without any of the known fields.
 */
function extractApiErrorDetail(body) {
    if (!body) {
        return undefined;
    }
    let parsed;
    try {
        parsed = JSON.parse(body);
    }
    catch {
        // Not JSON: surface the raw text, bounded in length.
        return body.length > 200 ? `${body.slice(0, 200)}…` : body;
    }
    if (!isRecord(parsed)) {
        return undefined;
    }
    return stringValue(parsed.error) ?? stringValue(parsed.cause_message) ?? stringValue(parsed.message);
}
|
|
614
|
+
/**
 * Returns `value` unchanged when it is a plain record, otherwise throws.
 *
 * @param {unknown} value
 * @param {string} label - Included in the error message.
 * @returns {object}
 * @throws {Error} `Invalid <label>: expected object`.
 */
function expectRecord(value, label) {
    if (isRecord(value)) {
        return value;
    }
    throw new Error(`Invalid ${label}: expected object`);
}
|
|
619
|
+
/**
 * Tells whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
|
|
622
|
+
/**
 * Filter predicate that rejects only `undefined`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isDefined(value) {
    const missing = value === undefined;
    return !missing;
}
|
|
625
|
+
/**
 * Returns `value` when it is a string, otherwise `undefined`.
 *
 * @param {unknown} value
 * @returns {string | undefined}
 */
function stringValue(value) {
    if (typeof value !== 'string') {
        return undefined;
    }
    return value;
}
|
|
628
|
+
/**
 * Returns `value` when it is a finite number, otherwise `undefined`.
 *
 * @param {unknown} value
 * @returns {number | undefined}
 */
function numberValue(value) {
    // Number.isFinite is already false for every non-number, NaN and ±Infinity.
    if (Number.isFinite(value)) {
        return value;
    }
    return undefined;
}
|
|
631
|
+
/**
 * Like numberValue, but preserves an explicit `null`.
 *
 * @param {unknown} value
 * @returns {number | null | undefined}
 */
function nullableNumberValue(value) {
    if (value === null) {
        return null;
    }
    return numberValue(value);
}
|
|
634
|
+
/**
 * Sort comparator ordering entries by `config`, then by `split`, using
 * `localeCompare`.
 *
 * @param {{config: string, split: string}} a
 * @param {{config: string, split: string}} b
 * @returns {number}
 */
function compareConfigSplit(a, b) {
    const byConfig = a.config.localeCompare(b.config);
    if (byConfig !== 0) {
        return byConfig;
    }
    return a.split.localeCompare(b.split);
}
|
|
637
|
+
/**
 * Sort comparator for size entries: by `config`, then by `split`, treating a
 * missing value as the empty string.
 *
 * @param {{config?: string, split?: string}} a
 * @param {{config?: string, split?: string}} b
 * @returns {number}
 */
function compareSizeEntry(a, b) {
    const byConfig = (a.config ?? '').localeCompare(b.config ?? '');
    if (byConfig !== 0) {
        return byConfig;
    }
    return (a.split ?? '').localeCompare(b.split ?? '');
}
|
|
640
|
+
/**
 * Formats a number with `formatNumber`, or returns an em dash when the value
 * is `null` or `undefined`.
 *
 * @param {number | null | undefined} value
 * @returns {string}
 */
function formatOptionalNumber(value) {
    if (value == null) {
        return '—';
    }
    return formatNumber(value);
}
|
|
643
|
+
/**
 * Formats a byte count with `formatBytes`, or returns an em dash when the
 * value is `null` or `undefined`.
 *
 * @param {number | null | undefined} value
 * @returns {string}
 */
function formatOptionalBytes(value) {
    if (value == null) {
        return '—';
    }
    return formatBytes(value);
}
|
|
646
|
+
/**
 * Escapes text destined for a Markdown table cell by delegating to
 * `escapeMarkdown`.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeTableCell(value) {
    const escaped = escapeMarkdown(value);
    return escaped;
}
|
|
649
|
+
/**
 * Shortens `value` to at most `maxChars` UTF-16 code units, replacing the
 * removed tail with a single "…". Text that already fits is returned as is.
 *
 * The cut never lands inside a surrogate pair, so an astral character such
 * as an emoji is dropped whole rather than leaving a lone surrogate, and a
 * non-positive `maxChars` yields just the ellipsis.
 *
 * @param {string} value
 * @param {number} maxChars - Maximum length of the result, ellipsis included.
 * @returns {string}
 */
function truncateMarkdown(value, maxChars) {
    if (value.length <= maxChars) {
        return value;
    }
    // Reserve one unit for the ellipsis; clamp so slice never sees a negative end.
    let cut = Math.max(0, maxChars - 1);
    if (cut > 0) {
        const lastKept = value.charCodeAt(cut - 1);
        // A trailing high surrogate means its low half was cut off: drop it too.
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
            cut -= 1;
        }
    }
    return `${value.slice(0, cut)}…`;
}
|
|
652
|
+
/**
 * Returns a copy of `lines` without leading and trailing empty strings.
 * Blank lines in the middle are kept and the input is not modified.
 *
 * @param {string[]} lines
 * @returns {string[]}
 */
function trimBlankLines(lines) {
    const copy = [...lines];
    let start = 0;
    let end = copy.length;
    while (start < end && copy[start] === '') {
        start += 1;
    }
    while (end > start && copy[end - 1] === '') {
        end -= 1;
    }
    return copy.slice(start, end);
}
|
|
660
|
+
//# sourceMappingURL=dataset-viewer-inspect.js.map
|