tryll-dataset-builder-mcp 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +228 -28
- package/index.js +75 -18
- package/lib/store.js +63 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
# Tryll Dataset Builder — MCP Server
|
|
2
2
|
|
|
3
|
-
An MCP (Model Context Protocol) server for building structured RAG knowledge base datasets. Use it with Claude Code to create, manage, and export JSON datasets via natural language.
|
|
3
|
+
An MCP (Model Context Protocol) server for building structured RAG knowledge base datasets. Use it with Claude Code to create, manage, and export JSON datasets via natural language — with optional real-time sync to the [Dataset Builder web app](https://trylljsoncreator.onrender.com).
|
|
4
4
|
|
|
5
5
|
Built by [Tryll Engine](https://tryllengine.com) | [Discord](https://discord.gg/CMnMrmapyB)
|
|
6
6
|
|
|
7
|
+
---
|
|
8
|
+
|
|
7
9
|
## Quick Start
|
|
8
10
|
|
|
9
11
|
### 1. Install
|
|
@@ -14,8 +16,6 @@ npm install -g tryll-dataset-builder-mcp
|
|
|
14
16
|
|
|
15
17
|
### 2. Add to Claude Code
|
|
16
18
|
|
|
17
|
-
Run in your terminal:
|
|
18
|
-
|
|
19
19
|
```bash
|
|
20
20
|
claude mcp add dataset-builder -- npx tryll-dataset-builder-mcp
|
|
21
21
|
```
|
|
@@ -42,55 +42,208 @@ Just talk to Claude:
|
|
|
42
42
|
|
|
43
43
|
> "Create a knowledge base about Minecraft with categories: Mobs, Blocks, Biomes. Add 10 chunks to each category."
|
|
44
44
|
|
|
45
|
-
> "
|
|
45
|
+
> "Parse this wiki page and add it to my dataset: https://minecraft.wiki/w/Creeper"
|
|
46
|
+
|
|
47
|
+
> "Show me the version history of my project"
|
|
46
48
|
|
|
47
|
-
|
|
49
|
+
---
|
|
48
50
|
|
|
49
51
|
## Configuration
|
|
50
52
|
|
|
51
53
|
| Variable | Default | Description |
|
|
52
54
|
|----------|---------|-------------|
|
|
53
|
-
| `DATA_DIR` | `./datasets` | Directory
|
|
55
|
+
| `DATA_DIR` | `./datasets` | Directory for project JSON files (local mode) |
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Two Modes of Operation
|
|
60
|
+
|
|
61
|
+
### Local Mode (default)
|
|
62
|
+
Data is stored as JSON files in `DATA_DIR`. No server needed.
|
|
63
|
+
|
|
64
|
+
### Connected Mode (real-time sync)
|
|
65
|
+
Connect to the [Dataset Builder web app](https://trylljsoncreator.onrender.com) for live collaboration. Changes made via MCP appear instantly in the browser, and vice versa.
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
You: "Connect to session ABC123"
|
|
69
|
+
Claude: *connects via WebSocket*
|
|
70
|
+
You: "Add 5 chunks about dragons"
|
|
71
|
+
→ chunks appear in the browser in real-time
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
54
75
|
|
|
55
|
-
## Available Tools (
|
|
76
|
+
## Available Tools (28)
|
|
77
|
+
|
|
78
|
+
### Session Management
|
|
79
|
+
|
|
80
|
+
| Tool | Description |
|
|
81
|
+
|------|-------------|
|
|
82
|
+
| `connect_session` | Connect to the web app for real-time collaboration. Requires a 6-character session code from the browser UI |
|
|
83
|
+
| `disconnect_session` | Disconnect from the web app, switch back to local storage |
|
|
56
84
|
|
|
57
85
|
### Project Management
|
|
86
|
+
|
|
58
87
|
| Tool | Description |
|
|
59
88
|
|------|-------------|
|
|
60
89
|
| `create_project` | Create a new dataset project |
|
|
61
90
|
| `list_projects` | List all projects with stats |
|
|
62
|
-
| `delete_project` |
|
|
63
|
-
| `get_project_stats` | Detailed statistics |
|
|
91
|
+
| `delete_project` | Permanently delete a project |
|
|
92
|
+
| `get_project_stats` | Detailed statistics (categories, chunks, text lengths) |
|
|
64
93
|
|
|
65
94
|
### Category Management
|
|
95
|
+
|
|
66
96
|
| Tool | Description |
|
|
67
97
|
|------|-------------|
|
|
68
|
-
| `create_category` | Add a category to
|
|
98
|
+
| `create_category` | Add a category to organize chunks |
|
|
69
99
|
| `list_categories` | List categories with chunk counts |
|
|
70
100
|
| `rename_category` | Rename a category |
|
|
71
|
-
| `delete_category` | Delete a category and its chunks |
|
|
101
|
+
| `delete_category` | Delete a category and all its chunks |
|
|
72
102
|
|
|
73
103
|
### Chunk Operations
|
|
104
|
+
|
|
74
105
|
| Tool | Description |
|
|
75
106
|
|------|-------------|
|
|
76
|
-
| `add_chunk` | Add a single knowledge chunk |
|
|
77
|
-
| `bulk_add_chunks` | Add multiple chunks at once |
|
|
78
|
-
| `get_chunk` | Get
|
|
79
|
-
| `update_chunk` | Update chunk fields |
|
|
80
|
-
| `delete_chunk` | Delete a chunk |
|
|
81
|
-
| `duplicate_chunk` | Clone a chunk |
|
|
82
|
-
| `move_chunk` | Move chunk between categories |
|
|
107
|
+
| `add_chunk` | Add a single knowledge chunk with ID, text, and metadata |
|
|
108
|
+
| `bulk_add_chunks` | Add multiple chunks at once (faster than one by one) |
|
|
109
|
+
| `get_chunk` | Get full content of a chunk by ID |
|
|
110
|
+
| `update_chunk` | Update chunk fields (ID, text, metadata) |
|
|
111
|
+
| `delete_chunk` | Delete a chunk by ID |
|
|
112
|
+
| `duplicate_chunk` | Clone a chunk (creates `id_copy`) |
|
|
113
|
+
| `move_chunk` | Move a chunk between categories |
|
|
83
114
|
|
|
84
115
|
### Search & Export
|
|
116
|
+
|
|
117
|
+
| Tool | Description |
|
|
118
|
+
|------|-------------|
|
|
119
|
+
| `search_chunks` | Search by chunk ID or text content |
|
|
120
|
+
| `export_project` | Export as flat JSON array (RAG-ready) |
|
|
121
|
+
| `import_json` | Import an existing JSON dataset |
|
|
122
|
+
| `export_category` | Export a single category as JSON |
|
|
123
|
+
|
|
124
|
+
### URL Parsing
|
|
125
|
+
|
|
126
|
+
| Tool | Description |
|
|
127
|
+
|------|-------------|
|
|
128
|
+
| `parse_url` | Fetch a web page, extract text, auto-create chunks. Splits text > 2000 chars into multiple chunks. Extracts wiki infobox metadata |
|
|
129
|
+
| `batch_parse_urls` | Parse multiple URLs at once |
|
|
130
|
+
|
|
131
|
+
### Bulk Operations
|
|
132
|
+
|
|
85
133
|
| Tool | Description |
|
|
86
134
|
|------|-------------|
|
|
87
|
-
| `
|
|
88
|
-
| `
|
|
89
|
-
|
|
135
|
+
| `bulk_update_metadata` | Set a metadata field across all chunks (or per category) |
|
|
136
|
+
| `merge_projects` | Merge all data from one project into another |
|
|
137
|
+
|
|
138
|
+
### Version History
|
|
139
|
+
|
|
140
|
+
| Tool | Description |
|
|
141
|
+
|------|-------------|
|
|
142
|
+
| `get_history` | Get version history (last 50 commits) for a project |
|
|
143
|
+
| `get_commit` | Get a specific commit with full snapshot data for diffing |
|
|
144
|
+
| `rollback` | Rollback a project to a previous commit's state |
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Tool Details
|
|
149
|
+
|
|
150
|
+
### `add_chunk`
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
project: "minecraft"
|
|
154
|
+
category: "Mobs"
|
|
155
|
+
id: "creeper"
|
|
156
|
+
text: "A Creeper is a hostile mob that silently approaches players..."
|
|
157
|
+
metadata:
|
|
158
|
+
page_title: "Creeper"
|
|
159
|
+
source: "Minecraft Wiki"
|
|
160
|
+
license: "CC BY-NC-SA 3.0"
|
|
161
|
+
health: "20" ← custom metadata field
|
|
162
|
+
behavior: "explodes" ← custom metadata field
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Standard metadata fields: `page_title`, `source`, `license`. Any extra fields become custom metadata.
|
|
166
|
+
|
|
167
|
+
### `parse_url`
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
project: "minecraft"
|
|
171
|
+
category: "Mobs"
|
|
172
|
+
url: "https://minecraft.wiki/w/Creeper"
|
|
173
|
+
chunk_id: "creeper"
|
|
174
|
+
license: "CC BY-NC-SA 3.0"
|
|
175
|
+
```
|
|
90
176
|
|
|
91
|
-
|
|
177
|
+
- Fetches the page, extracts main text content
|
|
178
|
+
- If text > 2000 chars → auto-splits into `creeper_1`, `creeper_2`, etc.
|
|
179
|
+
- Extracts page title and source URL as metadata
|
|
180
|
+
- For wiki pages: extracts infobox/sidebar data as custom metadata fields
|
|
92
181
|
|
|
93
|
-
|
|
182
|
+
### `get_history`
|
|
183
|
+
|
|
184
|
+
```
|
|
185
|
+
project: "minecraft"
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
```json
|
|
190
|
+
[
|
|
191
|
+
{
|
|
192
|
+
"id": "uuid",
|
|
193
|
+
"timestamp": "2026-02-27T14:30:00.000Z",
|
|
194
|
+
"source": "mcp",
|
|
195
|
+
"action": "addChunk",
|
|
196
|
+
"summary": "Added chunk 'creeper' to 'Mobs'",
|
|
197
|
+
"stats": { "categories": 3, "chunks": 12 }
|
|
198
|
+
}
|
|
199
|
+
]
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### `rollback`
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
project: "minecraft"
|
|
206
|
+
commit_id: "uuid-of-target-commit"
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
Restores the project to that commit's snapshot. Creates a new "rollback" commit so you can undo the rollback later.
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## Data Formats
|
|
214
|
+
|
|
215
|
+
### Project JSON (internal)
|
|
216
|
+
|
|
217
|
+
```json
|
|
218
|
+
{
|
|
219
|
+
"name": "minecraft",
|
|
220
|
+
"createdAt": "2026-02-27T10:00:00.000Z",
|
|
221
|
+
"categories": [
|
|
222
|
+
{
|
|
223
|
+
"id": "uuid",
|
|
224
|
+
"name": "Mobs",
|
|
225
|
+
"expanded": true,
|
|
226
|
+
"chunks": [
|
|
227
|
+
{
|
|
228
|
+
"_uid": "uuid",
|
|
229
|
+
"id": "creeper",
|
|
230
|
+
"text": "A Creeper is a hostile mob...",
|
|
231
|
+
"metadata": {
|
|
232
|
+
"page_title": "Creeper",
|
|
233
|
+
"source": "Minecraft Wiki",
|
|
234
|
+
"license": "CC BY-NC-SA 3.0"
|
|
235
|
+
},
|
|
236
|
+
"customFields": [
|
|
237
|
+
{ "key": "health", "value": "20" }
|
|
238
|
+
]
|
|
239
|
+
}
|
|
240
|
+
]
|
|
241
|
+
}
|
|
242
|
+
]
|
|
243
|
+
}
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### Export Format (RAG-ready)
|
|
94
247
|
|
|
95
248
|
```json
|
|
96
249
|
[
|
|
@@ -101,20 +254,67 @@ The exported JSON is a flat array, compatible with the [Dataset Builder web app]
|
|
|
101
254
|
"page_title": "Creeper",
|
|
102
255
|
"source": "Minecraft Wiki",
|
|
103
256
|
"license": "CC BY-NC-SA 3.0",
|
|
104
|
-
"type": "hostile_mob",
|
|
105
257
|
"health": "20"
|
|
106
258
|
}
|
|
107
259
|
}
|
|
108
260
|
]
|
|
109
261
|
```
|
|
110
262
|
|
|
263
|
+
### History Commit
|
|
264
|
+
|
|
265
|
+
```json
|
|
266
|
+
{
|
|
267
|
+
"id": "uuid",
|
|
268
|
+
"timestamp": "2026-02-27T14:30:00.000Z",
|
|
269
|
+
"source": "browser | mcp",
|
|
270
|
+
"action": "addChunk",
|
|
271
|
+
"summary": "Added chunk 'creeper' to 'Mobs'",
|
|
272
|
+
"stats": { "categories": 3, "chunks": 12 },
|
|
273
|
+
"snapshot": { "...full project state..." }
|
|
274
|
+
}
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## Real-Time Collaboration
|
|
280
|
+
|
|
281
|
+
```
|
|
282
|
+
┌─────────────┐ WebSocket ┌──────────────┐ REST API ┌─────────────┐
|
|
283
|
+
│ Browser │ ◄──────────────► │ Web Server │ ◄──────────────► │ MCP Server │
|
|
284
|
+
│ (Dataset │ data:changed │ (Express + │ POST/PUT/DEL │ (Claude │
|
|
285
|
+
│ Builder) │ mcp:connected │ WebSocket) │ + source:mcp │ Code) │
|
|
286
|
+
└─────────────┘ └──────────────┘ └─────────────┘
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
1. Open the [Dataset Builder](https://trylljsoncreator.onrender.com) in your browser
|
|
290
|
+
2. Copy the 6-character session code from the top bar
|
|
291
|
+
3. Tell Claude: *"Connect to session ABC123"*
|
|
292
|
+
4. All changes sync in real-time between browser and Claude
|
|
293
|
+
5. Version history tracks who made each change (browser vs MCP)
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
111
297
|
## Example Prompts
|
|
112
298
|
|
|
113
299
|
- *"Create a Dark Souls knowledge base with categories for Bosses, Weapons, and Locations"*
|
|
114
|
-
- *"
|
|
115
|
-
- *"
|
|
116
|
-
- *"
|
|
117
|
-
- *"
|
|
300
|
+
- *"Parse these wiki pages and add them to my Minecraft project: [url1], [url2], [url3]"*
|
|
301
|
+
- *"Bulk update the license field to 'MIT' for all chunks in the Mobs category"*
|
|
302
|
+
- *"Show me the version history of my project"*
|
|
303
|
+
- *"Rollback my project to the commit before I deleted that category"*
|
|
304
|
+
- *"Merge my test_data project into the main production project"*
|
|
305
|
+
- *"Export the Bosses category as JSON"*
|
|
306
|
+
- *"Connect to session XYZ789 and add 20 chunks about potions"*
|
|
307
|
+
|
|
308
|
+
---
|
|
309
|
+
|
|
310
|
+
## Links
|
|
311
|
+
|
|
312
|
+
- **Web App**: [trylljsoncreator.onrender.com](https://trylljsoncreator.onrender.com)
|
|
313
|
+
- **Web App Repo**: [github.com/Skizziik/json_creator](https://github.com/Skizziik/json_creator)
|
|
314
|
+
- **MCP Repo**: [github.com/Skizziik/tryll_dataset_builder](https://github.com/Skizziik/tryll_dataset_builder)
|
|
315
|
+
- **npm**: [tryll-dataset-builder-mcp](https://www.npmjs.com/package/tryll-dataset-builder-mcp)
|
|
316
|
+
- **Tryll Engine**: [tryllengine.com](https://tryllengine.com)
|
|
317
|
+
- **Discord**: [discord.gg/CMnMrmapyB](https://discord.gg/CMnMrmapyB)
|
|
118
318
|
|
|
119
319
|
## License
|
|
120
320
|
|
package/index.js
CHANGED
|
@@ -10,7 +10,7 @@ import * as cheerio from "cheerio";
|
|
|
10
10
|
const store = new Store(process.env.DATA_DIR);
|
|
11
11
|
|
|
12
12
|
const server = new Server(
|
|
13
|
-
{ name: "tryll-dataset-builder", version: "1.
|
|
13
|
+
{ name: "tryll-dataset-builder", version: "1.3.0" },
|
|
14
14
|
{ capabilities: { tools: {} } }
|
|
15
15
|
);
|
|
16
16
|
|
|
@@ -483,6 +483,43 @@ const TOOLS = [
|
|
|
483
483
|
required: ["project", "category"],
|
|
484
484
|
},
|
|
485
485
|
},
|
|
486
|
+
|
|
487
|
+
// ---- History ----
|
|
488
|
+
{
|
|
489
|
+
name: "get_history",
|
|
490
|
+
description: "Get version history (last 50 commits) for a project. Each commit shows who made the change (browser/MCP), what was changed, and when. Returns lightweight list without snapshots.",
|
|
491
|
+
inputSchema: {
|
|
492
|
+
type: "object",
|
|
493
|
+
properties: {
|
|
494
|
+
project: { type: "string", description: "Project name" },
|
|
495
|
+
},
|
|
496
|
+
required: ["project"],
|
|
497
|
+
},
|
|
498
|
+
},
|
|
499
|
+
{
|
|
500
|
+
name: "get_commit",
|
|
501
|
+
description: "Get a specific commit with full snapshot data. Returns the commit's snapshot and the previous commit's snapshot for computing diffs.",
|
|
502
|
+
inputSchema: {
|
|
503
|
+
type: "object",
|
|
504
|
+
properties: {
|
|
505
|
+
project: { type: "string", description: "Project name" },
|
|
506
|
+
commit_id: { type: "string", description: "Commit UUID" },
|
|
507
|
+
},
|
|
508
|
+
required: ["project", "commit_id"],
|
|
509
|
+
},
|
|
510
|
+
},
|
|
511
|
+
{
|
|
512
|
+
name: "rollback",
|
|
513
|
+
description: "Rollback a project to a specific commit's state. Restores the project data from that commit's snapshot and creates a new 'rollback' commit in history. Safe: you can undo a rollback by rolling back to a later commit.",
|
|
514
|
+
inputSchema: {
|
|
515
|
+
type: "object",
|
|
516
|
+
properties: {
|
|
517
|
+
project: { type: "string", description: "Project name" },
|
|
518
|
+
commit_id: { type: "string", description: "Commit UUID to rollback to" },
|
|
519
|
+
},
|
|
520
|
+
required: ["project", "commit_id"],
|
|
521
|
+
},
|
|
522
|
+
},
|
|
486
523
|
];
|
|
487
524
|
|
|
488
525
|
// ============================================
|
|
@@ -501,28 +538,28 @@ async function handleRemote(name, args) {
|
|
|
501
538
|
|
|
502
539
|
switch (name) {
|
|
503
540
|
case "create_project":
|
|
504
|
-
return apiCall('POST', '/api/projects', { name: args.name, session: s });
|
|
541
|
+
return apiCall('POST', '/api/projects', { name: args.name, session: s, source: 'mcp' });
|
|
505
542
|
case "list_projects":
|
|
506
543
|
return apiCall('GET', '/api/projects');
|
|
507
544
|
case "delete_project":
|
|
508
|
-
return apiCall('DELETE', `/api/projects/${p(args.name)}?session=${s}`);
|
|
545
|
+
return apiCall('DELETE', `/api/projects/${p(args.name)}?session=${s}&source=mcp`);
|
|
509
546
|
case "get_project_stats":
|
|
510
547
|
return apiCall('GET', `/api/projects/${p(args.name)}/stats`);
|
|
511
548
|
case "create_category":
|
|
512
|
-
return apiCall('POST', `/api/projects/${p(args.project)}/categories`, { name: args.name, session: s });
|
|
549
|
+
return apiCall('POST', `/api/projects/${p(args.project)}/categories`, { name: args.name, session: s, source: 'mcp' });
|
|
513
550
|
case "list_categories":
|
|
514
551
|
return apiCall('GET', `/api/projects/${p(args.project)}/categories`);
|
|
515
552
|
case "rename_category":
|
|
516
|
-
return apiCall('PUT', `/api/projects/${p(args.project)}/categories/${p(args.old_name)}`, { newName: args.new_name, session: s });
|
|
553
|
+
return apiCall('PUT', `/api/projects/${p(args.project)}/categories/${p(args.old_name)}`, { newName: args.new_name, session: s, source: 'mcp' });
|
|
517
554
|
case "delete_category":
|
|
518
|
-
return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${p(args.name)}?session=${s}`);
|
|
555
|
+
return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${p(args.name)}?session=${s}&source=mcp`);
|
|
519
556
|
case "add_chunk":
|
|
520
557
|
return apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks`, {
|
|
521
|
-
id: args.id, text: args.text, metadata: args.metadata, session: s,
|
|
558
|
+
id: args.id, text: args.text, metadata: args.metadata, session: s, source: 'mcp',
|
|
522
559
|
});
|
|
523
560
|
case "bulk_add_chunks":
|
|
524
561
|
return apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks/bulk`, {
|
|
525
|
-
chunks: args.chunks, session: s,
|
|
562
|
+
chunks: args.chunks, session: s, source: 'mcp',
|
|
526
563
|
});
|
|
527
564
|
case "get_chunk": {
|
|
528
565
|
const proj = await apiCall('GET', `/api/projects/${p(args.project)}`);
|
|
@@ -537,7 +574,7 @@ async function handleRemote(name, args) {
|
|
|
537
574
|
for (const cat of proj2.categories) {
|
|
538
575
|
const ch = cat.chunks.find(c => c.id === args.id);
|
|
539
576
|
if (ch) {
|
|
540
|
-
const body = { session: s };
|
|
577
|
+
const body = { session: s, source: 'mcp' };
|
|
541
578
|
if (args.new_id !== undefined) body.id = args.new_id;
|
|
542
579
|
if (args.text !== undefined) body.text = args.text;
|
|
543
580
|
const meta = {};
|
|
@@ -558,7 +595,7 @@ async function handleRemote(name, args) {
|
|
|
558
595
|
for (const cat of proj3.categories) {
|
|
559
596
|
const ch = cat.chunks.find(c => c.id === args.id);
|
|
560
597
|
if (ch) {
|
|
561
|
-
return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}?session=${s}`);
|
|
598
|
+
return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}?session=${s}&source=mcp`);
|
|
562
599
|
}
|
|
563
600
|
}
|
|
564
601
|
throw new Error(`Chunk "${args.id}" not found`);
|
|
@@ -568,14 +605,14 @@ async function handleRemote(name, args) {
|
|
|
568
605
|
for (const cat of proj4.categories) {
|
|
569
606
|
const ch = cat.chunks.find(c => c.id === args.id);
|
|
570
607
|
if (ch) {
|
|
571
|
-
return apiCall('POST', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}/duplicate
|
|
608
|
+
return apiCall('POST', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}/duplicate`, { source: 'mcp' });
|
|
572
609
|
}
|
|
573
610
|
}
|
|
574
611
|
throw new Error(`Chunk "${args.id}" not found`);
|
|
575
612
|
}
|
|
576
613
|
case "move_chunk":
|
|
577
614
|
return apiCall('POST', `/api/projects/${p(args.project)}/chunks/${p(args.id)}/move`, {
|
|
578
|
-
targetCategory: args.target_category, session: s,
|
|
615
|
+
targetCategory: args.target_category, session: s, source: 'mcp',
|
|
579
616
|
});
|
|
580
617
|
case "search_chunks":
|
|
581
618
|
return apiCall('GET', `/api/projects/${p(args.project)}/search?q=${encodeURIComponent(args.query)}`);
|
|
@@ -589,7 +626,7 @@ async function handleRemote(name, args) {
|
|
|
589
626
|
}
|
|
590
627
|
if (!jsonData) throw new Error('Provide either "json_path" or "data" parameter');
|
|
591
628
|
return apiCall('POST', `/api/projects/${p(args.project)}/import`, {
|
|
592
|
-
data: jsonData, category: args.category, session: s,
|
|
629
|
+
data: jsonData, category: args.category, session: s, source: 'mcp',
|
|
593
630
|
});
|
|
594
631
|
}
|
|
595
632
|
case "parse_url": {
|
|
@@ -601,7 +638,7 @@ async function handleRemote(name, args) {
|
|
|
601
638
|
metadata: { page_title: parsed.pageTitle, source: parsed.source, license, ...parsed.infobox },
|
|
602
639
|
}));
|
|
603
640
|
const result = await apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks/bulk`, {
|
|
604
|
-
chunks: chunkData, session: s,
|
|
641
|
+
chunks: chunkData, session: s, source: 'mcp',
|
|
605
642
|
});
|
|
606
643
|
return { ...result, pageTitle: parsed.pageTitle, chunksCreated: chunks.length, infoboxFields: Object.keys(parsed.infobox) };
|
|
607
644
|
}
|
|
@@ -617,7 +654,7 @@ async function handleRemote(name, args) {
|
|
|
617
654
|
metadata: { page_title: parsed.pageTitle, source: parsed.source, license, ...parsed.infobox },
|
|
618
655
|
}));
|
|
619
656
|
const r = await apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks/bulk`, {
|
|
620
|
-
chunks: chunkData, session: s,
|
|
657
|
+
chunks: chunkData, session: s, source: 'mcp',
|
|
621
658
|
});
|
|
622
659
|
results.push({ url: entry.url, chunk_id: entry.chunk_id, chunks: chunks.length, added: r.added, errors: r.errors });
|
|
623
660
|
} catch (err) {
|
|
@@ -628,14 +665,22 @@ async function handleRemote(name, args) {
|
|
|
628
665
|
}
|
|
629
666
|
case "bulk_update_metadata":
|
|
630
667
|
return apiCall('POST', `/api/projects/${p(args.project)}/bulk-metadata`, {
|
|
631
|
-
field: args.field, value: args.value, category: args.category, session: s,
|
|
668
|
+
field: args.field, value: args.value, category: args.category, session: s, source: 'mcp',
|
|
632
669
|
});
|
|
633
670
|
case "merge_projects":
|
|
634
671
|
return apiCall('POST', `/api/projects/${p(args.source)}/merge`, {
|
|
635
|
-
target: args.target, session: s,
|
|
672
|
+
target: args.target, session: s, source: 'mcp',
|
|
636
673
|
});
|
|
637
674
|
case "export_category":
|
|
638
675
|
return apiCall('GET', `/api/projects/${p(args.project)}/categories/${p(args.category)}/export`);
|
|
676
|
+
case "get_history":
|
|
677
|
+
return apiCall('GET', `/api/projects/${p(args.project)}/history`);
|
|
678
|
+
case "get_commit":
|
|
679
|
+
return apiCall('GET', `/api/projects/${p(args.project)}/history/${args.commit_id}`);
|
|
680
|
+
case "rollback":
|
|
681
|
+
return apiCall('POST', `/api/projects/${p(args.project)}/history/${args.commit_id}/rollback`, {
|
|
682
|
+
session: s, source: 'mcp',
|
|
683
|
+
});
|
|
639
684
|
default:
|
|
640
685
|
throw new Error(`Unknown tool: ${name}`);
|
|
641
686
|
}
|
|
@@ -866,6 +911,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
866
911
|
break;
|
|
867
912
|
}
|
|
868
913
|
|
|
914
|
+
case "get_history":
|
|
915
|
+
result = store.getHistory(args.project);
|
|
916
|
+
break;
|
|
917
|
+
|
|
918
|
+
case "get_commit":
|
|
919
|
+
result = store.getCommit(args.project, args.commit_id);
|
|
920
|
+
break;
|
|
921
|
+
|
|
922
|
+
case "rollback":
|
|
923
|
+
result = store.rollback(args.project, args.commit_id, 'mcp');
|
|
924
|
+
break;
|
|
925
|
+
|
|
869
926
|
default:
|
|
870
927
|
throw new Error(`Unknown tool: ${name}`);
|
|
871
928
|
}
|
|
@@ -890,7 +947,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
890
947
|
async function main() {
|
|
891
948
|
const transport = new StdioServerTransport();
|
|
892
949
|
await server.connect(transport);
|
|
893
|
-
console.error("Tryll Dataset Builder MCP server running (v1.
|
|
950
|
+
console.error("Tryll Dataset Builder MCP server running (v1.3.0)");
|
|
894
951
|
}
|
|
895
952
|
|
|
896
953
|
main().catch((err) => {
|
package/lib/store.js
CHANGED
|
@@ -4,6 +4,7 @@ import { randomUUID } from 'crypto';
|
|
|
4
4
|
|
|
5
5
|
const DEFAULT_LICENSE = 'CC BY-NC-SA 3.0';
|
|
6
6
|
const STANDARD_META = ['page_title', 'source', 'license'];
|
|
7
|
+
const MAX_HISTORY = 50;
|
|
7
8
|
|
|
8
9
|
export class Store {
|
|
9
10
|
constructor(dataDir) {
|
|
@@ -25,7 +26,7 @@ export class Store {
|
|
|
25
26
|
|
|
26
27
|
listProjects() {
|
|
27
28
|
this._ensureDir();
|
|
28
|
-
const files = readdirSync(this.dataDir).filter(f => f.endsWith('.json'));
|
|
29
|
+
const files = readdirSync(this.dataDir).filter(f => f.endsWith('.json') && !f.endsWith('.history.json'));
|
|
29
30
|
return files.map(f => {
|
|
30
31
|
const name = f.replace(/\.json$/, '');
|
|
31
32
|
try {
|
|
@@ -439,6 +440,67 @@ export class Store {
|
|
|
439
440
|
return false;
|
|
440
441
|
}
|
|
441
442
|
|
|
443
|
+
// ---- HISTORY ----
|
|
444
|
+
|
|
445
|
+
_historyFilePath(name) {
|
|
446
|
+
return join(this.dataDir, `${name}.history.json`);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
_loadHistory(name) {
|
|
450
|
+
const fp = this._historyFilePath(name);
|
|
451
|
+
if (!existsSync(fp)) return { project: name, commits: [] };
|
|
452
|
+
return JSON.parse(readFileSync(fp, 'utf-8'));
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
_saveHistory(name, history) {
|
|
456
|
+
this._ensureDir();
|
|
457
|
+
writeFileSync(this._historyFilePath(name), JSON.stringify(history, null, 2), 'utf-8');
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
_commit(projectName, action, summary, source) {
|
|
461
|
+
try {
|
|
462
|
+
const data = this._load(projectName);
|
|
463
|
+
const history = this._loadHistory(projectName);
|
|
464
|
+
const totalChunks = data.categories.reduce((sum, c) => sum + c.chunks.length, 0);
|
|
465
|
+
history.commits.unshift({
|
|
466
|
+
id: randomUUID(),
|
|
467
|
+
timestamp: new Date().toISOString(),
|
|
468
|
+
source: source || 'mcp',
|
|
469
|
+
action, summary,
|
|
470
|
+
stats: { categories: data.categories.length, chunks: totalChunks },
|
|
471
|
+
snapshot: JSON.parse(JSON.stringify(data)),
|
|
472
|
+
});
|
|
473
|
+
if (history.commits.length > MAX_HISTORY) history.commits.length = MAX_HISTORY;
|
|
474
|
+
this._saveHistory(projectName, history);
|
|
475
|
+
} catch { /* history logging should never break mutations */ }
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
getHistory(name) {
|
|
479
|
+
const history = this._loadHistory(name);
|
|
480
|
+
return history.commits.map(c => ({
|
|
481
|
+
id: c.id, timestamp: c.timestamp, source: c.source,
|
|
482
|
+
action: c.action, summary: c.summary, stats: c.stats,
|
|
483
|
+
}));
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
getCommit(name, commitId) {
|
|
487
|
+
const history = this._loadHistory(name);
|
|
488
|
+
const idx = history.commits.findIndex(c => c.id === commitId);
|
|
489
|
+
if (idx === -1) throw new Error('Commit not found');
|
|
490
|
+
const commit = history.commits[idx];
|
|
491
|
+
const prev = idx + 1 < history.commits.length ? history.commits[idx + 1].snapshot : null;
|
|
492
|
+
return { ...commit, prevSnapshot: prev };
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
rollback(name, commitId, source) {
|
|
496
|
+
const history = this._loadHistory(name);
|
|
497
|
+
const commit = history.commits.find(c => c.id === commitId);
|
|
498
|
+
if (!commit) throw new Error('Commit not found');
|
|
499
|
+
this._save(name, commit.snapshot);
|
|
500
|
+
this._commit(name, 'rollback', `Rolled back to commit from ${commit.timestamp}`, source || 'mcp');
|
|
501
|
+
return this._load(name);
|
|
502
|
+
}
|
|
503
|
+
|
|
442
504
|
_parseCustomFields(metadata) {
|
|
443
505
|
if (!metadata || typeof metadata !== 'object') return [];
|
|
444
506
|
return Object.entries(metadata)
|
package/package.json
CHANGED