tryll-dataset-builder-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +228 -28
  2. package/index.js +75 -18
  3. package/lib/store.js +63 -1
  4. package/package.json +1 -1
package/README.md CHANGED
@@ -1,9 +1,11 @@
1
1
  # Tryll Dataset Builder — MCP Server
2
2
 
3
- An MCP (Model Context Protocol) server for building structured RAG knowledge base datasets. Use it with Claude Code to create, manage, and export JSON datasets via natural language.
3
+ An MCP (Model Context Protocol) server for building structured RAG knowledge base datasets. Use it with Claude Code to create, manage, and export JSON datasets via natural language — with optional real-time sync to the [Dataset Builder web app](https://trylljsoncreator.onrender.com).
4
4
 
5
5
  Built by [Tryll Engine](https://tryllengine.com) | [Discord](https://discord.gg/CMnMrmapyB)
6
6
 
7
+ ---
8
+
7
9
  ## Quick Start
8
10
 
9
11
  ### 1. Install
@@ -14,8 +16,6 @@ npm install -g tryll-dataset-builder-mcp
14
16
 
15
17
  ### 2. Add to Claude Code
16
18
 
17
- Run in your terminal:
18
-
19
19
  ```bash
20
20
  claude mcp add dataset-builder -- npx tryll-dataset-builder-mcp
21
21
  ```
@@ -42,55 +42,208 @@ Just talk to Claude:
42
42
 
43
43
  > "Create a knowledge base about Minecraft with categories: Mobs, Blocks, Biomes. Add 10 chunks to each category."
44
44
 
45
- > "Import my existing dataset from ./data/minecraft.json"
45
+ > "Parse this wiki page and add it to my dataset: https://minecraft.wiki/w/Creeper"
46
+
47
+ > "Show me the version history of my project"
46
48
 
47
- > "Search for all chunks mentioning 'diamond' in my project"
49
+ ---
48
50
 
49
51
  ## Configuration
50
52
 
51
53
  | Variable | Default | Description |
52
54
  |----------|---------|-------------|
53
- | `DATA_DIR` | `./datasets` | Directory where project JSON files are stored |
55
+ | `DATA_DIR` | `./datasets` | Directory for project JSON files (local mode) |
56
+
57
+ ---
58
+
59
+ ## Two Modes of Operation
60
+
61
+ ### Local Mode (default)
62
+ Data is stored as JSON files in `DATA_DIR`. No server needed.
63
+
64
+ ### Connected Mode (real-time sync)
65
+ Connect to the [Dataset Builder web app](https://trylljsoncreator.onrender.com) for live collaboration. Changes made via MCP appear instantly in the browser, and vice versa.
66
+
67
+ ```
68
+ You: "Connect to session ABC123"
69
+ Claude: *connects via WebSocket*
70
+ You: "Add 5 chunks about dragons"
71
+ → chunks appear in the browser in real-time
72
+ ```
73
+
74
+ ---
54
75
 
55
- ## Available Tools (18)
76
+ ## Available Tools (28)
77
+
78
+ ### Session Management
79
+
80
+ | Tool | Description |
81
+ |------|-------------|
82
+ | `connect_session` | Connect to the web app for real-time collaboration. Requires a 6-character session code from the browser UI |
83
+ | `disconnect_session` | Disconnect from the web app, switch back to local storage |
56
84
 
57
85
  ### Project Management
86
+
58
87
  | Tool | Description |
59
88
  |------|-------------|
60
89
  | `create_project` | Create a new dataset project |
61
90
  | `list_projects` | List all projects with stats |
62
- | `delete_project` | Delete a project |
63
- | `get_project_stats` | Detailed statistics |
91
+ | `delete_project` | Permanently delete a project |
92
+ | `get_project_stats` | Detailed statistics (categories, chunks, text lengths) |
64
93
 
65
94
  ### Category Management
95
+
66
96
  | Tool | Description |
67
97
  |------|-------------|
68
- | `create_category` | Add a category to a project |
98
+ | `create_category` | Add a category to organize chunks |
69
99
  | `list_categories` | List categories with chunk counts |
70
100
  | `rename_category` | Rename a category |
71
- | `delete_category` | Delete a category and its chunks |
101
+ | `delete_category` | Delete a category and all its chunks |
72
102
 
73
103
  ### Chunk Operations
104
+
74
105
  | Tool | Description |
75
106
  |------|-------------|
76
- | `add_chunk` | Add a single knowledge chunk |
77
- | `bulk_add_chunks` | Add multiple chunks at once |
78
- | `get_chunk` | Get chunk content by ID |
79
- | `update_chunk` | Update chunk fields |
80
- | `delete_chunk` | Delete a chunk |
81
- | `duplicate_chunk` | Clone a chunk |
82
- | `move_chunk` | Move chunk between categories |
107
+ | `add_chunk` | Add a single knowledge chunk with ID, text, and metadata |
108
+ | `bulk_add_chunks` | Add multiple chunks at once (faster than one by one) |
109
+ | `get_chunk` | Get full content of a chunk by ID |
110
+ | `update_chunk` | Update chunk fields (ID, text, metadata) |
111
+ | `delete_chunk` | Delete a chunk by ID |
112
+ | `duplicate_chunk` | Clone a chunk (creates `id_copy`) |
113
+ | `move_chunk` | Move a chunk between categories |
83
114
 
84
115
  ### Search & Export
116
+
117
+ | Tool | Description |
118
+ |------|-------------|
119
+ | `search_chunks` | Search by chunk ID or text content |
120
+ | `export_project` | Export as flat JSON array (RAG-ready) |
121
+ | `import_json` | Import an existing JSON dataset |
122
+ | `export_category` | Export a single category as JSON |
123
+
124
+ ### URL Parsing
125
+
126
+ | Tool | Description |
127
+ |------|-------------|
128
+ | `parse_url` | Fetch a web page, extract text, auto-create chunks. Splits text > 2000 chars into multiple chunks. Extracts wiki infobox metadata |
129
+ | `batch_parse_urls` | Parse multiple URLs at once |
130
+
131
+ ### Bulk Operations
132
+
85
133
  | Tool | Description |
86
134
  |------|-------------|
87
- | `search_chunks` | Search by ID or text content |
88
- | `export_project` | Export as flat JSON (RAG-ready) |
89
- | `import_json` | Import existing JSON dataset |
135
+ | `bulk_update_metadata` | Set a metadata field across all chunks (or per category) |
136
+ | `merge_projects` | Merge all data from one project into another |
137
+
138
+ ### Version History
139
+
140
+ | Tool | Description |
141
+ |------|-------------|
142
+ | `get_history` | Get version history (last 50 commits) for a project |
143
+ | `get_commit` | Get a specific commit with full snapshot data for diffing |
144
+ | `rollback` | Rollback a project to a previous commit's state |
145
+
146
+ ---
147
+
148
+ ## Tool Details
149
+
150
+ ### `add_chunk`
151
+
152
+ ```
153
+ project: "minecraft"
154
+ category: "Mobs"
155
+ id: "creeper"
156
+ text: "A Creeper is a hostile mob that silently approaches players..."
157
+ metadata:
158
+ page_title: "Creeper"
159
+ source: "Minecraft Wiki"
160
+ license: "CC BY-NC-SA 3.0"
161
+ health: "20" ← custom metadata field
162
+ behavior: "explodes" ← custom metadata field
163
+ ```
164
+
165
+ Standard metadata fields: `page_title`, `source`, `license`. Any extra fields become custom metadata.
166
+
167
+ ### `parse_url`
168
+
169
+ ```
170
+ project: "minecraft"
171
+ category: "Mobs"
172
+ url: "https://minecraft.wiki/w/Creeper"
173
+ chunk_id: "creeper"
174
+ license: "CC BY-NC-SA 3.0"
175
+ ```
90
176
 
91
- ## Export Format
177
+ - Fetches the page, extracts main text content
178
+ - If text > 2000 chars → auto-splits into `creeper_1`, `creeper_2`, etc.
179
+ - Extracts page title and source URL as metadata
180
+ - For wiki pages: extracts infobox/sidebar data as custom metadata fields
92
181
 
93
- The exported JSON is a flat array, compatible with the [Dataset Builder web app](https://github.com/Skizziik/json_creator) and ready for RAG pipelines:
182
+ ### `get_history`
183
+
184
+ ```
185
+ project: "minecraft"
186
+ ```
187
+
188
+ Returns:
189
+ ```json
190
+ [
191
+ {
192
+ "id": "uuid",
193
+ "timestamp": "2026-02-27T14:30:00.000Z",
194
+ "source": "mcp",
195
+ "action": "addChunk",
196
+ "summary": "Added chunk 'creeper' to 'Mobs'",
197
+ "stats": { "categories": 3, "chunks": 12 }
198
+ }
199
+ ]
200
+ ```
201
+
202
+ ### `rollback`
203
+
204
+ ```
205
+ project: "minecraft"
206
+ commit_id: "uuid-of-target-commit"
207
+ ```
208
+
209
+ Restores the project to that commit's snapshot. Creates a new "rollback" commit so you can undo the rollback later.
210
+
211
+ ---
212
+
213
+ ## Data Formats
214
+
215
+ ### Project JSON (internal)
216
+
217
+ ```json
218
+ {
219
+ "name": "minecraft",
220
+ "createdAt": "2026-02-27T10:00:00.000Z",
221
+ "categories": [
222
+ {
223
+ "id": "uuid",
224
+ "name": "Mobs",
225
+ "expanded": true,
226
+ "chunks": [
227
+ {
228
+ "_uid": "uuid",
229
+ "id": "creeper",
230
+ "text": "A Creeper is a hostile mob...",
231
+ "metadata": {
232
+ "page_title": "Creeper",
233
+ "source": "Minecraft Wiki",
234
+ "license": "CC BY-NC-SA 3.0"
235
+ },
236
+ "customFields": [
237
+ { "key": "health", "value": "20" }
238
+ ]
239
+ }
240
+ ]
241
+ }
242
+ ]
243
+ }
244
+ ```
245
+
246
+ ### Export Format (RAG-ready)
94
247
 
95
248
  ```json
96
249
  [
@@ -101,20 +254,67 @@ The exported JSON is a flat array, compatible with the [Dataset Builder web app]
101
254
  "page_title": "Creeper",
102
255
  "source": "Minecraft Wiki",
103
256
  "license": "CC BY-NC-SA 3.0",
104
- "type": "hostile_mob",
105
257
  "health": "20"
106
258
  }
107
259
  }
108
260
  ]
109
261
  ```
110
262
 
263
+ ### History Commit
264
+
265
+ ```json
266
+ {
267
+ "id": "uuid",
268
+ "timestamp": "2026-02-27T14:30:00.000Z",
269
+ "source": "browser | mcp",
270
+ "action": "addChunk",
271
+ "summary": "Added chunk 'creeper' to 'Mobs'",
272
+ "stats": { "categories": 3, "chunks": 12 },
273
+ "snapshot": { "...full project state..." }
274
+ }
275
+ ```
276
+
277
+ ---
278
+
279
+ ## Real-Time Collaboration
280
+
281
+ ```
282
+ ┌─────────────┐    WebSocket     ┌──────────────┐     REST API     ┌─────────────┐
283
+ │   Browser   │ ◄──────────────► │  Web Server  │ ◄──────────────► │ MCP Server  │
284
+ │  (Dataset   │   data:changed   │  (Express +  │   POST/PUT/DEL   │ (Claude     │
285
+ │   Builder)  │  mcp:connected   │   WebSocket) │   + source:mcp   │  Code)      │
286
+ └─────────────┘                  └──────────────┘                  └─────────────┘
287
+ ```
288
+
289
+ 1. Open the [Dataset Builder](https://trylljsoncreator.onrender.com) in your browser
290
+ 2. Copy the 6-character session code from the top bar
291
+ 3. Tell Claude: *"Connect to session ABC123"*
292
+ 4. All changes sync in real-time between browser and Claude
293
+ 5. Version history tracks who made each change (browser vs MCP)
294
+
295
+ ---
296
+
111
297
  ## Example Prompts
112
298
 
113
299
  - *"Create a Dark Souls knowledge base with categories for Bosses, Weapons, and Locations"*
114
- - *"Add 15 chunks about Minecraft mobs with detailed descriptions"*
115
- - *"Export my project as JSON and save to file"*
116
- - *"Search for chunks about 'fire' in my dark_souls project"*
117
- - *"Move chunk 'ancient_dragon' from Bosses to Enemies category"*
300
+ - *"Parse these wiki pages and add them to my Minecraft project: [url1], [url2], [url3]"*
301
+ - *"Bulk update the license field to 'MIT' for all chunks in the Mobs category"*
302
+ - *"Show me the version history of my project"*
303
+ - *"Rollback my project to the commit before I deleted that category"*
304
+ - *"Merge my test_data project into the main production project"*
305
+ - *"Export the Bosses category as JSON"*
306
+ - *"Connect to session XYZ789 and add 20 chunks about potions"*
307
+
308
+ ---
309
+
310
+ ## Links
311
+
312
+ - **Web App**: [trylljsoncreator.onrender.com](https://trylljsoncreator.onrender.com)
313
+ - **Web App Repo**: [github.com/Skizziik/json_creator](https://github.com/Skizziik/json_creator)
314
+ - **MCP Repo**: [github.com/Skizziik/tryll_dataset_builder](https://github.com/Skizziik/tryll_dataset_builder)
315
+ - **npm**: [tryll-dataset-builder-mcp](https://www.npmjs.com/package/tryll-dataset-builder-mcp)
316
+ - **Tryll Engine**: [tryllengine.com](https://tryllengine.com)
317
+ - **Discord**: [discord.gg/CMnMrmapyB](https://discord.gg/CMnMrmapyB)
118
318
 
119
319
  ## License
120
320
 
package/index.js CHANGED
@@ -10,7 +10,7 @@ import * as cheerio from "cheerio";
10
10
  const store = new Store(process.env.DATA_DIR);
11
11
 
12
12
  const server = new Server(
13
- { name: "tryll-dataset-builder", version: "1.2.0" },
13
+ { name: "tryll-dataset-builder", version: "1.3.0" },
14
14
  { capabilities: { tools: {} } }
15
15
  );
16
16
 
@@ -483,6 +483,43 @@ const TOOLS = [
483
483
  required: ["project", "category"],
484
484
  },
485
485
  },
486
+
487
+ // ---- History ----
488
+ {
489
+ name: "get_history",
490
+ description: "Get version history (last 50 commits) for a project. Each commit shows who made the change (browser/MCP), what was changed, and when. Returns lightweight list without snapshots.",
491
+ inputSchema: {
492
+ type: "object",
493
+ properties: {
494
+ project: { type: "string", description: "Project name" },
495
+ },
496
+ required: ["project"],
497
+ },
498
+ },
499
+ {
500
+ name: "get_commit",
501
+ description: "Get a specific commit with full snapshot data. Returns the commit's snapshot and the previous commit's snapshot for computing diffs.",
502
+ inputSchema: {
503
+ type: "object",
504
+ properties: {
505
+ project: { type: "string", description: "Project name" },
506
+ commit_id: { type: "string", description: "Commit UUID" },
507
+ },
508
+ required: ["project", "commit_id"],
509
+ },
510
+ },
511
+ {
512
+ name: "rollback",
513
+ description: "Rollback a project to a specific commit's state. Restores the project data from that commit's snapshot and creates a new 'rollback' commit in history. Safe: you can undo a rollback by rolling back to a later commit.",
514
+ inputSchema: {
515
+ type: "object",
516
+ properties: {
517
+ project: { type: "string", description: "Project name" },
518
+ commit_id: { type: "string", description: "Commit UUID to rollback to" },
519
+ },
520
+ required: ["project", "commit_id"],
521
+ },
522
+ },
486
523
  ];
487
524
 
488
525
  // ============================================
@@ -501,28 +538,28 @@ async function handleRemote(name, args) {
501
538
 
502
539
  switch (name) {
503
540
  case "create_project":
504
- return apiCall('POST', '/api/projects', { name: args.name, session: s });
541
+ return apiCall('POST', '/api/projects', { name: args.name, session: s, source: 'mcp' });
505
542
  case "list_projects":
506
543
  return apiCall('GET', '/api/projects');
507
544
  case "delete_project":
508
- return apiCall('DELETE', `/api/projects/${p(args.name)}?session=${s}`);
545
+ return apiCall('DELETE', `/api/projects/${p(args.name)}?session=${s}&source=mcp`);
509
546
  case "get_project_stats":
510
547
  return apiCall('GET', `/api/projects/${p(args.name)}/stats`);
511
548
  case "create_category":
512
- return apiCall('POST', `/api/projects/${p(args.project)}/categories`, { name: args.name, session: s });
549
+ return apiCall('POST', `/api/projects/${p(args.project)}/categories`, { name: args.name, session: s, source: 'mcp' });
513
550
  case "list_categories":
514
551
  return apiCall('GET', `/api/projects/${p(args.project)}/categories`);
515
552
  case "rename_category":
516
- return apiCall('PUT', `/api/projects/${p(args.project)}/categories/${p(args.old_name)}`, { newName: args.new_name, session: s });
553
+ return apiCall('PUT', `/api/projects/${p(args.project)}/categories/${p(args.old_name)}`, { newName: args.new_name, session: s, source: 'mcp' });
517
554
  case "delete_category":
518
- return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${p(args.name)}?session=${s}`);
555
+ return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${p(args.name)}?session=${s}&source=mcp`);
519
556
  case "add_chunk":
520
557
  return apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks`, {
521
- id: args.id, text: args.text, metadata: args.metadata, session: s,
558
+ id: args.id, text: args.text, metadata: args.metadata, session: s, source: 'mcp',
522
559
  });
523
560
  case "bulk_add_chunks":
524
561
  return apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks/bulk`, {
525
- chunks: args.chunks, session: s,
562
+ chunks: args.chunks, session: s, source: 'mcp',
526
563
  });
527
564
  case "get_chunk": {
528
565
  const proj = await apiCall('GET', `/api/projects/${p(args.project)}`);
@@ -537,7 +574,7 @@ async function handleRemote(name, args) {
537
574
  for (const cat of proj2.categories) {
538
575
  const ch = cat.chunks.find(c => c.id === args.id);
539
576
  if (ch) {
540
- const body = { session: s };
577
+ const body = { session: s, source: 'mcp' };
541
578
  if (args.new_id !== undefined) body.id = args.new_id;
542
579
  if (args.text !== undefined) body.text = args.text;
543
580
  const meta = {};
@@ -558,7 +595,7 @@ async function handleRemote(name, args) {
558
595
  for (const cat of proj3.categories) {
559
596
  const ch = cat.chunks.find(c => c.id === args.id);
560
597
  if (ch) {
561
- return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}?session=${s}`);
598
+ return apiCall('DELETE', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}?session=${s}&source=mcp`);
562
599
  }
563
600
  }
564
601
  throw new Error(`Chunk "${args.id}" not found`);
@@ -568,14 +605,14 @@ async function handleRemote(name, args) {
568
605
  for (const cat of proj4.categories) {
569
606
  const ch = cat.chunks.find(c => c.id === args.id);
570
607
  if (ch) {
571
- return apiCall('POST', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}/duplicate`);
608
+ return apiCall('POST', `/api/projects/${p(args.project)}/categories/${cat.id}/chunks/${ch._uid}/duplicate`, { source: 'mcp' });
572
609
  }
573
610
  }
574
611
  throw new Error(`Chunk "${args.id}" not found`);
575
612
  }
576
613
  case "move_chunk":
577
614
  return apiCall('POST', `/api/projects/${p(args.project)}/chunks/${p(args.id)}/move`, {
578
- targetCategory: args.target_category, session: s,
615
+ targetCategory: args.target_category, session: s, source: 'mcp',
579
616
  });
580
617
  case "search_chunks":
581
618
  return apiCall('GET', `/api/projects/${p(args.project)}/search?q=${encodeURIComponent(args.query)}`);
@@ -589,7 +626,7 @@ async function handleRemote(name, args) {
589
626
  }
590
627
  if (!jsonData) throw new Error('Provide either "json_path" or "data" parameter');
591
628
  return apiCall('POST', `/api/projects/${p(args.project)}/import`, {
592
- data: jsonData, category: args.category, session: s,
629
+ data: jsonData, category: args.category, session: s, source: 'mcp',
593
630
  });
594
631
  }
595
632
  case "parse_url": {
@@ -601,7 +638,7 @@ async function handleRemote(name, args) {
601
638
  metadata: { page_title: parsed.pageTitle, source: parsed.source, license, ...parsed.infobox },
602
639
  }));
603
640
  const result = await apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks/bulk`, {
604
- chunks: chunkData, session: s,
641
+ chunks: chunkData, session: s, source: 'mcp',
605
642
  });
606
643
  return { ...result, pageTitle: parsed.pageTitle, chunksCreated: chunks.length, infoboxFields: Object.keys(parsed.infobox) };
607
644
  }
@@ -617,7 +654,7 @@ async function handleRemote(name, args) {
617
654
  metadata: { page_title: parsed.pageTitle, source: parsed.source, license, ...parsed.infobox },
618
655
  }));
619
656
  const r = await apiCall('POST', `/api/projects/${p(args.project)}/categories/${p(args.category)}/chunks/bulk`, {
620
- chunks: chunkData, session: s,
657
+ chunks: chunkData, session: s, source: 'mcp',
621
658
  });
622
659
  results.push({ url: entry.url, chunk_id: entry.chunk_id, chunks: chunks.length, added: r.added, errors: r.errors });
623
660
  } catch (err) {
@@ -628,14 +665,22 @@ async function handleRemote(name, args) {
628
665
  }
629
666
  case "bulk_update_metadata":
630
667
  return apiCall('POST', `/api/projects/${p(args.project)}/bulk-metadata`, {
631
- field: args.field, value: args.value, category: args.category, session: s,
668
+ field: args.field, value: args.value, category: args.category, session: s, source: 'mcp',
632
669
  });
633
670
  case "merge_projects":
634
671
  return apiCall('POST', `/api/projects/${p(args.source)}/merge`, {
635
- target: args.target, session: s,
672
+ target: args.target, session: s, source: 'mcp',
636
673
  });
637
674
  case "export_category":
638
675
  return apiCall('GET', `/api/projects/${p(args.project)}/categories/${p(args.category)}/export`);
676
+ case "get_history":
677
+ return apiCall('GET', `/api/projects/${p(args.project)}/history`);
678
+ case "get_commit":
679
+ return apiCall('GET', `/api/projects/${p(args.project)}/history/${args.commit_id}`);
680
+ case "rollback":
681
+ return apiCall('POST', `/api/projects/${p(args.project)}/history/${args.commit_id}/rollback`, {
682
+ session: s, source: 'mcp',
683
+ });
639
684
  default:
640
685
  throw new Error(`Unknown tool: ${name}`);
641
686
  }
@@ -866,6 +911,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
866
911
  break;
867
912
  }
868
913
 
914
+ case "get_history":
915
+ result = store.getHistory(args.project);
916
+ break;
917
+
918
+ case "get_commit":
919
+ result = store.getCommit(args.project, args.commit_id);
920
+ break;
921
+
922
+ case "rollback":
923
+ result = store.rollback(args.project, args.commit_id, 'mcp');
924
+ break;
925
+
869
926
  default:
870
927
  throw new Error(`Unknown tool: ${name}`);
871
928
  }
@@ -890,7 +947,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
890
947
  async function main() {
891
948
  const transport = new StdioServerTransport();
892
949
  await server.connect(transport);
893
- console.error("Tryll Dataset Builder MCP server running (v1.2.0)");
950
+ console.error("Tryll Dataset Builder MCP server running (v1.3.0)");
894
951
  }
895
952
 
896
953
  main().catch((err) => {
package/lib/store.js CHANGED
@@ -4,6 +4,7 @@ import { randomUUID } from 'crypto';
4
4
 
5
5
  const DEFAULT_LICENSE = 'CC BY-NC-SA 3.0';
6
6
  const STANDARD_META = ['page_title', 'source', 'license'];
7
+ const MAX_HISTORY = 50;
7
8
 
8
9
  export class Store {
9
10
  constructor(dataDir) {
@@ -25,7 +26,7 @@ export class Store {
25
26
 
26
27
  listProjects() {
27
28
  this._ensureDir();
28
- const files = readdirSync(this.dataDir).filter(f => f.endsWith('.json'));
29
+ const files = readdirSync(this.dataDir).filter(f => f.endsWith('.json') && !f.endsWith('.history.json'));
29
30
  return files.map(f => {
30
31
  const name = f.replace(/\.json$/, '');
31
32
  try {
@@ -439,6 +440,67 @@ export class Store {
439
440
  return false;
440
441
  }
441
442
 
443
+ // ---- HISTORY ----
444
+
445
+ _historyFilePath(name) {
446
+ return join(this.dataDir, `${name}.history.json`);
447
+ }
448
+
449
+ _loadHistory(name) {
450
+ const fp = this._historyFilePath(name);
451
+ if (!existsSync(fp)) return { project: name, commits: [] };
452
+ return JSON.parse(readFileSync(fp, 'utf-8'));
453
+ }
454
+
455
+ _saveHistory(name, history) {
456
+ this._ensureDir();
457
+ writeFileSync(this._historyFilePath(name), JSON.stringify(history, null, 2), 'utf-8');
458
+ }
459
+
460
+ _commit(projectName, action, summary, source) {
461
+ try {
462
+ const data = this._load(projectName);
463
+ const history = this._loadHistory(projectName);
464
+ const totalChunks = data.categories.reduce((sum, c) => sum + c.chunks.length, 0);
465
+ history.commits.unshift({
466
+ id: randomUUID(),
467
+ timestamp: new Date().toISOString(),
468
+ source: source || 'mcp',
469
+ action, summary,
470
+ stats: { categories: data.categories.length, chunks: totalChunks },
471
+ snapshot: JSON.parse(JSON.stringify(data)),
472
+ });
473
+ if (history.commits.length > MAX_HISTORY) history.commits.length = MAX_HISTORY;
474
+ this._saveHistory(projectName, history);
475
+ } catch { /* history logging should never break mutations */ }
476
+ }
477
+
478
+ getHistory(name) {
479
+ const history = this._loadHistory(name);
480
+ return history.commits.map(c => ({
481
+ id: c.id, timestamp: c.timestamp, source: c.source,
482
+ action: c.action, summary: c.summary, stats: c.stats,
483
+ }));
484
+ }
485
+
486
+ getCommit(name, commitId) {
487
+ const history = this._loadHistory(name);
488
+ const idx = history.commits.findIndex(c => c.id === commitId);
489
+ if (idx === -1) throw new Error('Commit not found');
490
+ const commit = history.commits[idx];
491
+ const prev = idx + 1 < history.commits.length ? history.commits[idx + 1].snapshot : null;
492
+ return { ...commit, prevSnapshot: prev };
493
+ }
494
+
495
+ rollback(name, commitId, source) {
496
+ const history = this._loadHistory(name);
497
+ const commit = history.commits.find(c => c.id === commitId);
498
+ if (!commit) throw new Error('Commit not found');
499
+ this._save(name, commit.snapshot);
500
+ this._commit(name, 'rollback', `Rolled back to commit from ${commit.timestamp}`, source || 'mcp');
501
+ return this._load(name);
502
+ }
503
+
442
504
  _parseCustomFields(metadata) {
443
505
  if (!metadata || typeof metadata !== 'object') return [];
444
506
  return Object.entries(metadata)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tryll-dataset-builder-mcp",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "MCP server for building RAG knowledge base datasets. Create, manage and export structured JSON datasets via Claude Code.",
5
5
  "type": "module",
6
6
  "main": "index.js",