openwriter 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/assets/index-CbSQ8xxn.css +1 -0
- package/dist/client/assets/{index-B5MXw2pg.js → index-JMMJM_G_.js} +53 -53
- package/dist/client/index.html +2 -2
- package/dist/plugins/authors-voice/dist/index.d.ts +41 -0
- package/dist/plugins/authors-voice/dist/index.js +206 -0
- package/dist/plugins/authors-voice/package.json +23 -0
- package/dist/plugins/image-gen/dist/index.d.ts +35 -0
- package/dist/plugins/image-gen/dist/index.js +141 -0
- package/dist/plugins/image-gen/package.json +26 -0
- package/dist/plugins/publish/dist/helpers.d.ts +66 -0
- package/dist/plugins/publish/dist/helpers.js +199 -0
- package/dist/plugins/publish/dist/index.d.ts +3 -0
- package/dist/plugins/publish/dist/index.js +1130 -0
- package/dist/plugins/publish/dist/newsletter-tools.d.ts +2 -0
- package/dist/plugins/publish/dist/newsletter-tools.js +394 -0
- package/dist/plugins/publish/package.json +31 -0
- package/dist/plugins/x-api/dist/index.d.ts +27 -0
- package/dist/plugins/x-api/dist/index.js +240 -0
- package/dist/plugins/x-api/package.json +27 -0
- package/dist/server/documents.js +234 -3
- package/dist/server/enrichment.js +114 -0
- package/dist/server/install-skill.js +15 -0
- package/dist/server/markdown-parse.js +71 -14
- package/dist/server/markdown-serialize.js +14 -16
- package/dist/server/mcp.js +250 -23
- package/dist/server/node-fingerprint.js +347 -73
- package/dist/server/node-matcher.js +19 -44
- package/dist/server/pending-overlay.js +21 -4
- package/dist/server/state.js +203 -26
- package/dist/server/workspaces.js +27 -5
- package/dist/server/ws.js +10 -0
- package/package.json +1 -1
- package/skill/SKILL.md +26 -7
- package/skill/agents/openwriter-enrichment-minion.md +184 -0
- package/skill/docs/enrichment.md +179 -0
- package/dist/client/assets/index-B3iORmCT.css +0 -1
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* X API plugin for OpenWriter.
|
|
3
|
+
* Registers routes for checking X connection status and posting tweets.
|
|
4
|
+
* Uses @xdevplatform/xdk with OAuth1 credentials from plugin config.
|
|
5
|
+
*/
|
|
6
|
+
import { Client, OAuth1 } from '@xdevplatform/xdk';
|
|
7
|
+
import { join, extname } from 'path';
|
|
8
|
+
import { readFileSync, existsSync } from 'fs';
|
|
9
|
+
import sharp from 'sharp';
|
|
10
|
+
import twitter from 'twitter-text';
|
|
11
|
+
const { parseTweet } = twitter;
|
|
12
|
+
/**
 * Build an X API client from the plugin config, falling back to environment
 * variables for any credential the config does not supply.
 *
 * @param {object} config - plugin config keyed by 'api-key', 'api-secret',
 *   'access-token' and 'access-token-secret'.
 * @returns {Client|null} an OAuth1-authenticated client, or null when any of
 *   the four credentials is missing.
 */
function createXClient(config) {
    // Config wins over the environment; an absent value collapses to ''.
    const resolve = (configKey, envName) => config[configKey] || process.env[envName] || '';
    const credentials = {
        apiKey: resolve('api-key', 'X_API_KEY'),
        apiSecret: resolve('api-secret', 'X_API_SECRET'),
        callback: 'oob',
        accessToken: resolve('access-token', 'X_ACCESS_TOKEN'),
        accessTokenSecret: resolve('access-token-secret', 'X_ACCESS_TOKEN_SECRET'),
    };
    const required = [
        credentials.apiKey,
        credentials.apiSecret,
        credentials.accessToken,
        credentials.accessTokenSecret,
    ];
    if (!required.every(Boolean)) {
        return null;
    }
    return new Client({ oauth1: new OAuth1(credentials) });
}
|
|
28
|
+
/**
 * OpenWriter plugin definition for X (Twitter).
 *
 * Routes registered by `registerRoutes`:
 *   GET  /api/x/status        — is the plugin configured and authenticated?
 *   POST /api/x/post          — post one tweet (text and/or media)
 *   POST /api/x/post-thread   — post a reply chain
 *   POST /api/x/upload-media  — upload a local /_images/ file, return its media ID
 *
 * Every handler builds a fresh client from `ctx.config` via `createXClient`.
 */
const plugin = {
    name: '@openwriter/plugin-x-api',
    version: '0.1.0',
    description: 'Post tweets from OpenWriter',
    category: 'social-media',
    configSchema: {
        'api-key': { type: 'string', env: 'X_API_KEY', description: 'X API Key' },
        'api-secret': { type: 'string', env: 'X_API_SECRET', description: 'X API Secret' },
        'access-token': { type: 'string', env: 'X_ACCESS_TOKEN', description: 'X Access Token' },
        'access-token-secret': { type: 'string', env: 'X_ACCESS_TOKEN_SECRET', description: 'X Access Token Secret' },
    },
    /**
     * Register the plugin's HTTP routes.
     * @param {object} ctx - host context; this code reads `ctx.app` (route
     *   registration via get/post), `ctx.config` and `ctx.dataDir`.
     */
    registerRoutes(ctx) {
        // GET /api/x/status — check if plugin is configured + authenticated
        ctx.app.get('/api/x/status', async (_req, res) => {
            try {
                const client = createXClient(ctx.config);
                if (!client) {
                    res.json({ connected: false });
                    return;
                }
                const me = await client.users.getMe();
                const username = me?.data?.username;
                res.json({ connected: true, username: username || undefined });
            }
            catch (err) {
                console.error('[X Plugin] Status check failed:', err.message);
                res.json({ connected: false, error: err.message });
            }
        });
        // POST /api/x/post — post a tweet (with optional media)
        ctx.app.post('/api/x/post', async (req, res) => {
            try {
                const { text, replyTo, quoteTweetId, mediaIds } = req.body;
                if ((!text || typeof text !== 'string') && (!Array.isArray(mediaIds) || mediaIds.length === 0)) {
                    res.status(400).json({ success: false, error: 'text or mediaIds is required' });
                    return;
                }
                if (mediaIds && (!Array.isArray(mediaIds) || mediaIds.length > 4)) {
                    res.status(400).json({ success: false, error: 'mediaIds must be an array of 1-4 IDs' });
                    return;
                }
                const client = createXClient(ctx.config);
                if (!client) {
                    res.status(400).json({ success: false, error: 'X API credentials not configured' });
                    return;
                }
                const body = {};
                if (text)
                    body.text = text;
                if (replyTo) {
                    body.reply = { inReplyToTweetId: replyTo };
                }
                if (quoteTweetId) {
                    body.quoteTweetId = quoteTweetId;
                }
                if (mediaIds && mediaIds.length > 0) {
                    body.media = { media_ids: mediaIds };
                }
                const result = await client.posts.create(body);
                const tweetId = result?.data?.id;
                const tweetUrl = tweetId ? `https://x.com/i/status/${tweetId}` : undefined;
                res.json({ success: true, tweetId, tweetUrl });
            }
            catch (err) {
                // Prefer the structured API payload when the SDK attaches one.
                const detail = err.data ? JSON.stringify(err.data) : err.message;
                console.error('[X Plugin] Post failed:', detail);
                res.status(500).json({ success: false, error: detail });
            }
        });
        // POST /api/x/post-thread — post a full thread as a reply chain (with optional media per tweet)
        ctx.app.post('/api/x/post-thread', async (req, res) => {
            // Declared outside the try so the catch can tell the caller which
            // tweets already went out; without that a retry double-posts.
            const postedTweets = [];
            let failedAt = -1; // index being posted when an exception hit; -1 = not posting
            try {
                const { tweets, replyTo } = req.body;
                if (!Array.isArray(tweets) || tweets.length === 0) {
                    res.status(400).json({ success: false, error: 'tweets must be a non-empty array' });
                    return;
                }
                // Normalize: accept string[] or { text, mediaIds? }[]
                const normalized = tweets.map((t) => typeof t === 'string' ? { text: t, mediaIds: undefined } : t);
                // Reject entries that are neither strings nor objects before
                // anything dereferences them.
                const malformed = normalized.findIndex((t) => t === null || typeof t !== 'object');
                if (malformed !== -1) {
                    res.status(400).json({
                        success: false,
                        error: `Tweet ${malformed + 1}: must be a string or an object with text and/or mediaIds`,
                    });
                    return;
                }
                // Validate character limits using X's weighted counting (emojis=2, URLs=23, CJK=2).
                // Media-only entries have no text; measure them as empty.
                const CHAR_LIMIT = 25000;
                const overLimit = normalized
                    .map((t, i) => ({ i, len: parseTweet(t.text || '').weightedLength }))
                    .filter((x) => x.len > CHAR_LIMIT);
                if (overLimit.length > 0) {
                    res.status(400).json({
                        success: false,
                        error: `${overLimit.length} tweet(s) exceed ${CHAR_LIMIT} chars: ${overLimit.map(x => `#${x.i + 1} (${x.len})`).join(', ')}`,
                    });
                    return;
                }
                // Validate mediaIds per tweet, and require some content per
                // tweet (same rule /api/x/post applies) so an empty entry is
                // rejected up front instead of failing mid-thread.
                for (let i = 0; i < normalized.length; i++) {
                    const { text, mediaIds: ids } = normalized[i];
                    if (ids && (!Array.isArray(ids) || ids.length > 4)) {
                        res.status(400).json({ success: false, error: `Tweet ${i + 1}: mediaIds must be an array of 1-4 IDs` });
                        return;
                    }
                    if ((!text || typeof text !== 'string') && (!Array.isArray(ids) || ids.length === 0)) {
                        res.status(400).json({ success: false, error: `Tweet ${i + 1}: text or mediaIds is required` });
                        return;
                    }
                }
                const client = createXClient(ctx.config);
                if (!client) {
                    res.status(400).json({ success: false, error: 'X API credentials not configured' });
                    return;
                }
                let previousTweetId = replyTo;
                for (let i = 0; i < normalized.length; i++) {
                    failedAt = i;
                    const { text, mediaIds } = normalized[i];
                    const body = {};
                    if (text)
                        body.text = text;
                    if (previousTweetId) {
                        body.reply = { inReplyToTweetId: previousTweetId };
                    }
                    if (mediaIds && mediaIds.length > 0) {
                        body.media = { media_ids: mediaIds };
                    }
                    const result = await client.posts.create(body);
                    const tweetId = result?.data?.id;
                    if (!tweetId) {
                        res.status(500).json({
                            success: false,
                            postedTweets,
                            failedAt: i,
                            error: `Tweet ${i + 1} posted but no ID returned`,
                        });
                        return;
                    }
                    postedTweets.push({ index: i, tweetId, text });
                    previousTweetId = tweetId;
                }
                failedAt = -1;
                // Build thread URL from first tweet
                const firstTweetId = postedTweets[0]?.tweetId;
                const threadUrl = firstTweetId ? `https://x.com/i/status/${firstTweetId}` : undefined;
                console.log(`[X Plugin] Thread posted: ${postedTweets.length} tweets, ${threadUrl}`);
                res.json({ success: true, postedTweets, threadUrl });
            }
            catch (err) {
                const detail = err.data ? JSON.stringify(err.data) : err.message;
                console.error('[X Plugin] Post thread failed:', detail);
                res.status(500).json({
                    success: false,
                    // Only report progress when the failure happened while posting.
                    ...(failedAt >= 0 ? { postedTweets, failedAt } : {}),
                    error: detail,
                });
            }
        });
        // POST /api/x/upload-media — upload a local /_images/ file for tweet attachment
        ctx.app.post('/api/x/upload-media', async (req, res) => {
            try {
                const { src } = req.body;
                if (!src || typeof src !== 'string') {
                    res.status(400).json({ success: false, error: 'src is required' });
                    return;
                }
                // Security: only allow /_images/ paths, no traversal. The regex
                // forbids separators; '.' and '..' are rejected explicitly
                // because they would resolve to a directory, not an image.
                const filename = /^\/_images\/[^/\\]+$/.test(src) ? src.slice('/_images/'.length) : '';
                if (!filename || filename === '.' || filename === '..') {
                    res.status(400).json({ success: false, error: 'Invalid image path — must be /_images/<filename>' });
                    return;
                }
                const filePath = join(ctx.dataDir, '_images', filename);
                if (!existsSync(filePath)) {
                    res.status(404).json({ success: false, error: `Image not found: ${filename}` });
                    return;
                }
                const ext = extname(filename).toLowerCase();
                const mimeMap = {
                    '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
                    '.png': 'image/png', '.webp': 'image/webp',
                    '.gif': 'image/gif', '.bmp': 'image/bmp',
                    '.tiff': 'image/tiff', '.tif': 'image/tiff',
                };
                const mediaType = mimeMap[ext] || 'image/jpeg';
                const client = createXClient(ctx.config);
                if (!client) {
                    res.status(400).json({ success: false, error: 'X API credentials not configured' });
                    return;
                }
                let fileBuffer = readFileSync(filePath);
                let uploadType = mediaType;
                const origSize = fileBuffer.length;
                // Compress large images or PNGs to JPEG to stay under X API limits
                if (fileBuffer.length > 3 * 1024 * 1024 || ext === '.png') {
                    fileBuffer = Buffer.from(await sharp(fileBuffer).jpeg({ quality: 85 }).toBuffer());
                    uploadType = 'image/jpeg';
                    console.log(`[X Plugin] Compressed ${filename}: ${(origSize / 1024 / 1024).toFixed(2)}MB → ${(fileBuffer.length / 1024 / 1024).toFixed(2)}MB`);
                }
                console.log(`[X Plugin] Uploading ${filename}: ${(fileBuffer.length / 1024 / 1024).toFixed(2)}MB, type: ${uploadType}`);
                const mediaBase64 = fileBuffer.toString('base64');
                // NOTE(review): animated GIFs may need mediaCategory 'tweet_gif' —
                // confirm against the X media upload reference.
                const uploadResult = await client.media.upload({
                    body: { media: mediaBase64, mediaCategory: 'tweet_image', mediaType: uploadType },
                });
                const mediaId = uploadResult?.data?.id
                    || uploadResult?.media_id_string;
                if (!mediaId) {
                    res.status(500).json({ success: false, error: 'Upload succeeded but no media ID returned' });
                    return;
                }
                console.log(`[X Plugin] Media uploaded: ${filename} → ${mediaId}`);
                res.json({ success: true, mediaId });
            }
            catch (err) {
                console.error('[X Plugin] Media upload failed:', err.message);
                if (err.response) {
                    try {
                        const body = await err.response.text();
                        console.error('[X Plugin] X API response:', err.response.status, body);
                    }
                    catch { /* ignore */ }
                }
                // Diagnostics are best-effort: a stringify failure (e.g. a
                // cyclic payload) must not stop the error response going out.
                try {
                    if (err.data)
                        console.error('[X Plugin] Error data:', JSON.stringify(err.data));
                    console.error('[X Plugin] Full error:', JSON.stringify(err, Object.getOwnPropertyNames(err)));
                }
                catch { /* ignore */ }
                res.status(500).json({ success: false, error: err.message });
            }
        });
    },
};
|
|
240
|
+
export default plugin;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@openwriter/plugin-x-api",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Post tweets from OpenWriter",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"build": "tsc",
|
|
9
|
+
"dev": "tsc --watch"
|
|
10
|
+
},
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@xdevplatform/xdk": "^0.4.0",
|
|
13
|
+
"twitter-text": "^3.1.0"
|
|
14
|
+
},
|
|
15
|
+
"devDependencies": {
|
|
16
|
+
"@types/express": "^5.0.0",
|
|
17
|
+
"typescript": "^5.6.0"
|
|
18
|
+
},
|
|
19
|
+
"openwriter": {
|
|
20
|
+
"displayName": "X / Twitter",
|
|
21
|
+
"category": "social-media"
|
|
22
|
+
},
|
|
23
|
+
"files": [
|
|
24
|
+
"dist/",
|
|
25
|
+
"package.json"
|
|
26
|
+
]
|
|
27
|
+
}
|
package/dist/server/documents.js
CHANGED
|
@@ -12,9 +12,10 @@ import { parseMarkdownContent } from './compact.js';
|
|
|
12
12
|
import { getDocument, getTitle, getFilePath, getIsTemp, getMetadata, save, cancelDebouncedSave, setActiveDocument, registerExternalDoc, unregisterExternalDoc, getExternalDocs, cacheActiveDocument, getCachedDocument, invalidateDocCache, removePendingCacheEntry, resetDocVersion, markAsAgentStub, unmarkAgentStub, isAgentStub, } from './state.js';
|
|
13
13
|
import { getDataDir, TEMP_PREFIX, ensureDataDir, filePathForTitle, tempFilePath, generateNodeId, resolveDocPath, isExternalDoc, atomicWriteFileSync, canonicalizePath } from './helpers.js';
|
|
14
14
|
import { ensureDocId } from './versions.js';
|
|
15
|
-
import { renameDocInAllWorkspaces, removeDocFromAllWorkspaces } from './workspaces.js';
|
|
15
|
+
import { renameDocInAllWorkspaces, removeDocFromAllWorkspaces, listWorkspaces, getWorkspace } from './workspaces.js';
|
|
16
|
+
import { collectAllFiles } from './workspace-tree.js';
|
|
16
17
|
import { renameComments } from './comments.js';
|
|
17
|
-
import { deleteOverlay } from './pending-overlay.js';
|
|
18
|
+
import { deleteOverlay, diagLog } from './pending-overlay.js';
|
|
18
19
|
import { getDocId as getActiveDocId } from './state.js';
|
|
19
20
|
function getDocOrderFile() { return join(getDataDir(), '_doc-order.json'); }
|
|
20
21
|
/** Scan files for matching docId. Checks active doc first (free), then getDataDir(), then external docs. */
|
|
@@ -124,6 +125,18 @@ export function listDocuments() {
|
|
|
124
125
|
...(data.masterDocId ? { masterDocId: data.masterDocId } : {}),
|
|
125
126
|
...(data.variantType ? { variantType: data.variantType } : {}),
|
|
126
127
|
...(typeof data.autoAccept === 'boolean' ? { autoAccept: data.autoAccept } : {}),
|
|
128
|
+
// Tags ride along with the doc listing so the sidebar can populate its
|
|
129
|
+
// tag overlay from one HTTP round-trip instead of N. The server already
|
|
130
|
+
// has the parsed frontmatter in hand here; emitting tags is free.
|
|
131
|
+
...(Array.isArray(data.tags) && data.tags.length > 0 ? { tags: data.tags } : {}),
|
|
132
|
+
// Enrichment fields — also free at this point since data is in hand.
|
|
133
|
+
// See brief 2026-05-18-frontmatter-enrichment-system.
|
|
134
|
+
...(typeof data.logline === 'string' && data.logline ? { logline: data.logline } : {}),
|
|
135
|
+
...(typeof data.domain === 'string' && data.domain ? { domain: data.domain } : {}),
|
|
136
|
+
...(Array.isArray(data.concepts) && data.concepts.length > 0 ? { concepts: data.concepts } : {}),
|
|
137
|
+
...(typeof data.docRole === 'string' && data.docRole ? { docRole: data.docRole } : {}),
|
|
138
|
+
...(typeof data.status === 'string' && data.status ? { status: data.status } : {}),
|
|
139
|
+
...(data.enrichmentStale === true ? { enrichmentStale: true } : {}),
|
|
127
140
|
};
|
|
128
141
|
}
|
|
129
142
|
catch {
|
|
@@ -238,6 +251,211 @@ export function listArchivedDocuments() {
|
|
|
238
251
|
files.sort((a, b) => new Date(b.archivedAt).getTime() - new Date(a.archivedAt).getTime());
|
|
239
252
|
return files;
|
|
240
253
|
}
|
|
254
|
+
// ============================================================================
|
|
255
|
+
// ENRICHMENT — list dirty docs + crawl + surfacing helpers
|
|
256
|
+
// See brief 2026-05-18-frontmatter-enrichment-system.
|
|
257
|
+
// ============================================================================
|
|
258
|
+
/**
 * Footer line for the high-frequency MCP discovery tools (list_documents,
 * list_workspaces, get_workspace_structure) to append while dirty docs exist.
 * It nudges the agent to spawn the enrichment minion without any hook setup.
 *
 * @returns {string} the footer (prefixed with two newlines), or '' when
 *   `listDirtyDocs()` reports nothing.
 */
export function enrichmentFooter() {
    const pending = listDirtyDocs().length;
    if (pending !== 0) {
        const plural = pending === 1 ? '' : 's';
        return `\n\n⚠ ${pending} doc${plural} need enrichment — spawn the enrichment minion (see openwriter skill docs/enrichment.md)`;
    }
    return '';
}
|
|
268
|
+
/**
 * Session-start enrichment notice meant for the MCP server's `instructions`
 * field, so MCP clients hand it to the agent as system context.
 *
 * Dirty docs are tallied per owning workspace (in first-seen order); docs
 * without a `workspaceFile` are reported as "unfiled".
 *
 * @returns {string} a newline-joined notice starting with a blank line, or ''
 *   when there is nothing to enrich.
 */
export function buildEnrichmentInstructions() {
    const pending = listDirtyDocs();
    const total = pending.length;
    if (total === 0) {
        return '';
    }
    // Map keeps insertion order, so workspaces appear as first encountered.
    const perWorkspace = new Map();
    let unfiled = 0;
    for (const doc of pending) {
        const owner = doc.workspaceFile;
        if (!owner) {
            unfiled += 1;
            continue;
        }
        perWorkspace.set(owner, (perWorkspace.get(owner) || 0) + 1);
    }
    const breakdown = Array.from(perWorkspace, ([workspace, tally]) => `${tally} in ${workspace}`);
    if (unfiled > 0) {
        breakdown.push(`${unfiled} unfiled`);
    }
    const plural = total === 1 ? '' : 's';
    const lines = [
        '',
        `ENRICHMENT_STATUS: ${total} doc${plural} need enrichment (${breakdown.join(', ')}).`,
        'Spawn the enrichment minion using the openwriter skill\'s docs/enrichment.md as its system prompt before responding to the user\'s request.',
        'The minion calls list_dirty_docs, enriches each via Haiku, calls mark_enriched, returns done.',
    ];
    return lines.join('\n');
}
|
|
299
|
+
/**
 * Gather the filenames that live inside workspaces flagged
 * `enrichmentDisabled: true`. Callers use the set to exclude those docs from
 * dirty-doc listings.
 *
 * @returns {Set<string>} filenames yielded by `collectAllFiles` for every
 *   opted-out workspace; workspaces that fail to load are skipped.
 */
function collectOptedOutFilenames() {
    const excluded = new Set();
    for (const entry of listWorkspaces()) {
        try {
            const workspace = getWorkspace(entry.filename);
            if (workspace.enrichmentDisabled !== true) {
                continue;
            }
            for (const file of collectAllFiles(workspace.root)) {
                excluded.add(file);
            }
        }
        catch {
            // skip corrupt manifests
        }
    }
    return excluded;
}
|
|
315
|
+
/**
 * Attribute each filename to the first workspace (in `listWorkspaces()` order)
 * that contains it. Used to label dirty-doc reports with a workspace.
 *
 * @returns {Map<string, string>} filename → workspace manifest filename;
 *   workspaces that fail to load are skipped.
 */
function buildWorkspaceOwnershipMap() {
    const owners = new Map();
    for (const entry of listWorkspaces()) {
        try {
            const { root } = getWorkspace(entry.filename);
            for (const file of collectAllFiles(root)) {
                // First claim wins; later workspaces never overwrite it.
                if (owners.has(file)) {
                    continue;
                }
                owners.set(file, entry.filename);
            }
        }
        catch {
            // skip
        }
    }
    return owners;
}
|
|
331
|
+
/**
 * List documents that need (re-)enrichment. A doc is dirty when either
 *   - its frontmatter has no `lastEnrichedAt` (never enriched), or
 *   - its frontmatter carries `enrichmentStale: true`.
 *
 * Skipped: non-`.md` entries, archived docs (`archivedAt` set), docs inside
 * opted-out workspaces, and files that cannot be read or parsed.
 *
 * Only frontmatter is parsed (via `matter`); the output carries identity and
 * reason, not enrichment fields.
 *
 * @param {string} [scopeWorkspace] - workspace manifest filename to narrow the
 *   result to; an unknown workspace yields [] rather than throwing.
 * @returns {Array<object>} entries of { docId, filename, title,
 *   workspaceFile?, reason: 'stale_flag' | 'never_enriched', lastEnrichedAt? }.
 */
export function listDirtyDocs(scopeWorkspace) {
    ensureDataDir();
    const excluded = collectOptedOutFilenames();
    const owners = buildWorkspaceOwnershipMap();
    // When scoped, only files belonging to that workspace are eligible.
    let scoped = null;
    if (scopeWorkspace) {
        try {
            scoped = new Set(collectAllFiles(getWorkspace(scopeWorkspace).root));
        }
        catch {
            // Unknown workspace → return empty rather than throw
            return [];
        }
    }
    const eligible = (name) => !excluded.has(name) && (scoped === null || scoped.has(name));
    const dirty = [];
    const markdownFiles = readdirSync(getDataDir()).filter((name) => name.endsWith('.md'));
    for (const name of markdownFiles) {
        if (!eligible(name)) {
            continue;
        }
        try {
            const meta = matter(readFileSync(join(getDataDir(), name), 'utf-8')).data;
            if (meta.archivedAt) {
                continue; // archived docs don't participate
            }
            const flagged = meta.enrichmentStale === true;
            if (!flagged && meta.lastEnrichedAt) {
                continue; // enriched and not flagged → clean
            }
            const entry = {
                docId: meta.docId || '',
                filename: name,
                title: meta.title || name.replace(/\.md$/, ''),
            };
            const owner = owners.get(name);
            if (owner) {
                entry.workspaceFile = owner;
            }
            entry.reason = flagged ? 'stale_flag' : 'never_enriched';
            if (typeof meta.lastEnrichedAt === 'string') {
                entry.lastEnrichedAt = meta.lastEnrichedAt;
            }
            dirty.push(entry);
        }
        catch {
            // skip unreadable
        }
    }
    return dirty;
}
|
|
388
|
+
/**
 * Bulk-read primitive for building working sets: one pass over the data
 * directory, one `matter` parse per `.md` file, returning frontmatter-derived
 * fields only (no bodies).
 *
 * Filters combine with AND semantics; an empty filter returns every
 * non-archived doc. Files that cannot be read or parsed are skipped.
 *
 * @param {object} [filter]
 * @param {string} [filter.workspaceFile] - restrict to one workspace's files;
 *   an unknown workspace yields [].
 * @param {string} [filter.domain] - exact match on frontmatter `domain`.
 * @param {string} [filter.docRole] - exact match on frontmatter `docRole`.
 * @param {boolean} [filter.hasLogline] - true: require a logline; false:
 *   require none.
 * @param {string[]} [filter.tags] - every listed tag must be present.
 * @param {string[]} [filter.concepts] - every listed concept must be present.
 * @returns {Array<object>} entries of { docId, filename, title, wordCount }
 *   plus whichever of logline, domain, tags, concepts, docRole, status,
 *   enrichmentStale are present and non-empty.
 */
export function crawlDocs(filter = {}) {
    ensureDataDir();
    // When scoped, only files belonging to that workspace are eligible.
    let scoped = null;
    if (filter.workspaceFile) {
        try {
            scoped = new Set(collectAllFiles(getWorkspace(filter.workspaceFile).root));
        }
        catch {
            return [];
        }
    }
    const asList = (value) => (Array.isArray(value) ? value : []);
    const nonEmptyString = (value) => typeof value === 'string' && Boolean(value);
    // True when the frontmatter satisfies every supplied criterion.
    const matches = (meta) => {
        if (filter.domain && meta.domain !== filter.domain) {
            return false;
        }
        if (filter.docRole && meta.docRole !== filter.docRole) {
            return false;
        }
        if (filter.hasLogline === true && !meta.logline) {
            return false;
        }
        if (filter.hasLogline === false && meta.logline) {
            return false;
        }
        if (filter.tags && filter.tags.length > 0) {
            const have = asList(meta.tags);
            if (filter.tags.some((tag) => !have.includes(tag))) {
                return false;
            }
        }
        if (filter.concepts && filter.concepts.length > 0) {
            const have = asList(meta.concepts);
            if (filter.concepts.some((concept) => !have.includes(concept))) {
                return false;
            }
        }
        return true;
    };
    const results = [];
    const markdownFiles = readdirSync(getDataDir()).filter((name) => name.endsWith('.md'));
    for (const name of markdownFiles) {
        if (scoped !== null && !scoped.has(name)) {
            continue;
        }
        try {
            const { data: meta, content } = matter(readFileSync(join(getDataDir(), name), 'utf-8'));
            if (meta.archivedAt) {
                continue;
            }
            if (!matches(meta)) {
                continue;
            }
            const body = content.trim();
            const entry = {
                docId: meta.docId || '',
                filename: name,
                title: meta.title || name.replace(/\.md$/, ''),
                wordCount: body ? body.split(/\s+/).length : 0,
            };
            if (nonEmptyString(meta.logline)) {
                entry.logline = meta.logline;
            }
            if (nonEmptyString(meta.domain)) {
                entry.domain = meta.domain;
            }
            if (Array.isArray(meta.tags) && meta.tags.length > 0) {
                entry.tags = meta.tags;
            }
            if (Array.isArray(meta.concepts) && meta.concepts.length > 0) {
                entry.concepts = meta.concepts;
            }
            if (nonEmptyString(meta.docRole)) {
                entry.docRole = meta.docRole;
            }
            if (nonEmptyString(meta.status)) {
                entry.status = meta.status;
            }
            if (meta.enrichmentStale === true) {
                entry.enrichmentStale = true;
            }
            results.push(entry);
        }
        catch {
            // skip
        }
    }
    return results;
}
|
|
241
459
|
export function archiveDocument(filename) {
|
|
242
460
|
ensureDataDir();
|
|
243
461
|
const targetPath = resolveDocPath(filename);
|
|
@@ -381,15 +599,21 @@ export function searchDocuments(query, includeArchived = false) {
|
|
|
381
599
|
return results;
|
|
382
600
|
}
|
|
383
601
|
export function switchDocument(filename) {
|
|
602
|
+
const tStart = performance.now();
|
|
603
|
+
const prevFilename = getActiveFilename();
|
|
384
604
|
// No-op if already on this document — avoids save/reload cycle that can clear editor content
|
|
385
|
-
if (filename ===
|
|
605
|
+
if (filename === prevFilename) {
|
|
606
|
+
diagLog(`[Switch] NOOP ${filename} (${(performance.now() - tStart).toFixed(1)}ms)`);
|
|
386
607
|
return { document: getDocument(), title: getTitle(), filename };
|
|
387
608
|
}
|
|
388
609
|
// Cancel any pending debounced save, then save current doc immediately.
|
|
389
610
|
cancelDebouncedSave();
|
|
611
|
+
const tSaveStart = performance.now();
|
|
390
612
|
save();
|
|
613
|
+
const tSaveEnd = performance.now();
|
|
391
614
|
// Cache current doc before switching (preserves node IDs)
|
|
392
615
|
cacheActiveDocument();
|
|
616
|
+
const tCacheEnd = performance.now();
|
|
393
617
|
// Reset version counter — new document starts a fresh version lineage
|
|
394
618
|
resetDocVersion();
|
|
395
619
|
// Read target from disk — markdownToTiptap rehydrates pending state
|
|
@@ -405,15 +629,22 @@ export function switchDocument(filename) {
|
|
|
405
629
|
const cached = getCachedDocument(targetPath);
|
|
406
630
|
if (cached) {
|
|
407
631
|
setActiveDocument(cached.document, cached.title, targetPath, cached.isTemp, cached.lastModified, cached.metadata, cached.originalFrontmatter);
|
|
632
|
+
const tEnd = performance.now();
|
|
633
|
+
diagLog(`[Switch] ${prevFilename} → ${filename} CACHE-HIT total=${(tEnd - tStart).toFixed(1)}ms save=${(tSaveEnd - tSaveStart).toFixed(1)}ms cache=${(tCacheEnd - tSaveEnd).toFixed(1)}ms setActive=${(tEnd - tCacheEnd).toFixed(1)}ms`);
|
|
408
634
|
return { document: getDocument(), title: getTitle(), filename };
|
|
409
635
|
}
|
|
636
|
+
const tReadStart = performance.now();
|
|
410
637
|
const raw = readFileSync(targetPath, 'utf-8');
|
|
638
|
+
const tReadEnd = performance.now();
|
|
411
639
|
const parsed = markdownToTiptap(raw);
|
|
640
|
+
const tParseEnd = performance.now();
|
|
412
641
|
const mtime = new Date(statSync(targetPath).mtimeMs);
|
|
413
642
|
// Ensure docId exists on loaded doc metadata (lazy migration)
|
|
414
643
|
ensureDocId(parsed.metadata);
|
|
415
644
|
const baseName = targetPath.split(/[/\\]/).pop() || '';
|
|
416
645
|
setActiveDocument(parsed.document, parsed.title, targetPath, baseName.startsWith(TEMP_PREFIX), mtime, parsed.metadata, parsed.rawFrontmatter);
|
|
646
|
+
const tEnd = performance.now();
|
|
647
|
+
diagLog(`[Switch] ${prevFilename} → ${filename} CACHE-MISS total=${(tEnd - tStart).toFixed(1)}ms save=${(tSaveEnd - tSaveStart).toFixed(1)}ms cache=${(tCacheEnd - tSaveEnd).toFixed(1)}ms read=${(tReadEnd - tReadStart).toFixed(1)}ms parse=${(tParseEnd - tReadEnd).toFixed(1)}ms setActive=${(tEnd - tParseEnd).toFixed(1)}ms`);
|
|
417
648
|
return { document: getDocument(), title: getTitle(), filename };
|
|
418
649
|
}
|
|
419
650
|
export function createDocument(title, content, path) {
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Frontmatter enrichment staleness detection.
|
|
3
|
+
*
|
|
4
|
+
* The matcher already splits every block into sentences and hashes each one on
|
|
5
|
+
* every save (see node-fingerprint.ts). We reuse that machinery here — no new
|
|
6
|
+
* algorithm, no new splitter. Save-time staleness is a small tag-on after the
|
|
7
|
+
* matcher: harvest the current sentence-hash set + char count, compare against
|
|
8
|
+
* the at-enrichment baseline stored in frontmatter, set `enrichmentStale: true`
|
|
9
|
+
* when either threshold trips.
|
|
10
|
+
*
|
|
11
|
+
* Volume ratio captures growth and shrinkage symmetrically. Jaccard distance
|
|
12
|
+
* over the sentence-hash set captures rewrites at constant length. Either
|
|
13
|
+
* tripping flags the doc.
|
|
14
|
+
*
|
|
15
|
+
* OpenWriter owns "is this doc stale". The agent clears the flag via
|
|
16
|
+
* mark_enriched (Phase 4). Both sides read the same field, never compute it
|
|
17
|
+
* independently.
|
|
18
|
+
*
|
|
19
|
+
* See brief: 2026-05-18-frontmatter-enrichment-system.
|
|
20
|
+
*/
|
|
21
|
+
import { splitSentences, simpleHash } from './node-fingerprint.js';
|
|
22
|
+
/** Volume-ratio threshold above which a doc is flagged stale by size delta. */
|
|
23
|
+
export const DEFAULT_ENRICHMENT_VOLUME_THRESHOLD = 1.5;
|
|
24
|
+
/** Jaccard-distance threshold above which a doc is flagged stale by drift. */
|
|
25
|
+
export const DEFAULT_ENRICHMENT_DRIFT_THRESHOLD = 0.3;
|
|
26
|
+
/**
 * Collect the hash of every sentence in every block into one de-duplicated,
 * sorted array.
 *
 * Sorting (default comparator) keeps the on-disk representation stable from
 * save to save, so frontmatter does not churn merely because set order
 * shifted. De-duplication keeps a sentence that occurs several times in the
 * same doc from double-counting in the Jaccard math.
 *
 * @param blocks Iterable of blocks; a block whose `text` is missing or empty
 *               is treated as the empty string.
 * @returns Sorted array of unique sentence hashes.
 */
export function harvestSentenceHashes(blocks) {
    const collected = [];
    for (const block of blocks) {
        const blockText = block.text || '';
        for (const sentence of splitSentences(blockText)) {
            // The hash covers the sentence body together with its terminator.
            collected.push(simpleHash(sentence.text + sentence.terminator));
        }
    }
    const distinct = [...new Set(collected)];
    distinct.sort();
    return distinct;
}
|
|
42
|
+
/**
 * Volume signal: the summed length of `text` across all blocks.
 *
 * @param blocks Iterable of blocks; a block with missing or empty `text`
 *               contributes zero.
 * @returns Total character count.
 */
export function harvestCharCount(blocks) {
    let total = 0;
    for (const { text } of blocks) {
        total += text ? text.length : 0;
    }
    return total;
}
|
|
49
|
+
/**
 * Direction-agnostic size delta between two character counts.
 *
 * Equal sizes give 1; the result grows without bound as the sizes diverge,
 * regardless of which side is larger.
 *
 * @param current  Current character count.
 * @param baseline Character count recorded at enrichment time.
 * @returns 1 when both are zero, Infinity when exactly one is zero,
 *          otherwise larger / smaller.
 */
export function volumeRatio(current, baseline) {
    const currentEmpty = current === 0;
    const baselineEmpty = baseline === 0;
    if (currentEmpty || baselineEmpty) {
        // Two empty docs are "the same size"; empty-vs-non-empty is maximal divergence.
        return currentEmpty && baselineEmpty ? 1 : Infinity;
    }
    const larger = Math.max(current, baseline);
    const smaller = Math.min(current, baseline);
    return larger / smaller;
}
|
|
60
|
+
/**
 * Jaccard distance between two collections of sentence hashes.
 *
 * Computed as (|union| - |intersection|) / |union| after de-duplicating each
 * side: 0 means the sets are identical, 1 means they share nothing.
 *
 * @param a First array of hashes.
 * @param b Second array of hashes.
 * @returns Distance in [0, 1]; two empty inputs yield 0.
 */
export function jaccardDistance(a, b) {
    if (a.length === 0 && b.length === 0) {
        return 0;
    }
    const left = new Set(a);
    const right = new Set(b);
    const shared = [...left].filter((hash) => right.has(hash)).length;
    const unionSize = left.size + right.size - shared;
    if (unionSize === 0) {
        return 0;
    }
    return (unionSize - shared) / unionSize;
}
|
|
76
|
+
/**
 * Compute staleness for a single doc given current matcher-derived signals
 * and the at-enrichment baseline stored in its frontmatter.
 *
 * Returns true when:
 *   - the doc has never been enriched (missing metadata, or no
 *     lastEnrichedAt) — brief: "absent flag = stale"
 *   - volumeRatio reaches its threshold
 *   - Jaccard drift reaches its threshold
 *
 * Thresholds resolve doc-level override first, then workspace-level override,
 * then the global default. Workspace overrides are optional: per the brief
 * they are to be supplied once the surfacing handlers (Phase 6) have a
 * workspace pointer; callers without one can omit the argument.
 *
 * @param currentSentenceHashes Array of sentence hashes for the current doc.
 * @param currentCharCount      Current total character count.
 * @param metadata              Frontmatter metadata; null/undefined is treated
 *                              as "never enriched".
 * @param workspaceOverrides    Optional `{ volume, drift }` threshold overrides.
 * @returns true when the doc should be flagged stale, false otherwise.
 */
export function isEnrichmentStale(currentSentenceHashes, currentCharCount, metadata, workspaceOverrides) {
    // Never enriched → stale by default. New docs land here, as do docs whose
    // metadata object is absent altogether (previously a TypeError).
    if (!metadata?.lastEnrichedAt)
        return true;
    // A missing or malformed baseline degrades to "empty", which makes any
    // non-empty doc compare as maximally diverged.
    const baselineHashes = Array.isArray(metadata.lastEnrichedSentences)
        ? metadata.lastEnrichedSentences
        : [];
    const baselineChars = typeof metadata.lastEnrichedCharCount === 'number'
        ? metadata.lastEnrichedCharCount
        : 0;
    const volTh = pickThreshold(metadata.enrichmentVolumeThreshold, workspaceOverrides?.volume, DEFAULT_ENRICHMENT_VOLUME_THRESHOLD);
    const driftTh = pickThreshold(metadata.enrichmentDriftThreshold, workspaceOverrides?.drift, DEFAULT_ENRICHMENT_DRIFT_THRESHOLD);
    // Either signal reaching its threshold is sufficient to flag the doc.
    if (volumeRatio(currentCharCount, baselineChars) >= volTh)
        return true;
    if (jaccardDistance(currentSentenceHashes, baselineHashes) >= driftTh)
        return true;
    return false;
}
|
|
108
|
+
/**
 * Resolve a threshold by precedence: doc-level, then workspace-level, then
 * the supplied fallback. A candidate is used only if it is a number greater
 * than zero; anything else is skipped.
 */
function pickThreshold(docLevel, wsLevel, fallback) {
    const isUsable = (value) => typeof value === 'number' && value > 0;
    for (const candidate of [docLevel, wsLevel]) {
        if (isUsable(candidate)) {
            return candidate;
        }
    }
    return fallback;
}
|