workspace-architect 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/assets/collections/ai-prompt-engineering.json +79 -2
- package/assets/collections/angular-development.json +14 -2
- package/assets/collections/azure-cloud-architect.json +123 -2
- package/assets/collections/cpp-development.json +2 -4
- package/assets/collections/database-administration.json +8 -3
- package/assets/collections/devops-sre.json +24 -2
- package/assets/collections/dotnet-development.json +94 -2
- package/assets/collections/general-productivity.json +2 -7
- package/assets/collections/go-development.json +37 -2
- package/assets/collections/java-spring-developer.json +87 -2
- package/assets/collections/learning-mentoring.json +4 -5
- package/assets/collections/legacy-migration.json +4 -2
- package/assets/collections/mcp-specialist.json +53 -2
- package/assets/collections/mobile-development.json +10 -2
- package/assets/collections/php-cms-development.json +59 -2
- package/assets/collections/power-platform-specialist.json +99 -2
- package/assets/collections/project-management.json +40 -2
- package/assets/collections/python-development.json +55 -2
- package/assets/collections/quality-assurance.json +45 -2
- package/assets/collections/ruby-development.json +36 -2
- package/assets/collections/rust-development.json +55 -2
- package/assets/collections/security-specialist.json +59 -2
- package/assets/collections/software-architect.json +62 -2
- package/assets/collections/technical-writing.json +16 -2
- package/assets/collections/web-frontend-development.json +40 -2
- package/package.json +2 -1
- package/scripts/analyze-collections.js +387 -0
package/scripts/analyze-collections.js
@@ -0,0 +1,387 @@
#!/usr/bin/env node

import { program } from 'commander';
import fs from 'fs-extra';
import path from 'path';
import matter from 'gray-matter';
import chalk from 'chalk';
import { fileURLToPath } from 'url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT_DIR = path.join(__dirname, '..');
const ASSETS_DIR = path.join(ROOT_DIR, 'assets');

const DIRS = {
  collections: path.join(ASSETS_DIR, 'collections'),
  chatmodes: path.join(ASSETS_DIR, 'chatmodes'),
  instructions: path.join(ASSETS_DIR, 'instructions'),
  prompts: path.join(ASSETS_DIR, 'prompts'),
};

// Expanded stop words
const STOP_WORDS = new Set([
  'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
  'is', 'are', 'was', 'were', 'be', 'been', 'this', 'that', 'it', 'as', 'from',
  'mode', 'chat', 'prompt', 'instruction', 'file', 'use', 'using', 'create', 'make',
  'expert', 'guide', 'help', 'code', 'generate', 'write', 'user', 'system', 'assistant',
  'response', 'output', 'input', 'example', 'task', 'context', 'role', 'act', 'like',
  'you', 'your', 'my', 'i', 'me', 'we', 'us', 'our',
  'can', 'could', 'would', 'should', 'will', 'shall', 'may', 'might', 'must',
  'do', 'does', 'did', 'done', 'doing', 'have', 'has', 'had', 'having',
  'get', 'gets', 'got', 'getting', 'go', 'goes', 'went', 'gone', 'going',
  'say', 'says', 'said', 'saying', 'tell', 'tells', 'told', 'telling',
  'ask', 'asks', 'asked', 'asking', 'answer', 'answers', 'answered', 'answering',
  'question', 'questions', 'questioning', 'problem', 'problems', 'issue', 'issues',
  'solution', 'solutions', 'solve', 'solves', 'solved', 'solving',
  'fix', 'fixes', 'fixed', 'fixing', 'bug', 'bugs', 'error', 'errors',
  'warning', 'warnings', 'info', 'information', 'data', 'value', 'values',
  'variable', 'variables', 'function', 'functions', 'method', 'methods',
  'class', 'classes', 'object', 'objects', 'array', 'arrays', 'string', 'strings',
  'number', 'numbers', 'boolean', 'booleans', 'true', 'false', 'null', 'undefined',
  'nan', 'infinity'
]);

// Simple Stemmer
function stem(word) {
  if (word.length < 4) return word;
  if (word.endsWith('ies') && word.length > 4) return word.slice(0, -3) + 'y';
  if (word.endsWith('es') && word.length > 3) return word.slice(0, -2);
  if (word.endsWith('s') && !word.endsWith('ss') && word.length > 2) return word.slice(0, -1);
  if (word.endsWith('ing') && word.length > 4) return word.slice(0, -3);
  if (word.endsWith('ed') && word.length > 3) return word.slice(0, -2);
  if (word.endsWith('ly') && word.length > 4) return word.slice(0, -2);
  return word;
}
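
// Illustrative reductions under the rules above (examples ours, not from the
// published file): 'libraries' -> 'library', 'deployed' -> 'deploy',
// 'testing' -> 'test'. Only one suffix is stripped per word, and the rules are
// crude: 'pipelines' -> 'pipelin' while 'pipeline' is left intact, so singular
// and plural forms do not always converge on the same stem.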

function getTokens(text) {
  if (!text) return [];
  // Normalize text: split camelCase, snake_case, kebab-case
  const normalized = text
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/[-_.]/g, ' ');

  return normalized
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter(t => t.length > 2 && !STOP_WORDS.has(t))
    .map(t => stem(t));
}
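
// Worked example (ours): getTokens('getTfIdfVector') splits the camelCase into
// 'get tf idf vector'; 'get' is a stop word and 'tf' is under three characters,
// so the result is ['idf', 'vector']. Note the stop-word filter runs before
// stemming, so words are checked against STOP_WORDS in their surface form.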

// Custom TF-IDF Implementation
class TfIdf {
  constructor() {
    this.documents = []; // Array of { termCounts: Map<Term, Count>, totalTerms: number }
    this.docFrequencies = new Map(); // Term -> DocCount
  }

  addDocument(tokens) {
    const termCounts = new Map();
    const uniqueTerms = new Set(tokens);

    for (const token of tokens) {
      termCounts.set(token, (termCounts.get(token) || 0) + 1);
    }

    this.documents.push({ termCounts, totalTerms: tokens.length });

    for (const term of uniqueTerms) {
      this.docFrequencies.set(term, (this.docFrequencies.get(term) || 0) + 1);
    }
  }

  getTfIdfVector(docIndex) {
    const doc = this.documents[docIndex];
    const vector = {};
    const totalDocs = this.documents.length;

    for (const [term, count] of doc.termCounts) {
      const tf = count / doc.totalTerms;
      const df = this.docFrequencies.get(term) || 0;
      const idf = Math.log(1 + (totalDocs / (1 + df))); // Smooth IDF
      vector[term] = tf * idf;
    }
    return vector;
  }

  // Helper to create a vector for a new set of tokens (like a query or profile)
  // using the existing IDF stats
  getVectorForTokens(tokens) {
    const termCounts = new Map();
    for (const token of tokens) {
      termCounts.set(token, (termCounts.get(token) || 0) + 1);
    }

    const vector = {};
    const totalDocs = this.documents.length;

    for (const [term, count] of termCounts) {
      const tf = count / tokens.length;
      const df = this.docFrequencies.get(term) || 0;
      const idf = Math.log(1 + (totalDocs / (1 + df)));
      vector[term] = tf * idf;
    }
    return vector;
  }
}
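
// To make the weighting concrete (numbers ours): with 100 documents indexed, a
// term appearing in 9 of them gets idf = ln(1 + 100 / (1 + 9)) = ln(11) ≈ 2.40,
// while a term appearing in 99 of them gets ln(2) ≈ 0.69. A term occurring
// 3 times in a 150-token document has tf = 0.02, so its weight is roughly
// 0.048 in the rarer case.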

function extractBoostedContent(content) {
  if (!content) return '';

  let boosted = '';

  // Headers
  const headers = content.match(/^#+\s+(.*)$/gm);
  if (headers) {
    boosted += headers.map(h => h.replace(/^#+\s+/, '')).join(' ') + ' ';
  }

  // Bold text
  const bold = content.match(/\*\*(.*?)\*\*/g);
  if (bold) {
    boosted += bold.map(b => b.replace(/\*\*/g, '')).join(' ') + ' ';
  }

  return boosted;
}
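
// Example (ours): extractBoostedContent('# Setup\n**Docker** is required')
// returns 'Setup Docker '. loadAssets() below appends this string twice to the
// indexed text, so heading and bold terms are counted three times in total,
// raising their term frequency.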

async function loadAssets(type) {
  const dir = DIRS[type];
  if (!await fs.pathExists(dir)) return [];

  const files = await fs.readdir(dir);
  const assets = [];

  for (const file of files) {
    if (!file.endsWith('.md')) continue;

    const filePath = path.join(dir, file);
    const content = await fs.readFile(filePath, 'utf8');
    const parsed = matter(content);
    const id = file.replace(/\.(chatmode|instructions|prompt)\.md$/, '');

    // Boosted content (headers, bold) is repeated to increase term frequency
    const boosted = extractBoostedContent(parsed.content);
    const fullText = `${id} ${parsed.data.title || ''} ${parsed.data.description || ''} ${boosted} ${boosted} ${parsed.content}`;

    assets.push({
      id,
      type,
      key: `${type}:${id}`,
      filename: file,
      data: parsed.data,
      fullText,
      tokens: getTokens(fullText)
    });
  }
  return assets;
}

async function loadCollections() {
  const dir = DIRS.collections;
  if (!await fs.pathExists(dir)) return [];

  const files = await fs.readdir(dir);
  const collections = [];

  for (const file of files) {
    if (!file.endsWith('.json')) continue;

    const filePath = path.join(dir, file);
    const content = await fs.readJson(filePath);

    collections.push({
      filename: file,
      filePath,
      data: content,
      // Initial tokens from metadata
      tokens: getTokens(`${file.replace('.json', '')} ${content.description || ''}`)
    });
  }
  return collections;
}

function calculateTfIdfVectors(assets) {
  const tfidf = new TfIdf();

  assets.forEach(asset => {
    tfidf.addDocument(asset.tokens);
  });

  // Build vectors for each asset
  const assetVectors = new Map();

  assets.forEach((asset, index) => {
    const vector = tfidf.getTfIdfVector(index);
    assetVectors.set(asset.key, vector);
  });

  return { tfidf, assetVectors };
}

function cosineSimilarity(vecA, vecB) {
  const terms = new Set([...Object.keys(vecA), ...Object.keys(vecB)]);
  let dotProduct = 0;
  let magA = 0;
  let magB = 0;

  for (const term of terms) {
    const valA = vecA[term] || 0;
    const valB = vecB[term] || 0;
    dotProduct += valA * valB;
    magA += valA * valA;
    magB += valB * valB;
  }

  if (magA === 0 || magB === 0) return 0;
  return dotProduct / (Math.sqrt(magA) * Math.sqrt(magB));
}
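
// Sanity check (ours): cosineSimilarity({ a: 1, b: 1 }, { a: 1 }) is
// 1 / (sqrt(2) * 1) ≈ 0.707; identical vectors score 1 and disjoint vectors 0.
// Since TF-IDF weights are non-negative, every score here lands in [0, 1].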

function getCollectionProfileVector(collection, allAssetsMap, assetVectors, tfidf) {
  // Start with collection's own metadata tokens
  // We use the global TF-IDF model to get the vector for these tokens
  const baseVector = tfidf.getVectorForTokens(collection.tokens);

  // Boost metadata importance
  for (const key in baseVector) {
    baseVector[key] *= 2;
  }

  // Aggregate vectors of existing items
  const existingItems = collection.data.items || [];
  if (existingItems.length === 0) return baseVector;

  const combinedVector = { ...baseVector };

  for (const itemKey of existingItems) {
    const assetVector = assetVectors.get(itemKey);
    if (assetVector) {
      for (const [term, score] of Object.entries(assetVector)) {
        combinedVector[term] = (combinedVector[term] || 0) + score;
      }
    }
  }

  return combinedVector;
}

function subtractVector(vecA, vecB) {
  const result = { ...vecA };
  for (const [term, valB] of Object.entries(vecB)) {
    if (result[term]) {
      result[term] -= valB;
      if (result[term] <= 0.0001) delete result[term]; // Remove if effectively zero
    }
  }
  return result;
}
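
// Example (ours): subtractVector({ a: 0.5, b: 0.2 }, { a: 0.5 }) yields
// { b: 0.2 }; the 'a' entry is dropped because its remainder falls at or below
// 0.0001. This is what lets the leave-one-out check in main() score an item
// against the profile of everything else in its collection.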

async function main() {
  program
    .name('analyze-collections')
    .description('Analyze collections and suggest missing assets using TF-IDF and Cosine Similarity')
    .option('-f, --fix', 'Automatically add high-confidence matches to collections')
    .option('-p, --prune', 'Automatically remove low-confidence items from collections')
    .option('-t, --threshold <number>', 'Similarity threshold for adding (0.0 to 1.0)', '0.2')
    .option('--prune-threshold <number>', 'Similarity threshold for pruning (0.0 to 1.0)', '0.05')
    .parse(process.argv);

  const options = program.opts();
  const threshold = parseFloat(options.threshold);
  const pruneThreshold = parseFloat(options.pruneThreshold);

  console.log(chalk.blue('Loading assets...'));

  const [chatmodes, instructions, prompts] = await Promise.all([
    loadAssets('chatmodes'),
    loadAssets('instructions'),
    loadAssets('prompts')
  ]);

  const allAssets = [...chatmodes, ...instructions, ...prompts];
  const allAssetsMap = new Map(allAssets.map(a => [a.key, a]));

  console.log(chalk.blue(`Loaded ${allAssets.length} assets.`));

  const collections = await loadCollections();
  console.log(chalk.blue(`Loaded ${collections.length} collections.`));

  console.log(chalk.yellow('\nCalculating TF-IDF vectors...'));
  const { tfidf, assetVectors } = calculateTfIdfVectors(allAssets);

  console.log(chalk.yellow('Analyzing collections...'));

  for (const collection of collections) {
    const profileVector = getCollectionProfileVector(collection, allAssetsMap, assetVectors, tfidf);
    let isModified = false;

    console.log(chalk.green(`\nCollection: ${collection.filename}`));
    console.log(chalk.dim(collection.data.description || 'No description'));

    // --- PRUNING PHASE ---
    const existingItems = collection.data.items || [];
    const outliers = [];

    for (const itemKey of existingItems) {
      const assetVector = assetVectors.get(itemKey);

      if (!assetVector) {
        console.log(chalk.red(`  [MISSING] ${itemKey} (File not found)`));
        // We don't auto-prune missing files; for now, just flag them
        continue;
      }

      // "Leave-One-Out" Validation:
      // Check how well this item fits the collection *without* itself included in the profile.
      // This prevents an item from validating itself.
      const profileMinusItem = subtractVector(profileVector, assetVector);
      const score = cosineSimilarity(profileMinusItem, assetVector);

      if (score < pruneThreshold) {
        outliers.push({ key: itemKey, score });
      }
    }

    if (outliers.length > 0) {
      outliers.sort((a, b) => a.score - b.score);
      console.log(chalk.yellow(`  Outliers (Low Similarity < ${pruneThreshold}):`));

      for (const { key, score } of outliers) {
        console.log(`    [${score.toFixed(3)}] ${chalk.red(key)}`);
      }

      if (options.prune) {
        const keysToRemove = new Set(outliers.map(o => o.key));
        collection.data.items = collection.data.items.filter(k => !keysToRemove.has(k));
        console.log(chalk.red(`  -> Pruned ${outliers.length} items`));
        isModified = true;
      }
    }

    // --- SUGGESTION PHASE ---
    const suggestions = [];

    for (const asset of allAssets) {
      // Skip if already in collection
      if (collection.data.items && collection.data.items.includes(asset.key)) continue;

      const assetVector = assetVectors.get(asset.key);
      const score = cosineSimilarity(profileVector, assetVector);

      if (score >= threshold) {
        suggestions.push({ asset, score });
      }
    }

    if (suggestions.length > 0) {
      suggestions.sort((a, b) => b.score - a.score);

      const newItems = [];
      const topSuggestions = suggestions.slice(0, 10); // Limit to top 10

      for (const { asset, score } of topSuggestions) {
        console.log(`    [${score.toFixed(3)}] ${chalk.cyan(asset.key)} - ${chalk.dim(asset.data.title || asset.id)}`);
        newItems.push(asset.key);
      }

      if (options.fix) {
        if (!collection.data.items) collection.data.items = [];
        collection.data.items.push(...newItems);
        collection.data.items = [...new Set(collection.data.items)]; // Dedupe
        console.log(chalk.magenta(`  -> Added ${newItems.length} items`));
        isModified = true;
      }
    }

    // --- SAVE ---
    if (isModified) {
      await fs.writeJson(collection.filePath, collection.data, { spaces: 2 });
      console.log(chalk.blue(`  -> Saved changes to ${collection.filename}`));
    }
  }

  console.log(chalk.blue('\nDone.'));
}

main().catch(console.error);
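
A hedged usage sketch (ours, not part of the published package). The flags are the ones registered with commander above; asset paths resolve relative to the script's own location, so the working directory does not matter:

    node scripts/analyze-collections.js                          # report outliers and suggestions
    node scripts/analyze-collections.js --fix --threshold 0.25   # also write top suggestions into the collection JSON
    node scripts/analyze-collections.js --prune                  # also drop items scoring below the 0.05 default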