workspace-architect 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/CHANGELOG.md +10 -0
  2. package/assets/collections/ai-prompt-engineering.json +79 -2
  3. package/assets/collections/angular-development.json +14 -2
  4. package/assets/collections/azure-cloud-architect.json +123 -2
  5. package/assets/collections/cpp-development.json +2 -4
  6. package/assets/collections/database-administration.json +8 -3
  7. package/assets/collections/devops-sre.json +24 -2
  8. package/assets/collections/dotnet-development.json +94 -2
  9. package/assets/collections/general-productivity.json +2 -7
  10. package/assets/collections/go-development.json +37 -2
  11. package/assets/collections/java-spring-developer.json +87 -2
  12. package/assets/collections/learning-mentoring.json +4 -5
  13. package/assets/collections/legacy-migration.json +4 -2
  14. package/assets/collections/mcp-specialist.json +53 -2
  15. package/assets/collections/mobile-development.json +10 -2
  16. package/assets/collections/php-cms-development.json +59 -2
  17. package/assets/collections/power-platform-specialist.json +99 -2
  18. package/assets/collections/project-management.json +40 -2
  19. package/assets/collections/python-development.json +55 -2
  20. package/assets/collections/quality-assurance.json +45 -2
  21. package/assets/collections/ruby-development.json +36 -2
  22. package/assets/collections/rust-development.json +55 -2
  23. package/assets/collections/security-specialist.json +59 -2
  24. package/assets/collections/software-architect.json +62 -2
  25. package/assets/collections/technical-writing.json +16 -2
  26. package/assets/collections/web-frontend-development.json +40 -2
  27. package/package.json +2 -1
  28. package/scripts/analyze-collections.js +387 -0
@@ -0,0 +1,387 @@
+ #!/usr/bin/env node
+
+ import { program } from 'commander';
+ import fs from 'fs-extra';
+ import path from 'path';
+ import matter from 'gray-matter';
+ import chalk from 'chalk';
+ import { fileURLToPath } from 'url';
+
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
+ const ROOT_DIR = path.join(__dirname, '..');
+ const ASSETS_DIR = path.join(ROOT_DIR, 'assets');
+
+ const DIRS = {
+   collections: path.join(ASSETS_DIR, 'collections'),
+   chatmodes: path.join(ASSETS_DIR, 'chatmodes'),
+   instructions: path.join(ASSETS_DIR, 'instructions'),
+   prompts: path.join(ASSETS_DIR, 'prompts'),
+ };
+
+ // Expanded stop words
+ const STOP_WORDS = new Set([
+   'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'this', 'that', 'it', 'as', 'from', 'mode', 'chat', 'prompt', 'instruction', 'file', 'use', 'using', 'create', 'make', 'expert', 'guide', 'help', 'code', 'generate', 'write', 'user', 'system', 'assistant', 'response', 'output', 'input', 'example', 'task', 'context', 'role', 'act', 'like', 'you', 'your', 'my', 'i', 'me', 'we', 'us', 'our', 'can', 'could', 'would', 'should', 'will', 'shall', 'may', 'might', 'must', 'do', 'does', 'did', 'done', 'doing', 'have', 'has', 'had', 'having', 'get', 'gets', 'got', 'getting', 'go', 'goes', 'went', 'gone', 'going', 'say', 'says', 'said', 'saying', 'tell', 'tells', 'told', 'telling', 'ask', 'asks', 'asked', 'asking', 'answer', 'answers', 'answered', 'answering', 'question', 'questions', 'questioning', 'problem', 'problems', 'issue', 'issues', 'solution', 'solutions', 'solve', 'solves', 'solved', 'solving', 'fix', 'fixes', 'fixed', 'fixing', 'bug', 'bugs', 'error', 'errors', 'warning', 'warnings', 'info', 'information', 'data', 'value', 'values', 'variable', 'variables', 'function', 'functions', 'method', 'methods', 'class', 'classes', 'object', 'objects', 'array', 'arrays', 'string', 'strings', 'number', 'numbers', 'boolean', 'booleans', 'true', 'false', 'null', 'undefined', 'nan', 'infinity'
+ ]);
+
+ // Simple Stemmer
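+ // Suffix stripping only (e.g. "libraries" -> "library", "testing" -> "test",
+ // "quickly" -> "quick"); a rough heuristic, not a full Porter stemmer.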
+ function stem(word) {
+   if (word.length < 4) return word;
+   if (word.endsWith('ies') && word.length > 4) return word.slice(0, -3) + 'y';
+   if (word.endsWith('es') && word.length > 3) return word.slice(0, -2);
+   if (word.endsWith('s') && !word.endsWith('ss') && word.length > 2) return word.slice(0, -1);
+   if (word.endsWith('ing') && word.length > 4) return word.slice(0, -3);
+   if (word.endsWith('ed') && word.length > 3) return word.slice(0, -2);
+   if (word.endsWith('ly') && word.length > 4) return word.slice(0, -2);
+   return word;
+ }
+
+ function getTokens(text) {
+   if (!text) return [];
+   // Normalize text: split camelCase, snake_case, kebab-case
+   const normalized = text
+     .replace(/([a-z])([A-Z])/g, '$1 $2')
+     .replace(/[-_.]/g, ' ');
+
+   return normalized
+     .toLowerCase()
+     .split(/[^a-z0-9]+/)
+     .filter(t => t.length > 2 && !STOP_WORDS.has(t))
+     .map(t => stem(t));
+ }
+
+ // Custom TF-IDF Implementation
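+ // For a term t in document d over a corpus of N documents:
+ //   tf(t, d) = count(t, d) / totalTerms(d)
+ //   idf(t)   = log(1 + N / (1 + df(t)))  -- smoothed, so idf stays positive even when df = N
+ //   weight   = tf * idf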
+ class TfIdf {
+   constructor() {
+     this.documents = []; // Array of { termCounts: Map<Term, Count>, totalTerms: number }
+     this.docFrequencies = new Map(); // Term -> DocCount
+   }
+
+   addDocument(tokens) {
+     const termCounts = new Map();
+     const uniqueTerms = new Set(tokens);
+
+     for (const token of tokens) {
+       termCounts.set(token, (termCounts.get(token) || 0) + 1);
+     }
+
+     this.documents.push({ termCounts, totalTerms: tokens.length });
+
+     for (const term of uniqueTerms) {
+       this.docFrequencies.set(term, (this.docFrequencies.get(term) || 0) + 1);
+     }
+   }
+
+   getTfIdfVector(docIndex) {
+     const doc = this.documents[docIndex];
+     const vector = {};
+     const totalDocs = this.documents.length;
+
+     for (const [term, count] of doc.termCounts) {
+       const tf = count / doc.totalTerms;
+       const df = this.docFrequencies.get(term) || 0;
+       const idf = Math.log(1 + (totalDocs / (1 + df))); // Smooth IDF
+       vector[term] = tf * idf;
+     }
+     return vector;
+   }
+
+   // Helper to create a vector for a new set of tokens (like a query or profile)
+   // using the existing IDF stats
+   getVectorForTokens(tokens) {
+     const termCounts = new Map();
+     for (const token of tokens) {
+       termCounts.set(token, (termCounts.get(token) || 0) + 1);
+     }
+
+     const vector = {};
+     const totalDocs = this.documents.length;
+
+     for (const [term, count] of termCounts) {
+       const tf = count / tokens.length;
+       const df = this.docFrequencies.get(term) || 0;
+       const idf = Math.log(1 + (totalDocs / (1 + df)));
+       vector[term] = tf * idf;
+     }
+     return vector;
+   }
+ }
+
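+ // Pulls markdown headers and bold runs out of a document body; the caller
+ // repeats this text so structurally emphasized terms get a higher term frequency.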
+ function extractBoostedContent(content) {
+   if (!content) return '';
+
+   let boosted = '';
+
+   // Headers
+   const headers = content.match(/^#+\s+(.*)$/gm);
+   if (headers) {
+     boosted += headers.map(h => h.replace(/^#+\s+/, '')).join(' ') + ' ';
+   }
+
+   // Bold text
+   const bold = content.match(/\*\*(.*?)\*\*/g);
+   if (bold) {
+     boosted += bold.map(b => b.replace(/\*\*/g, '')).join(' ') + ' ';
+   }
+
+   return boosted;
+ }
+
+ async function loadAssets(type) {
+   const dir = DIRS[type];
+   if (!await fs.pathExists(dir)) return [];
+
+   const files = await fs.readdir(dir);
+   const assets = [];
+
+   for (const file of files) {
+     if (!file.endsWith('.md')) continue;
+
+     const filePath = path.join(dir, file);
+     const content = await fs.readFile(filePath, 'utf8');
+     const parsed = matter(content);
+     const id = file.replace(/\.(chatmode|instructions|prompt)\.md$/, '');
+
+     // Boosted content (headers, bold) is repeated to increase term frequency
+     const boosted = extractBoostedContent(parsed.content);
+     const fullText = `${id} ${parsed.data.title || ''} ${parsed.data.description || ''} ${boosted} ${boosted} ${parsed.content}`;
+
+     assets.push({
+       id,
+       type,
+       key: `${type}:${id}`,
+       filename: file,
+       data: parsed.data,
+       fullText,
+       tokens: getTokens(fullText)
+     });
+   }
+   return assets;
+ }
+
+ async function loadCollections() {
+   const dir = DIRS.collections;
+   if (!await fs.pathExists(dir)) return [];
+
+   const files = await fs.readdir(dir);
+   const collections = [];
+
+   for (const file of files) {
+     if (!file.endsWith('.json')) continue;
+
+     const filePath = path.join(dir, file);
+     const content = await fs.readJson(filePath);
+
+     collections.push({
+       filename: file,
+       filePath,
+       data: content,
+       // Initial tokens from metadata
+       tokens: getTokens(`${file.replace('.json', '')} ${content.description || ''}`)
+     });
+   }
+   return collections;
+ }
+
+ function calculateTfIdfVectors(assets) {
+   const tfidf = new TfIdf();
+
+   assets.forEach(asset => {
+     tfidf.addDocument(asset.tokens);
+   });
+
+   // Build vectors for each asset
+   const assetVectors = new Map();
+
+   assets.forEach((asset, index) => {
+     const vector = tfidf.getTfIdfVector(index);
+     assetVectors.set(asset.key, vector);
+   });
+
+   return { tfidf, assetVectors };
+ }
+
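+ // Standard cosine similarity over sparse vectors: dot(A, B) / (|A| * |B|).
+ // All TF-IDF weights are non-negative, so scores fall in [0, 1].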
+ function cosineSimilarity(vecA, vecB) {
+   const terms = new Set([...Object.keys(vecA), ...Object.keys(vecB)]);
+   let dotProduct = 0;
+   let magA = 0;
+   let magB = 0;
+
+   for (const term of terms) {
+     const valA = vecA[term] || 0;
+     const valB = vecB[term] || 0;
+     dotProduct += valA * valB;
+     magA += valA * valA;
+     magB += valB * valB;
+   }
+
+   if (magA === 0 || magB === 0) return 0;
+   return dotProduct / (Math.sqrt(magA) * Math.sqrt(magB));
+ }
+
+ function getCollectionProfileVector(collection, allAssetsMap, assetVectors, tfidf) {
+   // Start with collection's own metadata tokens
+   // We use the global TF-IDF model to get the vector for these tokens
+   const baseVector = tfidf.getVectorForTokens(collection.tokens);
+
+   // Boost metadata importance
+   for (const key in baseVector) {
+     baseVector[key] *= 2;
+   }
+
+   // Aggregate vectors of existing items
+   const existingItems = collection.data.items || [];
+   if (existingItems.length === 0) return baseVector;
+
+   const combinedVector = { ...baseVector };
+
+   for (const itemKey of existingItems) {
+     const assetVector = assetVectors.get(itemKey);
+     if (assetVector) {
+       for (const [term, score] of Object.entries(assetVector)) {
+         combinedVector[term] = (combinedVector[term] || 0) + score;
+       }
+     }
+   }
+
+   return combinedVector;
+ }
+
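+ // Removes one vector's contribution from an aggregate profile; used below for
+ // "leave-one-out" scoring so an item cannot validate itself.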
+ function subtractVector(vecA, vecB) {
+   const result = { ...vecA };
+   for (const [term, valB] of Object.entries(vecB)) {
+     if (result[term]) {
+       result[term] -= valB;
+       if (result[term] <= 0.0001) delete result[term]; // Remove if effectively zero
+     }
+   }
+   return result;
+ }
+
+ async function main() {
+   program
+     .name('analyze-collections')
+     .description('Analyze collections and suggest missing assets using TF-IDF and Cosine Similarity')
+     .option('-f, --fix', 'Automatically add high-confidence matches to collections')
+     .option('-p, --prune', 'Automatically remove low-confidence items from collections')
+     .option('-t, --threshold <number>', 'Similarity threshold for adding (0.0 to 1.0)', '0.2')
+     .option('--prune-threshold <number>', 'Similarity threshold for pruning (0.0 to 1.0)', '0.05')
+     .parse(process.argv);
+
+   const options = program.opts();
+   const threshold = parseFloat(options.threshold);
+   const pruneThreshold = parseFloat(options.pruneThreshold);
+
+   console.log(chalk.blue('Loading assets...'));
+
+   const [chatmodes, instructions, prompts] = await Promise.all([
+     loadAssets('chatmodes'),
+     loadAssets('instructions'),
+     loadAssets('prompts')
+   ]);
+
+   const allAssets = [...chatmodes, ...instructions, ...prompts];
+   const allAssetsMap = new Map(allAssets.map(a => [a.key, a]));
+
+   console.log(chalk.blue(`Loaded ${allAssets.length} assets.`));
+
+   const collections = await loadCollections();
+   console.log(chalk.blue(`Loaded ${collections.length} collections.`));
+
+   console.log(chalk.yellow('\nCalculating TF-IDF vectors...'));
+   const { tfidf, assetVectors } = calculateTfIdfVectors(allAssets);
+
+   console.log(chalk.yellow('Analyzing collections...'));
+
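+   // Each collection's profile vector = its metadata tokens (weighted x2) plus the
+   // sum of its members' TF-IDF vectors; then: prune outliers, suggest additions, save.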
+   for (const collection of collections) {
+     const profileVector = getCollectionProfileVector(collection, allAssetsMap, assetVectors, tfidf);
+     let isModified = false;
+
+     console.log(chalk.green(`\nCollection: ${collection.filename}`));
+     console.log(chalk.dim(collection.data.description || 'No description'));
+
+     // --- PRUNING PHASE ---
+     const existingItems = collection.data.items || [];
+     const outliers = [];
+
+     for (const itemKey of existingItems) {
+       const assetVector = assetVectors.get(itemKey);
+
+       if (!assetVector) {
+         console.log(chalk.red(` [MISSING] ${itemKey} (File not found)`));
+         // Missing files are flagged here but never auto-pruned.
+         continue;
+       }
+
+       // "Leave-One-Out" Validation:
+       // Check how well this item fits the collection *without* itself included in the profile.
+       // This prevents an item from validating itself.
+       const profileMinusItem = subtractVector(profileVector, assetVector);
+       const score = cosineSimilarity(profileMinusItem, assetVector);
+
+       if (score < pruneThreshold) {
+         outliers.push({ key: itemKey, score });
+       }
+     }
+
+     if (outliers.length > 0) {
+       outliers.sort((a, b) => a.score - b.score);
+       console.log(chalk.yellow(` Outliers (Low Similarity < ${pruneThreshold}):`));
+
+       for (const { key, score } of outliers) {
+         console.log(` [${score.toFixed(3)}] ${chalk.red(key)}`);
+       }
+
+       if (options.prune) {
+         const keysToRemove = new Set(outliers.map(o => o.key));
+         collection.data.items = collection.data.items.filter(k => !keysToRemove.has(k));
+         console.log(chalk.red(` -> Pruned ${outliers.length} items`));
+         isModified = true;
+       }
+     }
+
+     // --- SUGGESTION PHASE ---
+     const suggestions = [];
+
+     for (const asset of allAssets) {
+       // Skip if already in collection
+       if (collection.data.items && collection.data.items.includes(asset.key)) continue;
+
+       const assetVector = assetVectors.get(asset.key);
+       const score = cosineSimilarity(profileVector, assetVector);
+
+       if (score >= threshold) {
+         suggestions.push({ asset, score });
+       }
+     }
+
+     if (suggestions.length > 0) {
+       suggestions.sort((a, b) => b.score - a.score);
+
+       const newItems = [];
+       const topSuggestions = suggestions.slice(0, 10); // Limit to top 10
+
+       for (const { asset, score } of topSuggestions) {
+         console.log(` [${score.toFixed(3)}] ${chalk.cyan(asset.key)} - ${chalk.dim(asset.data.title || asset.id)}`);
+         newItems.push(asset.key);
+       }
+
+       if (options.fix) {
+         if (!collection.data.items) collection.data.items = [];
+         collection.data.items.push(...newItems);
+         collection.data.items = [...new Set(collection.data.items)]; // Dedupe
+         console.log(chalk.magenta(` -> Added ${newItems.length} items`));
+         isModified = true;
+       }
+     }
+
+     // --- SAVE ---
+     if (isModified) {
+       await fs.writeJson(collection.filePath, collection.data, { spaces: 2 });
+       console.log(chalk.blue(` -> Saved changes to ${collection.filename}`));
+     }
+   }
+
+   console.log(chalk.blue('\nDone.'));
+ }
+
+ main().catch(console.error);