cursor-history-mcp 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/index.cjs +1360 -8
- package/dist/index.cjs.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1957,7 +1957,7 @@ var require_adm_zip = __commonJS({
|
|
|
1957
1957
|
}
|
|
1958
1958
|
}
|
|
1959
1959
|
const _zip = new ZipFile(inBuffer, opts);
|
|
1960
|
-
const { canonical, sanitize, zipnamefix } = Utils;
|
|
1960
|
+
const { canonical, sanitize: sanitize2, zipnamefix } = Utils;
|
|
1961
1961
|
function getEntry(entry) {
|
|
1962
1962
|
if (entry && _zip) {
|
|
1963
1963
|
var item;
|
|
@@ -2459,7 +2459,7 @@ var require_adm_zip = __commonJS({
|
|
|
2459
2459
|
throw Utils.Errors.NO_ENTRY();
|
|
2460
2460
|
}
|
|
2461
2461
|
var entryName = canonical(item.entryName);
|
|
2462
|
-
var target =
|
|
2462
|
+
var target = sanitize2(targetPath, outFileName && !item.isDirectory ? outFileName : maintainEntryPath ? entryName : pth.basename(entryName));
|
|
2463
2463
|
if (item.isDirectory) {
|
|
2464
2464
|
var children = _zip.getEntryChildren(item);
|
|
2465
2465
|
children.forEach(function(child) {
|
|
@@ -2469,7 +2469,7 @@ var require_adm_zip = __commonJS({
|
|
|
2469
2469
|
throw Utils.Errors.CANT_EXTRACT_FILE();
|
|
2470
2470
|
}
|
|
2471
2471
|
var name = canonical(child.entryName);
|
|
2472
|
-
var childName =
|
|
2472
|
+
var childName = sanitize2(targetPath, maintainEntryPath ? name : pth.basename(name));
|
|
2473
2473
|
const fileAttr2 = keepOriginalPermission ? child.header.fileAttr : void 0;
|
|
2474
2474
|
filetools.writeFileTo(childName, content2, overwrite, fileAttr2);
|
|
2475
2475
|
});
|
|
@@ -2523,7 +2523,7 @@ var require_adm_zip = __commonJS({
|
|
|
2523
2523
|
overwrite = get_Bool(false, overwrite);
|
|
2524
2524
|
if (!_zip) throw Utils.Errors.NO_ZIP();
|
|
2525
2525
|
_zip.entries.forEach(function(entry) {
|
|
2526
|
-
var entryName =
|
|
2526
|
+
var entryName = sanitize2(targetPath, canonical(entry.entryName));
|
|
2527
2527
|
if (entry.isDirectory) {
|
|
2528
2528
|
filetools.makeDir(entryName);
|
|
2529
2529
|
return;
|
|
@@ -2571,7 +2571,7 @@ var require_adm_zip = __commonJS({
|
|
|
2571
2571
|
return;
|
|
2572
2572
|
}
|
|
2573
2573
|
targetPath = pth.resolve(targetPath);
|
|
2574
|
-
const getPath = (entry) =>
|
|
2574
|
+
const getPath = (entry) => sanitize2(targetPath, pth.normalize(canonical(entry.entryName)));
|
|
2575
2575
|
const getError = (msg, file) => new Error(msg + ': "' + file + '"');
|
|
2576
2576
|
const dirEntries = [];
|
|
2577
2577
|
const fileEntries = [];
|
|
@@ -2599,7 +2599,7 @@ var require_adm_zip = __commonJS({
|
|
|
2599
2599
|
next(err);
|
|
2600
2600
|
} else {
|
|
2601
2601
|
const entryName = pth.normalize(canonical(entry.entryName));
|
|
2602
|
-
const filePath =
|
|
2602
|
+
const filePath = sanitize2(targetPath, entryName);
|
|
2603
2603
|
entry.getDataAsync(function(content, err_1) {
|
|
2604
2604
|
if (err_1) {
|
|
2605
2605
|
next(err_1);
|
|
@@ -11973,6 +11973,1356 @@ async function handleMigrate(args) {
|
|
|
11973
11973
|
}
|
|
11974
11974
|
}
|
|
11975
11975
|
|
|
11976
|
+
// src/tools/year-pack/types.ts
|
|
11977
|
+
/**
 * Input schema for the year-pack tool. Every field is optional and falls
 * back to the default shown below.
 */
var YearPackInputSchema = z.object({
  // Integer year between 1970 and the current year. The upper bound is
  // computed when this schema is built; the default is computed lazily.
  year: z
    .number()
    .int()
    .min(1970)
    .max(new Date().getFullYear())
    .optional()
    .default(() => new Date().getFullYear()),
  // Either "en" or "zh"; defaults to "en".
  language: z.enum(["en", "zh"]).optional().default("en"),
  // Optional free-form string with no default.
  workspace: z.string().optional(),
  // Integer in [0, 100]; defaults to 30.
  maxSamples: z.number().int().min(0).max(100).optional().default(30),
  // Integer in [50, 500]; defaults to 120.
  maxSampleLength: z.number().int().min(50).max(500).optional().default(120),
  // Integer in [3, 15]; defaults to 7.
  topicsCount: z.number().int().min(3).max(15).optional().default(7)
});
|
|
11985
|
+
/** Labels naming the categories of content the year pack is meant to exclude. */
var SAFETY_GUARANTEES = ["no_executable_content", "no_file_paths", "no_urls", "no_credentials"];

// NOTE(review): presumably the minimum number of questions before topic
// extraction is attempted; the consumer is not in this part of the file.
var MIN_QUESTIONS_FOR_TOPICS = 50;

/** Default tuning values shared by the sanitizer, analyzer and topic code. */
var DEFAULT_CONFIG = {
  // Default maximum length of a sanitized line (truncateText / sanitize).
  maxLineChars: 280,
  // Defaults for selectSafeSamples: number of samples and per-sample length cap.
  maxSamples: 30,
  maxSampleLength: 120,
  // NOTE(review): vocabulary cap; not referenced in the visible code.
  maxVocab: 3000,
  // Document-frequency bounds used by calculateTfIdf.
  minDf: 5,
  maxDfRatio: 0.6,
  // NOTE(review): topic count / terms per topic; consumers not visible here.
  kTopics: 7,
  // Default iteration cap for kMeansClustering.
  kmeansIterations: 25,
  topTermsPerTopic: 5,
  // Defaults for extractKeywords.
  topUnigrams: 50,
  topBigrams: 30
};
|
|
12005
|
+
|
|
12006
|
+
// src/tools/year-pack/sanitizer.ts
|
|
12007
|
+
// All patterns below carry the `g` flag because they are used with
// String.prototype.replace to rewrite every occurrence.

// Fenced code blocks delimited by ``` or ~~~ (non-greedy, may span lines).
var CODE_BLOCK_PATTERN = /```[\s\S]*?```|~~~[\s\S]*?~~~/g;
// Inline code wrapped in single backticks.
var INLINE_CODE_PATTERN = /`[^`]+`/g;
// Whole lines that start with a shell-style prompt character ($, >, #) or
// with one of the listed command names followed by arguments (case-insensitive).
var COMMAND_LINE_PATTERN = /^[\s]*[$>#]\s*.+$|^[\s]*(npm|yarn|pnpm|git|python|pip|node|deno|bun|cargo|go|make|docker|kubectl|helm|terraform|aws|gcloud|az)\s+.+$/gim;
// Paths starting with ~/, ./, ../, a single /, or a Windows drive prefix.
var FILE_PATH_PATTERN = /(?:~\/|\.\.?\/|\/(?!\/)|[A-Za-z]:\\)[^\s\n\r<>"|?*]+/g;
// http(s)/ftp/file URLs and bare www. hosts.
var URL_PATTERN = /(?:https?|ftp|file):\/\/[^\s<>"\])}]+|www\.[^\s<>"\])}]+/gi;
// E-mail addresses.
var EMAIL_PATTERN = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
// Dotted-quad IPv4 addresses (octet ranges are not validated).
var IPV4_PATTERN = /\b(?:\d{1,3}\.){3}\d{1,3}\b/g;
// Full, trailing-"::" and leading-"::" IPv6 forms.
var IPV6_PATTERN = /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,7}:|::(?:[0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}\b/g;

/** Patterns for credential-like strings, applied in this order by maskSecrets. */
var SECRET_PATTERNS = [
  // API keys with common prefixes (sk_live_, pk_test_, etc.)
  /\b(?:sk|pk|api|token|key|secret|password|auth|bearer|access)[-_][a-zA-Z0-9_-]{16,}\b/gi,
  // Long hex strings (likely hashes or tokens)
  /\b[0-9a-fA-F]{32,}\b/g,
  // Long alphanumeric strings that look like tokens
  /\b[A-Za-z0-9+/=]{40,}\b/g,
  // AWS-style keys
  /\bAKIA[0-9A-Z]{16}\b/g,
  // GitHub tokens: one pattern per prefix, in the order ghp, gho, ghu, ghs, ghr
  ...["ghp", "gho", "ghu", "ghs", "ghr"].map(
    (prefix) => new RegExp("\\b" + prefix + "_[a-zA-Z0-9]{36}\\b", "g")
  ),
  // npm tokens
  /\bnpm_[a-zA-Z0-9]{36}\b/g
];
|
|
12033
|
+
/**
 * Replace fenced code blocks, then inline code spans, with a single space.
 * @param {string} text
 * @returns {string}
 */
function removeCodeBlocks(text) {
  const withoutFences = text.replace(CODE_BLOCK_PATTERN, " ");
  const withoutInline = withoutFences.replace(INLINE_CODE_PATTERN, " ");
  return withoutInline;
}
|
|
12036
|
+
/**
 * Replace every line matched by COMMAND_LINE_PATTERN with a single space.
 * @param {string} text
 * @returns {string}
 */
function removeCommands(text) {
  const stripped = text.replace(COMMAND_LINE_PATTERN, " ");
  return stripped;
}
|
|
12039
|
+
/**
 * Replace every file-path match with the "[PATH]" placeholder.
 * @param {string} text
 * @returns {string}
 */
function maskPaths(text) {
  const placeholder = "[PATH]";
  return text.replace(FILE_PATH_PATTERN, placeholder);
}
|
|
12042
|
+
/**
 * Replace every URL match with the "[URL]" placeholder.
 * @param {string} text
 * @returns {string}
 */
function maskUrls(text) {
  const placeholder = "[URL]";
  return text.replace(URL_PATTERN, placeholder);
}
|
|
12045
|
+
/**
 * Replace every e-mail address match with the "[EMAIL]" placeholder.
 * @param {string} text
 * @returns {string}
 */
function maskEmails(text) {
  const placeholder = "[EMAIL]";
  return text.replace(EMAIL_PATTERN, placeholder);
}
|
|
12048
|
+
/**
 * Replace IPv4 matches, then IPv6 matches, with the "[IP]" placeholder.
 * @param {string} text
 * @returns {string}
 */
function maskIps(text) {
  const withoutV4 = text.replace(IPV4_PATTERN, "[IP]");
  return withoutV4.replace(IPV6_PATTERN, "[IP]");
}
|
|
12051
|
+
/**
 * Apply each SECRET_PATTERNS entry in order, replacing matches with "[SECRET]".
 * @param {string} text
 * @returns {string}
 */
function maskSecrets(text) {
  return SECRET_PATTERNS.reduce(
    (masked, pattern) => masked.replace(pattern, "[SECRET]"),
    text
  );
}
|
|
12058
|
+
/**
 * Shorten `text` so that it is at most `maxLength` characters long.
 *
 * Text within the limit is returned unchanged. Longer text is cut and
 * suffixed with " [TRUNCATED]" so the result is exactly `maxLength` long.
 * If `maxLength` is too small to hold the marker, the text is hard-cut to
 * `maxLength` characters without a marker, so the limit is still respected.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxLength=DEFAULT_CONFIG.maxLineChars] - Maximum result length.
 * @returns {string} Text no longer than `maxLength` (for non-negative limits).
 */
function truncateText(text, maxLength = DEFAULT_CONFIG.maxLineChars) {
  if (text.length <= maxLength) {
    return text;
  }
  const marker = " [TRUNCATED]";
  if (maxLength < marker.length) {
    // No room for the marker: a negative slice end would count from the end
    // of the string and produce output longer than the limit.
    return text.slice(0, Math.max(0, maxLength));
  }
  return text.slice(0, maxLength - marker.length) + marker;
}
|
|
12065
|
+
/**
 * Collapse every run of whitespace into one space and strip both ends.
 * @param {string} text
 * @returns {string}
 */
function normalizeWhitespace(text) {
  const collapsed = text.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
12068
|
+
/**
 * Defaults merged under caller options by sanitize() and getAppliedFilters().
 * Every stage is enabled by default.
 */
var DEFAULT_SANITIZE_OPTIONS = {
  // Length limit passed to truncateText when `truncate` is on.
  maxLength: DEFAULT_CONFIG.maxLineChars,
  // Removal stages.
  removeCode: true,
  removeCommands: true,
  // Masking stages (each replaces matches with a bracketed placeholder).
  maskPaths: true,
  maskUrls: true,
  maskEmails: true,
  maskIps: true,
  maskSecrets: true,
  // Final length cap.
  truncate: true
};
|
|
12079
|
+
/**
 * Run the sanitization pipeline over `text`.
 *
 * Enabled stages run in a fixed order: code removal, command removal, URL
 * masking, path masking, e-mail masking, IP masking, secret masking. The
 * result is then whitespace-normalized and, if enabled, truncated.
 *
 * @param {string} text - Raw text.
 * @param {object} [options] - Overrides merged over DEFAULT_SANITIZE_OPTIONS.
 * @returns {string} Sanitized text.
 */
function sanitize(text, options = {}) {
  const settings = { ...DEFAULT_SANITIZE_OPTIONS, ...options };
  // URLs are masked before paths; the order of this table is significant.
  const stages = [
    [settings.removeCode, removeCodeBlocks],
    [settings.removeCommands, removeCommands],
    [settings.maskUrls, maskUrls],
    [settings.maskPaths, maskPaths],
    [settings.maskEmails, maskEmails],
    [settings.maskIps, maskIps],
    [settings.maskSecrets, maskSecrets]
  ];
  let cleaned = text;
  for (const [enabled, applyStage] of stages) {
    if (enabled) {
      cleaned = applyStage(cleaned);
    }
  }
  cleaned = normalizeWhitespace(cleaned);
  return settings.truncate ? truncateText(cleaned, settings.maxLength) : cleaned;
}
|
|
12109
|
+
/**
 * Report whether `text` still contains something that looks sensitive: a
 * file path, URL, e-mail address, IPv4 address, code-like syntax, or a
 * secret-shaped token.
 *
 * The shared patterns carry the `g` flag, and `RegExp.prototype.test` on a
 * global regex resumes from `lastIndex`. Without a reset, a match found in
 * one call would make the next call start scanning mid-string and possibly
 * miss a match. `lastIndex` is therefore reset before and after each probe.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when any check matches.
 */
function containsSensitiveContent(text) {
  const matches = (pattern) => {
    pattern.lastIndex = 0;
    const found = pattern.test(text);
    pattern.lastIndex = 0;
    return found;
  };
  if (matches(FILE_PATH_PATTERN)) return true;
  if (matches(URL_PATTERN)) return true;
  if (matches(EMAIL_PATTERN)) return true;
  if (matches(IPV4_PATTERN)) return true;
  // Code heuristic: punctuation typical of code plus an identifier directly
  // followed by "(" or "{". These literals are not global, so they keep no state.
  if (/[{}();]/.test(text) && /[a-z]+\s*[({]/.test(text)) return true;
  for (const pattern of SECRET_PATTERNS) {
    if (matches(pattern)) return true;
  }
  return false;
}
|
|
12120
|
+
/**
 * List the labels of the filters that are active for the given options.
 * "user_messages_only" is always first; the rest follow in a fixed order.
 *
 * @param {object} [options] - Overrides merged over DEFAULT_SANITIZE_OPTIONS.
 * @returns {string[]} Filter labels.
 */
function getAppliedFilters(options = {}) {
  const settings = { ...DEFAULT_SANITIZE_OPTIONS, ...options };
  const labelByFlag = [
    ["removeCode", "code_blocks_removed"],
    ["removeCommands", "commands_removed"],
    ["maskPaths", "paths_masked"],
    ["maskUrls", "urls_masked"],
    ["maskEmails", "emails_masked"],
    ["maskIps", "ips_masked"],
    ["maskSecrets", "secrets_masked"],
    ["truncate", "truncated_long_text"]
  ];
  const active = labelByFlag
    .filter(([flag]) => settings[flag])
    .map(([, label]) => label);
  return ["user_messages_only", ...active];
}
|
|
12133
|
+
|
|
12134
|
+
// src/tools/year-pack/stopwords.ts
|
|
12135
|
+
/**
 * Lower-case English words dropped during keyword extraction. Each group is
 * a space-separated list that is split into individual set entries.
 */
var ENGLISH_STOPWORDS = new Set(
  [
    // Articles
    "a an the",
    // Pronouns
    "i me my myself we our ours ourselves you your yours yourself yourselves",
    "he him his himself she her hers herself it its itself",
    "they them their theirs themselves what which who whom this that these those",
    // Verbs (common)
    "am is are was were be been being have has had having do does did doing",
    "would should could ought might must shall will can",
    // Prepositions
    "at by for from in into of on to with about against between through during",
    "before after above below up down out off over under again further",
    // Conjunctions
    "and but or nor so yet both either neither not only own same than too very just",
    // Other common words
    "then now here there when where why how all each few more most other some such",
    "no any if because as until while once",
    // Common tech words that don't add meaning
    "use using used get got make made want need like know see look think",
    "also new one two first way even well back still going come take give let try",
    "keep put say said tell told ask work works working right good bad best better",
    "worst worse long short big small",
    // Placeholders (our sanitization markers)
    "path url email secret truncated"
  ].flatMap((group) => group.split(" "))
);
|
|
12334
|
+
/**
 * Chinese stopwords, written as \u escapes. Each group is a space-separated
 * list that is split into individual set entries. A few words appear in
 * more than one group; the Set keeps a single copy of each.
 */
var CHINESE_STOPWORDS = new Set(
  [
    // Particles
    "\u7684 \u5730 \u5F97 \u4E86 \u7740 \u8FC7 \u5417 \u5462 \u5427 \u554A \u5440 \u54E6 \u54C8 \u561B \u5566 \u55EF",
    // Pronouns
    "\u6211 \u4F60 \u4ED6 \u5979 \u5B83 \u6211\u4EEC \u4F60\u4EEC \u4ED6\u4EEC \u5979\u4EEC \u5B83\u4EEC \u81EA\u5DF1",
    "\u8FD9 \u90A3 \u8FD9\u4E2A \u90A3\u4E2A \u8FD9\u4E9B \u90A3\u4E9B \u4EC0\u4E48 \u54EA \u54EA\u4E2A \u8C01",
    "\u600E\u4E48 \u600E\u6837 \u5982\u4F55 \u4E3A\u4EC0\u4E48 \u54EA\u91CC \u54EA\u513F",
    // Verbs (common auxiliary)
    "\u662F \u6709 \u5728 \u88AB \u628A \u8BA9 \u7ED9 \u8DDF \u548C \u4E0E \u5BF9 \u5411 \u4ECE \u5230 \u7528 \u4EE5 \u4E3A",
    "\u56E0\u4E3A \u6240\u4EE5 \u4F46\u662F \u4F46 \u800C \u6216 \u6216\u8005 \u5982\u679C \u867D\u7136 \u867D \u5373\u4F7F",
    "\u5C31 \u624D \u90FD \u4E5F \u8FD8 \u53C8 \u518D \u53EA \u53EA\u662F \u4E0D \u6CA1 \u6CA1\u6709 \u522B \u4E0D\u8981",
    "\u80FD \u4F1A \u53EF\u4EE5 \u53EF\u80FD \u5E94\u8BE5 \u5FC5\u987B \u9700\u8981 \u60F3 \u8981",
    // Conjunctions and connectives
    "\u7136\u540E \u63A5\u7740 \u9996\u5148 \u5176\u6B21 \u6700\u540E \u53E6\u5916 \u6B64\u5916 \u800C\u4E14 \u5E76\u4E14 \u4EE5\u53CA \u4E0D\u4EC5 \u4E0D\u4F46 \u65E2 \u53C8",
    // Adverbs
    "\u5F88 \u975E\u5E38 \u592A \u66F4 \u6700 \u771F \u6BD4\u8F83 \u76F8\u5F53 \u7279\u522B \u5C24\u5176",
    "\u5DF2\u7ECF \u66FE\u7ECF \u6B63\u5728 \u5C06 \u5C06\u8981 \u521A \u521A\u521A \u9A6C\u4E0A \u7ACB\u523B",
    "\u4E00\u76F4 \u603B\u662F \u7ECF\u5E38 \u5E38\u5E38 \u5F80\u5F80 \u5927\u6982 \u53EF\u80FD \u4E5F\u8BB8 \u5927\u7EA6",
    // Measure words
    "\u4E2A \u4E9B \u79CD \u6B21 \u4EF6 \u6761 \u70B9",
    // Other common words
    "\u4E00 \u4E8C \u4E24 \u4E09 \u7B2C \u4E0A \u4E0B \u524D \u540E \u91CC \u4E2D \u5916 \u5185 \u5DE6 \u53F3",
    "\u591A \u5C11 \u5927 \u5C0F \u597D \u574F \u65B0 \u65E7 \u957F \u77ED \u9AD8 \u4F4E \u5FEB \u6162 \u65E9 \u665A \u5148 \u540E",
    "\u65F6 \u65F6\u5019 \u5730\u65B9 \u4E1C\u897F \u4E8B\u60C5 \u95EE\u9898 \u60C5\u51B5 \u65B9\u9762 \u65B9\u6CD5 \u65B9\u5F0F"
  ].flatMap((group) => group.split(" "))
);
|
|
12529
|
+
/**
 * Pick the stopword set for a language code.
 * @param {string} language - "zh" selects the Chinese set; anything else the English set.
 * @returns {Set<string>}
 */
function getStopwords(language) {
  if (language === "zh") {
    return CHINESE_STOPWORDS;
  }
  return ENGLISH_STOPWORDS;
}
|
|
12532
|
+
|
|
12533
|
+
// src/tools/year-pack/analyzer.ts
|
|
12534
|
+
/**
 * Split text into lower-case tokens of at least three characters.
 *
 * A token starts with a letter a-z, may contain digits, "_" and "-" inside,
 * and ends with a letter or digit. Characters outside that set act as
 * separators.
 *
 * @param {string} text
 * @returns {string[]} Tokens in order of appearance.
 */
function tokenize(text) {
  const wordPattern = /[a-z][a-z0-9_-]*[a-z0-9]|[a-z]{3,}/g;
  const found = text.toLowerCase().match(wordPattern) ?? [];
  return found.filter((word) => word.length >= 3);
}
|
|
12539
|
+
/**
 * Tokenize `text` and drop stopwords.
 *
 * `tokenize` only produces Latin tokens ([a-z0-9_-]), so the set for a
 * non-English language can never match anything on its own. The English set
 * is therefore always applied as well; it also holds the sanitizer's
 * placeholder words ("path", "url", "email", "secret", "truncated"), which
 * must not surface as keywords whatever language is requested.
 *
 * @param {string} text - Text to tokenize.
 * @param {string} [language="en"] - Language code passed to getStopwords.
 * @returns {string[]} Tokens with stopwords removed, in original order.
 */
function tokenizeWithoutStopwords(text, language = "en") {
  const tokens = tokenize(text);
  const languageStopwords = getStopwords(language);
  return tokens.filter(
    (token) => !languageStopwords.has(token) && !ENGLISH_STOPWORDS.has(token)
  );
}
|
|
12544
|
+
/**
 * Build "left right" strings for each pair of adjacent tokens.
 * Pairs where either token is falsy (for example an empty string) are skipped.
 *
 * @param {string[]} tokens
 * @returns {string[]} Bigrams in order; empty when fewer than two tokens.
 */
function generateBigrams(tokens) {
  const pairs = [];
  for (let right = 1; right < tokens.length; right++) {
    const previous = tokens[right - 1];
    const current = tokens[right];
    if (!previous || !current) continue;
    pairs.push(previous + " " + current);
  }
  return pairs;
}
|
|
12556
|
+
/**
 * Count how often each term occurs.
 * @param {Iterable<string>} terms
 * @returns {Map<string, number>} Counts keyed by term, in first-seen order.
 */
function countFrequencies(terms) {
  const tally = new Map();
  for (const term of terms) {
    const soFar = tally.has(term) ? tally.get(term) : 0;
    tally.set(term, soFar + 1);
  }
  return tally;
}
|
|
12563
|
+
/**
 * Return the `n` most frequent terms, highest count first. Terms with equal
 * counts keep the map's insertion order.
 *
 * @param {Map<string, number>} frequencies
 * @param {number} n - Maximum number of entries to return.
 * @returns {{term: string, count: number}[]}
 */
function getTopTerms(frequencies, n) {
  const ranked = Array.from(frequencies.entries());
  ranked.sort((left, right) => right[1] - left[1]);
  const top = ranked.slice(0, n);
  return top.map((entry) => ({ term: entry[0], count: entry[1] }));
}
|
|
12566
|
+
/**
 * Collect the most frequent single words and word pairs across `texts`.
 *
 * Each text is tokenized with stopwords removed; bigrams are formed within
 * a text only, never across two texts.
 *
 * @param {Iterable<string>} texts
 * @param {string} [language="en"]
 * @param {number} [topUnigrams=DEFAULT_CONFIG.topUnigrams]
 * @param {number} [topBigrams=DEFAULT_CONFIG.topBigrams]
 * @returns {{topUnigrams: object[], topBigrams: object[]}}
 */
function extractKeywords(texts, language = "en", topUnigrams = DEFAULT_CONFIG.topUnigrams, topBigrams = DEFAULT_CONFIG.topBigrams) {
  const words = [];
  const pairs = [];
  for (const text of texts) {
    const tokens = tokenizeWithoutStopwords(text, language);
    words.push(...tokens);
    pairs.push(...generateBigrams(tokens));
  }
  const wordCounts = countFrequencies(words);
  const pairCounts = countFrequencies(pairs);
  const result = {
    topUnigrams: getTopTerms(wordCounts, topUnigrams),
    topBigrams: getTopTerms(pairCounts, topBigrams)
  };
  return result;
}
|
|
12581
|
+
/**
 * Compute the ISO-8601 week number of `date`, using its local calendar day.
 *
 * The date is shifted to the Thursday of its ISO week; the week number is
 * then derived from that Thursday's day-of-year. Days near a year boundary
 * can therefore belong to week 52/53 of the previous ISO year or week 1 of
 * the next one.
 *
 * @param {Date} date
 * @returns {number} Week number.
 */
function getWeekNumber(date) {
  const MS_PER_DAY = 864e5;
  // Work in UTC on the local Y/M/D so DST shifts cannot change day arithmetic.
  const thursday = new Date(Date.UTC(date.getFullYear(), date.getMonth(), date.getDate()));
  // ISO weekday: Monday = 1 ... Sunday = 7.
  const isoWeekday = thursday.getUTCDay() || 7;
  thursday.setUTCDate(thursday.getUTCDate() + 4 - isoWeekday);
  const firstOfYear = new Date(Date.UTC(thursday.getUTCFullYear(), 0, 1));
  const dayOfYear = (thursday.getTime() - firstOfYear.getTime()) / MS_PER_DAY + 1;
  return Math.ceil(dayOfYear / 7);
}
|
|
12590
|
+
/**
 * Classify a length as "short" (<= 100), "medium" (<= 280) or "long".
 * @param {number} length
 * @returns {"short"|"medium"|"long"}
 */
function getLengthBucket(length) {
  return length <= 100 ? "short" : length <= 280 ? "medium" : "long";
}
|
|
12595
|
+
/**
 * Summarize processed questions: total count, per-month counts, number of
 * distinct months, and counts per length bucket (by original length).
 *
 * @param {{month: string, originalLength: number}[]} questions
 * @returns {{totalQuestions: number, activeMonths: number, monthlyDistribution: object, lengthBuckets: object}}
 */
function calculateStats(questions) {
  const monthlyDistribution = {};
  const lengthBuckets = { short: 0, medium: 0, long: 0 };
  for (const question of questions) {
    const previous = monthlyDistribution[question.month] ?? 0;
    monthlyDistribution[question.month] = previous + 1;
    lengthBuckets[getLengthBucket(question.originalLength)]++;
  }
  return {
    totalQuestions: questions.length,
    activeMonths: Object.keys(monthlyDistribution).length,
    monthlyDistribution,
    lengthBuckets
  };
}
|
|
12611
|
+
/**
 * Turn a raw message into a processed question record.
 *
 * @param {string} content - Raw message text.
 * @param {Date} timestamp - When the message was sent; local-time getters are used.
 * @param {number} [maxLength=DEFAULT_CONFIG.maxLineChars] - Length cap handed to sanitize().
 * @returns {{content: string, originalLength: number, timestamp: Date, month: string, week: number}}
 *   Sanitized content, the pre-sanitization length, the timestamp, a
 *   "YYYY-MM" month key and the week number from getWeekNumber().
 */
function processQuestion(content, timestamp, maxLength = DEFAULT_CONFIG.maxLineChars) {
  const rawLength = content.length;
  const cleaned = sanitize(content, { maxLength });
  const yearPart = timestamp.getFullYear();
  const monthPart = String(timestamp.getMonth() + 1).padStart(2, "0");
  const weekNumber = getWeekNumber(timestamp);
  return {
    content: cleaned,
    originalLength: rawLength,
    timestamp,
    month: `${yearPart}-${monthPart}`,
    week: weekNumber
  };
}
|
|
12626
|
+
/**
 * Heuristically decide whether text reads like a question: it either ends
 * with "?" or its first whitespace-delimited word is an English
 * interrogative or auxiliary.
 *
 * @param {string} text
 * @returns {boolean}
 */
function isQuestionFormat(text) {
  const trimmed = text.trim();
  if (trimmed.endsWith("?")) {
    return true;
  }
  const openers = new Set([
    "how", "what", "why", "when", "where", "who", "which",
    "can", "could", "would", "should",
    "is", "are", "does", "do", "will"
  ]);
  const [leadingWord] = trimmed.toLowerCase().split(/\s+/);
  if (!leadingWord) {
    return false;
  }
  return openers.has(leadingWord);
}
|
|
12649
|
+
/**
 * Choose a bounded set of sample questions that are safe to show.
 *
 * A question is kept only if its sanitized content is at most `maxLength`
 * characters, passes containsSensitiveContent(), is at least 20 characters
 * long and is not just a lone placeholder. Candidates are ordered
 * question-like first, then shortest first, and de-duplicated
 * case-insensitively.
 *
 * The cap is checked before each sample is added, so `maxSamples` of 0 (or
 * below) yields no samples instead of one.
 *
 * @param {{content: string}[]} questions - Processed questions.
 * @param {number} [maxSamples=DEFAULT_CONFIG.maxSamples] - Maximum number of samples.
 * @param {number} [maxLength=DEFAULT_CONFIG.maxSampleLength] - Maximum sample length.
 * @returns {{questions: string[], maxLength: number}}
 */
function selectSafeSamples(questions, maxSamples = DEFAULT_CONFIG.maxSamples, maxLength = DEFAULT_CONFIG.maxSampleLength) {
  const placeholderOnly = /^\s*\[(?:PATH|URL|EMAIL|IP|SECRET|TRUNCATED)\]\s*$/;
  const candidates = questions.filter((q) => {
    if (q.content.length > maxLength) return false;
    if (containsSensitiveContent(q.content)) return false;
    if (q.content.length < 20) return false;
    if (placeholderOnly.test(q.content)) return false;
    return true;
  }).map((q) => ({
    content: q.content,
    isQuestion: isQuestionFormat(q.content),
    length: q.content.length
  }));
  candidates.sort((a, b) => {
    if (a.isQuestion !== b.isQuestion) {
      return a.isQuestion ? -1 : 1;
    }
    return a.length - b.length;
  });
  const selected = [];
  const seen = new Set();
  for (const candidate of candidates) {
    // Enforce the cap before adding, so a cap of 0 returns nothing.
    if (selected.length >= maxSamples) break;
    const normalized = candidate.content.toLowerCase().trim();
    if (seen.has(normalized)) continue;
    selected.push(candidate.content);
    seen.add(normalized);
  }
  return {
    questions: selected,
    maxLength
  };
}
|
|
12682
|
+
|
|
12683
|
+
// src/tools/year-pack/topics.ts
|
|
12684
|
+
/**
 * Group the questions of one calendar year into one document per week.
 *
 * `q.week` is an ISO week number, which does not always match the calendar
 * year: the last days of December can be ISO week 1 of the next year, and
 * the first days of January can be ISO week 52/53 of the previous year.
 * Used as-is, late-December questions would merge into January's week 1
 * (labelled "early") and early-January ones into the year's last week
 * (labelled "late"). Those weeks are remapped so they stay at the right end
 * of the year: December's week 1 becomes week 53, January's week 52/53
 * becomes week 1.
 *
 * @param {{timestamp: Date, week: number, content: string}[]} questions
 * @param {number} year - Calendar year to keep; other years are ignored.
 * @returns {{week: number, year: number, period: "early"|"mid"|"late", content: string, questionCount: number}[]}
 *   Documents sorted by week; `content` joins the week's questions with spaces.
 */
function aggregateByWeek(questions, year) {
  const weekMap = new Map();
  for (const q of questions) {
    if (q.timestamp.getFullYear() !== year) continue;
    const month = q.timestamp.getMonth();
    let week = q.week;
    if (month === 11 && week === 1) {
      // Late December that ISO assigns to next year's week 1.
      week = 53;
    } else if (month === 0 && week >= 52) {
      // Early January that ISO assigns to the previous year's last week.
      week = 1;
    }
    const existing = weekMap.get(week) ?? { content: [], count: 0 };
    existing.content.push(q.content);
    existing.count++;
    weekMap.set(week, existing);
  }
  const documents = [];
  for (const [week, data] of weekMap) {
    let period;
    if (week <= 17) period = "early";
    else if (week <= 35) period = "mid";
    else period = "late";
    documents.push({
      week,
      year,
      period,
      content: data.content.join(" "),
      questionCount: data.count
    });
  }
  return documents.sort((a, b) => a.week - b.week);
}
|
|
12709
|
+
/**
 * Compute relative term frequencies for one document.
 * @param {string[]} tokens - The document's tokens.
 * @returns {Map<string, number>} Term to (occurrences / total tokens), in first-seen order.
 */
function calculateTf(tokens) {
  const occurrences = new Map();
  for (const token of tokens) {
    occurrences.set(token, (occurrences.get(token) ?? 0) + 1);
  }
  const total = tokens.length;
  const frequencies = new Map();
  occurrences.forEach((count, term) => {
    frequencies.set(term, count / total);
  });
  return frequencies;
}
|
|
12721
|
+
/**
 * Count in how many documents each term occurs, keeping only terms whose
 * document frequency lies within [minDf, floor(docCount * maxDfRatio)].
 *
 * @param {string[][]} documents - One token list per document.
 * @param {number} minDf - Minimum number of documents a term must appear in.
 * @param {number} maxDfRatio - Upper bound as a fraction of the document count.
 * @returns {Map<string, number>} Surviving terms with their document frequency.
 */
function calculateDf(documents, minDf, maxDfRatio) {
  const docFrequency = new Map();
  for (const tokens of documents) {
    // A Set so that each term counts once per document.
    new Set(tokens).forEach((term) => {
      docFrequency.set(term, (docFrequency.get(term) ?? 0) + 1);
    });
  }
  const upperBound = Math.floor(documents.length * maxDfRatio);
  const kept = new Map();
  docFrequency.forEach((count, term) => {
    if (count < minDf || count > upperBound) return;
    kept.set(term, count);
  });
  return kept;
}
|
|
12739
|
+
/**
 * Build sparse TF-IDF vectors for a list of documents.
 *
 * The vocabulary is the set of terms that pass the document-frequency
 * filter. Each vector holds tf * ln(N / df) for the vocabulary terms that
 * occur in that document; terms with zero frequency are left out.
 *
 * @param {{content: string}[]} documents
 * @param {string} [language="en"]
 * @param {number} [minDf=DEFAULT_CONFIG.minDf]
 * @param {number} [maxDfRatio=DEFAULT_CONFIG.maxDfRatio]
 * @returns {{vectors: {docId: number, terms: Map<string, number>}[], vocabulary: string[]}}
 *   `docId` is the document's index in `documents`.
 */
function calculateTfIdf(documents, language = "en", minDf = DEFAULT_CONFIG.minDf, maxDfRatio = DEFAULT_CONFIG.maxDfRatio) {
  const tokenLists = documents.map((doc) => tokenizeWithoutStopwords(doc.content, language));
  const docFrequency = calculateDf(tokenLists, minDf, maxDfRatio);
  const vocabulary = Array.from(docFrequency.keys());
  const totalDocs = documents.length;
  const vectors = [];
  tokenLists.forEach((tokens, docId) => {
    if (!tokens) return;
    const termFrequency = calculateTf(tokens);
    const weights = new Map();
    for (const term of vocabulary) {
      const tfValue = termFrequency.get(term) ?? 0;
      if (tfValue === 0) continue;
      const inverseDf = Math.log(totalDocs / (docFrequency.get(term) ?? 1));
      weights.set(term, tfValue * inverseDf);
    }
    vectors.push({ docId, terms: weights });
  });
  return { vectors, vocabulary };
}
|
|
12763
|
+
/**
 * Euclidean distance between two sparse vectors; a term missing from a
 * vector counts as 0.
 *
 * @param {Map<string, number>} v1
 * @param {Map<string, number>} v2
 * @returns {number}
 */
function vectorDistance(v1, v2) {
  const dimensions = new Set(v1.keys());
  for (const term of v2.keys()) {
    dimensions.add(term);
  }
  let squaredSum = 0;
  dimensions.forEach((term) => {
    const delta = (v1.get(term) ?? 0) - (v2.get(term) ?? 0);
    squaredSum += delta * delta;
  });
  return Math.sqrt(squaredSum);
}
|
|
12772
|
+
/**
 * Average the sparse vectors of the given cluster members.
 *
 * Vectors are indexed by `docId` once up front, rather than searching the
 * whole vector list for every member. Ids with no matching vector add
 * nothing to the sum, but the divisor is always `memberIds.length`.
 *
 * @param {{docId: number, terms: Map<string, number>}[]} vectors - All document vectors.
 * @param {number[]} memberIds - docIds of the cluster members.
 * @returns {Map<string, number>} Centroid; an empty map when there are no members.
 */
function calculateCentroid(vectors, memberIds) {
  if (memberIds.length === 0) return new Map();
  const byDocId = new Map();
  for (const vector of vectors) {
    // First occurrence wins, matching a front-to-back search.
    if (!byDocId.has(vector.docId)) {
      byDocId.set(vector.docId, vector);
    }
  }
  const sum = new Map();
  for (const id of memberIds) {
    const vector = byDocId.get(id);
    if (!vector) continue;
    for (const [term, value] of vector.terms) {
      sum.set(term, (sum.get(term) ?? 0) + value);
    }
  }
  const centroid = new Map();
  for (const [term, value] of sum) {
    centroid.set(term, value / memberIds.length);
  }
  return centroid;
}
|
|
12788
|
+
/**
 * Choose initial centroids by distance-weighted random sampling.
 *
 * With no vectors the result is empty; with at most `k` vectors every
 * vector becomes a centroid (as a copy). Otherwise the first centroid is
 * drawn uniformly at random, and each further one is drawn with probability
 * proportional to its squared distance from the nearest existing centroid.
 *
 * @param {{docId: number, terms: Map<string, number>}[]} vectors
 * @param {number} k - Number of centroids wanted.
 * @returns {Map<string, number>[]} Centroids, as copies of the chosen term maps.
 */
function kMeansPlusPlusInit(vectors, k) {
  if (vectors.length === 0) return [];
  if (vectors.length <= k) {
    return vectors.map((v) => new Map(v.terms));
  }
  const centroids = [];
  const seed = vectors[Math.floor(Math.random() * vectors.length)];
  if (seed) {
    centroids.push(new Map(seed.terms));
  }
  while (centroids.length < k) {
    // Squared distance from each vector to its nearest chosen centroid.
    const weights = vectors.map((vector) => {
      let nearest = Infinity;
      for (const centroid of centroids) {
        nearest = Math.min(nearest, vectorDistance(vector.terms, centroid));
      }
      return nearest * nearest;
    });
    const totalWeight = weights.reduce((acc, weight) => acc + weight, 0);
    // Roulette-wheel pick: walk the weights until the random budget is spent.
    let remaining = Math.random() * totalWeight;
    const pickedIndex = weights.findIndex((weight) => {
      remaining -= weight;
      return remaining <= 0;
    });
    if (pickedIndex !== -1) {
      const picked = vectors[pickedIndex];
      if (picked) {
        centroids.push(new Map(picked.terms));
      }
    }
  }
  return centroids;
}
|
|
12827
|
+
/**
 * Assign every vector to its nearest centroid.
 *
 * @param {Array<{docId: *, terms: Map}>} vectors - document vectors to assign.
 * @param {Map[]} centroids - current cluster centres.
 * @returns {Array<Array>} One array of docIds per centroid, in centroid order.
 *   Ties go to the lower centroid index; index 0 is the default when no
 *   centroid compares closer than Infinity.
 */
function assignToClusters(vectors, centroids) {
  const buckets = centroids.map(() => []);
  for (let v = 0; v < vectors.length; v++) {
    const current = vectors[v];
    let winner = 0;
    let winnerDist = Infinity;
    for (let c = 0; c < centroids.length; c++) {
      const candidate = centroids[c];
      if (candidate) {
        const candidateDist = vectorDistance(current.terms, candidate);
        if (candidateDist < winnerDist) {
          winner = c;
          winnerDist = candidateDist;
        }
      }
    }
    buckets[winner]?.push(current.docId);
  }
  return buckets;
}
|
|
12845
|
+
/**
 * Cluster document vectors with k-means (k-means++ seeding).
 *
 * @param {Array<{docId: *, terms: Map}>} vectors - document vectors.
 * @param {number} k - requested cluster count; capped at `vectors.length`.
 * @param {number} [maxIterations=DEFAULT_CONFIG.kmeansIterations] - upper bound
 *   on assignment/update rounds.
 * @returns {Array<{id: number, centroid: Map, members: Array, topTerms: Array}>}
 *   One entry per cluster; `topTerms` holds the centroid's highest-weighted
 *   terms (at most `DEFAULT_CONFIG.topTermsPerTopic`). `[]` for empty input.
 */
function kMeansClustering(vectors, k, maxIterations = DEFAULT_CONFIG.kmeansIterations) {
  if (vectors.length === 0) return [];
  const actualK = Math.min(k, vectors.length);
  const sameAssignments = (previous, next) =>
    previous.length === next.length &&
    previous.every((members, i) => {
      const other = next[i] ?? [];
      return members.length === other.length && members.every((id, j) => id === other[j]);
    });
  let centroids = kMeansPlusPlusInit(vectors, actualK);
  let assignments = [];
  for (let iter = 0; iter < maxIterations; iter++) {
    const nextAssignments = assignToClusters(vectors, centroids);
    // Unchanged assignments mean the centroids computed last round are already
    // the means of these members; further rounds would reproduce the same state.
    const converged = iter > 0 && sameAssignments(assignments, nextAssignments);
    assignments = nextAssignments;
    if (converged) break;
    const newCentroids = [];
    for (let i = 0; i < actualK; i++) {
      const members = assignments[i] ?? [];
      if (members.length > 0) {
        newCentroids.push(calculateCentroid(vectors, members));
      } else {
        // An empty cluster keeps its previous centre.
        const oldCentroid = centroids[i];
        if (oldCentroid) {
          newCentroids.push(oldCentroid);
        }
      }
    }
    centroids = newCentroids;
  }
  const clusters = [];
  for (let i = 0; i < actualK; i++) {
    const members = assignments[i] ?? [];
    const centroid = centroids[i] ?? new Map();
    const topTerms = Array.from(centroid.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, DEFAULT_CONFIG.topTermsPerTopic)
      .map(([term]) => term);
    clusters.push({
      id: i,
      centroid,
      members,
      topTerms
    });
  }
  return clusters;
}
|
|
12880
|
+
/**
 * Build a display name from a topic's keywords.
 *
 * @param {string[]} keywords - keywords, most significant first.
 * @returns {string} "General" when there is no usable first keyword; otherwise
 *   the first keyword with its initial upper-cased, followed by " & " and the
 *   second keyword when one is present. Hyphens are rendered as spaces.
 */
function generateTopicName(keywords) {
  const [primary, secondary] = keywords;
  if (keywords.length === 0 || !primary) {
    return "General";
  }
  const initial = primary.charAt(0).toUpperCase();
  const remainder = primary.slice(1).replace(/-/g, " ");
  const headline = initial + remainder;
  if (keywords.length > 1 && secondary) {
    const companion = secondary.replace(/-/g, " ");
    return `${headline} & ${companion}`;
  }
  return headline;
}
|
|
12891
|
+
/**
 * Share of a cluster's questions that fall in each period of the year.
 *
 * @param {{members: Array}} cluster - cluster whose `members` index into `documents`.
 * @param {Array<{period: string, questionCount: number}>} documents - aggregated documents.
 * @returns {{early: number, mid: number, late: number}} Fractions of the
 *   cluster's question total per period; all zeros when the total is 0.
 */
function calculateTopicTrend(cluster, documents) {
  const tally = { early: 0, mid: 0, late: 0 };
  let grandTotal = 0;
  cluster.members.forEach((memberId) => {
    const entry = documents[memberId];
    if (entry) {
      tally[entry.period] += entry.questionCount;
      grandTotal += entry.questionCount;
    }
  });
  if (grandTotal === 0) {
    return { early: 0, mid: 0, late: 0 };
  }
  const { early, mid, late } = tally;
  return {
    early: early / grandTotal,
    mid: mid / grandTotal,
    late: late / grandTotal
  };
}
|
|
12909
|
+
/**
 * Derive topics for a year by clustering aggregated question documents.
 *
 * @param {Array<{timestamp: Date}>} questions - processed questions.
 * @param {number} year - calendar year to analyse.
 * @param {string} [language="en"] - passed through to `calculateTfIdf`.
 * @param {number} [k=DEFAULT_CONFIG.kTopics] - requested number of clusters.
 * @returns {Array<{id: number, name: string, share: number, keywords: Array, trend: object}>}
 *   Topics sorted by descending share, with ids renumbered to that order.
 *   Clusters holding under 2% of the year's questions are dropped. `[]` when
 *   there are fewer than MIN_QUESTIONS_FOR_TOPICS questions.
 */
function extractTopics(questions, year, language = "en", k = DEFAULT_CONFIG.kTopics) {
  if (questions.length < MIN_QUESTIONS_FOR_TOPICS) {
    return [];
  }
  const documents = aggregateByWeek(questions, year);
  // Never ask for more clusters than there are documents (but at least one).
  if (documents.length < k) {
    k = Math.max(1, documents.length);
  }
  const { vectors } = calculateTfIdf(documents, language);
  const clusters = kMeansClustering(vectors, k);
  let questionsInYear = 0;
  for (const question of questions) {
    if (question.timestamp.getFullYear() === year) {
      questionsInYear += 1;
    }
  }
  const topics = [];
  clusters.forEach((cluster) => {
    const clusterQuestions = cluster.members.reduce((running, docId) => {
      const doc = documents[docId];
      return doc ? running + doc.questionCount : running;
    }, 0);
    const share = questionsInYear > 0 ? clusterQuestions / questionsInYear : 0;
    if (share < 0.02) return;
    topics.push({
      id: cluster.id,
      name: generateTopicName(cluster.topTerms),
      share,
      keywords: cluster.topTerms,
      trend: calculateTopicTrend(cluster, documents)
    });
  });
  topics.sort((left, right) => right.share - left.share);
  for (let rank = 0; rank < topics.length; rank++) {
    topics[rank].id = rank;
  }
  return topics;
}
|
|
12943
|
+
/**
 * Whether there are too few questions to attempt topic extraction.
 *
 * @param {number} questionCount - number of questions available.
 * @returns {boolean} true when the count is below MIN_QUESTIONS_FOR_TOPICS.
 */
function shouldSkipTopics(questionCount) {
  const belowMinimum = questionCount < MIN_QUESTIONS_FOR_TOPICS;
  return belowMinimum;
}
|
|
12946
|
+
|
|
12947
|
+
// src/tools/year-pack/prompt.ts
|
|
12948
|
+
// Year-pack fields each report section may draw on, keyed by section.
// The prompt generators join each list with ", " for the section's data line.
var SECTION_ALLOWLISTS = {
  title: [
    "meta.year",
    "topics[0..1].name",
    "keywords.topUnigrams[0..5]"
  ],
  poem: [
    "topics",
    "keywords.topUnigrams",
    "keywords.topBigrams"
  ],
  highlights: [
    "topics (share + trend)",
    "keywords.topBigrams[0..2]",
    "stats.monthlyDistribution"
  ],
  stats: [
    "stats.*",
    "lengthBuckets",
    "keywords (optional)"
  ],
  style: [
    "lengthBuckets",
    "keywords",
    "samples (max 2-3, \u2264120 chars)"
  ],
  awards: [
    "topics",
    "keywords",
    "stats"
  ],
  archetype: [
    "topics.trend",
    "lengthBuckets",
    "keywords.topBigrams"
  ],
  timeline: [
    "stats.monthlyDistribution",
    "topics.trend"
  ],
  future: [
    "topics.trend (ascending)",
    "keywords"
  ]
};
|
|
12963
|
+
/**
 * Render the English report-generation prompt for a year pack.
 *
 * @param {object} yearPack - year pack; reads `meta.year`,
 *   `stats.totalQuestions`, `stats.activeMonths` and `topics[].name`, and
 *   embeds the whole object as pretty-printed JSON.
 * @returns {string} Markdown prompt text.
 */
function generateEnglishPrompt(yearPack) {
  const { meta, stats, topics } = yearPack;
  const yearPackJson = JSON.stringify(yearPack, null, 2);
  const allowed = (section) => SECTION_ALLOWLISTS[section].join(", ");
  // Pin the locale so the English prompt reads the same on every host machine.
  const totalQuestions = stats.totalQuestions.toLocaleString("en-US");
  const topicsLine = topics.length > 0
    ? `Key topics: ${topics.slice(0, 3).map((t) => t.name).join(", ")}`
    : "Note: Topic data unavailable (insufficient data)";
  return `# Year in Review Report Generator

You are generating a personalized, entertainment-focused "Year in Review" report based on the user's AI assistant chat history from ${meta.year}.

## Important Guidelines

1. **Tone**: Fun, insightful, celebratory - like Spotify Wrapped but for coding
2. **Language**: English
3. **Data Source**: Use ONLY the year_pack data provided below
4. **Privacy**: The data has been sanitized. Do not speculate about personal details.
5. **No Fabrication**: Only mention themes/topics that appear in the data

## Report Sections

Generate the following 9 sections in order:

### 1. Title / Cover
Create a catchy title for this year's coding journey.
**Allowed data**: ${allowed("title")}

### 2. Poem of Reflection
Write a short, creative poem (4-8 lines) capturing the year's themes.
**Allowed data**: ${allowed("poem")}
**Note**: Do NOT use the samples field for poetry.

### 3. Three Highlights of the Year
Identify 3 standout themes or achievements based on topic trends.
**Allowed data**: ${allowed("highlights")}

### 4. Chat Stats
Present the key statistics in an engaging way.
**Allowed data**: ${allowed("stats")}
- Total questions: ${totalQuestions}
- Active months: ${stats.activeMonths}
- Most active period: [calculate from monthlyDistribution]

### 5. Conversation Style
Describe the user's questioning style based on length patterns and keywords.
**Allowed data**: ${allowed("style")}

### 6. Annual Awards
Create 3-5 fun awards based on the data (e.g., "Most Consistent Topic", "Late-Night Coder").
**Allowed data**: ${allowed("awards")}

### 7. Archetype
Assign a developer archetype (e.g., "The System Architect", "The Bug Hunter", "The Feature Builder").
**Allowed data**: ${allowed("archetype")}
**Rules**:
- Do NOT mention MBTI or personality types
- Do NOT infer personal attributes
- Base it purely on coding patterns

### 8. Timeline & Inflection Points
Describe how focus shifted across the year (early \u2192 mid \u2192 late).
**Allowed data**: ${allowed("timeline")}
${topicsLine}

### 9. Future Surprise Lines
Based on rising trends, suggest what might be next.
**Allowed data**: ${allowed("future")}

## Year Pack Data

\`\`\`json
${yearPackJson}
\`\`\`

## Output Format

Generate the report with clear section headers (##) and engaging content. Make it feel personal and celebratory while staying grounded in the data provided.

Remember: This is entertainment, not analysis. Have fun with it!`;
}
|
|
13041
|
+
/**
 * Render the Chinese report-generation prompt for a year pack.
 *
 * @param {object} yearPack - year pack; reads `meta.year`,
 *   `stats.totalQuestions`, `stats.activeMonths` and `topics[].name`, and
 *   embeds the whole object as pretty-printed JSON.
 * @returns {string} Markdown prompt text (Chinese).
 */
function generateChinesePrompt(yearPack) {
  const { meta, stats, topics } = yearPack;
  const yearPackJson = JSON.stringify(yearPack, null, 2);
  const allowed = (section) => SECTION_ALLOWLISTS[section].join(", ");
  // Pin the locale so the prompt reads the same on every host machine.
  const totalQuestions = stats.totalQuestions.toLocaleString("zh-CN");
  const topicsLine = topics.length > 0
    ? `\u4E3B\u8981\u4E3B\u9898\uFF1A${topics.slice(0, 3).map((t) => t.name).join("\u3001")}`
    : "\u6CE8\u610F\uFF1A\u4E3B\u9898\u6570\u636E\u4E0D\u53EF\u7528\uFF08\u6570\u636E\u4E0D\u8DB3\uFF09";
  return `# \u5E74\u5EA6\u56DE\u987E\u62A5\u544A\u751F\u6210\u5668

\u4F60\u6B63\u5728\u6839\u636E\u7528\u6237 ${meta.year} \u5E74\u7684 AI \u52A9\u624B\u804A\u5929\u8BB0\u5F55\uFF0C\u751F\u6210\u4E00\u4EFD\u4E2A\u6027\u5316\u3001\u5A31\u4E50\u6027\u7684"\u5E74\u5EA6\u56DE\u987E"\u62A5\u544A\u3002

## \u91CD\u8981\u6307\u5357

1. **\u8BED\u6C14**\uFF1A\u6709\u8DA3\u3001\u6D1E\u5BDF\u529B\u5F3A\u3001\u5E86\u795D\u6027\u7684 - \u7C7B\u4F3C\u4E8E Spotify Wrapped\uFF0C\u4F46\u7528\u4E8E\u7F16\u7A0B
2. **\u8BED\u8A00**\uFF1A\u4E2D\u6587
3. **\u6570\u636E\u6765\u6E90**\uFF1A\u4EC5\u4F7F\u7528\u4E0B\u65B9\u63D0\u4F9B\u7684 year_pack \u6570\u636E
4. **\u9690\u79C1**\uFF1A\u6570\u636E\u5DF2\u7ECF\u8FC7\u8131\u654F\u5904\u7406\u3002\u4E0D\u8981\u63A8\u6D4B\u4E2A\u4EBA\u8BE6\u60C5\u3002
5. **\u4E0D\u8981\u865A\u6784**\uFF1A\u53EA\u63D0\u53CA\u6570\u636E\u4E2D\u51FA\u73B0\u7684\u4E3B\u9898

## \u62A5\u544A\u7AE0\u8282

\u6309\u987A\u5E8F\u751F\u6210\u4EE5\u4E0B 9 \u4E2A\u7AE0\u8282\uFF1A

### 1. \u6807\u9898 / \u5C01\u9762
\u4E3A\u4ECA\u5E74\u7684\u7F16\u7A0B\u4E4B\u65C5\u521B\u5EFA\u4E00\u4E2A\u5438\u5F15\u4EBA\u7684\u6807\u9898\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("title")}

### 2. \u5E74\u5EA6\u8BD7\u7BC7
\u5199\u4E00\u9996\u7B80\u77ED\u7684\u521B\u610F\u8BD7\uFF084-8\u884C\uFF09\uFF0C\u6355\u6349\u4ECA\u5E74\u7684\u4E3B\u9898\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("poem")}
**\u6CE8\u610F**\uFF1A\u8BD7\u6B4C\u4E2D\u4E0D\u8981\u4F7F\u7528 samples \u5B57\u6BB5\u3002

### 3. \u5E74\u5EA6\u4E09\u5927\u4EAE\u70B9
\u6839\u636E\u4E3B\u9898\u8D8B\u52BF\uFF0C\u8BC6\u522B 3 \u4E2A\u7A81\u51FA\u7684\u4E3B\u9898\u6216\u6210\u5C31\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("highlights")}

### 4. \u804A\u5929\u7EDF\u8BA1
\u4EE5\u6709\u8DA3\u7684\u65B9\u5F0F\u5448\u73B0\u5173\u952E\u7EDF\u8BA1\u6570\u636E\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("stats")}
- \u603B\u95EE\u9898\u6570\uFF1A${totalQuestions}
- \u6D3B\u8DC3\u6708\u4EFD\uFF1A${stats.activeMonths}
- \u6700\u6D3B\u8DC3\u65F6\u671F\uFF1A[\u4ECE monthlyDistribution \u8BA1\u7B97]

### 5. \u5BF9\u8BDD\u98CE\u683C
\u6839\u636E\u95EE\u9898\u957F\u5EA6\u6A21\u5F0F\u548C\u5173\u952E\u8BCD\u63CF\u8FF0\u7528\u6237\u7684\u63D0\u95EE\u98CE\u683C\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("style")}

### 6. \u5E74\u5EA6\u5956\u9879
\u6839\u636E\u6570\u636E\u521B\u5EFA 3-5 \u4E2A\u6709\u8DA3\u7684\u5956\u9879\uFF08\u4F8B\u5982\uFF1A"\u6700\u7A33\u5B9A\u4E3B\u9898\u5956"\u3001"\u6DF1\u591C\u7F16\u7A0B\u5956"\uFF09\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("awards")}

### 7. \u5F00\u53D1\u8005\u4EBA\u8BBE
\u5206\u914D\u4E00\u4E2A\u5F00\u53D1\u8005\u4EBA\u8BBE\uFF08\u4F8B\u5982\uFF1A"\u7CFB\u7EDF\u67B6\u6784\u5E08"\u3001"Bug \u730E\u624B"\u3001"\u529F\u80FD\u6784\u5EFA\u8005"\uFF09\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("archetype")}
**\u89C4\u5219**\uFF1A
- \u4E0D\u8981\u63D0\u53CA MBTI \u6216\u6027\u683C\u7C7B\u578B
- \u4E0D\u8981\u63A8\u65AD\u4E2A\u4EBA\u5C5E\u6027
- \u7EAF\u7CB9\u57FA\u4E8E\u7F16\u7A0B\u6A21\u5F0F

### 8. \u65F6\u95F4\u7EBF\u4E0E\u8F6C\u6298\u70B9
\u63CF\u8FF0\u5168\u5E74\u7126\u70B9\u7684\u53D8\u5316\uFF08\u5E74\u521D \u2192 \u5E74\u4E2D \u2192 \u5E74\u672B\uFF09\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("timeline")}
${topicsLine}

### 9. \u672A\u6765\u5C55\u671B
\u6839\u636E\u4E0A\u5347\u8D8B\u52BF\uFF0C\u9884\u6D4B\u63A5\u4E0B\u6765\u53EF\u80FD\u7684\u65B9\u5411\u3002
**\u53EF\u7528\u6570\u636E**: ${allowed("future")}

## Year Pack \u6570\u636E

\`\`\`json
${yearPackJson}
\`\`\`

## \u8F93\u51FA\u683C\u5F0F

\u751F\u6210\u5E26\u6709\u6E05\u6670\u7AE0\u8282\u6807\u9898\uFF08##\uFF09\u548C\u5438\u5F15\u4EBA\u5185\u5BB9\u7684\u62A5\u544A\u3002\u8BA9\u5B83\u65E2\u4E2A\u6027\u5316\u53C8\u6709\u5E86\u795D\u611F\uFF0C\u540C\u65F6\u4FDD\u6301\u57FA\u4E8E\u63D0\u4F9B\u7684\u6570\u636E\u3002

\u8BB0\u4F4F\uFF1A\u8FD9\u662F\u5A31\u4E50\u6027\u62A5\u544A\uFF0C\u4E0D\u662F\u5206\u6790\u62A5\u544A\u3002\u5C3D\u60C5\u53D1\u6325\uFF01`;
}
|
|
13119
|
+
/**
 * Render the report prompt in the requested language.
 *
 * @param {object} yearPack - year pack handed to the language-specific generator.
 * @param {string} [language="en"] - "zh" selects the Chinese prompt; any other
 *   value selects the English one.
 * @returns {string} The prompt text.
 */
function generatePromptTemplate(yearPack, language = "en") {
  if (language === "zh") {
    return generateChinesePrompt(yearPack);
  }
  return generateEnglishPrompt(yearPack);
}
|
|
13122
|
+
|
|
13123
|
+
// src/tools/year-pack/index.ts
|
|
13124
|
+
// Descriptor (name, description, JSON input schema) for the year-pack tool.
var yearPackTool = {
  name: "cursor_history_year_pack",
  description: "Generate a year-in-review data package from Cursor AI chat history. Produces a sanitized JSON summary with statistics, topics, and keywords, plus a prompt template for LLM-based report generation. Read-only operation.",
  inputSchema: {
    type: "object",
    properties: {
      year: {
        type: "integer",
        description: "Calendar year to analyze (default: current year)",
        minimum: 1970,
        // Evaluated once, when this object literal is created.
        maximum: new Date().getFullYear()
      },
      language: {
        type: "string",
        description: "Report language preference (default: en)",
        enum: ["en", "zh"]
      },
      workspace: {
        type: "string",
        description: "Optional workspace path to filter sessions"
      },
      maxSamples: {
        type: "integer",
        description: "Maximum sample questions to include (default: 30)",
        minimum: 0,
        maximum: 100
      },
      maxSampleLength: {
        type: "integer",
        description: "Maximum characters per sample (default: 120)",
        minimum: 50,
        maximum: 500
      },
      topicsCount: {
        type: "integer",
        description: "Number of topics to extract (default: 7)",
        minimum: 3,
        maximum: 15
      }
    },
    additionalProperties: false
  }
};
|
|
13167
|
+
/**
 * Page through the session list and load every session whose timestamp falls
 * inside the given calendar year (local time).
 *
 * @param {number} year - calendar year to collect.
 * @param {string} [workspace] - forwarded to `listSessions2` as a filter.
 * @returns {Promise<{sessions: Array, sessionCount: number}>} Full sessions in
 *   listing order, plus their count.
 */
async function fetchSessionsForYear(year, workspace) {
  const pageSize = 100;
  const windowStart = new Date(year, 0, 1);
  const windowEnd = new Date(year + 1, 0, 1);
  const collected = [];
  let offset = 0;
  for (;;) {
    const page = await listSessions2({ limit: pageSize, offset, workspace });
    for (const [position, summary] of page.data.entries()) {
      if (!summary) continue;
      const startedAt = new Date(summary.timestamp);
      const insideYear = startedAt >= windowStart && startedAt < windowEnd;
      if (insideYear) {
        // NOTE(review): assumes getSession2 takes the same position that
        // listSessions2 pages by, also when a workspace filter is set - confirm.
        collected.push(await getSession2(offset + position));
      }
    }
    const morePages = page.pagination.hasMore;
    offset += pageSize;
    // Hard stop so a listing that always reports more pages cannot loop forever.
    if (!morePages || offset > 1e5) break;
  }
  return { sessions: collected, sessionCount: collected.length };
}
|
|
13193
|
+
/**
 * Collect processed user questions from sessions for one calendar year.
 *
 * @param {Array<{messages?: Array, timestamp?: *}>} sessions - full sessions.
 * @param {number} year - only messages dated in this year (local time) are kept.
 * @param {number} maxLength - forwarded to `processQuestion`.
 * @returns {Array} Results of `processQuestion` whose trimmed content has at
 *   least 10 characters, in session and message order.
 */
function extractUserQuestions(sessions, year, maxLength) {
  const collected = [];
  sessions.forEach((session) => {
    (session.messages ?? []).forEach((message) => {
      if (message.role !== "user") return;
      const text = message.content;
      if (!text || text.trim().length === 0) return;
      // Prefer the message's own timestamp; fall back to the session's.
      const rawStamp = message.timestamp || session.timestamp;
      if (!rawStamp) return;
      const askedAt = new Date(rawStamp);
      if (askedAt.getFullYear() !== year) return;
      const question = processQuestion(text, askedAt, maxLength);
      if (question.content.trim().length < 10) return;
      collected.push(question);
    });
  });
  return collected;
}
|
|
13216
|
+
/**
 * Assemble the year pack from processed questions.
 *
 * @param {Array<{content: string}>} questions - processed user questions.
 * @param {object} input - tool input; reads `year`, `language`, `workspace`,
 *   `maxSamples`, `maxSampleLength` and `topicsCount`.
 * @param {number} sessionCount - number of sessions analysed (copied into `meta`).
 * @returns {object} `{ meta, stats, lengthBuckets, keywords, topics, samples,
 *   safety, notes }`. `topics` is empty, and a note explains why, when topic
 *   extraction is skipped.
 */
function buildYearPack(questions, input, sessionCount) {
  const { year, language, workspace, maxSamples, maxSampleLength, topicsCount } = input;
  const { totalQuestions, activeMonths, monthlyDistribution, lengthBuckets } = calculateStats(questions);
  const keywords = extractKeywords(
    questions.map((q) => q.content),
    language,
    DEFAULT_CONFIG.topUnigrams,
    DEFAULT_CONFIG.topBigrams
  );
  // Decide once; the same answer drives both the topics field and the note below.
  const skipTopics = shouldSkipTopics(totalQuestions);
  const topics = skipTopics ? [] : extractTopics(questions, year, language, topicsCount);
  const samples = selectSafeSamples(questions, maxSamples, maxSampleLength);
  const meta = {
    year,
    language,
    generatedAt: new Date().toISOString(),
    workspace: workspace ?? null,
    questionCount: totalQuestions,
    sessionCount
  };
  const safety = {
    filtersApplied: getAppliedFilters(),
    guarantees: [...SAFETY_GUARANTEES]
  };
  const notes = [
    "Data is aggregated and sanitized for entertainment purposes.",
    "Topic trends indicate focus shifts, not exact timelines."
  ];
  if (skipTopics) {
    // Report the real threshold rather than a hard-coded copy of it.
    notes.push(
      `Topic extraction skipped: fewer than ${MIN_QUESTIONS_FOR_TOPICS} questions (${totalQuestions} found).`
    );
  }
  return {
    meta,
    stats: {
      totalQuestions,
      activeMonths,
      monthlyDistribution
    },
    lengthBuckets,
    keywords,
    topics,
    samples,
    safety,
    notes
  };
}
|
|
13263
|
+
/**
 * Tool handler: build the year pack and prompt template and format them as a
 * markdown response.
 *
 * @param {object} args - raw tool arguments, validated with `YearPackInputSchema`.
 * @returns {Promise<string>} Markdown report, or a plain explanatory message
 *   when no sessions or no user questions were found for the year.
 * @throws {Error} When the failure maps to an MCP error, an Error carrying the
 *   mapped message with the original error attached as `cause`; otherwise the
 *   original error is rethrown.
 */
async function handleYearPack(args) {
  const startTime = Date.now();
  try {
    const input = YearPackInputSchema.parse(args);
    const { year, language, workspace } = input;
    const { sessions, sessionCount } = await fetchSessionsForYear(year, workspace);
    if (sessionCount === 0) {
      const workspaceMsg = workspace ? ` in workspace "${workspace}"` : "";
      return `No chat sessions found for year ${year}${workspaceMsg}. Make sure you have Cursor AI chat history for this period.`;
    }
    const questions = extractUserQuestions(sessions, year, DEFAULT_CONFIG.maxLineChars);
    if (questions.length === 0) {
      return `Found ${sessionCount} session(s) for year ${year}, but no user questions were extracted. Sessions may only contain assistant responses or tool calls.`;
    }
    const yearPack = buildYearPack(questions, input, sessionCount);
    const promptTemplate = generatePromptTemplate(yearPack, language);
    const processingTimeSec = ((Date.now() - startTime) / 1e3).toFixed(2);
    const output = [
      `# Year Pack Generated Successfully`,
      ``,
      `**Year**: ${year}`,
      `**Language**: ${language}`,
      `**Sessions Analyzed**: ${sessionCount}`,
      `**Questions Processed**: ${questions.length}`,
      `**Topics Extracted**: ${yearPack.topics.length}`,
      `**Processing Time**: ${processingTimeSec}s`,
      ``,
      `## Year Pack JSON`,
      ``,
      "```json",
      JSON.stringify(yearPack, null, 2),
      "```",
      ``,
      `## Prompt Template`,
      ``,
      `The prompt template is included below. Copy it and use it with any LLM to generate your annual report.`,
      ``,
      "---",
      ``,
      promptTemplate
    ];
    return output.join("\n");
  } catch (error) {
    const mcpError = mapCursorHistoryError(error);
    if (isMcpError(mcpError)) {
      // Keep the original error (and its stack) reachable for debugging.
      throw new Error(mcpError.message, { cause: error });
    }
    throw error;
  }
}
|
|
13325
|
+
|
|
11976
13326
|
// src/index.ts
|
|
11977
13327
|
var TOOLS = [
|
|
11978
13328
|
listSessionsTool,
|
|
@@ -11981,7 +13331,8 @@ var TOOLS = [
|
|
|
11981
13331
|
exportTool,
|
|
11982
13332
|
backupTool,
|
|
11983
13333
|
restoreTool,
|
|
11984
|
-
migrateTool
|
|
13334
|
+
migrateTool,
|
|
13335
|
+
yearPackTool
|
|
11985
13336
|
];
|
|
11986
13337
|
var TOOL_HANDLERS = {
|
|
11987
13338
|
cursor_history_list: handleListSessions,
|
|
@@ -11990,7 +13341,8 @@ var TOOL_HANDLERS = {
|
|
|
11990
13341
|
cursor_history_export: handleExport,
|
|
11991
13342
|
cursor_history_backup: handleBackup,
|
|
11992
13343
|
cursor_history_restore: handleRestore,
|
|
11993
|
-
cursor_history_migrate: handleMigrate
|
|
13344
|
+
cursor_history_migrate: handleMigrate,
|
|
13345
|
+
cursor_history_year_pack: handleYearPack
|
|
11994
13346
|
};
|
|
11995
13347
|
function createServer() {
|
|
11996
13348
|
const server = new Server(
|