@crowdlisten/harness 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +167 -0
- package/LICENSE +21 -0
- package/README.md +153 -0
- package/dist/agent-proxy.d.ts +24 -0
- package/dist/agent-proxy.js +140 -0
- package/dist/agent-tools.d.ts +736 -0
- package/dist/agent-tools.js +409 -0
- package/dist/context/api.d.ts +5 -0
- package/dist/context/api.js +164 -0
- package/dist/context/cli.d.ts +19 -0
- package/dist/context/cli.js +108 -0
- package/dist/context/extractor.d.ts +12 -0
- package/dist/context/extractor.js +43 -0
- package/dist/context/index.d.ts +12 -0
- package/dist/context/index.js +11 -0
- package/dist/context/matcher.d.ts +39 -0
- package/dist/context/matcher.js +246 -0
- package/dist/context/parser.d.ts +28 -0
- package/dist/context/parser.js +157 -0
- package/dist/context/pipeline.d.ts +26 -0
- package/dist/context/pipeline.js +56 -0
- package/dist/context/prompts.d.ts +6 -0
- package/dist/context/prompts.js +60 -0
- package/dist/context/providers.d.ts +6 -0
- package/dist/context/providers.js +106 -0
- package/dist/context/redactor.d.ts +10 -0
- package/dist/context/redactor.js +68 -0
- package/dist/context/server.d.ts +5 -0
- package/dist/context/server.js +134 -0
- package/dist/context/store.d.ts +12 -0
- package/dist/context/store.js +82 -0
- package/dist/context/types.d.ts +79 -0
- package/dist/context/types.js +4 -0
- package/dist/context/user-state.d.ts +40 -0
- package/dist/context/user-state.js +144 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +385 -0
- package/dist/insights/browser/BrowserPool.d.ts +87 -0
- package/dist/insights/browser/BrowserPool.js +266 -0
- package/dist/insights/browser/RequestInterceptor.d.ts +46 -0
- package/dist/insights/browser/RequestInterceptor.js +115 -0
- package/dist/insights/cli.d.ts +8 -0
- package/dist/insights/cli.js +206 -0
- package/dist/insights/core/base/BaseAdapter.d.ts +37 -0
- package/dist/insights/core/base/BaseAdapter.js +123 -0
- package/dist/insights/core/health/HealthMonitor.d.ts +75 -0
- package/dist/insights/core/health/HealthMonitor.js +171 -0
- package/dist/insights/core/interfaces/SocialMediaPlatform.d.ts +125 -0
- package/dist/insights/core/interfaces/SocialMediaPlatform.js +42 -0
- package/dist/insights/core/utils/DataNormalizer.d.ts +53 -0
- package/dist/insights/core/utils/DataNormalizer.js +349 -0
- package/dist/insights/core/utils/InstagramUrlUtils.d.ts +11 -0
- package/dist/insights/core/utils/InstagramUrlUtils.js +60 -0
- package/dist/insights/core/utils/TikTokUrlUtils.d.ts +10 -0
- package/dist/insights/core/utils/TikTokUrlUtils.js +57 -0
- package/dist/insights/handlers.d.ts +157 -0
- package/dist/insights/handlers.js +246 -0
- package/dist/insights/index.d.ts +437 -0
- package/dist/insights/index.js +426 -0
- package/dist/insights/platforms/instagram/InstagramAdapter.d.ts +34 -0
- package/dist/insights/platforms/instagram/InstagramAdapter.js +342 -0
- package/dist/insights/platforms/moltbook/MoltbookAdapter.d.ts +31 -0
- package/dist/insights/platforms/moltbook/MoltbookAdapter.js +227 -0
- package/dist/insights/platforms/reddit/RedditAdapter.d.ts +21 -0
- package/dist/insights/platforms/reddit/RedditAdapter.js +212 -0
- package/dist/insights/platforms/tiktok/TikTokAdapter.d.ts +34 -0
- package/dist/insights/platforms/tiktok/TikTokAdapter.js +269 -0
- package/dist/insights/platforms/twitter/TwitterAdapter.d.ts +23 -0
- package/dist/insights/platforms/twitter/TwitterAdapter.js +211 -0
- package/dist/insights/platforms/xiaohongshu/XiaohongshuAdapter.d.ts +35 -0
- package/dist/insights/platforms/xiaohongshu/XiaohongshuAdapter.js +258 -0
- package/dist/insights/platforms/youtube/YouTubeAdapter.d.ts +22 -0
- package/dist/insights/platforms/youtube/YouTubeAdapter.js +254 -0
- package/dist/insights/service-config.d.ts +7 -0
- package/dist/insights/service-config.js +60 -0
- package/dist/insights/services/UnifiedSocialMediaService.d.ts +94 -0
- package/dist/insights/services/UnifiedSocialMediaService.js +259 -0
- package/dist/insights/vision/VisionExtractor.d.ts +46 -0
- package/dist/insights/vision/VisionExtractor.js +236 -0
- package/dist/learnings.d.ts +50 -0
- package/dist/learnings.js +130 -0
- package/dist/openapi.d.ts +29 -0
- package/dist/openapi.js +169 -0
- package/dist/server-factory.d.ts +20 -0
- package/dist/server-factory.js +41 -0
- package/dist/suggestions.d.ts +16 -0
- package/dist/suggestions.js +72 -0
- package/dist/telemetry.d.ts +44 -0
- package/dist/telemetry.js +93 -0
- package/dist/tools/registry.d.ts +65 -0
- package/dist/tools/registry.js +256 -0
- package/dist/tools.d.ts +2433 -0
- package/dist/tools.js +2294 -0
- package/dist/transport/http.d.ts +15 -0
- package/dist/transport/http.js +154 -0
- package/package.json +76 -0
- package/skills/catalog.json +272 -0
- package/skills/community-catalog.json +4202 -0
- package/skills/competitive-analysis/SKILL.md +174 -0
- package/skills/content-creator/SKILL.md +256 -0
- package/skills/content-strategy/SKILL.md +222 -0
- package/skills/data-storytelling/SKILL.md +248 -0
- package/skills/heuristic-evaluation/SKILL.md +201 -0
- package/skills/market-research-reports/SKILL.md +184 -0
- package/skills/user-stories/SKILL.md +178 -0
- package/skills/ux-researcher/SKILL.md +239 -0
- package/web-dist/assets/index-B1b25lNd.css +1 -0
- package/web-dist/assets/index-CDWHwHbl.js +64 -0
- package/web-dist/index.html +16 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI subcommands for context extraction.
|
|
3
|
+
*
|
|
4
|
+
* npx @crowdlisten/harness context # Launch web UI
|
|
5
|
+
* npx @crowdlisten/harness context <file> # CLI-only processing
|
|
6
|
+
* npx @crowdlisten/harness setup # Configure LLM provider (handled in index.ts)
|
|
7
|
+
*/
|
|
8
|
+
import { runPipeline } from "./pipeline.js";
|
|
9
|
+
import { loadConfig, saveConfig } from "./store.js";
|
|
10
|
+
import { startContextServer } from "./server.js";
|
|
11
|
+
import * as readline from "readline";
|
|
12
|
+
/**
 * Ask a single question and resolve with the trimmed reply.
 * The prompt is written to stderr so stdout stays clean for JSON piping.
 */
function prompt(question) {
    return new Promise((resolve) => {
        const rl = readline.createInterface({
            input: process.stdin,
            output: process.stderr,
        });
        rl.question(question, (answer) => {
            rl.close();
            resolve(answer.trim());
        });
    });
}
|
|
24
|
+
/**
 * Interactive setup: choose an LLM provider, enter an API key, and
 * optionally pick a model, then persist the result via saveConfig().
 * All prompts and messages go to stderr; exits with code 1 when no
 * API key is supplied.
 */
export async function runSetupContext() {
    console.error("\n🔧 CrowdListen Context — LLM Provider Setup\n");
    const current = loadConfig();
    if (current) {
        console.error(`Current config: ${current.provider} (${current.model || "default model"})`);
    }
    const providerAnswer = await prompt("Provider (openai / anthropic) [openai]: ");
    const provider = providerAnswer || "openai";
    const keyLabel = provider === "openai" ? "OpenAI" : "Anthropic";
    const apiKey = await prompt(`${keyLabel} API key: `);
    if (!apiKey) {
        console.error("❌ API key required.");
        process.exit(1);
    }
    const model = await prompt("Model (press Enter for default): ");
    const config = { provider, apiKey };
    if (model) {
        config.model = model;
    }
    saveConfig(config);
    console.error(`\n✅ Saved to ~/.crowdlisten/config.json`);
    console.error(` Provider: ${config.provider}`);
    console.error(` Model: ${config.model || "default"}\n`);
}
|
|
51
|
+
/**
 * Process one file end-to-end from the command line (no web UI):
 * run the pipeline, report redaction/extraction progress on stderr,
 * and emit the final result as JSON on stdout so it can be piped.
 * Exits with code 1 when no provider is configured or the pipeline fails.
 */
export async function runContextCLI(filePath) {
    const config = loadConfig();
    if (!config) {
        console.error("No LLM provider configured. Run: npx @crowdlisten/harness setup");
        process.exit(1);
    }
    console.error(`\n📄 Processing: ${filePath}`);
    console.error(` Provider: ${config.provider} (${config.model || "default"})\n`);
    try {
        const result = await runPipeline({
            filePath,
            source: filePath,
            onProgress(progress) {
                console.error(` Chunk ${progress.current}/${progress.total} — ${progress.blocksFound} blocks found`);
            },
        });
        console.error(`\n🛡️ PII Redaction:`);
        if (result.totalRedactions > 0) {
            Object.entries(result.redactionStats).forEach(([type, count]) => {
                console.error(` ${type}: ${count} redacted`);
            });
        }
        else {
            console.error(" No PII detected");
        }
        console.error(`\n📦 Extracted ${result.blocks.length} context blocks:\n`);
        result.blocks.forEach((block) => {
            console.error(` [${block.type}] ${block.title}`);
            console.error(` ${block.content}\n`);
        });
        if (result.skills.length > 0) {
            console.error(`🎯 Recommended Skills:\n`);
            result.skills.forEach((skill) => {
                console.error(` ${skill.name} (${Math.round(skill.score * 100)}% match)`);
                console.error(` ${skill.description}\n`);
            });
        }
        // Machine-readable result goes to stdout for piping.
        process.stdout.write(JSON.stringify(result, null, 2));
    }
    catch (err) {
        console.error(`\n❌ ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
    }
}
|
|
99
|
+
/**
 * Launch the web UI server. A missing LLM config is only a warning here,
 * since the provider can also be configured from the web UI itself.
 */
export async function runContextWeb() {
    if (!loadConfig()) {
        console.error("⚠️ No LLM provider configured. You can configure it in the web UI or run: npx @crowdlisten/harness setup\n");
    }
    await startContextServer();
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract context blocks from text using the configured LLM provider.
|
|
3
|
+
*/
|
|
4
|
+
import type { LLMProvider, ContextBlock } from "./types.js";
|
|
5
|
+
/**
|
|
6
|
+
* Extract context blocks from a text chunk.
|
|
7
|
+
* @param provider - LLM provider instance
|
|
8
|
+
* @param text - Text to extract from (will be truncated to 80k chars)
|
|
9
|
+
* @param source - Source label for the text
|
|
10
|
+
* @param isChat - Whether this is chat history (uses 4-type prompt) or general text (2-type)
|
|
11
|
+
*/
|
|
12
|
+
export declare function extractBlocks(provider: LLMProvider, text: string, source: string, isChat?: boolean): Promise<ContextBlock[]>;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract context blocks from text using the configured LLM provider.
|
|
3
|
+
*/
|
|
4
|
+
import { CHAT_EXTRACT_SYSTEM_PROMPT, EXTRACT_SYSTEM_PROMPT } from "./prompts.js";
|
|
5
|
+
const MAX_INPUT_CHARS = 80_000;
|
|
6
|
+
const MAX_RETRIES = 2;
|
|
7
|
+
/**
 * Extract context blocks from a text chunk using the configured LLM provider.
 * Input is capped at MAX_INPUT_CHARS; up to MAX_RETRIES + 1 attempts are made
 * with a linearly increasing delay (1s, 2s, ...) between failures.
 *
 * @param provider - LLM provider exposing complete(messages, opts)
 * @param text - Text to extract from
 * @param source - Source label attached to every returned block
 * @param isChat - true for chat history (4-type prompt), false for general text (2-type)
 */
export async function extractBlocks(provider, text, source, isChat = true) {
    const input = text.slice(0, MAX_INPUT_CHARS);
    const systemPrompt = isChat ? CHAT_EXTRACT_SYSTEM_PROMPT : EXTRACT_SYSTEM_PROMPT;
    const messages = [
        { role: "system", content: systemPrompt },
        { role: "user", content: `Source: "${source}"\n\n${input}` },
    ];
    let attempt = 0;
    while (true) {
        try {
            const raw = await provider.complete(messages, { temperature: 0.3, jsonMode: true });
            const payload = JSON.parse(raw);
            return (payload.blocks || []).map((b) => ({
                type: b.type,
                title: b.title,
                content: b.content,
                source,
            }));
        }
        catch (err) {
            if (attempt === MAX_RETRIES) {
                const reason = err instanceof Error ? err.message : String(err);
                throw new Error(`Context extraction failed after ${MAX_RETRIES + 1} attempts: ${reason}`);
            }
            // Brief, linearly growing delay before the next attempt.
            await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt + 1)));
            attempt++;
        }
    }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context extraction pipeline — barrel export.
|
|
3
|
+
*/
|
|
4
|
+
export { redactPII } from "./redactor.js";
|
|
5
|
+
export { parseFile, parseTextContent, parseZipBuffer, chunkText } from "./parser.js";
|
|
6
|
+
export { extractBlocks } from "./extractor.js";
|
|
7
|
+
export { matchSkills, discoverSkills, searchSkills, getSkillCategories, getFullCatalog } from "./matcher.js";
|
|
8
|
+
export { createProvider } from "./providers.js";
|
|
9
|
+
export { runPipeline } from "./pipeline.js";
|
|
10
|
+
export type { PipelineOpts } from "./pipeline.js";
|
|
11
|
+
export * from "./store.js";
|
|
12
|
+
export * from "./types.js";
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context extraction pipeline — barrel export.
|
|
3
|
+
*/
|
|
4
|
+
export { redactPII } from "./redactor.js";
|
|
5
|
+
export { parseFile, parseTextContent, parseZipBuffer, chunkText } from "./parser.js";
|
|
6
|
+
export { extractBlocks } from "./extractor.js";
|
|
7
|
+
export { matchSkills, discoverSkills, searchSkills, getSkillCategories, getFullCatalog } from "./matcher.js";
|
|
8
|
+
export { createProvider } from "./providers.js";
|
|
9
|
+
export { runPipeline } from "./pipeline.js";
|
|
10
|
+
export * from "./store.js";
|
|
11
|
+
export * from "./types.js";
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill matching: cosine similarity on keyword overlap + TF-IDF-like scoring
|
|
3
|
+
* against both native and community skill catalogs.
|
|
4
|
+
*/
|
|
5
|
+
import type { ContextBlock, SkillMatch, ExtendedSkillCatalogEntry, ExtendedSkillMatch, SkillTier, SkillCategory } from "./types.js";
|
|
6
|
+
/**
|
|
7
|
+
* Load both catalogs combined. Exported for install lookups.
|
|
8
|
+
*/
|
|
9
|
+
export declare function getFullCatalog(): ExtendedSkillCatalogEntry[];
|
|
10
|
+
/**
|
|
11
|
+
* Match context blocks against the skill catalog (backward-compatible).
|
|
12
|
+
* Returns top matching skills sorted by score.
|
|
13
|
+
*/
|
|
14
|
+
export declare function matchSkills(blocks: ContextBlock[], topN?: number): Promise<SkillMatch[]>;
|
|
15
|
+
/**
|
|
16
|
+
* Enhanced skill discovery with tier boost and category affinity.
|
|
17
|
+
* Scores against BOTH native and community catalogs.
|
|
18
|
+
*/
|
|
19
|
+
export declare function discoverSkills(blocks: ContextBlock[], options?: {
|
|
20
|
+
category?: SkillCategory;
|
|
21
|
+
tier?: SkillTier;
|
|
22
|
+
limit?: number;
|
|
23
|
+
}): Promise<ExtendedSkillMatch[]>;
|
|
24
|
+
/**
|
|
25
|
+
* Full-text search across both skill catalogs.
|
|
26
|
+
*/
|
|
27
|
+
export declare function searchSkills(query: string, options?: {
|
|
28
|
+
tier?: SkillTier;
|
|
29
|
+
category?: SkillCategory;
|
|
30
|
+
}): ExtendedSkillMatch[];
|
|
31
|
+
/**
|
|
32
|
+
* Get category summary with counts.
|
|
33
|
+
*/
|
|
34
|
+
export declare function getSkillCategories(): Array<{
|
|
35
|
+
category: SkillCategory;
|
|
36
|
+
count: number;
|
|
37
|
+
nativeCount: number;
|
|
38
|
+
communityCount: number;
|
|
39
|
+
}>;
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill matching: cosine similarity on keyword overlap + TF-IDF-like scoring
|
|
3
|
+
* against both native and community skill catalogs.
|
|
4
|
+
*/
|
|
5
|
+
import * as fs from "fs";
|
|
6
|
+
import * as path from "path";
|
|
7
|
+
import { fileURLToPath } from "url";
|
|
8
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
9
|
+
const __dirname = path.dirname(__filename);
|
|
10
|
+
let nativeCatalogCache = null;
|
|
11
|
+
let communityCatalogCache = null;
|
|
12
|
+
/**
 * Try each candidate path in order and return the first file that both
 * reads and parses as JSON; returns null when none does.
 */
function loadJsonFile(candidates) {
    for (const candidate of candidates) {
        try {
            return JSON.parse(fs.readFileSync(candidate, "utf-8"));
        }
        catch {
            // Unreadable or malformed — fall through to the next candidate.
        }
    }
    return null;
}
|
|
24
|
+
/**
 * Load the native CrowdListen skill catalog (cached after first load) and
 * wrap each entry with the extended fields expected by discovery/search.
 */
function loadNativeCatalog() {
    if (nativeCatalogCache) {
        return nativeCatalogCache;
    }
    const entries = loadJsonFile([
        path.join(__dirname, "..", "..", "skills", "catalog.json"),
        path.join(__dirname, "..", "..", "dist", "skills", "catalog.json"),
    ]);
    if (!entries) {
        nativeCatalogCache = [];
        return nativeCatalogCache;
    }
    nativeCatalogCache = entries.map((entry) => ({
        ...entry,
        tier: "crowdlisten",
        category: "research", // native skills are research/audience intel
        installMethod: "copy",
        installTarget: `crowdlisten:${entry.id}`,
        source: "crowdlisten/native",
    }));
    return nativeCatalogCache;
}
|
|
48
|
+
/**
 * Load the community skill catalog (cached after first load).
 * Falls back to an empty catalog when neither candidate file is readable.
 */
function loadCommunityCatalog() {
    if (communityCatalogCache) {
        return communityCatalogCache;
    }
    const entries = loadJsonFile([
        path.join(__dirname, "..", "..", "skills", "community-catalog.json"),
        path.join(__dirname, "..", "..", "dist", "skills", "community-catalog.json"),
    ]);
    communityCatalogCache = entries || [];
    return communityCatalogCache;
}
|
|
61
|
+
/**
 * Load both catalogs combined. Exported for install lookups.
 */
export function getFullCatalog() {
    return loadFullCatalog();
}
/** Native skills first, then community skills. */
function loadFullCatalog() {
    return loadNativeCatalog().concat(loadCommunityCatalog());
}
|
|
70
|
+
/**
 * Tokenize block titles + contents into lowercase keywords for matching.
 * Anything other than [a-z0-9-] is treated as a separator; only tokens
 * longer than 3 characters are kept.
 */
function extractBlockKeywords(blocks) {
    return blocks.flatMap((block) => `${block.title} ${block.content}`
        .toLowerCase()
        .replace(/[^a-z0-9\s-]/g, " ")
        .split(/\s+/)
        .filter((token) => token.length > 3));
}
|
|
85
|
+
/**
 * Keyword seeds per skill category, consumed by categoryAffinity() to
 * estimate how strongly a set of context keywords leans toward each
 * category. Matching there is bidirectional substring containment, so
 * short seeds such as "api", "sql", "ui" can still match longer block
 * tokens even though extractBlockKeywords() drops words of length <= 3.
 */
const CATEGORY_KEYWORD_MAP = {
    development: ["code", "debug", "refactor", "typescript", "javascript", "react", "component", "testing", "git", "api", "function", "class"],
    data: ["data", "database", "sql", "analytics", "machine", "learning", "model", "statistics", "visualization", "pipeline"],
    content: ["content", "writing", "blog", "seo", "copy", "editorial", "article", "social", "media"],
    research: ["research", "analysis", "market", "competitive", "audience", "insights", "trends"],
    automation: ["deploy", "docker", "kubernetes", "cicd", "pipeline", "terraform", "cloud", "monitoring", "automation"],
    design: ["design", "ui", "ux", "accessibility", "css", "layout", "responsive", "figma"],
    business: ["strategy", "pricing", "marketing", "sales", "growth", "funnel", "acquisition", "revenue"],
    productivity: ["workflow", "documentation", "onboarding", "planning", "template", "organization"],
};
|
|
98
|
+
/**
 * Fraction (0..1) of a category's seed keywords that match any block
 * keyword, where a match is bidirectional substring containment.
 * Unknown categories and empty seed lists score 0.
 */
function categoryAffinity(blockKeywords, category) {
    const seeds = CATEGORY_KEYWORD_MAP[category] || [];
    if (seeds.length === 0) {
        return 0;
    }
    const uniqueWords = new Set(blockKeywords);
    let hits = 0;
    for (const seed of seeds) {
        for (const word of uniqueWords) {
            if (word.includes(seed) || seed.includes(word)) {
                hits++;
                break;
            }
        }
    }
    return hits / seeds.length;
}
|
|
112
|
+
/**
 * Score a skill's keywords against the block keywords (0..1).
 * A skill keyword counts as matched on an exact hit or on bidirectional
 * substring containment with any block keyword. The score is the matched
 * fraction of the skill's keywords; matchedKeywords is deduplicated.
 */
function keywordOverlapScore(blockKeywords, skillKeywords) {
    if (skillKeywords.length === 0) {
        return { score: 0, matchedKeywords: [] };
    }
    const blockSet = new Set(blockKeywords);
    const matched = [];
    for (const kw of skillKeywords) {
        if (blockSet.has(kw)) {
            // Exact hit — no need to scan for partials.
            matched.push(kw);
            continue;
        }
        for (const bk of blockSet) {
            if (bk.includes(kw) || kw.includes(bk)) {
                matched.push(kw);
                break;
            }
        }
    }
    return {
        score: matched.length / skillKeywords.length,
        matchedKeywords: [...new Set(matched)],
    };
}
|
|
139
|
+
/**
 * Backward-compatible wrapper around discoverSkills(): returns the top
 * matches with the extended fields (tier/category/install info) stripped,
 * so the result conforms to the original SkillMatch shape.
 */
export async function matchSkills(blocks, topN = 5) {
    const extended = await discoverSkills(blocks, { limit: topN });
    return extended.map((match) => ({
        skillId: match.skillId,
        name: match.name,
        description: match.description,
        score: match.score,
        matchedKeywords: match.matchedKeywords,
    }));
}
|
|
148
|
+
/**
 * Enhanced skill discovery across both catalogs with optional tier and
 * category filters. Final score = keyword-overlap score + 0.10 boost for
 * native (crowdlisten) skills + up to 0.05 category-affinity bonus,
 * capped at 1. Entries scoring <= 0.05 are dropped; results are sorted
 * by score descending and truncated to options.limit (default 10).
 */
export async function discoverSkills(blocks, options = {}) {
    let candidates = loadFullCatalog();
    if (candidates.length === 0 || blocks.length === 0) {
        return [];
    }
    if (options.tier) {
        candidates = candidates.filter((skill) => skill.tier === options.tier);
    }
    if (options.category) {
        candidates = candidates.filter((skill) => skill.category === options.category);
    }
    const blockKeywords = extractBlockKeywords(blocks);
    const matches = [];
    for (const skill of candidates) {
        const { score, matchedKeywords } = keywordOverlapScore(blockKeywords, skill.keywords);
        const nativeBoost = skill.tier === "crowdlisten" ? 0.10 : 0;
        const affinityBoost = categoryAffinity(blockKeywords, skill.category) * 0.05;
        const total = Math.min(score + nativeBoost + affinityBoost, 1);
        if (total > 0.05) {
            matches.push({
                skillId: skill.id,
                name: skill.name,
                description: skill.description,
                score: Math.round(total * 100) / 100,
                matchedKeywords,
                tier: skill.tier,
                category: skill.category,
                installMethod: skill.installMethod,
                installTarget: skill.installTarget,
            });
        }
    }
    matches.sort((a, b) => b.score - a.score);
    return matches.slice(0, options.limit || 10);
}
|
|
188
|
+
/**
 * Case-insensitive substring search across the name, description, and
 * keywords of both catalogs, with optional tier/category filters.
 * Relevance = 0.2 base + 0.3 name hit + 0.15 description hit
 * + 0.1 keyword hit, capped at 1; results sorted by score descending.
 */
export function searchSkills(query, options = {}) {
    let candidates = loadFullCatalog();
    if (options.tier) {
        candidates = candidates.filter((skill) => skill.tier === options.tier);
    }
    if (options.category) {
        candidates = candidates.filter((skill) => skill.category === options.category);
    }
    const needle = query.toLowerCase();
    const hits = [];
    for (const skill of candidates) {
        const haystack = `${skill.name} ${skill.description} ${skill.keywords.join(" ")}`.toLowerCase();
        if (!haystack.includes(needle)) {
            continue;
        }
        let relevance = 0.2;
        if (skill.name.toLowerCase().includes(needle)) {
            relevance += 0.3;
        }
        if (skill.description.toLowerCase().includes(needle)) {
            relevance += 0.15;
        }
        if (skill.keywords.some((kw) => kw.includes(needle))) {
            relevance += 0.1;
        }
        hits.push({
            skillId: skill.id,
            name: skill.name,
            description: skill.description,
            score: Math.round(Math.min(relevance, 1) * 100) / 100,
            matchedKeywords: skill.keywords.filter((kw) => kw.includes(needle)),
            tier: skill.tier,
            category: skill.category,
            installMethod: skill.installMethod,
            installTarget: skill.installTarget,
        });
    }
    hits.sort((a, b) => b.score - a.score);
    return hits;
}
|
|
223
|
+
/**
 * Summarize the combined catalog by category with total, native, and
 * community counts, sorted by total count descending.
 */
export function getSkillCategories() {
    const tally = new Map();
    for (const skill of loadFullCatalog()) {
        let entry = tally.get(skill.category);
        if (!entry) {
            entry = { total: 0, native: 0, community: 0 };
            tally.set(skill.category, entry);
        }
        entry.total++;
        if (skill.tier === "crowdlisten") {
            entry.native++;
        }
        else {
            entry.community++;
        }
    }
    return [...tally.entries()]
        .map(([category, entry]) => ({
            category,
            count: entry.total,
            nativeCount: entry.native,
            communityCount: entry.community,
        }))
        .sort((a, b) => b.count - a.count);
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse chat exports (.json, .zip, .txt, .md, .pdf) and chunk text.
|
|
3
|
+
* Ported from crowdlisten_deployed/frontend/src/services/chatHistoryService.js
|
|
4
|
+
* Decoupled from browser File API — works with Node.js buffers/strings.
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Parse a single file from a buffer or string. Handles .json, .txt, .md.
|
|
8
|
+
* For .zip, use parseZipBuffer(). For .pdf, use parsePdfBuffer().
|
|
9
|
+
*/
|
|
10
|
+
export declare function parseTextContent(content: string, filename: string): string;
|
|
11
|
+
/**
|
|
12
|
+
* Parse a ZIP buffer containing chat exports.
|
|
13
|
+
* Requires jszip to be installed.
|
|
14
|
+
*/
|
|
15
|
+
export declare function parseZipBuffer(buffer: Buffer): Promise<string>;
|
|
16
|
+
/**
|
|
17
|
+
* Parse a PDF buffer into text.
|
|
18
|
+
* Requires pdf-parse to be installed.
|
|
19
|
+
*/
|
|
20
|
+
export declare function parsePdfBuffer(buffer: Buffer): Promise<string>;
|
|
21
|
+
/**
|
|
22
|
+
* Parse a file from disk by path.
|
|
23
|
+
*/
|
|
24
|
+
export declare function parseFile(filePath: string): Promise<string>;
|
|
25
|
+
/**
|
|
26
|
+
* Split text into chunks of ~maxChars at paragraph boundaries.
|
|
27
|
+
*/
|
|
28
|
+
export declare function chunkText(text: string, maxChars?: number): string[];
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse chat exports (.json, .zip, .txt, .md, .pdf) and chunk text.
|
|
3
|
+
* Ported from crowdlisten_deployed/frontend/src/services/chatHistoryService.js
|
|
4
|
+
* Decoupled from browser File API — works with Node.js buffers/strings.
|
|
5
|
+
*/
|
|
6
|
+
const BATCH_CHAR_LIMIT = 12_000;
|
|
7
|
+
// ─── Parsers ──────────────────────────────────────────────────────────────────
|
|
8
|
+
/**
 * Flatten a ChatGPT export (conversations carrying a `mapping` of nodes)
 * into plain text: user and assistant message parts only, with "---"
 * separators between messages.
 */
function parseChatGPTExport(json) {
    const conversations = Array.isArray(json) ? json : [json];
    const texts = [];
    for (const conversation of conversations) {
        const mapping = conversation?.mapping;
        if (!mapping) {
            continue;
        }
        for (const node of Object.values(mapping)) {
            const message = node?.message;
            const parts = message?.content?.parts;
            if (!parts?.length) {
                continue;
            }
            const role = message.author?.role;
            if (role === "user" || role === "assistant") {
                texts.push(parts.join("\n"));
            }
        }
    }
    return texts.join("\n\n---\n\n");
}
|
|
28
|
+
/**
 * Flatten a Claude export into plain text. Each conversation carries
 * messages under `chat_messages` or `messages`; message text may be a
 * plain `text` field, a string `content`, or an array of content parts
 * (only `type: "text"` parts are kept). Messages joined with "---".
 */
function parseClaudeExport(json) {
    const conversations = Array.isArray(json) ? json : [json];
    const texts = [];
    for (const conversation of conversations) {
        const messages = conversation.chat_messages || conversation.messages || [];
        for (const message of messages) {
            let text = message.text || null;
            if (!text) {
                if (typeof message.content === "string") {
                    text = message.content;
                }
                else if (Array.isArray(message.content)) {
                    text = message.content
                        .filter((part) => part.type === "text")
                        .map((part) => part.text)
                        .join("\n");
                }
            }
            if (text) {
                texts.push(text);
            }
        }
    }
    return texts.join("\n\n---\n\n");
}
|
|
53
|
+
/**
 * Sniff the export format from the first record and dispatch:
 * a `mapping` key means ChatGPT; `chat_messages` (or `messages` plus
 * `uuid`) means Claude; anything else is pretty-printed verbatim.
 */
function parseJSON(json) {
    const first = (Array.isArray(json) ? json : [json])[0] || {};
    if (first.mapping) {
        return parseChatGPTExport(json);
    }
    if (first.chat_messages || (first.messages && first.uuid)) {
        return parseClaudeExport(json);
    }
    return JSON.stringify(json, null, 2);
}
|
|
65
|
+
// ─── File Parsing ────────────────────────────────────────────────────────────
|
|
66
|
+
/**
 * Parse raw text content by file extension. ".json" is parsed and
 * dispatched through parseJSON(); unparseable JSON and every other
 * extension are returned verbatim.
 * For .zip use parseZipBuffer(); for .pdf use parsePdfBuffer().
 */
export function parseTextContent(content, filename) {
    const ext = filename.split(".").pop()?.toLowerCase();
    if (ext !== "json") {
        return content;
    }
    try {
        return parseJSON(JSON.parse(content));
    }
    catch {
        // Not valid JSON after all — treat as plain text.
        return content;
    }
}
|
|
82
|
+
/**
 * Extract every non-directory entry from a ZIP buffer and concatenate the
 * parsed text with "---" separators. JSON entries are dispatched through
 * parseJSON(); everything else (including malformed JSON) is included
 * verbatim. Requires jszip to be installed.
 */
export async function parseZipBuffer(buffer) {
    const JSZip = (await import("jszip")).default;
    const zip = await JSZip.loadAsync(buffer);
    const texts = [];
    for (const [name, entry] of Object.entries(zip.files)) {
        if (entry.dir) {
            continue;
        }
        const content = await entry.async("string");
        if (!name.endsWith(".json")) {
            texts.push(content);
            continue;
        }
        try {
            texts.push(parseJSON(JSON.parse(content)));
        }
        catch {
            texts.push(content);
        }
    }
    return texts.join("\n\n---\n\n");
}
|
|
108
|
+
/**
 * Extract the text of a PDF buffer. Requires pdf-parse to be installed.
 */
export async function parsePdfBuffer(buffer) {
    const pdfParse = (await import("pdf-parse")).default;
    const parsed = await pdfParse(buffer);
    return parsed.text;
}
|
|
117
|
+
/**
 * Read a file from disk and parse it by extension: .zip and .pdf are
 * handled as binary buffers, everything else is read as UTF-8 text and
 * routed through parseTextContent().
 */
export async function parseFile(filePath) {
    const fs = await import("fs/promises");
    const ext = filePath.split(".").pop()?.toLowerCase();
    switch (ext) {
        case "zip":
            return parseZipBuffer(await fs.readFile(filePath));
        case "pdf":
            return parsePdfBuffer(await fs.readFile(filePath));
        default:
            return parseTextContent(await fs.readFile(filePath, "utf-8"), filePath);
    }
}
|
|
134
|
+
// ─── Chunking ────────────────────────────────────────────────────────────────
|
|
135
|
+
/**
 * Split text into chunks of at most ~maxChars, preferring to break at a
 * paragraph boundary ("\n\n") when one falls in the second half of the
 * window. The paragraph separator is carried at the start of the next
 * chunk, so concatenating the chunks reproduces the input exactly.
 *
 * @param text - Input text.
 * @param maxChars - Approximate chunk size; must be a positive number.
 * @returns Chunks whose concatenation equals the input.
 * @throws {RangeError} When maxChars is not a positive finite number.
 */
export function chunkText(text, maxChars = BATCH_CHAR_LIMIT) {
    if (!Number.isFinite(maxChars) || maxChars <= 0) {
        // Guard: a non-positive window would make the loop below never
        // advance (end === start), pushing empty slices forever.
        throw new RangeError(`maxChars must be a positive number, got ${maxChars}`);
    }
    if (text.length <= maxChars) {
        return [text];
    }
    const chunks = [];
    let start = 0;
    while (start < text.length) {
        let end = start + maxChars;
        if (end >= text.length) {
            chunks.push(text.slice(start));
            break;
        }
        // Prefer the last paragraph break inside the window, but only when
        // it keeps the chunk at least half full (avoids tiny chunks).
        const breakPoint = text.lastIndexOf("\n\n", end);
        if (breakPoint > start + maxChars * 0.5) {
            end = breakPoint;
        }
        chunks.push(text.slice(start, end));
        start = end;
    }
    return chunks;
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Orchestrator: parse -> redact -> chunk -> extract -> match -> store
|
|
3
|
+
*/
|
|
4
|
+
import type { ContextConfig, PipelineResult } from "./types.js";
|
|
5
|
+
export interface PipelineOpts {
    /** Raw text input (from paste or direct input) */
    text?: string;
    /** File path to process — NOTE(review): presumably an alternative to `text`, with one of the two required; confirm against the runPipeline implementation */
    filePath?: string;
    /** Source label for tracking */
    source?: string;
    /** Override config (otherwise loads from ~/.crowdlisten/config.json) */
    config?: ContextConfig;
    /** Whether input is chat history (uses 4-type prompt) */
    isChat?: boolean;
    /** Progress callback — invoked per processed chunk (CLI prints "Chunk current/total" from it) with the running count of extracted blocks */
    onProgress?: (progress: {
        current: number;
        total: number;
        blocksFound: number;
    }) => void;
}
|
|
23
|
+
/**
|
|
24
|
+
* Run the full context extraction pipeline.
|
|
25
|
+
*/
|
|
26
|
+
export declare function runPipeline(opts: PipelineOpts): Promise<PipelineResult>;
|