sc-research 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +11 -11
- package/README.md +3 -1
- package/dist/cli.js +7 -4
- package/package.json +8 -6
- package/templates/base/commands/controversy.md +1 -1
- package/templates/base/commands/deep-research.md +1 -1
- package/templates/base/commands/discovery.md +1 -1
- package/templates/base/commands/rank.md +1 -1
- package/templates/base/commands/sentiment.md +1 -1
- package/templates/base/commands/trend.md +1 -1
- package/templates/base/manifest.json +34 -34
- package/templates/base/skills/social_media_controversy.md +94 -0
- package/templates/base/skills/social_media_discovery.md +91 -0
- package/templates/base/skills/social_media_fetch.md +82 -0
- package/templates/base/skills/social_media_rank.md +94 -0
- package/templates/base/skills/social_media_research_test.md +52 -0
- package/templates/base/skills/social_media_sentiment.md +93 -0
- package/templates/base/skills/social_media_trend.md +87 -0
- package/templates/base/skills/social_media_visualize.md +63 -0
- package/templates/base/skills/using_social_media_research.md +115 -0
- package/templates/base/skills/communities_controversy.md +0 -65
- package/templates/base/skills/communities_discovery.md +0 -55
- package/templates/base/skills/communities_fetch.md +0 -56
- package/templates/base/skills/communities_rank.md +0 -57
- package/templates/base/skills/communities_research_test.md +0 -64
- package/templates/base/skills/communities_sentiment.md +0 -61
- package/templates/base/skills/communities_trend.md +0 -71
- package/templates/base/skills/communities_visualize.md +0 -46
- package/templates/base/skills/using_communities_research.md +0 -146
package/CLAUDE.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Social Media Research Skill
|
|
2
2
|
|
|
3
3
|
> **"The Skill provides the catch; the Agent cooks the meal."**
|
|
4
4
|
|
|
@@ -15,7 +15,7 @@ This project is a headless social media research tool designed for **Claude Code
|
|
|
15
15
|
This project uses `bun`.
|
|
16
16
|
|
|
17
17
|
### Environment Variables
|
|
18
|
-
Required in `.
|
|
18
|
+
Required in `.sc-research`:
|
|
19
19
|
- `OPENAI_API_KEY`: For Reddit URL discovery and general LLM ops
|
|
20
20
|
- `XAI_API_KEY`: (Optional) For X/Twitter data
|
|
21
21
|
|
|
@@ -38,16 +38,16 @@ Required in `.env`:
|
|
|
38
38
|
|
|
39
39
|
## Architecture
|
|
40
40
|
|
|
41
|
-
- **Orchestrator**: `
|
|
41
|
+
- **Orchestrator**: `using_social_media_research` (The routing brain)
|
|
42
42
|
- **Workers**:
|
|
43
|
-
- `
|
|
44
|
-
- `
|
|
45
|
-
- `
|
|
46
|
-
- `
|
|
47
|
-
- `
|
|
48
|
-
- `
|
|
49
|
-
- `
|
|
50
|
-
- `
|
|
43
|
+
- `social_media_fetch` (Data provider)
|
|
44
|
+
- `social_media_rank` (Ranking analysis)
|
|
45
|
+
- `social_media_sentiment` (Sentiment analysis)
|
|
46
|
+
- `social_media_trend` (Timeline analysis)
|
|
47
|
+
- `social_media_controversy` (Debate analysis)
|
|
48
|
+
- `social_media_discovery` (Viral topic clustering)
|
|
49
|
+
- `social_media_visualize` (Dashboard launcher)
|
|
50
|
+
- `social_media_research_test` (Legacy/fixed-link test helper)
|
|
51
51
|
|
|
52
52
|
## Testing Rules
|
|
53
53
|
|
package/README.md
CHANGED
|
@@ -99,13 +99,15 @@ console.log(results);
|
|
|
99
99
|
|
|
100
100
|
## Configuration
|
|
101
101
|
|
|
102
|
-
Set the following environment variables in your `.
|
|
102
|
+
Set the following environment variables in your `.sc-research` file:
|
|
103
103
|
|
|
104
104
|
```env
|
|
105
105
|
OPENAI_API_KEY=sk-... # Required for Reddit (URL Discovery)
|
|
106
106
|
XAI_API_KEY=... # Required for X (Twitter)
|
|
107
107
|
```
|
|
108
108
|
|
|
109
|
+
Optional override: set `SC_RESEARCH_ENV_FILE` to use a different env file path.
|
|
110
|
+
|
|
109
111
|
## License
|
|
110
112
|
|
|
111
113
|
MIT
|
package/dist/cli.js
CHANGED
|
@@ -401,7 +401,7 @@ async function runInit(opts) {
|
|
|
401
401
|
logger.info(` Scripts overwritten: ${packageSummary.scriptsOverwritten}`);
|
|
402
402
|
logger.info("");
|
|
403
403
|
logger.info("Next steps:");
|
|
404
|
-
logger.info(" 1) Ensure your project has required env vars set:");
|
|
404
|
+
logger.info(" 1) Ensure your project has required env vars set (for example in .sc-research):");
|
|
405
405
|
logger.info(" - OPENAI_API_KEY");
|
|
406
406
|
logger.info(" - XAI_API_KEY (optional, for X)");
|
|
407
407
|
logger.info(" 2) You can now run from this project:");
|
|
@@ -492,6 +492,8 @@ function sanitizePackageName(name) {
|
|
|
492
492
|
}
|
|
493
493
|
|
|
494
494
|
// src/entries/cli.ts
|
|
495
|
+
var DEFAULT_ENV_FILE = ".sc-research";
|
|
496
|
+
var ENV_FILE_OVERRIDE_VAR = "SC_RESEARCH_ENV_FILE";
|
|
495
497
|
async function main() {
|
|
496
498
|
const args = process3.argv.slice(2);
|
|
497
499
|
if (args.length === 1 && (args[0] === "--version" || args[0] === "-v") || args[0] === "version") {
|
|
@@ -627,7 +629,7 @@ async function runPackagedEntry(entryFile, args) {
|
|
|
627
629
|
const logger = new Logger;
|
|
628
630
|
const entryPath = resolvePackagedEntry(entryFile);
|
|
629
631
|
const resolvedEntryPath = entryPath ?? logger.exitWithError(`Internal error: could not find packaged entry "${entryFile}". Rebuild/reinstall sc-research and retry.`);
|
|
630
|
-
const dotEnvVars =
|
|
632
|
+
const dotEnvVars = loadProjectEnv(process3.cwd());
|
|
631
633
|
const result = spawnSync("node", [resolvedEntryPath, ...args], {
|
|
632
634
|
stdio: "inherit",
|
|
633
635
|
env: { ...dotEnvVars, ...process3.env }
|
|
@@ -651,8 +653,9 @@ function resolvePackagedEntry(entryFile) {
|
|
|
651
653
|
}
|
|
652
654
|
return null;
|
|
653
655
|
}
|
|
654
|
-
function
|
|
655
|
-
const
|
|
656
|
+
function loadProjectEnv(cwd) {
|
|
657
|
+
const configuredPath = process3.env[ENV_FILE_OVERRIDE_VAR]?.trim();
|
|
658
|
+
const envPath = configuredPath ? path5.isAbsolute(configuredPath) ? configuredPath : path5.join(cwd, configuredPath) : path5.join(cwd, DEFAULT_ENV_FILE);
|
|
656
659
|
if (!fs5.existsSync(envPath))
|
|
657
660
|
return {};
|
|
658
661
|
const out = {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sc-research",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.4",
|
|
4
4
|
"description": "Headless Social Media Research Data Provider for AI Agents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -15,11 +15,13 @@
|
|
|
15
15
|
"sc-research": "dist/cli.js"
|
|
16
16
|
},
|
|
17
17
|
"scripts": {
|
|
18
|
-
"research": "bun src/entries/research.ts",
|
|
19
|
-
"research:deep": "bun src/entries/research.ts --depth=deep",
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
18
|
+
"research": "bun --env-file=.sc-research src/entries/research.ts",
|
|
19
|
+
"research:deep": "bun --env-file=.sc-research src/entries/research.ts --depth=deep",
|
|
20
|
+
"init": "node dist/cli.js init",
|
|
21
|
+
"init:all": "node dist/cli.js init --ai all",
|
|
22
|
+
"visualize": "bun --env-file=.sc-research src/entries/visualize.ts",
|
|
23
|
+
"test:fixed": "bun --env-file=.sc-research src/test/manual-fixed-links.ts",
|
|
24
|
+
"test:x": "bun --env-file=.sc-research src/test/manual-xai.ts",
|
|
23
25
|
"build:index": "bun build src/entries/research.ts --outfile dist/index.js --target node",
|
|
24
26
|
"build:visualize": "bun build src/entries/visualize.ts --outdir dist --target node",
|
|
25
27
|
"build:test:fixed": "bun build src/test/manual-fixed-links.ts --outdir dist/test --target node",
|
|
@@ -12,7 +12,7 @@ description: Find controversial debates in existing research data.
|
|
|
12
12
|
|
|
13
13
|
3. Run the controversy skill
|
|
14
14
|
|
|
15
|
-
> Use the `
|
|
15
|
+
> Use the `social_media_controversy` skill to identify polarizing topics, extract opposing quotes, and generate `classified_controversy.json`.
|
|
16
16
|
|
|
17
17
|
4. Validate output schema
|
|
18
18
|
|
|
@@ -8,7 +8,7 @@ description: Deeply research a topic and route to the best analysis template. Th
|
|
|
8
8
|
|
|
9
9
|
2. Route to the right analysis mode
|
|
10
10
|
|
|
11
|
-
> Follow **Intent โ Template Routing** rules in `
|
|
11
|
+
> Follow **Intent โ Template Routing** rules in `using_social_media_research`.
|
|
12
12
|
>
|
|
13
13
|
> - If the user explicitly asks for "full analysis", "everything", or "all views": run all 4 templates in order (`rank` -> `sentiment` -> `trend` -> `controversy`).
|
|
14
14
|
> - Otherwise: pick the SINGLE most suitable template for the user's question.
|
|
@@ -12,7 +12,7 @@ description: Discover viral topics and emerging themes from existing research da
|
|
|
12
12
|
|
|
13
13
|
3. Run the discovery skill
|
|
14
14
|
|
|
15
|
-
> Use the `
|
|
15
|
+
> Use the `social_media_discovery` skill to cluster posts by topic and generate `classified_discovery.json`.
|
|
16
16
|
|
|
17
17
|
4. Validate output schema
|
|
18
18
|
|
|
@@ -12,7 +12,7 @@ description: Rank and classify existing research data.
|
|
|
12
12
|
|
|
13
13
|
3. Run the ranking skill
|
|
14
14
|
|
|
15
|
-
> Use the `
|
|
15
|
+
> Use the `social_media_rank` skill to generate `classified_rank.json` from the existing data files.
|
|
16
16
|
|
|
17
17
|
4. Validate output schema
|
|
18
18
|
|
|
@@ -12,7 +12,7 @@ description: Analyze sentiment from existing research data.
|
|
|
12
12
|
|
|
13
13
|
3. Run the sentiment skill
|
|
14
14
|
|
|
15
|
-
> Use the `
|
|
15
|
+
> Use the `social_media_sentiment` skill to analyze the raw data and generate `classified_sentiment.json`.
|
|
16
16
|
|
|
17
17
|
4. Validate output schema
|
|
18
18
|
|
|
@@ -12,7 +12,7 @@ description: Analyze discussion trends over time from existing research data.
|
|
|
12
12
|
|
|
13
13
|
3. Run the trend skill
|
|
14
14
|
|
|
15
|
-
> Use the `
|
|
15
|
+
> Use the `social_media_trend` skill to parse dates, choose an adaptive granularity (day/week/month), and generate `classified_trend.json`.
|
|
16
16
|
|
|
17
17
|
4. Validate output schema
|
|
18
18
|
|
|
@@ -1,65 +1,65 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
{
|
|
11
|
-
"id": "
|
|
2
|
+
"version": 1,
|
|
3
|
+
"templates": [
|
|
4
|
+
{
|
|
5
|
+
"id": "social_media_fetch",
|
|
6
|
+
"kind": "skill",
|
|
7
|
+
"description": "Worker skill that fetches raw discussion data from Reddit and X (Twitter) for a given topic. Returns raw JSON files.",
|
|
8
|
+
"bodyFile": "skills/social_media_fetch.md"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"id": "social_media_rank",
|
|
12
12
|
"kind": "skill",
|
|
13
13
|
"description": "Analyze raw social media data (Reddit/X) to produce a ranked, classified report with strict JSON output.",
|
|
14
|
-
"bodyFile": "skills/
|
|
14
|
+
"bodyFile": "skills/social_media_rank.md"
|
|
15
15
|
},
|
|
16
16
|
{
|
|
17
|
-
"id": "
|
|
17
|
+
"id": "social_media_sentiment",
|
|
18
18
|
"kind": "skill",
|
|
19
19
|
"description": "Worker skill that analyzes raw social media data to produce a sentiment breakdown report with strict JSON output.",
|
|
20
|
-
"bodyFile": "skills/
|
|
20
|
+
"bodyFile": "skills/social_media_sentiment.md"
|
|
21
21
|
},
|
|
22
22
|
{
|
|
23
|
-
"id": "
|
|
23
|
+
"id": "social_media_trend",
|
|
24
24
|
"kind": "skill",
|
|
25
25
|
"description": "Worker skill that analyzes raw social media data to produce a trend timeline report with strict JSON output.",
|
|
26
|
-
"bodyFile": "skills/
|
|
26
|
+
"bodyFile": "skills/social_media_trend.md"
|
|
27
27
|
},
|
|
28
28
|
{
|
|
29
|
-
"id": "
|
|
29
|
+
"id": "social_media_controversy",
|
|
30
30
|
"kind": "skill",
|
|
31
31
|
"description": "Worker skill that analyzes raw social media data to identify polarizing topics and produce a controversy map with strict JSON output.",
|
|
32
|
-
"bodyFile": "skills/
|
|
32
|
+
"bodyFile": "skills/social_media_controversy.md"
|
|
33
33
|
},
|
|
34
34
|
{
|
|
35
|
-
"id": "
|
|
35
|
+
"id": "social_media_discovery",
|
|
36
36
|
"kind": "skill",
|
|
37
37
|
"description": "Worker skill that analyzes raw social media data to discover and cluster high-signal emerging topics.",
|
|
38
|
-
"bodyFile": "skills/
|
|
38
|
+
"bodyFile": "skills/social_media_discovery.md"
|
|
39
39
|
},
|
|
40
40
|
{
|
|
41
|
-
"id": "
|
|
41
|
+
"id": "social_media_visualize",
|
|
42
42
|
"kind": "skill",
|
|
43
43
|
"description": "Worker skill that launches a local web dashboard to visualize all available classified research data.",
|
|
44
|
-
"bodyFile": "skills/
|
|
44
|
+
"bodyFile": "skills/social_media_visualize.md"
|
|
45
45
|
},
|
|
46
46
|
{
|
|
47
|
-
"id": "
|
|
47
|
+
"id": "social_media_research_test",
|
|
48
48
|
"kind": "skill",
|
|
49
|
-
"description": "Test the
|
|
50
|
-
"bodyFile": "skills/
|
|
49
|
+
"description": "Test the social media research skill with fixed Reddit links (no API key needed). Fetches data and returns JSON for AI classification.",
|
|
50
|
+
"bodyFile": "skills/social_media_research_test.md"
|
|
51
51
|
},
|
|
52
52
|
{
|
|
53
|
-
"id": "
|
|
53
|
+
"id": "using_social_media_research",
|
|
54
54
|
"kind": "skill",
|
|
55
|
-
"description": "
|
|
56
|
-
"bodyFile": "skills/
|
|
55
|
+
"description": "Entrypoint router for social-media research questions. Use when user asks what Reddit/X users think (best/top, compare, sentiment, trend, controversy, discovery, quick summary, full analysis) and route to the correct worker + fetch strategy.",
|
|
56
|
+
"bodyFile": "skills/using_social_media_research.md"
|
|
57
57
|
},
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
58
|
+
{
|
|
59
|
+
"id": "research",
|
|
60
|
+
"kind": "command",
|
|
61
|
+
"description": "Research a topic using the Quick Answer flow.",
|
|
62
|
+
"bodyFile": "commands/research.md"
|
|
63
63
|
},
|
|
64
64
|
{
|
|
65
65
|
"id": "quick",
|
|
@@ -114,6 +114,6 @@
|
|
|
114
114
|
"kind": "command",
|
|
115
115
|
"description": "Run fixed-link test research pipeline.",
|
|
116
116
|
"bodyFile": "commands/test-research.md"
|
|
117
|
-
|
|
118
|
-
|
|
117
|
+
}
|
|
118
|
+
]
|
|
119
119
|
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: social_media_controversy
|
|
3
|
+
description: Analyze existing Reddit/X raw data to identify divisive topics and generate `classified_controversy.json` with strict `ControversyData` output. Use for debate, disagreement, or polarizing-topic requests.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Social Media Controversy Skill
|
|
7
|
+
|
|
8
|
+
This worker maps where social media opinions conflict and presents both sides with evidence.
|
|
9
|
+
|
|
10
|
+
## Required Inputs
|
|
11
|
+
|
|
12
|
+
Use existing raw files only:
|
|
13
|
+
|
|
14
|
+
- `reddit_data.json`
|
|
15
|
+
- `x_data.json`
|
|
16
|
+
|
|
17
|
+
At least one valid source file must exist.
|
|
18
|
+
|
|
19
|
+
## Step 1: Preflight Validation
|
|
20
|
+
|
|
21
|
+
1. Parse each available source file.
|
|
22
|
+
2. Confirm top-level `items` arrays.
|
|
23
|
+
3. Skip malformed records and track skipped count.
|
|
24
|
+
4. Stop if no usable discussion text remains.
|
|
25
|
+
|
|
26
|
+
## Step 2: Lock Output Schema
|
|
27
|
+
|
|
28
|
+
Read `web/src/types.ts` and treat `ControversyData` as source of truth.
|
|
29
|
+
|
|
30
|
+
Required top-level fields:
|
|
31
|
+
|
|
32
|
+
- `topic`
|
|
33
|
+
- `overall_divisiveness`
|
|
34
|
+
- `controversies`
|
|
35
|
+
|
|
36
|
+
## Step 3: Identify Genuine Controversies
|
|
37
|
+
|
|
38
|
+
Find 2-5 topics with clear opposing viewpoints.
|
|
39
|
+
|
|
40
|
+
Good signals:
|
|
41
|
+
|
|
42
|
+
- explicit disagreement language
|
|
43
|
+
- conflicting claims about the same product/theme
|
|
44
|
+
- platform splits (Reddit vs X)
|
|
45
|
+
- high-engagement threads arguing opposite positions
|
|
46
|
+
|
|
47
|
+
Do not manufacture controversy where consensus is strong.
|
|
48
|
+
|
|
49
|
+
## Step 4: Structure Each Controversy
|
|
50
|
+
|
|
51
|
+
For each controversy:
|
|
52
|
+
|
|
53
|
+
- `topic`: concise debate label
|
|
54
|
+
- `heat_score`: `0-100` intensity score based on volume + engagement + disagreement strength
|
|
55
|
+
- `divisiveness`: `"Low" | "Medium" | "High"`
|
|
56
|
+
- `side_a` and `side_b` each with:
|
|
57
|
+
- `position`
|
|
58
|
+
- `supporter_count` (best estimate; `0` if unknown)
|
|
59
|
+
- `sample_quotes` (up to 3 real quotes)
|
|
60
|
+
|
|
61
|
+
Quote rules:
|
|
62
|
+
|
|
63
|
+
- Use real text, real author, real link.
|
|
64
|
+
- Prefer diverse arguments per side (not duplicates).
|
|
65
|
+
- If limited evidence exists, include fewer quotes instead of fabricating.
|
|
66
|
+
|
|
67
|
+
## Step 5: Set Overall Divisiveness
|
|
68
|
+
|
|
69
|
+
Set `overall_divisiveness` from the full set:
|
|
70
|
+
|
|
71
|
+
- `"High"` if multiple high-heat, strongly split controversies exist
|
|
72
|
+
- `"Medium"` for mixed or moderate splits
|
|
73
|
+
- `"Low"` if disagreements are minor or sparse
|
|
74
|
+
|
|
75
|
+
## Step 6: Write Output
|
|
76
|
+
|
|
77
|
+
Save strict JSON to:
|
|
78
|
+
|
|
79
|
+
- `classified_controversy.json`
|
|
80
|
+
|
|
81
|
+
## Final Validation Checklist
|
|
82
|
+
|
|
83
|
+
- JSON parse succeeds.
|
|
84
|
+
- Object matches `ControversyData` shape.
|
|
85
|
+
- Enum values use allowed casing exactly.
|
|
86
|
+
- Array fields are arrays (never null/undefined).
|
|
87
|
+
- Every quote references real source evidence.
|
|
88
|
+
|
|
89
|
+
## Critical Rules
|
|
90
|
+
|
|
91
|
+
1. **No external fetch**: analyze existing data only.
|
|
92
|
+
2. **No fabricated arguments**: only report controversies present in source text.
|
|
93
|
+
3. **No fabricated citations**: quote text, author, and link must be real.
|
|
94
|
+
4. **Schema strictness**: if instructions conflict with `types.ts`, `types.ts` wins.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: social_media_discovery
|
|
3
|
+
description: Analyze existing Reddit/X raw data to find emerging or viral themes and generate `classified_discovery.json` with strict `DiscoveryData` output.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Social Media Discovery Skill
|
|
7
|
+
|
|
8
|
+
This worker clusters noisy social discussions into trend themes that can be visualized.
|
|
9
|
+
|
|
10
|
+
## Required Inputs
|
|
11
|
+
|
|
12
|
+
Use existing raw files only:
|
|
13
|
+
|
|
14
|
+
- `reddit_data.json`
|
|
15
|
+
- `x_data.json`
|
|
16
|
+
|
|
17
|
+
At least one valid source file must exist.
|
|
18
|
+
|
|
19
|
+
## Step 1: Preflight Validation
|
|
20
|
+
|
|
21
|
+
1. Parse each available source file.
|
|
22
|
+
2. Confirm top-level `items` arrays.
|
|
23
|
+
3. Skip malformed records and track skipped count.
|
|
24
|
+
4. Stop if no usable posts remain.
|
|
25
|
+
|
|
26
|
+
## Step 2: Lock Schema
|
|
27
|
+
|
|
28
|
+
Read `web/src/types.ts` and follow `DiscoveryData` and `DiscoveryTopic` exactly.
|
|
29
|
+
|
|
30
|
+
Important enum constraints:
|
|
31
|
+
|
|
32
|
+
- `sentiment`: `"positive" | "negative" | "neutral" | "mixed"`
|
|
33
|
+
- `platform`: `"reddit" | "x"`
|
|
34
|
+
|
|
35
|
+
## Step 3: Cluster Topics
|
|
36
|
+
|
|
37
|
+
Create 3-8 meaningful topic clusters from actual post content.
|
|
38
|
+
|
|
39
|
+
- Merge near-duplicate themes.
|
|
40
|
+
- Ignore clear spam/noise.
|
|
41
|
+
- Prefer clusters with both relevance and engagement.
|
|
42
|
+
|
|
43
|
+
Each cluster should have:
|
|
44
|
+
|
|
45
|
+
- `id` (stable slug-like id)
|
|
46
|
+
- `topic_name`
|
|
47
|
+
- `description`
|
|
48
|
+
- `category`
|
|
49
|
+
|
|
50
|
+
## Step 4: Compute Topic Metrics
|
|
51
|
+
|
|
52
|
+
For each cluster:
|
|
53
|
+
|
|
54
|
+
- `engagement_score` = sum of per-item engagement where item engagement =
|
|
55
|
+
`max(score, likes, upvotes, 0) + comments + shares`
|
|
56
|
+
- `sentiment` using allowed discovery sentiment enum
|
|
57
|
+
|
|
58
|
+
Also compute top-level:
|
|
59
|
+
|
|
60
|
+
- `total_posts_analyzed`
|
|
61
|
+
- `period` (always string, derived from available date range)
|
|
62
|
+
|
|
63
|
+
## Step 5: Attach Evidence Content
|
|
64
|
+
|
|
65
|
+
For each topic include:
|
|
66
|
+
|
|
67
|
+
- `key_posts` (high-signal posts, preferably 1-3 entries)
|
|
68
|
+
- `highlight_comments` (up to 3 real excerpts with author/link/platform)
|
|
69
|
+
|
|
70
|
+
Use real source text only. If evidence is limited, include fewer entries instead of fabricating.
|
|
71
|
+
|
|
72
|
+
## Step 6: Write Output
|
|
73
|
+
|
|
74
|
+
Save strict JSON to:
|
|
75
|
+
|
|
76
|
+
- `classified_discovery.json`
|
|
77
|
+
|
|
78
|
+
## Final Validation Checklist
|
|
79
|
+
|
|
80
|
+
- JSON parse succeeds.
|
|
81
|
+
- Output matches `DiscoveryData` shape.
|
|
82
|
+
- All enum values are valid and correctly cased.
|
|
83
|
+
- Array fields are arrays (never null/undefined).
|
|
84
|
+
- Every quote/comment/link is traceable to raw input data.
|
|
85
|
+
|
|
86
|
+
## Critical Rules
|
|
87
|
+
|
|
88
|
+
1. **No external fetch**: do not run data collection here.
|
|
89
|
+
2. **No fabricated clusters or quotes**: everything must map to real evidence.
|
|
90
|
+
3. **Schema strictness**: `types.ts` is authoritative.
|
|
91
|
+
4. **Graceful fallback**: use empty arrays for missing optional evidence; never invent content.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: social_media_fetch
|
|
3
|
+
description: Worker skill that fetches raw discussion data from Reddit and X into `reddit_data.json` and `x_data.json`. Use before running rank, sentiment, trend, controversy, or discovery analysis.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Social Media Fetch Skill
|
|
7
|
+
|
|
8
|
+
This worker is the data-ingestion step for the pipeline. It fetches raw social data only and does not classify or analyze it.
|
|
9
|
+
|
|
10
|
+
## Inputs and Outputs
|
|
11
|
+
|
|
12
|
+
- **Input**: User topic and optional filters (`source`, `from/to`, `mode`)
|
|
13
|
+
- **Output files** (project root):
|
|
14
|
+
- `reddit_data.json`
|
|
15
|
+
- `x_data.json`
|
|
16
|
+
|
|
17
|
+
At least one output file must be produced for a successful fetch.
|
|
18
|
+
|
|
19
|
+
## Step 1: Choose Fetch Mode
|
|
20
|
+
|
|
21
|
+
- **Quick mode** (faster, lighter coverage):
|
|
22
|
+
- `sc-research research "TOPIC"`
|
|
23
|
+
- **Deep mode** (default for analysis workflows):
|
|
24
|
+
- `sc-research research:deep "TOPIC"`
|
|
25
|
+
- **Discovery mode** (theme clustering data):
|
|
26
|
+
- `sc-research research:deep "TOPIC" --mode=discovery`
|
|
27
|
+
|
|
28
|
+
## Step 2: Build Command with Optional Flags
|
|
29
|
+
|
|
30
|
+
Use flags only when requested:
|
|
31
|
+
|
|
32
|
+
- `--source=reddit|x|both`
|
|
33
|
+
- `--from=YYYY-MM-DD --to=YYYY-MM-DD`
|
|
34
|
+
- `--mode=discovery`
|
|
35
|
+
|
|
36
|
+
Examples:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
sc-research research:deep "wireless earbuds"
|
|
40
|
+
sc-research research:deep "wireless earbuds" --source=reddit --from=2025-01-01 --to=2025-12-31
|
|
41
|
+
sc-research research:deep "wireless earbuds" --mode=discovery --source=both
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Step 3: Validate Fetch Results
|
|
45
|
+
|
|
46
|
+
After running the command, verify each produced source file:
|
|
47
|
+
|
|
48
|
+
1. File exists.
|
|
49
|
+
2. JSON is parseable.
|
|
50
|
+
3. Top-level `items` exists and is an array.
|
|
51
|
+
4. `query` and `dateRange` are present.
|
|
52
|
+
5. Items include usable fields (`text`, `author`, `url`, `date`, `engagement`).
|
|
53
|
+
|
|
54
|
+
If a source was explicitly requested but its file is missing or malformed, report the failure clearly.
|
|
55
|
+
|
|
56
|
+
## Step 4: Return a Fetch Summary
|
|
57
|
+
|
|
58
|
+
Return:
|
|
59
|
+
|
|
60
|
+
- topic
|
|
61
|
+
- selected mode
|
|
62
|
+
- selected sources
|
|
63
|
+
- date range used
|
|
64
|
+
- item count per source
|
|
65
|
+
- any missing-source or partial-result warnings
|
|
66
|
+
|
|
67
|
+
## Critical Rules
|
|
68
|
+
|
|
69
|
+
1. **No analysis here**: do not rank/classify in this skill.
|
|
70
|
+
2. **No fabricated data**: do not create synthetic posts to fill gaps.
|
|
71
|
+
3. **Prefer deep mode for analysis pipelines**: quick mode is for explicit quick-answer requests.
|
|
72
|
+
4. **Fail loudly on malformed output**: do not continue as if fetch succeeded when validation fails.
|
|
73
|
+
|
|
74
|
+
## Error Handling
|
|
75
|
+
|
|
76
|
+
| Scenario | Symptom | Action |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| Missing `OPENAI_API_KEY` | Auth failure on Reddit fetch | Set valid `OPENAI_API_KEY` in `.sc-research` |
|
|
79
|
+
| Missing `XAI_API_KEY` | X file missing/empty while Reddit succeeds | Set `XAI_API_KEY` in `.sc-research` or continue Reddit-only |
|
|
80
|
+
| No relevant results | `items` is empty | Broaden topic keywords and retry |
|
|
81
|
+
| Rate limit / transient API failure | Timeout or provider error | Wait, then retry once with same parameters |
|
|
82
|
+
| Malformed output | JSON parse failure or missing `items` | Re-run fetch; if repeated, report failure explicitly |
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: social_media_rank
|
|
3
|
+
description: Analyze existing Reddit/X raw data and generate `classified_rank.json` using the strict `ClassifiedData` schema. Use for ranking, best-of, compare, or recommendation requests.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Social Media Ranking Skill
|
|
7
|
+
|
|
8
|
+
This worker converts raw discussion data into a ranked report suitable for the dashboard.
|
|
9
|
+
|
|
10
|
+
## Required Inputs
|
|
11
|
+
|
|
12
|
+
Use existing files only:
|
|
13
|
+
|
|
14
|
+
- `reddit_data.json`
|
|
15
|
+
- `x_data.json`
|
|
16
|
+
|
|
17
|
+
At least one valid source file must exist.
|
|
18
|
+
|
|
19
|
+
## Step 1: Preflight Validation
|
|
20
|
+
|
|
21
|
+
Before analysis:
|
|
22
|
+
|
|
23
|
+
1. Confirm at least one input file exists.
|
|
24
|
+
2. Parse each existing file as JSON.
|
|
25
|
+
3. Confirm top-level `items` is an array.
|
|
26
|
+
4. Ignore malformed items, but keep count of dropped items for transparency.
|
|
27
|
+
|
|
28
|
+
If both sources are missing or invalid, stop and report the failure.
|
|
29
|
+
|
|
30
|
+
## Step 2: Lock Output Schema
|
|
31
|
+
|
|
32
|
+
Read `web/src/types.ts` and treat it as source of truth.
|
|
33
|
+
|
|
34
|
+
- Output type must match `ClassifiedData`.
|
|
35
|
+
- Product entries must match `Product`.
|
|
36
|
+
- If this file and `types.ts` conflict, `types.ts` wins.
|
|
37
|
+
|
|
38
|
+
## Step 3: Build Product Candidates
|
|
39
|
+
|
|
40
|
+
From raw text, identify products/topics that are actually discussed.
|
|
41
|
+
|
|
42
|
+
- Prefer candidates with repeated mentions across multiple posts.
|
|
43
|
+
- Merge obvious aliases into a single canonical product name.
|
|
44
|
+
- Keep the final ranked set to 1-5 products.
|
|
45
|
+
|
|
46
|
+
## Step 4: Rank with Consistent Signals
|
|
47
|
+
|
|
48
|
+
For each candidate product:
|
|
49
|
+
|
|
50
|
+
- **mentions**: count relevant references across items.
|
|
51
|
+
- **estimated_engagement_score**: sum per-item engagement where item engagement = `max(score, likes, upvotes, 0) + comments + shares`.
|
|
52
|
+
- **sentiment**: one of `"Very Positive" | "Positive" | "Mixed" | "Negative"`.
|
|
53
|
+
- **consensus**: short evidence-based summary sentence.
|
|
54
|
+
- **pros/cons**: distilled from real discussion content.
|
|
55
|
+
|
|
56
|
+
Sort by strongest combined community support (mentions + engagement + sentiment quality), then assign contiguous ranks starting at 1.
|
|
57
|
+
|
|
58
|
+
## Step 5: Extract Highlight Quotes
|
|
59
|
+
|
|
60
|
+
For each ranked product, include up to 3 real quotes:
|
|
61
|
+
|
|
62
|
+
- Quote must come from raw data text.
|
|
63
|
+
- Include real `author` and real `link`.
|
|
64
|
+
- Set `context` to `"pro"`, `"con"`, or `"general"`.
|
|
65
|
+
- Prefer a balanced set of contexts when evidence exists.
|
|
66
|
+
|
|
67
|
+
Do not fabricate quotes. If fewer than 3 valid quotes exist, include fewer.
|
|
68
|
+
|
|
69
|
+
## Step 6: Write Output
|
|
70
|
+
|
|
71
|
+
Save final JSON to:
|
|
72
|
+
|
|
73
|
+
- `classified_rank.json`
|
|
74
|
+
|
|
75
|
+
Required top-level fields:
|
|
76
|
+
|
|
77
|
+
- `topic`
|
|
78
|
+
- `products`
|
|
79
|
+
- `key_insights`
|
|
80
|
+
|
|
81
|
+
## Final Validation Checklist
|
|
82
|
+
|
|
83
|
+
- JSON is parseable.
|
|
84
|
+
- `products` is an array with rank values `1..N` and no duplicates.
|
|
85
|
+
- Every product includes all required fields from `Product`.
|
|
86
|
+
- `key_insights` is non-empty and based on observed evidence.
|
|
87
|
+
- No null for array fields (`pros`, `cons`, `highlight_quotes`).
|
|
88
|
+
|
|
89
|
+
## Critical Rules
|
|
90
|
+
|
|
91
|
+
1. **No external fetch**: do not run new research in this skill.
|
|
92
|
+
2. **Schema strictness**: dashboard expects `ClassifiedData` shape exactly.
|
|
93
|
+
3. **Evidence-first ranking**: never rank based on assumptions alone.
|
|
94
|
+
4. **No fabricated citations**: quotes, authors, and links must be real.
|