sc-research 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/CLAUDE.md +11 -11
  2. package/README.md +3 -1
  3. package/dist/cli.js +7 -4
  4. package/package.json +8 -6
  5. package/templates/base/commands/controversy.md +1 -1
  6. package/templates/base/commands/deep-research.md +1 -1
  7. package/templates/base/commands/discovery.md +1 -1
  8. package/templates/base/commands/rank.md +1 -1
  9. package/templates/base/commands/sentiment.md +1 -1
  10. package/templates/base/commands/trend.md +1 -1
  11. package/templates/base/manifest.json +34 -34
  12. package/templates/base/skills/social_media_controversy.md +94 -0
  13. package/templates/base/skills/social_media_discovery.md +91 -0
  14. package/templates/base/skills/social_media_fetch.md +82 -0
  15. package/templates/base/skills/social_media_rank.md +94 -0
  16. package/templates/base/skills/social_media_research_test.md +52 -0
  17. package/templates/base/skills/social_media_sentiment.md +93 -0
  18. package/templates/base/skills/social_media_trend.md +87 -0
  19. package/templates/base/skills/social_media_visualize.md +63 -0
  20. package/templates/base/skills/using_social_media_research.md +115 -0
  21. package/templates/base/skills/communities_controversy.md +0 -65
  22. package/templates/base/skills/communities_discovery.md +0 -55
  23. package/templates/base/skills/communities_fetch.md +0 -56
  24. package/templates/base/skills/communities_rank.md +0 -57
  25. package/templates/base/skills/communities_research_test.md +0 -64
  26. package/templates/base/skills/communities_sentiment.md +0 -61
  27. package/templates/base/skills/communities_trend.md +0 -71
  28. package/templates/base/skills/communities_visualize.md +0 -46
  29. package/templates/base/skills/using_communities_research.md +0 -146
package/CLAUDE.md CHANGED
@@ -1,4 +1,4 @@
1
- # Communities Research Skill
1
+ # Social Media Research Skill
2
2
 
3
3
  > **"The Skill provides the catch; the Agent cooks the meal."**
4
4
 
@@ -15,7 +15,7 @@ This project is a headless social media research tool designed for **Claude Code
15
15
  This project uses `bun`.
16
16
 
17
17
  ### Environment Variables
18
- Required in `.env`:
18
+ Required in `.sc-research`:
19
19
  - `OPENAI_API_KEY`: For Reddit URL discovery and general LLM ops
20
20
  - `XAI_API_KEY`: (Optional) For X/Twitter data
21
21
 
@@ -38,16 +38,16 @@ Required in `.env`:
38
38
 
39
39
  ## 🏗️ Architecture
40
40
 
41
- - **Orchestrator**: `using_communities_research` (The routing brain)
41
+ - **Orchestrator**: `using_social_media_research` (The routing brain)
42
42
  - **Workers**:
43
- - `communities_fetch` (Data provider)
44
- - `communities_rank` (Ranking analysis)
45
- - `communities_sentiment` (Sentiment analysis)
46
- - `communities_trend` (Timeline analysis)
47
- - `communities_controversy` (Debate analysis)
48
- - `communities_discovery` (Viral topic clustering)
49
- - `communities_visualize` (Dashboard launcher)
50
- - `communities_research_test` (Legacy/fixed-link test helper)
43
+ - `social_media_fetch` (Data provider)
44
+ - `social_media_rank` (Ranking analysis)
45
+ - `social_media_sentiment` (Sentiment analysis)
46
+ - `social_media_trend` (Timeline analysis)
47
+ - `social_media_controversy` (Debate analysis)
48
+ - `social_media_discovery` (Viral topic clustering)
49
+ - `social_media_visualize` (Dashboard launcher)
50
+ - `social_media_research_test` (Legacy/fixed-link test helper)
51
51
 
52
52
  ## 🧪 Testing Rules
53
53
 
package/README.md CHANGED
@@ -99,13 +99,15 @@ console.log(results);
99
99
 
100
100
  ## Configuration
101
101
 
102
- Set the following environment variables in your `.env` file:
102
+ Set the following environment variables in your `.sc-research` file:
103
103
 
104
104
  ```env
105
105
  OPENAI_API_KEY=sk-... # Required for Reddit (URL Discovery)
106
106
  XAI_API_KEY=... # Required for X (Twitter)
107
107
  ```
108
108
 
109
+ Optional override: set `SC_RESEARCH_ENV_FILE` to use a different env file path.
110
+
109
111
  ## License
110
112
 
111
113
  MIT
package/dist/cli.js CHANGED
@@ -401,7 +401,7 @@ async function runInit(opts) {
401
401
  logger.info(` Scripts overwritten: ${packageSummary.scriptsOverwritten}`);
402
402
  logger.info("");
403
403
  logger.info("Next steps:");
404
- logger.info(" 1) Ensure your project has required env vars set:");
404
+ logger.info(" 1) Ensure your project has required env vars set (for example in .sc-research):");
405
405
  logger.info(" - OPENAI_API_KEY");
406
406
  logger.info(" - XAI_API_KEY (optional, for X)");
407
407
  logger.info(" 2) You can now run from this project:");
@@ -492,6 +492,8 @@ function sanitizePackageName(name) {
492
492
  }
493
493
 
494
494
  // src/entries/cli.ts
495
+ var DEFAULT_ENV_FILE = ".sc-research";
496
+ var ENV_FILE_OVERRIDE_VAR = "SC_RESEARCH_ENV_FILE";
495
497
  async function main() {
496
498
  const args = process3.argv.slice(2);
497
499
  if (args.length === 1 && (args[0] === "--version" || args[0] === "-v") || args[0] === "version") {
@@ -627,7 +629,7 @@ async function runPackagedEntry(entryFile, args) {
627
629
  const logger = new Logger;
628
630
  const entryPath = resolvePackagedEntry(entryFile);
629
631
  const resolvedEntryPath = entryPath ?? logger.exitWithError(`Internal error: could not find packaged entry "${entryFile}". Rebuild/reinstall sc-research and retry.`);
630
- const dotEnvVars = loadDotEnv(process3.cwd());
632
+ const dotEnvVars = loadProjectEnv(process3.cwd());
631
633
  const result = spawnSync("node", [resolvedEntryPath, ...args], {
632
634
  stdio: "inherit",
633
635
  env: { ...dotEnvVars, ...process3.env }
@@ -651,8 +653,9 @@ function resolvePackagedEntry(entryFile) {
651
653
  }
652
654
  return null;
653
655
  }
654
- function loadDotEnv(cwd) {
655
- const envPath = path5.join(cwd, ".env");
656
+ function loadProjectEnv(cwd) {
657
+ const configuredPath = process3.env[ENV_FILE_OVERRIDE_VAR]?.trim();
658
+ const envPath = configuredPath ? path5.isAbsolute(configuredPath) ? configuredPath : path5.join(cwd, configuredPath) : path5.join(cwd, DEFAULT_ENV_FILE);
656
659
  if (!fs5.existsSync(envPath))
657
660
  return {};
658
661
  const out = {};
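As a reading aid for the hunk above, here is a minimal TypeScript sketch of the new env-file resolution: `SC_RESEARCH_ENV_FILE` takes precedence, relative paths resolve against the working directory, and `.sc-research` is the default. The parsing loop after `const out = {}` is truncated in this diff, so the KEY=VALUE parsing below is an assumption about typical dotenv-style behavior, not the package's exact code.

```typescript
// Illustrative sketch of the resolution order shown in loadProjectEnv above.
// The KEY=VALUE parsing is assumed (the loop body is truncated in this diff).
import fs from "node:fs";
import path from "node:path";

const DEFAULT_ENV_FILE = ".sc-research";
const ENV_FILE_OVERRIDE_VAR = "SC_RESEARCH_ENV_FILE";

function loadProjectEnv(cwd: string): Record<string, string> {
  const configuredPath = process.env[ENV_FILE_OVERRIDE_VAR]?.trim();
  // Override wins; relative override paths resolve against cwd; otherwise use .sc-research.
  const envPath = configuredPath
    ? path.isAbsolute(configuredPath)
      ? configuredPath
      : path.join(cwd, configuredPath)
    : path.join(cwd, DEFAULT_ENV_FILE);
  if (!fs.existsSync(envPath)) return {};

  const out: Record<string, string> = {};
  for (const line of fs.readFileSync(envPath, "utf8").split(/\r?\n/)) {
    const match = line.match(/^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)\s*$/);
    if (match) out[match[1]] = match[2];
  }
  return out;
}
```

Because the spawned process receives `{ ...dotEnvVars, ...process3.env }`, variables already exported in the shell still take precedence over values read from the env file.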
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sc-research",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "Headless Social Media Research Data Provider for AI Agents",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -15,11 +15,13 @@
15
15
  "sc-research": "dist/cli.js"
16
16
  },
17
17
  "scripts": {
18
- "research": "bun src/entries/research.ts",
19
- "research:deep": "bun src/entries/research.ts --depth=deep",
20
- "visualize": "bun src/entries/visualize.ts",
21
- "test:fixed": "bun src/test/manual-fixed-links.ts",
22
- "test:x": "bun src/test/manual-xai.ts",
18
+ "research": "bun --env-file=.sc-research src/entries/research.ts",
19
+ "research:deep": "bun --env-file=.sc-research src/entries/research.ts --depth=deep",
20
+ "init": "node dist/cli.js init",
21
+ "init:all": "node dist/cli.js init --ai all",
22
+ "visualize": "bun --env-file=.sc-research src/entries/visualize.ts",
23
+ "test:fixed": "bun --env-file=.sc-research src/test/manual-fixed-links.ts",
24
+ "test:x": "bun --env-file=.sc-research src/test/manual-xai.ts",
23
25
  "build:index": "bun build src/entries/research.ts --outfile dist/index.js --target node",
24
26
  "build:visualize": "bun build src/entries/visualize.ts --outdir dist --target node",
25
27
  "build:test:fixed": "bun build src/test/manual-fixed-links.ts --outdir dist/test --target node",
package/templates/base/commands/controversy.md CHANGED
@@ -12,7 +12,7 @@ description: Find controversial debates in existing research data.
12
12
 
13
13
  3. Run the controversy skill
14
14
 
15
- > Use the `communities_controversy` skill to identify polarizing topics, extract opposing quotes, and generate `classified_controversy.json`.
15
+ > Use the `social_media_controversy` skill to identify polarizing topics, extract opposing quotes, and generate `classified_controversy.json`.
16
16
 
17
17
  4. Validate output schema
18
18
 
package/templates/base/commands/deep-research.md CHANGED
@@ -8,7 +8,7 @@ description: Deeply research a topic and route to the best analysis template. Th
8
8
 
9
9
  2. Route to the right analysis mode
10
10
 
11
- > Follow **Intent → Template Routing** rules in `using_communities_research`.
11
+ > Follow **Intent → Template Routing** rules in `using_social_media_research`.
12
12
  >
13
13
  > - If the user explicitly asks for "full analysis", "everything", or "all views": run all 4 templates in order (`rank` -> `sentiment` -> `trend` -> `controversy`).
14
14
  > - Otherwise: pick the SINGLE most suitable template for the user's question.
package/templates/base/commands/discovery.md CHANGED
@@ -12,7 +12,7 @@ description: Discover viral topics and emerging themes from existing research da
12
12
 
13
13
  3. Run the discovery skill
14
14
 
15
- > Use the `communities_discovery` skill to cluster posts by topic and generate `classified_discovery.json`.
15
+ > Use the `social_media_discovery` skill to cluster posts by topic and generate `classified_discovery.json`.
16
16
 
17
17
  4. Validate output schema
18
18
 
package/templates/base/commands/rank.md CHANGED
@@ -12,7 +12,7 @@ description: Rank and classify existing research data.
12
12
 
13
13
  3. Run the ranking skill
14
14
 
15
- > Use the `communities_rank` skill to generate `classified_rank.json` from the existing data files.
15
+ > Use the `social_media_rank` skill to generate `classified_rank.json` from the existing data files.
16
16
 
17
17
  4. Validate output schema
18
18
 
package/templates/base/commands/sentiment.md CHANGED
@@ -12,7 +12,7 @@ description: Analyze sentiment from existing research data.
12
12
 
13
13
  3. Run the sentiment skill
14
14
 
15
- > Use the `communities_sentiment` skill to analyze the raw data and generate `classified_sentiment.json`.
15
+ > Use the `social_media_sentiment` skill to analyze the raw data and generate `classified_sentiment.json`.
16
16
 
17
17
  4. Validate output schema
18
18
 
package/templates/base/commands/trend.md CHANGED
@@ -12,7 +12,7 @@ description: Analyze discussion trends over time from existing research data.
12
12
 
13
13
  3. Run the trend skill
14
14
 
15
- > Use the `communities_trend` skill to parse dates, choose an adaptive granularity (day/week/month), and generate `classified_trend.json`.
15
+ > Use the `social_media_trend` skill to parse dates, choose an adaptive granularity (day/week/month), and generate `classified_trend.json`.
16
16
 
17
17
  4. Validate output schema
18
18
 
package/templates/base/manifest.json CHANGED
@@ -1,65 +1,65 @@
1
1
  {
2
- "version": 1,
3
- "templates": [
4
- {
5
- "id": "communities_fetch",
6
- "kind": "skill",
7
- "description": "Worker skill that fetches raw discussion data from Reddit and X (Twitter) for a given topic. Returns raw JSON files.",
8
- "bodyFile": "skills/communities_fetch.md"
9
- },
10
- {
11
- "id": "communities_rank",
2
+ "version": 1,
3
+ "templates": [
4
+ {
5
+ "id": "social_media_fetch",
6
+ "kind": "skill",
7
+ "description": "Worker skill that fetches raw discussion data from Reddit and X (Twitter) for a given topic. Returns raw JSON files.",
8
+ "bodyFile": "skills/social_media_fetch.md"
9
+ },
10
+ {
11
+ "id": "social_media_rank",
12
12
  "kind": "skill",
13
13
  "description": "Analyze raw social media data (Reddit/X) to produce a ranked, classified report with strict JSON output.",
14
- "bodyFile": "skills/communities_rank.md"
14
+ "bodyFile": "skills/social_media_rank.md"
15
15
  },
16
16
  {
17
- "id": "communities_sentiment",
17
+ "id": "social_media_sentiment",
18
18
  "kind": "skill",
19
19
  "description": "Worker skill that analyzes raw social media data to produce a sentiment breakdown report with strict JSON output.",
20
- "bodyFile": "skills/communities_sentiment.md"
20
+ "bodyFile": "skills/social_media_sentiment.md"
21
21
  },
22
22
  {
23
- "id": "communities_trend",
23
+ "id": "social_media_trend",
24
24
  "kind": "skill",
25
25
  "description": "Worker skill that analyzes raw social media data to produce a trend timeline report with strict JSON output.",
26
- "bodyFile": "skills/communities_trend.md"
26
+ "bodyFile": "skills/social_media_trend.md"
27
27
  },
28
28
  {
29
- "id": "communities_controversy",
29
+ "id": "social_media_controversy",
30
30
  "kind": "skill",
31
31
  "description": "Worker skill that analyzes raw social media data to identify polarizing topics and produce a controversy map with strict JSON output.",
32
- "bodyFile": "skills/communities_controversy.md"
32
+ "bodyFile": "skills/social_media_controversy.md"
33
33
  },
34
34
  {
35
- "id": "communities_discovery",
35
+ "id": "social_media_discovery",
36
36
  "kind": "skill",
37
37
  "description": "Worker skill that analyzes raw social media data to discover and cluster high-signal emerging topics.",
38
- "bodyFile": "skills/communities_discovery.md"
38
+ "bodyFile": "skills/social_media_discovery.md"
39
39
  },
40
40
  {
41
- "id": "communities_visualize",
41
+ "id": "social_media_visualize",
42
42
  "kind": "skill",
43
43
  "description": "Worker skill that launches a local web dashboard to visualize all available classified research data.",
44
- "bodyFile": "skills/communities_visualize.md"
44
+ "bodyFile": "skills/social_media_visualize.md"
45
45
  },
46
46
  {
47
- "id": "communities_research_test",
47
+ "id": "social_media_research_test",
48
48
  "kind": "skill",
49
- "description": "Test the communities research skill with fixed Reddit links (no API key needed). Fetches data and returns JSON for AI classification.",
50
- "bodyFile": "skills/communities_research_test.md"
49
+ "description": "Test the social media research skill with fixed Reddit links (no API key needed). Fetches data and returns JSON for AI classification.",
50
+ "bodyFile": "skills/social_media_research_test.md"
51
51
  },
52
52
  {
53
- "id": "using_communities_research",
53
+ "id": "using_social_media_research",
54
54
  "kind": "skill",
55
- "description": "Orchestrator skill that understands user intent and routes to the most suitable analysis template.",
56
- "bodyFile": "skills/using_communities_research.md"
55
+ "description": "Entrypoint router for social-media research questions. Use when user asks what Reddit/X users think (best/top, compare, sentiment, trend, controversy, discovery, quick summary, full analysis) and route to the correct worker + fetch strategy.",
56
+ "bodyFile": "skills/using_social_media_research.md"
57
57
  },
58
- {
59
- "id": "research",
60
- "kind": "command",
61
- "description": "Research a topic using the Quick Answer flow.",
62
- "bodyFile": "commands/research.md"
58
+ {
59
+ "id": "research",
60
+ "kind": "command",
61
+ "description": "Research a topic using the Quick Answer flow.",
62
+ "bodyFile": "commands/research.md"
63
63
  },
64
64
  {
65
65
  "id": "quick",
@@ -114,6 +114,6 @@
114
114
  "kind": "command",
115
115
  "description": "Run fixed-link test research pipeline.",
116
116
  "bodyFile": "commands/test-research.md"
117
- }
118
- ]
117
+ }
118
+ ]
119
119
  }
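Every template entry in the manifest above, old or renamed, carries the same four fields. A small TypeScript interface (illustrative only, inferred from the diff rather than taken from the package source) summarizes the shape:

```typescript
// Illustrative shape of templates/base/manifest.json entries, inferred from
// the diff above; the package may define this differently in its own source.
interface ManifestTemplate {
  id: string;                 // e.g. "social_media_rank"
  kind: "skill" | "command";  // only these two kinds appear in this manifest
  description: string;
  bodyFile: string;           // path under templates/base, e.g. "skills/social_media_rank.md"
}

interface Manifest {
  version: number;            // currently 1
  templates: ManifestTemplate[];
}
```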
@@ -0,0 +1,94 @@
1
+ ---
2
+ name: social_media_controversy
3
+ description: Analyze existing Reddit/X raw data to identify divisive topics and generate `classified_controversy.json` with strict `ControversyData` output. Use for debate, disagreement, or polarizing-topic requests.
4
+ ---
5
+
6
+ # Social Media Controversy Skill
7
+
8
+ This worker maps where social media opinions conflict and presents both sides with evidence.
9
+
10
+ ## Required Inputs
11
+
12
+ Use existing raw files only:
13
+
14
+ - `reddit_data.json`
15
+ - `x_data.json`
16
+
17
+ At least one valid source file must exist.
18
+
19
+ ## Step 1: Preflight Validation
20
+
21
+ 1. Parse each available source file.
22
+ 2. Confirm top-level `items` arrays.
23
+ 3. Skip malformed records and track skipped count.
24
+ 4. Stop if no usable discussion text remains.
25
+
26
+ ## Step 2: Lock Output Schema
27
+
28
+ Read `web/src/types.ts` and treat `ControversyData` as source of truth.
29
+
30
+ Required top-level fields:
31
+
32
+ - `topic`
33
+ - `overall_divisiveness`
34
+ - `controversies`
35
+
36
+ ## Step 3: Identify Genuine Controversies
37
+
38
+ Find 2-5 topics with clear opposing viewpoints.
39
+
40
+ Good signals:
41
+
42
+ - explicit disagreement language
43
+ - conflicting claims about the same product/theme
44
+ - platform splits (Reddit vs X)
45
+ - high-engagement threads arguing opposite positions
46
+
47
+ Do not manufacture controversy where consensus is strong.
48
+
49
+ ## Step 4: Structure Each Controversy
50
+
51
+ For each controversy:
52
+
53
+ - `topic`: concise debate label
54
+ - `heat_score`: `0-100` intensity score based on volume + engagement + disagreement strength
55
+ - `divisiveness`: `"Low" | "Medium" | "High"`
56
+ - `side_a` and `side_b` each with:
57
+ - `position`
58
+ - `supporter_count` (best estimate; `0` if unknown)
59
+ - `sample_quotes` (up to 3 real quotes)
60
+
61
+ Quote rules:
62
+
63
+ - Use real text, real author, real link.
64
+ - Prefer diverse arguments per side (not duplicates).
65
+ - If limited evidence exists, include fewer quotes instead of fabricating.
66
+
67
+ ## Step 5: Set Overall Divisiveness
68
+
69
+ Set `overall_divisiveness` from the full set:
70
+
71
+ - `"High"` if multiple high-heat, strongly split controversies exist
72
+ - `"Medium"` for mixed or moderate splits
73
+ - `"Low"` if disagreements are minor or sparse
74
+
75
+ ## Step 6: Write Output
76
+
77
+ Save strict JSON to:
78
+
79
+ - `classified_controversy.json`
80
+
81
+ ## Final Validation Checklist
82
+
83
+ - JSON parse succeeds.
84
+ - Object matches `ControversyData` shape.
85
+ - Enum values use allowed casing exactly.
86
+ - Array fields are arrays (never null/undefined).
87
+ - Every quote references real source evidence.
88
+
89
+ ## Critical Rules
90
+
91
+ 1. **No external fetch**: analyze existing data only.
92
+ 2. **No fabricated arguments**: only report controversies present in source text.
93
+ 3. **No fabricated citations**: quote text, author, and link must be real.
94
+ 4. **Schema strictness**: if instructions conflict with `types.ts`, `types.ts` wins.
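Pulling Steps 2 through 5 of the controversy skill together, a hedged TypeScript sketch of what a `classified_controversy.json` document could look like follows. Field names are taken from the skill text above; the authoritative definition is `ControversyData` in `web/src/types.ts`, which this diff does not show, so treat this as an illustration only.

```typescript
// Illustrative only: field names follow the skill text; web/src/types.ts is authoritative.
type Divisiveness = "Low" | "Medium" | "High";

interface ControversyQuote {
  text: string;    // real quote text from reddit_data.json / x_data.json
  author: string;  // real author
  link: string;    // real source link
}

interface ControversySide {
  position: string;
  supporter_count: number;           // best estimate; 0 if unknown
  sample_quotes: ControversyQuote[]; // up to 3 real quotes
}

interface Controversy {
  topic: string;                     // concise debate label
  heat_score: number;                // 0-100: volume + engagement + disagreement strength
  divisiveness: Divisiveness;
  side_a: ControversySide;
  side_b: ControversySide;
}

interface ControversyOutput {
  topic: string;
  overall_divisiveness: Divisiveness;
  controversies: Controversy[];      // 2-5 genuine debates
}
```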
@@ -0,0 +1,91 @@
1
+ ---
2
+ name: social_media_discovery
3
+ description: Analyze existing Reddit/X raw data to find emerging or viral themes and generate `classified_discovery.json` with strict `DiscoveryData` output.
4
+ ---
5
+
6
+ # Social Media Discovery Skill
7
+
8
+ This worker clusters noisy social discussions into trend themes that can be visualized.
9
+
10
+ ## Required Inputs
11
+
12
+ Use existing raw files only:
13
+
14
+ - `reddit_data.json`
15
+ - `x_data.json`
16
+
17
+ At least one valid source file must exist.
18
+
19
+ ## Step 1: Preflight Validation
20
+
21
+ 1. Parse each available source file.
22
+ 2. Confirm top-level `items` arrays.
23
+ 3. Skip malformed records and track skipped count.
24
+ 4. Stop if no usable posts remain.
25
+
26
+ ## Step 2: Lock Schema
27
+
28
+ Read `web/src/types.ts` and follow `DiscoveryData` and `DiscoveryTopic` exactly.
29
+
30
+ Important enum constraints:
31
+
32
+ - `sentiment`: `"positive" | "negative" | "neutral" | "mixed"`
33
+ - `platform`: `"reddit" | "x"`
34
+
35
+ ## Step 3: Cluster Topics
36
+
37
+ Create 3-8 meaningful topic clusters from actual post content.
38
+
39
+ - Merge near-duplicate themes.
40
+ - Ignore clear spam/noise.
41
+ - Prefer clusters with both relevance and engagement.
42
+
43
+ Each cluster should have:
44
+
45
+ - `id` (stable slug-like id)
46
+ - `topic_name`
47
+ - `description`
48
+ - `category`
49
+
50
+ ## Step 4: Compute Topic Metrics
51
+
52
+ For each cluster:
53
+
54
+ - `engagement_score` = sum of per-item engagement where item engagement =
55
+ `max(score, likes, upvotes, 0) + comments + shares`
56
+ - `sentiment` using allowed discovery sentiment enum
57
+
58
+ Also compute top-level:
59
+
60
+ - `total_posts_analyzed`
61
+ - `period` (always string, derived from available date range)
62
+
63
+ ## Step 5: Attach Evidence Content
64
+
65
+ For each topic include:
66
+
67
+ - `key_posts` (high-signal posts, preferably 1-3 entries)
68
+ - `highlight_comments` (up to 3 real excerpts with author/link/platform)
69
+
70
+ Use real source text only. If evidence is limited, include fewer entries instead of fabricating.
71
+
72
+ ## Step 6: Write Output
73
+
74
+ Save strict JSON to:
75
+
76
+ - `classified_discovery.json`
77
+
78
+ ## Final Validation Checklist
79
+
80
+ - JSON parse succeeds.
81
+ - Output matches `DiscoveryData` shape.
82
+ - All enum values are valid and correctly cased.
83
+ - Array fields are arrays (never null/undefined).
84
+ - Every quote/comment/link is traceable to raw input data.
85
+
86
+ ## Critical Rules
87
+
88
+ 1. **No external fetch**: do not run data collection here.
89
+ 2. **No fabricated clusters or quotes**: everything must map to real evidence.
90
+ 3. **Schema strictness**: `types.ts` is authoritative.
91
+ 4. **Graceful fallback**: use empty arrays for missing optional evidence; never invent content.
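The Step 4 engagement formula in the discovery skill can be made concrete with a short TypeScript sketch. It assumes raw items expose optional `score`/`likes`/`upvotes`/`comments`/`shares` counters; the exact raw-item field names come from the fetch output, which this diff does not fully define.

```typescript
// Illustrative: implements the Step 4 formula
//   item engagement = max(score, likes, upvotes, 0) + comments + shares
// The RawItem field names are an assumption about the fetch output.
interface RawItem {
  score?: number;
  likes?: number;
  upvotes?: number;
  comments?: number;
  shares?: number;
}

function itemEngagement(item: RawItem): number {
  const base = Math.max(item.score ?? 0, item.likes ?? 0, item.upvotes ?? 0, 0);
  return base + (item.comments ?? 0) + (item.shares ?? 0);
}

function clusterEngagementScore(items: RawItem[]): number {
  // engagement_score for a topic cluster = sum of its items' engagement
  return items.reduce((sum, item) => sum + itemEngagement(item), 0);
}
```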
@@ -0,0 +1,82 @@
1
+ ---
2
+ name: social_media_fetch
3
+ description: Worker skill that fetches raw discussion data from Reddit and X into `reddit_data.json` and `x_data.json`. Use before running rank, sentiment, trend, controversy, or discovery analysis.
4
+ ---
5
+
6
+ # Social Media Fetch Skill
7
+
8
+ This worker is the data-ingestion step for the pipeline. It fetches raw social data only and does not classify or analyze it.
9
+
10
+ ## Inputs and Outputs
11
+
12
+ - **Input**: User topic and optional filters (`source`, `from/to`, `mode`)
13
+ - **Output files** (project root):
14
+ - `reddit_data.json`
15
+ - `x_data.json`
16
+
17
+ At least one output file must be produced for a successful fetch.
18
+
19
+ ## Step 1: Choose Fetch Mode
20
+
21
+ - **Quick mode** (faster, lighter coverage):
22
+ - `sc-research research "TOPIC"`
23
+ - **Deep mode** (default for analysis workflows):
24
+ - `sc-research research:deep "TOPIC"`
25
+ - **Discovery mode** (theme clustering data):
26
+ - `sc-research research:deep "TOPIC" --mode=discovery`
27
+
28
+ ## Step 2: Build Command with Optional Flags
29
+
30
+ Use flags only when requested:
31
+
32
+ - `--source=reddit|x|both`
33
+ - `--from=YYYY-MM-DD --to=YYYY-MM-DD`
34
+ - `--mode=discovery`
35
+
36
+ Examples:
37
+
38
+ ```bash
39
+ sc-research research:deep "wireless earbuds"
40
+ sc-research research:deep "wireless earbuds" --source=reddit --from=2025-01-01 --to=2025-12-31
41
+ sc-research research:deep "wireless earbuds" --mode=discovery --source=both
42
+ ```
43
+
44
+ ## Step 3: Validate Fetch Results
45
+
46
+ After running the command, verify each produced source file:
47
+
48
+ 1. File exists.
49
+ 2. JSON is parseable.
50
+ 3. Top-level `items` exists and is an array.
51
+ 4. `query` and `dateRange` are present.
52
+ 5. Items include usable fields (`text`, `author`, `url`, `date`, `engagement`).
53
+
54
+ If a source was explicitly requested but its file is missing or malformed, report the failure clearly.
55
+
56
+ ## Step 4: Return a Fetch Summary
57
+
58
+ Return:
59
+
60
+ - topic
61
+ - selected mode
62
+ - selected sources
63
+ - date range used
64
+ - item count per source
65
+ - any missing-source or partial-result warnings
66
+
67
+ ## Critical Rules
68
+
69
+ 1. **No analysis here**: do not rank/classify in this skill.
70
+ 2. **No fabricated data**: do not create synthetic posts to fill gaps.
71
+ 3. **Prefer deep mode for analysis pipelines**: quick mode is for explicit quick-answer requests.
72
+ 4. **Fail loudly on malformed output**: do not continue as if fetch succeeded when validation fails.
73
+
74
+ ## Error Handling
75
+
76
+ | Scenario | Symptom | Action |
77
+ |---|---|---|
78
+ | Missing `OPENAI_API_KEY` | Auth failure on Reddit fetch | Set valid `OPENAI_API_KEY` in `.sc-research` |
79
+ | Missing `XAI_API_KEY` | X file missing/empty while Reddit succeeds | Set `XAI_API_KEY` in `.sc-research` or continue Reddit-only |
80
+ | No relevant results | `items` is empty | Broaden topic keywords and retry |
81
+ | Rate limit / transient API failure | Timeout or provider error | Wait, then retry once with same parameters |
82
+ | Malformed output | JSON parse failure or missing `items` | Re-run fetch; if repeated, report failure explicitly |
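The Step 3 checklist of the fetch skill translates directly into a small validation routine. The sketch below assumes the raw files have roughly the shape the checklist describes (`items`, `query`, `dateRange`); the actual raw schema is not formally defined in this diff.

```typescript
// Illustrative validation of one fetched file per Step 3; the exact raw schema
// is not shown in this diff, so the field checks here are assumptions.
import fs from "node:fs";

function validateFetchOutput(filePath: string): { ok: boolean; itemCount: number; problems: string[] } {
  if (!fs.existsSync(filePath)) {
    return { ok: false, itemCount: 0, problems: ["file missing"] };
  }

  let data: any;
  try {
    data = JSON.parse(fs.readFileSync(filePath, "utf8"));
  } catch {
    return { ok: false, itemCount: 0, problems: ["JSON parse failure"] };
  }

  const problems: string[] = [];
  if (!Array.isArray(data.items)) problems.push("items is missing or not an array");
  if (!data.query) problems.push("query is missing");
  if (!data.dateRange) problems.push("dateRange is missing");

  const itemCount = Array.isArray(data.items) ? data.items.length : 0;
  return { ok: problems.length === 0, itemCount, problems };
}

// Example: validateFetchOutput("reddit_data.json")
```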
@@ -0,0 +1,94 @@
1
+ ---
2
+ name: social_media_rank
3
+ description: Analyze existing Reddit/X raw data and generate `classified_rank.json` using the strict `ClassifiedData` schema. Use for ranking, best-of, compare, or recommendation requests.
4
+ ---
5
+
6
+ # Social Media Ranking Skill
7
+
8
+ This worker converts raw discussion data into a ranked report suitable for the dashboard.
9
+
10
+ ## Required Inputs
11
+
12
+ Use existing files only:
13
+
14
+ - `reddit_data.json`
15
+ - `x_data.json`
16
+
17
+ At least one valid source file must exist.
18
+
19
+ ## Step 1: Preflight Validation
20
+
21
+ Before analysis:
22
+
23
+ 1. Confirm at least one input file exists.
24
+ 2. Parse each existing file as JSON.
25
+ 3. Confirm top-level `items` is an array.
26
+ 4. Ignore malformed items, but keep count of dropped items for transparency.
27
+
28
+ If both sources are missing or invalid, stop and report the failure.
29
+
30
+ ## Step 2: Lock Output Schema
31
+
32
+ Read `web/src/types.ts` and treat it as source of truth.
33
+
34
+ - Output type must match `ClassifiedData`.
35
+ - Product entries must match `Product`.
36
+ - If this file and `types.ts` conflict, `types.ts` wins.
37
+
38
+ ## Step 3: Build Product Candidates
39
+
40
+ From raw text, identify products/topics that are actually discussed.
41
+
42
+ - Prefer candidates with repeated mentions across multiple posts.
43
+ - Merge obvious aliases into a single canonical product name.
44
+ - Keep the final ranked set to 1-5 products.
45
+
46
+ ## Step 4: Rank with Consistent Signals
47
+
48
+ For each candidate product:
49
+
50
+ - **mentions**: count relevant references across items.
51
+ - **estimated_engagement_score**: sum per-item engagement where item engagement = `max(score, likes, upvotes, 0) + comments + shares`.
52
+ - **sentiment**: one of `"Very Positive" | "Positive" | "Mixed" | "Negative"`.
53
+ - **consensus**: short evidence-based summary sentence.
54
+ - **pros/cons**: distilled from real discussion content.
55
+
56
+ Sort by strongest combined community support (mentions + engagement + sentiment quality), then assign contiguous ranks starting at 1.
57
+
58
+ ## Step 5: Extract Highlight Quotes
59
+
60
+ For each ranked product, include up to 3 real quotes:
61
+
62
+ - Quote must come from raw data text.
63
+ - Include real `author` and real `link`.
64
+ - Set `context` to `"pro"`, `"con"`, or `"general"`.
65
+ - Prefer a balanced set of contexts when evidence exists.
66
+
67
+ Do not fabricate quotes. If fewer than 3 valid quotes exist, include fewer.
68
+
69
+ ## Step 6: Write Output
70
+
71
+ Save final JSON to:
72
+
73
+ - `classified_rank.json`
74
+
75
+ Required top-level fields:
76
+
77
+ - `topic`
78
+ - `products`
79
+ - `key_insights`
80
+
81
+ ## Final Validation Checklist
82
+
83
+ - JSON is parseable.
84
+ - `products` is an array with rank values `1..N` and no duplicates.
85
+ - Every product includes all required fields from `Product`.
86
+ - `key_insights` is non-empty and based on observed evidence.
87
+ - No null for array fields (`pros`, `cons`, `highlight_quotes`).
88
+
89
+ ## Critical Rules
90
+
91
+ 1. **No external fetch**: do not run new research in this skill.
92
+ 2. **Schema strictness**: dashboard expects `ClassifiedData` shape exactly.
93
+ 3. **Evidence-first ranking**: never rank based on assumptions alone.
94
+ 4. **No fabricated citations**: quotes, authors, and links must be real.
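Step 4 of the ranking skill asks for ordering by "strongest combined community support" without fixing a formula, so the weighting in the sketch below is an assumption; the contiguous ranks starting at 1 and the 1-5 product cap are explicit requirements from the skill text.

```typescript
// Illustrative ranking per Steps 3-4. The sentiment weights and the way the
// signals are combined are assumptions; the skill only requires evidence-based
// ordering, a 1-5 product cap, and contiguous ranks starting at 1.
type Sentiment = "Very Positive" | "Positive" | "Mixed" | "Negative";

interface ProductCandidate {
  name: string;
  mentions: number;
  estimated_engagement_score: number;
  sentiment: Sentiment;
}

const SENTIMENT_WEIGHT: Record<Sentiment, number> = {
  "Very Positive": 1.0,
  "Positive": 0.75,
  "Mixed": 0.4,
  "Negative": 0.1,
};

function rankProducts(candidates: ProductCandidate[]): (ProductCandidate & { rank: number })[] {
  const scored = candidates
    .map((c) => ({
      candidate: c,
      score: (c.mentions + c.estimated_engagement_score) * SENTIMENT_WEIGHT[c.sentiment],
    }))
    .sort((a, b) => b.score - a.score)
    .slice(0, 5); // keep the final ranked set to 1-5 products

  return scored.map((entry, i) => ({ ...entry.candidate, rank: i + 1 }));
}
```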