sc-research 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,57 +1,21 @@
1
1
  ---
2
2
  name: social_media_discovery
3
- description: Analyze existing Reddit/X raw data to find emerging or viral themes and generate `classified_discovery.json` with strict `DiscoveryData` output.
3
+ description: Analysis-only worker. Reads existing raw Reddit/X data to find emerging or viral themes and generates `classified_discovery.json` with strict `DiscoveryData` output.
4
4
  ---
5
5
 
6
6
  # Social Media Discovery Skill
7
7
 
8
- This worker clusters noisy social discussions into trend themes that can be visualized.
8
+ This worker clusters noisy social discussions into trend themes that can be visualized. It performs **analysis only** — fetching is handled by the orchestrator via `social_media_fetch`.
9
9
 
10
- ## Required Inputs
10
+ ## Prerequisites
11
11
 
12
- Use existing raw files only:
12
+ The following files must already exist (produced by `social_media_fetch`):
13
13
 
14
- - `reddit_data.json`
15
- - `x_data.json`
14
+ - `reddit_data.json` and/or `x_data.json`
16
15
 
17
- At least one valid source file must exist.
16
+ At least one valid source file must be present. If both are missing, **stop and report failure** — do not attempt to fetch data.
18
17
 
19
- ## Command Execution Flow
20
-
21
- Use this sequence when running discovery end-to-end:
22
-
23
- 1. Fetch or refresh discovery raw data (outside this worker):
24
-
25
- - Broad weekly discovery feed: `sc-research research:deep "DISCOVERY_WEEKLY" --mode=discovery`
26
- - Topic-focused discovery: `sc-research research:deep "TOPIC" --mode=discovery`
27
- - Optional filters: `--source=reddit|x|both --from=YYYY-MM-DD --to=YYYY-MM-DD`
28
-
29
- 2. Run this `social_media_discovery` worker to analyze existing raw files and produce `classified_discovery.json`.
30
- 3. Optional visualization step:
31
-
32
- - `sc-research visualize`
33
-
34
- If raw files are missing, stale, or mismatched for the requested topic/date range, instruct the caller to run step 1 first.
35
-
36
- ## Discovery Fetch Behavior (Code-Aligned)
37
-
38
- When `--mode=discovery` is used, runtime behavior differs by topic:
39
-
40
- 1. If topic is exactly `DISCOVERY_WEEKLY` and Reddit is enabled:
41
-
42
- - Fetches Reddit trending posts from `r/popular/top` with `t=week` and limit `25`.
43
-
44
- 2. For other topics in discovery mode and Reddit enabled:
45
-
46
- - Maps topic to candidate subreddits first.
47
- - Per subreddit, uses either top posts (`week`, limit `5`) or subreddit search (`week`, limit `5`) based on topic/subreddit match.
48
- - If mapping returns no subreddits, falls back to legacy Reddit keyword-thread flow.
49
-
50
- 3. X source behavior:
51
-
52
- - X fetch still runs through normal X search flow (`maxItems` based on depth), even in discovery mode.
53
-
54
- This skill consumes the resulting `reddit_data.json` / `x_data.json`; it does not perform fetching itself.
18
+ **Note on discovery data**: For best results, the orchestrator should have fetched with `--mode=discovery`. This skill analyzes whatever raw data exists — it does not control how data was fetched.
55
19
 
56
20
  ## Step 1: Preflight Validation
57
21
 
@@ -112,6 +76,54 @@ Save strict JSON to:
112
76
 
113
77
  - `classified_discovery.json`
114
78
 
79
+ ## Output Type Contract
80
+
81
+ Your output MUST match this exact shape. The dashboard detects discovery data by checking for `trending_topics` (array). Missing this field = broken tab.
82
+
83
+ **WARNING**: Discovery uses DIFFERENT enum casing than other skills. Everything is **lowercase** here.
84
+
85
+ ```json
86
+ {
87
+ "topic": "AI tools 2025",
88
+ "period": "2025-01-01 to 2025-01-31",
89
+ "total_posts_analyzed": 156,
90
+ "trending_topics": [
91
+ {
92
+ "id": "local-llm-hosting",
93
+ "topic_name": "Local LLM Hosting",
94
+ "description": "Growing interest in running language models locally using consumer hardware",
95
+ "category": "Technology",
96
+ "engagement_score": 4500,
97
+ "sentiment": "positive",
98
+ "key_posts": [
99
+ {
100
+ "title": "I got Llama 3 running on my M3 MacBook and it's incredible",
101
+ "url": "https://reddit.com/r/LocalLLaMA/comments/mno345",
102
+ "platform": "reddit",
103
+ "engagement": 2100
104
+ }
105
+ ],
106
+ "highlight_comments": [
107
+ {
108
+ "text": "The performance gains with quantization are impressive",
109
+ "author": "u/ml_enthusiast",
110
+ "link": "https://reddit.com/r/LocalLLaMA/comments/mno345/comment/abc",
111
+ "platform": "reddit"
112
+ }
113
+ ]
114
+ }
115
+ ]
116
+ }
117
+ ```
118
+
119
+ ### Enum Rules for Discovery (ALL LOWERCASE)
120
+
121
+ Discovery is the **only** classified type that uses lowercase sentiment values. Do NOT copy Title Case from rank/sentiment/controversy.
122
+
123
+ - `sentiment` on DiscoveryTopic: **lowercase** — `"positive"`, `"negative"`, `"neutral"`, `"mixed"`. NEVER use `"Positive"`, `"Mixed"`, or `"Very Positive"`.
124
+ - `platform` on key_posts and highlight_comments: **lowercase** — `"reddit"`, `"x"`. NEVER use `"Reddit"`, `"X"`, or `"twitter"`.
125
+ - `id`: use a **slug-like** identifier (e.g., `"local-llm-hosting"`), not a UUID or number.
126
+
115
127
  ## Final Validation Checklist
116
128
 
117
129
  - JSON parse succeeds.
@@ -1,61 +1,83 @@
1
1
  ---
2
2
  name: social_media_fetch
3
- description: Worker skill that fetches raw discussion data from Reddit and X into `reddit_data.json` and `x_data.json`. Use before running rank, sentiment, trend, controversy, or discovery analysis.
3
+ description: The sole data-fetching authority for the research pipeline. Handles CLI execution, data freshness checks, and output validation. Always called by the orchestrator, never by worker skills directly.
4
4
  ---
5
5
 
6
6
  # Social Media Fetch Skill
7
7
 
8
- This worker is the data-ingestion step for the pipeline. It fetches raw social data only and does not classify or analyze it.
8
+ This is the **only** skill that runs `sc-research research` CLI commands. No other skill or command should execute fetch commands. The orchestrator delegates here; worker skills consume the output.
9
9
 
10
- ## Inputs and Outputs
10
+ ## Inputs (provided by orchestrator)
11
11
 
12
- - **Input**: User topic and optional filters (`source`, `from/to`, `mode`)
13
- - **Output files** (project root):
14
- - `reddit_data.json`
15
- - `x_data.json`
12
+ The orchestrator will specify:
13
+
14
+ - **topic**: the search query string
15
+ - **depth**: `quick` or `deep`
16
+ - **mode**: `research` (default) or `discovery`
17
+ - **source** (optional): `reddit`, `x`, or omit for all available
18
+ - **date range** (optional): `from` / `to` as `YYYY-MM-DD`
19
+
20
+ ## Outputs
21
+
22
+ - `reddit_data.json` (project root)
23
+ - `x_data.json` (project root)
16
24
 
17
25
  At least one output file must be produced for a successful fetch.
18
26
 
19
- ## Command Execution Flow
27
+ ---
20
28
 
21
- Run one of these commands based on requested depth:
29
+ ## Step 1: Check Data Freshness
22
30
 
23
- - Quick fetch: `sc-research research "TOPIC"`
24
- - Deep fetch: `sc-research research:deep "TOPIC"`
25
- - Discovery fetch: `sc-research research:deep "TOPIC" --mode=discovery`
31
+ Before running a new fetch, check whether existing raw files can be reused:
26
32
 
27
- Then validate outputs (`reddit_data.json` / `x_data.json`) before handing off to analysis workers.
33
+ 1. Does `reddit_data.json` or `x_data.json` exist?
34
+ 2. Is the file valid JSON with a top-level `items` array?
35
+ 3. Does the `query` field match the current topic (same or equivalent intent)?
36
+ 4. Does the `dateRange` match the requested window (if provided)?
37
+ 5. Does the source scope match (e.g., if user asked for Reddit-only, is Reddit data present)?
28
38
 
29
- ## Step 1: Choose Fetch Mode
39
+ **If all checks pass** → skip fetch, use existing data, and report "Using cached data."
40
+ **If any check fails** → proceed with fresh fetch.
30
41
 
31
- - **Quick mode** (faster, lighter coverage):
32
- - `sc-research research "TOPIC"`
33
- - **Deep mode** (default for analysis workflows):
34
- - `sc-research research:deep "TOPIC"`
35
- - **Discovery mode** (theme clustering data):
36
- - `sc-research research:deep "TOPIC" --mode=discovery`
42
+ ## Step 2: Build CLI Command
37
43
 
38
- ## Step 2: Build Command with Optional Flags
44
+ Construct the command based on inputs:
39
45
 
40
- Use flags only when requested:
46
+ **Standard analysis routes:**
41
47
 
42
- - `--source=reddit|x|both`
43
- - `--from=YYYY-MM-DD --to=YYYY-MM-DD`
44
- - `--mode=discovery`
48
+ ```bash
49
+ sc-research research:deep "TOPIC"
50
+ ```
51
+
52
+ **Quick-answer route:**
45
53
 
46
- When `--source` is omitted, runtime attempts all enabled sources (based on available API keys). A source without its required key is skipped.
54
+ ```bash
55
+ sc-research research "TOPIC" --source=reddit
56
+ ```
47
57
 
48
- Examples:
58
+ **Discovery route (broad weekly):**
49
59
 
50
60
  ```bash
51
- sc-research research:deep "wireless earbuds"
52
- sc-research research:deep "wireless earbuds" --source=reddit --from=2025-01-01 --to=2025-12-31
53
- sc-research research:deep "wireless earbuds" --mode=discovery --source=both
61
+ sc-research research:deep "DISCOVERY_WEEKLY" --mode=discovery
54
62
  ```
55
63
 
56
- ## Step 3: Validate Fetch Results
64
+ **Discovery route (topic-focused):**
65
+
66
+ ```bash
67
+ sc-research research:deep "TOPIC" --mode=discovery
68
+ ```
57
69
 
58
- After running the command, verify each produced source file:
70
+ Append optional flags only when provided by the orchestrator:
71
+
72
+ - `--source=reddit|x|both`
73
+ - `--from=YYYY-MM-DD --to=YYYY-MM-DD`
74
+ - `--mode=discovery`
75
+
76
+ When `--source` is omitted, runtime uses all sources whose API keys are available.
77
+
78
+ ## Step 3: Execute and Validate
79
+
80
+ Run the constructed command, then validate each produced file:
59
81
 
60
82
  1. File exists.
61
83
  2. JSON is parseable.
@@ -65,30 +87,40 @@ After running the command, verify each produced source file:
65
87
 
66
88
  If a source was explicitly requested but its file is missing or malformed, report the failure clearly.
67
89
 
68
- ## Step 4: Return a Fetch Summary
90
+ ## Step 4: Return Fetch Summary
69
91
 
70
- Return:
92
+ Return to the orchestrator:
71
93
 
72
94
  - topic
73
- - selected mode
74
- - selected sources
95
+ - mode used (`research` / `discovery`)
96
+ - sources fetched
75
97
  - date range used
76
98
  - item count per source
77
- - any missing-source or partial-result warnings
99
+ - any warnings (missing source, partial results, cached data reuse)
78
100
 
79
- ## Critical Rules
101
+ ---
102
+
103
+ ## Discovery Fetch Behavior (runtime details)
104
+
105
+ When `--mode=discovery` is used, runtime behavior differs by topic:
80
106
 
81
- 1. **No analysis here**: do not rank/classify in this skill.
82
- 2. **No fabricated data**: do not create synthetic posts to fill gaps.
83
- 3. **Prefer deep mode for analysis pipelines**: quick mode is for explicit quick-answer requests.
84
- 4. **Fail loudly on malformed output**: do not continue as if fetch succeeded when validation fails.
107
+ 1. Topic is exactly `DISCOVERY_WEEKLY` with Reddit enabled → fetches `r/popular/top` with `t=week`, limit `25`.
108
+ 2. Other topics in discovery mode with Reddit enabled → maps topic to candidate subreddits, then fetches top posts or searches per subreddit.
109
+ 3. X source runs normal X search flow regardless of discovery mode.
85
110
 
86
111
  ## Error Handling
87
112
 
88
- | Scenario | Symptom | Action |
89
- | ---------------------------------- | ------------------------------------------ | -------------------------------------------------------- |
90
- | Missing `OPENAI_API_KEY` | Auth failure on Reddit fetch | Set valid `OPENAI_API_KEY` in `.sc-research` |
91
- | Missing `XAI_API_KEY` | X file missing/empty while Reddit succeeds | Set `XAI_API_KEY` in `.sc-research` to enable X fetching |
92
- | No relevant results | `items` is empty | Broaden topic keywords and retry |
93
- | Rate limit / transient API failure | Timeout or provider error | Wait, then retry once with same parameters |
94
- | Malformed output | JSON parse failure or missing `items` | Re-run fetch; if repeated, report failure explicitly |
113
+ | Scenario | Symptom | Action |
114
+ | ---------------------------------- | ------------------------------------- | ---------------------------------------------------- |
115
+ | Missing `OPENAI_API_KEY` | Auth failure on Reddit fetch | Set valid `OPENAI_API_KEY` in `.sc-research` |
116
+ | Missing `XAI_API_KEY` | X file missing while Reddit succeeds | Set `XAI_API_KEY` in `.sc-research` to enable X |
117
+ | No relevant results | `items` is empty | Broaden topic keywords and retry |
118
+ | Rate limit / transient API failure | Timeout or provider error | Wait, then retry once with same parameters |
119
+ | Malformed output | JSON parse failure or missing `items` | Re-run fetch; if repeated, report failure explicitly |
120
+
121
+ ## Critical Rules
122
+
123
+ 1. **No analysis here** — do not rank, classify, or generate classified files.
124
+ 2. **No fabricated data** — do not create synthetic posts.
125
+ 3. **This is the only fetch point** — worker skills must never run fetch commands.
126
+ 4. **Fail loudly** — do not continue as if fetch succeeded when validation fails.
@@ -1,33 +1,19 @@
1
1
  ---
2
2
  name: social_media_rank
3
- description: Analyze existing Reddit/X raw data and generate `classified_rank.json` using the strict `ClassifiedData` schema. Use for ranking, best-of, compare, or recommendation requests.
3
+ description: Analysis-only worker. Reads existing raw Reddit/X data and generates `classified_rank.json` using the strict `ClassifiedData` schema. Use for ranking, best-of, compare, or recommendation requests.
4
4
  ---
5
5
 
6
6
  # Social Media Ranking Skill
7
7
 
8
- This worker converts raw discussion data into a ranked report suitable for the dashboard.
8
+ This worker converts raw discussion data into a ranked report suitable for the dashboard. It performs **analysis only** — fetching is handled by the orchestrator via `social_media_fetch`.
9
9
 
10
- ## Required Inputs
10
+ ## Prerequisites
11
11
 
12
- Use existing files only:
12
+ The following files must already exist (produced by `social_media_fetch`):
13
13
 
14
- - `reddit_data.json`
15
- - `x_data.json`
14
+ - `reddit_data.json` and/or `x_data.json`
16
15
 
17
- At least one valid source file must exist.
18
-
19
- ## Command Execution Flow
20
-
21
- Use this sequence for ranking:
22
-
23
- 1. Fetch or refresh raw data (outside this worker):
24
- - `sc-research research:deep "TOPIC"`
25
- - Optional filters: `--source=reddit|x|both --from=YYYY-MM-DD --to=YYYY-MM-DD`
26
- 2. Run this `social_media_rank` worker to generate `classified_rank.json`.
27
- 3. Optional visualization:
28
- - `sc-research visualize`
29
-
30
- If raw files are missing, stale, or mismatched for the requested topic/date range, run step 1 first.
16
+ At least one valid source file must be present. If both are missing, **stop and report failure** — do not attempt to fetch data.
31
17
 
32
18
  ## Step 1: Preflight Validation
33
19
 
@@ -91,6 +77,49 @@ Required top-level fields:
91
77
  - `products`
92
78
  - `key_insights`
93
79
 
80
+ ## Output Type Contract
81
+
82
+ Your output MUST match this exact shape. The dashboard detects rank data by checking for `products` (array) + `key_insights` (array). Missing either field = broken tab.
83
+
84
+ ```json
85
+ {
86
+ "topic": "best wireless earbuds 2025",
87
+ "products": [
88
+ {
89
+ "rank": 1,
90
+ "name": "Sony WF-1000XM5",
91
+ "sentiment": "Very Positive",
92
+ "mentions": 42,
93
+ "estimated_engagement_score": 8750,
94
+ "consensus": "Widely praised for ANC quality and sound clarity",
95
+ "pros": [
96
+ "Best-in-class ANC",
97
+ "Excellent sound quality",
98
+ "Comfortable fit"
99
+ ],
100
+ "cons": ["Premium price", "Average battery life"],
101
+ "highlight_quotes": [
102
+ {
103
+ "text": "The XM5s completely changed how I listen to music",
104
+ "author": "u/audiophile_reviews",
105
+ "link": "https://reddit.com/r/headphones/comments/abc123",
106
+ "context": "pro"
107
+ }
108
+ ]
109
+ }
110
+ ],
111
+ "key_insights": [
112
+ "Sony and Apple dominate recommendations with 70% of mentions",
113
+ "ANC quality is the most-discussed factor across all posts"
114
+ ]
115
+ }
116
+ ```
117
+
118
+ ### Enum Rules for Rank
119
+
120
+ - `sentiment` on Product: **Title Case** — `"Very Positive"`, `"Positive"`, `"Mixed"`, `"Negative"`. NEVER use `"positive"` or `"neutral"`.
121
+ - `context` on quotes: **lowercase** — `"pro"`, `"con"`, `"general"`. NEVER use `"Pro"` or `"Con"`.
122
+
94
123
  ## Final Validation Checklist
95
124
 
96
125
  - JSON is parseable.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: social_media_schema
3
- description: Reference-only skill that defines canonical JSON schemas for classified output files.
3
+ description: Reference-only skill that defines canonical JSON schemas for classified output files. Includes enum warnings and dashboard detection rules.
4
4
  ---
5
5
 
6
6
  # Social Media Schema Reference
@@ -13,7 +13,7 @@ Use this file as the canonical schema source for all classified outputs:
13
13
  - `classified_controversy.json`
14
14
  - `classified_discovery.json`
15
15
 
16
- If another skill instruction conflicts with this file, this file wins.
16
+ If another skill instruction conflicts with this file, **this file wins**.
17
17
 
18
18
  ## Command Execution Note
19
19
 
@@ -22,20 +22,91 @@ This is a reference-only skill. There is no direct CLI command to run this skill
22
22
  - Use it by reading this schema before writing any `classified_*.json` output.
23
23
  - Runnable commands belong to fetch/analysis/visualize skills (for example `sc-research research:deep "TOPIC"` and `sc-research visualize`).
24
24
 
25
+ ---
26
+
27
+ ## Dashboard Detection Rules
28
+
29
+ The web dashboard auto-detects which classified type a JSON file contains by checking for **unique field signatures**. If required fields are missing or misnamed, that tab will not appear.
30
+
31
+ | Classified Type | Detection Rule (fields checked) | Dashboard Tab |
32
+ | --------------- | ----------------------------------------------------------- | ------------------- |
33
+ | **rank** | `products` (array) AND `key_insights` (array) | Product Rankings |
34
+ | **sentiment** | `distribution` (object) AND `by_source` (object) | Sentiment Analysis |
35
+ | **trend** | `date_range` (object) AND `timeline` (array) | Trend Timeline |
36
+ | **controversy** | `overall_divisiveness` (string) AND `controversies` (array) | Controversy Map |
37
+ | **discovery** | `trending_topics` (array) | Discovery Dashboard |
38
+
39
+ **These field names are non-negotiable.** Renaming, omitting, or nesting them differently will break dashboard detection.
40
+
41
+ ---
42
+
43
+ ## Enum Value Warnings
44
+
45
+ ### SentimentLabel (Title Case — used in rank, sentiment, controversy contexts)
46
+
47
+ ```
48
+ CORRECT: "Very Positive", "Positive", "Mixed", "Negative"
49
+ WRONG: "very positive", "very_positive", "POSITIVE", "positive", "neutral"
50
+ ```
51
+
52
+ `"neutral"` is NOT a valid SentimentLabel. Use `"Mixed"` instead.
53
+
54
+ ### Divisiveness (Title Case — used in controversy)
55
+
56
+ ```
57
+ CORRECT: "Low", "Medium", "High"
58
+ WRONG: "low", "medium", "high", "LOW", "MEDIUM", "HIGH"
59
+ ```
60
+
61
+ ### Discovery sentiment (lowercase — DIFFERENT from SentimentLabel)
62
+
63
+ ```
64
+ CORRECT: "positive", "negative", "neutral", "mixed"
65
+ WRONG: "Positive", "Negative", "Neutral", "Mixed", "Very Positive"
66
+ ```
67
+
68
+ Discovery is the ONLY type that uses lowercase sentiment and includes `"neutral"`.
69
+
70
+ ### Discovery platform (lowercase)
71
+
72
+ ```
73
+ CORRECT: "reddit", "x"
74
+ WRONG: "Reddit", "X", "twitter", "Twitter"
75
+ ```
76
+
77
+ ### Key Moment significance (lowercase — used in trend)
78
+
79
+ ```
80
+ CORRECT: "high", "medium", "low"
81
+ WRONG: "High", "Medium", "Low"
82
+ ```
83
+
84
+ ### Quote context (lowercase — used in rank)
85
+
86
+ ```
87
+ CORRECT: "pro", "con", "general"
88
+ WRONG: "Pro", "Con", "General"
89
+ ```
90
+
91
+ ---
92
+
25
93
  ## Canonical Type Definitions
26
94
 
27
95
  ```typescript
96
+ // === RANK (classified_rank.json) ===
97
+ // Dashboard detects via: products + key_insights
98
+
28
99
  export interface ClassifiedData {
29
100
  topic: string;
30
101
  source_file?: string;
31
- products: Product[];
32
- key_insights: string[];
102
+ products: Product[]; // REQUIRED for dashboard detection
103
+ key_insights: string[]; // REQUIRED for dashboard detection
33
104
  }
34
105
 
35
106
  export interface Product {
36
107
  rank: number;
37
108
  name: string;
38
- sentiment: SentimentLabel;
109
+ sentiment: SentimentLabel; // Title Case: "Positive", "Mixed", etc.
39
110
  mentions: number;
40
111
  estimated_engagement_score: number;
41
112
  consensus: string;
@@ -45,7 +116,7 @@ export interface Product {
45
116
  text: string;
46
117
  author: string;
47
118
  link: string;
48
- context?: "pro" | "con" | "general";
119
+ context?: "pro" | "con" | "general"; // lowercase
49
120
  }>;
50
121
  }
51
122
 
@@ -55,16 +126,21 @@ export type SentimentLabel =
55
126
  | "Mixed"
56
127
  | "Very Positive";
57
128
 
129
+ // === SENTIMENT (classified_sentiment.json) ===
130
+ // Dashboard detects via: distribution + by_source
131
+
58
132
  export interface SentimentData {
59
133
  topic: string;
60
134
  overall_mood: SentimentLabel;
61
135
  distribution: {
62
- very_positive: number;
136
+ // REQUIRED for dashboard detection
137
+ very_positive: number; // snake_case keys, not camelCase
63
138
  positive: number;
64
139
  mixed: number;
65
140
  negative: number;
66
141
  };
67
142
  by_source: {
143
+ // REQUIRED for dashboard detection
68
144
  reddit: SourceSentiment;
69
145
  x: SourceSentiment;
70
146
  };
@@ -87,23 +163,27 @@ export interface ProductSentiment {
87
163
  text: string;
88
164
  author: string;
89
165
  link: string;
90
- sentiment: SentimentLabel;
166
+ sentiment: SentimentLabel; // Title Case
91
167
  }>;
92
168
  }
93
169
 
170
+ // === TREND (classified_trend.json) ===
171
+ // Dashboard detects via: date_range + timeline
172
+
94
173
  export interface TrendData {
95
174
  topic: string;
96
175
  date_range: {
176
+ // REQUIRED for dashboard detection
97
177
  from: string;
98
178
  to: string;
99
179
  };
100
180
  granularity?: "day" | "week" | "month";
101
- timeline: TimelinePoint[];
181
+ timeline: TimelinePoint[]; // REQUIRED for dashboard detection
102
182
  key_moments: KeyMoment[];
103
183
  }
104
184
 
105
185
  export interface TimelinePoint {
106
- period: string;
186
+ period: string; // Format depends on granularity (see worker skill)
107
187
  post_count: number;
108
188
  total_engagement: number;
109
189
  reddit_posts: number;
@@ -113,20 +193,23 @@ export interface TimelinePoint {
113
193
  export interface KeyMoment {
114
194
  date: string;
115
195
  event: string;
116
- significance: "high" | "medium" | "low";
196
+ significance: "high" | "medium" | "low"; // lowercase
117
197
  url?: string;
118
198
  }
119
199
 
200
+ // === CONTROVERSY (classified_controversy.json) ===
201
+ // Dashboard detects via: overall_divisiveness + controversies
202
+
120
203
  export interface ControversyData {
121
204
  topic: string;
122
- overall_divisiveness: "Low" | "Medium" | "High";
123
- controversies: Controversy[];
205
+ overall_divisiveness: "Low" | "Medium" | "High"; // REQUIRED + Title Case
206
+ controversies: Controversy[]; // REQUIRED for dashboard detection
124
207
  }
125
208
 
126
209
  export interface Controversy {
127
210
  topic: string;
128
- heat_score: number;
129
- divisiveness: "Low" | "Medium" | "High";
211
+ heat_score: number; // 0-100
212
+ divisiveness: "Low" | "Medium" | "High"; // Title Case
130
213
  side_a: ControversySide;
131
214
  side_b: ControversySide;
132
215
  }
@@ -141,11 +224,14 @@ export interface ControversySide {
141
224
  }>;
142
225
  }
143
226
 
227
+ // === DISCOVERY (classified_discovery.json) ===
228
+ // Dashboard detects via: trending_topics
229
+
144
230
  export interface DiscoveryData {
145
231
  topic: string;
146
232
  period: string;
147
233
  total_posts_analyzed: number;
148
- trending_topics: DiscoveryTopic[];
234
+ trending_topics: DiscoveryTopic[]; // REQUIRED for dashboard detection
149
235
  }
150
236
 
151
237
  export interface DiscoveryTopic {
@@ -154,20 +240,20 @@ export interface DiscoveryTopic {
154
240
  description: string;
155
241
  category: string;
156
242
  engagement_score: number;
157
- sentiment: "positive" | "negative" | "neutral" | "mixed";
243
+ sentiment: "positive" | "negative" | "neutral" | "mixed"; // lowercase!
158
244
  key_posts: KeyPost[];
159
245
  highlight_comments: Array<{
160
246
  text: string;
161
247
  author: string;
162
248
  link: string;
163
- platform: "reddit" | "x";
249
+ platform: "reddit" | "x"; // lowercase
164
250
  }>;
165
251
  }
166
252
 
167
253
  export interface KeyPost {
168
254
  title: string;
169
255
  url: string;
170
- platform: "reddit" | "x";
256
+ platform: "reddit" | "x"; // lowercase
171
257
  engagement: number;
172
258
  thumbnail?: string;
173
259
  }