sc-research 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -2
- package/dist/cli.js +2 -8
- package/package.json +2 -4
- package/templates/base/commands/deep-research.md +1 -1
- package/templates/base/manifest.json +6 -12
- package/templates/base/skills/social_media_controversy.md +2 -2
- package/templates/base/skills/social_media_discovery.md +2 -2
- package/templates/base/skills/social_media_rank.md +2 -2
- package/templates/base/skills/social_media_schema.md +167 -0
- package/templates/base/skills/social_media_sentiment.md +2 -2
- package/templates/base/skills/social_media_trend.md +2 -2
- package/templates/platforms/agent.json +1 -1
- package/dist/test/manual-fixed-links.js +0 -186
- package/templates/base/commands/test-research.md +0 -9
- package/templates/base/skills/social_media_research_test.md +0 -52
package/README.md
CHANGED
|
@@ -58,7 +58,6 @@ Template architecture:
|
|
|
58
58
|
- `research`: `sc-research research`
|
|
59
59
|
- `research:deep`: `sc-research research --depth=deep`
|
|
60
60
|
- `visualize`: `sc-research visualize`
|
|
61
|
-
- `test:fixed`: `sc-research test:fixed`
|
|
62
61
|
|
|
63
62
|
2. **Open the project in Claude Code**
|
|
64
63
|
|
|
@@ -72,7 +71,6 @@ Template architecture:
|
|
|
72
71
|
- `/controversy` – Build controversy map
|
|
73
72
|
- `/discovery` – Discover viral topics
|
|
74
73
|
- `/visualize` – Launch dashboard
|
|
75
|
-
- `/test-research` – Run fixed-link debug test
|
|
76
74
|
|
|
77
75
|
## Direct Bun CLI (Optional, existing behavior)
|
|
78
76
|
|
package/dist/cli.js
CHANGED
|
@@ -223,7 +223,7 @@ var windsurfAdapter = {
|
|
|
223
223
|
const config = loadPlatformConfig("windsurf");
|
|
224
224
|
return templates.map((template) => renderMarkdownOnly(template, {
|
|
225
225
|
rootDir: config.folderStructure.root,
|
|
226
|
-
commandDirName: "
|
|
226
|
+
commandDirName: "workflows",
|
|
227
227
|
skillDirName: "skills",
|
|
228
228
|
skillFileName: "SKILL.md"
|
|
229
229
|
}));
|
|
@@ -440,8 +440,7 @@ function syncPackageJson(projectRoot, options) {
|
|
|
440
440
|
const desiredScripts = {
|
|
441
441
|
research: "sc-research research",
|
|
442
442
|
"research:deep": "sc-research research --depth=deep",
|
|
443
|
-
visualize: "sc-research visualize"
|
|
444
|
-
"test:fixed": "sc-research test:fixed"
|
|
443
|
+
visualize: "sc-research visualize"
|
|
445
444
|
};
|
|
446
445
|
let pkg;
|
|
447
446
|
if (fs4.existsSync(packageJsonPath)) {
|
|
@@ -522,10 +521,6 @@ async function main() {
|
|
|
522
521
|
await runPackagedEntry("visualize.js", rest);
|
|
523
522
|
return;
|
|
524
523
|
}
|
|
525
|
-
if (command === "test:fixed") {
|
|
526
|
-
await runPackagedEntry("manual-fixed-links.js", rest);
|
|
527
|
-
return;
|
|
528
|
-
}
|
|
529
524
|
const logger = new Logger;
|
|
530
525
|
logger.exitWithError(`Unknown command "${command}". Run "sc-research --help" for usage.`);
|
|
531
526
|
}
|
|
@@ -591,7 +586,6 @@ Commands:
|
|
|
591
586
|
init Initialize SC-Research support files for a project
|
|
592
587
|
research Run research engine (same as project "research" script)
|
|
593
588
|
visualize Launch visualization app
|
|
594
|
-
test:fixed Run fixed-link test flow
|
|
595
589
|
|
|
596
590
|
Options:
|
|
597
591
|
--ai TARGETS Target AI(s): claude,cursor,windsurf,antigravity or "all"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sc-research",
|
|
3
|
-
"version": "1.0.6",
|
|
3
|
+
"version": "1.0.8",
|
|
4
4
|
"description": "Headless Social Media Research Data Provider for AI Agents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -20,15 +20,13 @@
|
|
|
20
20
|
"init": "node dist/cli.js init",
|
|
21
21
|
"init:all": "node dist/cli.js init --ai all",
|
|
22
22
|
"visualize": "bun --env-file=.sc-research src/entries/visualize.ts",
|
|
23
|
-
"test:fixed": "bun --env-file=.sc-research src/test/manual-fixed-links.ts",
|
|
24
23
|
"test:x": "bun --env-file=.sc-research src/test/manual-xai.ts",
|
|
25
24
|
"build:index": "bun build src/entries/research.ts --outfile dist/index.js --target node",
|
|
26
25
|
"build:visualize": "bun build src/entries/visualize.ts --outdir dist --target node",
|
|
27
|
-
"build:test:fixed": "bun build src/test/manual-fixed-links.ts --outdir dist/test --target node",
|
|
28
26
|
"build:cli": "bun build src/entries/cli.ts --outdir dist --target node",
|
|
29
27
|
"build:web": "npm --prefix web run build",
|
|
30
28
|
"build:web:copy": "node scripts/copy-web-dist.mjs",
|
|
31
|
-
"build": "bun run build:index && bun run build:visualize && bun run build:test:fixed && bun run build:cli && bun run build:web && bun run build:web:copy",
|
|
29
|
+
"build": "bun run build:index && bun run build:visualize && bun run build:cli && bun run build:web && bun run build:web:copy",
|
|
32
30
|
"prepack": "bun run build",
|
|
33
31
|
"release:check": "npm pack --dry-run",
|
|
34
32
|
"prepublishOnly": "npm run release:check"
|
|
@@ -19,7 +19,7 @@ description: Deeply research a topic and route to the best analysis template. Th
|
|
|
19
19
|
>
|
|
20
20
|
> - Match the requested topic (or close variant of the same topic).
|
|
21
21
|
> - Match requested date window when `--from/--to` was provided.
|
|
22
|
-
> - Include required fields from
|
|
22
|
+
> - Include required fields from `../skills/social_media_schema/SKILL.md`.
|
|
23
23
|
|
|
24
24
|
4. Display the results
|
|
25
25
|
> Present whichever template output(s) were selected after validation.
|
|
@@ -7,6 +7,12 @@
|
|
|
7
7
|
"description": "Worker skill that fetches raw discussion data from Reddit and X (Twitter) for a given topic. Returns raw JSON files.",
|
|
8
8
|
"bodyFile": "skills/social_media_fetch.md"
|
|
9
9
|
},
|
|
10
|
+
{
|
|
11
|
+
"id": "social_media_schema",
|
|
12
|
+
"kind": "skill",
|
|
13
|
+
"description": "Reference-only skill containing canonical output schemas for classified JSON files.",
|
|
14
|
+
"bodyFile": "skills/social_media_schema.md"
|
|
15
|
+
},
|
|
10
16
|
{
|
|
11
17
|
"id": "social_media_rank",
|
|
12
18
|
"kind": "skill",
|
|
@@ -43,12 +49,6 @@
|
|
|
43
49
|
"description": "Worker skill that launches a local web dashboard to visualize all available classified research data.",
|
|
44
50
|
"bodyFile": "skills/social_media_visualize.md"
|
|
45
51
|
},
|
|
46
|
-
{
|
|
47
|
-
"id": "social_media_research_test",
|
|
48
|
-
"kind": "skill",
|
|
49
|
-
"description": "Test the social media research skill with fixed Reddit links (no API key needed). Fetches data and returns JSON for AI classification.",
|
|
50
|
-
"bodyFile": "skills/social_media_research_test.md"
|
|
51
|
-
},
|
|
52
52
|
{
|
|
53
53
|
"id": "using_social_media_research",
|
|
54
54
|
"kind": "skill",
|
|
@@ -108,12 +108,6 @@
|
|
|
108
108
|
"kind": "command",
|
|
109
109
|
"description": "Launch dashboard for available classified data files.",
|
|
110
110
|
"bodyFile": "commands/visualize.md"
|
|
111
|
-
},
|
|
112
|
-
{
|
|
113
|
-
"id": "test-research",
|
|
114
|
-
"kind": "command",
|
|
115
|
-
"description": "Run fixed-link test research pipeline.",
|
|
116
|
-
"bodyFile": "commands/test-research.md"
|
|
117
111
|
}
|
|
118
112
|
]
|
|
119
113
|
}
|
|
@@ -25,7 +25,7 @@ At least one valid source file must exist.
|
|
|
25
25
|
|
|
26
26
|
## Step 2: Lock Output Schema
|
|
27
27
|
|
|
28
|
-
Read
|
|
28
|
+
Read `../social_media_schema/SKILL.md` and treat `ControversyData` as source of truth.
|
|
29
29
|
|
|
30
30
|
Required top-level fields:
|
|
31
31
|
|
|
@@ -91,4 +91,4 @@ Save strict JSON to:
|
|
|
91
91
|
1. **No external fetch**: analyze existing data only.
|
|
92
92
|
2. **No fabricated arguments**: only report controversies present in source text.
|
|
93
93
|
3. **No fabricated citations**: quote text, author, and link must be real.
|
|
94
|
-
4. **Schema strictness**: if instructions conflict with
|
|
94
|
+
4. **Schema strictness**: if instructions conflict with schema reference, schema reference wins.
|
|
@@ -25,7 +25,7 @@ At least one valid source file must exist.
|
|
|
25
25
|
|
|
26
26
|
## Step 2: Lock Schema
|
|
27
27
|
|
|
28
|
-
Read
|
|
28
|
+
Read `../social_media_schema/SKILL.md` and follow `DiscoveryData` and `DiscoveryTopic` exactly.
|
|
29
29
|
|
|
30
30
|
Important enum constraints:
|
|
31
31
|
|
|
@@ -87,5 +87,5 @@ Save strict JSON to:
|
|
|
87
87
|
|
|
88
88
|
1. **No external fetch**: do not run data collection here.
|
|
89
89
|
2. **No fabricated clusters or quotes**: everything must map to real evidence.
|
|
90
|
-
3. **Schema strictness**:
|
|
90
|
+
3. **Schema strictness**: the schema reference skill is authoritative.
|
|
91
91
|
4. **Graceful fallback**: use empty arrays for missing optional evidence; never invent content.
|
|
@@ -29,11 +29,11 @@ If both sources are missing or invalid, stop and report the failure.
|
|
|
29
29
|
|
|
30
30
|
## Step 2: Lock Output Schema
|
|
31
31
|
|
|
32
|
-
Read
|
|
32
|
+
Read `../social_media_schema/SKILL.md` and treat it as source of truth.
|
|
33
33
|
|
|
34
34
|
- Output type must match `ClassifiedData`.
|
|
35
35
|
- Product entries must match `Product`.
|
|
36
|
-
- If this file and
|
|
36
|
+
- If this file and schema reference conflict, schema reference wins.
|
|
37
37
|
|
|
38
38
|
## Step 3: Build Product Candidates
|
|
39
39
|
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: social_media_schema
|
|
3
|
+
description: Reference-only skill that defines canonical JSON schemas for classified output files.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Social Media Schema Reference
|
|
7
|
+
|
|
8
|
+
Use this file as the canonical schema source for all classified outputs:
|
|
9
|
+
|
|
10
|
+
- `classified_rank.json`
|
|
11
|
+
- `classified_sentiment.json`
|
|
12
|
+
- `classified_trend.json`
|
|
13
|
+
- `classified_controversy.json`
|
|
14
|
+
- `classified_discovery.json`
|
|
15
|
+
|
|
16
|
+
If another skill instruction conflicts with this file, this file wins.
|
|
17
|
+
|
|
18
|
+
## Canonical Type Definitions
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
export interface ClassifiedData {
|
|
22
|
+
topic: string;
|
|
23
|
+
source_file?: string;
|
|
24
|
+
products: Product[];
|
|
25
|
+
key_insights: string[];
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface Product {
|
|
29
|
+
rank: number;
|
|
30
|
+
name: string;
|
|
31
|
+
sentiment: SentimentLabel;
|
|
32
|
+
mentions: number;
|
|
33
|
+
estimated_engagement_score: number;
|
|
34
|
+
consensus: string;
|
|
35
|
+
pros: string[];
|
|
36
|
+
cons: string[];
|
|
37
|
+
highlight_quotes: Array<{
|
|
38
|
+
text: string;
|
|
39
|
+
author: string;
|
|
40
|
+
link: string;
|
|
41
|
+
context?: "pro" | "con" | "general";
|
|
42
|
+
}>;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
export type SentimentLabel =
|
|
46
|
+
| "Positive"
|
|
47
|
+
| "Negative"
|
|
48
|
+
| "Mixed"
|
|
49
|
+
| "Very Positive";
|
|
50
|
+
|
|
51
|
+
export interface SentimentData {
|
|
52
|
+
topic: string;
|
|
53
|
+
overall_mood: SentimentLabel;
|
|
54
|
+
distribution: {
|
|
55
|
+
very_positive: number;
|
|
56
|
+
positive: number;
|
|
57
|
+
mixed: number;
|
|
58
|
+
negative: number;
|
|
59
|
+
};
|
|
60
|
+
by_source: {
|
|
61
|
+
reddit: SourceSentiment;
|
|
62
|
+
x: SourceSentiment;
|
|
63
|
+
};
|
|
64
|
+
product_sentiments: ProductSentiment[];
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export interface SourceSentiment {
|
|
68
|
+
very_positive: number;
|
|
69
|
+
positive: number;
|
|
70
|
+
mixed: number;
|
|
71
|
+
negative: number;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
export interface ProductSentiment {
|
|
75
|
+
name: string;
|
|
76
|
+
overall: SentimentLabel;
|
|
77
|
+
reddit_sentiment: SentimentLabel | null;
|
|
78
|
+
x_sentiment: SentimentLabel | null;
|
|
79
|
+
evidence_quotes: Array<{
|
|
80
|
+
text: string;
|
|
81
|
+
author: string;
|
|
82
|
+
link: string;
|
|
83
|
+
sentiment: SentimentLabel;
|
|
84
|
+
}>;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
export interface TrendData {
|
|
88
|
+
topic: string;
|
|
89
|
+
date_range: {
|
|
90
|
+
from: string;
|
|
91
|
+
to: string;
|
|
92
|
+
};
|
|
93
|
+
granularity?: "day" | "week" | "month";
|
|
94
|
+
timeline: TimelinePoint[];
|
|
95
|
+
key_moments: KeyMoment[];
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
export interface TimelinePoint {
|
|
99
|
+
period: string;
|
|
100
|
+
post_count: number;
|
|
101
|
+
total_engagement: number;
|
|
102
|
+
reddit_posts: number;
|
|
103
|
+
x_posts: number;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
export interface KeyMoment {
|
|
107
|
+
date: string;
|
|
108
|
+
event: string;
|
|
109
|
+
significance: "high" | "medium" | "low";
|
|
110
|
+
url?: string;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
export interface ControversyData {
|
|
114
|
+
topic: string;
|
|
115
|
+
overall_divisiveness: "Low" | "Medium" | "High";
|
|
116
|
+
controversies: Controversy[];
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
export interface Controversy {
|
|
120
|
+
topic: string;
|
|
121
|
+
heat_score: number;
|
|
122
|
+
divisiveness: "Low" | "Medium" | "High";
|
|
123
|
+
side_a: ControversySide;
|
|
124
|
+
side_b: ControversySide;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
export interface ControversySide {
|
|
128
|
+
position: string;
|
|
129
|
+
supporter_count: number;
|
|
130
|
+
sample_quotes: Array<{
|
|
131
|
+
text: string;
|
|
132
|
+
author: string;
|
|
133
|
+
link: string;
|
|
134
|
+
}>;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
export interface DiscoveryData {
|
|
138
|
+
topic: string;
|
|
139
|
+
period: string;
|
|
140
|
+
total_posts_analyzed: number;
|
|
141
|
+
trending_topics: DiscoveryTopic[];
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
export interface DiscoveryTopic {
|
|
145
|
+
id: string;
|
|
146
|
+
topic_name: string;
|
|
147
|
+
description: string;
|
|
148
|
+
category: string;
|
|
149
|
+
engagement_score: number;
|
|
150
|
+
sentiment: "positive" | "negative" | "neutral" | "mixed";
|
|
151
|
+
key_posts: KeyPost[];
|
|
152
|
+
highlight_comments: Array<{
|
|
153
|
+
text: string;
|
|
154
|
+
author: string;
|
|
155
|
+
link: string;
|
|
156
|
+
platform: "reddit" | "x";
|
|
157
|
+
}>;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
export interface KeyPost {
|
|
161
|
+
title: string;
|
|
162
|
+
url: string;
|
|
163
|
+
platform: "reddit" | "x";
|
|
164
|
+
engagement: number;
|
|
165
|
+
thumbnail?: string;
|
|
166
|
+
}
|
|
167
|
+
```
|
|
@@ -25,7 +25,7 @@ At least one valid source file must exist.
|
|
|
25
25
|
|
|
26
26
|
## Step 2: Lock Schema and Allowed Labels
|
|
27
27
|
|
|
28
|
-
Read
|
|
28
|
+
Read `../social_media_schema/SKILL.md` before building output.
|
|
29
29
|
|
|
30
30
|
- Output must match `SentimentData`.
|
|
31
31
|
- Allowed labels are only:
|
|
@@ -89,5 +89,5 @@ Save result to:
|
|
|
89
89
|
|
|
90
90
|
1. **No external fetch**: analyze only provided raw files.
|
|
91
91
|
2. **Evidence over guesswork**: every product label must be explainable from quotes/content.
|
|
92
|
-
3. **Strict schema**:
|
|
92
|
+
3. **Strict schema**: the schema reference skill is the only schema source of truth.
|
|
93
93
|
4. **No fabricated citations**: quote text, author, and link must exist in raw data.
|
|
@@ -67,7 +67,7 @@ Prefer evidence-backed moments over generic commentary.
|
|
|
67
67
|
|
|
68
68
|
## Step 5: Write Output
|
|
69
69
|
|
|
70
|
-
Read `web/src/types.ts` and output strict `TrendData` JSON to:
|
|
70
|
+
Read `../social_media_schema/SKILL.md` and output strict `TrendData` JSON to:
|
|
71
71
|
|
|
72
72
|
- `classified_trend.json`
|
|
73
73
|
|
|
@@ -83,5 +83,5 @@ Read `web/src/types.ts` and output strict `TrendData` JSON to:
|
|
|
83
83
|
|
|
84
84
|
1. **No external fetch**: analyze existing raw files only.
|
|
85
85
|
2. **No invented dates/events**: every bucket and key moment must come from real data.
|
|
86
|
-
3. **Schema strictness**:
|
|
86
|
+
3. **Schema strictness**: the schema reference skill is authoritative.
|
|
87
87
|
4. **Graceful degradation**: skip bad records; never crash the whole report for a few malformed items.
|
|
@@ -1,186 +0,0 @@
|
|
|
1
|
-
import { createRequire } from "node:module";
|
|
2
|
-
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
3
|
-
|
|
4
|
-
// src/core/clients/reddit.ts
|
|
5
|
-
var DEFAULT_CONFIG = {
|
|
6
|
-
userAgent: "CommunitiesResearchSkill/1.0.0 (Research)",
|
|
7
|
-
maxThreads: 20,
|
|
8
|
-
delayMs: 2000
|
|
9
|
-
};
|
|
10
|
-
var sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
|
11
|
-
async function fetchRedditThread(threadUrl, config = {}) {
|
|
12
|
-
const { userAgent } = { ...DEFAULT_CONFIG, ...config };
|
|
13
|
-
let jsonUrl = threadUrl.split("?")[0];
|
|
14
|
-
if (jsonUrl.endsWith("/")) {
|
|
15
|
-
jsonUrl = jsonUrl.slice(0, -1);
|
|
16
|
-
}
|
|
17
|
-
jsonUrl += ".json?raw_json=1";
|
|
18
|
-
let attempts = 0;
|
|
19
|
-
const maxRetries = 3;
|
|
20
|
-
while (attempts < maxRetries) {
|
|
21
|
-
attempts++;
|
|
22
|
-
try {
|
|
23
|
-
const response = await fetch(jsonUrl, {
|
|
24
|
-
headers: {
|
|
25
|
-
"User-Agent": userAgent || `ResearchSkill/${Date.now()}`,
|
|
26
|
-
Accept: "application/json",
|
|
27
|
-
"Accept-Language": "en-US,en;q=0.9"
|
|
28
|
-
},
|
|
29
|
-
redirect: "follow"
|
|
30
|
-
});
|
|
31
|
-
if (!response.ok) {
|
|
32
|
-
if (response.status === 404) {
|
|
33
|
-
console.warn(`[404] Thread not found: ${jsonUrl}`);
|
|
34
|
-
return [];
|
|
35
|
-
}
|
|
36
|
-
if (response.status === 403) {
|
|
37
|
-
if (attempts === 1) {
|
|
38
|
-
console.warn(`[403] Blocked. Waiting 5s before single retry...`);
|
|
39
|
-
await sleep(5000);
|
|
40
|
-
continue;
|
|
41
|
-
}
|
|
42
|
-
throw new Error(`Reddit Blocked (403): Access denied.`);
|
|
43
|
-
}
|
|
44
|
-
if (response.status === 429) {
|
|
45
|
-
const waitTime = Math.pow(2, attempts) * 1000 + Math.random() * 1000;
|
|
46
|
-
console.warn(`[429] Rate limit. Backing off for ${Math.round(waitTime)}ms...`);
|
|
47
|
-
await sleep(waitTime);
|
|
48
|
-
continue;
|
|
49
|
-
}
|
|
50
|
-
if (response.status >= 500) {
|
|
51
|
-
const waitTime = 1000 * attempts;
|
|
52
|
-
console.warn(`[${response.status}] Server error. Retrying in ${waitTime}ms...`);
|
|
53
|
-
await sleep(waitTime);
|
|
54
|
-
continue;
|
|
55
|
-
}
|
|
56
|
-
throw new Error(`Failed to fetch Reddit thread: ${response.statusText} (${response.status})`);
|
|
57
|
-
}
|
|
58
|
-
const data = await response.json();
|
|
59
|
-
if (!Array.isArray(data) || data.length < 2) {
|
|
60
|
-
return [];
|
|
61
|
-
}
|
|
62
|
-
const postListing = data[0].data.children[0]?.data;
|
|
63
|
-
const commentListing = data[1].data.children;
|
|
64
|
-
if (!postListing)
|
|
65
|
-
return [];
|
|
66
|
-
const posts = [];
|
|
67
|
-
posts.push({
|
|
68
|
-
text: postListing.title + `
|
|
69
|
-
|
|
70
|
-
` + (postListing.selftext || ""),
|
|
71
|
-
title: postListing.title,
|
|
72
|
-
author: postListing.author,
|
|
73
|
-
engagement: {
|
|
74
|
-
score: postListing.score,
|
|
75
|
-
comments: postListing.num_comments
|
|
76
|
-
},
|
|
77
|
-
url: threadUrl,
|
|
78
|
-
date: new Date(postListing.created_utc * 1000).toISOString()
|
|
79
|
-
});
|
|
80
|
-
const validComments = [];
|
|
81
|
-
for (const child of commentListing) {
|
|
82
|
-
if (child.kind === "t1" && child.data.body) {
|
|
83
|
-
const comment = child.data;
|
|
84
|
-
const body = comment.body.trim();
|
|
85
|
-
if (body.length < 30 || body === "[deleted]" || body === "[removed]" || body.includes("RemindMe!") || body.endsWith("?")) {
|
|
86
|
-
continue;
|
|
87
|
-
}
|
|
88
|
-
validComments.push({
|
|
89
|
-
text: body,
|
|
90
|
-
author: comment.author,
|
|
91
|
-
engagement: {
|
|
92
|
-
score: comment.score
|
|
93
|
-
},
|
|
94
|
-
url: `https://reddit.com${comment.permalink}`,
|
|
95
|
-
date: new Date(comment.created_utc * 1000).toISOString()
|
|
96
|
-
});
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
validComments.sort((a, b) => (b.engagement.score || 0) - (a.engagement.score || 0));
|
|
100
|
-
posts.push(...validComments.slice(0, 5));
|
|
101
|
-
return posts;
|
|
102
|
-
} catch (error) {
|
|
103
|
-
if (attempts >= maxRetries) {
|
|
104
|
-
console.error(`Error fetching Reddit thread ${threadUrl} after ${maxRetries} attempts:`, error);
|
|
105
|
-
throw error;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
return [];
|
|
110
|
-
}
|
|
111
|
-
async function fetchRedditThreads(threadUrls, config = {}) {
|
|
112
|
-
const { maxThreads = 20, delayMs = 2000 } = { ...DEFAULT_CONFIG, ...config };
|
|
113
|
-
const allPosts = [];
|
|
114
|
-
const limitedUrls = threadUrls.slice(0, maxThreads);
|
|
115
|
-
console.log(`[Reddit] Fetching ${limitedUrls.length} threads sequentially with jitter...`);
|
|
116
|
-
for (const url of limitedUrls) {
|
|
117
|
-
try {
|
|
118
|
-
const posts = await fetchRedditThread(url, config);
|
|
119
|
-
allPosts.push(...posts);
|
|
120
|
-
if (limitedUrls.indexOf(url) < limitedUrls.length - 1) {
|
|
121
|
-
const jitter = delayMs * 0.5 * Math.random();
|
|
122
|
-
const finalDelay = delayMs + jitter;
|
|
123
|
-
await sleep(finalDelay);
|
|
124
|
-
}
|
|
125
|
-
} catch (error) {
|
|
126
|
-
console.error(`Skipping thread due to final error: ${url}`);
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
return allPosts;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
// src/core/services/data-prep.ts
|
|
133
|
-
class DataPrepService {
|
|
134
|
-
prepare(posts) {
|
|
135
|
-
const items = posts.map((post) => {
|
|
136
|
-
return {
|
|
137
|
-
...post
|
|
138
|
-
};
|
|
139
|
-
});
|
|
140
|
-
items.sort((a, b) => (b.engagement.score || 0) - (a.engagement.score || 0));
|
|
141
|
-
return items.slice(0, 50);
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
// src/core/services/formatting.ts
|
|
146
|
-
class FormattingService {
|
|
147
|
-
formatJson(result) {
|
|
148
|
-
return JSON.stringify(result, null, 2);
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
// src/test/manual-fixed-links.ts
|
|
153
|
-
async function run() {
|
|
154
|
-
const args = process.argv.slice(2);
|
|
155
|
-
const fixedUrls = args.length > 0 ? args : [
|
|
156
|
-
"https://www.reddit.com/r/iems/comments/1olzu0g/the_best_iem_builds_at_each_price_2025_ultimate/",
|
|
157
|
-
"https://www.reddit.com/r/headphones/comments/1lbcngj/new_iem_tierlist_2025/",
|
|
158
|
-
"https://www.reddit.com/r/iems/comments/1c7imln/iem_tier_list/"
|
|
159
|
-
];
|
|
160
|
-
console.log(`\uD83E\uDDEA Starting Fixed-Link Test with ${fixedUrls.length} URLs...`);
|
|
161
|
-
try {
|
|
162
|
-
const posts = await fetchRedditThreads(fixedUrls, { maxThreads: 5 });
|
|
163
|
-
console.log(`✅ Successfully fetched ${posts.length} items (Post + Top Comments).`);
|
|
164
|
-
console.log(`
|
|
165
|
-
2️⃣ Preparing raw data for AI analysis...`);
|
|
166
|
-
const dataPrep = new DataPrepService;
|
|
167
|
-
const researchItems = dataPrep.prepare(posts);
|
|
168
|
-
console.log(`
|
|
169
|
-
3️⃣ Generating Output (JSON)...`);
|
|
170
|
-
const formatter = new FormattingService;
|
|
171
|
-
const mockResult = {
|
|
172
|
-
query: "Manual Test: Best IEMs 2025",
|
|
173
|
-
dateRange: { from: "2025-01-01", to: "2025-12-31" },
|
|
174
|
-
items: researchItems
|
|
175
|
-
};
|
|
176
|
-
const jsonOutput = formatter.formatJson(mockResult);
|
|
177
|
-
const fs = __require("fs");
|
|
178
|
-
const path = __require("path");
|
|
179
|
-
const outputPath = path.resolve(process.cwd(), "reddit_data_test.json");
|
|
180
|
-
fs.writeFileSync(outputPath, jsonOutput);
|
|
181
|
-
console.log(`✅ Saved test output to: ${outputPath}`);
|
|
182
|
-
} catch (error) {
|
|
183
|
-
console.error("❌ Test Failed:", error);
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
run();
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: social_media_research_test
|
|
3
|
-
description: Legacy debug worker for fixed-link Reddit pipeline testing. Generates `reddit_data_test.json` without normal search/discovery flow and is useful for manual sanity checks.
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# Social Media Research Test Skill
|
|
7
|
-
|
|
8
|
-
> Legacy/debug helper. Prefer the normal pipeline (`social_media_fetch` + analysis workers) for production research.
|
|
9
|
-
|
|
10
|
-
Use this skill to sanity-check the fixed-link ingestion path with known Reddit URLs.
|
|
11
|
-
|
|
12
|
-
## Workflow
|
|
13
|
-
|
|
14
|
-
### 1) Run Fixed-Link Test
|
|
15
|
-
|
|
16
|
-
```bash
|
|
17
|
-
sc-research test:fixed [URL1] [URL2] ...
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
If no URLs are provided, built-in default Reddit links are used.
|
|
21
|
-
|
|
22
|
-
### 2) Verify Output File
|
|
23
|
-
|
|
24
|
-
This command writes:
|
|
25
|
-
|
|
26
|
-
- `reddit_data_test.json` (in current working directory)
|
|
27
|
-
|
|
28
|
-
Validate that:
|
|
29
|
-
|
|
30
|
-
1. file exists
|
|
31
|
-
2. JSON parses
|
|
32
|
-
3. top-level `items` is a non-empty array when URLs are valid
|
|
33
|
-
|
|
34
|
-
### 3) Use Output (Optional)
|
|
35
|
-
|
|
36
|
-
For manual worker testing, copy/rename test output as needed (for example to `reddit_data.json`) and run one analysis worker (`social_media_rank`, `social_media_sentiment`, etc.).
|
|
37
|
-
|
|
38
|
-
## Expected Output Shape
|
|
39
|
-
|
|
40
|
-
`reddit_data_test.json` should contain:
|
|
41
|
-
|
|
42
|
-
- `query`
|
|
43
|
-
- `dateRange`
|
|
44
|
-
- `items` (normalized Reddit post/comment records)
|
|
45
|
-
|
|
46
|
-
`items` commonly include fields like `author`, `url`, text content, engagement, and date fields.
|
|
47
|
-
|
|
48
|
-
## Critical Rules
|
|
49
|
-
|
|
50
|
-
1. **Debug only**: this is not the canonical production fetch path.
|
|
51
|
-
2. **No schema assumptions from legacy output**: analysis workers must still validate against `web/src/types.ts` when generating classified files.
|
|
52
|
-
3. **Fail clearly**: if output file is missing or malformed, report test failure instead of continuing.
|