sc-research 1.0.7 → 1.0.9
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/README.md +6 -9
- package/dist/cli.js +0 -76
- package/package.json +2 -4
- package/templates/base/commands/deep-research.md +1 -1
- package/templates/base/manifest.json +6 -12
- package/templates/base/skills/social_media_controversy.md +2 -2
- package/templates/base/skills/social_media_discovery.md +2 -2
- package/templates/base/skills/social_media_rank.md +2 -2
- package/templates/base/skills/social_media_schema.md +167 -0
- package/templates/base/skills/social_media_sentiment.md +2 -2
- package/templates/base/skills/social_media_trend.md +2 -2
- package/dist/test/manual-fixed-links.js +0 -186
- package/templates/base/commands/test-research.md +0 -9
- package/templates/base/skills/social_media_research_test.md +0 -52
package/README.md
CHANGED

@@ -38,9 +38,8 @@ sc-research init --ai claude,cursor
 sc-research init --ai all
 ```
 
-This now …
-
-- Reads/updates your project `package.json` scripts so commands work immediately
+This now installs SC-Research templates for selected platform(s) only.
+It does not read, write, or update `package.json`.
 
 Template architecture:
 - `templates/base/` is the canonical source for command/skill content
@@ -54,11 +53,10 @@ Template architecture:
 sc-research init --ai claude
 ```
 
-
-- `research`: `sc-research research`
-- `research:deep`: `sc-research research --depth=deep`
-- `visualize`: `sc-research visualize`
-- `test:fixed`: `sc-research test:fixed`
+Then run commands directly from your terminal:
+- `sc-research research "<topic>"`
+- `sc-research research:deep "<topic>"`
+- `sc-research visualize`
 
 2. **Open the project in Claude Code**
 
@@ -72,7 +70,6 @@ Template architecture:
 - `/controversy` – Build controversy map
 - `/discovery` – Discover viral topics
 - `/visualize` – Launch dashboard
-- `/test-research` – Run fixed-link debug test
 
 ## Direct Bun CLI (Optional, existing behavior)
 
package/dist/cli.js
CHANGED

@@ -358,10 +358,6 @@ async function runInit(opts) {
     logger.info(` Mode: dry-run (no files will be written)`);
   }
   ensureDir(projectRoot);
-  const packageSummary = syncPackageJson(projectRoot, {
-    dryRun: opts.dryRun,
-    force: opts.force
-  });
   const summariesByTarget = new Map;
   for (const target of targets) {
     const adapter = getAdapter(target);
@@ -394,12 +390,6 @@ async function runInit(opts) {
   logger.info(` Total skipped: ${totalSummary.skipped}`);
   logger.info(` Total overwritten: ${totalSummary.overwritten}`);
   logger.info("");
-  logger.info("package.json:");
-  logger.info(` Created: ${packageSummary.created ? 1 : 0}`);
-  logger.info(` Scripts added: ${packageSummary.scriptsAdded}`);
-  logger.info(` Scripts skipped: ${packageSummary.scriptsSkipped}`);
-  logger.info(` Scripts overwritten: ${packageSummary.scriptsOverwritten}`);
-  logger.info("");
   logger.info("Next steps:");
   logger.info(" 1) Ensure your project has required env vars set (for example in .sc-research):");
   logger.info(" - OPENAI_API_KEY");
@@ -429,67 +419,6 @@ function findPackageRoot2() {
   }
   return null;
 }
-function syncPackageJson(projectRoot, options) {
-  const packageJsonPath = path4.join(projectRoot, "package.json");
-  const summary = {
-    created: false,
-    scriptsAdded: 0,
-    scriptsSkipped: 0,
-    scriptsOverwritten: 0
-  };
-  const desiredScripts = {
-    research: "sc-research research",
-    "research:deep": "sc-research research --depth=deep",
-    visualize: "sc-research visualize",
-    "test:fixed": "sc-research test:fixed"
-  };
-  let pkg;
-  if (fs4.existsSync(packageJsonPath)) {
-    const raw = fs4.readFileSync(packageJsonPath, "utf-8");
-    try {
-      pkg = JSON.parse(raw);
-    } catch {
-      throw new Error(`Invalid JSON in ${packageJsonPath}. Fix it before running init.`);
-    }
-  } else {
-    summary.created = true;
-    pkg = {
-      name: sanitizePackageName(path4.basename(projectRoot)),
-      private: true,
-      version: "0.0.0",
-      scripts: {}
-    };
-  }
-  const scriptsValue = pkg.scripts;
-  const scripts = scriptsValue && typeof scriptsValue === "object" && !Array.isArray(scriptsValue) ? { ...scriptsValue } : {};
-  for (const [name, command] of Object.entries(desiredScripts)) {
-    const current = scripts[name];
-    if (!current) {
-      scripts[name] = command;
-      summary.scriptsAdded++;
-      continue;
-    }
-    if (current !== command) {
-      if (options.force) {
-        scripts[name] = command;
-        summary.scriptsOverwritten++;
-      } else {
-        summary.scriptsSkipped++;
-      }
-    }
-  }
-  pkg.scripts = scripts;
-  const shouldWrite = summary.created || summary.scriptsAdded > 0 || summary.scriptsOverwritten > 0;
-  if (shouldWrite && !options.dryRun) {
-    fs4.writeFileSync(packageJsonPath, `${JSON.stringify(pkg, null, 2)}
-`);
-  }
-  return summary;
-}
-function sanitizePackageName(name) {
-  const cleaned = name.toLowerCase().trim().replace(/[^a-z0-9-_.]/g, "-").replace(/^-+/, "").replace(/-+$/, "");
-  return cleaned || "sc-research-project";
-}
 
 // src/entries/cli.ts
 var DEFAULT_ENV_FILE = ".sc-research";
@@ -522,10 +451,6 @@ async function main() {
     await runPackagedEntry("visualize.js", rest);
     return;
   }
-  if (command === "test:fixed") {
-    await runPackagedEntry("manual-fixed-links.js", rest);
-    return;
-  }
   const logger = new Logger;
   logger.exitWithError(`Unknown command "${command}". Run "sc-research --help" for usage.`);
 }
@@ -591,7 +516,6 @@ Commands:
   init        Initialize SC-Research support files for a project
   research    Run research engine (same as project "research" script)
   visualize   Launch visualization app
-  test:fixed  Run fixed-link test flow
 
 Options:
   --ai TARGETS   Target AI(s): claude,cursor,windsurf,antigravity or "all"
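Projects that relied on the scripts `init` previously injected can recreate the surviving ones by hand. A minimal sketch reusing the mappings from the removed `desiredScripts` map (minus the deleted `test:fixed`); the merge strategy shown is illustrative, not part of the package:

```typescript
import { readFileSync, writeFileSync } from "node:fs";

// Script entries carried over from the removed desiredScripts map;
// "test:fixed" is gone along with its dist entry point.
const desiredScripts: Record<string, string> = {
  research: "sc-research research",
  "research:deep": "sc-research research --depth=deep",
  visualize: "sc-research visualize"
};

// Merge into an existing package.json; spread order keeps any
// user-defined scripts intact (illustrative, one-shot migration).
const pkg = JSON.parse(readFileSync("package.json", "utf-8"));
pkg.scripts = { ...desiredScripts, ...(pkg.scripts ?? {}) };
writeFileSync("package.json", `${JSON.stringify(pkg, null, 2)}\n`);
```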
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "sc-research",
-  "version": "1.0.7",
+  "version": "1.0.9",
   "description": "Headless Social Media Research Data Provider for AI Agents",
   "type": "module",
   "main": "dist/index.js",
@@ -20,15 +20,13 @@
     "init": "node dist/cli.js init",
     "init:all": "node dist/cli.js init --ai all",
     "visualize": "bun --env-file=.sc-research src/entries/visualize.ts",
-    "test:fixed": "bun --env-file=.sc-research src/test/manual-fixed-links.ts",
     "test:x": "bun --env-file=.sc-research src/test/manual-xai.ts",
     "build:index": "bun build src/entries/research.ts --outfile dist/index.js --target node",
     "build:visualize": "bun build src/entries/visualize.ts --outdir dist --target node",
-    "build:test:fixed": "bun build src/test/manual-fixed-links.ts --outdir dist/test --target node",
     "build:cli": "bun build src/entries/cli.ts --outdir dist --target node",
     "build:web": "npm --prefix web run build",
     "build:web:copy": "node scripts/copy-web-dist.mjs",
-    "build": "bun run build:index && bun run build:visualize && bun run build:test:fixed && bun run build:cli && bun run build:web && bun run build:web:copy",
+    "build": "bun run build:index && bun run build:visualize && bun run build:cli && bun run build:web && bun run build:web:copy",
     "prepack": "bun run build",
     "release:check": "npm pack --dry-run",
     "prepublishOnly": "npm run release:check"
package/templates/base/commands/deep-research.md
CHANGED

@@ -19,7 +19,7 @@ description: Deeply research a topic and route to the best analysis template. Th
 >
 > - Match the requested topic (or close variant of the same topic).
 > - Match requested date window when `--from/--to` was provided.
-> - Include required fields from …
+> - Include required fields from `../skills/social_media_schema/SKILL.md`.
 
 4. Display the results
 > Present whichever template output(s) were selected after validation.
package/templates/base/manifest.json
CHANGED

@@ -7,6 +7,12 @@
     "description": "Worker skill that fetches raw discussion data from Reddit and X (Twitter) for a given topic. Returns raw JSON files.",
     "bodyFile": "skills/social_media_fetch.md"
   },
+  {
+    "id": "social_media_schema",
+    "kind": "skill",
+    "description": "Reference-only skill containing canonical output schemas for classified JSON files.",
+    "bodyFile": "skills/social_media_schema.md"
+  },
   {
     "id": "social_media_rank",
     "kind": "skill",
@@ -43,12 +49,6 @@
     "description": "Worker skill that launches a local web dashboard to visualize all available classified research data.",
     "bodyFile": "skills/social_media_visualize.md"
   },
-  {
-    "id": "social_media_research_test",
-    "kind": "skill",
-    "description": "Test the social media research skill with fixed Reddit links (no API key needed). Fetches data and returns JSON for AI classification.",
-    "bodyFile": "skills/social_media_research_test.md"
-  },
   {
     "id": "using_social_media_research",
     "kind": "skill",
@@ -108,12 +108,6 @@
     "kind": "command",
     "description": "Launch dashboard for available classified data files.",
     "bodyFile": "commands/visualize.md"
-  },
-  {
-    "id": "test-research",
-    "kind": "command",
-    "description": "Run fixed-link test research pipeline.",
-    "bodyFile": "commands/test-research.md"
   }
  ]
 }
package/templates/base/skills/social_media_controversy.md
CHANGED

@@ -25,7 +25,7 @@ At least one valid source file must exist.
 
 ## Step 2: Lock Output Schema
 
-Read …
+Read `../social_media_schema/SKILL.md` and treat `ControversyData` as source of truth.
 
 Required top-level fields:
 
@@ -91,4 +91,4 @@ Save strict JSON to:
 1. **No external fetch**: analyze existing data only.
 2. **No fabricated arguments**: only report controversies present in source text.
 3. **No fabricated citations**: quote text, author, and link must be real.
-4. **Schema strictness**: if instructions conflict with …
+4. **Schema strictness**: if instructions conflict with schema reference, schema reference wins.
package/templates/base/skills/social_media_discovery.md
CHANGED

@@ -25,7 +25,7 @@ At least one valid source file must exist.
 
 ## Step 2: Lock Schema
 
-Read …
+Read `../social_media_schema/SKILL.md` and follow `DiscoveryData` and `DiscoveryTopic` exactly.
 
 Important enum constraints:
 
@@ -87,5 +87,5 @@ Save strict JSON to:
 
 1. **No external fetch**: do not run data collection here.
 2. **No fabricated clusters or quotes**: everything must map to real evidence.
-3. **Schema strictness**: …
+3. **Schema strictness**: the schema reference skill is authoritative.
 4. **Graceful fallback**: use empty arrays for missing optional evidence; never invent content.
package/templates/base/skills/social_media_rank.md
CHANGED

@@ -29,11 +29,11 @@ If both sources are missing or invalid, stop and report the failure.
 
 ## Step 2: Lock Output Schema
 
-Read …
+Read `../social_media_schema/SKILL.md` and treat it as source of truth.
 
 - Output type must match `ClassifiedData`.
 - Product entries must match `Product`.
-- If this file and …
+- If this file and schema reference conflict, schema reference wins.
 
 ## Step 3: Build Product Candidates
 
package/templates/base/skills/social_media_schema.md
ADDED

@@ -0,0 +1,167 @@
+---
+name: social_media_schema
+description: Reference-only skill that defines canonical JSON schemas for classified output files.
+---
+
+# Social Media Schema Reference
+
+Use this file as the canonical schema source for all classified outputs:
+
+- `classified_rank.json`
+- `classified_sentiment.json`
+- `classified_trend.json`
+- `classified_controversy.json`
+- `classified_discovery.json`
+
+If another skill instruction conflicts with this file, this file wins.
+
+## Canonical Type Definitions
+
+```typescript
+export interface ClassifiedData {
+  topic: string;
+  source_file?: string;
+  products: Product[];
+  key_insights: string[];
+}
+
+export interface Product {
+  rank: number;
+  name: string;
+  sentiment: SentimentLabel;
+  mentions: number;
+  estimated_engagement_score: number;
+  consensus: string;
+  pros: string[];
+  cons: string[];
+  highlight_quotes: Array<{
+    text: string;
+    author: string;
+    link: string;
+    context?: "pro" | "con" | "general";
+  }>;
+}
+
+export type SentimentLabel =
+  | "Positive"
+  | "Negative"
+  | "Mixed"
+  | "Very Positive";
+
+export interface SentimentData {
+  topic: string;
+  overall_mood: SentimentLabel;
+  distribution: {
+    very_positive: number;
+    positive: number;
+    mixed: number;
+    negative: number;
+  };
+  by_source: {
+    reddit: SourceSentiment;
+    x: SourceSentiment;
+  };
+  product_sentiments: ProductSentiment[];
+}
+
+export interface SourceSentiment {
+  very_positive: number;
+  positive: number;
+  mixed: number;
+  negative: number;
+}
+
+export interface ProductSentiment {
+  name: string;
+  overall: SentimentLabel;
+  reddit_sentiment: SentimentLabel | null;
+  x_sentiment: SentimentLabel | null;
+  evidence_quotes: Array<{
+    text: string;
+    author: string;
+    link: string;
+    sentiment: SentimentLabel;
+  }>;
+}
+
+export interface TrendData {
+  topic: string;
+  date_range: {
+    from: string;
+    to: string;
+  };
+  granularity?: "day" | "week" | "month";
+  timeline: TimelinePoint[];
+  key_moments: KeyMoment[];
+}
+
+export interface TimelinePoint {
+  period: string;
+  post_count: number;
+  total_engagement: number;
+  reddit_posts: number;
+  x_posts: number;
+}
+
+export interface KeyMoment {
+  date: string;
+  event: string;
+  significance: "high" | "medium" | "low";
+  url?: string;
+}
+
+export interface ControversyData {
+  topic: string;
+  overall_divisiveness: "Low" | "Medium" | "High";
+  controversies: Controversy[];
+}
+
+export interface Controversy {
+  topic: string;
+  heat_score: number;
+  divisiveness: "Low" | "Medium" | "High";
+  side_a: ControversySide;
+  side_b: ControversySide;
+}
+
+export interface ControversySide {
+  position: string;
+  supporter_count: number;
+  sample_quotes: Array<{
+    text: string;
+    author: string;
+    link: string;
+  }>;
+}
+
+export interface DiscoveryData {
+  topic: string;
+  period: string;
+  total_posts_analyzed: number;
+  trending_topics: DiscoveryTopic[];
+}
+
+export interface DiscoveryTopic {
+  id: string;
+  topic_name: string;
+  description: string;
+  category: string;
+  engagement_score: number;
+  sentiment: "positive" | "negative" | "neutral" | "mixed";
+  key_posts: KeyPost[];
+  highlight_comments: Array<{
+    text: string;
+    author: string;
+    link: string;
+    platform: "reddit" | "x";
+  }>;
+}
+
+export interface KeyPost {
+  title: string;
+  url: string;
+  platform: "reddit" | "x";
+  engagement: number;
+  thumbnail?: string;
+}
+```
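For orientation, here is a minimal value that satisfies the `ClassifiedData` and `Product` interfaces above; all field values are invented for illustration and do not come from the package:

```typescript
// Hypothetical classified_rank.json payload; assumes the interfaces
// above (ClassifiedData, Product, SentimentLabel) are in scope.
const example: ClassifiedData = {
  topic: "Best IEMs 2025",
  products: [
    {
      rank: 1,
      name: "Example IEM",            // invented product
      sentiment: "Very Positive",     // must be a SentimentLabel
      mentions: 42,
      estimated_engagement_score: 980,
      consensus: "Widely recommended at its price point.",
      pros: ["balanced tuning"],
      cons: ["stock cable"],
      highlight_quotes: [
        {
          text: "Still my daily driver.",
          author: "example_user",
          link: "https://reddit.com/r/iems/...", // placeholder link
          context: "pro"
        }
      ]
    }
  ],
  key_insights: ["Budget sets dominated the discussion."]
};
```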
package/templates/base/skills/social_media_sentiment.md
CHANGED

@@ -25,7 +25,7 @@ At least one valid source file must exist.
 
 ## Step 2: Lock Schema and Allowed Labels
 
-Read …
+Read `../social_media_schema/SKILL.md` before building output.
 
 - Output must match `SentimentData`.
 - Allowed labels are only:
@@ -89,5 +89,5 @@ Save result to:
 
 1. **No external fetch**: analyze only provided raw files.
 2. **Evidence over guesswork**: every product label must be explainable from quotes/content.
-3. **Strict schema**: …
+3. **Strict schema**: the schema reference skill is the only schema source of truth.
 4. **No fabricated citations**: quote text, author, and link must exist in raw data.
package/templates/base/skills/social_media_trend.md
CHANGED

@@ -67,7 +67,7 @@ Prefer evidence-backed moments over generic commentary.
 
 ## Step 5: Write Output
 
-Read `web/src/types.ts` and output strict `TrendData` JSON to:
+Read `../social_media_schema/SKILL.md` and output strict `TrendData` JSON to:
 
 - `classified_trend.json`
 
@@ -83,5 +83,5 @@ Read `web/src/types.ts` and output strict `TrendData` JSON to:
 
 1. **No external fetch**: analyze existing raw files only.
 2. **No invented dates/events**: every bucket and key moment must come from real data.
-3. **Schema strictness**: …
+3. **Schema strictness**: the schema reference skill is authoritative.
 4. **Graceful degradation**: skip bad records; never crash the whole report for a few malformed items.
package/dist/test/manual-fixed-links.js
DELETED

@@ -1,186 +0,0 @@
-import { createRequire } from "node:module";
-var __require = /* @__PURE__ */ createRequire(import.meta.url);
-
-// src/core/clients/reddit.ts
-var DEFAULT_CONFIG = {
-  userAgent: "CommunitiesResearchSkill/1.0.0 (Research)",
-  maxThreads: 20,
-  delayMs: 2000
-};
-var sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
-async function fetchRedditThread(threadUrl, config = {}) {
-  const { userAgent } = { ...DEFAULT_CONFIG, ...config };
-  let jsonUrl = threadUrl.split("?")[0];
-  if (jsonUrl.endsWith("/")) {
-    jsonUrl = jsonUrl.slice(0, -1);
-  }
-  jsonUrl += ".json?raw_json=1";
-  let attempts = 0;
-  const maxRetries = 3;
-  while (attempts < maxRetries) {
-    attempts++;
-    try {
-      const response = await fetch(jsonUrl, {
-        headers: {
-          "User-Agent": userAgent || `ResearchSkill/${Date.now()}`,
-          Accept: "application/json",
-          "Accept-Language": "en-US,en;q=0.9"
-        },
-        redirect: "follow"
-      });
-      if (!response.ok) {
-        if (response.status === 404) {
-          console.warn(`[404] Thread not found: ${jsonUrl}`);
-          return [];
-        }
-        if (response.status === 403) {
-          if (attempts === 1) {
-            console.warn(`[403] Blocked. Waiting 5s before single retry...`);
-            await sleep(5000);
-            continue;
-          }
-          throw new Error(`Reddit Blocked (403): Access denied.`);
-        }
-        if (response.status === 429) {
-          const waitTime = Math.pow(2, attempts) * 1000 + Math.random() * 1000;
-          console.warn(`[429] Rate limit. Backing off for ${Math.round(waitTime)}ms...`);
-          await sleep(waitTime);
-          continue;
-        }
-        if (response.status >= 500) {
-          const waitTime = 1000 * attempts;
-          console.warn(`[${response.status}] Server error. Retrying in ${waitTime}ms...`);
-          await sleep(waitTime);
-          continue;
-        }
-        throw new Error(`Failed to fetch Reddit thread: ${response.statusText} (${response.status})`);
-      }
-      const data = await response.json();
-      if (!Array.isArray(data) || data.length < 2) {
-        return [];
-      }
-      const postListing = data[0].data.children[0]?.data;
-      const commentListing = data[1].data.children;
-      if (!postListing)
-        return [];
-      const posts = [];
-      posts.push({
-        text: postListing.title + `
-
-` + (postListing.selftext || ""),
-        title: postListing.title,
-        author: postListing.author,
-        engagement: {
-          score: postListing.score,
-          comments: postListing.num_comments
-        },
-        url: threadUrl,
-        date: new Date(postListing.created_utc * 1000).toISOString()
-      });
-      const validComments = [];
-      for (const child of commentListing) {
-        if (child.kind === "t1" && child.data.body) {
-          const comment = child.data;
-          const body = comment.body.trim();
-          if (body.length < 30 || body === "[deleted]" || body === "[removed]" || body.includes("RemindMe!") || body.endsWith("?")) {
-            continue;
-          }
-          validComments.push({
-            text: body,
-            author: comment.author,
-            engagement: {
-              score: comment.score
-            },
-            url: `https://reddit.com${comment.permalink}`,
-            date: new Date(comment.created_utc * 1000).toISOString()
-          });
-        }
-      }
-      validComments.sort((a, b) => (b.engagement.score || 0) - (a.engagement.score || 0));
-      posts.push(...validComments.slice(0, 5));
-      return posts;
-    } catch (error) {
-      if (attempts >= maxRetries) {
-        console.error(`Error fetching Reddit thread ${threadUrl} after ${maxRetries} attempts:`, error);
-        throw error;
-      }
-    }
-  }
-  return [];
-}
-async function fetchRedditThreads(threadUrls, config = {}) {
-  const { maxThreads = 20, delayMs = 2000 } = { ...DEFAULT_CONFIG, ...config };
-  const allPosts = [];
-  const limitedUrls = threadUrls.slice(0, maxThreads);
-  console.log(`[Reddit] Fetching ${limitedUrls.length} threads sequentially with jitter...`);
-  for (const url of limitedUrls) {
-    try {
-      const posts = await fetchRedditThread(url, config);
-      allPosts.push(...posts);
-      if (limitedUrls.indexOf(url) < limitedUrls.length - 1) {
-        const jitter = delayMs * 0.5 * Math.random();
-        const finalDelay = delayMs + jitter;
-        await sleep(finalDelay);
-      }
-    } catch (error) {
-      console.error(`Skipping thread due to final error: ${url}`);
-    }
-  }
-  return allPosts;
-}
-
-// src/core/services/data-prep.ts
-class DataPrepService {
-  prepare(posts) {
-    const items = posts.map((post) => {
-      return {
-        ...post
-      };
-    });
-    items.sort((a, b) => (b.engagement.score || 0) - (a.engagement.score || 0));
-    return items.slice(0, 50);
-  }
-}
-
-// src/core/services/formatting.ts
-class FormattingService {
-  formatJson(result) {
-    return JSON.stringify(result, null, 2);
-  }
-}
-
-// src/test/manual-fixed-links.ts
-async function run() {
-  const args = process.argv.slice(2);
-  const fixedUrls = args.length > 0 ? args : [
-    "https://www.reddit.com/r/iems/comments/1olzu0g/the_best_iem_builds_at_each_price_2025_ultimate/",
-    "https://www.reddit.com/r/headphones/comments/1lbcngj/new_iem_tierlist_2025/",
-    "https://www.reddit.com/r/iems/comments/1c7imln/iem_tier_list/"
-  ];
-  console.log(`\uD83E\uDDEA Starting Fixed-Link Test with ${fixedUrls.length} URLs...`);
-  try {
-    const posts = await fetchRedditThreads(fixedUrls, { maxThreads: 5 });
-    console.log(`✅ Successfully fetched ${posts.length} items (Post + Top Comments).`);
-    console.log(`
-2️⃣ Preparing raw data for AI analysis...`);
-    const dataPrep = new DataPrepService;
-    const researchItems = dataPrep.prepare(posts);
-    console.log(`
-3️⃣ Generating Output (JSON)...`);
-    const formatter = new FormattingService;
-    const mockResult = {
-      query: "Manual Test: Best IEMs 2025",
-      dateRange: { from: "2025-01-01", to: "2025-12-31" },
-      items: researchItems
-    };
-    const jsonOutput = formatter.formatJson(mockResult);
-    const fs = __require("fs");
-    const path = __require("path");
-    const outputPath = path.resolve(process.cwd(), "reddit_data_test.json");
-    fs.writeFileSync(outputPath, jsonOutput);
-    console.log(`✅ Saved test output to: ${outputPath}`);
-  } catch (error) {
-    console.error("❌ Test Failed:", error);
-  }
-}
-run();
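With this harness deleted (and `test:fixed` removed from the CLI), an equivalent one-off sanity check can be sketched against the same public `.json` endpoint the old code used; the URL below is one of its former defaults, everything else is illustrative:

```typescript
// Minimal stand-in for the deleted fixed-link test: fetch a single
// Reddit thread as JSON, mirroring the harness's core request shape.
const threadUrl =
  "https://www.reddit.com/r/iems/comments/1c7imln/iem_tier_list/";
const jsonUrl = threadUrl.replace(/\/+$/, "") + ".json?raw_json=1";

const response = await fetch(jsonUrl, {
  headers: {
    "User-Agent": "ResearchSkill/manual-check", // any descriptive UA
    Accept: "application/json"
  }
});
if (!response.ok) {
  throw new Error(`Fetch failed: ${response.statusText} (${response.status})`);
}
// Reddit returns [postListing, commentListing] for thread URLs.
const [post, comments] = await response.json();
console.log(post.data.children[0]?.data.title);
console.log(`${comments.data.children.length} top-level comments`);
```

Unlike the deleted harness, this sketch does no retry or backoff; for repeated fetches, the removed 403/429/5xx handling above is the pattern to reproduce.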
package/templates/base/skills/social_media_research_test.md
DELETED

@@ -1,52 +0,0 @@
----
-name: social_media_research_test
-description: Legacy debug worker for fixed-link Reddit pipeline testing. Generates `reddit_data_test.json` without normal search/discovery flow and is useful for manual sanity checks.
----
-
-# Social Media Research Test Skill
-
-> Legacy/debug helper. Prefer the normal pipeline (`social_media_fetch` + analysis workers) for production research.
-
-Use this skill to sanity-check the fixed-link ingestion path with known Reddit URLs.
-
-## Workflow
-
-### 1) Run Fixed-Link Test
-
-```bash
-sc-research test:fixed [URL1] [URL2] ...
-```
-
-If no URLs are provided, built-in default Reddit links are used.
-
-### 2) Verify Output File
-
-This command writes:
-
-- `reddit_data_test.json` (in current working directory)
-
-Validate that:
-
-1. file exists
-2. JSON parses
-3. top-level `items` is a non-empty array when URLs are valid
-
-### 3) Use Output (Optional)
-
-For manual worker testing, copy/rename test output as needed (for example to `reddit_data.json`) and run one analysis worker (`social_media_rank`, `social_media_sentiment`, etc.).
-
-## Expected Output Shape
-
-`reddit_data_test.json` should contain:
-
-- `query`
-- `dateRange`
-- `items` (normalized Reddit post/comment records)
-
-`items` commonly include fields like `author`, `url`, text content, engagement, and date fields.
-
-## Critical Rules
-
-1. **Debug only**: this is not the canonical production fetch path.
-2. **No schema assumptions from legacy output**: analysis workers must still validate against `web/src/types.ts` when generating classified files.
-3. **Fail clearly**: if output file is missing or malformed, report test failure instead of continuing.
|