imperium-crawl 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +17 -10
  2. package/dist/constants.d.ts +1 -1
  3. package/dist/constants.js +1 -1
  4. package/dist/recipes/index.d.ts.map +1 -1
  5. package/dist/recipes/index.js +6 -0
  6. package/dist/recipes/index.js.map +1 -1
  7. package/dist/recipes/influencer-competitor-spy.json +14 -0
  8. package/dist/recipes/influencer-hashtag-scout.json +14 -0
  9. package/dist/recipes/influencer-niche-discovery.json +14 -0
  10. package/dist/skills/manager.d.ts +11 -2
  11. package/dist/skills/manager.d.ts.map +1 -1
  12. package/dist/skills/manager.js.map +1 -1
  13. package/dist/social/ai-fallback.d.ts +22 -0
  14. package/dist/social/ai-fallback.d.ts.map +1 -0
  15. package/dist/social/ai-fallback.js +137 -0
  16. package/dist/social/ai-fallback.js.map +1 -0
  17. package/dist/social/parsers.d.ts +28 -0
  18. package/dist/social/parsers.d.ts.map +1 -0
  19. package/dist/social/parsers.js +146 -0
  20. package/dist/social/parsers.js.map +1 -0
  21. package/dist/social/types.d.ts +55 -0
  22. package/dist/social/types.d.ts.map +1 -0
  23. package/dist/social/types.js +5 -0
  24. package/dist/social/types.js.map +1 -0
  25. package/dist/social/whisper.d.ts +29 -0
  26. package/dist/social/whisper.d.ts.map +1 -0
  27. package/dist/social/whisper.js +88 -0
  28. package/dist/social/whisper.js.map +1 -0
  29. package/dist/tools/index.d.ts.map +1 -1
  30. package/dist/tools/index.js +5 -0
  31. package/dist/tools/index.js.map +1 -1
  32. package/dist/tools/manifest.d.ts.map +1 -1
  33. package/dist/tools/manifest.js +9 -0
  34. package/dist/tools/manifest.js.map +1 -1
  35. package/dist/tools/reddit.d.ts +36 -0
  36. package/dist/tools/reddit.d.ts.map +1 -0
  37. package/dist/tools/reddit.js +190 -0
  38. package/dist/tools/reddit.js.map +1 -0
  39. package/dist/tools/run-skill.d.ts +18 -0
  40. package/dist/tools/run-skill.d.ts.map +1 -1
  41. package/dist/tools/run-skill.js +681 -0
  42. package/dist/tools/run-skill.js.map +1 -1
  43. package/dist/tools/tiktok.d.ts +30 -0
  44. package/dist/tools/tiktok.d.ts.map +1 -0
  45. package/dist/tools/tiktok.js +246 -0
  46. package/dist/tools/tiktok.js.map +1 -0
  47. package/dist/tools/youtube.d.ts +33 -0
  48. package/dist/tools/youtube.d.ts.map +1 -0
  49. package/dist/tools/youtube.js +489 -0
  50. package/dist/tools/youtube.js.map +1 -0
  51. package/package.json +1 -1
@@ -13,6 +13,13 @@ export const schema = z.object({
13
13
  chrome_profile: z.string().max(1000).optional().describe("Path to Chrome user data directory for authenticated sessions (cookies, localStorage). Overrides CHROME_PROFILE_PATH env var."),
14
14
  duration_seconds: z.number().min(1).max(300).optional().describe("Override WebSocket monitoring duration (seconds). Only applies to monitor_websocket recipes."),
15
15
  max_messages: z.number().min(1).max(1000).optional().describe("Override max WebSocket messages to capture. Only applies to monitor_websocket recipes."),
16
+ // Influencer discovery params
17
+ niche: z.string().max(200).optional().describe("Niche keywords for influencer discovery"),
18
+ location: z.string().max(100).optional().describe("Location filter"),
19
+ hashtags: z.array(z.string()).max(10).optional().describe("Hashtags to search (hashtag_scout workflow)"),
20
+ competitor: z.string().max(200).optional().describe("Competitor brand/handle (competitor_spy workflow)"),
21
+ output_format: z.enum(["json", "markdown", "csv"]).optional().describe("Output format for influencer discovery"),
22
+ threshold: z.number().min(0).max(100).optional().describe("Tier qualification threshold (default 60)"),
16
23
  });
17
24
  // --- Helpers ---
18
25
  function mcpResult(data) {
@@ -307,6 +314,678 @@ async function runMonitorWebsocket(config, url, input) {
307
314
  await handle.cleanup();
308
315
  }
309
316
  }
317
+ // Brave Search helper
318
/**
 * Runs a Brave web search and returns the raw API response.
 *
 * Best-effort helper: resolves to `null` when BRAVE_API_KEY is unset, when
 * the Brave client module cannot be loaded, or when the request fails.
 *
 * @param {string} query - Search query string.
 * @param {number} [count=10] - Number of results to request.
 * @returns {Promise<object|null>} Whatever `issueRequest` resolves to, or `null`.
 */
async function braveSearch(query, count = 10) {
    const apiKey = process.env.BRAVE_API_KEY;
    if (!apiKey) {
        return null;
    }
    try {
        // Import inside the try so a module-load failure degrades to `null`
        // exactly like a failed request, instead of rejecting the caller.
        const { issueRequest } = await import("../brave-api/index.js");
        return await issueRequest(apiKey, "/web/search", { q: query, count });
    }
    catch {
        return null;
    }
}
330
+ // YouTube tool helper — direct in-process call
331
/**
 * Invokes the YouTube tool in-process and decodes its JSON text payload.
 *
 * Defaults `limit: 10` and `sort: "relevance"` are applied first, so any
 * key present in `params` overrides them.
 *
 * @param {string} action - YouTube tool action (callers use "search", "channel", "video").
 * @param {object} params - Extra arguments merged into the tool request.
 * @returns {Promise<*>} Parsed payload, or `null` when the response text is
 *   missing or is not valid JSON. A rejection from the tool itself propagates.
 */
async function ytExecute(action, params) {
    const youtube = await import("./youtube.js");
    const request = { action, limit: 10, sort: "relevance", ...params };
    const response = await youtube.execute(request);
    try {
        // An empty text payload is decoded as an empty object.
        const payload = response.content[0].text || "{}";
        return JSON.parse(payload);
    }
    catch {
        return null;
    }
}
341
+ // Parse IG handle from YouTube description
342
/**
 * Extracts Instagram handles mentioned in free text (e.g. a YouTube channel
 * description).
 *
 * Two forms are recognised:
 *   - a label ("instagram", "insta", "ig") followed by ":" and/or "@",
 *     e.g. "IG: @jane", "insta: jane", "Instagram - @jane";
 *   - a profile URL, e.g. "instagram.com/jane".
 *
 * The label must be a whole word and must be followed by ":" or "@". This
 * avoids capturing ".com" from "instagram.com/...", matching "ig" inside words
 * such as "digital", or taking the next ordinary word after "instagram".
 *
 * @param {string} [description] - Text to scan.
 * @returns {string[]} Unique lower-cased handles, in order of first appearance
 *   (label matches first, then URL matches).
 */
function parseIgHandles(description) {
    if (!description) {
        return [];
    }
    // URL scheme/host fragments and Instagram path segments that are not profiles.
    const reserved = new Set(["com", "www", "http", "https", "p", "reel", "reels", "explore", "stories", "tv", "accounts"]);
    const patterns = [
        /\b(?:instagram|insta|ig)\b(?:\s*:\s*@?|[\s|\-\u2013\u2014]*@)(\w[\w.]{2,29})/gi,
        /instagram\.com\/([\w.]{3,30})/gi,
    ];
    const handles = new Set();
    for (const pattern of patterns) {
        for (const match of description.matchAll(pattern)) {
            // "." is a legal handle character, so a sentence-ending dot is captured too; drop it.
            const handle = match[1].toLowerCase().replace(/\.+$/, "");
            const looksLikeHost = handle.startsWith("www.") || handle.endsWith(".com");
            if (handle.length >= 3 && !reserved.has(handle) && !looksLikeHost) {
                handles.add(handle);
            }
        }
    }
    return [...handles];
}
361
+ // Parse followers from Brave snippet — uses inline compact number parsing
362
/**
 * Parses a follower count out of a search-result snippet such as
 * "1.2M Followers", "842K followers" or "1,234 followers".
 *
 * Only the first "<number>[K|M|B] followers" occurrence is used. The number
 * must start with a digit, so stray punctuation before the word "followers"
 * (e.g. "Jane, followers ...") cannot shadow a real count later in the text.
 *
 * @param {string} [snippet] - Snippet text.
 * @returns {number|undefined} Rounded follower count, or `undefined` when none is found.
 */
function parseFollowersFromSnippet(snippet) {
    if (!snippet) {
        return undefined;
    }
    const match = snippet.match(/(\d[\d,.]*)\s*([KMB])?\s*followers/i);
    if (!match) {
        return undefined;
    }
    // Commas are treated as thousands separators.
    const value = Number.parseFloat(match[1].replace(/,/g, ""));
    if (Number.isNaN(value)) {
        return undefined;
    }
    const multipliers = { K: 1_000, M: 1_000_000, B: 1_000_000_000 };
    const suffix = match[2]?.toUpperCase();
    return Math.round(value * (suffix ? multipliers[suffix] : 1));
}
376
+ // IG API call with rate limiting
377
/**
 * Performs one authenticated GET against www.instagram.com, subject to a
 * per-run call budget.
 *
 * Credentials come from IG_SESSION_ID (required), IG_CSRF_TOKEN and
 * IG_DS_USER_ID (both optional, sent as empty strings when unset).
 *
 * @param {string} endpoint - Path and query appended to "https://www.instagram.com/".
 * @param {{count: number}} igCallsUsed - Shared counter; incremented once per attempted request.
 * @param {number} igMaxCalls - Maximum number of requests allowed for this counter.
 * @returns {Promise<object|null>} Parsed JSON body, or `null` when the budget is
 *   spent, no session id is configured, the response is not OK, or the request
 *   or JSON decoding fails.
 */
async function igApiCall(endpoint, igCallsUsed, igMaxCalls) {
    const {
        IG_SESSION_ID: sessionId,
        IG_CSRF_TOKEN: csrfToken,
        IG_DS_USER_ID: dsUserId,
    } = process.env;
    const budgetSpent = igCallsUsed.count >= igMaxCalls;
    if (budgetSpent || !sessionId) {
        return null;
    }
    // The attempt is charged against the budget before the request is sent,
    // so failed requests count as well.
    igCallsUsed.count += 1;
    const csrf = csrfToken || "";
    const requestHeaders = {
        "Cookie": `sessionid=${sessionId}; csrftoken=${csrf}; ds_user_id=${dsUserId || ""}`,
        "X-CSRFToken": csrf,
        "X-IG-App-ID": "936619743392459",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
    };
    try {
        const response = await fetch(`https://www.instagram.com/${endpoint}`, {
            headers: requestHeaders,
            signal: AbortSignal.timeout(10_000),
        });
        return response.ok ? await response.json() : null;
    }
    catch {
        return null;
    }
}
404
+ // Calculate engagement rate
405
/**
 * Computes an engagement percentage from whichever averages are available.
 *
 * Preference order: likes/views, then likes/subscribers, then
 * views/subscribers. Returns 0 when `subscribers` is missing or zero, even if
 * likes and views are both known.
 * NOTE(review): that guard also zeroes the likes/views ratio for channels
 * with no subscriber count — confirm this is intended.
 *
 * @param {number} [avgViews] - Average views per recent video.
 * @param {number} [avgLikes] - Average likes per recent video.
 * @param {number} [subscribers] - Channel subscriber count.
 * @returns {number} Engagement rate as a percentage (unrounded).
 */
function calcEngagement(avgViews, avgLikes, subscribers) {
    if (!subscribers) {
        return 0;
    }
    let ratio = 0;
    if (avgLikes) {
        ratio = avgLikes / (avgViews ? avgViews : subscribers);
    }
    else if (avgViews) {
        ratio = avgViews / subscribers;
    }
    return ratio * 100;
}
416
+ // Scoring weights per category
417
// Per-category criterion weights. Each table sums to 100.
// Reach favours audience size and engagement.
const REACH_WEIGHTS = {
    audience_size: 25,
    engagement_rate: 20,
    niche_relevance: 15,
    multi_platform: 15,
    consistency: 10,
    frequency: 8,
    contact: 5,
    collab_signals: 2,
};
// Conversion favours engagement and niche relevance.
const CONVERSION_WEIGHTS = {
    engagement_rate: 30,
    niche_relevance: 25,
    consistency: 15,
    contact: 10,
    collab_signals: 8,
    frequency: 5,
    audience_size: 5,
    multi_platform: 2,
};
// Partnership gives contact details and collaboration signals more weight.
const PARTNERSHIP_WEIGHTS = {
    niche_relevance: 20,
    engagement_rate: 20,
    contact: 15,
    collab_signals: 15,
    consistency: 12,
    frequency: 8,
    audience_size: 5,
    multi_platform: 5,
};
420
/**
 * Scores one criterion of an influencer profile on that criterion's own scale.
 *
 * Maximum raw scores: audience_size 25, engagement_rate 20, niche_relevance 15,
 * multi_platform 15, consistency 10, frequency 8, contact 5, collab_signals 2.
 *
 * @param {object} profile - Influencer profile; only the fields relevant to
 *   `criterion` are read.
 * @param {string} criterion - Criterion name.
 * @returns {number} Raw score; 0 for an unknown criterion.
 */
function scoreCriterion(profile, criterion) {
    switch (criterion) {
        case "audience_size": {
            const total = (profile.subscribers || 0) + (profile.ig_followers || 0);
            if (total >= 100_000) {
                return 10; // macro — lower score (harder to partner)
            }
            if (total >= 10_000) {
                return 25; // micro — sweet spot
            }
            if (total >= 1_000) {
                return 15; // nano — good for niche
            }
            return 5;
        }
        case "engagement_rate": {
            const rate = profile.engagement_rate || 0;
            if (rate > 8) {
                return 20;
            }
            if (rate > 5) {
                return 16;
            }
            if (rate > 3) {
                return 12;
            }
            if (rate > 1) {
                return 8;
            }
            return 4;
        }
        case "niche_relevance":
            return Math.round((profile.niche_match_pct || 0) / 100 * 15);
        case "multi_platform": {
            if (profile.platform_count >= 3) {
                return 15;
            }
            if (profile.platform_count >= 2) {
                return 10;
            }
            return 5;
        }
        case "consistency":
            // `??` rather than `||`: a measured 0% niche match must score 0.
            // Only a missing measurement falls back to the neutral 50%.
            return Math.round((profile.niche_match_pct ?? 50) / 100 * 10);
        case "frequency": {
            const points = { weekly: 8, biweekly: 5, monthly: 3 };
            return Object.hasOwn(points, profile.posting_frequency) ? points[profile.posting_frequency] : 0;
        }
        case "contact": {
            // Best available contact channel wins; the values are not additive.
            if (profile.email) {
                return 5;
            }
            if (profile.website) {
                return 3;
            }
            if (profile.has_business_contact) {
                return 2;
            }
            return 0;
        }
        case "collab_signals":
            return (profile.has_collab_signals ? 1 : 0) + (profile.has_business_contact ? 1 : 0);
        default:
            return 0;
    }
}
485
/**
 * Computes the three 0-100 category scores (reach, conversion, partnership)
 * for a profile.
 *
 * Each criterion's raw score is divided by that criterion's maximum, multiplied
 * by the category weight, summed, and expressed as a rounded percentage of the
 * category's total weight.
 *
 * @param {object} profile - Influencer profile passed through to `scoreCriterion`.
 * @returns {{reach: number, conversion: number, partnership: number}} Category scores.
 */
function calculateScores(profile) {
    // Maximum raw score per criterion; anything not listed is scored out of 2.
    const maxRawByCriterion = {
        audience_size: 25,
        engagement_rate: 20,
        niche_relevance: 15,
        multi_platform: 15,
        consistency: 10,
        frequency: 8,
        contact: 5,
    };
    const scoreCategory = (weights) => {
        const totalWeight = Object.values(weights).reduce((sum, weight) => sum + weight, 0);
        let weighted = 0;
        for (const [criterion, weight] of Object.entries(weights)) {
            const maxRaw = Object.hasOwn(maxRawByCriterion, criterion) ? maxRawByCriterion[criterion] : 2;
            weighted += (scoreCriterion(profile, criterion) / maxRaw) * weight;
        }
        return Math.round((weighted / totalWeight) * 100);
    };
    const reach = scoreCategory(REACH_WEIGHTS);
    const conversion = scoreCategory(CONVERSION_WEIGHTS);
    const partnership = scoreCategory(PARTNERSHIP_WEIGHTS);
    return { reach, conversion, partnership };
}
503
/**
 * Maps category scores to a tier by counting how many of the three scores
 * reach the threshold: 3 → GOLDEN, 2 → SILVER, 1 → BRONZE, 0 → UNRANKED.
 *
 * @param {{reach: number, conversion: number, partnership: number}} scores - Category scores.
 * @param {number} threshold - Minimum score (inclusive) for a category to qualify.
 * @returns {"GOLDEN"|"SILVER"|"BRONZE"|"UNRANKED"} Tier label.
 */
function classifyTier(scores, threshold) {
    // Indexed by the number of qualifying categories.
    const tierByCount = ["UNRANKED", "BRONZE", "SILVER", "GOLDEN"];
    let qualifying = 0;
    for (const value of [scores.reach, scores.conversion, scores.partnership]) {
        if (value >= threshold) {
            qualifying += 1;
        }
    }
    return tierByCount[qualifying];
}
513
+ // Estimate posting frequency from recent video dates
514
/**
 * Estimates posting frequency from the relative "published" text of recent
 * videos (e.g. "2 days ago", "3 weeks ago").
 *
 * - no videos → "inactive"
 * - at least 3 videos and any published within minutes/hours/days → "weekly"
 * - otherwise any published within weeks → "biweekly"
 * - otherwise → "monthly"
 *
 * @param {Array<{published?: string}>} videos - Recent video details.
 * @returns {"inactive"|"weekly"|"biweekly"|"monthly"} Frequency label.
 */
function estimateFrequency(videos) {
    if (!videos.length) {
        return "inactive";
    }
    // True when any video's published text contains one of the given words.
    const anyPublishedMentions = (words) => videos.some((video) => {
        const published = video.published?.toLowerCase() || "";
        return words.some((word) => published.includes(word));
    });
    const postedRecently = anyPublishedMentions(["day", "hour", "minute"]);
    if (postedRecently && videos.length >= 3) {
        return "weekly";
    }
    return anyPublishedMentions(["week"]) ? "biweekly" : "monthly";
}
532
+ // Calculate niche match % from descriptions/titles
533
/**
 * Computes the percentage of niche keywords that appear (case-insensitively,
 * as substrings) anywhere in the supplied texts.
 *
 * Non-string entries (e.g. a video with no title) are ignored rather than
 * throwing. When there is no text at all or no keywords, the neutral value 50
 * is returned.
 *
 * @param {Array<string|undefined|null>} texts - Descriptions/titles to search.
 * @param {string[]} nicheKeywords - Keywords to look for.
 * @returns {number} Match percentage, 0-100 (rounded).
 */
function calcNicheMatch(texts, nicheKeywords) {
    const usableTexts = texts.filter((text) => typeof text === "string");
    if (usableTexts.length === 0 || nicheKeywords.length === 0) {
        return 50;
    }
    const haystack = usableTexts.join(" ").toLowerCase();
    const matched = nicheKeywords.filter((keyword) => haystack.includes(String(keyword).toLowerCase())).length;
    return Math.round((matched / nicheKeywords.length) * 100);
}
544
+ // Extract contact info from description
545
/**
 * Extracts contact details and collaboration hints from a channel description.
 *
 * - `email`: first e-mail-looking token.
 * - `website`: first http(s) URL that is not on a major social host
 *   (youtube/instagram/twitter/facebook/tiktok .com, including subdomains
 *   such as "www." or "m.", and youtu.be).
 * - `hasBusiness` / `hasCollab`: keyword flags.
 *
 * Trailing sentence punctuation is stripped from the e-mail and URL. A
 * non-string description is treated as empty.
 *
 * @param {string} desc - Description text.
 * @returns {{email: (string|undefined), website: (string|undefined), hasBusiness: boolean, hasCollab: boolean}}
 */
function extractContactInfo(desc) {
    const text = typeof desc === "string" ? desc : "";
    const emailMatch = text.match(/[\w.+-]+@[\w-]+\.[\w.]+/);
    // The optional "(label.)*" prefix makes the exclusion cover www./m./etc.
    const websiteMatch = text.match(/https?:\/\/(?!(?:[\w-]+\.)*(?:(?:youtube|instagram|twitter|facebook|tiktok)\.com|youtu\.be)\b)[^\s"'<>]+/i);
    return {
        // The domain part allows ".", so a sentence-ending dot is captured too; drop it.
        email: emailMatch?.[0].replace(/\.+$/, ""),
        website: websiteMatch?.[0].replace(/[.,;:!?)]+$/, ""),
        hasBusiness: /business|collab|partner|sponsor|inquir/i.test(text),
        hasCollab: /collab|partner|sponsor|brand|work with/i.test(text),
    };
}
557
+ // Format output
558
/**
 * Sorts influencers (best tier first, then highest average score) and renders
 * them as CSV, a markdown table, or a JSON-ready summary object.
 *
 * The `influencers` array is sorted in place.
 *
 * CSV fields containing a comma, double quote or line break are quoted with
 * embedded quotes doubled, so free-text values such as names cannot shift
 * columns. Markdown handle cells escape "|" for the same reason.
 *
 * @param {object[]} influencers - Profiles with `tier` and `scores` populated.
 * @param {"json"|"markdown"|"csv"} format - Output format; anything else yields JSON.
 * @param {{workflow?: string, niche?: string, threshold?: number}} meta - Run metadata.
 * @returns {string|object} CSV/markdown text, or the summary object for JSON.
 */
function formatInfluencerOutput(influencers, format, meta) {
    // Sort by tier, then by highest average score.
    const tierOrder = { GOLDEN: 0, SILVER: 1, BRONZE: 2, UNRANKED: 3 };
    const averageScore = ({ scores }) => (scores.reach + scores.conversion + scores.partnership) / 3;
    influencers.sort((a, b) => {
        const tierDiff = tierOrder[a.tier] - tierOrder[b.tier];
        if (tierDiff !== 0) {
            return tierDiff;
        }
        return averageScore(b) - averageScore(a);
    });
    if (format === "csv") {
        const csvCell = (value) => {
            const text = String(value ?? "");
            return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
        };
        const header = "handle,name,tier,reach,conversion,partnership,subscribers,ig_followers,engagement_rate,youtube_url,instagram_url,email";
        const rows = influencers.map((i) => [
            i.handle, i.name, i.tier, i.scores.reach, i.scores.conversion, i.scores.partnership,
            i.subscribers || "", i.ig_followers || "", i.engagement_rate?.toFixed(1) || "",
            i.youtube_url || "", i.instagram_url || "", i.email || "",
        ].map(csvCell).join(","));
        return header + "\n" + rows.join("\n");
    }
    if (format === "markdown") {
        const tierBadge = { GOLDEN: "🥇", SILVER: "🥈", BRONZE: "🥉", UNRANKED: "⬜" };
        const mdCell = (value) => String(value).replace(/\|/g, "\\|");
        let md = `# Influencer Discovery: ${meta.niche}\n\n`;
        md += `**Workflow**: ${meta.workflow} | **Threshold**: ${meta.threshold} | **Found**: ${influencers.length}\n\n`;
        md += `| Tier | Handle | Subscribers | IG Followers | Engagement | Reach | Conv | Partner | Contact |\n`;
        md += `|------|--------|-------------|-------------|-----------|-------|------|---------|--------|\n`;
        for (const i of influencers) {
            const subs = i.subscribers ? formatNum(i.subscribers) : "-";
            const igf = i.ig_followers ? formatNum(i.ig_followers) : "-";
            const er = i.engagement_rate ? `${i.engagement_rate.toFixed(1)}%` : "-";
            const contact = i.email ? "📧" : i.website ? "🌐" : i.has_business_contact ? "💼" : "-";
            md += `| ${tierBadge[i.tier]} ${i.tier} | ${mdCell(i.handle)} | ${subs} | ${igf} | ${er} | ${i.scores.reach} | ${i.scores.conversion} | ${i.scores.partnership} | ${contact} |\n`;
        }
        return md;
    }
    // JSON (default)
    const countTier = (tier) => influencers.filter((i) => i.tier === tier).length;
    return {
        workflow: meta.workflow,
        niche: meta.niche,
        threshold: meta.threshold,
        total_found: influencers.length,
        tiers: {
            golden: countTier("GOLDEN"),
            silver: countTier("SILVER"),
            bronze: countTier("BRONZE"),
            unranked: countTier("UNRANKED"),
        },
        influencers,
    };
}
606
/**
 * Formats a count compactly with one decimal: 1500 → "1.5K", 2500000 → "2.5M".
 * Values below 1000 are returned unchanged as a string.
 *
 * Values that would round up to "1000.0K" (999,950 and above) are promoted to
 * the "M" unit.
 *
 * @param {number} n - Count to format.
 * @returns {string} Compact representation.
 */
function formatNum(n) {
    const inMillions = () => `${(n / 1_000_000).toFixed(1)}M`;
    if (n >= 1_000_000) {
        return inMillions();
    }
    if (n >= 1_000) {
        const thousands = (n / 1_000).toFixed(1);
        // toFixed rounds, so check the rounded value before choosing the unit.
        return Number(thousands) >= 1_000 ? inMillions() : `${thousands}K`;
    }
    return String(n);
}
613
+ // --- Workflow: niche_discovery ---
614
/**
 * Workflow "niche_discovery": finds YouTube creators for a niche, enriches
 * each with channel, recent-video and Instagram data, then scores and tiers
 * them.
 *
 * Steps: (1) three YouTube searches built from the niche (and location, when
 * given); (2) de-duplicate creators by channel URL and keep the first 10;
 * (3-4) fetch channel details and up to 3 videos; (5-7) look for an Instagram
 * handle in the channel description and resolve its follower count via Brave
 * Search, falling back to the Instagram API within the call budget.
 *
 * @param {object} config - Recipe config (`niche`, `threshold`, `ig_max_calls` are read).
 * @param {object} input - Tool input (`niche`, `location`, `threshold` are read).
 * @returns {Promise<object[]>} Scored influencer profiles (unsorted).
 * @throws {TypeError} When neither input nor config provides a non-empty niche.
 */
async function runNicheDiscovery(config, input) {
    const niche = input.niche || config.niche;
    if (typeof niche !== "string" || niche.trim() === "") {
        throw new TypeError("niche_discovery requires a non-empty 'niche' (tool input or recipe config)");
    }
    const location = input.location || "";
    const nicheKeywords = niche.split(/[\s,]+/).filter(Boolean);
    const threshold = input.threshold ?? config.threshold ?? 60;
    const igCallsUsed = { count: 0 };
    const igMaxCalls = config.ig_max_calls ?? 15;
    // Step 1: YouTube search with 3 queries
    const queries = [
        `${niche} vlog`,
        `${niche} guide`,
        location ? `${location} ${niche}` : `best ${niche}`,
    ];
    // Step 2: Deduplicate by author_url (first occurrence wins)
    const uniqueCreators = new Map();
    for (const query of queries) {
        const data = await ytExecute("search", { query });
        for (const video of data?.results ?? []) {
            if (video.author_url && !uniqueCreators.has(video.author_url)) {
                uniqueCreators.set(video.author_url, video.author);
            }
        }
    }
    // Take top 10
    const creatorEntries = [...uniqueCreators.entries()].slice(0, 10);
    const influencers = [];
    for (const [channelUrl, authorName] of creatorEntries) {
        // Step 3: Get channel details
        const channelHandle = channelUrl.replace("https://www.youtube.com", "");
        const channel = await ytExecute("channel", { channel_url: channelUrl });
        // Step 4: Get recent videos.
        // NOTE(review): this is a keyword search on "<author> <niche>", so the
        // results may include other channels' videos — confirm this is acceptable.
        const recentSearch = await ytExecute("search", { query: `${authorName} ${niche}` });
        const recentVideos = (recentSearch?.results || []).slice(0, 3);
        const videoDetails = [];
        for (const video of recentVideos) {
            if (!video.url) {
                continue;
            }
            const details = await ytExecute("video", { url: video.url });
            if (details && !details.error) {
                videoDetails.push({
                    title: details.title || video.title,
                    views: details.views,
                    likes: details.likes,
                    published: details.published,
                });
            }
        }
        // Averages stay undefined when no video details could be fetched.
        const averageOf = (field) => (videoDetails.length > 0
            ? videoDetails.reduce((sum, v) => sum + (v[field] || 0), 0) / videoDetails.length
            : undefined);
        const avgViews = averageOf("views");
        const avgLikes = averageOf("likes");
        const subscribers = channel?.subscribers;
        const description = channel?.description || "";
        const contactInfo = extractContactInfo(description);
        // Step 5: Parse IG handles from description
        const [igHandle] = parseIgHandles(description);
        let igFollowers;
        let igUrl;
        if (igHandle) {
            // Step 6: Brave Search for IG data
            const braveResult = await braveSearch(`"${igHandle}" instagram`, 3);
            for (const result of braveResult?.web?.results ?? []) {
                const followers = parseFollowersFromSnippet(result.description || "");
                if (followers) {
                    igFollowers = followers;
                    igUrl = `https://instagram.com/${igHandle}`;
                    break;
                }
            }
            // Step 7: IG API enrichment, only when Brave gave no follower count
            if (!igFollowers && process.env.IG_SESSION_ID) {
                const igData = await igApiCall(`api/v1/users/web_profile_info/?username=${encodeURIComponent(igHandle)}`, igCallsUsed, igMaxCalls);
                if (igData?.data?.user) {
                    igFollowers = igData.data.user.edge_followed_by?.count;
                    igUrl = `https://instagram.com/${igHandle}`;
                }
            }
        }
        const platformCount = 1 + (igFollowers ? 1 : 0); // YouTube + IG if found
        const engagementRate = calcEngagement(avgViews, avgLikes, subscribers);
        // Drop missing titles so the niche matcher only sees strings.
        const titles = videoDetails.map((v) => v.title).filter((title) => typeof title === "string");
        const profile = {
            handle: channelHandle || authorName,
            name: channel?.name || authorName,
            youtube_url: channelUrl,
            instagram_url: igUrl,
            subscribers,
            ig_followers: igFollowers,
            description: description.substring(0, 300),
            engagement_rate: Math.round(engagementRate * 10) / 10,
            avg_views: avgViews ? Math.round(avgViews) : undefined,
            avg_likes: avgLikes ? Math.round(avgLikes) : undefined,
            recent_videos: videoDetails,
            email: contactInfo.email,
            website: contactInfo.website,
            has_business_contact: contactInfo.hasBusiness,
            has_collab_signals: contactInfo.hasCollab,
            niche_match_pct: calcNicheMatch([description, ...titles], nicheKeywords),
            posting_frequency: estimateFrequency(videoDetails),
            platform_count: platformCount,
            scores: { reach: 0, conversion: 0, partnership: 0 },
            tier: "UNRANKED",
        };
        profile.scores = calculateScores(profile);
        profile.tier = classifyTier(profile.scores, threshold);
        influencers.push(profile);
    }
    return influencers;
}
735
+ // --- Workflow: hashtag_scout ---
736
/**
 * Workflow "hashtag_scout": discovers Instagram handles from hashtags, then
 * enriches each with follower counts and a best-effort YouTube channel lookup
 * before scoring and tiering.
 *
 * Handles come from the Instagram hashtag API when IG_SESSION_ID is set. If
 * that yields nothing, Brave Search results restricted to instagram.com are
 * used instead. Only the first 10 handles are processed.
 *
 * @param {object} config - Recipe config (`niche`, `threshold`, `ig_max_calls` are read).
 * @param {object} input - Tool input (`niche`, `hashtags`, `threshold` are read).
 * @returns {Promise<object[]>} Scored influencer profiles (unsorted).
 * @throws {TypeError} When neither input nor config provides a non-empty niche.
 */
async function runHashtagScout(config, input) {
    const niche = input.niche || config.niche;
    if (typeof niche !== "string" || niche.trim() === "") {
        throw new TypeError("hashtag_scout requires a non-empty 'niche' (tool input or recipe config)");
    }
    const nicheKeywords = niche.split(/[\s,]+/).filter(Boolean);
    const threshold = input.threshold ?? config.threshold ?? 60;
    // Fall back to the niche (spaces removed) when no usable hashtag is given.
    // A leading "#" is stripped so it cannot become a URL fragment.
    const requestedTags = Array.isArray(input.hashtags) && input.hashtags.length > 0
        ? input.hashtags
        : [niche.replace(/\s+/g, "")];
    const hashtags = [...new Set(requestedTags
        .map((tag) => String(tag).trim().replace(/^#+/, ""))
        .filter(Boolean))];
    const igCallsUsed = { count: 0 };
    const igMaxCalls = config.ig_max_calls ?? 15;
    const handles = new Map();
    if (process.env.IG_SESSION_ID) {
        // IG hashtag API
        for (const tag of hashtags) {
            const data = await igApiCall(`api/v1/tags/${encodeURIComponent(tag)}/sections/`, igCallsUsed, igMaxCalls);
            for (const section of data?.sections ?? []) {
                for (const item of section?.layout_content?.medias || []) {
                    const username = item?.media?.user?.username;
                    if (username && !handles.has(username)) {
                        handles.set(username, { source: `#${tag}` });
                    }
                }
            }
        }
    }
    // Fallback: Brave Search for hashtag discovery
    if (handles.size === 0) {
        // First path segments on instagram.com that are not user profiles.
        const nonProfilePaths = new Set(["p", "explore", "reel", "reels", "stories", "tv", "accounts"]);
        for (const tag of hashtags) {
            const data = await braveSearch(`site:instagram.com #${tag} ${niche}`, 10);
            for (const result of data?.web?.results ?? []) {
                const candidate = result.url?.match(/instagram\.com\/([\w.]+)/)?.[1];
                if (candidate && !nonProfilePaths.has(candidate) && !handles.has(candidate)) {
                    handles.set(candidate, { source: `#${tag}` });
                }
            }
        }
    }
    // Take top 10
    const topHandles = [...handles.keys()].slice(0, 10);
    const influencers = [];
    for (const handle of topHandles) {
        // Brave Search cross-ref for followers
        let igFollowers;
        const braveResult = await braveSearch(`"${handle}" instagram followers`, 3);
        for (const result of braveResult?.web?.results ?? []) {
            const followers = parseFollowersFromSnippet(result.description || "");
            if (followers) {
                igFollowers = followers;
                break;
            }
        }
        // IG API enrichment, only when Brave gave no follower count
        if (!igFollowers && process.env.IG_SESSION_ID) {
            const igData = await igApiCall(`api/v1/users/web_profile_info/?username=${encodeURIComponent(handle)}`, igCallsUsed, igMaxCalls);
            if (igData?.data?.user) {
                igFollowers = igData.data.user.edge_followed_by?.count;
            }
        }
        // YouTube verification.
        // NOTE(review): this takes the first search hit for the handle, which
        // may be an unrelated channel — confirm this is acceptable.
        let ytChannel = null;
        const ytSearch = await ytExecute("search", { query: handle });
        const firstAuthorUrl = ytSearch?.results?.[0]?.author_url;
        if (firstAuthorUrl) {
            ytChannel = await ytExecute("channel", { channel_url: firstAuthorUrl });
        }
        const subscribers = ytChannel?.subscribers;
        const description = ytChannel?.description || "";
        const contactInfo = extractContactInfo(description);
        // Count platforms with audience data, but never report fewer than one.
        const platformCount = (igFollowers ? 1 : 0) + (subscribers ? 1 : 0) || 1;
        const profile = {
            handle: `@${handle}`,
            name: ytChannel?.name || handle,
            youtube_url: ytChannel?.url,
            instagram_url: `https://instagram.com/${handle}`,
            subscribers,
            ig_followers: igFollowers,
            description: description.substring(0, 300),
            // This workflow collects no engagement or posting data; these are placeholders.
            engagement_rate: 0,
            niche_match_pct: calcNicheMatch([description, handle], nicheKeywords),
            posting_frequency: "monthly",
            platform_count: platformCount,
            email: contactInfo.email,
            website: contactInfo.website,
            has_business_contact: contactInfo.hasBusiness,
            has_collab_signals: contactInfo.hasCollab,
            scores: { reach: 0, conversion: 0, partnership: 0 },
            tier: "UNRANKED",
        };
        profile.scores = calculateScores(profile);
        profile.tier = classifyTier(profile.scores, threshold);
        influencers.push(profile);
    }
    return influencers;
}
833
+ // --- Workflow: competitor_spy ---
834
/**
 * Workflow "competitor_spy": finds YouTube creators who have covered a
 * competitor (sponsored/collab content, reviews, unboxings), then enriches,
 * scores and tiers them.
 *
 * Steps: (1) Brave Search for sponsored/collab YouTube videos; (2) YouTube
 * search for reviews/unboxings; (3) resolve video URLs to channel URLs and
 * keep the first 10 unique channels; (4) per channel, fetch details, up to 3
 * videos and an Instagram follower count via Brave Search.
 *
 * `competitor` defaults to the niche. A niche is optional when a competitor
 * is given; with no niche keywords, niche matching uses its neutral value.
 *
 * @param {object} config - Recipe config (`niche`, `threshold` are read).
 * @param {object} input - Tool input (`niche`, `competitor`, `threshold` are read).
 * @returns {Promise<object[]>} Scored influencer profiles (unsorted).
 * @throws {TypeError} When neither a competitor nor a niche is available.
 */
async function runCompetitorSpy(config, input) {
    const niche = input.niche || config.niche || "";
    const competitor = input.competitor || niche;
    if (typeof competitor !== "string" || competitor.trim() === "") {
        throw new TypeError("competitor_spy requires a 'competitor' or a 'niche' (tool input or recipe config)");
    }
    const nicheKeywords = String(niche).split(/[\s,]+/).filter(Boolean);
    const threshold = input.threshold ?? config.threshold ?? 60;
    const maxCreators = 10;
    // Step 1: Brave Search for sponsored/collab content
    const braveQueries = [
        `"${competitor}" sponsored site:youtube.com`,
        `"${competitor}" collab site:youtube.com`,
    ];
    // Maps a video or channel URL to a best-guess creator name.
    const creatorUrls = new Map();
    for (const query of braveQueries) {
        const data = await braveSearch(query, 10);
        for (const result of data?.web?.results ?? []) {
            if (!result.url?.includes("youtube.com/watch")) {
                continue;
            }
            // Prefer a "by <name>" phrase from the snippet; otherwise use the title.
            const byline = result.description?.match(/by\s+([\w\s]+)/i);
            creatorUrls.set(result.url, byline ? byline[1].trim() : (result.title || "Unknown"));
        }
    }
    // Step 2: YouTube search for reviews/unboxings
    for (const query of [`${competitor} review`, `${competitor} unboxing`]) {
        const data = await ytExecute("search", { query });
        for (const video of data?.results ?? []) {
            if (video.author_url && !creatorUrls.has(video.author_url)) {
                creatorUrls.set(video.author_url, video.author);
            }
        }
    }
    // Step 3: Deduplicate by channel URL
    const uniqueChannels = new Map();
    for (const [url, name] of creatorUrls) {
        // Only the first `maxCreators` channels are used, so stop resolving
        // video URLs once enough have been collected.
        if (uniqueChannels.size >= maxCreators) {
            break;
        }
        if (url.includes("youtube.com/watch")) {
            // A video URL: the channel comes from the video details.
            const details = await ytExecute("video", { url });
            if (details?.author_url && !uniqueChannels.has(details.author_url)) {
                uniqueChannels.set(details.author_url, details.author || name);
            }
        }
        else if (url.includes("youtube.com/@") || url.includes("youtube.com/c/") || url.includes("youtube.com/channel/")) {
            if (!uniqueChannels.has(url)) {
                uniqueChannels.set(url, name);
            }
        }
    }
    const topCreators = [...uniqueChannels.entries()].slice(0, maxCreators);
    const influencers = [];
    for (const [channelUrl, authorName] of topCreators) {
        const channel = await ytExecute("channel", { channel_url: channelUrl });
        // NOTE(review): this is a keyword search on "<author> <competitor>", so
        // the results may include other channels' videos — confirm this is acceptable.
        const recentSearch = await ytExecute("search", { query: `${authorName} ${competitor}` });
        const recentVideos = (recentSearch?.results || []).slice(0, 3);
        const videoDetails = [];
        for (const video of recentVideos) {
            if (!video.url) {
                continue;
            }
            const details = await ytExecute("video", { url: video.url });
            if (details && !details.error) {
                videoDetails.push({
                    title: details.title || video.title,
                    views: details.views,
                    likes: details.likes,
                    published: details.published,
                });
            }
        }
        // Averages stay undefined when no video details could be fetched.
        const averageOf = (field) => (videoDetails.length > 0
            ? videoDetails.reduce((sum, v) => sum + (v[field] || 0), 0) / videoDetails.length
            : undefined);
        const avgViews = averageOf("views");
        const avgLikes = averageOf("likes");
        const subscribers = channel?.subscribers;
        const description = channel?.description || "";
        const contactInfo = extractContactInfo(description);
        const [igHandle] = parseIgHandles(description);
        let igFollowers;
        let igUrl;
        if (igHandle) {
            const braveResult = await braveSearch(`"${igHandle}" instagram`, 3);
            for (const result of braveResult?.web?.results ?? []) {
                const followers = parseFollowersFromSnippet(result.description || "");
                if (followers) {
                    igFollowers = followers;
                    igUrl = `https://instagram.com/${igHandle}`;
                    break;
                }
            }
        }
        const platformCount = 1 + (igFollowers ? 1 : 0); // YouTube + IG if found
        const engagementRate = calcEngagement(avgViews, avgLikes, subscribers);
        // Drop missing titles so the niche matcher only sees strings.
        const titles = videoDetails.map((v) => v.title).filter((title) => typeof title === "string");
        const profile = {
            handle: channelUrl.replace("https://www.youtube.com", "") || authorName,
            name: channel?.name || authorName,
            youtube_url: channelUrl,
            instagram_url: igUrl,
            subscribers,
            ig_followers: igFollowers,
            description: description.substring(0, 300),
            engagement_rate: Math.round(engagementRate * 10) / 10,
            avg_views: avgViews ? Math.round(avgViews) : undefined,
            avg_likes: avgLikes ? Math.round(avgLikes) : undefined,
            recent_videos: videoDetails,
            email: contactInfo.email,
            website: contactInfo.website,
            has_business_contact: contactInfo.hasBusiness,
            has_collab_signals: contactInfo.hasCollab,
            niche_match_pct: calcNicheMatch([description, ...titles], nicheKeywords),
            posting_frequency: estimateFrequency(videoDetails),
            platform_count: platformCount,
            scores: { reach: 0, conversion: 0, partnership: 0 },
            tier: "UNRANKED",
        };
        profile.scores = calculateScores(profile);
        profile.tier = classifyTier(profile.scores, threshold);
        influencers.push(profile);
    }
    return influencers;
}
961
+ // --- Main influencer discovery dispatcher ---
962
/**
 * Dispatches an influencer-discovery recipe to its workflow and formats the
 * result.
 *
 * Tool input takes precedence over recipe config for `output_format`,
 * `threshold` and `niche`; the defaults are "json" and 60.
 *
 * @param {object} config - Recipe config; `config.workflow` selects the workflow.
 * @param {object} input - Tool input.
 * @returns {Promise<object>} MCP result: raw text content for CSV/markdown,
 *   `mcpResult(...)` for JSON output or for an unknown workflow.
 */
async function runInfluencerDiscovery(config, input) {
    const workflows = {
        niche_discovery: runNicheDiscovery,
        hashtag_scout: runHashtagScout,
        competitor_spy: runCompetitorSpy,
    };
    // Own-property check so inherited keys are never treated as workflows.
    if (!Object.hasOwn(workflows, config.workflow)) {
        return mcpResult({ error: `Unknown influencer discovery workflow: ${config.workflow}` });
    }
    const runWorkflow = workflows[config.workflow];
    const influencers = await runWorkflow(config, input);
    const meta = {
        workflow: config.workflow,
        niche: input.niche || config.niche,
        threshold: input.threshold ?? config.threshold ?? 60,
    };
    const rendered = formatInfluencerOutput(influencers, input.output_format ?? config.output_format ?? "json", meta);
    // CSV and markdown are already text; only the JSON summary goes through mcpResult.
    return typeof rendered === "string"
        ? { content: [{ type: "text", text: rendered }] }
        : mcpResult(rendered);
}
310
989
  // --- Main execute ---
311
990
  export async function execute(input) {
312
991
  // Load with recipe fallback
@@ -334,6 +1013,8 @@ export async function execute(input) {
334
1013
  return runScrape(config, url, input);
335
1014
  case "monitor_websocket":
336
1015
  return runMonitorWebsocket(config, url, input);
1016
+ case "influencer_discovery":
1017
+ return runInfluencerDiscovery(config, input);
337
1018
  default:
338
1019
  return mcpResult({ error: `Unknown skill tool type: ${tool}` });
339
1020
  }