@vellumai/assistant 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +2 -0
- package/README.md +88 -2
- package/eslint.config.mjs +31 -0
- package/package.json +1 -1
- package/scripts/ipc/check-swift-decoder-drift.ts +4 -1
- package/scripts/ipc/generate-swift.ts +31 -2
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +438 -1
- package/src/__tests__/approval-conversation-turn.test.ts +214 -0
- package/src/__tests__/approval-hardcoded-copy-guard.test.ts +41 -0
- package/src/__tests__/approval-message-composer.test.ts +253 -0
- package/src/__tests__/browser-manager.test.ts +1 -0
- package/src/__tests__/call-conversation-messages.test.ts +130 -0
- package/src/__tests__/call-domain.test.ts +12 -2
- package/src/__tests__/call-orchestrator.test.ts +799 -249
- package/src/__tests__/call-pointer-messages.test.ts +148 -0
- package/src/__tests__/call-recovery.test.ts +3 -0
- package/src/__tests__/call-routes-http.test.ts +32 -2
- package/src/__tests__/call-store.test.ts +3 -0
- package/src/__tests__/channel-approval-routes.test.ts +1277 -98
- package/src/__tests__/channel-approval.test.ts +37 -0
- package/src/__tests__/channel-approvals.test.ts +36 -50
- package/src/__tests__/channel-guardian.test.ts +630 -22
- package/src/__tests__/channel-readiness-service.test.ts +324 -0
- package/src/__tests__/checker.test.ts +14 -7
- package/src/__tests__/clarification-resolver.test.ts +44 -24
- package/src/__tests__/commit-message-enrichment-service.test.ts +9 -4
- package/src/__tests__/computer-use-session-working-dir.test.ts +8 -0
- package/src/__tests__/config-schema.test.ts +14 -8
- package/src/__tests__/context-window-manager.test.ts +30 -2
- package/src/__tests__/contradiction-checker.test.ts +20 -5
- package/src/__tests__/credential-security-invariants.test.ts +7 -2
- package/src/__tests__/daemon-lifecycle.test.ts +13 -12
- package/src/__tests__/db-migration-rollback.test.ts +752 -0
- package/src/__tests__/dictation-mode-detection.test.ts +63 -0
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +2 -0
- package/src/__tests__/entity-search.test.ts +615 -0
- package/src/__tests__/fuzzy-match-property.test.ts +5 -5
- package/src/__tests__/guardian-action-store.test.ts +123 -0
- package/src/__tests__/guardian-action-sweep.test.ts +277 -0
- package/src/__tests__/guardian-dispatch.test.ts +389 -0
- package/src/__tests__/guardian-question-copy.test.ts +47 -0
- package/src/__tests__/handlers-telegram-config.test.ts +4 -2
- package/src/__tests__/handlers-twilio-config.test.ts +533 -0
- package/src/__tests__/intent-routing.test.ts +2 -0
- package/src/__tests__/ipc-snapshot.test.ts +291 -1
- package/src/__tests__/memory-upsert-concurrency.test.ts +828 -0
- package/src/__tests__/messaging-send-tool.test.ts +65 -0
- package/src/__tests__/model-intents.test.ts +96 -0
- package/src/__tests__/no-direct-anthropic-sdk-imports.test.ts +42 -0
- package/src/__tests__/oauth2-gateway-transport.test.ts +130 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +2 -0
- package/src/__tests__/provider-commit-message-generator.test.ts +89 -13
- package/src/__tests__/provider-error-scenarios.test.ts +621 -0
- package/src/__tests__/provider-fail-open-selection.test.ts +119 -0
- package/src/__tests__/qdrant-manager.test.ts +27 -20
- package/src/__tests__/relay-server.test.ts +779 -40
- package/src/__tests__/run-orchestrator-assistant-events.test.ts +6 -0
- package/src/__tests__/run-orchestrator.test.ts +42 -4
- package/src/__tests__/runtime-runs-http.test.ts +17 -1
- package/src/__tests__/runtime-runs.test.ts +16 -0
- package/src/__tests__/schedule-store.test.ts +18 -4
- package/src/__tests__/scheduler-recurrence.test.ts +13 -4
- package/src/__tests__/session-abort-tool-results.test.ts +6 -0
- package/src/__tests__/session-agent-loop.test.ts +857 -0
- package/src/__tests__/session-conflict-gate.test.ts +6 -0
- package/src/__tests__/session-pre-run-repair.test.ts +6 -0
- package/src/__tests__/session-profile-injection.test.ts +6 -0
- package/src/__tests__/session-provider-retry-repair.test.ts +6 -0
- package/src/__tests__/session-queue.test.ts +6 -0
- package/src/__tests__/session-runtime-assembly.test.ts +321 -13
- package/src/__tests__/session-slash-known.test.ts +6 -0
- package/src/__tests__/session-slash-queue.test.ts +6 -0
- package/src/__tests__/session-slash-unknown.test.ts +6 -0
- package/src/__tests__/session-surfaces-task-progress.test.ts +2 -0
- package/src/__tests__/session-tool-setup-app-refresh.test.ts +1 -0
- package/src/__tests__/session-tool-setup-memory-scope.test.ts +1 -0
- package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +1 -0
- package/src/__tests__/session-workspace-injection.test.ts +6 -0
- package/src/__tests__/session-workspace-tool-tracking.test.ts +6 -0
- package/src/__tests__/skills.test.ts +2 -0
- package/src/__tests__/sms-messaging-provider.test.ts +126 -0
- package/src/__tests__/starter-task-flow.test.ts +2 -0
- package/src/__tests__/swarm-dag-pathological.test.ts +535 -0
- package/src/__tests__/system-prompt.test.ts +2 -0
- package/src/__tests__/task-management-tools.test.ts +2 -2
- package/src/__tests__/task-runner.test.ts +14 -4
- package/src/__tests__/terminal-tools.test.ts +25 -19
- package/src/__tests__/tool-execution-abort-cleanup.test.ts +545 -0
- package/src/__tests__/tool-executor-shell-integration.test.ts +11 -11
- package/src/__tests__/tool-executor.test.ts +23 -24
- package/src/__tests__/trust-store.test.ts +3 -3
- package/src/__tests__/twilio-rest.test.ts +29 -0
- package/src/__tests__/twilio-routes-elevenlabs.test.ts +3 -0
- package/src/__tests__/twilio-routes-twiml.test.ts +11 -0
- package/src/__tests__/twilio-routes.test.ts +167 -11
- package/src/__tests__/twitter-cli-error-shaping.test.ts +2 -2
- package/src/__tests__/user-reference.test.ts +2 -0
- package/src/__tests__/voice-quality.test.ts +222 -0
- package/src/__tests__/web-search.test.ts +46 -30
- package/src/__tests__/work-item-output.test.ts +110 -0
- package/src/agent/loop.ts +1 -1
- package/src/agent-heartbeat/agent-heartbeat-service.ts +2 -10
- package/src/amazon/client.ts +1418 -0
- package/src/amazon/request-extractor.ts +135 -0
- package/src/amazon/session.ts +109 -0
- package/src/autonomy/autonomy-store.ts +5 -5
- package/src/browser-extension-relay/client.ts +124 -0
- package/src/browser-extension-relay/protocol.ts +63 -0
- package/src/browser-extension-relay/server.ts +177 -0
- package/src/bundler/app-bundler.ts +3 -3
- package/src/bundler/bundle-signer.ts +1 -1
- package/src/bundler/signature-verifier.ts +1 -1
- package/src/calls/call-conversation-messages.ts +33 -0
- package/src/calls/call-domain.ts +114 -10
- package/src/calls/call-orchestrator.ts +268 -59
- package/src/calls/call-pointer-messages.ts +53 -0
- package/src/calls/call-recovery.ts +3 -8
- package/src/calls/call-store.ts +69 -87
- package/src/calls/elevenlabs-config.ts +3 -2
- package/src/calls/guardian-action-sweep.ts +105 -0
- package/src/calls/guardian-dispatch.ts +203 -0
- package/src/calls/guardian-question-copy.ts +133 -0
- package/src/calls/relay-server.ts +466 -8
- package/src/calls/speaker-identification.ts +1 -1
- package/src/calls/twilio-config.ts +22 -14
- package/src/calls/twilio-provider.ts +6 -4
- package/src/calls/twilio-rest.ts +308 -7
- package/src/calls/twilio-routes.ts +65 -12
- package/src/calls/types.ts +3 -1
- package/src/channels/types.ts +25 -0
- package/src/cli/amazon.ts +815 -0
- package/src/cli/config-commands.ts +2 -2
- package/src/cli/core-commands.ts +4 -3
- package/src/cli/influencer.ts +244 -0
- package/src/cli/map.ts +89 -6
- package/src/cli.ts +1 -1
- package/src/config/agent-schema.ts +171 -0
- package/src/config/bundled-skills/amazon/SKILL.md +127 -0
- package/src/config/bundled-skills/amazon/icon.svg +13 -0
- package/src/config/bundled-skills/api-mapping/SKILL.md +78 -0
- package/src/config/bundled-skills/browser/SKILL.md +1 -0
- package/src/config/bundled-skills/browser/TOOLS.json +17 -0
- package/src/config/bundled-skills/browser/tools/browser-wait-for-download.ts +25 -0
- package/src/config/bundled-skills/doordash/SKILL.md +51 -51
- package/src/config/bundled-skills/email-setup/SKILL.md +14 -5
- package/src/config/bundled-skills/google-oauth-setup/SKILL.md +183 -0
- package/src/config/bundled-skills/influencer/SKILL.md +144 -0
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +15 -0
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +56 -0
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +185 -0
- package/src/config/bundled-skills/macos-automation/icon.svg +12 -0
- package/src/config/bundled-skills/media-processing/SKILL.md +176 -0
- package/src/config/bundled-skills/media-processing/TOOLS.json +230 -0
- package/src/config/bundled-skills/media-processing/__tests__/concurrency-pool.test.ts +77 -0
- package/src/config/bundled-skills/media-processing/__tests__/cost-tracker.test.ts +69 -0
- package/src/config/bundled-skills/media-processing/__tests__/preprocess.test.ts +303 -0
- package/src/config/bundled-skills/media-processing/services/concurrency-pool.ts +55 -0
- package/src/config/bundled-skills/media-processing/services/cost-tracker.ts +86 -0
- package/src/config/bundled-skills/media-processing/services/gemini-map.ts +339 -0
- package/src/config/bundled-skills/media-processing/services/preprocess.ts +551 -0
- package/src/config/bundled-skills/media-processing/services/processing-pipeline.ts +259 -0
- package/src/config/bundled-skills/media-processing/services/reduce.ts +197 -0
- package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +136 -0
- package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +59 -0
- package/src/config/bundled-skills/media-processing/tools/generate-clip.ts +195 -0
- package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +197 -0
- package/src/config/bundled-skills/media-processing/tools/media-diagnostics.ts +143 -0
- package/src/config/bundled-skills/media-processing/tools/media-status.ts +75 -0
- package/src/config/bundled-skills/media-processing/tools/query-media-events.ts +65 -0
- package/src/config/bundled-skills/messaging/SKILL.md +33 -8
- package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +4 -7
- package/src/config/bundled-skills/messaging/tools/messaging-reply.ts +2 -1
- package/src/config/bundled-skills/messaging/tools/messaging-send.ts +5 -1
- package/src/config/bundled-skills/phone-calls/SKILL.md +88 -23
- package/src/config/bundled-skills/twitter/SKILL.md +19 -3
- package/src/config/bundled-skills/twitter/icon.svg +14 -0
- package/src/config/bundled-tool-registry.ts +310 -0
- package/src/config/calls-schema.ts +181 -0
- package/src/config/core-schema.ts +309 -0
- package/src/config/defaults.ts +28 -3
- package/src/config/env-registry.ts +162 -0
- package/src/config/env.ts +175 -0
- package/src/config/loader.ts +6 -6
- package/src/config/memory-schema.ts +528 -0
- package/src/config/sandbox-schema.ts +55 -0
- package/src/config/schema.ts +158 -1133
- package/src/config/skill-state.ts +1 -1
- package/src/config/skills-schema.ts +32 -0
- package/src/config/skills.ts +35 -24
- package/src/config/system-prompt.ts +131 -56
- package/src/config/templates/IDENTITY.md +2 -2
- package/src/config/templates/SOUL.md +1 -1
- package/src/config/types.ts +1 -0
- package/src/config/user-reference.ts +4 -9
- package/src/config/vellum-skills/catalog.json +6 -7
- package/src/config/vellum-skills/chatgpt-import/tools/chatgpt-import.ts +5 -1
- package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +4 -3
- package/src/config/vellum-skills/sms-setup/SKILL.md +216 -0
- package/src/config/vellum-skills/twilio-setup/SKILL.md +40 -8
- package/src/context/window-manager.ts +27 -7
- package/src/daemon/approval-generators.ts +186 -0
- package/src/daemon/approved-devices-store.ts +140 -0
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/classifier.ts +35 -32
- package/src/daemon/config-watcher.ts +1 -1
- package/src/daemon/daemon-control.ts +217 -0
- package/src/daemon/handlers/apps.ts +2 -3
- package/src/daemon/handlers/config-channels.ts +158 -0
- package/src/daemon/handlers/config-inbox.ts +540 -0
- package/src/daemon/handlers/config-ingress.ts +231 -0
- package/src/daemon/handlers/config-integrations.ts +258 -0
- package/src/daemon/handlers/config-model.ts +143 -0
- package/src/daemon/handlers/config-parental.ts +163 -0
- package/src/daemon/handlers/config-scheduling.ts +172 -0
- package/src/daemon/handlers/config-slack.ts +92 -0
- package/src/daemon/handlers/config-telegram.ts +301 -0
- package/src/daemon/handlers/config-tools.ts +177 -0
- package/src/daemon/handlers/config-trust.ts +104 -0
- package/src/daemon/handlers/config-twilio.ts +1080 -0
- package/src/daemon/handlers/config.ts +53 -1689
- package/src/daemon/handlers/diagnostics.ts +1 -1
- package/src/daemon/handlers/dictation.ts +180 -0
- package/src/daemon/handlers/documents.ts +18 -32
- package/src/daemon/handlers/identity.ts +14 -23
- package/src/daemon/handlers/index.ts +11 -0
- package/src/daemon/handlers/misc.ts +3 -5
- package/src/daemon/handlers/pairing.ts +98 -0
- package/src/daemon/handlers/sessions.ts +56 -5
- package/src/daemon/handlers/shared.ts +6 -1
- package/src/daemon/handlers/skills.ts +1 -1
- package/src/daemon/handlers/twitter-auth.ts +2 -0
- package/src/daemon/handlers/work-items.ts +17 -9
- package/src/daemon/handlers/workspace-files.ts +4 -3
- package/src/daemon/install-cli-launchers.ts +113 -0
- package/src/daemon/ipc-contract/apps.ts +356 -0
- package/src/daemon/ipc-contract/browser.ts +74 -0
- package/src/daemon/ipc-contract/computer-use.ts +151 -0
- package/src/daemon/ipc-contract/diagnostics.ts +56 -0
- package/src/daemon/ipc-contract/documents.ts +74 -0
- package/src/daemon/ipc-contract/inbox.ts +209 -0
- package/src/daemon/ipc-contract/integrations.ts +284 -0
- package/src/daemon/ipc-contract/memory.ts +48 -0
- package/src/daemon/ipc-contract/messages.ts +211 -0
- package/src/daemon/ipc-contract/pairing.ts +45 -0
- package/src/daemon/ipc-contract/parental-control.ts +95 -0
- package/src/daemon/ipc-contract/schedules.ts +97 -0
- package/src/daemon/ipc-contract/sessions.ts +315 -0
- package/src/daemon/ipc-contract/shared.ts +42 -0
- package/src/daemon/ipc-contract/skills.ts +120 -0
- package/src/daemon/ipc-contract/subagents.ts +58 -0
- package/src/daemon/ipc-contract/surfaces.ts +250 -0
- package/src/daemon/ipc-contract/trust.ts +60 -0
- package/src/daemon/ipc-contract/work-items.ts +225 -0
- package/src/daemon/ipc-contract/workspace.ts +113 -0
- package/src/daemon/ipc-contract-inventory.json +70 -0
- package/src/daemon/ipc-contract-inventory.ts +55 -29
- package/src/daemon/ipc-contract.ts +229 -2426
- package/src/daemon/ipc-protocol.ts +1 -1
- package/src/daemon/ipc-validate.ts +7 -0
- package/src/daemon/lifecycle.ts +97 -377
- package/src/daemon/pairing-store.ts +177 -0
- package/src/daemon/providers-setup.ts +43 -0
- package/src/daemon/ride-shotgun-handler.ts +68 -3
- package/src/daemon/server.ts +66 -46
- package/src/daemon/session-agent-loop-handlers.ts +421 -0
- package/src/daemon/session-agent-loop.ts +117 -275
- package/src/daemon/session-dynamic-profile.ts +1 -1
- package/src/daemon/session-history.ts +1 -1
- package/src/daemon/session-media-retry.ts +1 -1
- package/src/daemon/session-messaging.ts +37 -2
- package/src/daemon/session-notifiers.ts +5 -25
- package/src/daemon/session-process.ts +99 -59
- package/src/daemon/session-queue-manager.ts +96 -4
- package/src/daemon/session-runtime-assembly.ts +199 -10
- package/src/daemon/session-surfaces.ts +19 -4
- package/src/daemon/session-tool-setup.ts +30 -30
- package/src/daemon/session-workspace.ts +1 -1
- package/src/daemon/session.ts +35 -2
- package/src/daemon/shutdown-handlers.ts +122 -0
- package/src/daemon/trace-emitter.ts +1 -1
- package/src/daemon/watch-handler.ts +36 -33
- package/src/doordash/cart-queries.ts +787 -0
- package/src/doordash/client.ts +144 -127
- package/src/doordash/order-queries.ts +85 -0
- package/src/doordash/queries.ts +10 -1308
- package/src/doordash/search-queries.ts +203 -0
- package/src/doordash/session.ts +3 -2
- package/src/doordash/store-queries.ts +246 -0
- package/src/doordash/types.ts +367 -0
- package/src/email/providers/agentmail.ts +2 -1
- package/src/email/providers/index.ts +3 -2
- package/src/email/service.ts +3 -2
- package/src/errors.ts +43 -0
- package/src/home-base/prebuilt/seed.ts +1 -1
- package/src/hooks/cli.ts +6 -5
- package/src/hooks/config.ts +6 -8
- package/src/hooks/discovery.ts +6 -5
- package/src/hooks/manager.ts +4 -3
- package/src/hooks/runner.ts +2 -2
- package/src/hooks/templates.ts +5 -5
- package/src/inbound/public-ingress-urls.ts +6 -4
- package/src/index.ts +4 -2
- package/src/influencer/client.ts +1104 -0
- package/src/instrument.ts +4 -3
- package/src/logfire.ts +4 -3
- package/src/memory/admin.ts +25 -35
- package/src/memory/attachments-store.ts +4 -7
- package/src/memory/channel-delivery-store.ts +30 -1
- package/src/memory/channel-guardian-store.ts +202 -2
- package/src/memory/clarification-resolver.ts +37 -33
- package/src/memory/conflict-store.ts +67 -61
- package/src/memory/contradiction-checker.ts +141 -117
- package/src/memory/conversation-store.ts +335 -51
- package/src/memory/db-connection.ts +27 -4
- package/src/memory/db-init.ts +265 -4
- package/src/memory/db.ts +14 -1
- package/src/memory/embedding-backend.ts +27 -5
- package/src/memory/embedding-ollama.ts +2 -1
- package/src/memory/entity-extractor.ts +38 -35
- package/src/memory/guardian-action-store.ts +430 -0
- package/src/memory/inbox-escalation-projection.ts +59 -0
- package/src/memory/inbox-thread-store.ts +218 -0
- package/src/memory/ingress-invite-store.ts +338 -0
- package/src/memory/ingress-member-store.ts +350 -0
- package/src/memory/items-extractor.ts +91 -97
- package/src/memory/job-handlers/index-maintenance.ts +3 -3
- package/src/memory/job-handlers/media-processing.ts +69 -0
- package/src/memory/job-handlers/summarization.ts +32 -26
- package/src/memory/job-utils.ts +3 -10
- package/src/memory/jobs-store.ts +8 -10
- package/src/memory/jobs-worker.ts +55 -36
- package/src/memory/media-store.ts +759 -0
- package/src/memory/migrations/001-job-deferrals.ts +45 -0
- package/src/memory/migrations/002-tool-invocations-fk.ts +43 -0
- package/src/memory/migrations/003-memory-fts-backfill.ts +24 -0
- package/src/memory/migrations/004-entity-relation-dedup.ts +87 -0
- package/src/memory/migrations/005-fingerprint-scope-unique.ts +80 -0
- package/src/memory/migrations/006-scope-salted-fingerprints.ts +62 -0
- package/src/memory/migrations/007-assistant-id-to-self.ts +254 -0
- package/src/memory/migrations/008-remove-assistant-id-columns.ts +208 -0
- package/src/memory/migrations/009-llm-usage-events-drop-assistant-id.ts +83 -0
- package/src/memory/migrations/010-ext-conv-bindings-channel-chat-unique.ts +56 -0
- package/src/memory/migrations/011-call-sessions-provider-sid-dedup.ts +63 -0
- package/src/memory/migrations/012-call-sessions-add-initiated-from.ts +19 -0
- package/src/memory/migrations/013-guardian-action-tables.ts +68 -0
- package/src/memory/migrations/014-backfill-inbox-thread-state.ts +76 -0
- package/src/memory/migrations/015-drop-active-search-index.ts +27 -0
- package/src/memory/migrations/016-memory-segments-indexes.ts +11 -0
- package/src/memory/migrations/017-memory-items-indexes.ts +10 -0
- package/src/memory/migrations/018-remaining-table-indexes.ts +13 -0
- package/src/memory/migrations/index.ts +24 -0
- package/src/memory/migrations/registry.ts +79 -0
- package/src/memory/migrations/validate-migration-state.ts +69 -0
- package/src/memory/qdrant-manager.ts +49 -8
- package/src/memory/query-builder.ts +1 -1
- package/src/memory/raw-query.ts +119 -0
- package/src/memory/recall-cache.ts +4 -1
- package/src/memory/retriever.ts +165 -47
- package/src/memory/schema-migration.ts +25 -984
- package/src/memory/schema.ts +228 -7
- package/src/memory/search/entity.ts +205 -31
- package/src/memory/search/lexical.ts +81 -52
- package/src/memory/search/ranking.ts +27 -23
- package/src/memory/search/semantic.ts +157 -19
- package/src/memory/search/types.ts +24 -0
- package/src/memory/shared-app-links-store.ts +4 -5
- package/src/memory/validation.ts +19 -0
- package/src/messaging/draft-store.ts +5 -6
- package/src/messaging/provider-types.ts +2 -0
- package/src/messaging/providers/sms/adapter.ts +201 -0
- package/src/messaging/providers/sms/client.ts +93 -0
- package/src/messaging/providers/sms/types.ts +7 -0
- package/src/messaging/providers/telegram-bot/adapter.ts +2 -5
- package/src/messaging/providers/whatsapp/adapter.ts +136 -0
- package/src/messaging/providers/whatsapp/client.ts +67 -0
- package/src/messaging/style-analyzer.ts +5 -4
- package/src/messaging/thread-summarizer.ts +61 -69
- package/src/messaging/triage-engine.ts +62 -71
- package/src/migrations/config-merge.ts +53 -0
- package/src/migrations/data-layout.ts +68 -0
- package/src/migrations/data-merge.ts +33 -0
- package/src/migrations/hooks-merge.ts +90 -0
- package/src/migrations/index.ts +6 -0
- package/src/migrations/log.ts +23 -0
- package/src/migrations/skills-merge.ts +33 -0
- package/src/migrations/workspace-layout.ts +79 -0
- package/src/permissions/checker.ts +133 -11
- package/src/permissions/prompter.ts +14 -0
- package/src/permissions/shell-identity.ts +31 -1
- package/src/permissions/trust-store.ts +21 -1
- package/src/providers/anthropic/client.ts +4 -4
- package/src/providers/failover.ts +2 -2
- package/src/providers/model-intents.ts +70 -0
- package/src/providers/ollama/client.ts +2 -1
- package/src/providers/provider-send-message.ts +176 -0
- package/src/providers/registry.ts +71 -30
- package/src/providers/retry.ts +35 -1
- package/src/providers/types.ts +12 -1
- package/src/runtime/approval-conversation-turn.ts +97 -0
- package/src/runtime/approval-message-composer.ts +253 -0
- package/src/runtime/channel-approval-parser.ts +36 -2
- package/src/runtime/channel-approvals.ts +11 -24
- package/src/runtime/channel-guardian-service.ts +88 -21
- package/src/runtime/channel-readiness-service.ts +418 -0
- package/src/runtime/channel-readiness-types.ts +35 -0
- package/src/runtime/channel-retry-sweep.ts +184 -0
- package/src/runtime/guardian-context-resolver.ts +108 -0
- package/src/runtime/http-server.ts +275 -717
- package/src/runtime/http-types.ts +59 -3
- package/src/runtime/middleware/auth.ts +116 -0
- package/src/runtime/middleware/error-handler.ts +33 -0
- package/src/runtime/middleware/twilio-validation.ts +127 -0
- package/src/runtime/routes/app-routes.ts +1 -1
- package/src/runtime/routes/call-routes.ts +51 -7
- package/src/runtime/routes/channel-delivery-routes.ts +170 -0
- package/src/runtime/routes/channel-guardian-routes.ts +1191 -0
- package/src/runtime/routes/channel-inbound-routes.ts +1152 -0
- package/src/runtime/routes/channel-route-shared.ts +144 -0
- package/src/runtime/routes/channel-routes.ts +32 -1588
- package/src/runtime/routes/conversation-routes.ts +50 -7
- package/src/runtime/routes/events-routes.ts +2 -2
- package/src/runtime/routes/identity-routes.ts +126 -0
- package/src/runtime/routes/pairing-routes.ts +143 -0
- package/src/runtime/routes/run-routes.ts +15 -1
- package/src/runtime/run-orchestrator.ts +86 -35
- package/src/schedule/schedule-store.ts +36 -32
- package/src/schedule/scheduler.ts +3 -3
- package/src/security/encrypted-store.ts +5 -7
- package/src/security/oauth2.ts +45 -15
- package/src/security/parental-control-store.ts +183 -0
- package/src/security/secret-allowlist.ts +4 -3
- package/src/security/secret-scanner.ts +5 -5
- package/src/security/secure-keys.ts +1 -1
- package/src/security/token-manager.ts +3 -2
- package/src/services/vercel-deploy.ts +6 -2
- package/src/skills/tool-manifest.ts +3 -3
- package/src/skills/vellum-catalog-remote.ts +75 -16
- package/src/slack/slack-webhook.ts +2 -1
- package/src/swarm/orchestrator.ts +92 -1
- package/src/swarm/router-planner.ts +6 -9
- package/src/swarm/worker-prompts.ts +9 -12
- package/src/tasks/task-compiler.ts +19 -28
- package/src/tasks/task-runner.ts +1 -1
- package/src/tools/assets/materialize.ts +2 -2
- package/src/tools/assets/search.ts +15 -14
- package/src/tools/browser/__tests__/auth-detector.test.ts +1 -0
- package/src/tools/browser/auto-navigate.ts +1 -0
- package/src/tools/browser/browser-execution.ts +10 -1
- package/src/tools/browser/browser-manager.ts +119 -4
- package/src/tools/browser/network-recorder.ts +5 -0
- package/src/tools/calls/call-start.ts +1 -0
- package/src/tools/credentials/broker.ts +11 -2
- package/src/tools/credentials/metadata-store.ts +18 -14
- package/src/tools/credentials/post-connect-hooks.ts +61 -0
- package/src/tools/credentials/vault.ts +49 -23
- package/src/tools/execution-target.ts +11 -1
- package/src/tools/executor.ts +68 -9
- package/src/tools/host-terminal/cli-discover.ts +1 -1
- package/src/tools/network/script-proxy/http-forwarder.ts +1 -1
- package/src/tools/network/script-proxy/mitm-handler.ts +1 -1
- package/src/tools/network/script-proxy/server.ts +1 -1
- package/src/tools/network/script-proxy/session-manager.ts +6 -5
- package/src/tools/network/web-fetch.ts +18 -2
- package/src/tools/network/web-search.ts +8 -4
- package/src/tools/reminder/reminder-store.ts +14 -15
- package/src/tools/schedule/create.ts +1 -0
- package/src/tools/schedule/list.ts +2 -1
- package/src/tools/shared/filesystem/file-ops-service.ts +5 -7
- package/src/tools/skills/skill-script-runner.ts +24 -9
- package/src/tools/skills/skill-tool-factory.ts +1 -0
- package/src/tools/tasks/work-item-enqueue.ts +2 -2
- package/src/tools/terminal/evaluate-typescript.ts +21 -12
- package/src/tools/terminal/parser.ts +50 -0
- package/src/tools/types.ts +2 -0
- package/src/tools/watcher/delete.ts +6 -0
- package/src/tools/weather/service.ts +1 -1
- package/src/twitter/client.ts +190 -24
- package/src/twitter/router.ts +1 -1
- package/src/twitter/session.ts +4 -3
- package/src/util/clipboard.ts +1 -1
- package/src/util/errors.ts +65 -8
- package/src/util/fs.ts +40 -0
- package/src/util/json.ts +10 -0
- package/src/util/log-redact.ts +189 -0
- package/src/util/logger.ts +19 -17
- package/src/util/object.ts +3 -0
- package/src/util/platform.ts +105 -363
- package/src/util/pricing.ts +1 -1
- package/src/util/promise-guard.ts +1 -1
- package/src/util/retry.ts +19 -0
- package/src/util/row-mapper.ts +79 -0
- package/src/util/silently.ts +21 -0
- package/src/watcher/engine.ts +5 -1
- package/src/watcher/provider-types.ts +20 -0
- package/src/watcher/providers/github.ts +156 -0
- package/src/watcher/providers/gmail.ts +1 -0
- package/src/watcher/providers/google-calendar.ts +1 -0
- package/src/watcher/providers/linear.ts +460 -0
- package/src/watcher/providers/slack.ts +1 -0
- package/src/work-items/work-item-runner.ts +1 -1
- package/src/workspace/git-service.ts +1 -1
- package/src/workspace/provider-commit-message-generator.ts +51 -22
- package/src/__tests__/call-bridge.test.ts +0 -517
- package/src/__tests__/session-process-bridge.test.ts +0 -244
- package/src/calls/call-bridge.ts +0 -168
- package/src/config/vellum-skills/google-oauth-setup/SKILL.md +0 -199
|
@@ -0,0 +1,1104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Influencer Research Client
|
|
3
|
+
*
|
|
4
|
+
* ARCHITECTURE
|
|
5
|
+
* ============
|
|
6
|
+
* All scraping runs inside Chrome browser tabs via the extension relay. The
|
|
7
|
+
* relay's evaluate command uses CDP Runtime.evaluate (via chrome.debugger API)
|
|
8
|
+
* as a fallback, which bypasses strict CSP on sites like Instagram.
|
|
9
|
+
*
|
|
10
|
+
* The user must be logged into Instagram, TikTok, and/or X in their Chrome
|
|
11
|
+
* browser for this to work.
|
|
12
|
+
*
|
|
13
|
+
* INSTAGRAM DISCOVERY FLOW
|
|
14
|
+
* ========================
|
|
15
|
+
* Instagram's search at /explore/search/keyword/?q=... returns a grid of POSTS
|
|
16
|
+
* (not profiles). To discover influencers:
|
|
17
|
+
* 1. Search by keyword → get grid of post links (/p/ and /reel/)
|
|
18
|
+
* 2. Visit each post → extract the author username from page text
|
|
19
|
+
* 3. Deduplicate usernames
|
|
20
|
+
* 4. Visit each unique profile → scrape stats from meta[name="description"]
|
|
21
|
+
* which reliably contains "49K Followers, 463 Following, 551 Posts - ..."
|
|
22
|
+
* 5. Filter by criteria and rank
|
|
23
|
+
*
|
|
24
|
+
* TIKTOK DISCOVERY FLOW
|
|
25
|
+
* =====================
|
|
26
|
+
* TikTok has a dedicated user search at /search/user?q=... which returns
|
|
27
|
+
* profile cards directly with follower counts and bios.
|
|
28
|
+
*
|
|
29
|
+
* X/TWITTER DISCOVERY FLOW
|
|
30
|
+
* ========================
|
|
31
|
+
* X has a people search at /search?q=...&f=user which returns UserCell
|
|
32
|
+
* components with profile data.
|
|
33
|
+
*
|
|
34
|
+
* EVALUATE SCRIPTS
|
|
35
|
+
* ================
|
|
36
|
+
* All scripts passed to evalInTab() are wrapped in (function(){ ... })() by
|
|
37
|
+
* the relay's CDP Runtime.evaluate. Use `return` to return values. Results
|
|
38
|
+
* should be JSON strings for complex data.
|
|
39
|
+
*
|
|
40
|
+
* LIMITATIONS
|
|
41
|
+
* ===========
|
|
42
|
+
* - Requires the user to be logged in on each platform in Chrome
|
|
43
|
+
* - Rate limiting may apply; built-in delays of 1.5-3s between navigations
|
|
44
|
+
* - Platform HTML structures change frequently; selectors may need updates
|
|
45
|
+
* - The chrome.debugger API shows a yellow infobar on the tab being debugged
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
import { extensionRelayServer } from '../browser-extension-relay/server.js';
|
|
49
|
+
import type { ExtensionCommand, ExtensionResponse } from '../browser-extension-relay/protocol.js';
|
|
50
|
+
import { readHttpToken } from '../util/platform.js';
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Types
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
/**
 * Caller-supplied filters for an influencer search.
 *
 * Only `query` is required; every other field is an optional refinement.
 */
export interface InfluencerSearchCriteria {
  /** Search text: keywords, a niche, or a topic. */
  query: string;
  /** Platforms the search should cover. */
  platforms?: Array<'instagram' | 'tiktok' | 'twitter'>;
  /** Lower bound on follower count. */
  minFollowers?: number;
  /** Upper bound on follower count. */
  maxFollowers?: number;
  /** Cap on the number of results returned for each platform. */
  limit?: number;
  /** Language/locale to filter by. */
  language?: string;
  /** When set, restrict the search to verified accounts. */
  verifiedOnly?: boolean;
}
|
|
72
|
+
|
|
73
|
+
/**
 * One scraped influencer account.
 *
 * Fields typed `T | undefined` are required keys: they must always be
 * present on the object, with `undefined` marking a value that could not be
 * determined.
 */
export interface InfluencerProfile {
  /** Platform on which this profile was found. */
  platform: 'instagram' | 'tiktok' | 'twitter';
  /** Account username / handle. */
  username: string;
  /** Name shown on the profile. */
  displayName: string;
  /** URL of the profile page. */
  profileUrl: string;
  /** Bio / description text. */
  bio: string;
  /** Follower count as a number. */
  followers: number | undefined;
  /** Follower count as a display string, e.g. "1.2M". */
  followersDisplay: string;
  /** Number of accounts this profile follows. */
  following: number | undefined;
  /** Number of posts/videos. */
  postCount: number | undefined;
  /** True when the account is verified. */
  isVerified: boolean;
  /** URL of the profile picture. */
  avatarUrl: string | undefined;
  /** Estimated engagement rate, if available. */
  engagementRate: number | undefined;
  /** Average likes per post, if available from recent posts. */
  avgLikes: number | undefined;
  /** Average comments per post, if available from recent posts. */
  avgComments: number | undefined;
  /** Content categories/themes detected from the bio and recent posts. */
  contentThemes: string[];
  /** Captions/snippets of recent posts, kept for context. */
  recentPosts: Array<{ text: string; likes?: number; comments?: number }>;
  /** Raw score used for ranking. */
  relevanceScore: number;
}
|
|
109
|
+
|
|
110
|
+
/** Outcome of an influencer search, grouped under a single platform name. */
export interface InfluencerSearchResult {
  /** Name of the platform these results belong to */
  platform: string;
  /** Profiles found for that platform */
  profiles: InfluencerProfile[];
  /** Result count (presumably `profiles.length` — confirm with the producer) */
  count: number;
  /** The search query the results belong to */
  query: string;
  /** Optional error text (presumably set when the search failed — confirm with the producer) */
  error?: string;
}
|
|
117
|
+
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
// Relay command routing (same pattern as Amazon client)
|
|
120
|
+
// ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
/**
 * Deliver a command to the browser extension relay and return its response.
 *
 * Uses the relay server directly when it reports a connection; otherwise
 * POSTs the command to the local HTTP relay endpoint with a bearer token.
 *
 * @param command - The relay command payload (sent as-is).
 * @returns The relay's response.
 * @throws Error when the relay is not connected and no HTTP token is available,
 *   or when the HTTP endpoint answers with a non-OK status.
 */
async function sendRelayCommand(command: Record<string, unknown>): Promise<ExtensionResponse> {
  const relayStatus = extensionRelayServer.getStatus();
  if (relayStatus.connected) {
    return extensionRelayServer.sendCommand(command as Omit<ExtensionCommand, 'id'>);
  }

  // Fall back to HTTP relay endpoint on the daemon
  const httpToken = readHttpToken();
  if (!httpToken) {
    throw new Error(
      'Browser extension relay is not connected and no HTTP token found. Is the daemon running?',
    );
  }

  const httpResponse = await fetch('http://127.0.0.1:7821/v1/browser-relay/command', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${httpToken}`,
    },
    body: JSON.stringify(command),
  });

  if (httpResponse.ok) {
    return (await httpResponse.json()) as ExtensionResponse;
  }

  // Surface the response body so the failure reason is visible to the caller.
  const failureBody = await httpResponse.text();
  throw new Error(`Relay HTTP command failed (${httpResponse.status}): ${failureBody}`);
}
|
|
152
|
+
|
|
153
|
+
// ---------------------------------------------------------------------------
|
|
154
|
+
// Tab management & eval
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
/**
 * Return the id of a tab matching `urlPattern`, opening `fallbackUrl` in a new
 * tab when the relay does not report a match.
 *
 * @param urlPattern - Pattern passed to the relay's `find_tab` action.
 * @param fallbackUrl - URL opened via `new_tab` when no tab is found.
 * @returns The id of the found or newly opened tab.
 * @throws Error when the new tab could not be opened.
 */
async function findOrOpenTab(urlPattern: string, fallbackUrl: string): Promise<number> {
  const existing = await sendRelayCommand({ action: 'find_tab', url: urlPattern });
  if (existing.success && existing.tabId !== undefined) {
    return existing.tabId;
  }

  const opened = await sendRelayCommand({ action: 'new_tab', url: fallbackUrl });
  if (opened.success && opened.tabId !== undefined) {
    // Fixed pause before handing back a freshly opened tab.
    await sleep(2500);
    return opened.tabId;
  }

  throw new Error(`Could not open tab for ${fallbackUrl}`);
}
|
|
171
|
+
|
|
172
|
+
/**
 * Navigate an existing tab to `url`, then wait a fixed 3 seconds.
 *
 * @param tabId - Id of the tab to navigate.
 * @param url - Destination URL.
 * @throws Error when the relay reports the navigation as unsuccessful.
 */
async function navigateTab(tabId: number, url: string): Promise<void> {
  const outcome = await sendRelayCommand({ action: 'navigate', tabId, url });
  if (outcome.success) {
    await sleep(3000);
    return;
  }
  throw new Error(`Failed to navigate: ${outcome.error ?? 'unknown error'}`);
}
|
|
179
|
+
|
|
180
|
+
/**
 * Run a JS snippet inside a tab and hand back whatever it produced.
 *
 * The relay wraps the snippet in an IIFE, so the snippet must `return` its
 * value. Return a JSON string when the result is more than a simple value.
 *
 * @param tabId - Id of the tab to evaluate in.
 * @param script - Script source sent as the `code` of an `evaluate` command.
 * @returns The `result` field of the relay response.
 * @throws Error when the relay reports the evaluation as unsuccessful.
 */
async function evalInTab(tabId: number, script: string): Promise<unknown> {
  const evaluation = await sendRelayCommand({ action: 'evaluate', tabId, code: script });
  if (evaluation.success) {
    return evaluation.result;
  }
  throw new Error(`Browser eval failed: ${evaluation.error ?? 'unknown error'}`);
}
|
|
191
|
+
|
|
192
|
+
/**
 * Promise-based delay.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
195
|
+
|
|
196
|
+
// ---------------------------------------------------------------------------
|
|
197
|
+
// Follower count parser
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
/**
 * Convert a human-readable count such as "49K", "1.2M" or "1,234" to a number.
 *
 * Commas and whitespace are ignored and matching is case-insensitive. The
 * first run of digits/dots in the text is used, optionally scaled by a
 * directly following `k`, `m`, `b` or `t` suffix.
 *
 * @param text - Display string containing a count.
 * @returns The rounded numeric count, or `undefined` when the text is empty or
 *   holds no parseable number (e.g. only dots).
 */
function parseFollowerCount(text: string): number | undefined {
  if (!text) return undefined;

  // Whitespace is stripped here, so the pattern below needs no \s* or trim().
  const cleaned = text.toLowerCase().replace(/,/g, '').replace(/\s+/g, '');
  const match = cleaned.match(/([\d.]+)([kmbt]?)/);
  if (!match) return undefined;

  const [, digits = '', suffix = ''] = match;
  const num = parseFloat(digits);
  // "[\d.]+" also matches dot-only runs like "..."; those parse to NaN and
  // must be reported as "no count" rather than leaking NaN to callers.
  if (Number.isNaN(num)) return undefined;

  const multipliers: Record<string, number> = {
    '': 1,
    k: 1_000,
    m: 1_000_000,
    b: 1_000_000_000,
    t: 1_000_000_000_000,
  };
  return Math.round(num * (multipliers[suffix] ?? 1));
}
|
|
217
|
+
|
|
218
|
+
// ---------------------------------------------------------------------------
|
|
219
|
+
// Instagram scraping
|
|
220
|
+
// ---------------------------------------------------------------------------
|
|
221
|
+
|
|
222
|
+
/**
 * Search Instagram for influencers by keyword.
 *
 * Strategy: search by keyword → extract post links → visit each post to find
 * the author → deduplicate → visit each unique profile for stats.
 *
 * Author handles scraped from post pages are only accepted when they consist
 * of the characters the in-page link matcher allows (`[a-zA-Z0-9_.]`). The
 * body-text fallback can otherwise yield arbitrary page text, and handles are
 * later spliced into navigation URLs and evaluated scripts.
 *
 * @param criteria - Search query plus optional filters; `limit` defaults to 10.
 * @returns Profiles that were scraped and passed `matchesCriteria`; an empty
 *   array when no post links or author handles could be extracted.
 */
async function searchInstagram(
  criteria: InfluencerSearchCriteria,
): Promise<InfluencerProfile[]> {
  const limit = criteria.limit ?? 10;
  const tabId = await findOrOpenTab('*://*.instagram.com/*', 'https://www.instagram.com');

  // Step 1: Navigate to keyword search (shows a grid of posts)
  const searchUrl = `https://www.instagram.com/explore/search/keyword/?q=${encodeURIComponent(criteria.query)}`;
  await navigateTab(tabId, searchUrl);
  await sleep(2000);

  // Step 2: Extract post links from the search grid
  const postLinksRaw = await evalInTab(tabId, `
    var links = [];
    document.querySelectorAll('a[href]').forEach(function(a) {
      var h = a.getAttribute('href');
      if (h && (h.indexOf('/p/') > -1 || h.indexOf('/reel/') > -1)) links.push(h);
    });
    return JSON.stringify(links.slice(0, ${limit * 2}));
  `);

  // JSON.parse can succeed with a non-array (e.g. "null"); treat that as "no links".
  let postLinks: string[] = [];
  try {
    const parsed: unknown = JSON.parse(String(postLinksRaw));
    if (Array.isArray(parsed)) {
      postLinks = parsed.filter((link): link is string => typeof link === 'string');
    }
  } catch {
    postLinks = [];
  }

  if (postLinks.length === 0) {
    return [];
  }

  // Step 3: Visit each post to extract the author username
  const seenUsernames = new Set<string>();
  const authorUsernames: string[] = [];

  // Navigation skip list — known non-profile IG paths
  // (mirrors the `skip` array inside the in-page script below)
  const skipUsernames = new Set([
    'reels', 'explore', 'stories', 'direct', 'accounts', 'about',
    'p', 'reel', 'tv', 'search', 'nametag', 'directory', '',
  ]);

  // Same character set the in-page link matcher accepts for a profile path.
  const handlePattern = /^[a-zA-Z0-9_.]+$/;

  for (const postLink of postLinks) {
    if (authorUsernames.length >= limit) break;

    try {
      await navigateTab(tabId, `https://www.instagram.com${postLink}`);
      await sleep(1000);

      // Extract the author username from the post page.
      // The post page body text starts with navigation items, then shows:
      //   "username\n...audio info...\nFollow\nusername\n..."
      // We look for the first profile link that isn't a nav item.
      const authorRaw = await evalInTab(tabId, `
        var bodyText = document.body.innerText;
        // The author name appears after navigation elements, usually right before "Follow"
        // Also try extracting from links
        var links = document.querySelectorAll('a[href]');
        var skip = ['', 'reels', 'explore', 'stories', 'direct', 'accounts', 'about',
          'p', 'reel', 'tv', 'search', 'nametag', 'directory'];
        var navLabels = ['Instagram', 'Home', 'HomeHome', 'Reels', 'ReelsReels', 'Messages',
          'MessagesMessages', 'Search', 'SearchSearch', 'Explore', 'ExploreExplore',
          'Notifications', 'NotificationsNotifications', 'Create', 'New postCreate',
          'Profile', 'More', 'SettingsMore', 'Also from Meta', 'Also from MetaAlso from Meta'];
        var author = null;
        for (var i = 0; i < links.length; i++) {
          var href = links[i].getAttribute('href') || '';
          var text = links[i].textContent.trim();
          var match = href.match(/^\\/([a-zA-Z0-9_.]+)\\/$/);
          if (!match) continue;
          var username = match[1];
          if (skip.indexOf(username) > -1) continue;
          if (navLabels.indexOf(text) > -1) continue;
          // Skip the logged-in user's profile link (usually "Profile" or their own name in nav)
          if (text === 'Profile' || text === '') continue;
          author = username;
          break;
        }
        // Fallback: parse from body text — look for the pattern after "Follow\\n"
        if (!author) {
          var followIdx = bodyText.indexOf('Follow\\n');
          if (followIdx > -1) {
            var afterFollow = bodyText.substring(followIdx + 7, followIdx + 50);
            var lineEnd = afterFollow.indexOf('\\n');
            if (lineEnd > -1) {
              author = afterFollow.substring(0, lineEnd).trim();
            }
          }
        }
        return author;
      `);

      const authorUsername = String(authorRaw || '').trim();
      const isUsableHandle =
        handlePattern.test(authorUsername) &&
        !skipUsernames.has(authorUsername) &&
        !seenUsernames.has(authorUsername);
      if (isUsableHandle) {
        seenUsernames.add(authorUsername);
        authorUsernames.push(authorUsername);
      }
    } catch {
      // Best effort: a post that fails to load or evaluate is skipped.
      continue;
    }
  }

  if (authorUsernames.length === 0) {
    return [];
  }

  // Step 4: Visit each unique profile to scrape stats
  const profiles: InfluencerProfile[] = [];

  for (const username of authorUsernames) {
    try {
      const profile = await scrapeInstagramProfile(tabId, username, criteria);
      if (profile && matchesCriteria(profile, criteria)) {
        profiles.push(profile);
      }
      await sleep(1500);
    } catch {
      // Best effort: a profile that fails to scrape is left out.
      continue;
    }
  }

  return profiles;
}
|
|
352
|
+
|
|
353
|
+
/**
 * Scrape a single Instagram profile page for stats.
 *
 * The most reliable data source is the meta[name="description"] tag which
 * contains: "49K Followers, 463 Following, 551 Posts - Display Name (@username)
 * on Instagram: "bio text""
 *
 * Falls back to parsing from body text.
 *
 * The username is embedded in the evaluated script as a JSON-encoded string
 * literal and URL-encoded in the profile URL, so a handle containing quotes or
 * other special characters cannot break or alter the script.
 *
 * @param tabId - Tab used for navigation and evaluation.
 * @param username - Instagram handle to visit.
 * @param criteria - Search criteria; `query` is forwarded to `extractThemes`.
 * @returns The scraped profile, or `null` when the page result is not a JSON object.
 */
async function scrapeInstagramProfile(
  tabId: number,
  username: string,
  criteria: InfluencerSearchCriteria,
): Promise<InfluencerProfile | null> {
  // encodeURIComponent leaves ordinary handles ([a-zA-Z0-9_.]) unchanged.
  const profileUrl = `https://www.instagram.com/${encodeURIComponent(username)}/`;
  // JSON string syntax is valid JS string-literal syntax, with quotes escaped.
  const usernameLiteral = JSON.stringify(username);

  await navigateTab(tabId, profileUrl);
  await sleep(2000);

  const raw = await evalInTab(tabId, `
    var r = { username: ${usernameLiteral} };

    // Primary source: meta description tag
    // Format: "49K Followers, 463 Following, 551 Posts - Display Name (@user) on Instagram: \\"bio\\""
    var meta = document.querySelector('meta[name="description"]');
    r.meta = meta ? meta.getAttribute('content') : '';

    // Parse meta for structured data
    if (r.meta) {
      var fMatch = r.meta.match(/([\\d,.]+[KkMmBb]?)\\s*Follower/i);
      var fgMatch = r.meta.match(/([\\d,.]+[KkMmBb]?)\\s*Following/i);
      var pMatch = r.meta.match(/([\\d,.]+[KkMmBb]?)\\s*Post/i);
      r.followers = fMatch ? fMatch[1] : '';
      r.following = fgMatch ? fgMatch[1] : '';
      r.posts = pMatch ? pMatch[1] : '';

      // Display name: between "Posts - " and " (@"
      var nameMatch = r.meta.match(/Posts\\s*-\\s*(.+?)\\s*\\(@/);
      r.displayName = nameMatch ? nameMatch[1].trim() : '';

      // Bio: after 'on Instagram: "' until end quote
      var bioMatch = r.meta.match(/on Instagram:\\s*"(.+?)"/);
      r.bio = bioMatch ? bioMatch[1] : '';
    }

    // Fallback: parse from body text
    var bodyText = document.body.innerText;
    if (!r.followers) {
      var bfMatch = bodyText.match(/([\\d,.]+[KkMmBb]?)\\s*followers/i);
      r.followers = bfMatch ? bfMatch[1] : '';
    }
    if (!r.following) {
      var bgMatch = bodyText.match(/([\\d,.]+[KkMmBb]?)\\s*following/i);
      r.following = bgMatch ? bgMatch[1] : '';
    }
    if (!r.posts) {
      var bpMatch = bodyText.match(/([\\d,.]+[KkMmBb]?)\\s*posts/i);
      r.posts = bpMatch ? bpMatch[1] : '';
    }

    // Verified status
    r.isVerified = bodyText.indexOf('Verified') > -1;

    // Bio fallback: grab the text between "following" and "Follow" button
    if (!r.bio) {
      var followingIdx = bodyText.indexOf(' following');
      if (followingIdx > -1) {
        var afterFollowing = bodyText.substring(followingIdx + 10, followingIdx + 400);
        // Cut at common boundaries
        var cutPoints = ['Follow', 'Message', 'Meta', 'About'];
        var minCut = afterFollowing.length;
        for (var c = 0; c < cutPoints.length; c++) {
          var idx = afterFollowing.indexOf(cutPoints[c]);
          if (idx > -1 && idx < minCut) minCut = idx;
        }
        r.bio = afterFollowing.substring(0, minCut).trim();
      }
    }

    // Avatar
    var avatarEl = document.querySelector('header img') ||
      document.querySelector('img[alt*="profile"]');
    r.avatarUrl = avatarEl ? avatarEl.getAttribute('src') : null;

    return JSON.stringify(r);
  `);

  let data: Record<string, unknown>;
  try {
    const parsed: unknown = JSON.parse(String(raw));
    // JSON.parse also accepts "null", numbers, etc.; only an object is usable here.
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }
    data = parsed as Record<string, unknown>;
  } catch {
    return null;
  }

  const followersNum = parseFollowerCount(String(data.followers || ''));
  const followingNum = parseFollowerCount(String(data.following || ''));
  const postCount = parseFollowerCount(String(data.posts || ''));

  return {
    platform: 'instagram',
    username,
    displayName: String(data.displayName || username),
    profileUrl,
    bio: String(data.bio || ''),
    followers: followersNum,
    followersDisplay: String(data.followers || 'unknown'),
    following: followingNum,
    postCount,
    isVerified: Boolean(data.isVerified),
    avatarUrl: data.avatarUrl ? String(data.avatarUrl) : undefined,
    engagementRate: undefined,
    avgLikes: undefined,
    avgComments: undefined,
    contentThemes: extractThemes(String(data.bio || '') + ' ' + String(data.meta || ''), criteria.query),
    recentPosts: [],
    relevanceScore: 0,
  };
}
|
|
469
|
+
|
|
470
|
+
// ---------------------------------------------------------------------------
|
|
471
|
+
// TikTok scraping
|
|
472
|
+
// ---------------------------------------------------------------------------
|
|
473
|
+
|
|
474
|
+
/**
 * Search TikTok for influencers by keyword.
 *
 * TikTok's user search at /search/user?q=... renders a list where each card
 * produces a predictable text pattern in innerText:
 *
 *   DisplayName
 *   username
 *   77.9K          (follower count)
 *   Followers
 *   ·
 *   1.5M           (like count)
 *   Likes
 *   Follow
 *
 * DOM class-based selectors are unreliable on TikTok (obfuscated class names),
 * so we parse this text pattern directly.
 *
 * @param criteria - Search query plus optional filters; `limit` defaults to 10.
 * @returns Up to `limit` profiles that passed `matchesCriteria`, enriched from
 *   their profile pages where possible; an empty array when the search page
 *   result is not a JSON array.
 */
async function searchTikTok(
  criteria: InfluencerSearchCriteria,
): Promise<InfluencerProfile[]> {
  type TikTokSearchHit = {
    username: string;
    displayName: string;
    followers: string;
    likes: string;
  };

  const limit = criteria.limit ?? 10;
  const tabId = await findOrOpenTab('*://*.tiktok.com/*', 'https://www.tiktok.com');

  const searchUrl = `https://www.tiktok.com/search/user?q=${encodeURIComponent(criteria.query)}`;
  await navigateTab(tabId, searchUrl);
  await sleep(3000);

  // Scroll to load more results
  await evalInTab(tabId, `window.scrollTo(0, document.body.scrollHeight); return 'scrolled'`);
  await sleep(2000);

  // Parse the text pattern: DisplayName, username, count, "Followers", "·", count, "Likes"
  const raw = await evalInTab(tabId, `
    var text = document.body.innerText;
    var lines = text.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
    var users = [];
    for (var i = 0; i < lines.length - 6; i++) {
      if (lines[i+2] &&
          lines[i+2].match(/^[\\d,.]+[KkMmBb]?$/) &&
          lines[i+3] === 'Followers' &&
          lines[i+4] === '·' &&
          lines[i+6] === 'Likes') {
        var username = lines[i+1];
        if (!username.match(/^[a-zA-Z0-9_.]+$/)) continue;
        users.push({
          displayName: lines[i],
          username: username,
          followers: lines[i+2],
          likes: lines[i+5],
        });
        i += 7;
      }
    }
    return JSON.stringify(users.slice(0, ${limit * 2}));
  `);

  let searchResults: TikTokSearchHit[];
  try {
    const parsed: unknown = JSON.parse(String(raw));
    // JSON.parse can succeed with a non-array (e.g. "null"); that means no results.
    if (!Array.isArray(parsed)) {
      return [];
    }
    searchResults = parsed as TikTokSearchHit[];
  } catch {
    return [];
  }

  // Convert to profiles — we only have basic data from search, no bios yet
  const profiles: InfluencerProfile[] = searchResults.map((p) => ({
    platform: 'tiktok' as const,
    username: p.username,
    displayName: p.displayName || p.username,
    profileUrl: `https://www.tiktok.com/@${p.username}`,
    bio: '',
    followers: parseFollowerCount(p.followers),
    followersDisplay: p.followers || 'unknown',
    following: undefined,
    postCount: undefined,
    isVerified: false,
    avatarUrl: undefined,
    engagementRate: undefined,
    avgLikes: undefined,
    avgComments: undefined,
    contentThemes: extractThemes(p.displayName, criteria.query),
    recentPosts: [],
    relevanceScore: 0,
  }));

  // Filter by criteria first to avoid unnecessary profile visits
  const filtered = profiles.filter((p) => matchesCriteria(p, criteria));

  // Enrich with bios by visiting each profile
  const enriched: InfluencerProfile[] = [];
  for (const profile of filtered.slice(0, limit)) {
    try {
      const detailed = await scrapeTikTokProfile(tabId, profile.username, criteria);
      // Keep the search-page data when the profile page yields nothing.
      enriched.push(detailed ?? profile);
      await sleep(1500);
    } catch {
      // Best effort: a failed profile visit still keeps the search-page data.
      enriched.push(profile);
    }
  }

  return enriched;
}
|
|
585
|
+
|
|
586
|
+
/**
 * Scrape a single TikTok profile page for detailed stats.
 *
 * TikTok profile pages show stats and bio in the body text. We use a
 * combination of data-e2e selectors (when they work) and body text regex
 * as a fallback. The bio is also extracted from the region between
 * "Following" and "Videos" in the body text.
 *
 * The username is embedded in the evaluated script as a JSON-encoded string
 * literal and URL-encoded in the profile URL, so a handle containing quotes or
 * other special characters cannot break or alter the script.
 *
 * @param tabId - Tab used for navigation and evaluation.
 * @param username - TikTok handle (without the leading "@").
 * @param criteria - Search criteria; `query` is forwarded to `extractThemes`.
 * @returns The scraped profile, or `null` when the page result is not a JSON object.
 */
async function scrapeTikTokProfile(
  tabId: number,
  username: string,
  criteria: InfluencerSearchCriteria,
): Promise<InfluencerProfile | null> {
  // encodeURIComponent leaves ordinary handles ([a-zA-Z0-9_.]) unchanged.
  const profileUrl = `https://www.tiktok.com/@${encodeURIComponent(username)}`;
  // JSON string syntax is valid JS string-literal syntax, with quotes escaped.
  const usernameLiteral = JSON.stringify(username);

  await navigateTab(tabId, profileUrl);
  await sleep(2500);

  const raw = await evalInTab(tabId, `
    var r = { username: ${usernameLiteral} };
    var bodyText = document.body.innerText;

    // Stats from body text (most reliable)
    var fMatch = bodyText.match(/([\\d,.]+[KkMmBb]?)\\s*[Ff]ollower/);
    var fgMatch = bodyText.match(/([\\d,.]+[KkMmBb]?)\\s*[Ff]ollowing/);
    var lMatch = bodyText.match(/([\\d,.]+[KkMmBb]?)\\s*[Ll]ike/);
    r.followers = fMatch ? fMatch[1] : '';
    r.following = fgMatch ? fgMatch[1] : '';
    r.likes = lMatch ? lMatch[1] : '';

    // Bio: try data-e2e selector first, fall back to text parsing
    var bioEl = document.querySelector('[data-e2e="user-bio"]') ||
      document.querySelector('h2[data-e2e="user-subtitle"]');
    r.bio = bioEl ? bioEl.textContent.trim() : '';

    if (!r.bio) {
      // Fallback: extract bio from between "Following" and "Videos" in body text
      var followingIdx = bodyText.indexOf('Following');
      if (followingIdx > -1) {
        var chunk = bodyText.substring(followingIdx + 10, followingIdx + 500);
        var videosIdx = chunk.indexOf('Videos');
        if (videosIdx > -1) chunk = chunk.substring(0, videosIdx);
        // Also cut at "Liked" or "Reposts"
        var likedIdx = chunk.indexOf('Liked');
        if (likedIdx > -1 && likedIdx < chunk.length) chunk = chunk.substring(0, likedIdx);
        r.bio = chunk.trim();
      }
    }

    // Display name: try data-e2e, fall back to page title
    var nameEl = document.querySelector('[data-e2e="user-title"]') ||
      document.querySelector('h1[data-e2e="user-title"]');
    r.displayName = nameEl ? nameEl.textContent.trim() : '';
    if (!r.displayName) {
      // TikTok titles are often "displayname (@username) | TikTok"
      var titleMatch = document.title.match(/^(.+?)\\s*\\(@/);
      r.displayName = titleMatch ? titleMatch[1].trim() : r.username;
    }

    // Verified
    r.isVerified = bodyText.indexOf('Verified') > -1 ||
      !!document.querySelector('svg[class*="verify"]') ||
      !!document.querySelector('[class*="verified"]');

    // Avatar
    var img = document.querySelector('img[class*="avatar"]') ||
      document.querySelector('img[src*="tiktokcdn"]');
    r.avatarUrl = img ? img.getAttribute('src') : null;

    return JSON.stringify(r);
  `);

  let data: Record<string, unknown>;
  try {
    const parsed: unknown = JSON.parse(String(raw));
    // JSON.parse also accepts "null", numbers, etc.; only an object is usable here.
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }
    data = parsed as Record<string, unknown>;
  } catch {
    return null;
  }

  const bio = String(data.bio || '');

  return {
    platform: 'tiktok',
    username,
    displayName: String(data.displayName || username),
    profileUrl,
    bio,
    followers: parseFollowerCount(String(data.followers || '')),
    followersDisplay: String(data.followers || 'unknown'),
    following: parseFollowerCount(String(data.following || '')),
    postCount: undefined,
    isVerified: Boolean(data.isVerified),
    avatarUrl: data.avatarUrl ? String(data.avatarUrl) : undefined,
    engagementRate: undefined,
    avgLikes: undefined,
    avgComments: undefined,
    contentThemes: extractThemes(bio, criteria.query),
    recentPosts: [],
    relevanceScore: 0,
  };
}
|
|
685
|
+
|
|
686
|
+
// ---------------------------------------------------------------------------
|
|
687
|
+
// X / Twitter scraping
|
|
688
|
+
// ---------------------------------------------------------------------------
|
|
689
|
+
|
|
690
|
+
/**
 * Search X/Twitter for influencers by keyword.
 *
 * X has a people search at /search?q=...&f=user. Results are rendered as
 * [data-testid="UserCell"] components. Each cell's innerText follows this
 * pattern:
 *
 *   [Followed by X and Y others]   (optional social proof line)
 *   Display Name
 *   @username
 *   Follow
 *   Bio text...
 *
 * We parse the @username from the text (the DOM selector approach picks up
 * "Followed by..." text instead of handles). After extracting from search,
 * we visit each profile to get follower counts since the search page doesn't
 * include them.
 *
 * NOTE: Keep search queries SHORT (2-4 words). X returns "No results" for
 * long multi-word people searches.
 *
 * A profile visit counts as failed both when it throws and when it returns
 * `null`; in either case the user is still included using search-page data.
 *
 * @param criteria - Search query plus optional filters; `limit` defaults to 10.
 *   Only the first four whitespace-separated words of `query` are searched.
 * @returns Scraped profiles that passed `matchesCriteria`, plus search-data
 *   fallbacks for failed visits; an empty array when the search page result
 *   is not a JSON array or holds no users.
 */
async function searchTwitter(
  criteria: InfluencerSearchCriteria,
): Promise<InfluencerProfile[]> {
  type TwitterSearchHit = {
    username: string;
    displayName: string;
    bio: string;
    isVerified: boolean;
    avatarUrl: string | null;
  };

  const limit = criteria.limit ?? 10;
  const tabId = await findOrOpenTab('*://*.x.com/*', 'https://x.com');

  // Use a short query — X people search fails with long queries
  const queryWords = criteria.query.split(/\s+/).slice(0, 4).join(' ');
  const searchUrl = `https://x.com/search?q=${encodeURIComponent(queryWords)}&f=user`;
  await navigateTab(tabId, searchUrl);
  await sleep(4000);

  // Scroll to load more results
  await evalInTab(tabId, `window.scrollTo(0, 800); return 'ok'`);
  await sleep(2000);
  await evalInTab(tabId, `window.scrollTo(0, document.body.scrollHeight); return 'ok'`);
  await sleep(2000);

  // Extract profiles from UserCell components using text pattern parsing
  const raw = await evalInTab(tabId, `
    var cells = document.querySelectorAll('[data-testid="UserCell"]');
    var results = [];
    var seen = {};
    for (var j = 0; j < cells.length; j++) {
      var text = cells[j].innerText;
      var lines = text.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });

      var username = '';
      var displayName = '';
      var bio = '';
      for (var k = 0; k < lines.length; k++) {
        var m = lines[k].match(/^@([a-zA-Z0-9_]+)$/);
        if (m) {
          username = m[1];
          // Display name is the line before @username (unless it's "Followed by...")
          if (k > 0 && !lines[k-1].startsWith('Followed')) {
            displayName = lines[k-1];
          } else if (k > 1) {
            displayName = lines[k-2] || '';
          }
          // Bio is everything after "Follow" button text
          var afterFollow = false;
          for (var n = k + 1; n < lines.length; n++) {
            if (lines[n] === 'Follow') { afterFollow = true; continue; }
            if (afterFollow) {
              bio = lines.slice(n).join(' ').substring(0, 250);
              break;
            }
          }
          break;
        }
      }

      if (!username || seen[username]) continue;
      seen[username] = true;
      if (!displayName || displayName.startsWith('Followed')) displayName = username;

      var verified = !!cells[j].querySelector('svg[data-testid="icon-verified"]');
      var img = cells[j].querySelector('img[src*="profile_images"]');

      results.push({
        username: username,
        displayName: displayName,
        bio: bio,
        isVerified: verified,
        avatarUrl: img ? img.getAttribute('src') : null,
      });
    }
    return JSON.stringify(results.slice(0, ${limit * 3}));
  `);

  let searchResults: TwitterSearchHit[];
  try {
    const parsed: unknown = JSON.parse(String(raw));
    // JSON.parse can succeed with a non-array (e.g. "null"); that means no results.
    if (!Array.isArray(parsed)) {
      return [];
    }
    searchResults = parsed as TwitterSearchHit[];
  } catch {
    return [];
  }

  if (searchResults.length === 0) return [];

  // Builds a profile from search-page data only (no follower stats available).
  const profileFromSearch = (sr: TwitterSearchHit): InfluencerProfile => ({
    platform: 'twitter',
    username: sr.username,
    displayName: sr.displayName,
    profileUrl: `https://x.com/${sr.username}`,
    bio: sr.bio,
    followers: undefined,
    followersDisplay: 'unknown',
    following: undefined,
    postCount: undefined,
    isVerified: sr.isVerified,
    avatarUrl: sr.avatarUrl ?? undefined,
    engagementRate: undefined,
    avgLikes: undefined,
    avgComments: undefined,
    contentThemes: extractThemes(sr.bio, criteria.query),
    recentPosts: [],
    relevanceScore: 0,
  });

  // Visit each profile to get follower counts (search results don't include them)
  const profiles: InfluencerProfile[] = [];
  for (const sr of searchResults.slice(0, limit)) {
    try {
      const profile = await scrapeTwitterProfile(tabId, sr.username, criteria);
      if (!profile) {
        // A null result is a failed visit too; treat it like a thrown error.
        profiles.push(profileFromSearch(sr));
      } else if (matchesCriteria(profile, criteria)) {
        profiles.push(profile);
      }
      await sleep(1500);
    } catch {
      // Still include with search data if profile visit fails
      profiles.push(profileFromSearch(sr));
    }
  }

  return profiles;
}
|
|
832
|
+
|
|
833
|
+
/**
 * Scrape a single X/Twitter profile page for detailed stats.
 *
 * Navigates the tab to `https://x.com/<username>`, waits 2.5 s, then evaluates
 * a script in the page that collects:
 *   - the display name from the first `<span>` inside `[data-testid="UserName"]`,
 *   - the bio from `[data-testid="UserDescription"]`,
 *   - follower / following counts via regex over `document.body.innerText`,
 *   - the verified flag and avatar URL via selectors.
 *
 * The handle is embedded in the page script with `JSON.stringify` and in the
 * URL with `encodeURIComponent`, so quotes, backslashes or slashes in
 * `username` cannot break out of the string literal or alter the path.
 *
 * @param tabId - Tab to navigate and evaluate the script in.
 * @param username - Profile handle to look up.
 * @param _criteria - Not read by this function.
 * @returns The scraped profile, or `null` when the script result is not valid
 *   JSON or does not decode to an object.
 */
async function scrapeTwitterProfile(
  tabId: number,
  username: string,
  _criteria: InfluencerSearchCriteria,
): Promise<InfluencerProfile | null> {
  const profileUrl = `https://x.com/${encodeURIComponent(username)}`;
  await navigateTab(tabId, profileUrl);
  await sleep(2500);

  const raw = await evalInTab(tabId, `
    var r = { username: ${JSON.stringify(username)} };

    // Display name from UserName testid
    var nameEl = document.querySelector('[data-testid="UserName"]');
    if (nameEl) {
      var spans = nameEl.querySelectorAll('span');
      if (spans.length > 0) r.displayName = spans[0].textContent.trim();
    }

    // Bio from UserDescription testid
    var bioEl = document.querySelector('[data-testid="UserDescription"]');
    r.bio = bioEl ? bioEl.textContent.trim() : '';

    // Follower/following counts from body text (most reliable)
    var bodyText = document.body.innerText;
    var fMatch = bodyText.match(/([\\.\\d,]+[KkMm]?)\\s*Follower/);
    var fgMatch = bodyText.match(/([\\.\\d,]+[KkMm]?)\\s*Following/);
    r.followers = fMatch ? fMatch[1] : '';
    r.following = fgMatch ? fgMatch[1] : '';

    // Verified
    r.isVerified = !!document.querySelector('svg[data-testid="icon-verified"]') ||
      !!document.querySelector('[aria-label*="Verified"]');

    // Avatar
    var img = document.querySelector('img[src*="profile_images"]');
    r.avatarUrl = img ? img.getAttribute('src') : null;

    return JSON.stringify(r);
  `);

  let parsed: unknown;
  try {
    parsed = JSON.parse(String(raw));
  } catch {
    return null;
  }
  // "null" and bare primitives are valid JSON but carry no profile fields.
  if (typeof parsed !== 'object' || parsed === null) {
    return null;
  }
  const data = parsed as Record<string, unknown>;
  const bio = String(data.bio || '');

  return {
    platform: 'twitter',
    username,
    displayName: String(data.displayName || username),
    profileUrl,
    bio,
    followers: parseFollowerCount(String(data.followers || '')),
    followersDisplay: String(data.followers || 'unknown'),
    following: parseFollowerCount(String(data.following || '')),
    postCount: undefined,
    isVerified: Boolean(data.isVerified),
    avatarUrl: data.avatarUrl ? String(data.avatarUrl) : undefined,
    engagementRate: undefined,
    avgLikes: undefined,
    avgComments: undefined,
    contentThemes: extractThemes(bio, ''),
    recentPosts: [],
    relevanceScore: 0,
  };
}
|
|
909
|
+
|
|
910
|
+
// ---------------------------------------------------------------------------
|
|
911
|
+
// Scoring & filtering
|
|
912
|
+
// ---------------------------------------------------------------------------
|
|
913
|
+
|
|
914
|
+
/**
 * Decide whether a profile passes the hard filters in `criteria`.
 *
 * Follower bounds are only enforced when the profile has a known follower
 * count and the corresponding bound is truthy (so a bound of `0` or
 * `undefined` is treated as "no bound"). `verifiedOnly` rejects every profile
 * whose `isVerified` flag is falsy.
 *
 * @param profile - Candidate profile.
 * @param criteria - Search criteria carrying the optional filters.
 * @returns `true` when no filter rejects the profile.
 */
function matchesCriteria(
  profile: InfluencerProfile,
  criteria: InfluencerSearchCriteria,
): boolean {
  const { followers, isVerified } = profile;
  const { minFollowers, maxFollowers, verifiedOnly } = criteria;

  if (followers !== undefined) {
    const tooFew = minFollowers ? followers < minFollowers : false;
    const tooMany = maxFollowers ? followers > maxFollowers : false;
    if (tooFew || tooMany) {
      return false;
    }
  }

  return !(verifiedOnly && !isVerified);
}
|
|
929
|
+
|
|
930
|
+
/**
 * Compute a relevance score for a profile against the search criteria.
 *
 * Contributions (all additive):
 *   - Follower tiers, cumulative: >=1k +10, >=10k +20, >=100k +30, >=1M +20.
 *   - Up to +20 when both `minFollowers` and `maxFollowers` are set, falling
 *     off linearly with the relative distance from the midpoint of the range.
 *   - +15 for a verified profile.
 *   - +10 for each whitespace-separated query term found in the bio.
 *   - +5 per content theme.
 *   - +5 for having an avatar, +5 for a bio longer than 20 characters.
 *
 * @param profile - Profile to score.
 * @param criteria - Criteria supplying the query and optional follower range.
 * @returns The total score (0 or greater).
 */
function scoreProfile(
  profile: InfluencerProfile,
  criteria: InfluencerSearchCriteria,
): number {
  let score = 0;

  // Follower count scoring
  if (profile.followers !== undefined) {
    if (profile.followers >= 1_000) score += 10;
    if (profile.followers >= 10_000) score += 20;
    if (profile.followers >= 100_000) score += 30;
    if (profile.followers >= 1_000_000) score += 20;

    // Bonus for being within requested range
    if (criteria.minFollowers && criteria.maxFollowers) {
      const mid = (criteria.minFollowers + criteria.maxFollowers) / 2;
      const distance = Math.abs(profile.followers - mid) / mid;
      score += Math.max(0, 20 - distance * 20);
    }
  }

  // Verified boost
  if (profile.isVerified) score += 15;

  // Bio relevance. Empty terms (from an empty query or leading/trailing
  // whitespace) are dropped: ''.includes-style matches are always true and
  // would otherwise hand every profile a free +10 per empty term.
  const queryTerms = criteria.query
    .toLowerCase()
    .split(/\s+/)
    .filter((term) => term.length > 0);
  const bioLower = profile.bio.toLowerCase();
  for (const term of queryTerms) {
    if (bioLower.includes(term)) score += 10;
  }

  // Content theme matching
  if (profile.contentThemes.length > 0) score += 5 * profile.contentThemes.length;

  // Completeness bonuses
  if (profile.avatarUrl) score += 5;
  if (profile.bio.length > 20) score += 5;

  return score;
}
|
|
970
|
+
|
|
971
|
+
/**
 * Derive coarse content themes from a bio and a search query.
 *
 * The bio and query are joined and lowercased, then checked against a fixed
 * keyword table. A theme is reported when at least one of its keywords is
 * mentioned. Keywords of three characters or fewer ('ai', 'dj', 'eat', 'mom',
 * ...) must appear as a whole word (optionally pluralised with "s"), because
 * as plain substrings they match unrelated words such as "said", "adjust" or
 * "great". Longer keywords keep substring matching so that inflections like
 * "photos" or "streaming" still count.
 *
 * @param bio - Profile biography text.
 * @param query - Search query text; may be empty.
 * @returns Matching theme names, in the order of the keyword table.
 */
function extractThemes(bio: string, query: string): string[] {
  const text = (bio + ' ' + query).toLowerCase();

  const themeKeywords: Record<string, string[]> = {
    fashion: ['fashion', 'style', 'outfit', 'ootd', 'clothing', 'wear', 'designer'],
    beauty: ['beauty', 'makeup', 'skincare', 'cosmetic', 'hair', 'glow'],
    fitness: ['fitness', 'gym', 'workout', 'health', 'training', 'athlete', 'sports'],
    food: ['food', 'recipe', 'cooking', 'chef', 'foodie', 'restaurant', 'eat'],
    travel: ['travel', 'wanderlust', 'adventure', 'explore', 'tourism', 'destination'],
    tech: ['tech', 'technology', 'gadget', 'software', 'coding', 'developer', 'ai', 'artificial intelligence'],
    gaming: ['gaming', 'gamer', 'esports', 'twitch', 'stream', 'game'],
    music: ['music', 'musician', 'singer', 'artist', 'producer', 'dj'],
    lifestyle: ['lifestyle', 'daily', 'vlog', 'life', 'mom', 'dad', 'family'],
    business: ['business', 'entrepreneur', 'startup', 'marketing', 'ceo', 'founder'],
    photography: ['photo', 'photography', 'photographer', 'visual', 'creative'],
    comedy: ['comedy', 'funny', 'humor', 'meme', 'comedian', 'laugh'],
    education: ['education', 'learn', 'teach', 'tutor', 'tips', 'howto', 'teaching'],
    wellness: ['wellness', 'mindfulness', 'meditation', 'yoga', 'mental health'],
    career: ['career', 'job', 'hiring', 'resume', 'interview', 'salary', 'remote work'],
  };

  // Keywords in the table are plain lowercase letters/spaces, so they can be
  // embedded in a RegExp without escaping.
  const isMentioned = (keyword: string): boolean =>
    keyword.length <= 3
      ? new RegExp(`\\b${keyword}s?\\b`).test(text)
      : text.includes(keyword);

  return Object.entries(themeKeywords)
    .filter(([, keywords]) => keywords.some((keyword) => isMentioned(keyword)))
    .map(([theme]) => theme);
}
|
|
1001
|
+
|
|
1002
|
+
// ---------------------------------------------------------------------------
|
|
1003
|
+
// Main search orchestrator
|
|
1004
|
+
// ---------------------------------------------------------------------------
|
|
1005
|
+
|
|
1006
|
+
/**
 * Run an influencer search on each requested platform and collect one result
 * entry per platform.
 *
 * Platforms default to Instagram, TikTok and Twitter when `criteria.platforms`
 * is not provided. Each platform's profiles are re-scored with `scoreProfile`
 * and ordered by descending `relevanceScore`. A platform whose search throws
 * still gets an entry, with no profiles and the error message attached;
 * unrecognised platform names are skipped without an entry.
 *
 * @param criteria - Query, optional platform list and filters.
 * @returns One result per processed platform, in the order requested.
 */
export async function searchInfluencers(
  criteria: InfluencerSearchCriteria,
): Promise<InfluencerSearchResult[]> {
  const results: InfluencerSearchResult[] = [];
  const targets = criteria.platforms ?? ['instagram', 'tiktok', 'twitter'];

  for (const platform of targets) {
    try {
      let found: InfluencerProfile[];

      if (platform === 'instagram') {
        found = await searchInstagram(criteria);
      } else if (platform === 'tiktok') {
        found = await searchTikTok(criteria);
      } else if (platform === 'twitter') {
        found = await searchTwitter(criteria);
      } else {
        continue;
      }

      // Attach scores to fresh copies, then rank best-first.
      const ranked = found.map((candidate) => {
        const relevanceScore = scoreProfile(candidate, criteria);
        return { ...candidate, relevanceScore };
      });
      ranked.sort((left, right) => right.relevanceScore - left.relevanceScore);

      results.push({
        platform,
        profiles: ranked,
        count: ranked.length,
        query: criteria.query,
      });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      results.push({
        platform,
        profiles: [],
        count: 0,
        query: criteria.query,
        error: message,
      });
    }
  }

  return results;
}
|
|
1059
|
+
|
|
1060
|
+
/**
 * Fetch detailed profile data for one influencer on one platform.
 *
 * Finds or opens a tab for the platform's site, then hands off to that
 * platform's profile scraper with an empty-query criteria object.
 *
 * @param platform - Which platform to scrape.
 * @param username - Handle of the profile on that platform.
 * @returns Whatever the platform scraper resolves to, or `null` for a
 *   platform value outside the three supported ones.
 */
export async function getInfluencerProfile(
  platform: 'instagram' | 'tiktok' | 'twitter',
  username: string,
): Promise<InfluencerProfile | null> {
  const blankCriteria: InfluencerSearchCriteria = { query: '' };

  if (platform === 'instagram') {
    const igTab = await findOrOpenTab('*://*.instagram.com/*', 'https://www.instagram.com');
    return scrapeInstagramProfile(igTab, username, blankCriteria);
  }

  if (platform === 'twitter') {
    const xTab = await findOrOpenTab('*://*.x.com/*', 'https://x.com');
    return scrapeTwitterProfile(xTab, username, blankCriteria);
  }

  if (platform === 'tiktok') {
    const ttTab = await findOrOpenTab('*://*.tiktok.com/*', 'https://www.tiktok.com');
    return scrapeTikTokProfile(ttTab, username, blankCriteria);
  }

  return null;
}
|
|
1086
|
+
|
|
1087
|
+
/**
 * Compare multiple influencers side by side.
 *
 * Profiles are fetched one at a time via `getInfluencerProfile`, with a 2 s
 * pause between consecutive lookups. No pause is taken after the final
 * lookup, so the result is returned as soon as the last profile is available.
 * Lookups that resolve to `null` are left out of the result.
 *
 * @param influencers - Platform/username pairs to fetch, in order.
 * @returns The profiles that were found, in the same relative order as the input.
 */
export async function compareInfluencers(
  influencers: { platform: 'instagram' | 'tiktok' | 'twitter'; username: string }[],
): Promise<InfluencerProfile[]> {
  const profiles: InfluencerProfile[] = [];

  for (const [index, inf] of influencers.entries()) {
    // Pause before every lookup except the first, i.e. only *between* lookups.
    if (index > 0) {
      await sleep(2000);
    }

    const profile = await getInfluencerProfile(inf.platform, inf.username);
    if (profile) {
      profiles.push(profile);
    }
  }

  return profiles;
}
|