@hasna/experts 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +21 -13
  2. package/dist/cli/index.js +1842 -74
  3. package/dist/connectors.d.ts +63 -4
  4. package/dist/connectors.d.ts.map +1 -1
  5. package/dist/contacts.d.ts +96 -0
  6. package/dist/contacts.d.ts.map +1 -0
  7. package/dist/crawl.d.ts +1 -0
  8. package/dist/crawl.d.ts.map +1 -1
  9. package/dist/db.d.ts +97 -2
  10. package/dist/db.d.ts.map +1 -1
  11. package/dist/embed.d.ts +57 -0
  12. package/dist/embed.d.ts.map +1 -0
  13. package/dist/enrich.d.ts +81 -1
  14. package/dist/enrich.d.ts.map +1 -1
  15. package/dist/format.d.ts +4 -1
  16. package/dist/format.d.ts.map +1 -1
  17. package/dist/identity.d.ts +23 -0
  18. package/dist/identity.d.ts.map +1 -0
  19. package/dist/index.d.ts +7 -2
  20. package/dist/index.d.ts.map +1 -1
  21. package/dist/index.js +1544 -21
  22. package/dist/score.d.ts +25 -0
  23. package/dist/score.d.ts.map +1 -0
  24. package/dist/sdk.d.ts +26 -1
  25. package/dist/sdk.d.ts.map +1 -1
  26. package/dist/sdk.js +12 -1
  27. package/dist/server/index.d.ts.map +1 -1
  28. package/dist/server/index.js +960 -14
  29. package/dist/sources/adplist.d.ts +43 -0
  30. package/dist/sources/adplist.d.ts.map +1 -0
  31. package/dist/sources/clarity.d.ts +37 -0
  32. package/dist/sources/clarity.d.ts.map +1 -0
  33. package/dist/sources/common.d.ts +14 -0
  34. package/dist/sources/common.d.ts.map +1 -0
  35. package/dist/sources/glg.d.ts +36 -0
  36. package/dist/sources/glg.d.ts.map +1 -0
  37. package/dist/sources/index.d.ts +5 -1
  38. package/dist/sources/index.d.ts.map +1 -1
  39. package/dist/sources/mentorcruise.d.ts +47 -0
  40. package/dist/sources/mentorcruise.d.ts.map +1 -0
  41. package/dist/sync.d.ts +71 -0
  42. package/dist/sync.d.ts.map +1 -0
  43. package/dist/types.d.ts +34 -0
  44. package/dist/types.d.ts.map +1 -1
  45. package/package.json +1 -1
package/dist/cli/index.js CHANGED
@@ -32,6 +32,206 @@ function expertText(e) {
32
32
  return [e.title, e.headline, e.bio].filter(Boolean).join(". ");
33
33
  }
34
34
 
35
+ // src/score.ts
36
/** Default relative weight of each authority signal; the values sum to 1. */
var DEFAULT_WEIGHTS = {
  rating: 0.3,
  reviews: 0.2,
  followers: 0.25,
  featured: 0.1,
  verified: 0.05,
  recency: 0.1
};
/** Clamp a number into the closed interval [0, 1]. */
var clamp01 = (n) => Math.max(0, Math.min(1, n));
/** Log-scale a non-negative count against `cap`, so `x >= cap` maps to 1 and `x <= 0` maps to 0. */
var logNorm = (x, cap) => clamp01(Math.log10(1 + Math.max(0, x)) / Math.log10(1 + cap));
/**
 * Blend an expert's rating, review count, follower count, featured/verified
 * flags and posting recency into a single score between 0 and 100
 * (one decimal place).
 *
 * @param {object} e - Expert record; reads `rating`, `ratingCount`, `featured`, `verified`.
 * @param {object} [inputs] - Optional signals: `followers` and `daysSinceLastTweet`.
 * @param {object} [weights] - Per-signal weights. Any key that is omitted falls
 *   back to DEFAULT_WEIGHTS, so a partial override no longer produces NaN.
 * @returns {number} Weighted score scaled to 0-100 when the weights sum to 1.
 */
function authorityScore(e, inputs = {}, weights = DEFAULT_WEIGHTS) {
  // Merge rather than replace: a caller overriding one weight keeps the others.
  const w = { ...DEFAULT_WEIGHTS, ...weights };
  const signals = inputs ?? {};
  const rating = clamp01((e.rating || 0) / 5);
  const reviews = logNorm(e.ratingCount || 0, 1000);
  const followers = logNorm(signals.followers ?? 0, 1e6);
  const featured = e.featured ? 1 : 0;
  const verified = e.verified ? 1 : 0;
  // Recency decays linearly to 0 over 30 days; unknown recency contributes nothing.
  const recency = signals.daysSinceLastTweet == null ? 0 : clamp01(1 - signals.daysSinceLastTweet / 30);
  const raw = w.rating * rating + w.reviews * reviews + w.followers * followers + w.featured * featured + w.verified * verified + w.recency * recency;
  // Scale to 0-100 and keep one decimal.
  return Math.round(raw * 1000) / 10;
}
56
+
57
+ // src/embed.ts
58
/** Common English words that carry no signal and are dropped by tokenize(). */
var STOPWORDS = new Set(
  "the a an and or of to in for on at is are with by as be this that it from i you we they".split(" ")
);
/**
 * Split free text into lowercase ASCII alphanumeric tokens.
 * URLs are blanked out first; tokens shorter than two characters and
 * stopwords are discarded.
 *
 * @param {string} text - Input text; falsy values yield an empty list.
 * @returns {string[]} Tokens in their original order.
 */
function tokenize(text) {
  const lowered = (text || "").toLowerCase();
  const withoutUrls = lowered.replace(/https?:\/\/\S+/g, " ");
  const kept = [];
  for (const piece of withoutUrls.split(/[^a-z0-9]+/)) {
    if (piece.length < 2 || STOPWORDS.has(piece)) {
      continue;
    }
    kept.push(piece);
  }
  return kept;
}
88
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `s`.
 *
 * @param {string} s - String to hash.
 * @returns {number} Unsigned 32-bit hash value.
 */
function fnv1a(s) {
  const FNV_PRIME = 16777619; // 2^24 + 2^8 + 2^7 + 2^4 + 2^1 + 1
  let hash = 2166136261;
  for (let pos = 0; pos < s.length; pos += 1) {
    hash ^= s.charCodeAt(pos);
    // 32-bit wrapping multiply by the FNV prime.
    hash = Math.imul(hash, FNV_PRIME) >>> 0;
  }
  return hash >>> 0;
}
96
+
97
/**
 * Embedder that needs no network: it hashes unigrams and adjacent-token
 * bigrams into a fixed number of buckets and L2-normalises the result.
 */
class HashingEmbedder {
  id = "hash-v1";
  dim;
  /** @param {number} [dim=512] - Number of hash buckets (vector length). */
  constructor(dim = 512) {
    this.dim = dim;
  }
  /**
   * Embed a single text.
   * @param {string} text
   * @returns {number[]} Unit-length vector of `this.dim` entries (all zeros when no tokens survive).
   */
  one(text) {
    const buckets = new Array(this.dim).fill(0);
    const tokens = tokenize(text);
    tokens.forEach((token, idx) => {
      buckets[fnv1a(token) % this.dim] += 1;
      if (idx + 1 < tokens.length) {
        // Bigrams count half as much as unigrams.
        const bigram = token + "_" + tokens[idx + 1];
        buckets[fnv1a(bigram) % this.dim] += 0.5;
      }
    });
    let sumOfSquares = 0;
    for (const value of buckets) {
      sumOfSquares += value * value;
    }
    // Fall back to 1 so an all-zero vector is returned unchanged.
    const length = Math.sqrt(sumOfSquares) || 1;
    return buckets.map((value) => value / length);
  }
  /**
   * Embed several texts.
   * @param {string[]} texts
   * @returns {Promise<number[][]>} One vector per input text, in order.
   */
  async embed(texts) {
    return texts.map((text) => this.one(text));
  }
}
121
+
122
/**
 * Embedder backed by the OpenAI embeddings HTTP endpoint.
 *
 * Options: `apiKey` (defaults to `process.env.OPENAI_API_KEY`), `model`
 * (defaults to "text-embedding-3-small"), `fetchFn` (defaults to the global
 * `fetch`) and `dim` (expected vector length, defaults to 1536).
 */
class OpenAIEmbedder {
  id;
  dim;
  apiKey;
  model;
  fetchFn;
  constructor(opts = {}) {
    this.apiKey = opts.apiKey ?? process.env.OPENAI_API_KEY ?? "";
    this.model = opts.model ?? "text-embedding-3-small";
    this.fetchFn = opts.fetchFn ?? fetch;
    // The model is configurable, so the dimension must be overridable too.
    this.dim = opts.dim ?? 1536;
    this.id = `openai:${this.model}`;
  }
  /**
   * Embed a batch of texts with a single API request.
   *
   * @param {string[]} texts - Inputs to embed.
   * @returns {Promise<number[][]>} One vector per input, in input order.
   * @throws {Error} On a non-2xx response or a response whose shape or length
   *   does not match the request.
   */
  async embed(texts) {
    // Nothing to embed: skip the request entirely.
    if (texts.length === 0) {
      return [];
    }
    const res = await this.fetchFn("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: { "content-type": "application/json", authorization: `Bearer ${this.apiKey}` },
      body: JSON.stringify({ model: this.model, input: texts })
    });
    if (!res.ok)
      throw new Error(`OpenAI embeddings ${res.status}: ${(await res.text()).slice(0, 200)}`);
    const data = await res.json();
    const items = data?.data;
    if (!Array.isArray(items) || items.length !== texts.length) {
      const got = Array.isArray(items) ? items.length : 0;
      throw new Error(`OpenAI embeddings: expected ${texts.length} vectors, got ${got}`);
    }
    // Each item carries the index of its input; order by it instead of trusting
    // array order. Items without an index keep their position (stable sort).
    const ordered = [...items].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    const vectors = ordered.map((d) => d.embedding);
    // Keep `dim` truthful for models whose output length differs from the default.
    if (Array.isArray(vectors[0])) {
      this.dim = vectors[0].length;
    }
    return vectors;
  }
}
146
/**
 * Choose the embedder from the environment: the OpenAI embedder when
 * `EXPERTS_EMBEDDER` is "openai" and `OPENAI_API_KEY` is set, otherwise the
 * local hashing embedder.
 *
 * @returns {OpenAIEmbedder|HashingEmbedder}
 */
function getEmbedder() {
  const wantsOpenAI = process.env.EXPERTS_EMBEDDER === "openai";
  const hasApiKey = Boolean(process.env.OPENAI_API_KEY);
  return wantsOpenAI && hasApiKey ? new OpenAIEmbedder() : new HashingEmbedder();
}
152
/**
 * Cosine similarity of two numeric vectors. When the lengths differ only the
 * shared prefix is compared; a zero-magnitude operand yields 0.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} Similarity in [-1, 1], or 0 when either prefix has zero norm.
 */
function cosine(a, b) {
  const shared = Math.min(a.length, b.length);
  let dotProduct = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  let idx = 0;
  while (idx < shared) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
    idx += 1;
  }
  const denominator = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
165
/**
 * Serialise a numeric vector as the raw bytes of a Float32Array
 * (4 bytes per element, platform byte order).
 *
 * @param {number[]} v
 * @returns {Uint8Array} Byte view over a freshly allocated buffer.
 */
function packVector(v) {
  const { buffer } = new Float32Array(v);
  return new Uint8Array(buffer);
}
169
/**
 * Inverse of packVector: decode Float32 bytes back into a plain array.
 *
 * @param {Uint8Array|ArrayBuffer} buf - A byte view (only its own window of
 *   the underlying buffer is read) or a raw buffer.
 * @returns {number[]} Decoded values.
 */
function unpackVector(buf) {
  if (!(buf instanceof Uint8Array)) {
    return Array.from(new Float32Array(buf));
  }
  // Copy just the view's window so offset views into a larger buffer decode correctly.
  const start = buf.byteOffset;
  const end = start + buf.byteLength;
  const floats = new Float32Array(buf.buffer.slice(start, end));
  return Array.from(floats);
}
173
/**
 * Build the text that represents an expert for embedding: name, title,
 * headline, bio, topics and tags, with empty parts omitted and the rest
 * joined by ". ".
 *
 * @param {object} e - Expert record; `topics` and `tags` must be arrays.
 * @returns {string}
 */
function expertEmbedText(e) {
  const candidates = [e.fullName, e.title, e.headline, e.bio, e.topics.join(" "), e.tags.join(" ")];
  const present = [];
  for (const part of candidates) {
    if (part) {
      present.push(part);
    }
  }
  return present.join(". ");
}
176
+
177
+ // src/identity.ts
178
/**
 * Path segments that precede the actual account name on common profile URLs
 * (e.g. linkedin.com/in/<name>, youtube.com/channel/<id>).
 */
var SOCIAL_PATH_PREFIXES = new Set(["in", "pub", "company", "channel", "c", "user", "u", "profile", "people"]);
/**
 * Extract the account handle from a profile URL. Input that does not look
 * like `host.tld/path` (a bare handle, or a URL without a path) is returned
 * unchanged.
 */
function socialHandleFrom(raw) {
  const m = raw.match(/^(?:(?:https?:)?\/\/)?(?:[a-z0-9-]+\.)+[a-z]{2,}(?::\d+)?\/([^?#]*)/i);
  if (!m) {
    return raw;
  }
  const segments = m[1].split("/").filter(Boolean);
  // Skip routing prefixes, but never drop the last remaining segment.
  while (segments.length > 1 && SOCIAL_PATH_PREFIXES.has(segments[0].toLowerCase())) {
    segments.shift();
  }
  return segments[0] ?? raw;
}
/**
 * Collect normalised `platform:handle` keys for every social link of an expert.
 *
 * Handles are lowercased and stripped of a leading "@" and trailing slashes.
 * The first path segment is no longer taken blindly: previously every
 * `linkedin.com/in/<name>` URL produced the handle "in", which made unrelated
 * experts share a key.
 *
 * @param {object} e - Expert record; reads `e.socials` (platform -> URL or handle).
 * @returns {Set<string>} Keys of the form `${platform}:${handle}`.
 */
function socialHandles(e) {
  const out = new Set();
  for (const [platform, url] of Object.entries(e.socials || {})) {
    if (!url) {
      continue;
    }
    const handle = socialHandleFrom(String(url).trim())
      .toLowerCase()
      .replace(/^@+/, "")
      .replace(/\/+$/, "");
    if (handle) {
      out.add(`${platform}:${handle}`);
    }
  }
  return out;
}
190
/** Composite identifier of an expert record: `source:sourceId`. */
var key = (e) => `${e.source}:${e.sourceId}`;
/**
 * Group expert records that share at least one social handle, using a
 * union-find structure. The root of each group is the lexicographically
 * smaller of the roots being merged.
 *
 * @param {object[]} experts - Expert records (read via key() and socialHandles()).
 * @returns {Map<string, string>} Map from each expert key to its group's root key.
 */
function clusterPersons(experts) {
  const parent = new Map();
  // Find the root of `start`, then point every node on the path straight at it.
  const find = (start) => {
    let root = start;
    for (let up = parent.get(root); up !== root; up = parent.get(root)) {
      root = up;
    }
    let cursor = start;
    while (parent.get(cursor) !== root) {
      const next = parent.get(cursor);
      parent.set(cursor, root);
      cursor = next;
    }
    return root;
  };
  // Merge two groups; the smaller root string becomes the parent.
  const union = (a, b) => {
    const rootA = find(a);
    const rootB = find(b);
    if (rootA === rootB) {
      return;
    }
    if (rootA < rootB) {
      parent.set(rootB, rootA);
    } else {
      parent.set(rootA, rootB);
    }
  };
  // Every expert starts as its own singleton group.
  experts.forEach((expert) => {
    const id = key(expert);
    parent.set(id, id);
  });
  // Index expert keys by each handle they expose.
  const byHandle = new Map();
  for (const expert of experts) {
    const id = key(expert);
    for (const handle of socialHandles(expert)) {
      const bucket = byHandle.get(handle);
      if (bucket) {
        bucket.push(id);
      } else {
        byHandle.set(handle, [id]);
      }
    }
  }
  // All experts sharing a handle belong to the same person.
  for (const [first, ...others] of byHandle.values()) {
    for (const other of others) {
      union(first, other);
    }
  }
  return new Map(experts.map((expert) => [key(expert), find(key(expert))]));
}
234
+
35
235
  // src/db.ts
36
236
  function defaultDbPath() {
37
237
  return process.env.OPEN_EXPERTS_DB || join(homedir(), ".hasna", "experts", "experts.db");
@@ -115,8 +315,59 @@ class ExpertsDB {
115
315
  PRIMARY KEY (source, tweet_id)
116
316
  );
117
317
  CREATE INDEX IF NOT EXISTS idx_tweets_expert ON tweets(source, source_id, created_at DESC);
318
+
319
+ -- Enrichment: recent YouTube videos per expert.
320
+ CREATE TABLE IF NOT EXISTS videos (
321
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
322
+ video_id TEXT NOT NULL, title TEXT, description TEXT,
323
+ published_at TEXT, url TEXT, thumbnail TEXT, view_count INTEGER,
324
+ PRIMARY KEY (source, video_id)
325
+ );
326
+ CREATE INDEX IF NOT EXISTS idx_videos_expert ON videos(source, source_id, published_at DESC);
327
+
328
+ -- Generic external enrichment (linkedin, site/newsletter, \u2026) as JSON.
329
+ CREATE TABLE IF NOT EXISTS ext_profiles (
330
+ source TEXT NOT NULL, source_id TEXT NOT NULL, kind TEXT NOT NULL,
331
+ data TEXT, enriched_at TEXT,
332
+ PRIMARY KEY (source, source_id, kind)
333
+ );
334
+
335
+ -- Enrichment: discovered contact methods (multiple email/phone per expert).
336
+ CREATE TABLE IF NOT EXISTS contacts (
337
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
338
+ type TEXT NOT NULL, value TEXT NOT NULL,
339
+ label TEXT, provider TEXT, confidence REAL,
340
+ status TEXT DEFAULT 'unverified', verified_at TEXT, created_at TEXT,
341
+ PRIMARY KEY (source, source_id, type, value)
342
+ );
343
+ CREATE INDEX IF NOT EXISTS idx_contacts_expert ON contacts(source, source_id);
344
+ CREATE INDEX IF NOT EXISTS idx_contacts_status ON contacts(status);
345
+
346
+ -- Semantic search: one embedding vector per expert.
347
+ CREATE TABLE IF NOT EXISTS vectors (
348
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
349
+ embedder TEXT NOT NULL, dim INTEGER, vec BLOB,
350
+ PRIMARY KEY (source, source_id)
351
+ );
352
+
353
+ -- Identity resolution: maps each expert record to a canonical person.
354
+ CREATE TABLE IF NOT EXISTS persons (
355
+ source TEXT NOT NULL, source_id TEXT NOT NULL, person_id TEXT NOT NULL,
356
+ PRIMARY KEY (source, source_id)
357
+ );
358
+ CREATE INDEX IF NOT EXISTS idx_persons_person ON persons(person_id);
359
+
360
+ -- Change detection: a log of what changed between crawls.
361
+ CREATE TABLE IF NOT EXISTS changes (
362
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
363
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
364
+ kind TEXT NOT NULL, field TEXT, old_value TEXT, new_value TEXT,
365
+ detected_at TEXT
366
+ );
367
+ CREATE INDEX IF NOT EXISTS idx_changes_time ON changes(detected_at DESC);
118
368
  `);
119
369
  this.addColumnIfMissing("experts", "avatar_local", "TEXT");
370
+ this.addColumnIfMissing("experts", "authority", "REAL DEFAULT 0");
120
371
  }
121
372
  addColumnIfMissing(table, column, type) {
122
373
  const cols = this.db.query(`PRAGMA table_info(${table})`).all();
@@ -202,6 +453,7 @@ class ExpertsDB {
202
453
  socials: JSON.parse(r.socials || "{}"),
203
454
  extra: JSON.parse(r.extra || "{}"),
204
455
  avatarLocal: r.avatar_local || undefined,
456
+ authority: r.authority ?? 0,
205
457
  crawledAt: r.crawled_at
206
458
  };
207
459
  }
@@ -249,7 +501,7 @@ class ExpertsDB {
249
501
  where.push("rating >= ?");
250
502
  params.push(filters.minRating);
251
503
  }
252
- const sortCol = filters.sort === "price" ? "price" : filters.sort === "name" ? "full_name" : filters.sort === "reviews" ? "rating_count" : "rating";
504
+ const sortCol = filters.sort === "price" ? "price" : filters.sort === "name" ? "full_name" : filters.sort === "reviews" ? "rating_count" : filters.sort === "authority" ? "authority" : "rating";
253
505
  const defaultAsc = filters.sort === "name";
254
506
  const dir = filters.desc ?? !defaultAsc ? "DESC" : "ASC";
255
507
  let sql = "SELECT * FROM experts";
@@ -325,11 +577,11 @@ class ExpertsDB {
325
577
  sql += " ORDER BY name";
326
578
  return this.db.query(sql).all(...params);
327
579
  }
328
- setMeta(key, value) {
329
- this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key, value);
580
+ setMeta(key2, value) {
581
+ this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key2, value);
330
582
  }
331
- getMeta(key) {
332
- const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key);
583
+ getMeta(key2) {
584
+ const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key2);
333
585
  return row ? row.value : null;
334
586
  }
335
587
  stats(source) {
@@ -353,12 +605,12 @@ class ExpertsDB {
353
605
  const nodeIds = new Map;
354
606
  const insertNode = this.db.query("INSERT INTO kg_nodes (type, key, label) VALUES (?, ?, ?) ON CONFLICT(type, key) DO UPDATE SET label=excluded.label RETURNING id");
355
607
  const insertEdge = this.db.query("INSERT OR REPLACE INTO kg_edges (src, dst, rel, weight) VALUES (?, ?, ?, ?)");
356
- const node = (type, key, label) => {
357
- const ck = `${type}\x00${key.toLowerCase()}`;
608
+ const node = (type, key2, label) => {
609
+ const ck = `${type}\x00${key2.toLowerCase()}`;
358
610
  const cached = nodeIds.get(ck);
359
611
  if (cached != null)
360
612
  return cached;
361
- const id = insertNode.get(type, key.toLowerCase(), label).id;
613
+ const id = insertNode.get(type, key2.toLowerCase(), label).id;
362
614
  nodeIds.set(ck, id);
363
615
  return id;
364
616
  };
@@ -368,7 +620,8 @@ class ExpertsDB {
368
620
  for (const topic of e.topics) {
369
621
  insertEdge.run(eId, node("topic", topic, topic), "IN_TOPIC", 1);
370
622
  }
371
- const tags = inferTags(expertText(e), vocabulary);
623
+ const tweetText = this.recentTweets(e.source, e.sourceId, 30).map((t) => t.text).join(". ");
624
+ const tags = inferTags(expertText(e) + ". " + tweetText, vocabulary);
372
625
  for (const tag of tags) {
373
626
  insertEdge.run(eId, node("tag", tag, tag), "HAS_TAG", 1);
374
627
  }
@@ -381,11 +634,51 @@ class ExpertsDB {
381
634
  this.setMeta("graph_built", new Date().toISOString());
382
635
  return { nodes, edges };
383
636
  }
384
- expertFromNodeKey(key) {
385
- const idx = key.indexOf(":");
637
+ rescore(source) {
638
+ const experts = this.list({ source });
639
+ const upd = this.db.query("UPDATE experts SET authority = ? WHERE source = ? AND source_id = ?");
640
+ const followerStmt = this.db.query("SELECT followers FROM x_profiles WHERE source = ? AND source_id = ?");
641
+ const lastTweetStmt = this.db.query("SELECT MAX(created_at) AS t FROM tweets WHERE source = ? AND source_id = ?");
642
+ const tx = this.db.transaction((rows) => {
643
+ for (const e of rows) {
644
+ const fr = followerStmt.get(e.source, e.sourceId);
645
+ const lt = lastTweetStmt.get(e.source, e.sourceId);
646
+ let daysSince;
647
+ if (lt?.t) {
648
+ const ms = Date.now() - Date.parse(lt.t);
649
+ if (!Number.isNaN(ms))
650
+ daysSince = ms / 86400000;
651
+ }
652
+ const score = authorityScore(e, { followers: fr?.followers ?? 0, daysSinceLastTweet: daysSince });
653
+ upd.run(score, e.source, e.sourceId);
654
+ }
655
+ });
656
+ tx(experts);
657
+ this.setMeta("rescored_at", new Date().toISOString());
658
+ return experts.length;
659
+ }
660
+ stalest(opts = {}) {
661
+ const where = opts.source ? "WHERE e.source = ?" : "";
662
+ const params = opts.source ? [opts.source] : [];
663
+ const sql = `
664
+ SELECT e.*, COALESCE(
665
+ (SELECT MAX(enriched_at) FROM x_profiles xp WHERE xp.source=e.source AND xp.source_id=e.source_id),
666
+ e.crawled_at
667
+ ) AS last_seen
668
+ FROM experts e ${where}
669
+ ORDER BY last_seen ASC
670
+ LIMIT ?`;
671
+ params.push(opts.limit ?? 25);
672
+ return this.db.query(sql).all(...params).map((r) => ({
673
+ expert: this.rowToExpert(r),
674
+ lastSeen: r.last_seen || ""
675
+ }));
676
+ }
677
+ expertFromNodeKey(key2) {
678
+ const idx = key2.indexOf(":");
386
679
  if (idx < 0)
387
680
  return null;
388
- return this.get(key.slice(idx + 1), key.slice(0, idx));
681
+ return this.get(key2.slice(idx + 1), key2.slice(0, idx));
389
682
  }
390
683
  findByNeeds(needs, opts = {}) {
391
684
  const cleaned = needs.map((n) => n.trim().toLowerCase()).filter(Boolean);
@@ -574,6 +867,270 @@ class ExpertsDB {
574
867
  const avatars = this.db.query(`SELECT COUNT(*) AS n FROM experts WHERE avatar_local IS NOT NULL${source ? " AND source = ?" : ""}`).get(...args).n;
575
868
  return { withHandle, enriched, tweets, avatars };
576
869
  }
870
+ recordChanges(source, incoming) {
871
+ const existing = new Map(this.list({ source }).map((e) => [e.sourceId, e]));
872
+ const now = new Date().toISOString();
873
+ const watched = ["price", "title", "headline", "bio", "slug"];
874
+ const stmt = this.db.query("INSERT INTO changes (source, source_id, kind, field, old_value, new_value, detected_at) VALUES (?, ?, ?, ?, ?, ?, ?)");
875
+ let count = 0;
876
+ const tx = this.db.transaction((rows) => {
877
+ for (const e of rows) {
878
+ const prev = existing.get(e.sourceId);
879
+ if (!prev) {
880
+ stmt.run(source, e.sourceId, "added", null, null, e.fullName || e.slug, now);
881
+ count++;
882
+ continue;
883
+ }
884
+ for (const f of watched) {
885
+ const a = String(prev[f] ?? "");
886
+ const b = String(e[f] ?? "");
887
+ if (a !== b) {
888
+ stmt.run(source, e.sourceId, "updated", f, a, b, now);
889
+ count++;
890
+ }
891
+ }
892
+ }
893
+ });
894
+ tx(incoming);
895
+ return count;
896
+ }
897
+ changes(opts = {}) {
898
+ const where = opts.source ? "WHERE source = ?" : "";
899
+ const params = opts.source ? [opts.source] : [];
900
+ params.push(opts.limit ?? 50);
901
+ return this.db.query(`SELECT * FROM changes ${where} ORDER BY detected_at DESC, id DESC LIMIT ?`).all(...params);
902
+ }
903
+ rebuildPersons() {
904
+ const experts = this.list();
905
+ const mapping = clusterPersons(experts);
906
+ const tx = this.db.transaction(() => {
907
+ this.db.exec("DELETE FROM persons");
908
+ const stmt = this.db.query("INSERT OR REPLACE INTO persons (source, source_id, person_id) VALUES (?, ?, ?)");
909
+ for (const [k, pid] of mapping) {
910
+ const idx = k.indexOf(":");
911
+ stmt.run(k.slice(0, idx), k.slice(idx + 1), pid);
912
+ }
913
+ });
914
+ tx();
915
+ const persons = new Set(mapping.values()).size;
916
+ this.setMeta("persons_built", new Date().toISOString());
917
+ return { experts: experts.length, persons };
918
+ }
919
+ personIdOf(source, sourceId) {
920
+ const r = this.db.query("SELECT person_id FROM persons WHERE source = ? AND source_id = ?").get(source, sourceId);
921
+ return r ? r.person_id : `${source}:${sourceId}`;
922
+ }
923
+ expertsForPerson(personId) {
924
+ const rows = this.db.query("SELECT e.* FROM persons p JOIN experts e ON e.source=p.source AND e.source_id=p.source_id WHERE p.person_id = ?").all(personId);
925
+ return rows.map((r) => this.rowToExpert(r));
926
+ }
927
+ personStats() {
928
+ const experts = this.count();
929
+ const row = this.db.query("SELECT COUNT(DISTINCT person_id) n FROM persons").get();
930
+ const persons = row?.n ?? 0;
931
+ const dupes = this.db.query("SELECT person_id, COUNT(*) c FROM persons GROUP BY person_id HAVING c > 1 ORDER BY c DESC").all();
932
+ return { experts, persons: persons || experts, duplicated: dupes.length };
933
+ }
934
+ async buildEmbeddings(embedder, opts = {}) {
935
+ const log = opts.onLog ?? (() => {});
936
+ const experts = this.list({ source: opts.source });
937
+ const batch = opts.batch ?? 64;
938
+ const stmt = this.db.query("INSERT OR REPLACE INTO vectors (source, source_id, embedder, dim, vec) VALUES (?, ?, ?, ?, ?)");
939
+ let done = 0;
940
+ for (let i = 0;i < experts.length; i += batch) {
941
+ const slice = experts.slice(i, i + batch);
942
+ const vecs = await embedder.embed(slice.map((e) => expertEmbedText(e)));
943
+ const tx = this.db.transaction(() => {
944
+ slice.forEach((e, j) => stmt.run(e.source, e.sourceId, embedder.id, embedder.dim, packVector(vecs[j])));
945
+ });
946
+ tx();
947
+ done += slice.length;
948
+ log(` embedded ${done}/${experts.length}`);
949
+ }
950
+ this.setMeta("embedder", embedder.id);
951
+ this.setMeta("embedded_at", new Date().toISOString());
952
+ return done;
953
+ }
954
+ vectorCount() {
955
+ return this.db.query("SELECT COUNT(*) n FROM vectors").get().n;
956
+ }
957
+ semanticSearch(queryVec, opts = {}) {
958
+ const where = opts.source ? "WHERE v.source = ?" : "";
959
+ const params = opts.source ? [opts.source] : [];
960
+ const rows = this.db.query(`SELECT e.*, v.vec AS _vec FROM vectors v JOIN experts e ON e.source=v.source AND e.source_id=v.source_id ${where}`).all(...params);
961
+ const scored = rows.map((r) => ({ expert: this.rowToExpert(r), score: cosine(queryVec, unpackVector(r._vec)) }));
962
+ scored.sort((a, b) => b.score - a.score);
963
+ return scored.slice(0, opts.limit ?? 25);
964
+ }
965
+ upsertExt(source, sourceId, kind, data) {
966
+ this.db.query(`
967
+ INSERT INTO ext_profiles (source, source_id, kind, data, enriched_at)
968
+ VALUES (?, ?, ?, ?, ?)
969
+ ON CONFLICT(source, source_id, kind) DO UPDATE SET data=excluded.data, enriched_at=excluded.enriched_at
970
+ `).run(source, sourceId, kind, JSON.stringify(data), new Date().toISOString());
971
+ }
972
+ getExt(source, sourceId, kind) {
973
+ const r = this.db.query("SELECT data FROM ext_profiles WHERE source=? AND source_id=? AND kind=?").get(source, sourceId, kind);
974
+ return r ? JSON.parse(r.data || "{}") : null;
975
+ }
976
+ allExt(source, sourceId) {
977
+ const rows = this.db.query("SELECT kind, data FROM ext_profiles WHERE source=? AND source_id=?").all(source, sourceId);
978
+ return Object.fromEntries(rows.map((r) => [r.kind, JSON.parse(r.data || "{}")]));
979
+ }
980
+ expertsNeedingExt(platform, kind, opts = {}) {
981
+ const where = [`json_extract(socials, '$.${platform}') IS NOT NULL`];
982
+ const bind = [];
983
+ if (opts.source) {
984
+ where.push("source = ?");
985
+ bind.push(opts.source);
986
+ }
987
+ if (!opts.refresh) {
988
+ where.push("NOT EXISTS (SELECT 1 FROM ext_profiles x WHERE x.source=experts.source AND x.source_id=experts.source_id AND x.kind=?)");
989
+ bind.push(kind);
990
+ }
991
+ let sql = "SELECT * FROM experts WHERE " + where.join(" AND ") + " ORDER BY rating_count DESC";
992
+ if (opts.limit) {
993
+ sql += " LIMIT ?";
994
+ bind.push(opts.limit);
995
+ }
996
+ return this.db.query(sql).all(...bind).map((r) => this.rowToExpert(r));
997
+ }
998
+ replaceVideos(source, sourceId, videos) {
999
+ const tx = this.db.transaction((rows) => {
1000
+ this.db.query("DELETE FROM videos WHERE source = ? AND source_id = ?").run(source, sourceId);
1001
+ const stmt = this.db.query("INSERT OR REPLACE INTO videos (source, source_id, video_id, title, description, published_at, url, thumbnail, view_count) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)");
1002
+ for (const v of rows) {
1003
+ stmt.run(v.source, v.sourceId, v.videoId, v.title, v.description, v.publishedAt, v.url, v.thumbnail, v.viewCount);
1004
+ }
1005
+ });
1006
+ tx(videos);
1007
+ }
1008
+ recentVideos(source, sourceId, limit = 10) {
1009
+ const rows = this.db.query("SELECT * FROM videos WHERE source = ? AND source_id = ? ORDER BY published_at DESC LIMIT ?").all(source, sourceId, limit);
1010
+ return rows.map((r) => ({
1011
+ source: r.source,
1012
+ sourceId: r.source_id,
1013
+ videoId: r.video_id,
1014
+ title: r.title || "",
1015
+ description: r.description || "",
1016
+ publishedAt: r.published_at || "",
1017
+ url: r.url || "",
1018
+ thumbnail: r.thumbnail || "",
1019
+ viewCount: r.view_count ?? 0
1020
+ }));
1021
+ }
1022
+ expertsNeedingVideos(opts = {}) {
1023
+ const where = ["json_extract(socials, '$.youtube') IS NOT NULL"];
1024
+ const params = [];
1025
+ if (opts.source) {
1026
+ where.push("source = ?");
1027
+ params.push(opts.source);
1028
+ }
1029
+ if (!opts.refresh) {
1030
+ where.push("NOT EXISTS (SELECT 1 FROM videos v WHERE v.source=experts.source AND v.source_id=experts.source_id)");
1031
+ }
1032
+ let sql = "SELECT * FROM experts WHERE " + where.join(" AND ") + " ORDER BY rating_count DESC";
1033
+ if (opts.limit) {
1034
+ sql += " LIMIT ?";
1035
+ params.push(opts.limit);
1036
+ }
1037
+ return this.db.query(sql).all(...params).map((r) => this.rowToExpert(r));
1038
+ }
1039
+ upsertContact(c) {
1040
+ this.db.query(`
1041
+ INSERT INTO contacts (source, source_id, type, value, label, provider, confidence, status, verified_at, created_at)
1042
+ VALUES ($source, $source_id, $type, $value, $label, $provider, $confidence, $status, $verified_at, $created_at)
1043
+ ON CONFLICT(source, source_id, type, value) DO UPDATE SET
1044
+ label=excluded.label, provider=excluded.provider, confidence=excluded.confidence,
1045
+ status=CASE WHEN excluded.status != 'unverified' THEN excluded.status ELSE contacts.status END,
1046
+ verified_at=COALESCE(excluded.verified_at, contacts.verified_at)
1047
+ `).run({
1048
+ $source: c.source,
1049
+ $source_id: c.sourceId,
1050
+ $type: c.type,
1051
+ $value: c.value,
1052
+ $label: c.label,
1053
+ $provider: c.provider,
1054
+ $confidence: c.confidence,
1055
+ $status: c.status,
1056
+ $verified_at: c.verifiedAt || null,
1057
+ $created_at: c.createdAt || new Date().toISOString()
1058
+ });
1059
+ }
1060
+ setContactStatus(source, sourceId, type, value, status) {
1061
+ this.db.query("UPDATE contacts SET status = ?, verified_at = ? WHERE source = ? AND source_id = ? AND type = ? AND value = ?").run(status, new Date().toISOString(), source, sourceId, type, value);
1062
+ }
1063
+ contacts(source, sourceId) {
1064
+ const rows = this.db.query("SELECT * FROM contacts WHERE source = ? AND source_id = ? ORDER BY type, confidence DESC").all(source, sourceId);
1065
+ return rows.map((r) => ({
1066
+ source: r.source,
1067
+ sourceId: r.source_id,
1068
+ type: r.type,
1069
+ value: r.value,
1070
+ label: r.label || "",
1071
+ provider: r.provider || "",
1072
+ confidence: r.confidence ?? 0,
1073
+ status: r.status || "unverified",
1074
+ verifiedAt: r.verified_at || "",
1075
+ createdAt: r.created_at || ""
1076
+ }));
1077
+ }
1078
+ contactsToVerify(opts = {}) {
1079
+ const where = ["status = 'unverified'"];
1080
+ const params = [];
1081
+ if (opts.source) {
1082
+ where.push("source = ?");
1083
+ params.push(opts.source);
1084
+ }
1085
+ let sql = "SELECT * FROM contacts WHERE " + where.join(" AND ") + " ORDER BY confidence DESC";
1086
+ if (opts.limit) {
1087
+ sql += " LIMIT ?";
1088
+ params.push(opts.limit);
1089
+ }
1090
+ return this.db.query(sql).all(...params).map((r) => ({
1091
+ source: r.source,
1092
+ sourceId: r.source_id,
1093
+ type: r.type,
1094
+ value: r.value,
1095
+ label: r.label || "",
1096
+ provider: r.provider || "",
1097
+ confidence: r.confidence ?? 0,
1098
+ status: r.status || "unverified",
1099
+ verifiedAt: r.verified_at || "",
1100
+ createdAt: r.created_at || ""
1101
+ }));
1102
+ }
1103
+ expertsNeedingContacts(opts = {}) {
1104
+ const where = [];
1105
+ const params = [];
1106
+ if (opts.source) {
1107
+ where.push("source = ?");
1108
+ params.push(opts.source);
1109
+ }
1110
+ if (!opts.refresh) {
1111
+ where.push("NOT EXISTS (SELECT 1 FROM contacts c WHERE c.source = experts.source AND c.source_id = experts.source_id)");
1112
+ }
1113
+ let sql = "SELECT * FROM experts";
1114
+ if (where.length)
1115
+ sql += " WHERE " + where.join(" AND ");
1116
+ sql += " ORDER BY rating_count DESC";
1117
+ if (opts.limit) {
1118
+ sql += " LIMIT ?";
1119
+ params.push(opts.limit);
1120
+ }
1121
+ return this.db.query(sql).all(...params).map((r) => this.rowToExpert(r));
1122
+ }
1123
+ contactStats(source) {
1124
+ const filt = source ? " WHERE source = ?" : "";
1125
+ const args = source ? [source] : [];
1126
+ const total = this.db.query(`SELECT COUNT(*) n FROM contacts${filt}`).get(...args).n;
1127
+ const valid = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE status='valid'${source ? " AND source = ?" : ""}`).get(...args).n;
1128
+ const invalid = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE status='invalid'${source ? " AND source = ?" : ""}`).get(...args).n;
1129
+ const emails = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE type='email'${source ? " AND source = ?" : ""}`).get(...args).n;
1130
+ const phones = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE type='phone'${source ? " AND source = ?" : ""}`).get(...args).n;
1131
+ const expertsWith = this.db.query(`SELECT COUNT(DISTINCT source||source_id) n FROM contacts${filt}`).get(...args).n;
1132
+ return { total, valid, invalid, emails, phones, expertsWith };
1133
+ }
577
1134
  close() {
578
1135
  this.db.close();
579
1136
  }
@@ -792,62 +1349,393 @@ class IntroSource {
792
1349
  }
793
1350
  }
794
1351
 
795
- // src/sources/index.ts
796
- var registry = new Map;
797
- function registerSource(source) {
798
- registry.set(source.name, source);
799
- }
800
- function getSource(name) {
801
- return registry.get(name);
1352
+ // src/sources/common.ts
1353
/**
 * Build a complete expert record from a partial one, filling every missing
 * (null/undefined) field with its default.
 *
 * - `fullName` falls back to "first last" built from the non-empty name parts.
 * - `topics` are de-duplicated and sorted; `tags` are passed through unchanged.
 * - `crawledAt` defaults to the current time as an ISO string.
 *
 * @param {object} p partial expert; `source` and `sourceId` are copied as-is
 * @returns {object} the normalized expert record
 */
function makeExpert(p) {
  const firstName = p.firstName ?? "";
  const lastName = p.lastName ?? "";
  const nameFromParts = [firstName, lastName].filter(Boolean).join(" ");
  const uniqueSortedTopics = p.topics ? Array.from(new Set(p.topics)).sort() : [];
  const text = (value) => value ?? "";
  const count = (value) => value ?? 0;
  const flag = (value) => value ?? false;
  return {
    source: p.source,
    sourceId: p.sourceId,
    slug: text(p.slug),
    url: text(p.url),
    fullName: p.fullName ?? nameFromParts,
    firstName,
    lastName,
    title: text(p.title),
    headline: text(p.headline),
    bio: text(p.bio),
    avatar: text(p.avatar),
    price: count(p.price),
    priceCurrency: p.priceCurrency ?? "USD",
    priceUnit: text(p.priceUnit),
    rating: count(p.rating),
    ratingCount: count(p.ratingCount),
    verified: flag(p.verified),
    featured: flag(p.featured),
    topics: uniqueSortedTopics,
    tags: p.tags ?? [],
    socials: p.socials ?? {},
    extra: p.extra ?? {},
    crawledAt: p.crawledAt ?? new Date().toISOString()
  };
}
803
- function listSources() {
804
- return [...registry.values()];
1382
/**
 * Turn free text into a lowercase, hyphen-separated ASCII slug.
 *
 * Accented letters are decomposed (NFKD) and their combining marks removed, so
 * "Müller" becomes "muller" rather than "mu-ller". Every remaining run of
 * characters outside a-z / 0-9 becomes one hyphen, and leading/trailing hyphens
 * are trimmed.
 *
 * @param {string} s input text; any falsy value yields ""
 * @returns {string} the slug
 */
function slugify(s) {
  return (s || "")
    .toLowerCase()
    .normalize("NFKD")
    // Drop combining diacritical marks left behind by the decomposition.
    .replace(/[\u0300-\u036f]/g, "")
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}
806
- registerSource(new IntroSource);
807
-
808
- // src/crawl.ts
809
- async function crawlSource(db, sourceName, opts = {}) {
810
- const source = getSource(sourceName);
811
- if (!source) {
812
- throw new Error(`Unknown source "${sourceName}". Run \`experts sources\` to list options.`);
1385
/**
 * Best-effort JSON GET helper.
 *
 * Calls `fetchFn(url, init)` with a default User-Agent and `Accept: application/json`
 * (both overridable through `init.headers`). Resolves to the parsed JSON body, or to
 * `null` when the response is not OK or anything throws (network error, bad JSON, ...).
 *
 * @param {string} url
 * @param {Function} fetchFn fetch-compatible function
 * @param {object} [init] extra request options
 * @returns {Promise<any|null>}
 */
async function fetchJson(url, fetchFn, init = {}) {
  try {
    const defaultHeaders = {
      "User-Agent": "open-experts (+https://github.com/hasna/experts)",
      Accept: "application/json"
    };
    const headers = { ...defaultHeaders, ...(init.headers || {}) };
    const response = await fetchFn(url, { ...init, headers });
    return response.ok ? await response.json() : null;
  } catch {
    return null;
  }
}
832
1402
 
833
- // src/format.ts
834
- import chalk from "chalk";
835
- function money(amount, currency = "USD") {
836
- const symbol = currency === "USD" ? "$" : `${currency} `;
837
- return `${symbol}${amount.toLocaleString("en-US")}`;
1403
+ // src/sources/mentorcruise.ts
1404
/**
 * Map one raw MentorCruise mentor object onto the shared expert shape.
 *
 * The slug comes from `m.slug`, else from the slugified name or id. A bare
 * Twitter handle is expanded to an x.com URL; a LinkedIn value is kept as given.
 *
 * @param {object} m raw mentor record
 * @param {string} crawledAt timestamp stored on the record
 * @returns {object} expert produced by `makeExpert`
 */
function normalizeMentor(m, crawledAt) {
  const slug = m.slug || slugify(m.name || String(m.id ?? ""));
  const socials = {};
  if (m.twitter) {
    const isUrl = m.twitter.startsWith("http");
    socials.twitter = isUrl ? m.twitter : `https://x.com/${m.twitter}`;
  }
  if (m.linkedin) {
    socials.linkedin = m.linkedin;
  }
  const partial = {
    source: "mentorcruise",
    sourceId: String(m.id ?? slug),
    slug,
    url: `https://mentorcruise.com/mentor/${slug}/`,
    fullName: m.name ?? [m.first_name, m.last_name].filter(Boolean).join(" "),
    firstName: m.first_name ?? "",
    lastName: m.last_name ?? "",
    title: m.job_title ?? "",
    bio: m.bio ?? "",
    avatar: m.avatar ?? m.photo ?? "",
    price: m.price ?? 0,
    priceCurrency: m.currency ?? "USD",
    // A unit is only shown when a non-zero price exists.
    priceUnit: m.price ? "per month" : "",
    rating: m.rating ?? 0,
    ratingCount: m.reviews_count ?? 0,
    verified: Boolean(m.verified),
    featured: Boolean(m.is_top_mentor),
    topics: m.categories ?? [],
    tags: m.skills ?? [],
    socials,
    crawledAt
  };
  return makeExpert(partial);
}
839
- function stars(rating, count) {
840
- const full = Math.round(rating);
841
- const bar = "\u2605".repeat(full) + "\u2606".repeat(Math.max(0, 5 - full));
842
- const label = rating ? rating.toFixed(2) : "\u2014";
843
- return `${chalk.yellow(bar)} ${label} ${chalk.dim(`(${count})`)}`;
1435
+
1436
/**
 * Source adapter for MentorCruise.
 *
 * Pages through `${apiBase}/mentors/?limit=&offset=` using `fetchJson` and
 * normalizes each item with `normalizeMentor`.
 */
class MentorCruiseSource {
  name = "mentorcruise";
  description = "MentorCruise \u2014 long-term mentorship from vetted mentors";
  website = "https://mentorcruise.com";
  fetchFn;
  apiBase;
  pageSize;
  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch implementation (defaults to global fetch)
   * @param {string} [opts.apiBase]   API root (defaults to MENTORCRUISE_API_BASE or the public URL)
   * @param {number} [opts.pageSize]  items requested per page (default 50)
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase, pageSize } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.MENTORCRUISE_API_BASE ?? "https://mentorcruise.com/api";
    this.pageSize = pageSize ?? 50;
  }
  /**
   * Crawl the mentor listing.
   *
   * Stops on an empty page, on a page shorter than `pageSize`, or once `opts.max`
   * experts have been collected.
   *
   * @param {object} [opts] `onLog` progress callback, `max` cap on returned experts
   * @returns {Promise<{experts:Array, topics:Array, tags:Array, total:number}>}
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    const crawledAt = new Date().toISOString();
    const experts = [];
    const tagNames = new Set();
    let offset = 0;
    while (true) {
      const pageUrl = `${this.apiBase}/mentors/?limit=${this.pageSize}&offset=${offset}`;
      const payload = await fetchJson(pageUrl, this.fetchFn);
      const page = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
      if (!page.length) {
        break;
      }
      for (const raw of page) {
        const expert = normalizeMentor(raw, crawledAt);
        experts.push(expert);
        for (const tag of expert.tags) {
          tagNames.add(tag);
        }
      }
      offset += page.length;
      log(` mentorcruise: ${experts.length}`);
      const reachedMax = opts.max && experts.length >= opts.max;
      const shortPage = page.length < this.pageSize;
      if (reachedMax || shortPage) {
        break;
      }
    }
    if (experts.length === 0) {
      log("mentorcruise: no public listing reachable (set MENTORCRUISE_API_BASE or provide a fetchFn).");
    }
    return {
      experts: opts.max ? experts.slice(0, opts.max) : experts,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      // `total` counts everything collected, before the `max` slice.
      total: experts.length
    };
  }
}
845
- function truncate(s, n) {
846
- const clean = (s || "").replace(/\s+/g, " ").trim();
847
- return clean.length > n ? clean.slice(0, n - 1) + "\u2026" : clean;
1484
+
1485
+ // src/sources/adplist.ts
1486
/**
 * Map one raw ADPList mentor object onto the shared expert shape.
 *
 * The slug is the username when present, else the slugified name or id.
 * Price is always 0 with the unit "free session".
 *
 * @param {object} m raw mentor record
 * @param {string} crawledAt timestamp stored on the record
 * @returns {object} expert produced by `makeExpert`
 */
function normalizeAdpMentor(m, crawledAt) {
  const slug = m.username || slugify(m.name || m.full_name || String(m.id ?? ""));
  const socials = {};
  if (m.twitter) {
    const isUrl = m.twitter.startsWith("http");
    socials.twitter = isUrl ? m.twitter : `https://x.com/${m.twitter}`;
  }
  if (m.linkedin) {
    socials.linkedin = m.linkedin;
  }
  const partial = {
    source: "adplist",
    sourceId: String(m.id ?? slug),
    slug,
    url: `https://adplist.org/mentors/${slug}`,
    fullName: m.name ?? m.full_name ?? "",
    title: m.headline ?? m.tagline ?? "",
    headline: m.tagline ?? "",
    bio: m.bio ?? m.about ?? "",
    avatar: m.profile_photo ?? m.avatar ?? "",
    price: 0,
    priceUnit: "free session",
    rating: m.rating ?? 0,
    ratingCount: m.total_reviews ?? 0,
    verified: Boolean(m.verified),
    featured: Boolean(m.is_featured),
    tags: m.expertise ?? m.skills ?? [],
    socials,
    crawledAt
  };
  return makeExpert(partial);
}
849
- function badges(e) {
850
- const parts = [];
1514
+
1515
/**
 * Source adapter for ADPList.
 *
 * Pages through `${apiBase}/mentors/?page=&page_size=` using `fetchJson` and
 * normalizes each item with `normalizeAdpMentor`.
 */
class ADPListSource {
  name = "adplist";
  description = "ADPList \u2014 free mentorship across design, product & engineering";
  website = "https://adplist.org";
  fetchFn;
  apiBase;
  pageSize;
  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch implementation (defaults to global fetch)
   * @param {string} [opts.apiBase]   API root (defaults to ADPLIST_API_BASE or the public URL)
   * @param {number} [opts.pageSize]  items requested per page (default 50)
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase, pageSize } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.ADPLIST_API_BASE ?? "https://api.adplist.org/api";
    this.pageSize = pageSize ?? 50;
  }
  /**
   * Crawl the mentor listing page by page.
   *
   * Stops on an empty page, on a page shorter than `pageSize`, or once `opts.max`
   * experts have been collected.
   *
   * @param {object} [opts] `onLog` progress callback, `max` cap on returned experts
   * @returns {Promise<{experts:Array, topics:Array, tags:Array, total:number}>}
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    const crawledAt = new Date().toISOString();
    const experts = [];
    const tagNames = new Set();
    let page = 1;
    while (true) {
      const pageUrl = `${this.apiBase}/mentors/?page=${page}&page_size=${this.pageSize}`;
      const payload = await fetchJson(pageUrl, this.fetchFn);
      const batch = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
      if (!batch.length) {
        break;
      }
      for (const raw of batch) {
        const expert = normalizeAdpMentor(raw, crawledAt);
        experts.push(expert);
        for (const tag of expert.tags) {
          tagNames.add(tag);
        }
      }
      log(` adplist: ${experts.length}`);
      page += 1;
      const reachedMax = opts.max && experts.length >= opts.max;
      const shortPage = batch.length < this.pageSize;
      if (reachedMax || shortPage) {
        break;
      }
    }
    if (experts.length === 0) {
      log("adplist: no public listing reachable (set ADPLIST_API_BASE or provide a fetchFn).");
    }
    return {
      experts: opts.max ? experts.slice(0, opts.max) : experts,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      // `total` counts everything collected, before the `max` slice.
      total: experts.length
    };
  }
}
1561
+
1562
+ // src/sources/clarity.ts
1563
/**
 * Map one raw Clarity.fm expert object onto the shared expert shape.
 *
 * @param {object} c raw expert record
 * @param {string} crawledAt timestamp stored on the record
 * @returns {object} expert produced by `makeExpert`
 */
function normalizeClarityExpert(c, crawledAt) {
  const slug = c.username || slugify(c.name || String(c.id ?? ""));
  const perMinute = c.rate_per_minute;
  const partial = {
    source: "clarity",
    sourceId: String(c.id ?? slug),
    slug,
    url: `https://clarity.fm/${slug}`,
    fullName: c.name ?? "",
    title: c.title ?? "",
    bio: c.bio ?? "",
    avatar: c.image ?? "",
    price: perMinute ?? 0,
    priceCurrency: "USD",
    // A unit is only shown when a non-zero rate exists.
    priceUnit: perMinute ? "per minute" : "",
    rating: c.rating ?? 0,
    ratingCount: c.reviews ?? 0,
    topics: c.categories ?? [],
    tags: c.expertise ?? [],
    crawledAt
  };
  return makeExpert(partial);
}
1584
+
1585
/**
 * Source adapter for Clarity.fm.
 *
 * There is no default API base: unless `opts.apiBase` or CLARITY_API_BASE is set,
 * `crawl` logs a hint and returns an empty result without any request.
 */
class ClaritySource {
  name = "clarity";
  description = "Clarity.fm \u2014 on-demand expert calls billed per minute";
  website = "https://clarity.fm";
  fetchFn;
  apiBase;
  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch implementation (defaults to global fetch)
   * @param {string} [opts.apiBase]   API root (defaults to CLARITY_API_BASE)
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.CLARITY_API_BASE;
  }
  /**
   * Fetch `${apiBase}/experts` once and normalize every item.
   *
   * @param {object} [opts] `onLog` progress callback, `max` cap on returned experts
   * @returns {Promise<{experts:Array, topics:Array, tags:Array, total:number}>}
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    if (!this.apiBase) {
      log("clarity: no public listing API; set CLARITY_API_BASE or inject a fetchFn to crawl.");
      return { experts: [], topics: [], tags: [], total: 0 };
    }
    const crawledAt = new Date().toISOString();
    const payload = await fetchJson(`${this.apiBase}/experts`, this.fetchFn);
    const rawItems = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
    const experts = rawItems.map((raw) => normalizeClarityExpert(raw, crawledAt));
    const tagNames = new Set();
    for (const expert of experts) {
      for (const tag of expert.tags) {
        tagNames.add(tag);
      }
    }
    return {
      experts: opts.max ? experts.slice(0, opts.max) : experts,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      // `total` counts everything fetched, before the `max` slice.
      total: experts.length
    };
  }
}
1617
+
1618
+ // src/sources/glg.ts
1619
/**
 * Map one raw GLG expert object onto the shared expert shape.
 *
 * The slug is always derived from the name (or id); the URL is the fixed GLG site.
 *
 * @param {object} g raw expert record
 * @param {string} crawledAt timestamp stored on the record
 * @returns {object} expert produced by `makeExpert`
 */
function normalizeGlgExpert(g, crawledAt) {
  const slug = slugify(g.name || String(g.id ?? ""));
  const hourly = g.hourly_rate;
  const partial = {
    source: "glg",
    sourceId: String(g.id ?? slug),
    slug,
    url: "https://glginsights.com",
    fullName: g.name ?? "",
    title: g.title ?? "",
    bio: g.biography ?? "",
    price: hourly ?? 0,
    priceCurrency: g.currency ?? "USD",
    // A unit is only shown when a non-zero rate exists.
    priceUnit: hourly ? "per hour" : "",
    topics: g.industries ?? [],
    tags: g.expertise_areas ?? [],
    crawledAt
  };
  return makeExpert(partial);
}
1637
+
1638
/**
 * Source adapter for GLG.
 *
 * There is no default API base: unless `opts.apiBase` or GLG_API_BASE is set,
 * `crawl` logs a hint and returns an empty result without any request.
 * When an API key is configured it is sent as a Bearer token.
 */
class GLGSource {
  name = "glg";
  description = "GLG \u2014 enterprise expert network (requires partner API access)";
  website = "https://glginsights.com";
  fetchFn;
  apiBase;
  apiKey;
  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch implementation (defaults to global fetch)
   * @param {string} [opts.apiBase]   API root (defaults to GLG_API_BASE)
   * @param {string} [opts.apiKey]    API key (defaults to GLG_API_KEY)
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase, apiKey } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.GLG_API_BASE;
    this.apiKey = apiKey ?? process.env.GLG_API_KEY;
  }
  /**
   * Fetch `${apiBase}/experts` once and normalize every item.
   *
   * @param {object} [opts] `onLog` progress callback, `max` cap on returned experts
   * @returns {Promise<{experts:Array, topics:Array, tags:Array, total:number}>}
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    if (!this.apiBase) {
      log("glg: enterprise-gated; no public directory. Set GLG_API_BASE + GLG_API_KEY (partner access) to crawl.");
      return { experts: [], topics: [], tags: [], total: 0 };
    }
    const crawledAt = new Date().toISOString();
    const authHeaders = this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : {};
    const payload = await fetchJson(`${this.apiBase}/experts`, this.fetchFn, { headers: authHeaders });
    const rawItems = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
    const experts = rawItems.map((raw) => normalizeGlgExpert(raw, crawledAt));
    const tagNames = new Set();
    for (const expert of experts) {
      for (const tag of expert.tags) {
        tagNames.add(tag);
      }
    }
    return {
      experts: opts.max ? experts.slice(0, opts.max) : experts,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      // `total` counts everything fetched, before the `max` slice.
      total: experts.length
    };
  }
}
1674
+
1675
+ // src/sources/index.ts
1676
// Registry of source adapters, keyed by each adapter's `name`.
var registry = new Map();
/** Register (or replace) a source adapter under `source.name`. */
function registerSource(source) {
  const key = source.name;
  registry.set(key, source);
}
/** Look up a source adapter by name; returns `undefined` when unknown. */
function getSource(name) {
  const found = registry.get(name);
  return found;
}
/** Return all registered adapters in registration order. */
function listSources() {
  return Array.from(registry.values());
}
1686
// Register the built-in source adapters, one instance each, in this order.
for (const SourceClass of [IntroSource, MentorCruiseSource, ADPListSource, ClaritySource, GLGSource]) {
  registerSource(new SourceClass());
}
1691
+
1692
+ // src/crawl.ts
1693
/**
 * Crawl one registered source and persist the result.
 *
 * Steps, in order: record changes against the stored experts, upsert the experts,
 * store topics (only when non-empty) and tags, update the crawl metadata, rebuild
 * the knowledge graph, rescore the source and rebuild persons.
 *
 * @param {object} db database wrapper
 * @param {string} sourceName name of a registered source
 * @param {object} [opts] forwarded to `source.crawl`; `onLog` receives progress
 * @returns {Promise<object>} summary of the crawl
 * @throws {Error} when `sourceName` is not registered
 */
async function crawlSource(db, sourceName, opts = {}) {
  const source = getSource(sourceName);
  if (!source) {
    throw new Error(`Unknown source "${sourceName}". Run \`experts sources\` to list options.`);
  }
  const data = await source.crawl(opts);
  const sourceKey = source.name;
  // Changes must be recorded before the upsert overwrites the stored rows.
  const changes = db.recordChanges(sourceKey, data.experts);
  db.upsertExperts(data.experts);
  if (data.topics.length) {
    db.setTopics(sourceKey, data.topics);
  }
  db.setTags(sourceKey, data.tags);
  db.setMeta(`last_crawl:${sourceKey}`, new Date().toISOString());
  db.setMeta(`catalog_total:${sourceKey}`, String(data.total));
  opts.onLog?.("building knowledge graph\u2026");
  const graph = db.rebuildGraph();
  db.rescore(sourceKey);
  db.rebuildPersons();
  const topicCounts = data.topics.map((topic) => ({ name: topic.name, count: topic.expertCount }));
  const distinctTagNames = new Set(data.tags.map((tag) => tag.name));
  return {
    source: sourceKey,
    experts: data.experts.length,
    topics: topicCounts,
    tags: distinctTagNames.size,
    total: data.total,
    graph,
    changes
  };
}
1720
+
1721
+ // src/format.ts
1722
+ import chalk from "chalk";
1723
/**
 * Format an amount with en-US digit grouping, prefixed with "$" for USD or with
 * the currency code and a space for anything else.
 *
 * @param {number} amount
 * @param {string} [currency="USD"]
 * @returns {string}
 */
function money(amount, currency = "USD") {
  const formatted = amount.toLocaleString("en-US");
  if (currency === "USD") {
    return `$${formatted}`;
  }
  return `${currency} ${formatted}`;
}
1727
/**
 * Render a five-star bar followed by the numeric rating and the review count.
 *
 * The number of filled stars is the rounded rating clamped to 0..5, so a
 * negative rating cannot make `repeat()` throw and a rating above 5 cannot draw
 * more than five stars. A non-numeric rounding result counts as 0 stars.
 *
 * @param {number} rating numeric rating; a falsy value prints an em dash as the label
 * @param {number} count  number of ratings
 * @returns {string} chalk-colored string
 */
function stars(rating, count) {
  const full = Math.min(5, Math.max(0, Math.round(rating) || 0));
  const bar = "\u2605".repeat(full) + "\u2606".repeat(5 - full);
  const label = rating ? rating.toFixed(2) : "\u2014";
  return `${chalk.yellow(bar)} ${label} ${chalk.dim(`(${count})`)}`;
}
1733
/**
 * Collapse whitespace runs to single spaces, trim, and shorten to at most `n`
 * characters, ending in an ellipsis when text was cut.
 *
 * @param {string} s input text; a falsy value is treated as ""
 * @param {number} n maximum length including the ellipsis
 * @returns {string}
 */
function truncate(s, n) {
  const text = (s || "").replace(/\s+/g, " ").trim();
  if (text.length > n) {
    return text.slice(0, n - 1) + "\u2026";
  }
  return text;
}
1737
+ function badges(e) {
1738
+ const parts = [];
851
1739
  if (e.featured)
852
1740
  parts.push(chalk.bgYellow.black(" TOP "));
853
1741
  if (e.verified)
@@ -907,6 +1795,36 @@ function formatDetail(e, enrichment = {}) {
907
1795
  L.push(` ${chalk.dim((t.createdAt || "").slice(0, 10))} ${truncate(t.text, 80)}`);
908
1796
  }
909
1797
  }
1798
+ const ext = enrichment.ext ?? {};
1799
+ const li = ext.linkedin;
1800
+ if (li && (li.headline || li.company)) {
1801
+ L.push("");
1802
+ L.push(chalk.bold.blue("LinkedIn") + ` ${[li.headline, li.company].filter(Boolean).join(" \xB7 ")}`);
1803
+ if (li.about)
1804
+ L.push(chalk.dim(wrap(String(li.about), 80)));
1805
+ }
1806
+ const site = ext.site;
1807
+ if (site && site.summary) {
1808
+ L.push("");
1809
+ L.push(chalk.dim("Site ") + chalk.dim(String(site.url || "")));
1810
+ L.push(wrap(truncate(String(site.summary), 280), 88));
1811
+ }
1812
+ const videos = enrichment.videos ?? [];
1813
+ if (videos.length) {
1814
+ L.push("");
1815
+ L.push(chalk.dim(`Recent videos (${videos.length}):`));
1816
+ for (const v of videos.slice(0, 5))
1817
+ L.push(` ${chalk.dim((v.publishedAt || "").slice(0, 10))} ${truncate(v.title, 70)}`);
1818
+ }
1819
+ const contacts = enrichment.contacts ?? [];
1820
+ if (contacts.length) {
1821
+ L.push("");
1822
+ L.push(chalk.dim("Contacts:"));
1823
+ for (const c of contacts) {
1824
+ const mark = c.status === "valid" ? chalk.green("\u2713") : c.status === "invalid" ? chalk.red("\u2717") : chalk.dim("\xB7");
1825
+ L.push(` ${mark} ${c.type}: ${c.value}`);
1826
+ }
1827
+ }
910
1828
  const eq = e.extra?.exampleQuestions || [];
911
1829
  if (eq.length) {
912
1830
  L.push("");
@@ -970,11 +1888,48 @@ import { join as join2 } from "path";
970
1888
  import { mkdirSync as mkdirSync2 } from "fs";
971
1889
 
972
1890
  // src/connectors.ts
1891
/**
 * A list of named credential sets with per-credential cooldowns.
 *
 * Each credential is `{ name, env }`, where `env` holds environment overrides.
 * A credential placed in cooldown is skipped by `available()` / `next()` until
 * the cooldown time has passed.
 */
class CredentialPool {
  creds;
  cooldownMs;
  cooldownUntil = new Map;
  /**
   * @param {Array<{name:string, env:object}>} creds
   * @param {number} [cooldownMs] cooldown length in ms (default 15 minutes)
   */
  constructor(creds, cooldownMs = 15 * 60 * 1000) {
    this.creds = creds;
    this.cooldownMs = cooldownMs;
  }
  /**
   * Build a pool from environment variables, in this order:
   * `${variable}S` (comma-separated list), `${variable}`, then
   * `${variable}_2` .. `${variable}_10`.
   *
   * A token that appears more than once (for example both in the CSV list and in
   * the single variable) is kept only the first time, so one exhausted token is
   * not retried under a second name and `size()` reflects distinct tokens.
   *
   * @param {string} variable base environment variable name
   * @returns {CredentialPool}
   */
  static fromEnv(variable) {
    const sets = [];
    const seen = new Set();
    const add = (name, token) => {
      // Compare trimmed values; the value passed on is left as found.
      const key = token.trim();
      if (seen.has(key)) {
        return;
      }
      seen.add(key);
      sets.push({ name, env: { [variable]: token } });
    };
    const csv = process.env[`${variable}S`];
    if (csv) {
      csv.split(",").map((t) => t.trim()).filter(Boolean).forEach((t, i) => add(`${variable}#csv${i + 1}`, t));
    }
    const single = process.env[variable];
    if (single) {
      add(`${variable}#1`, single);
    }
    for (let i = 2; i <= 10; i++) {
      const v = process.env[`${variable}_${i}`];
      if (v) {
        add(`${variable}#${i}`, v);
      }
    }
    return new CredentialPool(sets);
  }
  /** Number of credentials in the pool, whether or not they are in cooldown. */
  size() {
    return this.creds.length;
  }
  /** Credentials whose cooldown (if any) has ended at `now`. */
  available(now = Date.now()) {
    return this.creds.filter((c) => (this.cooldownUntil.get(c.name) ?? 0) <= now);
  }
  /** First available credential, or `null` when all are cooling down. */
  next(now = Date.now()) {
    return this.available(now)[0] ?? null;
  }
  /** Put the named credential in cooldown for `cooldownMs` starting at `now`. */
  cooldown(name, now = Date.now()) {
    this.cooldownUntil.set(name, now + this.cooldownMs);
  }
}
973
1927
  function defaultRunner(bin = "connectors") {
974
- return async (connector, args) => {
1928
+ return async (connector, args, env) => {
975
1929
  const proc = Bun.spawn([bin, "run", connector, ...args, "--format", "json"], {
976
1930
  stdout: "pipe",
977
- stderr: "pipe"
1931
+ stderr: "pipe",
1932
+ env: env ? { ...process.env, ...env } : process.env
978
1933
  });
979
1934
  const [out, err, code] = await Promise.all([
980
1935
  new Response(proc.stdout).text(),
@@ -1018,9 +1973,30 @@ function extractJson(text) {
1018
1973
  }
1019
1974
 
1020
1975
  class ConnectorsClient {
1021
- run;
1976
+ runner;
1977
+ pool;
1022
1978
  constructor(opts = {}) {
1023
- this.run = opts.runner ?? defaultRunner(opts.bin);
1979
+ this.runner = opts.runner ?? defaultRunner(opts.bin);
1980
+ this.pool = opts.pool;
1981
+ }
1982
+ async run(connector, args) {
1983
+ if (!this.pool || this.pool.size() <= 1) {
1984
+ return this.runner(connector, args, this.pool?.next()?.env);
1985
+ }
1986
+ const tries = this.pool.size();
1987
+ for (let i = 0;i < tries; i++) {
1988
+ const cred = this.pool.next();
1989
+ if (!cred) {
1990
+ return { success: false, quotaExhausted: true, error: "all credentials are in cooldown" };
1991
+ }
1992
+ const res = await this.runner(connector, args, cred.env);
1993
+ if (res.quotaExhausted || res.rateLimited) {
1994
+ this.pool.cooldown(cred.name);
1995
+ continue;
1996
+ }
1997
+ return res;
1998
+ }
1999
+ return { success: false, quotaExhausted: true, error: "all credentials exhausted" };
1024
2000
  }
1025
2001
  async xUser(username) {
1026
2002
  const result = await this.run("x", ["users", "get", username]);
@@ -1073,10 +2049,65 @@ class ConnectorsClient {
1073
2049
  });
1074
2050
  return { tweets, result };
1075
2051
  }
2052
+ async ytVideos(query, opts = {}) {
2053
+ const result = await this.run("youtube", [
2054
+ "search",
2055
+ "--query",
2056
+ query,
2057
+ "--type",
2058
+ "video",
2059
+ "--order",
2060
+ "date",
2061
+ "--max",
2062
+ String(opts.max ?? 10)
2063
+ ]);
2064
+ if (!result.success)
2065
+ return { videos: [], result };
2066
+ const items = result.data?.items ?? result.data?.data ?? (Array.isArray(result.data) ? result.data : []);
2067
+ const videos = items.map((it) => parseYouTubeItem(it)).filter((v) => v !== null);
2068
+ return { videos, result };
2069
+ }
2070
+ async linkedInProfile(handle) {
2071
+ const result = await this.run("linkedin", ["profile", "get", handle]);
2072
+ if (!result.success)
2073
+ return { profile: null, result };
2074
+ const d = result.data?.data ?? result.data ?? {};
2075
+ if (!d || !d.headline && !d.about && !d.summary)
2076
+ return { profile: null, result };
2077
+ return {
2078
+ profile: {
2079
+ headline: d.headline ?? d.occupation ?? "",
2080
+ company: d.company ?? d.companyName ?? "",
2081
+ about: d.about ?? d.summary ?? "",
2082
+ location: d.location ?? d.locationName ?? ""
2083
+ },
2084
+ result
2085
+ };
2086
+ }
2087
+ }
2088
/**
 * Convert one YouTube API item into a flat video record.
 *
 * The id is taken from a string `id`, else `id.videoId`, else `videoId`; items
 * without one yield `null`. Text fields are read from `snippet` when present,
 * otherwise from the item itself.
 *
 * @param {object} it raw item
 * @returns {{id:string,title:string,description:string,publishedAt:string,url:string,thumbnail:string,viewCount:number}|null}
 */
function parseYouTubeItem(it) {
  let id;
  if (typeof it?.id === "string") {
    id = it.id;
  } else {
    id = it?.id?.videoId ?? it?.videoId;
  }
  if (!id) {
    return null;
  }
  const snippet = it?.snippet ?? it ?? {};
  const statistics = it?.statistics ?? {};
  const thumbnails = snippet?.thumbnails;
  const thumbnail = thumbnails?.high?.url ?? thumbnails?.default?.url ?? "";
  return {
    id: String(id),
    title: snippet.title ?? "",
    description: snippet.description ?? "",
    publishedAt: snippet.publishedAt ?? snippet.publishTime ?? "",
    url: `https://www.youtube.com/watch?v=${id}`,
    thumbnail,
    viewCount: Number(statistics.viewCount ?? 0)
  };
}
1077
2105
 
1078
2106
  // src/enrich.ts
1079
2107
  var sleep2 = (ms) => new Promise((r) => setTimeout(r, ms));
2108
/** Build a ConnectorsClient whose credential pool is read from the X_BEARER_TOKEN environment variables. */
function defaultXClient() {
  const pool = CredentialPool.fromEnv("X_BEARER_TOKEN");
  return new ConnectorsClient({ pool });
}
1080
2111
  function avatarDir() {
1081
2112
  return process.env.OPEN_EXPERTS_AVATARS || join2(homedir2(), ".hasna", "experts", "avatars");
1082
2113
  }
@@ -1116,7 +2147,7 @@ async function downloadAvatar(url, e, fetchFn = fetch) {
1116
2147
  return path;
1117
2148
  }
1118
2149
  async function enrichExpert(db, e, opts) {
1119
- const client = opts.client ?? new ConnectorsClient;
2150
+ const client = opts.client ?? defaultXClient();
1120
2151
  const handle = handleFromSocial(e.socials.twitter || "");
1121
2152
  const now = new Date().toISOString();
1122
2153
  if (!handle)
@@ -1208,9 +2239,37 @@ async function enrichExpert(db, e, opts) {
1208
2239
  }
1209
2240
  return { ok: true, notFound: false, tweets: tweetCount, avatar, rateLimited: false, quotaExhausted: false };
1210
2241
  }
2242
/**
 * Download avatars for experts that have a remote avatar URL but no local copy.
 *
 * Experts with `avatarLocal` already set, or without `avatar`, are counted as
 * skipped. A download that yields no path, or throws, counts as failed. The
 * function waits `delayMs` after every attempted download.
 *
 * @param {object} db database wrapper
 * @param {object} [opts] `source`, `fetchFn`, `delayMs` (default 150), `onLog`
 * @returns {Promise<{downloaded:number, skipped:number, failed:number}>}
 */
async function backfillAvatars(db, opts = {}) {
  const log = opts.onLog ?? (() => {});
  const fetchFn = opts.fetchFn ?? fetch;
  const delayMs = opts.delayMs ?? 150;
  const tally = { downloaded: 0, skipped: 0, failed: 0 };
  for (const expert of db.list({ source: opts.source })) {
    const needsDownload = !expert.avatarLocal && Boolean(expert.avatar);
    if (!needsDownload) {
      tally.skipped += 1;
      continue;
    }
    try {
      const savedPath = await downloadAvatar(expert.avatar, expert, fetchFn);
      if (!savedPath) {
        tally.failed += 1;
      } else {
        db.setAvatarLocal(expert.source, expert.sourceId, savedPath);
        tally.downloaded += 1;
        // Progress is reported every 100 downloads.
        if (tally.downloaded % 100 === 0) {
          log(` avatars: ${tally.downloaded} downloaded`);
        }
      }
    } catch {
      tally.failed += 1;
    }
    await sleep2(delayMs);
  }
  return tally;
}
1211
2270
  async function enrichX(db, opts = {}) {
1212
2271
  const log = opts.onLog ?? (() => {});
1213
- const client = opts.client ?? new ConnectorsClient;
2272
+ const client = opts.client ?? defaultXClient();
1214
2273
  const delayMs = opts.delayMs ?? 1200;
1215
2274
  const staleBefore = opts.refresh ? undefined : opts.sinceDays ? new Date(Date.now() - opts.sinceDays * 86400000).toISOString() : undefined;
1216
2275
  const targets = db.expertsToEnrich({
@@ -1267,13 +2326,440 @@ async function enrichX(db, opts = {}) {
1267
2326
  db.setMeta(`last_enrich:x:${opts.source ?? "all"}`, new Date().toISOString());
1268
2327
  return res;
1269
2328
  }
2329
/**
 * Look up recent YouTube videos for experts and store them.
 *
 * For each target the expert's full name (or slug) is searched. The loop stops
 * early on quota exhaustion, or on a rate limit that returned no videos; the
 * interrupted expert is not counted as attempted. A lookup that throws, or that
 * reports `success === false` without any videos, is logged and skipped so that
 * videos already stored for that expert are not replaced by an empty list.
 *
 * @param {object} db database wrapper
 * @param {object} [opts] `client`, `source`, `refresh`, `max`, `videoMax` (default 10),
 *   `delayMs` (default 500), `onLog`
 * @returns {Promise<{attempted:number, withVideos:number, videos:number, stoppedEarly:boolean, reason?:string}>}
 */
async function enrichYouTube(db, opts = {}) {
  const log = opts.onLog ?? (() => {});
  const client = opts.client ?? new ConnectorsClient;
  const delayMs = opts.delayMs ?? 500;
  const targets = db.expertsNeedingVideos({ source: opts.source, refresh: opts.refresh, limit: opts.max });
  const res = { attempted: 0, withVideos: 0, videos: 0, stoppedEarly: false };
  for (const e of targets) {
    res.attempted++;
    let found;
    try {
      found = await client.ytVideos(e.fullName || e.slug, { max: opts.videoMax ?? 10 });
    } catch (err) {
      log(` ${e.slug}: error ${err?.message || err}`);
      await sleep2(delayMs);
      continue;
    }
    if (found.result.quotaExhausted) {
      res.stoppedEarly = true;
      res.reason = "YouTube API quota exhausted \u2014 top up or wait, then re-run";
      res.attempted--;
      break;
    }
    if (found.result.rateLimited && found.videos.length === 0) {
      res.stoppedEarly = true;
      res.reason = "YouTube rate limited \u2014 resume later";
      res.attempted--;
      break;
    }
    // A failed lookup says nothing about the expert's videos: keep what is stored.
    if (found.result.success === false && found.videos.length === 0) {
      log(` ${e.slug}: lookup failed ${found.result.error || ""}`);
      await sleep2(delayMs);
      continue;
    }
    const rows = found.videos.map((v) => ({
      source: e.source,
      sourceId: e.sourceId,
      videoId: v.id,
      title: v.title,
      description: v.description,
      publishedAt: v.publishedAt,
      url: v.url,
      thumbnail: v.thumbnail,
      viewCount: v.viewCount
    }));
    db.replaceVideos(e.source, e.sourceId, rows);
    if (rows.length)
      res.withVideos++;
    res.videos += rows.length;
    log(` [${res.attempted}/${targets.length}] ${e.fullName || e.slug}: ${rows.length} videos`);
    await sleep2(delayMs);
  }
  db.setMeta(`last_youtube:${opts.source ?? "all"}`, new Date().toISOString());
  return res;
}
2379
/**
 * Fetch an expert's latest tweets live, without writing them to the database.
 *
 * The X user id comes from the stored X profile when available; otherwise it is
 * resolved from the expert's Twitter handle. With no handle, or when the user
 * cannot be resolved, an empty tweet list is returned.
 *
 * @param {object} db database wrapper (only read)
 * @param {object} e expert record
 * @param {object} [opts] `client`, `max` tweets (default 10)
 * @returns {Promise<{tweets:Array, rateLimited:boolean, quotaExhausted:boolean}>}
 */
async function liveTweets(db, e, opts = {}) {
  const client = opts.client ?? defaultXClient();
  let xId = db.getXProfile(e.source, e.sourceId)?.xId || "";
  if (!xId) {
    const handle = handleFromSocial(e.socials.twitter || "");
    if (!handle) {
      return { tweets: [], rateLimited: false, quotaExhausted: false };
    }
    const lookup = await client.xUser(handle);
    if (!lookup.user) {
      return {
        tweets: [],
        rateLimited: Boolean(lookup.result.rateLimited),
        quotaExhausted: Boolean(lookup.result.quotaExhausted)
      };
    }
    xId = lookup.user.id;
  }
  const timeline = await client.xTimeline(xId, { max: opts.max ?? 10, replies: false });
  const toRow = (t) => ({
    source: e.source,
    sourceId: e.sourceId,
    tweetId: t.id,
    text: t.text,
    createdAt: t.createdAt,
    retweetCount: t.retweetCount,
    replyCount: t.replyCount,
    likeCount: t.likeCount,
    quoteCount: t.quoteCount,
    impressionCount: t.impressionCount,
    isRetweet: t.isRetweet,
    isReply: t.isReply
  });
  return {
    tweets: timeline.tweets.map((t) => toRow(t)),
    rateLimited: Boolean(timeline.result.rateLimited),
    quotaExhausted: Boolean(timeline.result.quotaExhausted)
  };
}
2408
/**
 * Extract a LinkedIn handle from a URL or raw handle.
 *
 * `linkedin.com/in/<handle>` and `linkedin.com/company/<handle>` URLs yield
 * `<handle>`. Anything else has a leading scheme+host and a leading "@" removed
 * and is cut at the first "/", "?" or "#".
 *
 * @param {string} value URL or handle; a falsy value yields ""
 * @returns {string}
 */
function linkedinHandle(value) {
  if (!value) {
    return "";
  }
  const profileMatch = value.match(/linkedin\.com\/(?:in|company)\/([^/?#]+)/i);
  if (profileMatch) {
    return profileMatch[1];
  }
  const withoutHost = value.replace(/^https?:\/\/[^/]+\//, "");
  const withoutAt = withoutHost.replace(/^@/, "");
  const [firstSegment] = withoutAt.split(/[/?#]/);
  return firstSegment ?? "";
}
2416
/**
 * Enrich experts with LinkedIn profile data stored as the "linkedin" extension.
 *
 * Experts without a usable LinkedIn handle are counted as attempted and then
 * skipped. The loop stops early when the connector reports quota exhaustion or
 * rate limiting; the interrupted expert is not counted as attempted.
 *
 * @param {object} db database wrapper
 * @param {object} [opts] `client`, `source`, `refresh`, `max`, `delayMs` (default 500), `onLog`
 * @returns {Promise<{attempted:number, enriched:number, stoppedEarly:boolean, reason?:string}>}
 */
async function enrichLinkedIn(db, opts = {}) {
  const log = opts.onLog ?? (() => {});
  const client = opts.client ?? new ConnectorsClient;
  const delayMs = opts.delayMs ?? 500;
  const targets = db.expertsNeedingExt("linkedin", "linkedin", { source: opts.source, refresh: opts.refresh, limit: opts.max });
  const summary = { attempted: 0, enriched: 0, stoppedEarly: false };
  for (const expert of targets) {
    summary.attempted += 1;
    const handle = linkedinHandle(expert.socials.linkedin || "");
    if (!handle) {
      continue;
    }
    let lookup;
    try {
      lookup = await client.linkedInProfile(handle);
    } catch (err) {
      log(` ${expert.slug}: error ${err?.message || err}`);
      await sleep2(delayMs);
      continue;
    }
    const throttled = lookup.result.quotaExhausted || lookup.result.rateLimited;
    if (throttled) {
      summary.stoppedEarly = true;
      summary.reason = "linkedin connector quota/rate-limited or not authenticated \u2014 resume later";
      summary.attempted -= 1;
      break;
    }
    if (lookup.profile) {
      db.upsertExt(expert.source, expert.sourceId, "linkedin", { ...lookup.profile });
      summary.enriched += 1;
      log(` [${summary.attempted}/${targets.length}] ${expert.fullName || expert.slug}: linkedin \u2713`);
    }
    await sleep2(delayMs);
  }
  return summary;
}
2450
// Named character references decoded by htmlToText; any other name becomes a space.
var HTML_TEXT_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "]
]);
/**
 * Reduce an HTML document to plain text.
 *
 * Script and style elements are removed with their content, remaining tags are
 * replaced by spaces, character references are decoded (a small set of named
 * ones plus decimal/hex numeric ones; anything unknown becomes a space), and
 * whitespace is collapsed and trimmed. Decoding happens after tag removal so
 * that an escaped "&lt;b&gt;" stays visible text instead of being stripped.
 *
 * @param {string} html markup; a falsy value yields ""
 * @returns {string} plain text
 */
function htmlToText(html) {
  const decodeEntity = (_match, body) => {
    const key = body.toLowerCase();
    if (key.startsWith("#")) {
      const isHex = key[1] === "x";
      const code = Number.parseInt(key.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const isScalar = Number.isInteger(code) && code > 0 && code <= 0x10ffff && !(code >= 0xd800 && code <= 0xdfff);
      return isScalar ? String.fromCodePoint(code) : " ";
    }
    return HTML_TEXT_ENTITIES.get(key) ?? " ";
  };
  return (html || "")
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
    .replace(/&([a-z#0-9]+);/gi, decodeEntity)
    .replace(/\s+/g, " ")
    .trim();
}
2453
/**
 * Enrich experts with a plain-text summary of the website linked from their
 * stored X profile, saved as the "site" extension.
 *
 * Experts with no profile URL, with a t.co short link, or (unless `refresh`)
 * with an existing "site" extension are skipped without counting. Fetching is
 * best-effort: a non-OK response or a thrown error is reported through `onLog`
 * and the loop moves on to the next expert.
 *
 * @param {object} db database wrapper
 * @param {object} [opts] `source`, `refresh`, `max` attempts, `maxChars` (default 1500),
 *   `fetchFn`, `delayMs` (default 500), `onLog`
 * @returns {Promise<{attempted:number, enriched:number, stoppedEarly:boolean}>}
 */
async function enrichSite(db, opts = {}) {
  const log = opts.onLog ?? (() => {});
  const fetchFn = opts.fetchFn ?? fetch;
  const delayMs = opts.delayMs ?? 500;
  const maxChars = opts.maxChars ?? 1500;
  const all = db.list({ source: opts.source });
  const res = { attempted: 0, enriched: 0, stoppedEarly: false };
  let processed = 0;
  for (const e of all) {
    if (opts.max && processed >= opts.max)
      break;
    const x = db.getXProfile(e.source, e.sourceId);
    const url = x?.url || "";
    if (!url || /t\.co\//.test(url))
      continue;
    if (!opts.refresh && db.getExt(e.source, e.sourceId, "site"))
      continue;
    processed++;
    res.attempted++;
    const label = e.fullName || e.slug;
    try {
      const r = await fetchFn(url, { headers: { "User-Agent": "open-experts (+https://github.com/hasna/experts)" } });
      if (r.ok) {
        const text = htmlToText(await r.text()).slice(0, maxChars);
        db.upsertExt(e.source, e.sourceId, "site", { url, summary: text });
        res.enriched++;
        log(` [${res.attempted}] ${label}: site \u2713`);
      } else {
        log(` [${res.attempted}] ${label}: site HTTP ${r.status}`);
      }
    } catch (err) {
      // Still best-effort, but the failure is surfaced instead of being dropped silently.
      log(` [${res.attempted}] ${label}: site error ${err?.message || err}`);
    }
    await sleep2(delayMs);
  }
  return res;
}
2485
+
2486
+ // src/contacts.ts
2487
+ import { resolveMx } from "dns/promises";
2488
+ var sleep3 = (ms) => new Promise((r) => setTimeout(r, ms));
2489
+ var EMAIL_RE = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
2490
/**
 * Canonicalize an email string: trim, lowercase, and drop a leading "mailto:".
 *
 * @param {string} v raw email text
 * @returns {string}
 */
function normalizeEmail(v) {
  const lowered = v.trim().toLowerCase();
  return lowered.replace(/^mailto:/, "");
}
2493
/**
 * Canonicalize a phone number to digits, with a single leading "+" kept when
 * the cleaned input starts with one.
 *
 * Every other character is removed, including "+" signs that are not in the
 * leading position (for example "555+1234" becomes "5551234").
 *
 * @param {string} v raw phone text
 * @returns {string}
 */
function normalizePhone(v) {
  const cleaned = v.trim().replace(/[^\d+]/g, "");
  const digits = cleaned.replace(/\D/g, "");
  return cleaned.startsWith("+") ? `+${digits}` : digits;
}
2499
/**
 * Classify a phone number by digit count alone: 7 to 15 digits is "valid",
 * anything else "invalid".
 *
 * @param {string} v raw phone text
 * @returns {"valid"|"invalid"}
 */
function validatePhone(v) {
  // Only the digits matter here, so they are extracted directly.
  const digitCount = v.trim().replace(/\D/g, "").length;
  if (digitCount < 7 || digitCount > 15) {
    return "invalid";
  }
  return "valid";
}
2504
/**
 * Validate an email address by syntax and by an MX lookup on its domain.
 *
 * @param {string} email raw address
 * @param {Function} [resolver] MX resolver (defaults to `resolveMx`)
 * @returns {Promise<"valid"|"invalid"|"unknown">} "invalid" for bad syntax, no MX
 *   records, or an ENOTFOUND/ENODATA/NXDOMAIN error; "unknown" for any other
 *   resolver failure
 */
async function validateEmail(email, resolver = resolveMx) {
  const address = normalizeEmail(email);
  if (!EMAIL_RE.test(address)) {
    return "invalid";
  }
  const domain = address.split("@")[1];
  const definitiveCodes = ["ENOTFOUND", "ENODATA", "NXDOMAIN"];
  try {
    const records = await resolver(domain);
    const hasMx = records && records.length > 0;
    return hasMx ? "valid" : "invalid";
  } catch (err) {
    // Only errors proving the domain cannot receive mail are "invalid".
    const code = err?.code || "";
    return definitiveCodes.includes(code) ? "invalid" : "unknown";
  }
}
2519
+
2520
/**
 * Contact-discovery provider that talks to the Exa websets HTTP endpoints under
 * `${baseUrl}/websets/v0/websets`.
 *
 * `find(e)` creates a webset for one expert with e-mail and phone enrichments,
 * polls it until it reports "idle"/"completed" (or `maxPollMs` elapses), then
 * reads its items and extracts contacts from them.
 */
class ExaWebsetsProvider {
  name = "exa";
  apiKey;
  fetchFn;
  pollMs;
  maxPollMs;
  baseUrl;
  /**
   * @param {object} [opts]
   * @param {string} [opts.apiKey] - defaults to `process.env.EXA_API_KEY`.
   * @param {Function} [opts.fetchFn=fetch] - injectable fetch implementation.
   * @param {number} [opts.pollMs=3000] - pause between status polls.
   * @param {number} [opts.maxPollMs=120000] - total polling budget.
   * @param {string} [opts.baseUrl="https://api.exa.ai"] - trailing slashes are removed.
   */
  constructor(opts = {}) {
    this.apiKey = opts.apiKey ?? process.env.EXA_API_KEY ?? "";
    this.fetchFn = opts.fetchFn ?? fetch;
    this.pollMs = opts.pollMs ?? 3000;
    this.maxPollMs = opts.maxPollMs ?? 120000;
    this.baseUrl = (opts.baseUrl ?? "https://api.exa.ai").replace(/\/+$/, "");
  }
  /** Request headers sent with every call. */
  headers() {
    return { "content-type": "application/json", "x-api-key": this.apiKey };
  }
  /** Build the natural-language search query from name, title and company. */
  query(e) {
    const company = e.extra?.company || "";
    const bits = [e.fullName, e.title, company].filter(Boolean).join(", ");
    return `Contact information (email and phone) for ${bits || e.fullName || e.slug}`;
  }
  /**
   * Discover contacts for one expert.
   *
   * @param {object} e - expert record (`fullName`, `title`, `slug`, `extra.company` are read).
   * @returns {Promise<{contacts: Array, rateLimited?: boolean, quotaExhausted?: boolean}>}
   *   `rateLimited` on HTTP 429; `quotaExhausted` when no API key is set or the
   *   create call answers 401/402/403; otherwise the extracted contacts
   *   (empty on any other failure).
   */
  async find(e) {
    if (!this.apiKey) {
      return { contacts: [], quotaExhausted: true };
    }
    const websetsUrl = `${this.baseUrl}/websets/v0/websets`;
    const createRes = await this.fetchFn(websetsUrl, {
      method: "POST",
      headers: this.headers(),
      body: JSON.stringify({
        search: { query: this.query(e), count: 3 },
        enrichments: [
          { description: "email address of this person", format: "email" },
          { description: "phone number of this person", format: "phone" }
        ]
      })
    });
    if (createRes.status === 429) {
      return { contacts: [], rateLimited: true };
    }
    // 401 (rejected key) is as unrecoverable within a run as a missing key or
    // exhausted credits: report it the same way so the caller stops instead of
    // retrying the same failure for every remaining expert.
    if (createRes.status === 401 || createRes.status === 402 || createRes.status === 403) {
      return { contacts: [], quotaExhausted: true };
    }
    if (!createRes.ok) {
      return { contacts: [] };
    }
    const created = await createRes.json();
    const id = created?.id;
    if (!id) {
      return { contacts: [] };
    }
    const deadline = Date.now() + this.maxPollMs;
    let status = created?.status;
    while (status !== "idle" && status !== "completed" && Date.now() < deadline) {
      await sleep3(this.pollMs);
      const r = await this.fetchFn(`${websetsUrl}/${id}`, { headers: this.headers() });
      if (!r.ok) {
        break;
      }
      status = (await r.json())?.status;
    }
    // Items are read even if polling timed out or failed, so partial results are kept.
    const itemsRes = await this.fetchFn(`${websetsUrl}/${id}/items`, { headers: this.headers() });
    if (!itemsRes.ok) {
      return { contacts: [] };
    }
    const items = await itemsRes.json();
    return { contacts: extractExaContacts(items) };
  }
}
2582
/**
 * Pull de-duplicated e-mail and phone contacts out of a websets items payload.
 *
 * Items are read from `payload.data`, `payload.items`, or the payload itself
 * when it is an array. For each enrichment, values come from `result`,
 * `results` or `value`; each value may be a string or an object carrying
 * `value`/`text`. A value is treated as an e-mail when the enrichment format is
 * "email" or it contains "@", otherwise as a phone when the format is "phone"
 * or it looks like a digit run.
 *
 * Scalar `result`/`results` values and non-array containers are wrapped in an
 * array; previously a plain string result was iterated character by character
 * and a non-iterable container threw.
 *
 * @param {*} payload - parsed JSON response.
 * @returns {Array<{type: "email"|"phone", value: string, confidence: number}>}
 */
function extractExaContacts(payload) {
  const out = [];
  const seen = new Set();
  const asList = (v) => {
    if (v == null) {
      return [];
    }
    return Array.isArray(v) ? v : [v];
  };
  const items = asList(payload?.data ?? payload?.items ?? (Array.isArray(payload) ? payload : []));
  for (const item of items) {
    for (const en of asList(item?.enrichments)) {
      const fmt = en?.format;
      const confidence = en?.confidence ?? 0.5;
      const results = asList(en?.result ?? en?.results ?? (en?.value != null ? [en.value] : []));
      for (const r of results) {
        const raw = typeof r === "string" ? r : r?.value ?? r?.text;
        if (!raw) {
          continue;
        }
        const text = String(raw);
        if (fmt === "email" || /@/.test(text)) {
          const v = normalizeEmail(text);
          if (EMAIL_RE.test(v) && !seen.has("e:" + v)) {
            seen.add("e:" + v);
            out.push({ type: "email", value: v, confidence });
          }
        } else if (fmt === "phone" || /\+?\d[\d\s().-]{6,}/.test(text)) {
          const v = normalizePhone(text);
          if (v.replace(/\D/g, "").length >= 7 && !seen.has("p:" + v)) {
            seen.add("p:" + v);
            out.push({ type: "phone", value: v, confidence });
          }
        }
      }
    }
  }
  return out;
}
2613
/**
 * Run contact discovery for every expert that still needs contacts and store
 * what the provider finds as "unverified" contacts.
 *
 * Stops early (and does not count the current expert as attempted) when the
 * provider reports exhausted quota, or a rate limit with no contacts. Provider
 * exceptions are logged and the run continues with the next expert.
 *
 * Changes from the previous version: a provider result without a `contacts`
 * array is treated as "no contacts" instead of crashing, and the inter-expert
 * delay is no longer applied after the final target.
 *
 * @param {object} db - store exposing `expertsNeedingContacts`, `upsertContact`, `setMeta`.
 * @param {object} [opts]
 * @param {string} [opts.source] - restrict to one source.
 * @param {boolean} [opts.refresh] - include experts that already have contacts.
 * @param {number} [opts.max] - passed to the store as the target limit.
 * @param {number} [opts.delayMs=500] - pause between experts.
 * @param {{name: string, find: Function}} [opts.provider] - defaults to a new ExaWebsetsProvider.
 * @param {(msg: string) => void} [opts.onLog] - progress sink.
 * @returns {Promise<{attempted: number, withContacts: number, contacts: number, stoppedEarly: boolean, reason?: string}>}
 */
async function discoverContacts(db, opts = {}) {
  const log = opts.onLog ?? (() => {});
  const provider = opts.provider ?? new ExaWebsetsProvider();
  const delayMs = opts.delayMs ?? 500;
  const targets = db.expertsNeedingContacts({ source: opts.source, refresh: opts.refresh, limit: opts.max });
  const res = { attempted: 0, withContacts: 0, contacts: 0, stoppedEarly: false };
  const now = new Date().toISOString();
  // Only pause when another target follows.
  const pauseAfter = async (index) => {
    if (index < targets.length - 1) {
      await sleep3(delayMs);
    }
  };
  for (let i = 0; i < targets.length; i++) {
    const e = targets[i];
    res.attempted++;
    let found;
    try {
      found = await provider.find(e);
    } catch (err) {
      log(` ${e.slug}: error ${err?.message || err}`);
      await pauseAfter(i);
      continue;
    }
    const contacts = Array.isArray(found?.contacts) ? found.contacts : [];
    if (found?.quotaExhausted) {
      res.stoppedEarly = true;
      res.reason = `${provider.name} quota/credits exhausted \u2014 top up or wait, then re-run`;
      res.attempted--;
      log(`${provider.name} quota exhausted; stopping at ${i}/${targets.length}`);
      break;
    }
    if (found?.rateLimited && contacts.length === 0) {
      res.stoppedEarly = true;
      res.reason = `${provider.name} rate limited \u2014 resume later`;
      res.attempted--;
      break;
    }
    for (const c of contacts) {
      db.upsertContact({
        source: e.source,
        sourceId: e.sourceId,
        type: c.type,
        value: c.value,
        label: c.label ?? "",
        provider: provider.name,
        confidence: c.confidence,
        status: "unverified",
        verifiedAt: "",
        createdAt: now
      });
    }
    if (contacts.length) {
      res.withContacts++;
    }
    res.contacts += contacts.length;
    log(` [${res.attempted}/${targets.length}] ${e.fullName || e.slug}: ${contacts.length} contacts`);
    await pauseAfter(i);
  }
  db.setMeta(`last_contacts:${opts.source ?? "all"}`, now);
  return res;
}
2667
/**
 * Validate stored contacts and write the resulting status back to the store.
 *
 * E-mail contacts go through validateEmail (with the optional `opts.resolver`),
 * everything else through validatePhone. Progress is reported every 50 contacts.
 *
 * @param {object} db - store exposing `contactsToVerify` and `setContactStatus`.
 * @param {object} [opts]
 * @param {string} [opts.source] - restrict to one source.
 * @param {number} [opts.max] - passed to the store as the limit.
 * @param {number} [opts.delayMs=50] - pause after each contact; a falsy value disables it.
 * @param {Function} [opts.resolver] - MX resolver forwarded to validateEmail.
 * @param {(msg: string) => void} [opts.onLog] - progress sink.
 * @returns {Promise<{checked: number, valid: number, invalid: number, unknown: number}>}
 */
async function verifyContacts(db, opts = {}) {
  const report = opts.onLog ?? (() => {});
  const pauseMs = opts.delayMs ?? 50;
  const queue = db.contactsToVerify({ source: opts.source, limit: opts.max });
  const tally = { checked: 0, valid: 0, invalid: 0, unknown: 0 };
  for (const contact of queue) {
    const verdict = contact.type === "email"
      ? await validateEmail(contact.value, opts.resolver)
      : validatePhone(contact.value);
    db.setContactStatus(contact.source, contact.sourceId, contact.type, contact.value, verdict);
    tally.checked++;
    tally[verdict]++;
    if (tally.checked % 50 === 0) {
      report(` verified ${tally.checked}`);
    }
    if (pauseMs) {
      await sleep3(pauseMs);
    }
  }
  return tally;
}
2688
+
2689
+ // src/sync.ts
2690
/**
 * Map an expert plus its stored contacts to the flat record shape pushed to a
 * contacts sink.
 *
 * `notes` falls back to the first 200 characters of the bio when there is no
 * headline; a missing bio now yields "" instead of throwing, and a nullish
 * `contacts` argument is treated as an empty list.
 *
 * @param {object} e - expert record.
 * @param {Array<{type: string, value: string, status: string}>} contacts
 * @returns {object} contact record with name, title, company, emails, phones,
 *   socials, url, source, sourceId, tags and notes.
 */
function expertToContactRecord(e, contacts) {
  const ofType = (type) =>
    (contacts ?? [])
      .filter((c) => c.type === type)
      .map((c) => ({ value: c.value, status: c.status }));
  return {
    name: e.fullName || e.slug,
    title: e.title,
    company: e.extra?.company || "",
    emails: ofType("email"),
    phones: ofType("phone"),
    socials: e.socials,
    url: e.url,
    source: e.source,
    sourceId: e.sourceId,
    tags: e.tags,
    notes: e.headline || (e.bio ?? "").slice(0, 200)
  };
}
2706
+
2707
/**
 * Sink that keeps every pushed record in memory and returns the pushed batch
 * serialized as pretty-printed JSON.
 */
class JsonSink {
  name = "json";
  records = [];
  /**
   * @param {Array<object>} records - batch to store.
   * @returns {Promise<{ok: number, failed: number, output: string}>}
   */
  async push(records) {
    // Appended one at a time: spreading a very large batch into push() can
    // exceed the engine's argument limit and throw a RangeError.
    for (const record of records) {
      this.records.push(record);
    }
    return { ok: records.length, failed: 0, output: JSON.stringify(records, null, 2) };
  }
}
2715
+
2716
/**
 * Sink that pushes each record by spawning `<bin> upsert --json <record>` and
 * counting exit codes.
 */
class CliSink {
  bin;
  name = "contacts-cli";
  /** @param {string} [bin="contacts"] - executable to invoke. */
  constructor(bin = "contacts") {
    this.bin = bin;
  }
  /**
   * @param {Array<object>} records - records to upsert, one process per record, sequentially.
   * @returns {Promise<{ok: number, failed: number}>} exit code 0 counts as ok;
   *   a non-zero exit or a spawn error counts as failed.
   */
  async push(records) {
    let ok = 0;
    let failed = 0;
    for (const r of records) {
      try {
        // The child's output is never read here, so it is discarded rather than
        // piped: an unread pipe can fill up and stall a chatty child.
        const proc = Bun.spawn([this.bin, "upsert", "--json", JSON.stringify(r)], { stdout: "ignore", stderr: "ignore" });
        const code = await proc.exited;
        if (code === 0) {
          ok++;
        } else {
          failed++;
        }
      } catch {
        // Spawn failure (e.g. binary not found) is counted, not thrown.
        failed++;
      }
    }
    return { ok, failed };
  }
}
2740
/**
 * Resolve the given ids/slugs to experts, build a contact record for each one
 * found, and hand the batch to a sink.
 *
 * @param {object} db - store exposing `get` and `contacts`.
 * @param {Iterable<string>} idsOrSlugs - lookup keys; unknown keys are skipped.
 * @param {object} [opts]
 * @param {string} [opts.source] - disambiguating source passed to `db.get`.
 * @param {{push: Function}} [opts.sink] - defaults to a new JsonSink.
 * @returns {Promise<object>} `{ records }` merged with whatever the sink's `push` returns.
 */
async function syncContacts(db, idsOrSlugs, opts = {}) {
  const sink = opts.sink ?? new JsonSink();
  const records = Array.from(idsOrSlugs).flatMap((key) => {
    const expert = db.get(key, opts.source);
    if (!expert) {
      return [];
    }
    return [expertToContactRecord(expert, db.contacts(expert.source, expert.sourceId))];
  });
  const pushed = await sink.push(records);
  return Object.assign({ records }, pushed);
}
1270
2752
 
1271
2753
  // src/cli/index.ts
1272
- var VERSION = "0.0.5";
2754
+ var VERSION = "0.0.7";
1273
2755
/** Open the experts database at the path given by the global `--db` option, or at the default path. */
function openDb() {
  const { db: dbPath } = program.opts();
  return new ExpertsDB(dbPath || defaultDbPath());
}
2759
/**
 * Collapse whitespace runs to single spaces, trim, and shorten to at most `n`
 * characters, ending in an ellipsis when shortened. Despite the name, the cut
 * is made at the end of the string.
 *
 * For `n < 1` the result is "" (previously a negative slice index produced a
 * string longer than `n`).
 *
 * @param {*} v - value to display; nullish becomes "".
 * @param {number} [n=32] - maximum length.
 * @returns {string}
 */
function truncateMid(v, n = 32) {
  const s = String(v ?? "").replace(/\s+/g, " ").trim();
  if (s.length <= n) {
    return s;
  }
  return n < 1 ? "" : s.slice(0, n - 1) + "\u2026";
}
1277
2763
  function requireData(db) {
1278
2764
  if (db.count() === 0) {
1279
2765
  console.error(chalk2.yellow("No experts stored yet. Run ") + chalk2.bold("experts crawl intro") + chalk2.yellow(" first."));
@@ -1305,6 +2791,8 @@ program.command("crawl [source]").description("Fetch experts from a source into
1305
2791
  console.log(` ${chalk2.dim(t.name.padEnd(20))} ${t.count}`);
1306
2792
  }
1307
2793
  console.log(chalk2.dim(`graph: ${res.graph.nodes} nodes, ${res.graph.edges} edges`));
2794
+ if (res.changes)
2795
+ console.log(chalk2.dim(`changes since last crawl: ${res.changes}`));
1308
2796
  db.close();
1309
2797
  });
1310
2798
  program.command("enrich [source]").description("Enrich experts via X/Twitter: profile, recent tweets, avatar").option("--max <n>", "limit experts processed (resumable)", (v) => parseInt(v, 10)).option("--refresh", "re-enrich experts already done").option("--since-days <n>", "tweet window in days", (v) => parseInt(v, 10), 30).option("--delay <ms>", "min delay between experts", (v) => parseInt(v, 10), 1200).option("--tweet-max <n>", "max tweets fetched per expert", (v) => parseInt(v, 10), 100).option("--no-avatars", "skip downloading profile pictures").option("--no-tweets", "skip fetching tweets").action(async (source, cmdOpts) => {
@@ -1326,6 +2814,7 @@ program.command("enrich [source]").description("Enrich experts via X/Twitter: pr
1326
2814
  `))
1327
2815
  });
1328
2816
  db.rebuildGraph();
2817
+ db.rescore(source);
1329
2818
  console.log(chalk2.green(`\u2713 enriched ${res.enriched}`) + chalk2.dim(` (${res.notFound} not found, ${res.tweets} tweets, ${res.avatars} avatars, ${res.attempted} attempted)`));
1330
2819
  if (res.stoppedEarly)
1331
2820
  console.log(chalk2.yellow(`\u26A0 stopped early: ${res.reason}`));
@@ -1333,14 +2822,93 @@ program.command("enrich [source]").description("Enrich experts via X/Twitter: pr
1333
2822
  console.log(chalk2.dim(`progress: ${after.enriched}/${after.withHandle} enriched`));
1334
2823
  db.close();
1335
2824
  });
1336
- program.command("tweets <idOrSlug>").description("Show an expert's stored recent tweets").option("-s, --source <name>", "disambiguate by source").option("-n, --limit <n>", "max tweets", (v) => parseInt(v, 10), 10).action((idOrSlug, cmdOpts) => {
2825
// `experts avatars [source]`: download missing profile pictures, then print the totals.
program
  .command("avatars [source]")
  .description("Download + properly name profile pictures for experts missing one")
  .option("--delay <ms>", "delay between downloads", (v) => parseInt(v, 10), 150)
  .action(async (source, cmdOpts) => {
    const db = openDb();
    requireData(db);
    console.error(chalk2.dim("Backfilling profile pictures from source media\u2026"));
    // Progress lines go to stderr so stdout carries only the summary.
    const onLog = (line) => process.stderr.write(chalk2.dim(line + "\n"));
    const outcome = await backfillAvatars(db, { source, delayMs: cmdOpts.delay, onLog });
    const headline = chalk2.green(`\u2713 ${outcome.downloaded} avatars downloaded`);
    const detail = chalk2.dim(` (${outcome.skipped} already had one or no URL, ${outcome.failed} failed)`);
    console.log(headline + detail);
    const total = db.enrichmentStats(source).avatars;
    console.log(chalk2.dim(`total experts with a named avatar: ${total}`));
    db.close();
  });
2840
// `experts enrich-linkedin [source]`: run LinkedIn enrichment and print a summary.
program
  .command("enrich-linkedin [source]")
  .description("Enrich experts with LinkedIn headline/company/about (needs linkedin connector auth)")
  .option("--max <n>", "limit experts processed", (v) => parseInt(v, 10))
  .option("--refresh", "re-enrich")
  .option("--delay <ms>", "delay between experts", (v) => parseInt(v, 10), 500)
  .action(async (source, cmdOpts) => {
    const db = openDb();
    requireData(db);
    const onLog = (line) => process.stderr.write(chalk2.dim(line + "\n"));
    const outcome = await enrichLinkedIn(db, {
      source,
      max: cmdOpts.max,
      refresh: cmdOpts.refresh,
      delayMs: cmdOpts.delay,
      onLog
    });
    const headline = chalk2.green(`\u2713 enriched ${outcome.enriched}`);
    console.log(headline + chalk2.dim(` (${outcome.attempted} attempted)`));
    if (outcome.stoppedEarly) {
      console.log(chalk2.yellow(`\u26A0 ${outcome.reason}`));
    }
    db.close();
  });
2850
// `experts enrich-sites [source]`: fetch personal sites via enrichSite and print a summary.
program
  .command("enrich-sites [source]")
  .description("Fetch experts' personal sites and store a text summary")
  .option("--max <n>", "limit experts processed", (v) => parseInt(v, 10))
  .option("--refresh", "re-enrich")
  .option("--delay <ms>", "delay between requests", (v) => parseInt(v, 10), 500)
  .action(async (source, cmdOpts) => {
    const db = openDb();
    requireData(db);
    const onLog = (line) => process.stderr.write(chalk2.dim(line + "\n"));
    const outcome = await enrichSite(db, {
      source,
      max: cmdOpts.max,
      refresh: cmdOpts.refresh,
      delayMs: cmdOpts.delay,
      onLog
    });
    const headline = chalk2.green(`\u2713 enriched ${outcome.enriched} sites`);
    console.log(headline + chalk2.dim(` (${outcome.attempted} attempted)`));
    db.close();
  });
2858
// `experts enrich-youtube [source]`: fetch recent videos for experts with a YouTube handle.
program
  .command("enrich-youtube [source]")
  .description("Fetch + store recent YouTube videos for experts with a YT handle")
  .option("--max <n>", "limit experts processed", (v) => parseInt(v, 10))
  .option("--refresh", "re-fetch for experts that already have videos")
  .option("--video-max <n>", "videos per expert", (v) => parseInt(v, 10), 10)
  .option("--delay <ms>", "delay between experts", (v) => parseInt(v, 10), 500)
  .action(async (source, cmdOpts) => {
    const db = openDb();
    requireData(db);
    // Reported up front; computed without the --max cap.
    const pending = db.expertsNeedingVideos({ source, refresh: cmdOpts.refresh });
    console.error(chalk2.dim(`Fetching YouTube videos \u2014 ${pending.length} experts with handles to do`));
    const onLog = (line) => process.stderr.write(chalk2.dim(line + "\n"));
    const outcome = await enrichYouTube(db, {
      source,
      max: cmdOpts.max,
      refresh: cmdOpts.refresh,
      videoMax: cmdOpts.videoMax,
      delayMs: cmdOpts.delay,
      onLog
    });
    const headline = chalk2.green(`\u2713 ${outcome.videos} videos for ${outcome.withVideos} experts`);
    console.log(headline + chalk2.dim(` (${outcome.attempted} attempted)`));
    if (outcome.stoppedEarly) {
      console.log(chalk2.yellow(`\u26A0 stopped early: ${outcome.reason}`));
    }
    db.close();
  });
2877
// `experts videos <idOrSlug>`: print an expert's stored YouTube videos (JSON with the global --json flag).
program
  .command("videos <idOrSlug>")
  .description("Show an expert's recent YouTube videos")
  .option("-s, --source <name>", "disambiguate by source")
  .option("-n, --limit <n>", "max videos", (v) => parseInt(v, 10), 10)
  .action((idOrSlug, cmdOpts) => {
    const db = openDb();
    const e = db.get(idOrSlug, cmdOpts.source);
    if (!e) {
      console.error(chalk2.red(`No expert found for "${idOrSlug}".`));
      process.exit(1);
    }
    const videos = db.recentVideos(e.source, e.sourceId, cmdOpts.limit);
    const displayName = e.fullName || e.slug;
    if (program.opts().json) {
      console.log(JSON.stringify(videos, null, 2));
    } else if (videos.length === 0) {
      console.log(chalk2.yellow(`No videos for ${displayName}. Run \`experts enrich-youtube\`.`));
    } else {
      console.log(chalk2.bold(`YouTube \u2014 ${displayName}`));
      videos.forEach((video) => {
        const day = chalk2.dim((video.publishedAt || "").slice(0, 10));
        console.log(` ${day} ${video.title}\n${chalk2.dim(video.url)}`);
      });
    }
    db.close();
  });
2897
+ program.command("tweets <idOrSlug>").description("Show an expert's recent tweets (stored, or --live)").option("-s, --source <name>", "disambiguate by source").option("-n, --limit <n>", "max tweets", (v) => parseInt(v, 10), 10).option("--live", "fetch latest live via the connector (needs X credits)").action(async (idOrSlug, cmdOpts) => {
1337
2898
  const db = openDb();
1338
2899
  const e = db.get(idOrSlug, cmdOpts.source);
1339
2900
  if (!e) {
1340
2901
  console.error(chalk2.red(`No expert found for "${idOrSlug}".`));
1341
2902
  process.exit(1);
1342
2903
  }
1343
- const tweets = db.recentTweets(e.source, e.sourceId, cmdOpts.limit);
2904
+ let tweets = db.recentTweets(e.source, e.sourceId, cmdOpts.limit);
2905
+ if (cmdOpts.live) {
2906
+ const live = await liveTweets(db, e, { max: cmdOpts.limit });
2907
+ if (live.quotaExhausted)
2908
+ console.error(chalk2.yellow("X API quota exhausted \u2014 showing stored tweets."));
2909
+ else if (live.tweets.length)
2910
+ tweets = live.tweets;
2911
+ }
1344
2912
  if (program.opts().json) {
1345
2913
  console.log(JSON.stringify(tweets, null, 2));
1346
2914
  } else if (tweets.length === 0) {
@@ -1357,14 +2925,121 @@ ${chalk2.dim(date)} ${chalk2.yellow(`\u2665${t.likeCount} \u21BA${t.retweetCount
1357
2925
  }
1358
2926
  db.close();
1359
2927
  });
1360
- program.command("reindex").description("Rebuild the knowledge graph from stored experts (no network)").action(() => {
2928
// `experts find-contacts [source]`: run contact discovery and print a summary.
program
  .command("find-contacts [source]")
  .description("Discover email + phone for experts via Exa.ai websets (resumable)")
  .option("--max <n>", "limit experts processed", (v) => parseInt(v, 10))
  .option("--refresh", "re-discover for experts that already have contacts")
  .option("--delay <ms>", "delay between experts", (v) => parseInt(v, 10), 500)
  .action(async (source, cmdOpts) => {
    const db = openDb();
    requireData(db);
    // Backlog size is reported without the --max cap; the cap is shown separately.
    const pending = db.expertsNeedingContacts({ source, refresh: cmdOpts.refresh });
    const capNote = cmdOpts.max ? ` (capped ${cmdOpts.max})` : "";
    console.error(chalk2.dim(`Discovering contacts via Exa \u2014 ${pending.length} experts to do${capNote}`));
    const onLog = (line) => process.stderr.write(chalk2.dim(line + "\n"));
    const outcome = await discoverContacts(db, {
      source,
      max: cmdOpts.max,
      refresh: cmdOpts.refresh,
      delayMs: cmdOpts.delay,
      onLog
    });
    const headline = chalk2.green(`\u2713 ${outcome.contacts} contacts for ${outcome.withContacts} experts`);
    console.log(headline + chalk2.dim(` (${outcome.attempted} attempted)`));
    if (outcome.stoppedEarly) {
      console.log(chalk2.yellow(`\u26A0 stopped early: ${outcome.reason}`));
    }
    console.log(chalk2.dim("Next: `experts verify-contacts` to set working/not status."));
    db.close();
  });
2947
// `experts verify-contacts [source]`: validate stored contacts and print the tally.
program
  .command("verify-contacts [source]")
  .description("Validate discovered contacts and set working/not status")
  .option("--max <n>", "limit contacts checked", (v) => parseInt(v, 10))
  .action(async (source, cmdOpts) => {
    const db = openDb();
    requireData(db);
    const onLog = (line) => process.stderr.write(chalk2.dim(line + "\n"));
    const tally = await verifyContacts(db, { source, max: cmdOpts.max, onLog });
    const headline = chalk2.green(`\u2713 verified ${tally.checked}`);
    const breakdown = chalk2.dim(` (${tally.valid} valid, ${tally.invalid} invalid, ${tally.unknown} unknown)`);
    console.log(headline + breakdown);
    db.close();
  });
2959
// `experts contacts <idOrSlug>`: list an expert's stored contacts with their verification status.
program
  .command("contacts <idOrSlug>")
  .description("Show an expert's discovered contacts with working/not status")
  .option("-s, --source <name>", "disambiguate by source")
  .action((idOrSlug, cmdOpts) => {
    const db = openDb();
    const e = db.get(idOrSlug, cmdOpts.source);
    if (!e) {
      console.error(chalk2.red(`No expert found for "${idOrSlug}".`));
      process.exit(1);
    }
    const list = db.contacts(e.source, e.sourceId);
    const displayName = e.fullName || e.slug;
    // "valid"/"invalid" get a coloured label; any other status is shown dimmed as-is.
    const statusLabel = (status) => {
      if (status === "valid") {
        return chalk2.green("\u2713 working");
      }
      if (status === "invalid") {
        return chalk2.red("\u2717 not working");
      }
      return chalk2.dim(status);
    };
    if (program.opts().json) {
      console.log(JSON.stringify(list, null, 2));
    } else if (list.length === 0) {
      console.log(chalk2.yellow(`No contacts for ${displayName}. Run \`experts find-contacts\`.`));
    } else {
      console.log(chalk2.bold(`Contacts \u2014 ${displayName}`));
      for (const contact of list) {
        console.log(` ${contact.type.padEnd(5)} ${contact.value.padEnd(36)} ${statusLabel(contact.status)}`);
      }
    }
    db.close();
  });
2980
// `experts reindex`: rebuild the graph, then rescore, then rebuild persons, all from stored data.
program
  .command("reindex")
  .description("Rebuild the knowledge graph + authority scores (no network)")
  .action(() => {
    const db = openDb();
    requireData(db);
    const graph = db.rebuildGraph();
    const rescored = db.rescore();
    const identity = db.rebuildPersons();
    const summary = `\u2713 graph: ${graph.nodes} nodes/${graph.edges} edges; rescored ${rescored}; ${identity.persons} persons (${identity.experts} records)`;
    console.log(chalk2.green(summary));
    db.close();
  });
2989
// `experts changes`: print the recorded change log (additions and field edits).
program
  .command("changes")
  .description("What changed between crawls (price/title/bio edits, new experts)")
  .option("-s, --source <name>", "filter by source")
  .option("-n, --limit <n>", "max rows", (v) => parseInt(v, 10), 50)
  .action((cmdOpts) => {
    const db = openDb();
    requireData(db);
    const rows = db.changes({ source: cmdOpts.source, limit: cmdOpts.limit });
    if (program.opts().json) {
      console.log(JSON.stringify(rows, null, 2));
    } else if (rows.length === 0) {
      console.log(chalk2.dim("No changes recorded yet (changes are detected on re-crawl)."));
    } else {
      rows.forEach((change) => {
        const when = chalk2.dim((change.detected_at || "").slice(0, 10));
        const origin = chalk2.dim(`[${change.source}:${change.source_id}]`);
        if (change.kind === "added") {
          console.log(`${when} ${chalk2.green("\uFF0B added")} ${change.new_value} ${origin}`);
          return;
        }
        const before = chalk2.dim(truncateMid(change.old_value));
        const after = truncateMid(change.new_value);
        console.log(`${when} ${chalk2.yellow("~ " + change.field)} ${before} \u2192 ${after} ${origin}`);
      });
      console.log(chalk2.dim(`\n${rows.length} changes`));
    }
    db.close();
  });
1367
- program.command("list").description("List experts with filters").option("-s, --source <name>", "filter by source").option("-t, --topic <topic>", "filter by topic/category").option("--verified", "only verified experts").option("--top", "only featured/top experts").option("--min-price <n>", "minimum price", (v) => parseInt(v, 10)).option("--max-price <n>", "maximum price", (v) => parseInt(v, 10)).option("--min-rating <n>", "minimum rating", parseFloat).option("--sort <field>", "rating|price|name|reviews", "rating").option("--asc", "ascending order").option("-n, --limit <n>", "max rows", (v) => parseInt(v, 10), 25).action((cmdOpts) => {
3011
// `experts persons`: print identity-resolution counts from db.personStats().
program
  .command("persons")
  .description("Identity-resolution overview (canonical persons vs. records)")
  .action(() => {
    const db = openDb();
    requireData(db);
    const stats = db.personStats();
    if (program.opts().json) {
      console.log(JSON.stringify(stats, null, 2));
    } else {
      const lines = [
        chalk2.bold("Identity resolution"),
        ` Expert records ${stats.experts}`,
        ` Unique persons ${stats.persons}`,
        ` Merged (>1 src) ${stats.duplicated}`
      ];
      for (const line of lines) {
        console.log(line);
      }
    }
    db.close();
  });
3025
// `experts stale`: list the experts whose data was seen longest ago, with age in days.
// An unparseable `lastSeen` now shows "?" like a missing one (it used to print "NaNd").
program
  .command("stale")
  .description("List the experts whose data is oldest (refresh candidates)")
  .option("-s, --source <name>", "filter by source")
  .option("-n, --limit <n>", "max rows", (v) => parseInt(v, 10), 25)
  .action((cmdOpts) => {
    const db = openDb();
    requireData(db);
    const rows = db.stalest({ source: cmdOpts.source, limit: cmdOpts.limit });
    if (program.opts().json) {
      console.log(JSON.stringify(rows, null, 2));
    } else {
      const now = Date.now();
      for (const { expert, lastSeen } of rows) {
        const seenAt = lastSeen ? Date.parse(lastSeen) : NaN;
        const days = Number.isNaN(seenAt) ? "?" : Math.floor((now - seenAt) / 86400000);
        console.log(`${chalk2.dim(String(days).padStart(4) + "d")} ${formatRow(expert)}`);
      }
      console.log(chalk2.dim(`\n${rows.length} stalest experts`));
    }
    db.close();
  });
3042
+ program.command("list").description("List experts with filters").option("-s, --source <name>", "filter by source").option("-t, --topic <topic>", "filter by topic/category").option("--verified", "only verified experts").option("--top", "only featured/top experts").option("--min-price <n>", "minimum price", (v) => parseInt(v, 10)).option("--max-price <n>", "maximum price", (v) => parseInt(v, 10)).option("--min-rating <n>", "minimum rating", parseFloat).option("--sort <field>", "rating|price|name|reviews|authority", "rating").option("--asc", "ascending order").option("-n, --limit <n>", "max rows", (v) => parseInt(v, 10), 25).action((cmdOpts) => {
1368
3043
  const db = openDb();
1369
3044
  requireData(db);
1370
3045
  const filters = {
@@ -1391,6 +3066,81 @@ ${rows.length} experts`));
1391
3066
  }
1392
3067
  db.close();
1393
3068
  });
3069
// `experts embed [source]`: build embeddings for stored experts with the configured embedder.
program
  .command("embed [source]")
  .description("Build the semantic search index (embeddings) for stored experts")
  .action(async (source) => {
    const db = openDb();
    requireData(db);
    const embedder = getEmbedder();
    console.error(chalk2.dim(`Embedding with ${embedder.id}\u2026`));
    // Progress is redrawn in place: carriage return first, padded to 40 columns.
    const onLog = (msg) => process.stderr.write(chalk2.dim(`\r${msg}`.padEnd(40)));
    const count = await db.buildEmbeddings(embedder, { source, onLog });
    process.stderr.write("\n");
    console.log(chalk2.green(`\u2713 embedded ${count} experts (${embedder.id})`));
    db.close();
  });
3083
// `experts ask <query...>`: embed the query and print the nearest experts with their scores.
program
  .command("ask <query...>")
  .description("Natural-language semantic search: 'who can help with X'")
  .option("-s, --source <name>", "filter by source")
  .option("-n, --limit <n>", "max rows", (v) => parseInt(v, 10), 15)
  .action(async (query, cmdOpts) => {
    const db = openDb();
    requireData(db);
    if (db.vectorCount() === 0) {
      console.error(chalk2.yellow("No semantic index yet. Run ") + chalk2.bold("experts embed") + chalk2.yellow(" first."));
      process.exit(1);
    }
    const question = query.join(" ");
    const embedder = getEmbedder();
    const [queryVector] = await embedder.embed([question]);
    const results = db.semanticSearch(queryVector, { source: cmdOpts.source, limit: cmdOpts.limit });
    if (program.opts().json) {
      console.log(JSON.stringify(results, null, 2));
    } else {
      // Show the source column only when the hits span more than one source.
      const distinctSources = new Set(results.map((hit) => hit.expert.source));
      const showSource = distinctSources.size > 1;
      results.forEach(({ expert, score }) => {
        console.log(chalk2.dim(score.toFixed(3)) + " " + formatRow(expert, { showSource }));
      });
      console.log(chalk2.dim(`\n${results.length} matches for "${question}"`));
    }
    db.close();
  });
3105
// `experts brief <text...>`: semantic search on a pasted brief, de-duplicated by person,
// with each hit annotated by the expert tags that literally appear in the brief.
program
  .command("brief <text...>")
  .description("Paste a brief \u2192 ranked, de-duplicated expert shortlist with why each matched")
  .option("-s, --source <name>", "filter by source")
  .option("-n, --limit <n>", "shortlist size", (v) => parseInt(v, 10), 10)
  .action(async (text, cmdOpts) => {
    const db = openDb();
    requireData(db);
    if (db.vectorCount() === 0) {
      console.error(chalk2.yellow("No semantic index. Run ") + chalk2.bold("experts embed") + chalk2.yellow(" first."));
      process.exit(1);
    }
    const brief = text.join(" ");
    const [queryVector] = await getEmbedder().embed([brief]);
    // Over-fetch so the shortlist can still be filled after duplicate persons are dropped.
    const candidates = db.semanticSearch(queryVector, { source: cmdOpts.source, limit: (cmdOpts.limit + 5) * 4 });
    const seenPersons = new Set();
    const shortlist = [];
    for (const hit of candidates) {
      const personId = db.personIdOf(hit.expert.source, hit.expert.sourceId);
      if (!seenPersons.has(personId)) {
        seenPersons.add(personId);
        shortlist.push(hit);
        if (shortlist.length >= cmdOpts.limit) {
          break;
        }
      }
    }
    const briefLc = brief.toLowerCase();
    const annotated = shortlist.map((hit) => {
      const matchedTags = hit.expert.tags.filter((tag) => briefLc.includes(tag.toLowerCase()));
      return { ...hit, why: matchedTags.slice(0, 4) };
    });
    if (program.opts().json) {
      console.log(JSON.stringify(annotated, null, 2));
    } else {
      console.log(chalk2.bold(`Shortlist for: "${brief}"\n`));
      annotated.forEach((hit, index) => {
        console.log(`${chalk2.cyan(`${index + 1}.`)} ${formatRow(hit.expert)}`);
        const why = hit.why.length ? hit.why.join(", ") : "semantic match";
        console.log(` ${chalk2.dim("why:")} ${why} ${chalk2.dim(`(${hit.score.toFixed(3)})`)}`);
      });
    }
    db.close();
  });
1394
3144
  program.command("search <query...>").description("Full-text search across name, title and bio").option("-s, --source <name>", "filter by source").option("-n, --limit <n>", "max rows", (v) => parseInt(v, 10), 25).action((query, cmdOpts) => {
1395
3145
  const db = openDb();
1396
3146
  requireData(db);
@@ -1473,10 +3223,13 @@ program.command("show <idOrSlug>").description("Show full detail for one expert"
1473
3223
  }
1474
3224
  const xProfile = db.getXProfile(e.source, e.sourceId);
1475
3225
  const tweets = db.recentTweets(e.source, e.sourceId, 10);
3226
+ const contacts = db.contacts(e.source, e.sourceId);
3227
+ const videos = db.recentVideos(e.source, e.sourceId, 5);
3228
+ const ext = db.allExt(e.source, e.sourceId);
1476
3229
  if (program.opts().json) {
1477
- console.log(JSON.stringify({ ...e, xProfile, tweets }, null, 2));
3230
+ console.log(JSON.stringify({ ...e, xProfile, tweets, contacts, videos, ext }, null, 2));
1478
3231
  } else {
1479
- console.log(formatDetail(e, { xProfile, tweets }));
3232
+ console.log(formatDetail(e, { xProfile, tweets, contacts, videos, ext }));
1480
3233
  }
1481
3234
  db.close();
1482
3235
  });
@@ -1543,6 +3296,21 @@ program.command("export").description("Export experts as JSON or CSV").option("-
1543
3296
  }
1544
3297
  db.close();
1545
3298
  });
3299
// `experts sync-contacts <idsOrSlugs...>`: export contact records as JSON (stdout or --out file),
// or push them through the `contacts` CLI with --via-cli.
program
  .command("sync-contacts <idsOrSlugs...>")
  .description("Export experts + their contacts to the contacts system (JSON, or --via-cli)")
  .option("-s, --source <name>", "disambiguate by source")
  .option("--via-cli", "push via the `contacts` CLI instead of emitting JSON")
  .option("-o, --out <file>", "write JSON to a file")
  .action(async (ids, cmdOpts) => {
    const db = openDb();
    requireData(db);
    const sink = cmdOpts.viaCli ? new CliSink() : new JsonSink();
    const outcome = await syncContacts(db, ids, { source: cmdOpts.source, sink });
    if (cmdOpts.viaCli) {
      const headline = chalk2.green(`\u2713 pushed ${outcome.ok} contacts`);
      console.log(headline + chalk2.dim(` (${outcome.failed} failed)`));
    } else {
      const json = outcome.output ?? "[]";
      if (cmdOpts.out) {
        await Bun.write(cmdOpts.out, json);
        console.error(chalk2.green(`\u2713 wrote ${outcome.records.length} contact records to ${cmdOpts.out}`));
      } else {
        console.log(json);
      }
    }
    db.close();
  });
1546
3314
  program.command("sources").description("List available marketplace sources").action(() => {
1547
3315
  const db = openDb();
1548
3316
  const inStore = new Map(db.sourcesInStore().map((r) => [r.source, r.count]));