@hasna/experts 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +21 -13
  2. package/dist/cli/index.js +1842 -74
  3. package/dist/connectors.d.ts +63 -4
  4. package/dist/connectors.d.ts.map +1 -1
  5. package/dist/contacts.d.ts +96 -0
  6. package/dist/contacts.d.ts.map +1 -0
  7. package/dist/crawl.d.ts +1 -0
  8. package/dist/crawl.d.ts.map +1 -1
  9. package/dist/db.d.ts +97 -2
  10. package/dist/db.d.ts.map +1 -1
  11. package/dist/embed.d.ts +57 -0
  12. package/dist/embed.d.ts.map +1 -0
  13. package/dist/enrich.d.ts +81 -1
  14. package/dist/enrich.d.ts.map +1 -1
  15. package/dist/format.d.ts +4 -1
  16. package/dist/format.d.ts.map +1 -1
  17. package/dist/identity.d.ts +23 -0
  18. package/dist/identity.d.ts.map +1 -0
  19. package/dist/index.d.ts +7 -2
  20. package/dist/index.d.ts.map +1 -1
  21. package/dist/index.js +1544 -21
  22. package/dist/score.d.ts +25 -0
  23. package/dist/score.d.ts.map +1 -0
  24. package/dist/sdk.d.ts +26 -1
  25. package/dist/sdk.d.ts.map +1 -1
  26. package/dist/sdk.js +12 -1
  27. package/dist/server/index.d.ts.map +1 -1
  28. package/dist/server/index.js +960 -14
  29. package/dist/sources/adplist.d.ts +43 -0
  30. package/dist/sources/adplist.d.ts.map +1 -0
  31. package/dist/sources/clarity.d.ts +37 -0
  32. package/dist/sources/clarity.d.ts.map +1 -0
  33. package/dist/sources/common.d.ts +14 -0
  34. package/dist/sources/common.d.ts.map +1 -0
  35. package/dist/sources/glg.d.ts +36 -0
  36. package/dist/sources/glg.d.ts.map +1 -0
  37. package/dist/sources/index.d.ts +5 -1
  38. package/dist/sources/index.d.ts.map +1 -1
  39. package/dist/sources/mentorcruise.d.ts +47 -0
  40. package/dist/sources/mentorcruise.d.ts.map +1 -0
  41. package/dist/sync.d.ts +71 -0
  42. package/dist/sync.d.ts.map +1 -0
  43. package/dist/types.d.ts +34 -0
  44. package/dist/types.d.ts.map +1 -1
  45. package/package.json +1 -1
@@ -28,6 +28,206 @@ function expertText(e) {
28
28
  return [e.title, e.headline, e.bio].filter(Boolean).join(". ");
29
29
  }
30
30
 
31
+ // src/score.ts
32
/** Default weight of each signal in the authority score. */
const DEFAULT_WEIGHTS = {
  rating: 0.3,
  reviews: 0.2,
  followers: 0.25,
  featured: 0.1,
  verified: 0.05,
  recency: 0.1
};

/** Clamps `n` to [0, 1]; NaN maps to 0 so one bad signal cannot poison the sum. */
const clamp01 = (n) => (Number.isNaN(n) ? 0 : Math.max(0, Math.min(1, n)));

/** Log-scaled normalisation of a non-negative count: 0 -> 0, `cap` (or more) -> 1. */
const logNorm = (x, cap) => clamp01(Math.log10(1 + Math.max(0, x)) / Math.log10(1 + cap));

/**
 * Computes a 0-100 authority score (one decimal place) for an expert.
 *
 * @param {object} e - Expert record; reads `rating`, `ratingCount`, `featured`, `verified`.
 * @param {object} [inputs] - Extra signals: `followers` and `daysSinceLastTweet`.
 * @param {object} [weights] - Per-signal weights. Keys that are omitted fall back
 *   to DEFAULT_WEIGHTS instead of turning the whole score into NaN.
 * @returns {number} Weighted sum of the normalised signals, times 100.
 */
function authorityScore(e, inputs = {}, weights = DEFAULT_WEIGHTS) {
  const w = { ...DEFAULT_WEIGHTS, ...weights };
  const { followers: followerCount, daysSinceLastTweet } = inputs ?? {};
  const rating = clamp01((e.rating || 0) / 5);
  const reviews = logNorm(e.ratingCount || 0, 1000);
  const followers = logNorm(followerCount ?? 0, 1e6);
  const featured = e.featured ? 1 : 0;
  const verified = e.verified ? 1 : 0;
  // Recency decays linearly to 0 over 30 days; an unknown last tweet counts as 0.
  const recency = daysSinceLastTweet == null ? 0 : clamp01(1 - daysSinceLastTweet / 30);
  const raw = w.rating * rating + w.reviews * reviews + w.followers * followers + w.featured * featured + w.verified * verified + w.recency * recency;
  return Math.round(raw * 1000) / 10;
}
52
+
53
+ // src/embed.ts
54
/** Common English words that are dropped during tokenisation. */
const STOPWORDS = new Set(
  "the a an and or of to in for on at is are with by as be this that it from i you we they".split(" ")
);

/**
 * Splits free text into lowercase ASCII alphanumeric tokens.
 * http(s) URLs are removed first; tokens shorter than two characters and
 * stopwords are discarded.
 *
 * @param {string} text - Input text; a falsy value is treated as "".
 * @returns {string[]} Tokens in their order of appearance.
 */
function tokenize(text) {
  const lowered = (text || "").toLowerCase();
  const withoutUrls = lowered.replace(/https?:\/\/\S+/g, " ");
  const kept = [];
  for (const piece of withoutUrls.split(/[^a-z0-9]+/)) {
    if (piece.length >= 2 && !STOPWORDS.has(piece)) {
      kept.push(piece);
    }
  }
  return kept;
}
84
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `s`.
 *
 * @param {string} s - String to hash.
 * @returns {number} Unsigned 32-bit hash value.
 */
function fnv1a(s) {
  let hash = 2166136261; // FNV offset basis
  let pos = 0;
  while (pos < s.length) {
    hash ^= s.charCodeAt(pos);
    // Multiplication by the FNV prime 16777619 written as shifts and adds.
    const shifted = (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
    hash = (hash + shifted) >>> 0;
    pos += 1;
  }
  return hash >>> 0;
}
92
+
93
/**
 * Embedder that hashes unigrams and adjacent-token bigrams into a fixed
 * number of buckets and L2-normalises the result.
 */
class HashingEmbedder {
  id = "hash-v1";
  dim;

  /** @param {number} [dim=512] - Number of buckets (vector length). */
  constructor(dim = 512) {
    this.dim = dim;
  }

  /**
   * Embeds a single text.
   * @param {string} text
   * @returns {number[]} Vector of length `dim`; all zeros when no tokens survive.
   */
  one(text) {
    const buckets = new Array(this.dim).fill(0);
    const tokens = tokenize(text);
    tokens.forEach((token, idx) => {
      buckets[fnv1a(token) % this.dim] += 1;
      if (idx + 1 < tokens.length) {
        // Bigrams count half as much as unigrams.
        const pair = token + "_" + tokens[idx + 1];
        buckets[fnv1a(pair) % this.dim] += 0.5;
      }
    });
    let sumOfSquares = 0;
    for (const value of buckets) {
      sumOfSquares = sumOfSquares + value * value;
    }
    // A zero vector is divided by 1 so it stays all zeros.
    const length = Math.sqrt(sumOfSquares) || 1;
    return buckets.map((value) => value / length);
  }

  /**
   * @param {string[]} texts
   * @returns {Promise<number[][]>} One vector per input text, in order.
   */
  async embed(texts) {
    const vectors = [];
    for (const text of texts) {
      vectors.push(this.one(text));
    }
    return vectors;
  }
}
117
+
118
/**
 * Embedder backed by the OpenAI `/v1/embeddings` endpoint.
 */
class OpenAIEmbedder {
  // Output dimension of the known models; unknown models fall back to 1536.
  static MODEL_DIMS = {
    "text-embedding-3-small": 1536,
    "text-embedding-3-large": 3072,
    "text-embedding-ada-002": 1536
  };

  id;
  dim = 1536;
  apiKey;
  model;
  fetchFn;

  /**
   * @param {object} [opts]
   * @param {string} [opts.apiKey] - Defaults to the OPENAI_API_KEY environment variable.
   * @param {string} [opts.model] - Defaults to "text-embedding-3-small".
   * @param {Function} [opts.fetchFn] - fetch-compatible function; defaults to global fetch.
   * @param {number} [opts.dim] - Overrides the dimension derived from the model name.
   */
  constructor(opts = {}) {
    this.apiKey = opts.apiKey ?? process.env.OPENAI_API_KEY ?? "";
    this.model = opts.model ?? "text-embedding-3-small";
    this.fetchFn = opts.fetchFn ?? fetch;
    this.id = `openai:${this.model}`;
    this.dim = opts.dim ?? OpenAIEmbedder.MODEL_DIMS[this.model] ?? 1536;
  }

  /**
   * Embeds a batch of texts in one request.
   *
   * @param {string[]} texts
   * @returns {Promise<number[][]>} One vector per input, in input order.
   * @throws {Error} On a non-2xx response or a response without a `data` array.
   */
  async embed(texts) {
    // Nothing to embed: skip the request instead of sending an empty input.
    if (texts.length === 0) {
      return [];
    }
    const res = await this.fetchFn("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: { "content-type": "application/json", authorization: `Bearer ${this.apiKey}` },
      body: JSON.stringify({ model: this.model, input: texts })
    });
    if (!res.ok) {
      throw new Error(`OpenAI embeddings ${res.status}: ${(await res.text()).slice(0, 200)}`);
    }
    const data = await res.json();
    if (!Array.isArray(data?.data)) {
      throw new Error("OpenAI embeddings: response has no data array");
    }
    // Each item carries the `index` of the input it belongs to; order by it
    // rather than trusting the order of the response array.
    return [...data.data]
      .sort((a, b) => (a.index ?? 0) - (b.index ?? 0))
      .map((item) => item.embedding);
  }
}
142
/**
 * Picks the embedder from the environment: OpenAI when EXPERTS_EMBEDDER is
 * "openai" and OPENAI_API_KEY is set, otherwise the local hashing embedder.
 *
 * @returns {OpenAIEmbedder|HashingEmbedder}
 */
function getEmbedder() {
  const wantsOpenAI = process.env.EXPERTS_EMBEDDER === "openai";
  const hasKey = Boolean(process.env.OPENAI_API_KEY);
  return wantsOpenAI && hasKey ? new OpenAIEmbedder() : new HashingEmbedder();
}
148
/**
 * Cosine similarity of two numeric vectors, computed over their common
 * prefix (the shorter length).
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} Similarity, or 0 when either compared prefix has zero norm.
 */
function cosine(a, b) {
  const shared = Math.min(a.length, b.length);
  let dotProduct = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  let k = 0;
  while (k < shared) {
    const x = a[k];
    const y = b[k];
    dotProduct += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
    k += 1;
  }
  const denominator = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
161
/**
 * Serialises a numeric vector as the raw bytes of a Float32Array.
 *
 * @param {number[]} v
 * @returns {Uint8Array} Byte view, four bytes per element.
 */
function packVector(v) {
  const floats = new Float32Array(v);
  const bytes = new Uint8Array(floats.buffer);
  return bytes;
}

/**
 * Inverse of packVector.
 *
 * @param {Uint8Array|ArrayBuffer} buf - Packed bytes.
 * @returns {number[]} The decoded float32 values as a plain array.
 */
function unpackVector(buf) {
  let raw = buf;
  if (buf instanceof Uint8Array) {
    // Copy exactly the viewed byte range so an offset view decodes correctly.
    const start = buf.byteOffset;
    raw = buf.buffer.slice(start, start + buf.byteLength);
  }
  return Array.from(new Float32Array(raw));
}
169
/**
 * Builds the text that represents an expert for embedding: name, title,
 * headline, bio, topics and tags, joined with ". " and skipping empty parts.
 *
 * @param {object} e - Expert record; `topics` and `tags` may be absent.
 * @returns {string}
 */
function expertEmbedText(e) {
  // Missing list fields count as empty rather than throwing on `.join`.
  const topics = (e.topics ?? []).join(" ");
  const tags = (e.tags ?? []).join(" ");
  return [e.fullName, e.title, e.headline, e.bio, topics, tags].filter(Boolean).join(". ");
}
172
+
173
+ // src/identity.ts
174
// Leading path segments that name a section of a site rather than an account,
// e.g. linkedin.com/in/<handle> or youtube.com/channel/<id>.
const SOCIAL_PATH_PREFIXES = new Set(["in", "pub", "company", "channel", "c", "user", "u", "profile", "people"]);

/**
 * Extracts a lowercase account handle from a profile URL or a bare handle.
 * Returns "" when nothing usable remains.
 */
function extractSocialHandle(raw) {
  const text = raw.trim();
  const bare = text.toLowerCase().replace(/^@+/, "").replace(/\/+$/, "");
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(text);
  const looksLikeUrl = hasScheme || /^[^\/\s]+\.[^\/\s]+\//.test(text);
  if (!looksLikeUrl) {
    return bare;
  }
  let segments;
  try {
    segments = new URL(hasScheme ? text : `https://${text}`).pathname.split("/").filter(Boolean);
  } catch {
    // Not parseable as a URL after all: treat it as a bare handle.
    return bare;
  }
  // Skip section prefixes, but keep the last segment even if it looks like one.
  while (segments.length > 1 && SOCIAL_PATH_PREFIXES.has(segments[0].toLowerCase())) {
    segments.shift();
  }
  if (segments.length === 0) {
    // A URL with no path (e.g. a personal site): the whole URL is the identity.
    return text.toLowerCase().replace(/\/+$/, "");
  }
  return segments[0].toLowerCase().replace(/^@+/, "");
}

/**
 * Collects the `platform:handle` identifiers of an expert's social links.
 *
 * Previously the first path segment was taken as the handle, so every
 * `linkedin.com/in/...` URL produced `linkedin:in` and unrelated experts were
 * treated as sharing an account.
 *
 * @param {object} e - Expert record; reads `socials` (platform -> URL or handle).
 * @returns {Set<string>} Lowercased `platform:handle` strings.
 */
function socialHandles(e) {
  const out = new Set();
  for (const [platform, url] of Object.entries(e.socials || {})) {
    if (!url) {
      continue;
    }
    const handle = extractSocialHandle(String(url));
    if (handle) {
      out.add(`${platform}:${handle}`);
    }
  }
  return out;
}
186
/** Stable identifier of an expert record: `source:sourceId`. */
const key = (e) => `${e.source}:${e.sourceId}`;

/**
 * Groups expert records that share at least one social handle, using a
 * union-find over record keys.
 *
 * @param {object[]} experts - Records with `source`, `sourceId` and `socials`.
 * @returns {Map<string, string>} Record key -> key of its cluster root. When
 *   two clusters merge, the root that compares smaller as a string is kept.
 */
function clusterPersons(experts) {
  const parent = new Map();

  // Finds the root of `start`, then repoints every node on the path at it.
  const find = (start) => {
    let root = start;
    while (parent.get(root) !== root) {
      root = parent.get(root);
    }
    for (let node = start; parent.get(node) !== root; ) {
      const next = parent.get(node);
      parent.set(node, root);
      node = next;
    }
    return root;
  };

  const union = (a, b) => {
    const rootA = find(a);
    const rootB = find(b);
    if (rootA === rootB) {
      return;
    }
    const [kept, absorbed] = rootA < rootB ? [rootA, rootB] : [rootB, rootA];
    parent.set(absorbed, kept);
  };

  // Every record starts as its own cluster.
  for (const expert of experts) {
    const id = key(expert);
    parent.set(id, id);
  }

  // Bucket record keys by each handle they expose.
  const byHandle = new Map();
  for (const expert of experts) {
    for (const handle of socialHandles(expert)) {
      let members = byHandle.get(handle);
      if (members === undefined) {
        members = [];
        byHandle.set(handle, members);
      }
      members.push(key(expert));
    }
  }

  // Merge every bucket into the cluster of its first member.
  for (const members of byHandle.values()) {
    const [first, ...others] = members;
    for (const other of others) {
      union(first, other);
    }
  }

  const out = new Map();
  for (const expert of experts) {
    const id = key(expert);
    out.set(id, find(id));
  }
  return out;
}
230
+
31
231
  // src/db.ts
32
232
  function defaultDbPath() {
33
233
  return process.env.OPEN_EXPERTS_DB || join(homedir(), ".hasna", "experts", "experts.db");
@@ -111,8 +311,59 @@ class ExpertsDB {
111
311
  PRIMARY KEY (source, tweet_id)
112
312
  );
113
313
  CREATE INDEX IF NOT EXISTS idx_tweets_expert ON tweets(source, source_id, created_at DESC);
314
+
315
+ -- Enrichment: recent YouTube videos per expert.
316
+ CREATE TABLE IF NOT EXISTS videos (
317
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
318
+ video_id TEXT NOT NULL, title TEXT, description TEXT,
319
+ published_at TEXT, url TEXT, thumbnail TEXT, view_count INTEGER,
320
+ PRIMARY KEY (source, video_id)
321
+ );
322
+ CREATE INDEX IF NOT EXISTS idx_videos_expert ON videos(source, source_id, published_at DESC);
323
+
324
+ -- Generic external enrichment (linkedin, site/newsletter, \u2026) as JSON.
325
+ CREATE TABLE IF NOT EXISTS ext_profiles (
326
+ source TEXT NOT NULL, source_id TEXT NOT NULL, kind TEXT NOT NULL,
327
+ data TEXT, enriched_at TEXT,
328
+ PRIMARY KEY (source, source_id, kind)
329
+ );
330
+
331
+ -- Enrichment: discovered contact methods (multiple email/phone per expert).
332
+ CREATE TABLE IF NOT EXISTS contacts (
333
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
334
+ type TEXT NOT NULL, value TEXT NOT NULL,
335
+ label TEXT, provider TEXT, confidence REAL,
336
+ status TEXT DEFAULT 'unverified', verified_at TEXT, created_at TEXT,
337
+ PRIMARY KEY (source, source_id, type, value)
338
+ );
339
+ CREATE INDEX IF NOT EXISTS idx_contacts_expert ON contacts(source, source_id);
340
+ CREATE INDEX IF NOT EXISTS idx_contacts_status ON contacts(status);
341
+
342
+ -- Semantic search: one embedding vector per expert.
343
+ CREATE TABLE IF NOT EXISTS vectors (
344
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
345
+ embedder TEXT NOT NULL, dim INTEGER, vec BLOB,
346
+ PRIMARY KEY (source, source_id)
347
+ );
348
+
349
+ -- Identity resolution: maps each expert record to a canonical person.
350
+ CREATE TABLE IF NOT EXISTS persons (
351
+ source TEXT NOT NULL, source_id TEXT NOT NULL, person_id TEXT NOT NULL,
352
+ PRIMARY KEY (source, source_id)
353
+ );
354
+ CREATE INDEX IF NOT EXISTS idx_persons_person ON persons(person_id);
355
+
356
+ -- Change detection: a log of what changed between crawls.
357
+ CREATE TABLE IF NOT EXISTS changes (
358
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
359
+ source TEXT NOT NULL, source_id TEXT NOT NULL,
360
+ kind TEXT NOT NULL, field TEXT, old_value TEXT, new_value TEXT,
361
+ detected_at TEXT
362
+ );
363
+ CREATE INDEX IF NOT EXISTS idx_changes_time ON changes(detected_at DESC);
114
364
  `);
115
365
  this.addColumnIfMissing("experts", "avatar_local", "TEXT");
366
+ this.addColumnIfMissing("experts", "authority", "REAL DEFAULT 0");
116
367
  }
117
368
  addColumnIfMissing(table, column, type) {
118
369
  const cols = this.db.query(`PRAGMA table_info(${table})`).all();
@@ -198,6 +449,7 @@ class ExpertsDB {
198
449
  socials: JSON.parse(r.socials || "{}"),
199
450
  extra: JSON.parse(r.extra || "{}"),
200
451
  avatarLocal: r.avatar_local || undefined,
452
+ authority: r.authority ?? 0,
201
453
  crawledAt: r.crawled_at
202
454
  };
203
455
  }
@@ -245,7 +497,7 @@ class ExpertsDB {
245
497
  where.push("rating >= ?");
246
498
  params.push(filters.minRating);
247
499
  }
248
- const sortCol = filters.sort === "price" ? "price" : filters.sort === "name" ? "full_name" : filters.sort === "reviews" ? "rating_count" : "rating";
500
+ const sortCol = filters.sort === "price" ? "price" : filters.sort === "name" ? "full_name" : filters.sort === "reviews" ? "rating_count" : filters.sort === "authority" ? "authority" : "rating";
249
501
  const defaultAsc = filters.sort === "name";
250
502
  const dir = filters.desc ?? !defaultAsc ? "DESC" : "ASC";
251
503
  let sql = "SELECT * FROM experts";
@@ -321,11 +573,11 @@ class ExpertsDB {
321
573
  sql += " ORDER BY name";
322
574
  return this.db.query(sql).all(...params);
323
575
  }
324
- setMeta(key, value) {
325
- this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key, value);
576
+ setMeta(key2, value) {
577
+ this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key2, value);
326
578
  }
327
- getMeta(key) {
328
- const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key);
579
+ getMeta(key2) {
580
+ const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key2);
329
581
  return row ? row.value : null;
330
582
  }
331
583
  stats(source) {
@@ -349,12 +601,12 @@ class ExpertsDB {
349
601
  const nodeIds = new Map;
350
602
  const insertNode = this.db.query("INSERT INTO kg_nodes (type, key, label) VALUES (?, ?, ?) ON CONFLICT(type, key) DO UPDATE SET label=excluded.label RETURNING id");
351
603
  const insertEdge = this.db.query("INSERT OR REPLACE INTO kg_edges (src, dst, rel, weight) VALUES (?, ?, ?, ?)");
352
- const node = (type, key, label) => {
353
- const ck = `${type}\x00${key.toLowerCase()}`;
604
+ const node = (type, key2, label) => {
605
+ const ck = `${type}\x00${key2.toLowerCase()}`;
354
606
  const cached = nodeIds.get(ck);
355
607
  if (cached != null)
356
608
  return cached;
357
- const id = insertNode.get(type, key.toLowerCase(), label).id;
609
+ const id = insertNode.get(type, key2.toLowerCase(), label).id;
358
610
  nodeIds.set(ck, id);
359
611
  return id;
360
612
  };
@@ -364,7 +616,8 @@ class ExpertsDB {
364
616
  for (const topic of e.topics) {
365
617
  insertEdge.run(eId, node("topic", topic, topic), "IN_TOPIC", 1);
366
618
  }
367
- const tags = inferTags(expertText(e), vocabulary);
619
+ const tweetText = this.recentTweets(e.source, e.sourceId, 30).map((t) => t.text).join(". ");
620
+ const tags = inferTags(expertText(e) + ". " + tweetText, vocabulary);
368
621
  for (const tag of tags) {
369
622
  insertEdge.run(eId, node("tag", tag, tag), "HAS_TAG", 1);
370
623
  }
@@ -377,11 +630,51 @@ class ExpertsDB {
377
630
  this.setMeta("graph_built", new Date().toISOString());
378
631
  return { nodes, edges };
379
632
  }
380
- expertFromNodeKey(key) {
381
- const idx = key.indexOf(":");
633
+ rescore(source) {
634
+ const experts = this.list({ source });
635
+ const upd = this.db.query("UPDATE experts SET authority = ? WHERE source = ? AND source_id = ?");
636
+ const followerStmt = this.db.query("SELECT followers FROM x_profiles WHERE source = ? AND source_id = ?");
637
+ const lastTweetStmt = this.db.query("SELECT MAX(created_at) AS t FROM tweets WHERE source = ? AND source_id = ?");
638
+ const tx = this.db.transaction((rows) => {
639
+ for (const e of rows) {
640
+ const fr = followerStmt.get(e.source, e.sourceId);
641
+ const lt = lastTweetStmt.get(e.source, e.sourceId);
642
+ let daysSince;
643
+ if (lt?.t) {
644
+ const ms = Date.now() - Date.parse(lt.t);
645
+ if (!Number.isNaN(ms))
646
+ daysSince = ms / 86400000;
647
+ }
648
+ const score = authorityScore(e, { followers: fr?.followers ?? 0, daysSinceLastTweet: daysSince });
649
+ upd.run(score, e.source, e.sourceId);
650
+ }
651
+ });
652
+ tx(experts);
653
+ this.setMeta("rescored_at", new Date().toISOString());
654
+ return experts.length;
655
+ }
656
+ stalest(opts = {}) {
657
+ const where = opts.source ? "WHERE e.source = ?" : "";
658
+ const params = opts.source ? [opts.source] : [];
659
+ const sql = `
660
+ SELECT e.*, COALESCE(
661
+ (SELECT MAX(enriched_at) FROM x_profiles xp WHERE xp.source=e.source AND xp.source_id=e.source_id),
662
+ e.crawled_at
663
+ ) AS last_seen
664
+ FROM experts e ${where}
665
+ ORDER BY last_seen ASC
666
+ LIMIT ?`;
667
+ params.push(opts.limit ?? 25);
668
+ return this.db.query(sql).all(...params).map((r) => ({
669
+ expert: this.rowToExpert(r),
670
+ lastSeen: r.last_seen || ""
671
+ }));
672
+ }
673
+ expertFromNodeKey(key2) {
674
+ const idx = key2.indexOf(":");
382
675
  if (idx < 0)
383
676
  return null;
384
- return this.get(key.slice(idx + 1), key.slice(0, idx));
677
+ return this.get(key2.slice(idx + 1), key2.slice(0, idx));
385
678
  }
386
679
  findByNeeds(needs, opts = {}) {
387
680
  const cleaned = needs.map((n) => n.trim().toLowerCase()).filter(Boolean);
@@ -570,6 +863,270 @@ class ExpertsDB {
570
863
  const avatars = this.db.query(`SELECT COUNT(*) AS n FROM experts WHERE avatar_local IS NOT NULL${source ? " AND source = ?" : ""}`).get(...args).n;
571
864
  return { withHandle, enriched, tweets, avatars };
572
865
  }
866
+ recordChanges(source, incoming) {
867
+ const existing = new Map(this.list({ source }).map((e) => [e.sourceId, e]));
868
+ const now = new Date().toISOString();
869
+ const watched = ["price", "title", "headline", "bio", "slug"];
870
+ const stmt = this.db.query("INSERT INTO changes (source, source_id, kind, field, old_value, new_value, detected_at) VALUES (?, ?, ?, ?, ?, ?, ?)");
871
+ let count = 0;
872
+ const tx = this.db.transaction((rows) => {
873
+ for (const e of rows) {
874
+ const prev = existing.get(e.sourceId);
875
+ if (!prev) {
876
+ stmt.run(source, e.sourceId, "added", null, null, e.fullName || e.slug, now);
877
+ count++;
878
+ continue;
879
+ }
880
+ for (const f of watched) {
881
+ const a = String(prev[f] ?? "");
882
+ const b = String(e[f] ?? "");
883
+ if (a !== b) {
884
+ stmt.run(source, e.sourceId, "updated", f, a, b, now);
885
+ count++;
886
+ }
887
+ }
888
+ }
889
+ });
890
+ tx(incoming);
891
+ return count;
892
+ }
893
+ changes(opts = {}) {
894
+ const where = opts.source ? "WHERE source = ?" : "";
895
+ const params = opts.source ? [opts.source] : [];
896
+ params.push(opts.limit ?? 50);
897
+ return this.db.query(`SELECT * FROM changes ${where} ORDER BY detected_at DESC, id DESC LIMIT ?`).all(...params);
898
+ }
899
+ rebuildPersons() {
900
+ const experts = this.list();
901
+ const mapping = clusterPersons(experts);
902
+ const tx = this.db.transaction(() => {
903
+ this.db.exec("DELETE FROM persons");
904
+ const stmt = this.db.query("INSERT OR REPLACE INTO persons (source, source_id, person_id) VALUES (?, ?, ?)");
905
+ for (const [k, pid] of mapping) {
906
+ const idx = k.indexOf(":");
907
+ stmt.run(k.slice(0, idx), k.slice(idx + 1), pid);
908
+ }
909
+ });
910
+ tx();
911
+ const persons = new Set(mapping.values()).size;
912
+ this.setMeta("persons_built", new Date().toISOString());
913
+ return { experts: experts.length, persons };
914
+ }
915
+ personIdOf(source, sourceId) {
916
+ const r = this.db.query("SELECT person_id FROM persons WHERE source = ? AND source_id = ?").get(source, sourceId);
917
+ return r ? r.person_id : `${source}:${sourceId}`;
918
+ }
919
+ expertsForPerson(personId) {
920
+ const rows = this.db.query("SELECT e.* FROM persons p JOIN experts e ON e.source=p.source AND e.source_id=p.source_id WHERE p.person_id = ?").all(personId);
921
+ return rows.map((r) => this.rowToExpert(r));
922
+ }
923
+ personStats() {
924
+ const experts = this.count();
925
+ const row = this.db.query("SELECT COUNT(DISTINCT person_id) n FROM persons").get();
926
+ const persons = row?.n ?? 0;
927
+ const dupes = this.db.query("SELECT person_id, COUNT(*) c FROM persons GROUP BY person_id HAVING c > 1 ORDER BY c DESC").all();
928
+ return { experts, persons: persons || experts, duplicated: dupes.length };
929
+ }
930
+ async buildEmbeddings(embedder, opts = {}) {
931
+ const log = opts.onLog ?? (() => {});
932
+ const experts = this.list({ source: opts.source });
933
+ const batch = opts.batch ?? 64;
934
+ const stmt = this.db.query("INSERT OR REPLACE INTO vectors (source, source_id, embedder, dim, vec) VALUES (?, ?, ?, ?, ?)");
935
+ let done = 0;
936
+ for (let i = 0;i < experts.length; i += batch) {
937
+ const slice = experts.slice(i, i + batch);
938
+ const vecs = await embedder.embed(slice.map((e) => expertEmbedText(e)));
939
+ const tx = this.db.transaction(() => {
940
+ slice.forEach((e, j) => stmt.run(e.source, e.sourceId, embedder.id, embedder.dim, packVector(vecs[j])));
941
+ });
942
+ tx();
943
+ done += slice.length;
944
+ log(` embedded ${done}/${experts.length}`);
945
+ }
946
+ this.setMeta("embedder", embedder.id);
947
+ this.setMeta("embedded_at", new Date().toISOString());
948
+ return done;
949
+ }
950
+ vectorCount() {
951
+ return this.db.query("SELECT COUNT(*) n FROM vectors").get().n;
952
+ }
953
+ semanticSearch(queryVec, opts = {}) {
954
+ const where = opts.source ? "WHERE v.source = ?" : "";
955
+ const params = opts.source ? [opts.source] : [];
956
+ const rows = this.db.query(`SELECT e.*, v.vec AS _vec FROM vectors v JOIN experts e ON e.source=v.source AND e.source_id=v.source_id ${where}`).all(...params);
957
+ const scored = rows.map((r) => ({ expert: this.rowToExpert(r), score: cosine(queryVec, unpackVector(r._vec)) }));
958
+ scored.sort((a, b) => b.score - a.score);
959
+ return scored.slice(0, opts.limit ?? 25);
960
+ }
961
+ upsertExt(source, sourceId, kind, data) {
962
+ this.db.query(`
963
+ INSERT INTO ext_profiles (source, source_id, kind, data, enriched_at)
964
+ VALUES (?, ?, ?, ?, ?)
965
+ ON CONFLICT(source, source_id, kind) DO UPDATE SET data=excluded.data, enriched_at=excluded.enriched_at
966
+ `).run(source, sourceId, kind, JSON.stringify(data), new Date().toISOString());
967
+ }
968
+ getExt(source, sourceId, kind) {
969
+ const r = this.db.query("SELECT data FROM ext_profiles WHERE source=? AND source_id=? AND kind=?").get(source, sourceId, kind);
970
+ return r ? JSON.parse(r.data || "{}") : null;
971
+ }
972
+ allExt(source, sourceId) {
973
+ const rows = this.db.query("SELECT kind, data FROM ext_profiles WHERE source=? AND source_id=?").all(source, sourceId);
974
+ return Object.fromEntries(rows.map((r) => [r.kind, JSON.parse(r.data || "{}")]));
975
+ }
976
+ expertsNeedingExt(platform, kind, opts = {}) {
977
+ const where = [`json_extract(socials, '$.${platform}') IS NOT NULL`];
978
+ const bind = [];
979
+ if (opts.source) {
980
+ where.push("source = ?");
981
+ bind.push(opts.source);
982
+ }
983
+ if (!opts.refresh) {
984
+ where.push("NOT EXISTS (SELECT 1 FROM ext_profiles x WHERE x.source=experts.source AND x.source_id=experts.source_id AND x.kind=?)");
985
+ bind.push(kind);
986
+ }
987
+ let sql = "SELECT * FROM experts WHERE " + where.join(" AND ") + " ORDER BY rating_count DESC";
988
+ if (opts.limit) {
989
+ sql += " LIMIT ?";
990
+ bind.push(opts.limit);
991
+ }
992
+ return this.db.query(sql).all(...bind).map((r) => this.rowToExpert(r));
993
+ }
994
+ replaceVideos(source, sourceId, videos) {
995
+ const tx = this.db.transaction((rows) => {
996
+ this.db.query("DELETE FROM videos WHERE source = ? AND source_id = ?").run(source, sourceId);
997
+ const stmt = this.db.query("INSERT OR REPLACE INTO videos (source, source_id, video_id, title, description, published_at, url, thumbnail, view_count) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)");
998
+ for (const v of rows) {
999
+ stmt.run(v.source, v.sourceId, v.videoId, v.title, v.description, v.publishedAt, v.url, v.thumbnail, v.viewCount);
1000
+ }
1001
+ });
1002
+ tx(videos);
1003
+ }
1004
+ recentVideos(source, sourceId, limit = 10) {
1005
+ const rows = this.db.query("SELECT * FROM videos WHERE source = ? AND source_id = ? ORDER BY published_at DESC LIMIT ?").all(source, sourceId, limit);
1006
+ return rows.map((r) => ({
1007
+ source: r.source,
1008
+ sourceId: r.source_id,
1009
+ videoId: r.video_id,
1010
+ title: r.title || "",
1011
+ description: r.description || "",
1012
+ publishedAt: r.published_at || "",
1013
+ url: r.url || "",
1014
+ thumbnail: r.thumbnail || "",
1015
+ viewCount: r.view_count ?? 0
1016
+ }));
1017
+ }
1018
+ expertsNeedingVideos(opts = {}) {
1019
+ const where = ["json_extract(socials, '$.youtube') IS NOT NULL"];
1020
+ const params = [];
1021
+ if (opts.source) {
1022
+ where.push("source = ?");
1023
+ params.push(opts.source);
1024
+ }
1025
+ if (!opts.refresh) {
1026
+ where.push("NOT EXISTS (SELECT 1 FROM videos v WHERE v.source=experts.source AND v.source_id=experts.source_id)");
1027
+ }
1028
+ let sql = "SELECT * FROM experts WHERE " + where.join(" AND ") + " ORDER BY rating_count DESC";
1029
+ if (opts.limit) {
1030
+ sql += " LIMIT ?";
1031
+ params.push(opts.limit);
1032
+ }
1033
+ return this.db.query(sql).all(...params).map((r) => this.rowToExpert(r));
1034
+ }
1035
+ upsertContact(c) {
1036
+ this.db.query(`
1037
+ INSERT INTO contacts (source, source_id, type, value, label, provider, confidence, status, verified_at, created_at)
1038
+ VALUES ($source, $source_id, $type, $value, $label, $provider, $confidence, $status, $verified_at, $created_at)
1039
+ ON CONFLICT(source, source_id, type, value) DO UPDATE SET
1040
+ label=excluded.label, provider=excluded.provider, confidence=excluded.confidence,
1041
+ status=CASE WHEN excluded.status != 'unverified' THEN excluded.status ELSE contacts.status END,
1042
+ verified_at=COALESCE(excluded.verified_at, contacts.verified_at)
1043
+ `).run({
1044
+ $source: c.source,
1045
+ $source_id: c.sourceId,
1046
+ $type: c.type,
1047
+ $value: c.value,
1048
+ $label: c.label,
1049
+ $provider: c.provider,
1050
+ $confidence: c.confidence,
1051
+ $status: c.status,
1052
+ $verified_at: c.verifiedAt || null,
1053
+ $created_at: c.createdAt || new Date().toISOString()
1054
+ });
1055
+ }
1056
+ setContactStatus(source, sourceId, type, value, status) {
1057
+ this.db.query("UPDATE contacts SET status = ?, verified_at = ? WHERE source = ? AND source_id = ? AND type = ? AND value = ?").run(status, new Date().toISOString(), source, sourceId, type, value);
1058
+ }
1059
+ contacts(source, sourceId) {
1060
+ const rows = this.db.query("SELECT * FROM contacts WHERE source = ? AND source_id = ? ORDER BY type, confidence DESC").all(source, sourceId);
1061
+ return rows.map((r) => ({
1062
+ source: r.source,
1063
+ sourceId: r.source_id,
1064
+ type: r.type,
1065
+ value: r.value,
1066
+ label: r.label || "",
1067
+ provider: r.provider || "",
1068
+ confidence: r.confidence ?? 0,
1069
+ status: r.status || "unverified",
1070
+ verifiedAt: r.verified_at || "",
1071
+ createdAt: r.created_at || ""
1072
+ }));
1073
+ }
1074
+ contactsToVerify(opts = {}) {
1075
+ const where = ["status = 'unverified'"];
1076
+ const params = [];
1077
+ if (opts.source) {
1078
+ where.push("source = ?");
1079
+ params.push(opts.source);
1080
+ }
1081
+ let sql = "SELECT * FROM contacts WHERE " + where.join(" AND ") + " ORDER BY confidence DESC";
1082
+ if (opts.limit) {
1083
+ sql += " LIMIT ?";
1084
+ params.push(opts.limit);
1085
+ }
1086
+ return this.db.query(sql).all(...params).map((r) => ({
1087
+ source: r.source,
1088
+ sourceId: r.source_id,
1089
+ type: r.type,
1090
+ value: r.value,
1091
+ label: r.label || "",
1092
+ provider: r.provider || "",
1093
+ confidence: r.confidence ?? 0,
1094
+ status: r.status || "unverified",
1095
+ verifiedAt: r.verified_at || "",
1096
+ createdAt: r.created_at || ""
1097
+ }));
1098
+ }
1099
+ expertsNeedingContacts(opts = {}) {
1100
+ const where = [];
1101
+ const params = [];
1102
+ if (opts.source) {
1103
+ where.push("source = ?");
1104
+ params.push(opts.source);
1105
+ }
1106
+ if (!opts.refresh) {
1107
+ where.push("NOT EXISTS (SELECT 1 FROM contacts c WHERE c.source = experts.source AND c.source_id = experts.source_id)");
1108
+ }
1109
+ let sql = "SELECT * FROM experts";
1110
+ if (where.length)
1111
+ sql += " WHERE " + where.join(" AND ");
1112
+ sql += " ORDER BY rating_count DESC";
1113
+ if (opts.limit) {
1114
+ sql += " LIMIT ?";
1115
+ params.push(opts.limit);
1116
+ }
1117
+ return this.db.query(sql).all(...params).map((r) => this.rowToExpert(r));
1118
+ }
1119
+ contactStats(source) {
1120
+ const filt = source ? " WHERE source = ?" : "";
1121
+ const args = source ? [source] : [];
1122
+ const total = this.db.query(`SELECT COUNT(*) n FROM contacts${filt}`).get(...args).n;
1123
+ const valid = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE status='valid'${source ? " AND source = ?" : ""}`).get(...args).n;
1124
+ const invalid = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE status='invalid'${source ? " AND source = ?" : ""}`).get(...args).n;
1125
+ const emails = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE type='email'${source ? " AND source = ?" : ""}`).get(...args).n;
1126
+ const phones = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE type='phone'${source ? " AND source = ?" : ""}`).get(...args).n;
1127
+ const expertsWith = this.db.query(`SELECT COUNT(DISTINCT source||source_id) n FROM contacts${filt}`).get(...args).n;
1128
+ return { total, valid, invalid, emails, phones, expertsWith };
1129
+ }
573
1130
  close() {
574
1131
  this.db.close();
575
1132
  }
@@ -788,6 +1345,329 @@ class IntroSource {
788
1345
  }
789
1346
  }
790
1347
 
1348
+ // src/sources/common.ts
1349
/**
 * Builds a complete expert record from a partial one, filling every missing
 * (null/undefined) field with a default.
 *
 * - `fullName` falls back to "firstName lastName" (empty parts dropped).
 * - `topics` are de-duplicated and sorted; `tags` are passed through as-is.
 * - `crawledAt` defaults to the current time as an ISO-8601 string.
 *
 * @param {object} p Partial expert; `source` and `sourceId` are copied verbatim.
 * @returns {object} Expert record with all fields present.
 */
function makeExpert(p) {
  const givenName = p.firstName ?? "";
  const familyName = p.lastName ?? "";
  const joinedName = () => [givenName, familyName].filter(Boolean).join(" ");
  const uniqueSortedTopics = p.topics ? Array.from(new Set(p.topics)).sort() : [];

  const identity = {
    source: p.source,
    sourceId: p.sourceId,
    slug: p.slug ?? "",
    url: p.url ?? "",
    fullName: p.fullName ?? joinedName(),
    firstName: givenName,
    lastName: familyName
  };
  const profile = {
    title: p.title ?? "",
    headline: p.headline ?? "",
    bio: p.bio ?? "",
    avatar: p.avatar ?? ""
  };
  const pricing = {
    price: p.price ?? 0,
    priceCurrency: p.priceCurrency ?? "USD",
    priceUnit: p.priceUnit ?? ""
  };
  const reputation = {
    rating: p.rating ?? 0,
    ratingCount: p.ratingCount ?? 0,
    verified: p.verified ?? false,
    featured: p.featured ?? false
  };

  return {
    ...identity,
    ...profile,
    ...pricing,
    ...reputation,
    topics: uniqueSortedTopics,
    tags: p.tags ?? [],
    socials: p.socials ?? {},
    extra: p.extra ?? {},
    crawledAt: p.crawledAt ?? new Date().toISOString()
  };
}
1378
/**
 * Converts free text into a lowercase, hyphen-separated ASCII slug.
 *
 * The input is lowercased and NFKD-normalised, combining marks (accents) are
 * removed so that "é" becomes "e", every remaining run of characters outside
 * `a-z0-9` becomes a single "-", and leading/trailing hyphens are trimmed.
 *
 * @param {string} [s] Text to slugify; falsy input yields "".
 * @returns {string} The slug (may be empty).
 */
function slugify(s) {
  return String(s || "")
    .toLowerCase()
    .normalize("NFKD")
    // NFKD splits accented letters into base letter + combining mark; drop the
    // marks first, otherwise they are replaced by "-" and split the word
    // ("joséphine" -> "jose-phine").
    .replace(/\p{M}+/gu, "")
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}
1381
/**
 * Best-effort JSON GET helper used by the source crawlers.
 *
 * Calls `fetchFn(url, init)` with a default `User-Agent` and
 * `Accept: application/json` header (entries in `init.headers` override them)
 * and parses the body as JSON.
 *
 * @param {string} url Request URL.
 * @param {Function} fetchFn fetch-compatible function to perform the request.
 * @param {object} [init] Extra request options, merged into the call.
 * @returns {Promise<any|null>} Parsed JSON, or `null` when the response is not
 *   `ok` or anything throws (request failure, invalid JSON, ...).
 */
async function fetchJson(url, fetchFn, init = {}) {
  try {
    const headers = {
      "User-Agent": "open-experts (+https://github.com/hasna/experts)",
      Accept: "application/json",
      ...(init.headers || {})
    };
    const response = await fetchFn(url, { ...init, headers });
    if (response.ok) {
      const payload = await response.json();
      return payload;
    }
    return null;
  } catch {
    // Deliberately swallowed: callers treat null as "nothing reachable".
    return null;
  }
}
1398
+
1399
+ // src/sources/mentorcruise.ts
1400
/**
 * Maps one raw MentorCruise mentor object onto the shared expert shape.
 *
 * The slug is `m.slug`, else a slugified `m.name`, else the stringified id.
 * A bare Twitter handle is expanded to an `https://x.com/<handle>` URL, while
 * values starting with "http" are kept unchanged.
 *
 * @param {object} m Raw mentor record as returned by the listing endpoint.
 * @param {string} crawledAt Timestamp to stamp on the record.
 * @returns {object} Expert built by `makeExpert`.
 */
function normalizeMentor(m, crawledAt) {
  const { twitter, linkedin } = m;
  const slug = m.slug || slugify(m.name || String(m.id ?? ""));

  const socials = {};
  if (twitter) {
    const alreadyUrl = twitter.startsWith("http");
    socials.twitter = alreadyUrl ? twitter : `https://x.com/${twitter}`;
  }
  if (linkedin) {
    socials.linkedin = linkedin;
  }

  const nameFromParts = () => [m.first_name, m.last_name].filter(Boolean).join(" ");
  // Only label the price unit when a (truthy) price is present.
  const priceUnit = m.price ? "per month" : "";

  const partial = {
    source: "mentorcruise",
    sourceId: String(m.id ?? slug),
    slug,
    url: `https://mentorcruise.com/mentor/${slug}/`,
    fullName: m.name ?? nameFromParts(),
    firstName: m.first_name ?? "",
    lastName: m.last_name ?? "",
    title: m.job_title ?? "",
    bio: m.bio ?? "",
    avatar: m.avatar ?? m.photo ?? "",
    price: m.price ?? 0,
    priceCurrency: m.currency ?? "USD",
    priceUnit,
    rating: m.rating ?? 0,
    ratingCount: m.reviews_count ?? 0,
    verified: Boolean(m.verified),
    featured: Boolean(m.is_top_mentor),
    topics: m.categories ?? [],
    tags: m.skills ?? [],
    socials,
    crawledAt
  };
  return makeExpert(partial);
}
1431
+
1432
/**
 * Source adapter for MentorCruise.
 *
 * Pages through `${apiBase}/mentors/?limit=<pageSize>&offset=<n>` and
 * normalises every returned item with `normalizeMentor`.
 */
class MentorCruiseSource {
  name = "mentorcruise";
  description = "MentorCruise \u2014 long-term mentorship from vetted mentors";
  website = "https://mentorcruise.com";
  fetchFn;
  apiBase;
  pageSize;

  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch-compatible function (defaults to global `fetch`).
   * @param {string} [opts.apiBase] API root; falls back to the
   *   `MENTORCRUISE_API_BASE` environment variable, then the public default.
   * @param {number} [opts.pageSize] Items requested per page (default 50).
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase, pageSize } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.MENTORCRUISE_API_BASE ?? "https://mentorcruise.com/api";
    this.pageSize = pageSize ?? 50;
  }

  /**
   * Crawls the mentor listing page by page.
   *
   * Stops on an empty page, on a page shorter than `pageSize`, or once
   * `opts.max` experts were collected.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] Progress callback receiving log strings.
   * @param {number} [opts.max] Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   *   `total` is the number collected before the `max` cap is applied.
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    const crawledAt = new Date().toISOString();
    const experts = [];
    const tagNames = new Set();

    let offset = 0;
    while (true) {
      const pageUrl = `${this.apiBase}/mentors/?limit=${this.pageSize}&offset=${offset}`;
      const payload = await fetchJson(pageUrl, this.fetchFn);
      // Accept `{results: [...]}`, `{data: [...]}` or a bare array.
      const page = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
      if (!page.length) {
        break;
      }
      for (const rawMentor of page) {
        const expert = normalizeMentor(rawMentor, crawledAt);
        experts.push(expert);
        for (const tag of expert.tags) {
          tagNames.add(tag);
        }
      }
      offset += page.length;
      log(` mentorcruise: ${experts.length}`);

      const reachedMax = opts.max && experts.length >= opts.max;
      const lastPage = page.length < this.pageSize;
      if (reachedMax || lastPage) {
        break;
      }
    }

    if (experts.length === 0) {
      log("mentorcruise: no public listing reachable (set MENTORCRUISE_API_BASE or provide a fetchFn).");
    }

    const capped = opts.max ? experts.slice(0, opts.max) : experts;
    return {
      experts: capped,
      topics: [],
      tags: [...tagNames].map((name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
1480
+
1481
+ // src/sources/adplist.ts
1482
/**
 * Maps one raw ADPList mentor object onto the shared expert shape.
 *
 * The slug is `m.username`, else a slugified `m.name` / `m.full_name`, else
 * the stringified id. Price is always 0 with the unit "free session".
 * A bare Twitter handle is expanded to an `https://x.com/<handle>` URL.
 *
 * @param {object} m Raw mentor record as returned by the listing endpoint.
 * @param {string} crawledAt Timestamp to stamp on the record.
 * @returns {object} Expert built by `makeExpert`.
 */
function normalizeAdpMentor(m, crawledAt) {
  const { twitter, linkedin } = m;
  const slug = m.username || slugify(m.name || m.full_name || String(m.id ?? ""));

  const socials = {};
  if (twitter) {
    const alreadyUrl = twitter.startsWith("http");
    socials.twitter = alreadyUrl ? twitter : `https://x.com/${twitter}`;
  }
  if (linkedin) {
    socials.linkedin = linkedin;
  }

  const partial = {
    source: "adplist",
    sourceId: String(m.id ?? slug),
    slug,
    url: `https://adplist.org/mentors/${slug}`,
    fullName: m.name ?? m.full_name ?? "",
    title: m.headline ?? m.tagline ?? "",
    headline: m.tagline ?? "",
    bio: m.bio ?? m.about ?? "",
    avatar: m.profile_photo ?? m.avatar ?? "",
    price: 0,
    priceUnit: "free session",
    rating: m.rating ?? 0,
    ratingCount: m.total_reviews ?? 0,
    verified: Boolean(m.verified),
    featured: Boolean(m.is_featured),
    tags: m.expertise ?? m.skills ?? [],
    socials,
    crawledAt
  };
  return makeExpert(partial);
}
1510
+
1511
/**
 * Source adapter for ADPList.
 *
 * Pages through `${apiBase}/mentors/?page=<n>&page_size=<pageSize>` starting
 * at page 1 and normalises every returned item with `normalizeAdpMentor`.
 */
class ADPListSource {
  name = "adplist";
  description = "ADPList \u2014 free mentorship across design, product & engineering";
  website = "https://adplist.org";
  fetchFn;
  apiBase;
  pageSize;

  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch-compatible function (defaults to global `fetch`).
   * @param {string} [opts.apiBase] API root; falls back to the
   *   `ADPLIST_API_BASE` environment variable, then the public default.
   * @param {number} [opts.pageSize] Items requested per page (default 50).
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase, pageSize } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.ADPLIST_API_BASE ?? "https://api.adplist.org/api";
    this.pageSize = pageSize ?? 50;
  }

  /**
   * Crawls the mentor listing page by page.
   *
   * Stops on an empty page, on a page shorter than `pageSize`, or once
   * `opts.max` experts were collected.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] Progress callback receiving log strings.
   * @param {number} [opts.max] Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   *   `total` is the number collected before the `max` cap is applied.
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    const crawledAt = new Date().toISOString();
    const experts = [];
    const tagNames = new Set();

    let pageNumber = 1;
    while (true) {
      const pageUrl = `${this.apiBase}/mentors/?page=${pageNumber}&page_size=${this.pageSize}`;
      const payload = await fetchJson(pageUrl, this.fetchFn);
      // Accept `{results: [...]}`, `{data: [...]}` or a bare array.
      const batch = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
      if (!batch.length) {
        break;
      }
      for (const rawMentor of batch) {
        const expert = normalizeAdpMentor(rawMentor, crawledAt);
        experts.push(expert);
        for (const tag of expert.tags) {
          tagNames.add(tag);
        }
      }
      log(` adplist: ${experts.length}`);
      pageNumber += 1;

      const reachedMax = opts.max && experts.length >= opts.max;
      const lastPage = batch.length < this.pageSize;
      if (reachedMax || lastPage) {
        break;
      }
    }

    if (experts.length === 0) {
      log("adplist: no public listing reachable (set ADPLIST_API_BASE or provide a fetchFn).");
    }

    const capped = opts.max ? experts.slice(0, opts.max) : experts;
    return {
      experts: capped,
      topics: [],
      tags: [...tagNames].map((name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
1557
+
1558
+ // src/sources/clarity.ts
1559
/**
 * Maps one raw Clarity.fm expert object onto the shared expert shape.
 *
 * The slug is `c.username`, else a slugified `c.name`, else the stringified
 * id. The price is taken from `c.rate_per_minute` and is always reported in
 * USD; the "per minute" unit is only set when that rate is truthy.
 *
 * @param {object} c Raw expert record.
 * @param {string} crawledAt Timestamp to stamp on the record.
 * @returns {object} Expert built by `makeExpert`.
 */
function normalizeClarityExpert(c, crawledAt) {
  const slug = c.username || slugify(c.name || String(c.id ?? ""));
  const ratePerMinute = c.rate_per_minute;

  const partial = {
    source: "clarity",
    sourceId: String(c.id ?? slug),
    slug,
    url: `https://clarity.fm/${slug}`,
    fullName: c.name ?? "",
    title: c.title ?? "",
    bio: c.bio ?? "",
    avatar: c.image ?? "",
    price: ratePerMinute ?? 0,
    priceCurrency: "USD",
    priceUnit: ratePerMinute ? "per minute" : "",
    rating: c.rating ?? 0,
    ratingCount: c.reviews ?? 0,
    topics: c.categories ?? [],
    tags: c.expertise ?? [],
    crawledAt
  };
  return makeExpert(partial);
}
1580
+
1581
/**
 * Source adapter for Clarity.fm.
 *
 * There is no built-in default endpoint: unless an API base is supplied via
 * the constructor or `CLARITY_API_BASE`, `crawl` logs a hint and returns an
 * empty result. Otherwise it performs one request to `${apiBase}/experts`.
 */
class ClaritySource {
  name = "clarity";
  description = "Clarity.fm \u2014 on-demand expert calls billed per minute";
  website = "https://clarity.fm";
  fetchFn;
  apiBase;

  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch-compatible function (defaults to global `fetch`).
   * @param {string} [opts.apiBase] API root; falls back to `CLARITY_API_BASE`.
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.CLARITY_API_BASE;
  }

  /**
   * Fetches and normalises the expert listing.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] Progress callback receiving log strings.
   * @param {number} [opts.max] Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   *   `total` is the number fetched before the `max` cap is applied.
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    if (!this.apiBase) {
      log("clarity: no public listing API; set CLARITY_API_BASE or inject a fetchFn to crawl.");
      return { experts: [], topics: [], tags: [], total: 0 };
    }

    const crawledAt = new Date().toISOString();
    const payload = await fetchJson(`${this.apiBase}/experts`, this.fetchFn);
    // Accept `{results: [...]}`, `{data: [...]}` or a bare array.
    const rawExperts = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
    const experts = rawExperts.map((raw) => normalizeClarityExpert(raw, crawledAt));

    const tagNames = new Set();
    for (const expert of experts) {
      for (const tag of expert.tags) {
        tagNames.add(tag);
      }
    }

    const capped = opts.max ? experts.slice(0, opts.max) : experts;
    return {
      experts: capped,
      topics: [],
      tags: [...tagNames].map((name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
1613
+
1614
+ // src/sources/glg.ts
1615
/**
 * Maps one raw GLG expert object onto the shared expert shape.
 *
 * The slug is always derived by slugifying `g.name` (or the stringified id
 * when the name is falsy). Every record points at the same site URL. The
 * "per hour" unit is only set when `g.hourly_rate` is truthy.
 *
 * @param {object} g Raw expert record.
 * @param {string} crawledAt Timestamp to stamp on the record.
 * @returns {object} Expert built by `makeExpert`.
 */
function normalizeGlgExpert(g, crawledAt) {
  const slug = slugify(g.name || String(g.id ?? ""));
  const hourlyRate = g.hourly_rate;

  const partial = {
    source: "glg",
    sourceId: String(g.id ?? slug),
    slug,
    url: "https://glginsights.com",
    fullName: g.name ?? "",
    title: g.title ?? "",
    bio: g.biography ?? "",
    price: hourlyRate ?? 0,
    priceCurrency: g.currency ?? "USD",
    priceUnit: hourlyRate ? "per hour" : "",
    topics: g.industries ?? [],
    tags: g.expertise_areas ?? [],
    crawledAt
  };
  return makeExpert(partial);
}
1633
+
1634
/**
 * Source adapter for GLG.
 *
 * There is no built-in default endpoint: unless an API base is supplied via
 * the constructor or `GLG_API_BASE`, `crawl` logs a hint and returns an empty
 * result. When an API key is available it is sent as a Bearer token.
 */
class GLGSource {
  name = "glg";
  description = "GLG \u2014 enterprise expert network (requires partner API access)";
  website = "https://glginsights.com";
  fetchFn;
  apiBase;
  apiKey;

  /**
   * @param {object} [opts]
   * @param {Function} [opts.fetchFn] fetch-compatible function (defaults to global `fetch`).
   * @param {string} [opts.apiBase] API root; falls back to `GLG_API_BASE`.
   * @param {string} [opts.apiKey] API key; falls back to `GLG_API_KEY`.
   */
  constructor(opts = {}) {
    const { fetchFn, apiBase, apiKey } = opts;
    this.fetchFn = fetchFn ?? fetch;
    this.apiBase = apiBase ?? process.env.GLG_API_BASE;
    this.apiKey = apiKey ?? process.env.GLG_API_KEY;
  }

  /**
   * Fetches and normalises the expert listing from `${apiBase}/experts`.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] Progress callback receiving log strings.
   * @param {number} [opts.max] Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   *   `total` is the number fetched before the `max` cap is applied.
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    if (!this.apiBase) {
      log("glg: enterprise-gated; no public directory. Set GLG_API_BASE + GLG_API_KEY (partner access) to crawl.");
      return { experts: [], topics: [], tags: [], total: 0 };
    }

    const crawledAt = new Date().toISOString();
    // The Authorization header is only attached when a key is configured.
    const headers = this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : {};
    const payload = await fetchJson(`${this.apiBase}/experts`, this.fetchFn, { headers });
    // Accept `{results: [...]}`, `{data: [...]}` or a bare array.
    const rawExperts = payload?.results ?? payload?.data ?? (Array.isArray(payload) ? payload : []);
    const experts = rawExperts.map((raw) => normalizeGlgExpert(raw, crawledAt));

    const tagNames = new Set();
    for (const expert of experts) {
      for (const tag of expert.tags) {
        tagNames.add(tag);
      }
    }

    const capped = opts.max ? experts.slice(0, opts.max) : experts;
    return {
      experts: capped,
      topics: [],
      tags: [...tagNames].map((name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
1670
+
791
1671
  // src/sources/index.ts
792
1672
  var registry = new Map;
793
1673
  function registerSource(source) {
@@ -800,6 +1680,10 @@ function listSources() {
800
1680
  return [...registry.values()];
801
1681
  }
802
1682
  registerSource(new IntroSource);
1683
+ registerSource(new MentorCruiseSource);
1684
+ registerSource(new ADPListSource);
1685
+ registerSource(new ClaritySource);
1686
+ registerSource(new GLGSource);
803
1687
 
804
1688
  // src/crawl.ts
805
1689
  async function crawlSource(db, sourceName, opts = {}) {
@@ -808,6 +1692,7 @@ async function crawlSource(db, sourceName, opts = {}) {
808
1692
  throw new Error(`Unknown source "${sourceName}". Run \`experts sources\` to list options.`);
809
1693
  }
810
1694
  const data = await source.crawl(opts);
1695
+ const changes = db.recordChanges(source.name, data.experts);
811
1696
  db.upsertExperts(data.experts);
812
1697
  if (data.topics.length)
813
1698
  db.setTopics(source.name, data.topics);
@@ -816,13 +1701,16 @@ async function crawlSource(db, sourceName, opts = {}) {
816
1701
  db.setMeta(`catalog_total:${source.name}`, String(data.total));
817
1702
  opts.onLog?.("building knowledge graph\u2026");
818
1703
  const graph = db.rebuildGraph();
1704
+ db.rescore(source.name);
1705
+ db.rebuildPersons();
819
1706
  return {
820
1707
  source: source.name,
821
1708
  experts: data.experts.length,
822
1709
  topics: data.topics.map((t) => ({ name: t.name, count: t.expertCount })),
823
1710
  tags: new Set(data.tags.map((t) => t.name)).size,
824
1711
  total: data.total,
825
- graph
1712
+ graph,
1713
+ changes
826
1714
  };
827
1715
  }
828
1716
 
@@ -880,6 +1768,11 @@ function handle(db, req) {
880
1768
  return json(db.enrichmentStats(q.get("source") || undefined));
881
1769
  if (path === "/graph")
882
1770
  return json(db.graphStats());
1771
+ if (path === "/persons")
1772
+ return json(db.personStats());
1773
+ if (path === "/changes") {
1774
+ return json(db.changes({ source: q.get("source") || undefined, limit: num(q.get("limit")) }));
1775
+ }
883
1776
  if (path === "/find") {
884
1777
  const needs = (q.get("needs") || "").split(",").map((s) => s.trim()).filter(Boolean);
885
1778
  return json(db.findByNeeds(needs, {
@@ -922,6 +1815,8 @@ function handle(db, req) {
922
1815
  };
923
1816
  return json(db.list(filters));
924
1817
  }
1818
+ if (path === "/contacts")
1819
+ return json(db.contactStats(q.get("source") || undefined));
925
1820
  const tw = path.match(/^\/experts\/([^/]+)\/(.+)\/tweets$/);
926
1821
  if (tw) {
927
1822
  const e = db.get(decodeURIComponent(tw[2]), decodeURIComponent(tw[1]));
@@ -929,6 +1824,20 @@ function handle(db, req) {
929
1824
  return json({ error: "not found" }, 404);
930
1825
  return json(db.recentTweets(e.source, e.sourceId, num(q.get("limit")) ?? 25));
931
1826
  }
1827
+ const ct = path.match(/^\/experts\/([^/]+)\/(.+)\/contacts$/);
1828
+ if (ct) {
1829
+ const e = db.get(decodeURIComponent(ct[2]), decodeURIComponent(ct[1]));
1830
+ if (!e)
1831
+ return json({ error: "not found" }, 404);
1832
+ return json(db.contacts(e.source, e.sourceId));
1833
+ }
1834
+ const vd = path.match(/^\/experts\/([^/]+)\/(.+)\/videos$/);
1835
+ if (vd) {
1836
+ const e = db.get(decodeURIComponent(vd[2]), decodeURIComponent(vd[1]));
1837
+ if (!e)
1838
+ return json({ error: "not found" }, 404);
1839
+ return json(db.recentVideos(e.source, e.sourceId, num(q.get("limit")) ?? 25));
1840
+ }
932
1841
  const m = path.match(/^\/experts\/([^/]+)\/(.+)$/);
933
1842
  if (m) {
934
1843
  const e = db.get(decodeURIComponent(m[2]), decodeURIComponent(m[1]));
@@ -937,13 +1846,50 @@ function handle(db, req) {
937
1846
  return json({
938
1847
  ...e,
939
1848
  xProfile: db.getXProfile(e.source, e.sourceId),
940
- tweets: db.recentTweets(e.source, e.sourceId, 10)
1849
+ tweets: db.recentTweets(e.source, e.sourceId, 10),
1850
+ contacts: db.contacts(e.source, e.sourceId),
1851
+ videos: db.recentVideos(e.source, e.sourceId, 10)
941
1852
  });
942
1853
  }
943
1854
  return json({ error: "not found", path }, 404);
944
1855
  }
945
1856
  async function handleAsync(db, req) {
946
1857
  const url = new URL(req.url);
1858
+ if (url.pathname.replace(/\/+$/, "") === "/ask") {
1859
+ const q = url.searchParams.get("q") || "";
1860
+ if (!q)
1861
+ return json({ error: "missing q" }, 400);
1862
+ if (db.vectorCount() === 0)
1863
+ return json({ error: "no semantic index; run `experts embed`" }, 409);
1864
+ const [qv] = await getEmbedder().embed([q]);
1865
+ return json(db.semanticSearch(qv, {
1866
+ source: url.searchParams.get("source") || undefined,
1867
+ limit: num(url.searchParams.get("limit"))
1868
+ }));
1869
+ }
1870
+ if (url.pathname.replace(/\/+$/, "") === "/brief") {
1871
+ const q = url.searchParams.get("q") || "";
1872
+ if (!q)
1873
+ return json({ error: "missing q" }, 400);
1874
+ if (db.vectorCount() === 0)
1875
+ return json({ error: "no semantic index; run `experts embed`" }, 409);
1876
+ const limit = num(url.searchParams.get("limit")) ?? 10;
1877
+ const [qv] = await getEmbedder().embed([q]);
1878
+ const raw = db.semanticSearch(qv, { source: url.searchParams.get("source") || undefined, limit: (limit + 5) * 4 });
1879
+ const seen = new Set;
1880
+ const briefLc = q.toLowerCase();
1881
+ const out = [];
1882
+ for (const r of raw) {
1883
+ const pid = db.personIdOf(r.expert.source, r.expert.sourceId);
1884
+ if (seen.has(pid))
1885
+ continue;
1886
+ seen.add(pid);
1887
+ out.push({ ...r, why: r.expert.tags.filter((t) => briefLc.includes(t.toLowerCase())).slice(0, 4) });
1888
+ if (out.length >= limit)
1889
+ break;
1890
+ }
1891
+ return json(out);
1892
+ }
947
1893
  if (req.method === "POST") {
948
1894
  const m = url.pathname.match(/^\/crawl\/([^/]+)\/?$/);
949
1895
  if (m) {