@hasna/experts 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -13,6 +13,7 @@ var __export = (target, all) => {
13
13
  set: __exportSetter.bind(all, name)
14
14
  });
15
15
  };
16
+ var __require = import.meta.require;
16
17
  // src/db.ts
17
18
  import { Database } from "bun:sqlite";
18
19
  import { homedir } from "os";
@@ -61,6 +62,36 @@ function authorityScore(e, inputs = {}, weights = DEFAULT_WEIGHTS) {
61
62
  const raw = weights.rating * rating + weights.reviews * reviews + weights.followers * followers + weights.featured * featured + weights.verified * verified + weights.recency * recency;
62
63
  return Math.round(raw * 1000) / 10;
63
64
  }
65
/**
 * Normalize a price and its free-text unit label to a comparable per-hour figure.
 *
 * @param {number} price - Listed price; falsy or non-positive values are treated as "no price".
 * @param {string} priceUnit - Unit label such as "per 15 min", "per minute", "per hour" or "free".
 * @returns {number|null} The hourly price, 0 for free offerings, or null when the
 *   unit cannot be converted to an hourly rate (e.g. "per month").
 */
function pricePerHour(price, priceUnit) {
  if (!price || price <= 0)
    return /free/i.test(priceUnit) ? 0 : null;
  const unit = (priceUnit || "").toLowerCase();
  // "<N> min" style units (e.g. "per 15 min") are scaled up to a full hour.
  const minMatch = unit.match(/(\d+)\s*min/);
  if (minMatch) {
    const minutes = Number(minMatch[1]);
    // A "0 min" unit would divide by zero and yield Infinity; it is not convertible.
    return minutes > 0 ? Math.round(price * 60 / minutes) : null;
  }
  if (/per\s*min|\/\s*min|minute/.test(unit))
    return price * 60;
  if (/hour|\/\s*hr|per\s*hr/.test(unit))
    return price;
  if (/free/.test(unit))
    return 0;
  return null;
}
80
// Default ranking blend: weights applied to the semantic and authority components.
var DEFAULT_BLEND = { semantic: 0.8, authority: 0.2 };
/**
 * Blend a semantic similarity score with an expert's authority score.
 *
 * @param {number} semantic - Similarity score; clamped to [0, 1]. Missing or NaN counts as 0.
 * @param {number} authority - Authority score; divided by 100 and clamped to [0, 1]. Missing or NaN counts as 0.
 * @param {{semantic: number, authority: number}} [w=DEFAULT_BLEND] - Component weights.
 * @returns {number} The weighted sum of the two clamped components.
 */
function blendScore(semantic, authority, w = DEFAULT_BLEND) {
  // Coerce both inputs the same way so one NaN component cannot poison the whole
  // score (and, downstream, the sort order of results).
  const a = Math.max(0, Math.min(1, (Number(authority) || 0) / 100));
  const s = Math.max(0, Math.min(1, Number(semantic) || 0));
  return w.semantic * s + w.authority * a;
}
86
/**
 * Explain why an expert matched a free-text query by listing which of the
 * expert's topics/tags occur in the query as whole tokens (case-insensitive).
 *
 * @param {string} query - Free-text query; null/undefined is treated as empty.
 * @param {{topics?: string[], tags?: string[]}} e - Expert-like object carrying the labels.
 * @returns {string[]} Up to six distinct matching labels, topics first, in their original casing.
 */
function explainMatch(query, e) {
  const q = (query || "").toLowerCase();
  const hit = (label) => {
    const l = String(label ?? "").trim().toLowerCase();
    // An empty label would otherwise match every query.
    if (!l)
      return false;
    const escaped = l.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Look-arounds instead of \b: they also work for labels that start or end with
    // punctuation ("c++", "node.js") and they reject suffix hits such as
    // "script" inside "javascript,".
    return new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, "u").test(q);
  };
  const matched = [...(e.topics ?? []), ...(e.tags ?? [])].filter(hit);
  return [...new Set(matched)].slice(0, 6);
}
64
95
 
65
96
  // src/embed.ts
66
97
  var STOPWORDS = new Set([
@@ -151,11 +182,40 @@ class OpenAIEmbedder {
151
182
  return data.data.map((d) => d.embedding);
152
183
  }
153
184
  }
154
- function getEmbedder() {
155
- if (process.env.EXPERTS_EMBEDDER === "openai" && process.env.OPENAI_API_KEY) {
185
+
186
/**
 * Embedder backed by a feature-extraction pipeline from @huggingface/transformers.
 * The pipeline is created on first use and then kept on the instance.
 */
class TransformersEmbedder {
  id = "minilm-l6-v2";
  dim = 384;
  // Model name; overridable through the EXPERTS_EMBED_MODEL environment variable.
  model = process.env.EXPERTS_EMBED_MODEL || "Xenova/all-MiniLM-L6-v2";
  extractor = null;
  /** Create the extraction pipeline unless one is already present. */
  async ensure() {
    if (!this.extractor) {
      const { pipeline: buildPipeline } = await import("@huggingface/transformers");
      this.extractor = await buildPipeline("feature-extraction", this.model);
    }
  }
  /**
   * Embed each text in order, one extractor call at a time.
   *
   * @param {Iterable<string>} texts - Inputs; falsy entries are sent as a single space.
   * @returns {Promise<number[][]>} One plain-array vector per input, in input order.
   */
  async embed(texts) {
    await this.ensure();
    const settings = { pooling: "mean", normalize: true };
    const vectors = [];
    for (const text of texts) {
      const result = await this.extractor(text || " ", settings);
      vectors.push(Array.from(result.data));
    }
    return vectors;
  }
}
207
+ async function getEmbedder() {
208
+ const choice = process.env.EXPERTS_EMBEDDER;
209
+ if (choice === "openai" && process.env.OPENAI_API_KEY)
156
210
  return new OpenAIEmbedder;
211
+ if (choice === "hash")
212
+ return new HashingEmbedder;
213
+ try {
214
+ await import("@huggingface/transformers");
215
+ return new TransformersEmbedder;
216
+ } catch {
217
+ return new HashingEmbedder;
157
218
  }
158
- return new HashingEmbedder;
159
219
  }
160
220
  function cosine(a, b) {
161
221
  let dot = 0;
@@ -251,6 +311,51 @@ function clusterPersons(experts) {
251
311
  return out;
252
312
  }
253
313
 
314
+ // src/crypto.ts
315
+ import { createCipheriv, createDecipheriv, createHmac, scryptSync } from "crypto";
316
// Marker that identifies values produced by maybeEncrypt.
var PREFIX = "enc1:";
// Derived key cache, valid only while OPEN_EXPERTS_KEY equals cachedFrom.
var cachedKey = null;
var cachedFrom = null;
/**
 * Derive (and cache) the 32-byte AES key from the OPEN_EXPERTS_KEY secret.
 *
 * @returns {Buffer|null} The derived key, or null when no secret is configured.
 */
function key2() {
  const secret = process.env.OPEN_EXPERTS_KEY;
  if (!secret)
    return null;
  if (cachedKey && cachedFrom === secret)
    return cachedKey;
  cachedKey = scryptSync(secret, "open-experts/contacts/v1", 32);
  cachedFrom = secret;
  return cachedKey;
}
/**
 * @returns {boolean} True when OPEN_EXPERTS_KEY is set to a non-empty value.
 */
function encryptionEnabled() {
  return !!process.env.OPEN_EXPERTS_KEY;
}
/**
 * Encrypt a string with AES-256-GCM when a key is configured.
 *
 * The IV is the first 12 bytes of HMAC-SHA256(key, plaintext), so the same
 * plaintext always produces the same output under the same key.
 *
 * @param {string|null|undefined} plaintext - Value to protect.
 * @returns {string|null|undefined} "enc1:" + base64(iv | tag | ciphertext), or the
 *   input unchanged when no key is set, the input is null/undefined, or it
 *   already carries the prefix.
 */
function maybeEncrypt(plaintext) {
  const k = key2();
  if (!k || plaintext == null)
    return plaintext;
  if (plaintext.startsWith(PREFIX))
    return plaintext;
  const iv = createHmac("sha256", k).update(plaintext).digest().subarray(0, 12);
  const cipher = createCipheriv("aes-256-gcm", k, iv, { authTagLength: 16 });
  const enc = Buffer.concat([cipher.update(plaintext, "utf8"), cipher.final()]);
  const tag = cipher.getAuthTag();
  return PREFIX + Buffer.concat([iv, tag, enc]).toString("base64");
}
/**
 * Reverse maybeEncrypt.
 *
 * @param {string|null|undefined} stored - Value as read from storage.
 * @returns {string|null|undefined} The decrypted text, or the input unchanged when
 *   it is null/undefined, lacks the prefix, or no key is configured.
 * @throws {Error} When the payload is too short to hold an IV and a full tag, or
 *   when authentication fails (wrong key or modified data).
 */
function maybeDecrypt(stored) {
  if (stored == null || !stored.startsWith(PREFIX))
    return stored;
  const k = key2();
  if (!k)
    return stored;
  const raw = Buffer.from(stored.slice(PREFIX.length), "base64");
  // 12-byte IV + 16-byte tag is the minimum (an empty plaintext has no ciphertext).
  if (raw.length < 28)
    throw new Error("maybeDecrypt: encrypted value is truncated or malformed");
  const iv = raw.subarray(0, 12);
  const tag = raw.subarray(12, 28);
  const enc = raw.subarray(28);
  // Pin the tag length so a shortened authentication tag can never be accepted.
  const decipher = createDecipheriv("aes-256-gcm", k, iv, { authTagLength: 16 });
  decipher.setAuthTag(tag);
  return Buffer.concat([decipher.update(enc), decipher.final()]).toString("utf8");
}
358
+
254
359
  // src/db.ts
255
360
  function defaultDbPath() {
256
361
  return process.env.OPEN_EXPERTS_DB || join(homedir(), ".hasna", "experts", "experts.db");
@@ -362,10 +467,11 @@ class ExpertsDB {
362
467
  CREATE INDEX IF NOT EXISTS idx_contacts_expert ON contacts(source, source_id);
363
468
  CREATE INDEX IF NOT EXISTS idx_contacts_status ON contacts(status);
364
469
 
365
- -- Semantic search: one embedding vector per expert.
470
+ -- Semantic search: one embedding vector per expert (text_hash enables
471
+ -- incremental re-embedding \u2014 skip unchanged experts).
366
472
  CREATE TABLE IF NOT EXISTS vectors (
367
473
  source TEXT NOT NULL, source_id TEXT NOT NULL,
368
- embedder TEXT NOT NULL, dim INTEGER, vec BLOB,
474
+ embedder TEXT NOT NULL, dim INTEGER, vec BLOB, text_hash TEXT,
369
475
  PRIMARY KEY (source, source_id)
370
476
  );
371
477
 
@@ -387,6 +493,7 @@ class ExpertsDB {
387
493
  `);
388
494
  this.addColumnIfMissing("experts", "avatar_local", "TEXT");
389
495
  this.addColumnIfMissing("experts", "authority", "REAL DEFAULT 0");
496
+ this.addColumnIfMissing("vectors", "text_hash", "TEXT");
390
497
  }
391
498
  addColumnIfMissing(table, column, type) {
392
499
  const cols = this.db.query(`PRAGMA table_info(${table})`).all();
@@ -473,6 +580,7 @@ class ExpertsDB {
473
580
  extra: JSON.parse(r.extra || "{}"),
474
581
  avatarLocal: r.avatar_local || undefined,
475
582
  authority: r.authority ?? 0,
583
+ pricePerHour: pricePerHour(r.price ?? 0, r.price_unit ?? ""),
476
584
  crawledAt: r.crawled_at
477
585
  };
478
586
  }
@@ -596,11 +704,11 @@ class ExpertsDB {
596
704
  sql += " ORDER BY name";
597
705
  return this.db.query(sql).all(...params);
598
706
  }
599
- setMeta(key2, value) {
600
- this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key2, value);
707
+ setMeta(key3, value) {
708
+ this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key3, value);
601
709
  }
602
- getMeta(key2) {
603
- const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key2);
710
+ getMeta(key3) {
711
+ const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key3);
604
712
  return row ? row.value : null;
605
713
  }
606
714
  stats(source) {
@@ -624,12 +732,12 @@ class ExpertsDB {
624
732
  const nodeIds = new Map;
625
733
  const insertNode = this.db.query("INSERT INTO kg_nodes (type, key, label) VALUES (?, ?, ?) ON CONFLICT(type, key) DO UPDATE SET label=excluded.label RETURNING id");
626
734
  const insertEdge = this.db.query("INSERT OR REPLACE INTO kg_edges (src, dst, rel, weight) VALUES (?, ?, ?, ?)");
627
- const node = (type, key2, label) => {
628
- const ck = `${type}\x00${key2.toLowerCase()}`;
735
+ const node = (type, key3, label) => {
736
+ const ck = `${type}\x00${key3.toLowerCase()}`;
629
737
  const cached = nodeIds.get(ck);
630
738
  if (cached != null)
631
739
  return cached;
632
- const id = insertNode.get(type, key2.toLowerCase(), label).id;
740
+ const id = insertNode.get(type, key3.toLowerCase(), label).id;
633
741
  nodeIds.set(ck, id);
634
742
  return id;
635
743
  };
@@ -639,7 +747,7 @@ class ExpertsDB {
639
747
  for (const topic of e.topics) {
640
748
  insertEdge.run(eId, node("topic", topic, topic), "IN_TOPIC", 1);
641
749
  }
642
- const tweetText = this.recentTweets(e.source, e.sourceId, 30).map((t) => t.text).join(". ");
750
+ const tweetText = this.recentTweets(e.source, e.sourceId, 30).filter((t) => !t.isRetweet).map((t) => t.text).join(". ");
643
751
  const tags = inferTags(expertText(e) + ". " + tweetText, vocabulary);
644
752
  for (const tag of tags) {
645
753
  insertEdge.run(eId, node("tag", tag, tag), "HAS_TAG", 1);
@@ -693,11 +801,11 @@ class ExpertsDB {
693
801
  lastSeen: r.last_seen || ""
694
802
  }));
695
803
  }
696
- expertFromNodeKey(key2) {
697
- const idx = key2.indexOf(":");
804
+ expertFromNodeKey(key3) {
805
+ const idx = key3.indexOf(":");
698
806
  if (idx < 0)
699
807
  return null;
700
- return this.get(key2.slice(idx + 1), key2.slice(0, idx));
808
+ return this.get(key3.slice(idx + 1), key3.slice(0, idx));
701
809
  }
702
810
  findByNeeds(needs, opts = {}) {
703
811
  const cleaned = needs.map((n) => n.trim().toLowerCase()).filter(Boolean);
@@ -824,6 +932,15 @@ class ExpertsDB {
824
932
  };
825
933
  }
826
934
  replaceTweets(source, sourceId, tweets) {
935
+ const norm = (t) => (t || "").toLowerCase().replace(/^rt @\w+:\s*/, "").replace(/https?:\/\/\S+/g, "").replace(/[^a-z0-9 ]/g, "").replace(/\s+/g, " ").trim();
936
+ const seen = new Set;
937
+ const deduped = tweets.filter((t) => {
938
+ const k = norm(t.text);
939
+ if (!k || seen.has(k))
940
+ return false;
941
+ seen.add(k);
942
+ return true;
943
+ });
827
944
  const tx = this.db.transaction((rows) => {
828
945
  this.db.query("DELETE FROM tweets WHERE source = ? AND source_id = ?").run(source, sourceId);
829
946
  const stmt = this.db.query(`
@@ -836,7 +953,7 @@ class ExpertsDB {
836
953
  stmt.run(t.source, t.sourceId, t.tweetId, t.text, t.createdAt, t.retweetCount, t.replyCount, t.likeCount, t.quoteCount, t.impressionCount, t.isRetweet ? 1 : 0, t.isReply ? 1 : 0);
837
954
  }
838
955
  });
839
- tx(tweets);
956
+ tx(deduped);
840
957
  }
841
958
  recentTweets(source, sourceId, limit = 10) {
842
959
  const rows = this.db.query("SELECT * FROM tweets WHERE source = ? AND source_id = ? ORDER BY created_at DESC LIMIT ?").all(source, sourceId, limit);
@@ -954,17 +1071,24 @@ class ExpertsDB {
954
1071
  const log = opts.onLog ?? (() => {});
955
1072
  const experts = this.list({ source: opts.source });
956
1073
  const batch = opts.batch ?? 64;
957
- const stmt = this.db.query("INSERT OR REPLACE INTO vectors (source, source_id, embedder, dim, vec) VALUES (?, ?, ?, ?, ?)");
1074
+ const stmt = this.db.query("INSERT OR REPLACE INTO vectors (source, source_id, embedder, dim, vec, text_hash) VALUES (?, ?, ?, ?, ?, ?)");
1075
+ const existing = new Map(this.db.query("SELECT source, source_id, embedder, text_hash FROM vectors").all().map((r) => [`${r.source}:${r.source_id}`, { embedder: r.embedder, hash: r.text_hash || "" }]));
1076
+ const work = experts.map((e) => ({ e, text: expertEmbedText(e), hash: String(Bun.hash(expertEmbedText(e))) })).filter(({ e, hash }) => {
1077
+ if (opts.force)
1078
+ return true;
1079
+ const prev = existing.get(`${e.source}:${e.sourceId}`);
1080
+ return !prev || prev.embedder !== embedder.id || prev.hash !== hash;
1081
+ });
958
1082
  let done = 0;
959
- for (let i = 0;i < experts.length; i += batch) {
960
- const slice = experts.slice(i, i + batch);
961
- const vecs = await embedder.embed(slice.map((e) => expertEmbedText(e)));
1083
+ for (let i = 0;i < work.length; i += batch) {
1084
+ const slice = work.slice(i, i + batch);
1085
+ const vecs = await embedder.embed(slice.map((w) => w.text));
962
1086
  const tx = this.db.transaction(() => {
963
- slice.forEach((e, j) => stmt.run(e.source, e.sourceId, embedder.id, embedder.dim, packVector(vecs[j])));
1087
+ slice.forEach((w, j) => stmt.run(w.e.source, w.e.sourceId, embedder.id, embedder.dim, packVector(vecs[j]), w.hash));
964
1088
  });
965
1089
  tx();
966
1090
  done += slice.length;
967
- log(` embedded ${done}/${experts.length}`);
1091
+ log(` embedded ${done}/${work.length} (${experts.length - work.length} unchanged)`);
968
1092
  }
969
1093
  this.setMeta("embedder", embedder.id);
970
1094
  this.setMeta("embedded_at", new Date().toISOString());
@@ -977,7 +1101,13 @@ class ExpertsDB {
977
1101
  const where = opts.source ? "WHERE v.source = ?" : "";
978
1102
  const params = opts.source ? [opts.source] : [];
979
1103
  const rows = this.db.query(`SELECT e.*, v.vec AS _vec FROM vectors v JOIN experts e ON e.source=v.source AND e.source_id=v.source_id ${where}`).all(...params);
980
- const scored = rows.map((r) => ({ expert: this.rowToExpert(r), score: cosine(queryVec, unpackVector(r._vec)) }));
1104
+ const blend = opts.blend !== false;
1105
+ const scored = rows.map((r) => {
1106
+ const expert = this.rowToExpert(r);
1107
+ const semantic = cosine(queryVec, unpackVector(r._vec));
1108
+ const score = blend ? blendScore(semantic, expert.authority ?? 0) : semantic;
1109
+ return { expert, score, semantic };
1110
+ });
981
1111
  scored.sort((a, b) => b.score - a.score);
982
1112
  return scored.slice(0, opts.limit ?? 25);
983
1113
  }
@@ -1067,7 +1197,7 @@ class ExpertsDB {
1067
1197
  $source: c.source,
1068
1198
  $source_id: c.sourceId,
1069
1199
  $type: c.type,
1070
- $value: c.value,
1200
+ $value: maybeEncrypt(c.value),
1071
1201
  $label: c.label,
1072
1202
  $provider: c.provider,
1073
1203
  $confidence: c.confidence,
@@ -1077,7 +1207,7 @@ class ExpertsDB {
1077
1207
  });
1078
1208
  }
1079
1209
  setContactStatus(source, sourceId, type, value, status) {
1080
- this.db.query("UPDATE contacts SET status = ?, verified_at = ? WHERE source = ? AND source_id = ? AND type = ? AND value = ?").run(status, new Date().toISOString(), source, sourceId, type, value);
1210
+ this.db.query("UPDATE contacts SET status = ?, verified_at = ? WHERE source = ? AND source_id = ? AND type = ? AND value = ?").run(status, new Date().toISOString(), source, sourceId, type, maybeEncrypt(value));
1081
1211
  }
1082
1212
  contacts(source, sourceId) {
1083
1213
  const rows = this.db.query("SELECT * FROM contacts WHERE source = ? AND source_id = ? ORDER BY type, confidence DESC").all(source, sourceId);
@@ -1085,7 +1215,7 @@ class ExpertsDB {
1085
1215
  source: r.source,
1086
1216
  sourceId: r.source_id,
1087
1217
  type: r.type,
1088
- value: r.value,
1218
+ value: maybeDecrypt(r.value),
1089
1219
  label: r.label || "",
1090
1220
  provider: r.provider || "",
1091
1221
  confidence: r.confidence ?? 0,
@@ -1110,7 +1240,7 @@ class ExpertsDB {
1110
1240
  source: r.source,
1111
1241
  sourceId: r.source_id,
1112
1242
  type: r.type,
1113
- value: r.value,
1243
+ value: maybeDecrypt(r.value),
1114
1244
  label: r.label || "",
1115
1245
  provider: r.provider || "",
1116
1246
  confidence: r.confidence ?? 0,
@@ -1419,34 +1549,37 @@ async function fetchJson(url, fetchFn, init = {}) {
1419
1549
  }
1420
1550
 
1421
1551
  // src/sources/mentorcruise.ts
1552
/**
 * Reduce an HTML fragment to plain text.
 *
 * Tags and character entities are each replaced by a space, then runs of
 * whitespace are collapsed and the result is trimmed.
 *
 * @param {string} s - HTML input; falsy values are treated as the empty string.
 * @returns {string} The flattened text.
 */
function stripHtml(s) {
  const withoutTags = (s || "").replace(/<[^>]+>/g, " ");
  const withoutEntities = withoutTags.replace(/&[a-z#0-9]+;/gi, " ");
  const collapsed = withoutEntities.replace(/\s+/g, " ");
  return collapsed.trim();
}
1422
1555
  function normalizeMentor(m, crawledAt) {
1423
- const slug = m.slug || slugify(m.name || String(m.id ?? ""));
1556
+ const path = m.get_absolute_url || "";
1557
+ const slug = path.match(/\/mentor\/([^/]+)/)?.[1] || slugify(m.get_full_name || String(m.objectID ?? ""));
1424
1558
  const socials = {};
1425
1559
  if (m.twitter)
1426
1560
  socials.twitter = m.twitter.startsWith("http") ? m.twitter : `https://x.com/${m.twitter}`;
1427
1561
  if (m.linkedin)
1428
1562
  socials.linkedin = m.linkedin;
1563
+ const price = m.all_prices?.length ? Math.min(...m.all_prices) : Math.round(m.avg_price_per_call ?? 0);
1429
1564
  return makeExpert({
1430
1565
  source: "mentorcruise",
1431
- sourceId: String(m.id ?? slug),
1566
+ sourceId: String(m.objectID ?? slug),
1432
1567
  slug,
1433
- url: `https://mentorcruise.com/mentor/${slug}/`,
1434
- fullName: m.name ?? [m.first_name, m.last_name].filter(Boolean).join(" "),
1435
- firstName: m.first_name ?? "",
1436
- lastName: m.last_name ?? "",
1437
- title: m.job_title ?? "",
1438
- bio: m.bio ?? "",
1439
- avatar: m.avatar ?? m.photo ?? "",
1440
- price: m.price ?? 0,
1441
- priceCurrency: m.currency ?? "USD",
1442
- priceUnit: m.price ? "per month" : "",
1443
- rating: m.rating ?? 0,
1444
- ratingCount: m.reviews_count ?? 0,
1445
- verified: Boolean(m.verified),
1568
+ url: path ? `https://mentorcruise.com${path}` : `https://mentorcruise.com/mentor/${slug}/`,
1569
+ fullName: m.get_full_name ?? "",
1570
+ title: (m.cleaned_job_title ?? []).join(", "),
1571
+ bio: stripHtml(m.bio_formatted ?? ""),
1572
+ avatar: m.get_profile_picture ?? "",
1573
+ price,
1574
+ priceCurrency: "USD",
1575
+ priceUnit: price ? "per month" : "",
1576
+ rating: m.avg_rating_float_one_decimal ?? 0,
1577
+ ratingCount: m.number_of_reviews ?? 0,
1446
1578
  featured: Boolean(m.is_top_mentor),
1447
- topics: m.categories ?? [],
1448
- tags: m.skills ?? [],
1579
+ topics: m.get_industries ?? [],
1580
+ tags: m.get_skills ?? [],
1449
1581
  socials,
1582
+ extra: { company: m.company ?? "", location: m.get_location_display ?? "", avgPricePerCall: m.avg_price_per_call ?? 0 },
1450
1583
  crawledAt
1451
1584
  });
1452
1585
  }
@@ -1456,44 +1589,61 @@ class MentorCruiseSource {
1456
1589
  description = "MentorCruise \u2014 long-term mentorship from vetted mentors";
1457
1590
  website = "https://mentorcruise.com";
1458
1591
  fetchFn;
1459
- apiBase;
1592
+ appId;
1593
+ apiKey;
1594
+ index;
1460
1595
  pageSize;
1461
1596
  constructor(opts = {}) {
1462
1597
  this.fetchFn = opts.fetchFn ?? fetch;
1463
- this.apiBase = opts.apiBase ?? process.env.MENTORCRUISE_API_BASE ?? "https://mentorcruise.com/api";
1464
- this.pageSize = opts.pageSize ?? 50;
1598
+ this.appId = opts.appId ?? process.env.MENTORCRUISE_ALGOLIA_APP_ID ?? "YD3XA4V91L";
1599
+ this.apiKey = opts.apiKey ?? process.env.MENTORCRUISE_ALGOLIA_API_KEY ?? "454b55a2e50bc884225318d99b0dad1a";
1600
+ this.index = opts.index ?? process.env.MENTORCRUISE_ALGOLIA_INDEX ?? "MentorProfile_prod";
1601
+ this.pageSize = opts.pageSize ?? 200;
1465
1602
  }
1466
1603
  async crawl(opts = {}) {
1467
1604
  const log = opts.onLog ?? (() => {});
1468
1605
  const crawledAt = new Date().toISOString();
1606
+ const url = `https://${this.appId}-dsn.algolia.net/1/indexes/${this.index}/query`;
1469
1607
  const experts = [];
1470
1608
  const tags = new Set;
1471
- let offset = 0;
1472
- for (;; ) {
1473
- const data = await fetchJson(`${this.apiBase}/mentors/?limit=${this.pageSize}&offset=${offset}`, this.fetchFn);
1474
- const items = data?.results ?? data?.data ?? (Array.isArray(data) ? data : []);
1475
- if (!items.length)
1609
+ let page = 0;
1610
+ let pages = 1;
1611
+ while (page < pages) {
1612
+ let data;
1613
+ try {
1614
+ const res = await this.fetchFn(url, {
1615
+ method: "POST",
1616
+ headers: {
1617
+ "X-Algolia-Application-Id": this.appId,
1618
+ "X-Algolia-API-Key": this.apiKey,
1619
+ "Content-Type": "application/json"
1620
+ },
1621
+ body: JSON.stringify({ params: `hitsPerPage=${this.pageSize}&page=${page}` })
1622
+ });
1623
+ if (!res.ok)
1624
+ break;
1625
+ data = await res.json();
1626
+ } catch {
1476
1627
  break;
1477
- for (const m of items) {
1478
- const e = normalizeMentor(m, crawledAt);
1628
+ }
1629
+ pages = data.nbPages ?? 1;
1630
+ for (const hit of data.hits ?? []) {
1631
+ const e = normalizeMentor(hit, crawledAt);
1479
1632
  experts.push(e);
1480
1633
  for (const t of e.tags)
1481
1634
  tags.add(t);
1482
1635
  }
1483
- offset += items.length;
1484
- log(` mentorcruise: ${experts.length}`);
1636
+ log(` mentorcruise: ${experts.length}/${data.nbHits ?? "?"}`);
1637
+ page++;
1485
1638
  if (opts.max && experts.length >= opts.max)
1486
1639
  break;
1487
- if (items.length < this.pageSize)
1488
- break;
1489
1640
  }
1490
1641
  if (experts.length === 0) {
1491
- log("mentorcruise: no public listing reachable (set MENTORCRUISE_API_BASE or provide a fetchFn).");
1642
+ log("mentorcruise: Algolia returned nothing (set MENTORCRUISE_ALGOLIA_* or inject fetchFn).");
1492
1643
  }
1493
- const topics = [];
1494
1644
  return {
1495
1645
  experts: opts.max ? experts.slice(0, opts.max) : experts,
1496
- topics,
1646
+ topics: [],
1497
1647
  tags: [...tags].map((name) => ({ name, topic: "" })),
1498
1648
  total: experts.length
1499
1649
  };
@@ -1714,6 +1864,9 @@ async function crawlSource(db, sourceName, opts = {}) {
1714
1864
  throw new Error(`Unknown source "${sourceName}". Run \`experts sources\` to list options.`);
1715
1865
  }
1716
1866
  const data = await source.crawl(opts);
1867
+ if (data.experts.length === 0 && db.count(source.name) > 0) {
1868
+ opts.onLog?.(`\u26A0 ${source.name} returned 0 experts but ${db.count(source.name)} are stored \u2014 possible API drift; not overwriting.`);
1869
+ }
1717
1870
  const changes = db.recordChanges(source.name, data.experts);
1718
1871
  db.upsertExperts(data.experts);
1719
1872
  if (data.topics.length)
@@ -2348,10 +2501,68 @@ function normalizePhone(v) {
2348
2501
  return "+" + cleaned.slice(1).replace(/\D/g, "");
2349
2502
  return cleaned;
2350
2503
  }
2504
/**
 * Heuristic check for placeholder phone numbers.
 *
 * After normalization and removal of non-digits, a number is considered fake when it is
 * one digit repeated, the sequence 1234567890 / 0123456789, or a 2–3 digit group
 * repeated at least three times.
 *
 * @param {string} v - Raw phone value.
 * @returns {boolean} True when the digits match one of the placeholder patterns.
 */
function looksFakePhone(v) {
  const digits = normalizePhone(v).replace(/\D/g, "");
  const placeholderPatterns = [/^(\d)\1+$/, /^(\d{2,3})\1\1+$/];
  const placeholderSequences = ["1234567890", "0123456789"];
  return placeholderPatterns.some((pattern) => pattern.test(digits)) || placeholderSequences.includes(digits);
}
2351
2514
  function validatePhone(v) {
2352
2515
  const n = normalizePhone(v);
2353
2516
  const digits = n.replace(/\D/g, "");
2354
- return digits.length >= 7 && digits.length <= 15 ? "valid" : "invalid";
2517
+ if (digits.length < 7 || digits.length > 15)
2518
+ return "invalid";
2519
+ if (looksFakePhone(v))
2520
+ return "invalid";
2521
+ return "valid";
2522
+ }
2523
/**
 * Map the numeric reply code of an SMTP RCPT TO command to a verification status.
 *
 * @param {number} code - Three-digit SMTP reply code.
 * @returns {"valid"|"invalid"|"unknown"} "valid" for 250/251, "invalid" for
 *   550/551/553/554, "unknown" for every other value.
 */
function interpretRcptCode(code) {
  switch (code) {
    case 250:
    case 251:
      return "valid";
    case 550:
    case 551:
    case 553:
    case 554:
      return "invalid";
    default:
      return "unknown";
  }
}
2530
/**
 * Ask a mail exchanger whether it would accept mail for an address by running
 * HELO / MAIL FROM / RCPT TO on port 25, without sending any message.
 *
 * @param {string} email - Address to probe.
 * @param {string} mxHost - Host name of the mail exchanger to connect to.
 * @returns {Promise<"valid"|"invalid"|"unknown">} The interpreted RCPT TO reply.
 *   "invalid" is also returned, without connecting, for addresses that cannot be
 *   placed safely in an SMTP command. "unknown" covers timeouts, socket errors,
 *   early disconnects and unexpected replies before RCPT TO. Never rejects.
 */
async function smtpProbe(email, mxHost) {
  // Whitespace (including CR/LF), control characters or angle brackets would let the
  // address terminate the RCPT command and inject further SMTP commands.
  if (typeof email !== "string" || email === "" || /[\s<>\x00-\x1f\x7f]/.test(email))
    return "invalid";
  const net = await import("net");
  return new Promise((resolve) => {
    const commands = [
      "HELO open-experts.local",
      "MAIL FROM:<probe@open-experts.local>",
      `RCPT TO:<${email}>`
    ];
    // step = number of commands already sent; 0 means "waiting for the greeting".
    let step = 0;
    let settled = false;
    let pending = "";
    const sock = net.createConnection({ host: mxHost, port: 25, timeout: 7000 });
    const done = (r) => {
      if (settled)
        return;
      settled = true;
      // destroy() rather than end(): an unresponsive peer must not keep the socket open.
      sock.destroy();
      resolve(r);
    };
    const onReply = (code) => {
      if (step === commands.length) {
        done(interpretRcptCode(code));
        return;
      }
      // The greeting must be 220 and HELO / MAIL FROM must be 250; otherwise the
      // dialogue cannot reach a meaningful RCPT verdict.
      const expected = step === 0 ? 220 : 250;
      if (code !== expected) {
        done("unknown");
        return;
      }
      sock.write(`${commands[step]}\r\n`);
      step += 1;
    };
    sock.on("timeout", () => done("unknown"));
    sock.on("error", () => done("unknown"));
    // Without this the promise would never settle if the server hangs up early.
    sock.on("close", () => done("unknown"));
    sock.on("data", (chunk) => {
      // A reply may span several lines ("250-...") and several TCP chunks, so buffer
      // and act only on complete, final reply lines.
      pending += chunk.toString();
      const lines = pending.split(/\r?\n/);
      pending = lines.pop() ?? "";
      for (const line of lines) {
        if (settled)
          return;
        const reply = /^(\d{3})(-?)/.exec(line);
        if (!reply || reply[2] === "-")
          continue;
        onReply(Number(reply[1]));
      }
    });
  });
}
2356
2567
  async function validateEmail(email, resolver = resolveMx) {
2357
2568
  const e = normalizeEmail(email);
@@ -2521,12 +2732,25 @@ async function verifyContacts(db, opts = {}) {
2521
2732
  const delayMs = opts.delayMs ?? 50;
2522
2733
  const targets = db.contactsToVerify({ source: opts.source, limit: opts.max });
2523
2734
  const res = { checked: 0, valid: 0, invalid: 0, unknown: 0 };
2735
+ const resolver = opts.resolver;
2736
+ const prober = opts.prober ?? smtpProbe;
2524
2737
  for (const c of targets) {
2525
2738
  let status;
2526
- if (c.type === "email")
2527
- status = await validateEmail(c.value, opts.resolver);
2528
- else
2739
+ if (c.type === "email") {
2740
+ status = await validateEmail(c.value, resolver);
2741
+ if (opts.smtp && status === "valid") {
2742
+ try {
2743
+ const mx = await (resolver ?? (await import("dns/promises")).resolveMx)(c.value.split("@")[1]);
2744
+ if (mx[0]?.exchange) {
2745
+ const probed = await prober(c.value, mx[0].exchange);
2746
+ if (probed !== "unknown")
2747
+ status = probed;
2748
+ }
2749
+ } catch {}
2750
+ }
2751
+ } else {
2529
2752
  status = validatePhone(c.value);
2753
+ }
2530
2754
  db.setContactStatus(c.source, c.sourceId, c.type, c.value, status);
2531
2755
  res.checked++;
2532
2756
  res[status]++;
@@ -2861,7 +3085,7 @@ function _supportsColor(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
2861
3085
  return 1;
2862
3086
  }
2863
3087
  if ("CI" in env) {
2864
- if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key2) => (key2 in env))) {
3088
+ if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key3) => (key3 in env))) {
2865
3089
  return 3;
2866
3090
  }
2867
3091
  if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => (sign in env)) || env.CI_NAME === "codeship") {
@@ -3262,16 +3486,22 @@ export {
3262
3486
  tokenize,
3263
3487
  syncContacts,
3264
3488
  socialHandles,
3489
+ smtpProbe,
3265
3490
  slugFromUrl,
3266
3491
  samePerson,
3267
3492
  registerSource,
3493
+ pricePerHour,
3268
3494
  normalizePhone,
3269
3495
  normalizeName,
3270
3496
  normalizeIntroExpert,
3271
3497
  normalizeEmail,
3498
+ maybeEncrypt,
3499
+ maybeDecrypt,
3500
+ looksFakePhone,
3272
3501
  liveTweets,
3273
3502
  listSources,
3274
3503
  linkedinHandle,
3504
+ interpretRcptCode,
3275
3505
  inferTags,
3276
3506
  htmlToText,
3277
3507
  handleFromSocial,
@@ -3280,6 +3510,7 @@ export {
3280
3510
  exports_format as format,
3281
3511
  extractJson,
3282
3512
  extractExaContacts,
3513
+ explainMatch,
3283
3514
  expertToContactRecord,
3284
3515
  expertText,
3285
3516
  expertEmbedText,
@@ -3288,6 +3519,7 @@ export {
3288
3519
  enrichSite,
3289
3520
  enrichLinkedIn,
3290
3521
  enrichExpert,
3522
+ encryptionEnabled,
3291
3523
  downloadAvatar,
3292
3524
  discoverContacts,
3293
3525
  defaultRunner,
@@ -3295,9 +3527,11 @@ export {
3295
3527
  crawlSource,
3296
3528
  cosine,
3297
3529
  clusterPersons,
3530
+ blendScore,
3298
3531
  backfillAvatars,
3299
3532
  avatarBasename,
3300
3533
  authorityScore,
3534
+ TransformersEmbedder,
3301
3535
  OpenAIEmbedder,
3302
3536
  JsonSink,
3303
3537
  IntroSource,
@@ -3306,6 +3540,7 @@ export {
3306
3540
  ExpertsDB,
3307
3541
  ExaWebsetsProvider,
3308
3542
  DEFAULT_WEIGHTS,
3543
+ DEFAULT_BLEND,
3309
3544
  CredentialPool,
3310
3545
  ConnectorsClient,
3311
3546
  CliSink
package/dist/score.d.ts CHANGED
@@ -22,4 +22,32 @@ export interface ScoreInputs {
22
22
  * reach, featured/verified flags, and recency of activity.
23
23
  */
24
24
  export declare function authorityScore(e: Expert, inputs?: ScoreInputs, weights?: ScoreWeights): number;
25
+ /**
26
+ * Normalize a price + unit to a comparable USD/hour figure so prices can be
27
+ * compared across sources (intro=per-15-min, Clarity=per-minute, etc.).
28
+ * Returns null when the unit isn't an hourly-comparable rate (e.g. per-month).
29
+ */
30
+ export declare function pricePerHour(price: number, priceUnit: string): number | null;
31
+ export interface BlendWeights {
32
+ /** Weight on semantic similarity (0..1 cosine). */
33
+ semantic: number;
34
+ /** Weight on the expert's authority (0..100, normalized here). */
35
+ authority: number;
36
+ }
37
+ export declare const DEFAULT_BLEND: BlendWeights;
38
+ /**
39
+ * Blend a semantic similarity score with an expert's authority so ranking isn't
40
+ * driven by text match alone — a strong text match from an unrated, no-review
41
+ * advisor shouldn't outrank a verified top expert with a near-equal match.
42
+ * Returns 0..1.
43
+ */
44
+ export declare function blendScore(semantic: number, authority: number, w?: BlendWeights): number;
45
+ /**
46
+ * Explain why an expert matched a free-text query: which of their tags/topics
47
+ * appear in the query (case-insensitive whole-token overlap). Pure + testable.
48
+ */
49
+ export declare function explainMatch(query: string, e: {
50
+ tags: string[];
51
+ topics: string[];
52
+ }): string[];
25
53
  //# sourceMappingURL=score.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"score.d.ts","sourceRoot":"","sources":["../src/score.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,KAAK,EAAE,MAAM,EAAY,MAAM,SAAS,CAAC;AAEhD,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,eAAO,MAAM,eAAe,EAAE,YAO7B,CAAC;AAMF,MAAM,WAAW,WAAW;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wDAAwD;IACxD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,GAAE,WAAgB,EAAE,OAAO,GAAE,YAA8B,GAAG,MAAM,CAkBnH"}
1
+ {"version":3,"file":"score.d.ts","sourceRoot":"","sources":["../src/score.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,KAAK,EAAE,MAAM,EAAY,MAAM,SAAS,CAAC;AAEhD,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,eAAO,MAAM,eAAe,EAAE,YAO7B,CAAC;AAMF,MAAM,WAAW,WAAW;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wDAAwD;IACxD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,GAAE,WAAgB,EAAE,OAAO,GAAE,YAA8B,GAAG,MAAM,CAkBnH;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAS5E;AAED,MAAM,WAAW,YAAY;IAC3B,mDAAmD;IACnD,QAAQ,EAAE,MAAM,CAAC;IACjB,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,eAAO,MAAM,aAAa,EAAE,YAAgD,CAAC;AAE7E;;;;;GAKG;AACH,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,GAAE,YAA4B,GAAG,MAAM,CAIvG;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE;IAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,GAAG,MAAM,EAAE,CAQ7F"}
package/dist/sdk.js CHANGED
@@ -13,6 +13,7 @@ var __export = (target, all) => {
13
13
  set: __exportSetter.bind(all, name)
14
14
  });
15
15
  };
16
+ var __require = import.meta.require;
16
17
 
17
18
  // src/sdk.ts
18
19
  class ExpertsClient {