@hasna/experts 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +353 -81
- package/dist/contacts.d.ts +15 -1
- package/dist/contacts.d.ts.map +1 -1
- package/dist/crawl.d.ts.map +1 -1
- package/dist/crypto.d.ts +6 -0
- package/dist/crypto.d.ts.map +1 -0
- package/dist/db.d.ts +16 -4
- package/dist/db.d.ts.map +1 -1
- package/dist/embed.d.ts +20 -2
- package/dist/embed.d.ts.map +1 -1
- package/dist/index.d.ts +4 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +302 -67
- package/dist/score.d.ts +28 -0
- package/dist/score.d.ts.map +1 -1
- package/dist/sdk.js +1 -0
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +218 -68
- package/dist/sources/mentorcruise.d.ts +26 -23
- package/dist/sources/mentorcruise.d.ts.map +1 -1
- package/dist/types.d.ts +2 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +5 -2
package/dist/index.js
CHANGED
|
@@ -13,6 +13,7 @@ var __export = (target, all) => {
|
|
|
13
13
|
set: __exportSetter.bind(all, name)
|
|
14
14
|
});
|
|
15
15
|
};
|
|
16
|
+
var __require = import.meta.require;
|
|
16
17
|
// src/db.ts
|
|
17
18
|
import { Database } from "bun:sqlite";
|
|
18
19
|
import { homedir } from "os";
|
|
@@ -61,6 +62,36 @@ function authorityScore(e, inputs = {}, weights = DEFAULT_WEIGHTS) {
|
|
|
61
62
|
const raw = weights.rating * rating + weights.reviews * reviews + weights.followers * followers + weights.featured * featured + weights.verified * verified + weights.recency * recency;
|
|
62
63
|
return Math.round(raw * 1000) / 10;
|
|
63
64
|
}
|
|
65
|
+
/**
 * Normalize a price and its free-text unit to a per-hour figure.
 *
 * @param price     Amount charged per unit; falsy or non-positive means "no price".
 * @param priceUnit Unit label, e.g. "per 15 min", "per minute", "per hour", "free".
 * @returns The hourly equivalent, 0 when the unit says "free", or null when the
 *          unit cannot be expressed per hour (including a zero-length "0 min" session).
 */
function pricePerHour(price, priceUnit) {
  // Coerce once so a missing or non-string unit can never throw below.
  const unit = String(priceUnit ?? "").toLowerCase();
  if (!price || price <= 0)
    return unit.includes("free") ? 0 : null;
  // "per 15 min" style units: scale the session price up to sixty minutes.
  const session = unit.match(/(\d+)\s*min/);
  if (session) {
    const minutes = Number(session[1]);
    // A "0 min" unit would divide by zero and report an Infinity rate.
    if (minutes === 0)
      return null;
    return Math.round(price * 60 / minutes);
  }
  if (/per\s*min|\/\s*min|minute/.test(unit))
    return price * 60;
  if (/hour|\/\s*hr|per\s*hr/.test(unit))
    return price;
  if (unit.includes("free"))
    return 0;
  return null;
}
|
|
80
|
+
/** Default ranking weights: 80% semantic similarity, 20% authority. */
var DEFAULT_BLEND = { semantic: 0.8, authority: 0.2 };
/**
 * Blend a semantic similarity score with an authority score.
 *
 * @param semantic  Similarity score; clamped to the 0..1 range.
 * @param authority Authority on a 0..100 scale; divided by 100 and clamped to 0..1.
 * @param w         Weights applied to each clamped component.
 * @returns `w.semantic * semantic + w.authority * authority` over the clamped inputs.
 */
function blendScore(semantic, authority, w = DEFAULT_BLEND) {
  // A NaN component would make the whole score NaN and break sort comparators,
  // so a missing or non-numeric input contributes 0 instead.
  const clamp01 = (x) => {
    const v = Number(x);
    return Number.isNaN(v) ? 0 : Math.max(0, Math.min(1, v));
  };
  const a = clamp01((authority || 0) / 100);
  const s = clamp01(semantic);
  return w.semantic * s + w.authority * a;
}
|
|
86
|
+
function explainMatch(query, e) {
|
|
87
|
+
const q = ` ${(query || "").toLowerCase()} `;
|
|
88
|
+
const hit = (label) => {
|
|
89
|
+
const l = label.toLowerCase();
|
|
90
|
+
return q.includes(` ${l} `) || q.includes(`${l},`) || q.includes(`${l}.`) || new RegExp(`\\b${l.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`).test(q);
|
|
91
|
+
};
|
|
92
|
+
const matched = [...e.topics, ...e.tags].filter(hit);
|
|
93
|
+
return [...new Set(matched)].slice(0, 6);
|
|
94
|
+
}
|
|
64
95
|
|
|
65
96
|
// src/embed.ts
|
|
66
97
|
var STOPWORDS = new Set([
|
|
@@ -151,11 +182,40 @@ class OpenAIEmbedder {
|
|
|
151
182
|
return data.data.map((d) => d.embedding);
|
|
152
183
|
}
|
|
153
184
|
}
|
|
154
|
-
|
|
155
|
-
|
|
185
|
+
|
|
186
|
+
class TransformersEmbedder {
|
|
187
|
+
id = "minilm-l6-v2";
|
|
188
|
+
dim = 384;
|
|
189
|
+
model = process.env.EXPERTS_EMBED_MODEL || "Xenova/all-MiniLM-L6-v2";
|
|
190
|
+
extractor = null;
|
|
191
|
+
async ensure() {
|
|
192
|
+
if (this.extractor)
|
|
193
|
+
return;
|
|
194
|
+
const { pipeline } = await import("@huggingface/transformers");
|
|
195
|
+
this.extractor = await pipeline("feature-extraction", this.model);
|
|
196
|
+
}
|
|
197
|
+
async embed(texts) {
|
|
198
|
+
await this.ensure();
|
|
199
|
+
const out = [];
|
|
200
|
+
for (const t of texts) {
|
|
201
|
+
const r = await this.extractor(t || " ", { pooling: "mean", normalize: true });
|
|
202
|
+
out.push(Array.from(r.data));
|
|
203
|
+
}
|
|
204
|
+
return out;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
/**
 * Choose an embedder from the environment.
 *
 * `EXPERTS_EMBEDDER=openai` (with `OPENAI_API_KEY` set) selects OpenAI and
 * `EXPERTS_EMBEDDER=hash` selects the hashing embedder. Anything else tries
 * `@huggingface/transformers` and falls back to hashing when the import fails.
 */
async function getEmbedder() {
  const requested = process.env.EXPERTS_EMBEDDER;
  const openAiUsable = requested === "openai" && process.env.OPENAI_API_KEY;
  if (openAiUsable)
    return new OpenAIEmbedder();
  if (requested !== "hash") {
    // Probe the optional dependency; a failed import means "not installed".
    const installed = await import("@huggingface/transformers").then(() => true, () => false);
    if (installed)
      return new TransformersEmbedder();
  }
  return new HashingEmbedder();
}
|
|
160
220
|
function cosine(a, b) {
|
|
161
221
|
let dot = 0;
|
|
@@ -251,6 +311,51 @@ function clusterPersons(experts) {
|
|
|
251
311
|
return out;
|
|
252
312
|
}
|
|
253
313
|
|
|
314
|
+
// src/crypto.ts
|
|
315
|
+
import { createCipheriv, createDecipheriv, createHmac, scryptSync } from "crypto";
|
|
316
|
+
var PREFIX = "enc1:";
|
|
317
|
+
var cachedKey = null;
|
|
318
|
+
var cachedFrom = null;
|
|
319
|
+
function key2() {
|
|
320
|
+
const secret = process.env.OPEN_EXPERTS_KEY;
|
|
321
|
+
if (!secret)
|
|
322
|
+
return null;
|
|
323
|
+
if (cachedKey && cachedFrom === secret)
|
|
324
|
+
return cachedKey;
|
|
325
|
+
cachedKey = scryptSync(secret, "open-experts/contacts/v1", 32);
|
|
326
|
+
cachedFrom = secret;
|
|
327
|
+
return cachedKey;
|
|
328
|
+
}
|
|
329
|
+
/** Report whether `OPEN_EXPERTS_KEY` is set to a non-empty value. */
function encryptionEnabled() {
  return Boolean(process.env.OPEN_EXPERTS_KEY);
}
|
|
332
|
+
/**
 * Encrypt a value with AES-256-GCM when a key is configured.
 *
 * The 12-byte IV is the start of an HMAC-SHA256 of the plaintext under the
 * same key, so equal plaintexts always produce equal stored values.
 *
 * @param plaintext Value to protect; null/undefined and already-prefixed values pass through.
 * @returns `PREFIX` + base64(iv | tag | ciphertext), or the input unchanged when
 *          no key is configured.
 */
function maybeEncrypt(plaintext) {
  const secretKey = key2();
  // Short-circuit order matters: startsWith is only reached for non-null input.
  const passThrough = !secretKey || plaintext == null || plaintext.startsWith(PREFIX);
  if (passThrough)
    return plaintext;
  const nonce = createHmac("sha256", secretKey).update(plaintext).digest().subarray(0, 12);
  const gcm = createCipheriv("aes-256-gcm", secretKey, nonce);
  const body = Buffer.concat([gcm.update(plaintext, "utf8"), gcm.final()]);
  const envelope = Buffer.concat([nonce, gcm.getAuthTag(), body]);
  return PREFIX + envelope.toString("base64");
}
|
|
344
|
+
/**
 * Reverse `maybeEncrypt`: decode a `PREFIX`-tagged value and decrypt it.
 *
 * @param stored Value read from storage.
 * @returns The plaintext, or `stored` unchanged when it is null/undefined, lacks
 *          the prefix, no key is configured, or the payload is too short to be
 *          ciphertext.
 * @throws When a full-length payload fails GCM authentication (wrong key or tampering).
 */
function maybeDecrypt(stored) {
  if (stored == null || !stored.startsWith(PREFIX))
    return stored;
  const k = key2();
  if (!k)
    return stored;
  const raw = Buffer.from(stored.slice(PREFIX.length), "base64");
  // Layout is 12-byte IV + 16-byte tag + ciphertext. maybeEncrypt stores any value
  // that already starts with the prefix as-is, so a shorter payload is plain text
  // that merely looks tagged; return it rather than fail with an opaque crypto error.
  if (raw.length < 28)
    return stored;
  const iv = raw.subarray(0, 12);
  const tag = raw.subarray(12, 28);
  const enc = raw.subarray(28);
  const decipher = createDecipheriv("aes-256-gcm", k, iv);
  decipher.setAuthTag(tag);
  return Buffer.concat([decipher.update(enc), decipher.final()]).toString("utf8");
}
|
|
358
|
+
|
|
254
359
|
// src/db.ts
|
|
255
360
|
function defaultDbPath() {
|
|
256
361
|
return process.env.OPEN_EXPERTS_DB || join(homedir(), ".hasna", "experts", "experts.db");
|
|
@@ -362,10 +467,11 @@ class ExpertsDB {
|
|
|
362
467
|
CREATE INDEX IF NOT EXISTS idx_contacts_expert ON contacts(source, source_id);
|
|
363
468
|
CREATE INDEX IF NOT EXISTS idx_contacts_status ON contacts(status);
|
|
364
469
|
|
|
365
|
-
-- Semantic search: one embedding vector per expert
|
|
470
|
+
-- Semantic search: one embedding vector per expert (text_hash enables
|
|
471
|
+
-- incremental re-embedding \u2014 skip unchanged experts).
|
|
366
472
|
CREATE TABLE IF NOT EXISTS vectors (
|
|
367
473
|
source TEXT NOT NULL, source_id TEXT NOT NULL,
|
|
368
|
-
embedder TEXT NOT NULL, dim INTEGER, vec BLOB,
|
|
474
|
+
embedder TEXT NOT NULL, dim INTEGER, vec BLOB, text_hash TEXT,
|
|
369
475
|
PRIMARY KEY (source, source_id)
|
|
370
476
|
);
|
|
371
477
|
|
|
@@ -387,6 +493,7 @@ class ExpertsDB {
|
|
|
387
493
|
`);
|
|
388
494
|
this.addColumnIfMissing("experts", "avatar_local", "TEXT");
|
|
389
495
|
this.addColumnIfMissing("experts", "authority", "REAL DEFAULT 0");
|
|
496
|
+
this.addColumnIfMissing("vectors", "text_hash", "TEXT");
|
|
390
497
|
}
|
|
391
498
|
addColumnIfMissing(table, column, type) {
|
|
392
499
|
const cols = this.db.query(`PRAGMA table_info(${table})`).all();
|
|
@@ -473,6 +580,7 @@ class ExpertsDB {
|
|
|
473
580
|
extra: JSON.parse(r.extra || "{}"),
|
|
474
581
|
avatarLocal: r.avatar_local || undefined,
|
|
475
582
|
authority: r.authority ?? 0,
|
|
583
|
+
pricePerHour: pricePerHour(r.price ?? 0, r.price_unit ?? ""),
|
|
476
584
|
crawledAt: r.crawled_at
|
|
477
585
|
};
|
|
478
586
|
}
|
|
@@ -596,11 +704,11 @@ class ExpertsDB {
|
|
|
596
704
|
sql += " ORDER BY name";
|
|
597
705
|
return this.db.query(sql).all(...params);
|
|
598
706
|
}
|
|
599
|
-
setMeta(
|
|
600
|
-
this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(
|
|
707
|
+
setMeta(key3, value) {
|
|
708
|
+
this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key3, value);
|
|
601
709
|
}
|
|
602
|
-
getMeta(
|
|
603
|
-
const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(
|
|
710
|
+
getMeta(key3) {
|
|
711
|
+
const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key3);
|
|
604
712
|
return row ? row.value : null;
|
|
605
713
|
}
|
|
606
714
|
stats(source) {
|
|
@@ -624,12 +732,12 @@ class ExpertsDB {
|
|
|
624
732
|
const nodeIds = new Map;
|
|
625
733
|
const insertNode = this.db.query("INSERT INTO kg_nodes (type, key, label) VALUES (?, ?, ?) ON CONFLICT(type, key) DO UPDATE SET label=excluded.label RETURNING id");
|
|
626
734
|
const insertEdge = this.db.query("INSERT OR REPLACE INTO kg_edges (src, dst, rel, weight) VALUES (?, ?, ?, ?)");
|
|
627
|
-
const node = (type,
|
|
628
|
-
const ck = `${type}\x00${
|
|
735
|
+
const node = (type, key3, label) => {
|
|
736
|
+
const ck = `${type}\x00${key3.toLowerCase()}`;
|
|
629
737
|
const cached = nodeIds.get(ck);
|
|
630
738
|
if (cached != null)
|
|
631
739
|
return cached;
|
|
632
|
-
const id = insertNode.get(type,
|
|
740
|
+
const id = insertNode.get(type, key3.toLowerCase(), label).id;
|
|
633
741
|
nodeIds.set(ck, id);
|
|
634
742
|
return id;
|
|
635
743
|
};
|
|
@@ -639,7 +747,7 @@ class ExpertsDB {
|
|
|
639
747
|
for (const topic of e.topics) {
|
|
640
748
|
insertEdge.run(eId, node("topic", topic, topic), "IN_TOPIC", 1);
|
|
641
749
|
}
|
|
642
|
-
const tweetText = this.recentTweets(e.source, e.sourceId, 30).map((t) => t.text).join(". ");
|
|
750
|
+
const tweetText = this.recentTweets(e.source, e.sourceId, 30).filter((t) => !t.isRetweet).map((t) => t.text).join(". ");
|
|
643
751
|
const tags = inferTags(expertText(e) + ". " + tweetText, vocabulary);
|
|
644
752
|
for (const tag of tags) {
|
|
645
753
|
insertEdge.run(eId, node("tag", tag, tag), "HAS_TAG", 1);
|
|
@@ -693,11 +801,11 @@ class ExpertsDB {
|
|
|
693
801
|
lastSeen: r.last_seen || ""
|
|
694
802
|
}));
|
|
695
803
|
}
|
|
696
|
-
expertFromNodeKey(
|
|
697
|
-
const idx =
|
|
804
|
+
expertFromNodeKey(key3) {
|
|
805
|
+
const idx = key3.indexOf(":");
|
|
698
806
|
if (idx < 0)
|
|
699
807
|
return null;
|
|
700
|
-
return this.get(
|
|
808
|
+
return this.get(key3.slice(idx + 1), key3.slice(0, idx));
|
|
701
809
|
}
|
|
702
810
|
findByNeeds(needs, opts = {}) {
|
|
703
811
|
const cleaned = needs.map((n) => n.trim().toLowerCase()).filter(Boolean);
|
|
@@ -824,6 +932,15 @@ class ExpertsDB {
|
|
|
824
932
|
};
|
|
825
933
|
}
|
|
826
934
|
replaceTweets(source, sourceId, tweets) {
|
|
935
|
+
const norm = (t) => (t || "").toLowerCase().replace(/^rt @\w+:\s*/, "").replace(/https?:\/\/\S+/g, "").replace(/[^a-z0-9 ]/g, "").replace(/\s+/g, " ").trim();
|
|
936
|
+
const seen = new Set;
|
|
937
|
+
const deduped = tweets.filter((t) => {
|
|
938
|
+
const k = norm(t.text);
|
|
939
|
+
if (!k || seen.has(k))
|
|
940
|
+
return false;
|
|
941
|
+
seen.add(k);
|
|
942
|
+
return true;
|
|
943
|
+
});
|
|
827
944
|
const tx = this.db.transaction((rows) => {
|
|
828
945
|
this.db.query("DELETE FROM tweets WHERE source = ? AND source_id = ?").run(source, sourceId);
|
|
829
946
|
const stmt = this.db.query(`
|
|
@@ -836,7 +953,7 @@ class ExpertsDB {
|
|
|
836
953
|
stmt.run(t.source, t.sourceId, t.tweetId, t.text, t.createdAt, t.retweetCount, t.replyCount, t.likeCount, t.quoteCount, t.impressionCount, t.isRetweet ? 1 : 0, t.isReply ? 1 : 0);
|
|
837
954
|
}
|
|
838
955
|
});
|
|
839
|
-
tx(
|
|
956
|
+
tx(deduped);
|
|
840
957
|
}
|
|
841
958
|
recentTweets(source, sourceId, limit = 10) {
|
|
842
959
|
const rows = this.db.query("SELECT * FROM tweets WHERE source = ? AND source_id = ? ORDER BY created_at DESC LIMIT ?").all(source, sourceId, limit);
|
|
@@ -954,17 +1071,24 @@ class ExpertsDB {
|
|
|
954
1071
|
const log = opts.onLog ?? (() => {});
|
|
955
1072
|
const experts = this.list({ source: opts.source });
|
|
956
1073
|
const batch = opts.batch ?? 64;
|
|
957
|
-
const stmt = this.db.query("INSERT OR REPLACE INTO vectors (source, source_id, embedder, dim, vec) VALUES (?, ?, ?, ?, ?)");
|
|
1074
|
+
const stmt = this.db.query("INSERT OR REPLACE INTO vectors (source, source_id, embedder, dim, vec, text_hash) VALUES (?, ?, ?, ?, ?, ?)");
|
|
1075
|
+
const existing = new Map(this.db.query("SELECT source, source_id, embedder, text_hash FROM vectors").all().map((r) => [`${r.source}:${r.source_id}`, { embedder: r.embedder, hash: r.text_hash || "" }]));
|
|
1076
|
+
const work = experts.map((e) => ({ e, text: expertEmbedText(e), hash: String(Bun.hash(expertEmbedText(e))) })).filter(({ e, hash }) => {
|
|
1077
|
+
if (opts.force)
|
|
1078
|
+
return true;
|
|
1079
|
+
const prev = existing.get(`${e.source}:${e.sourceId}`);
|
|
1080
|
+
return !prev || prev.embedder !== embedder.id || prev.hash !== hash;
|
|
1081
|
+
});
|
|
958
1082
|
let done = 0;
|
|
959
|
-
for (let i = 0;i <
|
|
960
|
-
const slice =
|
|
961
|
-
const vecs = await embedder.embed(slice.map((
|
|
1083
|
+
for (let i = 0;i < work.length; i += batch) {
|
|
1084
|
+
const slice = work.slice(i, i + batch);
|
|
1085
|
+
const vecs = await embedder.embed(slice.map((w) => w.text));
|
|
962
1086
|
const tx = this.db.transaction(() => {
|
|
963
|
-
slice.forEach((
|
|
1087
|
+
slice.forEach((w, j) => stmt.run(w.e.source, w.e.sourceId, embedder.id, embedder.dim, packVector(vecs[j]), w.hash));
|
|
964
1088
|
});
|
|
965
1089
|
tx();
|
|
966
1090
|
done += slice.length;
|
|
967
|
-
log(` embedded ${done}/${experts.length}`);
|
|
1091
|
+
log(` embedded ${done}/${work.length} (${experts.length - work.length} unchanged)`);
|
|
968
1092
|
}
|
|
969
1093
|
this.setMeta("embedder", embedder.id);
|
|
970
1094
|
this.setMeta("embedded_at", new Date().toISOString());
|
|
@@ -977,7 +1101,13 @@ class ExpertsDB {
|
|
|
977
1101
|
const where = opts.source ? "WHERE v.source = ?" : "";
|
|
978
1102
|
const params = opts.source ? [opts.source] : [];
|
|
979
1103
|
const rows = this.db.query(`SELECT e.*, v.vec AS _vec FROM vectors v JOIN experts e ON e.source=v.source AND e.source_id=v.source_id ${where}`).all(...params);
|
|
980
|
-
const
|
|
1104
|
+
const blend = opts.blend !== false;
|
|
1105
|
+
const scored = rows.map((r) => {
|
|
1106
|
+
const expert = this.rowToExpert(r);
|
|
1107
|
+
const semantic = cosine(queryVec, unpackVector(r._vec));
|
|
1108
|
+
const score = blend ? blendScore(semantic, expert.authority ?? 0) : semantic;
|
|
1109
|
+
return { expert, score, semantic };
|
|
1110
|
+
});
|
|
981
1111
|
scored.sort((a, b) => b.score - a.score);
|
|
982
1112
|
return scored.slice(0, opts.limit ?? 25);
|
|
983
1113
|
}
|
|
@@ -1067,7 +1197,7 @@ class ExpertsDB {
|
|
|
1067
1197
|
$source: c.source,
|
|
1068
1198
|
$source_id: c.sourceId,
|
|
1069
1199
|
$type: c.type,
|
|
1070
|
-
$value: c.value,
|
|
1200
|
+
$value: maybeEncrypt(c.value),
|
|
1071
1201
|
$label: c.label,
|
|
1072
1202
|
$provider: c.provider,
|
|
1073
1203
|
$confidence: c.confidence,
|
|
@@ -1077,7 +1207,7 @@ class ExpertsDB {
|
|
|
1077
1207
|
});
|
|
1078
1208
|
}
|
|
1079
1209
|
setContactStatus(source, sourceId, type, value, status) {
|
|
1080
|
-
this.db.query("UPDATE contacts SET status = ?, verified_at = ? WHERE source = ? AND source_id = ? AND type = ? AND value = ?").run(status, new Date().toISOString(), source, sourceId, type, value);
|
|
1210
|
+
this.db.query("UPDATE contacts SET status = ?, verified_at = ? WHERE source = ? AND source_id = ? AND type = ? AND value = ?").run(status, new Date().toISOString(), source, sourceId, type, maybeEncrypt(value));
|
|
1081
1211
|
}
|
|
1082
1212
|
contacts(source, sourceId) {
|
|
1083
1213
|
const rows = this.db.query("SELECT * FROM contacts WHERE source = ? AND source_id = ? ORDER BY type, confidence DESC").all(source, sourceId);
|
|
@@ -1085,7 +1215,7 @@ class ExpertsDB {
|
|
|
1085
1215
|
source: r.source,
|
|
1086
1216
|
sourceId: r.source_id,
|
|
1087
1217
|
type: r.type,
|
|
1088
|
-
value: r.value,
|
|
1218
|
+
value: maybeDecrypt(r.value),
|
|
1089
1219
|
label: r.label || "",
|
|
1090
1220
|
provider: r.provider || "",
|
|
1091
1221
|
confidence: r.confidence ?? 0,
|
|
@@ -1110,7 +1240,7 @@ class ExpertsDB {
|
|
|
1110
1240
|
source: r.source,
|
|
1111
1241
|
sourceId: r.source_id,
|
|
1112
1242
|
type: r.type,
|
|
1113
|
-
value: r.value,
|
|
1243
|
+
value: maybeDecrypt(r.value),
|
|
1114
1244
|
label: r.label || "",
|
|
1115
1245
|
provider: r.provider || "",
|
|
1116
1246
|
confidence: r.confidence ?? 0,
|
|
@@ -1419,34 +1549,37 @@ async function fetchJson(url, fetchFn, init = {}) {
|
|
|
1419
1549
|
}
|
|
1420
1550
|
|
|
1421
1551
|
// src/sources/mentorcruise.ts
|
|
1552
|
+
/**
 * Reduce an HTML fragment to plain text: tags and character entities each become
 * a space, then whitespace runs collapse to one space and the ends are trimmed.
 *
 * @param s HTML string; a falsy value yields "".
 */
function stripHtml(s) {
  const withoutTags = (s || "").replace(/<[^>]+>/g, " ");
  const withoutEntities = withoutTags.replace(/&[a-z#0-9]+;/gi, " ");
  return withoutEntities.replace(/\s+/g, " ").trim();
}
|
|
1422
1555
|
function normalizeMentor(m, crawledAt) {
|
|
1423
|
-
const
|
|
1556
|
+
const path = m.get_absolute_url || "";
|
|
1557
|
+
const slug = path.match(/\/mentor\/([^/]+)/)?.[1] || slugify(m.get_full_name || String(m.objectID ?? ""));
|
|
1424
1558
|
const socials = {};
|
|
1425
1559
|
if (m.twitter)
|
|
1426
1560
|
socials.twitter = m.twitter.startsWith("http") ? m.twitter : `https://x.com/${m.twitter}`;
|
|
1427
1561
|
if (m.linkedin)
|
|
1428
1562
|
socials.linkedin = m.linkedin;
|
|
1563
|
+
const price = m.all_prices?.length ? Math.min(...m.all_prices) : Math.round(m.avg_price_per_call ?? 0);
|
|
1429
1564
|
return makeExpert({
|
|
1430
1565
|
source: "mentorcruise",
|
|
1431
|
-
sourceId: String(m.
|
|
1566
|
+
sourceId: String(m.objectID ?? slug),
|
|
1432
1567
|
slug,
|
|
1433
|
-
url: `https://mentorcruise.com/mentor/${slug}/`,
|
|
1434
|
-
fullName: m.
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
rating: m.rating ?? 0,
|
|
1444
|
-
ratingCount: m.reviews_count ?? 0,
|
|
1445
|
-
verified: Boolean(m.verified),
|
|
1568
|
+
url: path ? `https://mentorcruise.com${path}` : `https://mentorcruise.com/mentor/${slug}/`,
|
|
1569
|
+
fullName: m.get_full_name ?? "",
|
|
1570
|
+
title: (m.cleaned_job_title ?? []).join(", "),
|
|
1571
|
+
bio: stripHtml(m.bio_formatted ?? ""),
|
|
1572
|
+
avatar: m.get_profile_picture ?? "",
|
|
1573
|
+
price,
|
|
1574
|
+
priceCurrency: "USD",
|
|
1575
|
+
priceUnit: price ? "per month" : "",
|
|
1576
|
+
rating: m.avg_rating_float_one_decimal ?? 0,
|
|
1577
|
+
ratingCount: m.number_of_reviews ?? 0,
|
|
1446
1578
|
featured: Boolean(m.is_top_mentor),
|
|
1447
|
-
topics: m.
|
|
1448
|
-
tags: m.
|
|
1579
|
+
topics: m.get_industries ?? [],
|
|
1580
|
+
tags: m.get_skills ?? [],
|
|
1449
1581
|
socials,
|
|
1582
|
+
extra: { company: m.company ?? "", location: m.get_location_display ?? "", avgPricePerCall: m.avg_price_per_call ?? 0 },
|
|
1450
1583
|
crawledAt
|
|
1451
1584
|
});
|
|
1452
1585
|
}
|
|
@@ -1456,44 +1589,61 @@ class MentorCruiseSource {
|
|
|
1456
1589
|
description = "MentorCruise \u2014 long-term mentorship from vetted mentors";
|
|
1457
1590
|
website = "https://mentorcruise.com";
|
|
1458
1591
|
fetchFn;
|
|
1459
|
-
|
|
1592
|
+
appId;
|
|
1593
|
+
apiKey;
|
|
1594
|
+
index;
|
|
1460
1595
|
pageSize;
|
|
1461
1596
|
constructor(opts = {}) {
|
|
1462
1597
|
this.fetchFn = opts.fetchFn ?? fetch;
|
|
1463
|
-
this.
|
|
1464
|
-
this.
|
|
1598
|
+
this.appId = opts.appId ?? process.env.MENTORCRUISE_ALGOLIA_APP_ID ?? "YD3XA4V91L";
|
|
1599
|
+
this.apiKey = opts.apiKey ?? process.env.MENTORCRUISE_ALGOLIA_API_KEY ?? "454b55a2e50bc884225318d99b0dad1a";
|
|
1600
|
+
this.index = opts.index ?? process.env.MENTORCRUISE_ALGOLIA_INDEX ?? "MentorProfile_prod";
|
|
1601
|
+
this.pageSize = opts.pageSize ?? 200;
|
|
1465
1602
|
}
|
|
1466
1603
|
async crawl(opts = {}) {
|
|
1467
1604
|
const log = opts.onLog ?? (() => {});
|
|
1468
1605
|
const crawledAt = new Date().toISOString();
|
|
1606
|
+
const url = `https://${this.appId}-dsn.algolia.net/1/indexes/${this.index}/query`;
|
|
1469
1607
|
const experts = [];
|
|
1470
1608
|
const tags = new Set;
|
|
1471
|
-
let
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1609
|
+
let page = 0;
|
|
1610
|
+
let pages = 1;
|
|
1611
|
+
while (page < pages) {
|
|
1612
|
+
let data;
|
|
1613
|
+
try {
|
|
1614
|
+
const res = await this.fetchFn(url, {
|
|
1615
|
+
method: "POST",
|
|
1616
|
+
headers: {
|
|
1617
|
+
"X-Algolia-Application-Id": this.appId,
|
|
1618
|
+
"X-Algolia-API-Key": this.apiKey,
|
|
1619
|
+
"Content-Type": "application/json"
|
|
1620
|
+
},
|
|
1621
|
+
body: JSON.stringify({ params: `hitsPerPage=${this.pageSize}&page=${page}` })
|
|
1622
|
+
});
|
|
1623
|
+
if (!res.ok)
|
|
1624
|
+
break;
|
|
1625
|
+
data = await res.json();
|
|
1626
|
+
} catch {
|
|
1476
1627
|
break;
|
|
1477
|
-
|
|
1478
|
-
|
|
1628
|
+
}
|
|
1629
|
+
pages = data.nbPages ?? 1;
|
|
1630
|
+
for (const hit of data.hits ?? []) {
|
|
1631
|
+
const e = normalizeMentor(hit, crawledAt);
|
|
1479
1632
|
experts.push(e);
|
|
1480
1633
|
for (const t of e.tags)
|
|
1481
1634
|
tags.add(t);
|
|
1482
1635
|
}
|
|
1483
|
-
|
|
1484
|
-
|
|
1636
|
+
log(` mentorcruise: ${experts.length}/${data.nbHits ?? "?"}`);
|
|
1637
|
+
page++;
|
|
1485
1638
|
if (opts.max && experts.length >= opts.max)
|
|
1486
1639
|
break;
|
|
1487
|
-
if (items.length < this.pageSize)
|
|
1488
|
-
break;
|
|
1489
1640
|
}
|
|
1490
1641
|
if (experts.length === 0) {
|
|
1491
|
-
log("mentorcruise:
|
|
1642
|
+
log("mentorcruise: Algolia returned nothing (set MENTORCRUISE_ALGOLIA_* or inject fetchFn).");
|
|
1492
1643
|
}
|
|
1493
|
-
const topics = [];
|
|
1494
1644
|
return {
|
|
1495
1645
|
experts: opts.max ? experts.slice(0, opts.max) : experts,
|
|
1496
|
-
topics,
|
|
1646
|
+
topics: [],
|
|
1497
1647
|
tags: [...tags].map((name) => ({ name, topic: "" })),
|
|
1498
1648
|
total: experts.length
|
|
1499
1649
|
};
|
|
@@ -1714,6 +1864,9 @@ async function crawlSource(db, sourceName, opts = {}) {
|
|
|
1714
1864
|
throw new Error(`Unknown source "${sourceName}". Run \`experts sources\` to list options.`);
|
|
1715
1865
|
}
|
|
1716
1866
|
const data = await source.crawl(opts);
|
|
1867
|
+
if (data.experts.length === 0 && db.count(source.name) > 0) {
|
|
1868
|
+
opts.onLog?.(`\u26A0 ${source.name} returned 0 experts but ${db.count(source.name)} are stored \u2014 possible API drift; not overwriting.`);
|
|
1869
|
+
}
|
|
1717
1870
|
const changes = db.recordChanges(source.name, data.experts);
|
|
1718
1871
|
db.upsertExperts(data.experts);
|
|
1719
1872
|
if (data.topics.length)
|
|
@@ -2348,10 +2501,68 @@ function normalizePhone(v) {
|
|
|
2348
2501
|
return "+" + cleaned.slice(1).replace(/\D/g, "");
|
|
2349
2502
|
return cleaned;
|
|
2350
2503
|
}
|
|
2504
|
+
/**
 * Flag phone numbers whose digits form an obvious placeholder: one digit repeated,
 * the sequences 1234567890 / 0123456789, or a 2-3 digit group repeated three or
 * more times.
 *
 * @param v Raw phone value; normalized and stripped to digits before checking.
 */
function looksFakePhone(v) {
  const digits = normalizePhone(v).replace(/\D/g, "");
  const countingSequences = ["1234567890", "0123456789"];
  return /^(\d)\1+$/.test(digits)
    || countingSequences.includes(digits)
    || /^(\d{2,3})\1\1+$/.test(digits);
}
|
|
2351
2514
|
/**
 * Classify a phone number as "valid" or "invalid".
 *
 * A number is valid when it has 7 to 15 digits after normalization and does not
 * look like a placeholder according to `looksFakePhone`.
 */
function validatePhone(v) {
  const digitCount = normalizePhone(v).replace(/\D/g, "").length;
  const plausibleLength = digitCount >= 7 && digitCount <= 15;
  // The length check runs first, so looksFakePhone is only consulted for plausible lengths.
  return plausibleLength && !looksFakePhone(v) ? "valid" : "invalid";
}
|
|
2523
|
+
/**
 * Translate the numeric reply to an SMTP `RCPT TO` command into a contact status.
 *
 * @param code Three-digit reply code.
 * @returns "valid" for 250/251, "invalid" for 550/551/553/554, otherwise "unknown".
 */
function interpretRcptCode(code) {
  switch (code) {
    case 250:
    case 251:
      return "valid";
    case 550:
    case 551:
    case 553:
    case 554:
      return "invalid";
    default:
      return "unknown";
  }
}
|
|
2530
|
+
/**
 * Ask a mail exchanger whether it would accept mail for an address, without
 * sending a message: connect on port 25, send HELO / MAIL FROM / RCPT TO, and
 * interpret the reply to RCPT TO.
 *
 * @param email  Address to probe.
 * @param mxHost Mail exchanger host to connect to.
 * @returns "valid" or "invalid" from the RCPT reply; "unknown" when the address is
 *          unsafe to send, the connection fails, times out or closes early, or
 *          the server rejects an earlier step.
 */
async function smtpProbe(email, mxHost) {
  // Whitespace (CR/LF included) or angle brackets in the address could smuggle
  // extra SMTP commands onto the wire, so refuse to send it.
  if (typeof email !== "string" || /[\s<>]/.test(email))
    return "unknown";
  const net = await import("net");
  return new Promise((resolve) => {
    const sock = net.createConnection({ host: mxHost, port: 25, timeout: 7000 });
    let step = 0;
    let settled = false;
    let pending = "";
    const done = (result) => {
      if (settled)
        return;
      settled = true;
      // destroy() rather than end(): nothing more will be read from this socket.
      sock.destroy();
      resolve(result);
    };
    // Act on one complete (final-line) server reply.
    const onReply = (code) => {
      if (step === 3) {
        done(interpretRcptCode(code));
        return;
      }
      // A non-2xx greeting, HELO or MAIL reply says nothing about the mailbox.
      // Carrying on would let a blanket rejection be misread as a verdict on it.
      if (!(code >= 200 && code < 300)) {
        done("unknown");
        return;
      }
      if (step === 0)
        sock.write("HELO open-experts.local\r\n");
      else if (step === 1)
        sock.write("MAIL FROM:<probe@open-experts.local>\r\n");
      else
        sock.write(`RCPT TO:<${email}>\r\n`);
      step++;
    };
    sock.on("timeout", () => done("unknown"));
    sock.on("error", () => done("unknown"));
    // Without this the promise never settles when the server hangs up early.
    sock.on("close", () => done("unknown"));
    sock.on("data", (buf) => {
      // Replies can arrive split across chunks or several per chunk, so buffer
      // and handle them one line at a time.
      pending += buf.toString();
      let eol;
      while (!settled && (eol = pending.indexOf("\n")) !== -1) {
        const line = pending.slice(0, eol);
        pending = pending.slice(eol + 1);
        // "250-..." is a continuation line of a multi-line reply; wait for "250 ...".
        if (/^\d{3}-/.test(line))
          continue;
        onReply(parseInt(line.slice(0, 3), 10));
      }
    });
  });
}
|
|
2356
2567
|
async function validateEmail(email, resolver = resolveMx) {
|
|
2357
2568
|
const e = normalizeEmail(email);
|
|
@@ -2521,12 +2732,25 @@ async function verifyContacts(db, opts = {}) {
|
|
|
2521
2732
|
const delayMs = opts.delayMs ?? 50;
|
|
2522
2733
|
const targets = db.contactsToVerify({ source: opts.source, limit: opts.max });
|
|
2523
2734
|
const res = { checked: 0, valid: 0, invalid: 0, unknown: 0 };
|
|
2735
|
+
const resolver = opts.resolver;
|
|
2736
|
+
const prober = opts.prober ?? smtpProbe;
|
|
2524
2737
|
for (const c of targets) {
|
|
2525
2738
|
let status;
|
|
2526
|
-
if (c.type === "email")
|
|
2527
|
-
status = await validateEmail(c.value,
|
|
2528
|
-
|
|
2739
|
+
if (c.type === "email") {
|
|
2740
|
+
status = await validateEmail(c.value, resolver);
|
|
2741
|
+
if (opts.smtp && status === "valid") {
|
|
2742
|
+
try {
|
|
2743
|
+
const mx = await (resolver ?? (await import("dns/promises")).resolveMx)(c.value.split("@")[1]);
|
|
2744
|
+
if (mx[0]?.exchange) {
|
|
2745
|
+
const probed = await prober(c.value, mx[0].exchange);
|
|
2746
|
+
if (probed !== "unknown")
|
|
2747
|
+
status = probed;
|
|
2748
|
+
}
|
|
2749
|
+
} catch {}
|
|
2750
|
+
}
|
|
2751
|
+
} else {
|
|
2529
2752
|
status = validatePhone(c.value);
|
|
2753
|
+
}
|
|
2530
2754
|
db.setContactStatus(c.source, c.sourceId, c.type, c.value, status);
|
|
2531
2755
|
res.checked++;
|
|
2532
2756
|
res[status]++;
|
|
@@ -2861,7 +3085,7 @@ function _supportsColor(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
|
|
|
2861
3085
|
return 1;
|
|
2862
3086
|
}
|
|
2863
3087
|
if ("CI" in env) {
|
|
2864
|
-
if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((
|
|
3088
|
+
if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key3) => (key3 in env))) {
|
|
2865
3089
|
return 3;
|
|
2866
3090
|
}
|
|
2867
3091
|
if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => (sign in env)) || env.CI_NAME === "codeship") {
|
|
@@ -3262,16 +3486,22 @@ export {
|
|
|
3262
3486
|
tokenize,
|
|
3263
3487
|
syncContacts,
|
|
3264
3488
|
socialHandles,
|
|
3489
|
+
smtpProbe,
|
|
3265
3490
|
slugFromUrl,
|
|
3266
3491
|
samePerson,
|
|
3267
3492
|
registerSource,
|
|
3493
|
+
pricePerHour,
|
|
3268
3494
|
normalizePhone,
|
|
3269
3495
|
normalizeName,
|
|
3270
3496
|
normalizeIntroExpert,
|
|
3271
3497
|
normalizeEmail,
|
|
3498
|
+
maybeEncrypt,
|
|
3499
|
+
maybeDecrypt,
|
|
3500
|
+
looksFakePhone,
|
|
3272
3501
|
liveTweets,
|
|
3273
3502
|
listSources,
|
|
3274
3503
|
linkedinHandle,
|
|
3504
|
+
interpretRcptCode,
|
|
3275
3505
|
inferTags,
|
|
3276
3506
|
htmlToText,
|
|
3277
3507
|
handleFromSocial,
|
|
@@ -3280,6 +3510,7 @@ export {
|
|
|
3280
3510
|
exports_format as format,
|
|
3281
3511
|
extractJson,
|
|
3282
3512
|
extractExaContacts,
|
|
3513
|
+
explainMatch,
|
|
3283
3514
|
expertToContactRecord,
|
|
3284
3515
|
expertText,
|
|
3285
3516
|
expertEmbedText,
|
|
@@ -3288,6 +3519,7 @@ export {
|
|
|
3288
3519
|
enrichSite,
|
|
3289
3520
|
enrichLinkedIn,
|
|
3290
3521
|
enrichExpert,
|
|
3522
|
+
encryptionEnabled,
|
|
3291
3523
|
downloadAvatar,
|
|
3292
3524
|
discoverContacts,
|
|
3293
3525
|
defaultRunner,
|
|
@@ -3295,9 +3527,11 @@ export {
|
|
|
3295
3527
|
crawlSource,
|
|
3296
3528
|
cosine,
|
|
3297
3529
|
clusterPersons,
|
|
3530
|
+
blendScore,
|
|
3298
3531
|
backfillAvatars,
|
|
3299
3532
|
avatarBasename,
|
|
3300
3533
|
authorityScore,
|
|
3534
|
+
TransformersEmbedder,
|
|
3301
3535
|
OpenAIEmbedder,
|
|
3302
3536
|
JsonSink,
|
|
3303
3537
|
IntroSource,
|
|
@@ -3306,6 +3540,7 @@ export {
|
|
|
3306
3540
|
ExpertsDB,
|
|
3307
3541
|
ExaWebsetsProvider,
|
|
3308
3542
|
DEFAULT_WEIGHTS,
|
|
3543
|
+
DEFAULT_BLEND,
|
|
3309
3544
|
CredentialPool,
|
|
3310
3545
|
ConnectorsClient,
|
|
3311
3546
|
CliSink
|
package/dist/score.d.ts
CHANGED
|
@@ -22,4 +22,32 @@ export interface ScoreInputs {
|
|
|
22
22
|
* reach, featured/verified flags, and recency of activity.
|
|
23
23
|
*/
|
|
24
24
|
export declare function authorityScore(e: Expert, inputs?: ScoreInputs, weights?: ScoreWeights): number;
|
|
25
|
+
/**
|
|
26
|
+
* Normalize a price + unit to a comparable USD/hour figure so prices can be
|
|
27
|
+
* compared across sources (intro=per-15-min, Clarity=per-minute, etc.).
|
|
28
|
+
* Returns null when the unit isn't an hourly-comparable rate (e.g. per-month).
|
|
29
|
+
*/
|
|
30
|
+
export declare function pricePerHour(price: number, priceUnit: string): number | null;
|
|
31
|
+
export interface BlendWeights {
|
|
32
|
+
/** Weight on semantic similarity (0..1 cosine). */
|
|
33
|
+
semantic: number;
|
|
34
|
+
/** Weight on the expert's authority (0..100, normalized here). */
|
|
35
|
+
authority: number;
|
|
36
|
+
}
|
|
37
|
+
export declare const DEFAULT_BLEND: BlendWeights;
|
|
38
|
+
/**
|
|
39
|
+
* Blend a semantic similarity score with an expert's authority so ranking isn't
|
|
40
|
+
* driven by text match alone — a strong text match from an unrated, no-review
|
|
41
|
+
* advisor shouldn't outrank a verified top expert with a near-equal match.
|
|
42
|
+
* Returns 0..1.
|
|
43
|
+
*/
|
|
44
|
+
export declare function blendScore(semantic: number, authority: number, w?: BlendWeights): number;
|
|
45
|
+
/**
|
|
46
|
+
* Explain why an expert matched a free-text query: which of their tags/topics
|
|
47
|
+
* appear in the query (case-insensitive whole-token overlap). Pure + testable.
|
|
48
|
+
*/
|
|
49
|
+
export declare function explainMatch(query: string, e: {
|
|
50
|
+
tags: string[];
|
|
51
|
+
topics: string[];
|
|
52
|
+
}): string[];
|
|
25
53
|
//# sourceMappingURL=score.d.ts.map
|
package/dist/score.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"score.d.ts","sourceRoot":"","sources":["../src/score.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,KAAK,EAAE,MAAM,EAAY,MAAM,SAAS,CAAC;AAEhD,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,eAAO,MAAM,eAAe,EAAE,YAO7B,CAAC;AAMF,MAAM,WAAW,WAAW;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wDAAwD;IACxD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,GAAE,WAAgB,EAAE,OAAO,GAAE,YAA8B,GAAG,MAAM,CAkBnH"}
|
|
1
|
+
{"version":3,"file":"score.d.ts","sourceRoot":"","sources":["../src/score.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,KAAK,EAAE,MAAM,EAAY,MAAM,SAAS,CAAC;AAEhD,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,eAAO,MAAM,eAAe,EAAE,YAO7B,CAAC;AAMF,MAAM,WAAW,WAAW;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wDAAwD;IACxD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,GAAE,WAAgB,EAAE,OAAO,GAAE,YAA8B,GAAG,MAAM,CAkBnH;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAS5E;AAED,MAAM,WAAW,YAAY;IAC3B,mDAAmD;IACnD,QAAQ,EAAE,MAAM,CAAC;IACjB,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,eAAO,MAAM,aAAa,EAAE,YAAgD,CAAC;AAE7E;;;;;GAKG;AACH,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,GAAE,YAA4B,GAAG,MAAM,CAIvG;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE;IAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,GAAG,MAAM,EAAE,CAQ7F"}
|