@hasna/experts 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -13
- package/dist/cli/index.js +1842 -74
- package/dist/connectors.d.ts +63 -4
- package/dist/connectors.d.ts.map +1 -1
- package/dist/contacts.d.ts +96 -0
- package/dist/contacts.d.ts.map +1 -0
- package/dist/crawl.d.ts +1 -0
- package/dist/crawl.d.ts.map +1 -1
- package/dist/db.d.ts +97 -2
- package/dist/db.d.ts.map +1 -1
- package/dist/embed.d.ts +57 -0
- package/dist/embed.d.ts.map +1 -0
- package/dist/enrich.d.ts +81 -1
- package/dist/enrich.d.ts.map +1 -1
- package/dist/format.d.ts +4 -1
- package/dist/format.d.ts.map +1 -1
- package/dist/identity.d.ts +23 -0
- package/dist/identity.d.ts.map +1 -0
- package/dist/index.d.ts +7 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1544 -21
- package/dist/score.d.ts +25 -0
- package/dist/score.d.ts.map +1 -0
- package/dist/sdk.d.ts +26 -1
- package/dist/sdk.d.ts.map +1 -1
- package/dist/sdk.js +12 -1
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +960 -14
- package/dist/sources/adplist.d.ts +43 -0
- package/dist/sources/adplist.d.ts.map +1 -0
- package/dist/sources/clarity.d.ts +37 -0
- package/dist/sources/clarity.d.ts.map +1 -0
- package/dist/sources/common.d.ts +14 -0
- package/dist/sources/common.d.ts.map +1 -0
- package/dist/sources/glg.d.ts +36 -0
- package/dist/sources/glg.d.ts.map +1 -0
- package/dist/sources/index.d.ts +5 -1
- package/dist/sources/index.d.ts.map +1 -1
- package/dist/sources/mentorcruise.d.ts +47 -0
- package/dist/sources/mentorcruise.d.ts.map +1 -0
- package/dist/sync.d.ts +71 -0
- package/dist/sync.d.ts.map +1 -0
- package/dist/types.d.ts +34 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/server/index.js
CHANGED
|
@@ -28,6 +28,206 @@ function expertText(e) {
|
|
|
28
28
|
return [e.title, e.headline, e.bio].filter(Boolean).join(". ");
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
// src/score.ts
|
|
32
|
+
/**
 * Default contribution of each signal to the authority score.
 * The six weights add up to 1, so a profile that maxes out every signal
 * scores 100.
 */
var DEFAULT_WEIGHTS = {
  rating: 0.3,
  reviews: 0.2,
  followers: 0.25,
  featured: 0.1,
  verified: 0.05,
  recency: 0.1
};

/** Clamps `n` into the closed interval [0, 1]. */
var clamp01 = (n) => Math.max(0, Math.min(1, n));

/**
 * Log-scaled normalisation: maps `x` in [0, cap] onto [0, 1] using log10,
 * treating negative `x` as 0 and saturating at 1 above `cap`.
 */
var logNorm = (x, cap) => clamp01(Math.log10(1 + Math.max(0, x)) / Math.log10(1 + cap));

/**
 * Computes a 0-100 authority score (one decimal place) for an expert.
 *
 * @param {object} e - Expert record; reads `rating` (0-5 scale), `ratingCount`,
 *   `featured` and `verified`.
 * @param {object} [inputs] - Optional enrichment signals: `followers` and
 *   `daysSinceLastTweet` (recency decays linearly to 0 over 30 days).
 * @param {object} [weights] - Per-signal weights. Any weight that is missing
 *   falls back to its value in DEFAULT_WEIGHTS instead of poisoning the
 *   result with NaN.
 * @returns {number} Weighted sum of the normalised signals, times 100,
 *   rounded to one decimal.
 */
function authorityScore(e, inputs = {}, weights = DEFAULT_WEIGHTS) {
  // Non-finite or non-numeric signals count as "no signal" rather than NaN.
  const finiteOrZero = (value) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  };
  const signals = inputs ?? {};
  const w = { ...DEFAULT_WEIGHTS, ...weights };

  const rating = clamp01(finiteOrZero(e.rating) / 5);
  const reviews = logNorm(finiteOrZero(e.ratingCount), 1000);
  const followers = logNorm(finiteOrZero(signals.followers), 1e6);
  const featured = e.featured ? 1 : 0;
  const verified = e.verified ? 1 : 0;

  const days = signals.daysSinceLastTweet;
  const recency = days == null || !Number.isFinite(Number(days)) ? 0 : clamp01(1 - Number(days) / 30);

  const raw = w.rating * rating + w.reviews * reviews + w.followers * followers + w.featured * featured + w.verified * verified + w.recency * recency;
  // Scale to 0-100 and keep a single decimal place.
  return Math.round(raw * 1000) / 10;
}
|
|
52
|
+
|
|
53
|
+
// src/embed.ts
|
|
54
|
+
/** Common English function words that are dropped during tokenisation. */
var STOPWORDS = new Set([
  "the", "a", "an", "and", "or",
  "of", "to", "in", "for", "on",
  "at", "is", "are", "with", "by",
  "as", "be", "this", "that", "it",
  "from", "i", "you", "we", "they"
]);

/**
 * Splits text into lowercase ASCII alphanumeric tokens.
 * URLs are removed first; tokens shorter than two characters and stopwords
 * are discarded. A falsy `text` yields an empty array.
 */
function tokenize(text) {
  const withoutUrls = (text || "").toLowerCase().replace(/https?:\/\/\S+/g, " ");
  const isKept = (tok) => tok.length >= 2 && !STOPWORDS.has(tok);
  return withoutUrls.split(/[^a-z0-9]+/).filter(isKept);
}
|
|
84
|
+
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `s`.
 * Returns an unsigned 32-bit integer.
 */
function fnv1a(s) {
  let hash = 0x811c9dc5; // FNV offset basis (2166136261)
  for (let pos = 0; pos < s.length; pos += 1) {
    // XOR in the code unit, then multiply by the FNV prime (16777619) mod 2^32.
    hash = Math.imul(hash ^ s.charCodeAt(pos), 0x01000193) >>> 0;
  }
  return hash;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Dependency-free embedder based on the hashing trick: unigrams and bigrams
 * are hashed with fnv1a into a fixed number of buckets and the resulting
 * vector is L2-normalised.
 */
class HashingEmbedder {
  id = "hash-v1";
  dim;

  /** @param {number} [dim=512] Number of buckets (vector length). */
  constructor(dim = 512) {
    this.dim = dim;
  }

  /** Embeds a single text into a unit-length array of `dim` numbers. */
  one(text) {
    const buckets = new Array(this.dim).fill(0);
    const tokens = tokenize(text);
    tokens.forEach((token, pos) => {
      // Unigrams carry full weight.
      buckets[fnv1a(token) % this.dim] += 1;
      // Adjacent-token bigrams carry half weight.
      if (pos + 1 < tokens.length) {
        const pair = token + "_" + tokens[pos + 1];
        buckets[fnv1a(pair) % this.dim] += 0.5;
      }
    });
    let sumOfSquares = 0;
    for (const weight of buckets) {
      sumOfSquares += weight * weight;
    }
    // An all-zero vector is divided by 1 so it stays all-zero.
    const norm = Math.sqrt(sumOfSquares) || 1;
    return buckets.map((weight) => weight / norm);
  }

  /** Embeds every text; async to match the shape of the other embedder. */
  async embed(texts) {
    return texts.map((text) => this.one(text));
  }
}
|
|
117
|
+
|
|
118
|
+
/**
 * Embedder backed by the OpenAI embeddings HTTP endpoint.
 *
 * `dim` starts at 1536 and is updated to the real vector length after the
 * first successful call, so it stays correct for other models.
 */
class OpenAIEmbedder {
  id;
  dim = 1536;
  apiKey;
  model;
  fetchFn;

  /**
   * @param {object} [opts]
   * @param {string} [opts.apiKey] Defaults to `process.env.OPENAI_API_KEY`.
   * @param {string} [opts.model] Defaults to "text-embedding-3-small".
   * @param {Function} [opts.fetchFn] fetch-compatible function; defaults to global `fetch`.
   */
  constructor(opts = {}) {
    this.apiKey = opts.apiKey ?? process.env.OPENAI_API_KEY ?? "";
    this.model = opts.model ?? "text-embedding-3-small";
    this.fetchFn = opts.fetchFn ?? fetch;
    this.id = `openai:${this.model}`;
  }

  /**
   * Embeds a batch of texts.
   *
   * @param {string[]} texts
   * @returns {Promise<number[][]>} One vector per text, in input order.
   * @throws {Error} On a non-OK HTTP status or when the response does not
   *   contain exactly one vector per input.
   */
  async embed(texts) {
    // Nothing to embed: skip the network round-trip entirely.
    if (texts.length === 0) {
      return [];
    }
    const res = await this.fetchFn("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: { "content-type": "application/json", authorization: `Bearer ${this.apiKey}` },
      body: JSON.stringify({ model: this.model, input: texts })
    });
    if (!res.ok) {
      throw new Error(`OpenAI embeddings ${res.status}: ${(await res.text()).slice(0, 200)}`);
    }
    const data = await res.json();
    const items = Array.isArray(data?.data) ? data.data : [];
    if (items.length !== texts.length) {
      throw new Error(`OpenAI embeddings: expected ${texts.length} vectors, got ${items.length}`);
    }
    // Each item carries the index of the input it belongs to; order by it so
    // vectors always line up with `texts`. Items without an index keep their
    // response order (the sort is stable).
    const ordered = [...items].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    const vectors = ordered.map((item) => item.embedding);
    if (Array.isArray(vectors[0])) {
      this.dim = vectors[0].length;
    }
    return vectors;
  }
}
|
|
142
|
+
/**
 * Picks the embedder from the environment: the OpenAI embedder when
 * EXPERTS_EMBEDDER is "openai" and OPENAI_API_KEY is set, otherwise the
 * local hashing embedder.
 */
function getEmbedder() {
  const { EXPERTS_EMBEDDER: choice, OPENAI_API_KEY: apiKey } = process.env;
  const wantsOpenAI = choice === "openai" && Boolean(apiKey);
  return wantsOpenAI ? new OpenAIEmbedder() : new HashingEmbedder();
}
|
|
148
|
+
/**
 * Cosine similarity of two numeric vectors, computed over their common
 * prefix when the lengths differ. Returns 0 when either side has zero
 * magnitude.
 */
function cosine(a, b) {
  const len = Math.min(a.length, b.length);
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let k = 0; k < len; k += 1) {
    const x = a[k];
    const y = b[k];
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
  }
  const denom = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  if (denom === 0) {
    return 0;
  }
  return dot / denom;
}
|
|
161
|
+
/**
 * Serialises a numeric vector as the raw bytes of a Float32Array
 * (4 bytes per component, platform byte order).
 */
function packVector(v) {
  const { buffer } = new Float32Array(v);
  return new Uint8Array(buffer);
}
|
|
165
|
+
/**
 * Inverse of packVector: reads float32 values back out of a byte buffer.
 * A Uint8Array view is first copied to its own ArrayBuffer so that the view's
 * byte offset and length are respected.
 */
function unpackVector(buf) {
  let bytes = buf;
  if (buf instanceof Uint8Array) {
    const start = buf.byteOffset;
    bytes = buf.buffer.slice(start, start + buf.byteLength);
  }
  return [...new Float32Array(bytes)];
}
|
|
169
|
+
/**
 * Builds the text that is embedded for an expert: name, title, headline, bio,
 * topics and tags, joined with ". ". Empty parts are skipped, and a missing
 * `topics` or `tags` list is treated as empty instead of throwing.
 */
function expertEmbedText(e) {
  const joinList = (list) => (Array.isArray(list) ? list.join(" ") : "");
  return [e.fullName, e.title, e.headline, e.bio, joinList(e.topics), joinList(e.tags)]
    .filter(Boolean)
    .join(". ");
}
|
|
172
|
+
|
|
173
|
+
// src/identity.ts
|
|
174
|
+
/**
 * Path segments that sit between the domain and the actual handle on some
 * platforms (e.g. linkedin.com/in/<handle>, youtube.com/channel/<id>).
 */
var SOCIAL_PATH_PREFIXES = new Set(["in", "pub", "channel", "c", "user"]);

/**
 * Extracts the handle from a profile URL, or returns null when the string
 * does not look like `<domain>/<handle>`.
 */
function socialHandleFromUrl(raw) {
  const domainMatch = raw.match(/[a-z]+\.[a-z]+\/(.*)$/i);
  if (!domainMatch) {
    return null;
  }
  // Ignore query string and fragment, then split the path into segments.
  const path = domainMatch[1].split(/[?#]/, 1)[0];
  const segments = path.split("/").filter(Boolean);
  // Skip a structural prefix only when a real handle follows it, so a
  // top-level account that happens to be called "in" or "user" still resolves.
  const first = segments.length > 1 && SOCIAL_PATH_PREFIXES.has(segments[0].toLowerCase()) ? 1 : 0;
  const handleMatch = segments[first]?.match(/^@?([A-Za-z0-9_.-]+)/);
  return handleMatch ? handleMatch[1] : null;
}

/**
 * Collects the normalised `platform:handle` identities found in `e.socials`.
 *
 * Handles are lowercased; values that are not recognisable URLs are used
 * verbatim (lowercased, trailing slashes removed). Empty values are skipped.
 *
 * @param {object} e - Expert record with an optional `socials` map of
 *   platform name to URL or handle.
 * @returns {Set<string>} Set of `platform:handle` strings.
 */
function socialHandles(e) {
  const out = new Set();
  for (const [platform, url] of Object.entries(e.socials || {})) {
    if (!url) {
      continue;
    }
    const raw = String(url);
    const handle = (socialHandleFromUrl(raw) ?? raw).toLowerCase().replace(/\/+$/, "");
    if (handle) {
      out.add(`${platform}:${handle}`);
    }
  }
  return out;
}
|
|
186
|
+
/** Composite identifier of an expert record: `<source>:<sourceId>`. */
var key = (e) => `${e.source}:${e.sourceId}`;

/**
 * Groups expert records that share at least one social handle.
 *
 * Implemented as a union-find over expert keys. The lexicographically
 * smallest key of each group ends up as its root, which makes the resulting
 * person id deterministic.
 *
 * @param {object[]} experts
 * @returns {Map<string, string>} Map from every expert key to the key of its
 *   group's canonical record.
 */
function clusterPersons(experts) {
  const rootOf = new Map();

  // Find the root of `start`, compressing the path along the way.
  const resolve = (start) => {
    let root = start;
    for (let up = rootOf.get(root); up !== root; up = rootOf.get(root)) {
      root = up;
    }
    let cursor = start;
    while (rootOf.get(cursor) !== root) {
      const next = rootOf.get(cursor);
      rootOf.set(cursor, root);
      cursor = next;
    }
    return root;
  };

  // Join two groups, keeping the smaller key as the surviving root.
  const merge = (a, b) => {
    const rootA = resolve(a);
    const rootB = resolve(b);
    if (rootA === rootB) {
      return;
    }
    const [keep, drop] = rootA < rootB ? [rootA, rootB] : [rootB, rootA];
    rootOf.set(drop, keep);
  };

  experts.forEach((e) => {
    const id = key(e);
    rootOf.set(id, id);
  });

  // The first record seen for a handle anchors every later record that
  // carries the same handle.
  const firstOwner = new Map();
  experts.forEach((e) => {
    const id = key(e);
    for (const handle of socialHandles(e)) {
      if (firstOwner.has(handle)) {
        merge(firstOwner.get(handle), id);
      } else {
        firstOwner.set(handle, id);
      }
    }
  });

  const out = new Map();
  experts.forEach((e) => {
    const id = key(e);
    out.set(id, resolve(id));
  });
  return out;
}
|
|
230
|
+
|
|
31
231
|
// src/db.ts
|
|
32
232
|
function defaultDbPath() {
|
|
33
233
|
return process.env.OPEN_EXPERTS_DB || join(homedir(), ".hasna", "experts", "experts.db");
|
|
@@ -111,8 +311,59 @@ class ExpertsDB {
|
|
|
111
311
|
PRIMARY KEY (source, tweet_id)
|
|
112
312
|
);
|
|
113
313
|
CREATE INDEX IF NOT EXISTS idx_tweets_expert ON tweets(source, source_id, created_at DESC);
|
|
314
|
+
|
|
315
|
+
-- Enrichment: recent YouTube videos per expert.
|
|
316
|
+
CREATE TABLE IF NOT EXISTS videos (
|
|
317
|
+
source TEXT NOT NULL, source_id TEXT NOT NULL,
|
|
318
|
+
video_id TEXT NOT NULL, title TEXT, description TEXT,
|
|
319
|
+
published_at TEXT, url TEXT, thumbnail TEXT, view_count INTEGER,
|
|
320
|
+
PRIMARY KEY (source, video_id)
|
|
321
|
+
);
|
|
322
|
+
CREATE INDEX IF NOT EXISTS idx_videos_expert ON videos(source, source_id, published_at DESC);
|
|
323
|
+
|
|
324
|
+
-- Generic external enrichment (linkedin, site/newsletter, \u2026) as JSON.
|
|
325
|
+
CREATE TABLE IF NOT EXISTS ext_profiles (
|
|
326
|
+
source TEXT NOT NULL, source_id TEXT NOT NULL, kind TEXT NOT NULL,
|
|
327
|
+
data TEXT, enriched_at TEXT,
|
|
328
|
+
PRIMARY KEY (source, source_id, kind)
|
|
329
|
+
);
|
|
330
|
+
|
|
331
|
+
-- Enrichment: discovered contact methods (multiple email/phone per expert).
|
|
332
|
+
CREATE TABLE IF NOT EXISTS contacts (
|
|
333
|
+
source TEXT NOT NULL, source_id TEXT NOT NULL,
|
|
334
|
+
type TEXT NOT NULL, value TEXT NOT NULL,
|
|
335
|
+
label TEXT, provider TEXT, confidence REAL,
|
|
336
|
+
status TEXT DEFAULT 'unverified', verified_at TEXT, created_at TEXT,
|
|
337
|
+
PRIMARY KEY (source, source_id, type, value)
|
|
338
|
+
);
|
|
339
|
+
CREATE INDEX IF NOT EXISTS idx_contacts_expert ON contacts(source, source_id);
|
|
340
|
+
CREATE INDEX IF NOT EXISTS idx_contacts_status ON contacts(status);
|
|
341
|
+
|
|
342
|
+
-- Semantic search: one embedding vector per expert.
|
|
343
|
+
CREATE TABLE IF NOT EXISTS vectors (
|
|
344
|
+
source TEXT NOT NULL, source_id TEXT NOT NULL,
|
|
345
|
+
embedder TEXT NOT NULL, dim INTEGER, vec BLOB,
|
|
346
|
+
PRIMARY KEY (source, source_id)
|
|
347
|
+
);
|
|
348
|
+
|
|
349
|
+
-- Identity resolution: maps each expert record to a canonical person.
|
|
350
|
+
CREATE TABLE IF NOT EXISTS persons (
|
|
351
|
+
source TEXT NOT NULL, source_id TEXT NOT NULL, person_id TEXT NOT NULL,
|
|
352
|
+
PRIMARY KEY (source, source_id)
|
|
353
|
+
);
|
|
354
|
+
CREATE INDEX IF NOT EXISTS idx_persons_person ON persons(person_id);
|
|
355
|
+
|
|
356
|
+
-- Change detection: a log of what changed between crawls.
|
|
357
|
+
CREATE TABLE IF NOT EXISTS changes (
|
|
358
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
359
|
+
source TEXT NOT NULL, source_id TEXT NOT NULL,
|
|
360
|
+
kind TEXT NOT NULL, field TEXT, old_value TEXT, new_value TEXT,
|
|
361
|
+
detected_at TEXT
|
|
362
|
+
);
|
|
363
|
+
CREATE INDEX IF NOT EXISTS idx_changes_time ON changes(detected_at DESC);
|
|
114
364
|
`);
|
|
115
365
|
this.addColumnIfMissing("experts", "avatar_local", "TEXT");
|
|
366
|
+
this.addColumnIfMissing("experts", "authority", "REAL DEFAULT 0");
|
|
116
367
|
}
|
|
117
368
|
addColumnIfMissing(table, column, type) {
|
|
118
369
|
const cols = this.db.query(`PRAGMA table_info(${table})`).all();
|
|
@@ -198,6 +449,7 @@ class ExpertsDB {
|
|
|
198
449
|
socials: JSON.parse(r.socials || "{}"),
|
|
199
450
|
extra: JSON.parse(r.extra || "{}"),
|
|
200
451
|
avatarLocal: r.avatar_local || undefined,
|
|
452
|
+
authority: r.authority ?? 0,
|
|
201
453
|
crawledAt: r.crawled_at
|
|
202
454
|
};
|
|
203
455
|
}
|
|
@@ -245,7 +497,7 @@ class ExpertsDB {
|
|
|
245
497
|
where.push("rating >= ?");
|
|
246
498
|
params.push(filters.minRating);
|
|
247
499
|
}
|
|
248
|
-
const sortCol = filters.sort === "price" ? "price" : filters.sort === "name" ? "full_name" : filters.sort === "reviews" ? "rating_count" : "rating";
|
|
500
|
+
const sortCol = filters.sort === "price" ? "price" : filters.sort === "name" ? "full_name" : filters.sort === "reviews" ? "rating_count" : filters.sort === "authority" ? "authority" : "rating";
|
|
249
501
|
const defaultAsc = filters.sort === "name";
|
|
250
502
|
const dir = filters.desc ?? !defaultAsc ? "DESC" : "ASC";
|
|
251
503
|
let sql = "SELECT * FROM experts";
|
|
@@ -321,11 +573,11 @@ class ExpertsDB {
|
|
|
321
573
|
sql += " ORDER BY name";
|
|
322
574
|
return this.db.query(sql).all(...params);
|
|
323
575
|
}
|
|
324
|
-
setMeta(
|
|
325
|
-
this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(
|
|
576
|
+
setMeta(key2, value) {
|
|
577
|
+
this.db.query("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value").run(key2, value);
|
|
326
578
|
}
|
|
327
|
-
getMeta(
|
|
328
|
-
const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(
|
|
579
|
+
getMeta(key2) {
|
|
580
|
+
const row = this.db.query("SELECT value FROM meta WHERE key = ?").get(key2);
|
|
329
581
|
return row ? row.value : null;
|
|
330
582
|
}
|
|
331
583
|
stats(source) {
|
|
@@ -349,12 +601,12 @@ class ExpertsDB {
|
|
|
349
601
|
const nodeIds = new Map;
|
|
350
602
|
const insertNode = this.db.query("INSERT INTO kg_nodes (type, key, label) VALUES (?, ?, ?) ON CONFLICT(type, key) DO UPDATE SET label=excluded.label RETURNING id");
|
|
351
603
|
const insertEdge = this.db.query("INSERT OR REPLACE INTO kg_edges (src, dst, rel, weight) VALUES (?, ?, ?, ?)");
|
|
352
|
-
const node = (type,
|
|
353
|
-
const ck = `${type}\x00${
|
|
604
|
+
const node = (type, key2, label) => {
|
|
605
|
+
const ck = `${type}\x00${key2.toLowerCase()}`;
|
|
354
606
|
const cached = nodeIds.get(ck);
|
|
355
607
|
if (cached != null)
|
|
356
608
|
return cached;
|
|
357
|
-
const id = insertNode.get(type,
|
|
609
|
+
const id = insertNode.get(type, key2.toLowerCase(), label).id;
|
|
358
610
|
nodeIds.set(ck, id);
|
|
359
611
|
return id;
|
|
360
612
|
};
|
|
@@ -364,7 +616,8 @@ class ExpertsDB {
|
|
|
364
616
|
for (const topic of e.topics) {
|
|
365
617
|
insertEdge.run(eId, node("topic", topic, topic), "IN_TOPIC", 1);
|
|
366
618
|
}
|
|
367
|
-
const
|
|
619
|
+
const tweetText = this.recentTweets(e.source, e.sourceId, 30).map((t) => t.text).join(". ");
|
|
620
|
+
const tags = inferTags(expertText(e) + ". " + tweetText, vocabulary);
|
|
368
621
|
for (const tag of tags) {
|
|
369
622
|
insertEdge.run(eId, node("tag", tag, tag), "HAS_TAG", 1);
|
|
370
623
|
}
|
|
@@ -377,11 +630,51 @@ class ExpertsDB {
|
|
|
377
630
|
this.setMeta("graph_built", new Date().toISOString());
|
|
378
631
|
return { nodes, edges };
|
|
379
632
|
}
|
|
380
|
-
|
|
381
|
-
const
|
|
633
|
+
rescore(source) {
|
|
634
|
+
const experts = this.list({ source });
|
|
635
|
+
const upd = this.db.query("UPDATE experts SET authority = ? WHERE source = ? AND source_id = ?");
|
|
636
|
+
const followerStmt = this.db.query("SELECT followers FROM x_profiles WHERE source = ? AND source_id = ?");
|
|
637
|
+
const lastTweetStmt = this.db.query("SELECT MAX(created_at) AS t FROM tweets WHERE source = ? AND source_id = ?");
|
|
638
|
+
const tx = this.db.transaction((rows) => {
|
|
639
|
+
for (const e of rows) {
|
|
640
|
+
const fr = followerStmt.get(e.source, e.sourceId);
|
|
641
|
+
const lt = lastTweetStmt.get(e.source, e.sourceId);
|
|
642
|
+
let daysSince;
|
|
643
|
+
if (lt?.t) {
|
|
644
|
+
const ms = Date.now() - Date.parse(lt.t);
|
|
645
|
+
if (!Number.isNaN(ms))
|
|
646
|
+
daysSince = ms / 86400000;
|
|
647
|
+
}
|
|
648
|
+
const score = authorityScore(e, { followers: fr?.followers ?? 0, daysSinceLastTweet: daysSince });
|
|
649
|
+
upd.run(score, e.source, e.sourceId);
|
|
650
|
+
}
|
|
651
|
+
});
|
|
652
|
+
tx(experts);
|
|
653
|
+
this.setMeta("rescored_at", new Date().toISOString());
|
|
654
|
+
return experts.length;
|
|
655
|
+
}
|
|
656
|
+
stalest(opts = {}) {
|
|
657
|
+
const where = opts.source ? "WHERE e.source = ?" : "";
|
|
658
|
+
const params = opts.source ? [opts.source] : [];
|
|
659
|
+
const sql = `
|
|
660
|
+
SELECT e.*, COALESCE(
|
|
661
|
+
(SELECT MAX(enriched_at) FROM x_profiles xp WHERE xp.source=e.source AND xp.source_id=e.source_id),
|
|
662
|
+
e.crawled_at
|
|
663
|
+
) AS last_seen
|
|
664
|
+
FROM experts e ${where}
|
|
665
|
+
ORDER BY last_seen ASC
|
|
666
|
+
LIMIT ?`;
|
|
667
|
+
params.push(opts.limit ?? 25);
|
|
668
|
+
return this.db.query(sql).all(...params).map((r) => ({
|
|
669
|
+
expert: this.rowToExpert(r),
|
|
670
|
+
lastSeen: r.last_seen || ""
|
|
671
|
+
}));
|
|
672
|
+
}
|
|
673
|
+
expertFromNodeKey(key2) {
|
|
674
|
+
const idx = key2.indexOf(":");
|
|
382
675
|
if (idx < 0)
|
|
383
676
|
return null;
|
|
384
|
-
return this.get(
|
|
677
|
+
return this.get(key2.slice(idx + 1), key2.slice(0, idx));
|
|
385
678
|
}
|
|
386
679
|
findByNeeds(needs, opts = {}) {
|
|
387
680
|
const cleaned = needs.map((n) => n.trim().toLowerCase()).filter(Boolean);
|
|
@@ -570,6 +863,270 @@ class ExpertsDB {
|
|
|
570
863
|
const avatars = this.db.query(`SELECT COUNT(*) AS n FROM experts WHERE avatar_local IS NOT NULL${source ? " AND source = ?" : ""}`).get(...args).n;
|
|
571
864
|
return { withHandle, enriched, tweets, avatars };
|
|
572
865
|
}
|
|
866
|
+
recordChanges(source, incoming) {
|
|
867
|
+
const existing = new Map(this.list({ source }).map((e) => [e.sourceId, e]));
|
|
868
|
+
const now = new Date().toISOString();
|
|
869
|
+
const watched = ["price", "title", "headline", "bio", "slug"];
|
|
870
|
+
const stmt = this.db.query("INSERT INTO changes (source, source_id, kind, field, old_value, new_value, detected_at) VALUES (?, ?, ?, ?, ?, ?, ?)");
|
|
871
|
+
let count = 0;
|
|
872
|
+
const tx = this.db.transaction((rows) => {
|
|
873
|
+
for (const e of rows) {
|
|
874
|
+
const prev = existing.get(e.sourceId);
|
|
875
|
+
if (!prev) {
|
|
876
|
+
stmt.run(source, e.sourceId, "added", null, null, e.fullName || e.slug, now);
|
|
877
|
+
count++;
|
|
878
|
+
continue;
|
|
879
|
+
}
|
|
880
|
+
for (const f of watched) {
|
|
881
|
+
const a = String(prev[f] ?? "");
|
|
882
|
+
const b = String(e[f] ?? "");
|
|
883
|
+
if (a !== b) {
|
|
884
|
+
stmt.run(source, e.sourceId, "updated", f, a, b, now);
|
|
885
|
+
count++;
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
});
|
|
890
|
+
tx(incoming);
|
|
891
|
+
return count;
|
|
892
|
+
}
|
|
893
|
+
changes(opts = {}) {
|
|
894
|
+
const where = opts.source ? "WHERE source = ?" : "";
|
|
895
|
+
const params = opts.source ? [opts.source] : [];
|
|
896
|
+
params.push(opts.limit ?? 50);
|
|
897
|
+
return this.db.query(`SELECT * FROM changes ${where} ORDER BY detected_at DESC, id DESC LIMIT ?`).all(...params);
|
|
898
|
+
}
|
|
899
|
+
rebuildPersons() {
|
|
900
|
+
const experts = this.list();
|
|
901
|
+
const mapping = clusterPersons(experts);
|
|
902
|
+
const tx = this.db.transaction(() => {
|
|
903
|
+
this.db.exec("DELETE FROM persons");
|
|
904
|
+
const stmt = this.db.query("INSERT OR REPLACE INTO persons (source, source_id, person_id) VALUES (?, ?, ?)");
|
|
905
|
+
for (const [k, pid] of mapping) {
|
|
906
|
+
const idx = k.indexOf(":");
|
|
907
|
+
stmt.run(k.slice(0, idx), k.slice(idx + 1), pid);
|
|
908
|
+
}
|
|
909
|
+
});
|
|
910
|
+
tx();
|
|
911
|
+
const persons = new Set(mapping.values()).size;
|
|
912
|
+
this.setMeta("persons_built", new Date().toISOString());
|
|
913
|
+
return { experts: experts.length, persons };
|
|
914
|
+
}
|
|
915
|
+
personIdOf(source, sourceId) {
|
|
916
|
+
const r = this.db.query("SELECT person_id FROM persons WHERE source = ? AND source_id = ?").get(source, sourceId);
|
|
917
|
+
return r ? r.person_id : `${source}:${sourceId}`;
|
|
918
|
+
}
|
|
919
|
+
expertsForPerson(personId) {
|
|
920
|
+
const rows = this.db.query("SELECT e.* FROM persons p JOIN experts e ON e.source=p.source AND e.source_id=p.source_id WHERE p.person_id = ?").all(personId);
|
|
921
|
+
return rows.map((r) => this.rowToExpert(r));
|
|
922
|
+
}
|
|
923
|
+
personStats() {
|
|
924
|
+
const experts = this.count();
|
|
925
|
+
const row = this.db.query("SELECT COUNT(DISTINCT person_id) n FROM persons").get();
|
|
926
|
+
const persons = row?.n ?? 0;
|
|
927
|
+
const dupes = this.db.query("SELECT person_id, COUNT(*) c FROM persons GROUP BY person_id HAVING c > 1 ORDER BY c DESC").all();
|
|
928
|
+
return { experts, persons: persons || experts, duplicated: dupes.length };
|
|
929
|
+
}
|
|
930
|
+
async buildEmbeddings(embedder, opts = {}) {
|
|
931
|
+
const log = opts.onLog ?? (() => {});
|
|
932
|
+
const experts = this.list({ source: opts.source });
|
|
933
|
+
const batch = opts.batch ?? 64;
|
|
934
|
+
const stmt = this.db.query("INSERT OR REPLACE INTO vectors (source, source_id, embedder, dim, vec) VALUES (?, ?, ?, ?, ?)");
|
|
935
|
+
let done = 0;
|
|
936
|
+
for (let i = 0;i < experts.length; i += batch) {
|
|
937
|
+
const slice = experts.slice(i, i + batch);
|
|
938
|
+
const vecs = await embedder.embed(slice.map((e) => expertEmbedText(e)));
|
|
939
|
+
const tx = this.db.transaction(() => {
|
|
940
|
+
slice.forEach((e, j) => stmt.run(e.source, e.sourceId, embedder.id, embedder.dim, packVector(vecs[j])));
|
|
941
|
+
});
|
|
942
|
+
tx();
|
|
943
|
+
done += slice.length;
|
|
944
|
+
log(` embedded ${done}/${experts.length}`);
|
|
945
|
+
}
|
|
946
|
+
this.setMeta("embedder", embedder.id);
|
|
947
|
+
this.setMeta("embedded_at", new Date().toISOString());
|
|
948
|
+
return done;
|
|
949
|
+
}
|
|
950
|
+
vectorCount() {
|
|
951
|
+
return this.db.query("SELECT COUNT(*) n FROM vectors").get().n;
|
|
952
|
+
}
|
|
953
|
+
semanticSearch(queryVec, opts = {}) {
|
|
954
|
+
const where = opts.source ? "WHERE v.source = ?" : "";
|
|
955
|
+
const params = opts.source ? [opts.source] : [];
|
|
956
|
+
const rows = this.db.query(`SELECT e.*, v.vec AS _vec FROM vectors v JOIN experts e ON e.source=v.source AND e.source_id=v.source_id ${where}`).all(...params);
|
|
957
|
+
const scored = rows.map((r) => ({ expert: this.rowToExpert(r), score: cosine(queryVec, unpackVector(r._vec)) }));
|
|
958
|
+
scored.sort((a, b) => b.score - a.score);
|
|
959
|
+
return scored.slice(0, opts.limit ?? 25);
|
|
960
|
+
}
|
|
961
|
+
upsertExt(source, sourceId, kind, data) {
|
|
962
|
+
this.db.query(`
|
|
963
|
+
INSERT INTO ext_profiles (source, source_id, kind, data, enriched_at)
|
|
964
|
+
VALUES (?, ?, ?, ?, ?)
|
|
965
|
+
ON CONFLICT(source, source_id, kind) DO UPDATE SET data=excluded.data, enriched_at=excluded.enriched_at
|
|
966
|
+
`).run(source, sourceId, kind, JSON.stringify(data), new Date().toISOString());
|
|
967
|
+
}
|
|
968
|
+
getExt(source, sourceId, kind) {
|
|
969
|
+
const r = this.db.query("SELECT data FROM ext_profiles WHERE source=? AND source_id=? AND kind=?").get(source, sourceId, kind);
|
|
970
|
+
return r ? JSON.parse(r.data || "{}") : null;
|
|
971
|
+
}
|
|
972
|
+
allExt(source, sourceId) {
|
|
973
|
+
const rows = this.db.query("SELECT kind, data FROM ext_profiles WHERE source=? AND source_id=?").all(source, sourceId);
|
|
974
|
+
return Object.fromEntries(rows.map((r) => [r.kind, JSON.parse(r.data || "{}")]));
|
|
975
|
+
}
|
|
976
|
+
expertsNeedingExt(platform, kind, opts = {}) {
|
|
977
|
+
const where = [`json_extract(socials, '$.${platform}') IS NOT NULL`];
|
|
978
|
+
const bind = [];
|
|
979
|
+
if (opts.source) {
|
|
980
|
+
where.push("source = ?");
|
|
981
|
+
bind.push(opts.source);
|
|
982
|
+
}
|
|
983
|
+
if (!opts.refresh) {
|
|
984
|
+
where.push("NOT EXISTS (SELECT 1 FROM ext_profiles x WHERE x.source=experts.source AND x.source_id=experts.source_id AND x.kind=?)");
|
|
985
|
+
bind.push(kind);
|
|
986
|
+
}
|
|
987
|
+
let sql = "SELECT * FROM experts WHERE " + where.join(" AND ") + " ORDER BY rating_count DESC";
|
|
988
|
+
if (opts.limit) {
|
|
989
|
+
sql += " LIMIT ?";
|
|
990
|
+
bind.push(opts.limit);
|
|
991
|
+
}
|
|
992
|
+
return this.db.query(sql).all(...bind).map((r) => this.rowToExpert(r));
|
|
993
|
+
}
|
|
994
|
+
replaceVideos(source, sourceId, videos) {
|
|
995
|
+
const tx = this.db.transaction((rows) => {
|
|
996
|
+
this.db.query("DELETE FROM videos WHERE source = ? AND source_id = ?").run(source, sourceId);
|
|
997
|
+
const stmt = this.db.query("INSERT OR REPLACE INTO videos (source, source_id, video_id, title, description, published_at, url, thumbnail, view_count) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)");
|
|
998
|
+
for (const v of rows) {
|
|
999
|
+
stmt.run(v.source, v.sourceId, v.videoId, v.title, v.description, v.publishedAt, v.url, v.thumbnail, v.viewCount);
|
|
1000
|
+
}
|
|
1001
|
+
});
|
|
1002
|
+
tx(videos);
|
|
1003
|
+
}
|
|
1004
|
+
recentVideos(source, sourceId, limit = 10) {
|
|
1005
|
+
const rows = this.db.query("SELECT * FROM videos WHERE source = ? AND source_id = ? ORDER BY published_at DESC LIMIT ?").all(source, sourceId, limit);
|
|
1006
|
+
return rows.map((r) => ({
|
|
1007
|
+
source: r.source,
|
|
1008
|
+
sourceId: r.source_id,
|
|
1009
|
+
videoId: r.video_id,
|
|
1010
|
+
title: r.title || "",
|
|
1011
|
+
description: r.description || "",
|
|
1012
|
+
publishedAt: r.published_at || "",
|
|
1013
|
+
url: r.url || "",
|
|
1014
|
+
thumbnail: r.thumbnail || "",
|
|
1015
|
+
viewCount: r.view_count ?? 0
|
|
1016
|
+
}));
|
|
1017
|
+
}
|
|
1018
|
+
expertsNeedingVideos(opts = {}) {
|
|
1019
|
+
const where = ["json_extract(socials, '$.youtube') IS NOT NULL"];
|
|
1020
|
+
const params = [];
|
|
1021
|
+
if (opts.source) {
|
|
1022
|
+
where.push("source = ?");
|
|
1023
|
+
params.push(opts.source);
|
|
1024
|
+
}
|
|
1025
|
+
if (!opts.refresh) {
|
|
1026
|
+
where.push("NOT EXISTS (SELECT 1 FROM videos v WHERE v.source=experts.source AND v.source_id=experts.source_id)");
|
|
1027
|
+
}
|
|
1028
|
+
let sql = "SELECT * FROM experts WHERE " + where.join(" AND ") + " ORDER BY rating_count DESC";
|
|
1029
|
+
if (opts.limit) {
|
|
1030
|
+
sql += " LIMIT ?";
|
|
1031
|
+
params.push(opts.limit);
|
|
1032
|
+
}
|
|
1033
|
+
return this.db.query(sql).all(...params).map((r) => this.rowToExpert(r));
|
|
1034
|
+
}
|
|
1035
|
+
upsertContact(c) {
|
|
1036
|
+
this.db.query(`
|
|
1037
|
+
INSERT INTO contacts (source, source_id, type, value, label, provider, confidence, status, verified_at, created_at)
|
|
1038
|
+
VALUES ($source, $source_id, $type, $value, $label, $provider, $confidence, $status, $verified_at, $created_at)
|
|
1039
|
+
ON CONFLICT(source, source_id, type, value) DO UPDATE SET
|
|
1040
|
+
label=excluded.label, provider=excluded.provider, confidence=excluded.confidence,
|
|
1041
|
+
status=CASE WHEN excluded.status != 'unverified' THEN excluded.status ELSE contacts.status END,
|
|
1042
|
+
verified_at=COALESCE(excluded.verified_at, contacts.verified_at)
|
|
1043
|
+
`).run({
|
|
1044
|
+
$source: c.source,
|
|
1045
|
+
$source_id: c.sourceId,
|
|
1046
|
+
$type: c.type,
|
|
1047
|
+
$value: c.value,
|
|
1048
|
+
$label: c.label,
|
|
1049
|
+
$provider: c.provider,
|
|
1050
|
+
$confidence: c.confidence,
|
|
1051
|
+
$status: c.status,
|
|
1052
|
+
$verified_at: c.verifiedAt || null,
|
|
1053
|
+
$created_at: c.createdAt || new Date().toISOString()
|
|
1054
|
+
});
|
|
1055
|
+
}
|
|
1056
|
+
setContactStatus(source, sourceId, type, value, status) {
|
|
1057
|
+
this.db.query("UPDATE contacts SET status = ?, verified_at = ? WHERE source = ? AND source_id = ? AND type = ? AND value = ?").run(status, new Date().toISOString(), source, sourceId, type, value);
|
|
1058
|
+
}
|
|
1059
|
+
contacts(source, sourceId) {
|
|
1060
|
+
const rows = this.db.query("SELECT * FROM contacts WHERE source = ? AND source_id = ? ORDER BY type, confidence DESC").all(source, sourceId);
|
|
1061
|
+
return rows.map((r) => ({
|
|
1062
|
+
source: r.source,
|
|
1063
|
+
sourceId: r.source_id,
|
|
1064
|
+
type: r.type,
|
|
1065
|
+
value: r.value,
|
|
1066
|
+
label: r.label || "",
|
|
1067
|
+
provider: r.provider || "",
|
|
1068
|
+
confidence: r.confidence ?? 0,
|
|
1069
|
+
status: r.status || "unverified",
|
|
1070
|
+
verifiedAt: r.verified_at || "",
|
|
1071
|
+
createdAt: r.created_at || ""
|
|
1072
|
+
}));
|
|
1073
|
+
}
|
|
1074
|
+
contactsToVerify(opts = {}) {
|
|
1075
|
+
const where = ["status = 'unverified'"];
|
|
1076
|
+
const params = [];
|
|
1077
|
+
if (opts.source) {
|
|
1078
|
+
where.push("source = ?");
|
|
1079
|
+
params.push(opts.source);
|
|
1080
|
+
}
|
|
1081
|
+
let sql = "SELECT * FROM contacts WHERE " + where.join(" AND ") + " ORDER BY confidence DESC";
|
|
1082
|
+
if (opts.limit) {
|
|
1083
|
+
sql += " LIMIT ?";
|
|
1084
|
+
params.push(opts.limit);
|
|
1085
|
+
}
|
|
1086
|
+
return this.db.query(sql).all(...params).map((r) => ({
|
|
1087
|
+
source: r.source,
|
|
1088
|
+
sourceId: r.source_id,
|
|
1089
|
+
type: r.type,
|
|
1090
|
+
value: r.value,
|
|
1091
|
+
label: r.label || "",
|
|
1092
|
+
provider: r.provider || "",
|
|
1093
|
+
confidence: r.confidence ?? 0,
|
|
1094
|
+
status: r.status || "unverified",
|
|
1095
|
+
verifiedAt: r.verified_at || "",
|
|
1096
|
+
createdAt: r.created_at || ""
|
|
1097
|
+
}));
|
|
1098
|
+
}
|
|
1099
|
+
expertsNeedingContacts(opts = {}) {
|
|
1100
|
+
const where = [];
|
|
1101
|
+
const params = [];
|
|
1102
|
+
if (opts.source) {
|
|
1103
|
+
where.push("source = ?");
|
|
1104
|
+
params.push(opts.source);
|
|
1105
|
+
}
|
|
1106
|
+
if (!opts.refresh) {
|
|
1107
|
+
where.push("NOT EXISTS (SELECT 1 FROM contacts c WHERE c.source = experts.source AND c.source_id = experts.source_id)");
|
|
1108
|
+
}
|
|
1109
|
+
let sql = "SELECT * FROM experts";
|
|
1110
|
+
if (where.length)
|
|
1111
|
+
sql += " WHERE " + where.join(" AND ");
|
|
1112
|
+
sql += " ORDER BY rating_count DESC";
|
|
1113
|
+
if (opts.limit) {
|
|
1114
|
+
sql += " LIMIT ?";
|
|
1115
|
+
params.push(opts.limit);
|
|
1116
|
+
}
|
|
1117
|
+
return this.db.query(sql).all(...params).map((r) => this.rowToExpert(r));
|
|
1118
|
+
}
|
|
1119
|
+
contactStats(source) {
|
|
1120
|
+
const filt = source ? " WHERE source = ?" : "";
|
|
1121
|
+
const args = source ? [source] : [];
|
|
1122
|
+
const total = this.db.query(`SELECT COUNT(*) n FROM contacts${filt}`).get(...args).n;
|
|
1123
|
+
const valid = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE status='valid'${source ? " AND source = ?" : ""}`).get(...args).n;
|
|
1124
|
+
const invalid = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE status='invalid'${source ? " AND source = ?" : ""}`).get(...args).n;
|
|
1125
|
+
const emails = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE type='email'${source ? " AND source = ?" : ""}`).get(...args).n;
|
|
1126
|
+
const phones = this.db.query(`SELECT COUNT(*) n FROM contacts WHERE type='phone'${source ? " AND source = ?" : ""}`).get(...args).n;
|
|
1127
|
+
const expertsWith = this.db.query(`SELECT COUNT(DISTINCT source||source_id) n FROM contacts${filt}`).get(...args).n;
|
|
1128
|
+
return { total, valid, invalid, emails, phones, expertsWith };
|
|
1129
|
+
}
|
|
573
1130
|
close() {
|
|
574
1131
|
this.db.close();
|
|
575
1132
|
}
|
|
@@ -788,6 +1345,329 @@ class IntroSource {
|
|
|
788
1345
|
}
|
|
789
1346
|
}
|
|
790
1347
|
|
|
1348
|
+
// src/sources/common.ts
|
|
1349
|
+
/**
 * Build a complete expert record from a partial one, filling every missing
 * optional field with a neutral default.
 *
 * @param {object} p - Partial expert; `source` and `sourceId` are copied
 *   through unchanged, every other field is optional.
 * @returns {object} Expert record with all fields present. `topics` is
 *   de-duplicated and sorted; `crawledAt` defaults to the current time in
 *   ISO-8601 form.
 */
function makeExpert(p) {
  const firstName = p.firstName ?? "";
  const lastName = p.lastName ?? "";
  // `||` rather than `??`: callers commonly pass `name ?? ""`, and an empty
  // string should still fall back to the name assembled from its parts.
  const fullName = p.fullName || [firstName, lastName].filter(Boolean).join(" ");
  return {
    source: p.source,
    sourceId: p.sourceId,
    slug: p.slug ?? "",
    url: p.url ?? "",
    fullName,
    firstName,
    lastName,
    title: p.title ?? "",
    headline: p.headline ?? "",
    bio: p.bio ?? "",
    avatar: p.avatar ?? "",
    price: p.price ?? 0,
    priceCurrency: p.priceCurrency ?? "USD",
    priceUnit: p.priceUnit ?? "",
    rating: p.rating ?? 0,
    ratingCount: p.ratingCount ?? 0,
    verified: p.verified ?? false,
    featured: p.featured ?? false,
    topics: p.topics ? [...new Set(p.topics)].sort() : [],
    tags: p.tags ?? [],
    socials: p.socials ?? {},
    extra: p.extra ?? {},
    crawledAt: p.crawledAt ?? new Date().toISOString()
  };
}
|
|
1378
|
+
/**
 * Convert arbitrary text into a lowercase, hyphen-separated ASCII slug.
 *
 * @param {string} [s] - Input text; falsy values yield an empty string.
 * @returns {string} Slug containing only `a-z`, `0-9` and single hyphens,
 *   with no leading or trailing hyphen.
 */
function slugify(s) {
  return String(s || "")
    .toLowerCase()
    .normalize("NFKD")
    // NFKD splits "é" into "e" + a combining accent; drop the accent here so
    // it is not turned into a hyphen in the middle of the word below.
    .replace(/\p{M}+/gu, "")
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}
|
|
1381
|
+
/**
 * Best-effort JSON GET helper: never rejects.
 *
 * Sends `User-Agent` and `Accept` defaults, which entries in `init.headers`
 * override key by key.
 *
 * @param {string} url - Address passed to `fetchFn`.
 * @param {Function} fetchFn - fetch-compatible function `(url, init)`.
 * @param {object} [init] - Extra request options merged into the call.
 * @returns {Promise<*|null>} Parsed body, or `null` when the response is not
 *   `ok`, the request throws, or the body fails to parse.
 */
async function fetchJson(url, fetchFn, init = {}) {
  try {
    const headers = {
      "User-Agent": "open-experts (+https://github.com/hasna/experts)",
      Accept: "application/json",
      ...(init.headers || {})
    };
    const response = await fetchFn(url, { ...init, headers });
    if (response.ok) {
      return await response.json();
    }
    return null;
  } catch {
    return null;
  }
}
|
|
1398
|
+
|
|
1399
|
+
// src/sources/mentorcruise.ts
|
|
1400
|
+
/**
 * Convert one raw MentorCruise mentor object into an expert record.
 *
 * @param {object} m - Raw mentor as returned by the listing endpoint.
 * @param {string} crawledAt - Timestamp stamped onto the record.
 * @returns {object} Expert record produced by `makeExpert`.
 */
function normalizeMentor(m, crawledAt) {
  // Prefer the mentor's own slug; otherwise derive one from name or id.
  const slug = m.slug || slugify(m.name || String(m.id ?? ""));

  const socials = {};
  if (m.twitter) {
    // Bare handles are expanded to a profile URL; full URLs pass through.
    const isUrl = m.twitter.startsWith("http");
    socials.twitter = isUrl ? m.twitter : `https://x.com/${m.twitter}`;
  }
  if (m.linkedin) {
    socials.linkedin = m.linkedin;
  }

  const partial = {
    source: "mentorcruise",
    sourceId: String(m.id ?? slug),
    slug,
    url: `https://mentorcruise.com/mentor/${slug}/`,
    fullName: m.name ?? [m.first_name, m.last_name].filter(Boolean).join(" "),
    firstName: m.first_name ?? "",
    lastName: m.last_name ?? "",
    title: m.job_title ?? "",
    bio: m.bio ?? "",
    avatar: m.avatar ?? m.photo ?? "",
    price: m.price ?? 0,
    priceCurrency: m.currency ?? "USD",
    priceUnit: m.price ? "per month" : "",
    rating: m.rating ?? 0,
    ratingCount: m.reviews_count ?? 0,
    verified: Boolean(m.verified),
    featured: Boolean(m.is_top_mentor),
    topics: m.categories ?? [],
    tags: m.skills ?? [],
    socials,
    crawledAt
  };
  return makeExpert(partial);
}
|
|
1431
|
+
|
|
1432
|
+
/**
 * Source adapter that pages through a MentorCruise-style mentor listing.
 *
 * Constructor options: `fetchFn` (defaults to global `fetch`), `apiBase`
 * (defaults to `MENTORCRUISE_API_BASE` or the public API URL) and `pageSize`
 * (defaults to 50).
 */
class MentorCruiseSource {
  name = "mentorcruise";
  description = "MentorCruise \u2014 long-term mentorship from vetted mentors";
  website = "https://mentorcruise.com";
  fetchFn;
  apiBase;
  pageSize;

  constructor(opts = {}) {
    const envBase = process.env.MENTORCRUISE_API_BASE;
    this.fetchFn = opts.fetchFn ?? fetch;
    this.apiBase = opts.apiBase ?? envBase ?? "https://mentorcruise.com/api";
    this.pageSize = opts.pageSize ?? 50;
  }

  /**
   * Fetch mentors page by page (limit/offset) until a page is empty or
   * short, or until `opts.max` experts have been collected.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] - Progress callback receiving strings.
   * @param {number} [opts.max] - Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    const crawledAt = new Date().toISOString();
    const experts = [];
    const tagNames = new Set();
    let offset = 0;
    let finished = false;
    while (!finished) {
      const pageUrl = `${this.apiBase}/mentors/?limit=${this.pageSize}&offset=${offset}`;
      const data = await fetchJson(pageUrl, this.fetchFn);
      // Accept `{results}`, `{data}` or a bare array as the page payload.
      const items = data?.results ?? data?.data ?? (Array.isArray(data) ? data : []);
      if (!items.length) {
        break;
      }
      for (const raw of items) {
        const expert = normalizeMentor(raw, crawledAt);
        experts.push(expert);
        for (const tag of expert.tags) {
          tagNames.add(tag);
        }
      }
      offset += items.length;
      log(` mentorcruise: ${experts.length}`);
      const reachedMax = Boolean(opts.max) && experts.length >= opts.max;
      finished = reachedMax || items.length < this.pageSize;
    }
    if (experts.length === 0) {
      log("mentorcruise: no public listing reachable (set MENTORCRUISE_API_BASE or provide a fetchFn).");
    }
    return {
      experts: opts.max ? experts.slice(0, opts.max) : experts,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
|
|
1480
|
+
|
|
1481
|
+
// src/sources/adplist.ts
|
|
1482
|
+
/**
 * Convert one raw ADPList mentor object into an expert record.
 *
 * @param {object} m - Raw mentor as returned by the listing endpoint.
 * @param {string} crawledAt - Timestamp stamped onto the record.
 * @returns {object} Expert record produced by `makeExpert`; price is always
 *   0 with the unit "free session".
 */
function normalizeAdpMentor(m, crawledAt) {
  // Prefer the username; otherwise derive a slug from a name or the id.
  const slug = m.username || slugify(m.name || m.full_name || String(m.id ?? ""));

  const socials = {};
  if (m.twitter) {
    // Bare handles are expanded to a profile URL; full URLs pass through.
    const isUrl = m.twitter.startsWith("http");
    socials.twitter = isUrl ? m.twitter : `https://x.com/${m.twitter}`;
  }
  if (m.linkedin) {
    socials.linkedin = m.linkedin;
  }

  const partial = {
    source: "adplist",
    sourceId: String(m.id ?? slug),
    slug,
    url: `https://adplist.org/mentors/${slug}`,
    fullName: m.name ?? m.full_name ?? "",
    title: m.headline ?? m.tagline ?? "",
    headline: m.tagline ?? "",
    bio: m.bio ?? m.about ?? "",
    avatar: m.profile_photo ?? m.avatar ?? "",
    price: 0,
    priceUnit: "free session",
    rating: m.rating ?? 0,
    ratingCount: m.total_reviews ?? 0,
    verified: Boolean(m.verified),
    featured: Boolean(m.is_featured),
    tags: m.expertise ?? m.skills ?? [],
    socials,
    crawledAt
  };
  return makeExpert(partial);
}
|
|
1510
|
+
|
|
1511
|
+
/**
 * Source adapter that pages through an ADPList-style mentor listing.
 *
 * Constructor options: `fetchFn` (defaults to global `fetch`), `apiBase`
 * (defaults to `ADPLIST_API_BASE` or the public API URL) and `pageSize`
 * (defaults to 50).
 */
class ADPListSource {
  name = "adplist";
  description = "ADPList \u2014 free mentorship across design, product & engineering";
  website = "https://adplist.org";
  fetchFn;
  apiBase;
  pageSize;

  constructor(opts = {}) {
    const envBase = process.env.ADPLIST_API_BASE;
    this.fetchFn = opts.fetchFn ?? fetch;
    this.apiBase = opts.apiBase ?? envBase ?? "https://api.adplist.org/api";
    this.pageSize = opts.pageSize ?? 50;
  }

  /**
   * Fetch mentors page by page (1-based `page` parameter) until a page is
   * empty or short, or until `opts.max` experts have been collected.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] - Progress callback receiving strings.
   * @param {number} [opts.max] - Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    const crawledAt = new Date().toISOString();
    const experts = [];
    const tagNames = new Set();
    let page = 1;
    let finished = false;
    while (!finished) {
      const pageUrl = `${this.apiBase}/mentors/?page=${page}&page_size=${this.pageSize}`;
      const data = await fetchJson(pageUrl, this.fetchFn);
      // Accept `{results}`, `{data}` or a bare array as the page payload.
      const items = data?.results ?? data?.data ?? (Array.isArray(data) ? data : []);
      if (!items.length) {
        break;
      }
      for (const raw of items) {
        const expert = normalizeAdpMentor(raw, crawledAt);
        experts.push(expert);
        for (const tag of expert.tags) {
          tagNames.add(tag);
        }
      }
      log(` adplist: ${experts.length}`);
      page += 1;
      const reachedMax = Boolean(opts.max) && experts.length >= opts.max;
      finished = reachedMax || items.length < this.pageSize;
    }
    if (experts.length === 0) {
      log("adplist: no public listing reachable (set ADPLIST_API_BASE or provide a fetchFn).");
    }
    return {
      experts: opts.max ? experts.slice(0, opts.max) : experts,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
|
|
1557
|
+
|
|
1558
|
+
// src/sources/clarity.ts
|
|
1559
|
+
/**
 * Convert one raw Clarity expert object into an expert record.
 *
 * @param {object} c - Raw expert as returned by the listing endpoint.
 * @param {string} crawledAt - Timestamp stamped onto the record.
 * @returns {object} Expert record produced by `makeExpert`; the price is
 *   taken from `rate_per_minute` and labelled "per minute" when non-zero.
 */
function normalizeClarityExpert(c, crawledAt) {
  // Prefer the username; otherwise derive a slug from the name or the id.
  const slug = c.username || slugify(c.name || String(c.id ?? ""));
  const perMinute = c.rate_per_minute;
  const partial = {
    source: "clarity",
    sourceId: String(c.id ?? slug),
    slug,
    url: `https://clarity.fm/${slug}`,
    fullName: c.name ?? "",
    title: c.title ?? "",
    bio: c.bio ?? "",
    avatar: c.image ?? "",
    price: perMinute ?? 0,
    priceCurrency: "USD",
    priceUnit: perMinute ? "per minute" : "",
    rating: c.rating ?? 0,
    ratingCount: c.reviews ?? 0,
    topics: c.categories ?? [],
    tags: c.expertise ?? [],
    crawledAt
  };
  return makeExpert(partial);
}
|
|
1580
|
+
|
|
1581
|
+
/**
 * Source adapter for Clarity.fm. It only crawls when an API base is
 * configured through `opts.apiBase` or `CLARITY_API_BASE`.
 */
class ClaritySource {
  name = "clarity";
  description = "Clarity.fm \u2014 on-demand expert calls billed per minute";
  website = "https://clarity.fm";
  fetchFn;
  apiBase;

  constructor(opts = {}) {
    this.fetchFn = opts.fetchFn ?? fetch;
    this.apiBase = opts.apiBase ?? process.env.CLARITY_API_BASE;
  }

  /**
   * Fetch `${apiBase}/experts` once and normalize every returned item.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] - Progress callback receiving strings.
   * @param {number} [opts.max] - Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   *   An empty result when no API base is configured.
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    if (!this.apiBase) {
      log("clarity: no public listing API; set CLARITY_API_BASE or inject a fetchFn to crawl.");
      return { experts: [], topics: [], tags: [], total: 0 };
    }
    const crawledAt = new Date().toISOString();
    const data = await fetchJson(`${this.apiBase}/experts`, this.fetchFn);
    // Accept `{results}`, `{data}` or a bare array as the payload.
    const items = data?.results ?? data?.data ?? (Array.isArray(data) ? data : []);
    const tagNames = new Set();
    const experts = items.map((raw) => normalizeClarityExpert(raw, crawledAt));
    for (const expert of experts) {
      for (const tag of expert.tags) {
        tagNames.add(tag);
      }
    }
    const limited = opts.max ? experts.slice(0, opts.max) : experts;
    return {
      experts: limited,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
|
|
1613
|
+
|
|
1614
|
+
// src/sources/glg.ts
|
|
1615
|
+
/**
 * Convert one raw GLG expert object into an expert record.
 *
 * @param {object} g - Raw expert as returned by the listing endpoint.
 * @param {string} crawledAt - Timestamp stamped onto the record.
 * @returns {object} Expert record produced by `makeExpert`; the price is
 *   taken from `hourly_rate` and labelled "per hour" when non-zero. Every
 *   record gets the same site-level URL.
 */
function normalizeGlgExpert(g, crawledAt) {
  const slug = slugify(g.name || String(g.id ?? ""));
  const hourly = g.hourly_rate;
  const partial = {
    source: "glg",
    sourceId: String(g.id ?? slug),
    slug,
    url: "https://glginsights.com",
    fullName: g.name ?? "",
    title: g.title ?? "",
    bio: g.biography ?? "",
    price: hourly ?? 0,
    priceCurrency: g.currency ?? "USD",
    priceUnit: hourly ? "per hour" : "",
    topics: g.industries ?? [],
    tags: g.expertise_areas ?? [],
    crawledAt
  };
  return makeExpert(partial);
}
|
|
1633
|
+
|
|
1634
|
+
/**
 * Source adapter for GLG. It only crawls when an API base is configured
 * through `opts.apiBase` or `GLG_API_BASE`; an API key (`opts.apiKey` or
 * `GLG_API_KEY`) is sent as a bearer token when present.
 */
class GLGSource {
  name = "glg";
  description = "GLG \u2014 enterprise expert network (requires partner API access)";
  website = "https://glginsights.com";
  fetchFn;
  apiBase;
  apiKey;

  constructor(opts = {}) {
    const { env } = process;
    this.fetchFn = opts.fetchFn ?? fetch;
    this.apiBase = opts.apiBase ?? env.GLG_API_BASE;
    this.apiKey = opts.apiKey ?? env.GLG_API_KEY;
  }

  /**
   * Fetch `${apiBase}/experts` once and normalize every returned item.
   *
   * @param {object} [opts]
   * @param {Function} [opts.onLog] - Progress callback receiving strings.
   * @param {number} [opts.max] - Cap on the number of experts returned.
   * @returns {Promise<{experts: Array, topics: Array, tags: Array, total: number}>}
   *   An empty result when no API base is configured.
   */
  async crawl(opts = {}) {
    const log = opts.onLog ?? (() => {});
    if (!this.apiBase) {
      log("glg: enterprise-gated; no public directory. Set GLG_API_BASE + GLG_API_KEY (partner access) to crawl.");
      return { experts: [], topics: [], tags: [], total: 0 };
    }
    const crawledAt = new Date().toISOString();
    // Only attach credentials when a key was actually supplied.
    const headers = {};
    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    const data = await fetchJson(`${this.apiBase}/experts`, this.fetchFn, { headers });
    // Accept `{results}`, `{data}` or a bare array as the payload.
    const items = data?.results ?? data?.data ?? (Array.isArray(data) ? data : []);
    const tagNames = new Set();
    const experts = items.map((raw) => normalizeGlgExpert(raw, crawledAt));
    for (const expert of experts) {
      for (const tag of expert.tags) {
        tagNames.add(tag);
      }
    }
    const limited = opts.max ? experts.slice(0, opts.max) : experts;
    return {
      experts: limited,
      topics: [],
      tags: Array.from(tagNames, (name) => ({ name, topic: "" })),
      total: experts.length
    };
  }
}
|
|
1670
|
+
|
|
791
1671
|
// src/sources/index.ts
|
|
792
1672
|
var registry = new Map;
|
|
793
1673
|
function registerSource(source) {
|
|
@@ -800,6 +1680,10 @@ function listSources() {
|
|
|
800
1680
|
return [...registry.values()];
|
|
801
1681
|
}
|
|
802
1682
|
// Register one instance of every built-in source, in this order.
for (const BuiltinSource of [IntroSource, MentorCruiseSource, ADPListSource, ClaritySource, GLGSource]) {
  registerSource(new BuiltinSource());
}
|
|
803
1687
|
|
|
804
1688
|
// src/crawl.ts
|
|
805
1689
|
async function crawlSource(db, sourceName, opts = {}) {
|
|
@@ -808,6 +1692,7 @@ async function crawlSource(db, sourceName, opts = {}) {
|
|
|
808
1692
|
throw new Error(`Unknown source "${sourceName}". Run \`experts sources\` to list options.`);
|
|
809
1693
|
}
|
|
810
1694
|
const data = await source.crawl(opts);
|
|
1695
|
+
const changes = db.recordChanges(source.name, data.experts);
|
|
811
1696
|
db.upsertExperts(data.experts);
|
|
812
1697
|
if (data.topics.length)
|
|
813
1698
|
db.setTopics(source.name, data.topics);
|
|
@@ -816,13 +1701,16 @@ async function crawlSource(db, sourceName, opts = {}) {
|
|
|
816
1701
|
db.setMeta(`catalog_total:${source.name}`, String(data.total));
|
|
817
1702
|
opts.onLog?.("building knowledge graph\u2026");
|
|
818
1703
|
const graph = db.rebuildGraph();
|
|
1704
|
+
db.rescore(source.name);
|
|
1705
|
+
db.rebuildPersons();
|
|
819
1706
|
return {
|
|
820
1707
|
source: source.name,
|
|
821
1708
|
experts: data.experts.length,
|
|
822
1709
|
topics: data.topics.map((t) => ({ name: t.name, count: t.expertCount })),
|
|
823
1710
|
tags: new Set(data.tags.map((t) => t.name)).size,
|
|
824
1711
|
total: data.total,
|
|
825
|
-
graph
|
|
1712
|
+
graph,
|
|
1713
|
+
changes
|
|
826
1714
|
};
|
|
827
1715
|
}
|
|
828
1716
|
|
|
@@ -880,6 +1768,11 @@ function handle(db, req) {
|
|
|
880
1768
|
return json(db.enrichmentStats(q.get("source") || undefined));
|
|
881
1769
|
if (path === "/graph")
|
|
882
1770
|
return json(db.graphStats());
|
|
1771
|
+
if (path === "/persons")
|
|
1772
|
+
return json(db.personStats());
|
|
1773
|
+
if (path === "/changes") {
|
|
1774
|
+
return json(db.changes({ source: q.get("source") || undefined, limit: num(q.get("limit")) }));
|
|
1775
|
+
}
|
|
883
1776
|
if (path === "/find") {
|
|
884
1777
|
const needs = (q.get("needs") || "").split(",").map((s) => s.trim()).filter(Boolean);
|
|
885
1778
|
return json(db.findByNeeds(needs, {
|
|
@@ -922,6 +1815,8 @@ function handle(db, req) {
|
|
|
922
1815
|
};
|
|
923
1816
|
return json(db.list(filters));
|
|
924
1817
|
}
|
|
1818
|
+
if (path === "/contacts")
|
|
1819
|
+
return json(db.contactStats(q.get("source") || undefined));
|
|
925
1820
|
const tw = path.match(/^\/experts\/([^/]+)\/(.+)\/tweets$/);
|
|
926
1821
|
if (tw) {
|
|
927
1822
|
const e = db.get(decodeURIComponent(tw[2]), decodeURIComponent(tw[1]));
|
|
@@ -929,6 +1824,20 @@ function handle(db, req) {
|
|
|
929
1824
|
return json({ error: "not found" }, 404);
|
|
930
1825
|
return json(db.recentTweets(e.source, e.sourceId, num(q.get("limit")) ?? 25));
|
|
931
1826
|
}
|
|
1827
|
+
const ct = path.match(/^\/experts\/([^/]+)\/(.+)\/contacts$/);
|
|
1828
|
+
if (ct) {
|
|
1829
|
+
const e = db.get(decodeURIComponent(ct[2]), decodeURIComponent(ct[1]));
|
|
1830
|
+
if (!e)
|
|
1831
|
+
return json({ error: "not found" }, 404);
|
|
1832
|
+
return json(db.contacts(e.source, e.sourceId));
|
|
1833
|
+
}
|
|
1834
|
+
const vd = path.match(/^\/experts\/([^/]+)\/(.+)\/videos$/);
|
|
1835
|
+
if (vd) {
|
|
1836
|
+
const e = db.get(decodeURIComponent(vd[2]), decodeURIComponent(vd[1]));
|
|
1837
|
+
if (!e)
|
|
1838
|
+
return json({ error: "not found" }, 404);
|
|
1839
|
+
return json(db.recentVideos(e.source, e.sourceId, num(q.get("limit")) ?? 25));
|
|
1840
|
+
}
|
|
932
1841
|
const m = path.match(/^\/experts\/([^/]+)\/(.+)$/);
|
|
933
1842
|
if (m) {
|
|
934
1843
|
const e = db.get(decodeURIComponent(m[2]), decodeURIComponent(m[1]));
|
|
@@ -937,13 +1846,50 @@ function handle(db, req) {
|
|
|
937
1846
|
return json({
|
|
938
1847
|
...e,
|
|
939
1848
|
xProfile: db.getXProfile(e.source, e.sourceId),
|
|
940
|
-
tweets: db.recentTweets(e.source, e.sourceId, 10)
|
|
1849
|
+
tweets: db.recentTweets(e.source, e.sourceId, 10),
|
|
1850
|
+
contacts: db.contacts(e.source, e.sourceId),
|
|
1851
|
+
videos: db.recentVideos(e.source, e.sourceId, 10)
|
|
941
1852
|
});
|
|
942
1853
|
}
|
|
943
1854
|
return json({ error: "not found", path }, 404);
|
|
944
1855
|
}
|
|
945
1856
|
async function handleAsync(db, req) {
|
|
946
1857
|
const url = new URL(req.url);
|
|
1858
|
+
if (url.pathname.replace(/\/+$/, "") === "/ask") {
|
|
1859
|
+
const q = url.searchParams.get("q") || "";
|
|
1860
|
+
if (!q)
|
|
1861
|
+
return json({ error: "missing q" }, 400);
|
|
1862
|
+
if (db.vectorCount() === 0)
|
|
1863
|
+
return json({ error: "no semantic index; run `experts embed`" }, 409);
|
|
1864
|
+
const [qv] = await getEmbedder().embed([q]);
|
|
1865
|
+
return json(db.semanticSearch(qv, {
|
|
1866
|
+
source: url.searchParams.get("source") || undefined,
|
|
1867
|
+
limit: num(url.searchParams.get("limit"))
|
|
1868
|
+
}));
|
|
1869
|
+
}
|
|
1870
|
+
if (url.pathname.replace(/\/+$/, "") === "/brief") {
|
|
1871
|
+
const q = url.searchParams.get("q") || "";
|
|
1872
|
+
if (!q)
|
|
1873
|
+
return json({ error: "missing q" }, 400);
|
|
1874
|
+
if (db.vectorCount() === 0)
|
|
1875
|
+
return json({ error: "no semantic index; run `experts embed`" }, 409);
|
|
1876
|
+
const limit = num(url.searchParams.get("limit")) ?? 10;
|
|
1877
|
+
const [qv] = await getEmbedder().embed([q]);
|
|
1878
|
+
const raw = db.semanticSearch(qv, { source: url.searchParams.get("source") || undefined, limit: (limit + 5) * 4 });
|
|
1879
|
+
const seen = new Set;
|
|
1880
|
+
const briefLc = q.toLowerCase();
|
|
1881
|
+
const out = [];
|
|
1882
|
+
for (const r of raw) {
|
|
1883
|
+
const pid = db.personIdOf(r.expert.source, r.expert.sourceId);
|
|
1884
|
+
if (seen.has(pid))
|
|
1885
|
+
continue;
|
|
1886
|
+
seen.add(pid);
|
|
1887
|
+
out.push({ ...r, why: r.expert.tags.filter((t) => briefLc.includes(t.toLowerCase())).slice(0, 4) });
|
|
1888
|
+
if (out.length >= limit)
|
|
1889
|
+
break;
|
|
1890
|
+
}
|
|
1891
|
+
return json(out);
|
|
1892
|
+
}
|
|
947
1893
|
if (req.method === "POST") {
|
|
948
1894
|
const m = url.pathname.match(/^\/crawl\/([^/]+)\/?$/);
|
|
949
1895
|
if (m) {
|