terminalhire 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -147,11 +147,11 @@ var init_graph_data = __esm({
147
147
  { id: "spark", parents: ["data-engineering"], synonyms: ["apache-spark"] },
148
148
  { id: "airflow", parents: ["data-engineering"], synonyms: ["apache-airflow"] },
149
149
  { id: "dbt", parents: ["data-engineering"] },
150
- { id: "ml", synonyms: ["machine-learning"], related: [{ to: "pytorch", w: 0.5 }, { to: "tensorflow", w: 0.5 }, { to: "scikit-learn", w: 0.5 }] },
150
+ { id: "ml", synonyms: ["machine-learning"], related: [{ to: "pytorch", w: 0.5 }, { to: "tensorflow", w: 0.5 }, { to: "scikit-learn", w: 0.5 }, { to: "data-engineering", w: 0.4 }] },
151
151
  { id: "llm", parents: ["ml"], synonyms: ["llms", "genai", "generative-ai"], related: [{ to: "langchain", w: 0.5 }, { to: "rag", w: 0.55 }, { to: "openai", w: 0.45 }, { to: "anthropic", w: 0.45 }] },
152
152
  { id: "pytorch", parents: ["ml"], synonyms: ["torch"], related: [{ to: "tensorflow", w: 0.5 }] },
153
153
  { id: "tensorflow", parents: ["ml"], synonyms: ["keras", "tf-keras"] },
154
- { id: "pandas", parents: ["python"], related: [{ to: "numpy", w: 0.6 }] },
154
+ { id: "pandas", parents: ["python"], related: [{ to: "numpy", w: 0.6 }, { to: "data-engineering", w: 0.45 }, { to: "spark", w: 0.4 }] },
155
155
  { id: "numpy", parents: ["python"] },
156
156
  { id: "scikit-learn", parents: ["ml"], synonyms: ["sklearn"] },
157
157
  { id: "jupyter", parents: ["python"] },
@@ -326,6 +326,207 @@ var init_types2 = __esm({
326
326
  }
327
327
  });
328
328
 
329
+ // ../../packages/core/src/vocab/extract.ts
330
/**
 * Break free text into lowercase candidate tokens.
 *
 * A token is a maximal run of characters from `a-z`, `0-9`, `.`, `-`, `+`
 * and `#`, so spellings such as "c++", "c#" and "node.js" stay in one piece.
 * Every other character acts as a separator.
 *
 * @param {string} text - Raw text (title, description, ...).
 * @returns {string[]} Tokens in order of appearance; empty when none exist.
 */
function tokenize(text) {
  const runs = text.toLowerCase().match(/[a-z0-9.\-+#]+/g);
  return runs === null ? [] : runs;
}
333
/**
 * Decide whether a job title reads like an engineering role.
 *
 * A title is rejected as soon as it matches NON_ENG_TITLE; otherwise it is
 * accepted only when it matches ENG_INTENT.
 *
 * @param {string} title - Job title to classify.
 * @returns {boolean} True when the title should be mined for skill tags.
 */
function looksLikeEngRole(title) {
  if (NON_ENG_TITLE.test(title)) return false;
  return ENG_INTENT.test(title);
}
336
/**
 * Map a raw token onto a vocabulary id.
 *
 * The token is looked up as-is first (canonical id, then synonym). If that
 * fails, leading/trailing `.`, `-`, `+` and `#` are stripped and the lookup is
 * retried, so "go." or "#rust" still resolve.
 *
 * @param {string} token - A single token as produced by the tokenizer.
 * @returns {{id: string, viaSynonym: boolean} | null} The resolved id and
 *   whether it was reached through a synonym, or null when unknown.
 */
function resolveToken(token) {
  const lookup = (candidate) => {
    if (GRAPH.ids.has(candidate)) {
      return { id: candidate, viaSynonym: false };
    }
    const canonical = GRAPH.synonyms.get(candidate);
    if (!canonical) return null;
    return { id: canonical, viaSynonym: true };
  };

  const exact = lookup(token);
  if (exact !== null) return exact;

  const trimmed = token.replace(/^[.\-+#]+|[.\-+#]+$/g, "");
  return lookup(trimmed);
}
344
/**
 * Extract vocabulary skill ids from a job posting.
 *
 * Nothing is extracted unless the title passes `looksLikeEngRole`. Title and
 * body are then tokenized together and each token is resolved to an id:
 *   - ids in NON_EXTRACTABLE are dropped;
 *   - ids in SYNONYM_ONLY count only when reached through a synonym;
 *   - ids with an AMBIGUOUS cue regex are accepted when the cue matches the
 *     full text, otherwise parked and admitted only if the posting ends up
 *     with at least two accepted ids outside SOFT_DOMAIN.
 *
 * @param {string} title - Job title.
 * @param {string} [body=""] - Any additional posting text.
 * @returns {string[]} Unique ids, in first-accepted order.
 */
function extractSkillTags(title, body = "") {
  if (!looksLikeEngRole(title)) return [];

  const text = `${title}\n${body}`;
  const accepted = /* @__PURE__ */ new Set();
  const deferred = /* @__PURE__ */ new Set();

  for (const token of tokenize(text)) {
    const resolved = resolveToken(token);
    if (!resolved) continue;

    const { id, viaSynonym } = resolved;
    if (NON_EXTRACTABLE.has(id)) continue;
    if (SYNONYM_ONLY.has(id) && !viaSynonym) continue;

    const cue = AMBIGUOUS[id];
    if (!cue) {
      accepted.add(id);
    } else if (cue.test(text)) {
      accepted.add(id);
    } else {
      deferred.add(id);
    }
  }

  // Parked ambiguous ids are trusted only when the posting already carries
  // enough concrete (non-soft-domain) skills.
  let hardCount = 0;
  for (const id of accepted) {
    if (!SOFT_DOMAIN.has(id)) hardCount += 1;
  }
  if (hardCount >= 2) {
    deferred.forEach((id) => accepted.add(id));
  }
  return Array.from(accepted);
}
368
/**
 * Derive a job's core tags from its title alone.
 *
 * Runs skill extraction on the title with an empty body and removes every id
 * that belongs to SOFT_DOMAIN.
 *
 * @param {string} title - Job title.
 * @returns {string[]} Title-derived ids outside SOFT_DOMAIN.
 */
function coreTagsFromTitle(title) {
  const titleTags = extractSkillTags(title, "");
  return titleTags.filter((tag) => !SOFT_DOMAIN.has(tag));
}
371
var SOFT_DOMAIN, SYNONYM_ONLY, NON_EXTRACTABLE, AMBIGUOUS, ENG_INTENT, NON_ENG_TITLE;
// Lazy initializer for the skill-extraction tables (bundled from extract.ts).
var init_extract = __esm({
  "../../packages/core/src/vocab/extract.ts"() {
    "use strict";
    init_vocab();

    // Broad domain ids. They do not count toward the "hard" skill total in
    // extractSkillTags and are stripped by coreTagsFromTitle.
    SOFT_DOMAIN = /* @__PURE__ */ new Set([
      "frontend", "backend", "devops", "security", "payments", "billing",
      "microservices", "caching", "search", "observability", "monitoring",
      "testing", "accessibility", "seo", "performance", "realtime",
      "authentication", "api-design"
    ]);

    // Ids that are only accepted when a token resolved to them via a synonym.
    SYNONYM_ONLY = /* @__PURE__ */ new Set(["performance", "security", "seo"]);

    // Ids that are never emitted by extraction.
    NON_EXTRACTABLE = /* @__PURE__ */ new Set(["payments", "billing"]);

    // Invariant: every SYNONYM_ONLY id must also be a SOFT_DOMAIN id.
    SYNONYM_ONLY.forEach((id) => {
      if (!SOFT_DOMAIN.has(id)) throw new Error(`extract: SYNONYM_ONLY "${id}" not in SOFT_DOMAIN`);
    });

    // Cue regexes for ids whose bare token is ambiguous in prose; an id listed
    // here is accepted directly only when its cue matches the full text.
    AMBIGUOUS = {
      // "go" needs an ecosystem cue or an explicit-skill phrasing ("Go developer",
      // "in Go", "experience with Go"); plain prose such as "ready to go",
      // "go above" or "go live" does not satisfy it.
      go: /\b(golang|goroutines?|go\.mod|gin framework|gorm)\b|\bgo\b\s+(developer|engineer|programmer|microservices?|backend|services?|lang)|\b(in|with|using|written in|built in|experience (?:in|with)|proficient in|fluent in)\s+go\b/i,
      r: /\b(rstudio|tidyverse|ggplot|shiny|dplyr|cran|r-lang|rlang)\b/i,
      ml: /\b(machine[\s-]?learning|pytorch|tensorflow|scikit|sklearn|keras|neural|model training|deep[\s-]?learning|numpy|pandas|ml\s+(?:engineer|platform|researcher|infrastructure)|(?:ml|ai)\s+research)\b/i
    };

    // Title must match ENG_INTENT and must not match NON_ENG_TITLE to be mined.
    ENG_INTENT = /\b(engineer|engineering|developer|dev\b|swe|sde|programmer|architect|full[\s-]?stack|front[\s-]?end|back[\s-]?end|devops|sre|software|coding|codebase|technical staff|tech(?:nical)? lead)\b/i;
    NON_ENG_TITLE = /\b(account executive|account manager|sales (?:rep|representative|development|manager|lead)|sdr|bdr|recruiter|recruiting|talent|marketing|administrative|business partner|billing coordinator|operations (?:administrator|coordinator)|customer success|project finance|controller|bookkeeper|graphic|brand)\b/i;
  }
});
412
+
413
+ // ../../packages/core/src/vocab/idf-background.ts
414
+ var IDF_BACKGROUND;
415
+ var init_idf_background = __esm({
416
+ "../../packages/core/src/vocab/idf-background.ts"() {
417
+ "use strict";
418
+ IDF_BACKGROUND = {
419
+ N: 244,
420
+ df: {
421
+ "backend": 71,
422
+ "python": 57,
423
+ "monitoring": 44,
424
+ "nextjs": 40,
425
+ "testing": 40,
426
+ "observability": 38,
427
+ "llm": 38,
428
+ "go": 36,
429
+ "aws": 36,
430
+ "react": 33,
431
+ "frontend": 30,
432
+ "ml": 28,
433
+ "mobile": 24,
434
+ "realtime": 24,
435
+ "typescript": 23,
436
+ "devops": 22,
437
+ "kubernetes": 22,
438
+ "javascript": 21,
439
+ "java": 20,
440
+ "rag": 20,
441
+ "api-design": 20,
442
+ "linux": 19,
443
+ "postgresql": 19,
444
+ "search": 17,
445
+ "azure": 16,
446
+ "snowflake": 15,
447
+ "spark": 15,
448
+ "kotlin": 14,
449
+ "gcp": 14,
450
+ "accessibility": 14,
451
+ "nodejs": 14,
452
+ "graphql": 14,
453
+ "airflow": 14,
454
+ "docker": 14,
455
+ "ci-cd": 13,
456
+ "android": 12,
457
+ "cpp": 12,
458
+ "gitlab-ci": 11,
459
+ "anthropic": 11,
460
+ "terraform": 11,
461
+ "mysql": 11,
462
+ "r": 10,
463
+ "dbt": 9,
464
+ "langchain": 9,
465
+ "pytorch": 9,
466
+ "ruby": 9,
467
+ "rails": 9,
468
+ "cloudflare": 7,
469
+ "datadog": 7,
470
+ "css": 7,
471
+ "ansible": 7,
472
+ "openai": 6,
473
+ "kafka": 6,
474
+ "rust": 5,
475
+ "grpc": 5,
476
+ "microservices": 5,
477
+ "serverless": 5,
478
+ "scala": 5,
479
+ "prometheus": 5,
480
+ "grafana": 5,
481
+ "php": 5,
482
+ "redis": 5,
483
+ "huggingface": 4,
484
+ "pandas": 4,
485
+ "scikit-learn": 4,
486
+ "html": 4,
487
+ "ios": 4,
488
+ "authentication": 4,
489
+ "vue": 4,
490
+ "mlops": 3,
491
+ "spring": 3,
492
+ "mongodb": 3,
493
+ "csharp": 3,
494
+ "swift": 2,
495
+ "caching": 2,
496
+ "haskell": 2,
497
+ "pulumi": 2,
498
+ "argocd": 2,
499
+ "tensorflow": 2,
500
+ "express": 2,
501
+ "elasticsearch": 2,
502
+ "clickhouse": 2,
503
+ "nestjs": 2,
504
+ "vite": 2,
505
+ "svelte": 2,
506
+ "phoenix": 2,
507
+ "angular": 2,
508
+ "django": 2,
509
+ "dotnet": 2,
510
+ "elixir": 2,
511
+ "bun": 1,
512
+ "oauth": 1,
513
+ "dynamodb": 1,
514
+ "helm": 1,
515
+ "playwright": 1,
516
+ "cypress": 1,
517
+ "jest": 1,
518
+ "mocha": 1,
519
+ "typeorm": 1,
520
+ "tailwind": 1,
521
+ "prisma": 1,
522
+ "expo": 1,
523
+ "rabbitmq": 1,
524
+ "redux": 1
525
+ }
526
+ };
527
+ }
528
+ });
529
+
329
530
  // ../../packages/core/src/vocab/index.ts
330
531
  function normalize(tokens) {
331
532
  const result = /* @__PURE__ */ new Set();
@@ -362,6 +563,8 @@ var init_vocab = __esm({
362
563
  init_types2();
363
564
  init_closure();
364
565
  init_graph_data();
566
+ init_extract();
567
+ init_idf_background();
365
568
  GRAPH = buildGraph(VOCAB_NODES);
366
569
  VOCABULARY = [...GRAPH.ids];
367
570
  SYNONYMS = Object.fromEntries(GRAPH.synonyms);
@@ -376,23 +579,250 @@ var init_vocabulary = __esm({
376
579
  }
377
580
  });
378
581
 
379
- // ../../packages/core/src/matcher.ts
380
- function computeIdf(jobs) {
381
- const docFreq = /* @__PURE__ */ new Map();
382
- const N = jobs.length;
383
- for (const job of jobs) {
384
- const unique = new Set(job.tags);
385
- for (const tag of unique) {
386
- docFreq.set(tag, (docFreq.get(tag) ?? 0) + 1);
582
+ // ../../packages/core/src/github.ts
583
/**
 * Build the request headers for a GitHub REST call.
 *
 * @param {string} [token] - Optional token; when truthy it is sent as a
 *   Bearer `Authorization` header.
 * @returns {Record<string, string>} Headers with the GitHub media type, the
 *   pinned API version and, if a token was given, the authorization header.
 */
function ghHeaders(token) {
  const auth = token ? { Authorization: `Bearer ${token}` } : {};
  return {
    Accept: "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
    ...auth
  };
}
591
/**
 * GET a GitHub REST API path and return the parsed JSON body.
 *
 * @param {string} path - API path including the leading slash.
 * @param {string} [token] - Optional token forwarded to `ghHeaders`.
 * @returns {Promise<any>} Parsed JSON response.
 * @throws {Error} When the response is not OK; the message carries the path,
 *   HTTP status and status text.
 */
async function ghFetch(path, token) {
  const response = await fetch(`https://api.github.com${path}`, {
    headers: ghHeaders(token)
  });
  if (response.ok) return response.json();
  throw new Error(`GitHub API ${path}: HTTP ${response.status} ${response.statusText}`);
}
599
/**
 * Fetch a public GitHub profile summary for `login`.
 *
 * The user lookup is mandatory and its failure propagates. The repository
 * listing and the merged-PR search are best-effort: when either fails, the
 * profile is still returned with the corresponding fields empty/undefined.
 *
 * @param {string} login - GitHub login of the developer.
 * @param {string} [token] - Optional GitHub token.
 * @returns {Promise<object>} Profile with `login`, `name`, `publicEmail`,
 *   `avatarUrl`, `accountCreatedAt`, `publicRepos`, `followers`,
 *   `topLanguages` (max 10, most-used first, lowercase), `topics` (max 30,
 *   lowercase) and `recentPRorgs` (owners of other people's repos with merged
 *   PRs; undefined when none were found).
 * @throws {Error} When the `/users/{login}` request fails.
 */
async function fetchGitHubProfile(login, token) {
  // Encode the login so an unexpected character cannot alter the request path.
  const userPath = `/users/${encodeURIComponent(login)}`;
  const user = await ghFetch(userPath, token);

  let repos = [];
  try {
    repos = await ghFetch(`${userPath}/repos?sort=pushed&per_page=100`, token);
  } catch (err) {
    console.warn(`[github] ${login}: repos fetch failed, continuing \u2014`, err);
  }

  // Forks are ignored for both language and topic signals.
  const langCount = /* @__PURE__ */ new Map();
  const topicSet = /* @__PURE__ */ new Set();
  for (const repo of repos) {
    if (repo.fork) continue;
    if (repo.language) {
      const lang = repo.language.toLowerCase();
      langCount.set(lang, (langCount.get(lang) ?? 0) + 1);
    }
    for (const t of repo.topics ?? []) topicSet.add(t.toLowerCase());
  }
  const topLanguages = [...langCount].sort(([, a], [, b]) => b - a).slice(0, 10).map(([lang]) => lang);
  const topics = [...topicSet].slice(0, 30);

  let recentPRorgs;
  try {
    const q = encodeURIComponent(
      `type:pr is:merged author:${login} sort:updated`
    );
    const result = await ghFetch(`/search/issues?q=${q}&per_page=30`, token);
    const selfLc = login.toLowerCase();
    const orgs = /* @__PURE__ */ new Set();
    for (const item of result.items ?? []) {
      // Search results identify the repository through `repository_url`;
      // an embedded `repository` object is used when present, otherwise the
      // owner is parsed out of the URL.
      const fromUrl = typeof item.repository_url === "string" ? parseRepoUrl(item.repository_url)?.owner : void 0;
      const orgLogin = item.repository?.owner?.login ?? fromUrl;
      // GitHub logins are case-insensitive, so compare case-insensitively
      // when excluding the developer's own repositories.
      if (orgLogin && orgLogin.toLowerCase() !== selfLc) orgs.add(orgLogin);
    }
    if (orgs.size > 0) recentPRorgs = Array.from(orgs);
  } catch {
    // Best-effort: a failed search (e.g. rate limit) leaves recentPRorgs undefined.
  }

  return {
    login: user.login,
    name: user.name ?? void 0,
    publicEmail: user.email ?? void 0,
    avatarUrl: user.avatar_url,
    accountCreatedAt: user.created_at,
    publicRepos: user.public_repos,
    followers: user.followers,
    topLanguages,
    topics,
    recentPRorgs
  };
}
654
/**
 * Estimate a seniority band from public GitHub profile statistics.
 *
 * Bands are tried from highest to lowest; each needs a minimum account age
 * (in years, 365.25 days each) plus a repo and/or follower threshold:
 *   staff  - 9+ years and (40+ public repos or 500+ followers)
 *   senior - 5+ years and (20+ public repos or 100+ followers)
 *   mid    - 2+ years and 5+ public repos
 * Anything else is "junior".
 *
 * @param {{accountCreatedAt: string, publicRepos: number, followers: number}} p
 * @returns {"staff" | "senior" | "mid" | "junior"}
 */
function inferSeniority(p) {
  const createdMs = new Date(p.accountCreatedAt).getTime();
  const ageYears = (Date.now() - createdMs) / (1e3 * 60 * 60 * 24 * 365.25);
  const { publicRepos, followers } = p;

  const tiers = [
    { band: "staff", minYears: 9, qualifies: publicRepos >= 40 || followers >= 500 },
    { band: "senior", minYears: 5, qualifies: publicRepos >= 20 || followers >= 100 },
    { band: "mid", minYears: 2, qualifies: publicRepos >= 5 }
  ];
  const hit = tiers.find((tier) => ageYears >= tier.minYears && tier.qualifies);
  return hit ? hit.band : "junior";
}
662
/**
 * Turn a GitHub profile into a matching fingerprint.
 *
 * Skill tags come from the profile's top languages followed by its topics,
 * passed through `normalize`. `recentPRorgs` is deliberately not used: org
 * names are not skill tags.
 *
 * @param {object} p - Profile as returned by `fetchGitHubProfile`.
 * @returns {{skillTags: any, seniorityBand: string}} Normalized tags plus the
 *   band inferred by `inferSeniority`.
 */
function githubToFingerprint(p) {
  const rawTokens = [];
  rawTokens.push(...p.topLanguages, ...p.topics);
  return {
    skillTags: normalize(rawTokens),
    seniorityBand: inferSeniority(p)
  };
}
672
/**
 * GET a GitHub REST API path and hand back the raw `fetch` response.
 *
 * Unlike `ghFetch`, the status is not checked and the body is not parsed, so
 * callers can inspect response headers themselves.
 *
 * @param {string} path - API path including the leading slash.
 * @param {string} [token] - Optional token forwarded to `ghHeaders`.
 * @returns {Promise<Response>} The unprocessed response.
 */
async function ghFetchRaw(path, token) {
  const url = `https://api.github.com${path}`;
  const headers = ghHeaders(token);
  return fetch(url, { headers });
}
675
/**
 * Pull `owner` and `name` out of a GitHub API repository URL.
 *
 * The URL must end in `/repos/{owner}/{name}` (a single trailing slash is
 * tolerated).
 *
 * @param {unknown} repoUrl - Value of an item's `repository_url`.
 * @returns {{owner: string, name: string} | null} The parsed pair, or null
 *   when the value is not a string or does not have the expected shape.
 */
function parseRepoUrl(repoUrl) {
  // API payloads are untyped at runtime; a missing field must not throw.
  if (typeof repoUrl !== "string") return null;
  const m = /\/repos\/([^/]+)\/([^/]+)\/?$/.exec(repoUrl);
  return m ? { owner: m[1], name: m[2] } : null;
}
679
/**
 * Tell whether a pull-request title matches the TRIVIAL_PR_TITLE pattern.
 *
 * @param {string} title - Pull-request title.
 * @returns {boolean} True when the title matches the pattern.
 */
function isTrivialPRTitle(title) {
  const matchesTrivialPattern = TRIVIAL_PR_TITLE.test(title);
  return matchesTrivialPattern;
}
682
+ async function fetchOwnedOrgs(token) {
683
+ try {
684
+ const memberships = await ghFetch(`/user/memberships/orgs?per_page=100`, token);
685
+ return new Set(
686
+ memberships.filter((m) => m.role === "admin").map((m) => m.organization.login.toLowerCase())
687
+ );
688
+ } catch {
689
+ return /* @__PURE__ */ new Set();
388
690
  }
389
- const idf = /* @__PURE__ */ new Map();
390
- for (const [tag, df] of docFreq) {
391
- idf.set(tag, Math.log((N + 1) / (df + 1)) + 1);
691
+ }
692
+ async function repoContributorCount(owner, name, token) {
693
+ try {
694
+ const res = await ghFetchRaw(
695
+ `/repos/${owner}/${name}/contributors?per_page=1&anon=false`,
696
+ token
697
+ );
698
+ if (!res.ok) return void 0;
699
+ const link = res.headers.get("link");
700
+ const m = link?.match(/[?&]page=(\d+)>;\s*rel="last"/);
701
+ if (m) return Number(m[1]);
702
+ const body = await res.json();
703
+ return Array.isArray(body) ? body.length : 0;
704
+ } catch {
705
+ return void 0;
392
706
  }
393
- return idf;
394
707
  }
395
- function inferSeniority(title) {
708
/**
 * Load (and memoize) the metadata used to judge a repository.
 *
 * Results are cached under the lowercase `owner/name` key. A failed lookup is
 * cached as null, so the same repository is not requested twice through one
 * cache.
 *
 * @param {string} owner - Repository owner.
 * @param {string} name - Repository name.
 * @param {string} [token] - Optional GitHub token.
 * @param {Map<string, object|null>} cache - Cache shared across calls.
 * @returns {Promise<{stars: number, archived: boolean, fork: boolean,
 *   language: string|null, topics: string[], contributors: (number|undefined)}|null>}
 *   Metadata, or null when the repository could not be fetched.
 */
async function fetchRepoMeta(owner, name, token, cache) {
  const cacheKey = `${owner}/${name}`.toLowerCase();
  const hit = cache.get(cacheKey);
  // null is a valid cached value ("lookup failed"); only undefined means "not cached".
  if (hit !== void 0) return hit;

  let result;
  try {
    const repo = await ghFetch(`/repos/${owner}/${name}`, token);
    const contributors = await repoContributorCount(owner, name, token);
    result = {
      stars: repo.stargazers_count ?? 0,
      archived: Boolean(repo.archived),
      fork: Boolean(repo.fork),
      language: repo.language ?? null,
      topics: repo.topics ?? [],
      contributors
    };
  } catch {
    result = null;
  }

  cache.set(cacheKey, result);
  return result;
}
730
/**
 * Summarize a developer's merged pull requests to other people's repositories.
 *
 * Merged PRs authored by `login` are searched (one page of CANDIDATE_PR_PAGE).
 * A PR qualifies only when its repository:
 *   - is not owned by the developer or by an org returned by `fetchOwnedOrgs`,
 *   - is not archived and not a fork,
 *   - has at least MIN_STARS stars,
 *   - has at least MIN_CONTRIBUTORS contributors (skipped when the count is
 *     unknown),
 * and when its title is not trivial. Each qualifying PR is counted once per
 * normalized domain taken from the repo's language and topics.
 *
 * @param {string} login - GitHub login of the developer.
 * @param {string} [token] - GitHub token; without it the result is "no-token".
 * @param {Map<string, object|null>} [cache] - Repo metadata cache for `fetchRepoMeta`.
 * @returns {Promise<{status: string, byDomain: object, qualifyingTotal: number,
 *   computedAt: string}>} `status` is "ok", "no-token", "rate-limited" or
 *   "failed"; every non-"ok" status carries empty counts.
 */
async function computeAcceptanceCredential(login, token, cache = /* @__PURE__ */ new Map()) {
  const computedAt = (/* @__PURE__ */ new Date()).toISOString();
  const emptyResult = (status) => ({ status, byDomain: {}, qualifyingTotal: 0, computedAt });

  if (!token) return emptyResult("no-token");

  const ownedOrgs = await fetchOwnedOrgs(token);
  const loginLc = login.toLowerCase();

  let items;
  try {
    const query = encodeURIComponent(`type:pr is:merged author:${login} -user:${login} sort:updated`);
    const page = await ghFetch(`/search/issues?q=${query}&per_page=${CANDIDATE_PR_PAGE}`, token);
    items = page.items ?? [];
  } catch (err) {
    const rateLimited = /HTTP 403|HTTP 429|rate limit/i.test(String(err));
    return emptyResult(rateLimited ? "rate-limited" : "failed");
  }

  // Working buckets keep the set of owners so distinct orgs can be counted at the end.
  const buckets = {};
  let qualifyingTotal = 0;
  for (const item of items) {
    const repo = parseRepoUrl(item.repository_url);
    if (!repo) continue;

    const ownerLc = repo.owner.toLowerCase();
    const selfOwned = ownerLc === loginLc || ownedOrgs.has(ownerLc);
    if (selfOwned || isTrivialPRTitle(item.title)) continue;

    // Sequential on purpose: repeated repos are then served from the cache.
    const meta = await fetchRepoMeta(repo.owner, repo.name, token, cache);
    if (!meta || meta.archived || meta.fork) continue;
    if (meta.stars < MIN_STARS) continue;
    if (meta.contributors !== void 0 && meta.contributors < MIN_CONTRIBUTORS) continue;

    qualifyingTotal += 1;
    const mergedAt = item.pull_request?.merged_at ?? item.closed_at ?? item.created_at;
    const rawDomains = [meta.language ?? "", ...meta.topics].filter(Boolean);
    for (const domain of new Set(normalize(rawDomains))) {
      let bucket = buckets[domain];
      if (bucket === void 0 || bucket === null) {
        bucket = { mergedPRs: 0, distinctOrgs: 0, lastMergedAt: mergedAt, orgs: /* @__PURE__ */ new Set() };
        buckets[domain] = bucket;
      }
      bucket.mergedPRs += 1;
      bucket.orgs.add(ownerLc);
      if (mergedAt > bucket.lastMergedAt) bucket.lastMergedAt = mergedAt;
    }
  }

  const byDomain = {};
  for (const domain of Object.keys(buckets)) {
    const { mergedPRs, orgs, lastMergedAt } = buckets[domain];
    byDomain[domain] = { mergedPRs, distinctOrgs: orgs.size, lastMergedAt };
  }
  return { status: "ok", byDomain, qualifyingTotal, computedAt };
}
787
/**
 * Highest merged-PR count the credential holds for any of the given domains.
 *
 * @param {{status: string, byDomain: Object<string, {mergedPRs: number}>}} cred
 *   Acceptance credential.
 * @param {Iterable<string>} domains - Domains to look up.
 * @returns {number} The largest `mergedPRs` among the domains; 0 when the
 *   credential is not "ok" or none of the domains has an entry.
 */
function acceptanceCountForDomains(cred, domains) {
  if (cred.status !== "ok") return 0;
  return [...domains].reduce((highest, domain) => {
    const merged = cred.byDomain[domain]?.mergedPRs ?? 0;
    return merged > highest ? merged : highest;
  }, 0);
}
796
/**
 * Pick the domain with the most merged PRs among the given candidates.
 *
 * Only domains with a positive count are considered; on a tie the domain that
 * appears first in `domains` wins.
 *
 * @param {{status: string, byDomain: Object<string, {mergedPRs: number}>}} cred
 *   Acceptance credential.
 * @param {Iterable<string>} domains - Candidate domains, in priority order.
 * @returns {{domain: string, count: number} | null} The winning domain and its
 *   count, or null when the credential is not "ok" or nothing has a count.
 */
function bestAcceptanceDomain(cred, domains) {
  if (cred.status !== "ok") return null;
  return [...domains].reduce((leader, domain) => {
    const count = cred.byDomain[domain]?.mergedPRs ?? 0;
    if (!(count > 0)) return leader;
    if (leader !== null && !(count > leader.count)) return leader;
    return { domain, count };
  }, null);
}
805
var MIN_STARS, MIN_CONTRIBUTORS, CANDIDATE_PR_PAGE, TRIVIAL_PR_TITLE;
// Lazy initializer for the GitHub module constants (bundled from github.ts).
var init_github = __esm({
  "../../packages/core/src/github.ts"() {
    "use strict";
    init_vocabulary();

    // Titles matching this (anchored at the start, case-insensitive) are
    // treated as trivial PRs by isTrivialPRTitle.
    TRIVIAL_PR_TITLE = /^\s*(fix\s+typo|typo\b|update\s+readme|readme\b|docs?:|docs?\(|chore:|chore\(|style:|ci:|build:|bump\b|update\s+dependenc)/i;

    // `per_page` used for the merged-PR search in computeAcceptanceCredential.
    CANDIDATE_PR_PAGE = 50;

    // Repositories below these thresholds do not count toward the credential.
    MIN_CONTRIBUTORS = 10;
    MIN_STARS = 50;
  }
});
816
+
817
+ // ../../packages/core/src/matcher.ts
818
/**
 * Choose which of a job's tags are used for acceptance-credential lookups.
 *
 * @param {{coreTags?: string[], tags: string[]}} job - Job posting.
 * @returns {string[]} `job.coreTags` when it is present and non-empty,
 *   otherwise `job.tags`.
 */
function acceptanceDomainsOf(job) {
  const { coreTags, tags } = job;
  if (coreTags && coreTags.length > 0) return coreTags;
  return tags;
}
821
/**
 * Smoothed inverse document frequency of a tag against the background table.
 *
 * Computes `ln((N + 1) / (df + 1)) + 1`, where `N` and `df` come from
 * IDF_BACKGROUND. A tag without an entry is treated as `df = 0`, which gives
 * it the highest weight.
 *
 * @param {string} tag - Vocabulary id.
 * @returns {number} IDF weight; always a finite number.
 */
function backgroundIdf(tag) {
  const { N, df: table } = IDF_BACKGROUND;
  // Own-property lookup: a tag such as "constructor" or "toString" must not
  // pick up an inherited Object.prototype member, which would yield NaN.
  const df = Object.prototype.hasOwnProperty.call(table, tag) ? table[tag] : 0;
  return Math.log((N + 1) / (df + 1)) + 1;
}
825
+ function inferSeniority2(title) {
396
826
  if (!ENG_TITLE.test(title)) return void 0;
397
827
  for (const [re, level] of SENIORITY_PATTERNS) {
398
828
  if (re.test(title)) return level;
@@ -401,7 +831,7 @@ function inferSeniority(title) {
401
831
  }
402
832
  function seniorityScore(fp, job) {
403
833
  if (!fp.seniorityBand) return 1;
404
- const jobLevel = inferSeniority(job.title);
834
+ const jobLevel = inferSeniority2(job.title);
405
835
  if (!jobLevel) return 0.85;
406
836
  const wanted = SENIORITY_RANK[fp.seniorityBand] ?? 1;
407
837
  const got = SENIORITY_RANK[jobLevel] ?? 1;
@@ -411,8 +841,10 @@ function seniorityScore(fp, job) {
411
841
  return 0.4;
412
842
  }
413
843
  function recencyScore(postedAt, now) {
414
- if (!postedAt) return 0.75;
415
- const ageDays2 = (now - new Date(postedAt).getTime()) / 864e5;
844
+ if (!postedAt) return UNKNOWN_RECENCY;
845
+ const ms = new Date(postedAt).getTime();
846
+ if (Number.isNaN(ms)) return UNKNOWN_RECENCY;
847
+ const ageDays2 = (now - ms) / 864e5;
416
848
  if (ageDays2 < 7) return 1;
417
849
  if (ageDays2 < 30) return 0.9;
418
850
  if (ageDays2 < 90) return 0.75;
@@ -443,9 +875,8 @@ function harmonicMean(a, b) {
443
875
  if (a <= 0 || b <= 0) return 0;
444
876
  return 2 * a * b / (a + b);
445
877
  }
446
- function match(fp, jobs, limit = 5, now = Date.now()) {
447
- const idf = computeIdf(jobs);
448
- const idfOf = (t) => idf.get(t) ?? 0;
878
+ function match(fp, jobs, limit = 5, now = Date.now(), opts = {}) {
879
+ const idfOf = backgroundIdf;
449
880
  const expanded = expandWeighted(fp.skillTags);
450
881
  const maxDevScore = fp.skillTags.reduce((acc, t) => acc + idfOf(t), 0);
451
882
  const candidates = jobs.filter((j) => passesFilters(fp, j));
@@ -471,32 +902,45 @@ function match(fp, jobs, limit = 5, now = Date.now()) {
471
902
  const jobCov = jobMaxScore > 0 ? Math.min(1, jobMatchScore / jobMaxScore) : 0;
472
903
  const tagComponent = harmonicMean(devCov, jobCov);
473
904
  if (tagComponent === 0) return null;
905
+ const coreTags = job.coreTags ?? coreTagsFromTitle(job.title);
906
+ let coreComponent = tagComponent;
907
+ if (coreTags.length > 0) {
908
+ const coreCov = Math.max(0, ...coreTags.map((ct) => expanded.get(ct)?.weight ?? 0));
909
+ if (coreCov === 0) coreComponent = tagComponent * CORE_MISS_PENALTY;
910
+ }
474
911
  details.sort((a, b) => idfOf(b.tag) * b.weight - idfOf(a.tag) * a.weight);
475
912
  const sScore = seniorityScore(fp, job);
476
913
  const rScore = recencyScore(job.postedAt, now);
477
- const score = tagComponent * 0.6 + sScore * 0.25 + rScore * 0.15;
914
+ const score = coreComponent * 0.6 + sScore * 0.25 + rScore * 0.15;
478
915
  const matchedTags = [...new Set(details.map((d) => d.via ?? d.tag))];
916
+ const badge = opts.acceptance ? bestAcceptanceDomain(opts.acceptance, acceptanceDomainsOf(job)) : null;
479
917
  return {
480
918
  job,
481
919
  score: Math.round(score * 1e3) / 1e3,
482
920
  matchedTags,
483
921
  matchDetails: details,
922
+ ...badge ? { acceptance: { status: "ok", domain: badge.domain, count: badge.count } } : {},
484
923
  reason: buildReason(details)
485
924
  };
486
925
  });
487
- return scored.filter((r) => r !== null && r.score >= MIN_SCORE).sort((a, b) => b.score - a.score).slice(0, limit);
488
- }
489
- function matchOne(fp, job) {
490
- const results = match(fp, [job], 1);
491
- return results.length > 0 ? results[0] : null;
492
- }
493
- var MIN_SCORE, SHARPEN, SENIORITY_RANK, SENIORITY_PATTERNS, ENG_TITLE;
926
+ return scored.filter((r) => r !== null && r.score >= MIN_SCORE).sort((a, b) => {
927
+ const byScore = b.score - a.score;
928
+ if (Math.abs(byScore) > TIEBREAK_EPS) return byScore;
929
+ const byAcceptance = (b.acceptance?.count ?? 0) - (a.acceptance?.count ?? 0);
930
+ if (byAcceptance !== 0) return byAcceptance;
931
+ return byScore;
932
+ }).slice(0, limit);
933
+ }
934
+ var MIN_SCORE, TIEBREAK_EPS, SHARPEN, CORE_MISS_PENALTY, SENIORITY_RANK, SENIORITY_PATTERNS, ENG_TITLE, UNKNOWN_RECENCY;
494
935
  var init_matcher = __esm({
495
936
  "../../packages/core/src/matcher.ts"() {
496
937
  "use strict";
497
938
  init_vocabulary();
939
+ init_github();
498
940
  MIN_SCORE = 0.15;
941
+ TIEBREAK_EPS = 5e-3;
499
942
  SHARPEN = 1.6;
943
+ CORE_MISS_PENALTY = 0.4;
500
944
  SENIORITY_RANK = {
501
945
  junior: 0,
502
946
  mid: 1,
@@ -510,24 +954,19 @@ var init_matcher = __esm({
510
954
  [/\bmid[\s-]?level\b|\bmid\b/i, "mid"]
511
955
  ];
512
956
  ENG_TITLE = /\b(engineer|engineering|developer|dev|swe|sde|programmer|architect)\b/i;
957
+ UNKNOWN_RECENCY = 0.75;
513
958
  }
514
959
  });
515
960
 
516
961
  // ../../packages/core/src/feeds/greenhouse.ts
517
- function tokenize(text) {
518
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
519
- }
520
962
  function extractTags(job) {
521
- const texts = [
522
- job.title,
963
+ const body = [
523
964
  ...(job.departments ?? []).map((d) => d.name),
524
965
  job.location?.name ?? "",
525
966
  ...(job.offices ?? []).map((o) => o.name),
526
- // mine the full HTML description for additional signal when present
527
967
  ...job.content ? [job.content.replace(/<[^>]*>/g, " ")] : []
528
- ].filter(Boolean);
529
- const tokens = texts.flatMap(tokenize);
530
- return normalize(tokens);
968
+ ].filter(Boolean).join(" ");
969
+ return extractSkillTags(job.title, body);
531
970
  }
532
971
  function inferRemote(location) {
533
972
  const l = location.toLowerCase();
@@ -625,17 +1064,15 @@ var init_greenhouse = __esm({
625
1064
  });
626
1065
 
627
1066
  // ../../packages/core/src/feeds/ashby.ts
628
- function tokenize2(text) {
629
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
630
- }
631
1067
  function extractTags2(job) {
632
- const texts = [
633
- job.title,
634
- job.teamName ?? "",
635
- job.locationName ?? "",
636
- ...(job.secondaryLocations ?? []).map((l) => l.locationName ?? "")
637
- ];
638
- return normalize(texts.flatMap(tokenize2));
1068
+ const body = [
1069
+ job.team ?? "",
1070
+ job.department ?? "",
1071
+ job.location ?? "",
1072
+ ...(job.secondaryLocations ?? []).map((l) => l.location ?? ""),
1073
+ job.descriptionPlain ?? ""
1074
+ ].join(" ");
1075
+ return extractSkillTags(job.title, body);
639
1076
  }
640
1077
  function mapEmploymentType(raw) {
641
1078
  if (!raw) return "full_time";
@@ -646,7 +1083,7 @@ function mapEmploymentType(raw) {
646
1083
  }
647
1084
  function inferRemote2(job) {
648
1085
  if (job.isRemote === true) return true;
649
- const loc = (job.locationName ?? "").toLowerCase();
1086
+ const loc = (job.location ?? "").toLowerCase();
650
1087
  return loc.includes("remote") || loc.includes("anywhere");
651
1088
  }
652
1089
  async function fetchSlug2(slug) {
@@ -665,14 +1102,14 @@ async function fetchSlug2(slug) {
665
1102
  source: "ashby",
666
1103
  title: j.title,
667
1104
  company: slug,
668
- url: j.applyUrl ?? `https://jobs.ashbyhq.com/${slug}/${j.id}`,
1105
+ url: j.jobUrl ?? j.applyUrl ?? `https://jobs.ashbyhq.com/${slug}/${j.id}`,
669
1106
  remote: inferRemote2(j),
670
- location: j.locationName,
1107
+ location: j.location,
671
1108
  compMin: comp?.minValue,
672
1109
  compMax: comp?.maxValue,
673
1110
  tags: extractTags2(j),
674
1111
  roleType: mapEmploymentType(j.employmentType),
675
- postedAt: j.publishedDate,
1112
+ postedAt: j.publishedAt,
676
1113
  applyMode: "direct",
677
1114
  raw: j
678
1115
  };
@@ -699,20 +1136,16 @@ var init_ashby = __esm({
699
1136
  });
700
1137
 
701
1138
  // ../../packages/core/src/feeds/lever.ts
702
- function tokenize3(text) {
703
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
704
- }
705
1139
  function extractTags3(p) {
706
1140
  const cat = p.categories ?? {};
707
- const texts = [
708
- p.text,
1141
+ const body = [
709
1142
  cat.team ?? "",
710
1143
  cat.department ?? "",
711
1144
  cat.location ?? "",
712
1145
  ...cat.allLocations ?? [],
713
1146
  p.descriptionPlain ?? ""
714
- ];
715
- return normalize(texts.flatMap(tokenize3));
1147
+ ].join(" ");
1148
+ return extractSkillTags(p.text, body);
716
1149
  }
717
1150
  function mapCommitment(raw) {
718
1151
  if (!raw) return "full_time";
@@ -785,15 +1218,8 @@ var init_lever = __esm({
785
1218
  });
786
1219
 
787
1220
  // ../../packages/core/src/feeds/himalayas.ts
788
- function tokenize4(text) {
789
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
790
- }
791
1221
  function extractTags4(job) {
792
- const texts = [
793
- job.title,
794
- ...job.tags ?? []
795
- ];
796
- return normalize(texts.flatMap(tokenize4));
1222
+ return extractSkillTags(job.title, (job.tags ?? []).join(" "));
797
1223
  }
798
1224
  function mapJobType(raw) {
799
1225
  if (!raw) return "full_time";
@@ -878,9 +1304,6 @@ var init_entities = __esm({
878
1304
  });
879
1305
 
880
1306
  // ../../packages/core/src/feeds/wwr.ts
881
- function tokenize5(text) {
882
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
883
- }
884
1307
  function stripHtml(html) {
885
1308
  return html.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
886
1309
  }
@@ -924,8 +1347,8 @@ function parseRss(xml) {
924
1347
  return items;
925
1348
  }
926
1349
  function extractTags5(item) {
927
- const text = [item.title, item.category, stripHtml(item.description)].join(" ");
928
- return normalize(tokenize5(text));
1350
+ const body = [item.category, stripHtml(item.description)].join(" ");
1351
+ return extractSkillTags(item.title, body);
929
1352
  }
930
1353
  var WWR_RSS_URL, wwr;
931
1354
  var init_wwr = __esm({
@@ -967,9 +1390,6 @@ var init_wwr = __esm({
967
1390
  });
968
1391
 
969
1392
  // ../../packages/core/src/feeds/hn.ts
970
- function tokenize6(text) {
971
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
972
- }
973
1393
  function stripHtml2(html) {
974
1394
  return decodeEntities(html.replace(/<p>/gi, " ").replace(/<[^>]*>/g, "")).replace(/\s+/g, " ").trim();
975
1395
  }
@@ -1000,7 +1420,7 @@ function parseComment(item) {
1000
1420
  return null;
1001
1421
  }
1002
1422
  const url = extractUrl(raw) || `https://news.ycombinator.com/item?id=${item.id}`;
1003
- const tags = extractTags6(raw);
1423
+ const tags = extractTags6(title, raw);
1004
1424
  if (tags.length === 0) return null;
1005
1425
  return {
1006
1426
  id: `hn:${item.id}`,
@@ -1017,8 +1437,8 @@ function parseComment(item) {
1017
1437
  raw: item
1018
1438
  };
1019
1439
  }
1020
- function extractTags6(text) {
1021
- return normalize(tokenize6(text));
1440
+ function extractTags6(title, text) {
1441
+ return extractSkillTags(title, text);
1022
1442
  }
1023
1443
  var ALGOLIA_SEARCH, ALGOLIA_ITEMS, hn;
1024
1444
  var init_hn = __esm({
@@ -1108,7 +1528,7 @@ function authHeaders() {
1108
1528
  if (token) h["Authorization"] = `Bearer ${token}`;
1109
1529
  return h;
1110
1530
  }
1111
- function tokenize7(text) {
1531
+ function tokenize2(text) {
1112
1532
  return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
1113
1533
  }
1114
1534
  function parseAmountUSD(text) {
@@ -1193,7 +1613,7 @@ async function fetchRepoBounties(repoFullName) {
1193
1613
  const body = issue.body ? decodeEntities(issue.body) : "";
1194
1614
  const amountUSD = parseAmountUSD(title) ?? parseAmountUSD(body) ?? await fetchCommentAmount(repoFullName, issue.number);
1195
1615
  const labels = labelNames(issue);
1196
- const tags = normalize(tokenize7([title, labels.join(" "), body.slice(0, 2e3)].join(" ")));
1616
+ const tags = normalize(tokenize2([title, labels.join(" "), body.slice(0, 2e3)].join(" ")));
1197
1617
  return {
1198
1618
  id: `bounty:${repoFullName}#${issue.number}`,
1199
1619
  source: "bounty",
@@ -1510,103 +1930,6 @@ var init_indexer = __esm({
1510
1930
  }
1511
1931
  });
1512
1932
 
1513
- // ../../packages/core/src/github.ts
1514
- function ghHeaders(token) {
1515
- const headers = {
1516
- Accept: "application/vnd.github+json",
1517
- "X-GitHub-Api-Version": "2022-11-28"
1518
- };
1519
- if (token) headers["Authorization"] = `Bearer ${token}`;
1520
- return headers;
1521
- }
1522
- async function ghFetch(path, token) {
1523
- const url = `https://api.github.com${path}`;
1524
- const res = await fetch(url, { headers: ghHeaders(token) });
1525
- if (!res.ok) {
1526
- throw new Error(`GitHub API ${path}: HTTP ${res.status} ${res.statusText}`);
1527
- }
1528
- return res.json();
1529
- }
1530
- async function fetchGitHubProfile(login, token) {
1531
- const user = await ghFetch(`/users/${login}`, token);
1532
- let repos = [];
1533
- try {
1534
- repos = await ghFetch(
1535
- `/users/${login}/repos?sort=pushed&per_page=100`,
1536
- token
1537
- );
1538
- } catch (err) {
1539
- console.warn(`[github] ${login}: repos fetch failed, continuing \u2014`, err);
1540
- }
1541
- const langCount = {};
1542
- for (const repo of repos) {
1543
- if (repo.fork) continue;
1544
- if (repo.language) {
1545
- langCount[repo.language.toLowerCase()] = (langCount[repo.language.toLowerCase()] ?? 0) + 1;
1546
- }
1547
- }
1548
- const topLanguages = Object.entries(langCount).sort(([, a], [, b]) => b - a).slice(0, 10).map(([lang]) => lang);
1549
- const topicSet = /* @__PURE__ */ new Set();
1550
- for (const repo of repos) {
1551
- if (repo.fork) continue;
1552
- for (const t of repo.topics ?? []) topicSet.add(t.toLowerCase());
1553
- }
1554
- const topics = Array.from(topicSet).slice(0, 30);
1555
- let recentPRorgs;
1556
- try {
1557
- const q = encodeURIComponent(
1558
- `type:pr is:merged author:${login} sort:updated`
1559
- );
1560
- const result = await ghFetch(
1561
- `/search/issues?q=${q}&per_page=30`,
1562
- token
1563
- );
1564
- const orgs = /* @__PURE__ */ new Set();
1565
- for (const item of result.items ?? []) {
1566
- const orgLogin = item.repository?.owner?.login;
1567
- if (orgLogin && orgLogin !== login) orgs.add(orgLogin);
1568
- }
1569
- if (orgs.size > 0) recentPRorgs = Array.from(orgs);
1570
- } catch {
1571
- }
1572
- return {
1573
- login: user.login,
1574
- name: user.name ?? void 0,
1575
- publicEmail: user.email ?? void 0,
1576
- avatarUrl: user.avatar_url,
1577
- accountCreatedAt: user.created_at,
1578
- publicRepos: user.public_repos,
1579
- followers: user.followers,
1580
- topLanguages,
1581
- topics,
1582
- recentPRorgs
1583
- };
1584
- }
1585
- function inferSeniority2(p) {
1586
- const ageMs = Date.now() - new Date(p.accountCreatedAt).getTime();
1587
- const ageYears = ageMs / (1e3 * 60 * 60 * 24 * 365.25);
1588
- if (ageYears >= 9 && (p.publicRepos >= 40 || p.followers >= 500)) return "staff";
1589
- if (ageYears >= 5 && (p.publicRepos >= 20 || p.followers >= 100)) return "senior";
1590
- if (ageYears >= 2 && p.publicRepos >= 5) return "mid";
1591
- return "junior";
1592
- }
1593
- function githubToFingerprint(p) {
1594
- const rawTokens = [
1595
- ...p.topLanguages,
1596
- ...p.topics
1597
- // recentPRorgs intentionally excluded — org names are not skill tags
1598
- ];
1599
- const skillTags = normalize(rawTokens);
1600
- const seniorityBand = inferSeniority2(p);
1601
- return { skillTags, seniorityBand };
1602
- }
1603
- var init_github = __esm({
1604
- "../../packages/core/src/github.ts"() {
1605
- "use strict";
1606
- init_vocabulary();
1607
- }
1608
- });
1609
-
1610
1933
  // ../../packages/core/src/index.ts
1611
1934
  var src_exports = {};
1612
1935
  __export(src_exports, {
@@ -1620,17 +1943,23 @@ __export(src_exports, {
1620
1943
  FEEDS: () => FEEDS,
1621
1944
  GRAPH: () => GRAPH,
1622
1945
  GREENHOUSE_SLUGS_BY_TIER: () => GREENHOUSE_SLUGS_BY_TIER,
1946
+ IDF_BACKGROUND: () => IDF_BACKGROUND,
1623
1947
  LEVER_SLUGS_BY_TIER: () => LEVER_SLUGS_BY_TIER,
1624
1948
  SYNONYMS: () => SYNONYMS,
1625
1949
  VOCABULARY: () => VOCABULARY,
1626
1950
  VOCAB_NODES: () => VOCAB_NODES,
1951
+ acceptanceCountForDomains: () => acceptanceCountForDomains,
1627
1952
  aggregate: () => aggregate,
1628
1953
  aggregateBounties: () => aggregateBounties,
1629
1954
  ashby: () => ashby,
1955
+ bestAcceptanceDomain: () => bestAcceptanceDomain,
1630
1956
  buildGraph: () => buildGraph,
1631
1957
  buildIndex: () => buildIndex,
1632
1958
  buildReason: () => buildReason,
1959
+ computeAcceptanceCredential: () => computeAcceptanceCredential,
1960
+ coreTagsFromTitle: () => coreTagsFromTitle,
1633
1961
  expandWeighted: () => expandWeighted,
1962
+ extractSkillTags: () => extractSkillTags,
1634
1963
  fetchGitHubProfile: () => fetchGitHubProfile,
1635
1964
  flattenTiers: () => flattenTiers,
1636
1965
  getBuyer: () => getBuyer,
@@ -1642,10 +1971,11 @@ __export(src_exports, {
1642
1971
  isBounty: () => isBounty,
1643
1972
  lever: () => lever,
1644
1973
  loadPartnerRoles: () => loadPartnerRoles,
1974
+ looksLikeEngRole: () => looksLikeEngRole,
1645
1975
  match: () => match,
1646
- matchOne: () => matchOne,
1647
1976
  normalize: () => normalize,
1648
1977
  passesMaturityGate: () => passesMaturityGate,
1978
+ tokenize: () => tokenize,
1649
1979
  validateGraph: () => validateGraph,
1650
1980
  wwr: () => wwr
1651
1981
  });