terminalhire 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -147,11 +147,11 @@ var init_graph_data = __esm({
147
147
  { id: "spark", parents: ["data-engineering"], synonyms: ["apache-spark"] },
148
148
  { id: "airflow", parents: ["data-engineering"], synonyms: ["apache-airflow"] },
149
149
  { id: "dbt", parents: ["data-engineering"] },
150
- { id: "ml", synonyms: ["machine-learning"], related: [{ to: "pytorch", w: 0.5 }, { to: "tensorflow", w: 0.5 }, { to: "scikit-learn", w: 0.5 }] },
150
+ { id: "ml", synonyms: ["machine-learning"], related: [{ to: "pytorch", w: 0.5 }, { to: "tensorflow", w: 0.5 }, { to: "scikit-learn", w: 0.5 }, { to: "data-engineering", w: 0.4 }] },
151
151
  { id: "llm", parents: ["ml"], synonyms: ["llms", "genai", "generative-ai"], related: [{ to: "langchain", w: 0.5 }, { to: "rag", w: 0.55 }, { to: "openai", w: 0.45 }, { to: "anthropic", w: 0.45 }] },
152
152
  { id: "pytorch", parents: ["ml"], synonyms: ["torch"], related: [{ to: "tensorflow", w: 0.5 }] },
153
153
  { id: "tensorflow", parents: ["ml"], synonyms: ["keras", "tf-keras"] },
154
- { id: "pandas", parents: ["python"], related: [{ to: "numpy", w: 0.6 }] },
154
+ { id: "pandas", parents: ["python"], related: [{ to: "numpy", w: 0.6 }, { to: "data-engineering", w: 0.45 }, { to: "spark", w: 0.4 }] },
155
155
  { id: "numpy", parents: ["python"] },
156
156
  { id: "scikit-learn", parents: ["ml"], synonyms: ["sklearn"] },
157
157
  { id: "jupyter", parents: ["python"] },
@@ -326,6 +326,207 @@ var init_types2 = __esm({
326
326
  }
327
327
  });
328
328
 
329
+ // ../../packages/core/src/vocab/extract.ts
/**
 * Split free text into lowercase tokens.
 *
 * Every character other than a-z, 0-9, ".", "-", "+" and "#" is treated as a
 * separator, so tokens such as "c++", "c#" and "node.js" stay in one piece.
 *
 * @param {string} text - Raw text to split.
 * @returns {string[]} Non-empty lowercase tokens, in order of appearance.
 */
function tokenize(text) {
  const lowered = text.toLowerCase();
  const separated = lowered.replace(/[^a-z0-9.\-+#]/g, " ");
  const pieces = separated.split(/\s+/);
  return pieces.filter((piece) => piece !== "");
}
/**
 * Decide whether a job title reads like an engineering role.
 *
 * A title is rejected as soon as it matches NON_ENG_TITLE; otherwise it is
 * accepted only when it matches ENG_INTENT.
 *
 * @param {string} title - Job title to classify.
 * @returns {boolean} True when the title passes both checks.
 */
function looksLikeEngRole(title) {
  if (NON_ENG_TITLE.test(title)) return false;
  return ENG_INTENT.test(title);
}
/**
 * Map a raw token onto a vocabulary id.
 *
 * The token is tried as-is first (as an id, then as a synonym). If that
 * fails, it is retried with leading/trailing ".", "-", "+" and "#" removed.
 *
 * @param {string} token - Lowercase token produced by tokenize().
 * @returns {{ id: string, viaSynonym: boolean } | null} The resolved id and
 *   whether it was reached through a synonym, or null when nothing matches.
 */
function resolveToken(token) {
  const lookup = (candidate) => {
    if (GRAPH.ids.has(candidate)) {
      return { id: candidate, viaSynonym: false };
    }
    const canonical = GRAPH.synonyms.get(candidate);
    if (canonical) {
      return { id: canonical, viaSynonym: true };
    }
    return null;
  };

  const direct = lookup(token);
  if (direct !== null) return direct;
  // Second chance: strip punctuation stuck to either end of the token.
  return lookup(token.replace(/^[.\-+#]+|[.\-+#]+$/g, ""));
}
/**
 * Extract vocabulary skill ids from a job posting.
 *
 * Returns no tags at all unless the title looks like an engineering role.
 * Ids in NON_EXTRACTABLE are never emitted; ids in SYNONYM_ONLY are emitted
 * only when reached through a synonym. Ids with an AMBIGUOUS cue are kept
 * when the cue matches the combined text; otherwise they are deferred and
 * added only if at least two non-SOFT_DOMAIN ids were found.
 *
 * @param {string} title - Job title (also gates extraction).
 * @param {string} [body=""] - Additional text to mine.
 * @returns {string[]} Unique skill ids in first-seen order, deferred ones last.
 */
function extractSkillTags(title, body = "") {
  if (!looksLikeEngRole(title)) return [];

  const text = `${title}\n${body}`;
  const confirmed = /* @__PURE__ */ new Set();
  const deferred = /* @__PURE__ */ new Set();

  for (const tok of tokenize(text)) {
    const resolved = resolveToken(tok);
    if (!resolved) continue;

    const { id, viaSynonym } = resolved;
    if (NON_EXTRACTABLE.has(id)) continue;
    if (SYNONYM_ONLY.has(id) && !viaSynonym) continue;

    const cue = AMBIGUOUS[id];
    if (!cue) {
      confirmed.add(id);
      continue;
    }
    // Ambiguous ids need supporting context somewhere in the full text.
    const bucket = cue.test(text) ? confirmed : deferred;
    bucket.add(id);
  }

  let hardCount = 0;
  for (const id of confirmed) {
    if (!SOFT_DOMAIN.has(id)) hardCount += 1;
  }
  // Two or more hard skills make the deferred ambiguous ids credible.
  if (hardCount >= 2) {
    deferred.forEach((id) => confirmed.add(id));
  }
  return Array.from(confirmed);
}
/**
 * Skill ids found in the title alone, with SOFT_DOMAIN ids removed.
 *
 * @param {string} title - Job title.
 * @returns {string[]} Non-soft skill ids extracted from the title.
 */
function coreTagsFromTitle(title) {
  const titleTags = extractSkillTags(title, "");
  return titleTags.filter((tag) => !SOFT_DOMAIN.has(tag));
}
// Module state for vocab/extract.ts; populated on first call to init_extract().
var SOFT_DOMAIN, SYNONYM_ONLY, NON_EXTRACTABLE, AMBIGUOUS, ENG_INTENT, NON_ENG_TITLE;
var init_extract = __esm({
  "../../packages/core/src/vocab/extract.ts"() {
    "use strict";
    init_vocab();

    // Ids that do not count as "hard" skills in extractSkillTags / coreTagsFromTitle.
    SOFT_DOMAIN = /* @__PURE__ */ new Set([
      "frontend", "backend", "devops", "security", "payments", "billing",
      "microservices", "caching", "search", "observability", "monitoring", "testing",
      "accessibility", "seo", "performance", "realtime", "authentication", "api-design"
    ]);
    // Ids emitted only when the token resolved through a synonym.
    SYNONYM_ONLY = /* @__PURE__ */ new Set(["performance", "security", "seo"]);
    // Ids never emitted by extraction.
    NON_EXTRACTABLE = /* @__PURE__ */ new Set(["payments", "billing"]);

    // Fail at load time if SYNONYM_ONLY is not a subset of SOFT_DOMAIN.
    const stray = [...SYNONYM_ONLY].find((id) => !SOFT_DOMAIN.has(id));
    if (stray !== void 0) {
      throw new Error(`extract: SYNONYM_ONLY "${stray}" not in SOFT_DOMAIN`);
    }

    // Context cues for ids whose bare token is also an ordinary word or letter.
    AMBIGUOUS = {
      // Accept "go" with an ecosystem cue OR an explicit-skill phrasing ("Go developer",
      // "in Go", "experience with Go"). Rejects prose: "ready to go", "go above", "go live".
      go: /\b(golang|goroutines?|go\.mod|gin framework|gorm)\b|\bgo\b\s+(developer|engineer|programmer|microservices?|backend|services?|lang)|\b(in|with|using|written in|built in|experience (?:in|with)|proficient in|fluent in)\s+go\b/i,
      r: /\b(rstudio|tidyverse|ggplot|shiny|dplyr|cran|r-lang|rlang)\b/i,
      ml: /\b(machine[\s-]?learning|pytorch|tensorflow|scikit|sklearn|keras|neural|model training|deep[\s-]?learning|numpy|pandas|ml\s+(?:engineer|platform|researcher|infrastructure)|(?:ml|ai)\s+research)\b/i
    };

    // A title must match ENG_INTENT and must not match NON_ENG_TITLE (see looksLikeEngRole).
    ENG_INTENT = /\b(engineer|engineering|developer|dev\b|swe|sde|programmer|architect|full[\s-]?stack|front[\s-]?end|back[\s-]?end|devops|sre|software|coding|codebase|technical staff|tech(?:nical)? lead)\b/i;
    NON_ENG_TITLE = /\b(account executive|account manager|sales (?:rep|representative|development|manager|lead)|sdr|bdr|recruiter|recruiting|talent|marketing|administrative|business partner|billing coordinator|operations (?:administrator|coordinator)|customer success|project finance|controller|bookkeeper|graphic|brand)\b/i;
  }
});
412
+
413
+ // ../../packages/core/src/vocab/idf-background.ts
// Background document-frequency table used by backgroundIdf():
// N is the corpus size, df[tag] the number of documents carrying that tag.
var IDF_BACKGROUND;
var init_idf_background = __esm({
  "../../packages/core/src/vocab/idf-background.ts"() {
    "use strict";
    IDF_BACKGROUND = {
      N: 244,
      df: {
        backend: 71, python: 57, monitoring: 44, nextjs: 40, testing: 40,
        observability: 38, llm: 38, go: 36, aws: 36, react: 33,
        frontend: 30, ml: 28, mobile: 24, realtime: 24, typescript: 23,
        devops: 22, kubernetes: 22, javascript: 21, java: 20, rag: 20,
        "api-design": 20, linux: 19, postgresql: 19, search: 17, azure: 16,
        snowflake: 15, spark: 15, kotlin: 14, gcp: 14, accessibility: 14,
        nodejs: 14, graphql: 14, airflow: 14, docker: 14, "ci-cd": 13,
        android: 12, cpp: 12, "gitlab-ci": 11, anthropic: 11, terraform: 11,
        mysql: 11, r: 10, dbt: 9, langchain: 9, pytorch: 9,
        ruby: 9, rails: 9, cloudflare: 7, datadog: 7, css: 7,
        ansible: 7, openai: 6, kafka: 6, rust: 5, grpc: 5,
        microservices: 5, serverless: 5, scala: 5, prometheus: 5, grafana: 5,
        php: 5, redis: 5, huggingface: 4, pandas: 4, "scikit-learn": 4,
        html: 4, ios: 4, authentication: 4, vue: 4, mlops: 3,
        spring: 3, mongodb: 3, csharp: 3, swift: 2, caching: 2,
        haskell: 2, pulumi: 2, argocd: 2, tensorflow: 2, express: 2,
        elasticsearch: 2, clickhouse: 2, nestjs: 2, vite: 2, svelte: 2,
        phoenix: 2, angular: 2, django: 2, dotnet: 2, elixir: 2,
        bun: 1, oauth: 1, dynamodb: 1, helm: 1, playwright: 1,
        cypress: 1, jest: 1, mocha: 1, typeorm: 1, tailwind: 1,
        prisma: 1, expo: 1, rabbitmq: 1, redux: 1
      }
    };
  }
});
529
+
329
530
  // ../../packages/core/src/vocab/index.ts
330
531
  function normalize(tokens) {
331
532
  const result = /* @__PURE__ */ new Set();
@@ -362,6 +563,8 @@ var init_vocab = __esm({
362
563
  init_types2();
363
564
  init_closure();
364
565
  init_graph_data();
566
+ init_extract();
567
+ init_idf_background();
365
568
  GRAPH = buildGraph(VOCAB_NODES);
366
569
  VOCABULARY = [...GRAPH.ids];
367
570
  SYNONYMS = Object.fromEntries(GRAPH.synonyms);
@@ -376,23 +579,250 @@ var init_vocabulary = __esm({
376
579
  }
377
580
  });
378
581
 
379
- // ../../packages/core/src/matcher.ts
380
- function computeIdf(jobs) {
381
- const docFreq = /* @__PURE__ */ new Map();
382
- const N = jobs.length;
383
- for (const job of jobs) {
384
- const unique = new Set(job.tags);
385
- for (const tag of unique) {
386
- docFreq.set(tag, (docFreq.get(tag) ?? 0) + 1);
582
+ // ../../packages/core/src/github.ts
/**
 * Build the request headers for a GitHub REST API call.
 *
 * @param {string} [token] - Access token; when falsy no Authorization header is sent.
 * @returns {Record<string, string>} Headers object for fetch().
 */
function ghHeaders(token) {
  return {
    Accept: "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
    ...(token ? { Authorization: `Bearer ${token}` } : {})
  };
}
/**
 * GET a GitHub REST API path and parse the JSON response.
 *
 * @param {string} path - API path including the leading "/" and any query string.
 * @param {string} [token] - Optional access token, forwarded to ghHeaders().
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} When the response status is not OK; the message carries the
 *   path, status code and status text.
 */
async function ghFetch(path, token) {
  const response = await fetch(`https://api.github.com${path}`, {
    headers: ghHeaders(token)
  });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`GitHub API ${path}: HTTP ${response.status} ${response.statusText}`);
}
/**
 * Fetch a GitHub user's public profile plus derived language, topic and
 * PR-organisation signals.
 *
 * The user lookup is mandatory and its failure propagates. The repo listing
 * and the merged-PR search are best-effort: a failure is logged with
 * console.warn and the corresponding fields stay empty/undefined.
 *
 * @param {string} login - GitHub login to look up.
 * @param {string} [token] - Optional access token.
 * @returns {Promise<object>} Profile with login, name, publicEmail, avatarUrl,
 *   accountCreatedAt, publicRepos, followers, topLanguages (up to 10, most
 *   used first), topics (up to 30) and recentPRorgs (undefined when none).
 * @throws {Error} When the `/users/{login}` request fails.
 */
async function fetchGitHubProfile(login, token) {
  // Encode the login so characters like "/" or "?" cannot alter the request path.
  const loginPath = encodeURIComponent(login);
  const user = await ghFetch(`/users/${loginPath}`, token);

  let repos = [];
  try {
    repos = await ghFetch(
      `/users/${loginPath}/repos?sort=pushed&per_page=100`,
      token
    );
  } catch (err) {
    console.warn(`[github] ${login}: repos fetch failed, continuing \u2014`, err);
  }

  // Single pass over non-fork repos: count primary languages and collect topics.
  const langCount = {};
  const topicSet = /* @__PURE__ */ new Set();
  for (const repo of repos) {
    if (repo.fork) continue;
    if (repo.language) {
      const lang = repo.language.toLowerCase();
      langCount[lang] = (langCount[lang] ?? 0) + 1;
    }
    for (const t of repo.topics ?? []) topicSet.add(t.toLowerCase());
  }
  const topLanguages = Object.entries(langCount)
    .sort(([, a], [, b]) => b - a)
    .slice(0, 10)
    .map(([lang]) => lang);
  const topics = Array.from(topicSet).slice(0, 30);

  let recentPRorgs;
  try {
    const q = encodeURIComponent(
      `type:pr is:merged author:${login} sort:updated`
    );
    const result = await ghFetch(
      `/search/issues?q=${q}&per_page=30`,
      token
    );
    const loginLc = login.toLowerCase();
    const orgs = /* @__PURE__ */ new Set();
    for (const item of result.items ?? []) {
      // Prefer an embedded repository object; otherwise derive the owner from
      // `repository_url`, the field the rest of this file uses for search items.
      const fromUrl = typeof item.repository_url === "string" ? parseRepoUrl(item.repository_url) : null;
      const orgLogin = item.repository?.owner?.login ?? fromUrl?.owner;
      // Case-insensitive so the caller's spelling of the login still excludes own repos.
      if (orgLogin && orgLogin.toLowerCase() !== loginLc) orgs.add(orgLogin);
    }
    if (orgs.size > 0) recentPRorgs = Array.from(orgs);
  } catch (err) {
    // Best-effort signal: keep going, but do not hide the failure.
    console.warn(`[github] ${login}: PR search failed, continuing \u2014`, err);
  }

  return {
    login: user.login,
    name: user.name ?? void 0,
    publicEmail: user.email ?? void 0,
    avatarUrl: user.avatar_url,
    accountCreatedAt: user.created_at,
    publicRepos: user.public_repos,
    followers: user.followers,
    topLanguages,
    topics,
    recentPRorgs
  };
}
/**
 * Estimate a seniority band from GitHub account age and public activity.
 *
 * Tiers are checked from highest to lowest; the first tier whose minimum
 * account age and activity condition both hold wins, otherwise "junior".
 *
 * @param {{ accountCreatedAt: string, publicRepos: number, followers: number }} p
 * @returns {"staff" | "senior" | "mid" | "junior"}
 */
function inferSeniority(p) {
  const MS_PER_YEAR = 1e3 * 60 * 60 * 24 * 365.25;
  const ageYears = (Date.now() - new Date(p.accountCreatedAt).getTime()) / MS_PER_YEAR;
  const { publicRepos, followers } = p;

  const tiers = [
    { band: "staff", minYears: 9, active: publicRepos >= 40 || followers >= 500 },
    { band: "senior", minYears: 5, active: publicRepos >= 20 || followers >= 100 },
    { band: "mid", minYears: 2, active: publicRepos >= 5 }
  ];
  const hit = tiers.find((tier) => ageYears >= tier.minYears && tier.active);
  return hit ? hit.band : "junior";
}
/**
 * Convert a GitHub profile into a matcher fingerprint.
 *
 * @param {object} p - Profile as returned by fetchGitHubProfile().
 * @returns {{ skillTags: any, seniorityBand: string }} Normalized skill tags
 *   and the band from inferSeniority().
 */
function githubToFingerprint(p) {
  // recentPRorgs intentionally excluded — org names are not skill tags
  const skillTags = normalize([...p.topLanguages, ...p.topics]);
  return {
    skillTags,
    seniorityBand: inferSeniority(p)
  };
}
/**
 * GET a GitHub REST API path and hand back the raw Response, without any
 * status check or body parsing, so callers can read the response headers.
 *
 * @param {string} path - API path including the leading "/".
 * @param {string} [token] - Optional access token.
 * @returns {Promise<Response>}
 */
async function ghFetchRaw(path, token) {
  const url = `https://api.github.com${path}`;
  const init = { headers: ghHeaders(token) };
  return fetch(url, init);
}
/**
 * Extract the owner and repository name from a URL ending in
 * `/repos/{owner}/{name}` (an optional trailing slash is tolerated).
 *
 * @param {string} repoUrl - Repository URL, e.g. a search item's `repository_url`.
 * @returns {{ owner: string, name: string } | null} The parsed parts, or null
 *   when the input is not a string or does not match.
 */
function parseRepoUrl(repoUrl) {
  // A missing field must yield "no match", not a TypeError from calling .match on it.
  if (typeof repoUrl !== "string") return null;
  const m = /\/repos\/([^/]+)\/([^/]+)\/?$/.exec(repoUrl);
  return m ? { owner: m[1], name: m[2] } : null;
}
/**
 * Report whether a PR title matches TRIVIAL_PR_TITLE, the pattern this module
 * uses to discard low-signal pull requests.
 *
 * @param {string} title - Pull request title.
 * @returns {boolean}
 */
function isTrivialPRTitle(title) {
  const trivial = TRIVIAL_PR_TITLE.test(title);
  return trivial;
}
/**
 * List the organisations in which the token's user has the "admin" role.
 *
 * Best-effort: any failure (request or unexpected payload shape) yields an
 * empty set rather than an error.
 *
 * @param {string} token - Access token of the authenticated user.
 * @returns {Promise<Set<string>>} Lowercased organisation logins.
 */
async function fetchOwnedOrgs(token) {
  try {
    const memberships = await ghFetch(`/user/memberships/orgs?per_page=100`, token);
    const adminLogins = /* @__PURE__ */ new Set();
    for (const membership of memberships) {
      if (membership.role !== "admin") continue;
      adminLogins.add(membership.organization.login.toLowerCase());
    }
    return adminLogins;
  } catch {
    return /* @__PURE__ */ new Set();
  }
}
/**
 * Count a repository's contributors with a single request.
 *
 * Asks for one contributor per page and reads the page number of the
 * rel="last" entry in the Link header. When there is no such entry, the
 * length of the returned array is used instead (0 for a non-array body).
 *
 * @param {string} owner - Repository owner.
 * @param {string} name - Repository name.
 * @param {string} [token] - Optional access token.
 * @returns {Promise<number | undefined>} The count, or undefined when the
 *   request is not OK or throws.
 */
async function repoContributorCount(owner, name, token) {
  try {
    const res = await ghFetchRaw(
      `/repos/${owner}/${name}/contributors?per_page=1&anon=false`,
      token
    );
    if (!res.ok) {
      return void 0;
    }
    const lastPage = res.headers.get("link")?.match(/[?&]page=(\d+)>;\s*rel="last"/);
    if (lastPage) {
      // With one item per page, the last page number equals the item count.
      return Number(lastPage[1]);
    }
    const body = await res.json();
    if (Array.isArray(body)) {
      return body.length;
    }
    return 0;
  } catch {
    return void 0;
  }
}
/**
 * Fetch (and memoize) the repository facts needed to qualify a merged PR.
 *
 * Results are stored in `cache` under the lowercased "owner/name" key. A
 * failed lookup is stored as null, so it is not retried for the same cache.
 *
 * @param {string} owner - Repository owner.
 * @param {string} name - Repository name.
 * @param {string} [token] - Optional access token.
 * @param {Map<string, object | null>} cache - Caller-owned memo map.
 * @returns {Promise<object | null>} `{ stars, archived, fork, language,
 *   topics, contributors }`, or null when the repository lookup failed.
 */
async function fetchRepoMeta(owner, name, token, cache) {
  const key = `${owner}/${name}`.toLowerCase();
  const hit = cache.get(key);
  if (hit !== void 0) return hit;

  const load = async () => {
    const repo = await ghFetch(`/repos/${owner}/${name}`, token);
    const contributors = await repoContributorCount(owner, name, token);
    return {
      stars: repo.stargazers_count ?? 0,
      archived: !!repo.archived,
      fork: !!repo.fork,
      language: repo.language ?? null,
      topics: repo.topics ?? [],
      contributors
    };
  };

  let meta;
  try {
    meta = await load();
  } catch {
    meta = null;
  }
  cache.set(key, meta);
  return meta;
}
/**
 * Summarize a user's merged pull requests in repositories they do not own.
 *
 * A PR qualifies when its repository is not owned by the user or by an org
 * the user administers, its title is not trivial, and the repository is not
 * archived, not a fork, has at least MIN_STARS stars and (when the count is
 * known) at least MIN_CONTRIBUTORS contributors. Each qualifying PR is
 * credited to every normalized domain derived from the repository's language
 * and topics.
 *
 * @param {string} login - GitHub login of the PR author.
 * @param {string} [token] - Access token; without one the status is "no-token".
 * @param {Map<string, object | null>} [cache] - Repo-metadata memo shared with fetchRepoMeta().
 * @returns {Promise<{ status: string, byDomain: object, qualifyingTotal: number, computedAt: string }>}
 *   status is "ok", "no-token", "rate-limited" or "failed"; byDomain maps a
 *   domain to `{ mergedPRs, distinctOrgs, lastMergedAt }`.
 */
async function computeAcceptanceCredential(login, token, cache = /* @__PURE__ */ new Map()) {
  const computedAt = (/* @__PURE__ */ new Date()).toISOString();
  const emptyResult = (status) => ({ status, byDomain: {}, qualifyingTotal: 0, computedAt });
  if (!token) return emptyResult("no-token");

  const ownedOrgs = await fetchOwnedOrgs(token);
  const selfLc = login.toLowerCase();

  let candidates;
  try {
    const query = encodeURIComponent(`type:pr is:merged author:${login} -user:${login} sort:updated`);
    const page = await ghFetch(
      `/search/issues?q=${query}&per_page=${CANDIDATE_PR_PAGE}`,
      token
    );
    candidates = page.items ?? [];
  } catch (err) {
    const rateLimited = /HTTP 403|HTTP 429|rate limit/i.test(String(err));
    return emptyResult(rateLimited ? "rate-limited" : "failed");
  }

  const tallies = {};
  let qualifyingTotal = 0;
  for (const pr of candidates) {
    const repo = parseRepoUrl(pr.repository_url);
    if (!repo) continue;

    const ownerLc = repo.owner.toLowerCase();
    const ownRepo = ownerLc === selfLc || ownedOrgs.has(ownerLc);
    if (ownRepo || isTrivialPRTitle(pr.title)) continue;

    const meta = await fetchRepoMeta(repo.owner, repo.name, token, cache);
    if (!meta || meta.archived || meta.fork) continue;
    if (meta.stars < MIN_STARS) continue;
    // An unknown contributor count does not disqualify the repository.
    if (meta.contributors !== void 0 && meta.contributors < MIN_CONTRIBUTORS) continue;

    qualifyingTotal += 1;
    const mergedAt = pr.pull_request?.merged_at ?? pr.closed_at ?? pr.created_at;
    const rawDomains = [meta.language ?? "", ...meta.topics].filter(Boolean);
    for (const domain of new Set(normalize(rawDomains))) {
      let tally = tallies[domain];
      if (tally == null) {
        tally = { mergedPRs: 0, distinctOrgs: 0, lastMergedAt: mergedAt, orgs: /* @__PURE__ */ new Set() };
        tallies[domain] = tally;
      }
      tally.mergedPRs += 1;
      tally.orgs.add(ownerLc);
      if (mergedAt > tally.lastMergedAt) tally.lastMergedAt = mergedAt;
    }
  }

  // Replace the working org sets with their sizes for the returned value.
  const byDomain = {};
  for (const [domain, tally] of Object.entries(tallies)) {
    byDomain[domain] = {
      mergedPRs: tally.mergedPRs,
      distinctOrgs: tally.orgs.size,
      lastMergedAt: tally.lastMergedAt
    };
  }
  return { status: "ok", byDomain, qualifyingTotal, computedAt };
}
/**
 * Highest merged-PR count the credential holds across the given domains.
 *
 * @param {{ status: string, byDomain: object }} cred - Acceptance credential.
 * @param {Iterable<string>} domains - Domains to look up.
 * @returns {number} The maximum count, or 0 when the credential is not "ok"
 *   or none of the domains has merged PRs.
 */
function acceptanceCountForDomains(cred, domains) {
  let highest = 0;
  if (cred.status === "ok") {
    for (const domain of domains) {
      const merged = cred.byDomain[domain]?.mergedPRs ?? 0;
      highest = merged > highest ? merged : highest;
    }
  }
  return highest;
}
/**
 * Pick the domain with the most merged PRs among the given domains.
 *
 * On a tie the domain listed first wins.
 *
 * @param {{ status: string, byDomain: object }} cred - Acceptance credential.
 * @param {Iterable<string>} domains - Candidate domains.
 * @returns {{ domain: string, count: number } | null} The best domain, or
 *   null when the credential is not "ok" or no domain has merged PRs.
 */
function bestAcceptanceDomain(cred, domains) {
  if (cred.status !== "ok") return null;

  let leader = null;
  for (const domain of domains) {
    const count = cred.byDomain[domain]?.mergedPRs ?? 0;
    if (!(count > 0)) continue;
    if (leader !== null && !(count > leader.count)) continue;
    leader = { domain, count };
  }
  return leader;
}
// Module state for github.ts; populated on first call to init_github().
var MIN_STARS, MIN_CONTRIBUTORS, CANDIDATE_PR_PAGE, TRIVIAL_PR_TITLE;
var init_github = __esm({
  "../../packages/core/src/github.ts"() {
    "use strict";
    init_vocabulary();

    // Repository thresholds applied by computeAcceptanceCredential().
    MIN_STARS = 50;
    MIN_CONTRIBUTORS = 10;
    // `per_page` value used for the merged-PR search.
    CANDIDATE_PR_PAGE = 50;
    // Title prefixes (after optional leading whitespace) treated as trivial PRs.
    TRIVIAL_PR_TITLE = /^\s*(fix\s+typo|typo\b|update\s+readme|readme\b|docs?:|docs?\(|chore:|chore\(|style:|ci:|build:|bump\b|update\s+dependenc)/i;
  }
});
816
+
817
+ // ../../packages/core/src/matcher.ts
/**
 * Domains used to look up acceptance credit for a job: its core tags when it
 * has any, otherwise its full tag list.
 *
 * @param {{ coreTags?: string[], tags: string[] }} job
 * @returns {string[]}
 */
function acceptanceDomainsOf(job) {
  const core = job.coreTags;
  if (core && core.length > 0) {
    return core;
  }
  return job.tags;
}
395
- function inferSeniority(title) {
/**
 * Smoothed inverse document frequency of a tag against the bundled
 * background table: ln((N + 1) / (df + 1)) + 1.
 *
 * Tags absent from the table are treated as df = 0, which gives them the
 * highest weight.
 *
 * @param {string} tag - Vocabulary id.
 * @returns {number} The IDF weight.
 */
function backgroundIdf(tag) {
  const table = IDF_BACKGROUND.df;
  // Own-property check: a tag such as "constructor" must not pick up an
  // inherited Object.prototype member, which would turn the result into NaN.
  const df = Object.prototype.hasOwnProperty.call(table, tag) ? table[tag] : 0;
  return Math.log((IDF_BACKGROUND.N + 1) / (df + 1)) + 1;
}
825
+ function inferSeniority2(title) {
396
826
  if (!ENG_TITLE.test(title)) return void 0;
397
827
  for (const [re, level] of SENIORITY_PATTERNS) {
398
828
  if (re.test(title)) return level;
@@ -401,7 +831,7 @@ function inferSeniority(title) {
401
831
  }
402
832
  function seniorityScore(fp, job) {
403
833
  if (!fp.seniorityBand) return 1;
404
- const jobLevel = inferSeniority(job.title);
834
+ const jobLevel = inferSeniority2(job.title);
405
835
  if (!jobLevel) return 0.85;
406
836
  const wanted = SENIORITY_RANK[fp.seniorityBand] ?? 1;
407
837
  const got = SENIORITY_RANK[jobLevel] ?? 1;
@@ -411,8 +841,10 @@ function seniorityScore(fp, job) {
411
841
  return 0.4;
412
842
  }
413
843
  function recencyScore(postedAt, now) {
414
- if (!postedAt) return 0.75;
415
- const ageDays2 = (now - new Date(postedAt).getTime()) / 864e5;
844
+ if (!postedAt) return UNKNOWN_RECENCY;
845
+ const ms = new Date(postedAt).getTime();
846
+ if (Number.isNaN(ms)) return UNKNOWN_RECENCY;
847
+ const ageDays2 = (now - ms) / 864e5;
416
848
  if (ageDays2 < 7) return 1;
417
849
  if (ageDays2 < 30) return 0.9;
418
850
  if (ageDays2 < 90) return 0.75;
@@ -443,9 +875,8 @@ function harmonicMean(a, b) {
443
875
  if (a <= 0 || b <= 0) return 0;
444
876
  return 2 * a * b / (a + b);
445
877
  }
446
- function match(fp, jobs, limit = 5, now = Date.now()) {
447
- const idf = computeIdf(jobs);
448
- const idfOf = (t) => idf.get(t) ?? 0;
878
+ function match(fp, jobs, limit = 5, now = Date.now(), opts = {}) {
879
+ const idfOf = backgroundIdf;
449
880
  const expanded = expandWeighted(fp.skillTags);
450
881
  const maxDevScore = fp.skillTags.reduce((acc, t) => acc + idfOf(t), 0);
451
882
  const candidates = jobs.filter((j) => passesFilters(fp, j));
@@ -471,32 +902,45 @@ function match(fp, jobs, limit = 5, now = Date.now()) {
471
902
  const jobCov = jobMaxScore > 0 ? Math.min(1, jobMatchScore / jobMaxScore) : 0;
472
903
  const tagComponent = harmonicMean(devCov, jobCov);
473
904
  if (tagComponent === 0) return null;
905
+ const coreTags = job.coreTags ?? coreTagsFromTitle(job.title);
906
+ let coreComponent = tagComponent;
907
+ if (coreTags.length > 0) {
908
+ const coreCov = Math.max(0, ...coreTags.map((ct) => expanded.get(ct)?.weight ?? 0));
909
+ if (coreCov === 0) coreComponent = tagComponent * CORE_MISS_PENALTY;
910
+ }
474
911
  details.sort((a, b) => idfOf(b.tag) * b.weight - idfOf(a.tag) * a.weight);
475
912
  const sScore = seniorityScore(fp, job);
476
913
  const rScore = recencyScore(job.postedAt, now);
477
- const score = tagComponent * 0.6 + sScore * 0.25 + rScore * 0.15;
914
+ const score = coreComponent * 0.6 + sScore * 0.25 + rScore * 0.15;
478
915
  const matchedTags = [...new Set(details.map((d) => d.via ?? d.tag))];
916
+ const badge = opts.acceptance ? bestAcceptanceDomain(opts.acceptance, acceptanceDomainsOf(job)) : null;
479
917
  return {
480
918
  job,
481
919
  score: Math.round(score * 1e3) / 1e3,
482
920
  matchedTags,
483
921
  matchDetails: details,
922
+ ...badge ? { acceptance: { status: "ok", domain: badge.domain, count: badge.count } } : {},
484
923
  reason: buildReason(details)
485
924
  };
486
925
  });
487
- return scored.filter((r) => r !== null && r.score >= MIN_SCORE).sort((a, b) => b.score - a.score).slice(0, limit);
488
- }
489
- function matchOne(fp, job) {
490
- const results = match(fp, [job], 1);
491
- return results.length > 0 ? results[0] : null;
492
- }
493
- var MIN_SCORE, SHARPEN, SENIORITY_RANK, SENIORITY_PATTERNS, ENG_TITLE;
926
+ return scored.filter((r) => r !== null && r.score >= MIN_SCORE).sort((a, b) => {
927
+ const byScore = b.score - a.score;
928
+ if (Math.abs(byScore) > TIEBREAK_EPS) return byScore;
929
+ const byAcceptance = (b.acceptance?.count ?? 0) - (a.acceptance?.count ?? 0);
930
+ if (byAcceptance !== 0) return byAcceptance;
931
+ return byScore;
932
+ }).slice(0, limit);
933
+ }
934
+ var MIN_SCORE, TIEBREAK_EPS, SHARPEN, CORE_MISS_PENALTY, SENIORITY_RANK, SENIORITY_PATTERNS, ENG_TITLE, UNKNOWN_RECENCY;
494
935
  var init_matcher = __esm({
495
936
  "../../packages/core/src/matcher.ts"() {
496
937
  "use strict";
497
938
  init_vocabulary();
939
+ init_github();
498
940
  MIN_SCORE = 0.15;
941
+ TIEBREAK_EPS = 5e-3;
499
942
  SHARPEN = 1.6;
943
+ CORE_MISS_PENALTY = 0.4;
500
944
  SENIORITY_RANK = {
501
945
  junior: 0,
502
946
  mid: 1,
@@ -510,24 +954,19 @@ var init_matcher = __esm({
510
954
  [/\bmid[\s-]?level\b|\bmid\b/i, "mid"]
511
955
  ];
512
956
  ENG_TITLE = /\b(engineer|engineering|developer|dev|swe|sde|programmer|architect)\b/i;
957
+ UNKNOWN_RECENCY = 0.75;
513
958
  }
514
959
  });
515
960
 
516
961
  // ../../packages/core/src/feeds/greenhouse.ts
517
- function tokenize(text) {
518
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
519
- }
520
962
  function extractTags(job) {
521
- const texts = [
522
- job.title,
963
+ const body = [
523
964
  ...(job.departments ?? []).map((d) => d.name),
524
965
  job.location?.name ?? "",
525
966
  ...(job.offices ?? []).map((o) => o.name),
526
- // mine the full HTML description for additional signal when present
527
967
  ...job.content ? [job.content.replace(/<[^>]*>/g, " ")] : []
528
- ].filter(Boolean);
529
- const tokens = texts.flatMap(tokenize);
530
- return normalize(tokens);
968
+ ].filter(Boolean).join(" ");
969
+ return extractSkillTags(job.title, body);
531
970
  }
532
971
  function inferRemote(location) {
533
972
  const l = location.toLowerCase();
@@ -625,17 +1064,15 @@ var init_greenhouse = __esm({
625
1064
  });
626
1065
 
627
1066
  // ../../packages/core/src/feeds/ashby.ts
628
- function tokenize2(text) {
629
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
630
- }
631
1067
  function extractTags2(job) {
632
- const texts = [
633
- job.title,
634
- job.teamName ?? "",
635
- job.locationName ?? "",
636
- ...(job.secondaryLocations ?? []).map((l) => l.locationName ?? "")
637
- ];
638
- return normalize(texts.flatMap(tokenize2));
1068
+ const body = [
1069
+ job.team ?? "",
1070
+ job.department ?? "",
1071
+ job.location ?? "",
1072
+ ...(job.secondaryLocations ?? []).map((l) => l.location ?? ""),
1073
+ job.descriptionPlain ?? ""
1074
+ ].join(" ");
1075
+ return extractSkillTags(job.title, body);
639
1076
  }
640
1077
  function mapEmploymentType(raw) {
641
1078
  if (!raw) return "full_time";
@@ -646,7 +1083,7 @@ function mapEmploymentType(raw) {
646
1083
  }
647
1084
  function inferRemote2(job) {
648
1085
  if (job.isRemote === true) return true;
649
- const loc = (job.locationName ?? "").toLowerCase();
1086
+ const loc = (job.location ?? "").toLowerCase();
650
1087
  return loc.includes("remote") || loc.includes("anywhere");
651
1088
  }
652
1089
  async function fetchSlug2(slug) {
@@ -665,14 +1102,14 @@ async function fetchSlug2(slug) {
665
1102
  source: "ashby",
666
1103
  title: j.title,
667
1104
  company: slug,
668
- url: j.applyUrl ?? `https://jobs.ashbyhq.com/${slug}/${j.id}`,
1105
+ url: j.jobUrl ?? j.applyUrl ?? `https://jobs.ashbyhq.com/${slug}/${j.id}`,
669
1106
  remote: inferRemote2(j),
670
- location: j.locationName,
1107
+ location: j.location,
671
1108
  compMin: comp?.minValue,
672
1109
  compMax: comp?.maxValue,
673
1110
  tags: extractTags2(j),
674
1111
  roleType: mapEmploymentType(j.employmentType),
675
- postedAt: j.publishedDate,
1112
+ postedAt: j.publishedAt,
676
1113
  applyMode: "direct",
677
1114
  raw: j
678
1115
  };
@@ -699,20 +1136,16 @@ var init_ashby = __esm({
699
1136
  });
700
1137
 
701
1138
  // ../../packages/core/src/feeds/lever.ts
702
- function tokenize3(text) {
703
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
704
- }
705
1139
  function extractTags3(p) {
706
1140
  const cat = p.categories ?? {};
707
- const texts = [
708
- p.text,
1141
+ const body = [
709
1142
  cat.team ?? "",
710
1143
  cat.department ?? "",
711
1144
  cat.location ?? "",
712
1145
  ...cat.allLocations ?? [],
713
1146
  p.descriptionPlain ?? ""
714
- ];
715
- return normalize(texts.flatMap(tokenize3));
1147
+ ].join(" ");
1148
+ return extractSkillTags(p.text, body);
716
1149
  }
717
1150
  function mapCommitment(raw) {
718
1151
  if (!raw) return "full_time";
@@ -785,15 +1218,8 @@ var init_lever = __esm({
785
1218
  });
786
1219
 
787
1220
  // ../../packages/core/src/feeds/himalayas.ts
788
- function tokenize4(text) {
789
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
790
- }
791
1221
  function extractTags4(job) {
792
- const texts = [
793
- job.title,
794
- ...job.tags ?? []
795
- ];
796
- return normalize(texts.flatMap(tokenize4));
1222
+ return extractSkillTags(job.title, (job.tags ?? []).join(" "));
797
1223
  }
798
1224
  function mapJobType(raw) {
799
1225
  if (!raw) return "full_time";
@@ -878,9 +1304,6 @@ var init_entities = __esm({
878
1304
  });
879
1305
 
880
1306
  // ../../packages/core/src/feeds/wwr.ts
881
- function tokenize5(text) {
882
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
883
- }
884
1307
  function stripHtml(html) {
885
1308
  return html.replace(/<[^>]*>/g, " ").replace(/\s+/g, " ").trim();
886
1309
  }
@@ -905,10 +1328,13 @@ function parseRss(xml) {
905
1328
  return decodeEntities(plainMatch?.[1].trim() ?? "");
906
1329
  };
907
1330
  const rawTitle = get("title");
908
- const colonIdx = rawTitle.indexOf(":");
909
- const company = colonIdx !== -1 ? rawTitle.slice(0, colonIdx).trim() : "Unknown";
910
- const titleAfterColon = colonIdx !== -1 ? rawTitle.slice(colonIdx + 1).trim() : rawTitle;
1331
+ const m = rawTitle.match(/^(.*?):\s+(.*)$/);
1332
+ let company = m ? m[1].trim() : "Unknown";
1333
+ const titleAfterColon = m ? m[2].trim() : rawTitle;
911
1334
  const title = titleAfterColon.replace(/\s*\([^)]*\)\s*$/, "").trim();
1335
+ if (/^https?:\/\//i.test(company)) {
1336
+ company = company.replace(/^https?:\/\//i, "").replace(/\/.*$/, "").trim() || "Unknown";
1337
+ }
912
1338
  items.push({
913
1339
  title,
914
1340
  link: get("link") || get("guid"),
@@ -921,8 +1347,8 @@ function parseRss(xml) {
921
1347
  return items;
922
1348
  }
923
1349
  function extractTags5(item) {
924
- const text = [item.title, item.category, stripHtml(item.description)].join(" ");
925
- return normalize(tokenize5(text));
1350
+ const body = [item.category, stripHtml(item.description)].join(" ");
1351
+ return extractSkillTags(item.title, body);
926
1352
  }
927
1353
  var WWR_RSS_URL, wwr;
928
1354
  var init_wwr = __esm({
@@ -964,9 +1390,6 @@ var init_wwr = __esm({
964
1390
  });
965
1391
 
966
1392
  // ../../packages/core/src/feeds/hn.ts
967
- function tokenize6(text) {
968
- return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
969
- }
970
1393
  function stripHtml2(html) {
971
1394
  return decodeEntities(html.replace(/<p>/gi, " ").replace(/<[^>]*>/g, "")).replace(/\s+/g, " ").trim();
972
1395
  }
@@ -997,7 +1420,7 @@ function parseComment(item) {
997
1420
  return null;
998
1421
  }
999
1422
  const url = extractUrl(raw) || `https://news.ycombinator.com/item?id=${item.id}`;
1000
- const tags = extractTags6(raw);
1423
+ const tags = extractTags6(title, raw);
1001
1424
  if (tags.length === 0) return null;
1002
1425
  return {
1003
1426
  id: `hn:${item.id}`,
@@ -1014,8 +1437,8 @@ function parseComment(item) {
1014
1437
  raw: item
1015
1438
  };
1016
1439
  }
1017
- function extractTags6(text) {
1018
- return normalize(tokenize6(text));
1440
+ function extractTags6(title, text) {
1441
+ return extractSkillTags(title, text);
1019
1442
  }
1020
1443
  var ALGOLIA_SEARCH, ALGOLIA_ITEMS, hn;
1021
1444
  var init_hn = __esm({
@@ -1105,7 +1528,7 @@ function authHeaders() {
1105
1528
  if (token) h["Authorization"] = `Bearer ${token}`;
1106
1529
  return h;
1107
1530
  }
1108
- function tokenize7(text) {
1531
+ function tokenize2(text) {
1109
1532
  return text.toLowerCase().replace(/[^a-z0-9.\-+#]/g, " ").split(/\s+/).filter(Boolean);
1110
1533
  }
1111
1534
  function parseAmountUSD(text) {
@@ -1190,7 +1613,7 @@ async function fetchRepoBounties(repoFullName) {
1190
1613
  const body = issue.body ? decodeEntities(issue.body) : "";
1191
1614
  const amountUSD = parseAmountUSD(title) ?? parseAmountUSD(body) ?? await fetchCommentAmount(repoFullName, issue.number);
1192
1615
  const labels = labelNames(issue);
1193
- const tags = normalize(tokenize7([title, labels.join(" "), body.slice(0, 2e3)].join(" ")));
1616
+ const tags = normalize(tokenize2([title, labels.join(" "), body.slice(0, 2e3)].join(" ")));
1194
1617
  return {
1195
1618
  id: `bounty:${repoFullName}#${issue.number}`,
1196
1619
  source: "bounty",
@@ -1507,103 +1930,6 @@ var init_indexer = __esm({
1507
1930
  }
1508
1931
  });
1509
1932
 
1510
- // ../../packages/core/src/github.ts
1511
/**
 * Build the header set sent with every GitHub REST API request.
 *
 * @param {string} [token] Optional access token; when truthy it is sent as a Bearer credential.
 * @returns {Object<string, string>} Header name/value pairs.
 */
function ghHeaders(token) {
  const base = {
    Accept: "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28"
  };
  // Without a token the request is sent with no Authorization header at all.
  return token ? { ...base, Authorization: `Bearer ${token}` } : base;
}
1519
/**
 * GET a path from the GitHub REST API and parse the JSON response.
 *
 * @param {string} path API path (with any query string) appended to https://api.github.com.
 * @param {string} [token] Optional access token forwarded to ghHeaders.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} When the response status is not OK; the message carries the path, status and status text.
 */
async function ghFetch(path, token) {
  const response = await fetch(`https://api.github.com${path}`, {
    headers: ghHeaders(token)
  });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`GitHub API ${path}: HTTP ${response.status} ${response.statusText}`);
}
1527
/**
 * Collect a public GitHub profile summary for one user.
 *
 * Three requests are made through ghFetch: the user record (required), the
 * user's repositories (best effort) and a search for merged pull requests
 * authored by the user (best effort). A failure of either best-effort request
 * is logged with console.warn and the corresponding fields stay empty/undefined.
 *
 * @param {string} login GitHub login to look up.
 * @param {string} [token] Optional access token passed through to ghFetch.
 * @returns {Promise<{login: string, name: (string|undefined), publicEmail: (string|undefined),
 *   avatarUrl: string, accountCreatedAt: string, publicRepos: number, followers: number,
 *   topLanguages: string[], topics: string[], recentPRorgs: (string[]|undefined)}>}
 * @throws {Error} When the user record itself cannot be fetched.
 */
async function fetchGitHubProfile(login, token) {
  // Encode the login so characters such as "/" or "?" cannot change the request path.
  const loginPath = encodeURIComponent(login);
  const user = await ghFetch(`/users/${loginPath}`, token);

  let repos = [];
  try {
    repos = await ghFetch(
      `/users/${loginPath}/repos?sort=pushed&per_page=100`,
      token
    );
  } catch (err) {
    console.warn(`[github] ${login}: repos fetch failed, continuing \u2014`, err);
  }

  // Forks are ignored for both language and topic signals.
  const ownRepos = repos.filter((repo) => !repo.fork);

  // A Map keeps first-seen order and cannot collide with Object.prototype keys.
  const langCount = new Map();
  for (const repo of ownRepos) {
    if (!repo.language) continue;
    const lang = repo.language.toLowerCase();
    langCount.set(lang, (langCount.get(lang) ?? 0) + 1);
  }
  const topLanguages = Array.from(langCount.entries())
    .sort(([, a], [, b]) => b - a)
    .slice(0, 10)
    .map(([lang]) => lang);

  const topicSet = new Set();
  for (const repo of ownRepos) {
    for (const t of repo.topics ?? []) topicSet.add(t.toLowerCase());
  }
  const topics = Array.from(topicSet).slice(0, 30);

  let recentPRorgs;
  try {
    const q = encodeURIComponent(
      `type:pr is:merged author:${login} sort:updated`
    );
    const result = await ghFetch(
      `/search/issues?q=${q}&per_page=30`,
      token
    );
    // Compare case-insensitively against both the requested and the returned
    // spelling of the login, so the user's own account is never listed.
    const selfLogins = new Set(
      [login, user.login].filter(Boolean).map((name) => String(name).toLowerCase())
    );
    const orgs = new Set();
    for (const item of result.items ?? []) {
      // Prefer an embedded repository object when present; otherwise take the
      // owner segment of repository_url (".../repos/OWNER/REPO").
      const owner = item.repository?.owner?.login
        ?? /\/repos\/([^/]+)\/[^/]+$/.exec(item.repository_url ?? "")?.[1];
      if (owner && !selfLogins.has(owner.toLowerCase())) orgs.add(owner);
    }
    if (orgs.size > 0) recentPRorgs = Array.from(orgs);
  } catch (err) {
    // Still best effort, but no longer silent: mirror the repos warning above.
    console.warn(`[github] ${login}: merged-PR search failed, continuing \u2014`, err);
  }

  return {
    login: user.login,
    name: user.name ?? void 0,
    publicEmail: user.email ?? void 0,
    avatarUrl: user.avatar_url,
    accountCreatedAt: user.created_at,
    publicRepos: user.public_repos,
    followers: user.followers,
    topLanguages,
    topics,
    recentPRorgs
  };
}
1582
/**
 * Estimate a seniority band from public GitHub account signals.
 *
 * Bands are checked from highest to lowest; the first rule that matches wins:
 *   staff  - account age >= 9 years and (>= 40 public repos or >= 500 followers)
 *   senior - account age >= 5 years and (>= 20 public repos or >= 100 followers)
 *   mid    - account age >= 2 years and >= 5 public repos
 *   junior - everything else
 *
 * @param {{accountCreatedAt: string, publicRepos: number, followers: number}} p Profile signals.
 * @param {number} [now=Date.now()] Reference time in epoch milliseconds; injectable so
 *   the result is reproducible.
 * @returns {"staff"|"senior"|"mid"|"junior"} The inferred band.
 */
function inferSeniority2(p, now = Date.now()) {
  const MS_PER_YEAR = 1e3 * 60 * 60 * 24 * 365.25;
  // new Date(null) is the 1970 epoch, which would read as a decades-old account;
  // a missing or unparseable creation date carries no age evidence.
  if (p.accountCreatedAt == null) return "junior";
  const createdMs = new Date(p.accountCreatedAt).getTime();
  if (Number.isNaN(createdMs)) return "junior";
  const ageYears = (now - createdMs) / MS_PER_YEAR;
  if (ageYears >= 9 && (p.publicRepos >= 40 || p.followers >= 500)) return "staff";
  if (ageYears >= 5 && (p.publicRepos >= 20 || p.followers >= 100)) return "senior";
  if (ageYears >= 2 && p.publicRepos >= 5) return "mid";
  return "junior";
}
1590
/**
 * Convert a fetched GitHub profile into a matching fingerprint.
 *
 * @param {{topLanguages: string[], topics: string[]}} p Profile; also handed to inferSeniority2.
 * @returns {{skillTags: *, seniorityBand: *}} Normalized skill tags plus the inferred seniority band.
 */
function githubToFingerprint(p) {
  // recentPRorgs is deliberately left out: org names are not skill tags.
  const rawTokens = [];
  rawTokens.push(...p.topLanguages, ...p.topics);
  return {
    skillTags: normalize(rawTokens),
    seniorityBand: inferSeniority2(p)
  };
}
1600
// Bundler-generated initializer for packages/core/src/github.ts, registered through __esm.
// The module body's only action is to call init_vocabulary(); presumably this makes the
// vocabulary helpers (e.g. normalize) ready before the GitHub code uses them - confirm
// against the __esm helper defined elsewhere in the bundle.
var init_github = __esm({
  "../../packages/core/src/github.ts": function() {
    "use strict";
    init_vocabulary();
  }
});
1606
-
1607
1933
  // ../../packages/core/src/index.ts
1608
1934
  var src_exports = {};
1609
1935
  __export(src_exports, {
@@ -1617,17 +1943,23 @@ __export(src_exports, {
1617
1943
  FEEDS: () => FEEDS,
1618
1944
  GRAPH: () => GRAPH,
1619
1945
  GREENHOUSE_SLUGS_BY_TIER: () => GREENHOUSE_SLUGS_BY_TIER,
1946
+ IDF_BACKGROUND: () => IDF_BACKGROUND,
1620
1947
  LEVER_SLUGS_BY_TIER: () => LEVER_SLUGS_BY_TIER,
1621
1948
  SYNONYMS: () => SYNONYMS,
1622
1949
  VOCABULARY: () => VOCABULARY,
1623
1950
  VOCAB_NODES: () => VOCAB_NODES,
1951
+ acceptanceCountForDomains: () => acceptanceCountForDomains,
1624
1952
  aggregate: () => aggregate,
1625
1953
  aggregateBounties: () => aggregateBounties,
1626
1954
  ashby: () => ashby,
1955
+ bestAcceptanceDomain: () => bestAcceptanceDomain,
1627
1956
  buildGraph: () => buildGraph,
1628
1957
  buildIndex: () => buildIndex,
1629
1958
  buildReason: () => buildReason,
1959
+ computeAcceptanceCredential: () => computeAcceptanceCredential,
1960
+ coreTagsFromTitle: () => coreTagsFromTitle,
1630
1961
  expandWeighted: () => expandWeighted,
1962
+ extractSkillTags: () => extractSkillTags,
1631
1963
  fetchGitHubProfile: () => fetchGitHubProfile,
1632
1964
  flattenTiers: () => flattenTiers,
1633
1965
  getBuyer: () => getBuyer,
@@ -1639,10 +1971,11 @@ __export(src_exports, {
1639
1971
  isBounty: () => isBounty,
1640
1972
  lever: () => lever,
1641
1973
  loadPartnerRoles: () => loadPartnerRoles,
1974
+ looksLikeEngRole: () => looksLikeEngRole,
1642
1975
  match: () => match,
1643
- matchOne: () => matchOne,
1644
1976
  normalize: () => normalize,
1645
1977
  passesMaturityGate: () => passesMaturityGate,
1978
+ tokenize: () => tokenize,
1646
1979
  validateGraph: () => validateGraph,
1647
1980
  wwr: () => wwr
1648
1981
  });