pathscout 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pathscout/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Local-first role discovery radar."""
2
+
3
# Names exported by ``from pathscout import *``.
__all__ = ["__version__"]

# Package version string; pathscout.artifacts imports it to stamp the
# "pathscout_version" field of the documents it builds.
__version__ = "0.3.0"
pathscout/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ from .cli import main
2
+
3
+
4
# Only runs when this module is executed as a script: exit the process with
# whatever status main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
6
+
pathscout/artifacts.py ADDED
@@ -0,0 +1,505 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ import sqlite3
6
+ import textwrap
7
+ from datetime import date, datetime, timedelta, timezone
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from . import __version__
12
+
13
+
14
# Value written to the "schema_version" key of run artifacts (see build_artifact).
ARTIFACT_SCHEMA_VERSION = 1
# Value written to the "schema_version" key of opportunity-package documents
# and of the JSON resources listed in the package manifest.
PACKAGE_SCHEMA_VERSION = 1
# Tier labels, in the order render_markdown emits their digest sections.
TIERS = ["Act Now", "Hidden Search Hypothesis", "Watch Signal", "Filtered"]
17
+
18
+
19
def build_artifact(
    conn: sqlite3.Connection,
    config: dict[str, Any],
    run_result: Any,
    window_days: int,
    dry_run: bool,
    invocation: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the ``run_artifact`` dictionary describing one run.

    Args:
        conn: Database connection queried for recent observations; only used
            when ``dry_run`` is false.
        config: Configuration mapping; its ``"suppressions"`` entry (an empty
            dict when the key is absent) is handed to ``normalize_finding``.
        run_result: Object exposing ``fetched_count``, ``inserted_count``,
            ``skipped_count``, ``errors``, ``source_stats`` and, for dry
            runs, ``dry_run_findings``.
        window_days: Look-back window passed to ``fetch_recent_rows``.
        dry_run: When true, findings are taken from ``run_result`` instead of
            the database.
        invocation: Stored unchanged under the ``"invocation"`` key.

    Returns:
        The artifact dictionary, stamped with the current UTC time truncated
        to whole seconds.
    """
    if dry_run:
        raw_items = run_result.dry_run_findings
    else:
        recent_rows = fetch_recent_rows(conn, window_days)
        raw_items = rows_to_raw_findings(recent_rows)

    now_utc = datetime.now(timezone.utc)
    generated_at = now_utc.replace(microsecond=0).isoformat()

    normalized = [
        normalize_finding(item, config.get("suppressions", {}))
        for item in raw_items
    ]

    summary = {
        "fetched": run_result.fetched_count,
        "inserted": run_result.inserted_count,
        "skipped": run_result.skipped_count,
        "errors": len(run_result.errors),
        "dry_run": dry_run,
    }

    per_source = []
    for stat in run_result.source_stats:
        per_source.append(
            {
                "id": stat.source_id,
                "name": stat.source_name,
                "type": stat.source_type,
                "fetched": stat.fetched_count,
                "error": stat.error,
            }
        )

    return {
        "artifact_type": "run_artifact",
        "artifact_id": build_artifact_id("run", generated_at),
        "schema_version": ARTIFACT_SCHEMA_VERSION,
        "pathscout_version": __version__,
        "generated_at": generated_at,
        "invocation": invocation,
        "summary": summary,
        "source_stats": per_source,
        "errors": list(run_result.errors),
        "findings": normalized,
    }
61
+
62
+
63
def fetch_recent_rows(conn: sqlite3.Connection, window_days: int) -> list[sqlite3.Row]:
    """Return observations seen within the last ``window_days`` days.

    The cutoff is the current UTC time minus ``window_days``, truncated to
    whole seconds and compared against ``observed_at`` as an ISO-8601 string.
    Rows are ordered by score, then by ``observed_at``, both descending.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=window_days)
    cutoff_text = cutoff.replace(microsecond=0).isoformat()
    query = """
        select * from observations
        where observed_at >= ?
        order by score desc, observed_at desc
        """
    cursor = conn.execute(query, (cutoff_text,))
    return cursor.fetchall()
73
+
74
+
75
def _load_json_list(payload: Any) -> Any:
    """Decode a JSON column, treating NULL or an empty string as an empty list."""
    if payload is None or payload == "":
        return []
    return json.loads(payload)


def rows_to_raw_findings(rows: list[sqlite3.Row]) -> list[dict[str, Any]]:
    """Convert observation rows into raw finding dictionaries.

    Scalar columns are copied as-is. ``reasons_json`` and ``flags_json`` are
    decoded into the ``"reasons"`` and ``"flags"`` keys; a NULL or empty
    column yields an empty list instead of making ``json.loads`` raise.
    Malformed JSON still raises ``json.JSONDecodeError``.

    Args:
        rows: Row-like objects supporting lookup by column name.

    Returns:
        One dictionary per row, in input order.
    """
    copied_columns = (
        "source_id",
        "source_name",
        "source_type",
        "company",
        "title",
        "url",
        "text",
        "evidence_type",
        "content_hash",
        "observed_at",
        "score",
        "tier",
    )
    findings = []
    for row in rows:
        finding = {column: row[column] for column in copied_columns}
        finding["reasons"] = _load_json_list(row["reasons_json"])
        finding["flags"] = _load_json_list(row["flags_json"])
        findings.append(finding)
    return findings
97
+
98
+
99
def normalize_finding(raw: dict[str, Any], suppressions: dict[str, Any]) -> dict[str, Any]:
    """Build the normalized finding record stored in a run artifact.

    The finding's ``content_hash`` doubles as its ``id``; a raw finding
    without that key raises ``KeyError``. Every other field falls back to an
    empty string (``0`` for ``score``, an empty list for ``reasons`` and
    ``flags``) when missing. Evidence strength/warnings come from
    ``classify_evidence`` and suppression state from ``find_suppression``.
    """
    content_hash = raw["content_hash"]
    active_suppression = find_suppression(content_hash, suppressions)
    strength, warnings = classify_evidence(raw)

    normalized: dict[str, Any] = {"id": content_hash}
    for key in ("company", "title", "url", "tier"):
        normalized[key] = raw.get(key, "")
    normalized["score"] = raw.get("score", 0)
    # Copy the lists so the artifact does not share them with the raw finding.
    normalized["reasons"] = list(raw.get("reasons", []))
    normalized["flags"] = list(raw.get("flags", []))
    for key in ("source_id", "source_name", "source_type", "evidence_type"):
        normalized[key] = raw.get(key, "")
    normalized["evidence_strength"] = strength
    normalized["evidence_warnings"] = warnings
    normalized["observed_at"] = raw.get("observed_at", "")
    normalized["content_hash"] = raw.get("content_hash", "")
    normalized["suppressed"] = active_suppression is not None
    normalized["suppression"] = active_suppression
    normalized["text"] = raw.get("text", "")
    return normalized
124
+
125
+
126
def classify_evidence(raw: dict[str, Any]) -> tuple[str, list[str]]:
    """Rate how strong a finding's evidence is and collect warning codes.

    Returns:
        ``(strength, warnings)`` where ``strength`` is ``"strong"``,
        ``"medium"`` or ``"weak"`` and ``warnings`` may contain
        ``"page_level_fallback"``, ``"generic_source_evidence"`` and
        ``"missing_posted_date"``, in that order.
    """

    def lowered(key: str) -> str:
        return str(raw.get(key, "")).lower()

    kind = lowered("evidence_type")
    origin = lowered("source_type")
    title = lowered("title")
    body = lowered("text")

    strength = "weak"
    if kind in {"job", "job_posting", "role", "recruiter", "search_firm"}:
        strength = "strong"
    elif kind in {"hidden_search", "portfolio", "radar_portfolio"}:
        strength = "medium"

    warnings: list[str] = []

    # A whole careers/jobs page is weaker evidence than a specific posting.
    careers_page_title = "career" in title or "job" in title
    if kind == "job_page" or (origin == "watchlist_careers" and careers_page_title):
        strength = "weak"
        warnings.append("page_level_fallback")

    # NOTE(review): the previous form also tested source_type here, but only
    # together with the same evidence types, so it could never change the
    # outcome. Confirm whether source_type alone was meant to trigger this.
    if kind in {"manual", "web_page", "rss"}:
        warnings.append("generic_source_evidence")

    if kind in {"job", "job_page", "job_posting", "role"}:
        date_hint = re.search(r"\b(20\d{2}|posted|updated|opened)\b", f"{title} {body}")
        if not date_hint:
            warnings.append("missing_posted_date")

    return strength, warnings
149
+
150
+
151
def find_suppression(finding_id: str, suppressions: dict[str, Any]) -> dict[str, Any] | None:
    """Return the first unexpired suppression entry for ``finding_id``.

    Args:
        finding_id: Identifier compared against each entry's ``"id"``.
        suppressions: Mapping whose ``"suppressions"`` key holds the list of
            entries. ``None``, or a missing/null list, means no suppressions.

    Returns:
        The matching entry, or ``None`` when nothing matches or every match
        has expired. An entry without ``expires_at`` never expires; an entry
        is still active on its expiry date.
    """
    today = date.today().isoformat()
    entries = (suppressions or {}).get("suppressions") or []
    for suppression in entries:
        if suppression.get("id") != finding_id:
            continue
        expires_at = suppression.get("expires_at")
        # Config loaders may hand back date/datetime objects rather than
        # strings; normalize to an ISO date string so the comparison below
        # cannot raise TypeError.
        if isinstance(expires_at, datetime):
            expires_at = expires_at.date()
        if isinstance(expires_at, date):
            expires_at = expires_at.isoformat()
        if expires_at and str(expires_at) < today:
            continue
        return suppression
    return None
161
+
162
+
163
def build_artifact_id(prefix: str, generated_at: str) -> str:
    """Return ``"<prefix>_<stamp>"`` built from an ISO-8601 timestamp.

    A ``+00:00`` offset is rewritten to ``Z``; every ``-``, ``:`` and ``.``
    is then removed from the timestamp.
    """
    stamp = generated_at.replace("+00:00", "Z")
    stamp = stamp.translate(str.maketrans("", "", "-:."))
    return f"{prefix}_{stamp}"
168
+
169
+
170
def write_json_artifact(artifact: dict[str, Any], path: Path) -> Path:
    """Write ``artifact`` to ``path`` as 2-space-indented JSON plus a newline.

    Missing parent directories are created first. Returns ``path``.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(artifact, indent=2)
    path.write_text(f"{serialized}\n", encoding="utf-8")
    return path
174
+
175
+
176
def write_markdown_artifact(artifact: dict[str, Any], path: Path) -> Path:
    """Render ``artifact`` with ``render_markdown`` and write it to ``path``.

    Missing parent directories are created first. Returns ``path``.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    digest = render_markdown(artifact)
    path.write_text(digest, encoding="utf-8")
    return path
180
+
181
+
182
def write_package_from_artifact(artifact: dict[str, Any], finding_id: str, out_dir: Path) -> Path:
    """Export one finding from ``artifact`` as an opportunity package.

    The package is written to ``out_dir / <slug>`` and consists of
    ``manifest.json``, three JSON documents under ``data/``, ``package.md``
    and ``agent.md``.

    Raises:
        ValueError: Propagated from ``find_finding`` when ``finding_id`` is
            unknown or is an ambiguous prefix.

    Returns:
        The package directory.
    """
    selected = find_finding(artifact, finding_id)
    package = build_opportunity_package(artifact, selected)

    package_dir = out_dir / package["slug"]
    # Creating data/ with parents=True also creates the package directory,
    # which covers every file written below.
    (package_dir / "data").mkdir(parents=True, exist_ok=True)

    json_documents = (
        ("manifest.json", package["manifest"]),
        ("data/opportunity.json", package["opportunity"]),
        ("data/evidence.json", package["evidence"]),
        ("data/findings.json", package["findings"]),
    )
    for relative_path, document in json_documents:
        serialized = json.dumps(document, indent=2) + "\n"
        (package_dir / relative_path).write_text(serialized, encoding="utf-8")

    markdown_documents = (
        ("package.md", render_package_markdown),
        ("agent.md", render_agent_markdown),
    )
    for file_name, renderer in markdown_documents:
        (package_dir / file_name).write_text(renderer(package), encoding="utf-8")

    return package_dir
202
+
203
+
204
def find_finding(artifact: dict[str, Any], finding_id: str) -> dict[str, Any]:
    """Look up a finding in ``artifact`` by id, content hash, or id prefix.

    An exact match on ``id`` or ``content_hash`` wins (first one in artifact
    order). Otherwise, when ``finding_id`` has at least 8 characters, it is
    tried as a prefix of each finding's ``id`` and must match exactly one.

    Raises:
        ValueError: If the prefix matches several findings, or nothing
            matches at all.
    """
    candidates = artifact.get("findings", [])

    for candidate in candidates:
        if candidate.get("id") == finding_id or candidate.get("content_hash") == finding_id:
            return candidate

    # Short strings are never treated as prefixes.
    if len(finding_id) < 8:
        raise ValueError(f"Finding not found: {finding_id}")

    by_prefix = [
        candidate
        for candidate in candidates
        if str(candidate.get("id", "")).startswith(finding_id)
    ]
    if len(by_prefix) > 1:
        raise ValueError(f"Finding ID prefix is ambiguous: {finding_id}")
    if by_prefix:
        return by_prefix[0]
    raise ValueError(f"Finding not found: {finding_id}")
216
+
217
+
218
def build_opportunity_package(artifact: dict[str, Any], finding: dict[str, Any]) -> dict[str, Any]:
    """Build the in-memory opportunity package for a single finding.

    Args:
        artifact: Run artifact the finding came from; its ``artifact_id`` is
            recorded as the package's source run.
        finding: Normalized finding to package.

    Returns:
        A dictionary with the keys ``slug`` (directory name for the package),
        ``manifest``, ``opportunity``, ``evidence`` and ``findings``.
    """
    generated_at = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
    # Coerce to str so a null or non-string id/company cannot break slicing
    # or slugify().
    finding_id = str(finding.get("id") or "")
    company = str(finding.get("company") or "unknown-company")

    # The slug becomes a directory name, so the id fragment must not carry
    # path separators or dots ("../") taken verbatim from the artifact.
    # Plain alphanumeric ids pass through unchanged.
    id_fragment = re.sub(r"[^A-Za-z0-9_-]+", "-", finding_id[:12]) or "finding"
    slug = f"{slugify(company)}-{id_fragment}"
    package_id = f"pkg_{finding_id[:16] if finding_id else slug}"
    source_run_id = artifact.get("artifact_id", "")

    evidence = build_evidence_document(finding, artifact, package_id, source_run_id, generated_at)
    opportunity = build_opportunity_document(finding, package_id, source_run_id, generated_at)
    findings = {
        "schema_version": PACKAGE_SCHEMA_VERSION,
        "package_id": package_id,
        "source_run_artifact_id": source_run_id,
        "findings": [finding],
    }
    manifest = build_package_manifest(package_id, source_run_id, generated_at)
    return {
        "slug": slug,
        "manifest": manifest,
        "opportunity": opportunity,
        "evidence": evidence,
        "findings": findings,
    }
241
+
242
+
243
def build_package_manifest(package_id: str, source_run_id: str, generated_at: str) -> dict[str, Any]:
    """Return the ``manifest.json`` content for an opportunity package.

    The manifest lists the two markdown files and the three JSON documents of
    the package; only the JSON resources carry a ``schema_version``.
    """
    markdown_files = (
        ("package.md", "human_readable"),
        ("agent.md", "agent_context"),
    )
    json_files = (
        ("data/opportunity.json", "canonical_opportunity"),
        ("data/evidence.json", "evidence"),
        ("data/findings.json", "source_findings"),
    )

    resources: list[dict[str, Any]] = [
        {"path": path, "media_type": "text/markdown", "role": role}
        for path, role in markdown_files
    ]
    resources.extend(
        {
            "path": path,
            "media_type": "application/json",
            "role": role,
            "schema_version": PACKAGE_SCHEMA_VERSION,
        }
        for path, role in json_files
    )

    manifest: dict[str, Any] = {
        "artifact_type": "opportunity_package",
        "package_type": "opportunity_brief",
        "schema_version": PACKAGE_SCHEMA_VERSION,
        "package_id": package_id,
        "source_run_artifact_id": source_run_id,
        "pathscout_version": __version__,
        "generated_at": generated_at,
        "generator": "pathscout package",
    }
    manifest["resources"] = resources
    return manifest
262
+
263
+
264
def build_opportunity_document(finding: dict[str, Any], package_id: str, source_run_id: str, generated_at: str) -> dict[str, Any]:
    """Return the ``data/opportunity.json`` content for a finding.

    Fields missing from ``finding`` fall back to an empty string (``0`` for
    the score, ``"weak"`` for evidence strength, ``False`` for suppressed).
    ``placeholders`` is a fixed list of section names.
    """
    source = {
        "id": finding.get("source_id", ""),
        "name": finding.get("source_name", ""),
        "type": finding.get("source_type", ""),
        "evidence_type": finding.get("evidence_type", ""),
    }
    placeholders = [
        "company_moment",
        "problem_hypotheses",
        "fit_notes",
        "questions_to_verify",
    ]

    document: dict[str, Any] = {
        "schema_version": PACKAGE_SCHEMA_VERSION,
        "artifact_type": "opportunity",
        "package_type": "opportunity_brief",
        "package_id": package_id,
        "source_run_artifact_id": source_run_id,
        "generated_at": generated_at,
        "status": "skeletal_oss_export",
        "finding_id": finding.get("id", ""),
    }
    for key in ("company", "title", "url", "tier"):
        document[key] = finding.get(key, "")
    document["score"] = finding.get("score", 0)
    document["source"] = source
    document["suppressed"] = finding.get("suppressed", False)
    document["suppression"] = finding.get("suppression")
    document["evidence_strength"] = finding.get("evidence_strength", "weak")
    document["evidence_warnings"] = list(finding.get("evidence_warnings", []))
    document["placeholders"] = placeholders
    return document
296
+
297
+
298
def build_evidence_document(
    finding: dict[str, Any],
    artifact: dict[str, Any],
    package_id: str,
    source_run_id: str,
    generated_at: str,
) -> dict[str, Any]:
    """Return the ``data/evidence.json`` content for a finding.

    Args:
        finding: Normalized finding being packaged.
        artifact: Run artifact; its ``source_stats`` and ``errors`` are
            copied into the document.
        package_id: Identifier of the enclosing package.
        source_run_id: Identifier of the run artifact.
        generated_at: Timestamp recorded in the document.

    Returns:
        The evidence dictionary. ``summary.evidence_gaps`` holds the
        finding's evidence warnings plus ``missing_source_url`` and/or
        ``missing_evidence_text`` when those fields are empty. ``snippet`` is
        the whitespace-collapsed text shortened to at most 600 characters.
    """
    # Null values stored in the finding are treated like missing ones, so
    # list(None) cannot raise and a null text does not become the literal
    # snippet "None".
    warnings = list(finding.get("evidence_warnings") or [])
    text = finding.get("text") or ""

    gaps = list(warnings)
    if not finding.get("url"):
        gaps.append("missing_source_url")
    if not text:
        gaps.append("missing_evidence_text")

    collapsed_text = " ".join(str(text).split())
    return {
        "schema_version": PACKAGE_SCHEMA_VERSION,
        "artifact_type": "evidence",
        "package_id": package_id,
        "source_run_artifact_id": source_run_id,
        "generated_at": generated_at,
        "summary": {
            "evidence_strength": finding.get("evidence_strength", "weak"),
            "evidence_warnings": warnings,
            "evidence_gaps": gaps,
        },
        "source_stats": artifact.get("source_stats", []),
        "errors": artifact.get("errors", []),
        "sources": [
            {
                "source_id": finding.get("source_id", ""),
                "source_name": finding.get("source_name", ""),
                "source_type": finding.get("source_type", ""),
                "evidence_type": finding.get("evidence_type", ""),
                "url": finding.get("url", ""),
                "observed_at": finding.get("observed_at", ""),
                "content_hash": finding.get("content_hash", finding.get("id", "")),
            }
        ],
        "reasons": list(finding.get("reasons") or []),
        "flags": list(finding.get("flags") or []),
        "snippet": textwrap.shorten(collapsed_text, width=600, placeholder="..."),
    }
338
+
339
+
340
def slugify(value: str) -> str:
    """Lower-case ``value`` and collapse non ``a-z0-9`` runs into single dashes.

    Leading and trailing dashes are stripped; if nothing is left the result
    is ``"opportunity"``.
    """
    lowered = value.lower()
    dashed = re.sub(r"[^a-z0-9]+", "-", lowered)
    trimmed = dashed.strip("-")
    if not trimmed:
        return "opportunity"
    return trimmed
343
+
344
+
345
def render_package_markdown(package: dict[str, Any]) -> str:
    """Render ``package.md``, the human-readable brief for a package.

    Reads ``package["opportunity"]`` and ``package["evidence"]``. The warning,
    flag and snippet sections are emitted only when they have content; the
    reasons and evidence-gap sections always appear, with a fallback line
    when empty. The result ends with a newline.
    """
    opportunity = package["opportunity"]
    evidence = package["evidence"]
    heading_title = opportunity.get("title") or "Untitled signal"
    heading_company = opportunity.get("company") or "Unknown company"

    out = [
        f"# {heading_company}: Opportunity Brief",
        "",
        "This is a skeletal PathScout OSS package generated from local findings. It is evidence context, not career advice.",
        "",
        "## Finding",
        "",
        f"- Title: {heading_title}",
        f"- Company: {heading_company}",
        f"- Tier: {opportunity.get('tier', '')}",
        f"- Score: {opportunity.get('score', 0)}",
        f"- Evidence strength: {opportunity.get('evidence_strength', 'weak')}",
    ]

    source_url = opportunity.get("url")
    if source_url:
        out.append(f"- Source: {source_url}")

    if opportunity.get("suppressed"):
        suppression = opportunity.get("suppression") or {}
        why = suppression.get("reason", "No reason provided")
        out.append(f"- Suppressed: true ({why})")

    warnings = opportunity.get("evidence_warnings", [])
    if warnings:
        out += ["", "## Evidence Warnings", ""]
        out += [f"- {warning}" for warning in warnings]

    out += ["", "## Why It Surfaced", ""]
    reasons = evidence.get("reasons", [])
    if reasons:
        out += [f"- {reason}" for reason in reasons]
    else:
        out.append("- No scoring reasons were captured.")

    flags = evidence.get("flags", [])
    if flags:
        out += ["", "## Flags", ""]
        out += [f"- {flag}" for flag in flags]

    gaps = evidence.get("summary", {}).get("evidence_gaps", [])
    out += ["", "## Evidence To Verify", ""]
    if gaps:
        out += [f"- {gap}" for gap in gaps]
    else:
        out.append("- No evidence gaps were detected by the OSS exporter.")

    snippet = evidence.get("snippet")
    if snippet:
        out += ["", "## Evidence Snippet", "", snippet]

    out += [
        "",
        "## Package Status",
        "",
        "This package intentionally stays at the evidence-brief layer and does not include advanced intelligence or outreach copy.",
    ]
    return "\n".join(out) + "\n"
393
+
394
+
395
def render_agent_markdown(package: dict[str, Any]) -> str:
    """Render ``agent.md``, the usage notes and snapshot for downstream agents.

    Fixed guidance text is followed by a snapshot of the opportunity and,
    when ``evidence["summary"]["evidence_gaps"]`` is non-empty, a list of
    those gaps. The result ends with a newline.
    """
    opportunity = package["opportunity"]
    evidence = package["evidence"]

    guidance = [
        "# Agent Context",
        "",
        "Use this package as structured evidence from PathScout. Do not treat it as a final career recommendation.",
        "",
        "## Safe Use Rules",
        "",
        "- Preserve source URLs, finding IDs, and evidence gaps when producing downstream work.",
        "- Distinguish observed evidence from inference.",
        "- Do not invent a recommended role, job description, compensation, or outreach unless the user explicitly asks for that work.",
        "- If evidence is weak or incomplete, say so plainly.",
        "",
        "## Canonical Data",
        "",
        "- Read `data/opportunity.json` for the canonical opportunity object.",
        "- Read `data/evidence.json` for source details, reasons, flags, and evidence gaps.",
        "- Read `data/findings.json` for copied source findings from the run artifact.",
        "",
    ]
    snapshot = [
        "## Opportunity Snapshot",
        "",
        f"- Finding ID: {opportunity.get('finding_id', '')}",
        f"- Company: {opportunity.get('company', '')}",
        f"- Title: {opportunity.get('title', '')}",
        f"- Tier: {opportunity.get('tier', '')}",
        f"- Score: {opportunity.get('score', 0)}",
        f"- Evidence strength: {opportunity.get('evidence_strength', 'weak')}",
    ]

    gap_section: list[str] = []
    gaps = evidence.get("summary", {}).get("evidence_gaps", [])
    if gaps:
        gap_section = ["", "## Evidence Gaps", ""]
        gap_section += [f"- {gap}" for gap in gaps]

    return "\n".join(guidance + snapshot + gap_section) + "\n"
430
+
431
+
432
def render_markdown(artifact: dict[str, Any]) -> str:
    """Render a run artifact as the markdown opportunity digest.

    Sections, in order: run summary, per-source summary, source errors (only
    when present), one section per entry of ``TIERS`` listing up to 20
    unsuppressed findings each, and a list of up to 20 suppressed findings.
    An empty ``Filtered`` tier is omitted; other empty tiers show a
    placeholder line. Findings whose tier is not in ``TIERS`` are not
    rendered unless they are suppressed.
    """
    summary = artifact["summary"]
    out = [
        "# PathScout Executive Opportunity Digest",
        "",
        f"Generated: {artifact['generated_at']}",
        f"Window: last {artifact['invocation'].get('digest_window_days', 7)} day(s)",
        "",
        "## Run Summary",
        "",
        f"- Fetched: {summary['fetched']}",
        f"- Inserted: {summary['inserted']}",
        f"- Dedupe skipped: {summary['skipped']}",
        f"- Errors: {summary['errors']}",
    ]
    if summary.get("dry_run"):
        out.append("- Dry run: true")

    out += ["", "## Source Summary", ""]
    for stat in artifact.get("source_stats", []):
        error_note = f" | error: {stat['error']}" if stat.get("error") else ""
        out.append(f"- {stat['name']} (`{stat['type']}`): {stat['fetched']}{error_note}")
    out.append("")

    errors = artifact.get("errors")
    if errors:
        out += ["## Source Errors", ""]
        out += [f"- {error}" for error in errors]
        out.append("")

    findings = artifact.get("findings", [])
    for tier in TIERS:
        visible = [item for item in findings if item["tier"] == tier and not item["suppressed"]]
        if not visible:
            # The "Filtered" tier is skipped entirely when it has nothing to show.
            if tier != "Filtered":
                out += [f"## {tier}", "", "_No new items._", ""]
            continue
        out += [f"## {tier}", ""]
        for item in visible[:20]:
            out += format_finding(item)
            out.append("")

    hidden = [item for item in findings if item["suppressed"]]
    if hidden:
        out += ["## Suppressed", ""]
        for item in hidden[:20]:
            why = (item.get("suppression") or {}).get("reason", "No reason provided")
            out.append(f"- {item['title'] or 'Untitled signal'} - {item['company']} ({why})")
        out.append("")

    return "\n".join(out)
481
+
482
+
483
def format_finding(finding: dict[str, Any]) -> list[str]:
    """Return the markdown lines for one finding in the digest.

    Args:
        finding: Normalized finding. ``title``, ``score``, ``source_name``
            and ``evidence_type`` must be present; everything else is
            optional and may be null.

    Returns:
        A heading line, a score/source line, up to 6 reasons, up to 5 flags,
        up to 5 evidence warnings, and a snippet of at most 420 characters
        when the finding has text. No trailing blank line is included.
    """
    title = finding["title"] or "Untitled signal"
    company = f" - {finding['company']}" if finding.get("company") else ""
    url = f" ([source]({finding['url']}))" if finding.get("url") else ""
    # Unknown strength is reported as "weak", the same default the package
    # documents and package renderers use, rather than overstating it.
    strength = finding.get("evidence_strength", "weak")

    lines = [
        f"### {title}{company}{url}",
        "",
        f"Score: {finding['score']} | Source: {finding['source_name']} | Evidence: {finding['evidence_type']} | Strength: {strength}",
        "",
        "Why it surfaced:",
    ]
    # "or []" / "or ''" below: a null stored in the finding is treated like a
    # missing value instead of raising on slicing or .split().
    lines.extend(f"- {reason}" for reason in (finding.get("reasons") or [])[:6])

    flags = finding.get("flags")
    if flags:
        lines.extend(["", "Flags:"])
        lines.extend(f"- {flag}" for flag in flags[:5])

    warnings = finding.get("evidence_warnings")
    if warnings:
        lines.extend(["", "Evidence warnings:"])
        lines.extend(f"- {warning}" for warning in warnings[:5])

    collapsed_text = " ".join(str(finding.get("text") or "").split())
    snippet = textwrap.shorten(collapsed_text, width=420, placeholder="...")
    if snippet:
        lines.extend(["", f"Evidence snippet: {snippet}"])
    return lines