firecloud-devnet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fc_mlops/__init__.py ADDED
@@ -0,0 +1,3 @@
1
"""MLOps extensions for FireCloud."""

# Version string of the fc_mlops package.
__version__ = "0.1.0"
fc_mlops/__main__.py ADDED
@@ -0,0 +1,5 @@
1
"""fc_mlops __main__ — allows ``python -m fc_mlops``.

Running the package as a module launches the failure-simulation demo
implemented in ``fc_mlops.simulate_failure.main``.
"""

from fc_mlops.simulate_failure import main

# Guard the call so that merely importing ``fc_mlops.__main__`` does not start
# the simulation as an import side effect.
if __name__ == "__main__":
    main()
fc_mlops/anomaly.py ADDED
@@ -0,0 +1,112 @@
1
+ """IsolationForest-based anomaly scoring on telemetry readings."""
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+
7
+ import numpy as np
8
+ from pydantic import BaseModel, ConfigDict
9
+ from sklearn.ensemble import IsolationForest
10
+
11
# JSONL telemetry log that is read when no explicit log path is supplied.
_LOG_PATH = Path.home() / ".fc_mlops" / "telemetry_log.jsonl"
# JSONL file that receives one record per detected anomaly.
_ALERTS_PATH = Path.home() / ".fc_mlops" / "alerts.jsonl"

# Columns pulled from each telemetry reading; the order here is the column
# order of the feature matrix built in check_anomaly().
_FEATURES = [
    "disk_io_read_mbps",
    "chunk_upload_latency_ms",
    "cpu_percent",
    "memory_percent",
]
21
+
22
+
23
# Result record produced by check_anomaly(); frozen, so fields cannot be
# reassigned after construction.
class AnomalyReport(BaseModel):
    model_config = ConfigDict(frozen=True)

    timestamp: datetime  # UTC time at which the report was created
    is_anomaly: bool  # True when the latest reading was predicted as an outlier
    anomaly_score: float  # IsolationForest decision-function value, rounded to 4 decimals
    flagged_metrics: list[str]  # features more than 2 std devs from the window mean
    recommendation: str  # human-readable follow-up hint
31
+
32
+
33
def _append_alert(report: AnomalyReport) -> None:
    """Append *report* to the alerts file as a single JSON line.

    The parent directory of the alerts file is created on demand.
    """
    alerts_dir = _ALERTS_PATH.parent
    alerts_dir.mkdir(parents=True, exist_ok=True)
    with open(_ALERTS_PATH, mode="a", encoding="utf-8") as sink:
        # default=str stringifies values json cannot encode natively
        # (the report's datetime, for instance).
        payload = json.dumps(report.model_dump(), default=str)
        sink.write(f"{payload}\n")
37
+
38
+
39
+ def _load_readings(log_path: Path | None = None, max_lines: int = 200) -> list[dict]:
40
+ path = log_path or _LOG_PATH
41
+ if not path.exists():
42
+ return []
43
+
44
+ lines = path.read_text(encoding="utf-8").strip().splitlines()
45
+ tail = lines[-max_lines:] if len(lines) > max_lines else lines
46
+
47
+ readings = []
48
+ for line in tail:
49
+ try:
50
+ readings.append(json.loads(line))
51
+ except json.JSONDecodeError:
52
+ continue
53
+ return readings
54
+
55
+
56
def _feature_value(reading: dict, name: str) -> float:
    """Return ``reading[name]`` as a finite float, or ``0.0`` when unusable."""
    import math

    try:
        value = float(reading.get(name, 0.0))
    except (TypeError, ValueError):
        # null / non-numeric values are treated like a missing key
        return 0.0
    # A NaN or infinity would turn the column mean/std below into NaN.
    return value if math.isfinite(value) else 0.0


def check_anomaly(log_path: Path | None = None) -> AnomalyReport | dict:
    """Run anomaly detection against the latest telemetry data.

    Pass *log_path* to override the default telemetry log location
    (used by the simulation script and tests).

    An IsolationForest is fitted on the loaded readings and the most recent
    reading is scored against it. When that reading is classified as an
    anomaly the report is also appended to the alerts file.

    Returns:
        ``{"status": "insufficient_data", "readings": <count>}`` when fewer
        than 50 readings are available, otherwise an :class:`AnomalyReport`.
    """
    # Ignore any record that is not a JSON object; it has no feature columns.
    readings = [r for r in _load_readings(log_path) if isinstance(r, dict)]

    if len(readings) < 50:
        return {"status": "insufficient_data", "readings": len(readings)}

    # Feature matrix: one row per reading, columns in _FEATURES order.
    X = np.array([[_feature_value(r, f) for f in _FEATURES] for r in readings])

    clf = IsolationForest(contamination=0.05, random_state=42)
    clf.fit(X)

    latest = X[-1].reshape(1, -1)
    prediction = clf.predict(latest)[0]  # -1 = anomaly, 1 = normal
    score = clf.decision_function(latest)[0]
    # The comparison yields a NumPy boolean; store a plain Python bool so the
    # report field does not depend on how the model coerces NumPy scalars.
    is_anomaly = bool(prediction == -1)

    # Flag every feature of the latest reading that lies more than two
    # standard deviations from the window mean (constant columns are skipped).
    means = X.mean(axis=0)
    stds = X.std(axis=0)
    flagged: list[str] = [
        feat
        for i, feat in enumerate(_FEATURES)
        if stds[i] > 0 and abs(X[-1, i] - means[i]) > 2 * stds[i]
    ]

    # Recommendation priority: latency, then CPU, then disk I/O.
    if not is_anomaly:
        rec = "Node healthy"
    elif "chunk_upload_latency_ms" in flagged:
        rec = "High latency — check network"
    elif "cpu_percent" in flagged:
        rec = "CPU spike — check running processes"
    elif "disk_io_read_mbps" in flagged:
        rec = "Disk I/O degraded — check storage health"
    else:
        rec = "Anomalous reading — investigate node"

    report = AnomalyReport(
        timestamp=datetime.now(timezone.utc),
        is_anomaly=is_anomaly,
        anomaly_score=round(float(score), 4),
        flagged_metrics=flagged,
        recommendation=rec,
    )

    if is_anomaly:
        _append_alert(report)

    return report
@@ -0,0 +1,111 @@
1
+ """Version-tracked ML artifact storage backed by FireCloud's Node API."""
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+ from typing import Literal
7
+
8
+ from pydantic import BaseModel, ConfigDict
9
+
10
# JSON manifest file holding the metadata of every saved artifact.
_MANIFEST_PATH = Path.home() / ".fc_mlops" / "artifacts.json"
11
+
12
+
13
class ArtifactMetadata(BaseModel):
    """Immutable metadata record for a stored ML artifact."""

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    name: str
    version: str
    artifact_type: Literal["model", "dataset", "checkpoint"]
    saved_at: datetime  # set to the current UTC time by save_artifact()
    file_size_bytes: int  # size of the local file as reported by stat()
    metrics: dict[str, float]
    tags: list[str]
    firecloud_file_id: str  # identifier returned by node.upload()
26
+
27
+
28
def _load_manifest() -> list[dict]:
    """Return the manifest entries, or ``[]`` when none can be read.

    Reading is best effort: a missing, unreadable, undecodable or malformed
    manifest is treated as empty, and entries that are not JSON objects are
    dropped so callers can rely on every element being a ``dict``.
    """
    try:
        data = json.loads(_MANIFEST_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (invalid UTF-8 bytes).
        return []
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]
35
+
36
+
37
def _save_manifest(entries: list[dict]) -> None:
    """Write *entries* to the manifest file as indented JSON.

    The payload is first written to a sibling temporary file and then moved
    over the manifest, so an interrupted write cannot leave a truncated
    manifest behind (which would subsequently be read back as empty).
    """
    _MANIFEST_PATH.parent.mkdir(parents=True, exist_ok=True)
    # default=str stringifies values json cannot encode natively (datetimes).
    payload = json.dumps(entries, indent=2, default=str)
    tmp_path = _MANIFEST_PATH.with_name(_MANIFEST_PATH.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(_MANIFEST_PATH)
43
+
44
+
45
async def save_artifact(
    node,
    local_path: Path,
    name: str,
    version: str,
    artifact_type: str,
    metrics: dict[str, float] | None = None,
    tags: list[str] | None = None,
) -> ArtifactMetadata:
    """Upload *local_path* to FireCloud and record metadata in the manifest.

    All caller-supplied metadata is validated and the file is stat'ed
    *before* the upload starts, so invalid input (for example an unsupported
    *artifact_type*) fails without leaving an untracked upload behind.

    Args:
        node: Object providing an awaitable ``upload(path)`` that returns the
            file id.
        local_path: File to upload.
        name: Artifact name.
        version: Artifact version.
        artifact_type: One of the types accepted by :class:`ArtifactMetadata`.
        metrics: Optional metric values; defaults to an empty mapping.
        tags: Optional tags; defaults to an empty list.

    Returns the :class:`ArtifactMetadata` for the saved artifact.
    """
    local_path = Path(local_path)
    fields = {
        "name": name,
        "version": version,
        "artifact_type": artifact_type,
        "file_size_bytes": local_path.stat().st_size,
        "metrics": metrics or {},
        "tags": tags or [],
    }

    # Dry-run validation with a placeholder file id: raises the same
    # validation error as the real construction below, but before any upload.
    ArtifactMetadata(
        **fields, saved_at=datetime.now(timezone.utc), firecloud_file_id=""
    )

    file_id = await node.upload(local_path)

    metadata = ArtifactMetadata(
        **fields,
        saved_at=datetime.now(timezone.utc),
        firecloud_file_id=file_id,
    )

    entries = _load_manifest()
    entries.append(metadata.model_dump())
    _save_manifest(entries)
    return metadata
76
+
77
+
78
async def load_artifact(
    node,
    name: str,
    version: str,
    destination: Path,
) -> Path:
    """Download an artifact by name+version from the manifest.

    When the same name+version was saved more than once, the most recently
    recorded entry is used.

    Args:
        node: Object providing an awaitable ``download(file_id, destination)``.
        name: Artifact name to look up.
        version: Artifact version to look up.
        destination: Path handed to ``node.download``.

    Returns:
        The resolved *destination* path.

    Raises:
        ValueError: if no usable manifest entry matches *name* and *version*.
    """
    entries = _load_manifest()

    # Search newest-first: save_artifact() appends to the manifest, so the
    # last matching entry is the latest upload. Malformed entries are skipped
    # instead of raising KeyError/AttributeError.
    match = next(
        (
            entry
            for entry in reversed(entries)
            if isinstance(entry, dict)
            and entry.get("name") == name
            and entry.get("version") == version
        ),
        None,
    )

    if match is None:
        raise ValueError(
            f"Artifact '{name}' version '{version}' not found in manifest"
        )

    file_id = match.get("firecloud_file_id")
    if not file_id:
        raise ValueError(
            f"Artifact '{name}' version '{version}' has no file id in manifest"
        )

    destination = Path(destination)
    await node.download(file_id, destination)
    return destination.resolve()
101
+
102
+
103
def list_artifacts(artifact_type: str | None = None) -> list[ArtifactMetadata]:
    """List the artifacts recorded in the manifest.

    With a truthy *artifact_type* only entries of that type are returned;
    otherwise every entry is returned, in manifest order.
    """
    return [
        ArtifactMetadata(**record)
        for record in _load_manifest()
        if not artifact_type or record.get("artifact_type") == artifact_type
    ]
fc_mlops/cli.py ADDED
@@ -0,0 +1,190 @@
1
+ """fc-ml CLI — artifact management, telemetry, and anomaly detection."""
2
+
3
+ from pathlib import Path
4
+
5
+ import click
6
+
7
+
8
# Root command group: the sub-commands defined in this module attach to it.
@click.group()
def cli():
    """fc-ml — MLOps extensions for FireCloud."""
12
+
13
+
14
+ # --- Artifact commands ---
15
+
16
@cli.command()
@click.argument("path", type=click.Path(exists=True))
@click.option("--name", "-n", required=True, help="Artifact name.")
@click.option("--version", "-v", required=True, help="Artifact version.")
@click.option(
    "--type", "artifact_type",
    type=click.Choice(["model", "dataset", "checkpoint"]),
    required=True,
    help="Artifact type.",
)
@click.option(
    "--metric", "-m",
    multiple=True,
    help="Metric in key=value format (repeatable).",
)
@click.option("--passphrase", prompt=True, hide_input=True, help="Network passphrase.")
@click.option("--port", default=7474, type=int)
@click.option("--storage", default=None, type=click.Path())
def save(
    path: str,
    name: str,
    version: str,
    artifact_type: str,
    metric: tuple[str, ...],
    passphrase: str,
    port: int,
    storage: str | None,
):
    """Save an artifact to the FireCloud network."""
    # Heavy dependencies are imported lazily so that other sub-commands do not
    # pay for them.
    import asyncio
    from firecloud import Network, Node
    from fc_mlops.artifact_store import save_artifact

    # Parse the repeated --metric options. Malformed values are reported as a
    # usage error instead of being silently dropped or raising a raw
    # ValueError traceback.
    metrics: dict[str, float] = {}
    for item in metric:
        key, sep, raw_value = item.partition("=")
        key = key.strip()
        if not sep or not key:
            raise click.BadParameter(
                f"expected key=value, got {item!r}", param_hint="--metric"
            )
        try:
            metrics[key] = float(raw_value.strip())
        except ValueError:
            raise click.BadParameter(
                f"value for {key!r} is not a number: {raw_value.strip()!r}",
                param_hint="--metric",
            ) from None

    storage_path = Path(storage) if storage else Path.home() / ".firecloud" / "storage"

    async def _run():
        net = Network.load(Path.home() / ".firecloud" / "network.key", passphrase)
        node = Node(network=net, storage_path=storage_path, port=port, enable_discovery=False)
        await node.start()
        try:
            meta = await save_artifact(
                node, Path(path), name, version, artifact_type, metrics, []
            )
            click.echo(click.style("✓ Artifact saved.", fg="green"))
            click.echo(f" Name : {meta.name}")
            click.echo(f" Version : {meta.version}")
            click.echo(f" File ID : {meta.firecloud_file_id}")
            click.echo(f" Size : {meta.file_size_bytes} bytes")
        finally:
            # Always shut the node down, even when the upload fails.
            await node.stop()

    asyncio.run(_run())
74
+
75
+
76
@cli.command()
@click.argument("name")
@click.option("--version", "-v", required=True, help="Artifact version.")
@click.option("--dest", "-d", required=True, type=click.Path(), help="Destination path.")
@click.option("--passphrase", prompt=True, hide_input=True, help="Network passphrase.")
@click.option("--port", default=7474, type=int)
@click.option("--storage", default=None, type=click.Path())
def load(
    name: str,
    version: str,
    dest: str,
    passphrase: str,
    port: int,
    storage: str | None,
):
    """Load an artifact from the FireCloud network."""
    import asyncio
    from firecloud import Network, Node
    from fc_mlops.artifact_store import load_artifact

    # Fall back to the default storage directory when --storage is not given.
    if storage:
        storage_path = Path(storage)
    else:
        storage_path = Path.home() / ".firecloud" / "storage"

    async def _download():
        key_file = Path.home() / ".firecloud" / "network.key"
        network = Network.load(key_file, passphrase)
        node = Node(
            network=network,
            storage_path=storage_path,
            port=port,
            enable_discovery=False,
        )
        await node.start()
        try:
            result_path = await load_artifact(node, name, version, Path(dest))
            message = f"✓ Artifact downloaded to {result_path}"
            click.echo(click.style(message, fg="green"))
        finally:
            # The node is stopped whether or not the download succeeded.
            await node.stop()

    asyncio.run(_download())
109
+
110
+
111
@cli.command("list")
@click.option(
    "--type", "artifact_type",
    type=click.Choice(["model", "dataset", "checkpoint"]),
    default=None,
    help="Filter by artifact type.",
)
def list_artifacts(artifact_type: str | None):
    """List tracked ML artifacts."""
    from fc_mlops.artifact_store import list_artifacts as _list

    artifacts = _list(artifact_type)
    if not artifacts:
        click.echo("No artifacts found.")
        return

    # Header row, a separator, then one fixed-width row per artifact.
    header = f"{'Name':<20} {'Version':<10} {'Type':<12} {'Size':<12} {'File ID':<20}"
    click.echo(click.style(header, bold=True))
    click.echo("─" * 75)
    for artifact in artifacts:
        size = f"{artifact.file_size_bytes:,} B"
        # The file id is cut to 20 characters to fit its column.
        short_id = artifact.firecloud_file_id[:20]
        click.echo(
            f"{artifact.name:<20} {artifact.version:<10} {artifact.artifact_type:<12} "
            f"{size:<12} {short_id}"
        )
140
+
141
+
142
+ # --- Telemetry ---
143
+
144
# Sub-group that holds the telemetry commands.
@cli.group()
def telemetry():
    """Telemetry server commands."""
148
+
149
@telemetry.command("start")
def telemetry_start():
    """Start the telemetry metrics server on localhost:7475."""
    from fc_mlops.telemetry import start_server

    # Announce the address first; the call below hands control to the server.
    banner = click.style("Starting telemetry server on http://127.0.0.1:7475", fg="cyan")
    click.echo(banner)
    start_server()
155
+
156
+
157
+ # --- Anomaly detection ---
158
+
159
# Sub-group that holds the anomaly-detection commands.
@cli.group()
def anomaly():
    """Anomaly detection commands."""
163
+
164
@anomaly.command("check")
def anomaly_check():
    """Run anomaly detection on recent telemetry data."""
    from fc_mlops.anomaly import check_anomaly

    result = check_anomaly()

    # A plain dict signals that too few readings were available.
    if isinstance(result, dict):
        count = result.get("readings", 0)
        click.echo(f"Insufficient data: {count} readings (need ≥ 50)")
        return

    verdict = "Yes" if result.is_anomaly else "No"
    flagged = ", ".join(result.flagged_metrics) or "None"

    click.echo(click.style("Anomaly Detection Results", bold=True))
    click.echo(f" Anomaly detected : {verdict}")
    click.echo(f" Anomaly score : {result.anomaly_score}")
    click.echo(f" Flagged metrics : {flagged}")
    click.echo(f" Recommendation : {result.recommendation}")
180
+
181
+
182
@cli.command("simulate-failure")
def simulate_failure():
    """Run the failure simulation demo."""
    # Imported lazily and under an alias that says what the entry point does.
    from fc_mlops.simulate_failure import main as run_simulation

    run_simulation()
187
+
188
+
189
# Invoke the command group when this module is executed as a script.
if __name__ == "__main__":
    cli()
@@ -0,0 +1,100 @@
1
+ """Standalone failure simulation demo.
2
+
3
+ Generates synthetic telemetry, injects anomalies, and runs detection.
4
+ Run via: python -m fc_mlops.simulate_failure
5
+ """
6
+
7
+ import json
8
+ import random
9
+ import tempfile
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+
13
+ from rich.console import Console
14
+ from rich.table import Table
15
+
16
+ from fc_mlops.anomaly import check_anomaly
17
+
18
# Rich console shared by all output of the simulation.
console = Console()
19
+
20
+
21
def _write_reading(log_path: Path, reading: dict) -> None:
    """Append *reading* to *log_path* as one JSON line."""
    with open(log_path, mode="a", encoding="utf-8") as sink:
        # default=str stringifies any value json cannot encode natively.
        encoded = json.dumps(reading, default=str)
        sink.write(f"{encoded}\n")
24
+
25
+
26
def _normal_reading() -> dict:
    """Build one synthetic reading drawn from the baseline value ranges."""

    def between(low: float, high: float) -> float:
        # Uniform sample rounded to two decimals.
        return round(random.uniform(low, high), 2)

    # Keys are filled in the original order so the sequence of random draws,
    # and therefore the result under a fixed seed, is unchanged.
    reading: dict = {"node_id": "sim-node"}
    reading["timestamp"] = datetime.now(timezone.utc).isoformat()
    reading["disk_io_read_mbps"] = between(50, 150)
    reading["disk_io_write_mbps"] = between(30, 100)
    reading["chunk_upload_latency_ms"] = between(20, 50)
    reading["active_connections"] = random.randint(1, 5)
    reading["storage_used_percent"] = between(30, 60)
    reading["cpu_percent"] = between(10, 30)
    reading["memory_percent"] = between(40, 60)
    return reading
38
+
39
+
40
def _anomalous_reading() -> dict:
    """Build one synthetic reading drawn from the failure value ranges."""

    def between(low: float, high: float) -> float:
        # Uniform sample rounded to two decimals.
        return round(random.uniform(low, high), 2)

    # Keys are filled in the original order so the sequence of random draws,
    # and therefore the result under a fixed seed, is unchanged.
    reading: dict = {"node_id": "sim-node"}
    reading["timestamp"] = datetime.now(timezone.utc).isoformat()
    reading["disk_io_read_mbps"] = between(5, 15)
    reading["disk_io_write_mbps"] = between(1, 5)
    reading["chunk_upload_latency_ms"] = between(400, 600)
    reading["active_connections"] = random.randint(0, 1)
    reading["storage_used_percent"] = between(85, 98)
    reading["cpu_percent"] = between(85, 95)
    reading["memory_percent"] = between(80, 95)
    return reading
52
+
53
+
54
def main() -> None:
    """Run the failure simulation end to end and print the outcome.

    Writes 60 baseline and 10 anomalous synthetic readings to a temporary
    telemetry log, runs ``check_anomaly`` on that log and renders the result
    as a Rich table followed by a PASS/FAIL line. The temporary directory is
    removed once detection has run.

    NOTE: ``check_anomaly`` appends a detected anomaly to the regular alerts
    file, so a passing run also records the simulated alert there.
    """
    # TemporaryDirectory cleans up after itself; the earlier mkdtemp() call
    # left a new directory behind on every run.
    with tempfile.TemporaryDirectory(prefix="fc_mlops_sim_") as tmp_dir:
        log_path = Path(tmp_dir) / "telemetry_log.jsonl"

        # baseline
        console.print("[bold cyan][Phase 1][/bold cyan] Generating 60 baseline readings...")
        for _ in range(60):
            _write_reading(log_path, _normal_reading())

        # inject failures
        console.print("[bold yellow][Phase 2][/bold yellow] Injecting failure signatures...")
        for _ in range(10):
            _write_reading(log_path, _anomalous_reading())

        # detect
        console.print("[bold magenta][Phase 3][/bold magenta] Running anomaly detection...")
        result = check_anomaly(log_path=log_path)

    # A plain dict means detection was skipped for lack of readings.
    if isinstance(result, dict):
        console.print(f"[red]Insufficient data: {result}[/red]")
        console.print("[bold red]✗ FAIL: Not enough readings for detection[/bold red]")
        return

    table = Table(title="Anomaly Detection Results", show_header=True)
    table.add_column("Metric", style="cyan", width=25)
    table.add_column("Value", style="white", width=50)

    table.add_row("Anomaly detected", "[red]Yes[/red]" if result.is_anomaly else "[green]No[/green]")
    table.add_row("Anomaly score", str(round(result.anomaly_score, 4)))
    table.add_row("Flagged metrics", ", ".join(result.flagged_metrics) if result.flagged_metrics else "None")
    table.add_row("Recommendation", result.recommendation)

    console.print()
    console.print(table)
    console.print()

    if result.is_anomaly:
        console.print("[bold green]✓ PASS: Anomaly correctly detected[/bold green]")
    else:
        console.print(
            "[bold red]✗ FAIL: Anomaly not detected — "
            "check contamination parameter[/bold red]"
        )
97
+
98
+
99
# Run the simulation when this module is executed as a script.
if __name__ == "__main__":
    main()
fc_mlops/telemetry.py ADDED
@@ -0,0 +1,72 @@
1
+ """FastAPI metrics endpoint with psutil system monitoring."""
2
+
3
+ import json
4
+ import time
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+
8
+ import psutil
9
+ from fastapi import FastAPI
10
+ from pydantic import BaseModel
11
+
12
# JSONL file to which every served metrics snapshot is appended.
_LOG_PATH = Path.home() / ".fc_mlops" / "telemetry_log.jsonl"

# FastAPI application object on which the endpoints below are registered.
app = FastAPI(title="FireCloud Telemetry", version="0.1.0")
15
+
16
+
17
class NodeMetrics(BaseModel):
    """Snapshot of system and node health metrics."""
    node_id: str  # _collect_metrics() always reports "local"
    timestamp: datetime  # UTC time at which the snapshot was taken
    disk_io_read_mbps: float  # bytes read per second divided by 1024 * 1024
    disk_io_write_mbps: float  # bytes written per second divided by 1024 * 1024
    chunk_upload_latency_ms: float  # currently always 0.0 (not measured by _collect_metrics)
    active_connections: int  # currently always 0 (not measured by _collect_metrics)
    storage_used_percent: float  # usage of the "/" filesystem
    cpu_percent: float
    memory_percent: float
28
+
29
+
30
def _collect_metrics() -> NodeMetrics:
    """Take one snapshot of disk, CPU and memory metrics via psutil.

    Disk throughput is derived from two ``disk_io_counters()`` samples taken
    roughly 0.1 s apart and divided by the *measured* interval, so scheduling
    delays around the sleep do not inflate the reported rate. Both rates are
    0.0 when the counters are unavailable.

    ``chunk_upload_latency_ms`` and ``active_connections`` are not measured
    here and are reported as zero.
    """
    sample_window = 0.1  # seconds between the two disk counter samples
    bytes_per_mib = 1024 * 1024
    read_mbps = write_mbps = 0.0

    before = psutil.disk_io_counters()
    if before:
        started = time.monotonic()
        time.sleep(sample_window)
        after = psutil.disk_io_counters()
        elapsed = time.monotonic() - started
        # The second sample may be unavailable too; keep 0.0 in that case.
        if after and elapsed > 0:
            read_mbps = (after.read_bytes - before.read_bytes) / elapsed / bytes_per_mib
            write_mbps = (after.write_bytes - before.write_bytes) / elapsed / bytes_per_mib

    disk_usage = psutil.disk_usage("/")

    return NodeMetrics(
        node_id="local",
        timestamp=datetime.now(timezone.utc),
        disk_io_read_mbps=round(read_mbps, 2),
        disk_io_write_mbps=round(write_mbps, 2),
        chunk_upload_latency_ms=0.0,
        active_connections=0,
        storage_used_percent=round(disk_usage.percent, 2),
        # NOTE(review): with interval=None psutil reports usage since its
        # previous call, so the first sample after start-up may not be
        # meaningful — confirm against the psutil documentation.
        cpu_percent=round(psutil.cpu_percent(interval=None), 2),
        memory_percent=round(psutil.virtual_memory().percent, 2),
    )
54
+
55
+
56
@app.get("/metrics", response_model=NodeMetrics)
def get_metrics() -> NodeMetrics:
    """Collect and return current system metrics."""
    snapshot = _collect_metrics()

    # Each served snapshot is also appended to the JSONL log; the log
    # directory is created on demand.
    log_dir = _LOG_PATH.parent
    log_dir.mkdir(parents=True, exist_ok=True)
    with open(_LOG_PATH, mode="a", encoding="utf-8") as log_file:
        record = json.dumps(snapshot.model_dump(), default=str)
        log_file.write(f"{record}\n")

    return snapshot
67
+
68
+
69
def start_server() -> None:
    """Start the telemetry server on localhost:7475."""
    # uvicorn is imported only when the server is actually started.
    import uvicorn

    bind_host, bind_port = "127.0.0.1", 7475
    uvicorn.run(app, host=bind_host, port=bind_port)
fc_rag/__init__.py ADDED
@@ -0,0 +1,3 @@
1
"""Private RAG pipeline for FireCloud — all processing runs locally."""

# Version string of the fc_rag package.
__version__ = "0.1.0"
fc_rag/cli.py ADDED
@@ -0,0 +1,51 @@
1
+ """fc-rag CLI — index files and query the local RAG pipeline."""
2
+
3
+ from pathlib import Path
4
+ import click
5
+
6
+
7
# Root command group: the `index` and `query` commands attach to it.
@click.group()
def cli():
    """fc-rag — Private RAG pipeline for FireCloud docs."""
11
+
12
+
13
@cli.command()
@click.argument("path", type=click.Path(exists=True))
def index(path: str):
    """Index files at PATH into the local vector store."""
    from fc_rag.indexer import index_path

    root = Path(path)
    total_chunks = index_path(root)

    # The file count is only used for the summary line: a single file counts
    # as one, a directory is walked for files with a listed suffix.
    if not root.is_file():
        supported = {".txt", ".md", ".py", ".json"}
        matching = [
            entry
            for entry in root.rglob("*")
            if entry.is_file() and entry.suffix in supported
        ]
        file_count = len(matching)
    else:
        file_count = 1

    click.echo(f"Indexed {total_chunks} chunks from {file_count} files")
32
+
33
+
34
@cli.command()
@click.argument("question")
def query(question: str):
    """Query the local RAG pipeline with a natural-language question."""
    from fc_rag.query_engine import query as run_query
    from fc_rag.retriever import retrieve

    # Print the answer first, then the files the retriever returns for the
    # same question.
    click.echo(run_query(question))

    hits = retrieve(question)
    if not hits:
        return

    source_names = sorted({hit.filename for hit in hits})
    click.echo(f"\nSources: {', '.join(source_names)}")
48
+
49
+
50
# Invoke the command group when this module is executed as a script.
if __name__ == "__main__":
    cli()
fc_rag/config.py ADDED
@@ -0,0 +1,24 @@
1
+ """Pydantic settings for the fc_rag pipeline."""
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+ from pydantic import BaseModel, ConfigDict
6
+
7
+
8
class Settings(BaseModel):
    """All paths default to ~/.fc_rag/ so it works out of the box."""

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    # NOTE: the Path defaults below call Path.home() once, when the class
    # body is executed, not each time a Settings instance is created.
    ollama_model: str = "llama3.2:3b"
    embedding_model: str = "BAAI/bge-small-en-v1.5"
    qdrant_path: Path = Path.home() / ".fc_rag" / "vectors"
    collection_name: str = "firecloud_docs"
    top_k: int = 5
    max_retries: int = 3
    log_path: Path = Path.home() / ".fc_rag" / "query_log.jsonl"
20
+
21
+
22
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the default :class:`Settings`; the cache makes every call return the same instance."""
    settings = Settings()
    return settings