feedloop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feedloop/__init__.py +155 -0
- feedloop/__main__.py +44 -0
- feedloop/_config.py +6 -0
- feedloop/_db.py +172 -0
- feedloop/_export.py +30 -0
- feedloop/_models.py +30 -0
- feedloop/_routes.py +99 -0
- feedloop/_server.py +97 -0
- feedloop/py.typed +0 -0
- feedloop-0.1.0.dist-info/METADATA +41 -0
- feedloop-0.1.0.dist-info/RECORD +14 -0
- feedloop-0.1.0.dist-info/WHEEL +4 -0
- feedloop-0.1.0.dist-info/entry_points.txt +2 -0
- feedloop-0.1.0.dist-info/licenses/LICENSE +21 -0
feedloop/__init__.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""feedloop — The fastest way to collect human preference data for LLMs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import atexit
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
import webbrowser
|
|
9
|
+
|
|
10
|
+
from ._config import DEFAULT_PORT
|
|
11
|
+
from ._db import Database
|
|
12
|
+
from ._export import export_dpo
|
|
13
|
+
from ._server import BackgroundServer
|
|
14
|
+
|
|
15
|
+
__version__ = "0.1.0"
|
|
16
|
+
|
|
17
|
+
# ── module-level state ──────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
_server: BackgroundServer | None = None
|
|
20
|
+
_db: Database | None = None
|
|
21
|
+
_session_id: str | None = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _ensure_started() -> tuple[BackgroundServer, Database, str]:
    """Return the live ``(server, db, session_id)`` triple.

    Raises:
        RuntimeError: If any of the three module-level slots is still
            ``None``, i.e. ``start()`` has not run or ``stop()`` reset them.
    """
    server, db, session_id = _server, _db, _session_id
    if server is not None and db is not None and session_id is not None:
        return server, db, session_id
    raise RuntimeError("Call feedloop.start() first")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ── public API ──────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def start(
    port: int = DEFAULT_PORT,
    db_path: str | None = None,
    open_browser: bool = True,
) -> None:
    """Launch the HITL feedback server in a background thread.

    Idempotent — calling start() twice reuses the running server; the
    arguments of the later call are ignored.

    Args:
        port: Port handed to ``BackgroundServer`` as its starting point.
        db_path: Path handed to ``Database``; ``None`` selects its default.
        open_browser: When true, open the server URL with ``webbrowser``.

    Raises:
        Exception: Anything raised while opening the database or starting
            the server is re-raised after the partially built resources
            have been released.
    """
    global _server, _db, _session_id

    if _server is not None:
        return

    # Build everything in locals first: a failure part-way through must not
    # leave the module half-initialised or leak the open SQLite connection.
    db = Database(db_path=db_path)
    server = None
    try:
        server = BackgroundServer(db, port=port)
        server.start()
    except BaseException:
        if server is not None:
            server.stop()
        db.close()
        raise

    _session_id = uuid.uuid4().hex
    _db = db
    _server = server

    print(f"feedloop: server running at {server.url}")

    if open_browser:
        webbrowser.open(server.url)

    # stop() tolerates being called more than once, so registering it on
    # every successful start is safe.
    atexit.register(stop)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def compare(
    prompt: str,
    outputs: list[str],
    *,
    metadata: dict | None = None,
) -> str:
    """Queue a two-way comparison for human review and return its id.

    The call only stores the comparison under the current session; it does
    not wait for a rating.

    Args:
        prompt: Prompt both outputs respond to.
        outputs: Exactly two candidate outputs.
        metadata: Optional extra data forwarded to the database layer.

    Raises:
        RuntimeError: If ``start()`` has not been called.
        ValueError: If ``outputs`` does not hold exactly two items.
    """
    _, db, session_id = _ensure_started()

    if len(outputs) != 2:
        raise ValueError("outputs must contain exactly 2 items")

    fields = {
        "session_id": session_id,
        "prompt": prompt,
        "output_a": outputs[0],
        "output_b": outputs[1],
        "metadata": metadata,
    }
    return db.insert_comparison(**fields)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def wait(
    comparison_id: str | None = None,
    timeout: float | None = None,
) -> dict | None:
    """Block until a specific comparison (or all pending) is rated.

    Args:
        comparison_id: Id returned by ``compare()``. When omitted, wait until
            the current session has at least one comparison and none of them
            is still pending.
        timeout: Maximum number of seconds to wait. ``None`` waits
            indefinitely; ``0`` checks once and returns immediately.

    Returns:
        For a single comparison: a dict with ``prompt``, ``chosen``,
        ``rejected`` and ``status``. ``chosen`` and ``rejected`` are ``None``
        when the comparison was skipped rather than rated.
        Without ``comparison_id``: ``{"completed": N, "total": T}``.
        ``None`` if the timeout expires first.

    Raises:
        RuntimeError: If ``start()`` has not been called.
    """
    _, db, session_id = _ensure_started()
    # Compare against None so that timeout=0 means "poll once", not "forever".
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        if comparison_id:
            row = db.get_comparison(comparison_id)
            if row is not None and row["status"] != "pending":
                picked = row["chosen"]
                if picked == "a":
                    chosen, rejected = row["output_a"], row["output_b"]
                elif picked == "b":
                    chosen, rejected = row["output_b"], row["output_a"]
                else:
                    # Skipped rows carry no preference; do not invent one.
                    chosen = rejected = None
                return {
                    "prompt": row["prompt"],
                    "chosen": chosen,
                    "rejected": rejected,
                    "status": row["status"],
                }
        else:
            stats = db.get_stats(session_id=session_id)
            if stats["pending"] == 0 and stats["total"] > 0:
                return {"completed": stats["completed"], "total": stats["total"]}

        pause = 0.3
        if deadline is not None:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return None
            # Never sleep past the deadline.
            pause = min(pause, remaining)

        time.sleep(pause)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def status() -> dict:
    """Return {"pending": N, "completed": M, "skipped": S, "total": T}.

    The counts cover the current session only.

    Raises:
        RuntimeError: If ``start()`` has not been called.
    """
    _, db, session_id = _ensure_started()
    counts = db.get_stats(session_id=session_id)
    return counts
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def export(
    path: str = "preferences.jsonl",
    *,
    session_id: str | None = None,
    format: str = "dpo",
) -> int:
    """Write completed comparisons to a JSONL file and return how many were written.

    Args:
        path: Destination file.
        session_id: Session to export; ``None`` means the current session.
        format: Output format. Only ``"dpo"`` is accepted.

    Raises:
        RuntimeError: If ``start()`` has not been called.
        ValueError: If ``format`` is anything other than ``"dpo"``.
    """
    _, db, current_session = _ensure_started()

    if format != "dpo":
        raise ValueError(f"Unsupported format: {format!r}. Only 'dpo' is supported.")

    target_session = current_session if session_id is None else session_id
    count = export_dpo(db, path, session_id=target_session)
    print(f"feedloop: exported {count} preferences to {path}")
    return count
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def stop() -> None:
    """Stop the background server, close the database and clear module state.

    Safe to call when nothing is running: unset slots are simply skipped.
    """
    global _server, _db, _session_id

    server, db = _server, _db
    if server:
        server.stop()
    if db:
        db.close()

    _server = _db = _session_id = None
|
feedloop/__main__.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""CLI entry point: python -m feedloop"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import signal
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from . import start, stop
|
|
10
|
+
from ._config import DEFAULT_PORT
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main() -> None:
    """Run the feedloop server from the command line until interrupted.

    Parses ``--port``, ``--db`` and ``--no-browser``, starts the server, then
    blocks the main thread. On SIGINT or SIGTERM the server is stopped and
    the process exits with status 0.
    """
    import time  # local import: only the wait loop below needs it

    parser = argparse.ArgumentParser(
        prog="feedloop",
        description="The fastest way to collect human preference data for LLMs",
    )
    parser.add_argument(
        "--port", type=int, default=DEFAULT_PORT, help="Port to listen on"
    )
    parser.add_argument("--db", type=str, default=None, help="Path to SQLite database")
    parser.add_argument(
        "--no-browser", action="store_true", help="Don't open browser automatically"
    )
    args = parser.parse_args()

    start(port=args.port, db_path=args.db, open_browser=not args.no_browser)

    print("feedloop: Press Ctrl+C to stop")

    def _handle_signal(sig: int, frame: object) -> None:
        print("\nfeedloop: shutting down")
        stop()
        sys.exit(0)

    signal.signal(signal.SIGINT, _handle_signal)
    signal.signal(signal.SIGTERM, _handle_signal)

    # Block forever (server runs in daemon thread). signal.pause() exists
    # only on Unix, so sleep in a loop instead; the SystemExit raised by the
    # handler propagates out of time.sleep() and ends the loop.
    while True:
        time.sleep(1)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
if __name__ == "__main__":
|
|
44
|
+
main()
|
feedloop/_config.py
ADDED
feedloop/_db.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import random
|
|
5
|
+
import sqlite3
|
|
6
|
+
import threading
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ._config import DEFAULT_DB_DIR, DEFAULT_DB_PATH
|
|
12
|
+
|
|
13
|
+
_SCHEMA = """\
|
|
14
|
+
CREATE TABLE IF NOT EXISTS comparisons (
|
|
15
|
+
id TEXT PRIMARY KEY,
|
|
16
|
+
session_id TEXT NOT NULL,
|
|
17
|
+
prompt TEXT NOT NULL,
|
|
18
|
+
output_a TEXT NOT NULL,
|
|
19
|
+
output_b TEXT NOT NULL,
|
|
20
|
+
display_order TEXT NOT NULL,
|
|
21
|
+
status TEXT NOT NULL DEFAULT 'pending',
|
|
22
|
+
chosen TEXT,
|
|
23
|
+
created_at TEXT NOT NULL,
|
|
24
|
+
completed_at TEXT,
|
|
25
|
+
metadata TEXT
|
|
26
|
+
);
|
|
27
|
+
CREATE INDEX IF NOT EXISTS idx_comparisons_status ON comparisons(status);
|
|
28
|
+
CREATE INDEX IF NOT EXISTS idx_comparisons_session ON comparisons(session_id);
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Database:
    """SQLite-backed store for pairwise comparisons and their ratings.

    A single connection is opened with ``check_same_thread=False`` and shared
    by every caller, so all access to it — reads included — is serialised
    through ``self._lock``. No method calls another while holding the lock.
    """

    def __init__(self, db_path: str | Path | None = None) -> None:
        """Open the database (creating it if needed) and apply the schema.

        Args:
            db_path: Location of the SQLite file. A falsy value selects
                ``DEFAULT_DB_PATH``. Missing parent directories are created.
        """
        self._path = Path(db_path) if db_path else DEFAULT_DB_PATH
        self._path.parent.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()
        self._conn = sqlite3.connect(str(self._path), check_same_thread=False)
        self._conn.row_factory = sqlite3.Row
        self._conn.execute("PRAGMA journal_mode=WAL")
        self._conn.executescript(_SCHEMA)

    # ── writes ──────────────────────────────────────────────

    def insert_comparison(
        self,
        session_id: str,
        prompt: str,
        output_a: str,
        output_b: str,
        metadata: dict | None = None,
    ) -> str:
        """Insert a pending comparison and return its generated id.

        The left/right presentation order ("ab" or "ba") is picked at random
        and stored with the row. ``metadata`` is stored as JSON text, or as
        NULL when it is ``None`` or empty.
        """
        cid = uuid.uuid4().hex
        display_order = random.choice(["ab", "ba"])
        now = datetime.now(timezone.utc).isoformat()
        with self._lock:
            self._conn.execute(
                """INSERT INTO comparisons
                   (id, session_id, prompt, output_a, output_b,
                    display_order, status, created_at, metadata)
                   VALUES (?, ?, ?, ?, ?, ?, 'pending', ?, ?)""",
                (
                    cid,
                    session_id,
                    prompt,
                    output_a,
                    output_b,
                    display_order,
                    now,
                    json.dumps(metadata) if metadata else None,
                ),
            )
            self._conn.commit()
        return cid

    def record_feedback(self, comparison_id: str, chosen_side: str) -> None:
        """Record a preference. chosen_side is 'left' or 'right'.

        The displayed side is mapped back to the original output ('a' or
        'b') through the row's stored ``display_order``.

        Raises:
            ValueError: If ``chosen_side`` is not 'left' or 'right', or if
                no comparison with ``comparison_id`` exists.
        """
        # Reject anything else explicitly; silently treating it as 'right'
        # would write a preference nobody expressed.
        if chosen_side not in ("left", "right"):
            raise ValueError(
                f"chosen_side must be 'left' or 'right', got {chosen_side!r}"
            )

        now = datetime.now(timezone.utc).isoformat()
        # Look up and update under one lock so the pair is atomic with
        # respect to other users of this connection.
        with self._lock:
            row = self._conn.execute(
                "SELECT display_order FROM comparisons WHERE id = ?",
                (comparison_id,),
            ).fetchone()
            if row is None:
                raise ValueError(f"Comparison {comparison_id} not found")

            # Map display side to original output
            display_order = row["display_order"]
            chosen = display_order[0] if chosen_side == "left" else display_order[1]

            self._conn.execute(
                """UPDATE comparisons
                   SET chosen = ?, status = 'completed', completed_at = ?
                   WHERE id = ?""",
                (chosen, now, comparison_id),
            )
            self._conn.commit()

    def skip_comparison(self, comparison_id: str) -> None:
        """Mark a comparison as skipped and stamp ``completed_at``.

        An unknown id matches no row and is not reported.
        """
        now = datetime.now(timezone.utc).isoformat()
        with self._lock:
            self._conn.execute(
                """UPDATE comparisons
                   SET status = 'skipped', completed_at = ?
                   WHERE id = ?""",
                (now, comparison_id),
            )
            self._conn.commit()

    # ── reads ───────────────────────────────────────────────

    def get_comparison(self, comparison_id: str) -> sqlite3.Row | None:
        """Return the row with this id, or ``None`` if there is none."""
        with self._lock:
            return self._conn.execute(
                "SELECT * FROM comparisons WHERE id = ?", (comparison_id,)
            ).fetchone()

    def get_next_pending(self, session_id: str | None = None) -> sqlite3.Row | None:
        """Return the oldest pending comparison, or ``None`` if none is pending.

        A truthy ``session_id`` restricts the search to that session.
        """
        if session_id:
            sql = """SELECT * FROM comparisons
                     WHERE status = 'pending' AND session_id = ?
                     ORDER BY created_at ASC LIMIT 1"""
            params: tuple = (session_id,)
        else:
            sql = """SELECT * FROM comparisons
                     WHERE status = 'pending'
                     ORDER BY created_at ASC LIMIT 1"""
            params = ()
        with self._lock:
            return self._conn.execute(sql, params).fetchone()

    def get_stats(self, session_id: str | None = None) -> dict:
        """Return counts keyed ``pending``, ``completed``, ``skipped``, ``total``.

        A truthy ``session_id`` restricts the counts to that session. All
        values are ints; an empty selection yields zeros.
        """
        # `where` is one of two fixed literals, never caller-supplied text.
        where = "WHERE session_id = ?" if session_id else ""
        params: tuple = (session_id,) if session_id else ()
        with self._lock:
            row = self._conn.execute(
                f"""SELECT
                    SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
                    SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
                    SUM(CASE WHEN status = 'skipped' THEN 1 ELSE 0 END) as skipped,
                    COUNT(*) as total
                    FROM comparisons {where}""",
                params,
            ).fetchone()
        # SUM() over zero rows is NULL, hence the `or 0`.
        return {
            "pending": row["pending"] or 0,
            "completed": row["completed"] or 0,
            "skipped": row["skipped"] or 0,
            "total": row["total"] or 0,
        }

    def get_completed(self, session_id: str | None = None) -> list[sqlite3.Row]:
        """Return completed comparisons ordered by completion time.

        A truthy ``session_id`` restricts the result to that session.
        Skipped comparisons are not included.
        """
        if session_id:
            sql = """SELECT * FROM comparisons
                     WHERE status = 'completed' AND session_id = ?
                     ORDER BY completed_at ASC"""
            params: tuple = (session_id,)
        else:
            sql = """SELECT * FROM comparisons
                     WHERE status = 'completed'
                     ORDER BY completed_at ASC"""
            params = ()
        with self._lock:
            return self._conn.execute(sql, params).fetchall()

    # ── lifecycle ───────────────────────────────────────────

    def close(self) -> None:
        """Close the underlying connection."""
        with self._lock:
            self._conn.close()
|
feedloop/_export.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from ._db import Database
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def export_dpo(db: Database, path: str | Path, session_id: str | None = None) -> int:
    """Write completed comparisons to *path* as DPO-style JSONL.

    Each line is a JSON object with ``prompt``, ``chosen`` and ``rejected``.
    The file is opened in write mode, so existing content is replaced.

    Args:
        db: Source of rows; only ``get_completed`` is called on it.
        path: Destination file.
        session_id: Forwarded to ``db.get_completed`` as its session filter.

    Returns:
        Number of rows written.
    """
    completed = db.get_completed(session_id=session_id)

    def _as_line(row) -> str:
        # Anything other than "a" is treated as a vote for output_b.
        picked_a = row["chosen"] == "a"
        record = {
            "prompt": row["prompt"],
            "chosen": row["output_a"] if picked_a else row["output_b"],
            "rejected": row["output_b"] if picked_a else row["output_a"],
        }
        return json.dumps(record, ensure_ascii=False) + "\n"

    with Path(path).open("w", encoding="utf-8") as handle:
        handle.writelines(_as_line(row) for row in completed)

    return len(completed)
|
feedloop/_models.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ComparisonOut(BaseModel):
    """A pending comparison as returned by ``GET /api/pending``."""

    id: str
    prompt: str
    # The two outputs, arranged left/right according to the row's stored
    # display_order.
    output_left: str
    output_right: str
    # "completed" and "total" counts from Database.get_stats.
    progress_completed: int
    progress_total: int
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FeedbackRequest(BaseModel):
    """Body of ``POST /api/feedback``."""

    comparison_id: str
    # The route accepts "left", "right" or "skip" and rejects anything else
    # with HTTP 400.
    chosen_side: str  # "left" | "right" | "skip"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class StatsResponse(BaseModel):
    """Comparison counts by status; same keys as ``Database.get_stats``."""

    pending: int
    completed: int
    skipped: int
    total: int
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ExportRow(BaseModel):
    """One exported preference record: a prompt with its chosen and rejected output."""

    prompt: str
    chosen: str
    rejected: str
|
feedloop/_routes.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, HTTPException
|
|
7
|
+
from fastapi.responses import StreamingResponse
|
|
8
|
+
|
|
9
|
+
from ._db import Database
|
|
10
|
+
from ._models import ComparisonOut, FeedbackRequest, StatsResponse
|
|
11
|
+
|
|
12
|
+
router = APIRouter(prefix="/api")
|
|
13
|
+
|
|
14
|
+
# The database instance is set by _server.py at startup.
|
|
15
|
+
_db: Database | None = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def set_db(db: Database) -> None:
    """Install *db* as the module-wide database used by the route handlers."""
    global _db
    _db = db
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _get_db() -> Database:
    """Return the database installed by ``set_db``.

    Raises:
        RuntimeError: If no database has been installed yet.
    """
    db = _db
    if db is not None:
        return db
    raise RuntimeError("Database not initialised")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@router.get("/pending")
def get_pending(session_id: str | None = None) -> ComparisonOut | None:
    """Return the next pending comparison, or ``None`` when nothing is pending.

    ``session_id`` is passed through to both the pending lookup and the
    progress counts.
    """
    db = _get_db()
    pending = db.get_next_pending(session_id=session_id)
    if pending is None:
        return None

    progress = db.get_stats(session_id=session_id)

    # Start from the "ab" arrangement and swap for any other display_order.
    left, right = pending["output_a"], pending["output_b"]
    if pending["display_order"] != "ab":
        left, right = right, left

    return ComparisonOut(
        id=pending["id"],
        prompt=pending["prompt"],
        output_left=left,
        output_right=right,
        progress_completed=progress["completed"],
        progress_total=progress["total"],
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@router.post("/feedback")
def post_feedback(req: FeedbackRequest) -> dict:
    """Record a rating or a skip for one comparison.

    Returns:
        ``{"ok": True}`` once the database has been updated.

    Raises:
        HTTPException: 400 if ``chosen_side`` is not 'left', 'right' or
            'skip'; 404 if no comparison with ``comparison_id`` exists.
    """
    db = _get_db()
    if req.chosen_side not in ("left", "right", "skip"):
        raise HTTPException(400, "chosen_side must be 'left', 'right', or 'skip'")

    # Check existence up front for both paths: otherwise an unknown id
    # surfaces as an unhandled ValueError (HTTP 500) when rating, and as a
    # false {"ok": true} when skipping, because that UPDATE matches no row.
    if db.get_comparison(req.comparison_id) is None:
        raise HTTPException(404, f"Comparison {req.comparison_id} not found")

    if req.chosen_side == "skip":
        db.skip_comparison(req.comparison_id)
    else:
        db.record_feedback(req.comparison_id, req.chosen_side)

    return {"ok": True}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@router.get("/stats")
def get_stats(session_id: str | None = None) -> StatsResponse:
    """Return comparison counts by status, optionally limited to one session."""
    counts = _get_db().get_stats(session_id=session_id)
    return StatsResponse(**counts)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@router.get("/export")
def export_jsonl(session_id: str | None = None) -> StreamingResponse:
    """Serve completed comparisons as a downloadable JSONL attachment.

    Each line is a JSON object with ``prompt``, ``chosen`` and ``rejected``.
    The body is empty when there are no completed comparisons.
    """
    db = _get_db()

    def _encode(row) -> str:
        # Anything other than "a" is treated as a vote for output_b.
        if row["chosen"] == "a":
            winner, loser = row["output_a"], row["output_b"]
        else:
            winner, loser = row["output_b"], row["output_a"]
        record = {
            "prompt": row["prompt"],
            "chosen": winner,
            "rejected": loser,
        }
        return json.dumps(record, ensure_ascii=False)

    encoded = [_encode(row) for row in db.get_completed(session_id=session_id)]
    # Newline-terminate every record.
    content = "".join(line + "\n" for line in encoded)

    payload = io.BytesIO(content.encode("utf-8"))
    return StreamingResponse(
        payload,
        media_type="application/x-ndjson",
        headers={"Content-Disposition": "attachment; filename=preferences.jsonl"},
    )
|
feedloop/_server.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import socket
|
|
4
|
+
import threading
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import uvicorn
|
|
8
|
+
from fastapi import FastAPI
|
|
9
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
10
|
+
from fastapi.staticfiles import StaticFiles
|
|
11
|
+
|
|
12
|
+
from ._config import DEFAULT_PORT, PORT_SCAN_RANGE
|
|
13
|
+
from ._db import Database
|
|
14
|
+
from ._routes import router, set_db
|
|
15
|
+
|
|
16
|
+
STATIC_DIR = Path(__file__).parent / "_static"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _create_app(db: Database) -> FastAPI:
    """Build the FastAPI app: CORS, the API router, and the static UI if present.

    ``db`` is installed as the routes' database via ``set_db``. The
    interactive docs endpoints are disabled.
    """
    app = FastAPI(title="feedloop", docs_url=None, redoc_url=None)

    # NOTE(review): every origin, method and header is allowed, so any web
    # page open in the user's browser can call this API — confirm that is
    # intended.
    cors_options = {
        "allow_origins": ["*"],
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **cors_options)

    set_db(db)
    app.include_router(router)

    # The static mount is added only when index.html exists in STATIC_DIR.
    if (STATIC_DIR / "index.html").exists():
        app.mount("/", StaticFiles(directory=STATIC_DIR, html=True), name="static")

    return app
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _find_open_port(start: int) -> int:
    """Return the first port at or after *start* that can be bound on 127.0.0.1.

    ``PORT_SCAN_RANGE`` consecutive ports are tried. The probe socket is
    closed before returning, so the port is free but not reserved.

    Raises:
        RuntimeError: If none of the scanned ports can be bound.
    """
    upper = start + PORT_SCAN_RANGE
    for candidate in range(start, upper):
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        with probe:
            try:
                probe.bind(("127.0.0.1", candidate))
            except OSError:
                continue
            return candidate
    raise RuntimeError(
        f"No open port found in range {start}-{start + PORT_SCAN_RANGE - 1}"
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class BackgroundServer:
    """Runs the feedloop FastAPI app under uvicorn in a daemon thread."""

    def __init__(self, db: Database, port: int = DEFAULT_PORT) -> None:
        """Pick a port and build the app; nothing is started yet.

        Args:
            db: Database handed to the app's routes.
            port: First port to try; the port actually chosen is stored in
                ``self.port``.

        Raises:
            RuntimeError: If ``_find_open_port`` finds no free port.
        """
        self.db = db
        self.port = _find_open_port(port)
        self.app = _create_app(db)
        self._thread: threading.Thread | None = None
        self._server: uvicorn.Server | None = None

    @property
    def url(self) -> str:
        """Base URL of the server on localhost."""
        return f"http://localhost:{self.port}"

    def start(self) -> None:
        """Start uvicorn in a daemon thread and wait briefly for it to come up.

        Does nothing if the server thread is already alive. Waits up to
        roughly five seconds; if the thread is still alive but not ready by
        then, the method returns anyway.

        Raises:
            RuntimeError: If the server thread exits before it is ready.
        """
        if self._thread and self._thread.is_alive():
            return  # already running

        import time

        config = uvicorn.Config(
            self.app,
            host="127.0.0.1",
            port=self.port,
            log_level="warning",
        )
        server = uvicorn.Server(config)
        thread = threading.Thread(target=server.run, daemon=True)
        self._server = server
        self._thread = thread
        thread.start()

        # Wait for server to be ready. uvicorn sets `started` itself once it
        # is listening, so the flag is only read here, never assigned. A
        # connect() probe would also succeed against another process that
        # took the port after _find_open_port released it.
        for _ in range(50):
            if server.started:
                return
            if not thread.is_alive():
                raise RuntimeError(
                    f"feedloop: server failed to start on port {self.port}"
                )
            time.sleep(0.1)

    def stop(self) -> None:
        """Ask uvicorn to exit and wait up to three seconds for the thread."""
        if self._server:
            self._server.should_exit = True
        if self._thread:
            self._thread.join(timeout=3)
            self._thread = None
|
feedloop/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: feedloop
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The fastest way to collect human preference data for LLMs
|
|
5
|
+
Project-URL: Homepage, https://github.com/rammuthiah/feedloop
|
|
6
|
+
Project-URL: Repository, https://github.com/rammuthiah/feedloop
|
|
7
|
+
Author: Ram Muthiah
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: dpo,human-feedback,llm,preference-data,rlhf
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: fastapi>=0.100
|
|
21
|
+
Requires-Dist: pydantic>=2.0
|
|
22
|
+
Requires-Dist: uvicorn[standard]>=0.20
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# feedloop
|
|
26
|
+
|
|
27
|
+
The fastest way to collect human preference data for LLMs.
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import feedloop
|
|
31
|
+
|
|
32
|
+
feedloop.start()
|
|
33
|
+
|
|
34
|
+
feedloop.compare(
|
|
35
|
+
prompt="Explain quantum computing",
|
|
36
|
+
outputs=["Response A", "Response B"],
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Rate in the browser, then export
|
|
40
|
+
feedloop.export("preferences.jsonl")
|
|
41
|
+
```
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
feedloop/__init__.py,sha256=U_UWo_v62o035CdBamG3CyXKhW1MFDAE2gbgz4bbng4,4327
|
|
2
|
+
feedloop/__main__.py,sha256=b2_sMsf8GecNM9_wOJia0XzMh20rsmncisZT__5OJGQ,1164
|
|
3
|
+
feedloop/_config.py,sha256=QBsZ_e-rxI5wexSzx50Ud0dV8URwPO-zGQZIBBY-HFw,201
|
|
4
|
+
feedloop/_db.py,sha256=2xviZRyhUV0rGXlkCHIHDqdey6H9otwfIP48RXFO7ec,6282
|
|
5
|
+
feedloop/_export.py,sha256=e2H-FNtqcf5nvlWFBtEFkYZF0xicLyZRy6A8g3dUyQk,910
|
|
6
|
+
feedloop/_models.py,sha256=BESWGX574rqo1ZoJCipewZyQuNlusXzEP7NpDfCjA-s,506
|
|
7
|
+
feedloop/_routes.py,sha256=uy3HOp5o5AdoYBH86twI_XVj9UFAfXZfEJqgNSApcSI,2727
|
|
8
|
+
feedloop/_server.py,sha256=QU1w691CcBdH3KbJDDGEthynUCsi4Tsc0CwA1VpLyEs,2745
|
|
9
|
+
feedloop/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
feedloop-0.1.0.dist-info/METADATA,sha256=CBieh0T40BwRcQGSBf_hp8SAh8URFM6m_GUgz6oZfdw,1235
|
|
11
|
+
feedloop-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
12
|
+
feedloop-0.1.0.dist-info/entry_points.txt,sha256=54qG4ozlvOnlatw-PkW-BzU1rITiPtd-moyvwWOFmi4,52
|
|
13
|
+
feedloop-0.1.0.dist-info/licenses/LICENSE,sha256=ubUN0vZDzv-csuMqe-LzQebe_3bHkuMazPGB0TstuOw,1068
|
|
14
|
+
feedloop-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ram Muthiah
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|