@geravant/sinain 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/sinain-core/package-lock.json +963 -0
- package/sinain-core/package.json +1 -0
- package/sinain-core/src/buffers/feed-buffer.ts +32 -0
- package/sinain-core/src/embedding/service.ts +66 -0
- package/sinain-core/src/index.ts +19 -2
- package/sinain-core/src/learning/local-curation.ts +137 -7
- package/sinain-core/src/server.ts +31 -0
- package/sinain-memory/README.md +105 -0
- package/sinain-memory/__pycache__/embed_client.cpython-312.pyc +0 -0
- package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
- package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
- package/sinain-memory/embed_client.py +117 -0
- package/sinain-memory/eval/benchmarks/__pycache__/meeting_adapter.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/__pycache__/meeting_runner.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
- package/sinain-memory/eval/benchmarks/meeting_adapter.py +81 -0
- package/sinain-memory/eval/benchmarks/meeting_runner.py +230 -0
- package/sinain-memory/eval/benchmarks/query.py +37 -16
- package/sinain-memory/eval/benchmarks/run_meeting_bench.sh +318 -0
- package/sinain-memory/eval/benchmarks/runner.py +10 -3
- package/sinain-memory/graph_query.py +257 -15
- package/sinain-memory/knowledge_integrator.py +365 -72
- package/sinain-memory/memory-config.json +1 -1
- package/sinain-memory/session_distiller.py +43 -19
- package/sinain-memory/triplestore.py +60 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# ── Meeting Memory Benchmark — end-to-end capture + evaluate ─────────────────
|
|
5
|
+
# 1. Opens meeting recording fullscreen in QuickTime
|
|
6
|
+
# 2. Starts sinain (audio + sense capture, no agent, no overlay)
|
|
7
|
+
# 3. Waits for recording to finish
|
|
8
|
+
# 4. Stops sinain → saves pending session
|
|
9
|
+
# 5. Restarts sinain → distills pending session into knowledge graph
|
|
10
|
+
# 6. Runs evaluation harness against the distilled DB
|
|
11
|
+
#
|
|
12
|
+
# Usage: ./run_meeting_bench.sh <mp4_path>
|
|
13
|
+
# Output: eval/benchmarks/results/meeting_results.md
|
|
14
|
+
|
|
15
|
+
# Absolute directory holding this script (…/sinain-memory/eval/benchmarks).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# sinain-memory directory: two levels above the script.
KOOG_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# SINAIN_ROOT must be the directory that contains sinain-core/. The historical
# location (four levels up) is tried first; the directory three levels up is
# tried next because that is where sinain-core sits when sinain-memory/ and
# sinain-core/ are siblings.
SINAIN_ROOT=""
for _root_candidate in "$SCRIPT_DIR/../../../.." "$SCRIPT_DIR/../../.."; do
  if [ -d "$_root_candidate/sinain-core" ]; then
    SINAIN_ROOT="$(cd "$_root_candidate" && pwd)"
    break
  fi
done
# Nothing matched: keep the historical default so later failures name a concrete path.
if [ -z "$SINAIN_ROOT" ]; then
  SINAIN_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
fi
|
|
18
|
+
|
|
19
|
+
# ANSI escape sequences used by the logging helpers. They are stored with a
# literal backslash and only become real escapes when printed via `echo -e`
# or a printf format string.
BOLD='\033[1m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
CYAN='\033[0;36m'
# Resets all attributes back to the terminal default.
RESET='\033[0m'
|
|
25
|
+
|
|
26
|
+
# Print a "[bench]"-prefixed message; backslash escapes in the text are expanded.
log() { printf '%b\n' "${BOLD}[bench]${RESET} $*"; }
|
|
27
|
+
# Print a success message marked with a green check.
ok() { printf '%b\n' "${BOLD}[bench]${RESET} ${GREEN}✓${RESET} $*"; }
|
|
28
|
+
# Print a non-fatal warning marked with a yellow warning sign.
warn() { printf '%b\n' "${BOLD}[bench]${RESET} ${YELLOW}⚠${RESET} $*"; }
|
|
29
|
+
# Print an error marked with a red cross and terminate the script with status 1.
fail() { printf '%b\n' "${BOLD}[bench]${RESET} ${RED}✗${RESET} $*"; exit 1; }
|
|
30
|
+
|
|
31
|
+
# ── Args ─────────────────────────────────────────────────────────────────────
|
|
32
|
+
# The single positional argument is the path to the meeting recording.
RECORDING="${1:-}"
if [ -z "$RECORDING" ]; then
  fail "Usage: $0 <path-to-mp4>"
fi
# Report a wrong path explicitly instead of only repeating the usage line.
if [ ! -f "$RECORDING" ]; then
  fail "Recording not found: $RECORDING (Usage: $0 <path-to-mp4>)"
fi
|
|
36
|
+
|
|
37
|
+
# ── Setup ────────────────────────────────────────────────────────────────────
|
|
38
|
+
# Scratch directory for this run. mktemp creates it atomically with a random
# suffix, so two runs started in the same second cannot share state and the
# name is not predictable by other local users.
BENCH_DIR="$(mktemp -d "/tmp/sinain-bench-$(date +%s)-XXXXXX")"
log "Benchmark directory: ${CYAN}${BENCH_DIR}${RESET}"
|
|
41
|
+
|
|
42
|
+
# Load API keys and audio config from the FIRST .env file that exists
# (line-based parser modelled on start-local.sh). Variables that are already
# set in the environment are never overridden.
for _env_file in "$SINAIN_ROOT/.env" "$SINAIN_ROOT/sinain-core/.env" "$HOME/.sinain/.env"; do
  [ -f "$_env_file" ] || continue
  log "Loading $_env_file"
  # `|| [ -n "$_k" ]` keeps the final line when the file lacks a trailing newline.
  while IFS='=' read -r _k _v || [ -n "$_k" ]; do
    # Skip blank lines and comment lines.
    [[ -z "$_k" || "$_k" =~ ^[[:space:]]*# ]] && continue
    # xargs trims whitespace and removes quotes, but exits non-zero on an
    # unbalanced quote; skip such a line instead of aborting under `set -e`.
    _k=$(echo "$_k" | xargs 2>/dev/null) || { warn "Skipping malformed line in $_env_file"; continue; }
    _v=$(echo "$_v" | xargs 2>/dev/null) || { warn "Skipping malformed value for $_k in $_env_file"; continue; }
    # Drop a trailing "# comment", then trim again.
    _v="${_v%%#*}"
    _v=$(echo "$_v" | xargs 2>/dev/null) || { warn "Skipping malformed value for $_k in $_env_file"; continue; }
    [[ -z "$_v" ]] && continue
    # `${!_k+x}` is a fatal error for names such as "export FOO", so validate first.
    if [[ ! "$_k" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      warn "Skipping invalid variable name in $_env_file: $_k"
      continue
    fi
    # Only export when the variable is not already defined (even if empty).
    if [ -z "${!_k+x}" ]; then export "$_k=$_v"; fi
  done < "$_env_file"
  break
done
|
|
58
|
+
|
|
59
|
+
# Bench-specific overrides: these are exported unconditionally, so they win
# over anything loaded from .env above.
# Point sinain's memory directory at the per-run scratch directory.
export SINAIN_MEMORY_DIR="$BENCH_DIR"
# Capture only: no agent, no escalation.
export AGENT_ENABLED=false
export ESCALATION_MODE=off

# Local whisper setup (from start-local.sh)
# LOCAL_WHISPER_MODEL / LOCAL_WHISPER_BIN keep any value already present in the
# environment; otherwise they default to the model under ~/models and to
# `whisper-cli` looked up on PATH.
MODEL_DIR="$HOME/models"
MODEL_NAME="ggml-large-v3-turbo.bin"
export LOCAL_WHISPER_MODEL="${LOCAL_WHISPER_MODEL:-$MODEL_DIR/$MODEL_NAME}"
export LOCAL_WHISPER_BIN="${LOCAL_WHISPER_BIN:-whisper-cli}"
# The transcription backend is always forced to the local one for the benchmark.
export TRANSCRIPTION_BACKEND=local
|
|
70
|
+
|
|
71
|
+
# ── Get recording duration ───────────────────────────────────────────────────
|
|
72
|
+
# Ask ffprobe for the container duration in seconds (e.g. "1623.480000").
# A missing ffprobe or an unreadable file yields an empty string.
DURATION_RAW=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$RECORDING" 2>/dev/null || echo "")
# Keep only the integer part.
DURATION="${DURATION_RAW%%.*}"
# ffprobe can also print non-numeric output such as "N/A"; anything that is not
# a plain integer would break the arithmetic below, so fall back in that case too.
if [[ ! "$DURATION" =~ ^[0-9]+$ ]]; then
  warn "Could not determine recording duration — assuming 27 min"
  DURATION=1620 # fallback: 27 min
fi
log "Recording duration: ${DURATION}s (~$((DURATION / 60))m)"
|
|
79
|
+
|
|
80
|
+
# ── Cleanup handler ──────────────────────────────────────────────────────────
|
|
81
|
+
# PIDs recorded with `$!` when the background pipelines are started.
CORE_PID=""
SENSE_PID=""

# EXIT handler: stop every process this script may have started and close
# QuickTime. Every step is best-effort and must never fail the handler.
cleanup() {
  log "Cleaning up..."
  [ -n "$SENSE_PID" ] && kill "$SENSE_PID" 2>/dev/null || true
  [ -n "$CORE_PID" ] && kill "$CORE_PID" 2>/dev/null || true
  # Both PIDs come from `cmd | sed &`, so they identify the sed prefixer and
  # not the worker itself. Signal the workers by command line as well, using
  # the same patterns as the stale-process sweep.
  pkill -f "python3 -m sense_client" 2>/dev/null || true
  pkill -f "Python -m sense_client" 2>/dev/null || true
  pkill -f "tsx.*src/index.ts" 2>/dev/null || true
  # Grace period before escalating to SIGKILL.
  sleep 2
  [ -n "$SENSE_PID" ] && kill -9 "$SENSE_PID" 2>/dev/null || true
  [ -n "$CORE_PID" ] && kill -9 "$CORE_PID" 2>/dev/null || true
  pkill -9 -f "python3 -m sense_client" 2>/dev/null || true
  pkill -9 -f "Python -m sense_client" 2>/dev/null || true
  pkill -9 -f "tsx.*src/index.ts" 2>/dev/null || true
  # Kill anything on port 9500
  lsof -i :9500 -sTCP:LISTEN -t 2>/dev/null | xargs kill -9 2>/dev/null || true
  # Close QuickTime
  osascript -e 'tell application "QuickTime Player" to quit' 2>/dev/null || true
}
trap cleanup EXIT
|
|
97
|
+
|
|
98
|
+
# ── Kill stale sinain processes ──────────────────────────────────────────────
|
|
99
|
+
# Sweep away leftovers from earlier runs: match by command line first, then
# free the core's HTTP port, then give the OS a moment to release resources.
log "Killing stale processes..."
for _stale_pattern in \
  "tsx.*src/index.ts" \
  "python3 -m sense_client" \
  "Python -m sense_client" \
  "tools/sck-capture/sck-capture"; do
  pkill -f "$_stale_pattern" 2>/dev/null || true
done
lsof -i :9500 -sTCP:LISTEN -t 2>/dev/null | xargs kill -9 2>/dev/null || true
sleep 2
|
|
106
|
+
|
|
107
|
+
# ── Phase 1a: Open video fullscreen ─────────────────────────────────────────
|
|
108
|
+
log "Opening recording in QuickTime (fullscreen)..."
open -a "QuickTime Player" "$RECORDING"
# Give QuickTime time to open the document before scripting it.
sleep 3
# Report success only when the AppleScript actually ran; previously the
# "playing" message was printed even right after the auto-play warning.
if osascript -e '
tell application "QuickTime Player"
present front document
delay 1
play front document
end tell
' 2>/dev/null; then
  ok "Video playing fullscreen"
else
  warn "Could not auto-play — check QuickTime"
fi
|
|
119
|
+
|
|
120
|
+
# ── Phase 1b: Start sinain-core ──────────────────────────────────────────────
|
|
121
|
+
log "Starting sinain-core (capture-only, local whisper)..."
# Run the core in the background and prefix each of its output lines with "[core]".
(cd "$SINAIN_ROOT/sinain-core" && npx tsx src/index.ts 2>&1) | \
  sed -u "s/^/$(printf "${CYAN}[core]${RESET} ")/" &
CORE_PID=$!

# Poll the health endpoint: at most 20 attempts, one second apart.
CORE_OK=false
_health_try=0
while [ "$_health_try" -lt 20 ]; do
  if curl -sf http://localhost:9500/health >/dev/null 2>&1; then
    CORE_OK=true
    break
  fi
  sleep 1
  _health_try=$((_health_try + 1))
done

if [ "$CORE_OK" = true ]; then
  ok "sinain-core healthy on :9500"
else
  fail "sinain-core did not start"
fi
|
|
140
|
+
|
|
141
|
+
# ── Phase 1c: Start sense_client ─────────────────────────────────────────────
|
|
142
|
+
log "Starting sense_client (screen capture + OCR)..."

# Privacy modes default to "full" unless the caller or .env already chose a value.
export PRIVACY_OCR_OPENROUTER="${PRIVACY_OCR_OPENROUTER:-full}"
export PRIVACY_IMAGES_OPENROUTER="${PRIVACY_IMAGES_OPENROUTER:-full}"

# Run sense_client in the background and prefix each output line with "[sense]".
(cd "$SINAIN_ROOT" && python3 -m sense_client 2>&1) | \
  sed -u "s/^/$(printf "${YELLOW}[sense]${RESET} ")/" &
SENSE_PID=$!
sleep 2

# A pipeline that is already gone after two seconds means startup failed;
# that is tolerated and the run continues without screen capture.
if ! kill -0 "$SENSE_PID" 2>/dev/null; then
  warn "sense_client failed to start — continuing with audio only"
  SENSE_PID=""
else
  ok "sense_client running"
fi
|
|
159
|
+
|
|
160
|
+
# ── Phase 1d: Wait for recording to finish ───────────────────────────────────
|
|
161
|
+
# Print the number of messages currently returned by the core's /feed
# endpoint, or "?" when the endpoint or the JSON parsing fails.
feed_count() {
  curl -sf http://localhost:9500/feed 2>/dev/null | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('messages',[])))" 2>/dev/null || echo "?"
}

BUFFER=60 # extra time for trailing transcription/OCR
TOTAL_WAIT=$((DURATION + BUFFER))
log "Waiting ${TOTAL_WAIT}s for recording + buffer..."
log " (recording ends at $(date -v+${DURATION}S '+%H:%M:%S'), buffer until $(date -v+${TOTAL_WAIT}S '+%H:%M:%S'))"

# Sleep in chunks of at most 5 minutes, reporting progress between chunks.
ELAPSED=0
while [ $ELAPSED -lt $TOTAL_WAIT ]; do
  REMAINING=$((TOTAL_WAIT - ELAPSED))
  # The last chunk is shortened so the total never exceeds TOTAL_WAIT.
  SLEEP_CHUNK=$((REMAINING < 300 ? REMAINING : 300))
  sleep $SLEEP_CHUNK
  ELAPSED=$((ELAPSED + SLEEP_CHUNK))
  REMAINING=$((TOTAL_WAIT - ELAPSED))
  if [ $REMAINING -gt 0 ]; then
    FEED_COUNT=$(feed_count)
    log " ${ELAPSED}s elapsed, ${REMAINING}s remaining — feed items: ${FEED_COUNT}"
  fi
done

ok "Recording capture complete"

# Check what we captured
FEED_COUNT=$(feed_count)
log "Captured ${FEED_COUNT} feed items"
|
|
188
|
+
|
|
189
|
+
# ── Phase 1e: Stop sinain (saves pending session) ───────────────────────────
|
|
190
|
+
log "Stopping sinain (saving pending session)..."
[ -n "$SENSE_PID" ] && kill "$SENSE_PID" 2>/dev/null || true
SENSE_PID=""
# $SENSE_PID came from `python3 … | sed &`, so it is the sed prefixer. Stop the
# capture process itself as well; otherwise it keeps running during the
# shutdown and distillation phases.
pkill -f "python3 -m sense_client" 2>/dev/null || true
pkill -f "Python -m sense_client" 2>/dev/null || true

# Send SIGINT directly to the tsx/node process (not the pipe wrapper)
# The pipe means $CORE_PID is sed, not tsx — so we pkill the actual process
pkill -INT -f "tsx src/index.ts" 2>/dev/null || true
log "Sent SIGINT to tsx, waiting for graceful shutdown..."
sleep 10

# Force if still alive
pkill -9 -f "tsx src/index.ts" 2>/dev/null || true
kill -9 "$CORE_PID" 2>/dev/null || true
CORE_PID=""
lsof -i :9500 -sTCP:LISTEN -t 2>/dev/null | xargs kill -9 2>/dev/null || true
sleep 2

# Close QuickTime
osascript -e 'tell application "QuickTime Player" to quit' 2>/dev/null || true

# Verify pending session was saved (or inline distillation already consumed it)
if [ -f "$BENCH_DIR/pending-session.json" ]; then
  # The path is passed as an argument rather than pasted into the Python source.
  PENDING_ITEMS=$(python3 -c "import json,sys; print(len(json.load(open(sys.argv[1])).get('items',[])))" "$BENCH_DIR/pending-session.json" 2>/dev/null || echo "?")
  ok "Pending session saved: ${PENDING_ITEMS} items"
elif [ -f "$BENCH_DIR/knowledge-graph.db" ]; then
  ok "Inline distillation completed (pending-session.json already consumed)"
else
  warn "No pending-session.json and no knowledge-graph.db — will retry with longer shutdown"
  # Try again: start core briefly, let it capture a few items, then shut down gracefully
  log "Starting core for a brief capture + shutdown cycle..."
  (cd "$SINAIN_ROOT/sinain-core" && npx tsx src/index.ts 2>&1) > /tmp/sinain-bench-retry.log &
  RETRY_PID=$!
  sleep 15 # let it start and capture a few items
  # Get the actual node PID and send SIGINT
  NODE_PID=$(pgrep -f "tsx src/index.ts" 2>/dev/null | head -1 || true)
  if [ -n "$NODE_PID" ]; then
    kill -INT "$NODE_PID" 2>/dev/null || true
    sleep 10
    kill -9 "$NODE_PID" 2>/dev/null || true
  fi
  kill -9 "$RETRY_PID" 2>/dev/null || true
  lsof -i :9500 -sTCP:LISTEN -t 2>/dev/null | xargs kill -9 2>/dev/null || true
  sleep 2
  if [ -f "$BENCH_DIR/pending-session.json" ] || [ -f "$BENCH_DIR/knowledge-graph.db" ]; then
    ok "Recovery succeeded"
  else
    fail "Could not capture any session data"
  fi
fi
|
|
239
|
+
|
|
240
|
+
# ── Phase 1f: Restart for distillation ───────────────────────────────────────
|
|
241
|
+
log "Restarting sinain-core for distillation..."
(cd "$SINAIN_ROOT/sinain-core" && npx tsx src/index.ts 2>&1) | \
  sed -u "s/^/$(printf "${CYAN}[core]${RESET} ")/" &
CORE_PID=$!

# Wait for health (up to 20 attempts, one second apart). A core that never
# reports healthy is only a warning: a DB produced by inline distillation can
# still be evaluated.
CORE_OK=false
for i in $(seq 1 20); do
  if curl -sf http://localhost:9500/health >/dev/null 2>&1; then
    CORE_OK=true
    break
  fi
  sleep 1
done
if [ "$CORE_OK" != true ]; then
  warn "sinain-core did not report healthy after restart — distillation and /embed may be unavailable"
fi

# Wait for distillation to complete (up to 120 polls, 5 s apart = 10 min).
# It counts as complete once knowledge-graph.db exists AND either has grown
# past 4096 bytes or pending-session.json is gone (distillation consumed it).
log "Waiting for distillation..."
DISTILLED=false
for i in $(seq 1 120); do
  if [ -f "$BENCH_DIR/knowledge-graph.db" ]; then
    DB_SIZE=$(stat -f%z "$BENCH_DIR/knowledge-graph.db" 2>/dev/null || echo "0")
    if [ "$DB_SIZE" -gt 4096 ] || [ ! -f "$BENCH_DIR/pending-session.json" ]; then
      ok "Distillation complete (DB: ${DB_SIZE} bytes)"
      DISTILLED=true
      break
    fi
  fi
  sleep 5
done
# Previously the loop could run out silently; make the timeout visible.
if [ "$DISTILLED" != true ]; then
  warn "Distillation did not complete within 10 minutes — evaluating whatever is in the DB"
fi

# Keep core running for /embed endpoint during evaluation
log "Keeping sinain-core running for embedding service during evaluation..."
|
|
275
|
+
|
|
276
|
+
# ── Phase 2: Evaluate ────────────────────────────────────────────────────────
|
|
277
|
+
log ""
log "═══════════════════════════════════════════════"
log " Phase 2: Evaluation"
log "═══════════════════════════════════════════════"
log ""

DB_PATH="$BENCH_DIR/knowledge-graph.db"
if [ ! -f "$DB_PATH" ]; then
  fail "No knowledge-graph.db found — distillation may have failed"
fi

# Show what's in the DB
log "Knowledge graph contents:"
# Run from the sinain-memory directory so `triplestore` is importable and the
# relative runner path below resolves.
cd "$KOOG_DIR"
# The DB path is handed over as an argument instead of being pasted into the
# Python source, so unusual characters in the path cannot break the snippet.
# The summary is best-effort: any failure only produces a warning.
python3 - "$DB_PATH" 2>/dev/null <<'PY' || warn "Could not inspect DB"
import sys
from triplestore import TripleStore
ts = TripleStore(sys.argv[1])
facts = ts.all_facts()
print(f' Total facts: {len(facts)}')
entities = set()
for f in facts:
    entities.add(f.get('entity', ''))
print(f' Unique entities: {len(entities)}')
for e in sorted(entities)[:10]:
    print(f' - {e}')
if len(entities) > 10:
    print(f' ... and {len(entities) - 10} more')
PY

# Run evaluation
log "Running QA evaluation..."
python3 eval/benchmarks/meeting_runner.py \
  --db "$DB_PATH" \
  --conditions sinain-memory,full-context \
  --format json,markdown

log ""
log "═══════════════════════════════════════════════"
log " Done!"
log " Results: eval/benchmarks/results/meeting_results.md"
log " DB: $DB_PATH"
log "═══════════════════════════════════════════════"
|
|
@@ -113,10 +113,17 @@ def run_benchmark(
|
|
|
113
113
|
for idx, (inst, question) in enumerate(all_questions):
|
|
114
114
|
qid = question.id
|
|
115
115
|
|
|
116
|
-
# Skip if already done
|
|
116
|
+
# Skip if already done (with all conditions scored)
|
|
117
117
|
if qid in completed:
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
prev = completed[qid]
|
|
119
|
+
all_scored = all(
|
|
120
|
+
prev.get("answers", {}).get(c, {}).get("score") is not None
|
|
121
|
+
for c in conditions
|
|
122
|
+
)
|
|
123
|
+
if all_scored:
|
|
124
|
+
details.append(prev)
|
|
125
|
+
continue
|
|
126
|
+
# Otherwise re-run this question (previous attempt had failures)
|
|
120
127
|
|
|
121
128
|
print(f"[{idx+1}/{total}] {qid} [{question.category}]")
|
|
122
129
|
|
|
@@ -129,27 +129,269 @@ def query_top_facts(db_path: str, limit: int = 30) -> list[dict]:
|
|
|
129
129
|
return []
|
|
130
130
|
|
|
131
131
|
|
|
132
|
+
def query_facts_fts(db_path: str, query: str, max_facts: int = 10) -> list[dict]:
    """Full-text search on fact values via the FTS5 index.

    Looks up entities whose non-retracted ``value`` triple matches ``query``
    and returns one dict per entity holding ``entity_id``, a derived
    ``entity`` label and every attribute except ``tag``. When the FTS5 query
    fails for any reason (index missing, query not valid FTS5 syntax, ...),
    falls back to a case-insensitive ``LIKE`` match on any keyword longer
    than two characters.

    Args:
        db_path: Path to the triple-store database file.
        query: Search text; passed unmodified to ``MATCH``.
        max_facts: Maximum number of facts to return.

    Returns:
        Matching facts, or ``[]`` when the database file is missing, nothing
        matches, or any error occurs (this lookup is best-effort).
    """
    if not Path(db_path).exists():
        return []

    try:
        from triplestore import TripleStore
        store = TripleStore(db_path)
        # try/finally guarantees the store is closed on every path, including
        # errors raised while rows are being expanded into facts.
        try:
            # Try FTS5 first
            try:
                rows = store._conn.execute(
                    """SELECT DISTINCT t.entity_id
                    FROM triples_fts fts
                    JOIN triples t ON fts.rowid = t.id
                    WHERE triples_fts MATCH ?
                    AND t.attribute = 'value'
                    AND NOT t.retracted
                    LIMIT ?""",
                    (query, max_facts),
                ).fetchall()
            except Exception:
                # FTS5 not available — fall back to LIKE search
                keywords = [w.lower() for w in query.split() if len(w) > 2]
                if not keywords:
                    return []
                # Match any keyword in value
                conditions = " OR ".join(["LOWER(value) LIKE ?"] * len(keywords))
                params = [f"%{k}%" for k in keywords] + [max_facts]
                rows = store._conn.execute(
                    f"""SELECT DISTINCT entity_id
                    FROM triples
                    WHERE attribute = 'value'
                    AND NOT retracted
                    AND ({conditions})
                    LIMIT ?""",
                    params,
                ).fetchall()

            # Fetch full attributes for matched entities
            facts = []
            for row in rows:
                eid = row["entity_id"]
                attrs = store.entity(eid)
                # Label: the part after the last ":" minus its final "-suffix".
                label = eid.split(":")[-1].rsplit("-", 1)[0] if ":" in eid else eid
                fact = {"entity_id": eid, "entity": label}
                for attr, values in attrs.items():
                    if attr == "tag":
                        continue
                    # Single-valued attributes are unwrapped from their list.
                    fact[attr] = values[0] if len(values) == 1 else values
                facts.append(fact)

            return facts[:max_facts]
        finally:
            store.close()
    except Exception:
        return []
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def query_facts_by_entity_graph(
    db_path: str,
    entity_name: str,
    max_facts: int = 10,
) -> list[dict]:
    """Find facts about an entity via backref traversal of the entity graph.

    The entity node id is ``entity:<name>`` with the name lower-cased and
    spaces replaced by hyphens. Facts are the ``fact:*`` nodes that point at
    that node through an ``about`` ref edge, followed by those that point at
    it through a ``mentions`` ref edge.

    Args:
        db_path: Path to the triple-store database file.
        entity_name: Human-readable entity name.
        max_facts: Maximum number of facts to return.

    Returns:
        One dict per fact that has a ``value`` attribute, holding
        ``entity_id`` plus every attribute except ``tag``. Returns ``[]`` when
        the database file or the entity node is missing, or on any error
        (this lookup is best-effort).
    """
    if not Path(db_path).exists():
        return []

    try:
        from triplestore import TripleStore
        store = TripleStore(db_path)
        # try/finally guarantees the store is closed even when a lookup raises.
        try:
            entity_node_id = f"entity:{entity_name.lower().replace(' ', '-')}"
            if not store.entity(entity_node_id):
                return []

            # Get all facts linked to this entity via "about" ref edge
            fact_refs = store.backrefs(entity_node_id, attribute="about")
            # Also get facts that "mention" this entity
            mention_refs = store.backrefs(entity_node_id, attribute="mentions")
            all_refs = fact_refs + mention_refs

            # Load fact details, visiting each fact node only once
            seen = set()
            facts = []
            for fact_eid, _ in all_refs:
                if fact_eid in seen or not fact_eid.startswith("fact:"):
                    continue
                seen.add(fact_eid)
                attrs = store.entity(fact_eid)
                if attrs and "value" in attrs:
                    fact = {"entity_id": fact_eid}
                    for attr, values in attrs.items():
                        if attr == "tag":
                            continue
                        # Single-valued attributes are unwrapped from their list.
                        fact[attr] = values[0] if len(values) == 1 else values
                    facts.append(fact)

            return facts[:max_facts]
        finally:
            store.close()
    except Exception:
        return []
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def query_facts_hybrid(
    db_path: str,
    query: str,
    max_facts: int = 10,
) -> list[dict]:
    """Hybrid retrieval with Reciprocal Rank Fusion (Graphiti pattern).

    Runs three independent retrieval methods (full-text search, keyword/tag
    lookup, top facts), fuses their rankings via RRF, boosts facts that the
    entity graph links to a keyword of the query, adds a small
    decayed-confidence bonus, and finally pads a short result list with
    1-hop graph neighbors of the selected facts.

    Args:
        db_path: Path to the triple-store database file.
        query: Natural-language query.
        max_facts: Maximum number of facts to return.

    Returns:
        Up to ``max_facts`` fact dicts, best first.
    """
    import re
    keywords = [w.lower() for w in re.findall(r"[a-zA-Z][a-zA-Z0-9-]+", query) if len(w) > 2]

    # Entity graph pre-filter: find facts linked to mentioned entities via backrefs.
    # Used to BOOST relevant facts in RRF, not as a separate tier (avoids dilution).
    graph_fact_ids: set[str] = {
        f["entity_id"]
        for kw in keywords
        for f in query_facts_by_entity_graph(db_path, kw, max_facts=50)
        if f.get("entity_id")
    }

    # Run three retrieval methods independently
    candidate_limit = max_facts * 3
    fts_results = query_facts_fts(db_path, query, max_facts=candidate_limit)
    tag_results = query_facts_by_entities(db_path, keywords, max_facts=candidate_limit) if keywords else []
    top_results = query_top_facts(db_path, limit=candidate_limit)
    candidate_lists = [fts_results, tag_results, top_results]

    # Build ranked lists by entity_id
    def _ranked_ids(facts: list[dict]) -> list[str]:
        """Return the entity ids of *facts* in order, without duplicates or blanks."""
        seen = set()
        out = []
        for f in facts:
            eid = f.get("entity_id", "")
            if eid and eid not in seen:
                seen.add(eid)
                out.append(eid)
        return out

    # Reciprocal Rank Fusion: RRF(d) = Σ 1/(k + rank_i(d)), with ranks counted from 0.
    K = 60  # standard RRF constant
    rrf_scores: dict[str, float] = {}
    for candidates in candidate_lists:
        for rank, eid in enumerate(_ranked_ids(candidates)):
            rrf_scores[eid] = rrf_scores.get(eid, 0.0) + 1.0 / (K + rank)

    # Graph boost: facts linked to mentioned entities via backrefs get priority
    for eid in rrf_scores.keys() & graph_fact_ids:
        rrf_scores[eid] += 0.02  # significant boost — graph-linked facts rank higher

    # Build fact lookup from all candidates (first occurrence wins). Its keys
    # are exactly the ids scored above.
    fact_map: dict[str, dict] = {}
    for candidates in candidate_lists:
        for f in candidates:
            eid = f.get("entity_id", "")
            if eid and eid not in fact_map:
                fact_map[eid] = f

    # Apply confidence decay as secondary signal (fresh facts rank above stale ones).
    # The bonus is added ONCE per fact; adding it once per list a fact appears
    # in would multiply this "small" bonus for multi-list hits.
    from triplestore import decayed_confidence
    for eid, f in fact_map.items():
        try:
            conf = float(f.get("confidence", 0.5))
        except (ValueError, TypeError):
            # An unparsable confidence must not stop first_seen from being read.
            conf = 0.5
        created = f.get("first_seen", "")
        if isinstance(created, list):
            # Multi-valued attribute: use the first recorded value.
            created = created[0] if created else ""
        created = str(created)
        if created:
            effective = decayed_confidence(conf, created)
            rrf_scores[eid] += effective * 0.01  # small boost, preserves RRF rank

    # Sort by RRF score descending
    sorted_ids = sorted(rrf_scores, key=rrf_scores.get, reverse=True)
    results = [fact_map[eid] for eid in sorted_ids[:max_facts] if eid in fact_map]

    # Expand top results with 1-hop graph neighbors
    if results and len(results) < max_facts:
        seen_ids = {f.get("entity_id", "") for f in results}
        try:
            from triplestore import TripleStore
            store = TripleStore(db_path)
            # try/finally guarantees the store is closed even when a lookup raises.
            try:
                for fact in list(results):
                    if len(results) >= max_facts:
                        break
                    eid = fact.get("entity_id", "")
                    if not eid:
                        continue
                    neighbors = store.neighbors(eid, depth=1)
                    for nid, nattrs in neighbors.items():
                        if nid not in seen_ids and len(results) < max_facts:
                            seen_ids.add(nid)
                            label = nid.split(":")[-1].rsplit("-", 1)[0] if ":" in nid else nid
                            nfact = {"entity_id": nid, "entity": label}
                            for attr, values in nattrs.items():
                                if attr != "tag":
                                    nfact[attr] = values[0] if len(values) == 1 else values
                            results.append(nfact)
            finally:
                store.close()
        except Exception:
            # Neighbor expansion is optional padding; keep the fused results.
            pass

    return results[:max_facts]
|
|
359
|
+
|
|
360
|
+
|
|
132
361
|
def format_facts_text(facts: list[dict], max_chars: int = 500) -> str:
    """Format facts as bullet lines, clustered by entity, within a size budget.

    Facts that share an entity are emitted next to each other (entities in
    order of first appearance) so the QA model sees connected context, e.g.
    all Citibank facts together. Every line has the form
    ``- [<entity>] <value> (conf: <confidence>, <count>x)``.

    The entity label is the fact's ``entity`` field (first element when it is
    a list); when that is empty it is derived from ``entity_id`` by taking the
    part after the last ``:`` and dropping its final ``-suffix``.

    Args:
        facts: Fact dicts as produced by the query helpers.
        max_chars: Upper bound on the length of the returned string,
            newline separators included.

    Returns:
        The lines joined with newlines, cut at the first line that would not
        fit; ``""`` when there are no facts.
    """
    if not facts:
        return ""

    # Group by entity label; a plain dict keeps first-appearance order.
    groups: dict[str, list[dict]] = {}
    for f in facts:
        entity = f.get("entity", "")
        if isinstance(entity, list):
            entity = entity[0] if entity else ""
        if not entity:
            eid = str(f.get("entity_id", ""))
            entity = eid.split(":")[-1].rsplit("-", 1)[0] if ":" in eid else eid
        groups.setdefault(str(entity), []).append(f)

    lines: list[str] = []
    total = 0
    for entity, group_facts in groups.items():
        for f in group_facts:
            value = f.get("value", "")
            conf = f.get("confidence", "?")
            count = f.get("reinforce_count", "1")

            line = f"- [{entity}] {value} (conf: {conf}, {count}x)"
            # Every line after the first also costs one joining newline;
            # leaving that out let the result exceed max_chars.
            cost = len(line) + (1 if lines else 0)
            if total + cost > max_chars:
                return "\n".join(lines)
            lines.append(line)
            total += cost

    return "\n".join(lines)
|
|
155
397
|
|