contsql 0.3.1__tar.gz → 0.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {contsql-0.3.1 → contsql-0.3.6}/PKG-INFO +1 -1
- {contsql-0.3.1 → contsql-0.3.6}/contsql.egg-info/PKG-INFO +1 -1
- {contsql-0.3.1 → contsql-0.3.6}/contsql.py +261 -25
- {contsql-0.3.1 → contsql-0.3.6}/pyproject.toml +1 -1
- {contsql-0.3.1 → contsql-0.3.6}/MANIFEST.in +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/README.md +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/contsql.egg-info/SOURCES.txt +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/contsql.egg-info/dependency_links.txt +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/contsql.egg-info/entry_points.txt +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/contsql.egg-info/requires.txt +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/contsql.egg-info/top_level.txt +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/domain_notes.txt +0 -0
- {contsql-0.3.1 → contsql-0.3.6}/setup.cfg +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
# v0.
|
|
2
|
+
# v0.3.5 | 2026-04-14 | warmup + DB health check + 0-satır teşhis
|
|
3
3
|
"""contsql — Minimal DuckDB SQL agent. Soru sor, SQL üret, çalıştır, göster."""
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
@@ -7,6 +7,7 @@ import json
|
|
|
7
7
|
import os
|
|
8
8
|
import re
|
|
9
9
|
import sys
|
|
10
|
+
import threading
|
|
10
11
|
import time
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
|
|
@@ -22,6 +23,66 @@ TIMEOUT = int(os.environ.get("CONTSQL_TIMEOUT", "120"))
|
|
|
22
23
|
|
|
23
24
|
BANNED_SQL = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "TRUNCATE", "EXEC"]
|
|
24
25
|
|
|
26
|
+
# ── Warmup + Health Check ──
|
|
27
|
+
|
|
28
|
+
CRITICAL_TABLES = [
|
|
29
|
+
("fact_periodic", "Dönemsel risk verileri"),
|
|
30
|
+
("map_identity", "Firma kimlik bilgileri"),
|
|
31
|
+
("dim_entity", "Firma boyut bilgileri"),
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _warmup_model():
    """Fire a one-token generation request at Ollama to pre-load the model.

    ``main`` starts this on a daemon thread so it overlaps with schema
    loading. The original note says the goal is to get the model onto the
    GPU before the first real question. The response is discarded.
    """
    try:
        warmup_payload = {
            "model": MODEL,
            "prompt": " ",
            "options": {"num_predict": 1},
        }
        requests.post(
            f"{OLLAMA_URL}/api/generate",
            json=warmup_payload,
            timeout=60,
        )
    except Exception:
        # Warmup is best-effort: an unreachable server is reported later by
        # the first real model call, so nothing is surfaced here.
        pass
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _db_health_check(conn):
    """Check that every table in CRITICAL_TABLES exists and has rows.

    Args:
        conn: Database connection exposing ``execute(sql).fetchone()``.

    Returns:
        True when no problem was found, False otherwise. Problems are
        printed to stdout under a single warning header.
    """
    problems = []
    for name, description in CRITICAL_TABLES:
        try:
            row_count = conn.execute(f"SELECT COUNT(*) FROM {name}").fetchone()[0]
            if row_count == 0:
                problems.append(f" ⚠ {name} boş — {description} yüklenmemiş.")
        except Exception:
            # Any failure of the probe is reported as a missing table.
            problems.append(f" ⚠ {name} tablosu bulunamadı.")

    if not problems:
        return True

    print("⚠ DB UYARI:")
    print("\n".join(problems))
    print()
    return False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _diagnose_empty_result(sql, conn):
    """Suggest a likely reason why ``sql`` returned zero rows.

    The SQL text is scanned for three filter shapes, in this order, and the
    first one whose value has no match in ``map_identity`` yields a hint:

    1. ``muta = '<digits>'``
    2. ``entity_id = <digits>`` or ``entity_id IN (<digits> ...`` (only the
       first id is probed)
    3. ``ILIKE '<pattern>'`` (probed against ``map_identity.unvan``)

    The helper is purely advisory, so it never raises: if a probe query
    fails (for example because ``map_identity`` or one of its columns does
    not exist) it returns None instead of letting the error be mistaken for
    a failure of the user's own query.

    Args:
        sql: The SQL statement that produced the empty result.
        conn: Database connection exposing ``execute(sql).fetchone()``.

    Returns:
        A user-facing hint string, or None when no cause was identified.
    """
    def _no_match(predicate):
        # The interpolated values come from the regexes below: digits only,
        # or text without a single quote, so they cannot end the literal.
        query = f"SELECT COUNT(*) FROM map_identity WHERE {predicate}"
        return conn.execute(query).fetchone()[0] == 0

    try:
        # SQL keywords and identifiers are case-insensitive, so match that way.
        muta_match = re.search(r"muta\s*=\s*'(\d+)'", sql, re.IGNORECASE)
        if muta_match:
            tid = muta_match.group(1)
            if _no_match(f"muta='{tid}'"):
                return f"💡 MUTA {tid} veritabanında yok."

        entity_match = re.search(
            r"entity_id\s*(?:=|IN)\s*\(?'?(\d+)", sql, re.IGNORECASE)
        if entity_match:
            tid = entity_match.group(1)
            if _no_match(f"entity_id='{tid}'"):
                return f"💡 Entity {tid} veritabanında yok."

        ilike_match = re.search(r"ILIKE\s+'([^']+)'", sql, re.IGNORECASE)
        if ilike_match:
            pattern = ilike_match.group(1)
            if _no_match(f"unvan ILIKE '{pattern}'"):
                return f"💡 '{pattern}' ile eşleşen firma yok."
    except Exception:
        # Diagnostics are optional; a broken probe must not surface as an
        # error on a query that itself succeeded.
        return None

    return None
|
|
84
|
+
|
|
85
|
+
|
|
25
86
|
# ── Loglama ──
|
|
26
87
|
|
|
27
88
|
_LOG_FILE = None
|
|
@@ -125,16 +186,18 @@ def has_reference_trigger(question):
|
|
|
125
186
|
|
|
126
187
|
|
|
127
188
|
SORGU_TRIGGERS = [
|
|
128
|
-
"yanına ekle", "yanına da ekle", "yanına", "
|
|
189
|
+
"yanına ekle", "yanına da ekle", "yanına", "yanlarına",
|
|
190
|
+
"buna ekle", "buna da ekle",
|
|
129
191
|
"kolonu da ekle", "kolonunu da ekle", "bir de", "aynısına",
|
|
130
192
|
"aynı sorguya", "aynı sorgu", "üstüne ekle", "ekle yanına",
|
|
131
193
|
"göster yanında", "da göster", "da getir", "de göster", "de getir",
|
|
132
194
|
"tablodan", "tablodaki", "tabloyu", "tabloya",
|
|
195
|
+
"altına ekle", "altına", "altına da ekle",
|
|
133
196
|
"çıkart", "çıkar", "kaldır", "at şunu", "filtrele", "daralt",
|
|
134
197
|
"sadece", "hariç", "hariç tut",
|
|
135
198
|
]
|
|
136
199
|
|
|
137
|
-
MAX_SQL_CONTEXT_LENGTH =
|
|
200
|
+
MAX_SQL_CONTEXT_LENGTH = 1000
|
|
138
201
|
|
|
139
202
|
|
|
140
203
|
def has_query_trigger(question):
|
|
@@ -145,6 +208,38 @@ def has_query_trigger(question):
|
|
|
145
208
|
|
|
146
209
|
# ── System prompt ──
|
|
147
210
|
|
|
211
|
+
def _compact_schema(schema_text):
    """Convert the schema listing into a compact one-line-per-table form.

    A table line such as ``tablo (N satır): col1 (TYPE), col2 (TYPE)``
    becomes ``tablo(col1 TYPE, col2)``. The type is kept only for VARCHAR,
    BIGINT and DOUBLE columns. Blank lines are dropped; every other line is
    passed through stripped.

    Only the first ``:`` separates the table header from its columns, so a
    colon inside the column list no longer truncates it. A line that
    mentions "satır" but has no colon is kept unchanged.

    Args:
        schema_text: Multi-line schema description.

    Returns:
        The compacted schema as a newline-joined string.
    """
    compact_lines = []
    for raw_line in schema_text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        header, colon, column_text = line.partition(":")
        if "satır" not in line or not colon:
            compact_lines.append(line)
            continue
        table = header.split("(")[0].strip()
        columns = [_compact_column(c) for c in column_text.strip().split(", ")]
        compact_lines.append(f"{table}({', '.join(columns)})")
    return "\n".join(compact_lines)


def _compact_column(column):
    """Turn ``name (TYPE)`` into ``name TYPE`` or ``name``, by type."""
    column = column.strip()
    if "(" not in column:
        return column
    pieces = column.split("(")
    name = pieces[0].strip()
    type_name = pieces[1].rstrip(")")
    # The original comment calls these the "problematic" types worth keeping.
    if type_name in ("VARCHAR", "BIGINT", "DOUBLE"):
        return f"{name} {type_name}"
    return name
|
|
241
|
+
|
|
242
|
+
|
|
148
243
|
def build_system_prompt(schema_text, domain_text="", last_result_entities=None,
|
|
149
244
|
question=None, last_sql=None, column_hints=""):
|
|
150
245
|
prompt = f"""Sen bir SQL asistanısın. Kullanıcının sorusuna uygun SQL yaz.
|
|
@@ -161,10 +256,10 @@ Kurallar:
|
|
|
161
256
|
Veritabanı şeması:
|
|
162
257
|
{schema_text}
|
|
163
258
|
"""
|
|
164
|
-
if domain_text:
|
|
165
|
-
prompt += f"\nDomain bilgisi:\n{domain_text}\n"
|
|
166
259
|
if column_hints:
|
|
167
260
|
prompt += f"\n{column_hints}\n"
|
|
261
|
+
if domain_text:
|
|
262
|
+
prompt += f"\nDomain:\n{domain_text}\n"
|
|
168
263
|
if last_sql and question and has_query_trigger(question):
|
|
169
264
|
if len(last_sql) <= MAX_SQL_CONTEXT_LENGTH:
|
|
170
265
|
prompt += f"""\nÖNCEKİ SQL'İ MODİFİYE ET:
|
|
@@ -232,6 +327,49 @@ def _like_to_ilike(sql):
|
|
|
232
327
|
)
|
|
233
328
|
|
|
234
329
|
|
|
330
|
+
VARCHAR_SCORE_COLUMNS = ["ews_skor", "yis_skor", "eus_skor"]


def _fix_varchar_sort(sql):
    """Make ORDER BY on VARCHAR score columns sort numerically.

    Each column in VARCHAR_SCORE_COLUMNS that appears after an ``ORDER BY``
    is wrapped as ``TRY_CAST(col AS DOUBLE)``. When the statement then has
    both a TRY_CAST and an ORDER BY but no explicit NULLS ordering,
    ``NULLS LAST`` is added to the end of the ORDER BY clause.

    This is a regex heuristic, not a SQL parser. Compared with the earlier
    version it:

    * does not match the tail of a longer identifier (``max_ews_skor``);
    * recognises an existing ``try_cast`` regardless of letter case;
    * puts ``NULLS LAST`` before a trailing LIMIT/OFFSET, not after it;
    * leaves an explicit ``NULLS FIRST`` alone.

    Args:
        sql: SQL statement text.

    Returns:
        The rewritten SQL, or the input unchanged when nothing applies.
    """
    for col in VARCHAR_SCORE_COLUMNS:
        # (?<![\w.]) stops the column matching inside a longer name;
        # (?!\s*AS\b) skips a column that is already a CAST argument.
        pattern = (
            rf'(ORDER\s+BY\s+.*?)(?<![\w.])((?:\w+\.)+)?'
            rf'({re.escape(col)})\b(?!\s*AS\b)'
        )

        def _wrap(m):
            prefix, qualifier, column = m.group(1), m.group(2) or "", m.group(3)
            # Already inside a TRY_CAST within the current sort term.
            if "TRY_CAST" in prefix.split(",")[-1].upper():
                return m.group(0)
            return f"{prefix}TRY_CAST({qualifier}{column} AS DOUBLE)"

        sql = re.sub(pattern, _wrap, sql, flags=re.IGNORECASE)

    has_order_by = re.search(r'\bORDER\s+BY\b', sql, re.IGNORECASE)
    has_nulls_order = re.search(r'\bNULLS\s+(?:FIRST|LAST)\b', sql, re.IGNORECASE)
    if "TRY_CAST" in sql.upper() and has_order_by and not has_nulls_order:
        # Group 2 holds an optional LIMIT/OFFSET tail plus any final ';',
        # so NULLS LAST lands at the end of the sort keys.
        sql = re.sub(
            r'(ORDER\s+BY\s+.+?)((?:\s+(?:LIMIT|OFFSET)\b.*?)?\s*;?\s*)$',
            r'\1 NULLS LAST\2',
            sql, count=1, flags=re.IGNORECASE | re.DOTALL)
    return sql
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _fix_varchar_comparison(sql):
    """Make numeric comparisons on VARCHAR score columns compare as numbers.

    ``col <op> <number>`` becomes ``TRY_CAST(col AS DOUBLE) <op> <number>``
    for every column in VARCHAR_SCORE_COLUMNS, with or without a table
    qualifier. A column that directly follows ``TRY_CAST(`` is left alone.

    The column must start at an identifier boundary, so a longer name such
    as ``max_ews_skor`` is not rewritten. Negative and decimal literals are
    matched as a whole.

    Args:
        sql: SQL statement text.

    Returns:
        The rewritten SQL, or the input unchanged when nothing applies.
    """
    for col in VARCHAR_SCORE_COLUMNS:
        pattern = (
            rf'(?<!TRY_CAST\()(?<![\w.])((?:\w+\.)+)?'
            rf'({re.escape(col)})\s*([><=!]+)\s*(-?\d+(?:\.\d+)?)'
        )

        def _cast(m):
            qualifier = m.group(1) or ""
            return (f"TRY_CAST({qualifier}{m.group(2)} AS DOUBLE) "
                    f"{m.group(3)} {m.group(4)}")

        sql = re.sub(pattern, _cast, sql, flags=re.IGNORECASE)
    return sql
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _apply_guardrails(sql):
    """Run every SQL post-processing guardrail, in order, and return the SQL.

    Order: LIKE → ILIKE rewrite, then the VARCHAR score-column ORDER BY fix,
    then the VARCHAR score-column comparison fix.
    """
    for guardrail in (_like_to_ilike, _fix_varchar_sort, _fix_varchar_comparison):
        sql = guardrail(sql)
    return sql
|
|
371
|
+
|
|
372
|
+
|
|
235
373
|
def check_sql_safety(sql):
|
|
236
374
|
"""Sadece SELECT/WITH izinli. Tehlikeli keyword varsa hata döndür."""
|
|
237
375
|
sql_upper = sql.strip().upper()
|
|
@@ -246,7 +384,7 @@ def check_sql_safety(sql):
|
|
|
246
384
|
# ── LLM call ──
|
|
247
385
|
|
|
248
386
|
def ask_model(system_prompt, question):
|
|
249
|
-
"""Ollama'ya soru gönder, yanıt
|
|
387
|
+
"""Ollama'ya soru gönder, yanıt + timing döndür."""
|
|
250
388
|
t0 = time.time()
|
|
251
389
|
try:
|
|
252
390
|
resp = requests.post(
|
|
@@ -266,9 +404,17 @@ def ask_model(system_prompt, question):
|
|
|
266
404
|
content = data.get("message", {}).get("content", "")
|
|
267
405
|
elapsed = time.time() - t0
|
|
268
406
|
tokens = data.get("eval_count", 0)
|
|
269
|
-
|
|
407
|
+
# Ollama timing metrikleri (nanosecond → ms)
|
|
408
|
+
timing = {
|
|
409
|
+
"prompt_eval_ms": data.get("prompt_eval_duration", 0) / 1e6,
|
|
410
|
+
"generation_ms": data.get("eval_duration", 0) / 1e6,
|
|
411
|
+
"prompt_tokens": data.get("prompt_eval_count", 0),
|
|
412
|
+
"gen_tokens": tokens,
|
|
413
|
+
"prompt_chars": len(system_prompt),
|
|
414
|
+
}
|
|
415
|
+
return content, elapsed, tokens, timing
|
|
270
416
|
except Exception as e:
|
|
271
|
-
return f"LLM HATA: {e}", time.time() - t0, 0
|
|
417
|
+
return f"LLM HATA: {e}", time.time() - t0, 0, {}
|
|
272
418
|
|
|
273
419
|
|
|
274
420
|
def _short_error(msg):
|
|
@@ -331,21 +477,21 @@ def generate_sql(conn, question, last_result_entities=None, domain_text="",
|
|
|
331
477
|
column_hints = format_column_hints(col_map) if col_map else ""
|
|
332
478
|
system_prompt = build_system_prompt(schema_text, domain_text, last_result_entities,
|
|
333
479
|
question=question, column_hints=column_hints)
|
|
334
|
-
response, _, _ = ask_model(system_prompt, question)
|
|
480
|
+
response, _, _, _ = ask_model(system_prompt, question)
|
|
335
481
|
sql = extract_sql(response)
|
|
336
482
|
if not sql or check_sql_safety(sql):
|
|
337
483
|
return None
|
|
338
|
-
sql =
|
|
484
|
+
sql = _apply_guardrails(sql)
|
|
339
485
|
|
|
340
486
|
# Genel SQL hata retry: EXPLAIN ile ön kontrol
|
|
341
487
|
try:
|
|
342
488
|
conn.execute(f"EXPLAIN {sql}")
|
|
343
489
|
except Exception as e:
|
|
344
490
|
retry_q = _build_retry_prompt(question, sql, e, col_map)
|
|
345
|
-
resp2, _, _ = ask_model(system_prompt, retry_q)
|
|
491
|
+
resp2, _, _, _ = ask_model(system_prompt, retry_q)
|
|
346
492
|
sql2 = extract_sql(resp2)
|
|
347
493
|
if sql2 and not check_sql_safety(sql2):
|
|
348
|
-
return
|
|
494
|
+
return _apply_guardrails(sql2)
|
|
349
495
|
return None
|
|
350
496
|
|
|
351
497
|
return sql
|
|
@@ -408,11 +554,17 @@ def format_table(columns, rows, max_rows=50):
|
|
|
408
554
|
# ── Main loop ──
|
|
409
555
|
|
|
410
556
|
def _extract_entity_ids(columns, rows, max_entities=100):
|
|
411
|
-
"""Sorgu sonucundan entity_id listesini çıkar. Yoksa None döner."""
|
|
557
|
+
"""Sorgu sonucundan entity_id veya muta listesini çıkar. Yoksa None döner."""
|
|
412
558
|
col_lower = [c.lower() for c in columns]
|
|
413
|
-
|
|
559
|
+
# entity_id veya muta — ikisi de firma kimliği
|
|
560
|
+
id_col = None
|
|
561
|
+
for name in ("entity_id", "muta"):
|
|
562
|
+
if name in col_lower:
|
|
563
|
+
id_col = name
|
|
564
|
+
break
|
|
565
|
+
if id_col is None:
|
|
414
566
|
return None
|
|
415
|
-
idx = col_lower.index(
|
|
567
|
+
idx = col_lower.index(id_col)
|
|
416
568
|
ids = list(dict.fromkeys(row[idx] for row in rows if row[idx] is not None))
|
|
417
569
|
if len(ids) > max_entities:
|
|
418
570
|
return None # çok geniş, context'e ekleme
|
|
@@ -422,11 +574,17 @@ def _extract_entity_ids(columns, rows, max_entities=100):
|
|
|
422
574
|
def run_query(conn, system_prompt, question, col_map=None):
|
|
423
575
|
"""Tek soru → SQL → çalıştır → sonuç. (entity_id listesi, sql) tuple döndürür."""
|
|
424
576
|
# 1. Model'e sor
|
|
425
|
-
response, elapsed, tokens = ask_model(system_prompt, question)
|
|
577
|
+
response, elapsed, tokens, timing = ask_model(system_prompt, question)
|
|
426
578
|
_log("model", question=question, elapsed=round(elapsed, 1), tokens=tokens,
|
|
427
|
-
response=response)
|
|
579
|
+
response=response, timing=timing)
|
|
428
580
|
|
|
429
581
|
print(f"\n💭 MODEL ({elapsed:.1f}s, ~{tokens} tok)")
|
|
582
|
+
if timing.get("prompt_eval_ms"):
|
|
583
|
+
pe = timing["prompt_eval_ms"]
|
|
584
|
+
ge = timing["generation_ms"]
|
|
585
|
+
pt = timing["prompt_tokens"]
|
|
586
|
+
pc = timing["prompt_chars"]
|
|
587
|
+
print(f"⏱ Prompt: {pc} char ({pt} tok) → eval: {pe:.0f}ms | gen: {ge:.0f}ms")
|
|
430
588
|
if not response.startswith("LLM HATA"):
|
|
431
589
|
thought = re.sub(r'```sql.*?```', '', response, flags=re.DOTALL).strip()
|
|
432
590
|
if thought:
|
|
@@ -449,7 +607,7 @@ def run_query(conn, system_prompt, question, col_map=None):
|
|
|
449
607
|
return None, None
|
|
450
608
|
|
|
451
609
|
# 3b. SQL guardrails (LIKE → ILIKE, VARCHAR score-column casts)
|
|
452
|
-
sql =
|
|
610
|
+
sql = _apply_guardrails(sql)
|
|
453
611
|
|
|
454
612
|
print(f"🔍 SQL: {sql}")
|
|
455
613
|
|
|
@@ -463,7 +621,11 @@ def run_query(conn, system_prompt, question, col_map=None):
|
|
|
463
621
|
|
|
464
622
|
print(f"\n📊 SONUÇ ({len(rows)} satır, {query_ms:.0f}ms)")
|
|
465
623
|
print(format_table(columns, rows))
|
|
466
|
-
if len(rows)
|
|
624
|
+
if len(rows) == 0:
|
|
625
|
+
hint = _diagnose_empty_result(sql, conn)
|
|
626
|
+
if hint:
|
|
627
|
+
print(f" {hint}")
|
|
628
|
+
elif len(rows) > 50:
|
|
467
629
|
print(f" ⚠ {len(rows)} satır döndü, ilk 50 gösteriliyor. Soruyu daraltın.")
|
|
468
630
|
|
|
469
631
|
# Entity context çıkar
|
|
@@ -478,10 +640,10 @@ def run_query(conn, system_prompt, question, col_map=None):
|
|
|
478
640
|
# Genel SQL hata retry — tek retry, her hata tipinde
|
|
479
641
|
print(f"🔄 Retry ({_short_error(e)})...")
|
|
480
642
|
retry_q = _build_retry_prompt(question, sql, e, col_map)
|
|
481
|
-
resp2, _, _ = ask_model(system_prompt, retry_q)
|
|
643
|
+
resp2, _, _, _ = ask_model(system_prompt, retry_q)
|
|
482
644
|
sql2 = extract_sql(resp2)
|
|
483
645
|
if sql2 and not check_sql_safety(sql2):
|
|
484
|
-
sql2 =
|
|
646
|
+
sql2 = _apply_guardrails(sql2)
|
|
485
647
|
print(f"🔍 Retry SQL: {sql2}")
|
|
486
648
|
try:
|
|
487
649
|
result = conn.execute(sql2)
|
|
@@ -587,6 +749,64 @@ def interactive_loop(conn, schema_text, domain_text, col_map):
|
|
|
587
749
|
print()
|
|
588
750
|
|
|
589
751
|
|
|
752
|
+
BENCH_QUESTIONS = [
|
|
753
|
+
"toplam kaç firma var",
|
|
754
|
+
"en büyük firma hangisi",
|
|
755
|
+
"kobi segmentinde kaç firma var",
|
|
756
|
+
"en riskli 5 firma",
|
|
757
|
+
"10000041 mutanın bilgileri",
|
|
758
|
+
"son dönemde riski artan firmalar",
|
|
759
|
+
"ews skoru 500den büyük firmalar",
|
|
760
|
+
"en büyük 3 kobi firması",
|
|
761
|
+
"2602 döneminde kaç firma var",
|
|
762
|
+
"kombine riski 10 milyonun üstünde olan firmalar",
|
|
763
|
+
]
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
def run_benchmark(conn, schema_text, domain_text, col_map):
    """Ask every BENCH_QUESTIONS entry twice and print a latency report.

    Only the model call is timed; the generated SQL is never executed.
    The first pass is labelled "cold" and the second "warm". The report has
    a per-pass summary (mean, P50, P95, mean prompt-eval and generation
    time) followed by a per-question table.

    Args:
        conn: Database connection; accepted for signature parity with the
            other entry points and not used here.
        schema_text: Schema description placed in the system prompt.
        domain_text: Domain notes placed in the system prompt.
        col_map: Column-owner map used to build the column hints.
    """
    column_hints = format_column_hints(col_map)

    def measure(question):
        # One model round-trip; Ollama's timing metrics are merged into the record.
        prompt = build_system_prompt(schema_text, domain_text,
                                     question=question, column_hints=column_hints)
        _, elapsed, tokens, timing = ask_model(prompt, question)
        record = {"question": question, "elapsed": elapsed, "tokens": tokens}
        record.update(timing)
        return record

    def run_pass():
        return [measure(question) for question in BENCH_QUESTIONS]

    base_prompt = build_system_prompt(schema_text, domain_text, column_hints=column_hints)
    print(f"contsql benchmark — {len(BENCH_QUESTIONS)} soru × 2 tur\n")
    print(f"Prompt boyutu: {len(base_prompt)} char\n")

    cold = run_pass()
    warm = run_pass()

    for title, records in (("TUR 1 (cold)", cold), ("TUR 2 (warm)", warm)):
        latencies = sorted(r["elapsed"] for r in records)
        prompt_eval = [r.get("prompt_eval_ms", 0) for r in records]
        generation = [r.get("generation_ms", 0) for r in records]
        count = len(latencies)
        print(f"{title}:")
        mean = sum(latencies) / count
        p50 = latencies[count // 2]
        p95 = latencies[int(count * 0.95)]
        print(f" Ort: {mean:.1f}s | P50: {p50:.1f}s | P95: {p95:.1f}s")
        mean_eval = sum(prompt_eval) / len(prompt_eval)
        mean_gen = sum(generation) / len(generation)
        print(f" Prompt eval ort: {mean_eval:.0f}ms | Gen ort: {mean_gen:.0f}ms")
        print()

    # Per-question detail; the eval/gen columns come from the warm pass.
    print(f"{'#':>2} | {'Soru':<45} | {'Cold':>5} | {'Warm':>5} | {'P.Eval':>7} | {'Gen':>5}")
    print("-" * 85)
    for number, (c, w) in enumerate(zip(cold, warm), start=1):
        print(f"{number:2d} | {c['question'][:45]:<45} | "
              f"{c['elapsed']:5.1f} | {w['elapsed']:5.1f} | "
              f"{w.get('prompt_eval_ms',0):7.0f} | {w.get('generation_ms',0):5.0f}")
|
|
808
|
+
|
|
809
|
+
|
|
590
810
|
def main():
|
|
591
811
|
parser = argparse.ArgumentParser(
|
|
592
812
|
description="contsql — DuckDB SQL agent",
|
|
@@ -596,6 +816,7 @@ def main():
|
|
|
596
816
|
parser.add_argument("question", nargs="?", help="Tek soru (opsiyonel)")
|
|
597
817
|
global MODEL
|
|
598
818
|
parser.add_argument("--model", default=MODEL, help="Ollama model adı (default: cont-local)")
|
|
819
|
+
parser.add_argument("--bench", action="store_true", help="Benchmark modu (10 soru × 2 tur)")
|
|
599
820
|
args = parser.parse_args()
|
|
600
821
|
MODEL = args.model
|
|
601
822
|
|
|
@@ -610,16 +831,31 @@ def main():
|
|
|
610
831
|
# Loglama başlat
|
|
611
832
|
log_path = _init_log(db_path)
|
|
612
833
|
|
|
613
|
-
#
|
|
834
|
+
# Warmup — background thread
|
|
835
|
+
warmup_t = threading.Thread(target=_warmup_model, daemon=True)
|
|
836
|
+
t0_warmup = time.time()
|
|
837
|
+
warmup_t.start()
|
|
838
|
+
|
|
839
|
+
# Schema + domain + column map (warmup parallel çalışır)
|
|
614
840
|
schema_text = read_schema(conn)
|
|
615
841
|
domain_text = read_domain_notes(str(db_path))
|
|
616
842
|
col_map = build_column_owner_map(conn)
|
|
617
843
|
|
|
844
|
+
# Warmup bitmesini bekle
|
|
845
|
+
warmup_t.join(timeout=60)
|
|
846
|
+
warmup_ms = (time.time() - t0_warmup) * 1000
|
|
847
|
+
|
|
848
|
+
# DB health check
|
|
849
|
+
db_ok = _db_health_check(conn)
|
|
850
|
+
|
|
618
851
|
print(f"DB: {db_path} | Model: {MODEL} | Log: {log_path}")
|
|
619
|
-
|
|
852
|
+
print(f"🔥 Warmup: {warmup_ms:.0f}ms")
|
|
853
|
+
_log("session_start", db=str(db_path), model=MODEL, warmup_ms=round(warmup_ms))
|
|
620
854
|
|
|
621
|
-
#
|
|
622
|
-
if args.
|
|
855
|
+
# Benchmark, tek soru veya interaktif
|
|
856
|
+
if args.bench:
|
|
857
|
+
run_benchmark(conn, schema_text, domain_text, col_map)
|
|
858
|
+
elif args.question:
|
|
623
859
|
column_hints = format_column_hints(col_map)
|
|
624
860
|
system_prompt = build_system_prompt(schema_text, domain_text,
|
|
625
861
|
question=args.question,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|