contsql 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contsql-0.2.0/PKG-INFO +6 -0
- contsql-0.2.0/README.md +51 -0
- contsql-0.2.0/contsql.egg-info/PKG-INFO +6 -0
- contsql-0.2.0/contsql.egg-info/SOURCES.txt +9 -0
- contsql-0.2.0/contsql.egg-info/dependency_links.txt +1 -0
- contsql-0.2.0/contsql.egg-info/entry_points.txt +2 -0
- contsql-0.2.0/contsql.egg-info/requires.txt +2 -0
- contsql-0.2.0/contsql.egg-info/top_level.txt +1 -0
- contsql-0.2.0/contsql.py +343 -0
- contsql-0.2.0/pyproject.toml +15 -0
- contsql-0.2.0/setup.cfg +4 -0
contsql-0.2.0/PKG-INFO
ADDED
contsql-0.2.0/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# contsql
|
|
2
|
+
|
|
3
|
+
Minimal DuckDB SQL agent. Soru sor, SQL üret, çalıştır, göster.
|
|
4
|
+
|
|
5
|
+
## Kullanım
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Tek soru
|
|
9
|
+
python contsql.py ~/projects/ews_v511/ews_dashboard/db/ews.duckdb "en riskli 10 firma"
|
|
10
|
+
|
|
11
|
+
# İnteraktif mod
|
|
12
|
+
python contsql.py ~/projects/ews_v511/ews_dashboard/db/ews.duckdb
|
|
13
|
+
|
|
14
|
+
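# Trace açık (not: bu sürümdeki contsql.py `--trace` argümanını tanımlamıyor; argparse "unrecognized arguments" hatası verir)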
|
|
15
|
+
python contsql.py ~/projects/ews_v511/ews_dashboard/db/ews.duckdb --trace "firma adedi"
|
|
16
|
+
|
|
17
|
+
# Farklı model
|
|
18
|
+
python contsql.py ~/db.duckdb --model qwen3-coder "kaç tablo var"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Gereksinimler
|
|
22
|
+
|
|
23
|
+
- Python 3.10+
|
|
24
|
+
- `duckdb`, `requests`
|
|
25
|
+
- Ollama çalışıyor olmalı (localhost:11434)
|
|
26
|
+
- Model yüklü: `cont-local` (default) veya `CONTSQL_MODEL` env var
|
|
27
|
+
|
|
28
|
+
## Env vars
|
|
29
|
+
|
|
30
|
+
- `OLLAMA_HOST` — Ollama URL (default: http://localhost:11434)
|
|
31
|
+
- `CONTSQL_MODEL` — Model adı (default: cont-local)
|
|
32
|
+
- `CONTSQL_TIMEOUT` — LLM timeout saniye (default: 120)
|
|
33
|
+
|
|
34
|
+
## Domain bilgisi
|
|
35
|
+
|
|
36
|
+
`domain_notes.txt` (yoksa `ews_domain.yaml`) dosyası DB dizininde veya contsql dizininde varsa
|
|
37
|
+
system prompt'a eklenir. Yoksa sadece schema ile çalışır.
|
|
38
|
+
|
|
39
|
+
## Güvenlik
|
|
40
|
+
|
|
41
|
+
- Connection: `read_only=True`
|
|
42
|
+
- SQL: sadece SELECT/WITH (defense-in-depth)
|
|
43
|
+
- Banned: INSERT, UPDATE, DELETE, DROP, ALTER, CREATE, TRUNCATE, EXEC
|
|
44
|
+
|
|
45
|
+
## Bilinen sınırlamalar
|
|
46
|
+
|
|
47
|
+
- Multi-turn yok — her soru bağımsız (tek istisna: interaktif modda önceki sonucun `entity_id` listesi sistem prompt'una eklenir)
|
|
48
|
+
- Chat history tutulmaz
|
|
49
|
+
- Sadece DuckDB (PostgreSQL/MySQL desteklenmiyor)
|
|
50
|
+
- Model SQL üretemezse retry mekanizması yok
|
|
51
|
+
- ews_skor VARCHAR — TRY_CAST gerekebilir
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
contsql
|
contsql-0.2.0/contsql.py
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# v0.2 | 2026-04-13 | case insensitive ILIKE guardrail + önceki sorgu entity context
|
|
3
|
+
"""contsql — Minimal DuckDB SQL agent. Soru sor, SQL üret, çalıştır, göster."""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import sys
|
|
10
|
+
import time
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import duckdb
|
|
14
|
+
import requests
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ── Config ──
|
|
18
|
+
|
|
19
|
+
# Base URL of the Ollama server. A trailing slash is stripped so that
# f"{OLLAMA_URL}/api/chat" never contains a double slash.
OLLAMA_URL = os.environ.get("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
# Name of the model sent in the chat request.
MODEL = os.environ.get("CONTSQL_MODEL", "cont-local")
# Timeout, in seconds, passed to the HTTP request to the model.
TIMEOUT = int(os.environ.get("CONTSQL_TIMEOUT", "120"))

# SQL keywords that the safety check refuses to run.
BANNED_SQL = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "TRUNCATE", "EXEC"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ── Schema discovery ──
|
|
27
|
+
|
|
28
|
+
def read_schema(conn):
    """Describe every table of the ``main`` schema, one text line per table.

    For each table the column names and types (in ordinal order) and the
    row count are queried.

    Args:
        conn: Open database connection whose ``execute`` returns an object
            offering ``fetchall``/``fetchone``.

    Returns:
        Newline-joined lines of the form
        ``" <table> (<n> satır): <col> (<type>), ..."``; an empty string
        when the schema contains no tables.
    """
    tables = conn.execute(
        "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
    ).fetchall()

    lines = []
    for (tname,) in tables:
        # Bind the table name as a parameter instead of interpolating it, so
        # names containing quotes cannot break (or alter) the statement.
        cols = conn.execute(
            """
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_schema='main' AND table_name=?
            ORDER BY ordinal_position
            """,
            [tname],
        ).fetchall()
        # Identifiers cannot be bound as parameters: quote the name and
        # double any embedded double quote so spaces, reserved words and
        # quotes in table names are handled.
        quoted = '"' + tname.replace('"', '""') + '"'
        count = conn.execute(f"SELECT COUNT(*) FROM {quoted}").fetchone()[0]
        col_str = ", ".join(f"{c} ({t})" for c, t in cols)
        lines.append(f" {tname} ({count} satır): {col_str}")
    return "\n".join(lines)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def read_domain_notes(db_path):
    """Return the text of the first domain-notes file found, else ``""``.

    ``domain_notes.txt`` is looked up before ``ews_domain.yaml``. For each
    name the directory containing ``db_path`` is tried first, then the
    directory of this module. Files are decoded as UTF-8 and undecodable
    bytes are dropped.
    """
    search_dirs = (Path(db_path).parent, Path(__file__).parent)
    for filename in ("domain_notes.txt", "ews_domain.yaml"):
        for folder in search_dirs:
            candidate = folder / filename
            if candidate.exists():
                return candidate.read_text(encoding="utf-8", errors="ignore")
    return ""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ── System prompt ──
|
|
62
|
+
|
|
63
|
+
def build_system_prompt(schema_text, domain_text="", last_result_entities=None):
    """Assemble the system prompt sent to the model.

    The prompt always contains the fixed rule list followed by
    ``schema_text``. A domain section is appended when ``domain_text`` is
    non-empty, and a section listing the previous result's entity ids is
    appended when ``last_result_entities`` is truthy.
    """
    sections = [f"""Sen bir SQL asistanısın. Kullanıcının sorusuna uygun SQL yaz.

Kurallar:
- Sadece SELECT veya WITH ... SELECT yaz. INSERT/UPDATE/DELETE/DROP/ALTER/CREATE yasak.
- Cevabında SADECE SQL ver, ```sql ... ``` bloğu içinde.
- SQL öncesi veya sonrası açıklama ekleme.
- Emin değilsen "Bu soruyu mevcut tablolarla cevaplayamıyorum" de.
- Veri uydurma. Sorgu sonucu olmadan liste verme.
- String karşılaştırmalarında LIKE yerine her zaman ILIKE kullan. Türkçe karakter eşleştirmesi (İ↔i, I↔ı, Ş↔ş, Ü↔ü, Ö↔ö, Ç↔ç, Ğ↔ğ) için ILIKE şart.

Veritabanı şeması:
{schema_text}
"""]

    # Optional free-form domain notes.
    if domain_text:
        sections.append(f"\nDomain bilgisi:\n{domain_text}\n")

    # Optional context so the model can resolve references to the previous result.
    if last_result_entities:
        sections.append(
            f"\nÖNCEKİ SORGU SONUCUNDAKI FİRMALAR (entity_id): {last_result_entities}\n"
            "Kullanıcı 'bu firmalar', 'bunların', 'aynıları', 'yukarıdakiler' gibi "
            "referans verirse bu entity_id listesini WHERE koşulunda kullan.\n"
        )

    return "".join(sections)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ── SQL extraction + safety ──
|
|
89
|
+
|
|
90
|
+
def extract_sql(response_text):
    """Pull the SQL statement out of a model response.

    Tried in order:
      1. a fenced block tagged ``sql`` (tag matched case-insensitively);
      2. an untagged fenced block whose content starts with SELECT/WITH;
      3. the whole response, if it starts with SELECT/WITH.

    Returns:
        The stripped SQL text, or ``None`` when no SQL was found.
    """
    # Models sometimes emit ```SQL instead of ```sql, so ignore case.
    tagged = re.search(
        r'```sql\s*\n?(.*?)```', response_text, re.DOTALL | re.IGNORECASE
    )
    if tagged:
        return tagged.group(1).strip()

    # Untagged fence: accept it only when the content looks like a query, so
    # fenced non-SQL text is not mistaken for a statement.
    bare = re.search(r'```\s*\n?(.*?)```', response_text, re.DOTALL)
    if bare:
        candidate = bare.group(1).strip()
        if candidate.upper().startswith(("SELECT", "WITH")):
            return candidate

    # Fallback: the whole response may be bare SQL.
    stripped = response_text.strip()
    if stripped.upper().startswith(("SELECT", "WITH")):
        return stripped
    return None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _like_to_ilike(sql):
|
|
103
|
+
"""LIKE → ILIKE guardrail. String literal içindekilere dokunmaz."""
|
|
104
|
+
return re.sub(
|
|
105
|
+
r"""(?x)
|
|
106
|
+
( # Grup 1: string literal — atla
|
|
107
|
+
'(?:[^']|'')*' # tek tırnak içi
|
|
108
|
+
)
|
|
109
|
+
|
|
|
110
|
+
\b(NOT\s+)?LIKE\b # Grup 2: opsiyonel NOT, ardından LIKE keyword
|
|
111
|
+
""",
|
|
112
|
+
lambda m: m.group(0) if m.group(1) else f"{m.group(2) or ''}ILIKE",
|
|
113
|
+
sql,
|
|
114
|
+
flags=re.IGNORECASE,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def check_sql_safety(sql):
    """Allow only SELECT/WITH statements that contain no banned keyword.

    Banned keywords (``BANNED_SQL``) are compared against whole word tokens
    of the upper-cased statement, so identifiers that merely contain a
    keyword (``created_at``, ``updated_at``, ``deleted``) are accepted.
    ``EXECUTE`` is still treated as a hit for ``EXEC``, as it was under the
    former substring test.

    Returns:
        An error message string when the statement is rejected, otherwise
        ``None``.
    """
    sql_upper = sql.strip().upper()
    if not sql_upper.startswith(("SELECT", "WITH")):
        return "HATA: Sadece SELECT/WITH sorguları çalıştırılabilir."

    # Word tokens: underscores are word characters, so CREATED_AT is a single
    # token and does not equal CREATE.
    tokens = set(re.findall(r"\w+", sql_upper))
    for kw in BANNED_SQL:
        if kw in tokens or (kw == "EXEC" and "EXECUTE" in tokens):
            return f"HATA: {kw} komutu yasak. Sadece okuma sorguları izinli."
    return None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ── LLM call ──
|
|
130
|
+
|
|
131
|
+
def ask_model(system_prompt, question):
    """Send one system+user exchange to the Ollama chat endpoint.

    Returns:
        A ``(content, elapsed_seconds, tokens)`` tuple. ``tokens`` is the
        response's ``eval_count`` (0 when absent). If anything raises, the
        content is ``"LLM HATA: <error>"`` and ``tokens`` is 0.
    """
    started = time.time()
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question},
        ],
        "stream": False,
    }
    try:
        reply = requests.post(f"{OLLAMA_URL}/api/chat", json=payload, timeout=TIMEOUT)
        reply.raise_for_status()
        body = reply.json()
        text = body.get("message", {}).get("content", "")
        took = time.time() - started
        return text, took, body.get("eval_count", 0)
    except Exception as exc:
        # Deliberate best effort: the caller recognises the "LLM HATA" prefix.
        return f"LLM HATA: {exc}", time.time() - started, 0
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# ── Result formatting ──
|
|
158
|
+
|
|
159
|
+
def _fmt_value(v):
|
|
160
|
+
"""Sayısal değerleri Türkçe formatla (binlik nokta, ondalık virgül)."""
|
|
161
|
+
if v is None:
|
|
162
|
+
return "—"
|
|
163
|
+
if isinstance(v, float):
|
|
164
|
+
# Ondalık kısmı olan float
|
|
165
|
+
int_part, dec_part = f"{v:.2f}".split(".")
|
|
166
|
+
int_formatted = f"{int(int_part):,}".replace(",", ".")
|
|
167
|
+
# Sondaki gereksiz sıfırları kaldır ama en az 1 ondalık bırak değil,
|
|
168
|
+
# .00 ise tam sayı gibi göster
|
|
169
|
+
if dec_part == "00":
|
|
170
|
+
return int_formatted
|
|
171
|
+
return f"{int_formatted},{dec_part}"
|
|
172
|
+
if isinstance(v, int):
|
|
173
|
+
return f"{v:,}".replace(",", ".")
|
|
174
|
+
return str(v)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def format_table(columns, rows, max_rows=50):
    """Render query results as a plain-text table.

    At most ``max_rows`` rows are shown; a trailing line reports how many
    were left out. Column widths follow the widest header or cell and are
    capped at 30 characters; cell text longer than its column is cut.
    Returns a placeholder line when ``rows`` is empty.
    """
    if not rows:
        return " (sonuç yok)"

    cap = 30
    # Format every visible cell to text first.
    shown = [[_fmt_value(cell) for cell in record] for record in rows[:max_rows]]

    # Column widths: start from the header, widen to the longest cell, then cap.
    widths = [len(str(name)) for name in columns]
    for cells in shown:
        for pos, text in enumerate(cells):
            if len(text) > widths[pos]:
                widths[pos] = len(text)
    widths = [w if w < cap else cap for w in widths]

    head = " | ".join(str(name).ljust(widths[pos]) for pos, name in enumerate(columns))
    rule = "-+-".join("-" * w for w in widths)
    out = [f" {head}", f" {rule}"]

    for cells in shown:
        body = " | ".join(
            text[:widths[pos]].ljust(widths[pos]) for pos, text in enumerate(cells)
        )
        out.append(f" {body}")

    if len(rows) > max_rows:
        out.append(f" ... ve {len(rows) - max_rows} satır daha")
    return "\n".join(out)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ── Main loop ──
|
|
212
|
+
|
|
213
|
+
def _extract_entity_ids(columns, rows, max_entities=100):
|
|
214
|
+
"""Sorgu sonucundan entity_id listesini çıkar. Yoksa None döner."""
|
|
215
|
+
col_lower = [c.lower() for c in columns]
|
|
216
|
+
if "entity_id" not in col_lower:
|
|
217
|
+
return None
|
|
218
|
+
idx = col_lower.index("entity_id")
|
|
219
|
+
ids = list(dict.fromkeys(row[idx] for row in rows if row[idx] is not None))
|
|
220
|
+
if len(ids) > max_entities:
|
|
221
|
+
return None # çok geniş, context'e ekleme
|
|
222
|
+
return ids or None
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def run_query(conn, system_prompt, question):
    """Answer one question: ask the model, validate the SQL, run it, print it.

    Progress and results are printed to stdout.

    Returns:
        The entity-id list extracted from the result, or ``None`` when no
        SQL was produced, the SQL was rejected, execution failed, or the
        result yields no usable entity ids.
    """
    # Step 1: ask the model.
    answer, llm_seconds, token_count = ask_model(system_prompt, question)

    print(f"\n💭 MODEL ({llm_seconds:.1f}s, ~{token_count} tok)")
    if not answer.startswith("LLM HATA"):
        # Show any text the model wrote outside the SQL fence.
        commentary = re.sub(r'```sql.*?```', '', answer, flags=re.DOTALL).strip()
        if commentary:
            print(f" {commentary[:200]}")

    # Step 2: extract the SQL.
    sql = extract_sql(answer)
    if not sql:
        print("\n❌ Model SQL üretmedi:")
        print(f" {answer[:300]}")
        return None

    # Step 3: safety check.
    problem = check_sql_safety(sql)
    if problem:
        print(f"\n⛔ {problem}")
        print(f"🔍 SQL: {sql[:200]}")
        return None

    # Step 3b: LIKE -> ILIKE guardrail.
    sql = _like_to_ilike(sql)

    print(f"🔍 SQL: {sql}")

    # Step 4: execute.
    try:
        started = time.time()
        cursor = conn.execute(sql)
        header = [column[0] for column in cursor.description]
        records = cursor.fetchall()
        took_ms = (time.time() - started) * 1000
    except duckdb.Error as err:
        print(f"\n❌ SQL hatası: {err}")
        print(f"🔍 SQL: {sql}")
        return None

    print(f"\n📊 SONUÇ ({len(records)} satır, {took_ms:.0f}ms)")
    print(format_table(header, records))

    # Keep entity context for follow-up questions.
    found = _extract_entity_ids(header, records)
    if found is None and len(records) > 100:
        print(" ⚠ Önceki sorgu çok geniş — firma referansı için soruyu daraltın.")
    return found
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def interactive_loop(conn, schema_text, domain_text):
    """Run the read-eval-print loop until the user quits.

    ``quit``/``exit``/``q``/``çık`` (or EOF / Ctrl-C) leave the loop and
    ``schema``/``şema`` prints the schema. Any other input is answered via
    ``run_query``. The most recent non-``None`` entity-id list is passed
    into the next system prompt.
    """
    print(f"\ncontsql hazır. Model: {MODEL}")
    print("Çıkmak için: quit/exit/q\n")

    quit_words = ("quit", "exit", "q", "çık")
    schema_words = ("schema", "şema")
    remembered = None

    while True:
        try:
            line = input("contsql> ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nÇıkış.")
            break

        if not line:
            continue

        command = line.lower()
        if command in quit_words:
            break
        if command in schema_words:
            print(f"\n{read_schema(conn)}\n")
            continue

        prompt = build_system_prompt(schema_text, domain_text, remembered)
        found = run_query(conn, prompt, line)
        # A query without usable entity ids keeps the earlier context.
        if found is not None:
            remembered = found
        print()
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def main():
    """Command-line entry point.

    Parses ``DB_PATH [SORU] [--model MODEL]``, opens the database read-only,
    then either answers the single question or starts the interactive loop.
    Exits with status 1 when the database file does not exist.
    """
    global MODEL

    parser = argparse.ArgumentParser(
        description="contsql — DuckDB SQL agent",
        usage="contsql DB_PATH [SORU] [--model MODEL]",
    )
    parser.add_argument("db_path", help="DuckDB dosya yolu")
    parser.add_argument("question", nargs="?", help="Tek soru (opsiyonel)")
    # The default comes from the module-level MODEL, so let argparse print the
    # effective value instead of a hard-coded name.
    parser.add_argument("--model", default=MODEL, help="Ollama model adı (default: %(default)s)")
    args = parser.parse_args()
    MODEL = args.model

    # Connect to the database.
    db_path = Path(args.db_path).expanduser()
    if not db_path.exists():
        print(f"HATA: DB bulunamadı: {db_path}")
        sys.exit(1)

    conn = duckdb.connect(str(db_path), read_only=True)
    try:
        # Schema + domain notes feed the system prompt.
        schema_text = read_schema(conn)
        domain_text = read_domain_notes(str(db_path))

        print(f"DB: {db_path} | Model: {MODEL}")

        # Single question or interactive session.
        if args.question:
            system_prompt = build_system_prompt(schema_text, domain_text)
            run_query(conn, system_prompt, args.question)
        else:
            interactive_loop(conn, schema_text, domain_text)
    finally:
        # Release the database file even if a query or the REPL raises.
        conn.close()
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "contsql"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
|
+
dependencies = ["duckdb", "requests"]
|
|
10
|
+
|
|
11
|
+
[project.scripts]
|
|
12
|
+
contsql = "contsql:main"
|
|
13
|
+
|
|
14
|
+
[tool.setuptools]
|
|
15
|
+
py-modules = ["contsql"]
|
contsql-0.2.0/setup.cfg
ADDED