contsql 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
contsql-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: contsql
3
+ Version: 0.2.0
4
+ Requires-Python: >=3.10
5
+ Requires-Dist: duckdb
6
+ Requires-Dist: requests
@@ -0,0 +1,51 @@
1
+ # contsql
2
+
3
+ Minimal DuckDB SQL agent. Soru sor, SQL üret, çalıştır, göster.
4
+
5
+ ## Kullanım
6
+
7
+ ```bash
8
+ # Tek soru
9
+ python contsql.py ~/projects/ews_v511/ews_dashboard/db/ews.duckdb "en riskli 10 firma"
10
+
11
+ # İnteraktif mod
12
+ python contsql.py ~/projects/ews_v511/ews_dashboard/db/ews.duckdb
13
+
14
+ # Trace açık (not: 0.2.0 sürümündeki contsql.py `--trace` bayrağını tanımlamıyor; argparse bu bayrağı reddeder)
15
+ python contsql.py ~/projects/ews_v511/ews_dashboard/db/ews.duckdb --trace "firma adedi"
16
+
17
+ # Farklı model
18
+ python contsql.py ~/db.duckdb --model qwen3-coder "kaç tablo var"
19
+ ```
20
+
21
+ ## Gereksinimler
22
+
23
+ - Python 3.10+
24
+ - `duckdb`, `requests`
25
+ - Ollama çalışıyor olmalı (localhost:11434)
26
+ - Model yüklü: `cont-local` (default) veya `CONTSQL_MODEL` env var
27
+
28
+ ## Env vars
29
+
30
+ - `OLLAMA_HOST` — Ollama URL (default: http://localhost:11434)
31
+ - `CONTSQL_MODEL` — Model adı (default: cont-local)
32
+ - `CONTSQL_TIMEOUT` — LLM timeout saniye (default: 120)
33
+
34
+ ## Domain bilgisi
35
+
36
+ `domain_notes.txt` veya `ews_domain.yaml` dosyası DB dizininde veya contsql dizininde varsa
37
+ system prompt'a eklenir. Yoksa sadece schema ile çalışır.
38
+
39
+ ## Güvenlik
40
+
41
+ - Connection: `read_only=True`
42
+ - SQL: sadece SELECT/WITH (defense-in-depth)
43
+ - Banned: INSERT, UPDATE, DELETE, DROP, ALTER, CREATE, TRUNCATE, EXEC
44
+
45
+ ## Bilinen sınırlamalar
46
+
47
+ - Multi-turn yok — her soru bağımsız; interaktif modda yalnızca önceki sonucun entity_id listesi sonraki soruya bağlam olarak eklenir
48
+ - Chat history tutulmaz
49
+ - Sadece DuckDB (PostgreSQL/MySQL desteklenmiyor)
50
+ - Model SQL üretemezse retry mekanizması yok
51
+ - ews_skor VARCHAR — TRY_CAST gerekebilir
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: contsql
3
+ Version: 0.2.0
4
+ Requires-Python: >=3.10
5
+ Requires-Dist: duckdb
6
+ Requires-Dist: requests
@@ -0,0 +1,9 @@
1
+ README.md
2
+ contsql.py
3
+ pyproject.toml
4
+ contsql.egg-info/PKG-INFO
5
+ contsql.egg-info/SOURCES.txt
6
+ contsql.egg-info/dependency_links.txt
7
+ contsql.egg-info/entry_points.txt
8
+ contsql.egg-info/requires.txt
9
+ contsql.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ contsql = contsql:main
@@ -0,0 +1,2 @@
1
+ duckdb
2
+ requests
@@ -0,0 +1 @@
1
+ contsql
@@ -0,0 +1,343 @@
1
+ #!/usr/bin/env python3
2
+ # v0.2 | 2026-04-13 | case insensitive ILIKE guardrail + önceki sorgu entity context
3
+ """contsql — Minimal DuckDB SQL agent. Soru sor, SQL üret, çalıştır, göster."""
4
+
5
+ import argparse
6
+ import json
7
+ import os
8
+ import re
9
+ import sys
10
+ import time
11
+ from pathlib import Path
12
+
13
+ import duckdb
14
+ import requests
15
+
16
+
17
+ # ── Config ──
18
+
19
def _normalize_ollama_url(raw, default="http://localhost:11434"):
    """Return *raw* as a base URL with a scheme and without a trailing slash.

    ``OLLAMA_HOST`` is often set to a bare ``host:port`` value; without a
    scheme ``requests`` cannot pick a connection adapter, and a trailing
    slash would yield ``//api/chat`` when the endpoint path is appended.
    A blank value falls back to *default*.
    """
    url = (raw or "").strip().rstrip("/")
    if not url:
        return default
    if "://" not in url:
        url = f"http://{url}"
    return url


def _env_int(name, default):
    """Read an integer environment variable; use *default* when unset or not an integer."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        # A malformed setting should not crash the tool at import time.
        return default


# Base URL of the Ollama server (no trailing slash).
OLLAMA_URL = _normalize_ollama_url(os.environ.get("OLLAMA_HOST", "http://localhost:11434"))
# Model name sent with each chat request; main() may override it from --model.
MODEL = os.environ.get("CONTSQL_MODEL", "cont-local")
# HTTP timeout in seconds for the LLM call.
TIMEOUT = _env_int("CONTSQL_TIMEOUT", 120)

# Keywords that must never appear in generated SQL (defense in depth on top of
# the read-only connection).
BANNED_SQL = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "TRUNCATE", "EXEC"]
24
+
25
+
26
+ # ── Schema discovery ──
27
+
28
def read_schema(conn):
    """Describe every table in the ``main`` schema as prompt-ready text.

    Args:
        conn: Open DuckDB connection (any object whose ``execute`` returns a
            result exposing ``fetchall``/``fetchone``).

    Returns:
        One line per table with its name, row count and ``column (type)``
        pairs in ordinal order, joined by newlines. Empty string when the
        schema has no tables.
    """
    tables = conn.execute(
        "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
    ).fetchall()

    lines = []
    for (tname,) in tables:
        # Bind the name as a parameter so quotes inside it cannot break the query.
        cols = conn.execute(
            """
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_schema='main' AND table_name = ?
            ORDER BY ordinal_position
            """,
            [tname],
        ).fetchall()
        # Identifiers cannot be bound, so quote the name and double any
        # embedded quote; this also covers names with spaces or reserved words.
        quoted = '"' + str(tname).replace('"', '""') + '"'
        count = conn.execute(f"SELECT COUNT(*) FROM {quoted}").fetchone()[0]
        col_str = ", ".join(f"{c} ({t})" for c, t in cols)
        lines.append(f" {tname} ({count} satır): {col_str}")
    return "\n".join(lines)
46
+
47
+
48
def read_domain_notes(db_path):
    """Return the text of the first domain-notes file found, or ``""``.

    For each candidate name (``domain_notes.txt`` first, then
    ``ews_domain.yaml``) the database's directory is checked before the
    directory containing this module.

    Args:
        db_path: Path to the DuckDB file; only its parent directory is used.

    Returns:
        File contents decoded as UTF-8 (undecodable bytes are dropped), or an
        empty string when no candidate file exists.
    """
    search_dirs = (Path(db_path).parent, Path(__file__).parent)
    for name in ("domain_notes.txt", "ews_domain.yaml"):
        for directory in search_dirs:
            candidate = directory / name
            # is_file() rather than exists(): a directory with a matching name
            # would otherwise make read_text() raise.
            if candidate.is_file():
                return candidate.read_text(encoding="utf-8", errors="ignore")
    return ""
59
+
60
+
61
+ # ── System prompt ──
62
+
63
def build_system_prompt(schema_text, domain_text="", last_result_entities=None):
    """Assemble the system prompt sent to the model.

    Args:
        schema_text: Schema description placed under the schema heading.
        domain_text: Optional domain notes; appended only when non-empty.
        last_result_entities: Optional entity_id list from the previous
            result; appended (with usage instructions) only when truthy.

    Returns:
        The complete prompt string.
    """
    sections = [
        "Sen bir SQL asistanısın. Kullanıcının sorusuna uygun SQL yaz.\n"
        "\n"
        "Kurallar:\n"
        "- Sadece SELECT veya WITH ... SELECT yaz. INSERT/UPDATE/DELETE/DROP/ALTER/CREATE yasak.\n"
        "- Cevabında SADECE SQL ver, ```sql ... ``` bloğu içinde.\n"
        "- SQL öncesi veya sonrası açıklama ekleme.\n"
        "- Emin değilsen \"Bu soruyu mevcut tablolarla cevaplayamıyorum\" de.\n"
        "- Veri uydurma. Sorgu sonucu olmadan liste verme.\n"
        "- String karşılaştırmalarında LIKE yerine her zaman ILIKE kullan. Türkçe karakter eşleştirmesi (İ↔i, I↔ı, Ş↔ş, Ü↔ü, Ö↔ö, Ç↔ç, Ğ↔ğ) için ILIKE şart.\n"
        "\n"
        "Veritabanı şeması:\n"
        f"{schema_text}\n"
    ]

    if domain_text:
        sections.append(f"\nDomain bilgisi:\n{domain_text}\n")

    if last_result_entities:
        sections.append(
            f"\nÖNCEKİ SORGU SONUCUNDAKI FİRMALAR (entity_id): {last_result_entities}\n"
            "Kullanıcı 'bu firmalar', 'bunların', 'aynıları', 'yukarıdakiler' gibi "
            "referans verirse bu entity_id listesini WHERE koşulunda kullan.\n"
        )

    return "".join(sections)
86
+
87
+
88
+ # ── SQL extraction + safety ──
89
+
90
def extract_sql(response_text):
    """Pull the SQL statement out of a model reply.

    Lookup order:
      1. The first fenced block tagged ``sql`` (tag matched case-insensitively,
         so ``SQL`` fences from some models are accepted too).
      2. The first untagged/other fenced block whose content starts with
         SELECT or WITH.
      3. The whole reply, when it starts with SELECT or WITH.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The SQL text with surrounding whitespace stripped, or ``None`` when
        nothing that looks like a query is found.
    """
    starts = ("SELECT", "WITH")

    tagged = re.search(
        r"```[ \t]*sql\s*\n?(.*?)```", response_text, re.DOTALL | re.IGNORECASE
    )
    if tagged:
        return tagged.group(1).strip()

    # Models sometimes drop the language tag; accept such a fence only when
    # its content actually looks like a read query.
    for fence in re.finditer(r"```(.*?)```", response_text, re.DOTALL):
        body = fence.group(1).strip()
        if body.upper().startswith(starts):
            return body

    # Fallback: the entire reply may be bare SQL.
    stripped = response_text.strip()
    if stripped.upper().startswith(starts):
        return stripped
    return None
100
+
101
+
102
def _like_to_ilike(sql):
    """Rewrite ``LIKE`` / ``NOT LIKE`` operators to ``ILIKE`` / ``NOT ILIKE``.

    Single-quoted string literals and double-quoted identifiers are matched
    first and returned untouched, so text such as ``'I LIKE it'`` or a column
    named ``"like"`` is never rewritten. Existing ``ILIKE`` is left alone
    because there is no word boundary between ``I`` and ``LIKE``.

    Args:
        sql: SQL text to rewrite.

    Returns:
        The SQL with every bare LIKE keyword replaced by ILIKE; an optional
        leading ``NOT`` (with its original spacing and case) is preserved.
    """
    pattern = r"""(?x)
        (?P<skip>
            '(?:[^']|'')*'              # single-quoted string literal
          | "(?:[^"]|"")*"              # double-quoted identifier
        )
        |
        \b(?P<negate>NOT\s+)?LIKE\b     # optional NOT, then the LIKE keyword
    """

    def _swap(match):
        if match.group("skip"):
            return match.group(0)
        return f"{match.group('negate') or ''}ILIKE"

    return re.sub(pattern, _swap, sql, flags=re.IGNORECASE)
116
+
117
+
118
def check_sql_safety(sql, banned=None):
    """Allow only SELECT/WITH queries that contain no banned keyword.

    Keywords are matched as whole tokens, so identifiers that merely contain
    one (``created_at``, ``updated_at``, ``deleted``) no longer trip the
    check. The whole text is scanned, string literals included, which keeps
    the check conservative.

    Args:
        sql: SQL text to validate.
        banned: Optional iterable of keywords to reject. Defaults to the
            module-level ``BANNED_SQL`` plus ``EXECUTE`` (which the previous
            substring match caught through ``EXEC``).

    Returns:
        ``None`` when the query is acceptable, otherwise an error message.
    """
    if banned is None:
        banned = [*BANNED_SQL, "EXECUTE"]

    sql_upper = sql.strip().upper()
    if not sql_upper.startswith(("SELECT", "WITH")):
        return "HATA: Sadece SELECT/WITH sorguları çalıştırılabilir."

    for kw in banned:
        # Reject the keyword only when it is not embedded in a longer identifier.
        token = rf"(?<![A-Z0-9_]){re.escape(kw.upper())}(?![A-Z0-9_])"
        if re.search(token, sql_upper):
            return f"HATA: {kw} komutu yasak. Sadece okuma sorguları izinli."
    return None
127
+
128
+
129
+ # ── LLM call ──
130
+
131
def ask_model(system_prompt, question):
    """Send one system+user exchange to Ollama's chat endpoint.

    Args:
        system_prompt: Content of the system message.
        question: Content of the user message.

    Returns:
        A ``(content, elapsed_seconds, tokens)`` tuple. ``content`` is always
        a string: the model's reply (``""`` when the reply carries no
        content), or ``"LLM HATA: ..."`` when the request fails. ``tokens``
        is the reply's ``eval_count`` field, or 0 when absent or on failure.
    """
    # Monotonic clock: elapsed time must not be affected by wall-clock changes.
    started = time.monotonic()
    try:
        resp = requests.post(
            f"{OLLAMA_URL}/api/chat",
            json={
                "model": MODEL,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question},
                ],
                "stream": False,
            },
            timeout=TIMEOUT,
        )
        resp.raise_for_status()
        data = resp.json()
        # "message" or "content" may be present but null; callers expect a str.
        content = (data.get("message") or {}).get("content") or ""
        tokens = data.get("eval_count") or 0
        return content, time.monotonic() - started, tokens
    except Exception as e:
        # Deliberate best-effort boundary: any failure (network, HTTP status,
        # malformed JSON) is reported to the caller as an error string.
        return f"LLM HATA: {e}", time.monotonic() - started, 0
155
+
156
+
157
+ # ── Result formatting ──
158
+
159
def _fmt_value(v):
    """Format a cell for display using Turkish number conventions.

    Args:
        v: Any value from a result row.

    Returns:
        ``"—"`` for ``None``; floats with ``.`` as thousands separator and
        ``,`` as decimal separator, rounded to two decimals (a ``,00`` tail
        is dropped); ints with ``.`` thousands separators; ``str(v)`` for
        everything else, including booleans and non-finite floats.
    """
    import math

    if v is None:
        return "—"
    if isinstance(v, bool):
        # bool is an int subclass; without this it would print as 1 / 0.
        return str(v)
    if isinstance(v, float):
        if not math.isfinite(v):
            # nan/inf have no integer and decimal parts to split.
            return str(v)
        # Format with English separators, then swap them. Formatting the whole
        # number keeps the sign for values in (-1, 0).
        text = f"{v:,.2f}".translate(str.maketrans({",": ".", ".": ","}))
        text = text.removesuffix(",00")
        # Tiny negatives round to "-0"; show a plain zero instead.
        return "0" if text == "-0" else text
    if isinstance(v, int):
        return f"{v:,}".replace(",", ".")
    return str(v)
175
+
176
+
177
def format_table(columns, rows, max_rows=50):
    """Render query results as a plain-text table.

    Args:
        columns: Column names, in result order.
        rows: Result rows (sequences aligned with ``columns``).
        max_rows: Maximum number of rows to print; a trailing line reports
            how many were left out.

    Returns:
        The table as a single string; a placeholder line when ``rows`` is empty.
    """
    if not rows:
        return " (sonuç yok)"

    # Format only the rows that will actually be displayed.
    shown = [[_fmt_value(cell) for cell in record] for record in rows[:max_rows]]

    # Column width = widest of header and cells, capped at 30 characters.
    widths = [len(str(col)) for col in columns]
    for record in shown:
        for pos, text in enumerate(record):
            if len(text) > widths[pos]:
                widths[pos] = len(text)
    widths = [min(width, 30) for width in widths]

    header = " | ".join(str(col).ljust(widths[pos]) for pos, col in enumerate(columns))
    rule = "-+-".join("-" * width for width in widths)
    out = [f" {header}", f" {rule}"]

    for record in shown:
        # Cells longer than the column width are cut to fit.
        cells = [text[:widths[pos]].ljust(widths[pos]) for pos, text in enumerate(record)]
        out.append(f" {' | '.join(cells)}")

    hidden = len(rows) - max_rows
    if hidden > 0:
        out.append(f" ... ve {hidden} satır daha")
    return "\n".join(out)
209
+
210
+
211
+ # ── Main loop ──
212
+
213
def _extract_entity_ids(columns, rows, max_entities=100):
    """Collect the distinct, non-null ``entity_id`` values of a result set.

    Args:
        columns: Column names of the result (matched case-insensitively).
        rows: Result rows.
        max_entities: Upper bound on how many distinct ids may be returned.

    Returns:
        Ids in first-seen order, or ``None`` when there is no ``entity_id``
        column, no non-null id, or more than ``max_entities`` distinct ids.
    """
    lowered = [name.lower() for name in columns]
    try:
        position = lowered.index("entity_id")
    except ValueError:
        return None

    # A dict keeps insertion order, giving an order-preserving de-duplication.
    seen = {}
    for record in rows:
        value = record[position]
        if value is not None:
            seen.setdefault(value, None)

    if not seen or len(seen) > max_entities:
        # Nothing usable, or too many ids to put into the prompt.
        return None
    return list(seen)
223
+
224
+
225
def run_query(conn, system_prompt, question):
    """Answer one question: ask the model, validate the SQL, run it, print the result.

    Args:
        conn: Open DuckDB connection used to execute the generated SQL.
        system_prompt: System prompt passed to the model.
        question: The user's natural-language question.

    Returns:
        The entity_id list extracted from the result, or ``None`` when the
        model produced no SQL, the SQL was rejected, execution failed, or no
        usable entity_id list could be extracted.
    """
    # 1. Ask the model.
    response, elapsed, tokens = ask_model(system_prompt, question)

    print(f"\n💭 MODEL ({elapsed:.1f}s, ~{tokens} tok)")
    if not response.startswith("LLM HATA"):
        # Show any text the model wrote outside the SQL fence.
        thought = re.sub(r'```sql.*?```', '', response, flags=re.DOTALL).strip()
        if thought:
            print(f" {thought[:200]}")

    # 2. Extract the SQL.
    sql = extract_sql(response)
    if not sql:
        print("\n❌ Model SQL üretmedi:")
        print(f" {response[:300]}")
        return None

    # 3. Safety check.
    safety_error = check_sql_safety(sql)
    if safety_error:
        print(f"\n⛔ {safety_error}")
        print(f"🔍 SQL: {sql[:200]}")
        return None

    # 3b. LIKE -> ILIKE guardrail.
    sql = _like_to_ilike(sql)

    print(f"🔍 SQL: {sql}")

    # 4. Execute.
    try:
        t0 = time.time()
        result = conn.execute(sql)
        columns = [desc[0] for desc in result.description]
        rows = result.fetchall()
        query_ms = (time.time() - t0) * 1000

        print(f"\n📊 SONUÇ ({len(rows)} satır, {query_ms:.0f}ms)")
        print(format_table(columns, rows))

        # Extract entity context for follow-up questions.
        entities = _extract_entity_ids(columns, rows)
        # Warn about an overly broad result only when the result actually has
        # an entity_id column; otherwise there was never a list to carry over.
        has_entity_column = any(str(c).lower() == "entity_id" for c in columns)
        if entities is None and has_entity_column and len(rows) > 100:
            print(" ⚠ Önceki sorgu çok geniş — firma referansı için soruyu daraltın.")
        return entities
    except duckdb.Error as e:
        print(f"\n❌ SQL hatası: {e}")
        print(f"🔍 SQL: {sql}")
        return None
275
+
276
+
277
def interactive_loop(conn, schema_text, domain_text):
    """Run the interactive prompt until the user quits.

    Args:
        conn: Open DuckDB connection.
        schema_text: Schema description used in every system prompt.
        domain_text: Domain notes used in every system prompt.

    The entity_id list of the most recent result that produced one is passed
    into the next question's system prompt.
    """
    exit_words = ("quit", "exit", "q", "çık")
    schema_words = ("schema", "şema")

    print(f"\ncontsql hazır. Model: {MODEL}")
    print("Çıkmak için: quit/exit/q\n")

    remembered = None

    while True:
        try:
            question = input("contsql> ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nÇıkış.")
            return

        if not question:
            continue

        command = question.lower()
        if command in exit_words:
            return
        if command in schema_words:
            # Re-read the schema so the output reflects the current database.
            print(f"\n{read_schema(conn)}\n")
            continue

        prompt = build_system_prompt(schema_text, domain_text, remembered)
        found = run_query(conn, prompt, question)
        # Keep the earlier list when this query yielded none.
        if found is not None:
            remembered = found
        print()
304
+
305
+
306
def main():
    """Command-line entry point: parse arguments, open the database, answer.

    With a question argument a single query is run; without one the
    interactive loop starts. Exits with status 1 when the database file does
    not exist. The connection is closed even if a query or the loop raises.
    """
    global MODEL

    parser = argparse.ArgumentParser(
        description="contsql — DuckDB SQL agent",
        usage="contsql DB_PATH [SORU] [--model MODEL]",
    )
    parser.add_argument("db_path", help="DuckDB dosya yolu")
    parser.add_argument("question", nargs="?", help="Tek soru (opsiyonel)")
    parser.add_argument("--model", default=MODEL, help="Ollama model adı (default: cont-local)")
    args = parser.parse_args()
    # ask_model() reads the module-level MODEL, so the override is stored there.
    MODEL = args.model

    # Connect to the database.
    db_path = Path(args.db_path).expanduser()
    if not db_path.exists():
        print(f"HATA: DB bulunamadı: {db_path}")
        sys.exit(1)

    conn = duckdb.connect(str(db_path), read_only=True)
    try:
        # Schema + domain notes.
        schema_text = read_schema(conn)
        domain_text = read_domain_notes(str(db_path))

        print(f"DB: {db_path} | Model: {MODEL}")

        # Single question or interactive mode.
        if args.question:
            system_prompt = build_system_prompt(schema_text, domain_text)
            run_query(conn, system_prompt, args.question)
        else:
            interactive_loop(conn, schema_text, domain_text)
    finally:
        # Release the database handle on every exit path.
        conn.close()
340
+
341
+
342
# Run the CLI only when this file is executed as a script; importing the
# module (as the `contsql = contsql:main` console script does) must not start it.
if __name__ == "__main__":
    main()
@@ -0,0 +1,15 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "contsql"
7
+ version = "0.2.0"
8
+ requires-python = ">=3.10"
9
+ dependencies = ["duckdb", "requests"]
10
+
11
+ [project.scripts]
12
+ contsql = "contsql:main"
13
+
14
+ [tool.setuptools]
15
+ py-modules = ["contsql"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+