contsql 0.2.3__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: contsql
3
- Version: 0.2.3
3
+ Version: 0.2.9
4
4
  Requires-Python: >=3.10
5
5
  Requires-Dist: duckdb
6
6
  Requires-Dist: requests
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: contsql
3
- Version: 0.2.3
3
+ Version: 0.2.9
4
4
  Requires-Python: >=3.10
5
5
  Requires-Dist: duckdb
6
6
  Requires-Dist: requests
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- # v0.2.3 | 2026-04-13 | entity_id+unvan zorunlu kural + slash commands (/s /schema /trace /help)
2
+ # v0.2.9 | 2026-04-13 | JSONL session loglama her şey kaydedilir, default açık
3
3
  """contsql — Minimal DuckDB SQL agent. Soru sor, SQL üret, çalıştır, göster."""
4
4
 
5
5
  import argparse
@@ -22,6 +22,27 @@ TIMEOUT = int(os.environ.get("CONTSQL_TIMEOUT", "120"))
22
22
 
23
23
  BANNED_SQL = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "TRUNCATE", "EXEC"]
24
24
 
25
+ # ── Loglama ──
26
+
27
# Handle of the JSONL session log; None while logging is disabled.
_LOG_FILE = None


def _close_log():
    """Close the active log file, if any. Safe to call more than once."""
    global _LOG_FILE
    if _LOG_FILE is not None:
        _LOG_FILE.close()
        _LOG_FILE = None


def _init_log(db_path):
    """Open the session log next to the database and return its path.

    The log is ``contsql_log.jsonl`` in the directory that contains
    *db_path* and is opened in append mode. A handle left over from an
    earlier call is closed, and the handle is also closed at interpreter
    exit, so it is never leaked.

    Raises:
        OSError: if the log file cannot be opened. Any previously opened
            log stays active in that case.
    """
    global _LOG_FILE
    import atexit

    log_path = Path(db_path).parent / "contsql_log.jsonl"
    # Open the new file first so a failure keeps the previous log usable.
    new_file = open(log_path, "a", encoding="utf-8")
    _close_log()
    _LOG_FILE = new_file
    # Re-register so the closer runs exactly once at exit.
    atexit.unregister(_close_log)
    atexit.register(_close_log)
    return log_path


def _log(event, **data):
    """Append one JSONL record to the session log.

    Does nothing when logging has not been initialised. Each record holds
    a local-time ``ts``, the ``event`` name and every keyword in *data*.
    """
    if _LOG_FILE is None:
        return
    entry = {"ts": time.strftime("%Y-%m-%d %H:%M:%S"), "event": event, **data}
    # default=str: a value that is not JSON-native (Path, Decimal, an
    # exception object, ...) must not make a log call raise.
    _LOG_FILE.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
    _LOG_FILE.flush()
45
+
25
46
 
26
47
  # ── Schema discovery ──
27
48
 
@@ -45,6 +66,31 @@ def read_schema(conn):
45
66
  return "\n".join(lines)
46
67
 
47
68
 
69
def build_column_owner_map(conn):
    """Map each column name to the tables in schema ``main`` that have it.

    Intended to run once at start-up. Issues one query for the table list
    and one query per table for its columns.

    Args:
        conn: open DuckDB connection.

    Returns:
        dict mapping column name -> list of table names, in the order the
        tables were returned by ``information_schema.tables``.
    """
    table_rows = conn.execute(
        "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
    ).fetchall()
    col_map = {}
    for (tname,) in table_rows:
        # Bind the table name instead of splicing it into the SQL text, so
        # names containing quotes cannot break or alter the query.
        column_rows = conn.execute(
            "SELECT column_name FROM information_schema.columns "
            "WHERE table_schema='main' AND table_name = ?",
            [tname],
        ).fetchall()
        for (cname,) in column_rows:
            col_map.setdefault(cname, []).append(tname)
    return col_map
83
+
84
+
85
def format_column_hints(col_map):
    """Render the columns that live in exactly one table as a prompt block.

    Such columns can only be reached by joining their owning table, so
    they are listed (sorted by column name) under a heading for the model.

    Args:
        col_map: dict mapping column name -> list of owning table names.

    Returns:
        The heading followed by one line per single-owner column, or an
        empty string when there is no such column, so callers that test
        the result for truthiness do not add an empty section.
    """
    entries = [
        f" {col} → SADECE {tables[0]}"
        for col, tables in sorted(col_map.items())
        if len(tables) == 1
    ]
    if not entries:
        return ""
    return "\n".join(["KOLON SAHİPLİĞİ (JOIN gerektiren kolonlar):", *entries])
92
+
93
+
48
94
  def read_domain_notes(db_path):
49
95
  """domain_notes.txt veya ews_domain.yaml varsa oku."""
50
96
  for name in ("domain_notes.txt", "ews_domain.yaml"):
@@ -73,10 +119,29 @@ def has_reference_trigger(question):
73
119
  return any(trigger in q_lower for trigger in REFERANS_TRIGGERS)
74
120
 
75
121
 
122
# Phrases signalling that the user wants to modify the previous query.
SORGU_TRIGGERS = [
    "yanına ekle", "yanına da ekle", "yanına", "buna ekle", "buna da ekle",
    "kolonu da ekle", "kolonunu da ekle", "bir de", "aynısına",
    "aynı sorguya", "aynı sorgu", "üstüne ekle", "ekle yanına",
    "göster yanında", "da göster", "da getir", "de göster", "de getir",
    "tablodan", "tablodaki", "tabloyu", "tabloya",
    "çıkart", "çıkar", "kaldır", "at şunu", "filtrele", "daralt",
    "sadece", "hariç", "hariç tut",
]

# Longest previous SQL (in characters) that is fed back into the prompt.
MAX_SQL_CONTEXT_LENGTH = 500

# Turkish upper-case dotted/dotless I to their Turkish lower-case forms.
_TR_UPPER_I = str.maketrans({"\u0130": "i", "I": "\u0131"})


def has_query_trigger(question):
    """Return True when *question* asks to modify the previous query.

    The match is a case-insensitive substring search against
    ``SORGU_TRIGGERS``. ``str.lower()`` alone is not Turkish-aware: it
    turns "İ" into "i" plus a combining dot and "I" into "i", so
    upper-case input such as "FİLTRELE" or "ÇIKAR" would never match.
    Both the plain and the Turkish-aware lower-casing are checked, so
    ASCII-only upper-case input keeps matching as before.
    """
    candidates = (
        question.lower(),
        question.translate(_TR_UPPER_I).lower(),
    )
    return any(
        trigger in candidate
        for candidate in candidates
        for trigger in SORGU_TRIGGERS
    )
139
+
140
+
76
141
  # ── System prompt ──
77
142
 
78
143
  def build_system_prompt(schema_text, domain_text="", last_result_entities=None,
79
- question=None):
144
+ question=None, last_sql=None, column_hints=""):
80
145
  prompt = f"""Sen bir SQL asistanısın. Kullanıcının sorusuna uygun SQL yaz.
81
146
 
82
147
  Kurallar:
@@ -93,11 +158,39 @@ Veritabanı şeması:
93
158
  """
94
159
  if domain_text:
95
160
  prompt += f"\nDomain bilgisi:\n{domain_text}\n"
161
+ if column_hints:
162
+ prompt += f"\n{column_hints}\n"
163
+ if last_sql and question and has_query_trigger(question):
164
+ if len(last_sql) <= MAX_SQL_CONTEXT_LENGTH:
165
+ prompt += f"""\nÖNCEKİ SQL'İ MODİFİYE ET:
166
+
167
+ {last_sql}
168
+
169
+ KURALLAR:
170
+ 1. Yukarıdaki SQL'i TEMEL AL. Sıfırdan sorgu YAZMA.
171
+ 2. WHERE koşullarını AYNEN KORU (filtre, LIMIT, entity_id değerleri dahil).
172
+ 3. JOIN'leri AYNEN KORU. Yeni kolon için ek JOIN gerekiyorsa EKLE ama mevcutları ÇIKARMA.
173
+ 4. Sadece istenen değişikliği yap:
174
+ - "ekle" / "göster" → SELECT'e kolon ekle
175
+ - "çıkar" / "kaldır" → SELECT'ten kolon çıkar
176
+ - "filtrele" / "sadece" → WHERE'e koşul ekle
177
+ 5. LIMIT varsa KORU. ORDER BY varsa KORU.
178
+ 6. Kolon eklerken dönem belirsizse son dönemi kullan: f.donem = (SELECT MAX(donem) FROM fact_periodic). Tüm dönemleri getirme.
179
+ """
96
180
  if last_result_entities and question and has_reference_trigger(question):
97
- prompt += (
98
- f"\nÖNCEKİ SORGU SONUCUNDAKI FİRMALAR (entity_id): {last_result_entities}\n"
99
- "Bu entity_id listesini WHERE koşulunda kullan.\n"
100
- )
181
+ ids = last_result_entities
182
+ ids_quoted = ", ".join(str(eid) for eid in ids)
183
+ prompt += f"""\nÖNCEKİ SORGU FİRMALARI (entity_id): {ids}
184
+
185
+ Bu entity_id değerlerini SQL'de DOĞRUDAN yaz.
186
+ DOĞRU: WHERE fp.entity_id IN ({ids_quoted})
187
+ YANLIŞ: WHERE entity_id = :muta
188
+ YANLIŞ: WHERE entity_id = 'MUTA_DEGERI'
189
+ YANLIŞ: WHERE entity_id = '1234567890'
190
+
191
+ Placeholder, parametre, bind variable, örnek değer KULLANMA.
192
+ Gerçek entity_id değerlerini doğrudan SQL string'ine yaz.
193
+ """
101
194
  return prompt
102
195
 
103
196
 
@@ -117,6 +210,9 @@ def extract_sql(response_text):
117
210
 
118
211
  def _like_to_ilike(sql):
119
212
  """LIKE → ILIKE guardrail. String literal içindekilere dokunmaz."""
213
+ # ILIKE(col, pattern) fonksiyon syntax'ini col ILIKE pattern'e çevir
214
+ sql = re.sub(r'\bILIKE\s*\(\s*(\w+\.?\w*)\s*,\s*', r'\1 ILIKE ', sql,
215
+ flags=re.IGNORECASE)
120
216
  return re.sub(
121
217
  r"""(?x)
122
218
  ( # Grup 1: string literal — atla
@@ -170,24 +266,77 @@ def ask_model(system_prompt, question):
170
266
  return f"LLM HATA: {e}", time.time() - t0, 0
171
267
 
172
268
 
173
- def generate_sql(conn, question, last_result_entities=None, domain_text=""):
269
def _short_error(msg):
    """Return the first line of *msg*, cut to at most 80 characters.

    Used for compact trace output: a first line longer than 80 characters
    is reduced to its first 77 characters followed by "...".
    """
    headline, _, _ = str(msg).partition('\n')
    if len(headline) <= 80:
        return headline
    return headline[:77] + "..."
273
+
274
+
275
def _extract_column_hint(error_message, col_map):
    """Turn a "column not found" error into a hint naming the right table.

    Args:
        error_message: the database error (anything ``str()`` accepts).
        col_map: dict mapping column name -> list of owning table names.

    Returns:
        A hint sentence for the model, or None when the error does not
        name a missing column.
    """
    err = str(error_message)
    # Accept any quoted identifier, not only \w+, so names containing
    # spaces or punctuation are recognised too.
    m = re.search(r'[Cc]olumn "([^"]+)" not found', err)
    if not m:
        m = re.search(r'does not have a column named "([^"]+)"', err)
    if not m:
        return None
    col_name = m.group(1)

    # DuckDB identifiers are case-insensitive, so compare case-insensitively;
    # otherwise a mis-cased column is wrongly reported as not existing.
    wanted = col_name.casefold()
    owners = [
        table
        for known, tables in col_map.items()
        if known.casefold() == wanted
        for table in tables
    ]
    if not owners:
        return f"'{col_name}' kolonu veritabanında hiç yok. Schema'yı kontrol et."
    if len(owners) == 1:
        return (f"'{col_name}' kolonu SADECE '{owners[0]}' tablosundadır. "
                f"JOIN {owners[0]} ... yapıp {owners[0]}.{col_name} olarak eriş.")
    tables_str = ", ".join(owners)
    return (f"'{col_name}' kolonu şu tablolarda var: {tables_str}. "
            f"Doğru tabloyu JOIN ile ekle ve tablo alias'ı kullan.")
293
+
294
+
295
def _build_retry_prompt(question, failed_sql, error_message, col_map=None):
    """Compose the follow-up prompt sent to the model after a SQL error.

    The prompt repeats the user question, the failing SQL and the error
    text, then lists the rules for the corrected query. When *col_map* is
    given and the error names a missing column, a hint line pointing at
    the owning table is inserted after the error text.
    """
    hint = _extract_column_hint(error_message, col_map) if col_map else None
    hint_block = f"\n\nİPUCU: {hint}\n" if hint else ""

    return f"""Kullanıcı sorusu: {question}

Ürettiğin SQL hata verdi:

SQL:
{failed_sql}

Hata:
{error_message}
{hint_block}
Bu hatayı düzelt ve yeni SQL yaz. Kurallar:
- Hata mesajındaki ipucunu kullan
- Doğru tabloyu JOIN ile ekle
- Her kolon referansında tablo alias'ı kullan
- Placeholder, parametre, bind variable KULLANMA — gerçek değerleri yaz
- Sadece düzeltilmiş SQL yaz, açıklama yapma
"""
320
+
321
+
322
+ def generate_sql(conn, question, last_result_entities=None, domain_text="",
323
+ col_map=None):
174
324
  """Soru → SQL string. Test runner için callable. Başarısızsa None."""
175
325
  schema_text = read_schema(conn)
326
+ column_hints = format_column_hints(col_map) if col_map else ""
176
327
  system_prompt = build_system_prompt(schema_text, domain_text, last_result_entities,
177
- question=question)
328
+ question=question, column_hints=column_hints)
178
329
  response, _, _ = ask_model(system_prompt, question)
179
330
  sql = extract_sql(response)
180
331
  if not sql or check_sql_safety(sql):
181
332
  return None
182
333
  sql = _like_to_ilike(sql)
183
334
 
184
- # Ambiguous column retry: EXPLAIN ile ön kontrol
335
+ # Genel SQL hata retry: EXPLAIN ile ön kontrol
185
336
  try:
186
337
  conn.execute(f"EXPLAIN {sql}")
187
338
  except Exception as e:
188
- if "ambiguous" not in str(e).lower():
189
- return sql
190
- retry_q = f"{question}\n\nÖNCEKİ SQL HATA: {e}\nJOIN'de tablo alias kullan."
339
+ retry_q = _build_retry_prompt(question, sql, e, col_map)
191
340
  resp2, _, _ = ask_model(system_prompt, retry_q)
192
341
  sql2 = extract_sql(resp2)
193
342
  if sql2 and not check_sql_safety(sql2):
@@ -265,10 +414,12 @@ def _extract_entity_ids(columns, rows, max_entities=100):
265
414
  return ids or None
266
415
 
267
416
 
268
- def run_query(conn, system_prompt, question):
269
- """Tek soru → SQL → çalıştır → sonuç. entity_id listesi döndürür."""
417
+ def run_query(conn, system_prompt, question, col_map=None):
418
+ """Tek soru → SQL → çalıştır → sonuç. (entity_id listesi, sql) tuple döndürür."""
270
419
  # 1. Model'e sor
271
420
  response, elapsed, tokens = ask_model(system_prompt, question)
421
+ _log("model", question=question, elapsed=round(elapsed, 1), tokens=tokens,
422
+ response=response)
272
423
 
273
424
  print(f"\n💭 MODEL ({elapsed:.1f}s, ~{tokens} tok)")
274
425
  if not response.startswith("LLM HATA"):
@@ -281,14 +432,16 @@ def run_query(conn, system_prompt, question):
281
432
  if not sql:
282
433
  print(f"\n❌ Model SQL üretmedi:")
283
434
  print(f" {response[:300]}")
284
- return None
435
+ _log("no_sql", question=question)
436
+ return None, None
285
437
 
286
438
  # 3. Güvenlik kontrolü
287
439
  safety_error = check_sql_safety(sql)
288
440
  if safety_error:
289
441
  print(f"\n⛔ {safety_error}")
290
442
  print(f"🔍 SQL: {sql[:200]}")
291
- return None
443
+ _log("safety", question=question, sql=sql, error=safety_error)
444
+ return None, None
292
445
 
293
446
  # 3b. LIKE → ILIKE guardrail
294
447
  sql = _like_to_ilike(sql)
@@ -305,37 +458,47 @@ def run_query(conn, system_prompt, question):
305
458
 
306
459
  print(f"\n📊 SONUÇ ({len(rows)} satır, {query_ms:.0f}ms)")
307
460
  print(format_table(columns, rows))
461
+ if len(rows) > 50:
462
+ print(f" ⚠ {len(rows)} satır döndü, ilk 50 gösteriliyor. Soruyu daraltın.")
308
463
 
309
464
  # Entity context çıkar
310
465
  entities = _extract_entity_ids(columns, rows)
311
466
  if entities is None and len(rows) > 100:
312
- print(" ⚠ Önceki sorgu çok geniş — firma referansı için soruyu daraltın.")
313
- return entities
467
+ print(" ⚠ Firma referansı için soruyu daraltın (max 100 entity).")
468
+ _log("ok", question=question, sql=sql, row_count=len(rows),
469
+ query_ms=round(query_ms), columns=columns)
470
+ return entities, sql
314
471
  except duckdb.Error as e:
315
- # Ambiguous column retry
316
- if "ambiguous" in str(e).lower():
317
- print(f"🔄 Ambiguous column, retry...")
318
- retry_q = f"{question}\n\nSQL HATA: {e}\nJOIN'de tablo alias kullan."
319
- resp2, _, _ = ask_model(system_prompt, retry_q)
320
- sql2 = extract_sql(resp2)
321
- if sql2 and not check_sql_safety(sql2):
322
- sql2 = _like_to_ilike(sql2)
323
- print(f"🔍 Retry SQL: {sql2}")
324
- try:
325
- result = conn.execute(sql2)
326
- columns = [desc[0] for desc in result.description]
327
- rows = result.fetchall()
328
- query_ms = (time.time() - t0) * 1000
329
- print(f"\n📊 SONUÇ ({len(rows)} satır, {query_ms:.0f}ms)")
330
- print(format_table(columns, rows))
331
- entities = _extract_entity_ids(columns, rows)
332
- return entities
333
- except duckdb.Error as e2:
334
- print(f"\n❌ Retry hatası: {e2}")
335
- return None
472
+ _log("error", question=question, sql=sql, error=str(e))
473
+ # Genel SQL hata retry — tek retry, her hata tipinde
474
+ print(f"🔄 Retry ({_short_error(e)})...")
475
+ retry_q = _build_retry_prompt(question, sql, e, col_map)
476
+ resp2, _, _ = ask_model(system_prompt, retry_q)
477
+ sql2 = extract_sql(resp2)
478
+ if sql2 and not check_sql_safety(sql2):
479
+ sql2 = _like_to_ilike(sql2)
480
+ print(f"🔍 Retry SQL: {sql2}")
481
+ try:
482
+ result = conn.execute(sql2)
483
+ columns = [desc[0] for desc in result.description]
484
+ rows = result.fetchall()
485
+ query_ms = (time.time() - t0) * 1000
486
+ print(f"\n📊 SONUÇ ({len(rows)} satır, {query_ms:.0f}ms)")
487
+ print(format_table(columns, rows))
488
+ entities = _extract_entity_ids(columns, rows)
489
+ if entities is None and len(rows) > 100:
490
+ print(" ⚠ Önceki sorgu çok geniş — firma referansı için soruyu daraltın.")
491
+ _log("retry_ok", question=question, original_sql=sql,
492
+ retry_sql=sql2, row_count=len(rows), columns=columns)
493
+ return entities, sql2
494
+ except duckdb.Error as e2:
495
+ print(f"\n❌ Retry de başarısız: {_short_error(e2)}")
496
+ _log("retry_fail", question=question, original_sql=sql,
497
+ retry_sql=sql2, error=str(e2))
498
+ return None, None
336
499
  print(f"\n❌ SQL hatası: {e}")
337
500
  print(f"🔍 SQL: {sql}")
338
- return None
501
+ return None, None
339
502
 
340
503
 
341
504
  def handle_slash_command(cmd, state):
@@ -344,7 +507,9 @@ def handle_slash_command(cmd, state):
344
507
 
345
508
  if cmd == "/s":
346
509
  state["last_result_entities"] = None
510
+ state["last_sql"] = None
347
511
  print("🧹 Bellek temizlendi.")
512
+ _log("cmd", cmd="/s")
348
513
  return True
349
514
 
350
515
  if cmd == "/schema":
@@ -368,13 +533,16 @@ def handle_slash_command(cmd, state):
368
533
  return False
369
534
 
370
535
 
371
- def interactive_loop(conn, schema_text, domain_text):
536
+ def interactive_loop(conn, schema_text, domain_text, col_map):
372
537
  """REPL döngüsü."""
373
538
  print(f"\ncontsql hazır. Model: {MODEL} | /help komutlar")
374
539
  print("Çıkmak için: quit/exit/q\n")
375
540
 
541
+ column_hints = format_column_hints(col_map)
542
+
376
543
  state = {
377
544
  "last_result_entities": None,
545
+ "last_sql": None,
378
546
  "trace": False,
379
547
  "schema_text": schema_text,
380
548
  }
@@ -396,12 +564,21 @@ def interactive_loop(conn, schema_text, domain_text):
396
564
  print(f"Bilinmeyen komut: {question}. /help yazın.")
397
565
  continue
398
566
 
567
+ # SQL context uzunluk uyarısı
568
+ if state["last_sql"] and has_query_trigger(question) \
569
+ and len(state["last_sql"]) > MAX_SQL_CONTEXT_LENGTH:
570
+ print("⚠ Önceki sorgu çok uzun — lütfen soruyu tam yazın.")
571
+
399
572
  system_prompt = build_system_prompt(schema_text, domain_text,
400
573
  state["last_result_entities"],
401
- question=question)
402
- entities = run_query(conn, system_prompt, question)
574
+ question=question,
575
+ last_sql=state["last_sql"],
576
+ column_hints=column_hints)
577
+ entities, sql = run_query(conn, system_prompt, question, col_map)
403
578
  if entities is not None:
404
579
  state["last_result_entities"] = entities
580
+ if sql is not None:
581
+ state["last_sql"] = sql
405
582
  print()
406
583
 
407
584
 
@@ -425,19 +602,26 @@ def main():
425
602
 
426
603
  conn = duckdb.connect(str(db_path), read_only=True)
427
604
 
428
- # Schema + domain
605
+ # Loglama başlat
606
+ log_path = _init_log(db_path)
607
+
608
+ # Schema + domain + column map
429
609
  schema_text = read_schema(conn)
430
610
  domain_text = read_domain_notes(str(db_path))
611
+ col_map = build_column_owner_map(conn)
431
612
 
432
- print(f"DB: {db_path} | Model: {MODEL}")
613
+ print(f"DB: {db_path} | Model: {MODEL} | Log: {log_path}")
614
+ _log("session_start", db=str(db_path), model=MODEL)
433
615
 
434
616
  # Tek soru veya interaktif
435
617
  if args.question:
618
+ column_hints = format_column_hints(col_map)
436
619
  system_prompt = build_system_prompt(schema_text, domain_text,
437
- question=args.question)
438
- run_query(conn, system_prompt, args.question)
620
+ question=args.question,
621
+ column_hints=column_hints)
622
+ run_query(conn, system_prompt, args.question, col_map)
439
623
  else:
440
- interactive_loop(conn, schema_text, domain_text)
624
+ interactive_loop(conn, schema_text, domain_text, col_map)
441
625
 
442
626
  conn.close()
443
627
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "contsql"
7
- version = "0.2.3"
7
+ version = "0.2.9"
8
8
  requires-python = ">=3.10"
9
9
  dependencies = ["duckdb", "requests"]
10
10
 
File without changes
File without changes