csvai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csvai/ui.py ADDED
@@ -0,0 +1,216 @@
1
+ """Streamlit UI for CSVAI — live-updating prompt controls (no form gating)."""
2
+ import asyncio
3
+ import hashlib
4
+ import logging
5
+ import threading
6
+ import time
7
+ import queue
8
+ from pathlib import Path
9
+ import tempfile
10
+
11
+ import streamlit as st
12
+
13
+ from csvai.processor import CSVAIProcessor, ProcessorConfig
14
+ from csvai.io_utils import default_output_file
15
+ from csvai.settings import Settings
16
+
17
# Shared settings instance; read further down for the default model name and
# handed to the processor when a run starts.
settings = Settings()

# -----------------------------------------------------------------------------
# Page setup & persistent state
# -----------------------------------------------------------------------------
st.set_page_config(page_title="CSVAI", layout="centered")
st.title("CSVAI")

# Seed each session key only if it is missing, so values written by an earlier
# script run survive the rerun. Defaults are rebuilt on every run, but
# ``setdefault`` discards them when the key already exists.
for _state_key, _state_default in (
    ("processor", None),
    ("thread", None),
    ("log_queue", queue.Queue()),
    ("log_handler_attached", False),
    ("raw_logs", []),
    ("working_dir", None),
    ("output_path", None),
):
    st.session_state.setdefault(_state_key, _state_default)
33
+
34
+ # -----------------------------------------------------------------------------
35
+ # Logging: worker -> queue (no Streamlit calls in worker thread)
36
+ # -----------------------------------------------------------------------------
37
class QueueLogHandler(logging.Handler):
    """Logging handler that pushes each formatted record onto a queue.

    The handler itself never touches Streamlit; it only enqueues strings so a
    different thread can pick them up later.
    """

    def __init__(self, q: "queue.Queue[str]") -> None:
        """Store the destination queue ``q`` for formatted log lines."""
        super().__init__()
        self.q = q

    def emit(self, record: logging.LogRecord) -> None:
        """Format ``record`` and enqueue it without blocking.

        Failures (for example a message whose %-arguments do not match) are
        routed through ``Handler.handleError`` instead of being dropped
        silently: that reports the problem on stderr when
        ``logging.raiseExceptions`` is true and never propagates, so logging
        still cannot break the caller.
        """
        try:
            self.q.put_nowait(self.format(record))
        except Exception:
            self.handleError(record)
46
+
47
def drain_logs():
    """Move every pending line from the session log queue into ``raw_logs``.

    Uses non-blocking reads only and returns as soon as the queue reports
    that it is empty.
    """
    pending = st.session_state.log_queue
    while True:
        try:
            entry = pending.get_nowait()
        except queue.Empty:
            return
        st.session_state.raw_logs.append(entry)
56
+
57
+ # -----------------------------------------------------------------------------
58
+ # Stable working dir per input (so resume works)
59
+ # -----------------------------------------------------------------------------
60
def stable_working_dir(upload_name: str, upload_bytes: bytes) -> Path:
    """Return (creating it if needed) the working directory for an upload.

    The directory lives under ``<system temp>/csvai`` and is named
    ``<file name>-<first 12 hex digits of the content's MD5>``, so the same
    name and content always map to the same folder.

    Only the final path component of ``upload_name`` is used. The name is
    supplied by the client, and a value containing path separators or ``..``
    must not be able to place the folder outside the ``csvai`` temp root.
    """
    # MD5 serves purely as a content fingerprint for the folder name.
    digest = hashlib.md5(upload_bytes).hexdigest()[:12]
    # Normalise Windows separators first so the basename is found on any OS.
    safe_name = Path(upload_name.replace("\\", "/")).name
    base = Path(tempfile.gettempdir()) / "csvai" / f"{safe_name}-{digest}"
    base.mkdir(parents=True, exist_ok=True)
    return base
65
+
66
+ # -----------------------------------------------------------------------------
67
+ # Inputs (live-updating; NOT inside a form)
68
+ # -----------------------------------------------------------------------------
69
# Input table upload (restricted to CSV / Excel extensions).
uploaded = st.file_uploader(
    "Upload CSV or Excel",
    type=["csv", "xlsx", "xls"],
    key="uploaded",
)

# The prompt comes from exactly one source; the variable belonging to the
# other source stays None so later code can tell which one was chosen.
prompt_src = st.radio(
    "Prompt source",
    ["Upload file", "Paste text"],
    horizontal=True,
    key="prompt_src",
)
prompt_file = None
prompt_text = None
if prompt_src == "Upload file":
    prompt_file = st.file_uploader("Prompt (.txt)", type=["txt"], key="prompt_file")
else:
    prompt_text = st.text_area("Prompt text", height=160, key="prompt_text")

schema_file = st.file_uploader(
    "Schema (optional, .json)",
    type=["json"],
    key="schema_file",
)
model = st.text_input("Model", value=settings.default_model, key="model")
limit = st.number_input(
    "Row limit (0 = all new)",
    min_value=0,
    value=0,
    step=1,
    key="limit",
)

# Action buttons side by side; each flag is True only on the script run
# triggered by that button's click.
run_col, reset_col = st.columns(2)
with run_col:
    run_clicked = st.button("▶ Run", use_container_width=True, key="run_btn")
with reset_col:
    reset_clicked = st.button(
        "Reset working folder",
        use_container_width=True,
        key="reset_btn",
    )
88
+
89
+ # Reset working folder (simple: just clear state & forget output file)
90
if reset_clicked:
    _active_thread = st.session_state.thread
    if _active_thread is not None and _active_thread.is_alive():
        # Clearing the thread/processor handles while the worker is alive
        # would orphan it: it would keep running with no Pause/Stop controls
        # and a second run could be started next to it.
        st.error("A run is in progress. Stop it before resetting.")
    else:
        if st.session_state.output_path:
            try:
                # missing_ok: an output file that is already gone is fine.
                Path(st.session_state.output_path).unlink(missing_ok=True)
            except OSError as exc:
                # Best effort: report the failure but still reset the UI state.
                st.warning(f"Could not delete the previous output file: {exc}")
        st.session_state.working_dir = None
        st.session_state.output_path = None
        st.session_state.raw_logs = []
        st.session_state.processor = None
        st.session_state.thread = None
        st.success("Working folder & UI state reset.")
104
+
105
+ # -----------------------------------------------------------------------------
106
+ # Start / resume run
107
+ # -----------------------------------------------------------------------------
108
def _client_basename(name: str) -> str:
    """Return only the final path component of a client-supplied file name."""
    # Normalise Windows separators first so the basename is found on any OS.
    return Path(name.replace("\\", "/")).name


def _run_in_background(proc) -> None:
    """Thread target: drive ``proc.run()`` to completion via ``asyncio.run``.

    An exception escaping the run is logged through the root logger (and so
    reaches any attached queue handler) instead of vanishing with the thread.
    """
    try:
        asyncio.run(proc.run())
    except Exception:
        logging.getLogger().exception("Run failed")


if run_clicked:
    if not uploaded:
        st.error("Please upload an input file.")
    elif not (prompt_file or (prompt_text and prompt_text.strip())):
        st.error("Please provide a prompt file or text.")
    elif st.session_state.thread and st.session_state.thread.is_alive():
        st.error("A run is already in progress.")
    else:
        # Upload names come from the client: keep only the basename so a name
        # containing separators or ".." cannot escape the working folder.
        input_name = _client_basename(uploaded.name)
        input_bytes = uploaded.getvalue()

        # Always derive the folder from the *current* upload. The helper is
        # deterministic, so a resumed run lands in the same folder, while a
        # different upload no longer inherits the previous file's folder from
        # session state (and a folder removed from disk is recreated).
        workdir = stable_working_dir(input_name, input_bytes)
        st.session_state.working_dir = str(workdir)

        # Save inputs into working dir
        input_path = workdir / input_name
        input_path.write_bytes(input_bytes)

        if prompt_file:
            prompt_path = workdir / _client_basename(prompt_file.name)
            prompt_path.write_bytes(prompt_file.getvalue())
        else:
            prompt_path = workdir / "prompt.txt"
            prompt_path.write_text(prompt_text or "", encoding="utf-8")

        schema_path = None
        if schema_file:
            schema_path = workdir / _client_basename(schema_file.name)
            schema_path.write_bytes(schema_file.getvalue())

        # Decide output path inside same folder so resume works
        default_out = default_output_file(input_path, None).name
        output_path = workdir / default_out
        st.session_state.output_path = str(output_path)

        # Prepare processor config (a row limit of 0 means "no limit").
        cfg = ProcessorConfig(
            input=str(input_path),
            prompt=str(prompt_path),
            output=str(output_path),
            schema=str(schema_path) if schema_path else None,
            limit=int(limit) if limit > 0 else None,
            model=model,
        )
        processor = CSVAIProcessor(cfg, settings=settings)

        # Attach logging handler once per session
        if not st.session_state.log_handler_attached:
            handler = QueueLogHandler(st.session_state.log_queue)
            handler.setLevel(logging.INFO)
            handler.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(message)s"))
            root_logger = logging.getLogger()
            # Remove queue handlers left by earlier script runs. The handler
            # class is re-created each time this script executes, so
            # isinstance() against the current class never matches those old
            # instances; compare by class name instead.
            for h in list(root_logger.handlers):
                if type(h).__name__ == "QueueLogHandler":
                    root_logger.removeHandler(h)
            root_logger.addHandler(handler)
            root_logger.setLevel(logging.INFO)
            st.session_state.log_handler_attached = True

        # Clear old logs for this view
        st.session_state.raw_logs = []

        # Launch worker (daemon thread, so it does not block interpreter exit)
        thread = threading.Thread(target=_run_in_background, args=(processor,), daemon=True)
        thread.start()
        st.session_state.processor = processor
        st.session_state.thread = thread

        # tiny delay so first logs land
        time.sleep(0.1)
        st.rerun()
180
+
181
+ # -----------------------------------------------------------------------------
182
+ # Live view: raw logs + controls + download
183
+ # -----------------------------------------------------------------------------
184
processor = st.session_state.processor
thread = st.session_state.thread

# Sample liveness once, BEFORE draining. If the worker exits while this pass
# is rendering, the pass still schedules one more rerun, and that rerun drains
# the worker's final log lines. Re-checking is_alive() at the bottom instead
# could skip the rerun and leave those lines sitting in the queue.
run_active = bool(thread and thread.is_alive())
drain_logs()

# Controls during run
if run_active and processor:
    c1, c2, c3 = st.columns(3)
    # The processor counts as paused while its pause_event is cleared.
    paused = not processor.pause_event.is_set()
    with c1:
        st.button("Pause", on_click=processor.pause, disabled=paused, use_container_width=True)
    with c2:
        st.button("Resume", on_click=processor.resume, disabled=not paused, use_container_width=True)
    with c3:
        st.button("Stop", on_click=processor.stop, use_container_width=True)

# Raw logs (read-only)
st.subheader("Logs")
if st.session_state.raw_logs:
    st.code("\n".join(st.session_state.raw_logs), language=None)
else:
    st.info("No logs yet.")

# Download (if file exists)
if st.session_state.output_path and Path(st.session_state.output_path).exists():
    outp = Path(st.session_state.output_path)
    with open(outp, "rb") as f:
        st.download_button("Download enriched file", f, file_name=outp.name, use_container_width=True)

# Auto-refresh while running to stream logs; the next pass drains the queue
# first thing, so no extra drain is needed before rerunning.
if run_active:
    time.sleep(1)
    st.rerun()