csvai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csvai/__init__.py +8 -0
- csvai/__main__.py +7 -0
- csvai/cli.py +57 -0
- csvai/io_utils.py +125 -0
- csvai/launch.py +13 -0
- csvai/processor.py +420 -0
- csvai/settings.py +42 -0
- csvai/ui.py +216 -0
- csvai-0.1.0.dist-info/METADATA +751 -0
- csvai-0.1.0.dist-info/RECORD +14 -0
- csvai-0.1.0.dist-info/WHEEL +5 -0
- csvai-0.1.0.dist-info/entry_points.txt +3 -0
- csvai-0.1.0.dist-info/licenses/LICENSE +339 -0
- csvai-0.1.0.dist-info/top_level.txt +1 -0
csvai/ui.py
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
"""Streamlit UI for CSVAI — live-updating prompt controls (no form gating)."""
|
2
|
+
import asyncio
|
3
|
+
import hashlib
|
4
|
+
import logging
|
5
|
+
import threading
|
6
|
+
import time
|
7
|
+
import queue
|
8
|
+
from pathlib import Path
|
9
|
+
import tempfile
|
10
|
+
|
11
|
+
import streamlit as st
|
12
|
+
|
13
|
+
from csvai.processor import CSVAIProcessor, ProcessorConfig
|
14
|
+
from csvai.io_utils import default_output_file
|
15
|
+
from csvai.settings import Settings
|
16
|
+
|
17
|
+
# Project settings object: supplies the default model name for the UI and is
# handed to the processor when a run starts.
settings = Settings()

# -----------------------------------------------------------------------------
# Page setup & persistent state
# -----------------------------------------------------------------------------
st.set_page_config(page_title="CSVAI", layout="centered")
st.title("CSVAI")

# Seed the session-state keys that must survive reruns. ``setdefault`` leaves
# any value stored by an earlier run of this script untouched.
for _state_key, _state_default in (
    ("processor", None),
    ("thread", None),
    ("log_queue", queue.Queue()),
    ("log_handler_attached", False),
    ("raw_logs", []),
    ("working_dir", None),
    ("output_path", None),
):
    st.session_state.setdefault(_state_key, _state_default)
|
33
|
+
|
34
|
+
# -----------------------------------------------------------------------------
|
35
|
+
# Logging: worker -> queue (no Streamlit calls in worker thread)
|
36
|
+
# -----------------------------------------------------------------------------
|
37
|
+
class QueueLogHandler(logging.Handler):
    """Logging handler that pushes each formatted record onto a queue.

    The handler itself makes no Streamlit calls, so it is safe to trigger from
    a worker thread; the UI side reads the queue separately.
    """

    def __init__(self, q: "queue.Queue[str]"):
        """Create the handler.

        Args:
            q: Queue that receives one formatted string per log record.
        """
        super().__init__()
        self.q = q

    def emit(self, record: logging.LogRecord) -> None:
        """Format *record* and enqueue it without blocking.

        This method never raises. A formatting error or a full queue is
        reported through ``logging.Handler.handleError``, which prints a
        traceback to stderr when ``logging.raiseExceptions`` is true and is
        silent otherwise.
        """
        try:
            self.q.put_nowait(self.format(record))
        except Exception:
            # Standard logging convention: report the failure, never propagate.
            self.handleError(record)
|
46
|
+
|
47
|
+
def drain_logs():
    """Move every log line waiting in the session queue into ``raw_logs``.

    Non-blocking: returns as soon as the queue reports that it is empty.
    """
    pending = st.session_state.log_queue
    try:
        while True:
            entry = pending.get_nowait()
            st.session_state.raw_logs.append(entry)
    except queue.Empty:
        # Queue exhausted; nothing more to collect on this pass.
        return
|
56
|
+
|
57
|
+
# -----------------------------------------------------------------------------
|
58
|
+
# Stable working dir per input (so resume works)
|
59
|
+
# -----------------------------------------------------------------------------
|
60
|
+
def stable_working_dir(upload_name: str, upload_bytes: bytes) -> Path:
    """Return (creating it if needed) a per-upload working directory.

    The directory is ``<system temp dir>/csvai/<name>-<digest>``, where
    ``<digest>`` is the first 12 hex characters of the MD5 of the file content.
    The same name and content therefore always map to the same folder.

    Args:
        upload_name: File name as reported by the uploader. Only its final path
            component is used, so directory parts such as ``../`` cannot move
            the result outside the ``csvai`` temp folder.
        upload_bytes: Raw content of the uploaded file.

    Returns:
        Path of the existing or newly created directory.
    """
    # MD5 serves purely as a content fingerprint for the folder name.
    digest = hashlib.md5(upload_bytes).hexdigest()[:12]
    # The name comes from the client: drop any directory components.
    safe_name = Path(upload_name).name
    base = Path(tempfile.gettempdir()) / "csvai" / f"{safe_name}-{digest}"
    base.mkdir(parents=True, exist_ok=True)
    return base
|
65
|
+
|
66
|
+
# -----------------------------------------------------------------------------
|
67
|
+
# Inputs (live-updating; NOT inside a form)
|
68
|
+
# -----------------------------------------------------------------------------
|
69
|
+
uploaded = st.file_uploader("Upload CSV or Excel", type=["csv", "xlsx", "xls"], key="uploaded")

prompt_src = st.radio("Prompt source", ["Upload file", "Paste text"], horizontal=True, key="prompt_src")

# Only one prompt widget is rendered per run; the other variable stays None.
prompt_file = None
prompt_text = None
if prompt_src == "Upload file":
    prompt_file = st.file_uploader("Prompt (.txt)", type=["txt"], key="prompt_file")
else:
    prompt_text = st.text_area("Prompt text", height=160, key="prompt_text")

schema_file = st.file_uploader("Schema (optional, .json)", type=["json"], key="schema_file")
model = st.text_input("Model", value=settings.default_model, key="model")
limit = st.number_input("Row limit (0 = all new)", min_value=0, value=0, step=1, key="limit")

# Action buttons side by side.
run_col, reset_col = st.columns(2)
run_clicked = run_col.button("▶ Run", use_container_width=True, key="run_btn")
reset_clicked = reset_col.button("Reset working folder", use_container_width=True, key="reset_btn")
|
88
|
+
|
89
|
+
# Reset working folder (simple: just clear state & forget output file)
|
90
|
+
if reset_clicked:
    # Ask a still-running worker to stop before its references are dropped.
    # Otherwise it keeps running unseen and the "already in progress" guard can
    # no longer detect it.
    live_thread = st.session_state.thread
    live_processor = st.session_state.processor
    if live_thread and live_thread.is_alive() and live_processor:
        live_processor.stop()

    # Delete the previous output file, if any. A missing file is fine; any
    # other OS error is shown to the user instead of being silently ignored.
    stale_output = st.session_state.output_path
    if stale_output:
        try:
            Path(stale_output).unlink()
        except FileNotFoundError:
            pass  # already gone
        except OSError as exc:
            st.warning(f"Could not delete previous output file: {exc}")

    # Forget everything tied to the previous run.
    st.session_state.working_dir = None
    st.session_state.output_path = None
    st.session_state.raw_logs = []
    st.session_state.processor = None
    st.session_state.thread = None
    st.success("Working folder & UI state reset.")
|
104
|
+
|
105
|
+
# -----------------------------------------------------------------------------
|
106
|
+
# Start / resume run
|
107
|
+
# -----------------------------------------------------------------------------
|
108
|
+
if run_clicked:
    # Validate inputs first; each failure shows one error and starts nothing.
    if not uploaded:
        st.error("Please upload an input file.")
    elif not (prompt_file or (prompt_text and prompt_text.strip())):
        st.error("Please provide a prompt file or text.")
    elif st.session_state.thread and st.session_state.thread.is_alive():
        st.error("A run is already in progress.")
    else:
        input_bytes = uploaded.getvalue()
        # File names come from the browser: keep only the final path component
        # so nothing can be written outside the working directory.
        input_name = Path(uploaded.name).name

        # Always derive the folder from the *current* upload. The name is built
        # from the file name and a content hash, so re-running the same file
        # resumes in the same folder, while a different upload never inherits
        # a folder cached from an earlier file.
        workdir = stable_working_dir(input_name, input_bytes)
        st.session_state.working_dir = str(workdir)

        # Save inputs into working dir
        input_path = workdir / input_name
        input_path.write_bytes(input_bytes)

        if prompt_file:
            prompt_path = workdir / Path(prompt_file.name).name
            prompt_path.write_bytes(prompt_file.getvalue())
        else:
            prompt_path = workdir / "prompt.txt"
            prompt_path.write_text(prompt_text or "", encoding="utf-8")

        schema_path = None
        if schema_file:
            schema_path = workdir / Path(schema_file.name).name
            schema_path.write_bytes(schema_file.getvalue())

        # Decide output path inside same folder so resume works
        default_out = default_output_file(input_path, None).name
        output_path = workdir / default_out
        st.session_state.output_path = str(output_path)

        # Prepare processor config (a row limit of 0 is passed on as None)
        cfg = ProcessorConfig(
            input=str(input_path),
            prompt=str(prompt_path),
            output=str(output_path),
            schema=str(schema_path) if schema_path else None,
            limit=int(limit) if limit > 0 else None,
            model=model,
        )
        processor = CSVAIProcessor(cfg, settings=settings)

        # Attach logging handler once per session
        if not st.session_state.log_handler_attached:
            handler = QueueLogHandler(st.session_state.log_queue)
            handler.setLevel(logging.INFO)
            handler.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(message)s"))
            root_logger = logging.getLogger()
            # Remove any prior queue handlers if hot-reloaded
            for h in list(root_logger.handlers):
                if isinstance(h, QueueLogHandler):
                    root_logger.removeHandler(h)
            root_logger.addHandler(handler)
            root_logger.setLevel(logging.INFO)
            st.session_state.log_handler_attached = True

        # Clear old logs for this view
        st.session_state.raw_logs = []

        # Launch worker: the async processor runs in its own event loop on a
        # daemon thread.
        thread = threading.Thread(target=lambda: asyncio.run(processor.run()), daemon=True)
        thread.start()
        st.session_state.processor = processor
        st.session_state.thread = thread

        # tiny delay so first logs land
        time.sleep(0.1)
        st.rerun()
|
180
|
+
|
181
|
+
# -----------------------------------------------------------------------------
|
182
|
+
# Live view: raw logs + controls + download
|
183
|
+
# -----------------------------------------------------------------------------
|
184
|
+
# Pull in whatever the worker has logged since the previous rerun.
drain_logs()
processor = st.session_state.processor
thread = st.session_state.thread

# Pause / Resume / Stop are only offered while the worker thread is alive.
if thread and thread.is_alive() and processor:
    pause_col, resume_col, stop_col = st.columns(3)
    # A cleared pause_event is treated as "paused".
    paused = not processor.pause_event.is_set()
    pause_col.button("Pause", on_click=processor.pause, disabled=paused, use_container_width=True)
    resume_col.button("Resume", on_click=processor.resume, disabled=not paused, use_container_width=True)
    stop_col.button("Stop", on_click=processor.stop, use_container_width=True)

# Raw logs (read-only)
st.subheader("Logs")
if not st.session_state.raw_logs:
    st.info("No logs yet.")
else:
    st.code("\n".join(st.session_state.raw_logs), language=None)

# Offer the download as soon as the output file exists on disk.
if st.session_state.output_path and Path(st.session_state.output_path).exists():
    outp = Path(st.session_state.output_path)
    with outp.open("rb") as f:
        st.download_button("Download enriched file", f, file_name=outp.name, use_container_width=True)

# While the worker is alive, wait a second, collect new logs and rerun the
# script so the log view keeps streaming.
if thread and thread.is_alive():
    time.sleep(1)
    drain_logs()
    st.rerun()
|