logtap 0.2.2-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logtap/__init__.py +1 -1
- logtap/api/app.py +69 -3
- logtap/api/routes/health.py +26 -4
- logtap/api/routes/logs.py +2 -2
- logtap/api/routes/runs.py +351 -0
- logtap/cli/commands/collect.py +107 -0
- logtap/cli/commands/ingest.py +123 -0
- logtap/cli/commands/runs.py +116 -0
- logtap/cli/commands/tail.py +212 -23
- logtap/cli/main.py +11 -5
- logtap/core/parsers/base.py +3 -1
- logtap/core/parsers/json_parser.py +11 -0
- logtap/core/reader.py +3 -5
- logtap/core/runs.py +393 -0
- logtap/core/search.py +15 -11
- logtap/models/responses.py +54 -1
- {logtap-0.2.2.dist-info → logtap-0.4.0.dist-info}/METADATA +23 -21
- {logtap-0.2.2.dist-info → logtap-0.4.0.dist-info}/RECORD +22 -17
- {logtap-0.2.2.dist-info → logtap-0.4.0.dist-info}/WHEEL +1 -1
- logtap-0.4.0.dist-info/entry_points.txt +2 -0
- logtap-0.2.2.dist-info/entry_points.txt +0 -3
- {logtap-0.2.2.dist-info → logtap-0.4.0.dist-info}/licenses/LICENSE +0 -0
logtap/__init__.py
CHANGED
logtap/api/app.py
CHANGED
@@ -1,15 +1,22 @@
 """FastAPI application factory for logtap."""
 
+import os
+import time
+from pathlib import Path
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
 from logtap import __version__
-from logtap.api.routes import files, health, logs, parsed
+from logtap.api.routes import files, health, logs, parsed, runs
+from logtap.core.runs import RunStore
 
 
 def create_app() -> FastAPI:
     """
-    Create and configure the FastAPI application.
+    Create and configure the FastAPI application for serve mode.
+
+    Serves static log files from a directory (legacy mode).
 
     Returns:
         Configured FastAPI application instance.
@@ -23,6 +30,10 @@ def create_app() -> FastAPI:
         openapi_url="/openapi.json",
     )
 
+    # Store mode info
+    app.state.mode = "serve"
+    app.state.features = ["files"]
+
     # Configure CORS
     app.add_middleware(
         CORSMiddleware,
@@ -41,5 +52,60 @@ def create_app() -> FastAPI:
     return app
 
 
-
+def create_collector_app() -> FastAPI:
+    """
+    Create and configure the FastAPI application for collector mode.
+
+    Accepts ingested log streams and serves them for tailing.
+    This is the recommended mode for ML training logs.
+
+    Returns:
+        Configured FastAPI application instance.
+    """
+    app = FastAPI(
+        title="logtap",
+        description="tail -f for GPU clouds. Survives disconnects, aggregates multi-node.",
+        version=__version__,
+        docs_url="/docs",
+        redoc_url="/redoc",
+        openapi_url="/openapi.json",
+    )
+
+    # Store mode info and start time
+    app.state.mode = "collect"
+    app.state.features = ["runs"]
+    app.state.start_time = time.time()
+
+    # Configure CORS
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Initialize run store from environment
+    data_dir = Path(os.environ.get("LOGTAP_DATA_DIR", "~/.logtap/runs")).expanduser()
+    buffer_lines = int(os.environ.get("LOGTAP_BUFFER_LINES", "100000"))
+    max_disk_mb = int(os.environ.get("LOGTAP_MAX_DISK_MB", "1000"))
+    retention_hours = int(os.environ.get("LOGTAP_RETENTION_HOURS", "72"))
+
+    run_store = RunStore(
+        data_dir=data_dir,
+        buffer_lines=buffer_lines,
+        max_disk_mb=max_disk_mb,
+        retention_hours=retention_hours,
+    )
+    runs.set_run_store(run_store)
+    app.state.run_store = run_store
+
+    # Include routers
+    app.include_router(health.router, tags=["health"])
+    app.include_router(runs.router, prefix="/runs", tags=["runs"])
+
+    return app
+
+
+# Create default app instance for uvicorn (serve mode)
 app = create_app()
logtap/api/routes/health.py
CHANGED
@@ -1,6 +1,8 @@
 """Health check endpoint for logtap."""
 
-
+import time
+
+from fastapi import APIRouter, Request
 
 from logtap import __version__
 from logtap.models.responses import HealthResponse
@@ -9,11 +11,31 @@ router = APIRouter()
 
 
 @router.get("/health", response_model=HealthResponse)
-async def health_check() -> HealthResponse:
+async def health_check(request: Request) -> HealthResponse:
     """
     Check the health of the logtap service.
 
     Returns:
-        Health status and
+        Health status, version, mode, and capability information.
     """
-
+    mode = getattr(request.app.state, "mode", "serve")
+    features = getattr(request.app.state, "features", ["files"])
+
+    # Get run count if in collect mode
+    runs_count = None
+    if hasattr(request.app.state, "run_store"):
+        runs_count = len(request.app.state.run_store.list_runs())
+
+    # Get uptime if start_time is set
+    uptime = None
+    if hasattr(request.app.state, "start_time"):
+        uptime = int(time.time() - request.app.state.start_time)
+
+    return HealthResponse(
+        status="healthy",
+        version=__version__,
+        mode=mode,
+        features=features,
+        runs=runs_count,
+        uptime_seconds=uptime,
+    )
logtap/api/routes/logs.py
CHANGED
@@ -180,7 +180,7 @@ async def stream_logs(
        return
 
    try:
-        async with aiofiles.open(filepath, mode="r", encoding="utf-8") as f:
+        async with aiofiles.open(filepath, mode="r", encoding="utf-8", errors="replace") as f:
            # Seek to end of file
            await f.seek(0, 2)
 
@@ -225,7 +225,7 @@ async def stream_logs_sse(
    filepath = get_filepath(filename, settings)
 
    async def event_generator():
-        async with aiofiles.open(filepath, mode="r", encoding="utf-8") as f:
+        async with aiofiles.open(filepath, mode="r", encoding="utf-8", errors="replace") as f:
            # Seek to end
            await f.seek(0, 2)
 
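The errors="replace" change keeps a tail stream alive when a log contains stray binary bytes: the decoder substitutes U+FFFD instead of raising UnicodeDecodeError. A quick illustration of the decoding behavior:

# Corrupt bytes mid-line no longer abort the stream; they decode to U+FFFD
data = b"loss=0.42 \xff\xfe step=100"
print(data.decode("utf-8", errors="replace"))  # loss=0.42 �� step=100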
logtap/api/routes/runs.py
ADDED
@@ -0,0 +1,351 @@
+"""Routes for run management (collector mode)."""
+
+import asyncio
+from typing import List, Optional
+
+from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request, Response
+from fastapi.responses import StreamingResponse
+
+from logtap.api.dependencies import verify_api_key
+from logtap.core.runs import RunStore
+from logtap.models.responses import (
+    IngestResponse,
+    RunInfo,
+    RunListResponse,
+    StreamLineEvent,
+    StreamMetaEvent,
+)
+
+router = APIRouter()
+
+# Global run store - will be set by app factory
+_run_store: Optional[RunStore] = None
+
+
+def get_run_store() -> RunStore:
+    """Get the run store instance."""
+    if _run_store is None:
+        raise HTTPException(status_code=500, detail="Run store not initialized")
+    return _run_store
+
+
+def set_run_store(store: RunStore) -> None:
+    """Set the global run store instance."""
+    global _run_store
+    _run_store = store
+
+
+def parse_tags(tag_headers: Optional[List[str]]) -> dict:
+    """Parse X-Logtap-Tag headers into a dict."""
+    if not tag_headers:
+        return {}
+
+    tags = {}
+    for tag in tag_headers:
+        if "=" in tag:
+            key, value = tag.split("=", 1)
+            tags[key.strip()] = value.strip()
+    return tags
+
+
+@router.get("", response_model=RunListResponse)
+async def list_runs(
+    since_hours: Optional[int] = Query(None, description="Filter to runs active within N hours"),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> RunListResponse:
+    """List all runs."""
+    runs = store.list_runs(since_hours=since_hours)
+
+    return RunListResponse(
+        runs=[
+            RunInfo(
+                id=run.id,
+                lines=run.metadata.lines_count,
+                cursor_earliest=run.cursor_earliest,
+                cursor_latest=run.cursor_latest,
+                tags=run.metadata.tags,
+                created_at=run.metadata.created_at,
+                last_activity=run.metadata.last_activity,
+                active=run.metadata.active,
+                bytes_on_disk=run.metadata.bytes_on_disk,
+            )
+            for run in runs
+        ]
+    )
+
+
+@router.get("/{run_id}", response_model=RunInfo)
+async def get_run(
+    run_id: str,
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> RunInfo:
+    """Get details for a specific run."""
+    run = store.get(run_id)
+    if run is None:
+        raise HTTPException(
+            status_code=404,
+            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
+        )
+
+    return RunInfo(
+        id=run.id,
+        lines=run.metadata.lines_count,
+        cursor_earliest=run.cursor_earliest,
+        cursor_latest=run.cursor_latest,
+        tags=run.metadata.tags,
+        created_at=run.metadata.created_at,
+        last_activity=run.metadata.last_activity,
+        active=run.metadata.active,
+        bytes_on_disk=run.metadata.bytes_on_disk,
+    )
+
+
+@router.post("/{run_id}/ingest", response_model=IngestResponse)
+async def ingest(
+    run_id: str,
+    request: Request,
+    response: Response,
+    x_logtap_tag: Optional[List[str]] = Header(None),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> IngestResponse:
+    """
+    Ingest log lines for a run.
+
+    Send lines as plain text with newline delimiters.
+    Supports chunked transfer encoding for streaming.
+    """
+    # Check storage
+    storage_err = store.check_storage()
+    if storage_err:
+        raise HTTPException(
+            status_code=507,
+            detail={"error": "insufficient_storage", "message": "Disk limit exceeded"},
+        )
+
+    # Get or create run
+    run, created = store.get_or_create(run_id)
+
+    # Handle tags
+    tags = parse_tags(x_logtap_tag)
+    if tags:
+        err = run.set_tags(tags)
+        if err:
+            raise HTTPException(
+                status_code=409,
+                detail={"error": "tag_conflict", "message": err},
+            )
+
+    # Read and ingest body
+    lines_ingested = 0
+    buffer = ""
+
+    async for chunk in request.stream():
+        text = chunk.decode("utf-8", errors="replace")
+        buffer += text
+
+        # Process complete lines
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            run.append(line)
+            lines_ingested += 1
+
+    # Flush remaining partial line
+    if buffer:
+        run.append(buffer)
+        lines_ingested += 1
+
+    # Save metadata
+    run._save_metadata()
+
+    # Set status code: 201 for new run, 200 for existing
+    response.status_code = 201 if created else 200
+
+    return IngestResponse(
+        run_id=run_id,
+        lines_ingested=lines_ingested,
+        cursor_end=run.cursor_latest,
+    )
+
+
+@router.get("/{run_id}/stream")
+async def stream_run(
+    run_id: str,
+    since: Optional[int] = Query(None, description="Cursor to resume from (exclusive)"),
+    tail: int = Query(50, description="Lines to show if since not provided"),
+    follow: bool = Query(False, description="Keep connection open for new lines"),
+    tag: Optional[List[str]] = Query(None, description="Filter by tag (key=value)"),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> StreamingResponse:
+    """
+    Stream lines from a run using Server-Sent Events.
+
+    Supports resume via `since` parameter or Last-Event-ID header.
+    """
+    run = store.get(run_id)
+    if run is None:
+        raise HTTPException(
+            status_code=404,
+            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
+        )
+
+    # Tag filtering (if specified, check run has all tags)
+    if tag:
+        required_tags = parse_tags(tag)
+        for key, value in required_tags.items():
+            if run.metadata.tags.get(key) != value:
+                # Run doesn't match filter - return empty stream
+                async def empty_stream():
+                    meta = StreamMetaEvent(
+                        cursor_earliest=run.cursor_earliest,
+                        cursor_latest=run.cursor_latest,
+                        gap=False,
+                    )
+                    yield f"event: meta\ndata: {meta.model_dump_json()}\n\n"
+
+                return StreamingResponse(
+                    empty_stream(),
+                    media_type="text/event-stream",
+                    headers={
+                        "Cache-Control": "no-cache",
+                        "X-Logtap-Earliest-Cursor": str(run.cursor_earliest),
+                        "X-Logtap-Latest-Cursor": str(run.cursor_latest),
+                    },
+                )
+
+    async def generate_sse():
+        # Get initial lines and check for gap
+        lines, gap = run.get_lines(since=since, tail=tail)
+
+        # Send meta event
+        missed = None
+        if gap and since is not None:
+            missed = run.cursor_earliest - since - 1
+            if missed < 0:
+                missed = 0
+
+        meta = StreamMetaEvent(
+            cursor_earliest=run.cursor_earliest,
+            cursor_latest=run.cursor_latest,
+            gap=gap,
+            missed=missed,
+        )
+        yield f"event: meta\ndata: {meta.model_dump_json()}\n\n"
+
+        # Send initial lines
+        last_cursor = since if since is not None else -1
+        for line in lines:
+            event = StreamLineEvent(
+                cursor=line.cursor,
+                line=line.line,
+                ts=line.ts,
+            )
+            yield f"id: {line.cursor}\nevent: line\ndata: {event.model_dump_json()}\n\n"
+            last_cursor = line.cursor
+
+        if not follow:
+            return
+
+        # Follow mode - stream new lines
+        heartbeat_interval = 15  # seconds
+        last_heartbeat = asyncio.get_event_loop().time()
+
+        while True:
+            # Get new lines since last cursor
+            new_lines, _ = run.get_lines(since=last_cursor, limit=100)
+
+            for line in new_lines:
+                event = StreamLineEvent(
+                    cursor=line.cursor,
+                    line=line.line,
+                    ts=line.ts,
+                )
+                yield f"id: {line.cursor}\nevent: line\ndata: {event.model_dump_json()}\n\n"
+                last_cursor = line.cursor
+
+            # Send heartbeat if needed
+            now = asyncio.get_event_loop().time()
+            if now - last_heartbeat >= heartbeat_interval:
+                yield ": heartbeat\n\n"
+                last_heartbeat = now
+
+            # Small delay before checking again
+            await asyncio.sleep(0.1)
+
+    return StreamingResponse(
+        generate_sse(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "X-Logtap-Earliest-Cursor": str(run.cursor_earliest),
+            "X-Logtap-Latest-Cursor": str(run.cursor_latest),
+        },
+    )
+
+
+@router.get("/{run_id}/query")
+async def query_run(
+    run_id: str,
+    from_cursor: Optional[int] = Query(None, alias="from", description="Start cursor (inclusive)"),
+    to_cursor: Optional[int] = Query(None, alias="to", description="End cursor (inclusive)"),
+    tail: int = Query(50, description="Last N lines (if from/to not provided)"),
+    limit: int = Query(1000, le=10000, description="Maximum lines to return"),
+    search: Optional[str] = Query(None, description="Substring filter"),
+    regex: Optional[str] = Query(None, description="Regex filter"),
+    output: str = Query("jsonl", description="Output format: jsonl or plain"),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> StreamingResponse:
+    """Query lines from a run."""
+    # Validate search/regex mutual exclusion
+    if search and regex:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": "invalid_query", "message": "Cannot use both search and regex"},
+        )
+
+    run = store.get(run_id)
+    if run is None:
+        raise HTTPException(
+            status_code=404,
+            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
+        )
+
+    # Get lines
+    if from_cursor is not None:
+        # Range query - get lines from cursor onwards
+        lines, _ = run.get_lines(since=from_cursor - 1, limit=limit)
+        if to_cursor is not None:
+            lines = [ln for ln in lines if ln.cursor <= to_cursor]
+    else:
+        # Tail query
+        lines, _ = run.get_lines(tail=tail, limit=limit)
+
+    # Apply search/regex filter
+    if search:
+        lines = [ln for ln in lines if search in ln.line]
+    elif regex:
+        import re2
+
+        try:
+            pattern = re2.compile(regex)
+            lines = [ln for ln in lines if pattern.search(ln.line)]
+        except re2.error:
+            raise HTTPException(
+                status_code=400,
+                detail={"error": "invalid_regex", "message": "Invalid regex pattern"},
+            )
+
+    async def generate():
+        for line in lines:
+            if output == "plain":
+                yield line.line + "\n"
+            else:
+                event = StreamLineEvent(cursor=line.cursor, line=line.line, ts=line.ts)
+                yield event.model_dump_json() + "\n"
+
+    content_type = "text/plain" if output == "plain" else "application/x-ndjson"
+    return StreamingResponse(generate(), media_type=content_type)
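Taken together with the ingest route above, a client pushes newline-delimited text to /runs/{run_id}/ingest and reads back the latest cursor. A minimal sketch, assuming a collector at 127.0.0.1:8000 and the requests library; the run id and tag are illustrative, and the auth header consumed by verify_api_key is omitted since its name is not shown in this diff:

import requests

lines = "step=1 loss=2.31\nstep=2 loss=2.27\n"

resp = requests.post(
    "http://127.0.0.1:8000/runs/train-abc123/ingest",  # hypothetical run id
    data=lines.encode("utf-8"),
    headers={"X-Logtap-Tag": "node=0"},  # parsed by parse_tags() into {"node": "0"}
)
print(resp.status_code)           # 201 on first ingest, 200 afterwards
print(resp.json()["cursor_end"])  # resume point for /stream?since=...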
logtap/cli/commands/collect.py
ADDED
@@ -0,0 +1,107 @@
+"""Collector command for logtap CLI."""
+
+from pathlib import Path
+from typing import Optional
+
+import typer
+from rich.console import Console
+
+console = Console()
+
+
+def collect(
+    port: int = typer.Option(
+        8000,
+        "--port",
+        "-p",
+        help="Port to listen on.",
+    ),
+    host: str = typer.Option(
+        "0.0.0.0",
+        "--host",
+        "-H",
+        help="Host to bind to.",
+    ),
+    api_key: Optional[str] = typer.Option(
+        None,
+        "--api-key",
+        "-k",
+        help="API key for authentication.",
+        envvar="LOGTAP_API_KEY",
+    ),
+    data_dir: Path = typer.Option(
+        Path("~/.logtap/runs").expanduser(),
+        "--data-dir",
+        "-d",
+        help="Directory for run storage.",
+    ),
+    buffer_lines: int = typer.Option(
+        100_000,
+        "--buffer-lines",
+        help="In-memory cache size per run.",
+    ),
+    max_disk_mb: int = typer.Option(
+        1000,
+        "--max-disk-mb",
+        help="Maximum disk usage across all runs (MB).",
+    ),
+    retention_hours: int = typer.Option(
+        72,
+        "--retention-hours",
+        help="Hours to retain runs before cleanup.",
+    ),
+    reload: bool = typer.Option(
+        False,
+        "--reload",
+        "-r",
+        help="Enable auto-reload for development.",
+    ),
+) -> None:
+    """
+    Start the logtap collector server.
+
+    Accepts ingested log streams over HTTP and serves them for tailing.
+    This is the recommended mode for ML training logs.
+
+    Example:
+        logtap collect
+        logtap collect --port 9000 --api-key secret
+        logtap collect --data-dir /mnt/logs --max-disk-mb 5000
+    """
+    import os
+
+    import uvicorn
+
+    # Expand data_dir
+    data_dir = Path(data_dir).expanduser()
+
+    # Set environment variables for the app
+    os.environ["LOGTAP_HOST"] = host
+    os.environ["LOGTAP_PORT"] = str(port)
+    os.environ["LOGTAP_MODE"] = "collect"
+    os.environ["LOGTAP_DATA_DIR"] = str(data_dir)
+    os.environ["LOGTAP_BUFFER_LINES"] = str(buffer_lines)
+    os.environ["LOGTAP_MAX_DISK_MB"] = str(max_disk_mb)
+    os.environ["LOGTAP_RETENTION_HOURS"] = str(retention_hours)
+    if api_key:
+        os.environ["LOGTAP_API_KEY"] = api_key
+
+    console.print("[bold green]Starting logtap collector[/bold green]")
+    console.print(f" [dim]Host:[/dim] {host}")
+    console.print(f" [dim]Port:[/dim] {port}")
+    console.print(f" [dim]Data directory:[/dim] {data_dir}")
+    console.print(f" [dim]Buffer lines:[/dim] {buffer_lines:,}")
+    console.print(f" [dim]Max disk:[/dim] {max_disk_mb} MB")
+    console.print(f" [dim]Retention:[/dim] {retention_hours} hours")
+    console.print(f" [dim]Auth:[/dim] {'enabled' if api_key else 'disabled'}")
+    console.print()
+    console.print(f"[dim]API docs available at[/dim] http://{host}:{port}/docs")
+    console.print()
+
+    uvicorn.run(
+        "logtap.api.app:create_collector_app",
+        host=host,
+        port=port,
+        reload=reload,
+        factory=True,
+    )
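On the consumer side, the stream route's id: lines carry cursors, which is what makes disconnect-safe tailing possible. A rough resume loop, assuming the same collector address and the requests library; SSE parsing here is deliberately minimal:

import requests

cursor = None
run_url = "http://127.0.0.1:8000/runs/train-abc123/stream"  # hypothetical run id

while True:
    params = {"follow": "true"}
    if cursor is not None:
        params["since"] = cursor  # resume after the last line we saw
    with requests.get(run_url, params=params, stream=True) as resp:
        for raw in resp.iter_lines(decode_unicode=True):
            if not raw:
                continue  # blank lines separate SSE events
            if raw.startswith("id: "):
                cursor = int(raw[4:])  # the SSE id field carries the cursor
            elif raw.startswith("data: "):
                print(raw[6:])  # meta or line event payload (JSON)
    # Connection dropped: loop and resume from the last cursor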