logtap 0.2.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
logtap/__init__.py CHANGED
@@ -4,5 +4,5 @@ logtap - A CLI-first log access tool for Unix systems.
4
4
  Remote log file access without SSH. No database. No complex setup.
5
5
  """
6
6
 
7
- __version__ = "0.1.0"
7
+ __version__ = "0.4.0"
8
8
  __author__ = "Kyle Cain"
logtap/api/app.py CHANGED
@@ -1,15 +1,22 @@
1
1
  """FastAPI application factory for logtap."""
2
2
 
3
+ import os
4
+ import time
5
+ from pathlib import Path
6
+
3
7
  from fastapi import FastAPI
4
8
  from fastapi.middleware.cors import CORSMiddleware
5
9
 
6
10
  from logtap import __version__
7
- from logtap.api.routes import files, health, logs, parsed
11
+ from logtap.api.routes import files, health, logs, parsed, runs
12
+ from logtap.core.runs import RunStore
8
13
 
9
14
 
10
15
  def create_app() -> FastAPI:
11
16
  """
12
- Create and configure the FastAPI application.
17
+ Create and configure the FastAPI application for serve mode.
18
+
19
+ Serves static log files from a directory (legacy mode).
13
20
 
14
21
  Returns:
15
22
  Configured FastAPI application instance.
@@ -23,6 +30,10 @@ def create_app() -> FastAPI:
23
30
  openapi_url="/openapi.json",
24
31
  )
25
32
 
33
+ # Store mode info
34
+ app.state.mode = "serve"
35
+ app.state.features = ["files"]
36
+
26
37
  # Configure CORS
27
38
  app.add_middleware(
28
39
  CORSMiddleware,
@@ -41,5 +52,60 @@ def create_app() -> FastAPI:
41
52
  return app
42
53
 
43
54
 
44
- # Create default app instance for uvicorn
55
def create_collector_app() -> FastAPI:
    """
    Build the FastAPI application for collector mode.

    Accepts ingested log streams and serves them back for tailing.
    This is the recommended mode for ML training logs.

    Returns:
        Configured FastAPI application instance.
    """
    application = FastAPI(
        title="logtap",
        description="tail -f for GPU clouds. Survives disconnects, aggregates multi-node.",
        version=__version__,
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
    )

    # Record mode, advertised capabilities, and the server start time
    # (the health endpoint reports these).
    application.state.mode = "collect"
    application.state.features = ["runs"]
    application.state.start_time = time.time()

    # Configure CORS: wide open, same policy as serve mode.
    application.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Run-store configuration comes from the environment (set by the CLI
    # `collect` command before uvicorn imports this factory).
    env = os.environ
    store = RunStore(
        data_dir=Path(env.get("LOGTAP_DATA_DIR", "~/.logtap/runs")).expanduser(),
        buffer_lines=int(env.get("LOGTAP_BUFFER_LINES", "100000")),
        max_disk_mb=int(env.get("LOGTAP_MAX_DISK_MB", "1000")),
        retention_hours=int(env.get("LOGTAP_RETENTION_HOURS", "72")),
    )
    runs.set_run_store(store)
    application.state.run_store = store

    # Wire up the collector's routes.
    application.include_router(health.router, tags=["health"])
    application.include_router(runs.router, prefix="/runs", tags=["runs"])

    return application
108
+
109
+
110
+ # Create default app instance for uvicorn (serve mode)
45
111
  app = create_app()
@@ -1,6 +1,8 @@
1
1
  """Health check endpoint for logtap."""
2
2
 
3
- from fastapi import APIRouter
3
+ import time
4
+
5
+ from fastapi import APIRouter, Request
4
6
 
5
7
  from logtap import __version__
6
8
  from logtap.models.responses import HealthResponse
@@ -9,11 +11,31 @@ router = APIRouter()
9
11
 
10
12
 
11
13
@router.get("/health", response_model=HealthResponse)
async def health_check(request: Request) -> HealthResponse:
    """
    Report the health of the logtap service.

    Returns:
        Health status, version, mode, and capability information.
    """
    state = request.app.state

    # Serve-mode defaults apply when the app factory set no mode/features.
    mode = getattr(state, "mode", "serve")
    features = getattr(state, "features", ["files"])

    # Run count is only reported in collect mode (run_store attribute present).
    runs_count = None
    if hasattr(state, "run_store"):
        runs_count = len(state.run_store.list_runs())

    # Uptime is only reported when the factory recorded a start time.
    uptime = None
    if hasattr(state, "start_time"):
        uptime = int(time.time() - state.start_time)

    return HealthResponse(
        status="healthy",
        version=__version__,
        mode=mode,
        features=features,
        runs=runs_count,
        uptime_seconds=uptime,
    )
logtap/api/routes/logs.py CHANGED
@@ -180,7 +180,7 @@ async def stream_logs(
180
180
  return
181
181
 
182
182
  try:
183
- async with aiofiles.open(filepath, mode="r", encoding="utf-8") as f:
183
+ async with aiofiles.open(filepath, mode="r", encoding="utf-8", errors="replace") as f:
184
184
  # Seek to end of file
185
185
  await f.seek(0, 2)
186
186
 
@@ -225,7 +225,7 @@ async def stream_logs_sse(
225
225
  filepath = get_filepath(filename, settings)
226
226
 
227
227
  async def event_generator():
228
- async with aiofiles.open(filepath, mode="r", encoding="utf-8") as f:
228
+ async with aiofiles.open(filepath, mode="r", encoding="utf-8", errors="replace") as f:
229
229
  # Seek to end
230
230
  await f.seek(0, 2)
231
231
 
@@ -0,0 +1,351 @@
1
+ """Routes for run management (collector mode)."""
2
+
3
+ import asyncio
4
+ from typing import List, Optional
5
+
6
+ from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request, Response
7
+ from fastapi.responses import StreamingResponse
8
+
9
+ from logtap.api.dependencies import verify_api_key
10
+ from logtap.core.runs import RunStore
11
+ from logtap.models.responses import (
12
+ IngestResponse,
13
+ RunInfo,
14
+ RunListResponse,
15
+ StreamLineEvent,
16
+ StreamMetaEvent,
17
+ )
18
+
19
+ router = APIRouter()
20
+
21
+ # Global run store - will be set by app factory
22
+ _run_store: Optional[RunStore] = None
23
+
24
+
25
def get_run_store() -> RunStore:
    """Return the process-wide run store, failing with HTTP 500 if unset."""
    store = _run_store
    if store is None:
        raise HTTPException(status_code=500, detail="Run store not initialized")
    return store


def set_run_store(store: RunStore) -> None:
    """Install the process-wide run store (called by the app factory)."""
    global _run_store
    _run_store = store
36
+
37
+
38
def parse_tags(tag_headers: Optional[List[str]]) -> dict:
    """Convert repeated X-Logtap-Tag header values into a key/value dict.

    Each header is expected as "key=value"; whitespace around key and value
    is stripped. Headers without "=" are silently ignored, and a duplicate
    key keeps the last occurrence.
    """
    headers = tag_headers or []
    pairs = (entry.split("=", 1) for entry in headers if "=" in entry)
    return {key.strip(): value.strip() for key, value in pairs}
49
+
50
+
51
@router.get("", response_model=RunListResponse)
async def list_runs(
    since_hours: Optional[int] = Query(None, description="Filter to runs active within N hours"),
    _: None = Depends(verify_api_key),
    store: RunStore = Depends(get_run_store),
) -> RunListResponse:
    """List all runs."""

    def to_info(run) -> RunInfo:
        # Flatten a run plus its metadata into the wire model.
        meta = run.metadata
        return RunInfo(
            id=run.id,
            lines=meta.lines_count,
            cursor_earliest=run.cursor_earliest,
            cursor_latest=run.cursor_latest,
            tags=meta.tags,
            created_at=meta.created_at,
            last_activity=meta.last_activity,
            active=meta.active,
            bytes_on_disk=meta.bytes_on_disk,
        )

    matched = store.list_runs(since_hours=since_hours)
    return RunListResponse(runs=[to_info(run) for run in matched])
76
+
77
+
78
@router.get("/{run_id}", response_model=RunInfo)
async def get_run(
    run_id: str,
    _: None = Depends(verify_api_key),
    store: RunStore = Depends(get_run_store),
) -> RunInfo:
    """Get details for a specific run.

    Raises:
        HTTPException: 404 when the run does not exist.
    """
    found = store.get(run_id)
    if found is None:
        detail = {"error": "run_not_found", "message": f"Run '{run_id}' does not exist"}
        raise HTTPException(status_code=404, detail=detail)

    meta = found.metadata
    return RunInfo(
        id=found.id,
        lines=meta.lines_count,
        cursor_earliest=found.cursor_earliest,
        cursor_latest=found.cursor_latest,
        tags=meta.tags,
        created_at=meta.created_at,
        last_activity=meta.last_activity,
        active=meta.active,
        bytes_on_disk=meta.bytes_on_disk,
    )
103
+
104
+
105
@router.post("/{run_id}/ingest", response_model=IngestResponse)
async def ingest(
    run_id: str,
    request: Request,
    response: Response,
    x_logtap_tag: Optional[List[str]] = Header(None),
    _: None = Depends(verify_api_key),
    store: RunStore = Depends(get_run_store),
) -> IngestResponse:
    """
    Ingest log lines for a run.

    Send lines as plain text with newline delimiters.
    Supports chunked transfer encoding for streaming.

    Raises:
        HTTPException: 507 when the disk limit is exceeded,
            409 when supplied tags conflict with the run's existing tags.
    """
    import codecs

    # Check storage before accepting any bytes.
    storage_err = store.check_storage()
    if storage_err:
        raise HTTPException(
            status_code=507,
            detail={"error": "insufficient_storage", "message": "Disk limit exceeded"},
        )

    # Get or create run; `created` drives the 200/201 status below.
    run, created = store.get_or_create(run_id)

    # Handle tags from X-Logtap-Tag headers; a conflict is a 409.
    tags = parse_tags(x_logtap_tag)
    if tags:
        err = run.set_tags(tags)
        if err:
            raise HTTPException(
                status_code=409,
                detail={"error": "tag_conflict", "message": err},
            )

    # Read and ingest body. Use an incremental decoder so a multi-byte
    # UTF-8 sequence split across chunk boundaries is decoded correctly;
    # a plain per-chunk decode(errors="replace") would emit replacement
    # characters at every such boundary.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    lines_ingested = 0
    buffer = ""

    async for chunk in request.stream():
        buffer += decoder.decode(chunk)

        # Process complete lines.
        while "\n" in buffer:
            line, buffer = buffer.split("\n", 1)
            run.append(line)
            lines_ingested += 1

    # Flush any bytes the decoder is still holding, then any trailing
    # partial line (input not ending in a newline still counts as a line).
    buffer += decoder.decode(b"", final=True)
    if buffer:
        run.append(buffer)
        lines_ingested += 1

    # Persist updated counters/cursors.
    # NOTE(review): reaches into Run's private API — consider a public flush().
    run._save_metadata()

    # Set status code: 201 for new run, 200 for existing.
    response.status_code = 201 if created else 200

    return IngestResponse(
        run_id=run_id,
        lines_ingested=lines_ingested,
        cursor_end=run.cursor_latest,
    )
171
+
172
+
173
@router.get("/{run_id}/stream")
async def stream_run(
    run_id: str,
    since: Optional[int] = Query(None, description="Cursor to resume from (exclusive)"),
    tail: int = Query(50, description="Lines to show if since not provided"),
    follow: bool = Query(False, description="Keep connection open for new lines"),
    tag: Optional[List[str]] = Query(None, description="Filter by tag (key=value)"),
    _: None = Depends(verify_api_key),
    store: RunStore = Depends(get_run_store),
) -> StreamingResponse:
    """
    Stream lines from a run using Server-Sent Events.

    Supports resume via `since` parameter or Last-Event-ID header.

    Emits a `meta` event first (earliest/latest cursors, plus gap/missed
    info when resuming past evicted lines), then one `line` event per log
    line with the cursor as the SSE `id`. With follow=True the connection
    stays open, polling for new lines every 100 ms and sending a comment
    heartbeat roughly every 15 s to keep intermediaries from timing out.

    Raises:
        HTTPException: 404 when the run does not exist.
    """
    run = store.get(run_id)
    if run is None:
        raise HTTPException(
            status_code=404,
            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
        )

    # Tag filtering (if specified, check run has all tags)
    if tag:
        required_tags = parse_tags(tag)
        for key, value in required_tags.items():
            if run.metadata.tags.get(key) != value:
                # Run doesn't match filter - return empty stream
                # (meta-only, so clients still learn the cursor bounds).
                async def empty_stream():
                    meta = StreamMetaEvent(
                        cursor_earliest=run.cursor_earliest,
                        cursor_latest=run.cursor_latest,
                        gap=False,
                    )
                    yield f"event: meta\ndata: {meta.model_dump_json()}\n\n"

                return StreamingResponse(
                    empty_stream(),
                    media_type="text/event-stream",
                    headers={
                        "Cache-Control": "no-cache",
                        "X-Logtap-Earliest-Cursor": str(run.cursor_earliest),
                        "X-Logtap-Latest-Cursor": str(run.cursor_latest),
                    },
                )

    async def generate_sse():
        # Get initial lines and check for gap
        lines, gap = run.get_lines(since=since, tail=tail)

        # Send meta event
        missed = None
        if gap and since is not None:
            # Lines in (since, cursor_earliest) were evicted; clamp at 0.
            # NOTE(review): assumes cursors are consecutive integers — confirm
            # against RunStore's cursor semantics.
            missed = run.cursor_earliest - since - 1
            if missed < 0:
                missed = 0

        meta = StreamMetaEvent(
            cursor_earliest=run.cursor_earliest,
            cursor_latest=run.cursor_latest,
            gap=gap,
            missed=missed,
        )
        yield f"event: meta\ndata: {meta.model_dump_json()}\n\n"

        # Send initial lines
        last_cursor = since if since is not None else -1
        for line in lines:
            event = StreamLineEvent(
                cursor=line.cursor,
                line=line.line,
                ts=line.ts,
            )
            # SSE id carries the cursor so clients can resume via Last-Event-ID.
            yield f"id: {line.cursor}\nevent: line\ndata: {event.model_dump_json()}\n\n"
            last_cursor = line.cursor

        if not follow:
            return

        # Follow mode - stream new lines
        heartbeat_interval = 15  # seconds
        last_heartbeat = asyncio.get_event_loop().time()

        while True:
            # Get new lines since last cursor (batches of up to 100)
            new_lines, _ = run.get_lines(since=last_cursor, limit=100)

            for line in new_lines:
                event = StreamLineEvent(
                    cursor=line.cursor,
                    line=line.line,
                    ts=line.ts,
                )
                yield f"id: {line.cursor}\nevent: line\ndata: {event.model_dump_json()}\n\n"
                last_cursor = line.cursor

            # Send heartbeat if needed (SSE comment line keeps the
            # connection alive through proxies without reaching clients).
            now = asyncio.get_event_loop().time()
            if now - last_heartbeat >= heartbeat_interval:
                yield ": heartbeat\n\n"
                last_heartbeat = now

            # Small delay before checking again (100 ms poll interval)
            await asyncio.sleep(0.1)

    return StreamingResponse(
        generate_sse(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Logtap-Earliest-Cursor": str(run.cursor_earliest),
            "X-Logtap-Latest-Cursor": str(run.cursor_latest),
        },
    )
287
+
288
+
289
@router.get("/{run_id}/query")
async def query_run(
    run_id: str,
    from_cursor: Optional[int] = Query(None, alias="from", description="Start cursor (inclusive)"),
    to_cursor: Optional[int] = Query(None, alias="to", description="End cursor (inclusive)"),
    tail: int = Query(50, description="Last N lines (if from/to not provided)"),
    limit: int = Query(1000, le=10000, description="Maximum lines to return"),
    search: Optional[str] = Query(None, description="Substring filter"),
    regex: Optional[str] = Query(None, description="Regex filter"),
    output: str = Query("jsonl", description="Output format: jsonl or plain"),
    _: None = Depends(verify_api_key),
    store: RunStore = Depends(get_run_store),
) -> StreamingResponse:
    """Query lines from a run.

    Returns lines as NDJSON (default) or plain text, selected by range
    (`from`/`to` cursors) or by tail, optionally filtered by substring
    or regex.

    Raises:
        HTTPException: 400 for conflicting or invalid filters,
            404 when the run does not exist.
    """
    # Validate search/regex mutual exclusion
    if search and regex:
        raise HTTPException(
            status_code=400,
            detail={"error": "invalid_query", "message": "Cannot use both search and regex"},
        )

    run = store.get(run_id)
    if run is None:
        raise HTTPException(
            status_code=404,
            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
        )

    # Get lines
    if from_cursor is not None:
        # Range query - `since` is exclusive, so start one before from_cursor
        lines, _ = run.get_lines(since=from_cursor - 1, limit=limit)
        if to_cursor is not None:
            lines = [ln for ln in lines if ln.cursor <= to_cursor]
    else:
        # Tail query
        lines, _ = run.get_lines(tail=tail, limit=limit)

    # Apply search/regex filter
    if search:
        lines = [ln for ln in lines if search in ln.line]
    elif regex:
        # Prefer RE2 (linear-time matching, safe for untrusted patterns);
        # fall back to the stdlib `re` module when the optional `re2`
        # binding is not installed, so regex queries don't 500 with an
        # ImportError on a bare install. Both expose compile/search/error.
        try:
            import re2 as regex_mod
        except ImportError:
            import re as regex_mod

        try:
            pattern = regex_mod.compile(regex)
            lines = [ln for ln in lines if pattern.search(ln.line)]
        except regex_mod.error:
            raise HTTPException(
                status_code=400,
                detail={"error": "invalid_regex", "message": "Invalid regex pattern"},
            )

    async def generate():
        # Emit each selected line in the requested output format.
        for line in lines:
            if output == "plain":
                yield line.line + "\n"
            else:
                event = StreamLineEvent(cursor=line.cursor, line=line.line, ts=line.ts)
                yield event.model_dump_json() + "\n"

    content_type = "text/plain" if output == "plain" else "application/x-ndjson"
    return StreamingResponse(generate(), media_type=content_type)
@@ -0,0 +1,107 @@
1
+ """Collector command for logtap CLI."""
2
+
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ import typer
7
+ from rich.console import Console
8
+
9
+ console = Console()
10
+
11
+
12
def collect(
    port: int = typer.Option(
        8000,
        "--port",
        "-p",
        help="Port to listen on.",
    ),
    host: str = typer.Option(
        "0.0.0.0",
        "--host",
        "-H",
        help="Host to bind to.",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        "-k",
        help="API key for authentication.",
        envvar="LOGTAP_API_KEY",
    ),
    data_dir: Path = typer.Option(
        Path("~/.logtap/runs").expanduser(),
        "--data-dir",
        "-d",
        help="Directory for run storage.",
    ),
    buffer_lines: int = typer.Option(
        100_000,
        "--buffer-lines",
        help="In-memory cache size per run.",
    ),
    max_disk_mb: int = typer.Option(
        1000,
        "--max-disk-mb",
        help="Maximum disk usage across all runs (MB).",
    ),
    retention_hours: int = typer.Option(
        72,
        "--retention-hours",
        help="Hours to retain runs before cleanup.",
    ),
    reload: bool = typer.Option(
        False,
        "--reload",
        "-r",
        help="Enable auto-reload for development.",
    ),
) -> None:
    """
    Start the logtap collector server.

    Accepts ingested log streams over HTTP and serves them for tailing.
    This is the recommended mode for ML training logs.

    Configuration is handed to the server process via LOGTAP_* environment
    variables, which the collector app factory reads on startup.

    Example:
        logtap collect
        logtap collect --port 9000 --api-key secret
        logtap collect --data-dir /mnt/logs --max-disk-mb 5000
    """
    # Imported lazily so `logtap --help` stays fast and doesn't need uvicorn.
    import os

    import uvicorn

    # Expand data_dir (handles a literal "~" passed on the command line)
    data_dir = Path(data_dir).expanduser()

    # Set environment variables for the app; the factory reads these since
    # uvicorn re-imports the app in its worker (and on --reload restarts).
    os.environ["LOGTAP_HOST"] = host
    os.environ["LOGTAP_PORT"] = str(port)
    os.environ["LOGTAP_MODE"] = "collect"
    os.environ["LOGTAP_DATA_DIR"] = str(data_dir)
    os.environ["LOGTAP_BUFFER_LINES"] = str(buffer_lines)
    os.environ["LOGTAP_MAX_DISK_MB"] = str(max_disk_mb)
    os.environ["LOGTAP_RETENTION_HOURS"] = str(retention_hours)
    if api_key:
        os.environ["LOGTAP_API_KEY"] = api_key

    # Print an effective-configuration banner before handing off to uvicorn.
    console.print("[bold green]Starting logtap collector[/bold green]")
    console.print(f"  [dim]Host:[/dim] {host}")
    console.print(f"  [dim]Port:[/dim] {port}")
    console.print(f"  [dim]Data directory:[/dim] {data_dir}")
    console.print(f"  [dim]Buffer lines:[/dim] {buffer_lines:,}")
    console.print(f"  [dim]Max disk:[/dim] {max_disk_mb} MB")
    console.print(f"  [dim]Retention:[/dim] {retention_hours} hours")
    console.print(f"  [dim]Auth:[/dim] {'enabled' if api_key else 'disabled'}")
    console.print()
    console.print(f"[dim]API docs available at[/dim] http://{host}:{port}/docs")
    console.print()

    # factory=True: the string names a callable that builds the app,
    # required for --reload to re-create the application on code changes.
    uvicorn.run(
        "logtap.api.app:create_collector_app",
        host=host,
        port=port,
        reload=reload,
        factory=True,
    )
+ )