PyPI - logtap - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

logtap 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

logtap/__init__.py +1 -1
logtap/api/app.py +69 -3
logtap/api/routes/health.py +26 -4
logtap/api/routes/logs.py +26 -31
logtap/api/routes/parsed.py +8 -7
logtap/api/routes/runs.py +330 -0
logtap/cli/commands/collect.py +107 -0
logtap/cli/commands/doctor.py +127 -0
logtap/cli/commands/ingest.py +123 -0
logtap/cli/commands/runs.py +116 -0
logtap/cli/commands/tail.py +220 -23
logtap/cli/main.py +12 -5
logtap/core/runs.py +433 -0
logtap/core/validation.py +132 -0
logtap/models/responses.py +54 -1
logtap-0.4.1.dist-info/METADATA +304 -0
{logtap-0.3.0.dist-info → logtap-0.4.1.dist-info}/RECORD +20 -14
logtap-0.3.0.dist-info/METADATA +0 -319
{logtap-0.3.0.dist-info → logtap-0.4.1.dist-info}/WHEEL +0 -0
{logtap-0.3.0.dist-info → logtap-0.4.1.dist-info}/entry_points.txt +0 -0
{logtap-0.3.0.dist-info → logtap-0.4.1.dist-info}/licenses/LICENSE +0 -0

logtap/__init__.py CHANGED Viewed

@@ -4,5 +4,5 @@ logtap - A CLI-first log access tool for Unix systems.
 Remote log file access without SSH. No database. No complex setup.
 """
-__version__ = "0.3.0"
+__version__ = "0.4.0"
 __author__ = "Kyle Cain"

logtap/api/app.py CHANGED Viewed

@@ -1,15 +1,22 @@
 """FastAPI application factory for logtap."""
+import os
+import time
+from pathlib import Path
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from logtap import __version__
-from logtap.api.routes import files, health, logs, parsed
+from logtap.api.routes import files, health, logs, parsed, runs
+from logtap.core.runs import RunStore
 def create_app() -> FastAPI:
     """
-    Create and configure the FastAPI application.
+    Create and configure the FastAPI application for serve mode.
+    Serves static log files from a directory (legacy mode).
     Returns:
         Configured FastAPI application instance.
@@ -23,6 +30,10 @@ def create_app() -> FastAPI:
         openapi_url="/openapi.json",
     )
+    # Store mode info
+    app.state.mode = "serve"
+    app.state.features = ["files"]
     # Configure CORS
     app.add_middleware(
         CORSMiddleware,
@@ -41,5 +52,60 @@ def create_app() -> FastAPI:
     return app
-# Create default app instance for uvicorn
+def create_collector_app() -> FastAPI:
+    """
+    Create and configure the FastAPI application for collector mode.
+    Accepts ingested log streams and serves them for tailing.
+    This is the recommended mode for ML training logs.
+    Returns:
+        Configured FastAPI application instance.
+    """
+    app = FastAPI(
+        title="logtap",
+        description="tail -f for GPU clouds. Survives disconnects, aggregates multi-node.",
+        version=__version__,
+        docs_url="/docs",
+        redoc_url="/redoc",
+        openapi_url="/openapi.json",
+    )
+    # Store mode info and start time
+    app.state.mode = "collect"
+    app.state.features = ["runs"]
+    app.state.start_time = time.time()
+    # Configure CORS
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    # Initialize run store from environment
+    data_dir = Path(os.environ.get("LOGTAP_DATA_DIR", "~/.logtap/runs")).expanduser()
+    buffer_lines = int(os.environ.get("LOGTAP_BUFFER_LINES", "100000"))
+    max_disk_mb = int(os.environ.get("LOGTAP_MAX_DISK_MB", "1000"))
+    retention_hours = int(os.environ.get("LOGTAP_RETENTION_HOURS", "72"))
+    run_store = RunStore(
+        data_dir=data_dir,
+        buffer_lines=buffer_lines,
+        max_disk_mb=max_disk_mb,
+        retention_hours=retention_hours,
+    )
+    runs.set_run_store(run_store)
+    app.state.run_store = run_store
+    # Include routers
+    app.include_router(health.router, tags=["health"])
+    app.include_router(runs.router, prefix="/runs", tags=["runs"])
+    return app
+# Create default app instance for uvicorn (serve mode)
 app = create_app()

logtap/api/routes/health.py CHANGED Viewed

@@ -1,6 +1,8 @@
 """Health check endpoint for logtap."""
-from fastapi import APIRouter
+import time
+from fastapi import APIRouter, Request
 from logtap import __version__
 from logtap.models.responses import HealthResponse
@@ -9,11 +11,31 @@ router = APIRouter()
 @router.get("/health", response_model=HealthResponse)
-async def health_check() -> HealthResponse:
+async def health_check(request: Request) -> HealthResponse:
     """
     Check the health of the logtap service.
     Returns:
-        Health status and version information.
+        Health status, version, mode, and capability information.
     """
-    return HealthResponse(status="healthy", version=__version__)
+    mode = getattr(request.app.state, "mode", "serve")
+    features = getattr(request.app.state, "features", ["files"])
+    # Get run count if in collect mode
+    runs_count = None
+    if hasattr(request.app.state, "run_store"):
+        runs_count = len(request.app.state.run_store.list_runs())
+    # Get uptime if start_time is set
+    uptime = None
+    if hasattr(request.app.state, "start_time"):
+        uptime = int(time.time() - request.app.state.start_time)
+    return HealthResponse(
+        status="healthy",
+        version=__version__,
+        mode=mode,
+        features=features,
+        runs=runs_count,
+        uptime_seconds=uptime,
+    )

logtap/api/routes/logs.py CHANGED Viewed

@@ -11,42 +11,39 @@ from starlette.responses import StreamingResponse
 from logtap.api.dependencies import get_settings, verify_api_key
 from logtap.core.reader import tail_async
 from logtap.core.search import filter_lines
-from logtap.core.validation import is_filename_valid, is_limit_valid, is_search_term_valid
+from logtap.core.validation import (
+    is_limit_valid,
+    is_search_term_valid,
+    resolve_safe_path,
+)
 from logtap.models.config import Settings
 from logtap.models.responses import LogResponse
 router = APIRouter()
-# Error messages (matching original for backward compatibility)
-ERROR_INVALID_FILENAME = 'Invalid filename: must not contain ".." or start with "/"'
+# Error messages
+ERROR_INVALID_FILENAME = "Invalid filename"
 ERROR_LONG_SEARCH_TERM = "Search term is too long: must be 100 characters or fewer"
 ERROR_INVALID_LIMIT = "Invalid limit value: must be between 1 and 1000"
-def validate_filename(filename: str) -> None:
-    """Validate filename and raise HTTPException if invalid."""
-    if not is_filename_valid(filename):
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=ERROR_INVALID_FILENAME,
-        )
-    # Block any filename with path separators
-    if "/" in filename or "\\" in filename:
+def get_filepath(filename: str, settings: Settings) -> str:
+    """Get full filepath and validate it exists and is within allowed directory."""
+    log_dir = settings.get_log_directory()
+    # resolve_safe_path handles: NUL bytes, control chars, path traversal,
+    # separators, absolute paths, symlink escape, and containment check
+    filepath = resolve_safe_path(log_dir, filename)
+    if filepath is None:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_INVALID_FILENAME,
         )
-def get_filepath(filename: str, settings: Settings) -> str:
-    """Get full filepath and validate it exists."""
-    log_dir = settings.get_log_directory()
-    filepath = os.path.join(log_dir, filename)
+    # Separate existence check for correct 404 response
     if not os.path.isfile(filepath):
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"File not found: {filepath} does not exist",
+            detail=f"File not found: {filename} does not exist",
         )
     return filepath
@@ -67,8 +64,6 @@ async def get_logs(
     This endpoint reads the last N lines from a log file and optionally
     filters them by a search term or regex pattern.
     """
-    validate_filename(filename)
     if term and not is_search_term_valid(term):
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
@@ -126,8 +121,11 @@ async def get_logs_multi(
     for filename in file_list:
         try:
-            validate_filename(filename)
-            filepath = os.path.join(log_dir, filename)
+            filepath = resolve_safe_path(log_dir, filename)
+            if filepath is None:
+                results[filename] = {"error": ERROR_INVALID_FILENAME, "lines": []}
+                continue
             if not os.path.isfile(filepath):
                 results[filename] = {"error": "File not found", "lines": []}
@@ -164,16 +162,14 @@ async def stream_logs(
     # Get settings (can't use Depends in WebSocket easily)
     settings = get_settings()
-    try:
-        validate_filename(filename)
-    except HTTPException as e:
-        await websocket.send_json({"error": e.detail})
+    log_dir = settings.get_log_directory()
+    filepath = resolve_safe_path(log_dir, filename)
+    if filepath is None:
+        await websocket.send_json({"error": ERROR_INVALID_FILENAME})
         await websocket.close()
         return
-    log_dir = settings.get_log_directory()
-    filepath = os.path.join(log_dir, filename)
     if not os.path.isfile(filepath):
         await websocket.send_json({"error": f"File not found: {filename}"})
         await websocket.close()
@@ -221,7 +217,6 @@ async def stream_logs_sse(
     Alternative to WebSocket for simpler clients.
     """
-    validate_filename(filename)
     filepath = get_filepath(filename, settings)
     async def event_generator():

logtap/api/routes/parsed.py CHANGED Viewed

@@ -9,6 +9,7 @@ from logtap.api.dependencies import get_settings, verify_api_key
 from logtap.core.parsers import AutoParser, LogLevel
 from logtap.core.reader import tail_async
 from logtap.core.search import filter_entries
+from logtap.core.validation import resolve_safe_path
 from logtap.models.config import Settings
 router = APIRouter()
@@ -40,21 +41,21 @@ async def get_parsed_logs(
     Supported formats: syslog, JSON, nginx, apache (auto-detected).
     """
-    # Validate filename
-    if ".." in filename or filename.startswith("/") or "/" in filename or "\\" in filename:
+    # Validate filename and resolve safe path
+    log_dir = settings.get_log_directory()
+    # resolve_safe_path handles all path security: traversal, symlinks, containment
+    filepath = resolve_safe_path(log_dir, filename)
+    if filepath is None:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail='Invalid filename: must not contain ".." or start with "/"',
         )
-    # Build file path
-    log_dir = settings.get_log_directory()
-    filepath = os.path.join(log_dir, filename)
     if not os.path.isfile(filepath):
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"File not found: {filepath} does not exist",
+            detail=f"File not found: {filename} does not exist",
         )
     # Read file lines

logtap/api/routes/runs.py ADDED Viewed

@@ -0,0 +1,330 @@
+"""Routes for run management (collector mode)."""
+import asyncio
+from typing import List, Optional
+from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request, Response
+from fastapi.responses import StreamingResponse
+from logtap.api.dependencies import verify_api_key
+from logtap.core.runs import RunStore
+from logtap.models.responses import (
+    IngestResponse,
+    RunInfo,
+    RunListResponse,
+    StreamLineEvent,
+    StreamMetaEvent,
+)
+router = APIRouter()
+# Global run store - will be set by app factory
+_run_store: Optional[RunStore] = None
+def get_run_store() -> RunStore:
+    """Get the run store instance."""
+    if _run_store is None:
+        raise HTTPException(status_code=500, detail="Run store not initialized")
+    return _run_store
+def set_run_store(store: RunStore) -> None:
+    """Set the global run store instance."""
+    global _run_store
+    _run_store = store
+def parse_tags(tag_headers: Optional[List[str]]) -> dict:
+    """Parse X-Logtap-Tag headers into dict."""
+    if not tag_headers:
+        return {}
+    tags = {}
+    for tag in tag_headers:
+        if "=" in tag:
+            key, value = tag.split("=", 1)
+            tags[key.strip()] = value.strip()
+    return tags
+@router.get("", response_model=RunListResponse)
+async def list_runs(
+    since_hours: Optional[int] = Query(None, description="Filter to runs active within N hours"),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> RunListResponse:
+    """List all runs."""
+    runs = store.list_runs(since_hours=since_hours)
+    return RunListResponse(
+        runs=[
+            RunInfo(
+                id=run.id,
+                lines=run.metadata.lines_count,
+                cursor_earliest=run.cursor_earliest,
+                cursor_latest=run.cursor_latest,
+                tags=run.metadata.tags,
+                created_at=run.metadata.created_at,
+                last_activity=run.metadata.last_activity,
+                active=run.metadata.active,
+                bytes_on_disk=run.metadata.bytes_on_disk,
+            )
+            for run in runs
+        ]
+    )
+@router.get("/{run_id}", response_model=RunInfo)
+async def get_run(
+    run_id: str,
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> RunInfo:
+    """Get details for a specific run."""
+    run = store.get(run_id)
+    if run is None:
+        raise HTTPException(
+            status_code=404,
+            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
+        )
+    return RunInfo(
+        id=run.id,
+        lines=run.metadata.lines_count,
+        cursor_earliest=run.cursor_earliest,
+        cursor_latest=run.cursor_latest,
+        tags=run.metadata.tags,
+        created_at=run.metadata.created_at,
+        last_activity=run.metadata.last_activity,
+        active=run.metadata.active,
+        bytes_on_disk=run.metadata.bytes_on_disk,
+    )
+@router.post("/{run_id}/ingest", response_model=IngestResponse)
+async def ingest(
+    run_id: str,
+    request: Request,
+    response: Response,
+    x_logtap_tag: Optional[List[str]] = Header(None),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> IngestResponse:
+    """
+    Ingest log lines for a run.
+    Send lines as plain text with newline delimiters.
+    Supports chunked transfer encoding for streaming.
+    """
+    # Check storage
+    storage_err = store.check_storage()
+    if storage_err:
+        raise HTTPException(
+            status_code=507,
+            detail={"error": "insufficient_storage", "message": "Disk limit exceeded"},
+        )
+    # Get or create run
+    run, created = store.get_or_create(run_id)
+    # Handle tags (validate and track in metadata)
+    tags = parse_tags(x_logtap_tag)
+    if tags:
+        err = run.set_tags(tags)
+        if err:
+            raise HTTPException(
+                status_code=400,
+                detail={"error": "invalid_tag", "message": err},
+            )
+    # Read and ingest body
+    lines_ingested = 0
+    buffer = ""
+    async for chunk in request.stream():
+        text = chunk.decode("utf-8", errors="replace")
+        buffer += text
+        # Process complete lines
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            run.append(line, tags)  # Pass tags to each line
+            lines_ingested += 1
+    # Flush remaining partial line
+    if buffer:
+        run.append(buffer, tags)  # Pass tags to each line
+        lines_ingested += 1
+    # Save metadata
+    run._save_metadata()
+    # Set status code: 201 for new run, 200 for existing
+    response.status_code = 201 if created else 200
+    return IngestResponse(
+        run_id=run_id,
+        lines_ingested=lines_ingested,
+        cursor_end=run.cursor_latest,
+    )
+@router.get("/{run_id}/stream")
+async def stream_run(
+    run_id: str,
+    since: Optional[int] = Query(None, description="Cursor to resume from (exclusive)"),
+    tail: int = Query(50, description="Lines to show if since not provided"),
+    follow: bool = Query(False, description="Keep connection open for new lines"),
+    tag: Optional[List[str]] = Query(None, description="Filter by tag (key=value)"),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> StreamingResponse:
+    """
+    Stream lines from a run using Server-Sent Events.
+    Supports resume via `since` parameter or Last-Event-ID header.
+    """
+    run = store.get(run_id)
+    if run is None:
+        raise HTTPException(
+            status_code=404,
+            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
+        )
+    # Parse tag filter for per-line filtering
+    tag_filter = parse_tags(tag) if tag else None
+    async def generate_sse():
+        # Get initial lines and check for gap
+        lines, gap = run.get_lines(since=since, tail=tail, tag_filter=tag_filter)
+        # Send meta event
+        missed = None
+        if gap and since is not None:
+            missed = run.cursor_earliest - since - 1
+            if missed < 0:
+                missed = 0
+        meta = StreamMetaEvent(
+            cursor_earliest=run.cursor_earliest,
+            cursor_latest=run.cursor_latest,
+            gap=gap,
+            missed=missed,
+        )
+        yield f"event: meta\ndata: {meta.model_dump_json()}\n\n"
+        # Send initial lines
+        last_cursor = since if since is not None else -1
+        for line in lines:
+            event = StreamLineEvent(
+                cursor=line.cursor,
+                line=line.line,
+                ts=line.ts,
+            )
+            yield f"id: {line.cursor}\nevent: line\ndata: {event.model_dump_json()}\n\n"
+            last_cursor = line.cursor
+        if not follow:
+            return
+        # Follow mode - stream new lines
+        heartbeat_interval = 15  # seconds
+        last_heartbeat = asyncio.get_event_loop().time()
+        while True:
+            # Get new lines since last cursor
+            new_lines, _ = run.get_lines(since=last_cursor, limit=100, tag_filter=tag_filter)
+            for line in new_lines:
+                event = StreamLineEvent(
+                    cursor=line.cursor,
+                    line=line.line,
+                    ts=line.ts,
+                )
+                yield f"id: {line.cursor}\nevent: line\ndata: {event.model_dump_json()}\n\n"
+                last_cursor = line.cursor
+            # Send heartbeat if needed
+            now = asyncio.get_event_loop().time()
+            if now - last_heartbeat >= heartbeat_interval:
+                yield ": heartbeat\n\n"
+                last_heartbeat = now
+            # Small delay before checking again
+            await asyncio.sleep(0.1)
+    return StreamingResponse(
+        generate_sse(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "X-Logtap-Earliest-Cursor": str(run.cursor_earliest),
+            "X-Logtap-Latest-Cursor": str(run.cursor_latest),
+        },
+    )
+@router.get("/{run_id}/query")
+async def query_run(
+    run_id: str,
+    from_cursor: Optional[int] = Query(None, alias="from", description="Start cursor (inclusive)"),
+    to_cursor: Optional[int] = Query(None, alias="to", description="End cursor (inclusive)"),
+    tail: int = Query(50, description="Last N lines (if from/to not provided)"),
+    limit: int = Query(1000, le=10000, description="Maximum lines to return"),
+    search: Optional[str] = Query(None, description="Substring filter"),
+    regex: Optional[str] = Query(None, description="Regex filter"),
+    output: str = Query("jsonl", description="Output format: jsonl or plain"),
+    _: None = Depends(verify_api_key),
+    store: RunStore = Depends(get_run_store),
+) -> StreamingResponse:
+    """Query lines from a run."""
+    # Validate search/regex mutual exclusion
+    if search and regex:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": "invalid_query", "message": "Cannot use both search and regex"},
+        )
+    run = store.get(run_id)
+    if run is None:
+        raise HTTPException(
+            status_code=404,
+            detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
+        )
+    # Get lines
+    if from_cursor is not None:
+        # Range query - get lines from cursor onwards
+        lines, _ = run.get_lines(since=from_cursor - 1, limit=limit)
+        if to_cursor is not None:
+            lines = [ln for ln in lines if ln.cursor <= to_cursor]
+    else:
+        # Tail query
+        lines, _ = run.get_lines(tail=tail, limit=limit)
+    # Apply search/regex filter
+    if search:
+        lines = [ln for ln in lines if search in ln.line]
+    elif regex:
+        import re2
+        try:
+            pattern = re2.compile(regex)
+            lines = [ln for ln in lines if pattern.search(ln.line)]
+        except re2.error:
+            raise HTTPException(
+                status_code=400,
+                detail={"error": "invalid_regex", "message": "Invalid regex pattern"},
+            )
+    async def generate():
+        for line in lines:
+            if output == "plain":
+                yield line.line + "\n"
+            else:
+                event = StreamLineEvent(cursor=line.cursor, line=line.line, ts=line.ts)
+                yield event.model_dump_json() + "\n"
+    content_type = "text/plain" if output == "plain" else "application/x-ndjson"
+    return StreamingResponse(generate(), media_type=content_type)

logtap 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

logtap 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl