PyPI - logtap - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

logtap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

logtap/api/routes/logs.py +26 -31
logtap/api/routes/parsed.py +8 -7
logtap/api/routes/runs.py +9 -30
logtap/cli/commands/doctor.py +127 -0
logtap/cli/commands/tail.py +13 -5
logtap/cli/main.py +2 -1
logtap/core/runs.py +71 -31
logtap/core/validation.py +132 -0
logtap-0.4.1.dist-info/METADATA +304 -0
{logtap-0.4.0.dist-info → logtap-0.4.1.dist-info}/RECORD +13 -12
logtap-0.4.0.dist-info/METADATA +0 -319
{logtap-0.4.0.dist-info → logtap-0.4.1.dist-info}/WHEEL +0 -0
{logtap-0.4.0.dist-info → logtap-0.4.1.dist-info}/entry_points.txt +0 -0
{logtap-0.4.0.dist-info → logtap-0.4.1.dist-info}/licenses/LICENSE +0 -0

logtap/api/routes/logs.py CHANGED Viewed

@@ -11,42 +11,39 @@ from starlette.responses import StreamingResponse
 from logtap.api.dependencies import get_settings, verify_api_key
 from logtap.core.reader import tail_async
 from logtap.core.search import filter_lines
-from logtap.core.validation import is_filename_valid, is_limit_valid, is_search_term_valid
+from logtap.core.validation import (
+    is_limit_valid,
+    is_search_term_valid,
+    resolve_safe_path,
+)
 from logtap.models.config import Settings
 from logtap.models.responses import LogResponse
 router = APIRouter()
-# Error messages (matching original for backward compatibility)
-ERROR_INVALID_FILENAME = 'Invalid filename: must not contain ".." or start with "/"'
+# Error messages
+ERROR_INVALID_FILENAME = "Invalid filename"
 ERROR_LONG_SEARCH_TERM = "Search term is too long: must be 100 characters or fewer"
 ERROR_INVALID_LIMIT = "Invalid limit value: must be between 1 and 1000"
-def validate_filename(filename: str) -> None:
-    """Validate filename and raise HTTPException if invalid."""
-    if not is_filename_valid(filename):
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=ERROR_INVALID_FILENAME,
-        )
-    # Block any filename with path separators
-    if "/" in filename or "\\" in filename:
+def get_filepath(filename: str, settings: Settings) -> str:
+    """Get full filepath and validate it exists and is within allowed directory."""
+    log_dir = settings.get_log_directory()
+    # resolve_safe_path handles: NUL bytes, control chars, path traversal,
+    # separators, absolute paths, symlink escape, and containment check
+    filepath = resolve_safe_path(log_dir, filename)
+    if filepath is None:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_INVALID_FILENAME,
         )
-def get_filepath(filename: str, settings: Settings) -> str:
-    """Get full filepath and validate it exists."""
-    log_dir = settings.get_log_directory()
-    filepath = os.path.join(log_dir, filename)
+    # Separate existence check for correct 404 response
     if not os.path.isfile(filepath):
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"File not found: {filepath} does not exist",
+            detail=f"File not found: {filename} does not exist",
         )
     return filepath
@@ -67,8 +64,6 @@ async def get_logs(
     This endpoint reads the last N lines from a log file and optionally
     filters them by a search term or regex pattern.
     """
-    validate_filename(filename)
     if term and not is_search_term_valid(term):
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
@@ -126,8 +121,11 @@ async def get_logs_multi(
     for filename in file_list:
         try:
-            validate_filename(filename)
-            filepath = os.path.join(log_dir, filename)
+            filepath = resolve_safe_path(log_dir, filename)
+            if filepath is None:
+                results[filename] = {"error": ERROR_INVALID_FILENAME, "lines": []}
+                continue
             if not os.path.isfile(filepath):
                 results[filename] = {"error": "File not found", "lines": []}
@@ -164,16 +162,14 @@ async def stream_logs(
     # Get settings (can't use Depends in WebSocket easily)
     settings = get_settings()
-    try:
-        validate_filename(filename)
-    except HTTPException as e:
-        await websocket.send_json({"error": e.detail})
+    log_dir = settings.get_log_directory()
+    filepath = resolve_safe_path(log_dir, filename)
+    if filepath is None:
+        await websocket.send_json({"error": ERROR_INVALID_FILENAME})
         await websocket.close()
         return
-    log_dir = settings.get_log_directory()
-    filepath = os.path.join(log_dir, filename)
     if not os.path.isfile(filepath):
         await websocket.send_json({"error": f"File not found: {filename}"})
         await websocket.close()
@@ -221,7 +217,6 @@ async def stream_logs_sse(
     Alternative to WebSocket for simpler clients.
     """
-    validate_filename(filename)
     filepath = get_filepath(filename, settings)
     async def event_generator():

logtap/api/routes/parsed.py CHANGED Viewed

@@ -9,6 +9,7 @@ from logtap.api.dependencies import get_settings, verify_api_key
 from logtap.core.parsers import AutoParser, LogLevel
 from logtap.core.reader import tail_async
 from logtap.core.search import filter_entries
+from logtap.core.validation import resolve_safe_path
 from logtap.models.config import Settings
 router = APIRouter()
@@ -40,21 +41,21 @@ async def get_parsed_logs(
     Supported formats: syslog, JSON, nginx, apache (auto-detected).
     """
-    # Validate filename
-    if ".." in filename or filename.startswith("/") or "/" in filename or "\\" in filename:
+    # Validate filename and resolve safe path
+    log_dir = settings.get_log_directory()
+    # resolve_safe_path handles all path security: traversal, symlinks, containment
+    filepath = resolve_safe_path(log_dir, filename)
+    if filepath is None:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail='Invalid filename: must not contain ".." or start with "/"',
         )
-    # Build file path
-    log_dir = settings.get_log_directory()
-    filepath = os.path.join(log_dir, filename)
     if not os.path.isfile(filepath):
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"File not found: {filepath} does not exist",
+            detail=f"File not found: {filename} does not exist",
         )
     # Read file lines

logtap/api/routes/runs.py CHANGED Viewed

@@ -128,14 +128,14 @@ async def ingest(
     # Get or create run
     run, created = store.get_or_create(run_id)
-    # Handle tags
+    # Handle tags (validate and track in metadata)
     tags = parse_tags(x_logtap_tag)
     if tags:
         err = run.set_tags(tags)
         if err:
             raise HTTPException(
-                status_code=409,
-                detail={"error": "tag_conflict", "message": err},
+                status_code=400,
+                detail={"error": "invalid_tag", "message": err},
             )
     # Read and ingest body
@@ -149,12 +149,12 @@ async def ingest(
         # Process complete lines
         while "\n" in buffer:
             line, buffer = buffer.split("\n", 1)
-            run.append(line)
+            run.append(line, tags)  # Pass tags to each line
             lines_ingested += 1
     # Flush remaining partial line
     if buffer:
-        run.append(buffer)
+        run.append(buffer, tags)  # Pass tags to each line
         lines_ingested += 1
     # Save metadata
@@ -192,33 +192,12 @@ async def stream_run(
             detail={"error": "run_not_found", "message": f"Run '{run_id}' does not exist"},
         )
-    # Tag filtering (if specified, check run has all tags)
-    if tag:
-        required_tags = parse_tags(tag)
-        for key, value in required_tags.items():
-            if run.metadata.tags.get(key) != value:
-                # Run doesn't match filter - return empty stream
-                async def empty_stream():
-                    meta = StreamMetaEvent(
-                        cursor_earliest=run.cursor_earliest,
-                        cursor_latest=run.cursor_latest,
-                        gap=False,
-                    )
-                    yield f"event: meta\ndata: {meta.model_dump_json()}\n\n"
-                return StreamingResponse(
-                    empty_stream(),
-                    media_type="text/event-stream",
-                    headers={
-                        "Cache-Control": "no-cache",
-                        "X-Logtap-Earliest-Cursor": str(run.cursor_earliest),
-                        "X-Logtap-Latest-Cursor": str(run.cursor_latest),
-                    },
-                )
+    # Parse tag filter for per-line filtering
+    tag_filter = parse_tags(tag) if tag else None
     async def generate_sse():
         # Get initial lines and check for gap
-        lines, gap = run.get_lines(since=since, tail=tail)
+        lines, gap = run.get_lines(since=since, tail=tail, tag_filter=tag_filter)
         # Send meta event
         missed = None
@@ -255,7 +234,7 @@ async def stream_run(
         while True:
             # Get new lines since last cursor
-            new_lines, _ = run.get_lines(since=last_cursor, limit=100)
+            new_lines, _ = run.get_lines(since=last_cursor, limit=100, tag_filter=tag_filter)
             for line in new_lines:
                 event = StreamLineEvent(

logtap/cli/commands/doctor.py ADDED Viewed

@@ -0,0 +1,127 @@
+"""Doctor command for logtap CLI - diagnose connection issues."""
+from typing import Optional
+import typer
+from rich.console import Console
+console = Console()
+def doctor(
+    server: str = typer.Option(
+        "http://localhost:8000",
+        "--server",
+        "-s",
+        help="Server URL to check.",
+        envvar="LOGTAP_SERVER",
+    ),
+    api_key: Optional[str] = typer.Option(
+        None,
+        "--api-key",
+        "-k",
+        help="API key for authentication.",
+        envvar="LOGTAP_API_KEY",
+    ),
+) -> None:
+    """
+    Check server connectivity and diagnose issues.
+    Verifies the server is reachable, auth works, and reports capabilities.
+    Example:
+        logtap doctor
+        logtap doctor --server http://gpu-box:8000
+    """
+    import httpx
+    console.print(f"[bold]Checking {server}[/bold]\n")
+    # Build headers
+    headers = {}
+    if api_key:
+        headers["X-API-Key"] = api_key
+    # Step 1: Check if server is reachable
+    console.print("[dim]1. Server reachable?[/dim]", end=" ")
+    try:
+        with httpx.Client(timeout=10) as client:
+            response = client.get(f"{server}/health", headers=headers)
+    except httpx.ConnectError:
+        console.print("[red]NO[/red]")
+        console.print(f"\n[red]Could not connect to {server}[/red]")
+        console.print("\n[dim]Possible causes:[/dim]")
+        console.print("  - Server not running (start with: logtap collect)")
+        console.print("  - Wrong host/port")
+        console.print("  - Firewall blocking connection")
+        console.print(f"\n[dim]Try:[/dim] curl {server}/health")
+        raise typer.Exit(1)
+    except httpx.TimeoutException:
+        console.print("[red]TIMEOUT[/red]")
+        console.print(f"\n[red]Connection timed out to {server}[/red]")
+        raise typer.Exit(1)
+    console.print("[green]YES[/green]")
+    # Step 2: Check auth
+    console.print("[dim]2. Auth working?[/dim]", end=" ")
+    if response.status_code == 401:
+        console.print("[red]NO (401 Unauthorized)[/red]")
+        console.print("\n[red]API key required or invalid[/red]")
+        if not api_key:
+            console.print("\n[dim]Try:[/dim] logtap doctor --api-key YOUR_KEY")
+            console.print("[dim]Or:[/dim] export LOGTAP_API_KEY=YOUR_KEY")
+        else:
+            console.print("\n[dim]Check that your API key matches the server's --api-key[/dim]")
+        raise typer.Exit(1)
+    elif response.status_code >= 400:
+        console.print(f"[red]NO ({response.status_code})[/red]")
+        raise typer.Exit(1)
+    if api_key:
+        console.print("[green]YES (key accepted)[/green]")
+    else:
+        console.print("[green]YES (no auth required)[/green]")
+    # Step 3: Parse health response
+    try:
+        health = response.json()
+    except Exception:
+        console.print("\n[yellow]Warning: Could not parse health response[/yellow]")
+        health = {}
+    # Step 4: Check features
+    console.print("[dim]3. Features?[/dim]", end=" ")
+    mode = health.get("mode", "unknown")
+    features = health.get("features", [])
+    version = health.get("version", "unknown")
+    if "runs" in features:
+        console.print("[green]runs[/green] (collector mode)")
+    elif "files" in features:
+        console.print("[cyan]files[/cyan] (legacy serve mode)")
+    else:
+        console.print(f"[yellow]{mode}[/yellow]")
+    # Step 5: Show summary
+    console.print("\n[bold green]Server OK[/bold green]")
+    console.print(f"  Version: {version}")
+    console.print(f"  Mode: {mode}")
+    console.print(f"  Features: {', '.join(features) if features else 'unknown'}")
+    if "runs" in features:
+        runs_count = health.get("runs", 0)
+        console.print(f"  Active runs: {runs_count}")
+    # Show curl command for debugging
+    console.print("\n[dim]Debug command:[/dim]")
+    if api_key:
+        console.print(f'  curl -H "X-API-Key: {api_key}" {server}/health')
+    else:
+        console.print(f"  curl {server}/health")
+    # Show quick start if collector mode
+    if "runs" in features:
+        console.print("\n[dim]Quick start:[/dim]")
+        console.print("  python train.py 2>&1 | logtap ingest run1")
+        console.print("  logtap tail run1 --follow")

logtap/cli/commands/tail.py CHANGED Viewed

@@ -166,8 +166,8 @@ def _tail_run(
                     console.print(f"[red]Error: Server returned {response.status_code}[/red]")
                     raise typer.Exit(1)
-                # Track cursor for gap detection message
-                last_cursor = since
+                # Track if this is a reconnect (user provided --since)
+                is_reconnect = since is not None
                 # Parse SSE stream
                 buffer = ""
@@ -175,7 +175,7 @@ def _tail_run(
                     buffer += chunk
                     while "\n\n" in buffer:
                         event_str, buffer = buffer.split("\n\n", 1)
-                        _process_sse_event(event_str, output, last_cursor)
+                        _process_sse_event(event_str, output, is_reconnect)
     except httpx.ConnectError:
         console.print(f"[red]Error: Could not connect to {server}[/red]")
@@ -185,7 +185,7 @@ def _tail_run(
         console.print("\n[dim]Stopped.[/dim]")
-def _process_sse_event(event_str: str, output: str, last_cursor: Optional[int]) -> None:
+def _process_sse_event(event_str: str, output: str, is_reconnect: bool) -> None:
     """Process a single SSE event."""
     event_type = None
     data = None
@@ -204,7 +204,15 @@ def _process_sse_event(event_str: str, output: str, last_cursor: Optional[int])
         try:
             meta = json.loads(data)
             if meta.get("gap") and meta.get("missed"):
-                console.print(f"[yellow]reconnected (missed {meta['missed']} lines)[/yellow]")
+                # Gap detected - warn user about missed lines
+                console.print(
+                    f"[yellow]reconnected (missed {meta['missed']} lines)[/yellow]",
+                    stderr=True,
+                )
+            elif is_reconnect:
+                # Clean reconnect - show cursor position
+                cursor = meta.get("cursor_earliest", meta.get("cursor_latest", "?"))
+                console.print(f"[dim]resumed at cursor {cursor}[/dim]", stderr=True)
         except Exception:
             pass
     elif event_type == "line" and data:

logtap/cli/main.py CHANGED Viewed

@@ -3,7 +3,7 @@
 import typer
 from logtap import __version__
-from logtap.cli.commands import collect, files, ingest, query, runs, serve, tail
+from logtap.cli.commands import collect, doctor, files, ingest, query, runs, serve, tail
 app = typer.Typer(
     name="logtap",
@@ -44,6 +44,7 @@ def main(
 app.command()(collect.collect)
 app.command()(ingest.ingest)
 app.command()(runs.runs)
+app.command()(doctor.doctor)
 # Legacy commands (file-based workflow)
 app.command()(serve.serve)

logtap/core/runs.py CHANGED Viewed

@@ -17,11 +17,12 @@ TAG_VALUE_MAX_LEN = 256
 @dataclass
 class RunLine:
-    """A single log line with cursor and timestamp."""
+    """A single log line with cursor, timestamp, and optional tags."""
     cursor: int
     line: str
     ts: datetime
+    tags: Dict[str, str] = field(default_factory=dict)
 @dataclass
@@ -100,32 +101,52 @@ class Run:
     def _populate_cache_from_disk(self) -> None:
         """Load last N lines from disk into cache."""
+        import json
         if not self.log_file.exists():
             return
-        lines: List[str] = []
+        run_lines: List[RunLine] = []
         with open(self.log_file, "r", encoding="utf-8", errors="replace") as f:
-            # Read all lines (for small files) or tail
-            for line in f:
-                lines.append(line.rstrip("\n"))
+            for raw_line in f:
+                raw_line = raw_line.rstrip("\n")
+                if not raw_line:
+                    continue
+                # Try JSONL format first
+                if raw_line.startswith("{"):
+                    try:
+                        record = json.loads(raw_line)
+                        run_lines.append(
+                            RunLine(
+                                cursor=record["c"],
+                                line=record["l"],
+                                ts=datetime.fromisoformat(record["t"]),
+                                tags=record.get("g", {}),
+                            )
+                        )
+                        continue
+                    except (json.JSONDecodeError, KeyError):
+                        pass
+                # Legacy plain text format
+                run_lines.append(
+                    RunLine(
+                        cursor=len(run_lines),
+                        line=raw_line,
+                        ts=self.metadata.last_activity,
+                        tags={},
+                    )
+                )
         # Only keep last buffer_lines
-        if len(lines) > self.buffer_lines:
-            lines = lines[-self.buffer_lines :]
-            start_cursor = self.metadata.cursor_latest - len(lines) + 1
-        else:
-            start_cursor = 0
+        if len(run_lines) > self.buffer_lines:
+            run_lines = run_lines[-self.buffer_lines :]
-        self._cache_start_cursor = start_cursor
+        self._cache_start_cursor = run_lines[0].cursor if run_lines else 0
         self._cache.clear()
-        for i, line in enumerate(lines):
-            self._cache.append(
-                RunLine(
-                    cursor=start_cursor + i,
-                    line=line,
-                    ts=self.metadata.last_activity,  # Approximate
-                )
-            )
+        for rl in run_lines:
+            self._cache.append(rl)
     def _save_metadata(self) -> None:
         """Save metadata to disk."""
@@ -147,17 +168,26 @@ class Run:
                 f,
             )
-    def append(self, line: str) -> RunLine:
+    def append(self, line: str, tags: Optional[Dict[str, str]] = None) -> RunLine:
         """Append a line to the run. Returns the line with assigned cursor."""
+        import json
         with self._lock:
             now = datetime.now(timezone.utc)
             cursor = self.metadata.cursor_latest + 1
-            run_line = RunLine(cursor=cursor, line=line, ts=now)
+            run_line = RunLine(cursor=cursor, line=line, ts=now, tags=tags or {})
-            # Append to disk
+            # Append to disk as JSONL
+            record = {
+                "c": cursor,
+                "l": line,
+                "t": now.isoformat(),
+            }
+            if tags:
+                record["g"] = tags  # g for tags (short key)
             with open(self.log_file, "a", encoding="utf-8") as f:
-                written = f.write(line + "\n")
+                written = f.write(json.dumps(record, separators=(",", ":")) + "\n")
                 self.metadata.bytes_on_disk += written
             # Update cache
@@ -172,17 +202,23 @@ class Run:
             return run_line
-    def append_batch(self, lines: List[str]) -> List[RunLine]:
+    def append_batch(
+        self, lines: List[str], tags: Optional[Dict[str, str]] = None
+    ) -> List[RunLine]:
         """Append multiple lines atomically."""
         with self._lock:
             result = []
             for line in lines:
-                result.append(self.append(line))
+                result.append(self.append(line, tags))
             self._save_metadata()
             return result
     def set_tags(self, tags: Dict[str, str]) -> Optional[str]:
-        """Set tags, merging with existing. Returns error message on conflict, None on success."""
+        """Validate tags. Returns error message on invalid tag, None on success.
+        Note: Tags are now stored per-line, not per-run. This method just validates
+        and tracks known tag keys in run metadata for discoverability.
+        """
         import re
         with self._lock:
@@ -193,12 +229,8 @@ class Run:
                 # Validate value length
                 if len(value) > TAG_VALUE_MAX_LEN:
                     return f"Tag value too long: {key}"
-                # Check for conflict
-                if key in self.metadata.tags and self.metadata.tags[key] != value:
-                    existing = self.metadata.tags[key]
-                    return f"Tag conflict for key '{key}': existing='{existing}', new='{value}'"
-            # Merge tags
+            # Track tag keys in metadata (last value wins, just for discoverability)
             self.metadata.tags.update(tags)
             self._save_metadata()
             return None
@@ -222,6 +254,7 @@ class Run:
         since: Optional[int] = None,
         tail: int = 50,
         limit: int = 1000,
+        tag_filter: Optional[Dict[str, str]] = None,
     ) -> tuple[List[RunLine], bool]:
         """
         Get lines from run.
@@ -230,6 +263,7 @@ class Run:
             since: Cursor to start from (exclusive). If None, returns last `tail` lines.
             tail: Number of recent lines if since is None.
             limit: Maximum lines to return.
+            tag_filter: Filter lines by tags (AND semantics).
         Returns:
             Tuple of (lines, gap_detected).
@@ -253,6 +287,12 @@ class Run:
                 # Tail mode - get last N lines
                 lines = list(self._cache)[-tail:]
+            # Filter by tags (AND semantics)
+            if tag_filter:
+                lines = [
+                    ln for ln in lines if all(ln.tags.get(k) == v for k, v in tag_filter.items())
+                ]
             # Apply limit
             if len(lines) > limit:
                 lines = lines[:limit]

logtap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

logtap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl