langsmith-cli 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.claude-plugin/marketplace.json +2 -2
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.claude-plugin/plugin.json +1 -1
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/CLAUDE.md +62 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/PKG-INFO +1 -1
- langsmith_cli-0.4.2/docs/devto-article.md +214 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/pyproject.toml +1 -1
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/SKILL.md +70 -5
- langsmith_cli-0.4.2/skills/langsmith/references/runs.md +500 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/uv.lock +1 -1
- langsmith_cli-0.4.0/skills/langsmith/references/runs.md +0 -258
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.env.example +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.github/dependabot.yml +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.github/workflows/ci.yml +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.github/workflows/dependency-review.yml +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.github/workflows/publish.yml +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.gitignore +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.pre-commit-config.yaml +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/.python-version +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/AGENTS.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/LICENSE +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/README.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/RELEASING.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/COMMANDS_DESIGN.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/PIPES_TO_CLI_REFERENCE.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/PRD.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/QOL_FEATURES.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/QOL_IMPROVEMENTS.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/TLDR.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/CI_BEST_PRACTICES.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/CODECOV_SETUP.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/IMPLEMENTATION_PLAN.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/LANGSMITH_TEAM_QUESTIONS.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/MCP_PARITY.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/PUBLISHING.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/PYPI_SETUP_SUMMARY.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/SESSION_DIRECTIVES.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/TESTING_PERFORMANCE.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/TESTING_STRATEGY.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/docs/dev/TYPE_SAFETY_GUIDE.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/main.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/scripts/install.ps1 +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/scripts/install.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/scripts/install.sh +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/scripts/release.sh +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/scripts/test_installer.sh +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/scripts/uninstall.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/docs/examples.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/docs/reference.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/references/datasets.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/references/examples.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/references/fql.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/references/installation.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/references/projects.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/references/prompts.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/skills/langsmith/references/troubleshooting.md +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/__init__.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/cli_logging.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/commands/auth.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/commands/datasets.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/commands/examples.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/commands/projects.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/commands/prompts.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/commands/runs.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/commands/self_cmd.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/config.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/field_analysis.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/filters.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/main.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/src/langsmith_cli/utils.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/conftest.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_auth.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_config.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_datasets.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_e2e.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_examples.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_fetch_helpers.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_field_analysis.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_filters.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_logging.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_main.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_output_flag.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_projects.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_prompts.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_analyze.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_discovery.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_export.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_fields.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_get.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_list.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_roots.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_sample.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_search.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_runs_view.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_self.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_smoke.py +0 -0
- {langsmith_cli-0.4.0 → langsmith_cli-0.4.2}/tests/test_utils.py +0 -0
|
@@ -6,14 +6,14 @@
|
|
|
6
6
|
},
|
|
7
7
|
"metadata": {
|
|
8
8
|
"description": "LangSmith CLI plugin marketplace",
|
|
9
|
-
"version": "0.4.0"
|
|
9
|
+
"version": "0.4.2"
|
|
10
10
|
},
|
|
11
11
|
"plugins": [
|
|
12
12
|
{
|
|
13
13
|
"name": "langsmith-cli",
|
|
14
14
|
"source": "./",
|
|
15
15
|
"description": "A context-efficient interface for LangSmith observability and evaluations.",
|
|
16
|
-
"version": "0.4.0",
|
|
16
|
+
"version": "0.4.2",
|
|
17
17
|
"author": {
|
|
18
18
|
"name": "Gigaverse",
|
|
19
19
|
"email": "aviadr1@gmail.com"
|
|
@@ -889,6 +889,68 @@ def test_mycommand_list(runner):
|
|
|
889
889
|
|
|
890
890
|
**Python Version:** >=3.12
|
|
891
891
|
|
|
892
|
+
## Releasing
|
|
893
|
+
|
|
894
|
+
### ALWAYS Use the Release Script
|
|
895
|
+
|
|
896
|
+
**Never manually bump versions or create tags/releases.** Use `scripts/release.sh` which handles everything:
|
|
897
|
+
|
|
898
|
+
```bash
|
|
899
|
+
# Patch bump (0.4.0 → 0.4.1) — default
|
|
900
|
+
./scripts/release.sh
|
|
901
|
+
|
|
902
|
+
# Minor bump (0.4.0 → 0.5.0)
|
|
903
|
+
./scripts/release.sh minor
|
|
904
|
+
|
|
905
|
+
# Major bump (0.4.0 → 1.0.0)
|
|
906
|
+
./scripts/release.sh major
|
|
907
|
+
|
|
908
|
+
# Explicit version
|
|
909
|
+
./scripts/release.sh 0.5.0
|
|
910
|
+
|
|
911
|
+
# Skip tests for docs-only releases
|
|
912
|
+
./scripts/release.sh --skip-tests
|
|
913
|
+
|
|
914
|
+
# Auto-confirm (no prompts)
|
|
915
|
+
./scripts/release.sh -y
|
|
916
|
+
```
|
|
917
|
+
|
|
918
|
+
### What the Release Script Does
|
|
919
|
+
|
|
920
|
+
1. **Bumps version in all 4 files simultaneously:**
|
|
921
|
+
- `pyproject.toml`
|
|
922
|
+
- `.claude-plugin/plugin.json`
|
|
923
|
+
- `.claude-plugin/marketplace.json` (both `metadata.version` and `plugins[0].version`)
|
|
924
|
+
- `uv.lock`
|
|
925
|
+
2. **Runs quality checks:** ruff lint, ruff format, pyright
|
|
926
|
+
3. **Runs tests** (unless `--skip-tests`)
|
|
927
|
+
4. **Creates git commit and annotated tag**
|
|
928
|
+
5. **Pushes to remote** (triggers CI → PyPI publish → GitHub release)
|
|
929
|
+
|
|
930
|
+
### Version Files — Never Edit Manually
|
|
931
|
+
|
|
932
|
+
These files contain version strings that **must stay in sync**:
|
|
933
|
+
- `pyproject.toml` — PyPI package version
|
|
934
|
+
- `.claude-plugin/plugin.json` — Plugin version for Claude Code
|
|
935
|
+
- `.claude-plugin/marketplace.json` — Marketplace listing version (2 locations)
|
|
936
|
+
- `uv.lock` — Auto-updated by the script
|
|
937
|
+
|
|
938
|
+
Editing any of these manually creates version drift. The release script is the **single source of truth** for version bumps.
|
|
939
|
+
|
|
940
|
+
### When to Release
|
|
941
|
+
|
|
942
|
+
- **Code changes:** Always use `./scripts/release.sh` (runs tests by default)
|
|
943
|
+
- **Docs/skill-only changes:** Use `./scripts/release.sh --skip-tests` (still bumps version)
|
|
944
|
+
- **After updating SKILL.md:** A release is needed for plugin users to get the updated skill
|
|
945
|
+
|
|
946
|
+
### CI/CD Pipeline (Automated)
|
|
947
|
+
|
|
948
|
+
On tag push (`v*`), GitHub Actions automatically:
|
|
949
|
+
1. Runs full test suite
|
|
950
|
+
2. Builds wheel and sdist
|
|
951
|
+
3. Publishes to PyPI (Trusted Publishing, no tokens)
|
|
952
|
+
4. Creates GitHub release with artifacts
|
|
953
|
+
|
|
892
954
|
## Git Workflow
|
|
893
955
|
|
|
894
956
|
Per docs/dev/SESSION_DIRECTIVES.md:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: langsmith-cli
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: Context-efficient CLI for LangSmith. Built for humans and agents.
|
|
5
5
|
Project-URL: Homepage, https://github.com/aviadr1/langsmith-cli
|
|
6
6
|
Project-URL: Repository, https://github.com/aviadr1/langsmith-cli
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "I Replaced My LangSmith MCP Server with a CLI That Only Loads When You Need It"
|
|
3
|
+
published: false
|
|
4
|
+
description: "How langsmith-cli gives you 100% MCP parity, 96% less context per query, and features the MCP server doesn't have — all in a single pip install."
|
|
5
|
+
tags: langsmith, llmops, cli, claude
|
|
6
|
+
cover_image:
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
If you're using LangSmith with Claude Code (or any AI coding agent), you're probably running the official MCP server. It works. But every session, it injects **~5,000 tokens** of tool schemas into your context window — whether you touch LangSmith or not.
|
|
10
|
+
|
|
11
|
+
I built [langsmith-cli](https://github.com/gigaverse-app/langsmith-cli) to fix that. It's a standalone CLI *and* a Claude Code plugin that replaces the always-on MCP server with an **on-demand skill** that only loads when your agent actually needs to talk to LangSmith.
|
|
12
|
+
|
|
13
|
+
And it does more than the MCP server does.
|
|
14
|
+
|
|
15
|
+
## The Problem with MCP Servers
|
|
16
|
+
|
|
17
|
+
MCP servers are always-on. The moment your agent session starts, every tool definition gets loaded into context. For LangSmith's MCP server, that's 66 parameters across multiple tools — around 5,000 tokens of JSON schema sitting in your context window whether you ever query a trace or not.
|
|
18
|
+
|
|
19
|
+
For agents that need to do many things — write code, run tests, debug, *and occasionally* check LangSmith — this is wasteful. Context is your agent's working memory. Every token of schema is a token not available for reasoning.
|
|
20
|
+
|
|
21
|
+
## The Fix: On-Demand Skills Instead of Always-On Schemas
|
|
22
|
+
|
|
23
|
+
`langsmith-cli` takes a different approach. Instead of an MCP server that injects schemas at session start, it's a CLI tool with a skill file that **only loads when the agent actually invokes it**:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Install the CLI
|
|
27
|
+
uv tool install langsmith-cli
|
|
28
|
+
|
|
29
|
+
# Add as Claude Code plugin
|
|
30
|
+
claude plugin marketplace add gigaverse-app/langsmith-cli
|
|
31
|
+
claude plugin install langsmith-cli@langsmith-cli
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Sessions that never touch LangSmith pay **zero context tokens**. When the agent *does* need observability data, it invokes the skill and gets a comprehensive reference for the full CLI — every command, every flag, with usage patterns and examples. Then it runs shell commands:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Get the latest failed run with only the fields you need
|
|
38
|
+
langsmith-cli --json runs get-latest --project my-app \
|
|
39
|
+
--failed --fields id,name,error
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
No always-on server. No startup schema tax. The skill loads on-demand, and `--fields` keeps the *response* data lean too.
|
|
43
|
+
|
|
44
|
+
## 96% Token Reduction with `--fields`
|
|
45
|
+
|
|
46
|
+
This is the feature that matters most for agents. A typical LangSmith run object is **20KB** — easily 1,000+ tokens. With `--fields`, you get only what you asked for:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# Full run object: ~1000 tokens
|
|
50
|
+
langsmith-cli --json runs get abc-123
|
|
51
|
+
|
|
52
|
+
# Just what you need: ~40 tokens
|
|
53
|
+
langsmith-cli --json runs get abc-123 --fields name,status,error
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
`--fields` works on every list and get command: runs, projects, datasets, examples, prompts. Your agent stays lean.
|
|
57
|
+
|
|
58
|
+
## Built for Two Audiences
|
|
59
|
+
|
|
60
|
+
Most developer tools pick one audience. `langsmith-cli` serves both:
|
|
61
|
+
|
|
62
|
+
**For humans** — rich terminal tables with color-coded statuses, smart column truncation, syntax highlighting:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
langsmith-cli runs list --project my-app --status error --last 24h
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┓
|
|
70
|
+
┃ Name ┃ Status ┃ Tokens ┃ Latency ┃ Error ┃
|
|
71
|
+
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━┩
|
|
72
|
+
│ extractor │ error │ 2,340 │ 3.2s │ Rate limit │
|
|
73
|
+
│ classifier │ error │ 1,102 │ 12.4s │ Timeout │
|
|
74
|
+
└──────────────┴────────────┴────────┴──────────┴─────────────┘
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**For agents** — add `--json` as the first flag and everything switches: strict JSON to stdout, diagnostics to stderr, zero formatting noise:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
langsmith-cli --json runs list --project my-app --status error --limit 5
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
One flag. Two completely different UX modes.
|
|
84
|
+
|
|
85
|
+
## Features the MCP Server Doesn't Have
|
|
86
|
+
|
|
87
|
+
`langsmith-cli` has 100% parity with the official MCP server (all 66 parameters mapped). But it also has features the MCP server can't offer:
|
|
88
|
+
|
|
89
|
+
### Live Monitoring with `runs watch`
|
|
90
|
+
|
|
91
|
+
A real-time streaming dashboard in your terminal:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
langsmith-cli runs watch --project my-app
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### One-Command Debugging with `runs get-latest`
|
|
98
|
+
|
|
99
|
+
No more `list | jq | get` pipelines:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# Before: three commands piped together
|
|
103
|
+
langsmith-cli --json runs list --project X --limit 1 \
|
|
104
|
+
| jq -r '.[0].id' \
|
|
105
|
+
| xargs langsmith-cli --json runs get
|
|
106
|
+
|
|
107
|
+
# After: one command
|
|
108
|
+
langsmith-cli --json runs get-latest --project X --fields inputs,outputs,error
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Stratified Sampling with `runs sample`
|
|
112
|
+
|
|
113
|
+
Build statistically sound eval datasets:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
langsmith-cli runs sample \
|
|
117
|
+
--stratify-by tag:length,tag:content_type \
|
|
118
|
+
--dimension-values "short|long,news|gaming" \
|
|
119
|
+
--samples-per-combination 5 \
|
|
120
|
+
--output eval_samples.jsonl
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Aggregate Analytics with `runs analyze`
|
|
124
|
+
|
|
125
|
+
Group-by metrics without leaving the terminal:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
langsmith-cli --json runs analyze \
|
|
129
|
+
--group-by tag:model \
|
|
130
|
+
--metrics count,error_rate,p50_latency,avg_cost
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Schema Discovery with `runs fields` / `runs describe`
|
|
134
|
+
|
|
135
|
+
Don't know what fields your runs have? Discover them:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
langsmith-cli --json runs fields --include inputs,outputs
|
|
139
|
+
# Returns field paths, types, presence rates, even language distribution
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Tag & Metadata Discovery
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
langsmith-cli runs tags --project my-app
|
|
146
|
+
langsmith-cli runs metadata-keys --project my-app
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Bulk Export with Pattern Filenames
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
langsmith-cli runs export ./traces \
|
|
153
|
+
--project my-app --roots --limit 1000 \
|
|
154
|
+
--filename-pattern "{name}-{run_id}"
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Production Run to Eval Example in One Command
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
langsmith-cli --json examples from-run <run-id> --dataset my-eval-set
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Smart Filtering That Translates to FQL
|
|
164
|
+
|
|
165
|
+
Nobody wants to write raw Filter Query Language. The CLI translates human-friendly flags automatically:
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# These flags...
|
|
169
|
+
langsmith-cli runs list --tag summarizer --failed --last 24h --slow
|
|
170
|
+
|
|
171
|
+
# ...become this FQL:
|
|
172
|
+
# and(has(tags, "summarizer"), eq(error, true),
|
|
173
|
+
# gt(start_time, "2026-03-03T..."), gt(latency, "5s"))
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Time presets like `--recent` (last hour), `--today`, `--last 7d`, and `--since 2026-01-01` all work. Content search with `--grep` supports regex and field-specific matching. Everything composes.
|
|
177
|
+
|
|
178
|
+
## What's New in v0.4.0
|
|
179
|
+
|
|
180
|
+
The v0.4.0 release focused on type safety and code quality:
|
|
181
|
+
|
|
182
|
+
- **Zero pyright errors** — every function has proper type annotations. `client: langsmith.Client`, not `client: Any`. Return types are real SDK Pydantic models, not `object`.
|
|
183
|
+
- **`datasets delete`** command with confirmation prompts and JSON mode support
|
|
184
|
+
- **Improved error handling** across prompts and runs commands using specific SDK exception types (`LangSmithNotFoundError`, `LangSmithConflictError`) instead of broad `except Exception`
|
|
185
|
+
- **702 unit tests** passing with real Pydantic model instances (no MagicMock for test data)
|
|
186
|
+
|
|
187
|
+
## Getting Started
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
# Install
|
|
191
|
+
uv tool install langsmith-cli
|
|
192
|
+
# or: pip install langsmith-cli
|
|
193
|
+
|
|
194
|
+
# Authenticate
|
|
195
|
+
export LANGSMITH_API_KEY="lsv2_..."
|
|
196
|
+
# or: langsmith-cli auth login
|
|
197
|
+
|
|
198
|
+
# Start exploring
|
|
199
|
+
langsmith-cli runs list --project my-app --last 24h
|
|
200
|
+
langsmith-cli --json runs get-latest --failed --fields name,error
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
If you're using Claude Code, add the plugin for the best agent experience:
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
claude plugin marketplace add gigaverse-app/langsmith-cli
|
|
207
|
+
claude plugin install langsmith-cli@langsmith-cli
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
The code is MIT licensed and on GitHub: [gigaverse-app/langsmith-cli](https://github.com/gigaverse-app/langsmith-cli)
|
|
213
|
+
|
|
214
|
+
If you're building with LangSmith and tired of context-heavy MCP servers, give it a try. Happy to hear feedback in the issues.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "langsmith-cli"
|
|
3
3
|
# IMPORTANT: Do not bump this version by hand — use scripts/release.sh, which also updates .claude-plugin/plugin.json, .claude-plugin/marketplace.json, and uv.lock
|
|
4
|
-
version = "0.4.0"
|
|
4
|
+
version = "0.4.2"
|
|
5
5
|
description = "Context-efficient CLI for LangSmith. Built for humans and agents."
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
requires-python = ">=3.12"
|
|
@@ -99,10 +99,21 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
99
99
|
|
|
100
100
|
## API Reference
|
|
101
101
|
|
|
102
|
+
### Authentication
|
|
103
|
+
- `langsmith-cli auth login`: Configure API key (saves to global config).
|
|
104
|
+
- `--local`: Save to `.env` in current directory instead.
|
|
105
|
+
|
|
102
106
|
### Projects
|
|
103
107
|
- `langsmith-cli --json projects list [OPTIONS]`: List all projects.
|
|
108
|
+
- `--limit <n>`: Max results (default: 100, use 0 for no limit)
|
|
109
|
+
- `--name <text>`: Filter by name (substring match, e.g., `--name "production"`)
|
|
110
|
+
- `--name-pattern <pattern>`: Wildcard filter (e.g., `'*prod*'`)
|
|
111
|
+
- `--name-regex <regex>`: Regex filter
|
|
112
|
+
- `--has-runs`: Show only projects with runs
|
|
113
|
+
- `--sort-by <field>`: Sort by field (name, run_count). Prefix `-` for descending
|
|
104
114
|
- `--fields <comma-separated>`: Select specific fields (e.g., `id,name`)
|
|
105
115
|
- `--output <file>`: Write to file instead of stdout
|
|
116
|
+
- See [Projects Reference](references/projects.md) for full options and output fields.
|
|
106
117
|
- `langsmith-cli --json projects get <name-or-id>`: Get project details (UUID auto-detected).
|
|
107
118
|
- `--include-stats/--no-stats`: Include/exclude run statistics (default: include)
|
|
108
119
|
- `--fields <comma-separated>`: Select fields
|
|
@@ -114,9 +125,24 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
114
125
|
- `langsmith-cli --json runs list [OPTIONS]`: List recent runs.
|
|
115
126
|
- `--project <name>`: Filter by project name (default: "default").
|
|
116
127
|
- `--project-id <uuid>`: Filter by project UUID (bypasses name resolution, faster).
|
|
128
|
+
- **Multi-project:** `--project-name <text>`, `--project-name-exact <text>`, `--project-name-pattern <pattern>`, `--project-name-regex <regex>`
|
|
117
129
|
- `--limit <n>`: Max results (default 10, keep it small).
|
|
118
130
|
- `--status <success|error>`: Filter by status.
|
|
131
|
+
- **Convenience shortcuts:** `--failed`, `--succeeded`, `--slow` (>5s), `--recent` (last hour), `--today`
|
|
119
132
|
- `--filter <string>`: Advanced FQL query string (see FQL examples below).
|
|
133
|
+
- `--roots`: Show only root traces (recommended for cleaner output).
|
|
134
|
+
- `--trace-id <uuid>`: Get all runs in a specific trace tree.
|
|
135
|
+
- `--run-type <type>`: Filter by type (llm, chain, tool, retriever, etc).
|
|
136
|
+
- `--tag <tag>`: Filter by tag (repeatable for AND logic).
|
|
137
|
+
- `--name-pattern <pattern>`: Wildcard filter on run names (client-side).
|
|
138
|
+
- `--name-regex <regex>`: Regex filter on run names (client-side).
|
|
139
|
+
- `--model <name>`: Filter by model name (e.g., `gpt-4`, `claude-3`).
|
|
140
|
+
- `--since <time>`: Runs since time (ISO, `3d`, or `3 days ago`).
|
|
141
|
+
- `--last <duration>`: Runs from last duration (e.g., `24h`, `7d`).
|
|
142
|
+
- `--min-latency <dur>` / `--max-latency <dur>`: Latency range (e.g., `2s`, `500ms`).
|
|
143
|
+
- `--trace-filter <fql>` / `--tree-filter <fql>`: Filter on root trace / any run in tree.
|
|
144
|
+
- `--sort-by <field>`: Sort by field (name, status, latency, start_time). Prefix `-` for descending.
|
|
145
|
+
- `--format <table|json|csv|yaml>`: Output format.
|
|
120
146
|
- **Content Search Options:**
|
|
121
147
|
- `--query <text>`: Server-side full-text search (fast, but only first ~250 chars indexed).
|
|
122
148
|
- `--grep <pattern>`: Client-side content search (unlimited content, supports regex).
|
|
@@ -126,7 +152,7 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
126
152
|
- `--fields <comma-separated>`: Reduce output size (e.g., `id,name,status,error`).
|
|
127
153
|
- `--output <file>`: Write to file (JSONL format) instead of stdout.
|
|
128
154
|
- `--no-truncate`: Show full content in table columns (only affects table output, not JSON).
|
|
129
|
-
-
|
|
155
|
+
- See [Runs Reference](references/runs.md) for full field list and examples.
|
|
130
156
|
- `langsmith-cli --json runs get <id> [OPTIONS]`: Get details of a single run.
|
|
131
157
|
- `--fields <comma-separated>`: Only return specific fields (e.g., `inputs,outputs,error`).
|
|
132
158
|
- `langsmith-cli --json runs get-latest [OPTIONS]`: Get the most recent run matching filters.
|
|
@@ -139,6 +165,21 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
139
165
|
- Example: `langsmith-cli --json runs get-latest --project-name-pattern "prd/*" --succeeded --roots`
|
|
140
166
|
- **Before (complex):** `langsmith-cli --json runs list --project X --limit 1 --roots | jq -r '.[0].id' | xargs langsmith-cli --json runs get --fields inputs,outputs`
|
|
141
167
|
- **After (simple):** `langsmith-cli --json runs get-latest --project X --roots --fields inputs,outputs`
|
|
168
|
+
- `langsmith-cli --json runs search <query> [OPTIONS]`: Full-text search across runs.
|
|
169
|
+
- `--project <name>`: Project name (default: "default").
|
|
170
|
+
- Multi-project: `--project-name-pattern`, `--project-name-regex`, etc.
|
|
171
|
+
- `--limit <n>`: Max results (default: 10).
|
|
172
|
+
- `--roots`: Show only root traces.
|
|
173
|
+
- `--in <all|inputs|outputs|error>`: Where to search (default: all).
|
|
174
|
+
- `--input-contains <text>`: Filter by content in inputs.
|
|
175
|
+
- `--output-contains <text>`: Filter by content in outputs.
|
|
176
|
+
- `--since <time>` / `--last <duration>`: Time filters.
|
|
177
|
+
- `--format <table|json|csv|yaml>`: Output format.
|
|
178
|
+
- Example: `langsmith-cli --json runs search "timeout" --in error --project myapp`
|
|
179
|
+
- `langsmith-cli runs watch [OPTIONS]`: Live monitoring dashboard (interactive, no `--json`).
|
|
180
|
+
- `--project <name>`: Project to monitor (default: "default").
|
|
181
|
+
- Multi-project: `--project-name-pattern`, `--project-name-regex`, etc.
|
|
182
|
+
- `--interval <seconds>`: Refresh interval (default: 2).
|
|
142
183
|
- `langsmith-cli runs view-file <pattern> [OPTIONS]`: View runs from JSONL files with table display.
|
|
143
184
|
- **Use this to read files created by `--output`** - don't use the Read tool on JSONL files (they can be 30K+ tokens).
|
|
144
185
|
- `<pattern>`: File path or glob pattern (e.g., `samples.jsonl`, `data/*.jsonl`).
|
|
@@ -215,16 +256,29 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
215
256
|
- `--output <file>`: Write to file instead of stdout
|
|
216
257
|
- `langsmith-cli --json datasets get <id> [--fields id,name,description]`: Get dataset details.
|
|
217
258
|
- `langsmith-cli --json datasets create <name>`: Create a dataset.
|
|
259
|
+
- `--description <text>`: Dataset description.
|
|
260
|
+
- `--type [kv|llm|chat]`: Dataset type (default: kv).
|
|
218
261
|
- `langsmith-cli --json datasets delete <name-or-id> --confirm`: Delete a dataset.
|
|
219
262
|
- `langsmith-cli --json datasets push <file.jsonl> --dataset <name>`: Upload examples from JSONL.
|
|
263
|
+
- See [Datasets Reference](references/datasets.md) for full options and output fields.
|
|
220
264
|
- `langsmith-cli --json examples list --dataset <name> [OPTIONS]`: List examples in a dataset.
|
|
265
|
+
- `--limit <n>` / `--offset <n>`: Pagination.
|
|
266
|
+
- `--splits <comma-separated>`: Filter by splits (e.g., `train,test`).
|
|
267
|
+
- `--as-of <tag-or-timestamp>`: Version snapshot.
|
|
268
|
+
- `--filter <fql>`: Advanced FQL query.
|
|
269
|
+
- `--metadata <json>`: Filter by metadata.
|
|
221
270
|
- `--fields <comma-separated>`: Select fields (e.g., `id,inputs,outputs`)
|
|
222
271
|
- `--output <file>`: Write to file instead of stdout
|
|
223
272
|
- `langsmith-cli --json examples get <id> [--fields id,inputs,outputs]`: Get example details.
|
|
224
273
|
- `langsmith-cli --json examples create --dataset <name> --inputs <json> --outputs <json>`: Add an example.
|
|
274
|
+
- `--metadata <json>`: Custom metadata.
|
|
275
|
+
- `--split <name>`: Split name (e.g., `train`, `test`).
|
|
225
276
|
- `langsmith-cli --json examples update <id> --inputs <json> --outputs <json>`: Update an example.
|
|
277
|
+
- `--metadata <json>`: New metadata.
|
|
278
|
+
- `--split <name>`: New split name.
|
|
226
279
|
- `langsmith-cli --json examples delete <id> [<id>...] --confirm`: Delete examples (supports bulk).
|
|
227
280
|
- `langsmith-cli --json examples from-run <run-id> --dataset <name>`: Create example from a run.
|
|
281
|
+
- See [Examples Reference](references/examples.md) for full options and output fields.
|
|
228
282
|
|
|
229
283
|
### Prompts
|
|
230
284
|
- `langsmith-cli --json prompts list [OPTIONS]`: List prompt repositories.
|
|
@@ -235,9 +289,20 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
235
289
|
- `--include-model`: Include model configuration
|
|
236
290
|
- `--fields <comma-separated>`: Select fields
|
|
237
291
|
- `langsmith-cli --json prompts push <name> <file_path>`: Push a local file as a prompt.
|
|
292
|
+
- `--description <text>`: Prompt description.
|
|
293
|
+
- `--tags <comma-separated>`: Tags.
|
|
294
|
+
- `--is-public <bool>`: Make public.
|
|
238
295
|
- `langsmith-cli --json prompts create <name> [--description <text>]`: Create a new prompt.
|
|
296
|
+
- `--tags <comma-separated>`: Tags.
|
|
297
|
+
- `--is-public <bool>`: Make public.
|
|
239
298
|
- `langsmith-cli --json prompts delete <name> --confirm`: Delete a prompt.
|
|
240
299
|
- `langsmith-cli --json prompts commits <name> [--limit N]`: List prompt versions.
|
|
300
|
+
- `--offset <n>`: Skip N commits.
|
|
301
|
+
- `--include-model`: Include model configuration.
|
|
302
|
+
- `--fields <comma-separated>`: Select fields.
|
|
303
|
+
- `--count`: Return only the count of commits.
|
|
304
|
+
- `--output <file>`: Write to file.
|
|
305
|
+
- See [Prompts Reference](references/prompts.md) for full options and output fields.
|
|
241
306
|
|
|
242
307
|
### Self (Installation Management)
|
|
243
308
|
- `langsmith-cli self detect`: Show installation details (version, install method, paths).
|
|
@@ -295,7 +360,7 @@ langsmith-cli --json projects list | jq -r '.[].name' | grep -E "(prd|stg)/"
|
|
|
295
360
|
langsmith-cli --json projects list --name-regex "^(prd|stg)/" --fields name
|
|
296
361
|
```
|
|
297
362
|
|
|
298
|
-
### Pattern
|
|
363
|
+
### Pattern 3: Get Latest Run Without Nested Commands
|
|
299
364
|
```bash
|
|
300
365
|
# ❌ BAD (requires jq + nested command)
|
|
301
366
|
langsmith-cli --json runs get $(
|
|
@@ -307,7 +372,7 @@ langsmith-cli --json runs get $(
|
|
|
307
372
|
langsmith-cli --json runs get-latest --project X --roots --fields inputs,outputs
|
|
308
373
|
```
|
|
309
374
|
|
|
310
|
-
### Pattern
|
|
375
|
+
### Pattern 4: Get Latest Error from Production
|
|
311
376
|
```bash
|
|
312
377
|
# ❌ BAD (complex piping)
|
|
313
378
|
for project in $(langsmith-cli --json projects list | jq -r '.[].name' | grep "prd/"); do
|
|
@@ -318,7 +383,7 @@ done | jq -s '.[0]'
|
|
|
318
383
|
langsmith-cli --json runs get-latest --project-name-pattern "prd/*" --failed --fields id,name,error
|
|
319
384
|
```
|
|
320
385
|
|
|
321
|
-
### Pattern
|
|
386
|
+
### Pattern 5: Filter Projects by Pattern
|
|
322
387
|
```bash
|
|
323
388
|
# Filter by substring
|
|
324
389
|
langsmith-cli --json projects list --name "production" --fields name
|
|
@@ -330,7 +395,7 @@ langsmith-cli --json projects list --name-pattern "*prod*" --fields name
|
|
|
330
395
|
langsmith-cli --json projects list --name-regex "^(prd|stg)/.*" --fields name
|
|
331
396
|
```
|
|
332
397
|
|
|
333
|
-
### Pattern
|
|
398
|
+
### Pattern 6: Get Latest Successful Run from Multiple Projects
|
|
334
399
|
```bash
|
|
335
400
|
# Searches across all matching projects
|
|
336
401
|
langsmith-cli --json runs get-latest \
|