langsmith-cli 0.3.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.claude-plugin/plugin.json +1 -1
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/CLAUDE.md +36 -11
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/PKG-INFO +19 -7
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/README.md +18 -6
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/QOL_IMPROVEMENTS.md +0 -1
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/MCP_PARITY.md +0 -1
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/pyproject.toml +1 -1
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/SKILL.md +11 -3
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/docs/examples.md +3 -7
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/runs.md +0 -2
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/commands/datasets.py +14 -3
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/commands/prompts.py +6 -1
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/commands/runs.py +15 -19
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/main.py +21 -2
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/utils.py +12 -3
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_datasets.py +31 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_main.py +129 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_prompts.py +18 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_analyze.py +17 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_discovery.py +12 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_get.py +22 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_list.py +27 -23
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_roots.py +11 -18
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_sample.py +27 -1
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_search.py +11 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_utils.py +42 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.claude-plugin/marketplace.json +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.env.example +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.github/dependabot.yml +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.github/workflows/ci.yml +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.github/workflows/dependency-review.yml +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.github/workflows/publish.yml +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.gitignore +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.pre-commit-config.yaml +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/.python-version +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/AGENTS.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/LICENSE +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/RELEASING.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/COMMANDS_DESIGN.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/PIPES_TO_CLI_REFERENCE.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/PRD.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/QOL_FEATURES.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/TLDR.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/CI_BEST_PRACTICES.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/CODECOV_SETUP.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/IMPLEMENTATION_PLAN.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/LANGSMITH_TEAM_QUESTIONS.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/PUBLISHING.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/PYPI_SETUP_SUMMARY.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/SESSION_DIRECTIVES.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/TESTING_PERFORMANCE.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/TESTING_STRATEGY.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/docs/dev/TYPE_SAFETY_GUIDE.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/main.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/scripts/install.ps1 +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/scripts/install.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/scripts/install.sh +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/scripts/release.sh +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/scripts/test_installer.sh +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/scripts/uninstall.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/docs/reference.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/datasets.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/examples.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/fql.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/installation.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/projects.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/prompts.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/skills/langsmith/references/troubleshooting.md +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/__init__.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/cli_logging.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/commands/auth.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/commands/examples.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/commands/projects.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/commands/self_cmd.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/config.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/field_analysis.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/src/langsmith_cli/filters.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/conftest.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_auth.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_config.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_e2e.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_examples.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_fetch_helpers.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_field_analysis.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_filters.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_logging.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_output_flag.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_projects.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_fields.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_runs_view.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_self.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/tests/test_smoke.py +0 -0
- {langsmith_cli-0.3.2 → langsmith_cli-0.3.3}/uv.lock +0 -0
|
@@ -111,10 +111,18 @@ main.py (entry point)
|
|
|
111
111
|
├── runs (group)
|
|
112
112
|
│ ├── list
|
|
113
113
|
│ ├── get
|
|
114
|
+
│ ├── get-latest
|
|
114
115
|
│ ├── stats
|
|
115
116
|
│ ├── open
|
|
116
117
|
│ ├── watch
|
|
117
|
-
│
|
|
118
|
+
│ ├── search
|
|
119
|
+
│ ├── sample
|
|
120
|
+
│ ├── analyze
|
|
121
|
+
│ ├── tags
|
|
122
|
+
│ ├── metadata-keys
|
|
123
|
+
│ ├── fields
|
|
124
|
+
│ ├── describe
|
|
125
|
+
│ └── view-file
|
|
118
126
|
├── datasets (group)
|
|
119
127
|
│ ├── list
|
|
120
128
|
│ ├── get
|
|
@@ -124,10 +132,13 @@ main.py (entry point)
|
|
|
124
132
|
│ ├── list
|
|
125
133
|
│ ├── get
|
|
126
134
|
│ └── create
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
135
|
+
├── prompts (group)
|
|
136
|
+
│ ├── list
|
|
137
|
+
│ ├── get
|
|
138
|
+
│ └── push
|
|
139
|
+
└── self (group)
|
|
140
|
+
├── detect
|
|
141
|
+
└── update
|
|
131
142
|
```
|
|
132
143
|
|
|
133
144
|
### Key Design Patterns
|
|
@@ -217,23 +228,37 @@ def list_runs(ctx, output_format, count, output, ...):
|
|
|
217
228
|
```
|
|
218
229
|
src/langsmith_cli/
|
|
219
230
|
├── __init__.py
|
|
220
|
-
├── main.py # Entry point, CLI group registration
|
|
221
|
-
├──
|
|
231
|
+
├── main.py # Entry point, CLI group registration, global error handler
|
|
232
|
+
├── cli_logging.py # CLILogger for verbosity control and stream separation
|
|
233
|
+
├── config.py # Credentials file management
|
|
234
|
+
├── field_analysis.py # Field discovery and statistics (runs fields/describe)
|
|
235
|
+
├── filters.py # FQL filter builders and time parsing
|
|
236
|
+
├── utils.py # Shared helpers (output formatting, project resolution, etc.)
|
|
222
237
|
└── commands/ # Modular command implementations
|
|
223
238
|
├── auth.py # Authentication (login)
|
|
224
239
|
├── projects.py # Project management
|
|
225
240
|
├── runs.py # Runs/traces (largest module)
|
|
226
241
|
├── datasets.py # Dataset operations
|
|
227
242
|
├── examples.py # Dataset examples
|
|
228
|
-
|
|
243
|
+
├── prompts.py # Prompt management
|
|
244
|
+
└── self_cmd.py # Self-inspection (detect, update)
|
|
229
245
|
|
|
230
246
|
tests/
|
|
231
|
-
├── conftest.py # Pytest fixtures (CliRunner)
|
|
232
|
-
├── test_main.py # Root CLI tests
|
|
247
|
+
├── conftest.py # Pytest fixtures (CliRunner, model factories)
|
|
248
|
+
├── test_main.py # Root CLI + global error handler tests
|
|
233
249
|
├── test_logging.py # CLILogger tests
|
|
234
250
|
├── test_auth.py # Auth command tests
|
|
235
251
|
├── test_projects.py # Projects command tests
|
|
236
|
-
├── test_runs.py # Runs command tests
|
|
252
|
+
├── test_runs.py # Runs command tests
|
|
253
|
+
├── test_runs_list.py # Runs list command tests (filters, JSON, formats)
|
|
254
|
+
├── test_runs_get.py # Runs get/get-latest/open tests
|
|
255
|
+
├── test_runs_roots.py # Runs --roots flag tests
|
|
256
|
+
├── test_runs_sample.py # Runs sample command tests
|
|
257
|
+
├── test_datasets.py # Datasets command tests
|
|
258
|
+
├── test_examples.py # Examples command tests
|
|
259
|
+
├── test_prompts.py # Prompts command tests
|
|
260
|
+
├── test_self.py # Self detect/update command tests
|
|
261
|
+
├── test_utils.py # Utility function tests
|
|
237
262
|
├── test_smoke.py # Smoke tests (requires API key)
|
|
238
263
|
└── test_e2e.py # End-to-end tests (requires API key)
|
|
239
264
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: langsmith-cli
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Context-efficient CLI for LangSmith. Built for humans and agents.
|
|
5
5
|
Project-URL: Homepage, https://github.com/aviadr1/langsmith-cli
|
|
6
6
|
Project-URL: Repository, https://github.com/aviadr1/langsmith-cli
|
|
@@ -158,10 +158,11 @@ langsmith-cli runs watch
|
|
|
158
158
|
### 📦 **Complete Coverage**
|
|
159
159
|
Every LangSmith resource at your fingertips:
|
|
160
160
|
- ✅ **Projects** - List, create, inspect
|
|
161
|
-
- ✅ **Runs** - Search, stats, watch,
|
|
161
|
+
- ✅ **Runs** - Search, stats, watch, sample, analyze, field discovery
|
|
162
162
|
- ✅ **Datasets** - CRUD + bulk JSONL uploads
|
|
163
163
|
- ✅ **Examples** - Full lifecycle management
|
|
164
164
|
- ✅ **Prompts** - Version control your prompts
|
|
165
|
+
- ✅ **Self** - Installation detection + auto-update
|
|
165
166
|
|
|
166
167
|
---
|
|
167
168
|
|
|
@@ -310,8 +311,8 @@ langsmith-cli runs watch --project production
|
|
|
310
311
|
### 💾 Bulk Dataset Uploads
|
|
311
312
|
|
|
312
313
|
```bash
|
|
313
|
-
# Export examples to JSONL
|
|
314
|
-
langsmith-cli
|
|
314
|
+
# Export examples to JSONL (using --output for reliable file writing)
|
|
315
|
+
langsmith-cli examples list --dataset my-dataset --output examples.jsonl
|
|
315
316
|
|
|
316
317
|
# Upload to new dataset
|
|
317
318
|
langsmith-cli datasets push examples.jsonl --dataset production-eval
|
|
@@ -424,9 +425,18 @@ auth login # Authenticate with LangSmith
|
|
|
424
425
|
projects list # List all projects
|
|
425
426
|
runs list # Search and filter runs
|
|
426
427
|
runs get <id> # Inspect a specific run
|
|
428
|
+
runs get-latest # Get most recent run matching filters
|
|
427
429
|
runs stats # Aggregate statistics
|
|
428
430
|
runs watch # Live run dashboard
|
|
429
431
|
runs open <id> # Open trace in browser
|
|
432
|
+
runs search # Full-text search across runs
|
|
433
|
+
runs sample # Stratified sampling by tags/metadata
|
|
434
|
+
runs analyze # Group runs and compute metrics
|
|
435
|
+
runs tags # Discover tag patterns
|
|
436
|
+
runs metadata-keys # Discover metadata keys
|
|
437
|
+
runs fields # Discover field paths and types
|
|
438
|
+
runs describe # Detailed field statistics
|
|
439
|
+
runs view-file # View runs from JSONL files
|
|
430
440
|
datasets list # List datasets
|
|
431
441
|
datasets create # Create new dataset
|
|
432
442
|
datasets push # Bulk upload from JSONL
|
|
@@ -435,6 +445,8 @@ examples create # Add example to dataset
|
|
|
435
445
|
prompts list # List prompt repositories
|
|
436
446
|
prompts get # Pull a prompt template
|
|
437
447
|
prompts push # Push local prompt to LangSmith
|
|
448
|
+
self detect # Show installation details
|
|
449
|
+
self update # Update to latest version
|
|
438
450
|
```
|
|
439
451
|
|
|
440
452
|
### Global Flags
|
|
@@ -450,7 +462,8 @@ prompts push # Push local prompt to LangSmith
|
|
|
450
462
|
|
|
451
463
|
| Option | Description | Example |
|
|
452
464
|
|--------|-------------|---------|
|
|
453
|
-
| `--project` | Filter by project | `--project production` |
|
|
465
|
+
| `--project` | Filter by project name | `--project production` |
|
|
466
|
+
| `--project-id` | Filter by project UUID | `--project-id abc-123...` |
|
|
454
467
|
| `--status` | Filter by status | `--status error` |
|
|
455
468
|
| `--failed` | Only failed runs | `--failed` |
|
|
456
469
|
| `--succeeded` | Only successful runs | `--succeeded` |
|
|
@@ -542,8 +555,7 @@ uv run pyright
|
|
|
542
555
|
```
|
|
543
556
|
|
|
544
557
|
### Project Stats
|
|
545
|
-
- **
|
|
546
|
-
- **100% Utils Coverage** (47 tests for helpers)
|
|
558
|
+
- **92% Test Coverage** (589 tests)
|
|
547
559
|
- **Zero Type Errors** (Pyright clean)
|
|
548
560
|
- **100% MCP Parity** (13/13 tools)
|
|
549
561
|
|
|
@@ -108,10 +108,11 @@ langsmith-cli runs watch
|
|
|
108
108
|
### 📦 **Complete Coverage**
|
|
109
109
|
Every LangSmith resource at your fingertips:
|
|
110
110
|
- ✅ **Projects** - List, create, inspect
|
|
111
|
-
- ✅ **Runs** - Search, stats, watch,
|
|
111
|
+
- ✅ **Runs** - Search, stats, watch, sample, analyze, field discovery
|
|
112
112
|
- ✅ **Datasets** - CRUD + bulk JSONL uploads
|
|
113
113
|
- ✅ **Examples** - Full lifecycle management
|
|
114
114
|
- ✅ **Prompts** - Version control your prompts
|
|
115
|
+
- ✅ **Self** - Installation detection + auto-update
|
|
115
116
|
|
|
116
117
|
---
|
|
117
118
|
|
|
@@ -260,8 +261,8 @@ langsmith-cli runs watch --project production
|
|
|
260
261
|
### 💾 Bulk Dataset Uploads
|
|
261
262
|
|
|
262
263
|
```bash
|
|
263
|
-
# Export examples to JSONL
|
|
264
|
-
langsmith-cli
|
|
264
|
+
# Export examples to JSONL (using --output for reliable file writing)
|
|
265
|
+
langsmith-cli examples list --dataset my-dataset --output examples.jsonl
|
|
265
266
|
|
|
266
267
|
# Upload to new dataset
|
|
267
268
|
langsmith-cli datasets push examples.jsonl --dataset production-eval
|
|
@@ -374,9 +375,18 @@ auth login # Authenticate with LangSmith
|
|
|
374
375
|
projects list # List all projects
|
|
375
376
|
runs list # Search and filter runs
|
|
376
377
|
runs get <id> # Inspect a specific run
|
|
378
|
+
runs get-latest # Get most recent run matching filters
|
|
377
379
|
runs stats # Aggregate statistics
|
|
378
380
|
runs watch # Live run dashboard
|
|
379
381
|
runs open <id> # Open trace in browser
|
|
382
|
+
runs search # Full-text search across runs
|
|
383
|
+
runs sample # Stratified sampling by tags/metadata
|
|
384
|
+
runs analyze # Group runs and compute metrics
|
|
385
|
+
runs tags # Discover tag patterns
|
|
386
|
+
runs metadata-keys # Discover metadata keys
|
|
387
|
+
runs fields # Discover field paths and types
|
|
388
|
+
runs describe # Detailed field statistics
|
|
389
|
+
runs view-file # View runs from JSONL files
|
|
380
390
|
datasets list # List datasets
|
|
381
391
|
datasets create # Create new dataset
|
|
382
392
|
datasets push # Bulk upload from JSONL
|
|
@@ -385,6 +395,8 @@ examples create # Add example to dataset
|
|
|
385
395
|
prompts list # List prompt repositories
|
|
386
396
|
prompts get # Pull a prompt template
|
|
387
397
|
prompts push # Push local prompt to LangSmith
|
|
398
|
+
self detect # Show installation details
|
|
399
|
+
self update # Update to latest version
|
|
388
400
|
```
|
|
389
401
|
|
|
390
402
|
### Global Flags
|
|
@@ -400,7 +412,8 @@ prompts push # Push local prompt to LangSmith
|
|
|
400
412
|
|
|
401
413
|
| Option | Description | Example |
|
|
402
414
|
|--------|-------------|---------|
|
|
403
|
-
| `--project` | Filter by project | `--project production` |
|
|
415
|
+
| `--project` | Filter by project name | `--project production` |
|
|
416
|
+
| `--project-id` | Filter by project UUID | `--project-id abc-123...` |
|
|
404
417
|
| `--status` | Filter by status | `--status error` |
|
|
405
418
|
| `--failed` | Only failed runs | `--failed` |
|
|
406
419
|
| `--succeeded` | Only successful runs | `--succeeded` |
|
|
@@ -492,8 +505,7 @@ uv run pyright
|
|
|
492
505
|
```
|
|
493
506
|
|
|
494
507
|
### Project Stats
|
|
495
|
-
- **
|
|
496
|
-
- **100% Utils Coverage** (47 tests for helpers)
|
|
508
|
+
- **92% Test Coverage** (589 tests)
|
|
497
509
|
- **Zero Type Errors** (Pyright clean)
|
|
498
510
|
- **100% MCP Parity** (13/13 tools)
|
|
499
511
|
|
|
@@ -45,7 +45,6 @@ This document tracks feature parity between the langsmith-cli and the official L
|
|
|
45
45
|
- ✅ `is_root` → `--is-root`
|
|
46
46
|
- ✅ `trace_filter` → `--trace-filter`
|
|
47
47
|
- ✅ `tree_filter` → `--tree-filter`
|
|
48
|
-
- ✅ `order_by` → `--order-by`
|
|
49
48
|
- ✅ `reference_example_id` → `--reference-example-id`
|
|
50
49
|
|
|
51
50
|
**`runs get`** (2/2 parameters):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "langsmith-cli"
|
|
3
3
|
# IMPORTANT: When bumping this version, also update .claude-plugin/plugin.json
|
|
4
|
-
version = "0.3.2"
|
|
4
|
+
version = "0.3.3"
|
|
5
5
|
description = "Context-efficient CLI for LangSmith. Built for humans and agents."
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
requires-python = ">=3.12"
|
|
@@ -48,8 +48,8 @@ langsmith-cli runs list --project my-project --fields id,name,status --output ru
|
|
|
48
48
|
```bash
|
|
49
49
|
# ❌ WRONG - Never use shell redirection for data extraction
|
|
50
50
|
langsmith-cli --json runs list --project my-project > runs.json
|
|
51
|
-
# If API fails:
|
|
52
|
-
# You won't know
|
|
51
|
+
# If API fails: errors go to stderr (invisible with redirection)
|
|
52
|
+
# You may get a JSON error object instead of data, and won't know what happened
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
```bash
|
|
@@ -107,7 +107,8 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
107
107
|
|
|
108
108
|
### Runs (Traces)
|
|
109
109
|
- `langsmith-cli --json runs list [OPTIONS]`: List recent runs.
|
|
110
|
-
- `--project <name>`: Filter by project.
|
|
110
|
+
- `--project <name>`: Filter by project name (default: "default").
|
|
111
|
+
- `--project-id <uuid>`: Filter by project UUID (bypasses name resolution, faster).
|
|
111
112
|
- `--limit <n>`: Max results (default 10, keep it small).
|
|
112
113
|
- `--status <success|error>`: Filter by status.
|
|
113
114
|
- `--filter <string>`: Advanced FQL query string (see FQL examples below).
|
|
@@ -211,6 +212,13 @@ langsmith-cli --json runs list --project my-project --limit 5 2>&1
|
|
|
211
212
|
- `langsmith-cli --json prompts get <name> [--commit <hash>]`: Fetch a prompt template.
|
|
212
213
|
- `langsmith-cli --json prompts push <name> <file_path>`: Push a local file as a prompt.
|
|
213
214
|
|
|
215
|
+
### Self (Installation Management)
|
|
216
|
+
- `langsmith-cli self detect`: Show installation details (version, install method, paths).
|
|
217
|
+
- Reports: version, install method (uv tool, pipx, pip, editable), install path, executable path, Python version.
|
|
218
|
+
- `langsmith-cli self update`: Update langsmith-cli to the latest version.
|
|
219
|
+
- Auto-detects install method and runs the appropriate upgrade command.
|
|
220
|
+
- Checks PyPI for latest version before updating.
|
|
221
|
+
|
|
214
222
|
## Common Patterns (No Piping Needed)
|
|
215
223
|
|
|
216
224
|
The CLI provides built-in commands that eliminate the need for Unix pipes, jq, and nested commands:
|
|
@@ -23,8 +23,7 @@ This document provides practical workflows and use cases for common LangSmith op
|
|
|
23
23
|
langsmith-cli --json runs list \
|
|
24
24
|
--project production-app \
|
|
25
25
|
--status error \
|
|
26
|
-
--limit 5
|
|
27
|
-
--order-by -start_time
|
|
26
|
+
--limit 5
|
|
28
27
|
|
|
29
28
|
# Step 2: Inspect specific failure (context-efficient)
|
|
30
29
|
langsmith-cli --json runs get <run-id> \
|
|
@@ -75,8 +74,7 @@ langsmith-cli runs open <trace-id>
|
|
|
75
74
|
langsmith-cli --json runs list \
|
|
76
75
|
--project production-app \
|
|
77
76
|
--filter 'gt(latency, "5s")' \
|
|
78
|
-
--limit 20
|
|
79
|
-
--order-by -latency
|
|
77
|
+
--limit 20
|
|
80
78
|
|
|
81
79
|
# Step 2: Analyze latency distribution
|
|
82
80
|
langsmith-cli --json runs stats \
|
|
@@ -374,8 +372,7 @@ langsmith-cli --json runs stats --project production-app --limit 1000
|
|
|
374
372
|
langsmith-cli --json runs list \
|
|
375
373
|
--project production-app \
|
|
376
374
|
--filter 'gt(total_cost, 0.1)' \
|
|
377
|
-
--limit 20
|
|
378
|
-
--order-by -total_cost
|
|
375
|
+
--limit 20
|
|
379
376
|
|
|
380
377
|
# Analyze cost by component type
|
|
381
378
|
langsmith-cli --json runs list \
|
|
@@ -685,7 +682,6 @@ while true; do
|
|
|
685
682
|
RUNS=$(langsmith-cli --json runs list \
|
|
686
683
|
--project "$PROJECT" \
|
|
687
684
|
--limit 100 \
|
|
688
|
-
--order-by -start_time \
|
|
689
685
|
$FILTER)
|
|
690
686
|
|
|
691
687
|
# Process each new run
|
|
@@ -18,7 +18,6 @@ langsmith-cli --json runs list [OPTIONS]
|
|
|
18
18
|
- `--filter TEXT` - Advanced FQL query (see Filter Query Language section)
|
|
19
19
|
- `--trace-filter TEXT` - Filter applied to root run of trace
|
|
20
20
|
- `--tree-filter TEXT` - Filter applied to any run in trace tree
|
|
21
|
-
- `--order-by TEXT` - Sort field (default: `-start_time`). Prefix with `-` for descending
|
|
22
21
|
- `--reference-example-id UUID` - Filter runs by reference example ID
|
|
23
22
|
|
|
24
23
|
**Output Fields:**
|
|
@@ -194,4 +193,3 @@ langsmith-cli runs watch [OPTIONS]
|
|
|
194
193
|
- `--refresh INTEGER` - Refresh interval in seconds (default: 2)
|
|
195
194
|
|
|
196
195
|
**Behavior:** Shows live table of recent runs with auto-refresh
|
|
197
|
-
|
|
@@ -232,6 +232,17 @@ def push_dataset(ctx, file_path, dataset):
|
|
|
232
232
|
dataset_name=dataset,
|
|
233
233
|
)
|
|
234
234
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
235
|
+
if ctx.obj.get("json"):
|
|
236
|
+
click.echo(
|
|
237
|
+
json_dumps(
|
|
238
|
+
{
|
|
239
|
+
"status": "success",
|
|
240
|
+
"dataset": dataset,
|
|
241
|
+
"examples_count": len(examples),
|
|
242
|
+
}
|
|
243
|
+
)
|
|
244
|
+
)
|
|
245
|
+
else:
|
|
246
|
+
logger.success(
|
|
247
|
+
f"Successfully pushed {len(examples)} examples to dataset '{dataset}'"
|
|
248
|
+
)
|
|
@@ -164,4 +164,9 @@ def push_prompt(ctx, name, file_path, description, tags, is_public):
|
|
|
164
164
|
is_public=is_public,
|
|
165
165
|
)
|
|
166
166
|
|
|
167
|
-
|
|
167
|
+
if ctx.obj.get("json"):
|
|
168
|
+
from langsmith_cli.utils import json_dumps
|
|
169
|
+
|
|
170
|
+
click.echo(json_dumps({"status": "success", "name": name}))
|
|
171
|
+
else:
|
|
172
|
+
logger.success(f"Successfully pushed prompt to {name}")
|
|
@@ -321,9 +321,6 @@ def compute_metrics(
|
|
|
321
321
|
)
|
|
322
322
|
@click.option("--trace-filter", help="Filter applied to root trace.")
|
|
323
323
|
@click.option("--tree-filter", help="Filter if any run in trace tree matches.")
|
|
324
|
-
@click.option(
|
|
325
|
-
"--order-by", default="-start_time", help="Sort field (prefix with - for desc)."
|
|
326
|
-
)
|
|
327
324
|
@click.option("--reference-example-id", help="Filter runs for a specific example.")
|
|
328
325
|
@click.option(
|
|
329
326
|
"--tag",
|
|
@@ -430,7 +427,6 @@ def list_runs(
|
|
|
430
427
|
roots,
|
|
431
428
|
trace_filter,
|
|
432
429
|
tree_filter,
|
|
433
|
-
order_by,
|
|
434
430
|
reference_example_id,
|
|
435
431
|
tag,
|
|
436
432
|
name_pattern,
|
|
@@ -661,7 +657,6 @@ def list_runs(
|
|
|
661
657
|
is_root=is_root,
|
|
662
658
|
trace_filter=trace_filter,
|
|
663
659
|
tree_filter=tree_filter,
|
|
664
|
-
order_by=order_by,
|
|
665
660
|
reference_example_id=reference_example_id,
|
|
666
661
|
console=None, # Don't auto-report warnings (we have custom diagnostics below)
|
|
667
662
|
)
|
|
@@ -669,10 +664,8 @@ def list_runs(
|
|
|
669
664
|
failed_projects = result.failed_sources
|
|
670
665
|
|
|
671
666
|
# CRITICAL: Fail fast if ALL sources failed (prevents silent failures)
|
|
672
|
-
#
|
|
673
|
-
|
|
674
|
-
format_type = determine_output_format(output_format, ctx.obj.get("json"))
|
|
675
|
-
output_formatted_data([], format_type)
|
|
667
|
+
# The global error handler in LangSmithCLIGroup outputs JSON errors in --json mode,
|
|
668
|
+
# so we don't need to output [] here (which would cause double output on stdout).
|
|
676
669
|
result.raise_if_all_failed(logger, "runs")
|
|
677
670
|
|
|
678
671
|
# Report partial failures (some succeeded, some failed)
|
|
@@ -993,7 +986,6 @@ def get_latest_run(
|
|
|
993
986
|
error=error_filter,
|
|
994
987
|
filter=combined_filter,
|
|
995
988
|
is_root=roots,
|
|
996
|
-
order_by="-start_time",
|
|
997
989
|
)
|
|
998
990
|
|
|
999
991
|
if pq.use_id:
|
|
@@ -1168,7 +1160,14 @@ def run_stats(
|
|
|
1168
1160
|
resolved_project_ids.append(proj_name)
|
|
1169
1161
|
|
|
1170
1162
|
if not resolved_project_ids:
|
|
1171
|
-
|
|
1163
|
+
if ctx.obj.get("json"):
|
|
1164
|
+
click.echo(
|
|
1165
|
+
json_dumps(
|
|
1166
|
+
{"error": "NotFoundError", "message": "No matching projects found."}
|
|
1167
|
+
)
|
|
1168
|
+
)
|
|
1169
|
+
else:
|
|
1170
|
+
console.print("[yellow]No matching projects found.[/yellow]")
|
|
1172
1171
|
return
|
|
1173
1172
|
|
|
1174
1173
|
stats = client.get_run_stats(project_ids=resolved_project_ids)
|
|
@@ -1206,8 +1205,11 @@ def open_run(ctx, run_id):
|
|
|
1206
1205
|
# The SDK also has a way to get the URL but it might require project name.
|
|
1207
1206
|
url = f"https://smith.langchain.com/r/{run_id}"
|
|
1208
1207
|
|
|
1209
|
-
|
|
1210
|
-
|
|
1208
|
+
if ctx.obj.get("json"):
|
|
1209
|
+
click.echo(json_dumps({"run_id": run_id, "url": url}))
|
|
1210
|
+
else:
|
|
1211
|
+
click.echo(f"Opening run {run_id} in browser...")
|
|
1212
|
+
click.echo(f"URL: {url}")
|
|
1211
1213
|
webbrowser.open(url)
|
|
1212
1214
|
|
|
1213
1215
|
|
|
@@ -1447,7 +1449,6 @@ def search_runs(
|
|
|
1447
1449
|
roots=roots, # Pass through --roots flag
|
|
1448
1450
|
trace_filter=None,
|
|
1449
1451
|
tree_filter=None,
|
|
1450
|
-
order_by="-start_time",
|
|
1451
1452
|
reference_example_id=None,
|
|
1452
1453
|
tag=(),
|
|
1453
1454
|
name_pattern=None,
|
|
@@ -1664,7 +1665,6 @@ def sample_runs(
|
|
|
1664
1665
|
project_query=pq,
|
|
1665
1666
|
limit=sample_limit,
|
|
1666
1667
|
filter=combined_filter,
|
|
1667
|
-
order_by="-start_time",
|
|
1668
1668
|
console=console,
|
|
1669
1669
|
)
|
|
1670
1670
|
stratum_runs = result.items[:sample_limit]
|
|
@@ -1713,7 +1713,6 @@ def sample_runs(
|
|
|
1713
1713
|
project_query=pq,
|
|
1714
1714
|
limit=samples_per_stratum,
|
|
1715
1715
|
filter=combined_filter,
|
|
1716
|
-
order_by="-start_time",
|
|
1717
1716
|
console=console,
|
|
1718
1717
|
)
|
|
1719
1718
|
stratum_runs = result.items[:samples_per_stratum]
|
|
@@ -1954,7 +1953,6 @@ def analyze_runs(
|
|
|
1954
1953
|
project_query=pq,
|
|
1955
1954
|
filter=combined_filter,
|
|
1956
1955
|
limit=None,
|
|
1957
|
-
order_by="-start_time",
|
|
1958
1956
|
console=console,
|
|
1959
1957
|
)
|
|
1960
1958
|
all_runs = result.items
|
|
@@ -1973,7 +1971,6 @@ def analyze_runs(
|
|
|
1973
1971
|
**proj_kwargs,
|
|
1974
1972
|
filter=combined_filter,
|
|
1975
1973
|
limit=None, # SDK paginates automatically
|
|
1976
|
-
order_by="-start_time",
|
|
1977
1974
|
select=list(select_fields) if select_fields else None,
|
|
1978
1975
|
)
|
|
1979
1976
|
|
|
@@ -2142,7 +2139,6 @@ def _fetch_runs_for_discovery(
|
|
|
2142
2139
|
_fetch_runs,
|
|
2143
2140
|
project_query=pq,
|
|
2144
2141
|
limit=sample_size,
|
|
2145
|
-
order_by="-start_time",
|
|
2146
2142
|
select=select,
|
|
2147
2143
|
filter=combined_filter,
|
|
2148
2144
|
console=console,
|
|
@@ -147,8 +147,27 @@ class LangSmithCLIGroup(click.Group):
|
|
|
147
147
|
sys.exit(1)
|
|
148
148
|
|
|
149
149
|
else:
|
|
150
|
-
#
|
|
151
|
-
|
|
150
|
+
# Non-LangSmith error (Click exceptions, Python exceptions, etc.)
|
|
151
|
+
if json_mode:
|
|
152
|
+
# In JSON mode, ALWAYS output structured JSON to stdout.
|
|
153
|
+
# Empty stdout breaks piped JSON parsing (json.loads fails).
|
|
154
|
+
if isinstance(e, click.ClickException):
|
|
155
|
+
error_data = {
|
|
156
|
+
"error": type(e).__name__,
|
|
157
|
+
"message": e.format_message(),
|
|
158
|
+
}
|
|
159
|
+
exit_code = e.exit_code
|
|
160
|
+
else:
|
|
161
|
+
error_data = {
|
|
162
|
+
"error": type(e).__name__,
|
|
163
|
+
"message": str(e),
|
|
164
|
+
}
|
|
165
|
+
exit_code = 1
|
|
166
|
+
click.echo(json_lib.dumps(error_data))
|
|
167
|
+
sys.exit(exit_code)
|
|
168
|
+
else:
|
|
169
|
+
# In human mode, re-raise for Click's default formatting
|
|
170
|
+
raise
|
|
152
171
|
finally:
|
|
153
172
|
# Flush stdout to prevent data loss when piping to other processes
|
|
154
173
|
# This fixes race conditions where buffered output may not reach the pipe
|
|
@@ -1559,12 +1559,21 @@ def write_output_to_file(
|
|
|
1559
1559
|
else:
|
|
1560
1560
|
raise ValueError(f"Unsupported format_type: {format_type}")
|
|
1561
1561
|
|
|
1562
|
+
# Diagnostic messages go to stderr to avoid corrupting piped stdout in JSON mode
|
|
1563
|
+
from rich.console import Console as RichConsole
|
|
1564
|
+
|
|
1565
|
+
stderr_console = RichConsole(stderr=True)
|
|
1562
1566
|
if is_single:
|
|
1563
|
-
|
|
1567
|
+
stderr_console.print(f"[green]Wrote item to {output_path}[/green]")
|
|
1564
1568
|
else:
|
|
1565
|
-
|
|
1569
|
+
stderr_console.print(
|
|
1570
|
+
f"[green]Wrote {len(data)} items to {output_path}[/green]"
|
|
1571
|
+
)
|
|
1566
1572
|
except Exception as e:
|
|
1567
|
-
console
|
|
1573
|
+
from rich.console import Console as RichConsole
|
|
1574
|
+
|
|
1575
|
+
stderr_console = RichConsole(stderr=True)
|
|
1576
|
+
stderr_console.print(f"[red]Error writing to file {output_path}: {e}[/red]")
|
|
1568
1577
|
raise click.Abort()
|
|
1569
1578
|
|
|
1570
1579
|
|
|
@@ -350,6 +350,37 @@ def test_datasets_create_with_type(runner):
|
|
|
350
350
|
assert call_kwargs["data_type"].value == "chat"
|
|
351
351
|
|
|
352
352
|
|
|
353
|
+
def test_datasets_push_json_mode_outputs_json_confirmation(runner, tmp_path):
|
|
354
|
+
"""Invariant: --json mode push outputs JSON confirmation, not empty stdout."""
|
|
355
|
+
with patch("langsmith.Client") as MockClient:
|
|
356
|
+
mock_client = MockClient.return_value
|
|
357
|
+
|
|
358
|
+
jsonl_file = tmp_path / "examples.jsonl"
|
|
359
|
+
examples = [
|
|
360
|
+
{"inputs": {"text": "hello"}, "outputs": {"result": "world"}},
|
|
361
|
+
]
|
|
362
|
+
jsonl_file.write_text("\n".join(json.dumps(e) for e in examples))
|
|
363
|
+
|
|
364
|
+
mock_client.read_dataset.return_value = create_dataset(name="target-dataset")
|
|
365
|
+
|
|
366
|
+
result = runner.invoke(
|
|
367
|
+
cli,
|
|
368
|
+
[
|
|
369
|
+
"--json",
|
|
370
|
+
"datasets",
|
|
371
|
+
"push",
|
|
372
|
+
str(jsonl_file),
|
|
373
|
+
"--dataset",
|
|
374
|
+
"target-dataset",
|
|
375
|
+
],
|
|
376
|
+
)
|
|
377
|
+
assert result.exit_code == 0
|
|
378
|
+
data = json.loads(result.output)
|
|
379
|
+
assert data["status"] == "success"
|
|
380
|
+
assert data["dataset"] == "target-dataset"
|
|
381
|
+
assert data["examples_count"] == 1
|
|
382
|
+
|
|
383
|
+
|
|
353
384
|
def test_datasets_push(runner, tmp_path):
|
|
354
385
|
"""INVARIANT: datasets push should upload examples from a JSONL file."""
|
|
355
386
|
with patch("langsmith.Client") as MockClient:
|