applied-cli 0.6.6__tar.gz → 0.6.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {applied_cli-0.6.6 → applied_cli-0.6.7}/PKG-INFO +57 -1
  2. {applied_cli-0.6.6 → applied_cli-0.6.7}/README.md +56 -0
  3. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli.egg-info/PKG-INFO +57 -1
  4. {applied_cli-0.6.6 → applied_cli-0.6.7}/pyproject.toml +1 -1
  5. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/__init__.py +0 -0
  6. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/agent_scoped_flows.py +0 -0
  7. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/auth.py +0 -0
  8. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/cli.py +0 -0
  9. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/client.py +0 -0
  10. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/conversation_lookup.py +0 -0
  11. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/conversations.py +0 -0
  12. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/credentials.py +0 -0
  13. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/flow_helpers.py +0 -0
  14. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/formatters.py +0 -0
  15. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/mcp.py +0 -0
  16. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/recovery.py +0 -0
  17. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/toolkit.py +0 -0
  18. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/tools.py +0 -0
  19. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/__init__.py +0 -0
  20. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/agents.py +0 -0
  21. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/articles.py +0 -0
  22. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/catalog.py +0 -0
  23. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/connectors.py +0 -0
  24. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/content.py +0 -0
  25. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/conversations.py +0 -0
  26. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/domains.py +0 -0
  27. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/flows.py +0 -0
  28. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/knowledge.py +0 -0
  29. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/manifest.py +0 -0
  30. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/products.py +0 -0
  31. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/scenarios.py +0 -0
  32. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/taxonomy.py +0 -0
  33. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli/v2/tickets.py +0 -0
  34. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli.egg-info/SOURCES.txt +0 -0
  35. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli.egg-info/dependency_links.txt +0 -0
  36. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli.egg-info/entry_points.txt +0 -0
  37. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli.egg-info/requires.txt +0 -0
  38. {applied_cli-0.6.6 → applied_cli-0.6.7}/applied_cli.egg-info/top_level.txt +0 -0
  39. {applied_cli-0.6.6 → applied_cli-0.6.7}/setup.cfg +0 -0
  40. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_agent_scoped_flows.py +0 -0
  41. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_audit_tools.py +0 -0
  42. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_auth_context.py +0 -0
  43. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_benchmark_clone.py +0 -0
  44. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_benchmark_delete_guardrail.py +0 -0
  45. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_benchmark_results.py +0 -0
  46. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_benchmark_scenario_tools.py +0 -0
  47. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_cli.py +0 -0
  48. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_cli_v2.py +0 -0
  49. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_client.py +0 -0
  50. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_client_v2.py +0 -0
  51. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_conversation_tools.py +0 -0
  52. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_flow_tools.py +0 -0
  53. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_knowledge_content_tools.py +0 -0
  54. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_recovery.py +0 -0
  55. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_scenario_bulk_cancel.py +0 -0
  56. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_scenario_bulk_run_contact.py +0 -0
  57. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_scenario_bulk_run_wait.py +0 -0
  58. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_toolkit_contract.py +0 -0
  59. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_agents.py +0 -0
  60. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_articles.py +0 -0
  61. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_catalog_and_mcp.py +0 -0
  62. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_connectors.py +0 -0
  63. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_content.py +0 -0
  64. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_conversations.py +0 -0
  65. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_flows.py +0 -0
  66. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_knowledge.py +0 -0
  67. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_products.py +0 -0
  68. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_scenarios.py +0 -0
  69. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_taxonomy.py +0 -0
  70. {applied_cli-0.6.6 → applied_cli-0.6.7}/tests/test_v2_tickets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: applied-cli
3
- Version: 0.6.6
3
+ Version: 0.6.7
4
4
  Summary: CLI and shared client library for Applied Labs AI support agents
5
5
  Author: Applied Labs
6
6
  License-Expression: MIT
@@ -80,6 +80,57 @@ applied metrics --metric-name conversation.resolve --start 2026-04-01 --end 2026
80
80
  object. `analytics` returns grouped rows and currently supports `--metrics count`.
81
81
  Raw analytics SQL is not available through the public CLI surface.
82
82
 
83
+ ## Benchmarks & Scenarios
84
+
85
+ A **benchmark** is a named regression suite; a **scenario** is one test conversation
86
+ (built from a real `input_conversation_id`) that can belong to one or more benchmarks.
87
+ The typical loop is: build a suite → run it → review the pass rate → fix → re-run.
88
+
89
+ ```bash
90
+ # Inspect benchmarks and their scenarios
91
+ applied benchmarks --agent-id <agent_id> --format json
92
+ applied benchmark <benchmark_id> --format json
93
+ applied scenarios --benchmark-id <benchmark_id> --format json
94
+
95
+ # Build a suite
96
+ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
97
+ applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
98
+ --benchmark-id <benchmark_id>
99
+
100
+ # Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
101
+ # scenarios under the destination agent; same-agent just tags them in.
102
+ # Dry-run by default; add --apply to write.
103
+ applied benchmark-clone <source_benchmark_id> --dest-benchmark-name "Chat Regression" \
104
+ --target-agent-id <chat_agent_id> --apply
105
+
106
+ # Run a benchmark and wait for results in one command.
107
+ # --contact-email runs as a contact that has an email, fixing
108
+ # "Email is not present in the conversation" on test conversations.
109
+ applied scenario-bulk-run --benchmark-id <benchmark_id> \
110
+ --contact-email test@example.com --wait
111
+ applied scenario-bulk-status <job_id> --include-runs --format json
112
+
113
+ # Kill a stuck bulk run (deletes its queued/running runs; finished runs preserved)
114
+ applied scenario-bulk-cancel <job_id> --apply
115
+
116
+ # Review pass/fail health (pass_status reflects the latest run per scenario)
117
+ applied benchmark-results <benchmark_id> --format json
118
+
119
+ # Rate scenarios as you evaluate
120
+ applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
121
+
122
+ # Safe delete — refuses to wipe scenarios unless you opt in
123
+ applied benchmark-delete <benchmark_id> --detach-scenarios # preserve scenarios
124
+ applied benchmark-delete <benchmark_id> --force # cascade delete
125
+
126
+ # Recover deleted benchmark/scenario rows from a local PITR export
127
+ applied scenario-recover-catalog --recovery-dir <dir> --apply
128
+ ```
129
+
130
+ Deleting a benchmark cascades and permanently deletes its scenarios and runs, so
131
+ `benchmark-delete` refuses a non-empty benchmark unless you pass `--detach-scenarios`
132
+ (unlink the scenarios first so they survive under their agent) or `--force`.
133
+
83
134
  ## Library Usage
84
135
 
85
136
  ```python
@@ -113,6 +164,11 @@ conversations = await tools.conversation_query(
113
164
  | `analytics_report` | Read standard dashboard/report analytics views |
114
165
  | `analytics_query` | Aggregate supported conversation dimensions with count |
115
166
  | `metrics_query` | Roll up named metric events |
167
+ | `benchmark_clone` | Copy all scenarios from one benchmark into another |
168
+ | `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
169
+ | `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
170
+ | `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
171
+ | `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
116
172
 
117
173
  ## Examples
118
174
 
@@ -54,6 +54,57 @@ applied metrics --metric-name conversation.resolve --start 2026-04-01 --end 2026
54
54
  object. `analytics` returns grouped rows and currently supports `--metrics count`.
55
55
  Raw analytics SQL is not available through the public CLI surface.
56
56
 
57
+ ## Benchmarks & Scenarios
58
+
59
+ A **benchmark** is a named regression suite; a **scenario** is one test conversation
60
+ (built from a real `input_conversation_id`) that can belong to one or more benchmarks.
61
+ The typical loop is: build a suite → run it → review the pass rate → fix → re-run.
62
+
63
+ ```bash
64
+ # Inspect benchmarks and their scenarios
65
+ applied benchmarks --agent-id <agent_id> --format json
66
+ applied benchmark <benchmark_id> --format json
67
+ applied scenarios --benchmark-id <benchmark_id> --format json
68
+
69
+ # Build a suite
70
+ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
71
+ applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
72
+ --benchmark-id <benchmark_id>
73
+
74
+ # Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
75
+ # scenarios under the destination agent; same-agent just tags them in.
76
+ # Dry-run by default; add --apply to write.
77
+ applied benchmark-clone <source_benchmark_id> --dest-benchmark-name "Chat Regression" \
78
+ --target-agent-id <chat_agent_id> --apply
79
+
80
+ # Run a benchmark and wait for results in one command.
81
+ # --contact-email runs as a contact that has an email, fixing
82
+ # "Email is not present in the conversation" on test conversations.
83
+ applied scenario-bulk-run --benchmark-id <benchmark_id> \
84
+ --contact-email test@example.com --wait
85
+ applied scenario-bulk-status <job_id> --include-runs --format json
86
+
87
+ # Kill a stuck bulk run (deletes its queued/running runs; finished runs preserved)
88
+ applied scenario-bulk-cancel <job_id> --apply
89
+
90
+ # Review pass/fail health (pass_status reflects the latest run per scenario)
91
+ applied benchmark-results <benchmark_id> --format json
92
+
93
+ # Rate scenarios as you evaluate
94
+ applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
95
+
96
+ # Safe delete — refuses to wipe scenarios unless you opt in
97
+ applied benchmark-delete <benchmark_id> --detach-scenarios # preserve scenarios
98
+ applied benchmark-delete <benchmark_id> --force # cascade delete
99
+
100
+ # Recover deleted benchmark/scenario rows from a local PITR export
101
+ applied scenario-recover-catalog --recovery-dir <dir> --apply
102
+ ```
103
+
104
+ Deleting a benchmark cascades and permanently deletes its scenarios and runs, so
105
+ `benchmark-delete` refuses a non-empty benchmark unless you pass `--detach-scenarios`
106
+ (unlink the scenarios first so they survive under their agent) or `--force`.
107
+
57
108
  ## Library Usage
58
109
 
59
110
  ```python
@@ -87,6 +138,11 @@ conversations = await tools.conversation_query(
87
138
  | `analytics_report` | Read standard dashboard/report analytics views |
88
139
  | `analytics_query` | Aggregate supported conversation dimensions with count |
89
140
  | `metrics_query` | Roll up named metric events |
141
+ | `benchmark_clone` | Copy all scenarios from one benchmark into another |
142
+ | `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
143
+ | `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
144
+ | `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
145
+ | `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
90
146
 
91
147
  ## Examples
92
148
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: applied-cli
3
- Version: 0.6.6
3
+ Version: 0.6.7
4
4
  Summary: CLI and shared client library for Applied Labs AI support agents
5
5
  Author: Applied Labs
6
6
  License-Expression: MIT
@@ -80,6 +80,57 @@ applied metrics --metric-name conversation.resolve --start 2026-04-01 --end 2026
80
80
  object. `analytics` returns grouped rows and currently supports `--metrics count`.
81
81
  Raw analytics SQL is not available through the public CLI surface.
82
82
 
83
+ ## Benchmarks & Scenarios
84
+
85
+ A **benchmark** is a named regression suite; a **scenario** is one test conversation
86
+ (built from a real `input_conversation_id`) that can belong to one or more benchmarks.
87
+ The typical loop is: build a suite → run it → review the pass rate → fix → re-run.
88
+
89
+ ```bash
90
+ # Inspect benchmarks and their scenarios
91
+ applied benchmarks --agent-id <agent_id> --format json
92
+ applied benchmark <benchmark_id> --format json
93
+ applied scenarios --benchmark-id <benchmark_id> --format json
94
+
95
+ # Build a suite
96
+ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
97
+ applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
98
+ --benchmark-id <benchmark_id>
99
+
100
+ # Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
101
+ # scenarios under the destination agent; same-agent just tags them in.
102
+ # Dry-run by default; add --apply to write.
103
+ applied benchmark-clone <source_benchmark_id> --dest-benchmark-name "Chat Regression" \
104
+ --target-agent-id <chat_agent_id> --apply
105
+
106
+ # Run a benchmark and wait for results in one command.
107
+ # --contact-email runs as a contact that has an email, fixing
108
+ # "Email is not present in the conversation" on test conversations.
109
+ applied scenario-bulk-run --benchmark-id <benchmark_id> \
110
+ --contact-email test@example.com --wait
111
+ applied scenario-bulk-status <job_id> --include-runs --format json
112
+
113
+ # Kill a stuck bulk run (deletes its queued/running runs; finished runs preserved)
114
+ applied scenario-bulk-cancel <job_id> --apply
115
+
116
+ # Review pass/fail health (pass_status reflects the latest run per scenario)
117
+ applied benchmark-results <benchmark_id> --format json
118
+
119
+ # Rate scenarios as you evaluate
120
+ applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
121
+
122
+ # Safe delete — refuses to wipe scenarios unless you opt in
123
+ applied benchmark-delete <benchmark_id> --detach-scenarios # preserve scenarios
124
+ applied benchmark-delete <benchmark_id> --force # cascade delete
125
+
126
+ # Recover deleted benchmark/scenario rows from a local PITR export
127
+ applied scenario-recover-catalog --recovery-dir <dir> --apply
128
+ ```
129
+
130
+ Deleting a benchmark cascades and permanently deletes its scenarios and runs, so
131
+ `benchmark-delete` refuses a non-empty benchmark unless you pass `--detach-scenarios`
132
+ (unlink the scenarios first so they survive under their agent) or `--force`.
133
+
83
134
  ## Library Usage
84
135
 
85
136
  ```python
@@ -113,6 +164,11 @@ conversations = await tools.conversation_query(
113
164
  | `analytics_report` | Read standard dashboard/report analytics views |
114
165
  | `analytics_query` | Aggregate supported conversation dimensions with count |
115
166
  | `metrics_query` | Roll up named metric events |
167
+ | `benchmark_clone` | Copy all scenarios from one benchmark into another |
168
+ | `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
169
+ | `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
170
+ | `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
171
+ | `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
116
172
 
117
173
  ## Examples
118
174
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "applied-cli"
3
- version = "0.6.6"
3
+ version = "0.6.7"
4
4
  description = "CLI and shared client library for Applied Labs AI support agents"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
File without changes