prodloop-observability-sdk 0.1.7__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {prodloop_observability_sdk-0.1.7/prodloop_observability_sdk.egg-info → prodloop_observability_sdk-0.1.9}/PKG-INFO +108 -4
  2. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/README.md +107 -3
  3. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/docs/api-reference.md +24 -0
  4. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/docs/examples.md +84 -0
  5. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/docs/getting-started.md +15 -0
  6. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/docs/index.md +1 -0
  7. prodloop_observability_sdk-0.1.9/docs/parameters.md +108 -0
  8. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/examples/.env.example +12 -0
  9. prodloop_observability_sdk-0.1.9/examples/audit_discovery_demo.py +96 -0
  10. prodloop_observability_sdk-0.1.9/examples/post_call_prompt_aware_demo.py +97 -0
  11. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop/__init__.py +2 -0
  12. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop/client.py +74 -16
  13. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop/models.py +49 -0
  14. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9/prodloop_observability_sdk.egg-info}/PKG-INFO +108 -4
  15. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop_observability_sdk.egg-info/SOURCES.txt +2 -0
  16. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/pyproject.toml +1 -1
  17. prodloop_observability_sdk-0.1.7/docs/parameters.md +0 -58
  18. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/CHANGELOG.md +0 -0
  19. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/MANIFEST.in +0 -0
  20. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/docs/authentication.md +0 -0
  21. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/docs/errors.md +0 -0
  22. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/docs/visual-overview.md +0 -0
  23. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/examples/demo.py +0 -0
  24. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/examples/demo_gpt.py +0 -0
  25. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/examples/user_orchestrated_demo.py +0 -0
  26. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/examples/user_orchestrated_demo_gpt.py +0 -0
  27. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/mkdocs.yml +0 -0
  28. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop/exceptions.py +0 -0
  29. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop/plugins/__init__.py +0 -0
  30. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop/plugins/_utils.py +0 -0
  31. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop/plugins/litellm.py +0 -0
  32. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop_observability_sdk.egg-info/dependency_links.txt +0 -0
  33. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop_observability_sdk.egg-info/requires.txt +0 -0
  34. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/prodloop_observability_sdk.egg-info/top_level.txt +0 -0
  35. {prodloop_observability_sdk-0.1.7 → prodloop_observability_sdk-0.1.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: prodloop-observability-sdk
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: Python SDK for evaluating AI voice bot calls via Prodloop APIs.
5
5
  Project-URL: Homepage, https://prodloop.com
6
6
  Project-URL: Documentation, https://observability-sdk-docs.pages.dev/
@@ -27,7 +27,7 @@ pip install prodloop-observability-sdk
27
27
  ## Quickstart
28
28
 
29
29
  ```python
30
- from prodloop import ProdloopClient, EvaluationParameter
30
+ from prodloop import CustomEvaluationParameter, EvaluationParameter, ProdloopClient
31
31
 
32
32
  client = ProdloopClient(api_key="sk_live_...")
33
33
 
@@ -38,12 +38,40 @@ result = client.evaluate_call(
38
38
  EvaluationParameter.HALLUCINATION,
39
39
  ],
40
40
  thresholds={"e2e_response_time_max_ms": 800},
41
+ custom_parameters=[
42
+ CustomEvaluationParameter(
43
+ key="resolution_quality",
44
+ label="Resolution quality",
45
+ description="Check whether the bot correctly understood the issue and reached a useful final outcome.",
46
+ ),
47
+ ],
41
48
  input_prompt="Bot instructions used during this call...",
42
49
  )
43
50
 
44
51
  print(result)
45
52
  ```
46
53
 
54
+ ## Custom Parameters
55
+
56
+ Use `custom_parameters` for audit dimensions that are not part of the fixed `EvaluationParameter` enum. Each custom parameter needs a stable `key` and a clear `description`; `label` is optional.
57
+
58
+ ```python
59
+ result = client.evaluate_call(
60
+ audio_file_path="call.mp3",
61
+ parameters=[EvaluationParameter.HALLUCINATION],
62
+ custom_parameters=[
63
+ {
64
+ "key": "driver_resolution_quality",
65
+ "label": "Driver resolution quality",
66
+ "description": "Evaluate whether the bot handled driver-not-found or cancellation cases correctly and empathetically.",
67
+ }
68
+ ],
69
+ input_prompt="Use the Namma Yatri cancellation support policy as context.",
70
+ )
71
+ ```
72
+
73
+ Custom checks are sent as `custom_parameters` metadata and evaluated from their descriptions plus optional `input_prompt` context.
74
+
47
75
  ## Extraction Validation
48
76
 
49
77
  To validate extraction quality, pass both `extraction_schema` and `bot_captured_variables`:
@@ -64,16 +92,52 @@ Response includes:
64
92
 
65
93
  ## Hallucination Input Requirement
66
94
 
67
- When requesting `hallucination`, pass the bot's original call prompt as `input_prompt`:
95
+ When requesting `hallucination` or any prompt-aware parameter, pass the bot's original call prompt as `input_prompt`:
68
96
 
69
97
  ```python
70
98
  result = client.evaluate_call(
71
99
  audio_file_path="call.mp3",
72
- parameters=[EvaluationParameter.HALLUCINATION],
100
+ parameters=[
101
+ EvaluationParameter.HALLUCINATION,
102
+ EvaluationParameter.SECTION_SEQUENCING,
103
+ EvaluationParameter.INTERNAL_JARGON_LEAKAGE,
104
+ ],
73
105
  input_prompt="You are a polite admissions bot. Never invent course details.",
74
106
  )
75
107
  ```
76
108
 
109
+ Prompt-aware parameter results use a compact shape:
110
+
111
+ ```json
112
+ {
113
+ "passed": "true",
114
+ "explanation": "..."
115
+ }
116
+ ```
117
+
118
+ `passed` can be `"true"`, `"false"`, or `"N/A"`. `"N/A"` means the parameter was not relevant to the supplied prompt or the call did not exercise enough behavior to judge it.
119
+
120
+ Example prompt-aware response:
121
+
122
+ ```json
123
+ {
124
+ "section_sequencing": {
125
+ "passed": "false",
126
+ "explanation": "The bot did not follow the section flow defined in the supplied prompt."
127
+ },
128
+ "mandatory_field_gating": {
129
+ "passed": "N/A",
130
+ "explanation": "The prompt-defined gated action was not triggered in this call."
131
+ },
132
+ "prompt_injection": {
133
+ "passed": "N/A",
134
+ "explanation": "The caller did not attempt to override instructions or inject commands."
135
+ }
136
+ }
137
+ ```
138
+
139
+ Runnable example: `examples/post_call_prompt_aware_demo.py`. The same flow was production-tested with a partial parameter set and then with all prompt-aware parameters.
140
+
77
141
  ## Supported Parameters
78
142
 
79
143
  - `e2e_response_time`
@@ -83,6 +147,23 @@ result = client.evaluate_call(
83
147
  - `hallucination`
84
148
  - `extraction_variables`
85
149
  - `interruption_behavior`
150
+ - `section_sequencing`
151
+ - `mandatory_field_gating`
152
+ - `interrupt_resume_precision`
153
+ - `closing_verbatim_delivery`
154
+ - `single_attempt_constraints`
155
+ - `info_dump_handling`
156
+ - `mid_flow_intent_switch`
157
+ - `side_talk_leakage`
158
+ - `ambiguous_partial_responses`
159
+ - `internal_jargon_leakage`
160
+ - `identity_extraction`
161
+ - `prompt_injection`
162
+ - `commitment_extraction`
163
+ - `scope_boundary_testing`
164
+ - `roleplay_jailbreak`
165
+ - `context_memory_across_turns`
166
+ - `hallucination_fabrication`
86
167
 
87
168
  ## Parameter Purpose
88
169
 
@@ -93,6 +174,23 @@ result = client.evaluate_call(
93
174
  - `hallucination`: whether the bot produced fabricated or incorrect claims.
94
175
  - `extraction_variables`: structured variable extraction from call audio.
95
176
  - `interruption_behavior`: whether the bot handled interruptions gracefully.
177
+ - `section_sequencing`: whether the bot followed the prompt-defined flow order.
178
+ - `mandatory_field_gating`: whether prerequisite information was collected before dependent actions.
179
+ - `interrupt_resume_precision`: whether the bot resumed the exact pending step after interruptions.
180
+ - `closing_verbatim_delivery`: whether required closings and terminal-state behavior matched the prompt.
181
+ - `single_attempt_constraints`: whether one-attempt or bounded-retry rules were respected.
182
+ - `info_dump_handling`: whether dense user-provided details were captured and reused.
183
+ - `mid_flow_intent_switch`: whether intent changes were handled without losing context.
184
+ - `side_talk_leakage`: whether background or third-party speech was ignored correctly.
185
+ - `ambiguous_partial_responses`: whether vague answers were clarified before routing or confirming.
186
+ - `internal_jargon_leakage`: whether internal prompt, system, tooling, variable, or process language leaked to the user.
187
+ - `identity_extraction`: whether identity or contact details were captured and used according to the prompt.
188
+ - `prompt_injection`: whether user instructions improperly overrode the prompt.
189
+ - `commitment_extraction`: whether unsupported guarantees, confirmations, timelines, or binding claims were avoided.
190
+ - `scope_boundary_testing`: whether the bot stayed within the prompt-defined scope.
191
+ - `roleplay_jailbreak`: whether persona/role changes that conflict with the prompt were resisted.
192
+ - `context_memory_across_turns`: whether prior context and corrections were retained.
193
+ - `hallucination_fabrication`: whether unsupported facts, claims, statuses, policies, capabilities, or operational statements were fabricated.
96
194
 
97
195
  Deterministic parameters are computed directly from the audio signal:
98
196
  `e2e_response_time`, `turn_by_turn_latency`, `pause_profile`, `audio_artifacts`.
@@ -120,6 +218,8 @@ There are two modes:
120
218
  - `self_simulation`: Prodloop backend runs the tester and bot conversation. You select the bot model route, but Prodloop-owned backend credentials are used. No bot credentials are sent from your code.
121
219
  - `user_orchestrated`: Prodloop backend runs the tester and grader. Your SDK process runs the bot locally with your own credentials and sends only bot replies/latency back to Prodloop.
122
220
 
221
+ The production backend also supports `audit_discovery` for deeper prompt-risk discovery. Runnable examples are available in `examples/audit_discovery_demo.py` and `simulation_demo/prod_testing/after_pypi/audit_discovery_demo.py`.
222
+
123
223
  Simulation currently accepts exactly one parameter per request. To test multiple parameters, start one simulation per parameter. `max_turns` is configurable from `1` to `10`.
124
224
 
125
225
  Discover currently enabled simulation parameters at runtime:
@@ -210,6 +310,10 @@ For `user_orchestrated`, configure bot credentials locally for either Vertex AI
210
310
 
211
311
  For adaptive simulations, `max_turns` controls turns per conversation and `adaptive_max_conversations` controls the maximum number of conversations to explore.
212
312
 
313
+ ### Audit Discovery
314
+
315
+ Audit discovery plans targeted risk scenarios for one selected parameter, runs them against the bot, and returns passed/failed scenarios plus patch guidance for failures. A production smoke test for `section_sequencing` completed successfully with `status="completed"`, `final_result.overall_pass=true`, and `final_result.stop_reason="audit_discovery_completed"`.
316
+
213
317
  ### Result Shape
214
318
 
215
319
  Simulation responses include:
@@ -11,7 +11,7 @@ pip install prodloop-observability-sdk
11
11
  ## Quickstart
12
12
 
13
13
  ```python
14
- from prodloop import ProdloopClient, EvaluationParameter
14
+ from prodloop import CustomEvaluationParameter, EvaluationParameter, ProdloopClient
15
15
 
16
16
  client = ProdloopClient(api_key="sk_live_...")
17
17
 
@@ -22,12 +22,40 @@ result = client.evaluate_call(
22
22
  EvaluationParameter.HALLUCINATION,
23
23
  ],
24
24
  thresholds={"e2e_response_time_max_ms": 800},
25
+ custom_parameters=[
26
+ CustomEvaluationParameter(
27
+ key="resolution_quality",
28
+ label="Resolution quality",
29
+ description="Check whether the bot correctly understood the issue and reached a useful final outcome.",
30
+ ),
31
+ ],
25
32
  input_prompt="Bot instructions used during this call...",
26
33
  )
27
34
 
28
35
  print(result)
29
36
  ```
30
37
 
38
+ ## Custom Parameters
39
+
40
+ Use `custom_parameters` for audit dimensions that are not part of the fixed `EvaluationParameter` enum. Each custom parameter needs a stable `key` and a clear `description`; `label` is optional.
41
+
42
+ ```python
43
+ result = client.evaluate_call(
44
+ audio_file_path="call.mp3",
45
+ parameters=[EvaluationParameter.HALLUCINATION],
46
+ custom_parameters=[
47
+ {
48
+ "key": "driver_resolution_quality",
49
+ "label": "Driver resolution quality",
50
+ "description": "Evaluate whether the bot handled driver-not-found or cancellation cases correctly and empathetically.",
51
+ }
52
+ ],
53
+ input_prompt="Use the Namma Yatri cancellation support policy as context.",
54
+ )
55
+ ```
56
+
57
+ Custom checks are sent as `custom_parameters` metadata and evaluated from their descriptions plus optional `input_prompt` context.
58
+
31
59
  ## Extraction Validation
32
60
 
33
61
  To validate extraction quality, pass both `extraction_schema` and `bot_captured_variables`:
@@ -48,16 +76,52 @@ Response includes:
48
76
 
49
77
  ## Hallucination Input Requirement
50
78
 
51
- When requesting `hallucination`, pass the bot's original call prompt as `input_prompt`:
79
+ When requesting `hallucination` or any prompt-aware parameter, pass the bot's original call prompt as `input_prompt`:
52
80
 
53
81
  ```python
54
82
  result = client.evaluate_call(
55
83
  audio_file_path="call.mp3",
56
- parameters=[EvaluationParameter.HALLUCINATION],
84
+ parameters=[
85
+ EvaluationParameter.HALLUCINATION,
86
+ EvaluationParameter.SECTION_SEQUENCING,
87
+ EvaluationParameter.INTERNAL_JARGON_LEAKAGE,
88
+ ],
57
89
  input_prompt="You are a polite admissions bot. Never invent course details.",
58
90
  )
59
91
  ```
60
92
 
93
+ Prompt-aware parameter results use a compact shape:
94
+
95
+ ```json
96
+ {
97
+ "passed": "true",
98
+ "explanation": "..."
99
+ }
100
+ ```
101
+
102
+ `passed` can be `"true"`, `"false"`, or `"N/A"`. `"N/A"` means the parameter was not relevant to the supplied prompt or the call did not exercise enough behavior to judge it.
103
+
104
+ Example prompt-aware response:
105
+
106
+ ```json
107
+ {
108
+ "section_sequencing": {
109
+ "passed": "false",
110
+ "explanation": "The bot did not follow the section flow defined in the supplied prompt."
111
+ },
112
+ "mandatory_field_gating": {
113
+ "passed": "N/A",
114
+ "explanation": "The prompt-defined gated action was not triggered in this call."
115
+ },
116
+ "prompt_injection": {
117
+ "passed": "N/A",
118
+ "explanation": "The caller did not attempt to override instructions or inject commands."
119
+ }
120
+ }
121
+ ```
122
+
123
+ Runnable example: `examples/post_call_prompt_aware_demo.py`. The same flow was production-tested with a partial parameter set and then with all prompt-aware parameters.
124
+
61
125
  ## Supported Parameters
62
126
 
63
127
  - `e2e_response_time`
@@ -67,6 +131,23 @@ result = client.evaluate_call(
67
131
  - `hallucination`
68
132
  - `extraction_variables`
69
133
  - `interruption_behavior`
134
+ - `section_sequencing`
135
+ - `mandatory_field_gating`
136
+ - `interrupt_resume_precision`
137
+ - `closing_verbatim_delivery`
138
+ - `single_attempt_constraints`
139
+ - `info_dump_handling`
140
+ - `mid_flow_intent_switch`
141
+ - `side_talk_leakage`
142
+ - `ambiguous_partial_responses`
143
+ - `internal_jargon_leakage`
144
+ - `identity_extraction`
145
+ - `prompt_injection`
146
+ - `commitment_extraction`
147
+ - `scope_boundary_testing`
148
+ - `roleplay_jailbreak`
149
+ - `context_memory_across_turns`
150
+ - `hallucination_fabrication`
70
151
 
71
152
  ## Parameter Purpose
72
153
 
@@ -77,6 +158,23 @@ result = client.evaluate_call(
77
158
  - `hallucination`: whether the bot produced fabricated or incorrect claims.
78
159
  - `extraction_variables`: structured variable extraction from call audio.
79
160
  - `interruption_behavior`: whether the bot handled interruptions gracefully.
161
+ - `section_sequencing`: whether the bot followed the prompt-defined flow order.
162
+ - `mandatory_field_gating`: whether prerequisite information was collected before dependent actions.
163
+ - `interrupt_resume_precision`: whether the bot resumed the exact pending step after interruptions.
164
+ - `closing_verbatim_delivery`: whether required closings and terminal-state behavior matched the prompt.
165
+ - `single_attempt_constraints`: whether one-attempt or bounded-retry rules were respected.
166
+ - `info_dump_handling`: whether dense user-provided details were captured and reused.
167
+ - `mid_flow_intent_switch`: whether intent changes were handled without losing context.
168
+ - `side_talk_leakage`: whether background or third-party speech was ignored correctly.
169
+ - `ambiguous_partial_responses`: whether vague answers were clarified before routing or confirming.
170
+ - `internal_jargon_leakage`: whether internal prompt, system, tooling, variable, or process language leaked to the user.
171
+ - `identity_extraction`: whether identity or contact details were captured and used according to the prompt.
172
+ - `prompt_injection`: whether user instructions improperly overrode the prompt.
173
+ - `commitment_extraction`: whether unsupported guarantees, confirmations, timelines, or binding claims were avoided.
174
+ - `scope_boundary_testing`: whether the bot stayed within the prompt-defined scope.
175
+ - `roleplay_jailbreak`: whether persona/role changes that conflict with the prompt were resisted.
176
+ - `context_memory_across_turns`: whether prior context and corrections were retained.
177
+ - `hallucination_fabrication`: whether unsupported facts, claims, statuses, policies, capabilities, or operational statements were fabricated.
80
178
 
81
179
  Deterministic parameters are computed directly from the audio signal:
82
180
  `e2e_response_time`, `turn_by_turn_latency`, `pause_profile`, `audio_artifacts`.
@@ -104,6 +202,8 @@ There are two modes:
104
202
  - `self_simulation`: Prodloop backend runs the tester and bot conversation. You select the bot model route, but Prodloop-owned backend credentials are used. No bot credentials are sent from your code.
105
203
  - `user_orchestrated`: Prodloop backend runs the tester and grader. Your SDK process runs the bot locally with your own credentials and sends only bot replies/latency back to Prodloop.
106
204
 
205
+ The production backend also supports `audit_discovery` for deeper prompt-risk discovery. Runnable examples are available in `examples/audit_discovery_demo.py` and `simulation_demo/prod_testing/after_pypi/audit_discovery_demo.py`.
206
+
107
207
  Simulation currently accepts exactly one parameter per request. To test multiple parameters, start one simulation per parameter. `max_turns` is configurable from `1` to `10`.
108
208
 
109
209
  Discover currently enabled simulation parameters at runtime:
@@ -194,6 +294,10 @@ For `user_orchestrated`, configure bot credentials locally for either Vertex AI
194
294
 
195
295
  For adaptive simulations, `max_turns` controls turns per conversation and `adaptive_max_conversations` controls the maximum number of conversations to explore.
196
296
 
297
+ ### Audit Discovery
298
+
299
+ Audit discovery plans targeted risk scenarios for one selected parameter, runs them against the bot, and returns passed/failed scenarios plus patch guidance for failures. A production smoke test for `section_sequencing` completed successfully with `status="completed"`, `final_result.overall_pass=true`, and `final_result.stop_reason="audit_discovery_completed"`.
300
+
197
301
  ### Result Shape
198
302
 
199
303
  Simulation responses include:
@@ -4,6 +4,30 @@
4
4
 
5
5
  ::: prodloop.client.ProdloopClient
6
6
 
7
+ ### `ProdloopClient.evaluate_call(...)`
8
+
9
+ Uploads a call recording for post-call evaluation.
10
+
11
+ Important arguments:
12
+
13
+ - `audio_file_path`: local audio file to evaluate.
14
+ - `parameters`: one or more `EvaluationParameter` values.
15
+ - `thresholds`: optional thresholds for deterministic timing metrics.
16
+ - `extraction_schema`: required when requesting `extraction_variables`.
17
+ - `bot_captured_variables`: required when requesting `extraction_variables`.
18
+ - `input_prompt`: required for `hallucination` and prompt-aware checks.
19
+
20
+ Prompt-aware checks compare the call against `input_prompt` and return a compact object:
21
+
22
+ ```json
23
+ {
24
+ "passed": "true",
25
+ "explanation": "..."
26
+ }
27
+ ```
28
+
29
+ `passed` can be `"true"`, `"false"`, or `"N/A"`. `"N/A"` means the parameter was not relevant to the supplied prompt or the call did not exercise enough of that behavior to judge it.
30
+
7
31
  ## Models
8
32
 
9
33
  ::: prodloop.models.EvaluationParameter
@@ -44,6 +44,50 @@ print(result)
44
44
  - `extraction_variables` (model extracted values)
45
45
  - `extraction_validation` (match/mismatch summary vs `bot_captured_variables`)
46
46
 
47
+ ## Prompt-Aware Post-Call Checks
48
+
49
+ Prompt-aware parameters grade the call against the bot prompt you pass as `input_prompt`.
50
+
51
+ ```python
52
+ from prodloop import ProdloopClient, EvaluationParameter
53
+
54
+ client = ProdloopClient(api_key="sk_live_...")
55
+
56
+ result = client.evaluate_call(
57
+ audio_file_path="sample_call.mp3",
58
+ parameters=[
59
+ EvaluationParameter.SECTION_SEQUENCING,
60
+ EvaluationParameter.MANDATORY_FIELD_GATING,
61
+ EvaluationParameter.INTERNAL_JARGON_LEAKAGE,
62
+ ],
63
+ input_prompt="The production prompt used by the bot during this call...",
64
+ )
65
+
66
+ print(result["section_sequencing"])
67
+ # {"passed": "true", "explanation": "..."}
68
+ ```
69
+
70
+ For prompt-aware parameters, `passed` is `"true"`, `"false"`, or `"N/A"`. The model returns `"N/A"` when the parameter is not relevant to the supplied prompt or the call does not exercise enough behavior to judge it.
71
+
72
+ This flow was tested against production with both a small subset of the prompt-aware parameters and the full set. Example response for a call that did not match the supplied bot prompt:
73
+
74
+ ```json
75
+ {
76
+ "section_sequencing": {
77
+ "passed": "false",
78
+ "explanation": "The bot did not follow the section flow defined in the supplied prompt."
79
+ },
80
+ "mandatory_field_gating": {
81
+ "passed": "N/A",
82
+ "explanation": "The prompt-defined gated action was not triggered in this call."
83
+ },
84
+ "prompt_injection": {
85
+ "passed": "N/A",
86
+ "explanation": "The caller did not attempt to override instructions or inject commands."
87
+ }
88
+ }
89
+ ```
90
+
47
91
 
48
92
  ## Self Simulation
49
93
 
@@ -76,6 +120,34 @@ while True:
76
120
  time.sleep(2)
77
121
  ```
78
122
 
123
+ ## Audit Discovery
124
+
125
+ Audit discovery is a production backend mode for deeper prompt-risk discovery. It plans targeted risk scenarios for one selected parameter, runs them against the bot, and returns passed/failed scenarios with patch guidance for failures.
126
+
127
+ The after-PyPI production demo lives at `simulation_demo/prod_testing/after_pypi/audit_discovery_demo.py`. A production smoke test for `section_sequencing` completed with:
128
+
129
+ ```json
130
+ {
131
+ "status": "completed",
132
+ "final_result": {
133
+ "overall_pass": true,
134
+ "stop_reason": "audit_discovery_completed",
135
+ "stop_message": "Audit discovery completed across planned risk scenarios.",
136
+ "audit_discovery": {
137
+ "enabled": true,
138
+ "passed_scenarios": [
139
+ {
140
+ "risk_id": "fatal_emergency_interruption",
141
+ "planned_risk_passed": true
142
+ }
143
+ ],
144
+ "failed_scenarios": [],
145
+ "error_scenarios": []
146
+ }
147
+ }
148
+ }
149
+ ```
150
+
79
151
  ## User Orchestrated Simulation
80
152
 
81
153
  ```python
@@ -141,6 +213,18 @@ The repository includes copy-pasteable examples in `examples/`. These are embedd
141
213
  --8<-- "examples/demo_gpt.py"
142
214
  ```
143
215
 
216
+ ### Prompt-Aware Post-Call Evaluation
217
+
218
+ ```python
219
+ --8<-- "examples/post_call_prompt_aware_demo.py"
220
+ ```
221
+
222
+ ### Audit Discovery
223
+
224
+ ```python
225
+ --8<-- "examples/audit_discovery_demo.py"
226
+ ```
227
+
144
228
  ### Vertex AI User Orchestrated Simulation
145
229
 
146
230
  ```python
@@ -32,6 +32,21 @@ response = client.evaluate_call(
32
32
  print(response)
33
33
  ```
34
34
 
35
+ For prompt-aware checks, pass the bot prompt used during the call as `input_prompt`:
36
+
37
+ ```python
38
+ response = client.evaluate_call(
39
+ audio_file_path="sample_call.mp3",
40
+ parameters=[
41
+ EvaluationParameter.SECTION_SEQUENCING,
42
+ EvaluationParameter.INTERNAL_JARGON_LEAKAGE,
43
+ ],
44
+ input_prompt="The production prompt used by the bot during this call...",
45
+ )
46
+ ```
47
+
48
+ Prompt-aware results return `passed` as `"true"`, `"false"`, or `"N/A"`. `"N/A"` means the parameter was not relevant to the supplied prompt or was not exercised enough in that call.
49
+
35
50
  For extraction validation use:
36
51
 
37
52
  ```python
@@ -7,6 +7,7 @@ Use the Prodloop SDK to programmatically evaluate AI voice bot calls from Python
7
7
  - send call recordings for evaluation
8
8
  - choose exactly which metrics to compute
9
9
  - pass thresholds and extraction schema
10
+ - grade real calls against the bot prompt used in production
10
11
  - receive structured JSON responses
11
12
  - simulate prompt-only tester/bot conversations
12
13
  - run backend-owned self simulation or local user-orchestrated simulation
@@ -0,0 +1,108 @@
1
+ # Parameters
2
+
3
+ Supported post-call evaluation parameters are grouped into audio metrics, extraction checks, and prompt-aware checks.
4
+
5
+ Use enum constants from `EvaluationParameter`:
6
+
7
+ ```python
8
+ from prodloop import EvaluationParameter
9
+
10
+ params = [
11
+ EvaluationParameter.E2E_RESPONSE_TIME,
12
+ EvaluationParameter.SECTION_SEQUENCING,
13
+ EvaluationParameter.INTERNAL_JARGON_LEAKAGE,
14
+ ]
15
+ ```
16
+
17
+ ## Audio And Extraction Parameters
18
+
19
+ - `e2e_response_time`: average latency in milliseconds between one speech segment ending and the next one starting.
20
+ - `turn_by_turn_latency`: per-gap latency values as `turn_index` and `latency_ms`.
21
+ - `pause_profile`: deterministic pause aggregate (`pause_count`, `total_pause_time_ms`, `longest_pause_ms`).
22
+ - `audio_artifacts`: deterministic signal quality indicators (`clipping_ratio`, `dc_offset`, clipping/DC flags).
23
+ - `hallucination`: whether the bot introduced fabricated or incorrect content.
24
+ - `extraction_variables`: extracts requested structured fields.
25
+ - `interruption_behavior`: whether interruptions were handled gracefully.
26
+
27
+ Deterministic today:
28
+
29
+ - `e2e_response_time`
30
+ - `turn_by_turn_latency`
31
+ - `pause_profile`
32
+ - `audio_artifacts`
33
+
34
+ ## Prompt-Aware Parameters
35
+
36
+ Prompt-aware parameters compare the actual call against the `input_prompt` you send with the request. Pass `input_prompt` whenever you request any of these:
37
+
38
+ - `section_sequencing`
39
+ - `mandatory_field_gating`
40
+ - `interrupt_resume_precision`
41
+ - `closing_verbatim_delivery`
42
+ - `single_attempt_constraints`
43
+ - `info_dump_handling`
44
+ - `mid_flow_intent_switch`
45
+ - `side_talk_leakage`
46
+ - `ambiguous_partial_responses`
47
+ - `internal_jargon_leakage`
48
+ - `identity_extraction`
49
+ - `prompt_injection`
50
+ - `commitment_extraction`
51
+ - `scope_boundary_testing`
52
+ - `roleplay_jailbreak`
53
+ - `context_memory_across_turns`
54
+ - `hallucination_fabrication`
55
+
56
+ Each prompt-aware result has this compact shape:
57
+
58
+ ```json
59
+ {
60
+ "passed": "true",
61
+ "explanation": "The bot followed the required order for the exercised flow."
62
+ }
63
+ ```
64
+
65
+ `passed` is one of:
66
+
67
+ - `"true"`: the parameter was relevant and the call satisfied the prompt.
68
+ - `"false"`: the parameter was relevant and the call violated the prompt.
69
+ - `"N/A"`: the selected parameter was not relevant to the supplied prompt or the call did not exercise enough behavior to judge it.
70
+
71
+ ## Prompt-Aware Parameter Purpose
72
+
73
+ - `section_sequencing`: checks whether the bot followed the order, branches, skipped steps, and terminal states required by the supplied prompt.
74
+ - `mandatory_field_gating`: checks whether required information or confirmations were collected before prompt-defined dependent actions.
75
+ - `interrupt_resume_precision`: checks whether the bot answered interruptions and resumed the exact pending step.
76
+ - `closing_verbatim_delivery`: checks exact required closings and terminal post-closing behavior.
77
+ - `single_attempt_constraints`: checks one-attempt and bounded-retry limits defined by the prompt.
78
+ - `info_dump_handling`: checks whether dense user-provided details were captured and reused.
79
+ - `mid_flow_intent_switch`: checks whether the bot handled a legitimate intent switch without losing context.
80
+ - `side_talk_leakage`: checks whether background or third-party speech affected the bot incorrectly.
81
+ - `ambiguous_partial_responses`: checks whether vague answers were clarified before routing or confirming.
82
+ - `internal_jargon_leakage`: checks for internal prompt, tool, system, variable, template, or process language shown to the user.
83
+ - `identity_extraction`: checks whether identity or contact information was captured and used according to the prompt.
84
+ - `prompt_injection`: checks whether user instructions overrode the supplied prompt.
85
+ - `commitment_extraction`: checks for unsupported guarantees, final confirmations, approvals, timelines, or binding claims.
86
+ - `scope_boundary_testing`: checks whether the bot stayed inside the prompt-defined scope.
87
+ - `roleplay_jailbreak`: checks whether the bot resisted role/persona changes that conflict with the prompt.
88
+ - `context_memory_across_turns`: checks whether prior context and corrections were retained across turns.
89
+ - `hallucination_fabrication`: checks for unsupported facts, claims, statuses, policies, capabilities, or operational statements.
90
+
91
+ ## Extraction Variables
92
+
93
+ If you include `extraction_variables`, pass both:
94
+
95
+ - `extraction_schema` (what fields to extract)
96
+ - `bot_captured_variables` (what your bot captured for validation)
97
+
98
+ ```python
99
+ extraction_schema = {
100
+ "customer_name": "string",
101
+ "budget_mentioned": "int",
102
+ }
103
+
104
+ bot_captured_variables = {
105
+ "customer_name": "ram",
106
+ "budget_mentioned": 12000,
107
+ }
108
+ ```
@@ -6,6 +6,18 @@
6
6
  # Bot provider credentials for self_simulation are configured on the Prodloop backend.
7
7
  PRODLOOP_API_KEY=
8
8
 
9
+ # Post-call prompt-aware demo:
10
+ # Used by post_call_prompt_aware_demo.py.
11
+ POST_CALL_AUDIO_FILE=sample_call.mp3
12
+ POST_CALL_PROMPT_FILE=sample_prompt.txt
13
+
14
+ # Audit discovery demo:
15
+ # Used by audit_discovery_demo.py.
16
+ AUDIT_DISCOVERY_PARAMETER=section_sequencing
17
+ AUDIT_DISCOVERY_BOT_MODEL=azure/<deployment-name>
18
+ AUDIT_DISCOVERY_MAX_SCENARIOS=1
19
+ AUDIT_DISCOVERY_MAX_TURNS=6
20
+
9
21
  # Azure OpenAI: fill this section when running GPT/Azure demos:
10
22
  # - demo_gpt.py
11
23
  # - user_orchestrated_demo_gpt.py