lucidicai 1.2.14.tar.gz → 1.2.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {lucidicai-1.2.14 → lucidicai-1.2.16}/PKG-INFO +1 -1
  2. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/__init__.py +18 -2
  3. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/client.py +19 -3
  4. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/event.py +2 -2
  5. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/model_pricing.py +14 -1
  6. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/anthropic_handler.py +0 -7
  7. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/langchain.py +0 -78
  8. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/openai_handler.py +1 -56
  9. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/pydantic_ai_handler.py +1 -18
  10. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/session.py +10 -4
  11. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/step.py +4 -4
  12. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/streaming.py +2 -3
  13. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai.egg-info/PKG-INFO +1 -1
  14. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai.egg-info/SOURCES.txt +1 -0
  15. {lucidicai-1.2.14 → lucidicai-1.2.16}/setup.py +1 -1
  16. lucidicai-1.2.16/tests/test_anthropic_thinking.py +325 -0
  17. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/constants.py +0 -0
  18. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/errors.py +0 -0
  19. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/image_upload.py +0 -0
  20. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/__init__.py +0 -0
  21. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/base_providers.py +0 -0
  22. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/openai_agents_handler.py +0 -0
  23. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/providers/opentelemetry_converter.py +0 -0
  24. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai/singleton.py +0 -0
  25. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai.egg-info/dependency_links.txt +0 -0
  26. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai.egg-info/requires.txt +0 -0
  27. {lucidicai-1.2.14 → lucidicai-1.2.16}/lucidicai.egg-info/top_level.txt +0 -0
  28. {lucidicai-1.2.14 → lucidicai-1.2.16}/setup.cfg +0 -0
  29. {lucidicai-1.2.14 → lucidicai-1.2.16}/tests/test_anthropic_comprehensive.py +0 -0
  30. {lucidicai-1.2.14 → lucidicai-1.2.16}/tests/test_event_display.py +0 -0
  31. {lucidicai-1.2.14 → lucidicai-1.2.16}/tests/test_openai_agents_9_patterns_fixed.py +0 -0
  32. {lucidicai-1.2.14 → lucidicai-1.2.16}/tests/test_openai_comprehensive.py +0 -0
  33. {lucidicai-1.2.14 → lucidicai-1.2.16}/tests/test_pydantic_ai_comprehensive.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lucidicai
-Version: 1.2.14
+Version: 1.2.16
 Summary: Lucidic AI Python SDK
 Author: Andy Liang
 Author-email: andy@lucidic.ai

lucidicai/__init__.py
@@ -101,9 +101,11 @@ def init(
     agent_id: Optional[str] = None,
     task: Optional[str] = None,
     providers: Optional[List[ProviderType]] = [],
+    production_monitoring: Optional[bool] = False,
     mass_sim_id: Optional[str] = None,
     rubrics: Optional[list] = None,
     tags: Optional[list] = None,
+    masking_function = None,
 ) -> str:
     """
     Initialize the Lucidic client.
@@ -117,6 +119,7 @@ def init(
         mass_sim_id: Optional mass simulation ID, if session is to be part of a mass simulation.
         rubrics: Optional rubrics for evaluation, list of strings.
         tags: Optional tags for the session, list of strings.
+        masking_function: Optional function to mask sensitive data.
 
     Raises:
         InvalidOperationError: If the client is already initialized.
@@ -137,6 +140,13 @@ def init(
     if not getattr(client, 'initialized', False):
         client = Client(lucidic_api_key=lucidic_api_key, agent_id=agent_id)
 
+    if not production_monitoring:
+        production_monitoring = os.getenv("LUCIDIC_PRODUCTION_MONITORING", False)
+        if production_monitoring == "True":
+            production_monitoring = True
+        else:
+            production_monitoring = False
+
     # Set up providers
     _setup_providers(client, providers)
     session_id = client.init_session(
@@ -144,8 +154,11 @@ def init(
         mass_sim_id=mass_sim_id,
         task=task,
         rubrics=rubrics,
-        tags=tags
+        tags=tags,
+        production_monitoring=production_monitoring,
     )
+    if masking_function:
+        client.masking_function = masking_function
     logger.info("Session initialized successfully")
     return session_id
 
@@ -154,7 +167,8 @@ def continue_session(
     session_id: str,
     lucidic_api_key: Optional[str] = None,
     agent_id: Optional[str] = None,
-    providers: Optional[List[ProviderType]] = []
+    providers: Optional[List[ProviderType]] = [],
+    masking_function = None,
 ):
     if lucidic_api_key is None:
         lucidic_api_key = os.getenv("LUCIDIC_API_KEY", None)
@@ -177,6 +191,8 @@ def continue_session(
     # Set up providers
     _setup_providers(client, providers)
     session_id = client.continue_session(session_id=session_id)
+    if masking_function:
+        client.masking_function = masking_function
     logger.info(f"Session {session_id} continuing...")
     return session_id  # For consistency
 
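Taken together, the __init__.py changes add two opt-in knobs to lai.init. A minimal usage sketch follows (the redact helper is illustrative, not part of the SDK; session_name and providers are used as in tests/test_anthropic_thinking.py below). Note the environment fallback treats only the literal string "True" as truthy: any other value of LUCIDIC_PRODUCTION_MONITORING coerces to False.

    import re
    import lucidicai as lai

    def redact(text: str) -> str:
        # Illustrative masking function: hide anything email-shaped.
        return re.sub(r"\S+@\S+", "<redacted>", text)

    session_id = lai.init(
        session_name="Masked Run",
        providers=["anthropic"],
        production_monitoring=True,  # or export LUCIDIC_PRODUCTION_MONITORING=True
        masking_function=redact,     # stored on the client, applied before upload
    )
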
lucidicai/client.py
@@ -4,6 +4,7 @@ from datetime import datetime, timezone
 from typing import Optional, Tuple
 
 import requests
+import logging
 from requests.adapters import HTTPAdapter, Retry
 from urllib3.util import Retry
 
@@ -30,6 +31,7 @@ class Client:
         self.providers = []
         self.api_key = lucidic_api_key
         self.agent_id = agent_id
+        self.masking_function = None
         self.request_session = requests.Session()
         retry_cfg = Retry(
             total=3,  # 3 attempts in total
@@ -73,7 +75,8 @@ class Client:
         mass_sim_id: Optional[str] = None,
         task: Optional[str] = None,
         rubrics: Optional[list] = None,
-        tags: Optional[list] = None
+        tags: Optional[list] = None,
+        production_monitoring: Optional[bool] = False
     ) -> None:
         self.session = Session(
             agent_id=self.agent_id,
@@ -81,7 +84,8 @@ class Client:
             mass_sim_id=mass_sim_id,
             task=task,
             rubrics=rubrics,
-            tags=tags
+            tags=tags,
+            production_monitoring=production_monitoring
         )
         self.initialized = True
         return self.session.session_id
@@ -147,4 +151,16 @@ class Client:
             response.raise_for_status()
         except requests.exceptions.HTTPError as e:
             raise InvalidOperationError(f"Request to Lucidic AI Backend failed: {e.response.text}")
-        return response.json()
+        return response.json()
+
+    def mask(self, data):
+        if not self.masking_function:
+            return data
+        if not data:
+            return data
+        try:
+            return self.masking_function(data)
+        except Exception as e:
+            logger = logging.getLogger('Lucidic')
+            logger.error(f"Error in custom masking function: {repr(e)}")
+            return "<Error in custom masking function, this is a fully-masked placeholder>"

lucidicai/event.py
@@ -40,8 +40,8 @@ class Event:
         self.is_finished = kwargs['is_finished']
         request_data = {
             "event_id": self.event_id,
-            "description": kwargs.get("description", None),
-            "result": kwargs.get("result", None),
+            "description": Client().mask(kwargs.get("description", None)),
+            "result": Client().mask(kwargs.get("result", None)),
             "is_finished": self.is_finished,
             "cost_added": kwargs.get("cost_added", None),
             "model": kwargs.get("model", None),

lucidicai/model_pricing.py
@@ -141,6 +141,7 @@ MODEL_PRICING = {
    "deepseek-ai/deepseek-r1-distill-llama-70b": {"input": 0.75, "output": 0.99},
    "deepseek-coder": {"input": 0.14, "output": 0.28},
    "deepseek-chat": {"input": 0.14, "output": 0.28},
+   "deepseek/deepseek-v3-0324": {"input": 0.14, "output": 0.28},
 
    # Qwen Models
    "qwen-qwq-32b": {"input": 0.29, "output": 0.39},
@@ -148,6 +149,8 @@ MODEL_PRICING = {
    "qwen-turbo": {"input": 0.3, "output": 0.6},
    "qwen-plus": {"input": 0.5, "output": 2.0},
    "qwen-max": {"input": 2.0, "output": 6.0},
+   "qwen2.5-32b-instruct": {"input": 0.7, "output": "2.8"},
+   "qwen2.5-max": {"input": 1.6, "output": 6.4},
 
    # Google Gemma Models
    "gemma-2-9b": {"input": 0.20, "output": 0.20},
@@ -163,7 +166,14 @@ MODEL_PRICING = {
    "pplx-7b-chat": {"input": 0.07, "output": 0.28},
    "pplx-70b-chat": {"input": 0.7, "output": 2.8},
    "pplx-7b-online": {"input": 0.07, "output": 0.28},
-   "pplx-70b-online": {"input": 0.7, "output": 2.8}
+   "pplx-70b-online": {"input": 0.7, "output": 2.8},
+
+   # Grok Models
+   "grok-3-latest": {"input": 3, "output": 15},
+   "grok-3": {"input": 3, "output": 15},
+   "grok-3-fast": {"input": 5, "output": 25},
+   "grok-3-mini": {"input": 0.3, "output": 0.5},
+   "grok-3-mini-fast": {"input": 0.6, "output": 4},
 
 }
 
@@ -179,6 +189,7 @@ PROVIDER_AVERAGES = {
    "qwen": {"input": 0.5, "output": 1.0},  # Qwen average
    "together": {"input": 0.15, "output": 0.15},  # Together AI average
    "perplexity": {"input": 0.4, "output": 1.5},  # Perplexity average
+   "grok": {"input": 2.4, "output": 12},  # Grok average
 }
 
 def get_provider_from_model(model: str) -> str:
@@ -205,6 +216,8 @@ def get_provider_from_model(model: str) -> str:
         return "together"
     elif any(pplx in model_lower for pplx in ["pplx", "perplexity"]):
         return "perplexity"
+    elif any(grok in model_lower for grok in ["grok", "xAI"]):
+        return "grok"
     else:
         return "unknown"
 
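A quick sketch of how the new Grok entries resolve (prices appear to be USD per million tokens, in line with the surrounding entries; since get_provider_from_model lowercases the model name first, it is the "grok" substring that does the matching):

    from lucidicai.model_pricing import MODEL_PRICING, PROVIDER_AVERAGES, get_provider_from_model

    get_provider_from_model("grok-3-mini")  # -> "grok"
    MODEL_PRICING["grok-3-mini"]            # -> {"input": 0.3, "output": 0.5}
    PROVIDER_AVERAGES["grok"]               # -> {"input": 2.4, "output": 12}, the fallback for unlisted grok models
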
lucidicai/providers/anthropic_handler.py
@@ -51,7 +51,6 @@ class AnthropicHandler(BaseProvider):
         return " ".join(descriptions), screenshots
 
     def handle_response(self, response, kwargs):
-        event = Client().session.active_step
 
         # for synchronous streaming responses
         if isinstance(response, Stream):
@@ -222,9 +221,6 @@ class AnthropicHandler(BaseProvider):
         self.original_create_async = AsyncMessages.create
 
         def patched_create(*args, **kwargs):
-            step = Client().session.active_step
-            if not step:
-                return self.original_create(*args, **kwargs)
             description, images = self._format_messages(kwargs.get("messages", []))
 
             event_id = Client().session.create_event(
@@ -237,9 +233,6 @@
             return self.handle_response(result, kwargs)
 
         async def patched_create_async(*args, **kwargs):
-            step = Client().session.active_step
-            if not step:
-                return self.original_create_async(*args, **kwargs)
             description, images = self._format_messages(kwargs.get("messages", []))
 
             event_id = Client().session.create_event(

lucidicai/providers/langchain.py
@@ -63,11 +63,6 @@ class LucidicLangchainHandler(BaseCallbackHandler):
                 text.append(prompt)
             elif isinstance(prompt, dict) and 'image' in prompt:
                 images.append(prompt['image'])
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot create event - no active session or step")
-            return
 
         try:
             # Create a new event
@@ -112,12 +107,6 @@
                     image_url = block.get("image_url", "")
                     image_str = image_url.get('url', "")
                     images_b64.append(image_str[image_str.find(',') + 1:])
-
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot create event - no active session or step")
-            return
 
         try:
             # Create a new event
@@ -157,11 +146,6 @@
         message = response.generations[0][0].message
         usage = message.usage_metadata
         cost = calculate_cost(model, usage)
-
-        # Make sure we have a valid session
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
 
         try:
             if run_str in self.run_to_event:
@@ -206,11 +190,6 @@
         logger.debug("Handling LLM error in Langchain Handler, ending event...")
         run_str = str(run_id)
         model = self.run_to_model.get(run_str, "unknown")
-
-        # Make sure we have a valid session
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
 
         try:
             if run_str in self.run_to_event:
@@ -266,12 +245,6 @@
                     image_url = block.get("image_url", "")
                     image_str = image_url.get('url', "")
                     images_b64.append(image_str[image_str.find(',') + 1:])
-
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot create event - no active session or step")
-            return
 
         try:
             # Create a new event
@@ -285,11 +258,6 @@
         logger.debug("Ending chain execution in Langchain Handler, ending event...")
         run_id = str(kwargs.get("run_id", "unknown"))
 
-        # Make sure we have a valid session
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
-
         # Extract result from outputs
         result = None
         if outputs:
@@ -321,11 +289,6 @@
         """Handle chain errors"""
         logger.debug("Handling chain error in Langchain Handler, ending event...")
         run_id = str(kwargs.get("run_id", "unknown"))
-
-        # Make sure we have a valid session
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
 
         try:
             if run_id in self.run_to_event:
@@ -352,11 +315,6 @@
         run_id = str(kwargs.get("run_id", "unknown"))
         tool_name = serialized.get("name", "Unknown Tool")
         description = f"Tool Call ({tool_name}): {input_str[:100]}..."
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot create event - no active session or step")
-            return
 
         try:
             # Create event
@@ -372,11 +330,6 @@
         logger.debug("Ending tool execution in Langchain Handler, ending event...")
         run_id = str(kwargs.get("run_id", "unknown"))
 
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
-
         # Get result from output
         result = None
         if output is not None:
@@ -404,11 +357,6 @@
         """
         logger.debug("Handling tool error in Langchain Handler, ending event...")
         run_id = str(kwargs.get("run_id", "unknown"))
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
 
         try:
             if run_id in self.run_to_event:
@@ -434,11 +382,6 @@
         run_id = str(kwargs.get("run_id", "unknown"))
         retriever_type = serialized.get("name", "Unknown Retriever")
         description = f"Retriever ({retriever_type}): {query[:100]}..."
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot create event - no active session or step")
-            return
 
         try:
             # Create event
@@ -454,11 +397,6 @@
         logger.debug("Ending retriever execution in Langchain Handler, ending event...")
         run_id = str(kwargs.get("run_id", "unknown"))
 
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
-
         # Extract result from documents
         result = None
         if documents:
@@ -493,11 +431,6 @@
         """
         logger.debug("Handling retriever error in Langchain Handler, ending event...")
         run_id = str(kwargs.get("run_id", "unknown"))
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
 
         try:
             if run_id in self.run_to_event:
@@ -524,11 +457,6 @@
         tool = getattr(action, 'tool', 'unknown_tool')
         description = f"Agent Action: {tool}"
 
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot create event - no active session or step")
-            return
-
         # Extract useful information from the action
         result = None
         try:
@@ -571,12 +499,6 @@
         """
         logger.debug("Handling agent finish in Langchain Handler, ending event...")
         run_id = str(kwargs.get("run_id", "unknown"))
-
-
-        # Make sure we have a valid session and step
-        if not (Client().session and Client().session.active_step):
-            logger.warning("Cannot end event - no active session or step")
-            return
 
         # Extract result from finish
         result = None

lucidicai/providers/openai_handler.py
@@ -122,20 +122,7 @@ class OpenAIHandler(BaseProvider):
             if session is None:
                 logger.info(f"[OpenAI Handler] No session, skipping tracking")
                 return await original_method(*args, **kwargs)
-
-            # Auto-create step if no active step exists
-            if session.active_step is None:
-                logger.info(f"[OpenAI Handler] No active step, auto-creating step")
-                try:
-                    step_id = session.create_step(
-                        state="Auto-created step for API call",
-                        action=f"Execute {method_name}",
-                        goal="Process API request"
-                    )
-                    logger.info(f"[OpenAI Handler] Created step: {step_id}")
-                except Exception as e:
-                    logger.error(f"[OpenAI Handler] Failed to auto-create step: {e}")
-                    return await original_method(*args, **kwargs)
+
 
             # Prepare kwargs
             self._prepare_streaming_kwargs(method_name, kwargs)
@@ -157,20 +144,6 @@
                 logger.info(f"[OpenAI Handler] No session, skipping tracking")
                 return original_method(*args, **kwargs)
 
-            # Auto-create step if no active step exists
-            if session.active_step is None:
-                logger.info(f"[OpenAI Handler] No active step, auto-creating step")
-                try:
-                    step_id = session.create_step(
-                        state="Auto-created step for API call",
-                        action=f"Execute {method_name}",
-                        goal="Process API request"
-                    )
-                    logger.info(f"[OpenAI Handler] Created step: {step_id}")
-                except Exception as e:
-                    logger.error(f"[OpenAI Handler] Failed to auto-create step: {e}")
-                    return original_method(*args, **kwargs)
-
             # Prepare kwargs
             self._prepare_streaming_kwargs(method_name, kwargs)
 
@@ -394,20 +367,6 @@
                 logger.info(f"[OpenAI Handler] No session, skipping tracking")
                 return await original_method(*args, **kwargs)
 
-            # Auto-create step if no active step exists
-            if session.active_step is None:
-                logger.info(f"[OpenAI Handler] No active step, auto-creating step")
-                try:
-                    step_id = session.create_step(
-                        state="Auto-created step for responses API call",
-                        action="Execute responses.create",
-                        goal="Process API request"
-                    )
-                    logger.info(f"[OpenAI Handler] Created step: {step_id}")
-                except Exception as e:
-                    logger.error(f"[OpenAI Handler] Failed to auto-create step: {e}")
-                    return await original_method(*args, **kwargs)
-
             # Check for agent context
             agent_name = self._get_agent_name_from_input(kwargs.get('input', []))
 
@@ -500,20 +459,6 @@
                 logger.info(f"[OpenAI Handler] No session, skipping tracking")
                 return original_method(*args, **kwargs)
 
-            # Auto-create step if no active step exists
-            if session.active_step is None:
-                logger.info(f"[OpenAI Handler] No active step, auto-creating step")
-                try:
-                    step_id = session.create_step(
-                        state="Auto-created step for responses API call",
-                        action="Execute responses.create",
-                        goal="Process API request"
-                    )
-                    logger.info(f"[OpenAI Handler] Created step: {step_id}")
-                except Exception as e:
-                    logger.error(f"[OpenAI Handler] Failed to auto-create step: {e}")
-                    return original_method(*args, **kwargs)
-
             # Check for agent context
             agent_name = self._get_agent_name_from_input(kwargs.get('input', []))
 
lucidicai/providers/pydantic_ai_handler.py
@@ -381,11 +381,6 @@ class PydanticAIHandler(BaseProvider):
 
     def _wrap_request(self, model_instance, messages, model_settings, model_request_parameters, original_method):
         """Wrap regular request method to track LLM calls"""
-        # Create event before API call
-        step = Client().session.active_step
-        if step is None:
-            return original_method(model_instance, messages, model_settings, model_request_parameters)
-
         description = self._format_messages(messages)
         event_id = Client().session.create_event(
             description=description,
@@ -412,13 +407,6 @@
 
     def _wrap_request_stream_context_manager(self, model_instance, messages, model_settings, model_request_parameters, original_method):
         """Return an async context manager for streaming requests"""
-        # Create event before API call
-        event_id = None
-        step = Client().session.active_step
-
-        if step is None:
-            return original_method(model_instance, messages, model_settings, model_request_parameters)
-
         description = self._format_messages(messages)
         event_id = Client().session.create_event(
             description=description,
@@ -466,13 +454,8 @@
 
     async def _wrap_request_stream(self, model_instance, messages, model_settings, model_request_parameters, original_method):
         """Wrap streaming request method"""
-        # Create event before API call
-        step = Client().session.active_step
-        if step is None:
-            return original_method(model_instance, messages, model_settings, model_request_parameters)
-
         description = self._format_messages(messages)
-        event = step.create_event(
+        event = Client().session.create_event(
             description=description,
             result="Streaming response..."
         )

lucidicai/session.py
@@ -43,7 +43,8 @@ class Session:
             "task": kwargs.get("task", None),
             "mass_sim_id": kwargs.get("mass_sim_id", None),
             "rubrics": kwargs.get("rubrics", None),
-            "tags": kwargs.get("tags", None)
+            "tags": kwargs.get("tags", None),
+            "production_monitoring": kwargs.get("production_monitoring", False)
         }
         data = Client().make_request('initsession', 'POST', request_data)
         self.session_id = data["session_id"]
@@ -73,9 +74,9 @@
             "is_finished": kwargs.get("is_finished", None),
             "task": kwargs.get("task", None),
             "is_successful": kwargs.get("is_successful", None),
-            "is_successful_reason": kwargs.get("is_successful_reason", None),
+            "is_successful_reason": Client().mask(kwargs.get("is_successful_reason", None)),
             "session_eval": kwargs.get("session_eval", None),
-            "session_eval_reason": kwargs.get("session_eval_reason", None),
+            "session_eval_reason": Client().mask(kwargs.get("session_eval_reason", None)),
             "tags": kwargs.get("tags", None)
         }
         Client().make_request('updatesession', 'PUT', request_data)
@@ -101,12 +102,14 @@
 
     def create_event(self, **kwargs):
         # Get step_id from kwargs or active step
+        temp_step_created = False
         if 'step_id' in kwargs and kwargs['step_id'] is not None:
             step_id = kwargs['step_id']
         elif self._active_step:
             step_id = self._active_step
         else:
-            raise InvalidOperationError("No active step to create event in and no step_id provided")
+            step_id = self.create_step()
+            temp_step_created = True
         kwargs.pop('step_id', None)
         event = Event(
             session_id=self.session_id,
@@ -115,6 +118,9 @@
         )
         self.event_history[event.event_id] = event
         self._active_event = event
+        if temp_step_created:
+            self.update_step(step_id=step_id, is_finished=True)
+            self._active_step = None
         return event.event_id
 
     def update_event(self, **kwargs):
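With this change, Session.create_event no longer raises when nothing is active: if no step_id is passed and no step is active, a temporary step is created, the event is attached to it, and the step is immediately finished and cleared. A sketch of the new behavior (method names as in the diff above):

    from lucidicai.client import Client

    session = Client().session
    # Pre-1.2.16 this raised InvalidOperationError without an active step;
    # now the event is wrapped in a short-lived, auto-finished step.
    event_id = session.create_event(description="ad-hoc call", result="ok")

This is also what allows the active-step guards and per-handler auto-created steps to be removed from the Anthropic, Langchain, OpenAI, and Pydantic AI handlers above.
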
lucidicai/step.py
@@ -48,11 +48,11 @@ class Step:
             upload_image_to_s3(presigned_url, screenshot, "JPEG")
         request_data = {
             "step_id": self.step_id,
-            "goal": kwargs['goal'] if 'goal' in kwargs else None,
-            "action": kwargs['action'] if 'action' in kwargs else None,
-            "state": kwargs['state'] if 'state' in kwargs else None,
+            "goal": Client().mask(kwargs['goal']) if 'goal' in kwargs else None,
+            "action": Client().mask(kwargs['action']) if 'action' in kwargs else None,
+            "state": Client().mask(kwargs['state']) if 'state' in kwargs else None,
             "eval_score": kwargs['eval_score'] if 'eval_score' in kwargs else None,
-            "eval_description": kwargs['eval_description'] if 'eval_description' in kwargs else None,
+            "eval_description": Client().mask(kwargs['eval_description']) if 'eval_description' in kwargs else None,
             "is_finished": kwargs['is_finished'] if 'is_finished' in kwargs else None,
             "has_screenshot": True if screenshot else None
         }

lucidicai/streaming.py
@@ -14,7 +14,6 @@ class StreamingResponseWrapper:
 
     def __init__(self, response: Any, session: Any, kwargs: Dict[str, Any]):
         self.response = response
-        self.session = session
         self.kwargs = kwargs
         self.chunks = []
         self.start_time = time.time()
@@ -34,7 +33,7 @@
             logger.info(f"[Streaming] Using existing event ID: {self.event_id}")
             return
 
-        if self.session and hasattr(self.session, 'active_step') and self.session.active_step:
+        if Client().session:
             description, images = self._format_messages(self.kwargs.get('messages', ''))
 
             event_data = {
@@ -54,7 +53,7 @@
                 if images:
                     event_data['screenshots'] = images
 
-                self.event_id = self.session.create_event(**event_data)
+                self.event_id = Client().session.create_event(**event_data)
                 logger.debug(f"[Streaming] Created new streaming event with ID: {self.event_id}")
             except Exception as e:
                 logger.error(f"[Streaming] Error creating initial streaming event: {str(e)}")

lucidicai.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lucidicai
-Version: 1.2.14
+Version: 1.2.16
 Summary: Lucidic AI Python SDK
 Author: Andy Liang
 Author-email: andy@lucidic.ai

lucidicai.egg-info/SOURCES.txt
@@ -24,6 +24,7 @@ lucidicai/providers/openai_handler.py
 lucidicai/providers/opentelemetry_converter.py
 lucidicai/providers/pydantic_ai_handler.py
 tests/test_anthropic_comprehensive.py
+tests/test_anthropic_thinking.py
 tests/test_event_display.py
 tests/test_openai_agents_9_patterns_fixed.py
 tests/test_openai_comprehensive.py

setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="lucidicai",
-    version="1.2.14",
+    version="1.2.16",
     packages=find_packages(),
     install_requires=[
         "requests>=2.25.1",

tests/test_anthropic_thinking.py (new file)
@@ -0,0 +1,325 @@
+#!/usr/bin/env python3
+"""
+Test script for Lucidic SDK with Anthropic Thinking mode.
+This tests the SDK's ability to handle ThinkingBlock responses from Anthropic.
+
+Required environment variables:
+- ANTHROPIC_API_KEY: Your Anthropic API key
+- LUCIDIC_API_KEY: Your Lucidic API key
+- LUCIDIC_AGENT_ID: Your Lucidic agent ID
+
+You can set these in a .env file or export them in your shell.
+
+Note: Extended thinking is supported in these models:
+- Claude Opus 4 (claude-opus-4-20250514)
+- Claude Sonnet 4 (claude-sonnet-4-20250514)
+- Claude Sonnet 3.7 (claude-3-7-sonnet-20250219)
+
+This test uses Claude 3.7 Sonnet. For Claude 4 models, you can also use
+the beta header "anthropic-beta: interleaved-thinking-2025-05-14" for
+interleaved thinking between tool calls.
+"""
+
+import os
+import asyncio
+from anthropic import Anthropic
+import lucidicai as lai
+from dotenv import load_dotenv
+
+load_dotenv()
+
+def test_anthropic_thinking_sync():
+    """Test synchronous Anthropic calls with thinking mode"""
+    print("Testing Anthropic Thinking mode (synchronous)...")
+
+    # Create Anthropic client - SDK will automatically handle it with the provider
+    client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
+
+    # Create a step for this test
+    lai.create_step(
+        action="Test Anthropic Thinking Mode",
+        goal="Testing ThinkingBlock handling in Anthropic responses"
+    )
+
+    try:
+        # Test 1: Simple thinking mode request
+        print("\nTest 1: Simple thinking request...")
+        response = client.messages.create(
+            model="claude-3-7-sonnet-20250219",  # Using Claude 3.7 Sonnet which supports extended thinking
+            max_tokens=4096,
+            messages=[{
+                "role": "user",
+                "content": "Think step by step about how to calculate the factorial of 5."
+            }],
+            thinking={
+                "type": "enabled",
+                "budget_tokens": 2048  # Minimum is 1024
+            }
+        )
+
+        # Check for thinking blocks in response
+        thinking_content = None
+        text_content = None
+        for block in response.content:
+            if hasattr(block, 'type'):
+                if block.type == 'thinking':
+                    thinking_content = getattr(block, 'thinking', '')
+                    print(f"Thinking block detected: {thinking_content[:100]}...")
+                elif block.type == 'text':
+                    text_content = block.text
+
+        if text_content:
+            print(f"Response: {text_content[:200]}...")
+        else:
+            print("No text content found in response")
+
+        # Test 2: Complex reasoning with thinking
+        print("\nTest 2: Complex reasoning with thinking...")
+        response2 = client.messages.create(
+            model="claude-3-7-sonnet-20250219",  # Using Claude 3.7 Sonnet which supports extended thinking
+            max_tokens=5000,
+            messages=[{
+                "role": "user",
+                "content": "Think through this problem: If a train leaves Station A at 2 PM traveling at 60 mph, and another train leaves Station B at 3 PM traveling at 80 mph, and the stations are 280 miles apart, when will they meet?"
+            }],
+            thinking={
+                "type": "enabled",
+                "budget_tokens": 3000
+            }
+        )
+
+        # Check for thinking blocks in response2
+        for block in response2.content:
+            if hasattr(block, 'type'):
+                if block.type == 'thinking':
+                    print(f"Thinking block detected in response2")
+                elif block.type == 'text':
+                    print(f"Response: {block.text[:200]}...")
+
+        # Test 3: Multi-turn conversation with thinking
+        print("\nTest 3: Multi-turn conversation with thinking...")
+        messages = [
+            {"role": "user", "content": "Let's solve a logic puzzle. Think through this: Three houses are in a row. The red house is to the left of the green house. The blue house is to the right of the red house. What is the order of the houses?"},
+            {"role": "assistant", "content": "I need to think through this step-by-step.\n\nGiven information:\n- Three houses in a row\n- Red house is to the left of green house\n- Blue house is to the right of red house\n\nFrom 'Red is left of Green': Red < Green\nFrom 'Blue is right of Red': Red < Blue\n\nSo we have: Red < Green and Red < Blue\n\nThis means Red must be the leftmost house. Now I need to determine the order of Blue and Green.\n\nIf the order were Red, Green, Blue, that would satisfy both conditions.\nIf the order were Red, Blue, Green, that would also satisfy both conditions.\n\nWait, let me reconsider. If Blue is to the right of Red, and Red is to the left of Green, we need to check if there's a unique solution.\n\nActually, the order must be: Red, Blue, Green\n\nThis satisfies:\n- Red is to the left of Green ✓\n- Blue is to the right of Red ✓"},
+            {"role": "user", "content": "Think about whether your answer is correct. What if the order was Red, Green, Blue?"}
+        ]
+
+        response3 = client.messages.create(
+            model="claude-3-7-sonnet-20250219",  # Using Claude 3.7 Sonnet which supports extended thinking
+            max_tokens=4096,
+            messages=messages,
+            thinking={
+                "type": "enabled",
+                "budget_tokens": 2048
+            }
+        )
+
+        # Check for thinking blocks in response3
+        for block in response3.content:
+            if hasattr(block, 'type'):
+                if block.type == 'thinking':
+                    print(f"Thinking block detected in response3")
+                elif block.type == 'text':
+                    print(f"Response: {block.text[:200]}...")
+
+        # Update step with results
+        lai.update_step(
+            state="Test completed successfully",
+            action="Ran 3 thinking mode tests",
+            eval_score=1.0,
+            eval_description="All thinking mode tests passed"
+        )
+
+    except Exception as e:
+        print(f"Error during testing: {e}")
+        lai.update_step(
+            state="Test failed",
+            action="Error during testing",
+            eval_score=0.0,
+            eval_description=f"Error: {str(e)}"
+        )
+        raise
+
+    finally:
+        lai.end_step()
+        pass
+
+def test_anthropic_thinking_streaming():
+    """Test streaming Anthropic calls with thinking mode"""
+    print("\n\nTesting Anthropic Thinking mode (streaming)...")
+
+    # Create Anthropic client - SDK will automatically handle it with the provider
+    client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
+
+    # Create a step for this test
+    lai.create_step(
+        action="Test Anthropic Thinking Mode Streaming",
+        goal="Testing ThinkingBlock handling in streaming Anthropic responses"
+    )
+
+    try:
+        print("\nTest 4: Streaming with thinking mode...")
+
+        # Create a streaming request
+        stream = client.messages.create(
+            model="claude-3-7-sonnet-20250219",  # Using Claude 3.7 Sonnet which supports extended thinking
+            max_tokens=4096,
+            messages=[{
+                "role": "user",
+                "content": "Think through how to implement a binary search algorithm in Python, then provide the code."
+            }],
+            stream=True,
+            thinking={
+                "type": "enabled",
+                "budget_tokens": 2500
+            }
+        )
+
+        # Process the stream
+        full_response = ""
+        thinking_blocks = []
+
+        for chunk in stream:
+            if hasattr(chunk, 'type'):
+                if chunk.type == 'content_block_start':
+                    if hasattr(chunk, 'content_block') and hasattr(chunk.content_block, 'type'):
+                        if chunk.content_block.type == 'thinking':
+                            print('\nThinking block started...', end='', flush=True)
+                elif chunk.type == 'content_block_delta':
+                    if hasattr(chunk.delta, 'text'):
+                        full_response += chunk.delta.text
+                        print('.', end='', flush=True)
+                    elif hasattr(chunk.delta, 'thinking'):
+                        print('T', end='', flush=True)
+                        thinking_blocks.append(chunk)
+
+        print(f"\n\nStreaming response received ({len(full_response)} chars)")
+        print(f"Thinking blocks detected: {len(thinking_blocks)}")
+
+        lai.update_step(
+            state="Streaming test completed",
+            action=f"Processed {len(full_response)} chars with {len(thinking_blocks)} thinking blocks",
+            eval_score=1.0,
+            eval_description="Streaming with thinking blocks handled successfully"
+        )
+
+    except Exception as e:
+        print(f"Error during streaming test: {e}")
+        lai.update_step(
+            state="Streaming test failed",
+            action="Error during streaming test",
+            eval_score=0.0,
+            eval_description=f"Error: {str(e)}"
+        )
+        raise
+
+    finally:
+        lai.end_step()
+        pass
+
+async def test_anthropic_thinking_async():
+    """Test asynchronous Anthropic calls with thinking mode"""
+    print("\n\nTesting Anthropic Thinking mode (asynchronous)...")
+
+    from anthropic import AsyncAnthropic
+
+    # Create async Anthropic client - SDK will automatically handle it with the provider
+    client = AsyncAnthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
+
+    # Create a step for this test
+    lai.create_step(
+        action="Test Anthropic Thinking Mode Async",
+        goal="Testing ThinkingBlock handling in async Anthropic responses"
+    )
+
+    try:
+        print("\nTest 5: Async thinking mode request...")
+
+        response = await client.messages.create(
+            model="claude-3-7-sonnet-20250219",  # Using Claude 3.7 Sonnet which supports extended thinking
+            max_tokens=4096,
+            messages=[{
+                "role": "user",
+                "content": "Think about the most efficient sorting algorithm for a nearly sorted array and explain why."
+            }],
+            thinking={
+                "type": "enabled",
+                "budget_tokens": 2048
+            }
+        )
+
+        # Check for thinking blocks in async response
+        for block in response.content:
+            if hasattr(block, 'type'):
+                if block.type == 'thinking':
+                    print(f"Thinking block detected in async response")
+                elif block.type == 'text':
+                    print(f"Async response: {block.text[:200]}...")
+
+        lai.update_step(
+            state="Async test completed",
+            action="Executed async thinking mode request",
+            eval_score=1.0,
+            eval_description="Async thinking mode handled successfully"
+        )
+
+    except Exception as e:
+        print(f"Error during async test: {e}")
+        lai.update_step(
+            state="Async test failed",
+            action="Error during async test",
+            eval_score=0.0,
+            eval_description=f"Error: {str(e)}"
+        )
+        raise
+
+    finally:
+        lai.end_step()
+        pass
+
+def main():
+    """Run all tests"""
+    print("Starting Lucidic SDK Anthropic Thinking Mode Tests")
+    print("=" * 50)
+
+    # Check for required environment variables
+    required_vars = ["ANTHROPIC_API_KEY", "LUCIDIC_API_KEY", "LUCIDIC_AGENT_ID"]
+    missing_vars = [var for var in required_vars if not os.environ.get(var)]
+
+    if missing_vars:
+        print("Error: Missing required environment variables:")
+        for var in missing_vars:
+            print(f" - {var}")
+        print("\nPlease set these environment variables or add them to a .env file.")
+        return
+
+    # Initialize the SDK with Anthropic provider
+    session_id = lai.init(
+        session_name="Anthropic Thinking Mode Test",
+        providers=["anthropic"]
+    )
+    print(f"Session initialized: {session_id}")
+
+    try:
+        # Run synchronous tests
+        test_anthropic_thinking_sync()
+
+        # Run streaming tests
+        test_anthropic_thinking_streaming()
+
+        # Run async tests
+        asyncio.run(test_anthropic_thinking_async())
+
+        print("\n\nAll tests completed successfully!")
+
+    except Exception as e:
+        print(f"\n\nTest suite failed: {e}")
+        raise
+
+    finally:
+        # End the session
+        lai.end_session()
+        print("\nSession ended.")
+
+if __name__ == "__main__":
+    main()