npcpy 1.2.36-py3-none-any.whl → 1.2.37-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcpy/__init__.py CHANGED
@@ -1,6 +1,14 @@
  from . import npc_compiler
  from . import npc_sysenv
  from . import llm_funcs
+ from . import ml_funcs
+ from . import npc_array
  from . import sql
- from . import work
- from . import gen
+ from . import work
+ from . import gen
+
+ # Expose key classes at package level
+ from .npc_array import NPCArray, ResponseTensor, LazyResult, infer_matrix, ensemble_vote
+ from .npc_compiler import NPC, Team, Jinx
+ from .llm_funcs import get_llm_response, check_llm_command, execute_llm_command
+ from .ml_funcs import fit_model, predict_model, score_model, ensemble_predict
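
With these re-exports in place, the key classes and helpers can be imported straight from the package root. A minimal usage sketch (the model and provider strings below are placeholders, and the keyword names are assumptions based on the re-exported functions, not something this diff specifies):

    # Hypothetical quick-start against the new package-level exports.
    from npcpy import get_llm_response

    result = get_llm_response(
        "Say hello in one sentence.",
        model="llama3.2",     # placeholder local model
        provider="ollama",    # placeholder provider
    )
    print(result["response"])
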
npcpy/gen/image_gen.py CHANGED
@@ -363,7 +363,8 @@ def generate_image(
     api_url: Optional[str] = None,
     attachments: Union[List[Union[str, bytes, Image.Image]], None] = None,
     save_path: Optional[str] = None,
-     custom_model_path: Optional[str] = None, # <--- NEW: Accept custom_model_path
+     custom_model_path: Optional[str] = None, # <--- NEW: Accept custom_model_path,
+
 ):
     """
     Unified function to generate or edit images using various providers.
@@ -429,7 +430,9 @@ def generate_image(
             attachments=attachments,
             height=height,
             width=width,
-             n_images=n_images
+             n_images=n_images,
+             api_key=api_key
+
         )
         all_generated_pil_images.extend(images)

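
The second hunk forwards the caller's api_key into the per-provider generation call, so a key supplied at call time reaches the backend instead of relying only on environment variables. A hedged sketch of a call that exercises this path (prompt, model, and provider are assumed parameter names; only n_images and api_key handling appear in this diff):

    from npcpy.gen.image_gen import generate_image

    images = generate_image(
        prompt="a lighthouse at dusk",   # assumed parameter name
        model="dall-e-3",                # placeholder model
        provider="openai",               # placeholder provider
        api_key="YOUR_API_KEY",          # now forwarded to the provider call
        n_images=1,
    )
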
npcpy/gen/response.py CHANGED
@@ -19,9 +19,76 @@ try:
 except ImportError:
     pass
 except OSError:
-
     pass

+ # Token costs per 1M tokens (input, output)
+ TOKEN_COSTS = {
+     # OpenAI
+     "gpt-4o": (2.50, 10.00),
+     "gpt-4o-mini": (0.15, 0.60),
+     "gpt-4-turbo": (10.00, 30.00),
+     "gpt-3.5-turbo": (0.50, 1.50),
+     "gpt-5": (1.25, 10.00),
+     "gpt-5-mini": (0.25, 2.00),
+     "o1": (15.00, 60.00),
+     "o1-mini": (3.00, 12.00),
+     "o3": (10.00, 40.00),
+     "o3-mini": (1.10, 4.40),
+     "o4-mini": (1.10, 4.40),
+     # Anthropic
+     "claude-3-5-sonnet": (3.00, 15.00),
+     "claude-3-opus": (15.00, 75.00),
+     "claude-3-haiku": (0.25, 1.25),
+     "claude-sonnet-4": (3.00, 15.00),
+     "claude-opus-4": (15.00, 75.00),
+     "claude-opus-4-5": (5.00, 25.00),
+     "claude-sonnet-4-5": (3.00, 15.00),
+     "claude-haiku-4": (0.80, 4.00),
+     # Google
+     "gemini-1.5-pro": (1.25, 5.00),
+     "gemini-1.5-flash": (0.075, 0.30),
+     "gemini-2.0-flash": (0.10, 0.40),
+     "gemini-2.5-pro": (1.25, 10.00),
+     "gemini-2.5-flash": (0.15, 0.60),
+     "gemini-3-pro": (2.00, 12.00),
+     # Groq (free tier limits, paid is cheap)
+     "llama-3": (0.05, 0.08),
+     "llama-3.1": (0.05, 0.08),
+     "llama-3.2": (0.05, 0.08),
+     "llama-4": (0.05, 0.10),
+     "mixtral": (0.24, 0.24),
+     # DeepSeek
+     "deepseek-v3": (0.27, 1.10),
+     "deepseek-r1": (0.55, 2.19),
+     # Mistral
+     "mistral-large": (2.00, 6.00),
+     "mistral-small": (0.20, 0.60),
+     # xAI
+     "grok-2": (2.00, 10.00),
+     "grok-3": (3.00, 15.00),
+ }
+
+ def calculate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
+     """Calculate cost in USD for a response."""
+     if not model:
+         return 0.0
+
+     # Normalize model name - strip provider prefix and lowercase
+     model_key = model.split("/")[-1].lower()
+
+     # Check for exact or partial match
+     costs = None
+     for key, cost in TOKEN_COSTS.items():
+         if key in model_key or model_key in key:
+             costs = cost
+             break
+
+     if not costs:
+         return 0.0  # Unknown/local model, assume free
+
+     input_cost, output_cost = costs
+     return (input_tokens * input_cost / 1_000_000) + (output_tokens * output_cost / 1_000_000)
+
 def handle_streaming_json(api_params):
     """
     Handles streaming responses when JSON format is requested from LiteLLM.
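
A quick sanity check of the new helper (token counts are made up; note that matching is substring based in dictionary order, so for example "gpt-4o-mini" resolves to the earlier "gpt-4o" entry):

    from npcpy.gen.response import calculate_cost

    # 12,000 input and 800 output tokens at gpt-4o pricing:
    # 12_000 * 2.50 / 1e6 + 800 * 10.00 / 1e6 = 0.030 + 0.008 = 0.038
    cost = calculate_cost("openai/gpt-4o", input_tokens=12_000, output_tokens=800)
    print(f"${cost:.3f}")  # $0.038
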
@@ -117,7 +184,7 @@ Do not include any additional markdown formatting or leading ```json tags in you

     if detected_tools:
         result["tool_calls"] = detected_tools
-         result = process_tool_calls(result, tool_map, "local", "transformers", result["messages"])
+         result = process_tool_calls(result, tool_map, "local", "transformers", result["messages"], tools=tools)

     if format == "json":
         try:
@@ -278,12 +345,21 @@ def get_ollama_response(
     if not auto_process_tool_calls or not (tools and tool_map):
         res = ollama.chat(**api_params, options=options)
         result["raw_response"] = res
-
+
+         # Extract usage from ollama response
+         if hasattr(res, 'prompt_eval_count') or 'prompt_eval_count' in res:
+             input_tokens = getattr(res, 'prompt_eval_count', None) or res.get('prompt_eval_count', 0) or 0
+             output_tokens = getattr(res, 'eval_count', None) or res.get('eval_count', 0) or 0
+             result["usage"] = {
+                 "input_tokens": input_tokens,
+                 "output_tokens": output_tokens,
+             }
+
         if stream:
-             result["response"] = res
+             result["response"] = res
             return result
         else:
-
+
             message = res.get("message", {})
             response_content = message.get("content", "")
             result["response"] = response_content
@@ -333,11 +409,12 @@ def get_ollama_response(
             }


-             processed_result = process_tool_calls(response_for_processing,
-                                                   tool_map, model,
-                                                   'ollama',
-                                                   messages,
-                                                   stream=False)
+             processed_result = process_tool_calls(response_for_processing,
+                                                   tool_map, model,
+                                                   'ollama',
+                                                   messages,
+                                                   stream=False,
+                                                   tools=tools)


             if stream:
@@ -624,7 +701,20 @@ def get_litellm_response(
         api_params["stream"] = stream
         resp = completion(**api_params)
         result["raw_response"] = resp
-
+
+         # Extract usage if available (handles both standard litellm and ollama formats)
+         if hasattr(resp, 'usage') and resp.usage:
+             result["usage"] = {
+                 "input_tokens": getattr(resp.usage, 'prompt_tokens', 0) or 0,
+                 "output_tokens": getattr(resp.usage, 'completion_tokens', 0) or 0,
+             }
+         elif hasattr(resp, 'prompt_eval_count'):
+             # Ollama format
+             result["usage"] = {
+                 "input_tokens": getattr(resp, 'prompt_eval_count', 0) or 0,
+                 "output_tokens": getattr(resp, 'eval_count', 0) or 0,
+             }
+
         if stream:
             result["response"] = resp
             return result
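
Since usage is now attached to the result dict whenever the provider reports token counts, callers can pair it with the calculate_cost helper above to estimate spend per call. A small sketch (the result dict is whatever get_litellm_response or get_ollama_response returned; "usage" is simply absent when no counts were reported):

    from npcpy.gen.response import calculate_cost

    def estimate_cost(result: dict, model: str) -> float:
        # "usage" is only present when the backend reported token counts.
        usage = result.get("usage", {})
        return calculate_cost(
            model,
            input_tokens=usage.get("input_tokens", 0),
            output_tokens=usage.get("output_tokens", 0),
        )
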
@@ -687,59 +777,99 @@ def get_litellm_response(

         initial_api_params = api_params.copy()
         initial_api_params["stream"] = False
-
-
-         resp = completion(**initial_api_params)
+
+         try:
+             resp = completion(**initial_api_params)
+         except Exception as e:
+             from termcolor import colored
+             print(colored(f"[litellm ERROR] completion() failed: {type(e).__name__}: {e}", "red"))
+             result["error"] = str(e)
+             result["response"] = f"LLM call failed: {e}"
+             return result
+
         result["raw_response"] = resp
-
-
+
+         # Extract usage if available
+         if hasattr(resp, 'usage') and resp.usage:
+             result["usage"] = {
+                 "input_tokens": getattr(resp.usage, 'prompt_tokens', 0) or 0,
+                 "output_tokens": getattr(resp.usage, 'completion_tokens', 0) or 0,
+             }
+
+         if not resp.choices:
+             result["response"] = "No response from model"
+             return result
+
         has_tool_calls = hasattr(resp.choices[0].message, 'tool_calls') and resp.choices[0].message.tool_calls

         if has_tool_calls:
-
-
             result["tool_calls"] = resp.choices[0].message.tool_calls
-
-
-             processed_result = process_tool_calls(result,
-                                                   tool_map,
-                                                   model,
-                                                   provider,
-                                                   result["messages"],
-                                                   stream=False)
-
-
-             if stream:

-
+             processed_result = process_tool_calls(result,
+                                                   tool_map,
+                                                   model,
+                                                   provider,
+                                                   result["messages"],
+                                                   stream=False,
+                                                   tools=tools)

-             clean_messages = []
-             for msg in processed_result["messages"]:
-                 if msg.get('role') == 'assistant' and 'tool_calls' in msg:
-                     continue
-
-                 else:
-                     clean_messages.append(msg)
-
-             final_api_params = api_params.copy()
-             final_api_params["messages"] = clean_messages
-             final_api_params["stream"] = True
+             # Always do a follow-up call to get a proper response after tool execution
+             # Convert tool interactions to a clean format for the follow-up call
+             clean_messages = []
+             tool_results_summary = []
+
+             for msg in processed_result["messages"]:
+                 role = msg.get('role', '')
+                 if role == 'assistant' and 'tool_calls' in msg:
+                     # Skip the tool_calls message - we'll summarize results instead
+                     continue
+                 elif role == 'tool':
+                     # Collect tool results for summary
+                     content = msg.get('content', '')
+                     # Truncate very long results
+                     if len(content) > 2000:
+                         content = content[:2000] + "... (truncated)"
+                     tool_results_summary.append(content)
+                 else:
+                     clean_messages.append(msg)

+             # Add tool results as an assistant message summarizing what was done
+             if tool_results_summary:
+                 clean_messages.append({
+                     "role": "assistant",
+                     "content": "I executed the requested tools. Here are the results:\n\n" + "\n\n".join(tool_results_summary)
+                 })

-             final_api_params = api_params.copy()
-             final_api_params["messages"] = clean_messages
-             final_api_params["stream"] = True
-             if "tools" in final_api_params:
-                 del final_api_params["tools"]
-             if "tool_choice" in final_api_params:
-                 del final_api_params["tool_choice"]
+             # Add instruction for the LLM to provide a helpful response
+             clean_messages.append({
+                 "role": "user",
+                 "content": "Based on the tool results above, provide a brief summary of what happened. Do NOT output any code - the tool has already executed. Just describe the results concisely."
+             })

-             final_stream = completion(**final_api_params)
+             final_api_params = api_params.copy()
+             final_api_params["messages"] = clean_messages
+             final_api_params["stream"] = stream
+             if "tools" in final_api_params:
+                 del final_api_params["tools"]
+             if "tool_choice" in final_api_params:
+                 del final_api_params["tool_choice"]
+
+             final_resp = completion(**final_api_params)
+
+             if stream:
+                 processed_result["response"] = final_resp
+             else:
+                 if final_resp.choices:
+                     final_content = final_resp.choices[0].message.content
+                     processed_result["response"] = final_content
+                     processed_result["messages"].append({"role": "assistant", "content": final_content})
+                 else:
+                     # No choices returned, use the tool results summary directly
+                     if tool_results_summary:
+                         processed_result["response"] = "\n\n".join(tool_results_summary)
+                     else:
+                         processed_result["response"] = "Tool executed successfully."

-
-             final_stream = completion(**final_api_params)
-             processed_result["response"] = final_stream
-
             return processed_result


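
The reworked branch now wraps the initial completion in a try/except, records usage, and always makes a follow-up call after tools run, summarizing tool output in plain assistant/user messages rather than replaying raw tool_calls, and it honors the caller's stream flag for that follow-up. A hedged sketch of exercising this path (only tools, tool_map, and stream appear in this diff; the prompt, model, and provider parameters are assumed from the function's existing signature):

    from npcpy.gen.response import get_litellm_response

    def get_weather(city: str) -> str:
        # toy tool for illustration
        return f"Sunny in {city}"

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up the weather for a city",
            "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
        },
    }]

    result = get_litellm_response(
        "What's the weather in Paris?",   # assumed prompt parameter
        model="gpt-4o-mini",              # placeholder model
        provider="openai",                # placeholder provider
        tools=tools,
        tool_map={"get_weather": get_weather},
        stream=False,
    )
    # result["tool_results"] holds the executed call;
    # result["response"] is the model's follow-up summary.
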
@@ -770,18 +900,41 @@ def get_litellm_response(
     else:
         result["response"] = llm_response
         return result
- def process_tool_calls(response_dict, tool_map, model, provider, messages, stream=False):
+ def process_tool_calls(response_dict, tool_map, model, provider, messages, stream=False, tools=None):
     result = response_dict.copy()
     result["tool_results"] = []
-
+
     if "messages" not in result:
         result["messages"] = messages if messages else []
-
+
     tool_calls = result.get("tool_calls", [])
-
+
     if not tool_calls:
         return result
-
+
+     # First, add the assistant message with tool_calls (required by Gemini and other providers)
+     # This must come BEFORE the tool results
+     tool_calls_for_message = []
+     for tc in tool_calls:
+         if isinstance(tc, dict):
+             tool_calls_for_message.append(tc)
+         else:
+             # Convert object to dict format
+             tool_calls_for_message.append({
+                 "id": getattr(tc, "id", str(uuid.uuid4())),
+                 "type": "function",
+                 "function": {
+                     "name": getattr(tc.function, "name", "") if hasattr(tc, "function") else "",
+                     "arguments": getattr(tc.function, "arguments", "{}") if hasattr(tc, "function") else "{}"
+                 }
+             })
+
+     result["messages"].append({
+         "role": "assistant",
+         "content": None,
+         "tool_calls": tool_calls_for_message
+     })
+
     for tool_call in tool_calls:
         tool_id = str(uuid.uuid4())
         tool_name = None
@@ -812,10 +965,54 @@ def process_tool_calls(response_dict, tool_map, model, provider, messages, strea
         tool_result_str = ""
         serializable_result = None

+         # Show tool execution indicator with truncated args
+         # Store full args for Ctrl+O expansion
+         _last_tool_call = {"name": tool_name, "arguments": arguments}
+         try:
+             import builtins
+             builtins._npcsh_last_tool_call = _last_tool_call
+         except:
+             pass
+
+         try:
+             from termcolor import colored
+             # Format arguments nicely - show key=value pairs
+             is_truncated = False
+             if arguments:
+                 arg_parts = []
+                 for k, v in arguments.items():
+                     v_str = str(v)
+                     if len(v_str) > 40:
+                         v_str = v_str[:40] + "…"
+                         is_truncated = True
+                     arg_parts.append(f"{v_str}")
+                 args_display = " ".join(arg_parts)
+                 if len(args_display) > 60:
+                     args_display = args_display[:60] + "…"
+                     is_truncated = True
+             else:
+                 args_display = ""
+
+             if args_display:
+                 hint = colored(" [^O]", "white", attrs=["dark"]) if is_truncated else ""
+                 print(colored(f" ⚡ {tool_name}", "cyan") + colored(f" {args_display}", "white", attrs=["dark"]) + hint, end="", flush=True)
+             else:
+                 print(colored(f" ⚡ {tool_name}", "cyan"), end="", flush=True)
+         except:
+             pass
+
         try:
             tool_result = tool_map[tool_name](**arguments)
+             try:
+                 print(colored(" ✓", "green"), flush=True)
+             except:
+                 pass
         except Exception as e:
-             tool_result = f"Error executing tool '{tool_name}': {str(e)}. Tool map is : {tool_map}"
+             tool_result = f"Error executing tool '{tool_name}': {str(e)}"
+             try:
+                 print(colored(f" ✗ {str(e)[:50]}", "red"), flush=True)
+             except:
+                 pass

         try:
             tool_result_str = json.dumps(tool_result, default=str)
@@ -826,18 +1023,19 @@
         except Exception as e_serialize:
             tool_result_str = f"Error serializing result for {tool_name}: {str(e_serialize)}"
             serializable_result = {"error": tool_result_str}
-
+
         result["tool_results"].append({
             "tool_call_id": tool_id,
             "tool_name": tool_name,
             "arguments": arguments,
             "result": serializable_result
         })
-
-
+
+         # Add tool result as a tool message (proper format for multi-turn)
         result["messages"].append({
-             "role": "assistant",
-             "content": f'The results of the tool call for {tool_name} with {arguments} are as follows:' +tool_result_str
+             "role": "tool",
+             "tool_call_id": tool_id,
+             "content": tool_result_str
         })

     return result
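
With this change each executed tool is appended as a tool-role message keyed by a tool_call_id, following an assistant message that carries the original tool_calls, which is the multi-turn shape that Gemini and other providers require (per the comment in the hunk above). An illustrative messages list after a single tool call, with made-up values; note the tool message's id is a fresh uuid generated inside process_tool_calls rather than the provider's call id:

    # Illustrative shape of result["messages"] after one executed tool call.
    [
        {"role": "user", "content": "What's the weather in Paris?"},
        {"role": "assistant", "content": None, "tool_calls": [
            {"id": "call_abc", "type": "function",
             "function": {"name": "get_weather", "arguments": "{\"city\": \"Paris\"}"}},
        ]},
        {"role": "tool", "tool_call_id": "generated-uuid", "content": "\"Sunny in Paris\""},
    ]
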