langroid 0.53.14__tar.gz → 0.53.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. {langroid-0.53.14 → langroid-0.53.15}/PKG-INFO +1 -1
  2. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/table_chat_agent.py +19 -5
  3. langroid-0.53.15/langroid/utils/pandas_utils.py +310 -0
  4. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/base.py +7 -6
  5. {langroid-0.53.14 → langroid-0.53.15}/pyproject.toml +1 -1
  6. langroid-0.53.14/langroid/utils/pandas_utils.py +0 -30
  7. {langroid-0.53.14 → langroid-0.53.15}/.gitignore +0 -0
  8. {langroid-0.53.14 → langroid-0.53.15}/LICENSE +0 -0
  9. {langroid-0.53.14 → langroid-0.53.15}/README.md +0 -0
  10. {langroid-0.53.14 → langroid-0.53.15}/langroid/__init__.py +0 -0
  11. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/__init__.py +0 -0
  12. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/base.py +0 -0
  13. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/batch.py +0 -0
  14. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/callbacks/__init__.py +0 -0
  15. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/callbacks/chainlit.py +0 -0
  16. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/chat_agent.py +0 -0
  17. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/chat_document.py +0 -0
  18. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/openai_assistant.py +0 -0
  19. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/__init__.py +0 -0
  20. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/arangodb/__init__.py +0 -0
  21. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  22. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/arangodb/system_messages.py +0 -0
  23. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/arangodb/tools.py +0 -0
  24. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/arangodb/utils.py +0 -0
  25. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/doc_chat_agent.py +0 -0
  26. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/doc_chat_task.py +0 -0
  27. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  28. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/lance_rag/__init__.py +0 -0
  29. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  30. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  31. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  32. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/lance_tools.py +0 -0
  33. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/neo4j/__init__.py +0 -0
  34. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  35. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  36. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/neo4j/system_messages.py +0 -0
  37. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/neo4j/tools.py +0 -0
  38. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  39. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/retriever_agent.py +0 -0
  40. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/sql/__init__.py +0 -0
  41. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  42. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/sql/utils/__init__.py +0 -0
  43. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  44. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  45. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/sql/utils/system_message.py +0 -0
  46. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/special/sql/utils/tools.py +0 -0
  47. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/task.py +0 -0
  48. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tool_message.py +0 -0
  49. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/__init__.py +0 -0
  50. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  51. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/exa_search_tool.py +0 -0
  52. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/file_tools.py +0 -0
  53. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/google_search_tool.py +0 -0
  54. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/mcp/__init__.py +0 -0
  55. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/mcp/decorators.py +0 -0
  56. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/mcp/fastmcp_client.py +0 -0
  57. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  58. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/orchestration.py +0 -0
  59. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/recipient_tool.py +0 -0
  60. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/retrieval_tool.py +0 -0
  61. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/rewind_tool.py +0 -0
  62. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/segment_extract_tool.py +0 -0
  63. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/tools/tavily_search_tool.py +0 -0
  64. {langroid-0.53.14 → langroid-0.53.15}/langroid/agent/xml_tool_message.py +0 -0
  65. {langroid-0.53.14 → langroid-0.53.15}/langroid/cachedb/__init__.py +0 -0
  66. {langroid-0.53.14 → langroid-0.53.15}/langroid/cachedb/base.py +0 -0
  67. {langroid-0.53.14 → langroid-0.53.15}/langroid/cachedb/redis_cachedb.py +0 -0
  68. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/__init__.py +0 -0
  69. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/base.py +0 -0
  70. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/models.py +0 -0
  71. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/protoc/__init__.py +0 -0
  72. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  73. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  74. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  75. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  76. {langroid-0.53.14 → langroid-0.53.15}/langroid/embedding_models/remote_embeds.py +0 -0
  77. {langroid-0.53.14 → langroid-0.53.15}/langroid/exceptions.py +0 -0
  78. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/__init__.py +0 -0
  79. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/azure_openai.py +0 -0
  80. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/base.py +0 -0
  81. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/config.py +0 -0
  82. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/mcp_client_lm.py +0 -0
  83. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/mock_lm.py +0 -0
  84. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/model_info.py +0 -0
  85. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/openai_gpt.py +0 -0
  86. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  87. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/prompt_formatter/base.py +0 -0
  88. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  89. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  90. {langroid-0.53.14 → langroid-0.53.15}/langroid/language_models/utils.py +0 -0
  91. {langroid-0.53.14 → langroid-0.53.15}/langroid/mytypes.py +0 -0
  92. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/__init__.py +0 -0
  93. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/agent_chats.py +0 -0
  94. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/code_parser.py +0 -0
  95. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/document_parser.py +0 -0
  96. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/file_attachment.py +0 -0
  97. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/md_parser.py +0 -0
  98. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/para_sentence_split.py +0 -0
  99. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/parse_json.py +0 -0
  100. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/parser.py +0 -0
  101. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/pdf_utils.py +0 -0
  102. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/repo_loader.py +0 -0
  103. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/routing.py +0 -0
  104. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/search.py +0 -0
  105. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/spider.py +0 -0
  106. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/table_loader.py +0 -0
  107. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/url_loader.py +0 -0
  108. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/urls.py +0 -0
  109. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/utils.py +0 -0
  110. {langroid-0.53.14 → langroid-0.53.15}/langroid/parsing/web_search.py +0 -0
  111. {langroid-0.53.14 → langroid-0.53.15}/langroid/prompts/__init__.py +0 -0
  112. {langroid-0.53.14 → langroid-0.53.15}/langroid/prompts/dialog.py +0 -0
  113. {langroid-0.53.14 → langroid-0.53.15}/langroid/prompts/prompts_config.py +0 -0
  114. {langroid-0.53.14 → langroid-0.53.15}/langroid/prompts/templates.py +0 -0
  115. {langroid-0.53.14 → langroid-0.53.15}/langroid/py.typed +0 -0
  116. {langroid-0.53.14 → langroid-0.53.15}/langroid/pydantic_v1/__init__.py +0 -0
  117. {langroid-0.53.14 → langroid-0.53.15}/langroid/pydantic_v1/main.py +0 -0
  118. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/__init__.py +0 -0
  119. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/algorithms/__init__.py +0 -0
  120. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/algorithms/graph.py +0 -0
  121. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/configuration.py +0 -0
  122. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/constants.py +0 -0
  123. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/git_utils.py +0 -0
  124. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/globals.py +0 -0
  125. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/logging.py +0 -0
  126. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/object_registry.py +0 -0
  127. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/output/__init__.py +0 -0
  128. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/output/citations.py +0 -0
  129. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/output/printing.py +0 -0
  130. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/output/status.py +0 -0
  131. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/pydantic_utils.py +0 -0
  132. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/system.py +0 -0
  133. {langroid-0.53.14 → langroid-0.53.15}/langroid/utils/types.py +0 -0
  134. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/__init__.py +0 -0
  135. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/chromadb.py +0 -0
  136. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/lancedb.py +0 -0
  137. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/meilisearch.py +0 -0
  138. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/pineconedb.py +0 -0
  139. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/postgres.py +0 -0
  140. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/qdrantdb.py +0 -0
  141. {langroid-0.53.14 → langroid-0.53.15}/langroid/vector_store/weaviatedb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.53.14
3
+ Version: 0.53.15
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -7,6 +7,13 @@ expression (involving a dataframe `df`) to answer the query.
7
7
  The expression is passed via the `pandas_eval` tool/function-call,
8
8
  which is handled by the Agent's `pandas_eval` method. This method evaluates
9
9
  the expression and returns the result as a string.
10
+
11
+ WARNING: This Agent should be used only with trusted input, as it can execute system
12
+ commands.
13
+
14
+ The `full_eval` flag is false by default, which means that the input is sanitized
15
+ against most common code injection attack vectors. `full_eval` may be set to True to
16
+ disable sanitization entirely. Both cases should be used with caution.
10
17
  """
11
18
 
12
19
  import io
@@ -26,6 +33,7 @@ from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
26
33
  from langroid.parsing.table_loader import read_tabular_data
27
34
  from langroid.prompts.prompts_config import PromptsConfig
28
35
  from langroid.utils.constants import DONE, PASS
36
+ from langroid.utils.pandas_utils import sanitize_command
29
37
  from langroid.vector_store.base import VectorStoreConfig
30
38
 
31
39
  logger = logging.getLogger(__name__)
@@ -113,6 +121,9 @@ class TableChatAgentConfig(ChatAgentConfig):
113
121
  cache: bool = True # cache results
114
122
  debug: bool = False
115
123
  stream: bool = True # allow streaming where needed
124
+ full_eval: bool = (
125
+ False # runs eval without sanitization. Use only on trusted input!
126
+ )
116
127
  data: str | pd.DataFrame # data file, URL, or DataFrame
117
128
  separator: None | str = None # separator for data file
118
129
  vecdb: None | VectorStoreConfig = None
@@ -204,7 +215,7 @@ class TableChatAgent(ChatAgent):
204
215
  """
205
216
  self.sent_expression = True
206
217
  exprn = msg.expression
207
- local_vars = {"df": self.df}
218
+ vars = {"df": self.df}
208
219
  # Create a string-based I/O stream
209
220
  code_out = io.StringIO()
210
221
 
@@ -212,10 +223,13 @@ class TableChatAgent(ChatAgent):
212
223
  sys.stdout = code_out
213
224
 
214
225
  # Evaluate the last line and get the result;
215
- # SECURITY: eval only with empty globals and {"df": df} in locals to
216
- # prevent arbitrary Python code execution.
226
+ # SECURITY MITIGATION: Eval input is sanitized by default to prevent most
227
+ # common code injection attack vectors.
217
228
  try:
218
- eval_result = eval(exprn, {}, local_vars)
229
+ if not self.config.full_eval:
230
+ exprn = sanitize_command(exprn)
231
+ code = compile(exprn, "<calc>", "eval")
232
+ eval_result = eval(code, vars, {})
219
233
  except Exception as e:
220
234
  eval_result = f"ERROR: {type(e)}: {e}"
221
235
 
@@ -226,7 +240,7 @@ class TableChatAgent(ChatAgent):
226
240
  sys.stdout = sys.__stdout__
227
241
 
228
242
  # If df has been modified in-place, save the changes back to self.df
229
- self.df = local_vars["df"]
243
+ self.df = vars["df"]
230
244
 
231
245
  # Get the resulting string from the I/O stream
232
246
  print_result = code_out.getvalue() or ""
@@ -0,0 +1,310 @@
1
+ import ast
2
+ from typing import Any
3
+
4
+ import pandas as pd
5
+
6
# DataFrame attribute/method names that commonly show up in analysis
# expressions.  Grouped by first letter purely for readability; a set has no
# order, so the grouping carries no meaning at runtime.
COMMON_USE_DF_METHODS = {
    "T", "abs", "add", "add_prefix", "add_suffix", "agg", "aggregate",
    "align", "all", "any", "apply", "applymap", "at", "at_time",
    "between_time", "bfill",
    "clip", "combine", "combine_first", "convert_dtypes", "corr", "corrwith",
    "count", "cov", "cummax", "cummin", "cumprod", "cumsum",
    "describe", "diff", "dot", "drop_duplicates", "duplicated",
    "eq", "eval", "ewm", "expanding", "explode",
    "filter", "first",
    "ge", "groupby", "gt",
    "head",
    "idxmax", "idxmin", "iloc", "infer_objects", "interpolate", "isin",
    "kurt", "kurtosis",
    "last", "le", "loc", "lt",
    "mask", "max", "mean", "median", "melt", "min", "mode", "mul",
    "nlargest", "notna", "notnull", "nsmallest", "nunique",
    "pct_change", "pipe", "pivot", "pivot_table", "prod", "product",
    "quantile", "query",
    "rank", "replace", "resample", "rolling", "round",
    "sample", "select_dtypes", "sem", "shift", "skew", "sort_index",
    "sort_values", "squeeze", "stack", "std", "sum",
    "tail", "transform", "transpose",
    "unstack", "value_counts", "var", "where", "xs",
}

# Names that are subtracted from the common set below, i.e. never whitelisted.
POTENTIALLY_DANGEROUS_DF_METHODS = {
    "eval", "query",
    "apply", "applymap", "pipe",
    "agg", "aggregate", "transform",
    "rolling", "expanding", "resample",
}

# The only method names CommandValidator.visit_Call accepts.
WHITELISTED_DF_METHODS = COMMON_USE_DF_METHODS - POTENTIALLY_DANGEROUS_DF_METHODS


# Keyword-argument names that CommandValidator.visit_Call rejects on any call.
BLOCKED_KW = {"engine", "parser", "inplace", "regex", "dtype", "converters", "eval"}

MAX_CHAIN = 6  # max nesting of Call nodes (length of a method chain)
MAX_DEPTH = 25  # max AST nesting depth tracked by generic_visit
NUMERIC_LIMIT = 1_000_000_000  # largest absolute numeric literal accepted
139
+
140
+
141
class UnsafeCommandError(ValueError):
    """Signals that a command string was rejected by the security policy."""
145
+
146
+
147
def _literal_ok(node: ast.AST) -> bool:
    """Return True if *node* is a safe literal (and within the numeric limit).

    Accepted shapes:
      * any ``ast.Constant``;
      * a unary ``+``/``-`` applied directly to a numeric constant, because
        Python parses ``-1`` as ``UnaryOp(USub, Constant(1))`` rather than as
        a single constant (so ``df.iloc[-1]`` or ``df[-5:]`` need this);
      * tuples / lists whose elements are all safe literals;
      * slices whose present bounds and step are all safe literals.

    Args:
        node: the AST node to inspect.

    Returns:
        True when the node is one of the accepted literal shapes, else False.

    Raises:
        UnsafeCommandError: if a numeric constant's absolute value exceeds
            ``NUMERIC_LIMIT``.
    """
    if isinstance(node, ast.Constant):
        value = node.value
        if isinstance(value, (int, float, complex)) and abs(value) > NUMERIC_LIMIT:
            raise UnsafeCommandError("numeric constant exceeds limit")
        return True

    if isinstance(node, ast.UnaryOp):
        # Signed numeric literal; the recursive call applies the limit check.
        operand = node.operand
        return (
            isinstance(node.op, (ast.UAdd, ast.USub))
            and isinstance(operand, ast.Constant)
            and isinstance(operand.value, (int, float, complex))
            and _literal_ok(operand)
        )

    if isinstance(node, (ast.Tuple, ast.List)):
        return all(_literal_ok(elt) for elt in node.elts)

    if isinstance(node, ast.Slice):
        # Missing bounds (e.g. ``[:5]``) are represented as None.
        return all(
            part is None or _literal_ok(part)
            for part in (node.lower, node.upper, node.step)
        )

    return False
164
+
165
+
166
class CommandValidator(ast.NodeVisitor):
    """AST walker that enforces the security policy for DataFrame expressions.

    The validator is applied to an expression parsed in ``eval`` mode. It
    raises ``UnsafeCommandError`` at the first violation:

      * only node types listed in ``ALLOWED_NODES`` may appear;
      * the only variable name allowed is ``df_name``;
      * calls must be attribute calls whose method name is in
        ``WHITELISTED_DF_METHODS`` and whose keyword names are not in
        ``BLOCKED_KW``;
      * attribute names starting with an underscore are rejected, which
        blocks traversal such as ``df.__class__.__init__.__globals__``;
      * subscripts must be literals;
      * nesting depth and call-chain length are bounded by ``MAX_DEPTH`` and
        ``MAX_CHAIN``.
    """

    # Comparison operators we allow
    ALLOWED_CMPOP = (ast.Gt, ast.GtE, ast.Lt, ast.LtE, ast.Eq, ast.NotEq)

    # Arithmetic operators we allow (power ** intentionally omitted)
    ALLOWED_BINOP = (ast.Add, ast.Sub, ast.Mult, ast.Div, ast.FloorDiv, ast.Mod)
    ALLOWED_UNARY = (ast.UAdd, ast.USub)

    # Node whitelist
    ALLOWED_NODES = (
        ast.Expression,
        ast.Attribute,
        ast.Name,
        ast.Load,
        ast.Call,
        ast.Subscript,
        ast.Constant,
        ast.Tuple,
        ast.List,
        ast.Slice,
        ast.keyword,
        ast.BinOp,
        ast.UnaryOp,
        ast.Compare,
        *ALLOWED_BINOP,
        *ALLOWED_UNARY,
        *ALLOWED_CMPOP,
    )

    def __init__(self, df_name: str = "df"):
        """Create a validator that accepts *df_name* as the only variable."""
        self.df_name = df_name
        self.depth = 0  # current AST nesting depth
        self.chain = 0  # current nesting of Call nodes

    # Depth guard
    def generic_visit(self, node: ast.AST) -> None:
        """Visit children while tracking nesting depth.

        Raises:
            UnsafeCommandError: if nesting exceeds ``MAX_DEPTH``.
        """
        self.depth += 1
        try:
            if self.depth > MAX_DEPTH:
                raise UnsafeCommandError("AST nesting too deep")
            super().generic_visit(node)
        finally:
            # Always restore the counter, mirroring how visit_Call treats
            # ``chain``, so the instance is consistent after a rejection.
            self.depth -= 1

    # Literal validation
    def visit_Constant(self, node: ast.Constant) -> None:
        """Apply the numeric-limit check to every constant."""
        _literal_ok(node)

    # Arithmetic
    def visit_BinOp(self, node: ast.BinOp) -> None:
        """Allow only the operators in ``ALLOWED_BINOP``."""
        if not isinstance(node.op, self.ALLOWED_BINOP):
            raise UnsafeCommandError("operator not allowed")
        self.generic_visit(node)

    def visit_UnaryOp(self, node: ast.UnaryOp) -> None:
        """Allow only the operators in ``ALLOWED_UNARY``."""
        if not isinstance(node.op, self.ALLOWED_UNARY):
            raise UnsafeCommandError("unary operator not allowed")
        self.generic_visit(node)

    # Comparisons
    def visit_Compare(self, node: ast.Compare) -> None:
        """Allow only the comparison operators in ``ALLOWED_CMPOP``."""
        if not all(isinstance(op, self.ALLOWED_CMPOP) for op in node.ops):
            raise UnsafeCommandError("comparison operator not allowed")
        for comp in node.comparators:
            # Return value is not enforced here: this only triggers the
            # numeric-limit check. Non-literal comparators are still
            # validated node by node in generic_visit below.
            _literal_ok(comp)
        self.generic_visit(node)

    # Subscripts
    def visit_Subscript(self, node: ast.Subscript) -> None:
        """Require the subscript expression to be a literal."""
        if not _literal_ok(node.slice):
            raise UnsafeCommandError("subscript must be literal")
        self.generic_visit(node)

    # Attribute access
    def visit_Attribute(self, node: ast.Attribute) -> None:
        """Reject private / dunder attribute names.

        Attribute nodes are otherwise unrestricted, so without this check an
        expression could walk from ``df`` to interpreter internals through
        names like ``__class__`` or ``__globals__``.
        """
        if node.attr.startswith("_"):
            raise UnsafeCommandError(f"attribute '{node.attr}' not permitted")
        self.generic_visit(node)

    # Method calls
    def visit_Call(self, node: ast.Call) -> None:
        """Validate a call: attribute-style, whitelisted name, safe kwargs."""
        if not isinstance(node.func, ast.Attribute):
            raise UnsafeCommandError("only DataFrame method calls allowed")

        method = node.func.attr
        self.chain += 1
        try:
            if self.chain > MAX_CHAIN:
                raise UnsafeCommandError("method-chain too long")
            if method not in WHITELISTED_DF_METHODS:
                raise UnsafeCommandError(f"method '{method}' not permitted")

            # kwarg / arg checks. As in visit_Compare, _literal_ok is used
            # only for its numeric-limit side effect; the argument subtrees
            # are validated by generic_visit.
            for kw in node.keywords:
                if kw.arg in BLOCKED_KW:
                    raise UnsafeCommandError(f"kwarg '{kw.arg}' is blocked")
                _literal_ok(kw.value)
            for arg in node.args:
                _literal_ok(arg)

            self.generic_visit(node)
        finally:
            self.chain -= 1

    # Names
    def visit_Name(self, node: ast.Name) -> None:
        """Allow only the configured DataFrame variable name."""
        if node.id != self.df_name:
            raise UnsafeCommandError(f"unexpected variable '{node.id}'")

    # Top-level gate
    def visit(self, node: ast.AST) -> None:
        """Reject any node type outside ``ALLOWED_NODES`` before dispatching."""
        if not isinstance(node, self.ALLOWED_NODES):
            raise UnsafeCommandError(f"disallowed node {type(node).__name__}")
        super().visit(node)
274
+
275
+
276
def sanitize_command(expr: str, df_name: str = "df") -> str:
    """Check *expr* against the security policy and hand it back untouched.

    Args:
        expr: expression source, parsed in ``eval`` mode.
        df_name: the only variable name the expression may reference.

    Returns:
        *expr* itself, unmodified, when every rule passes.

    Raises:
        UnsafeCommandError: at the first policy violation found.
        SyntaxError: from ``ast.parse`` if *expr* is not a valid expression.
    """
    validator = CommandValidator(df_name)
    validator.visit(ast.parse(expr, mode="eval"))
    return expr
284
+
285
+
286
def stringify(x: Any) -> str:
    """Render *x* as a compact string.

    A Series is converted to a one-column DataFrame; anything that is neither
    a Series nor a DataFrame is returned as ``str(x)``. For tabular input,
    only the first 10 rows are rendered, string cells in object-dtype columns
    are cut to 1000 characters plus ``"..."``, and the index is omitted.

    The caller's object is never modified: truncation is done on a copy of
    the first 10 rows.

    Args:
        x: a DataFrame, a Series, or any other value.

    Returns:
        The string rendering described above.
    """
    max_rows = 10
    max_chars = 1000

    if isinstance(x, pd.Series):
        df = x.to_frame()
    elif isinstance(x, pd.DataFrame):
        df = x
    else:
        return str(x)

    # Slice first, then copy: only the rows that will be printed need
    # truncating, and assigning into a copy leaves the input frame intact.
    df = df.head(max_rows).copy()

    def _clip(item: Any) -> Any:
        """Shorten over-long strings; pass every other value through."""
        if isinstance(item, str) and len(item) > max_chars:
            return item[:max_chars] + "..."
        return item

    # Truncate long text columns
    for col in df.columns:
        if df[col].dtype == object:
            df[col] = df[col].apply(_clip)

    # Convert to string
    return df.to_string(index=False)  # type: ignore
@@ -14,7 +14,7 @@ from langroid.utils.algorithms.graph import components, topological_sort
14
14
  from langroid.utils.configuration import settings
15
15
  from langroid.utils.object_registry import ObjectRegistry
16
16
  from langroid.utils.output.printing import print_long_text
17
- from langroid.utils.pandas_utils import stringify
17
+ from langroid.utils.pandas_utils import sanitize_command, stringify
18
18
  from langroid.utils.pydantic_utils import flatten_dict
19
19
 
20
20
  logger = logging.getLogger(__name__)
@@ -159,11 +159,12 @@ class VectorStore(ABC):
159
159
  df = pd.DataFrame(dicts)
160
160
 
161
161
  try:
162
- # SECURITY: Use Python's eval() with NO globals and only {"df": df}
163
- # in locals. This allows pandas operations on `df` while preventing
164
- # access to builtins or other potentially harmful global functions,
165
- # mitigating risks associated with executing untrusted `calc` strings.
166
- result = eval(calc, {}, {"df": df}) # type: ignore
162
+ # SECURITY MITIGATION: Eval input is sanitized to prevent most common
163
+ # code injection attack vectors.
164
+ vars = {"df": df}
165
+ calc = sanitize_command(calc)
166
+ code = compile(calc, "<calc>", "eval")
167
+ result = eval(code, vars, {})
167
168
  except Exception as e:
168
169
  # return error message so LLM can fix the calc string if needed
169
170
  err = f"""
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "langroid"
3
- version = "0.53.14"
3
+ version = "0.53.15"
4
4
  authors = [
5
5
  {name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
6
6
  ]
@@ -1,30 +0,0 @@
1
- from typing import Any
2
-
3
- import pandas as pd
4
-
5
-
6
- def stringify(x: Any) -> str:
7
- # Convert x to DataFrame if it is not one already
8
- if isinstance(x, pd.Series):
9
- df = x.to_frame()
10
- elif not isinstance(x, pd.DataFrame):
11
- return str(x)
12
- else:
13
- df = x
14
-
15
- # Truncate long text columns to 1000 characters
16
- for col in df.columns:
17
- if df[col].dtype == object:
18
- df[col] = df[col].apply(
19
- lambda item: (
20
- (item[:1000] + "...")
21
- if isinstance(item, str) and len(item) > 1000
22
- else item
23
- )
24
- )
25
-
26
- # Limit to 10 rows
27
- df = df.head(10)
28
-
29
- # Convert to string
30
- return df.to_string(index=False) # type: ignore
File without changes
File without changes
File without changes
File without changes