langroid 0.53.14__py3-none-any.whl → 0.53.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/special/table_chat_agent.py +19 -5
- langroid/utils/pandas_utils.py +280 -0
- langroid/vector_store/base.py +7 -6
- {langroid-0.53.14.dist-info → langroid-0.53.15.dist-info}/METADATA +1 -1
- {langroid-0.53.14.dist-info → langroid-0.53.15.dist-info}/RECORD +7 -7
- {langroid-0.53.14.dist-info → langroid-0.53.15.dist-info}/WHEEL +0 -0
- {langroid-0.53.14.dist-info → langroid-0.53.15.dist-info}/licenses/LICENSE +0 -0
@@ -7,6 +7,13 @@ expression (involving a dataframe `df`) to answer the query.
|
|
7
7
|
The expression is passed via the `pandas_eval` tool/function-call,
|
8
8
|
which is handled by the Agent's `pandas_eval` method. This method evaluates
|
9
9
|
the expression and returns the result as a string.
|
10
|
+
|
11
|
+
WARNING: This Agent should be used only with trusted input, as it can execute system
|
12
|
+
commands.
|
13
|
+
|
14
|
+
The `full_eval` flag is false by default, which means that the input is sanitized
|
15
|
+
against most common code injection attack vectors. `full_eval` may be set to True to
|
16
|
+
disable sanitization entirely. Both cases should be used with caution.
|
10
17
|
"""
|
11
18
|
|
12
19
|
import io
|
@@ -26,6 +33,7 @@ from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
|
|
26
33
|
from langroid.parsing.table_loader import read_tabular_data
|
27
34
|
from langroid.prompts.prompts_config import PromptsConfig
|
28
35
|
from langroid.utils.constants import DONE, PASS
|
36
|
+
from langroid.utils.pandas_utils import sanitize_command
|
29
37
|
from langroid.vector_store.base import VectorStoreConfig
|
30
38
|
|
31
39
|
logger = logging.getLogger(__name__)
|
@@ -113,6 +121,9 @@ class TableChatAgentConfig(ChatAgentConfig):
|
|
113
121
|
cache: bool = True # cache results
|
114
122
|
debug: bool = False
|
115
123
|
stream: bool = True # allow streaming where needed
|
124
|
+
full_eval: bool = (
|
125
|
+
False # runs eval without sanitization. Use only on trusted input!
|
126
|
+
)
|
116
127
|
data: str | pd.DataFrame # data file, URL, or DataFrame
|
117
128
|
separator: None | str = None # separator for data file
|
118
129
|
vecdb: None | VectorStoreConfig = None
|
@@ -204,7 +215,7 @@ class TableChatAgent(ChatAgent):
|
|
204
215
|
"""
|
205
216
|
self.sent_expression = True
|
206
217
|
exprn = msg.expression
|
207
|
-
|
218
|
+
vars = {"df": self.df}
|
208
219
|
# Create a string-based I/O stream
|
209
220
|
code_out = io.StringIO()
|
210
221
|
|
@@ -212,10 +223,13 @@ class TableChatAgent(ChatAgent):
|
|
212
223
|
sys.stdout = code_out
|
213
224
|
|
214
225
|
# Evaluate the last line and get the result;
|
215
|
-
# SECURITY:
|
216
|
-
#
|
226
|
+
# SECURITY MITIGATION: Eval input is sanitized by default to prevent most
|
227
|
+
# common code injection attack vectors.
|
217
228
|
try:
|
218
|
-
|
229
|
+
if not self.config.full_eval:
|
230
|
+
exprn = sanitize_command(exprn)
|
231
|
+
code = compile(exprn, "<calc>", "eval")
|
232
|
+
eval_result = eval(code, vars, {})
|
219
233
|
except Exception as e:
|
220
234
|
eval_result = f"ERROR: {type(e)}: {e}"
|
221
235
|
|
@@ -226,7 +240,7 @@ class TableChatAgent(ChatAgent):
|
|
226
240
|
sys.stdout = sys.__stdout__
|
227
241
|
|
228
242
|
# If df has been modified in-place, save the changes back to self.df
|
229
|
-
self.df =
|
243
|
+
self.df = vars["df"]
|
230
244
|
|
231
245
|
# Get the resulting string from the I/O stream
|
232
246
|
print_result = code_out.getvalue() or ""
|
langroid/utils/pandas_utils.py
CHANGED
@@ -1,7 +1,287 @@
|
|
1
|
+
import ast
|
1
2
|
from typing import Any
|
2
3
|
|
3
4
|
import pandas as pd
|
4
5
|
|
6
|
+
# Candidate DataFrame member names for expressions that get validated before
# being evaluated. Entries are grouped by initial letter purely for readability;
# a set has no ordering.
COMMON_USE_DF_METHODS = {
    "T",
    "abs", "add", "add_prefix", "add_suffix", "agg", "aggregate", "align",
    "all", "any", "apply", "applymap", "at", "at_time",
    "between_time", "bfill",
    "clip", "combine", "combine_first", "convert_dtypes", "corr", "corrwith",
    "count", "cov", "cummax", "cummin", "cumprod", "cumsum",
    "describe", "diff", "dot", "drop_duplicates", "duplicated",
    "eq", "eval", "ewm", "expanding", "explode",
    "filter", "first",
    "groupby",
    "head",
    "idxmax", "idxmin", "infer_objects", "interpolate", "isin",
    "kurt", "kurtosis",
    "last", "le", "loc", "lt", "gt", "ge", "iloc",
    "mask", "max", "mean", "median", "melt", "min", "mode", "mul",
    "nlargest", "nsmallest", "notna", "notnull", "nunique",
    "pct_change", "pipe", "pivot", "pivot_table", "prod", "product",
    "quantile", "query",
    "rank", "replace", "resample", "rolling", "round",
    "sample", "select_dtypes", "sem", "shift", "skew", "sort_index",
    "sort_values", "squeeze", "stack", "std", "sum",
    "tail", "transform", "transpose",
    "unstack",
    "value_counts", "var",
    "where",
    "xs",
}

# Members removed from the candidate set before it becomes the whitelist.
# NOTE(review): presumably excluded because they accept callables or
# expression strings -- confirm the rationale with the author.
POTENTIALLY_DANGEROUS_DF_METHODS = {
    "eval", "query",
    "apply", "applymap", "pipe",
    "agg", "aggregate", "transform",
    "rolling", "expanding", "resample",
}

# The only method names the validator lets an expression call.
WHITELISTED_DF_METHODS = COMMON_USE_DF_METHODS - POTENTIALLY_DANGEROUS_DF_METHODS


# Keyword-argument names that are rejected on every call, whatever the method.
BLOCKED_KW = {
    "engine", "parser", "inplace", "regex", "dtype", "converters", "eval",
}
MAX_CHAIN = 6  # ceiling for the validator's running count of open calls
MAX_DEPTH = 25  # ceiling for AST nesting during validation
NUMERIC_LIMIT = 1_000_000_000  # largest absolute value allowed for a number
|
139
|
+
|
140
|
+
|
141
|
+
class UnsafeCommandError(ValueError):
    """Signal that a command string was rejected by the security policy.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` (or
    ``Exception``) also catch it.
    """
|
145
|
+
|
146
|
+
|
147
|
+
def _literal_ok(node: ast.AST) -> bool:
    """Return True if *node* is a safe literal (and within numeric limit).

    Accepted shapes:

    * any constant (numbers are additionally range-checked);
    * a numeric constant with a leading unary ``+`` or ``-``;
    * a tuple or list whose elements are all accepted;
    * a slice whose present bounds are all accepted.

    Args:
        node: AST node to classify.

    Returns:
        True when *node* matches one of the shapes above, False otherwise.

    Raises:
        UnsafeCommandError: if a numeric constant's absolute value exceeds
            ``NUMERIC_LIMIT``.
    """
    if isinstance(node, ast.Constant):
        value = node.value
        if isinstance(value, (int, float, complex)) and abs(value) > NUMERIC_LIMIT:
            raise UnsafeCommandError("numeric constant exceeds limit")
        return True
    if isinstance(node, ast.UnaryOp):
        # ast.parse leaves "-1" as UnaryOp(USub, Constant(1)); without this
        # branch every negative index or slice bound (df.iloc[-1],
        # df.iloc[-5:]) was reported as a non-literal subscript.
        operand = node.operand
        return (
            isinstance(node.op, (ast.UAdd, ast.USub))
            and isinstance(operand, ast.Constant)
            and isinstance(operand.value, (int, float, complex))
            and _literal_ok(operand)  # applies the magnitude limit
        )
    if isinstance(node, (ast.Tuple, ast.List)):
        return all(_literal_ok(elt) for elt in node.elts)
    if isinstance(node, ast.Slice):
        # Omitted bounds are None, e.g. the upper bound in "x[1:]".
        return all(
            sub is None or _literal_ok(sub)
            for sub in (node.lower, node.upper, node.step)
        )
    return False
|
164
|
+
|
165
|
+
|
166
|
+
class CommandValidator(ast.NodeVisitor):
    """AST walker that enforces the security policy.

    Every node must be one of ``ALLOWED_NODES``; the only name that may be
    referenced is the DataFrame variable; calls must be attribute calls to a
    method in ``WHITELISTED_DF_METHODS``; keyword arguments in ``BLOCKED_KW``
    are refused; attribute names starting with an underscore are refused.
    Any violation raises ``UnsafeCommandError``.

    This is a mitigation against common injection vectors, not a sandbox;
    expressions that pass are still handed to ``eval`` by the callers.
    """

    # Comparison operators we allow
    ALLOWED_CMPOP = (ast.Gt, ast.GtE, ast.Lt, ast.LtE, ast.Eq, ast.NotEq)

    # Arithmetic operators we allow (power ** intentionally omitted)
    ALLOWED_BINOP = (ast.Add, ast.Sub, ast.Mult, ast.Div, ast.FloorDiv, ast.Mod)
    ALLOWED_UNARY = (ast.UAdd, ast.USub)

    # Node whitelist
    ALLOWED_NODES = (
        ast.Expression,
        ast.Attribute,
        ast.Name,
        ast.Load,
        ast.Call,
        ast.Subscript,
        ast.Constant,
        ast.Tuple,
        ast.List,
        ast.Slice,
        ast.keyword,
        ast.BinOp,
        ast.UnaryOp,
        ast.Compare,
        *ALLOWED_BINOP,
        *ALLOWED_UNARY,
        *ALLOWED_CMPOP,
    )

    def __init__(self, df_name: str = "df"):
        """Create a validator that accepts *df_name* as the only variable."""
        self.df_name = df_name
        self.depth = 0  # current nesting level of generic_visit calls
        self.chain = 0  # number of Call nodes currently being visited

    # Depth guard
    def generic_visit(self, node: ast.AST) -> None:
        """Visit the children of *node*, refusing nesting beyond MAX_DEPTH."""
        self.depth += 1
        try:
            if self.depth > MAX_DEPTH:
                raise UnsafeCommandError("AST nesting too deep")
            super().generic_visit(node)
        finally:
            # Restore the counter even when a child raises, mirroring the
            # handling of ``chain`` in visit_Call.
            self.depth -= 1

    # Literal validation
    def visit_Constant(self, node: ast.Constant) -> None:
        """Apply the numeric magnitude limit to a constant."""
        _literal_ok(node)

    # Attribute access
    def visit_Attribute(self, node: ast.Attribute) -> None:
        """Reject private and dunder attributes, then validate the receiver.

        Without this check ``df.__init__.__globals__['__builtins__']`` passed
        validation (only Attribute, Name, Subscript and Constant nodes are
        involved) and gave the expression access to ``eval``/``__import__``.
        """
        if node.attr.startswith("_"):
            raise UnsafeCommandError(f"attribute '{node.attr}' not permitted")
        self.generic_visit(node)

    # Arithmetic
    def visit_BinOp(self, node: ast.BinOp) -> None:
        """Allow only the operators listed in ALLOWED_BINOP."""
        if not isinstance(node.op, self.ALLOWED_BINOP):
            raise UnsafeCommandError("operator not allowed")
        self.generic_visit(node)

    def visit_UnaryOp(self, node: ast.UnaryOp) -> None:
        """Allow only unary plus and minus."""
        if not isinstance(node.op, self.ALLOWED_UNARY):
            raise UnsafeCommandError("unary operator not allowed")
        self.generic_visit(node)

    # Comparisons
    def visit_Compare(self, node: ast.Compare) -> None:
        """Allow only the comparison operators listed in ALLOWED_CMPOP."""
        if not all(isinstance(op, self.ALLOWED_CMPOP) for op in node.ops):
            raise UnsafeCommandError("comparison operator not allowed")
        for comp in node.comparators:
            # Result ignored on purpose: this call only enforces the numeric
            # limit; non-literal comparators are validated by the traversal.
            _literal_ok(comp)
        self.generic_visit(node)

    # Subscripts
    def visit_Subscript(self, node: ast.Subscript) -> None:
        """Require the subscript expression to be a literal."""
        if not _literal_ok(node.slice):
            raise UnsafeCommandError("subscript must be literal")
        self.generic_visit(node)

    # Method calls
    def visit_Call(self, node: ast.Call) -> None:
        """Validate a call: whitelisted method, bounded count, safe kwargs."""
        if not isinstance(node.func, ast.Attribute):
            raise UnsafeCommandError("only DataFrame method calls allowed")

        method = node.func.attr
        self.chain += 1
        try:
            if self.chain > MAX_CHAIN:
                raise UnsafeCommandError("method-chain too long")
            if method not in WHITELISTED_DF_METHODS:
                raise UnsafeCommandError(f"method '{method}' not permitted")

            # kwarg / arg checks; _literal_ok is called for its numeric-limit
            # check only, the traversal below validates non-literal arguments.
            for kw in node.keywords:
                if kw.arg in BLOCKED_KW:
                    raise UnsafeCommandError(f"kwarg '{kw.arg}' is blocked")
                _literal_ok(kw.value)
            for arg in node.args:
                _literal_ok(arg)

            self.generic_visit(node)
        finally:
            self.chain -= 1

    # Names
    def visit_Name(self, node: ast.Name) -> None:
        """Allow no identifier other than the DataFrame variable."""
        if node.id != self.df_name:
            raise UnsafeCommandError(f"unexpected variable '{node.id}'")

    # Top-level gate
    def visit(self, node: ast.AST) -> None:
        """Reject any node type outside ALLOWED_NODES before dispatching."""
        if not isinstance(node, self.ALLOWED_NODES):
            raise UnsafeCommandError(f"disallowed node {type(node).__name__}")
        super().visit(node)
|
274
|
+
|
275
|
+
|
276
|
+
def sanitize_command(expr: str, df_name: str = "df") -> str:
    """
    Check *expr* against the security policy and hand it back untouched.

    Args:
        expr: source text of a single Python expression.
        df_name: the only variable name the expression may reference.

    Returns:
        *expr* itself, unmodified, when validation succeeds.

    Raises:
        SyntaxError: if *expr* cannot be parsed as an expression.
        UnsafeCommandError: on the first policy violation encountered.
    """
    parsed = ast.parse(expr, mode="eval")
    validator = CommandValidator(df_name)
    validator.visit(parsed)
    return expr
|
284
|
+
|
5
285
|
|
6
286
|
def stringify(x: Any) -> str:
|
7
287
|
# Convert x to DataFrame if it is not one already
|
langroid/vector_store/base.py
CHANGED
@@ -14,7 +14,7 @@ from langroid.utils.algorithms.graph import components, topological_sort
|
|
14
14
|
from langroid.utils.configuration import settings
|
15
15
|
from langroid.utils.object_registry import ObjectRegistry
|
16
16
|
from langroid.utils.output.printing import print_long_text
|
17
|
-
from langroid.utils.pandas_utils import stringify
|
17
|
+
from langroid.utils.pandas_utils import sanitize_command, stringify
|
18
18
|
from langroid.utils.pydantic_utils import flatten_dict
|
19
19
|
|
20
20
|
logger = logging.getLogger(__name__)
|
@@ -159,11 +159,12 @@ class VectorStore(ABC):
|
|
159
159
|
df = pd.DataFrame(dicts)
|
160
160
|
|
161
161
|
try:
|
162
|
-
# SECURITY:
|
163
|
-
#
|
164
|
-
|
165
|
-
|
166
|
-
|
162
|
+
# SECURITY MITIGATION: Eval input is sanitized to prevent most common
|
163
|
+
# code injection attack vectors.
|
164
|
+
vars = {"df": df}
|
165
|
+
calc = sanitize_command(calc)
|
166
|
+
code = compile(calc, "<calc>", "eval")
|
167
|
+
result = eval(code, vars, {})
|
167
168
|
except Exception as e:
|
168
169
|
# return error message so LLM can fix the calc string if needed
|
169
170
|
err = f"""
|
@@ -20,7 +20,7 @@ langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVb
|
|
20
20
|
langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
|
21
21
|
langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
|
22
22
|
langroid/agent/special/retriever_agent.py,sha256=o2UfqiCGME0t85SZ6qjK041_WZYqXSuV1SeH_3KtVuc,1931
|
23
|
-
langroid/agent/special/table_chat_agent.py,sha256=
|
23
|
+
langroid/agent/special/table_chat_agent.py,sha256=WS-E3QqI6Wm67-GNAcfIIl_TW7C-kYzlpd461hwLz6Y,10403
|
24
24
|
langroid/agent/special/arangodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
25
|
langroid/agent/special/arangodb/arangodb_agent.py,sha256=12Y54c84c9qXV-YXRBcI5HaqyiY75JR4TmqlURYKJAM,25851
|
26
26
|
langroid/agent/special/arangodb/system_messages.py,sha256=udwfLleTdyz_DuxHuoiv2wHEZoAPBPbwdF_ivjIfP5c,6867
|
@@ -114,7 +114,7 @@ langroid/utils/git_utils.py,sha256=WnflJ3R3owhlD0LNdSJakcKhExcEehE1UW5jYVQl8JY,7
|
|
114
114
|
langroid/utils/globals.py,sha256=Az9dOFqR6n9CoTYSqa2kLikQWS0oCQ9DFQIQAnG-2q8,1355
|
115
115
|
langroid/utils/logging.py,sha256=kmdpj1ozH1apf3o00Zgz-ZT-S4BHqUfF81GkY0Gf578,7262
|
116
116
|
langroid/utils/object_registry.py,sha256=iPz9GHzvmCeVoidB3JdAMEKcxJEqTdUr0otQEexDZ5s,2100
|
117
|
-
langroid/utils/pandas_utils.py,sha256=
|
117
|
+
langroid/utils/pandas_utils.py,sha256=Zz_-dKogZR2Ijw5QNTHC2EcEmzgODPU-2_MVgS3274c,7266
|
118
118
|
langroid/utils/pydantic_utils.py,sha256=R7Ps8VP56-eSo-LYHWllFo-SJ2zDmdItuuYpUq2gGJ8,20854
|
119
119
|
langroid/utils/system.py,sha256=q3QJtTSapIwNe8MMhGEM03wgxPLmZiD47_sF1pKx53I,8472
|
120
120
|
langroid/utils/types.py,sha256=-BvyIf_LmAJ5jR9NC7S4CSVNEr3XayAaxJ5o0TiIej0,2992
|
@@ -125,7 +125,7 @@ langroid/utils/output/citations.py,sha256=9W0slQQgzRGLS7hU51mm5UWao5cS_xr8AVosVe
|
|
125
125
|
langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
|
126
126
|
langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
|
127
127
|
langroid/vector_store/__init__.py,sha256=8ktJUVsVUoc7FMmkUFpFBZu7VMWUqQY9zpm4kEJ8yTs,1537
|
128
|
-
langroid/vector_store/base.py,sha256=
|
128
|
+
langroid/vector_store/base.py,sha256=uIRz3ZVmqxzuq2V71Kpys_6-j460gGjHXQIAJWJLI78,14675
|
129
129
|
langroid/vector_store/chromadb.py,sha256=p9mEqJwO2BrL2jSSXfa23kCPlPOwWpF3xJYd5zoWw_c,8661
|
130
130
|
langroid/vector_store/lancedb.py,sha256=Qd20gKjWozPWfW5-D66J6U8dSrJo1yl-maj6s1lbf1c,14688
|
131
131
|
langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
|
@@ -133,7 +133,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
|
|
133
133
|
langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
|
134
134
|
langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
|
135
135
|
langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
|
136
|
-
langroid-0.53.
|
137
|
-
langroid-0.53.
|
138
|
-
langroid-0.53.
|
139
|
-
langroid-0.53.
|
136
|
+
langroid-0.53.15.dist-info/METADATA,sha256=893X5dUY-L85Q0rKwS2Ex6fIH3L_W3TZ0NFqCeMbz4Q,64946
|
137
|
+
langroid-0.53.15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
138
|
+
langroid-0.53.15.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
139
|
+
langroid-0.53.15.dist-info/RECORD,,
|
File without changes
|
File without changes
|