matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,551 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Offline tests for SQL escaping edge cases.
|
17
|
+
|
18
|
+
This test file validates that our exec_driver_sql() implementation correctly handles
|
19
|
+
various edge cases including:
|
20
|
+
- Percent signs in LIKE clauses
|
21
|
+
- JSON strings with colons
|
22
|
+
- Single quotes in strings
|
23
|
+
- Special character combinations
|
24
|
+
- Double escaping scenarios
|
25
|
+
"""
|
26
|
+
|
27
|
+
import pytest
|
28
|
+
from unittest.mock import Mock
|
29
|
+
from matrixone.base_client import BaseMatrixOneClient
|
30
|
+
|
31
|
+
|
32
|
+
class TestSQLEscapingEdgeCases:
|
33
|
+
"""Test SQL escaping for various edge cases"""
|
34
|
+
|
35
|
+
def setup_method(self):
    """Start every test with an empty captured-SQL log.

    The list is filled by the recorder installed in
    ``_create_mock_connection``; resetting it here keeps statements from
    one test out of the next.
    """
    self.captured_sql = list()
|
38
|
+
|
39
|
+
def _create_mock_connection(self):
|
40
|
+
"""Create a mock connection that captures SQL"""
|
41
|
+
mock_conn = Mock()
|
42
|
+
|
43
|
+
def capture_sql(sql):
|
44
|
+
self.captured_sql.append(sql)
|
45
|
+
result = Mock()
|
46
|
+
result.returns_rows = False
|
47
|
+
result.rowcount = 1
|
48
|
+
return result
|
49
|
+
|
50
|
+
mock_conn.exec_driver_sql = Mock(side_effect=capture_sql)
|
51
|
+
return mock_conn
|
52
|
+
|
53
|
+
def test_batch_insert_json_with_colons(self):
    """Check that JSON payloads containing colons appear in the batch-insert SQL.

    The previous version also asserted
    ``"''" not in sql or sql.count("''") >= 0``, which is true for every
    string and therefore checked nothing; it has been removed. None of the
    rows contains a single quote, so there is no quote escaping to verify
    in this test.
    """
    from matrixone.base_client import BaseMatrixOneClient

    client = BaseMatrixOneClient()

    # JSON strings whose colons must not be mistaken for bind-parameter markers.
    data_list = [
        {"id": 1, "json_data": '{"key":"value"}'},
        {"id": 2, "json_data": '{"a":1, "b":2}'},
        {"id": 3, "json_data": '{"nested":{"deep":"value"}}'},
    ]

    sql = client._build_batch_insert_sql("test_table", data_list)

    print(f"Generated SQL:\n{sql}\n")

    # Statement shape: startswith() already implies the table clause is present.
    assert sql.startswith("INSERT INTO test_table")
    assert "VALUES" in sql

    # Each payload must be present, either verbatim or at least by its key.
    assert '{"key":"value"}' in sql or "key" in sql
    assert '{"a":1, "b":2}' in sql or '"a"' in sql
    assert '{"nested":{"deep":"value"}}' in sql or "nested" in sql
|
82
|
+
|
83
|
+
def test_batch_insert_with_percent_signs(self):
    """Check that percent signs in row data reach the built SQL as single ``%``.

    The earlier assertions fell back to ``"test" in sql`` and
    ``"amount" in sql``, which pass even when the surrounding ``%``
    characters are lost. The checks below look for the percent-bearing
    fragments themselves; they avoid the single quotes in the LIKE sample,
    so quote escaping cannot influence the result.
    """
    from matrixone.base_client import BaseMatrixOneClient

    client = BaseMatrixOneClient()

    # Test data with percent signs (all dicts must have same keys)
    data_list = [
        {"id": 1, "text": "100% complete"},
        {"id": 2, "text": "LIKE '%test%'"},
        {"id": 3, "text": "Save %(amount)s dollars"},
    ]

    sql = client._build_batch_insert_sql("test_table", data_list)

    print(f"Generated SQL before escaping:\n{sql}\n")

    # The built SQL should contain each % exactly as supplied.
    assert "100% complete" in sql
    assert "%test%" in sql
    assert "%(amount)s" in sql
    # No input has adjacent percent signs, so "%%" would mean premature doubling.
    assert "%%" not in sql

    # When executed with exec_driver_sql, % will be escaped to %%
    # This is correct behavior for pymysql
|
107
|
+
|
108
|
+
def test_batch_insert_with_single_quotes(self):
    """Check that single quotes inside values are escaped in the built SQL.

    The earlier assertions fell back to ``"O" in sql`` / ``"It" in sql``,
    which are satisfied by the words ``INSERT INTO`` alone, so escaping
    was never verified. Each value must now appear with its quotes
    escaped, either doubled (``''``) or backslash-escaped (``\\'``), the
    two forms MySQL-compatible servers accept inside a quoted literal.
    NOTE(review): the comments in this file state the builder doubles
    quotes; the backslash form is tolerated here in case that differs.
    """
    from matrixone.base_client import BaseMatrixOneClient

    client = BaseMatrixOneClient()

    # Test data with single quotes (all dicts must have same keys)
    data_list = [
        {"id": 1, "text": "O'Brien"},
        {"id": 2, "text": "It's a test"},
        {"id": 3, "text": "He said 'hello'"},
    ]

    sql = client._build_batch_insert_sql("test_table", data_list)

    print(f"Generated SQL with quote escaping:\n{sql}\n")

    for row in data_list:
        raw = row["text"]
        doubled = raw.replace("'", "''")
        backslashed = raw.replace("'", "\\'")
        assert doubled in sql or backslashed in sql, (
            f"single quote in {raw!r} was not escaped"
        )
|
129
|
+
|
130
|
+
def test_batch_insert_null_values(self):
    """Check that ``None`` values are emitted as a bare NULL, not a quoted string."""
    from matrixone.base_client import BaseMatrixOneClient

    # Rows mixing None with ordinary string values.
    rows = [
        {"id": 1, "name": "Alice", "metadata": None},
        {"id": 2, "name": None, "metadata": '{"key":"value"}'},
    ]

    sql = BaseMatrixOneClient()._build_batch_insert_sql("test_table", rows)

    # An unquoted NULL is followed by either a comma or the closing parenthesis.
    null_is_bare = ", NULL," in sql or ", NULL)" in sql
    assert null_is_bare
    assert "'Alice'" in sql

    print(f"Generated SQL with NULL:\n{sql}\n")
|
149
|
+
|
150
|
+
def test_batch_insert_vector_arrays(self):
    """Check that list values are rendered as compact bracketed vectors."""
    from matrixone.base_client import BaseMatrixOneClient

    rows = [
        {"id": 1, "embedding": [0.1, 0.2, 0.3]},
        {"id": 2, "embedding": [1.0, 2.0, 3.0]},
    ]

    sql = BaseMatrixOneClient()._build_batch_insert_sql("test_table", rows)

    # Expected rendering has no spaces after the commas.
    for rendered in ("[0.1,0.2,0.3]", "[1.0,2.0,3.0]"):
        assert rendered in sql

    print(f"Generated SQL with vectors:\n{sql}\n")
|
169
|
+
|
170
|
+
def test_complex_json_with_multiple_special_chars(self):
    """Check JSON payloads that combine quotes, percent signs and brackets.

    The earlier assertion accepted the unescaped text ``O'Brien``, so it
    passed whether or not the embedded quote was escaped. It now requires
    an escaped form: doubled (``''``) or backslash-escaped (``\\'``).
    NOTE(review): the comments in this file state the builder doubles
    quotes; the backslash form is tolerated here in case that differs.
    """
    from matrixone.base_client import BaseMatrixOneClient

    client = BaseMatrixOneClient()

    # Complex JSON with various special characters
    data_list = [
        {
            "id": 1,
            "json_data": '{"name":"O\'Brien", "progress":"100%", "tags":["tag1","tag2"]}',
        },
        {
            "id": 2,
            "json_data": '{"query":"SELECT * FROM table WHERE col LIKE \'%test%\'"}',
        },
    ]

    sql = client._build_batch_insert_sql("test_table", data_list)

    print(f"Generated SQL with complex JSON:\n{sql}\n")

    # Verify SQL is generated
    assert "INSERT INTO test_table" in sql
    assert "VALUES" in sql

    # The quote inside the JSON must not reach the statement unescaped.
    assert "O''Brien" in sql or "O\\'Brien" in sql
|
199
|
+
|
200
|
+
def test_percent_escaping_in_exec_driver_sql(self):
|
201
|
+
"""Test that % is correctly escaped to %% when using exec_driver_sql"""
|
202
|
+
mock_conn = self._create_mock_connection()
|
203
|
+
|
204
|
+
# Simulate a LIKE query
|
205
|
+
test_sql = "SELECT * FROM table WHERE name LIKE '%test%'"
|
206
|
+
|
207
|
+
# Simulate what happens in _exec_sql_safe or execute
|
208
|
+
if hasattr(mock_conn, 'exec_driver_sql'):
|
209
|
+
escaped_sql = test_sql.replace('%', '%%')
|
210
|
+
mock_conn.exec_driver_sql(escaped_sql)
|
211
|
+
|
212
|
+
# Verify the SQL was escaped
|
213
|
+
assert len(self.captured_sql) == 1
|
214
|
+
captured = self.captured_sql[0]
|
215
|
+
|
216
|
+
# % should be escaped to %%
|
217
|
+
assert "LIKE '%%test%%'" in captured
|
218
|
+
|
219
|
+
print(f"Original SQL: {test_sql}")
|
220
|
+
print(f"Escaped SQL: {captured}\n")
|
221
|
+
|
222
|
+
def test_json_colon_not_treated_as_bind_param(self):
|
223
|
+
"""Test that JSON colons are NOT treated as bind parameters"""
|
224
|
+
mock_conn = self._create_mock_connection()
|
225
|
+
|
226
|
+
# JSON with colons that could be mistaken for :1, :2 bind params
|
227
|
+
test_sql = "INSERT INTO table (data) VALUES ('{\"a\":1, \"b\":2}')"
|
228
|
+
|
229
|
+
# This is what should happen (no : should remain as bind param marker)
|
230
|
+
if hasattr(mock_conn, 'exec_driver_sql'):
|
231
|
+
escaped_sql = test_sql.replace('%', '%%')
|
232
|
+
mock_conn.exec_driver_sql(escaped_sql)
|
233
|
+
|
234
|
+
assert len(self.captured_sql) == 1
|
235
|
+
captured = self.captured_sql[0]
|
236
|
+
|
237
|
+
# Colons should still be in the SQL (not converted to bind params)
|
238
|
+
assert '\"a\":1' in captured or '"a":1' in captured
|
239
|
+
|
240
|
+
print(f"JSON SQL: {captured}\n")
|
241
|
+
|
242
|
+
def test_mixed_special_characters(self):
|
243
|
+
"""Test SQL with multiple types of special characters"""
|
244
|
+
mock_conn = self._create_mock_connection()
|
245
|
+
|
246
|
+
# SQL with %, :, and '
|
247
|
+
test_sql = "SELECT * FROM logs WHERE msg LIKE '%error%' AND data = '{\"level\":\"critical\"}'"
|
248
|
+
|
249
|
+
if hasattr(mock_conn, 'exec_driver_sql'):
|
250
|
+
escaped_sql = test_sql.replace('%', '%%')
|
251
|
+
mock_conn.exec_driver_sql(escaped_sql)
|
252
|
+
|
253
|
+
assert len(self.captured_sql) == 1
|
254
|
+
captured = self.captured_sql[0]
|
255
|
+
|
256
|
+
# % should be escaped to %%
|
257
|
+
assert '%%error%%' in captured
|
258
|
+
|
259
|
+
# JSON colons should remain
|
260
|
+
assert 'level' in captured and 'critical' in captured
|
261
|
+
|
262
|
+
print(f"Original: {test_sql}")
|
263
|
+
print(f"Escaped: {captured}\n")
|
264
|
+
|
265
|
+
def test_double_percent_not_double_escaped(self):
|
266
|
+
"""Test that %% is not escaped to %%%% (no double escaping)"""
|
267
|
+
mock_conn = self._create_mock_connection()
|
268
|
+
|
269
|
+
# SQL that already has %%
|
270
|
+
test_sql = "SELECT * FROM table WHERE value LIKE '%%already_escaped%%'"
|
271
|
+
|
272
|
+
if hasattr(mock_conn, 'exec_driver_sql'):
|
273
|
+
escaped_sql = test_sql.replace('%', '%%')
|
274
|
+
mock_conn.exec_driver_sql(escaped_sql)
|
275
|
+
|
276
|
+
assert len(self.captured_sql) == 1
|
277
|
+
captured = self.captured_sql[0]
|
278
|
+
|
279
|
+
# %% should become %%%%
|
280
|
+
assert '%%%%already_escaped%%%%' in captured
|
281
|
+
|
282
|
+
print(f"Original: {test_sql}")
|
283
|
+
print(f"Escaped: {captured}")
|
284
|
+
print("⚠️ WARNING: This is double escaping! May need smarter logic.\n")
|
285
|
+
|
286
|
+
def test_chinese_characters_with_json(self):
    """Check that Chinese text inside JSON strings is preserved in the built SQL."""
    from matrixone.base_client import BaseMatrixOneClient

    # JSON payloads with Chinese keys and values.
    rows = [
        {"id": 1, "json_data": '{"中文":"測試", "english":"test"}'},
        {"id": 2, "json_data": '{"标题":"学习教材", "描述":"适合初学者"}'},
    ]

    sql = BaseMatrixOneClient()._build_batch_insert_sql("test_table", rows)

    # Sample fragments from both rows must appear unchanged.
    for fragment in ("中文", "測試", "学习教材"):
        assert fragment in sql

    print(f"Generated SQL with Chinese:\n{sql}\n")
|
306
|
+
|
307
|
+
def test_edge_case_empty_json_object(self):
    """Check that empty JSON objects and arrays are kept in the built SQL."""
    from matrixone.base_client import BaseMatrixOneClient

    rows = [
        {"id": 1, "json_data": '{}'},
        {"id": 2, "json_data": '[]'},
        {"id": 3, "json_data": '{"empty":""}'},
    ]

    sql = BaseMatrixOneClient()._build_batch_insert_sql("test_table", rows)

    # Both empty containers must be present.
    for empty_literal in ('{}', '[]'):
        assert empty_literal in sql

    print(f"Generated SQL with empty JSON:\n{sql}\n")
|
326
|
+
|
327
|
+
def test_backslash_in_strings(self):
    """Check that backslashes in values appear in the SQL, doubled or as-is.

    Either rendering is accepted; the printed warning marks backslash
    handling as needing a safety review.
    """
    from matrixone.base_client import BaseMatrixOneClient

    # Values containing literal backslashes (all dicts share the same keys).
    rows = [
        {"id": 1, "text": "C:\\Users\\test"},
        {"id": 2, "text": "\\d+"},
        {"id": 3, "text": "test\\nvalue"},
    ]

    sql = BaseMatrixOneClient()._build_batch_insert_sql("test_table", rows)

    # For each sample accept the doubled form or the untouched form.
    path_present = "C:\\\\Users" in sql or "C:\\Users" in sql
    regex_present = "\\\\d+" in sql or "\\d+" in sql
    assert path_present
    assert regex_present

    print(f"Generated SQL with backslashes:\n{sql}\n")
    print("⚠️ WARNING: Backslash handling may need review for SQL injection safety.\n")
|
348
|
+
|
349
|
+
def test_sql_injection_attempt_in_json(self):
    """Check that SQL-looking text inside a JSON value stays inside a quoted literal."""
    from matrixone.base_client import BaseMatrixOneClient

    # Payloads that would be harmful if they escaped their string literal.
    rows = [
        {"id": 1, "json_data": '{"value":"test", "hack":"1; DROP TABLE users;"}'},
        {"id": 2, "json_data": '{"sql":"SELECT * FROM passwords"}'},
    ]

    sql = BaseMatrixOneClient()._build_batch_insert_sql("test_table", rows)

    # The text is present exactly once: the copy carried in the data.
    assert "DROP TABLE" in sql  # It's there, but quoted
    assert sql.count("DROP TABLE") == 1  # Only once, in the data

    # Splitting on the quote character alternates outside/inside segments;
    # the even-indexed ones lie outside any quoted literal.
    for outside in sql.split("'")[::2]:
        assert "DROP TABLE" not in outside, "DROP TABLE should only be inside quotes"

    print(f"Generated SQL with injection attempt:\n{sql}\n")
    print("✅ Injection attempt is safely quoted\n")
|
375
|
+
|
376
|
+
def test_expected_sql_format_for_json_insert(self):
    """Check the overall shape of the statement built for a single JSON row.

    The ``expected`` string is printed for comparison only; the assertions
    check the column list, the quoted id and the presence of the JSON key
    and value.
    """
    from matrixone.base_client import BaseMatrixOneClient

    rows = [
        {"id": 1, "json_col": '{"key":"value"}'},
    ]

    sql = BaseMatrixOneClient()._build_batch_insert_sql("my_table", rows)

    # Reference rendering, shown next to the real output below.
    expected = "INSERT INTO my_table (id, json_col) VALUES ('1', '{\"key\":\"value\"}')"

    # Structural checks.
    assert "INSERT INTO my_table (id, json_col) VALUES" in sql
    assert "'1'" in sql
    assert "key" in sql
    assert "value" in sql

    print(f"Generated SQL:\n{sql}")
    print(f"\nExpected pattern:\n{expected}\n")
|
398
|
+
|
399
|
+
def test_percent_in_insert_then_exec_driver_sql(self):
    """
    Walk the full flow: build SQL containing % -> double it to %% -> execute.

    The execution step runs against the recording mock connection.
    """
    from matrixone.base_client import BaseMatrixOneClient

    conn = self._create_mock_connection()

    # Step 1: build an INSERT whose data contains a percent sign.
    rows = [
        {"id": 1, "text": "100% done"},
    ]
    sql = BaseMatrixOneClient()._build_batch_insert_sql("test_table", rows)

    print(f"Step 1 - Built SQL:\n{sql}\n")

    # Step 2: apply the % doubling used for exec_driver_sql and run it.
    if hasattr(conn, "exec_driver_sql"):
        conn.exec_driver_sql(sql.replace("%", "%%"))

    # Inspect what was actually handed to the driver.
    assert len(self.captured_sql) == 1
    final_sql = self.captured_sql[0]

    print(f"Step 2 - Escaped SQL sent to driver:\n{final_sql}\n")

    # The single % must have been doubled.
    assert "100%% done" in final_sql

    print("✅ Percent escaping works correctly\n")
|
432
|
+
|
433
|
+
def test_like_clause_full_flow(self):
|
434
|
+
"""Test LIKE clause through the full execution flow"""
|
435
|
+
mock_conn = self._create_mock_connection()
|
436
|
+
|
437
|
+
# A LIKE query
|
438
|
+
original_sql = "SELECT * FROM users WHERE name LIKE '%John%'"
|
439
|
+
|
440
|
+
print(f"Step 1 - Original SQL:\n{original_sql}\n")
|
441
|
+
|
442
|
+
# Simulate what execute() does
|
443
|
+
if hasattr(mock_conn, 'exec_driver_sql'):
|
444
|
+
escaped_sql = original_sql.replace('%', '%%')
|
445
|
+
mock_conn.exec_driver_sql(escaped_sql)
|
446
|
+
|
447
|
+
assert len(self.captured_sql) == 1
|
448
|
+
final_sql = self.captured_sql[0]
|
449
|
+
|
450
|
+
print(f"Step 2 - SQL sent to driver:\n{final_sql}\n")
|
451
|
+
|
452
|
+
# LIKE '%John%' should become LIKE '%%John%%'
|
453
|
+
assert "LIKE '%%John%%'" in final_sql
|
454
|
+
|
455
|
+
print("✅ LIKE clause escaping works correctly\n")
|
456
|
+
|
457
|
+
def test_potential_double_escaping_issue(self):
|
458
|
+
"""
|
459
|
+
Test potential issue: what if SQL already has %%?
|
460
|
+
This is a known limitation of the current approach.
|
461
|
+
"""
|
462
|
+
mock_conn = self._create_mock_connection()
|
463
|
+
|
464
|
+
# SQL that already has %% (maybe from previous escaping?)
|
465
|
+
test_sql = "SELECT * FROM table WHERE value = '%%'"
|
466
|
+
|
467
|
+
print(f"Step 1 - Input SQL (already has %%):\n{test_sql}\n")
|
468
|
+
|
469
|
+
if hasattr(mock_conn, 'exec_driver_sql'):
|
470
|
+
escaped_sql = test_sql.replace('%', '%%')
|
471
|
+
mock_conn.exec_driver_sql(escaped_sql)
|
472
|
+
|
473
|
+
final_sql = self.captured_sql[0]
|
474
|
+
|
475
|
+
print(f"Step 2 - After escaping:\n{final_sql}\n")
|
476
|
+
|
477
|
+
# %% becomes %%%%
|
478
|
+
assert "%%%%" in final_sql
|
479
|
+
|
480
|
+
print("⚠️ WARNING: Double escaping detected!")
|
481
|
+
print(" If SQL already has %%, it will become %%%%")
|
482
|
+
print(" This is a known limitation but unlikely in normal usage.\n")
|
483
|
+
|
484
|
+
def test_recommended_sql_patterns(self):
    """Print the SQL built for four recommended data patterns.

    This is documentation by example: it makes no assertions and only
    shows the statement generated for JSON, quoted strings, NULL and
    vector values.
    """
    from matrixone.base_client import BaseMatrixOneClient

    client = BaseMatrixOneClient()
    rule = "=" * 70

    # (heading, rows) pairs, shown in this order.
    patterns = [
        ("\n1. JSON Inserts:", [{"id": 1, "json": '{"key":"value"}'}]),
        ("\n2. Single Quotes:", [{"id": 1, "name": "O'Brien"}]),
        ("\n3. NULL Values:", [{"id": 1, "optional": None}]),
        ("\n4. Vectors:", [{"id": 1, "vec": [1.0, 2.0, 3.0]}]),
    ]

    print(rule)
    print("RECOMMENDED SQL PATTERNS")
    print(rule)

    for heading, data in patterns:
        print(heading)
        sql = client._build_batch_insert_sql("t", data)
        print(f" ✅ {sql}")

    print("\n" + rule)
|
519
|
+
|
520
|
+
def test_potential_issues_summary(self):
|
521
|
+
"""Summary of potential issues and recommendations"""
|
522
|
+
print("\n" + "=" * 70)
|
523
|
+
print("POTENTIAL ISSUES & RECOMMENDATIONS")
|
524
|
+
print("=" * 70)
|
525
|
+
|
526
|
+
print("\n✅ CORRECTLY HANDLED:")
|
527
|
+
print(" 1. JSON with colons: {\"a\":1} - colons preserved")
|
528
|
+
print(" 2. Single quotes: O'Brien -> O''Brien")
|
529
|
+
print(" 3. LIKE clauses: '%test%' -> '%%test%%'")
|
530
|
+
print(" 4. NULL values: None -> NULL (unquoted)")
|
531
|
+
print(" 5. Vectors: [1,2,3] -> '[1,2,3]'")
|
532
|
+
|
533
|
+
print("\n⚠️ KNOWN LIMITATIONS:")
|
534
|
+
print(" 1. Double escaping: If SQL already has %%, becomes %%%%")
|
535
|
+
print(" Impact: Low (unlikely in normal usage)")
|
536
|
+
print(" Mitigation: Don't pre-escape % in user code")
|
537
|
+
|
538
|
+
print("\n⚠️ REQUIRES ATTENTION:")
|
539
|
+
print(" 1. Backslash handling: May not be fully MySQL-compatible")
|
540
|
+
print(" Recommendation: Test with real data containing backslashes")
|
541
|
+
|
542
|
+
print("\n 2. Binary data: Not tested")
|
543
|
+
print(" Recommendation: Use parameterized queries for binary data")
|
544
|
+
|
545
|
+
print("\n💡 BEST PRACTICES:")
|
546
|
+
print(" 1. Use batch_insert for JSON data ✅")
|
547
|
+
print(" 2. Don't pre-escape % in your data ✅")
|
548
|
+
print(" 3. Single quotes are auto-escaped ✅")
|
549
|
+
print(" 4. For binary data, consider using proper parameter binding")
|
550
|
+
|
551
|
+
print("\n" + "=" * 70 + "\n")
|