crate 0.35.1__py2.py3-none-any.whl → 1.0.0.dev0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crate/client/__init__.py +1 -1
- crate/testing/test_datetime_old.py +90 -0
- crate-1.0.0.dev0-py3.11-nspkg.pth +1 -0
- {crate-0.35.1.dist-info → crate-1.0.0.dev0.dist-info}/METADATA +15 -19
- crate-1.0.0.dev0.dist-info/RECORD +26 -0
- {crate-0.35.1.dist-info → crate-1.0.0.dev0.dist-info}/WHEEL +1 -1
- crate/client/sqlalchemy/__init__.py +0 -50
- crate/client/sqlalchemy/compat/__init__.py +0 -0
- crate/client/sqlalchemy/compat/api13.py +0 -156
- crate/client/sqlalchemy/compat/core10.py +0 -264
- crate/client/sqlalchemy/compat/core14.py +0 -359
- crate/client/sqlalchemy/compat/core20.py +0 -447
- crate/client/sqlalchemy/compiler.py +0 -318
- crate/client/sqlalchemy/dialect.py +0 -369
- crate/client/sqlalchemy/predicates/__init__.py +0 -99
- crate/client/sqlalchemy/sa_version.py +0 -28
- crate/client/sqlalchemy/support.py +0 -62
- crate/client/sqlalchemy/tests/__init__.py +0 -59
- crate/client/sqlalchemy/tests/array_test.py +0 -111
- crate/client/sqlalchemy/tests/bulk_test.py +0 -256
- crate/client/sqlalchemy/tests/compiler_test.py +0 -434
- crate/client/sqlalchemy/tests/connection_test.py +0 -129
- crate/client/sqlalchemy/tests/create_table_test.py +0 -313
- crate/client/sqlalchemy/tests/datetime_test.py +0 -90
- crate/client/sqlalchemy/tests/dialect_test.py +0 -156
- crate/client/sqlalchemy/tests/dict_test.py +0 -460
- crate/client/sqlalchemy/tests/function_test.py +0 -47
- crate/client/sqlalchemy/tests/insert_from_select_test.py +0 -85
- crate/client/sqlalchemy/tests/match_test.py +0 -137
- crate/client/sqlalchemy/tests/query_caching.py +0 -143
- crate/client/sqlalchemy/tests/update_test.py +0 -115
- crate/client/sqlalchemy/tests/warnings_test.py +0 -64
- crate/client/sqlalchemy/types.py +0 -277
- crate/client/tests.py +0 -416
- crate/testing/tests.py +0 -34
- crate-0.35.1-py3.11-nspkg.pth +0 -1
- crate-0.35.1.dist-info/RECORD +0 -55
- crate-0.35.1.dist-info/entry_points.txt +0 -2
- {crate-0.35.1.dist-info → crate-1.0.0.dev0.dist-info}/LICENSE +0 -0
- {crate-0.35.1.dist-info → crate-1.0.0.dev0.dist-info}/NOTICE +0 -0
- {crate-0.35.1.dist-info → crate-1.0.0.dev0.dist-info}/namespace_packages.txt +0 -0
- {crate-0.35.1.dist-info → crate-1.0.0.dev0.dist-info}/top_level.txt +0 -0
@@ -1,99 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8; -*-
|
2
|
-
#
|
3
|
-
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
-
# license agreements. See the NOTICE file distributed with this work for
|
5
|
-
# additional information regarding copyright ownership. Crate licenses
|
6
|
-
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
-
# you may not use this file except in compliance with the License. You may
|
8
|
-
# obtain a copy of the License at
|
9
|
-
#
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
#
|
12
|
-
# Unless required by applicable law or agreed to in writing, software
|
13
|
-
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
-
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
-
# License for the specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
#
|
18
|
-
# However, if you have executed another commercial license agreement
|
19
|
-
# with Crate these terms will supersede the license and you may use the
|
20
|
-
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
-
|
22
|
-
from sqlalchemy.sql.expression import ColumnElement, literal
|
23
|
-
from sqlalchemy.ext.compiler import compiles
|
24
|
-
|
25
|
-
|
26
|
-
class Match(ColumnElement):
    """Expression element for a fulltext ``match`` predicate.

    Stores the column reference(s), the search term, an optional match type
    and optional match options. The ``compile_*`` helpers each render one
    fragment of the final SQL text using the supplied compiler.
    """

    inherit_cache = True

    def __init__(self, column, term, match_type=None, options=None):
        super().__init__()
        self.column, self.term = column, term
        self.match_type, self.options = match_type, options

    def compile_column(self, compiler):
        """Render the column argument.

        A dictionary is rendered as a parenthesized, sorted, comma-separated
        list of ``<compiled key> <value>`` pairs; anything else is compiled
        as a single expression.
        """
        target = self.column
        if not isinstance(target, dict):
            return "{0}".format(compiler.process(target))
        rendered = sorted(
            "{0} {1}".format(compiler.process(ref), boost)
            for ref, boost in target.items()
        )
        return "({0})".format(', '.join(rendered))

    def compile_term(self, compiler):
        """Render the search term as a compiled literal."""
        term_literal = literal(self.term)
        return compiler.process(term_literal)

    def compile_using(self, compiler):
        """Render the ``using <match_type> [with (...)]`` suffix.

        Returns ``None`` when neither a match type nor options are set.

        :raises ValueError: if options are given without a match type.
        """
        if not self.match_type:
            if self.options:
                raise ValueError("missing match_type. "
                                 "It's not allowed to specify options "
                                 "without match_type")
            return None
        clause = "using {0}".format(self.match_type)
        extra = self.with_clause()
        return ' '.join([clause, extra]) if extra else clause

    def with_clause(self):
        """Render the options as ``with (k=v, ...)``, sorted; ``None`` if unset."""
        if not self.options:
            return None
        pairs = sorted(
            "{0}={1}".format(name, value)
            for name, value in self.options.items()
        )
        return "with ({0})".format(', '.join(pairs))
|
69
|
-
|
70
|
-
|
71
|
-
def match(column, term, match_type=None, options=None):
    """Build a :class:`Match` predicate for fulltext search.

    :param column: Column, index or subcolumn reference, or a dictionary
        mapping subcolumns to boost values.

    :param term: Search term. It is analyzed, and the resulting tokens are
        compared to the index.

    :param match_type (optional): Match type, which determines how the term
        is applied and how the score is calculated.

    :param options (optional): Dictionary of match options that tune the
        match type's behaviour. Requires a match type to be specified.
    """
    predicate = Match(column, term, match_type, options)
    return predicate
|
88
|
-
|
89
|
-
|
90
|
-
@compiles(Match)
def compile_match(match, compiler, **kwargs):
    """Compile a ``Match`` element to ``match(<column>, <term>) [using ...]``."""
    # Compile column, term and suffix in this fixed order.
    column_sql = match.compile_column(compiler)
    term_sql = match.compile_term(compiler)
    parts = ["match(%s, %s)" % (column_sql, term_sql)]

    using_sql = match.compile_using(compiler)
    if using_sql:
        parts.append(using_sql)
    return ' '.join(parts)
|
@@ -1,28 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8; -*-
|
2
|
-
#
|
3
|
-
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
-
# license agreements. See the NOTICE file distributed with this work for
|
5
|
-
# additional information regarding copyright ownership. Crate licenses
|
6
|
-
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
-
# you may not use this file except in compliance with the License. You may
|
8
|
-
# obtain a copy of the License at
|
9
|
-
#
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
#
|
12
|
-
# Unless required by applicable law or agreed to in writing, software
|
13
|
-
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
-
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
-
# License for the specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
#
|
18
|
-
# However, if you have executed another commercial license agreement
|
19
|
-
# with Crate these terms will supersede the license and you may use the
|
20
|
-
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
-
|
22
|
-
import sqlalchemy as sa
|
23
|
-
from verlib2 import Version
|
24
|
-
|
25
|
-
# Version of the SQLAlchemy package that is importable in this environment.
SA_VERSION = Version(sa.__version__)

# Reference versions for comparisons against ``SA_VERSION``.
SA_1_4, SA_2_0 = Version('1.4.0b1'), Version('2.0.0')
|
@@ -1,62 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8; -*-
|
2
|
-
#
|
3
|
-
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
-
# license agreements. See the NOTICE file distributed with this work for
|
5
|
-
# additional information regarding copyright ownership. Crate licenses
|
6
|
-
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
-
# you may not use this file except in compliance with the License. You may
|
8
|
-
# obtain a copy of the License at
|
9
|
-
#
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
#
|
12
|
-
# Unless required by applicable law or agreed to in writing, software
|
13
|
-
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
-
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
-
# License for the specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
#
|
18
|
-
# However, if you have executed another commercial license agreement
|
19
|
-
# with Crate these terms will supersede the license and you may use the
|
20
|
-
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
-
import logging
|
22
|
-
|
23
|
-
|
24
|
-
logger = logging.getLogger(__name__)


def insert_bulk(pd_table, conn, keys, data_iter):
    """
    Use CrateDB's "bulk operations" endpoint as a fast path for pandas' and Dask's `to_sql()` [1] method.

    The idea is to break out of SQLAlchemy, compile the insert statement, and use the raw
    DBAPI connection client, in order to invoke a request using `bulk_parameters` [2]::

        cursor.execute(sql=sql, bulk_parameters=data)

    The vanilla implementation, used by SQLAlchemy, is::

        data = [dict(zip(keys, row)) for row in data_iter]
        conn.execute(pd_table.table.insert(), data)

    Batch chunking will happen outside of this function, for example [3] demonstrates
    the relevant code in `pandas.io.sql`.

    :param pd_table: Object exposing the target table as ``pd_table.table``.
    :param conn: Connection the statement is compiled against; its
        ``_dbapi_connection`` attribute provides the raw cursor.
    :param keys: Column names of the batch. Unused here, because rows are
        forwarded positionally.
    :param data_iter: Iterable of rows making up one batch.
    :return: ``None``.

    Any exception raised by the cursor propagates to the caller; the cursor
    is closed in either case.

    [1] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html
    [2] https://crate.io/docs/crate/reference/en/latest/interfaces/http.html#bulk-operations
    [3] https://github.com/pandas-dev/pandas/blob/v2.0.1/pandas/io/sql.py#L1011-L1027
    """

    # Compile SQL statement and materialize batch.
    sql = str(pd_table.table.insert().compile(bind=conn))
    data = list(data_iter)

    # For debugging and tracing the batches running through this method.
    # `isEnabledFor` honours levels inherited from parent loggers, whereas
    # comparing `logger.level` only matches a level set on this very logger.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Bulk SQL: %s", sql)
        logger.debug("Bulk records: %s", len(data))

    # Invoke bulk insert operation, releasing the cursor even on failure.
    cursor = conn._dbapi_connection.cursor()
    try:
        cursor.execute(sql=sql, bulk_parameters=data)
    finally:
        cursor.close()
|
@@ -1,59 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
from ..compat.api13 import monkeypatch_amend_select_sa14, monkeypatch_add_connectionfairy_driver_connection
|
4
|
-
from ..sa_version import SA_1_4, SA_VERSION
|
5
|
-
from ...test_util import ParametrizedTestCase
|
6
|
-
|
7
|
-
# SQLAlchemy 1.3's `sql.select()` still has the old calling semantics,
# while the test cases are written against the modern ones. Apply the
# compatibility patches before the test modules below are imported.
if SA_VERSION < SA_1_4:
    monkeypatch_amend_select_sa14()
    monkeypatch_add_connectionfairy_driver_connection()
|
12
|
-
|
13
|
-
from unittest import TestLoader, TestSuite
|
14
|
-
from .connection_test import SqlAlchemyConnectionTest
|
15
|
-
from .dict_test import SqlAlchemyDictTypeTest
|
16
|
-
from .datetime_test import SqlAlchemyDateAndDateTimeTest
|
17
|
-
from .compiler_test import SqlAlchemyCompilerTest, SqlAlchemyDDLCompilerTest
|
18
|
-
from .update_test import SqlAlchemyUpdateTest
|
19
|
-
from .match_test import SqlAlchemyMatchTest
|
20
|
-
from .bulk_test import SqlAlchemyBulkTest
|
21
|
-
from .insert_from_select_test import SqlAlchemyInsertFromSelectTest
|
22
|
-
from .create_table_test import SqlAlchemyCreateTableTest
|
23
|
-
from .array_test import SqlAlchemyArrayTypeTest
|
24
|
-
from .dialect_test import SqlAlchemyDialectTest
|
25
|
-
from .function_test import SqlAlchemyFunctionTest
|
26
|
-
from .warnings_test import SqlAlchemyWarningsTest
|
27
|
-
from .query_caching import SqlAlchemyQueryCompilationCaching
|
28
|
-
|
29
|
-
|
30
|
-
makeSuite = TestLoader().loadTestsFromTestCase


def test_suite_unit():
    """Assemble the suite of SQLAlchemy dialect unit tests.

    ``SqlAlchemyCompilerTest`` is registered once as-is and then once per
    ``server_version_info`` parameter value. Every other test case is
    registered exactly once.

    :return: a ``unittest.TestSuite`` holding all unit test cases.
    """
    tests = TestSuite()

    for case in (
        SqlAlchemyConnectionTest,
        SqlAlchemyDictTypeTest,
        SqlAlchemyDateAndDateTimeTest,
        SqlAlchemyCompilerTest,
        SqlAlchemyDDLCompilerTest,
    ):
        tests.addTest(makeSuite(case))

    # Run the compiler tests again for each server version of interest.
    for server_version_info in (None, (4, 0, 12), (4, 1, 10)):
        tests.addTest(ParametrizedTestCase.parametrize(
            SqlAlchemyCompilerTest,
            param={"server_version_info": server_version_info},
        ))

    # NOTE: `SqlAlchemyInsertFromSelectTest` used to be added twice here,
    # which only made its tests run twice.
    for case in (
        SqlAlchemyUpdateTest,
        SqlAlchemyMatchTest,
        SqlAlchemyCreateTableTest,
        SqlAlchemyBulkTest,
        SqlAlchemyInsertFromSelectTest,
        SqlAlchemyDialectTest,
        SqlAlchemyFunctionTest,
        SqlAlchemyArrayTypeTest,
        SqlAlchemyWarningsTest,
    ):
        tests.addTest(makeSuite(case))

    return tests
|
54
|
-
|
55
|
-
|
56
|
-
def test_suite_integration():
    """Assemble the integration test suite (query compilation caching only)."""
    caching_tests = makeSuite(SqlAlchemyQueryCompilationCaching)
    return TestSuite([caching_tests])
|
@@ -1,111 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8; -*-
|
2
|
-
#
|
3
|
-
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
-
# license agreements. See the NOTICE file distributed with this work for
|
5
|
-
# additional information regarding copyright ownership. Crate licenses
|
6
|
-
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
-
# you may not use this file except in compliance with the License. You may
|
8
|
-
# obtain a copy of the License at
|
9
|
-
#
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
#
|
12
|
-
# Unless required by applicable law or agreed to in writing, software
|
13
|
-
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
-
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
-
# License for the specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
#
|
18
|
-
# However, if you have executed another commercial license agreement
|
19
|
-
# with Crate these terms will supersede the license and you may use the
|
20
|
-
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
-
|
22
|
-
|
23
|
-
from unittest import TestCase
|
24
|
-
from unittest.mock import patch, MagicMock
|
25
|
-
|
26
|
-
import sqlalchemy as sa
|
27
|
-
from sqlalchemy.sql import operators
|
28
|
-
from sqlalchemy.orm import Session
|
29
|
-
try:
|
30
|
-
from sqlalchemy.orm import declarative_base
|
31
|
-
except ImportError:
|
32
|
-
from sqlalchemy.ext.declarative import declarative_base
|
33
|
-
|
34
|
-
from crate.client.cursor import Cursor
|
35
|
-
|
36
|
-
# Stand-ins for the cursor class and the cursor instance it hands out, so
# that executed statements can be inspected on `fake_cursor`.
fake_cursor = MagicMock(name='fake_cursor')
FakeCursor = MagicMock(name='FakeCursor', spec=Cursor, return_value=fake_cursor)
|
39
|
-
|
40
|
-
|
41
|
-
@patch('crate.client.connection.Cursor', FakeCursor)
class SqlAlchemyArrayTypeTest(TestCase):
    """Tests for ``sa.ARRAY`` columns: DDL, inserts and ``ANY`` filters.

    The cursor class is patched with ``FakeCursor``, so assertions inspect
    the calls recorded on ``fake_cursor``.
    """

    def setUp(self):
        self.engine = sa.create_engine('crate://')
        Base = declarative_base()
        self.metadata = sa.MetaData()

        class User(Base):
            __tablename__ = 'users'

            name = sa.Column(sa.String, primary_key=True)
            friends = sa.Column(sa.ARRAY(sa.String))
            scores = sa.Column(sa.ARRAY(sa.Integer))

        self.User = User
        self.session = Session(bind=self.engine)

    def assertSQL(self, expected_str, actual_expr):
        """Compare the stringified expression, newlines removed, to the expected SQL."""
        self.assertEqual(expected_str, str(actual_expr).replace('\n', ''))

    def test_create_with_array(self):
        t1 = sa.Table('t', self.metadata,
                      sa.Column('int_array', sa.ARRAY(sa.Integer)),
                      sa.Column('str_array', sa.ARRAY(sa.String))
                      )
        t1.create(self.engine)
        fake_cursor.execute.assert_called_with(
            ('\nCREATE TABLE t (\n\t'
             'int_array ARRAY(INT), \n\t'
             'str_array ARRAY(STRING)\n)\n\n'),
            ())

    def test_array_insert(self):
        trillian = self.User(name='Trillian', friends=['Arthur', 'Ford'])
        self.session.add(trillian)
        self.session.commit()
        fake_cursor.execute.assert_called_with(
            ("INSERT INTO users (name, friends, scores) VALUES (?, ?, ?)"),
            ('Trillian', ['Arthur', 'Ford'], None))

    def test_any(self):
        s = self.session.query(self.User.name) \
            .filter(self.User.friends.any("arthur"))
        self.assertSQL(
            "SELECT users.name AS users_name FROM users "
            "WHERE ? = ANY (users.friends)",
            s
        )

    def test_any_with_operator(self):
        s = self.session.query(self.User.name) \
            .filter(self.User.scores.any(6, operator=operators.lt))
        self.assertSQL(
            "SELECT users.name AS users_name FROM users "
            "WHERE ? < ANY (users.scores)",
            s
        )

    def test_multidimensional_arrays(self):
        t1 = sa.Table('t', self.metadata,
                      sa.Column('unsupported_array',
                                sa.ARRAY(sa.Integer, dimensions=2)),
                      )
        # `assertRaises` reports "NotImplementedError not raised" when the
        # statement unexpectedly succeeds, rather than comparing the string
        # 'None' against the expected message.
        with self.assertRaises(NotImplementedError) as raised:
            t1.create(self.engine)
        self.assertEqual(str(raised.exception),
                         "CrateDB doesn't support multidimensional arrays")
|
@@ -1,256 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8; -*-
|
2
|
-
#
|
3
|
-
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
-
# license agreements. See the NOTICE file distributed with this work for
|
5
|
-
# additional information regarding copyright ownership. Crate licenses
|
6
|
-
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
-
# you may not use this file except in compliance with the License. You may
|
8
|
-
# obtain a copy of the License at
|
9
|
-
#
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
#
|
12
|
-
# Unless required by applicable law or agreed to in writing, software
|
13
|
-
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
-
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
-
# License for the specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
#
|
18
|
-
# However, if you have executed another commercial license agreement
|
19
|
-
# with Crate these terms will supersede the license and you may use the
|
20
|
-
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
-
import math
|
22
|
-
import sys
|
23
|
-
from unittest import TestCase, skipIf
|
24
|
-
from unittest.mock import patch, MagicMock
|
25
|
-
|
26
|
-
import sqlalchemy as sa
|
27
|
-
from sqlalchemy.orm import Session
|
28
|
-
|
29
|
-
from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_2_0, SA_1_4
|
30
|
-
|
31
|
-
try:
|
32
|
-
from sqlalchemy.orm import declarative_base
|
33
|
-
except ImportError:
|
34
|
-
from sqlalchemy.ext.declarative import declarative_base
|
35
|
-
|
36
|
-
from crate.client.cursor import Cursor
|
37
|
-
|
38
|
-
|
39
|
-
# `FakeCursor` stands in for the cursor class; calling it always yields the
# shared `fake_cursor` mock, on which executed statements can be inspected.
fake_cursor = MagicMock(name='fake_cursor')
FakeCursor = MagicMock(name='FakeCursor', spec=Cursor)
FakeCursor.return_value = fake_cursor
|
41
|
-
|
42
|
-
|
43
|
-
class SqlAlchemyBulkTest(TestCase):
    """Tests for bulk INSERT behaviour via the ORM, pandas and Dask."""

    def setUp(self):
        self.engine = sa.create_engine('crate://')
        Base = declarative_base()

        class Character(Base):
            __tablename__ = 'characters'

            name = sa.Column(sa.String, primary_key=True)
            age = sa.Column(sa.Integer)

        self.character = Character
        self.session = Session(bind=self.engine)

    @skipIf(SA_VERSION >= SA_2_0, "SQLAlchemy 2.x uses modern bulk INSERT mode")
    @patch('crate.client.connection.Cursor', FakeCursor)
    def test_bulk_save_legacy(self):
        """
        Verify legacy SQLAlchemy bulk INSERT mode.

        > bulk_save_objects: Perform a bulk save of the given list of objects.
        > This method is a legacy feature as of the 2.0 series of SQLAlchemy. For modern
        > bulk INSERT and UPDATE, see the sections ORM Bulk INSERT Statements and ORM Bulk
        > UPDATE by Primary Key.
        >
        > -- https://docs.sqlalchemy.org/orm/session_api.html#sqlalchemy.orm.Session.bulk_save_objects

        > The Session includes legacy methods for performing "bulk" INSERT and UPDATE
        > statements. These methods share implementations with the SQLAlchemy 2.0
        > versions of these features, described at ORM Bulk INSERT Statements and
        > ORM Bulk UPDATE by Primary Key, however lack many features, namely RETURNING
        > support as well as support for session-synchronization.
        >
        > -- https://docs.sqlalchemy.org/orm/queryguide/dml.html#legacy-session-bulk-insert-methods

        > The 1.4 version of the "ORM bulk insert" methods are really not very efficient and
        > don't grant that much of a performance bump vs. regular ORM `session.add()`, provided
        > in both cases the objects you provide already have their primary key values assigned.
        > SQLAlchemy 2.0 made a much more comprehensive change to how this all works as well so
        > that all INSERT methods are essentially extremely fast now, relative to the 1.x series.
        >
        > -- https://github.com/sqlalchemy/sqlalchemy/discussions/6935#discussioncomment-4789701
        """
        chars = [
            self.character(name='Arthur', age=35),
            self.character(name='Banshee', age=26),
            self.character(name='Callisto', age=37),
        ]

        fake_cursor.description = ()
        fake_cursor.rowcount = len(chars)
        fake_cursor.executemany.return_value = [
            {'rowcount': 1},
            {'rowcount': 1},
            {'rowcount': 1},
        ]
        self.session.bulk_save_objects(chars)
        (stmt, bulk_args), _ = fake_cursor.executemany.call_args

        expected_stmt = "INSERT INTO characters (name, age) VALUES (?, ?)"
        self.assertEqual(expected_stmt, stmt)

        expected_bulk_args = (
            ('Arthur', 35),
            ('Banshee', 26),
            ('Callisto', 37)
        )
        self.assertSequenceEqual(expected_bulk_args, bulk_args)

    @skipIf(SA_VERSION < SA_2_0, "SQLAlchemy 1.x uses legacy bulk INSERT mode")
    @patch('crate.client.connection.Cursor', FakeCursor)
    def test_bulk_save_modern(self):
        """
        Verify modern SQLAlchemy bulk INSERT mode.

        > A list of parameter dictionaries sent to the `Session.execute.params` parameter,
        > separate from the Insert object itself, will invoke *bulk INSERT mode* for the
        > statement, which essentially means the operation will optimize as much as
        > possible for many rows.
        >
        > -- https://docs.sqlalchemy.org/orm/queryguide/dml.html#orm-queryguide-bulk-insert

        > We have been looking into getting performance optimizations
        > from `bulk_save()` to be inherently part of `add_all()`.
        >
        > -- https://github.com/sqlalchemy/sqlalchemy/discussions/6935#discussioncomment-1233465

        > The remaining performance limitation, that the `cursor.executemany()` DBAPI method
        > does not allow for rows to be fetched, is resolved for most backends by *foregoing*
        > the use of `executemany()` and instead restructuring individual INSERT statements
        > to each accommodate a large number of rows in a single statement that is invoked
        > using `cursor.execute()`. This approach originates from the `psycopg2` fast execution
        > helpers feature of the `psycopg2` DBAPI, which SQLAlchemy incrementally added more
        > and more support towards in recent release series.
        >
        > -- https://docs.sqlalchemy.org/core/connections.html#engine-insertmanyvalues
        """

        # Don't truncate unittest's diff output on `assertListEqual`.
        self.maxDiff = None

        chars = [
            self.character(name='Arthur', age=35),
            self.character(name='Banshee', age=26),
            self.character(name='Callisto', age=37),
        ]

        fake_cursor.description = ()
        fake_cursor.rowcount = len(chars)
        fake_cursor.execute.return_value = [
            {'rowcount': 1},
            {'rowcount': 1},
            {'rowcount': 1},
        ]
        self.session.add_all(chars)
        self.session.commit()
        (stmt, bulk_args), _ = fake_cursor.execute.call_args

        expected_stmt = "INSERT INTO characters (name, age) VALUES (?, ?), (?, ?), (?, ?)"
        self.assertEqual(expected_stmt, stmt)

        expected_bulk_args = (
            'Arthur', 35,
            'Banshee', 26,
            'Callisto', 37,
        )
        self.assertSequenceEqual(expected_bulk_args, bulk_args)

    # `patch` without a replacement object creates a fresh `MagicMock` per
    # test and passes it in as `mock_cursor`, so `call_count` starts at zero.
    # The former `mock_cursor=FakeCursor` keyword did not substitute
    # `FakeCursor`; it merely set an unused attribute on that fresh mock.
    @skipIf(sys.version_info < (3, 8), "SQLAlchemy/pandas is not supported on Python <3.8")
    @skipIf(SA_VERSION < SA_1_4, "SQLAlchemy 1.3 is not supported by pandas")
    @patch('crate.client.connection.Cursor')
    def test_bulk_save_pandas(self, mock_cursor):
        """
        Verify bulk INSERT with pandas.
        """
        from pandas._testing import makeTimeDataFrame
        from crate.client.sqlalchemy.support import insert_bulk

        # 42 records / 8 chunksize = 5.25, which means 6 batches will be emitted.
        INSERT_RECORDS = 42
        CHUNK_SIZE = 8
        OPCOUNT = math.ceil(INSERT_RECORDS / CHUNK_SIZE)

        # Create a DataFrame to feed into the database.
        df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")

        dburi = "crate://localhost:4200"
        engine = sa.create_engine(dburi, echo=True)
        retval = df.to_sql(
            name="test-testdrive",
            con=engine,
            if_exists="replace",
            index=False,
            chunksize=CHUNK_SIZE,
            method=insert_bulk,
        )
        self.assertIsNone(retval)

        # Initializing the query has an overhead of two calls to the cursor object, probably one
        # initial connection from the DB-API driver, to inquire the database version, and another
        # one, for SQLAlchemy. SQLAlchemy will use it to inquire the table schema using `information_schema`,
        # and to eventually issue the `CREATE TABLE ...` statement.
        effective_op_count = mock_cursor.call_count - 2

        # Verify number of batches.
        self.assertEqual(effective_op_count, OPCOUNT)

    # See the note on `test_bulk_save_pandas` regarding the bare `patch`.
    @skipIf(sys.version_info < (3, 8), "SQLAlchemy/Dask is not supported on Python <3.8")
    @skipIf(SA_VERSION < SA_1_4, "SQLAlchemy 1.3 is not supported by pandas")
    @patch('crate.client.connection.Cursor')
    def test_bulk_save_dask(self, mock_cursor):
        """
        Verify bulk INSERT with Dask.
        """
        import dask.dataframe as dd
        from pandas._testing import makeTimeDataFrame
        from crate.client.sqlalchemy.support import insert_bulk

        # 42 records / 4 partitions means each partition has a size of 10.5 elements.
        # Because the chunk size 8 is slightly smaller than 10, the partition will not
        # fit into it, so two batches will be emitted to the database for each data
        # partition. 4 partitions * 2 batches = 8 insert operations will be emitted.
        # Those settings are a perfect example of non-optimal settings, and have been
        # made so on purpose, in order to demonstrate that using optimal settings
        # is crucial.
        INSERT_RECORDS = 42
        NPARTITIONS = 4
        CHUNK_SIZE = 8
        OPCOUNT = math.ceil(INSERT_RECORDS / NPARTITIONS / CHUNK_SIZE) * NPARTITIONS

        # Create a DataFrame to feed into the database.
        df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")
        ddf = dd.from_pandas(df, npartitions=NPARTITIONS)

        dburi = "crate://localhost:4200"
        retval = ddf.to_sql(
            name="test-testdrive",
            uri=dburi,
            if_exists="replace",
            index=False,
            chunksize=CHUNK_SIZE,
            method=insert_bulk,
            parallel=True,
        )
        self.assertIsNone(retval)

        # Each of the insert operation incurs another call to the cursor object. This is probably
        # the initial connection from the DB-API driver, to inquire the database version.
        # This compensation formula has been determined empirically / by educated guessing.
        effective_op_count = (mock_cursor.call_count - 2 * NPARTITIONS) - 2

        # Verify number of batches.
        self.assertEqual(effective_op_count, OPCOUNT)
|