crate 0.35.2__py2.py3-none-any.whl → 1.0.0.dev0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. crate/client/__init__.py +1 -1
  2. crate/testing/test_datetime_old.py +90 -0
  3. crate-1.0.0.dev0-py3.11-nspkg.pth +1 -0
  4. {crate-0.35.2.dist-info → crate-1.0.0.dev0.dist-info}/METADATA +15 -19
  5. crate-1.0.0.dev0.dist-info/RECORD +26 -0
  6. {crate-0.35.2.dist-info → crate-1.0.0.dev0.dist-info}/WHEEL +1 -1
  7. crate/client/sqlalchemy/__init__.py +0 -50
  8. crate/client/sqlalchemy/compat/__init__.py +0 -0
  9. crate/client/sqlalchemy/compat/api13.py +0 -156
  10. crate/client/sqlalchemy/compat/core10.py +0 -264
  11. crate/client/sqlalchemy/compat/core14.py +0 -359
  12. crate/client/sqlalchemy/compat/core20.py +0 -447
  13. crate/client/sqlalchemy/compiler.py +0 -318
  14. crate/client/sqlalchemy/dialect.py +0 -369
  15. crate/client/sqlalchemy/predicates/__init__.py +0 -99
  16. crate/client/sqlalchemy/sa_version.py +0 -28
  17. crate/client/sqlalchemy/support.py +0 -62
  18. crate/client/sqlalchemy/tests/__init__.py +0 -59
  19. crate/client/sqlalchemy/tests/array_test.py +0 -111
  20. crate/client/sqlalchemy/tests/bulk_test.py +0 -256
  21. crate/client/sqlalchemy/tests/compiler_test.py +0 -434
  22. crate/client/sqlalchemy/tests/connection_test.py +0 -129
  23. crate/client/sqlalchemy/tests/create_table_test.py +0 -313
  24. crate/client/sqlalchemy/tests/datetime_test.py +0 -90
  25. crate/client/sqlalchemy/tests/dialect_test.py +0 -156
  26. crate/client/sqlalchemy/tests/dict_test.py +0 -460
  27. crate/client/sqlalchemy/tests/function_test.py +0 -47
  28. crate/client/sqlalchemy/tests/insert_from_select_test.py +0 -85
  29. crate/client/sqlalchemy/tests/match_test.py +0 -137
  30. crate/client/sqlalchemy/tests/query_caching.py +0 -143
  31. crate/client/sqlalchemy/tests/update_test.py +0 -115
  32. crate/client/sqlalchemy/tests/warnings_test.py +0 -64
  33. crate/client/sqlalchemy/types.py +0 -277
  34. crate/client/tests.py +0 -416
  35. crate/testing/tests.py +0 -34
  36. crate-0.35.2-py3.11-nspkg.pth +0 -1
  37. crate-0.35.2.dist-info/RECORD +0 -55
  38. crate-0.35.2.dist-info/entry_points.txt +0 -2
  39. {crate-0.35.2.dist-info → crate-1.0.0.dev0.dist-info}/LICENSE +0 -0
  40. {crate-0.35.2.dist-info → crate-1.0.0.dev0.dist-info}/NOTICE +0 -0
  41. {crate-0.35.2.dist-info → crate-1.0.0.dev0.dist-info}/namespace_packages.txt +0 -0
  42. {crate-0.35.2.dist-info → crate-1.0.0.dev0.dist-info}/top_level.txt +0 -0
@@ -1,99 +0,0 @@
1
- # -*- coding: utf-8; -*-
2
- #
3
- # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
- # license agreements. See the NOTICE file distributed with this work for
5
- # additional information regarding copyright ownership. Crate licenses
6
- # this file to you under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License. You may
8
- # obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
- # License for the specific language governing permissions and limitations
16
- # under the License.
17
- #
18
- # However, if you have executed another commercial license agreement
19
- # with Crate these terms will supersede the license and you may use the
20
- # software solely pursuant to the terms of the relevant commercial agreement.
21
-
22
- from sqlalchemy.sql.expression import ColumnElement, literal
23
- from sqlalchemy.ext.compiler import compiles
24
-
25
-
26
class Match(ColumnElement):
    """Expression element that renders a fulltext ``match(...)`` predicate.

    Stores the column reference (or a dictionary mapping columns to boost
    values), the search term, an optional match type, and optional match
    options. The ``compile_*`` helpers render the individual SQL fragments.
    """

    inherit_cache = True

    def __init__(self, column, term, match_type=None, options=None):
        super(Match, self).__init__()
        self.column, self.term = column, term
        self.match_type, self.options = match_type, options

    def compile_column(self, compiler):
        """Render the column part; a dict becomes a sorted, parenthesised list."""
        if not isinstance(self.column, dict):
            return "{0}".format(compiler.process(self.column))

        fragments = [
            "{0} {1}".format(compiler.process(col), boost)
            for col, boost in self.column.items()
        ]
        fragments.sort()
        return "({0})".format(', '.join(fragments))

    def compile_term(self, compiler):
        """Render the search term as a literal processed by ``compiler``."""
        term_literal = literal(self.term)
        return compiler.process(term_literal)

    def compile_using(self, compiler):
        """Render the ``using <match_type> [with (...)]`` suffix.

        Returns ``None`` when neither a match type nor options are set.

        :raises ValueError: if options are given without a match type.
        """
        if not self.match_type:
            if self.options:
                raise ValueError("missing match_type. "
                                 "It's not allowed to specify options "
                                 "without match_type")
            return None

        parts = ["using {0}".format(self.match_type)]
        with_clause = self.with_clause()
        if with_clause:
            parts.append(with_clause)
        return ' '.join(parts)

    def with_clause(self):
        """Render ``with (k=v, ...)`` from the options, sorted; ``None`` if unset."""
        if not self.options:
            return None

        pairs = sorted(
            "{0}={1}".format(key, value)
            for key, value in self.options.items()
        )
        return "with ({0})".format(', '.join(pairs))
69
-
70
-
71
def match(column, term, match_type=None, options=None):
    """Build a :class:`Match` predicate for fulltext search.

    :param column: A reference to a column or an index, or a subcolumn, or a
      dictionary of subcolumns with boost values.

    :param term: The term to match against. This string is analyzed and the
      resulting tokens are compared to the index.

    :param match_type (optional): The match type. Determines how the term is
      applied and how the score is calculated.

    :param options (optional): The match options, supplied as a dictionary.
      They tune the behaviour of the match type and cannot be used without
      specifying one.

    :return: The ``Match`` expression element wrapping the given arguments.
    """
    return Match(
        column=column,
        term=term,
        match_type=match_type,
        options=options,
    )
88
-
89
-
90
@compiles(Match)
def compile_match(match, compiler, **kwargs):
    """Compile a ``Match`` element into ``match(<column>, <term>)``.

    The ``using ...`` suffix produced by ``Match.compile_using`` is appended,
    separated by a single space, when it is non-empty.
    """
    column_sql = match.compile_column(compiler)
    term_sql = match.compile_term(compiler)
    pieces = ["match(%s, %s)" % (column_sql, term_sql)]

    using = match.compile_using(compiler)
    if using:
        pieces.append(using)
    return ' '.join(pieces)
@@ -1,28 +0,0 @@
1
- # -*- coding: utf-8; -*-
2
- #
3
- # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
- # license agreements. See the NOTICE file distributed with this work for
5
- # additional information regarding copyright ownership. Crate licenses
6
- # this file to you under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License. You may
8
- # obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
- # License for the specific language governing permissions and limitations
16
- # under the License.
17
- #
18
- # However, if you have executed another commercial license agreement
19
- # with Crate these terms will supersede the license and you may use the
20
- # software solely pursuant to the terms of the relevant commercial agreement.
21
-
22
- import sqlalchemy as sa
23
- from verlib2 import Version
24
-
25
# Reference releases that `SA_VERSION` is compared against.
SA_1_4 = Version('1.4.0b1')
SA_2_0 = Version('2.0.0')

# Version of the SQLAlchemy package as reported by `sa.__version__`.
SA_VERSION = Version(sa.__version__)
@@ -1,62 +0,0 @@
1
- # -*- coding: utf-8; -*-
2
- #
3
- # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
- # license agreements. See the NOTICE file distributed with this work for
5
- # additional information regarding copyright ownership. Crate licenses
6
- # this file to you under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License. You may
8
- # obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
- # License for the specific language governing permissions and limitations
16
- # under the License.
17
- #
18
- # However, if you have executed another commercial license agreement
19
- # with Crate these terms will supersede the license and you may use the
20
- # software solely pursuant to the terms of the relevant commercial agreement.
21
- import logging
22
-
23
-
24
logger = logging.getLogger(__name__)


def insert_bulk(pd_table, conn, keys, data_iter):
    """
    Use CrateDB's "bulk operations" endpoint as a fast path for pandas' and
    Dask's `to_sql()` [1] method.

    The idea is to break out of SQLAlchemy, compile the insert statement, and
    use the raw DBAPI connection client, in order to invoke a request using
    `bulk_parameters` [2]::

        cursor.execute(sql=sql, bulk_parameters=data)

    The vanilla implementation, used by SQLAlchemy, is::

        data = [dict(zip(keys, row)) for row in data_iter]
        conn.execute(pd_table.table.insert(), data)

    Batch chunking will happen outside of this function, for example [3]
    demonstrates the relevant code in `pandas.io.sql`.

    :param pd_table: Object whose ``table.insert()`` yields the INSERT
        statement that gets compiled to SQL.
    :param conn: Connection used as the compile bind; its
        ``_dbapi_connection`` attribute supplies the raw DBAPI cursor.
    :param keys: Column names. Not used by this implementation; accepted
        because it is part of the callback signature.
    :param data_iter: Iterable of records; it is materialized into a list and
        submitted as one batch.
    :return: ``None``.

    [1] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html
    [2] https://crate.io/docs/crate/reference/en/latest/interfaces/http.html#bulk-operations
    [3] https://github.com/pandas-dev/pandas/blob/v2.0.1/pandas/io/sql.py#L1011-L1027
    """

    # Compile SQL statement and materialize batch.
    sql = str(pd_table.table.insert().compile(bind=conn))
    data = list(data_iter)

    # For debugging and tracing the batches running through this method.
    # `Logger.debug` checks the *effective* level itself, so this also works
    # when DEBUG is configured on a parent logger; comparing `logger.level`
    # would miss that case. The record payload itself is not logged.
    logger.debug("Bulk SQL: %s", sql)
    logger.debug("Bulk records: %s", len(data))

    # Invoke bulk insert operation, releasing the cursor even on failure.
    cursor = conn._dbapi_connection.cursor()
    try:
        cursor.execute(sql=sql, bulk_parameters=data)
    finally:
        cursor.close()
@@ -1,59 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- from ..compat.api13 import monkeypatch_amend_select_sa14, monkeypatch_add_connectionfairy_driver_connection
4
- from ..sa_version import SA_1_4, SA_VERSION
5
- from ...test_util import ParametrizedTestCase
6
-
7
- # `sql.select()` of SQLAlchemy 1.3 uses old calling semantics,
8
- # but the test cases already need the modern ones.
9
if SA_VERSION < SA_1_4:
    # Both helpers are imported from `..compat.api13` and are applied only
    # when the installed SQLAlchemy is older than 1.4.
    # NOTE(review): nesting of the second call was reconstructed from a
    # flattened listing -- confirm it belongs inside this guard.
    monkeypatch_amend_select_sa14()
    monkeypatch_add_connectionfairy_driver_connection()
12
-
13
- from unittest import TestLoader, TestSuite
14
- from .connection_test import SqlAlchemyConnectionTest
15
- from .dict_test import SqlAlchemyDictTypeTest
16
- from .datetime_test import SqlAlchemyDateAndDateTimeTest
17
- from .compiler_test import SqlAlchemyCompilerTest, SqlAlchemyDDLCompilerTest
18
- from .update_test import SqlAlchemyUpdateTest
19
- from .match_test import SqlAlchemyMatchTest
20
- from .bulk_test import SqlAlchemyBulkTest
21
- from .insert_from_select_test import SqlAlchemyInsertFromSelectTest
22
- from .create_table_test import SqlAlchemyCreateTableTest
23
- from .array_test import SqlAlchemyArrayTypeTest
24
- from .dialect_test import SqlAlchemyDialectTest
25
- from .function_test import SqlAlchemyFunctionTest
26
- from .warnings_test import SqlAlchemyWarningsTest
27
- from .query_caching import SqlAlchemyQueryCompilationCaching
28
-
29
-
30
# Shorthand: turn a TestCase class into a suite of all its test methods.
makeSuite = TestLoader().loadTestsFromTestCase


def test_suite_unit():
    """Assemble the suite of SQLAlchemy dialect unit tests.

    ``SqlAlchemyCompilerTest`` is loaded once plainly and then once per
    ``server_version_info`` value through ``ParametrizedTestCase.parametrize``.
    Every other test case is added exactly once; the earlier duplicate
    registration of ``SqlAlchemyInsertFromSelectTest`` was removed.

    :return: The populated ``TestSuite``.
    """
    tests = TestSuite()

    for case in (
        SqlAlchemyConnectionTest,
        SqlAlchemyDictTypeTest,
        SqlAlchemyDateAndDateTimeTest,
        SqlAlchemyCompilerTest,
        SqlAlchemyDDLCompilerTest,
    ):
        tests.addTest(makeSuite(case))

    # Re-run the compiler tests against different reported server versions.
    for server_version_info in (None, (4, 0, 12), (4, 1, 10)):
        tests.addTest(ParametrizedTestCase.parametrize(
            SqlAlchemyCompilerTest,
            param={"server_version_info": server_version_info},
        ))

    for case in (
        SqlAlchemyUpdateTest,
        SqlAlchemyMatchTest,
        SqlAlchemyCreateTableTest,
        SqlAlchemyBulkTest,
        SqlAlchemyInsertFromSelectTest,
        SqlAlchemyDialectTest,
        SqlAlchemyFunctionTest,
        SqlAlchemyArrayTypeTest,
        SqlAlchemyWarningsTest,
    ):
        tests.addTest(makeSuite(case))

    return tests
54
-
55
-
56
def test_suite_integration():
    """Return a suite containing only the query compilation caching tests."""
    return TestSuite([makeSuite(SqlAlchemyQueryCompilationCaching)])
@@ -1,111 +0,0 @@
1
- # -*- coding: utf-8; -*-
2
- #
3
- # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
- # license agreements. See the NOTICE file distributed with this work for
5
- # additional information regarding copyright ownership. Crate licenses
6
- # this file to you under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License. You may
8
- # obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
- # License for the specific language governing permissions and limitations
16
- # under the License.
17
- #
18
- # However, if you have executed another commercial license agreement
19
- # with Crate these terms will supersede the license and you may use the
20
- # software solely pursuant to the terms of the relevant commercial agreement.
21
-
22
-
23
- from unittest import TestCase
24
- from unittest.mock import patch, MagicMock
25
-
26
- import sqlalchemy as sa
27
- from sqlalchemy.sql import operators
28
- from sqlalchemy.orm import Session
29
- try:
30
- from sqlalchemy.orm import declarative_base
31
- except ImportError:
32
- from sqlalchemy.ext.declarative import declarative_base
33
-
34
- from crate.client.cursor import Cursor
35
-
36
# Stand-ins for the DB-API cursor: calling `FakeCursor` hands back the shared
# `fake_cursor`, whose recorded calls the tests inspect.
fake_cursor = MagicMock(name='fake_cursor')
FakeCursor = MagicMock(name='FakeCursor', spec=Cursor, return_value=fake_cursor)
39
-
40
-
41
@patch('crate.client.connection.Cursor', FakeCursor)
class SqlAlchemyArrayTypeTest(TestCase):
    """Checks DDL, INSERT and ``ANY`` rendering for ``sa.ARRAY`` columns.

    Statements are inspected through the module-level ``fake_cursor`` mock
    or by stringifying the query.
    """

    def setUp(self):
        self.engine = sa.create_engine('crate://')
        Base = declarative_base()
        self.metadata = sa.MetaData()

        class User(Base):
            __tablename__ = 'users'

            name = sa.Column(sa.String, primary_key=True)
            friends = sa.Column(sa.ARRAY(sa.String))
            scores = sa.Column(sa.ARRAY(sa.Integer))

        self.User = User
        self.session = Session(bind=self.engine)

    def assertSQL(self, expected_str, actual_expr):
        """Compare ``expected_str`` with the newline-stripped SQL of ``actual_expr``."""
        self.assertEqual(expected_str, str(actual_expr).replace('\n', ''))

    def test_create_with_array(self):
        t1 = sa.Table('t', self.metadata,
                      sa.Column('int_array', sa.ARRAY(sa.Integer)),
                      sa.Column('str_array', sa.ARRAY(sa.String))
                      )
        t1.create(self.engine)
        fake_cursor.execute.assert_called_with(
            ('\nCREATE TABLE t (\n\t'
             'int_array ARRAY(INT), \n\t'
             'str_array ARRAY(STRING)\n)\n\n'),
            ())

    def test_array_insert(self):
        trillian = self.User(name='Trillian', friends=['Arthur', 'Ford'])
        self.session.add(trillian)
        self.session.commit()
        fake_cursor.execute.assert_called_with(
            ("INSERT INTO users (name, friends, scores) VALUES (?, ?, ?)"),
            ('Trillian', ['Arthur', 'Ford'], None))

    def test_any(self):
        s = self.session.query(self.User.name) \
            .filter(self.User.friends.any("arthur"))
        self.assertSQL(
            "SELECT users.name AS users_name FROM users "
            "WHERE ? = ANY (users.friends)",
            s
        )

    def test_any_with_operator(self):
        s = self.session.query(self.User.name) \
            .filter(self.User.scores.any(6, operator=operators.lt))
        self.assertSQL(
            "SELECT users.name AS users_name FROM users "
            "WHERE ? < ANY (users.scores)",
            s
        )

    def test_multidimensional_arrays(self):
        t1 = sa.Table('t', self.metadata,
                      sa.Column('unsupported_array',
                                sa.ARRAY(sa.Integer, dimensions=2)),
                      )
        # `assertRaises` fails with a clear "not raised" message instead of
        # comparing the expected text against `str(None)`.
        with self.assertRaises(NotImplementedError) as ctx:
            t1.create(self.engine)
        self.assertEqual(str(ctx.exception),
                         "CrateDB doesn't support multidimensional arrays")
@@ -1,256 +0,0 @@
1
- # -*- coding: utf-8; -*-
2
- #
3
- # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
- # license agreements. See the NOTICE file distributed with this work for
5
- # additional information regarding copyright ownership. Crate licenses
6
- # this file to you under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License. You may
8
- # obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
- # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
- # License for the specific language governing permissions and limitations
16
- # under the License.
17
- #
18
- # However, if you have executed another commercial license agreement
19
- # with Crate these terms will supersede the license and you may use the
20
- # software solely pursuant to the terms of the relevant commercial agreement.
21
- import math
22
- import sys
23
- from unittest import TestCase, skipIf
24
- from unittest.mock import patch, MagicMock
25
-
26
- import sqlalchemy as sa
27
- from sqlalchemy.orm import Session
28
-
29
- from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_2_0, SA_1_4
30
-
31
- try:
32
- from sqlalchemy.orm import declarative_base
33
- except ImportError:
34
- from sqlalchemy.ext.declarative import declarative_base
35
-
36
- from crate.client.cursor import Cursor
37
-
38
-
39
# Stand-ins for the DB-API cursor: calling `FakeCursor` hands back the shared
# `fake_cursor`, whose recorded calls the tests inspect.
fake_cursor = MagicMock(name='fake_cursor')
FakeCursor = MagicMock(name='FakeCursor', spec=Cursor)
FakeCursor.return_value = fake_cursor
41
-
42
-
43
class SqlAlchemyBulkTest(TestCase):
    # Exercises bulk INSERT code paths: the two ORM variants (selected by
    # SQLAlchemy version via `skipIf`) and the `insert_bulk` callback used
    # with pandas and Dask `to_sql()`.

    def setUp(self):
        # A fresh engine, declarative model and session for every test.
        self.engine = sa.create_engine('crate://')
        Base = declarative_base()

        class Character(Base):
            __tablename__ = 'characters'

            name = sa.Column(sa.String, primary_key=True)
            age = sa.Column(sa.Integer)

        self.character = Character
        self.session = Session(bind=self.engine)

    @skipIf(SA_VERSION >= SA_2_0, "SQLAlchemy 2.x uses modern bulk INSERT mode")
    @patch('crate.client.connection.Cursor', FakeCursor)
    def test_bulk_save_legacy(self):
        """
        Verify legacy SQLAlchemy bulk INSERT mode.

        > bulk_save_objects: Perform a bulk save of the given list of objects.
        > This method is a legacy feature as of the 2.0 series of SQLAlchemy. For modern
        > bulk INSERT and UPDATE, see the sections ORM Bulk INSERT Statements and ORM Bulk
        > UPDATE by Primary Key.
        >
        > -- https://docs.sqlalchemy.org/orm/session_api.html#sqlalchemy.orm.Session.bulk_save_objects

        > The Session includes legacy methods for performing "bulk" INSERT and UPDATE
        > statements. These methods share implementations with the SQLAlchemy 2.0
        > versions of these features, described at ORM Bulk INSERT Statements and
        > ORM Bulk UPDATE by Primary Key, however lack many features, namely RETURNING
        > support as well as support for session-synchronization.
        >
        > -- https://docs.sqlalchemy.org/orm/queryguide/dml.html#legacy-session-bulk-insert-methods

        > The 1.4 version of the "ORM bulk insert" methods are really not very efficient and
        > don't grant that much of a performance bump vs. regular ORM `session.add()`, provided
        > in both cases the objects you provide already have their primary key values assigned.
        > SQLAlchemy 2.0 made a much more comprehensive change to how this all works as well so
        > that all INSERT methods are essentially extremely fast now, relative to the 1.x series.
        >
        > -- https://github.com/sqlalchemy/sqlalchemy/discussions/6935#discussioncomment-4789701
        """
        chars = [
            self.character(name='Arthur', age=35),
            self.character(name='Banshee', age=26),
            self.character(name='Callisto', age=37),
        ]

        # Prime the shared mock cursor with what the session reads back.
        fake_cursor.description = ()
        fake_cursor.rowcount = len(chars)
        fake_cursor.executemany.return_value = [
            {'rowcount': 1},
            {'rowcount': 1},
            {'rowcount': 1},
        ]
        self.session.bulk_save_objects(chars)
        # Positional arguments of the most recent `executemany()` call.
        (stmt, bulk_args), _ = fake_cursor.executemany.call_args

        expected_stmt = "INSERT INTO characters (name, age) VALUES (?, ?)"
        self.assertEqual(expected_stmt, stmt)

        # One parameter tuple per record.
        expected_bulk_args = (
            ('Arthur', 35),
            ('Banshee', 26),
            ('Callisto', 37)
        )
        self.assertSequenceEqual(expected_bulk_args, bulk_args)

    @skipIf(SA_VERSION < SA_2_0, "SQLAlchemy 1.x uses legacy bulk INSERT mode")
    @patch('crate.client.connection.Cursor', FakeCursor)
    def test_bulk_save_modern(self):
        """
        Verify modern SQLAlchemy bulk INSERT mode.

        > A list of parameter dictionaries sent to the `Session.execute.params` parameter,
        > separate from the Insert object itself, will invoke *bulk INSERT mode* for the
        > statement, which essentially means the operation will optimize as much as
        > possible for many rows.
        >
        > -- https://docs.sqlalchemy.org/orm/queryguide/dml.html#orm-queryguide-bulk-insert

        > We have been looking into getting performance optimizations
        > from `bulk_save()` to be inherently part of `add_all()`.
        >
        > -- https://github.com/sqlalchemy/sqlalchemy/discussions/6935#discussioncomment-1233465

        > The remaining performance limitation, that the `cursor.executemany()` DBAPI method
        > does not allow for rows to be fetched, is resolved for most backends by *foregoing*
        > the use of `executemany()` and instead restructuring individual INSERT statements
        > to each accommodate a large number of rows in a single statement that is invoked
        > using `cursor.execute()`. This approach originates from the `psycopg2` fast execution
        > helpers feature of the `psycopg2` DBAPI, which SQLAlchemy incrementally added more
        > and more support towards in recent release series.
        >
        > -- https://docs.sqlalchemy.org/core/connections.html#engine-insertmanyvalues
        """

        # Don't truncate unittest's diff output on `assertListEqual`.
        self.maxDiff = None

        chars = [
            self.character(name='Arthur', age=35),
            self.character(name='Banshee', age=26),
            self.character(name='Callisto', age=37),
        ]

        # Prime the shared mock cursor with what the session reads back.
        fake_cursor.description = ()
        fake_cursor.rowcount = len(chars)
        fake_cursor.execute.return_value = [
            {'rowcount': 1},
            {'rowcount': 1},
            {'rowcount': 1},
        ]
        self.session.add_all(chars)
        self.session.commit()
        # Positional arguments of the most recent `execute()` call.
        (stmt, bulk_args), _ = fake_cursor.execute.call_args

        expected_stmt = "INSERT INTO characters (name, age) VALUES (?, ?), (?, ?), (?, ?)"
        self.assertEqual(expected_stmt, stmt)

        # A single flat parameter sequence covering all three rows.
        expected_bulk_args = (
            'Arthur', 35,
            'Banshee', 26,
            'Callisto', 37,
        )
        self.assertSequenceEqual(expected_bulk_args, bulk_args)

    @skipIf(sys.version_info < (3, 8), "SQLAlchemy/pandas is not supported on Python <3.8")
    @skipIf(SA_VERSION < SA_1_4, "SQLAlchemy 1.3 is not supported by pandas")
    # NOTE(review): extra keyword arguments to `patch()` are forwarded to the
    # mock it creates, so `mock_cursor=FakeCursor` presumably just sets an
    # attribute on a fresh mock; that fresh mock (not `FakeCursor`) is what
    # arrives as the `mock_cursor` argument below -- confirm this is intended.
    @patch('crate.client.connection.Cursor', mock_cursor=FakeCursor)
    def test_bulk_save_pandas(self, mock_cursor):
        """
        Verify bulk INSERT with pandas.
        """
        # NOTE(review): `makeTimeDataFrame` lives in a private pandas module;
        # confirm it exists in the pandas versions this test targets.
        from pandas._testing import makeTimeDataFrame
        from crate.client.sqlalchemy.support import insert_bulk

        # 42 records / 8 chunksize = 5.25, which means 6 batches will be emitted.
        INSERT_RECORDS = 42
        CHUNK_SIZE = 8
        OPCOUNT = math.ceil(INSERT_RECORDS / CHUNK_SIZE)

        # Create a DataFrame to feed into the database.
        df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")

        dburi = "crate://localhost:4200"
        engine = sa.create_engine(dburi, echo=True)
        retval = df.to_sql(
            name="test-testdrive",
            con=engine,
            if_exists="replace",
            index=False,
            chunksize=CHUNK_SIZE,
            method=insert_bulk,
        )
        self.assertIsNone(retval)

        # Initializing the query has an overhead of two calls to the cursor object, probably one
        # initial connection from the DB-API driver, to inquire the database version, and another
        # one, for SQLAlchemy. SQLAlchemy will use it to inquire the table schema using `information_schema`,
        # and to eventually issue the `CREATE TABLE ...` statement.
        effective_op_count = mock_cursor.call_count - 2

        # Verify number of batches.
        self.assertEqual(effective_op_count, OPCOUNT)

    @skipIf(sys.version_info < (3, 8), "SQLAlchemy/Dask is not supported on Python <3.8")
    @skipIf(SA_VERSION < SA_1_4, "SQLAlchemy 1.3 is not supported by pandas")
    # NOTE(review): same `patch()` keyword usage as in `test_bulk_save_pandas`;
    # the injected `mock_cursor` is presumably the mock created by `patch()`.
    @patch('crate.client.connection.Cursor', mock_cursor=FakeCursor)
    def test_bulk_save_dask(self, mock_cursor):
        """
        Verify bulk INSERT with Dask.
        """
        import dask.dataframe as dd
        from pandas._testing import makeTimeDataFrame
        from crate.client.sqlalchemy.support import insert_bulk

        # 42 records / 4 partitions means each partition has a size of 10.5 elements.
        # Because the chunk size 8 is slightly smaller than 10, the partition will not
        # fit into it, so two batches will be emitted to the database for each data
        # partition. 4 partitions * 2 batches = 8 insert operations will be emitted.
        # Those settings are a perfect example of non-optimal settings, and have been
        # made so on purpose, in order to demonstrate that using optimal settings
        # is crucial.
        INSERT_RECORDS = 42
        NPARTITIONS = 4
        CHUNK_SIZE = 8
        OPCOUNT = math.ceil(INSERT_RECORDS / NPARTITIONS / CHUNK_SIZE) * NPARTITIONS

        # Create a DataFrame to feed into the database.
        df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")
        ddf = dd.from_pandas(df, npartitions=NPARTITIONS)

        dburi = "crate://localhost:4200"
        retval = ddf.to_sql(
            name="test-testdrive",
            uri=dburi,
            if_exists="replace",
            index=False,
            chunksize=CHUNK_SIZE,
            method=insert_bulk,
            parallel=True,
        )
        self.assertIsNone(retval)

        # Each of the insert operation incurs another call to the cursor object. This is probably
        # the initial connection from the DB-API driver, to inquire the database version.
        # This compensation formula has been determined empirically / by educated guessing.
        effective_op_count = (mock_cursor.call_count - 2 * NPARTITIONS) - 2

        # Verify number of batches.
        self.assertEqual(effective_op_count, OPCOUNT)