cratedb-toolkit 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. cratedb_toolkit/__init__.py +11 -0
  2. cratedb_toolkit/adapter/__init__.py +0 -0
  3. cratedb_toolkit/adapter/pymongo/__init__.py +1 -0
  4. cratedb_toolkit/adapter/pymongo/api.py +77 -0
  5. cratedb_toolkit/adapter/pymongo/collection.py +156 -0
  6. cratedb_toolkit/adapter/pymongo/cursor.py +390 -0
  7. cratedb_toolkit/adapter/pymongo/reactor.py +74 -0
  8. cratedb_toolkit/adapter/pymongo/util.py +51 -0
  9. cratedb_toolkit/adapter/rockset/__init__.py +0 -0
  10. cratedb_toolkit/adapter/rockset/cli.py +55 -0
  11. cratedb_toolkit/adapter/rockset/server/__init__.py +0 -0
  12. cratedb_toolkit/adapter/rockset/server/api/__init__.py +0 -0
  13. cratedb_toolkit/adapter/rockset/server/api/collection.py +8 -0
  14. cratedb_toolkit/adapter/rockset/server/api/document.py +91 -0
  15. cratedb_toolkit/adapter/rockset/server/api/query.py +68 -0
  16. cratedb_toolkit/adapter/rockset/server/dependencies.py +13 -0
  17. cratedb_toolkit/adapter/rockset/server/main.py +28 -0
  18. cratedb_toolkit/api/__init__.py +0 -0
  19. cratedb_toolkit/api/cli.py +31 -0
  20. cratedb_toolkit/api/guide.py +38 -0
  21. cratedb_toolkit/api/main.py +183 -0
  22. cratedb_toolkit/cfr/__init__.py +0 -0
  23. cratedb_toolkit/cfr/cli.py +195 -0
  24. cratedb_toolkit/cfr/info.py +59 -0
  25. cratedb_toolkit/cfr/jobstats.py +272 -0
  26. cratedb_toolkit/cfr/marimo.py +308 -0
  27. cratedb_toolkit/cfr/systable.py +257 -0
  28. cratedb_toolkit/cli.py +36 -0
  29. cratedb_toolkit/cluster/__init__.py +0 -0
  30. cratedb_toolkit/cluster/cli.py +43 -0
  31. cratedb_toolkit/cluster/croud.py +37 -0
  32. cratedb_toolkit/cluster/util.py +13 -0
  33. cratedb_toolkit/cmd/__init__.py +0 -0
  34. cratedb_toolkit/cmd/tail/__init__.py +0 -0
  35. cratedb_toolkit/cmd/tail/cli.py +56 -0
  36. cratedb_toolkit/cmd/tail/main.py +136 -0
  37. cratedb_toolkit/datasets/__init__.py +1 -0
  38. cratedb_toolkit/datasets/core.py +9 -0
  39. cratedb_toolkit/datasets/kaggle.py +49 -0
  40. cratedb_toolkit/datasets/model.py +138 -0
  41. cratedb_toolkit/datasets/store.py +3 -0
  42. cratedb_toolkit/datasets/tutorial.py +117 -0
  43. cratedb_toolkit/datasets/util.py +27 -0
  44. cratedb_toolkit/docs/__init__.py +0 -0
  45. cratedb_toolkit/docs/cli.py +112 -0
  46. cratedb_toolkit/docs/functions.py +133 -0
  47. cratedb_toolkit/docs/model.py +26 -0
  48. cratedb_toolkit/docs/settings.py +728 -0
  49. cratedb_toolkit/docs/util.py +61 -0
  50. cratedb_toolkit/exception.py +10 -0
  51. cratedb_toolkit/iac/__init__.py +0 -0
  52. cratedb_toolkit/iac/aws.py +10 -0
  53. cratedb_toolkit/info/__init__.py +0 -0
  54. cratedb_toolkit/info/cli.py +98 -0
  55. cratedb_toolkit/info/core.py +93 -0
  56. cratedb_toolkit/info/http.py +41 -0
  57. cratedb_toolkit/info/library.py +596 -0
  58. cratedb_toolkit/info/model.py +86 -0
  59. cratedb_toolkit/info/util.py +21 -0
  60. cratedb_toolkit/io/__init__.py +0 -0
  61. cratedb_toolkit/io/cli.py +92 -0
  62. cratedb_toolkit/io/core.py +189 -0
  63. cratedb_toolkit/io/croud.py +196 -0
  64. cratedb_toolkit/io/dynamodb/__init__.py +0 -0
  65. cratedb_toolkit/io/dynamodb/adapter.py +73 -0
  66. cratedb_toolkit/io/dynamodb/api.py +40 -0
  67. cratedb_toolkit/io/dynamodb/copy.py +96 -0
  68. cratedb_toolkit/io/influxdb.py +27 -0
  69. cratedb_toolkit/io/kinesis/__init__.py +0 -0
  70. cratedb_toolkit/io/kinesis/adapter.py +129 -0
  71. cratedb_toolkit/io/kinesis/api.py +6 -0
  72. cratedb_toolkit/io/kinesis/relay.py +91 -0
  73. cratedb_toolkit/io/mongodb/__init__.py +0 -0
  74. cratedb_toolkit/io/mongodb/adapter.py +246 -0
  75. cratedb_toolkit/io/mongodb/api.py +209 -0
  76. cratedb_toolkit/io/mongodb/cdc.py +130 -0
  77. cratedb_toolkit/io/mongodb/cli.py +112 -0
  78. cratedb_toolkit/io/mongodb/copy.py +107 -0
  79. cratedb_toolkit/io/mongodb/core.py +129 -0
  80. cratedb_toolkit/io/mongodb/export.py +72 -0
  81. cratedb_toolkit/io/mongodb/extract.py +208 -0
  82. cratedb_toolkit/io/mongodb/model.py +4 -0
  83. cratedb_toolkit/io/mongodb/transform.py +67 -0
  84. cratedb_toolkit/io/mongodb/translate.py +191 -0
  85. cratedb_toolkit/io/mongodb/util.py +64 -0
  86. cratedb_toolkit/io/processor/__init__.py +0 -0
  87. cratedb_toolkit/io/processor/kinesis_lambda.py +159 -0
  88. cratedb_toolkit/io/sql.py +1 -0
  89. cratedb_toolkit/job/__init__.py +0 -0
  90. cratedb_toolkit/job/cli.py +30 -0
  91. cratedb_toolkit/job/croud.py +17 -0
  92. cratedb_toolkit/model.py +197 -0
  93. cratedb_toolkit/options.py +11 -0
  94. cratedb_toolkit/query/__init__.py +0 -0
  95. cratedb_toolkit/query/cli.py +26 -0
  96. cratedb_toolkit/query/convert/__init__.py +0 -0
  97. cratedb_toolkit/query/convert/basic.py +24 -0
  98. cratedb_toolkit/query/convert/cli.py +43 -0
  99. cratedb_toolkit/query/mcp/__init__.py +0 -0
  100. cratedb_toolkit/query/mcp/cli.py +92 -0
  101. cratedb_toolkit/query/mcp/inquiry.py +133 -0
  102. cratedb_toolkit/query/mcp/model.py +144 -0
  103. cratedb_toolkit/query/mcp/pg_mcp.py +8 -0
  104. cratedb_toolkit/query/mcp/registry.py +213 -0
  105. cratedb_toolkit/query/mcp/util.py +97 -0
  106. cratedb_toolkit/retention/__init__.py +0 -0
  107. cratedb_toolkit/retention/cli.py +320 -0
  108. cratedb_toolkit/retention/core.py +209 -0
  109. cratedb_toolkit/retention/model.py +192 -0
  110. cratedb_toolkit/retention/setup/__init__.py +0 -0
  111. cratedb_toolkit/retention/setup/schema.py +35 -0
  112. cratedb_toolkit/retention/setup/schema.sql +34 -0
  113. cratedb_toolkit/retention/store.py +252 -0
  114. cratedb_toolkit/retention/strategy/__init__.py +0 -0
  115. cratedb_toolkit/retention/strategy/delete.py +37 -0
  116. cratedb_toolkit/retention/strategy/reallocate.py +65 -0
  117. cratedb_toolkit/retention/strategy/snapshot.py +41 -0
  118. cratedb_toolkit/shell/__init__.py +0 -0
  119. cratedb_toolkit/shell/cli.py +78 -0
  120. cratedb_toolkit/testing/__init__.py +0 -0
  121. cratedb_toolkit/testing/pytest.py +27 -0
  122. cratedb_toolkit/testing/testcontainers/__init__.py +0 -0
  123. cratedb_toolkit/testing/testcontainers/azurite.py +74 -0
  124. cratedb_toolkit/testing/testcontainers/cratedb.py +216 -0
  125. cratedb_toolkit/testing/testcontainers/influxdb2.py +83 -0
  126. cratedb_toolkit/testing/testcontainers/localstack.py +41 -0
  127. cratedb_toolkit/testing/testcontainers/minio.py +46 -0
  128. cratedb_toolkit/testing/testcontainers/mongodb.py +141 -0
  129. cratedb_toolkit/testing/testcontainers/util.py +126 -0
  130. cratedb_toolkit/util/__init__.py +0 -0
  131. cratedb_toolkit/util/cli.py +133 -0
  132. cratedb_toolkit/util/common.py +44 -0
  133. cratedb_toolkit/util/config.py +55 -0
  134. cratedb_toolkit/util/cr8.py +8 -0
  135. cratedb_toolkit/util/crash.py +37 -0
  136. cratedb_toolkit/util/croud.py +170 -0
  137. cratedb_toolkit/util/data.py +54 -0
  138. cratedb_toolkit/util/data_dict.py +165 -0
  139. cratedb_toolkit/util/database.py +438 -0
  140. cratedb_toolkit/util/date.py +13 -0
  141. cratedb_toolkit/util/format.py +88 -0
  142. cratedb_toolkit/util/io.py +15 -0
  143. cratedb_toolkit/util/pandas.py +83 -0
  144. cratedb_toolkit/util/platform.py +56 -0
  145. cratedb_toolkit/util/process.py +23 -0
  146. cratedb_toolkit/util/service.py +24 -0
  147. cratedb_toolkit/util/sqlalchemy.py +16 -0
  148. cratedb_toolkit-0.0.0.dist-info/METADATA +275 -0
  149. cratedb_toolkit-0.0.0.dist-info/RECORD +153 -0
  150. cratedb_toolkit-0.0.0.dist-info/WHEEL +5 -0
  151. cratedb_toolkit-0.0.0.dist-info/entry_points.txt +9 -0
  152. cratedb_toolkit-0.0.0.dist-info/licenses/LICENSE +619 -0
  153. cratedb_toolkit-0.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,11 @@
1
# Prefer the standard-library metadata API. When it cannot be imported,
# fall back to the `importlib_metadata` backport package.
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # pragma:nocover
    # `ModuleNotFoundError` is a subclass of `ImportError`, so one clause covers both.
    from importlib_metadata import PackageNotFoundError, version  # type: ignore[assignment,no-redef,unused-ignore]

# Distribution name used to look up the package metadata.
__appname__ = "cratedb-toolkit"

# Version of the installed distribution, or the literal "unknown" when
# no distribution metadata can be found under that name.
try:
    __version__ = version(__appname__)
except PackageNotFoundError:  # pragma: no cover
    __version__ = "unknown"
File without changes
@@ -0,0 +1 @@
1
+ from .api import PyMongoCrateDBAdapter # noqa: F401
@@ -0,0 +1,77 @@
1
+ from unittest.mock import patch
2
+
3
+ import pymongo.collection
4
+
5
+ from cratedb_toolkit.adapter.pymongo.collection import collection_factory
6
+ from cratedb_toolkit.util.database import DatabaseAdapter
7
+ from cratedb_toolkit.util.pandas import patch_pandas_sqltable_with_extended_mapping
8
+ from cratedb_toolkit.util.sqlalchemy import patch_types_map
9
+
10
+
11
class PyMongoCrateDBAdapter:
    """
    Patch PyMongo to talk to CrateDB.

    The adapter replaces PyMongo's `Collection` class with an amended variant
    bound to a CrateDB `DatabaseAdapter`, and substitutes mocks for a few
    low-level client functions. It can be driven explicitly through
    `start()` / `stop()`, or used as a context manager::

        with PyMongoCrateDBAdapter(dburi=...) as adapter:
            ...
    """

    def __init__(self, dburi: str):
        """
        Prepare, but do not yet apply, the set of patches.

        :param dburi: Database URI, handed to `DatabaseAdapter`.
        """
        self.cratedb = DatabaseAdapter(dburi=dburi)
        # Reference to the pristine `Collection` class, captured before patching.
        self.collection_backup = pymongo.collection.Collection

        collection_patched = collection_factory(cratedb=self.cratedb)  # type: ignore[misc]
        self.patches = [
            # Patch PyMongo's `Collection` implementation.
            patch("pymongo.collection.Collection", collection_patched),
            patch("pymongo.database.Collection", collection_patched),
            # Converge a few low-level functions of PyMongo to no-ops.
            # Without an explicit replacement, `patch` swaps in a mock object.
            patch("pymongo.mongo_client.MongoClient._ensure_session"),
            patch("pymongo.mongo_client._ClientConnectionRetryable._get_server"),
        ]

    def start(self):
        """Configure SQLAlchemy/pandas for CrateDB, then apply the PyMongo patches."""
        self.adjust_sqlalchemy()
        self.activate()

    def stop(self):
        """Undo the PyMongo patches."""
        self.deactivate()

    def __enter__(self):
        """Start the adapter and return it, so `with ... as adapter` is usable."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the adapter. Exceptions raised inside the `with` body propagate."""
        self.stop()

    def adjust_sqlalchemy(self):
        """
        Configure CrateDB SQLAlchemy dialect.

        Setting the CrateDB column policy to `dynamic` means that new columns
        can be added without needing to explicitly change the table definition
        by running corresponding `ALTER TABLE` statements.

        https://cratedb.com/docs/crate/reference/en/latest/general/ddl/column-policy.html#dynamic
        """
        # 1. Patch data types for CrateDB dialect.
        # TODO: Upstream to `sqlalchemy-cratedb`.
        patch_types_map()

        # 2. Prepare pandas.
        # TODO: Provide unpatching hook.
        # TODO: Use `with table_kwargs(...)`.
        from cratedb_toolkit.util.pandas import patch_pandas_sqltable_with_dialect_parameters

        patch_pandas_sqltable_with_dialect_parameters(table_kwargs={"crate_column_policy": "'dynamic'"})
        patch_pandas_sqltable_with_extended_mapping()

    def activate(self):
        """
        Swap in the MongoDB -> CrateDB adapter, by patching functions in PyMongo.

        If any patch fails to start, the patches started so far are stopped
        again before the error is re-raised, so PyMongo is never left in a
        half-patched state.
        """
        started = []
        try:
            for patch_ in self.patches:
                patch_.start()
                started.append(patch_)
        except Exception:
            # Roll back in reverse order, then surface the original error.
            for patch_ in reversed(started):
                patch_.stop()
            raise

    def deactivate(self):
        """
        Swap out the MongoDB -> CrateDB adapter, by restoring patched functions.

        Patches are stopped in the reverse order of their activation.
        """
        for patch_ in reversed(self.patches):
            patch_.stop()
@@ -0,0 +1,156 @@
1
+ # Make Python 3.7 and 3.8 support generic types like `dict` instead of `typing.Dict`.
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import logging
6
+ from collections import abc
7
+ from typing import Any, Iterable, Iterator, Mapping, Optional, Union
8
+
9
+ import pandas as pd
10
+ from bson.raw_bson import RawBSONDocument
11
+ from pymongo import common
12
+ from pymongo.client_session import ClientSession
13
+ from pymongo.collection import Collection
14
+ from pymongo.cursor import Cursor
15
+ from pymongo.results import InsertManyResult, InsertOneResult
16
+ from pymongo.typings import _DocumentType
17
+ from sqlalchemy_cratedb.support import insert_bulk
18
+
19
+ from cratedb_toolkit.adapter.pymongo.cursor import cursor_factory
20
+ from cratedb_toolkit.adapter.pymongo.util import AmendedObjectId as ObjectId
21
+ from cratedb_toolkit.util.database import DatabaseAdapter
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def collection_factory(cratedb: DatabaseAdapter):
    """
    Build a `pymongo.collection.Collection` subclass bound to CrateDB.

    :param cratedb: Database adapter whose SQLAlchemy engine is used for all
        read and write operations of the returned class.
    :return: The `AmendedCollection` class (not an instance).
    """

    class AmendedCollection(Collection):
        """`Collection` variant that reads from and writes to CrateDB."""

        def find(self: Collection, *args: Any, **kwargs: Any) -> Cursor[_DocumentType]:
            """Return a cursor over this collection which is served by CrateDB."""
            AmendedCursor = cursor_factory(cratedb=cratedb)
            return AmendedCursor(self, *args, **kwargs)

        def count_documents(
            self: Collection,
            filter: Mapping[str, Any],  # noqa: A002
            session: Optional[ClientSession] = None,
            comment: Optional[Any] = None,
            **kwargs: Any,
        ) -> int:
            """
            Count the documents matching `filter` by iterating a `find()` cursor.

            TODO: Make it more efficient.
            """
            filter = filter or {}  # noqa: A001
            # Count while iterating, instead of materializing all documents first.
            return sum(1 for _ in self.find(filter=filter, session=session, comment=comment, **kwargs))

        @staticmethod
        def get_df_info(df: pd.DataFrame) -> str:
            """Return the output of `df.info()` as a string."""
            buffer = io.StringIO()
            df.info(buf=buffer)
            return buffer.getvalue()

        def insert_one(
            self: Collection,
            document: Union[_DocumentType, RawBSONDocument],
            bypass_document_validation: bool = False,
            session: Optional[ClientSession] = None,
            comment: Optional[Any] = None,
        ) -> InsertOneResult:
            """
            Insert a single document into the CrateDB table named like this collection.

            The record identifier returned by `INSERT ... RETURNING _id` is used
            as surrogate for MongoDB's `ObjectId`.

            `bypass_document_validation`, `session`, and `comment` are accepted
            for signature compatibility, but not used.

            :raises ValueError: When the insert did not yield a record identifier.
            """
            logger.debug(f"Reading document: {document}")
            data = pd.DataFrame.from_records([document])
            logger.debug(f"Inserting record into CrateDB: schema={self.database.name}, table={self.name}")

            object_id_cratedb: Optional[str] = None

            def insert_returning_id(pd_table, conn, keys, data_iter):
                """
                Custom insertion method for pandas' `to_sql()`.

                The idea is to break out of SQLAlchemy, compile the insert statement, and use the raw
                DBAPI connection client, in order to be able to amend the SQL statement, adding a
                `RETURNING _id` clause.

                The vanilla implementation, used by SQLAlchemy, is::

                    data = [dict(zip(keys, row)) for row in data_iter]
                    conn.execute(pd_table.table.insert(), data)
                """
                nonlocal object_id_cratedb

                # Compile SQL statement and materialize batch.
                sql = str(pd_table.table.insert().compile(bind=conn))
                rows = list(data_iter)

                # Invoke amended insert operation, returning the record
                # identifier as surrogate to MongoDB's `ObjectId`.
                cursor = conn._dbapi_connection.cursor()
                try:
                    cursor.execute(sql=sql + " RETURNING _id", parameters=rows[0])
                    outcome = cursor.fetchone()
                finally:
                    # Release the cursor also when the statement fails.
                    cursor.close()

                # Without a result row, leave the identifier unset, so the
                # caller reports the missing object id with a clear error.
                if outcome:
                    object_id_cratedb = outcome[0]

            # TODO: Either, or?
            data.to_sql(
                name=self.name,
                schema=self.database.name,
                con=cratedb.engine,
                index=False,
                # TODO: Handle `append` vs. `replace`.
                if_exists="append",
                method=insert_returning_id,
            )

            if object_id_cratedb is None:
                raise ValueError("Object may have been created, but there is no object id")

            object_id_mongodb = ObjectId.from_str(object_id_cratedb)
            logger.debug(f"Created object with id: {object_id_mongodb!r}")
            return InsertOneResult(inserted_id=object_id_mongodb, acknowledged=True)

        def insert_many(
            self,
            documents: Iterable[Union[_DocumentType, RawBSONDocument]],
            ordered: bool = True,
            bypass_document_validation: bool = False,
            session: Optional[ClientSession] = None,
            comment: Optional[Any] = None,
        ) -> InsertManyResult:
            """
            Insert multiple documents into the CrateDB table named like this collection.

            Documents lacking an `_id` get a freshly generated one. The `_id`
            field of each regular (non-raw) document is rewritten in place to
            the string form of its identifier.

            `ordered`, `bypass_document_validation`, `session`, and `comment`
            are accepted for signature compatibility, but not used.

            :raises TypeError: When `documents` is not a non-empty, non-mapping iterable.
            """
            if not isinstance(documents, abc.Iterable) or isinstance(documents, abc.Mapping) or not documents:
                raise TypeError("documents must be a non-empty list")
            inserted_ids: list[ObjectId] = []

            def gen() -> Iterator[Mapping[str, Any]]:
                """A generator that validates documents and handles _ids."""
                for document in documents:
                    common.validate_is_document_type("document", document)
                    if not isinstance(document, RawBSONDocument):
                        if "_id" in document:
                            identifier = ObjectId(document["_id"])
                        else:
                            identifier = ObjectId()
                        document["_id"] = str(identifier)  # type: ignore[index]
                        # Record the identifier only where one was derived, so a
                        # raw BSON document never picks up an unbound or stale value.
                        inserted_ids.append(identifier)
                    yield document

            logger.debug("Converting documents")
            documents_real = list(gen())

            logger.debug(f"Reading documents: {documents_real}")
            data = pd.DataFrame.from_records(documents_real)
            # Rendering the dataframe summary is costly, so only do it when needed.
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f"Dataframe: {self.get_df_info(data)}, {data.tail()}")
            logger.debug(f"Inserting records into CrateDB: schema={self.database.name}, table={self.name}")

            data.to_sql(
                name=self.name,
                schema=self.database.name,
                con=cratedb.engine,
                index=False,
                # TODO: Handle `append` vs. `replace`.
                if_exists="append",
                method=insert_bulk,
            )

            return InsertManyResult(inserted_ids, acknowledged=True)

    return AmendedCollection
@@ -0,0 +1,390 @@
1
+ # Make Python 3.7 and 3.8 support generic types like `dict` instead of `typing.Dict`.
2
+ from __future__ import annotations
3
+
4
+ import copy
5
+ import logging
6
+ import warnings
7
+ from collections import deque
8
+ from typing import Any, Iterable, Mapping, Optional, Union
9
+
10
+ import sqlalchemy as sa
11
+ from bson import SON
12
+ from pymongo import CursorType, helpers
13
+ from pymongo.client_session import ClientSession
14
+ from pymongo.collation import validate_collation_or_none
15
+ from pymongo.collection import Collection
16
+ from pymongo.common import validate_is_document_type, validate_is_mapping
17
+ from pymongo.cursor import _QUERY_OPTIONS, Cursor, _Hint, _Sort
18
+ from pymongo.errors import InvalidOperation
19
+ from pymongo.message import _GetMore, _Query
20
+ from pymongo.read_preferences import _ServerMode
21
+ from pymongo.typings import _Address, _CollationIn, _DocumentType
22
+ from pymongo.write_concern import validate_boolean
23
+
24
+ from cratedb_toolkit.adapter.pymongo.reactor import mongodb_query, table_to_model
25
+ from cratedb_toolkit.adapter.pymongo.util import AmendedObjectId
26
+ from cratedb_toolkit.util.database import DatabaseAdapter
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
def cursor_factory(cratedb: DatabaseAdapter):
    """
    Build a `pymongo.cursor.Cursor` subclass whose queries are served by CrateDB.

    :param cratedb: Database adapter; its `engine` is used for table reflection
        and its `connection` for running the translated query.
    :return: The `AmendedCursor` class (not an instance).
    """

    class AmendedCursor(Cursor[_DocumentType]):
        """
        `Cursor` variant that translates the MongoDB filter into an SQL query.

        The class re-declares the private (name-mangled) state of the parent
        `Cursor`, and mirrors it back onto the parent's attribute names, see
        `_synthesize()`. This way, inherited methods keep working.
        """

        _query_class = _Query
        _getmore_class = _GetMore

        def __init__(
            self,
            collection: Collection[_DocumentType],
            filter: Optional[Mapping[str, Any]] = None,  # noqa: A002
            projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
            skip: int = 0,
            limit: int = 0,
            no_cursor_timeout: bool = False,
            cursor_type: int = CursorType.NON_TAILABLE,
            sort: Optional[_Sort] = None,
            allow_partial_results: bool = False,
            oplog_replay: bool = False,
            batch_size: int = 0,
            collation: Optional[_CollationIn] = None,
            hint: Optional[_Hint] = None,
            max_scan: Optional[int] = None,
            max_time_ms: Optional[int] = None,
            max: Optional[_Sort] = None,  # noqa: A002
            min: Optional[_Sort] = None,  # noqa: A002
            return_key: Optional[bool] = None,
            show_record_id: Optional[bool] = None,
            snapshot: Optional[bool] = None,
            comment: Optional[Any] = None,
            session: Optional[ClientSession] = None,
            allow_disk_use: Optional[bool] = None,
            let: Optional[bool] = None,
        ) -> None:
            """Create a new cursor.

            Should not be called directly by application developers - see
            :meth:`~pymongo.collection.Collection.find` instead.

            The parent's ``__init__`` is not invoked. All state is set up here,
            and copied to the parent's attribute names by ``_synthesize()``.

            .. seealso:: The MongoDB documentation on `cursors <https://dochub.mongodb.org/core/cursors>`_.
            """
            # Initialize all attributes used in __del__ before possibly raising
            # an error to avoid attribute errors during garbage collection.
            self.__collection: Collection[_DocumentType] = collection
            self.__id: Any = None
            self.__exhaust = False
            self.__sock_mgr: Any = None
            self.__killed = False
            self.__session: Optional[ClientSession]

            if session:
                self.__session = session
                self.__explicit_session = True
            else:
                self.__session = None
                self.__explicit_session = False

            # Validate arguments; invalid values raise `TypeError` / `ValueError`.
            spec: Mapping[str, Any] = filter or {}
            validate_is_mapping("filter", spec)
            if not isinstance(skip, int):
                raise TypeError("skip must be an instance of int")
            if not isinstance(limit, int):
                raise TypeError("limit must be an instance of int")
            validate_boolean("no_cursor_timeout", no_cursor_timeout)
            if no_cursor_timeout and not self.__explicit_session:
                warnings.warn(
                    "use an explicit session with no_cursor_timeout=True "
                    "otherwise the cursor may still timeout after "
                    "30 minutes, for more info see "
                    "https://mongodb.com/docs/v4.4/reference/method/"
                    "cursor.noCursorTimeout/"
                    "#session-idle-timeout-overrides-nocursortimeout",
                    UserWarning,
                    stacklevel=2,
                )
            if cursor_type not in (
                CursorType.NON_TAILABLE,
                CursorType.TAILABLE,
                CursorType.TAILABLE_AWAIT,
                CursorType.EXHAUST,
            ):
                raise ValueError("not a valid value for cursor_type")
            validate_boolean("allow_partial_results", allow_partial_results)
            validate_boolean("oplog_replay", oplog_replay)
            if not isinstance(batch_size, int):
                raise TypeError("batch_size must be an integer")
            if batch_size < 0:
                raise ValueError("batch_size must be >= 0")
            # Only set if allow_disk_use is provided by the user, else None.
            if allow_disk_use is not None:
                allow_disk_use = validate_boolean("allow_disk_use", allow_disk_use)

            if projection is not None:
                projection = helpers._fields_list_to_dict(projection, "projection")

            if let is not None:
                validate_is_document_type("let", let)

            # Store the validated query options.
            self.__let = let
            self.__spec = spec
            self.__has_filter = filter is not None
            self.__projection = projection
            self.__skip = skip
            self.__limit = limit
            self.__batch_size = batch_size
            self.__ordering = sort and helpers._index_document(sort) or None
            self.__max_scan = max_scan
            self.__explain = False
            self.__comment = comment
            self.__max_time_ms = max_time_ms
            self.__max_await_time_ms: Optional[int] = None
            self.__max: Optional[Union[SON[Any, Any], _Sort]] = max
            self.__min: Optional[Union[SON[Any, Any], _Sort]] = min
            self.__collation = validate_collation_or_none(collation)
            self.__return_key = return_key
            self.__show_record_id = show_record_id
            self.__allow_disk_use = allow_disk_use
            self.__snapshot = snapshot
            self.__hint: Union[str, SON[str, Any], None]
            self.__set_hint(hint)

            # Exhaust cursor support
            # TODO: Implement.
            # The string literal below is a no-op expression; it only keeps the
            # disabled code around for reference.
            """
            if cursor_type == CursorType.EXHAUST:
                if self.__collection.database.client.is_mongos:
                    raise InvalidOperation("Exhaust cursors are not supported by mongos")
                if limit:
                    raise InvalidOperation("Can't use limit and exhaust together.")
                self.__exhaust = True
            """

            # This is ugly. People want to be able to do cursor[5:5] and
            # get an empty result set (old behavior was an
            # exception). It's hard to do that right, though, because the
            # server uses limit(0) to mean 'no limit'. So we set __empty
            # in that case and check for it when iterating. We also unset
            # it anytime we change __limit.
            self.__empty = False

            self.__data: deque = deque()
            self.__address: Optional[_Address] = None
            self.__retrieved = 0

            self.__codec_options = collection.codec_options
            # Read preference is set when the initial find is sent.
            self.__read_preference: Optional[_ServerMode] = None
            self.__read_concern = collection.read_concern

            self.__query_flags = cursor_type
            if no_cursor_timeout:
                self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
            if allow_partial_results:
                self.__query_flags |= _QUERY_OPTIONS["partial"]
            if oplog_replay:
                self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]

            # The namespace to use for find/getMore commands.
            self.__dbname = collection.database.name
            self.__collname = collection.name

            # Hack back the inheritance into the parent class.
            self._synthesize()

        def _synthesize(self) -> None:
            """
            Copy every `_AmendedCursor__*` instance attribute to `_Cursor__*`.

            Private attributes assigned in this class are name-mangled with the
            prefix `_AmendedCursor`, while methods inherited from `Cursor` look
            them up with the prefix `_Cursor`.
            """
            # Hack back the inheritance into the parent class, in order to save code.
            # Otherwise, it will yield errors like `AttributeError: 'AmendedCursor'
            # object has no attribute '_Cursor__explicit_session'`
            attrs = self.__dict__
            # Iterate over a snapshot of the keys, because `setattr` adds new ones.
            for name in list(attrs.keys()):
                if not name.startswith("_AmendedCursor"):
                    continue
                parent_name = name.replace("_AmendedCursor__", "_Cursor__")
                setattr(self, parent_name, getattr(self, name))

        def update_parent(self) -> None:
            """Mirror the current result buffer to the parent's `_Cursor__data` attribute."""
            self._Cursor__data = self.__data

        def next(self) -> _DocumentType:  # noqa: A002, A003
            """Advance the cursor.

            Return the next buffered document, fetching results first when the
            buffer is empty. Raise `StopIteration` when there is nothing left.
            """
            if self.__empty:
                raise StopIteration
            if len(self.__data) or self._refresh():
                return self.__data.popleft()
            else:
                raise StopIteration

        __next__ = next

        def __enter__(self) -> Cursor[_DocumentType]:
            """Return the cursor itself."""
            return self

        def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
            """Close the cursor when leaving the `with` block."""
            self.close()

        def _refresh(self) -> int:
            """Refreshes the cursor with more data from Mongo.

            Returns the length of self.__data after refresh. Will exit early if
            self.__data is already non-empty. Raises OperationFailure when the
            cursor cannot be refreshed due to an error on the query.

            NOTE(review): `__send_message` sets `self.__id` to `0` after the
            first query, so the "Get More" branch below is not entered afterwards.
            """
            if len(self.__data) or self.__killed:
                return len(self.__data)

            if not self.__session:
                self.__session = self.__collection.database.client._ensure_session()

            if self.__id is None:  # Query
                if (self.__min or self.__max) and not self.__hint:
                    raise InvalidOperation(
                        "Passing a 'hint' is required when using the min/max query"
                        " option to ensure the query utilizes the correct index"
                    )
                q = self._query_class(
                    self.__query_flags,
                    self.__collection.database.name,
                    self.__collection.name,
                    self.__skip,
                    self.__query_spec(),
                    self.__projection,
                    self.__codec_options,
                    self._read_preference(),
                    self.__limit,
                    self.__batch_size,
                    self.__read_concern,
                    self.__collation,
                    self.__session,
                    self.__collection.database.client,
                    self.__allow_disk_use,
                    self.__exhaust,
                )
                self.__send_message(q)
            elif self.__id:  # Get More
                if self.__limit:
                    limit = self.__limit - self.__retrieved
                    if self.__batch_size:
                        limit = min(limit, self.__batch_size)
                else:
                    limit = self.__batch_size
                # Exhaust cursors don't send getMore messages.
                g = self._getmore_class(
                    self.__dbname,
                    self.__collname,
                    limit,
                    self.__id,
                    self.__codec_options,
                    self._read_preference(),
                    self.__session,
                    self.__collection.database.client,
                    self.__max_await_time_ms,
                    self.__sock_mgr,
                    self.__exhaust,
                    self.__comment,
                )
                self.__send_message(g)

            return len(self.__data)

        def sort(self, key_or_list: _Hint, direction: Optional[Union[int, str]] = None) -> Cursor[_DocumentType]:
            """Set the ordering for this query, and return the cursor itself for chaining."""
            keys = helpers._index_list(key_or_list, direction)
            self.__ordering = helpers._index_document(keys)
            return self

        def __send_message(self, operation: Union[_Query, _GetMore]) -> None:
            """
            Usually sends a query or getmore operation and handles the response to/from a MongoDB server.
            Here, it will build an SQL query from the `operation`s metadata, and will have a conversation
            with a CrateDB server instead.

            TODO: OperationFailure / self.close() / PinnedResponse / explain / batching
            """
            # Only the database and collection name are taken from `operation`.
            metadata = sa.MetaData(schema=operation.db)
            table_name = operation.coll

            # Reflect the table definition from the database, and add the
            # `_id` column to it explicitly.
            table = sa.Table(table_name, metadata, autoload_with=cratedb.engine)
            table.append_column(sa.Column("_id", sa.String(), primary_key=True, system=True))
            model = table_to_model(table)

            # NOTE(review): `self.__skip`, `self.__limit`, and `self.__projection`
            # are not handed to `mongodb_query` here.
            # NOTE(review): `list(self.__ordering)` presumably yields only the
            # keys of the ordering document, dropping directions - confirm.
            query = mongodb_query(
                model=model,
                filter=dict(self.__spec) or {},
                sort=self.__ordering and list(self.__ordering) or ["_id"],
            )
            records = query.fetchall(cratedb.connection)
            # Convert the textual record identifier to an `ObjectId`-compatible value.
            for record in records:
                record["_id"] = AmendedObjectId.from_str(record["_id"])
            self.__data = deque(records)
            self.__retrieved += len(records)
            # A cursor id of `0` makes `_refresh()` skip both of its branches next time.
            self.__id = 0

            # Needed when manipulating `self.__data`, to synchronize
            # with the `Cursor` parent class.
            self.update_parent()

        def __query_spec(self) -> Mapping[str, Any]:
            """Get the spec to use for a query."""
            operators: dict[str, Any] = {}
            if self.__ordering:
                operators["$orderby"] = self.__ordering
            if self.__explain:
                operators["$explain"] = True
            if self.__hint:
                operators["$hint"] = self.__hint
            if self.__let:
                operators["let"] = self.__let
            if self.__comment:
                operators["$comment"] = self.__comment
            if self.__max_scan:
                operators["$maxScan"] = self.__max_scan
            if self.__max_time_ms is not None:
                operators["$maxTimeMS"] = self.__max_time_ms
            if self.__max:
                operators["$max"] = self.__max
            if self.__min:
                operators["$min"] = self.__min
            if self.__return_key is not None:
                operators["$returnKey"] = self.__return_key
            if self.__show_record_id is not None:
                # This is upgraded to showRecordId for MongoDB 3.2+ "find" command.
                operators["$showDiskLoc"] = self.__show_record_id
            if self.__snapshot is not None:
                operators["$snapshot"] = self.__snapshot

            if operators:
                # Make a shallow copy so we can cleanly rewind or clone.
                spec = copy.copy(self.__spec)

                # Allow-listed commands must be wrapped in $query.
                if "$query" not in spec:
                    # $query has to come first
                    spec = SON([("$query", spec)])

                if not isinstance(spec, SON):
                    # Ensure the spec is SON. As order is important this will
                    # ensure its set before merging in any extra operators.
                    spec = SON(spec)

                spec.update(operators)
                return spec
            # Have to wrap with $query if "query" is the first key.
            # We can't just use $query anytime "query" is a key as
            # that breaks commands like count and find_and_modify.
            # Checking spec.keys()[0] covers the case that the spec
            # was passed as an instance of SON or OrderedDict.
            elif "query" in self.__spec and (len(self.__spec) == 1 or next(iter(self.__spec)) == "query"):
                return SON({"$query": self.__spec})

            return self.__spec

        def __set_hint(self, index: Optional[_Hint]) -> None:
            """Store the index hint: `None` and strings as-is, anything else as `SON` document."""
            if index is None:
                self.__hint = None
                return

            if isinstance(index, str):
                self.__hint = index
            else:
                self.__hint = SON(helpers._index_document(index))

    return AmendedCursor