ingestr-0.13.41-py3-none-any.whl → ingestr-0.13.42-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +1 -0
- ingestr/src/mongodb/__init__.py +66 -6
- ingestr/src/mongodb/helpers.py +540 -37
- {ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/METADATA +1 -1
- {ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/RECORD +9 -9
- {ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/WHEEL +0 -0
- {ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.41"
+version = "v0.13.42"
ingestr/src/factory.py
CHANGED
@@ -120,6 +120,7 @@ class SourceDestinationFactory:
     sources: Dict[str, Type[SourceProtocol]] = {
         "csv": LocalCsvSource,
         "mongodb": MongoDbSource,
+        "mongodb+srv": MongoDbSource,
         "notion": NotionSource,
         "gsheets": GoogleSheetsSource,
         "shopify": ShopifySource,
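The practical effect of this one-line change is that connection URIs using the mongodb+srv scheme (DNS seed-list connections, as used by MongoDB Atlas) now resolve to the same MongoDbSource as plain mongodb URIs. A minimal sketch of the idea, assuming the factory keys this dictionary by the URI scheme; the parsing below is illustrative, not the actual ingestr resolution code:

    from urllib.parse import urlparse

    sources = {
        "mongodb": "MongoDbSource",
        "mongodb+srv": "MongoDbSource",  # new in 0.13.42
    }

    uri = "mongodb+srv://user:password@cluster0.example.mongodb.net/mydb"
    scheme = urlparse(uri).scheme  # -> "mongodb+srv"
    print(sources[scheme])         # resolves; in 0.13.41 this key was missing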
ingestr/src/mongodb/__init__.py
CHANGED
@@ -1,8 +1,9 @@
 """Source that loads collections form any a mongo database, supports incremental loads."""
 
-from typing import Any, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
 
 import dlt
+from dlt.common.data_writers import TDataItemFormat
 from dlt.sources import DltResource
 
 from .helpers import (
@@ -21,6 +22,10 @@ def mongodb(
     incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
     write_disposition: Optional[str] = dlt.config.value,
     parallel: Optional[bool] = dlt.config.value,
+    limit: Optional[int] = None,
+    filter_: Optional[Dict[str, Any]] = None,
+    projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    pymongoarrow_schema: Optional[Any] = None,
 ) -> Iterable[DltResource]:
     """
     A DLT source which loads data from a mongo database using PyMongo.
@@ -34,6 +39,18 @@ def mongodb(
             E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
         write_disposition (str): Write disposition of the resource.
         parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
+        limit (Optional[int]):
+            The maximum number of documents to load. The limit is
+            applied to each requested collection separately.
+        filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
+        projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields of a collection
+            when loading the collection. Supported inputs:
+            include (list) - ["year", "title"]
+            include (dict) - {"year": True, "title": True}
+            exclude (dict) - {"released": False, "runtime": False}
+            Note: Can't mix include and exclude statements '{"title": True, "released": False}`
+        pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types of a collection to convert BSON to Arrow
+
     Returns:
         Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
     """
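The three projection forms listed in the docstring above are plain PyMongo projections. A small illustrative sketch (field names are placeholders taken from the docstring examples):

    # include as a list of field names
    projection = ["year", "title"]

    # include as a dict
    projection = {"year": True, "title": True}

    # exclude as a dict
    projection = {"released": False, "runtime": False}

    # Mixing include and exclude in one projection, e.g.
    # {"title": True, "released": False}, is rejected by MongoDB
    # (the only allowed mix is excluding _id in an inclusion projection).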
@@ -58,19 +75,36 @@ def mongodb(
             primary_key="_id",
             write_disposition=write_disposition,
             spec=MongoDbCollectionConfiguration,
-        )(
+        )(
+            client,
+            collection,
+            incremental=incremental,
+            parallel=parallel,
+            limit=limit,
+            filter_=filter_ or {},
+            projection=projection,
+            pymongoarrow_schema=pymongoarrow_schema,
+        )
 
 
-@dlt.
-
+@dlt.resource(
+    name=lambda args: args["collection"],
+    standalone=True,
+    spec=MongoDbCollectionResourceConfiguration,
 )
 def mongodb_collection(
-    connection_url: str = dlt.
+    connection_url: str = dlt.secrets.value,
     database: Optional[str] = dlt.config.value,
     collection: str = dlt.config.value,
     incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
     write_disposition: Optional[str] = dlt.config.value,
     parallel: Optional[bool] = False,
+    limit: Optional[int] = None,
+    chunk_size: Optional[int] = 10000,
+    data_item_format: Optional[TDataItemFormat] = "object",
+    filter_: Optional[Dict[str, Any]] = None,
+    projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
+    pymongoarrow_schema: Optional[Any] = None,
 ) -> Any:
     """
     A DLT source which loads a collection from a mongo database using PyMongo.
@@ -83,6 +117,21 @@ def mongodb_collection(
             E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
         write_disposition (str): Write disposition of the resource.
         parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
+        limit (Optional[int]): The number of documents load.
+        chunk_size (Optional[int]): The number of documents load in each batch.
+        data_item_format (Optional[TDataItemFormat]): The data format to use for loading.
+            Supported formats:
+                object - Python objects (dicts, lists).
+                arrow - Apache Arrow tables.
+        filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
+        projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields
+            when loading the collection. Supported inputs:
+            include (list) - ["year", "title"]
+            include (dict) - {"year": True, "title": True}
+            exclude (dict) - {"released": False, "runtime": False}
+            Note: Can't mix include and exclude statements '{"title": True, "released": False}`
+        pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
+
     Returns:
         Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
     """
@@ -100,4 +149,15 @@ def mongodb_collection(
         name=collection_obj.name,
         primary_key="_id",
         write_disposition=write_disposition,
-    )(
+    )(
+        client,
+        collection_obj,
+        incremental=incremental,
+        parallel=parallel,
+        limit=limit,
+        chunk_size=chunk_size,
+        data_item_format=data_item_format,
+        filter_=filter_ or {},
+        projection=projection,
+        pymongoarrow_schema=pymongoarrow_schema,
+    )
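Taken together, the new parameters let a single collection be loaded with a server-side filter, a field projection and a document cap. A minimal usage sketch, assuming a reachable cluster and a dlt pipeline; the connection string, database and field names are placeholders:

    import dlt
    from ingestr.src.mongodb import mongodb_collection

    movies = mongodb_collection(
        connection_url="mongodb+srv://user:password@cluster0.example.mongodb.net",
        database="sample_mflix",
        collection="movies",
        filter_={"year": {"$gte": 2000}},
        projection=["title", "year"],   # include-style projection
        limit=1_000,
        data_item_format="object",      # "arrow" requires pymongoarrow
    )

    pipeline = dlt.pipeline(pipeline_name="mongo_demo", destination="duckdb")
    print(pipeline.run(movies))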
ingestr/src/mongodb/helpers.py
CHANGED
@@ -1,12 +1,27 @@
 """Mongo database source helpers"""
 
 from itertools import islice
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+)
 
 import dlt
 from bson.decimal128 import Decimal128
 from bson.objectid import ObjectId
+from bson.regex import Regex
+from bson.timestamp import Timestamp
+from dlt.common import logger
 from dlt.common.configuration.specs import BaseConfiguration, configspec
+from dlt.common.data_writers import TDataItemFormat
 from dlt.common.time import ensure_pendulum_datetime
 from dlt.common.typing import TDataItem
 from dlt.common.utils import map_nested_in_place
@@ -14,17 +29,23 @@ from pendulum import _datetime
 from pymongo import ASCENDING, DESCENDING, MongoClient
 from pymongo.collection import Collection
 from pymongo.cursor import Cursor
+from pymongo.helpers_shared import _fields_list_to_dict
 
 if TYPE_CHECKING:
     TMongoClient = MongoClient[Any]
-    TCollection = Collection[Any]
+    TCollection = Collection[Any]
     TCursor = Cursor[Any]
 else:
     TMongoClient = Any
     TCollection = Any
     TCursor = Any
 
-
+try:
+    import pymongoarrow  # type: ignore
+
+    PYMONGOARROW_AVAILABLE = True
+except ImportError:
+    PYMONGOARROW_AVAILABLE = False
 
 
 class CollectionLoader:
@@ -32,11 +53,14 @@ class CollectionLoader:
         self,
         client: TMongoClient,
         collection: TCollection,
+        chunk_size: int,
         incremental: Optional[dlt.sources.incremental[Any]] = None,
     ) -> None:
         self.client = client
         self.collection = collection
         self.incremental = incremental
+        self.chunk_size = chunk_size
+
         if incremental:
             self.cursor_field = incremental.cursor_path
             self.last_value = incremental.last_value
@@ -45,45 +69,186 @@ class CollectionLoader:
             self.last_value = None
 
     @property
-    def
+    def _sort_op(self) -> List[Optional[Tuple[str, int]]]:
         if not self.incremental or not self.last_value:
-            return
-        if self.incremental.last_value_func is max:
-            return {self.cursor_field: {"$gte": self.last_value}}
-        elif self.incremental.last_value_func is min:
-            return {self.cursor_field: {"$lt": self.last_value}}
-        return {}
+            return []
 
-
-
-
-
+        if (
+            self.incremental.row_order == "asc"
+            and self.incremental.last_value_func is max
+        ) or (
+            self.incremental.row_order == "desc"
+            and self.incremental.last_value_func is min
+        ):
+            return [(self.cursor_field, ASCENDING)]
 
+        elif (
+            self.incremental.row_order == "asc"
+            and self.incremental.last_value_func is min
+        ) or (
+            self.incremental.row_order == "desc"
+            and self.incremental.last_value_func is max
+        ):
+            return [(self.cursor_field, DESCENDING)]
+
+        return []
 
-class CollectionLoaderParallell(CollectionLoader):
     @property
-    def
-
-
+    def _filter_op(self) -> Dict[str, Any]:
+        """Build a filtering operator.
+
+        Includes a field and the filtering condition for it.
+
+        Returns:
+            Dict[str, Any]: A dictionary with the filter operator.
+        """
+        if not (self.incremental and self.last_value):
+            return {}
+
+        filt = {}
         if self.incremental.last_value_func is max:
-
+            filt = {self.cursor_field: {"$gte": self.last_value}}
+            if self.incremental.end_value:
+                filt[self.cursor_field]["$lt"] = self.incremental.end_value
+
         elif self.incremental.last_value_func is min:
-
-
+            filt = {self.cursor_field: {"$lte": self.last_value}}
+            if self.incremental.end_value:
+                filt[self.cursor_field]["$gt"] = self.incremental.end_value
+
+        return filt
+
+    def _projection_op(
+        self, projection: Optional[Union[Mapping[str, Any], Iterable[str]]]
+    ) -> Optional[Dict[str, Any]]:
+        """Build a projection operator.
+
+        Args:
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): A tuple of fields to include or a dict specifying fields to include or exclude.
+            The incremental `primary_key` needs to be handle differently for inclusion
+            and exclusion projections.
+
+        Returns:
+            Tuple[str, ...] | Dict[str, Any]: A tuple or dictionary with the projection operator.
+        """
+        if projection is None:
+            return None
+
+        projection_dict = dict(_fields_list_to_dict(projection, "projection"))
+
+        if self.incremental:
+            # this is an inclusion projection
+            if any(v == 1 for v in projection_dict.values()):
+                # ensure primary_key is included
+                projection_dict.update(m={self.incremental.primary_key: 1})
+            # this is an exclusion projection
+            else:
+                try:
+                    # ensure primary_key isn't excluded
+                    projection_dict.pop(self.incremental.primary_key)  # type: ignore
+                except KeyError:
+                    pass  # primary_key was properly not included in exclusion projection
+                else:
+                    dlt.common.logger.warn(
+                        f"Primary key `{self.incremental.primary_key}` was removed from exclusion projection"
+                    )
+
+        return projection_dict
+
+    def _limit(self, cursor: Cursor, limit: Optional[int] = None) -> TCursor:  # type: ignore
+        """Apply a limit to the cursor, if needed.
+
+        Args:
+            cursor (Cursor): The cursor to apply the limit.
+            limit (Optional[int]): The number of documents to load.
+
+        Returns:
+            Cursor: The cursor with the limit applied (if given).
+        """
+        if limit not in (0, None):
+            if self.incremental is None or self.incremental.last_value_func is None:
+                logger.warning(
+                    "Using limit without ordering - results may be inconsistent."
+                )
+
+            cursor = cursor.limit(abs(limit))
 
+        return cursor
+
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    ) -> Iterator[TDataItem]:
+        """Construct the query and load the documents from the collection.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            limit (Optional[int]): The number of documents to load.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+        Yields:
+            Iterator[TDataItem]: An iterator of the loaded documents.
+        """
+        filter_op = self._filter_op
+        _raise_if_intersection(filter_op, filter_)
+        filter_op.update(filter_)
+
+        projection_op = self._projection_op(projection)
+
+        cursor = self.collection.find(filter=filter_op, projection=projection_op)
+        if self._sort_op:
+            cursor = cursor.sort(self._sort_op)
+
+        cursor = self._limit(cursor, limit)
+
+        while docs_slice := list(islice(cursor, self.chunk_size)):
+            yield map_nested_in_place(convert_mongo_objs, docs_slice)
+
+
+class CollectionLoaderParallel(CollectionLoader):
     def _get_document_count(self) -> int:
         return self.collection.count_documents(filter=self._filter_op)
 
-    def _create_batches(self) -> List[Dict[str, int]]:
+    def _create_batches(self, limit: Optional[int] = None) -> List[Dict[str, int]]:
         doc_count = self._get_document_count()
-
-
-
+        if limit:
+            doc_count = min(doc_count, abs(limit))
+
+        batches = []
+        left_to_load = doc_count
+
+        for sk in range(0, doc_count, self.chunk_size):
+            batches.append(dict(skip=sk, limit=min(self.chunk_size, left_to_load)))
+            left_to_load -= self.chunk_size
+
+        return batches
 
-    def _get_cursor(
-
+    def _get_cursor(
+        self,
+        filter_: Dict[str, Any],
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    ) -> TCursor:
+        """Get a reading cursor for the collection.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+        Returns:
+            Cursor: The cursor for the collection.
+        """
+        filter_op = self._filter_op
+        _raise_if_intersection(filter_op, filter_)
+        filter_op.update(filter_)
+
+        projection_op = self._projection_op(projection)
+
+        cursor = self.collection.find(filter=filter_op, projection=projection_op)
         if self._sort_op:
             cursor = cursor.sort(self._sort_op)
+
         return cursor
 
     @dlt.defer
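To make the new _filter_op behaviour concrete: for a max-based incremental on updated_at with both a last value and an end_value, the loader constrains the cursor to the half-open window [last_value, end_value). A short sketch of the filter it builds (field name and values are placeholders):

    import pendulum

    last_value = pendulum.datetime(2023, 1, 1)
    end_value = pendulum.datetime(2023, 2, 1)

    # last_value_func is max -> lower bound inclusive, upper bound exclusive
    filter_op = {"updated_at": {"$gte": last_value, "$lt": end_value}}

    # last_value_func is min -> bounds flip to $lte / $gt
    filter_op_desc = {"updated_at": {"$lte": last_value, "$gt": end_value}}

The user-supplied filter_ is merged into this dictionary afterwards, which is why _raise_if_intersection (defined later in this file) first checks that the two filters do not touch the same operator on the same field.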
@@ -93,25 +258,223 @@ class CollectionLoaderParallell(CollectionLoader):
         data = []
         for document in cursor.skip(batch["skip"]).limit(batch["limit"]):
             data.append(map_nested_in_place(convert_mongo_objs, document))
+
         return data
 
-    def _get_all_batches(
-
-
+    def _get_all_batches(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    ) -> Iterator[TDataItem]:
+        """Load all documents from the collection in parallel batches.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            limit (Optional[int]): The maximum number of documents to load.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+        Yields:
+            Iterator[TDataItem]: An iterator of the loaded documents.
+        """
+        batches = self._create_batches(limit=limit)
+        cursor = self._get_cursor(filter_=filter_, projection=projection)
 
         for batch in batches:
             yield self._run_batch(cursor=cursor, batch=batch)
 
-    def load_documents(
-
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    ) -> Iterator[TDataItem]:
+        """Load documents from the collection in parallel.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            limit (Optional[int]): The number of documents to load.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+        Yields:
+            Iterator[TDataItem]: An iterator of the loaded documents.
+        """
+        for document in self._get_all_batches(
+            limit=limit, filter_=filter_, projection=projection
+        ):
             yield document
 
 
+class CollectionArrowLoader(CollectionLoader):
+    """
+    Mongo DB collection loader, which uses
+    Apache Arrow for data processing.
+    """
+
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+        pymongoarrow_schema: Any = None,
+    ) -> Iterator[Any]:
+        """
+        Load documents from the collection in Apache Arrow format.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            limit (Optional[int]): The number of documents to load.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+            pymongoarrow_schema (Any): The mapping of field types to convert BSON to Arrow.
+
+        Yields:
+            Iterator[Any]: An iterator of the loaded documents.
+        """
+        from pymongoarrow.context import PyMongoArrowContext  # type: ignore
+        from pymongoarrow.lib import process_bson_stream  # type: ignore
+
+        filter_op = self._filter_op
+        _raise_if_intersection(filter_op, filter_)
+        filter_op.update(filter_)
+
+        projection_op = self._projection_op(projection)
+
+        # NOTE the `filter_op` isn't passed
+        cursor = self.collection.find_raw_batches(
+            filter_, batch_size=self.chunk_size, projection=projection_op
+        )
+        if self._sort_op:
+            cursor = cursor.sort(self._sort_op)  # type: ignore
+
+        cursor = self._limit(cursor, limit)  # type: ignore
+
+        context = PyMongoArrowContext.from_schema(
+            schema=pymongoarrow_schema, codec_options=self.collection.codec_options
+        )
+        for batch in cursor:
+            process_bson_stream(batch, context)
+        table = context.finish()
+        yield convert_arrow_columns(table)
+
+
+class CollectionArrowLoaderParallel(CollectionLoaderParallel):
+    """
+    Mongo DB collection parallel loader, which uses
+    Apache Arrow for data processing.
+    """
+
+    def load_documents(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+        pymongoarrow_schema: Any = None,
+    ) -> Iterator[TDataItem]:
+        """Load documents from the collection in parallel.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            limit (Optional[int]): The number of documents to load.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+            pymongoarrow_schema (Any): The mapping of field types to convert BSON to Arrow.
+
+        Yields:
+            Iterator[TDataItem]: An iterator of the loaded documents.
+        """
+        yield from self._get_all_batches(
+            limit=limit,
+            filter_=filter_,
+            projection=projection,
+            pymongoarrow_schema=pymongoarrow_schema,
+        )
+
+    def _get_all_batches(
+        self,
+        filter_: Dict[str, Any],
+        limit: Optional[int] = None,
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+        pymongoarrow_schema: Any = None,
+    ) -> Iterator[TDataItem]:
+        """Load all documents from the collection in parallel batches.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            limit (Optional[int]): The maximum number of documents to load.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+            pymongoarrow_schema (Any): The mapping of field types to convert BSON to Arrow.
+
+        Yields:
+            Iterator[TDataItem]: An iterator of the loaded documents.
+        """
+        batches = self._create_batches(limit=limit)
+        cursor = self._get_cursor(filter_=filter_, projection=projection)
+        for batch in batches:
+            yield self._run_batch(
+                cursor=cursor,
+                batch=batch,
+                pymongoarrow_schema=pymongoarrow_schema,
+            )
+
+    def _get_cursor(
+        self,
+        filter_: Dict[str, Any],
+        projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+    ) -> TCursor:
+        """Get a reading cursor for the collection.
+
+        Args:
+            filter_ (Dict[str, Any]): The filter to apply to the collection.
+            projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+        Returns:
+            Cursor: The cursor for the collection.
+        """
+        filter_op = self._filter_op
+        _raise_if_intersection(filter_op, filter_)
+        filter_op.update(filter_)
+
+        projection_op = self._projection_op(projection)
+
+        cursor = self.collection.find_raw_batches(
+            filter=filter_op, batch_size=self.chunk_size, projection=projection_op
+        )
+        if self._sort_op:
+            cursor = cursor.sort(self._sort_op)  # type: ignore
+
+        return cursor
+
+    @dlt.defer
+    def _run_batch(
+        self,
+        cursor: TCursor,
+        batch: Dict[str, int],
+        pymongoarrow_schema: Any = None,
+    ) -> TDataItem:
+        from pymongoarrow.context import PyMongoArrowContext
+        from pymongoarrow.lib import process_bson_stream
+
+        cursor = cursor.clone()
+
+        context = PyMongoArrowContext.from_schema(
+            schema=pymongoarrow_schema, codec_options=self.collection.codec_options
+        )
+        for chunk in cursor.skip(batch["skip"]).limit(batch["limit"]):
+            process_bson_stream(chunk, context)
+        table = context.finish()
+        yield convert_arrow_columns(table)
+
+
 def collection_documents(
     client: TMongoClient,
     collection: TCollection,
+    filter_: Dict[str, Any],
+    projection: Union[Dict[str, Any], List[str]],
+    pymongoarrow_schema: "pymongoarrow.schema.Schema",
     incremental: Optional[dlt.sources.incremental[Any]] = None,
     parallel: bool = False,
+    limit: Optional[int] = None,
+    chunk_size: Optional[int] = 10000,
+    data_item_format: Optional[TDataItemFormat] = "object",
 ) -> Iterator[TDataItem]:
     """
     A DLT source which loads data from a Mongo database using PyMongo.
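The skip/limit batches produced by _create_batches (previous hunk) drive the parallel loaders above: each batch becomes one deferred _run_batch call. A worked example of the arithmetic, assuming 25,000 matching documents and the default chunk_size of 10,000:

    doc_count, chunk_size = 25_000, 10_000
    batches, left_to_load = [], doc_count
    for sk in range(0, doc_count, chunk_size):
        batches.append(dict(skip=sk, limit=min(chunk_size, left_to_load)))
        left_to_load -= chunk_size

    print(batches)
    # [{'skip': 0, 'limit': 10000}, {'skip': 10000, 'limit': 10000}, {'skip': 20000, 'limit': 5000}]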
@@ -120,27 +483,145 @@ def collection_documents(
     Args:
         client (MongoClient): The PyMongo client `pymongo.MongoClient` instance.
         collection (Collection): The collection `pymongo.collection.Collection` to load.
+        filter_ (Dict[str, Any]): The filter to apply to the collection.
+        projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+            when loading the collection. Supported inputs:
+            include (list) - ["year", "title"]
+            include (dict) - {"year": True, "title": True}
+            exclude (dict) - {"released": False, "runtime": False}
+            Note: Can't mix include and exclude statements '{"title": True, "released": False}`
+        pymongoarrow_schema (pymongoarrow.schema.Schema): The mapping of field types to convert BSON to Arrow.
         incremental (Optional[dlt.sources.incremental[Any]]): The incremental configuration.
         parallel (bool): Option to enable parallel loading for the collection. Default is False.
+        limit (Optional[int]): The maximum number of documents to load.
+        chunk_size (Optional[int]): The number of documents to load in each batch.
+        data_item_format (Optional[TDataItemFormat]): The data format to use for loading.
+            Supported formats:
+                object - Python objects (dicts, lists).
+                arrow - Apache Arrow tables.
 
     Returns:
         Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
     """
-
+    if data_item_format == "arrow" and not PYMONGOARROW_AVAILABLE:
+        dlt.common.logger.warn(
+            "'pymongoarrow' is not installed; falling back to standard MongoDB CollectionLoader."
+        )
+        data_item_format = "object"
+
+    if data_item_format != "arrow" and pymongoarrow_schema:
+        dlt.common.logger.warn(
+            "Received value for `pymongoarrow_schema`, but `data_item_format=='object'` "
+            "Use `data_item_format=='arrow'` to enforce schema."
+        )
+
+    if data_item_format == "arrow" and pymongoarrow_schema and projection:
+        dlt.common.logger.warn(
+            "Received values for both `pymongoarrow_schema` and `projection`. Since both "
+            "create a projection to select fields, `projection` will be ignored."
+        )
+
+    if parallel:
+        if data_item_format == "arrow":
+            LoaderClass = CollectionArrowLoaderParallel
+        else:
+            LoaderClass = CollectionLoaderParallel  # type: ignore
+    else:
+        if data_item_format == "arrow":
+            LoaderClass = CollectionArrowLoader  # type: ignore
+        else:
+            LoaderClass = CollectionLoader  # type: ignore
 
-    loader = LoaderClass(
-
-
+    loader = LoaderClass(
+        client, collection, incremental=incremental, chunk_size=chunk_size
+    )
+    if isinstance(loader, (CollectionArrowLoader, CollectionArrowLoaderParallel)):
+        yield from loader.load_documents(
+            limit=limit,
+            filter_=filter_,
+            projection=projection,
+            pymongoarrow_schema=pymongoarrow_schema,
+        )
+    else:
+        yield from loader.load_documents(
+            limit=limit, filter_=filter_, projection=projection
+        )
 
 
 def convert_mongo_objs(value: Any) -> Any:
+    """MongoDB to dlt type conversion when using Python loaders.
+
+    Notes:
+        The method `ObjectId.__str__()` creates a hexstring using `binascii.hexlify(__id).decode()`
+
+    """
     if isinstance(value, (ObjectId, Decimal128)):
         return str(value)
     if isinstance(value, _datetime.datetime):
         return ensure_pendulum_datetime(value)
+    if isinstance(value, Regex):
+        return value.try_compile().pattern
+    if isinstance(value, Timestamp):
+        date = value.as_datetime()
+        return ensure_pendulum_datetime(date)
+
     return value
 
 
+def convert_arrow_columns(table: Any) -> Any:
+    """Convert the given table columns to Python types.
+
+    Notes:
+        Calling str() matches the `convert_mongo_obs()` used in non-arrow code.
+        Pymongoarrow converts ObjectId to `fixed_size_binary[12]`, which can't be
+        converted to a string as a vectorized operation because it contains ASCII characters.
+
+        Instead, you need to loop over values using:
+        ```python
+        pyarrow.array([v.as_buffer().hex() for v in object_id_array], type=pyarrow.string())
+        # pymongoarrow simplifies this by allowing this syntax
+        [str(v) for v in object_id_array]
+        ```
+
+    Args:
+        table (pyarrow.lib.Table): The table to convert.
+
+    Returns:
+        pyarrow.lib.Table: The table with the columns converted.
+    """
+    from dlt.common.libs.pyarrow import pyarrow
+    from pymongoarrow.types import (  # type: ignore
+        _is_binary,
+        _is_code,
+        _is_decimal128,
+        _is_objectid,
+    )
+
+    for i, field in enumerate(table.schema):
+        if _is_objectid(field.type) or _is_decimal128(field.type):
+            col_values = [str(value) for value in table[field.name]]
+            table = table.set_column(
+                i,
+                pyarrow.field(field.name, pyarrow.string()),
+                pyarrow.array(col_values, type=pyarrow.string()),
+            )
+        else:
+            type_ = None
+            if _is_binary(field.type):
+                type_ = pyarrow.binary()
+            elif _is_code(field.type):
+                type_ = pyarrow.string()
+
+            if type_:
+                col_values = [value.as_py() for value in table[field.name]]
+                table = table.set_column(
+                    i,
+                    pyarrow.field(field.name, type_),
+                    pyarrow.array(col_values, type=type_),
+                )
+    return table
+
+
 def client_from_credentials(connection_url: str) -> TMongoClient:
     client: TMongoClient = MongoClient(
         connection_url, uuidRepresentation="standard", tz_aware=True
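The additions to convert_mongo_objs above widen the set of BSON types normalised before loading: regular expressions become their pattern string and BSON timestamps become timezone-aware datetimes. A short sketch using the helper directly (the values are illustrative):

    from bson.decimal128 import Decimal128
    from bson.objectid import ObjectId
    from bson.regex import Regex
    from bson.timestamp import Timestamp

    from ingestr.src.mongodb.helpers import convert_mongo_objs

    print(convert_mongo_objs(ObjectId("65f0c0ffee65f0c0ffee65f0")))  # 24-char hex string
    print(convert_mongo_objs(Decimal128("1.25")))                    # "1.25"
    print(convert_mongo_objs(Regex(r"^foo", "i")))                   # "^foo"
    print(convert_mongo_objs(Timestamp(1700000000, 1)))              # pendulum datetime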
@@ -148,6 +629,27 @@ def client_from_credentials(connection_url: str) -> TMongoClient:
     return client
 
 
+def _raise_if_intersection(filter1: Dict[str, Any], filter2: Dict[str, Any]) -> None:
+    """
+    Raise an exception, if the given filters'
+    fields are intersecting.
+
+    Args:
+        filter1 (Dict[str, Any]): The first filter.
+        filter2 (Dict[str, Any]): The second filter.
+    """
+    field_inter = filter1.keys() & filter2.keys()
+    for field in field_inter:
+        if filter1[field].keys() & filter2[field].keys():
+            str_repr = str({field: filter1[field]})
+            raise ValueError(
+                (
+                    f"Filtering operator {str_repr} is already used by the "
+                    "incremental and can't be used in the filter."
+                )
+            )
+
+
 @configspec
 class MongoDbCollectionConfiguration(BaseConfiguration):
     incremental: Optional[dlt.sources.incremental] = None  # type: ignore[type-arg]
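_raise_if_intersection protects the incremental window from being overridden by a user-supplied filter_. A short sketch of the failure mode it guards against (field name and values are placeholders):

    from ingestr.src.mongodb.helpers import _raise_if_intersection

    incremental_filter = {"updated_at": {"$gte": "2023-01-01"}}
    user_filter = {"updated_at": {"$gte": "2020-01-01"}}  # same field, same operator

    _raise_if_intersection(incremental_filter, user_filter)
    # ValueError: Filtering operator {'updated_at': {'$gte': '2023-01-01'}} is already
    # used by the incremental and can't be used in the filter.

A filter on a different field, or a different operator on the same field (for example {"updated_at": {"$ne": None}}), passes the check and is merged into the query.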
@@ -155,12 +657,13 @@ class MongoDbCollectionConfiguration(BaseConfiguration):
 
 @configspec
 class MongoDbCollectionResourceConfiguration(BaseConfiguration):
-    connection_url:
+    connection_url: dlt.TSecretValue = dlt.secrets.value
     database: Optional[str] = dlt.config.value
     collection: str = dlt.config.value
     incremental: Optional[dlt.sources.incremental] = None  # type: ignore[type-arg]
     write_disposition: Optional[str] = dlt.config.value
     parallel: Optional[bool] = False
+    projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value
 
 
 __source_name__ = "mongodb"
{ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.41
+Version: 0.13.42
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/RECORD
CHANGED
@@ -2,10 +2,10 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=Pe_rzwcDRKIYa7baEVUAAPOHyqQbX29RUexMl0F_S1k,25273
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=
+ingestr/src/buildinfo.py,sha256=O7TKwcOrZv6D3qO1eCdzlVqCphHukEF6_O786P7Z1yo,21
 ingestr/src/destinations.py,sha256=MctbeJUyNr0DRB0XYt2xAbEKkHZ40-nXXEOYCs4KuoE,15420
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=l9PpW4cVTvhOLXhb7jha5CdPY7YT_Uhj4Ac9RndB7fM,5635
 ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
@@ -79,8 +79,8 @@ ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYH
 ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
 ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
 ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
-ingestr/src/mongodb/__init__.py,sha256=
-ingestr/src/mongodb/helpers.py,sha256=
+ingestr/src/mongodb/__init__.py,sha256=Ht5HGt9UJ8LeCtilgu7hZhrebo-knRLlPIlgGQojLgk,7221
+ingestr/src/mongodb/helpers.py,sha256=H0GpOK3bPBhFWBEhJZOjywUBdzih6MOpmyVO_cKSN14,24178
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
 ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -128,8 +128,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.41.dist-info/METADATA,sha256=
-ingestr-0.13.41.dist-info/WHEEL,sha256=
-ingestr-0.13.41.dist-info/entry_points.txt,sha256=
-ingestr-0.13.41.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.13.41.dist-info/RECORD,,
+ingestr-0.13.42.dist-info/METADATA,sha256=Njczb9BZLigMPvPeGS7gzh3OiBR5yxRU47huQPGvW-I,13852
+ingestr-0.13.42.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.42.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.42.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.42.dist-info/RECORD,,

{ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/WHEEL
File without changes

{ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/entry_points.txt
File without changes

{ingestr-0.13.41.dist-info → ingestr-0.13.42.dist-info}/licenses/LICENSE.md
File without changes