ingestr 0.13.41__py3-none-any.whl → 0.13.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

ingestr/src/attio/__init__.py ADDED
@@ -0,0 +1,100 @@
+ from typing import Iterable, Iterator
+
+ import dlt
+ from dlt.sources import DltResource
+
+ from .helpers import AttioClient
+
+
+ @dlt.source(max_table_nesting=0)
+ def attio_source(
+     api_key: str,
+     params: list[str],
+ ) -> Iterable[DltResource]:
+
+     attio_client = AttioClient(api_key)
+
+     @dlt.resource(
+         name="objects",
+         write_disposition="replace",
+         columns={
+             "created_at": {"data_type": "timestamp", "partition": True},
+         },
+     )
+     def fetch_objects() -> Iterator[dict]:
+         if len(params) != 0:
+             raise ValueError("Objects table must be in the format `objects`")
+
+         path = "objects"
+         yield attio_client.fetch_data(path, "get")
+
+     @dlt.resource(
+         name="records",
+         write_disposition="replace",
+         columns={
+             "created_at": {"data_type": "timestamp", "partition": True},
+         },
+     )
+     def fetch_records() -> Iterator[dict]:
+         if len(params) != 1:
+             raise ValueError(
+                 "Records table must be in the format `records:{object_api_slug}`"
+             )
+
+         object_id = params[0]
+         path = f"objects/{object_id}/records/query"
+
+         yield attio_client.fetch_data(path, "post")
+
+     @dlt.resource(
+         name="lists",
+         write_disposition="replace",
+         columns={
+             "created_at": {"data_type": "timestamp", "partition": True},
+         },
+     )
+     def fetch_lists() -> Iterator[dict]:
+         path = "lists"
+         yield attio_client.fetch_data(path, "get")
+
+     @dlt.resource(
+         name="list_entries",
+         write_disposition="replace",
+         columns={
+             "created_at": {"data_type": "timestamp", "partition": True},
+         },
+     )
+     def fetch_list_entries() -> Iterator[dict]:
+         if len(params) != 1:
+             raise ValueError(
+                 "List entries table must be in the format `list_entries:{list_id}`"
+             )
+         path = f"lists/{params[0]}/entries/query"
+
+         yield attio_client.fetch_data(path, "post")
+
+     @dlt.resource(
+         name="all_list_entries",
+         write_disposition="replace",
+         columns={
+             "created_at": {"data_type": "timestamp", "partition": True},
+         },
+     )
+     def fetch_all_list_entries() -> Iterator[dict]:
+         if len(params) != 1:
+             raise ValueError(
+                 "All list entries table must be in the format `all_list_entries:{object_api_slug}`"
+             )
+         path = "lists"
+         for lst in attio_client.fetch_data(path, "get"):
+             if params[0] in lst["parent_object"]:
+                 path = f"lists/{lst['id']['list_id']}/entries/query"
+                 yield from attio_client.fetch_data(path, "post")
+
+     return (
+         fetch_objects,
+         fetch_records,
+         fetch_lists,
+         fetch_list_entries,
+         fetch_all_list_entries,
+     )
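
Note (not part of the diff): a minimal sketch of how the new Attio source above could be exercised directly with dlt. The API key value, the `companies` object slug, and the duckdb destination are placeholder assumptions.

    import dlt

    from ingestr.src.attio import attio_source

    # An ingestr table string like `records:companies` corresponds to the
    # "records" resource with params=["companies"].
    source = attio_source(api_key="ATTIO_API_KEY", params=["companies"])

    pipeline = dlt.pipeline(
        pipeline_name="attio_example",
        destination="duckdb",
        dataset_name="attio_raw",
    )
    print(pipeline.run(source.with_resources("records")))
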
ingestr/src/attio/helpers.py ADDED
@@ -0,0 +1,54 @@
+ from ingestr.src.http_client import create_client
+
+
+ class AttioClient:
+     def __init__(self, api_key: str):
+         self.base_url = "https://api.attio.com/v2"
+         self.headers = {
+             "Accept": "application/json",
+             "Authorization": f"Bearer {api_key}",
+         }
+         self.client = create_client()
+
+     def fetch_data(self, path: str, method: str, limit: int = 1000, params=None):
+         url = f"{self.base_url}/{path}"
+         if params is None:
+             params = {}
+         offset = 0
+         while True:
+             query_params = {**params, "limit": limit, "offset": offset}
+             if method == "get":
+                 response = self.client.get(
+                     url, headers=self.headers, params=query_params
+                 )
+             else:
+                 response = self.client.post(
+                     url, headers=self.headers, params=query_params
+                 )
+
+             if response.status_code != 200:
+                 raise Exception(f"HTTP {response.status_code} error: {response.text}")
+
+             response_data = response.json()
+             if "data" not in response_data:
+                 print(f"API Response: {response_data}")
+                 raise Exception(
+                     "Attio API returned a response without the expected data"
+                 )
+
+             data = response_data["data"]
+
+             for item in data:
+                 flat_item = flatten_item(item)
+                 yield flat_item
+
+             if len(data) < limit:
+                 break
+             offset += limit
+
+
+ def flatten_item(item: dict) -> dict:
+     if "id" in item:
+         for key, value in item["id"].items():
+             item[key] = value
+     return item
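
Note (not part of the diff): a rough illustration of what flatten_item does. Attio wraps composite identifiers in an "id" object; the helper copies those keys onto the top level of each record before it is yielded. The field names below are made-up placeholders, not a documented Attio payload.

    from ingestr.src.attio.helpers import flatten_item

    record = {
        "id": {"workspace_id": "ws_1", "object_id": "obj_1", "record_id": "rec_1"},
        "values": {"name": "Acme"},
    }

    flat = flatten_item(record)
    assert flat["record_id"] == "rec_1"  # promoted from record["id"]
    assert "id" in flat                  # the nested "id" object itself is left in place
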
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
- version = "v0.13.41"
+ version = "v0.13.43"
ingestr/src/factory.py CHANGED
@@ -26,6 +26,7 @@ from ingestr.src.sources import (
      AppsflyerSource,
      ArrowMemoryMappedSource,
      AsanaSource,
+     AttioSource,
      ChessSource,
      DynamoDBSource,
      ElasticsearchSource,
@@ -120,6 +121,7 @@ class SourceDestinationFactory:
      sources: Dict[str, Type[SourceProtocol]] = {
          "csv": LocalCsvSource,
          "mongodb": MongoDbSource,
+         "mongodb+srv": MongoDbSource,
          "notion": NotionSource,
          "gsheets": GoogleSheetsSource,
          "shopify": ShopifySource,
@@ -156,6 +158,7 @@ class SourceDestinationFactory:
          "freshdesk": FreshdeskSource,
          "phantombuster": PhantombusterSource,
          "elasticsearch": ElasticsearchSource,
+         "attio": AttioSource,
      }
      destinations: Dict[str, Type[DestinationProtocol]] = {
          "bigquery": BigQueryDestination,
ingestr/src/http_client.py ADDED
@@ -0,0 +1,17 @@
+ import requests
+ from dlt.sources.helpers.requests import Client
+
+ def create_client() -> requests.Session:
+     return Client(
+         raise_for_status=False,
+         retry_condition=retry_on_limit,
+         request_max_attempts=12,
+     ).session
+
+
+ def retry_on_limit(
+     response: requests.Response | None, exception: BaseException | None
+ ) -> bool:
+     if response is None:
+         return False
+     return response.status_code == 502
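
Note (not part of the diff): a sketch of how AttioClient uses this helper. The dlt requests Client retries a request (up to 12 attempts) whenever retry_on_limit returns True, i.e. on HTTP 502; other failing status codes are handed back to the caller unchanged because raise_for_status is disabled. The URL below is purely illustrative.

    from ingestr.src.http_client import create_client

    session = create_client()  # a requests.Session with the retry policy attached
    response = session.get("https://api.attio.com/v2/objects")
    if response.status_code != 200:
        raise Exception(f"HTTP {response.status_code} error: {response.text}")
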
ingestr/src/mongodb/__init__.py CHANGED
@@ -1,8 +1,9 @@
  """Source that loads collections form any a mongo database, supports incremental loads."""

- from typing import Any, Iterable, List, Optional
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Union

  import dlt
+ from dlt.common.data_writers import TDataItemFormat
  from dlt.sources import DltResource

  from .helpers import (
@@ -21,6 +22,10 @@ def mongodb(
      incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
      write_disposition: Optional[str] = dlt.config.value,
      parallel: Optional[bool] = dlt.config.value,
+     limit: Optional[int] = None,
+     filter_: Optional[Dict[str, Any]] = None,
+     projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+     pymongoarrow_schema: Optional[Any] = None,
  ) -> Iterable[DltResource]:
      """
      A DLT source which loads data from a mongo database using PyMongo.
@@ -34,6 +39,18 @@ def mongodb(
              E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
          write_disposition (str): Write disposition of the resource.
          parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
+         limit (Optional[int]):
+             The maximum number of documents to load. The limit is
+             applied to each requested collection separately.
+         filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
+         projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields of a collection
+             when loading the collection. Supported inputs:
+                 include (list) - ["year", "title"]
+                 include (dict) - {"year": True, "title": True}
+                 exclude (dict) - {"released": False, "runtime": False}
+             Note: Can't mix include and exclude statements '{"title": True, "released": False}`
+         pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types of a collection to convert BSON to Arrow
+
      Returns:
          Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
      """
@@ -58,19 +75,36 @@ def mongodb(
              primary_key="_id",
              write_disposition=write_disposition,
              spec=MongoDbCollectionConfiguration,
-         )(client, collection, incremental=incremental, parallel=parallel)
+         )(
+             client,
+             collection,
+             incremental=incremental,
+             parallel=parallel,
+             limit=limit,
+             filter_=filter_ or {},
+             projection=projection,
+             pymongoarrow_schema=pymongoarrow_schema,
+         )


- @dlt.common.configuration.with_config(
-     sections=("sources", "mongodb"), spec=MongoDbCollectionResourceConfiguration
+ @dlt.resource(
+     name=lambda args: args["collection"],
+     standalone=True,
+     spec=MongoDbCollectionResourceConfiguration,
  )
  def mongodb_collection(
-     connection_url: str = dlt.config.value,
+     connection_url: str = dlt.secrets.value,
      database: Optional[str] = dlt.config.value,
      collection: str = dlt.config.value,
      incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
      write_disposition: Optional[str] = dlt.config.value,
      parallel: Optional[bool] = False,
+     limit: Optional[int] = None,
+     chunk_size: Optional[int] = 10000,
+     data_item_format: Optional[TDataItemFormat] = "object",
+     filter_: Optional[Dict[str, Any]] = None,
+     projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
+     pymongoarrow_schema: Optional[Any] = None,
  ) -> Any:
      """
      A DLT source which loads a collection from a mongo database using PyMongo.
@@ -83,6 +117,21 @@ def mongodb_collection(
              E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
          write_disposition (str): Write disposition of the resource.
          parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
+         limit (Optional[int]): The number of documents load.
+         chunk_size (Optional[int]): The number of documents load in each batch.
+         data_item_format (Optional[TDataItemFormat]): The data format to use for loading.
+             Supported formats:
+                 object - Python objects (dicts, lists).
+                 arrow - Apache Arrow tables.
+         filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
+         projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields
+             when loading the collection. Supported inputs:
+                 include (list) - ["year", "title"]
+                 include (dict) - {"year": True, "title": True}
+                 exclude (dict) - {"released": False, "runtime": False}
+             Note: Can't mix include and exclude statements '{"title": True, "released": False}`
+         pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
+
      Returns:
          Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
      """
@@ -100,4 +149,15 @@ def mongodb_collection(
          name=collection_obj.name,
          primary_key="_id",
          write_disposition=write_disposition,
-     )(client, collection_obj, incremental=incremental, parallel=parallel)
+     )(
+         client,
+         collection_obj,
+         incremental=incremental,
+         parallel=parallel,
+         limit=limit,
+         chunk_size=chunk_size,
+         data_item_format=data_item_format,
+         filter_=filter_ or {},
+         projection=projection,
+         pymongoarrow_schema=pymongoarrow_schema,
+     )
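
Note (not part of the diff): a hypothetical call showing the extended mongodb_collection signature; the connection string, database, collection, filter and projection values are placeholders.

    import dlt

    from ingestr.src.mongodb import mongodb_collection

    movies = mongodb_collection(
        connection_url="mongodb://localhost:27017",
        database="sample_mflix",
        collection="movies",
        parallel=True,
        limit=1000,                        # stop after 1000 documents
        filter_={"year": {"$gte": 2000}},  # plain PyMongo filter
        projection=["title", "year"],      # include-style projection
        data_item_format="object",
    )

    pipeline = dlt.pipeline(pipeline_name="mongo_example", destination="duckdb")
    print(pipeline.run(movies, write_disposition="replace"))
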
ingestr/src/mongodb/helpers.py CHANGED
@@ -1,12 +1,27 @@
  """Mongo database source helpers"""

  from itertools import islice
- from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     Dict,
+     Iterable,
+     Iterator,
+     List,
+     Mapping,
+     Optional,
+     Tuple,
+     Union,
+ )

  import dlt
  from bson.decimal128 import Decimal128
  from bson.objectid import ObjectId
+ from bson.regex import Regex
+ from bson.timestamp import Timestamp
+ from dlt.common import logger
  from dlt.common.configuration.specs import BaseConfiguration, configspec
+ from dlt.common.data_writers import TDataItemFormat
  from dlt.common.time import ensure_pendulum_datetime
  from dlt.common.typing import TDataItem
  from dlt.common.utils import map_nested_in_place
@@ -14,17 +29,23 @@ from pendulum import _datetime
  from pymongo import ASCENDING, DESCENDING, MongoClient
  from pymongo.collection import Collection
  from pymongo.cursor import Cursor
+ from pymongo.helpers_shared import _fields_list_to_dict

  if TYPE_CHECKING:
      TMongoClient = MongoClient[Any]
-     TCollection = Collection[Any]  # type: ignore
+     TCollection = Collection[Any]
      TCursor = Cursor[Any]
  else:
      TMongoClient = Any
      TCollection = Any
      TCursor = Any

- CHUNK_SIZE = 10000
+ try:
+     import pymongoarrow  # type: ignore
+
+     PYMONGOARROW_AVAILABLE = True
+ except ImportError:
+     PYMONGOARROW_AVAILABLE = False


  class CollectionLoader:
@@ -32,11 +53,14 @@ class CollectionLoader:
          self,
          client: TMongoClient,
          collection: TCollection,
+         chunk_size: int,
          incremental: Optional[dlt.sources.incremental[Any]] = None,
      ) -> None:
          self.client = client
          self.collection = collection
          self.incremental = incremental
+         self.chunk_size = chunk_size
+
          if incremental:
              self.cursor_field = incremental.cursor_path
              self.last_value = incremental.last_value
@@ -45,45 +69,186 @@ class CollectionLoader:
              self.last_value = None

      @property
-     def _filter_op(self) -> Dict[str, Any]:
+     def _sort_op(self) -> List[Optional[Tuple[str, int]]]:
          if not self.incremental or not self.last_value:
-             return {}
-         if self.incremental.last_value_func is max:
-             return {self.cursor_field: {"$gte": self.last_value}}
-         elif self.incremental.last_value_func is min:
-             return {self.cursor_field: {"$lt": self.last_value}}
-         return {}
+             return []

-     def load_documents(self) -> Iterator[TDataItem]:
-         cursor = self.collection.find(self._filter_op)
-         while docs_slice := list(islice(cursor, CHUNK_SIZE)):
-             yield map_nested_in_place(convert_mongo_objs, docs_slice)
+         if (
+             self.incremental.row_order == "asc"
+             and self.incremental.last_value_func is max
+         ) or (
+             self.incremental.row_order == "desc"
+             and self.incremental.last_value_func is min
+         ):
+             return [(self.cursor_field, ASCENDING)]

+         elif (
+             self.incremental.row_order == "asc"
+             and self.incremental.last_value_func is min
+         ) or (
+             self.incremental.row_order == "desc"
+             and self.incremental.last_value_func is max
+         ):
+             return [(self.cursor_field, DESCENDING)]
+
+         return []

- class CollectionLoaderParallell(CollectionLoader):
      @property
-     def _sort_op(self) -> List[Optional[Tuple[str, int]]]:
-         if not self.incremental or not self.last_value:
-             return []
+     def _filter_op(self) -> Dict[str, Any]:
+         """Build a filtering operator.
+
+         Includes a field and the filtering condition for it.
+
+         Returns:
+             Dict[str, Any]: A dictionary with the filter operator.
+         """
+         if not (self.incremental and self.last_value):
+             return {}
+
+         filt = {}
          if self.incremental.last_value_func is max:
-             return [(self.cursor_field, ASCENDING)]
+             filt = {self.cursor_field: {"$gte": self.last_value}}
+             if self.incremental.end_value:
+                 filt[self.cursor_field]["$lt"] = self.incremental.end_value
+
          elif self.incremental.last_value_func is min:
-             return [(self.cursor_field, DESCENDING)]
-         return []
+             filt = {self.cursor_field: {"$lte": self.last_value}}
+             if self.incremental.end_value:
+                 filt[self.cursor_field]["$gt"] = self.incremental.end_value
+
+         return filt
+
+     def _projection_op(
+         self, projection: Optional[Union[Mapping[str, Any], Iterable[str]]]
+     ) -> Optional[Dict[str, Any]]:
+         """Build a projection operator.
+
+         Args:
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): A tuple of fields to include or a dict specifying fields to include or exclude.
+                 The incremental `primary_key` needs to be handle differently for inclusion
+                 and exclusion projections.
+
+         Returns:
+             Tuple[str, ...] | Dict[str, Any]: A tuple or dictionary with the projection operator.
+         """
+         if projection is None:
+             return None
+
+         projection_dict = dict(_fields_list_to_dict(projection, "projection"))
+
+         if self.incremental:
+             # this is an inclusion projection
+             if any(v == 1 for v in projection_dict.values()):
+                 # ensure primary_key is included
+                 projection_dict.update(m={self.incremental.primary_key: 1})
+             # this is an exclusion projection
+             else:
+                 try:
+                     # ensure primary_key isn't excluded
+                     projection_dict.pop(self.incremental.primary_key)  # type: ignore
+                 except KeyError:
+                     pass  # primary_key was properly not included in exclusion projection
+                 else:
+                     dlt.common.logger.warn(
+                         f"Primary key `{self.incremental.primary_key}` was removed from exclusion projection"
+                     )
+
+         return projection_dict
+
+     def _limit(self, cursor: Cursor, limit: Optional[int] = None) -> TCursor:  # type: ignore
+         """Apply a limit to the cursor, if needed.
+
+         Args:
+             cursor (Cursor): The cursor to apply the limit.
+             limit (Optional[int]): The number of documents to load.
+
+         Returns:
+             Cursor: The cursor with the limit applied (if given).
+         """
+         if limit not in (0, None):
+             if self.incremental is None or self.incremental.last_value_func is None:
+                 logger.warning(
+                     "Using limit without ordering - results may be inconsistent."
+                 )
+
+             cursor = cursor.limit(abs(limit))

+         return cursor
+
+     def load_documents(
+         self,
+         filter_: Dict[str, Any],
+         limit: Optional[int] = None,
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+     ) -> Iterator[TDataItem]:
+         """Construct the query and load the documents from the collection.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             limit (Optional[int]): The number of documents to load.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+         Yields:
+             Iterator[TDataItem]: An iterator of the loaded documents.
+         """
+         filter_op = self._filter_op
+         _raise_if_intersection(filter_op, filter_)
+         filter_op.update(filter_)
+
+         projection_op = self._projection_op(projection)
+
+         cursor = self.collection.find(filter=filter_op, projection=projection_op)
+         if self._sort_op:
+             cursor = cursor.sort(self._sort_op)
+
+         cursor = self._limit(cursor, limit)
+
+         while docs_slice := list(islice(cursor, self.chunk_size)):
+             yield map_nested_in_place(convert_mongo_objs, docs_slice)
+
+
+ class CollectionLoaderParallel(CollectionLoader):
      def _get_document_count(self) -> int:
          return self.collection.count_documents(filter=self._filter_op)

-     def _create_batches(self) -> List[Dict[str, int]]:
+     def _create_batches(self, limit: Optional[int] = None) -> List[Dict[str, int]]:
          doc_count = self._get_document_count()
-         return [
-             dict(skip=sk, limit=CHUNK_SIZE) for sk in range(0, doc_count, CHUNK_SIZE)
-         ]
+         if limit:
+             doc_count = min(doc_count, abs(limit))
+
+         batches = []
+         left_to_load = doc_count
+
+         for sk in range(0, doc_count, self.chunk_size):
+             batches.append(dict(skip=sk, limit=min(self.chunk_size, left_to_load)))
+             left_to_load -= self.chunk_size
+
+         return batches

-     def _get_cursor(self) -> TCursor:
-         cursor = self.collection.find(filter=self._filter_op)
+     def _get_cursor(
+         self,
+         filter_: Dict[str, Any],
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+     ) -> TCursor:
+         """Get a reading cursor for the collection.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+         Returns:
+             Cursor: The cursor for the collection.
+         """
+         filter_op = self._filter_op
+         _raise_if_intersection(filter_op, filter_)
+         filter_op.update(filter_)
+
+         projection_op = self._projection_op(projection)
+
+         cursor = self.collection.find(filter=filter_op, projection=projection_op)
          if self._sort_op:
              cursor = cursor.sort(self._sort_op)
+
          return cursor

      @dlt.defer
@@ -93,25 +258,223 @@ class CollectionLoaderParallell(CollectionLoader):
          data = []
          for document in cursor.skip(batch["skip"]).limit(batch["limit"]):
              data.append(map_nested_in_place(convert_mongo_objs, document))
+
          return data

-     def _get_all_batches(self) -> Iterator[TDataItem]:
-         batches = self._create_batches()
-         cursor = self._get_cursor()
+     def _get_all_batches(
+         self,
+         filter_: Dict[str, Any],
+         limit: Optional[int] = None,
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+     ) -> Iterator[TDataItem]:
+         """Load all documents from the collection in parallel batches.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             limit (Optional[int]): The maximum number of documents to load.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+         Yields:
+             Iterator[TDataItem]: An iterator of the loaded documents.
+         """
+         batches = self._create_batches(limit=limit)
+         cursor = self._get_cursor(filter_=filter_, projection=projection)

          for batch in batches:
              yield self._run_batch(cursor=cursor, batch=batch)

-     def load_documents(self) -> Iterator[TDataItem]:
-         for document in self._get_all_batches():
+     def load_documents(
+         self,
+         filter_: Dict[str, Any],
+         limit: Optional[int] = None,
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+     ) -> Iterator[TDataItem]:
+         """Load documents from the collection in parallel.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             limit (Optional[int]): The number of documents to load.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+         Yields:
+             Iterator[TDataItem]: An iterator of the loaded documents.
+         """
+         for document in self._get_all_batches(
+             limit=limit, filter_=filter_, projection=projection
+         ):
              yield document


+ class CollectionArrowLoader(CollectionLoader):
+     """
+     Mongo DB collection loader, which uses
+     Apache Arrow for data processing.
+     """
+
+     def load_documents(
+         self,
+         filter_: Dict[str, Any],
+         limit: Optional[int] = None,
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+         pymongoarrow_schema: Any = None,
+     ) -> Iterator[Any]:
+         """
+         Load documents from the collection in Apache Arrow format.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             limit (Optional[int]): The number of documents to load.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+             pymongoarrow_schema (Any): The mapping of field types to convert BSON to Arrow.
+
+         Yields:
+             Iterator[Any]: An iterator of the loaded documents.
+         """
+         from pymongoarrow.context import PyMongoArrowContext  # type: ignore
+         from pymongoarrow.lib import process_bson_stream  # type: ignore
+
+         filter_op = self._filter_op
+         _raise_if_intersection(filter_op, filter_)
+         filter_op.update(filter_)
+
+         projection_op = self._projection_op(projection)
+
+         # NOTE the `filter_op` isn't passed
+         cursor = self.collection.find_raw_batches(
+             filter_, batch_size=self.chunk_size, projection=projection_op
+         )
+         if self._sort_op:
+             cursor = cursor.sort(self._sort_op)  # type: ignore
+
+         cursor = self._limit(cursor, limit)  # type: ignore
+
+         context = PyMongoArrowContext.from_schema(
+             schema=pymongoarrow_schema, codec_options=self.collection.codec_options
+         )
+         for batch in cursor:
+             process_bson_stream(batch, context)
+             table = context.finish()
+             yield convert_arrow_columns(table)
+
+
+ class CollectionArrowLoaderParallel(CollectionLoaderParallel):
+     """
+     Mongo DB collection parallel loader, which uses
+     Apache Arrow for data processing.
+     """
+
+     def load_documents(
+         self,
+         filter_: Dict[str, Any],
+         limit: Optional[int] = None,
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+         pymongoarrow_schema: Any = None,
+     ) -> Iterator[TDataItem]:
+         """Load documents from the collection in parallel.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             limit (Optional[int]): The number of documents to load.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+             pymongoarrow_schema (Any): The mapping of field types to convert BSON to Arrow.
+
+         Yields:
+             Iterator[TDataItem]: An iterator of the loaded documents.
+         """
+         yield from self._get_all_batches(
+             limit=limit,
+             filter_=filter_,
+             projection=projection,
+             pymongoarrow_schema=pymongoarrow_schema,
+         )
+
+     def _get_all_batches(
+         self,
+         filter_: Dict[str, Any],
+         limit: Optional[int] = None,
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+         pymongoarrow_schema: Any = None,
+     ) -> Iterator[TDataItem]:
+         """Load all documents from the collection in parallel batches.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             limit (Optional[int]): The maximum number of documents to load.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+             pymongoarrow_schema (Any): The mapping of field types to convert BSON to Arrow.
+
+         Yields:
+             Iterator[TDataItem]: An iterator of the loaded documents.
+         """
+         batches = self._create_batches(limit=limit)
+         cursor = self._get_cursor(filter_=filter_, projection=projection)
+         for batch in batches:
+             yield self._run_batch(
+                 cursor=cursor,
+                 batch=batch,
+                 pymongoarrow_schema=pymongoarrow_schema,
+             )
+
+     def _get_cursor(
+         self,
+         filter_: Dict[str, Any],
+         projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
+     ) -> TCursor:
+         """Get a reading cursor for the collection.
+
+         Args:
+             filter_ (Dict[str, Any]): The filter to apply to the collection.
+             projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+
+         Returns:
+             Cursor: The cursor for the collection.
+         """
+         filter_op = self._filter_op
+         _raise_if_intersection(filter_op, filter_)
+         filter_op.update(filter_)
+
+         projection_op = self._projection_op(projection)
+
+         cursor = self.collection.find_raw_batches(
+             filter=filter_op, batch_size=self.chunk_size, projection=projection_op
+         )
+         if self._sort_op:
+             cursor = cursor.sort(self._sort_op)  # type: ignore
+
+         return cursor
+
+     @dlt.defer
+     def _run_batch(
+         self,
+         cursor: TCursor,
+         batch: Dict[str, int],
+         pymongoarrow_schema: Any = None,
+     ) -> TDataItem:
+         from pymongoarrow.context import PyMongoArrowContext
+         from pymongoarrow.lib import process_bson_stream
+
+         cursor = cursor.clone()
+
+         context = PyMongoArrowContext.from_schema(
+             schema=pymongoarrow_schema, codec_options=self.collection.codec_options
+         )
+         for chunk in cursor.skip(batch["skip"]).limit(batch["limit"]):
+             process_bson_stream(chunk, context)
+             table = context.finish()
+             yield convert_arrow_columns(table)
+
+
  def collection_documents(
      client: TMongoClient,
      collection: TCollection,
+     filter_: Dict[str, Any],
+     projection: Union[Dict[str, Any], List[str]],
+     pymongoarrow_schema: "pymongoarrow.schema.Schema",
      incremental: Optional[dlt.sources.incremental[Any]] = None,
      parallel: bool = False,
+     limit: Optional[int] = None,
+     chunk_size: Optional[int] = 10000,
+     data_item_format: Optional[TDataItemFormat] = "object",
  ) -> Iterator[TDataItem]:
      """
      A DLT source which loads data from a Mongo database using PyMongo.
@@ -120,27 +483,145 @@ def collection_documents(
      Args:
          client (MongoClient): The PyMongo client `pymongo.MongoClient` instance.
          collection (Collection): The collection `pymongo.collection.Collection` to load.
+         filter_ (Dict[str, Any]): The filter to apply to the collection.
+         projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields to create the Cursor.
+             when loading the collection. Supported inputs:
+                 include (list) - ["year", "title"]
+                 include (dict) - {"year": True, "title": True}
+                 exclude (dict) - {"released": False, "runtime": False}
+             Note: Can't mix include and exclude statements '{"title": True, "released": False}`
+         pymongoarrow_schema (pymongoarrow.schema.Schema): The mapping of field types to convert BSON to Arrow.
          incremental (Optional[dlt.sources.incremental[Any]]): The incremental configuration.
          parallel (bool): Option to enable parallel loading for the collection. Default is False.
+         limit (Optional[int]): The maximum number of documents to load.
+         chunk_size (Optional[int]): The number of documents to load in each batch.
+         data_item_format (Optional[TDataItemFormat]): The data format to use for loading.
+             Supported formats:
+                 object - Python objects (dicts, lists).
+                 arrow - Apache Arrow tables.

      Returns:
          Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
      """
-     LoaderClass = CollectionLoaderParallell if parallel else CollectionLoader
+     if data_item_format == "arrow" and not PYMONGOARROW_AVAILABLE:
+         dlt.common.logger.warn(
+             "'pymongoarrow' is not installed; falling back to standard MongoDB CollectionLoader."
+         )
+         data_item_format = "object"
+
+     if data_item_format != "arrow" and pymongoarrow_schema:
+         dlt.common.logger.warn(
+             "Received value for `pymongoarrow_schema`, but `data_item_format=='object'` "
+             "Use `data_item_format=='arrow'` to enforce schema."
+         )
+
+     if data_item_format == "arrow" and pymongoarrow_schema and projection:
+         dlt.common.logger.warn(
+             "Received values for both `pymongoarrow_schema` and `projection`. Since both "
+             "create a projection to select fields, `projection` will be ignored."
+         )
+
+     if parallel:
+         if data_item_format == "arrow":
+             LoaderClass = CollectionArrowLoaderParallel
+         else:
+             LoaderClass = CollectionLoaderParallel  # type: ignore
+     else:
+         if data_item_format == "arrow":
+             LoaderClass = CollectionArrowLoader  # type: ignore
+         else:
+             LoaderClass = CollectionLoader  # type: ignore

-     loader = LoaderClass(client, collection, incremental=incremental)
-     for data in loader.load_documents():
-         yield data
+     loader = LoaderClass(
+         client, collection, incremental=incremental, chunk_size=chunk_size
+     )
+     if isinstance(loader, (CollectionArrowLoader, CollectionArrowLoaderParallel)):
+         yield from loader.load_documents(
+             limit=limit,
+             filter_=filter_,
+             projection=projection,
+             pymongoarrow_schema=pymongoarrow_schema,
+         )
+     else:
+         yield from loader.load_documents(
+             limit=limit, filter_=filter_, projection=projection
+         )


  def convert_mongo_objs(value: Any) -> Any:
+     """MongoDB to dlt type conversion when using Python loaders.
+
+     Notes:
+         The method `ObjectId.__str__()` creates a hexstring using `binascii.hexlify(__id).decode()`
+
+     """
      if isinstance(value, (ObjectId, Decimal128)):
          return str(value)
      if isinstance(value, _datetime.datetime):
          return ensure_pendulum_datetime(value)
+     if isinstance(value, Regex):
+         return value.try_compile().pattern
+     if isinstance(value, Timestamp):
+         date = value.as_datetime()
+         return ensure_pendulum_datetime(date)
+
      return value


+ def convert_arrow_columns(table: Any) -> Any:
+     """Convert the given table columns to Python types.
+
+     Notes:
+         Calling str() matches the `convert_mongo_obs()` used in non-arrow code.
+         Pymongoarrow converts ObjectId to `fixed_size_binary[12]`, which can't be
+         converted to a string as a vectorized operation because it contains ASCII characters.
+
+         Instead, you need to loop over values using:
+         ```python
+         pyarrow.array([v.as_buffer().hex() for v in object_id_array], type=pyarrow.string())
+         # pymongoarrow simplifies this by allowing this syntax
+         [str(v) for v in object_id_array]
+         ```
+
+     Args:
+         table (pyarrow.lib.Table): The table to convert.
+
+     Returns:
+         pyarrow.lib.Table: The table with the columns converted.
+     """
+     from dlt.common.libs.pyarrow import pyarrow
+     from pymongoarrow.types import (  # type: ignore
+         _is_binary,
+         _is_code,
+         _is_decimal128,
+         _is_objectid,
+     )
+
+     for i, field in enumerate(table.schema):
+         if _is_objectid(field.type) or _is_decimal128(field.type):
+             col_values = [str(value) for value in table[field.name]]
+             table = table.set_column(
+                 i,
+                 pyarrow.field(field.name, pyarrow.string()),
+                 pyarrow.array(col_values, type=pyarrow.string()),
+             )
+         else:
+             type_ = None
+             if _is_binary(field.type):
+                 type_ = pyarrow.binary()
+             elif _is_code(field.type):
+                 type_ = pyarrow.string()
+
+             if type_:
+                 col_values = [value.as_py() for value in table[field.name]]
+                 table = table.set_column(
+                     i,
+                     pyarrow.field(field.name, type_),
+                     pyarrow.array(col_values, type=type_),
+                 )
+     return table
+
+
  def client_from_credentials(connection_url: str) -> TMongoClient:
      client: TMongoClient = MongoClient(
          connection_url, uuidRepresentation="standard", tz_aware=True
@@ -148,6 +629,27 @@ def client_from_credentials(connection_url: str) -> TMongoClient:
      return client


+ def _raise_if_intersection(filter1: Dict[str, Any], filter2: Dict[str, Any]) -> None:
+     """
+     Raise an exception, if the given filters'
+     fields are intersecting.
+
+     Args:
+         filter1 (Dict[str, Any]): The first filter.
+         filter2 (Dict[str, Any]): The second filter.
+     """
+     field_inter = filter1.keys() & filter2.keys()
+     for field in field_inter:
+         if filter1[field].keys() & filter2[field].keys():
+             str_repr = str({field: filter1[field]})
+             raise ValueError(
+                 (
+                     f"Filtering operator {str_repr} is already used by the "
+                     "incremental and can't be used in the filter."
+                 )
+             )
+
+
  @configspec
  class MongoDbCollectionConfiguration(BaseConfiguration):
      incremental: Optional[dlt.sources.incremental] = None  # type: ignore[type-arg]
@@ -155,12 +657,13 @@ class MongoDbCollectionConfiguration(BaseConfiguration):

  @configspec
  class MongoDbCollectionResourceConfiguration(BaseConfiguration):
-     connection_url: str = dlt.config.value
+     connection_url: dlt.TSecretValue = dlt.secrets.value
      database: Optional[str] = dlt.config.value
      collection: str = dlt.config.value
      incremental: Optional[dlt.sources.incremental] = None  # type: ignore[type-arg]
      write_disposition: Optional[str] = dlt.config.value
      parallel: Optional[bool] = False
+     projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value


  __source_name__ = "mongodb"
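
Note (not part of the diff): a small sketch of the new helper behaviour, with made-up values. convert_mongo_objs now also normalises BSON Regex and Timestamp values, and _raise_if_intersection rejects a user filter that reuses an operator already claimed by the incremental filter.

    from bson.objectid import ObjectId
    from bson.timestamp import Timestamp

    from ingestr.src.mongodb.helpers import _raise_if_intersection, convert_mongo_objs

    print(convert_mongo_objs(ObjectId("655f1f77bcf86cd799439011")))  # hex string
    print(convert_mongo_objs(Timestamp(1700000000, 1)))              # pendulum datetime

    try:
        _raise_if_intersection(
            {"updated_at": {"$gte": "2023-01-01"}},  # e.g. built from the incremental cursor
            {"updated_at": {"$gte": "2024-01-01"}},  # user-supplied filter_
        )
    except ValueError as exc:
        print(exc)  # the "$gte" operator on "updated_at" is already taken
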
ingestr/src/sources.py CHANGED
@@ -21,6 +21,7 @@ from urllib.parse import ParseResult, parse_qs, quote, urlencode, urlparse
  import pendulum
  from dlt.common.time import ensure_pendulum_datetime
  from dlt.extract import Incremental
+ from dlt.extract.exceptions import ResourcesNotFoundError
  from dlt.sources import incremental as dlt_incremental
  from dlt.sources.credentials import (
      ConnectionStringCredentials,
@@ -2392,3 +2393,29 @@ class ElasticsearchSource:
              verify_certs=verify_certs,
              incremental=incremental,
          ).with_resources(table)
+
+
+ class AttioSource:
+     def handles_incrementality(self) -> bool:
+         return True
+
+     def dlt_source(self, uri: str, table: str, **kwargs):
+         parsed_uri = urlparse(uri)
+         query_params = parse_qs(parsed_uri.query)
+         api_key = query_params.get("api_key")
+
+         if api_key is None:
+             raise MissingValueError("api_key", "Attio")
+
+         parts = table.replace(" ", "").split(":")
+         table_name = parts[0]
+         params = parts[1:]
+
+         from ingestr.src.attio import attio_source
+
+         try:
+             return attio_source(api_key=api_key[0], params=params).with_resources(
+                 table_name
+             )
+         except ResourcesNotFoundError:
+             raise UnsupportedResourceError(table_name, "Attio")
ingestr-0.13.43.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ingestr
- Version: 0.13.41
+ Version: 0.13.43
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
ingestr-0.13.43.dist-info/RECORD CHANGED
@@ -2,15 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
  ingestr/main.py,sha256=Pe_rzwcDRKIYa7baEVUAAPOHyqQbX29RUexMl0F_S1k,25273
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
  ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
- ingestr/src/buildinfo.py,sha256=AK7sGUNx6CPDKJOXeMexFRun9bjpyv2c9t4DII73Pes,21
+ ingestr/src/buildinfo.py,sha256=af3TiaPbJzAW9JyfpOIeOvjzAsiboFF4f1frweJenxc,21
  ingestr/src/destinations.py,sha256=MctbeJUyNr0DRB0XYt2xAbEKkHZ40-nXXEOYCs4KuoE,15420
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
- ingestr/src/factory.py,sha256=KJKIL9q7kU4oAVXy5o0wDwLAU0nG9y0xC8D7HzksYak,5597
+ ingestr/src/factory.py,sha256=c5WfqmRrXFj1PddnBOzTzzZUHJ-Fb42cvCvsBEqn6Yo,5682
  ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
+ ingestr/src/http_client.py,sha256=dbAAf6Ptxf9pSn9RmNSHL4HEn5xhFimWjE2JZ67J00o,434
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
  ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
- ingestr/src/sources.py,sha256=SWZAa6bokLurQRPtH7rxi8K-GSVLp_p9Ig1ArGRsxCo,82635
+ ingestr/src/sources.py,sha256=HZQGWPuDdf4sVq91KuaJ9p7MtSfjWue8d2vknZpIwg8,83456
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -30,6 +31,8 @@ ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0
  ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
  ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
  ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
+ ingestr/src/attio/__init__.py,sha256=Dvr9rSZUlw6HGsqF7iabUit-xRC17Nv6QcmA4cRF2wA,2864
+ ingestr/src/attio/helpers.py,sha256=QvB-0BV_Z-cvMTBZDwOCuhxY1cB5PraPdrDkNyQ5TsM,1715
  ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
  ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
  ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
@@ -79,8 +82,8 @@ ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYH
  ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
  ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
  ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
- ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
- ingestr/src/mongodb/helpers.py,sha256=y9rYKR8eyIqam_eNsZmwSYevgi8mghh7Zp8qhTHl65s,5652
+ ingestr/src/mongodb/__init__.py,sha256=Ht5HGt9UJ8LeCtilgu7hZhrebo-knRLlPIlgGQojLgk,7221
+ ingestr/src/mongodb/helpers.py,sha256=H0GpOK3bPBhFWBEhJZOjywUBdzih6MOpmyVO_cKSN14,24178
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
  ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -128,8 +131,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
- ingestr-0.13.41.dist-info/METADATA,sha256=UzGBs9s0Kr6R1xji_ULG5Tuc383Klx2AIzfyZdXLBp4,13852
- ingestr-0.13.41.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ingestr-0.13.41.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
- ingestr-0.13.41.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
- ingestr-0.13.41.dist-info/RECORD,,
+ ingestr-0.13.43.dist-info/METADATA,sha256=n3hm9A9kfuALzOs9t2f4V2X4LH3hcIyFnj5y0DYP9Qo,13852
+ ingestr-0.13.43.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ingestr-0.13.43.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ ingestr-0.13.43.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ ingestr-0.13.43.dist-info/RECORD,,