ingestr 0.13.20__py3-none-any.whl → 0.13.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/main.py +29 -25
- ingestr/src/adjust/adjust_helpers.py +6 -2
- ingestr/src/applovin_max/__init__.py +5 -3
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +2 -0
- ingestr/src/hubspot/__init__.py +0 -1
- ingestr/src/kinesis/__init__.py +3 -4
- ingestr/src/partition.py +2 -2
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/sources.py +46 -14
- {ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/METADATA +5 -5
- {ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/RECORD +20 -14
- {ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/WHEEL +0 -0
- {ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
@@ -127,40 +127,44 @@ class SqlReflectionLevel(str, Enum):
 def ingest(
     source_uri: Annotated[
         str,
-        typer.Option(
+        typer.Option(
+            help="The URI of the [green]source[/green]",
+            envvar=["SOURCE_URI", "INGESTR_SOURCE_URI"],
+        ),
     ], # type: ignore
     dest_uri: Annotated[
         str,
         typer.Option(
-            help="The URI of the [cyan]destination[/cyan]",
+            help="The URI of the [cyan]destination[/cyan]",
+            envvar=["DESTINATION_URI", "INGESTR_DESTINATION_URI"],
         ),
     ], # type: ignore
     source_table: Annotated[
         str,
         typer.Option(
             help="The table name in the [green]source[/green] to fetch",
-            envvar="SOURCE_TABLE",
+            envvar=["SOURCE_TABLE", "INGESTR_SOURCE_TABLE"],
         ),
     ], # type: ignore
     dest_table: Annotated[
         str,
         typer.Option(
             help="The table in the [cyan]destination[/cyan] to save the data into",
-            envvar="DESTINATION_TABLE",
+            envvar=["DESTINATION_TABLE", "INGESTR_DESTINATION_TABLE"],
         ),
     ] = None, # type: ignore
     incremental_key: Annotated[
         Optional[str],
         typer.Option(
             help="The incremental key from the table to be used for incremental strategies",
-            envvar="INCREMENTAL_KEY",
+            envvar=["INCREMENTAL_KEY", "INGESTR_INCREMENTAL_KEY"],
         ),
     ] = None, # type: ignore
     incremental_strategy: Annotated[
         IncrementalStrategy,
         typer.Option(
             help="The incremental strategy to use",
-            envvar="INCREMENTAL_STRATEGY",
+            envvar=["INCREMENTAL_STRATEGY", "INGESTR_INCREMENTAL_STRATEGY"],
         ),
     ] = IncrementalStrategy.create_replace, # type: ignore
     interval_start: Annotated[
@@ -168,7 +172,7 @@ def ingest(
         typer.Option(
             help="The start of the interval the incremental key will cover",
             formats=DATE_FORMATS,
-            envvar="INTERVAL_START",
+            envvar=["INTERVAL_START", "INGESTR_INTERVAL_START"],
         ),
     ] = None, # type: ignore
     interval_end: Annotated[
@@ -176,126 +180,126 @@ def ingest(
         typer.Option(
             help="The end of the interval the incremental key will cover",
             formats=DATE_FORMATS,
-            envvar="INTERVAL_END",
+            envvar=["INTERVAL_END", "INGESTR_INTERVAL_END"],
         ),
     ] = None, # type: ignore
     primary_key: Annotated[
         Optional[list[str]],
         typer.Option(
             help="The key that will be used to deduplicate the resulting table",
-            envvar="PRIMARY_KEY",
+            envvar=["PRIMARY_KEY", "INGESTR_PRIMARY_KEY"],
         ),
     ] = None, # type: ignore
     partition_by: Annotated[
         Optional[str],
         typer.Option(
             help="The partition key to be used for partitioning the destination table",
-            envvar="PARTITION_BY",
+            envvar=["PARTITION_BY", "INGESTR_PARTITION_BY"],
         ),
     ] = None, # type: ignore
     cluster_by: Annotated[
         Optional[str],
         typer.Option(
             help="The clustering key to be used for clustering the destination table, not every destination supports clustering.",
-            envvar="CLUSTER_BY",
+            envvar=["CLUSTER_BY", "INGESTR_CLUSTER_BY"],
         ),
     ] = None, # type: ignore
     yes: Annotated[
         Optional[bool],
         typer.Option(
             help="Skip the confirmation prompt and ingest right away",
-            envvar="SKIP_CONFIRMATION",
+            envvar=["SKIP_CONFIRMATION", "INGESTR_SKIP_CONFIRMATION"],
         ),
     ] = False, # type: ignore
     full_refresh: Annotated[
         bool,
         typer.Option(
             help="Ignore the state and refresh the destination table completely",
-            envvar="FULL_REFRESH",
+            envvar=["FULL_REFRESH", "INGESTR_FULL_REFRESH"],
         ),
     ] = False, # type: ignore
     progress: Annotated[
         Progress,
         typer.Option(
             help="The progress display type, must be one of 'interactive', 'log'",
-            envvar="PROGRESS",
+            envvar=["PROGRESS", "INGESTR_PROGRESS"],
         ),
     ] = Progress.interactive, # type: ignore
     sql_backend: Annotated[
         SqlBackend,
         typer.Option(
             help="The SQL backend to use",
-            envvar="SQL_BACKEND",
+            envvar=["SQL_BACKEND", "INGESTR_SQL_BACKEND"],
        ),
     ] = SqlBackend.pyarrow, # type: ignore
     loader_file_format: Annotated[
         Optional[LoaderFileFormat],
         typer.Option(
             help="The file format to use when loading data",
-            envvar="LOADER_FILE_FORMAT",
+            envvar=["LOADER_FILE_FORMAT", "INGESTR_LOADER_FILE_FORMAT"],
         ),
     ] = None, # type: ignore
     page_size: Annotated[
         Optional[int],
         typer.Option(
             help="The page size to be used when fetching data from SQL sources",
-            envvar="PAGE_SIZE",
+            envvar=["PAGE_SIZE", "INGESTR_PAGE_SIZE"],
         ),
     ] = 50000, # type: ignore
     loader_file_size: Annotated[
         Optional[int],
         typer.Option(
             help="The file size to be used by the loader to split the data into multiple files. This can be set independent of the page size, since page size is used for fetching the data from the sources whereas this is used for the processing/loading part.",
-            envvar="LOADER_FILE_SIZE",
+            envvar=["LOADER_FILE_SIZE", "INGESTR_LOADER_FILE_SIZE"],
         ),
     ] = 100000, # type: ignore
     schema_naming: Annotated[
         SchemaNaming,
         typer.Option(
             help="The naming convention to use when moving the tables from source to destination. The default behavior is explained here: https://dlthub.com/docs/general-usage/schema#naming-convention",
-            envvar="SCHEMA_NAMING",
+            envvar=["SCHEMA_NAMING", "INGESTR_SCHEMA_NAMING"],
         ),
     ] = SchemaNaming.default, # type: ignore
     pipelines_dir: Annotated[
         Optional[str],
         typer.Option(
             help="The path to store dlt-related pipeline metadata. By default, ingestr will create a temporary directory and delete it after the execution is done in order to make retries stateless.",
-            envvar="PIPELINES_DIR",
+            envvar=["PIPELINES_DIR", "INGESTR_PIPELINES_DIR"],
         ),
     ] = None, # type: ignore
     extract_parallelism: Annotated[
         Optional[int],
         typer.Option(
             help="The number of parallel jobs to run for extracting data from the source, only applicable for certain sources",
-            envvar="EXTRACT_PARALLELISM",
+            envvar=["EXTRACT_PARALLELISM", "INGESTR_EXTRACT_PARALLELISM"],
         ),
     ] = 5, # type: ignore
     sql_reflection_level: Annotated[
         SqlReflectionLevel,
         typer.Option(
             help="The reflection level to use when reflecting the table schema from the source",
-            envvar="SQL_REFLECTION_LEVEL",
+            envvar=["SQL_REFLECTION_LEVEL", "INGESTR_SQL_REFLECTION_LEVEL"],
         ),
     ] = SqlReflectionLevel.full, # type: ignore
     sql_limit: Annotated[
         Optional[int],
         typer.Option(
             help="The limit to use when fetching data from the source",
-            envvar="SQL_LIMIT",
+            envvar=["SQL_LIMIT", "INGESTR_SQL_LIMIT"],
         ),
     ] = None, # type: ignore
     sql_exclude_columns: Annotated[
         Optional[list[str]],
         typer.Option(
             help="The columns to exclude from the source table",
-            envvar="SQL_EXCLUDE_COLUMNS",
+            envvar=["SQL_EXCLUDE_COLUMNS", "INGESTR_SQL_EXCLUDE_COLUMNS"],
         ),
     ] = [], # type: ignore
     columns: Annotated[
         Optional[list[str]],
         typer.Option(
             help="The column types to be used for the destination table in the format of 'column_name:column_type'",
-            envvar="COLUMNS",
+            envvar=["COLUMNS", "INGESTR_COLUMNS"],
         ),
     ] = None, # type: ignore
 ):
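Every `ingest` option now accepts an `INGESTR_`-prefixed environment variable alongside the original name. The following is a minimal, hypothetical sketch (not part of the package; the option and help text are copied from the diff above) of how a typer/click option with a list of `envvar` names behaves: click checks the names in order and uses the first one that is set, so both `SOURCE_URI` and `INGESTR_SOURCE_URI` resolve to the same option.

# Hedged sketch of the multi-envvar behaviour; script name and output are illustrative.
from typing import Annotated

import typer

app = typer.Typer()


@app.command()
def ingest(
    source_uri: Annotated[
        str,
        typer.Option(
            help="The URI of the [green]source[/green]",
            envvar=["SOURCE_URI", "INGESTR_SOURCE_URI"],
        ),
    ],
):
    # `INGESTR_SOURCE_URI=duckdb:///local.db python demo.py` behaves the same
    # as `SOURCE_URI=duckdb:///local.db python demo.py`.
    typer.echo(source_uri)


if __name__ == "__main__":
    app()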
ingestr/src/adjust/adjust_helpers.py
CHANGED
@@ -82,7 +82,9 @@ class AdjustAPI:
             items = result.get("rows", [])
             yield items
         else:
-            raise HTTPError(
+            raise HTTPError(
+                f"Request failed with status code: {response.status_code}, {response.text}."
+            )

     def fetch_events(self):
         headers = {"Authorization": f"Bearer {self.api_key}"}
@@ -93,7 +95,9 @@ class AdjustAPI:
             result = response.json()
             yield result
         else:
-            raise HTTPError(
+            raise HTTPError(
+                f"Request failed with status code: {response.status_code}, {response.text}."
+            )


 def parse_filters(filters_raw: str) -> dict:
ingestr/src/applovin_max/__init__.py
CHANGED
@@ -105,11 +105,13 @@ def get_data(
     if response.status_code == 404:
         if "No Mediation App Id found for platform" in response.text:
             return None
-        error_message =
+        error_message = (
+            f"AppLovin MAX API error (status {response.status_code}): {response.text}"
+        )
         raise requests.HTTPError(error_message)
-
+
     response_url = response.json().get("ad_revenue_report_url")
     df = pd.read_csv(response_url)
     df["Date"] = pd.to_datetime(df["Date"])
     df["partition_date"] = df["Date"].dt.date
-    return df
+    return df
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.
+version = "v0.13.22"
ingestr/src/factory.py
CHANGED
@@ -43,6 +43,7 @@ from ingestr.src.sources import (
     MongoDbSource,
     NotionSource,
     PersonioSource,
+    PipedriveSource,
     S3Source,
     SalesforceSource,
     ShopifySource,
@@ -144,6 +145,7 @@ class SourceDestinationFactory:
         "salesforce": SalesforceSource,
         "personio": PersonioSource,
         "kinesis": KinesisSource,
+        "pipedrive": PipedriveSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
ingestr/src/hubspot/__init__.py
CHANGED
ingestr/src/kinesis/__init__.py
CHANGED
@@ -16,7 +16,7 @@ from .helpers import get_shard_iterator, max_sequence_by_shard
     name=lambda args: args["stream_name"],
     primary_key="kinesis_msg_id",
     standalone=True,
-    max_table_nesting=0
+    max_table_nesting=0,
 )
 def kinesis_stream(
     stream_name: str,
@@ -75,7 +75,6 @@ def kinesis_stream(

     # get next shard to fetch messages from
     while shard_id := shard_ids.pop(0) if shard_ids else None:
-
         shard_iterator, _ = get_shard_iterator(
             kinesis_client,
             stream_name,
@@ -83,14 +82,14 @@ def kinesis_stream(
             last_msg, # type: ignore
             initial_at_datetime, # type: ignore
         )
-
+
         while shard_iterator:
             records = []
             records_response = kinesis_client.get_records(
                 ShardIterator=shard_iterator,
                 Limit=chunk_size, # The size of data can be up to 1 MB, it must be controlled by the user
             )
-
+
             for record in records_response["Records"]:
                 sequence_number = record["SequenceNumber"]
                 content = record["Data"]
ingestr/src/partition.py
CHANGED
@@ -13,7 +13,6 @@ def apply_athena_hints(
     additional_hints: Dict[str, TColumnSchema] = {},
 ) -> None:
     def _apply_partition_hint(resource: DltResource) -> None:
-
         columns = resource.columns if resource.columns else {}

         partition_hint = (
@@ -24,7 +23,8 @@ def apply_athena_hints(
         athena_adapter(
             resource,
             athena_partition.day(partition_column)
-            if partition_hint
+            if partition_hint
+            and partition_hint.get("data_type") in ("timestamp", "date")
             else partition_column,
         )

ingestr/src/pipedrive/__init__.py
ADDED
@@ -0,0 +1,198 @@
+"""Highly customizable source for Pipedrive, supports endpoint addition, selection and column rename
+
+Pipedrive api docs: https://developers.pipedrive.com/docs/api/v1
+
+Pipedrive changes or deprecates fields and endpoints without versioning the api.
+If something breaks, it's a good idea to check the changelog.
+Api changelog: https://developers.pipedrive.com/changelog
+
+To get an api key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+"""
+
+from typing import Any, Dict, Iterator, List, Optional, Union  # noqa: F401
+
+import dlt
+from dlt.common import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.sources import DltResource, TDataItems
+
+from .helpers import group_deal_flows
+from .helpers.custom_fields_munger import rename_fields, update_fields_mapping
+from .helpers.pages import get_pages, get_recent_items_incremental
+from .settings import ENTITY_MAPPINGS, RECENTS_ENTITIES
+from .typing import TDataPage
+
+
+@dlt.source(name="pipedrive", max_table_nesting=0)
+def pipedrive_source(
+    pipedrive_api_key: str = dlt.secrets.value,
+    since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
+) -> Iterator[DltResource]:
+    """
+    Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.
+
+    Args:
+        pipedrive_api_key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+        since_timestamp: Starting timestamp for incremental loading. By default complete history is loaded on first run.
+        incremental: Enable or disable incremental loading.
+
+    Returns resources:
+        custom_fields_mapping
+        activities
+        activityTypes
+        deals
+        deals_flow
+        deals_participants
+        files
+        filters
+        notes
+        persons
+        organizations
+        pipelines
+        products
+        stages
+        users
+        leads
+
+    For custom fields rename the `custom_fields_mapping` resource must be selected or loaded before other resources.
+
+    Resources that depend on another resource are implemented as transformers
+    so they can re-use the original resource data without re-downloading.
+    Examples: deals_participants, deals_flow
+    """
+
+    # yield nice rename mapping
+    yield create_state(pipedrive_api_key) | parsed_mapping
+
+    # parse timestamp and build kwargs
+    since_timestamp = ensure_pendulum_datetime(since_timestamp).strftime(
+        "%Y-%m-%d %H:%M:%S"
+    )
+    resource_kwargs: Any = (
+        {"since_timestamp": since_timestamp} if since_timestamp else {}
+    )
+
+    # create resources for all endpoints
+    endpoints_resources = {}
+    for entity, resource_name in RECENTS_ENTITIES.items():
+        endpoints_resources[resource_name] = dlt.resource(
+            get_recent_items_incremental,
+            name=resource_name,
+            primary_key="id",
+            write_disposition="merge",
+        )(entity, pipedrive_api_key, **resource_kwargs)
+
+    yield from endpoints_resources.values()
+
+    # create transformers for deals to participants and flows
+    yield endpoints_resources["deals"] | dlt.transformer(
+        name="deals_participants", write_disposition="merge", primary_key="id"
+    )(_get_deals_participants)(pipedrive_api_key)
+
+    yield endpoints_resources["deals"] | dlt.transformer(
+        name="deals_flow", write_disposition="merge", primary_key="id"
+    )(_get_deals_flow)(pipedrive_api_key)
+
+    yield leads(pipedrive_api_key, update_time=since_timestamp)
+
+
+def _get_deals_flow(
+    deals_page: TDataPage, pipedrive_api_key: str
+) -> Iterator[TDataItems]:
+    custom_fields_mapping = dlt.current.source_state().get("custom_fields_mapping", {})
+    for row in deals_page:
+        url = f"deals/{row['id']}/flow"
+        pages = get_pages(url, pipedrive_api_key)
+        for entity, page in group_deal_flows(pages):
+            yield dlt.mark.with_table_name(
+                rename_fields(page, custom_fields_mapping.get(entity, {})),
+                "deals_flow_" + entity,
+            )
+
+
+def _get_deals_participants(
+    deals_page: TDataPage, pipedrive_api_key: str
+) -> Iterator[TDataPage]:
+    for row in deals_page:
+        url = f"deals/{row['id']}/participants"
+        yield from get_pages(url, pipedrive_api_key)
+
+
+@dlt.resource(selected=False)
+def create_state(pipedrive_api_key: str) -> Iterator[Dict[str, Any]]:
+    def _get_pages_for_rename(
+        entity: str, fields_entity: str, pipedrive_api_key: str
+    ) -> Dict[str, Any]:
+        existing_fields_mapping: Dict[str, Dict[str, str]] = (
+            custom_fields_mapping.setdefault(entity, {})
+        )
+        # we need to process all pages before yielding
+        for page in get_pages(fields_entity, pipedrive_api_key):
+            existing_fields_mapping = update_fields_mapping(
+                page, existing_fields_mapping
+            )
+        return existing_fields_mapping
+
+    # gets all *Fields data and stores in state
+    custom_fields_mapping = dlt.current.source_state().setdefault(
+        "custom_fields_mapping", {}
+    )
+    for entity, fields_entity, _ in ENTITY_MAPPINGS:
+        if fields_entity is None:
+            continue
+        custom_fields_mapping[entity] = _get_pages_for_rename(
+            entity, fields_entity, pipedrive_api_key
+        )
+
+    yield custom_fields_mapping
+
+
+@dlt.transformer(
+    name="custom_fields_mapping",
+    write_disposition="replace",
+    columns={"options": {"data_type": "json"}},
+)
+def parsed_mapping(
+    custom_fields_mapping: Dict[str, Any],
+) -> Optional[Iterator[List[Dict[str, str]]]]:
+    """
+    Parses and yields custom fields' mapping in order to be stored in destiny by dlt
+    """
+    for endpoint, data_item_mapping in custom_fields_mapping.items():
+        yield [
+            {
+                "endpoint": endpoint,
+                "hash_string": hash_string,
+                "name": names["name"],
+                "normalized_name": names["normalized_name"],
+                "options": names["options"],
+                "field_type": names["field_type"],
+            }
+            for hash_string, names in data_item_mapping.items()
+        ]
+
+
+@dlt.resource(primary_key="id", write_disposition="merge")
+def leads(
+    pipedrive_api_key: str = dlt.secrets.value,
+    update_time: dlt.sources.incremental[str] = dlt.sources.incremental(
+        "update_time", "1970-01-01 00:00:00"
+    ),
+) -> Iterator[TDataPage]:
+    """Resource to incrementally load pipedrive leads by update_time"""
+    # Leads inherit custom fields from deals
+    fields_mapping = (
+        dlt.current.source_state().get("custom_fields_mapping", {}).get("deals", {})
+    )
+    # Load leads pages sorted from newest to oldest and stop loading when
+    # last incremental value is reached
+    pages = get_pages(
+        "leads",
+        pipedrive_api_key,
+        extra_params={"sort": "update_time DESC"},
+    )
+    for page in pages:
+        yield rename_fields(page, fields_mapping)
+
+        if update_time.start_out_of_range:
+            return
ingestr/src/pipedrive/helpers/__init__.py
ADDED
@@ -0,0 +1,23 @@
+"""Pipedrive source helpers"""
+
+from itertools import groupby
+from typing import Any, Dict, Iterable, List, Tuple, cast  # noqa: F401
+
+from dlt.common import pendulum  # noqa: F401
+
+
+def _deals_flow_group_key(item: Dict[str, Any]) -> str:
+    return item["object"]  # type: ignore[no-any-return]
+
+
+def group_deal_flows(
+    pages: Iterable[Iterable[Dict[str, Any]]],
+) -> Iterable[Tuple[str, List[Dict[str, Any]]]]:
+    for page in pages:
+        for entity, items in groupby(
+            sorted(page, key=_deals_flow_group_key), key=_deals_flow_group_key
+        ):
+            yield (
+                entity,
+                [dict(item["data"], timestamp=item["timestamp"]) for item in items],
+            )
ingestr/src/pipedrive/helpers/custom_fields_munger.py
ADDED
@@ -0,0 +1,102 @@
+from typing import Any, Dict, Optional, TypedDict
+
+import dlt
+
+from ..typing import TDataPage
+
+
+class TFieldMapping(TypedDict):
+    name: str
+    normalized_name: str
+    options: Optional[Dict[str, str]]
+    field_type: str
+
+
+def update_fields_mapping(
+    new_fields_mapping: TDataPage, existing_fields_mapping: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Specific function to perform data munging and push changes to custom fields' mapping stored in dlt's state
+    The endpoint must be an entity fields' endpoint
+    """
+    for data_item in new_fields_mapping:
+        # 'edit_flag' field contains a boolean value, which is set to 'True' for custom fields and 'False' otherwise.
+        if data_item.get("edit_flag"):
+            # Regarding custom fields, 'key' field contains pipedrive's hash string representation of its name
+            # We assume that pipedrive's hash strings are meant to be an univoque representation of custom fields' name, so dlt's state shouldn't be updated while those values
+            # remain unchanged
+            existing_fields_mapping = _update_field(data_item, existing_fields_mapping)
+        # Built in enum and set fields are mapped if their options have int ids
+        # Enum fields with bool and string key options are left intact
+        elif data_item.get("field_type") in {"set", "enum"}:
+            options = data_item.get("options", [])
+            first_option = options[0]["id"] if len(options) >= 1 else None
+            if isinstance(first_option, int) and not isinstance(first_option, bool):
+                existing_fields_mapping = _update_field(
+                    data_item, existing_fields_mapping
+                )
+    return existing_fields_mapping
+
+
+def _update_field(
+    data_item: Dict[str, Any],
+    existing_fields_mapping: Optional[Dict[str, TFieldMapping]],
+) -> Dict[str, TFieldMapping]:
+    """Create or update the given field's info the custom fields state
+    If the field hash already exists in the state from previous runs the name is not updated.
+    New enum options (if any) are appended to the state.
+    """
+    existing_fields_mapping = existing_fields_mapping or {}
+    key = data_item["key"]
+    options = data_item.get("options", [])
+    new_options_map = {str(o["id"]): o["label"] for o in options}
+    existing_field = existing_fields_mapping.get(key)
+    if not existing_field:
+        existing_fields_mapping[key] = dict(
+            name=data_item["name"],
+            normalized_name=_normalized_name(data_item["name"]),
+            options=new_options_map,
+            field_type=data_item["field_type"],
+        )
+        return existing_fields_mapping
+    existing_options = existing_field.get("options", {})
+    if not existing_options or existing_options == new_options_map:
+        existing_field["options"] = new_options_map
+        existing_field["field_type"] = data_item[
+            "field_type"
+        ]  # Add for backwards compat
+        return existing_fields_mapping
+    # Add new enum options to the existing options array
+    # so that when option is renamed the original label remains valid
+    new_option_keys = set(new_options_map) - set(existing_options)
+    for key in new_option_keys:
+        existing_options[key] = new_options_map[key]
+    existing_field["options"] = existing_options
+    return existing_fields_mapping
+
+
+def _normalized_name(name: str) -> str:
+    source_schema = dlt.current.source_schema()
+    normalized_name = name.strip()  # remove leading and trailing spaces
+    return source_schema.naming.normalize_identifier(normalized_name)
+
+
+def rename_fields(data: TDataPage, fields_mapping: Dict[str, Any]) -> TDataPage:
+    if not fields_mapping:
+        return data
+    for data_item in data:
+        for hash_string, field in fields_mapping.items():
+            if hash_string not in data_item:
+                continue
+            field_value = data_item.pop(hash_string)
+            field_name = field["name"]
+            options_map = field["options"]
+            # Get label instead of ID for 'enum' and 'set' fields
+            if field_value and field["field_type"] == "set":  # Multiple choice
+                field_value = [
+                    options_map.get(str(enum_id), enum_id) for enum_id in field_value
+                ]
+            elif field_value and field["field_type"] == "enum":
+                field_value = options_map.get(str(field_value), field_value)
+            data_item[field_name] = field_value
+    return data
ingestr/src/pipedrive/helpers/pages.py
ADDED
@@ -0,0 +1,115 @@
+from itertools import chain
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    TypeVar,
+    Union,
+)
+
+import dlt
+from dlt.sources.helpers import requests
+
+from ..typing import TDataPage
+from .custom_fields_munger import rename_fields
+
+
+def get_pages(
+    entity: str, pipedrive_api_key: str, extra_params: Dict[str, Any] = None
+) -> Iterator[List[Dict[str, Any]]]:
+    """
+    Generic method to retrieve endpoint data based on the required headers and params.
+
+    Args:
+        entity: the endpoint you want to call
+        pipedrive_api_key:
+        extra_params: any needed request params except pagination.
+
+    Returns:
+
+    """
+    headers = {"Content-Type": "application/json"}
+    params = {"api_token": pipedrive_api_key}
+    if extra_params:
+        params.update(extra_params)
+    url = f"https://app.pipedrive.com/v1/{entity}"
+    yield from _paginated_get(url, headers=headers, params=params)
+
+
+def get_recent_items_incremental(
+    entity: str,
+    pipedrive_api_key: str,
+    since_timestamp: dlt.sources.incremental[str] = dlt.sources.incremental(
+        "update_time|modified", "1970-01-01 00:00:00"
+    ),
+) -> Iterator[TDataPage]:
+    """Get a specific entity type from /recents with incremental state."""
+    yield from _get_recent_pages(entity, pipedrive_api_key, since_timestamp.last_value)
+
+
+def _paginated_get(
+    url: str, headers: Dict[str, Any], params: Dict[str, Any]
+) -> Iterator[List[Dict[str, Any]]]:
+    """
+    Requests and yields data 500 records at a time
+    Documentation: https://pipedrive.readme.io/docs/core-api-concepts-pagination
+    """
+    # pagination start and page limit
+    params["start"] = 0
+    params["limit"] = 500
+    while True:
+        page = requests.get(url, headers=headers, params=params).json()
+        # yield data only
+        data = page["data"]
+        if data:
+            yield data
+        # check if next page exists
+        pagination_info = page.get("additional_data", {}).get("pagination", {})
+        # is_next_page is set to True or False
+        if not pagination_info.get("more_items_in_collection", False):
+            break
+        params["start"] = pagination_info.get("next_start")
+
+
+T = TypeVar("T")
+
+
+def _extract_recents_data(data: Iterable[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Results from recents endpoint contain `data` key which is either a single entity or list of entities
+
+    This returns a flat list of entities from an iterable of recent results
+    """
+    return [
+        data_item
+        for data_item in chain.from_iterable(
+            (_list_wrapped(item["data"]) for item in data)
+        )
+        if data_item is not None
+    ]
+
+
+def _list_wrapped(item: Union[List[T], T]) -> List[T]:
+    if isinstance(item, list):
+        return item
+    return [item]
+
+
+def _get_recent_pages(
+    entity: str, pipedrive_api_key: str, since_timestamp: str
+) -> Iterator[TDataPage]:
+    custom_fields_mapping = (
+        dlt.current.source_state().get("custom_fields_mapping", {}).get(entity, {})
+    )
+    pages = get_pages(
+        "recents",
+        pipedrive_api_key,
+        extra_params=dict(since_timestamp=since_timestamp, items=entity),
+    )
+    pages = (_extract_recents_data(page) for page in pages)
+    for page in pages:
+        yield rename_fields(page, custom_fields_mapping)
+
+
+__source_name__ = "pipedrive"
ingestr/src/pipedrive/settings.py
ADDED
@@ -0,0 +1,27 @@
+"""Pipedrive source settings and constants"""
+
+ENTITY_MAPPINGS = [
+    ("activity", "activityFields", {"user_id": 0}),
+    ("organization", "organizationFields", None),
+    ("person", "personFields", None),
+    ("product", "productFields", None),
+    ("deal", "dealFields", None),
+    ("pipeline", None, None),
+    ("stage", None, None),
+    ("user", None, None),
+]
+
+RECENTS_ENTITIES = {
+    "activity": "activities",
+    "activityType": "activity_types",
+    "deal": "deals",
+    "file": "files",
+    "filter": "filters",
+    "note": "notes",
+    "person": "persons",
+    "organization": "organizations",
+    "pipeline": "pipelines",
+    "product": "products",
+    "stage": "stages",
+    "user": "users",
+}
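For context on how the new source fits together, here is a minimal, hypothetical dlt pipeline that loads Pipedrive deals with custom-field renaming. Per the docstring in `pipedrive/__init__.py`, the `custom_fields_mapping` resource should be selected alongside the other resources so that custom field hashes get renamed; the pipeline name, destination, and token below are placeholders, not values from the package.

# Hedged usage sketch; not shipped with the package.
import dlt

from ingestr.src.pipedrive import pipedrive_source

pipeline = dlt.pipeline(
    pipeline_name="pipedrive_demo",  # illustrative name
    destination="duckdb",            # any dlt destination works
    dataset_name="pipedrive_raw",
)

source = pipedrive_source(
    pipedrive_api_key="YOUR_API_TOKEN",  # placeholder; see the readme link in the docstring
    since_timestamp="2024-01-01 00:00:00",
)

# Select the rename-mapping resource together with the entities you want.
load_info = pipeline.run(source.with_resources("custom_fields_mapping", "deals"))
print(load_info)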
ingestr/src/sources.py
CHANGED
@@ -85,6 +85,7 @@ from ingestr.src.linkedin_ads.dimension_time_enum import (
 from ingestr.src.mongodb import mongodb_collection
 from ingestr.src.notion import notion_databases
 from ingestr.src.personio import personio_source
+from ingestr.src.pipedrive import pipedrive_source
 from ingestr.src.salesforce import salesforce_source
 from ingestr.src.shopify import shopify_source
 from ingestr.src.slack import slack_source
@@ -177,7 +178,7 @@ class SqlSource:
                 scheme="clickhouse+native",
                 query=urlencode(query_params, doseq=True),
             ).geturl()
-
+
         if uri.startswith("db2://"):
             uri = uri.replace("db2://", "db2+ibm_db://")

@@ -1837,8 +1838,8 @@ class AppLovinSource:


 class ApplovinMaxSource:
-    #expected uri format: applovinmax://?api_key=<api_key>
-    #expected table format: user_ad_revenue:app_id_1,app_id_2
+    # expected uri format: applovinmax://?api_key=<api_key>
+    # expected table format: user_ad_revenue:app_id_1,app_id_2

     def handles_incrementality(self) -> bool:
         return True
@@ -1850,7 +1851,7 @@ class ApplovinMaxSource:
         api_key = params.get("api_key")
         if api_key is None:
             raise ValueError("api_key is required to connect to AppLovin Max API.")
-
+
         AVAILABLE_TABLES = ["user_ad_revenue"]

         table_fields = table.split(":")
@@ -1860,7 +1861,7 @@ class ApplovinMaxSource:
             raise ValueError(
                 "Invalid table format. Expected format is user_ad_revenue:app_id_1,app_id_2"
             )
-
+
         if requested_table not in AVAILABLE_TABLES:
             raise ValueError(
                 f"Table name '{requested_table}' is not supported for AppLovin Max source yet."
@@ -1868,17 +1869,15 @@ class ApplovinMaxSource:
                 "If you need additional tables, please create a GitHub issue at "
                 "https://github.com/bruin-data/ingestr"
             )
-
-        applications = [
+
+        applications = [
+            i for i in table_fields[1].replace(" ", "").split(",") if i.strip()
+        ]
         if len(applications) == 0:
-            raise ValueError(
-
-            )
-
+            raise ValueError("At least one application id is required")
+
         if len(applications) != len(set(applications)):
-            raise ValueError(
-                "Application ids must be unique."
-            )
+            raise ValueError("Application ids must be unique.")

         interval_start = kwargs.get("interval_start")
         interval_end = kwargs.get("interval_end")
@@ -2009,3 +2008,36 @@ class KinesisSource:
         return kinesis_stream(
             stream_name=table, credentials=credentials, initial_at_timestamp=start_date
         )
+
+
+class PipedriveSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        api_key = params.get("api_token")
+        if api_key is None:
+            raise MissingValueError("api_token", "Pipedrive")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date)
+        else:
+            start_date = pendulum.parse("2000-01-01")
+
+        if table not in [
+            "users",
+            "activities",
+            "persons",
+            "organizations",
+            "products",
+            "stages",
+            "deals",
+        ]:
+            raise UnsupportedResourceError(table, "Pipedrive")
+
+        return pipedrive_source(
+            pipedrive_api_key=api_key, since_timestamp=start_date
+        ).with_resources(table)
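A short, hypothetical illustration of how the wrapper above is driven: the `pipedrive://?api_token=...` URI shape is inferred from the `parse_qs(...)` lookup in `dlt_source` and from the `"pipedrive"` entry added in `factory.py`; it is not spelled out elsewhere in this diff, and the token and table below are placeholders.

# Hedged sketch; URI form and values are assumptions based on the code above.
from ingestr.src.sources import PipedriveSource

source_wrapper = PipedriveSource()

dlt_source = source_wrapper.dlt_source(
    uri="pipedrive://?api_token=YOUR_API_TOKEN",  # placeholder token
    table="deals",                                # one of the tables allowed above
    interval_start=None,                          # falls back to 2000-01-01 in the code
)
# `dlt_source` is a dlt source restricted to the requested resource and can be
# passed to `dlt.pipeline(...).run(...)` like any other ingestr source.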
{ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.
+Version: 0.13.22
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -16,7 +16,7 @@ Classifier: Topic :: Database
 Requires-Python: >=3.9
 Requires-Dist: aiobotocore==2.21.1
 Requires-Dist: aiohappyeyeballs==2.4.8
-Requires-Dist: aiohttp==3.11.
+Requires-Dist: aiohttp==3.11.15
 Requires-Dist: aioitertools==0.12.0
 Requires-Dist: aiosignal==1.3.2
 Requires-Dist: alembic==1.15.1
@@ -55,8 +55,8 @@ Requires-Dist: facebook-business==20.0.0
 Requires-Dist: filelock==3.17.0
 Requires-Dist: flatten-json==0.1.14
 Requires-Dist: frozenlist==1.5.0
-Requires-Dist: fsspec==
-Requires-Dist: gcsfs==
+Requires-Dist: fsspec==2025.3.2
+Requires-Dist: gcsfs==2025.3.2
 Requires-Dist: gitdb==4.0.12
 Requires-Dist: gitpython==3.1.44
 Requires-Dist: giturlparse==0.12.0
@@ -149,7 +149,7 @@ Requires-Dist: rich-argparse==1.7.0
 Requires-Dist: rich==13.9.4
 Requires-Dist: rsa==4.9
 Requires-Dist: rudder-sdk-python==2.1.4
-Requires-Dist: s3fs==
+Requires-Dist: s3fs==2025.3.2
 Requires-Dist: s3transfer==0.11.3
 Requires-Dist: scramp==1.4.5
 Requires-Dist: semver==3.0.4
{ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/RECORD
CHANGED
@@ -1,23 +1,23 @@
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=74lbiWEa27MUKFPbyUNGIlrwD5fRxej5cKFwe_LX1pE,25452
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=
+ingestr/src/buildinfo.py,sha256=ExEPLDyz3-FkQx0OHsblNsR-B9G1fUx77cQtxlv6CXA,21
 ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256
+ingestr/src/factory.py,sha256=1jqcLv_QUUGeyg1OYN3ywrRdcDZyDRtMOongwyjDapU,5268
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
-ingestr/src/partition.py,sha256=
+ingestr/src/partition.py,sha256=E0WHqh1FTheQAIVK_-jWUx0dgyYZCD1VxlAm362gao4,964
 ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=kGsgFWf8Ghha0-HlC6PlDIIKX2Lriah4UmAseziGdr4,72035
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
-ingestr/src/adjust/adjust_helpers.py,sha256=
+ingestr/src/adjust/adjust_helpers.py,sha256=G_EvNuvA7CsaOtbV3g249iAyggMDMZYbtWOzOAz_EjY,3742
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
-ingestr/src/applovin_max/__init__.py,sha256=
+ingestr/src/applovin_max/__init__.py,sha256=CBMADQ23gi0oxAsxe-RV67GGb8I4EFOX_It45Vv9dj4,3315
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
 ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
@@ -58,12 +58,12 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
 ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
 ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
 ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
-ingestr/src/hubspot/__init__.py,sha256=
+ingestr/src/hubspot/__init__.py,sha256=NYgSIAPXQh2Qp1eKun7TgcerKogq6pWtNkr-_f0FXbI,9464
 ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
 ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
-ingestr/src/kinesis/__init__.py,sha256=
+ingestr/src/kinesis/__init__.py,sha256=u5ThH1y8uObZKXgIo71em1UnX6MsVHWOjcf1jKqKbE8,6205
 ingestr/src/kinesis/helpers.py,sha256=aF0GCDKSectaaW8XPdERY_6bUs0ky19dcBs24ZFn-o0,2473
 ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
@@ -80,6 +80,12 @@ ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falP
 ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
 ingestr/src/personio/__init__.py,sha256=sHYpoV-rg-kA1YsflctChis0hKcTrL6mka9O0CHV4zA,11638
 ingestr/src/personio/helpers.py,sha256=EKmBN0Lf4R0lc3yqqs7D-RjoZ75E8gPcctt59xwHxrY,2901
+ingestr/src/pipedrive/__init__.py,sha256=iRrxeMwo8_83ptgGnTFTNHV1nYvIsFfg0a3XzugPYeI,6982
+ingestr/src/pipedrive/settings.py,sha256=q119Fy4C5Ip1rMoCILX2BkHV3bwiXC_dW58KIiDUzsY,708
+ingestr/src/pipedrive/typing.py,sha256=lEMXu4hhAA3XkhVSlBUa-juqyupisd3c-qSQKxFvzoE,69
+ingestr/src/pipedrive/helpers/__init__.py,sha256=UX1K_qnGXB0ShtnBOfp2XuVbK8RRoCK8TdEmIjRckgg,710
+ingestr/src/pipedrive/helpers/custom_fields_munger.py,sha256=rZ4AjdITHfJE2NNomCR7vMBS1KnWpEGVF6fADwsIHUE,4488
+ingestr/src/pipedrive/helpers/pages.py,sha256=Klpjw2OnMuhzit3PpiHKsfzGcJ3rQPSQBl3HhE3-6eA,3358
 ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
 ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
 ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
@@ -112,8 +118,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.22.dist-info/METADATA,sha256=SC89LgkVuV22LAaSCETkDoT6bFYCgIkHjLgs2UP4q4c,13627
+ingestr-0.13.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.22.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.22.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.22.dist-info/RECORD,,
{ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/WHEEL
File without changes
{ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.20.dist-info → ingestr-0.13.22.dist-info}/licenses/LICENSE.md
File without changes