ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin_max/__init__.py +6 -4
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +37 -10
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +508 -27
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +107 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +15 -8
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +2933 -245
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.13.dist-info/RECORD +0 -115
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/partition.py
ADDED
@@ -0,0 +1,32 @@
+from typing import Dict
+
+from dlt.common.schema.typing import TColumnSchema
+from dlt.sources import DltResource, DltSource
+
+import ingestr.src.resource as resource
+
+
+def apply_athena_hints(
+    source: DltSource | DltResource,
+    partition_column: str,
+    additional_hints: Dict[str, TColumnSchema] = {},
+) -> None:
+    from dlt.destinations.adapters import athena_adapter, athena_partition
+
+    def _apply_partition_hint(resource: DltResource) -> None:
+        columns = resource.columns if resource.columns else {}
+
+        partition_hint = (
+            columns.get(partition_column)  # type: ignore
+            or additional_hints.get(partition_column)
+        )
+
+        athena_adapter(
+            resource,
+            athena_partition.day(partition_column)
+            if partition_hint
+            and partition_hint.get("data_type") in ("timestamp", "date")
+            else partition_column,
+        )
+
+    resource.for_each(source, _apply_partition_hint)
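A minimal usage sketch of the new partition helper (the resource, column, pipeline and destination below are invented for illustration, and it assumes ingestr.src.resource.for_each applies the hint callback to a bare resource as well as to a full source, as the type hint suggests). When the partition column is typed as date or timestamp, apply_athena_hints registers a daily Athena partition through dlt's athena_adapter; otherwise it passes the raw column name.

import dlt
import pendulum

from ingestr.src.partition import apply_athena_hints


@dlt.resource(columns={"event_date": {"data_type": "date"}})
def events():
    # Invented sample data; any resource with a date/timestamp column works.
    yield {"id": 1, "event_date": pendulum.today().date()}


resource = events()
# "event_date" is typed as a date, so the helper registers
# athena_partition.day("event_date"); non-date columns would fall back to the
# plain column name.
apply_athena_hints(resource, "event_date")

pipeline = dlt.pipeline(pipeline_name="events_demo", destination="athena")
pipeline.run(resource)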
ingestr/src/phantombuster/__init__.py
ADDED
@@ -0,0 +1,65 @@
+from typing import Iterable, Optional
+
+import dlt
+import pendulum
+import requests
+from dlt.common.typing import TAnyDateTime, TDataItem
+from dlt.sources import DltResource
+from dlt.sources.helpers.requests import Client
+
+from ingestr.src.phantombuster.client import PhantombusterClient
+
+
+def retry_on_limit(
+    response: Optional[requests.Response], exception: Optional[BaseException]
+) -> bool:
+    if response is not None and response.status_code == 429:
+        return True
+    return False
+
+
+def create_client() -> requests.Session:
+    return Client(
+        raise_for_status=False,
+        retry_condition=retry_on_limit,
+        request_max_attempts=12,
+        request_backoff_factor=2,
+    ).session
+
+
+@dlt.source(max_table_nesting=0)
+def phantombuster_source(
+    api_key: str, agent_id: str, start_date: TAnyDateTime, end_date: TAnyDateTime | None
+) -> Iterable[DltResource]:
+    client = PhantombusterClient(api_key)
+
+    @dlt.resource(
+        write_disposition="merge",
+        primary_key="container_id",
+        columns={
+            "partition_dt": {"data_type": "date", "partition": True},
+        },
+    )
+    def completed_phantoms(
+        dateTime=(
+            dlt.sources.incremental(
+                "ended_at",
+                initial_value=start_date,
+                end_value=end_date,
+                range_start="closed",
+                range_end="closed",
+            )
+        ),
+    ) -> Iterable[TDataItem]:
+        if dateTime.end_value is None:
+            end_dt = pendulum.now(tz="UTC")
+        else:
+            end_dt = dateTime.end_value
+
+        start_dt = dateTime.last_value
+
+        yield client.fetch_containers_result(
+            create_client(), agent_id, start_date=start_dt, end_date=end_dt
+        )
+
+    return completed_phantoms
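A rough sketch of how this new source might be run with dlt (pipeline name, dataset name, API key and agent id are placeholders, not values from this release): phantombuster_source exposes a single completed_phantoms resource that merges on container_id and tracks the ended_at cursor incrementally.

import dlt
import pendulum

from ingestr.src.phantombuster import phantombuster_source

pipeline = dlt.pipeline(
    pipeline_name="phantombuster",
    destination="duckdb",
    dataset_name="phantombuster_raw",
)
source = phantombuster_source(
    api_key="PHANTOMBUSTER_API_KEY",  # placeholder
    agent_id="1234567890",            # placeholder
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    end_date=None,  # open-ended: the resource substitutes pendulum.now("UTC")
)
print(pipeline.run(source))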
ingestr/src/phantombuster/client.py
ADDED
@@ -0,0 +1,87 @@
+from typing import Union
+
+import pendulum
+import requests
+
+
+class PhantombusterClient:
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+
+    def _get_headers(self):
+        return {
+            "X-Phantombuster-Key-1": self.api_key,
+            "accept": "application/json",
+        }
+
+    def fetch_containers_result(
+        self,
+        session: requests.Session,
+        agent_id: str,
+        start_date: pendulum.DateTime,
+        end_date: pendulum.DateTime,
+    ):
+        url = "https://api.phantombuster.com/api/v2/containers/fetch-all/"
+        before_ended_at = None
+        limit = 100
+
+        started_at = start_date.int_timestamp * 1000 + int(
+            start_date.microsecond / 1000
+        )
+        ended_at = end_date.int_timestamp * 1000 + int(end_date.microsecond / 1000)
+
+        while True:
+            params: dict[str, Union[str, int, float, bytes, None]] = {
+                "agentId": agent_id,
+                "limit": limit,
+                "mode": "finalized",
+            }
+
+            if before_ended_at:
+                params["beforeEndedAt"] = before_ended_at
+
+            response = session.get(url=url, headers=self._get_headers(), params=params)
+            data = response.json()
+            containers = data.get("containers", [])
+
+            for container in containers:
+                container_ended_at = container.get("endedAt")
+
+                if before_ended_at is None or before_ended_at > container_ended_at:
+                    before_ended_at = container_ended_at
+
+                if container_ended_at < started_at or container_ended_at > ended_at:
+                    continue
+
+                try:
+                    result = self.fetch_result_object(session, container["id"])
+                    partition_dt = pendulum.from_timestamp(
+                        container_ended_at / 1000, tz="UTC"
+                    ).date()
+                    container_ended_at_datetime = pendulum.from_timestamp(
+                        container_ended_at / 1000, tz="UTC"
+                    )
+                    row = {
+                        "container_id": container["id"],
+                        "container": container,
+                        "result": result,
+                        "partition_dt": partition_dt,
+                        "ended_at": container_ended_at_datetime,
+                    }
+                    yield row
+
+                except requests.RequestException as e:
+                    print(f"Error fetching result for container {container['id']}: {e}")
+
+            if data["maxLimitReached"] is False:
+                break
+
+    def fetch_result_object(self, session: requests.Session, container_id: str):
+        result_url = (
+            "https://api.phantombuster.com/api/v2/containers/fetch-result-object"
+        )
+        params = {"id": container_id}
+        response = session.get(result_url, headers=self._get_headers(), params=params)
+        response.raise_for_status()
+
+        return response.json()
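The client filters containers against epoch-millisecond bounds before fetching their result objects; a quick, self-contained check of the conversion used above (the date is arbitrary):

import pendulum

start_date = pendulum.datetime(2024, 1, 1, tz="UTC")
started_at = start_date.int_timestamp * 1000 + int(start_date.microsecond / 1000)
print(started_at)  # 1704067200000 -> milliseconds since the Unix epoch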
ingestr/src/pinterest/__init__.py
ADDED
@@ -0,0 +1,82 @@
+from typing import Iterable
+
+import dlt
+import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+from dlt.sources.helpers import requests
+
+
+@dlt.source(name="pinterest", max_table_nesting=0)
+def pinterest_source(
+    start_date: pendulum.DateTime,
+    access_token: str,
+    page_size: int = 200,
+    end_date: pendulum.DateTime | None = None,
+) -> Iterable[DltResource]:
+    session = requests.Session()
+    session.headers.update({"Authorization": f"Bearer {access_token}"})
+    base_url = "https://api.pinterest.com/v5"
+
+    def fetch_data(
+        endpoint: str,
+        start_dt: pendulum.DateTime,
+        end_dt: pendulum.DateTime,
+    ) -> Iterable[TDataItem]:
+        url = f"{base_url}/{endpoint}"
+        params = {"page_size": page_size}
+        bookmark = None
+        while True:
+            if bookmark:
+                params["bookmark"] = bookmark
+
+            resp = session.get(url, params=params)
+            resp.raise_for_status()
+            data = resp.json()
+            items = data.get("items") or []
+
+            for item in items:
+                item_created = ensure_pendulum_datetime(item["created_at"])
+                if item_created <= start_dt:
+                    continue
+                if item_created > end_dt:
+                    continue
+                item["created_at"] = item_created
+                yield item
+
+            bookmark = data.get("bookmark")
+            if not bookmark:
+                break
+
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def pins(
+        datetime=dlt.sources.incremental(
+            "created_at",
+            initial_value=start_date,
+            end_value=end_date,
+        ),
+    ) -> Iterable[TDataItem]:
+        _start_date = datetime.last_value or start_date
+        if end_date is None:
+            _end_date = pendulum.now("UTC")
+        else:
+            _end_date = datetime.end_value
+        yield from fetch_data("pins", _start_date, _end_date)
+
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def boards(
+        datetime=dlt.sources.incremental(
+            "created_at",
+            initial_value=start_date,
+            end_value=end_date,
+        ),
+    ) -> Iterable[TDataItem]:
+        _start_date = datetime.last_value or start_date
+        if end_date is None:
+            _end_date = pendulum.now("UTC")
+        else:
+            _end_date = datetime.end_value
+        yield from fetch_data("boards", _start_date, _end_date)
+
+    return pins, boards
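A short sketch of loading only the pins resource from the new Pinterest source (token, dates, pipeline and destination are placeholders); boards can be selected the same way, and both paginate through the API's bookmark cursor.

import dlt
import pendulum

from ingestr.src.pinterest import pinterest_source

source = pinterest_source(
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    access_token="PINTEREST_ACCESS_TOKEN",  # placeholder
).with_resources("pins")

pipeline = dlt.pipeline(pipeline_name="pinterest", destination="duckdb")
pipeline.run(source)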
ingestr/src/pipedrive/__init__.py
ADDED
@@ -0,0 +1,198 @@
+"""Highly customizable source for Pipedrive, supports endpoint addition, selection and column rename
+
+Pipedrive api docs: https://developers.pipedrive.com/docs/api/v1
+
+Pipedrive changes or deprecates fields and endpoints without versioning the api.
+If something breaks, it's a good idea to check the changelog.
+Api changelog: https://developers.pipedrive.com/changelog
+
+To get an api key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+"""
+
+from typing import Any, Dict, Iterator, List, Optional, Union  # noqa: F401
+
+import dlt
+from dlt.common import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.sources import DltResource, TDataItems
+
+from .helpers import group_deal_flows
+from .helpers.custom_fields_munger import rename_fields, update_fields_mapping
+from .helpers.pages import get_pages, get_recent_items_incremental
+from .settings import ENTITY_MAPPINGS, RECENTS_ENTITIES
+from .typing import TDataPage
+
+
+@dlt.source(name="pipedrive", max_table_nesting=0)
+def pipedrive_source(
+    pipedrive_api_key: str = dlt.secrets.value,
+    since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
+) -> Iterator[DltResource]:
+    """
+    Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.
+
+    Args:
+        pipedrive_api_key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+        since_timestamp: Starting timestamp for incremental loading. By default complete history is loaded on first run.
+        incremental: Enable or disable incremental loading.
+
+    Returns resources:
+        custom_fields_mapping
+        activities
+        activityTypes
+        deals
+        deals_flow
+        deals_participants
+        files
+        filters
+        notes
+        persons
+        organizations
+        pipelines
+        products
+        stages
+        users
+        leads
+
+    For custom fields rename the `custom_fields_mapping` resource must be selected or loaded before other resources.
+
+    Resources that depend on another resource are implemented as transformers
+    so they can re-use the original resource data without re-downloading.
+    Examples: deals_participants, deals_flow
+    """
+
+    # yield nice rename mapping
+    yield create_state(pipedrive_api_key) | parsed_mapping
+
+    # parse timestamp and build kwargs
+    since_timestamp = ensure_pendulum_datetime(since_timestamp).strftime(
+        "%Y-%m-%d %H:%M:%S"
+    )
+    resource_kwargs: Any = (
+        {"since_timestamp": since_timestamp} if since_timestamp else {}
+    )
+
+    # create resources for all endpoints
+    endpoints_resources = {}
+    for entity, resource_name in RECENTS_ENTITIES.items():
+        endpoints_resources[resource_name] = dlt.resource(
+            get_recent_items_incremental,
+            name=resource_name,
+            primary_key="id",
+            write_disposition="merge",
+        )(entity, pipedrive_api_key, **resource_kwargs)
+
+    yield from endpoints_resources.values()
+
+    # create transformers for deals to participants and flows
+    yield endpoints_resources["deals"] | dlt.transformer(
+        name="deals_participants", write_disposition="merge", primary_key="id"
+    )(_get_deals_participants)(pipedrive_api_key)
+
+    yield endpoints_resources["deals"] | dlt.transformer(
+        name="deals_flow", write_disposition="merge", primary_key="id"
+    )(_get_deals_flow)(pipedrive_api_key)
+
+    yield leads(pipedrive_api_key, update_time=since_timestamp)
+
+
+def _get_deals_flow(
+    deals_page: TDataPage, pipedrive_api_key: str
+) -> Iterator[TDataItems]:
+    custom_fields_mapping = dlt.current.source_state().get("custom_fields_mapping", {})
+    for row in deals_page:
+        url = f"deals/{row['id']}/flow"
+        pages = get_pages(url, pipedrive_api_key)
+        for entity, page in group_deal_flows(pages):
+            yield dlt.mark.with_table_name(
+                rename_fields(page, custom_fields_mapping.get(entity, {})),
+                "deals_flow_" + entity,
+            )
+
+
+def _get_deals_participants(
+    deals_page: TDataPage, pipedrive_api_key: str
+) -> Iterator[TDataPage]:
+    for row in deals_page:
+        url = f"deals/{row['id']}/participants"
+        yield from get_pages(url, pipedrive_api_key)
+
+
+@dlt.resource(selected=False)
+def create_state(pipedrive_api_key: str) -> Iterator[Dict[str, Any]]:
+    def _get_pages_for_rename(
+        entity: str, fields_entity: str, pipedrive_api_key: str
+    ) -> Dict[str, Any]:
+        existing_fields_mapping: Dict[str, Dict[str, str]] = (
+            custom_fields_mapping.setdefault(entity, {})
+        )
+        # we need to process all pages before yielding
+        for page in get_pages(fields_entity, pipedrive_api_key):
+            existing_fields_mapping = update_fields_mapping(
+                page, existing_fields_mapping
+            )
+        return existing_fields_mapping
+
+    # gets all *Fields data and stores in state
+    custom_fields_mapping = dlt.current.source_state().setdefault(
+        "custom_fields_mapping", {}
+    )
+    for entity, fields_entity, _ in ENTITY_MAPPINGS:
+        if fields_entity is None:
+            continue
+        custom_fields_mapping[entity] = _get_pages_for_rename(
+            entity, fields_entity, pipedrive_api_key
+        )
+
+    yield custom_fields_mapping
+
+
+@dlt.transformer(
+    name="custom_fields_mapping",
+    write_disposition="replace",
+    columns={"options": {"data_type": "json"}},
+)
+def parsed_mapping(
+    custom_fields_mapping: Dict[str, Any],
+) -> Optional[Iterator[List[Dict[str, str]]]]:
+    """
+    Parses and yields custom fields' mapping in order to be stored in destiny by dlt
+    """
+    for endpoint, data_item_mapping in custom_fields_mapping.items():
+        yield [
+            {
+                "endpoint": endpoint,
+                "hash_string": hash_string,
+                "name": names["name"],
+                "normalized_name": names["normalized_name"],
+                "options": names["options"],
+                "field_type": names["field_type"],
+            }
+            for hash_string, names in data_item_mapping.items()
+        ]
+
+
+@dlt.resource(primary_key="id", write_disposition="merge")
+def leads(
+    pipedrive_api_key: str = dlt.secrets.value,
+    update_time: dlt.sources.incremental[str] = dlt.sources.incremental(
+        "update_time", "1970-01-01 00:00:00"
+    ),
+) -> Iterator[TDataPage]:
+    """Resource to incrementally load pipedrive leads by update_time"""
+    # Leads inherit custom fields from deals
+    fields_mapping = (
+        dlt.current.source_state().get("custom_fields_mapping", {}).get("deals", {})
+    )
+    # Load leads pages sorted from newest to oldest and stop loading when
+    # last incremental value is reached
+    pages = get_pages(
+        "leads",
+        pipedrive_api_key,
+        extra_params={"sort": "update_time DESC"},
+    )
+    for page in pages:
+        yield rename_fields(page, fields_mapping)
+
+        if update_time.start_out_of_range:
+            return
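A sketch of running the Pipedrive source end to end (API token and timestamp are placeholders; in practice the key would come from dlt secrets). Because create_state | parsed_mapping is yielded first, custom-field hashes on deals, persons, and the other endpoints are renamed to readable column names during the same run.

import dlt

from ingestr.src.pipedrive import pipedrive_source

pipeline = dlt.pipeline(pipeline_name="pipedrive", destination="duckdb")
source = pipedrive_source(
    pipedrive_api_key="PIPEDRIVE_API_TOKEN",  # placeholder
    since_timestamp="2024-01-01 00:00:00",
)
pipeline.run(source)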
ingestr/src/pipedrive/helpers/__init__.py
ADDED
@@ -0,0 +1,23 @@
+"""Pipedrive source helpers"""
+
+from itertools import groupby
+from typing import Any, Dict, Iterable, List, Tuple, cast  # noqa: F401
+
+from dlt.common import pendulum  # noqa: F401
+
+
+def _deals_flow_group_key(item: Dict[str, Any]) -> str:
+    return item["object"]  # type: ignore[no-any-return]
+
+
+def group_deal_flows(
+    pages: Iterable[Iterable[Dict[str, Any]]],
+) -> Iterable[Tuple[str, List[Dict[str, Any]]]]:
+    for page in pages:
+        for entity, items in groupby(
+            sorted(page, key=_deals_flow_group_key), key=_deals_flow_group_key
+        ):
+            yield (
+                entity,
+                [dict(item["data"], timestamp=item["timestamp"]) for item in items],
+            )
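A small worked example of group_deal_flows (the flow items are invented): each page is grouped by its "object" type and every item is flattened to its "data" payload plus the change timestamp, which is how _get_deals_flow above routes rows to the per-entity deals_flow_* tables.

from ingestr.src.pipedrive.helpers import group_deal_flows

pages = [
    [
        {"object": "dealChange", "timestamp": "2024-01-01 10:00:00", "data": {"id": 1}},
        {"object": "activity", "timestamp": "2024-01-01 11:00:00", "data": {"id": 7}},
        {"object": "dealChange", "timestamp": "2024-01-02 09:00:00", "data": {"id": 2}},
    ]
]
for entity, items in group_deal_flows(pages):
    print(entity, items)
# activity   [{'id': 7, 'timestamp': '2024-01-01 11:00:00'}]
# dealChange [{'id': 1, 'timestamp': '2024-01-01 10:00:00'},
#             {'id': 2, 'timestamp': '2024-01-02 09:00:00'}]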
ingestr/src/pipedrive/helpers/custom_fields_munger.py
ADDED
@@ -0,0 +1,102 @@
+from typing import Any, Dict, Optional, TypedDict
+
+import dlt
+
+from ..typing import TDataPage
+
+
+class TFieldMapping(TypedDict):
+    name: str
+    normalized_name: str
+    options: Optional[Dict[str, str]]
+    field_type: str
+
+
+def update_fields_mapping(
+    new_fields_mapping: TDataPage, existing_fields_mapping: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Specific function to perform data munging and push changes to custom fields' mapping stored in dlt's state
+    The endpoint must be an entity fields' endpoint
+    """
+    for data_item in new_fields_mapping:
+        # 'edit_flag' field contains a boolean value, which is set to 'True' for custom fields and 'False' otherwise.
+        if data_item.get("edit_flag"):
+            # Regarding custom fields, 'key' field contains pipedrive's hash string representation of its name
+            # We assume that pipedrive's hash strings are meant to be an univoque representation of custom fields' name, so dlt's state shouldn't be updated while those values
+            # remain unchanged
+            existing_fields_mapping = _update_field(data_item, existing_fields_mapping)
+        # Built in enum and set fields are mapped if their options have int ids
+        # Enum fields with bool and string key options are left intact
+        elif data_item.get("field_type") in {"set", "enum"}:
+            options = data_item.get("options", [])
+            first_option = options[0]["id"] if len(options) >= 1 else None
+            if isinstance(first_option, int) and not isinstance(first_option, bool):
+                existing_fields_mapping = _update_field(
+                    data_item, existing_fields_mapping
+                )
+    return existing_fields_mapping
+
+
+def _update_field(
+    data_item: Dict[str, Any],
+    existing_fields_mapping: Optional[Dict[str, TFieldMapping]],
+) -> Dict[str, TFieldMapping]:
+    """Create or update the given field's info the custom fields state
+    If the field hash already exists in the state from previous runs the name is not updated.
+    New enum options (if any) are appended to the state.
+    """
+    existing_fields_mapping = existing_fields_mapping or {}
+    key = data_item["key"]
+    options = data_item.get("options", [])
+    new_options_map = {str(o["id"]): o["label"] for o in options}
+    existing_field = existing_fields_mapping.get(key)
+    if not existing_field:
+        existing_fields_mapping[key] = dict(
+            name=data_item["name"],
+            normalized_name=_normalized_name(data_item["name"]),
+            options=new_options_map,
+            field_type=data_item["field_type"],
+        )
+        return existing_fields_mapping
+    existing_options = existing_field.get("options", {})
+    if not existing_options or existing_options == new_options_map:
+        existing_field["options"] = new_options_map
+        existing_field["field_type"] = data_item[
+            "field_type"
+        ]  # Add for backwards compat
+        return existing_fields_mapping
+    # Add new enum options to the existing options array
+    # so that when option is renamed the original label remains valid
+    new_option_keys = set(new_options_map) - set(existing_options)
+    for key in new_option_keys:
+        existing_options[key] = new_options_map[key]
+    existing_field["options"] = existing_options
+    return existing_fields_mapping
+
+
+def _normalized_name(name: str) -> str:
+    source_schema = dlt.current.source_schema()
+    normalized_name = name.strip()  # remove leading and trailing spaces
+    return source_schema.naming.normalize_identifier(normalized_name)
+
+
+def rename_fields(data: TDataPage, fields_mapping: Dict[str, Any]) -> TDataPage:
+    if not fields_mapping:
+        return data
+    for data_item in data:
+        for hash_string, field in fields_mapping.items():
+            if hash_string not in data_item:
+                continue
+            field_value = data_item.pop(hash_string)
+            field_name = field["name"]
+            options_map = field["options"]
+            # Get label instead of ID for 'enum' and 'set' fields
+            if field_value and field["field_type"] == "set":  # Multiple choice
+                field_value = [
+                    options_map.get(str(enum_id), enum_id) for enum_id in field_value
+                ]
+            elif field_value and field["field_type"] == "enum":
+                field_value = options_map.get(str(field_value), field_value)
+            data_item[field_name] = field_value
+    return data