ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin_max/__init__.py +6 -4
  10. ingestr/src/appsflyer/__init__.py +325 -0
  11. ingestr/src/appsflyer/client.py +49 -45
  12. ingestr/src/appstore/__init__.py +1 -0
  13. ingestr/src/arrow/__init__.py +9 -1
  14. ingestr/src/asana_source/__init__.py +1 -1
  15. ingestr/src/attio/__init__.py +102 -0
  16. ingestr/src/attio/helpers.py +65 -0
  17. ingestr/src/blob.py +37 -10
  18. ingestr/src/buildinfo.py +1 -1
  19. ingestr/src/chess/__init__.py +1 -1
  20. ingestr/src/clickup/__init__.py +85 -0
  21. ingestr/src/clickup/helpers.py +47 -0
  22. ingestr/src/collector/spinner.py +43 -0
  23. ingestr/src/couchbase_source/__init__.py +118 -0
  24. ingestr/src/couchbase_source/helpers.py +135 -0
  25. ingestr/src/cursor/__init__.py +83 -0
  26. ingestr/src/cursor/helpers.py +188 -0
  27. ingestr/src/destinations.py +508 -27
  28. ingestr/src/docebo/__init__.py +589 -0
  29. ingestr/src/docebo/client.py +435 -0
  30. ingestr/src/docebo/helpers.py +97 -0
  31. ingestr/src/elasticsearch/__init__.py +80 -0
  32. ingestr/src/elasticsearch/helpers.py +138 -0
  33. ingestr/src/errors.py +8 -0
  34. ingestr/src/facebook_ads/__init__.py +47 -28
  35. ingestr/src/facebook_ads/helpers.py +59 -37
  36. ingestr/src/facebook_ads/settings.py +2 -0
  37. ingestr/src/facebook_ads/utils.py +39 -0
  38. ingestr/src/factory.py +107 -2
  39. ingestr/src/filesystem/__init__.py +8 -3
  40. ingestr/src/filters.py +46 -3
  41. ingestr/src/fluxx/__init__.py +9906 -0
  42. ingestr/src/fluxx/helpers.py +209 -0
  43. ingestr/src/frankfurter/__init__.py +157 -0
  44. ingestr/src/frankfurter/helpers.py +48 -0
  45. ingestr/src/freshdesk/__init__.py +89 -0
  46. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  47. ingestr/src/freshdesk/settings.py +9 -0
  48. ingestr/src/fundraiseup/__init__.py +95 -0
  49. ingestr/src/fundraiseup/client.py +81 -0
  50. ingestr/src/github/__init__.py +41 -6
  51. ingestr/src/github/helpers.py +5 -5
  52. ingestr/src/google_analytics/__init__.py +22 -4
  53. ingestr/src/google_analytics/helpers.py +124 -6
  54. ingestr/src/google_sheets/__init__.py +4 -4
  55. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  56. ingestr/src/hostaway/__init__.py +302 -0
  57. ingestr/src/hostaway/client.py +288 -0
  58. ingestr/src/http/__init__.py +35 -0
  59. ingestr/src/http/readers.py +114 -0
  60. ingestr/src/http_client.py +24 -0
  61. ingestr/src/hubspot/__init__.py +66 -23
  62. ingestr/src/hubspot/helpers.py +52 -22
  63. ingestr/src/hubspot/settings.py +14 -7
  64. ingestr/src/influxdb/__init__.py +46 -0
  65. ingestr/src/influxdb/client.py +34 -0
  66. ingestr/src/intercom/__init__.py +142 -0
  67. ingestr/src/intercom/helpers.py +674 -0
  68. ingestr/src/intercom/settings.py +279 -0
  69. ingestr/src/isoc_pulse/__init__.py +159 -0
  70. ingestr/src/jira_source/__init__.py +340 -0
  71. ingestr/src/jira_source/helpers.py +439 -0
  72. ingestr/src/jira_source/settings.py +170 -0
  73. ingestr/src/kafka/__init__.py +4 -1
  74. ingestr/src/kinesis/__init__.py +139 -0
  75. ingestr/src/kinesis/helpers.py +82 -0
  76. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  77. ingestr/src/linear/__init__.py +634 -0
  78. ingestr/src/linear/helpers.py +111 -0
  79. ingestr/src/linkedin_ads/helpers.py +0 -1
  80. ingestr/src/mailchimp/__init__.py +126 -0
  81. ingestr/src/mailchimp/helpers.py +226 -0
  82. ingestr/src/mailchimp/settings.py +164 -0
  83. ingestr/src/masking.py +344 -0
  84. ingestr/src/mixpanel/__init__.py +62 -0
  85. ingestr/src/mixpanel/client.py +99 -0
  86. ingestr/src/monday/__init__.py +246 -0
  87. ingestr/src/monday/helpers.py +392 -0
  88. ingestr/src/monday/settings.py +328 -0
  89. ingestr/src/mongodb/__init__.py +72 -8
  90. ingestr/src/mongodb/helpers.py +915 -38
  91. ingestr/src/partition.py +32 -0
  92. ingestr/src/phantombuster/__init__.py +65 -0
  93. ingestr/src/phantombuster/client.py +87 -0
  94. ingestr/src/pinterest/__init__.py +82 -0
  95. ingestr/src/pipedrive/__init__.py +198 -0
  96. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  97. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  98. ingestr/src/pipedrive/helpers/pages.py +115 -0
  99. ingestr/src/pipedrive/settings.py +27 -0
  100. ingestr/src/pipedrive/typing.py +3 -0
  101. ingestr/src/plusvibeai/__init__.py +335 -0
  102. ingestr/src/plusvibeai/helpers.py +544 -0
  103. ingestr/src/plusvibeai/settings.py +252 -0
  104. ingestr/src/quickbooks/__init__.py +117 -0
  105. ingestr/src/resource.py +40 -0
  106. ingestr/src/revenuecat/__init__.py +83 -0
  107. ingestr/src/revenuecat/helpers.py +237 -0
  108. ingestr/src/salesforce/__init__.py +15 -8
  109. ingestr/src/shopify/__init__.py +1 -17
  110. ingestr/src/smartsheets/__init__.py +82 -0
  111. ingestr/src/snapchat_ads/__init__.py +489 -0
  112. ingestr/src/snapchat_ads/client.py +72 -0
  113. ingestr/src/snapchat_ads/helpers.py +535 -0
  114. ingestr/src/socrata_source/__init__.py +83 -0
  115. ingestr/src/socrata_source/helpers.py +85 -0
  116. ingestr/src/socrata_source/settings.py +8 -0
  117. ingestr/src/solidgate/__init__.py +219 -0
  118. ingestr/src/solidgate/helpers.py +154 -0
  119. ingestr/src/sources.py +2933 -245
  120. ingestr/src/stripe_analytics/__init__.py +49 -21
  121. ingestr/src/stripe_analytics/helpers.py +286 -1
  122. ingestr/src/stripe_analytics/settings.py +62 -10
  123. ingestr/src/telemetry/event.py +10 -9
  124. ingestr/src/tiktok_ads/__init__.py +12 -6
  125. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  126. ingestr/src/trustpilot/__init__.py +48 -0
  127. ingestr/src/trustpilot/client.py +48 -0
  128. ingestr/src/wise/__init__.py +68 -0
  129. ingestr/src/wise/client.py +63 -0
  130. ingestr/src/zoom/__init__.py +99 -0
  131. ingestr/src/zoom/helpers.py +102 -0
  132. ingestr/tests/unit/test_smartsheets.py +133 -0
  133. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
  134. ingestr-0.14.104.dist-info/RECORD +203 -0
  135. ingestr/src/appsflyer/_init_.py +0 -24
  136. ingestr-0.13.13.dist-info/RECORD +0 -115
  137. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  138. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  139. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/partition.py
@@ -0,0 +1,32 @@
+ from typing import Dict
+
+ from dlt.common.schema.typing import TColumnSchema
+ from dlt.sources import DltResource, DltSource
+
+ import ingestr.src.resource as resource
+
+
+ def apply_athena_hints(
+     source: DltSource | DltResource,
+     partition_column: str,
+     additional_hints: Dict[str, TColumnSchema] = {},
+ ) -> None:
+     from dlt.destinations.adapters import athena_adapter, athena_partition
+
+     def _apply_partition_hint(resource: DltResource) -> None:
+         columns = resource.columns if resource.columns else {}
+
+         partition_hint = (
+             columns.get(partition_column)  # type: ignore
+             or additional_hints.get(partition_column)
+         )
+
+         athena_adapter(
+             resource,
+             athena_partition.day(partition_column)
+             if partition_hint
+             and partition_hint.get("data_type") in ("timestamp", "date")
+             else partition_column,
+         )
+
+     resource.for_each(source, _apply_partition_hint)
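The helper above visits every resource in a source and attaches an Athena partition hint, using day partitioning only when the chosen column is a timestamp or date. A minimal usage sketch, assuming a hypothetical `events` resource and pipeline name (Athena destination credentials are omitted):

```python
import dlt

from ingestr.src.partition import apply_athena_hints


@dlt.resource(columns={"created_at": {"data_type": "timestamp"}})
def events():
    # hypothetical resource with a timestamp column to partition on
    yield {"id": 1, "created_at": "2024-01-01T00:00:00Z"}


source = events()
# day-partitions the Athena table on `created_at` because it is declared as a timestamp
apply_athena_hints(source, "created_at")

pipeline = dlt.pipeline(pipeline_name="events_to_athena", destination="athena")
pipeline.run(source)
```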
ingestr/src/phantombuster/__init__.py
@@ -0,0 +1,65 @@
+ from typing import Iterable, Optional
+
+ import dlt
+ import pendulum
+ import requests
+ from dlt.common.typing import TAnyDateTime, TDataItem
+ from dlt.sources import DltResource
+ from dlt.sources.helpers.requests import Client
+
+ from ingestr.src.phantombuster.client import PhantombusterClient
+
+
+ def retry_on_limit(
+     response: Optional[requests.Response], exception: Optional[BaseException]
+ ) -> bool:
+     if response is not None and response.status_code == 429:
+         return True
+     return False
+
+
+ def create_client() -> requests.Session:
+     return Client(
+         raise_for_status=False,
+         retry_condition=retry_on_limit,
+         request_max_attempts=12,
+         request_backoff_factor=2,
+     ).session
+
+
+ @dlt.source(max_table_nesting=0)
+ def phantombuster_source(
+     api_key: str, agent_id: str, start_date: TAnyDateTime, end_date: TAnyDateTime | None
+ ) -> Iterable[DltResource]:
+     client = PhantombusterClient(api_key)
+
+     @dlt.resource(
+         write_disposition="merge",
+         primary_key="container_id",
+         columns={
+             "partition_dt": {"data_type": "date", "partition": True},
+         },
+     )
+     def completed_phantoms(
+         dateTime=(
+             dlt.sources.incremental(
+                 "ended_at",
+                 initial_value=start_date,
+                 end_value=end_date,
+                 range_start="closed",
+                 range_end="closed",
+             )
+         ),
+     ) -> Iterable[TDataItem]:
+         if dateTime.end_value is None:
+             end_dt = pendulum.now(tz="UTC")
+         else:
+             end_dt = dateTime.end_value
+
+         start_dt = dateTime.last_value
+
+         yield client.fetch_containers_result(
+             create_client(), agent_id, start_date=start_dt, end_date=end_dt
+         )
+
+     return completed_phantoms
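`completed_phantoms` uses a closed incremental range on `ended_at`, so subsequent runs only fetch containers that finished after the last stored cursor. A hedged run sketch, with placeholder credentials and duckdb used purely as an example destination:

```python
import dlt
import pendulum

from ingestr.src.phantombuster import phantombuster_source

source = phantombuster_source(
    api_key="<phantombuster-api-key>",
    agent_id="<agent-id>",
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    end_date=None,  # None means "up to now" inside completed_phantoms
)

pipeline = dlt.pipeline(
    pipeline_name="phantombuster",
    destination="duckdb",
    dataset_name="phantombuster_raw",
)
pipeline.run(source)
```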
ingestr/src/phantombuster/client.py
@@ -0,0 +1,87 @@
+ from typing import Union
+
+ import pendulum
+ import requests
+
+
+ class PhantombusterClient:
+     def __init__(self, api_key: str):
+         self.api_key = api_key
+
+     def _get_headers(self):
+         return {
+             "X-Phantombuster-Key-1": self.api_key,
+             "accept": "application/json",
+         }
+
+     def fetch_containers_result(
+         self,
+         session: requests.Session,
+         agent_id: str,
+         start_date: pendulum.DateTime,
+         end_date: pendulum.DateTime,
+     ):
+         url = "https://api.phantombuster.com/api/v2/containers/fetch-all/"
+         before_ended_at = None
+         limit = 100
+
+         started_at = start_date.int_timestamp * 1000 + int(
+             start_date.microsecond / 1000
+         )
+         ended_at = end_date.int_timestamp * 1000 + int(end_date.microsecond / 1000)
+
+         while True:
+             params: dict[str, Union[str, int, float, bytes, None]] = {
+                 "agentId": agent_id,
+                 "limit": limit,
+                 "mode": "finalized",
+             }
+
+             if before_ended_at:
+                 params["beforeEndedAt"] = before_ended_at
+
+             response = session.get(url=url, headers=self._get_headers(), params=params)
+             data = response.json()
+             containers = data.get("containers", [])
+
+             for container in containers:
+                 container_ended_at = container.get("endedAt")
+
+                 if before_ended_at is None or before_ended_at > container_ended_at:
+                     before_ended_at = container_ended_at
+
+                 if container_ended_at < started_at or container_ended_at > ended_at:
+                     continue
+
+                 try:
+                     result = self.fetch_result_object(session, container["id"])
+                     partition_dt = pendulum.from_timestamp(
+                         container_ended_at / 1000, tz="UTC"
+                     ).date()
+                     container_ended_at_datetime = pendulum.from_timestamp(
+                         container_ended_at / 1000, tz="UTC"
+                     )
+                     row = {
+                         "container_id": container["id"],
+                         "container": container,
+                         "result": result,
+                         "partition_dt": partition_dt,
+                         "ended_at": container_ended_at_datetime,
+                     }
+                     yield row
+
+                 except requests.RequestException as e:
+                     print(f"Error fetching result for container {container['id']}: {e}")
+
+             if data["maxLimitReached"] is False:
+                 break
+
+     def fetch_result_object(self, session: requests.Session, container_id: str):
+         result_url = (
+             "https://api.phantombuster.com/api/v2/containers/fetch-result-object"
+         )
+         params = {"id": container_id}
+         response = session.get(result_url, headers=self._get_headers(), params=params)
+         response.raise_for_status()
+
+         return response.json()
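`fetch_containers_result` converts the pendulum window boundaries to epoch milliseconds before comparing them against the `endedAt` values returned by the API. A small worked sketch of that conversion (the example date is arbitrary):

```python
import pendulum

start_date = pendulum.datetime(2024, 1, 1, 12, 30, 0, 250000, tz="UTC")

# same arithmetic as fetch_containers_result: seconds -> ms, plus whole ms from microseconds
started_at = start_date.int_timestamp * 1000 + int(start_date.microsecond / 1000)
print(started_at)  # 1704112200250
```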
ingestr/src/pinterest/__init__.py
@@ -0,0 +1,82 @@
+ from typing import Iterable
+
+ import dlt
+ import pendulum
+ from dlt.common.time import ensure_pendulum_datetime
+ from dlt.common.typing import TDataItem
+ from dlt.sources import DltResource
+ from dlt.sources.helpers import requests
+
+
+ @dlt.source(name="pinterest", max_table_nesting=0)
+ def pinterest_source(
+     start_date: pendulum.DateTime,
+     access_token: str,
+     page_size: int = 200,
+     end_date: pendulum.DateTime | None = None,
+ ) -> Iterable[DltResource]:
+     session = requests.Session()
+     session.headers.update({"Authorization": f"Bearer {access_token}"})
+     base_url = "https://api.pinterest.com/v5"
+
+     def fetch_data(
+         endpoint: str,
+         start_dt: pendulum.DateTime,
+         end_dt: pendulum.DateTime,
+     ) -> Iterable[TDataItem]:
+         url = f"{base_url}/{endpoint}"
+         params = {"page_size": page_size}
+         bookmark = None
+         while True:
+             if bookmark:
+                 params["bookmark"] = bookmark
+
+             resp = session.get(url, params=params)
+             resp.raise_for_status()
+             data = resp.json()
+             items = data.get("items") or []
+
+             for item in items:
+                 item_created = ensure_pendulum_datetime(item["created_at"])
+                 if item_created <= start_dt:
+                     continue
+                 if item_created > end_dt:
+                     continue
+                 item["created_at"] = item_created
+                 yield item
+
+             bookmark = data.get("bookmark")
+             if not bookmark:
+                 break
+
+     @dlt.resource(write_disposition="merge", primary_key="id")
+     def pins(
+         datetime=dlt.sources.incremental(
+             "created_at",
+             initial_value=start_date,
+             end_value=end_date,
+         ),
+     ) -> Iterable[TDataItem]:
+         _start_date = datetime.last_value or start_date
+         if end_date is None:
+             _end_date = pendulum.now("UTC")
+         else:
+             _end_date = datetime.end_value
+         yield from fetch_data("pins", _start_date, _end_date)
+
+     @dlt.resource(write_disposition="merge", primary_key="id")
+     def boards(
+         datetime=dlt.sources.incremental(
+             "created_at",
+             initial_value=start_date,
+             end_value=end_date,
+         ),
+     ) -> Iterable[TDataItem]:
+         _start_date = datetime.last_value or start_date
+         if end_date is None:
+             _end_date = pendulum.now("UTC")
+         else:
+             _end_date = datetime.end_value
+         yield from fetch_data("boards", _start_date, _end_date)
+
+     return pins, boards
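Both resources share the bookmark-paginated `fetch_data` helper and filter client-side on `created_at`. A hedged run sketch, with placeholder token and pipeline names:

```python
import dlt
import pendulum

from ingestr.src.pinterest import pinterest_source

pipeline = dlt.pipeline(
    pipeline_name="pinterest",
    destination="duckdb",
    dataset_name="pinterest_raw",
)

source = pinterest_source(
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    access_token="<pinterest-access-token>",
)

# load only the pins table; drop with_resources() to load both pins and boards
pipeline.run(source.with_resources("pins"))
```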
ingestr/src/pipedrive/__init__.py
@@ -0,0 +1,198 @@
+ """Highly customizable source for Pipedrive, supports endpoint addition, selection and column rename
+
+ Pipedrive api docs: https://developers.pipedrive.com/docs/api/v1
+
+ Pipedrive changes or deprecates fields and endpoints without versioning the api.
+ If something breaks, it's a good idea to check the changelog.
+ Api changelog: https://developers.pipedrive.com/changelog
+
+ To get an api key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+ """
+
+ from typing import Any, Dict, Iterator, List, Optional, Union  # noqa: F401
+
+ import dlt
+ from dlt.common import pendulum
+ from dlt.common.time import ensure_pendulum_datetime
+ from dlt.sources import DltResource, TDataItems
+
+ from .helpers import group_deal_flows
+ from .helpers.custom_fields_munger import rename_fields, update_fields_mapping
+ from .helpers.pages import get_pages, get_recent_items_incremental
+ from .settings import ENTITY_MAPPINGS, RECENTS_ENTITIES
+ from .typing import TDataPage
+
+
+ @dlt.source(name="pipedrive", max_table_nesting=0)
+ def pipedrive_source(
+     pipedrive_api_key: str = dlt.secrets.value,
+     since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
+ ) -> Iterator[DltResource]:
+     """
+     Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.
+
+     Args:
+         pipedrive_api_key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
+         since_timestamp: Starting timestamp for incremental loading. By default complete history is loaded on first run.
+         incremental: Enable or disable incremental loading.
+
+     Returns resources:
+         custom_fields_mapping
+         activities
+         activityTypes
+         deals
+         deals_flow
+         deals_participants
+         files
+         filters
+         notes
+         persons
+         organizations
+         pipelines
+         products
+         stages
+         users
+         leads
+
+     For custom fields rename the `custom_fields_mapping` resource must be selected or loaded before other resources.
+
+     Resources that depend on another resource are implemented as transformers
+     so they can re-use the original resource data without re-downloading.
+     Examples: deals_participants, deals_flow
+     """
+
+     # yield nice rename mapping
+     yield create_state(pipedrive_api_key) | parsed_mapping
+
+     # parse timestamp and build kwargs
+     since_timestamp = ensure_pendulum_datetime(since_timestamp).strftime(
+         "%Y-%m-%d %H:%M:%S"
+     )
+     resource_kwargs: Any = (
+         {"since_timestamp": since_timestamp} if since_timestamp else {}
+     )
+
+     # create resources for all endpoints
+     endpoints_resources = {}
+     for entity, resource_name in RECENTS_ENTITIES.items():
+         endpoints_resources[resource_name] = dlt.resource(
+             get_recent_items_incremental,
+             name=resource_name,
+             primary_key="id",
+             write_disposition="merge",
+         )(entity, pipedrive_api_key, **resource_kwargs)
+
+     yield from endpoints_resources.values()
+
+     # create transformers for deals to participants and flows
+     yield endpoints_resources["deals"] | dlt.transformer(
+         name="deals_participants", write_disposition="merge", primary_key="id"
+     )(_get_deals_participants)(pipedrive_api_key)
+
+     yield endpoints_resources["deals"] | dlt.transformer(
+         name="deals_flow", write_disposition="merge", primary_key="id"
+     )(_get_deals_flow)(pipedrive_api_key)
+
+     yield leads(pipedrive_api_key, update_time=since_timestamp)
+
+
+ def _get_deals_flow(
+     deals_page: TDataPage, pipedrive_api_key: str
+ ) -> Iterator[TDataItems]:
+     custom_fields_mapping = dlt.current.source_state().get("custom_fields_mapping", {})
+     for row in deals_page:
+         url = f"deals/{row['id']}/flow"
+         pages = get_pages(url, pipedrive_api_key)
+         for entity, page in group_deal_flows(pages):
+             yield dlt.mark.with_table_name(
+                 rename_fields(page, custom_fields_mapping.get(entity, {})),
+                 "deals_flow_" + entity,
+             )
+
+
+ def _get_deals_participants(
+     deals_page: TDataPage, pipedrive_api_key: str
+ ) -> Iterator[TDataPage]:
+     for row in deals_page:
+         url = f"deals/{row['id']}/participants"
+         yield from get_pages(url, pipedrive_api_key)
+
+
+ @dlt.resource(selected=False)
+ def create_state(pipedrive_api_key: str) -> Iterator[Dict[str, Any]]:
+     def _get_pages_for_rename(
+         entity: str, fields_entity: str, pipedrive_api_key: str
+     ) -> Dict[str, Any]:
+         existing_fields_mapping: Dict[str, Dict[str, str]] = (
+             custom_fields_mapping.setdefault(entity, {})
+         )
+         # we need to process all pages before yielding
+         for page in get_pages(fields_entity, pipedrive_api_key):
+             existing_fields_mapping = update_fields_mapping(
+                 page, existing_fields_mapping
+             )
+         return existing_fields_mapping
+
+     # gets all *Fields data and stores in state
+     custom_fields_mapping = dlt.current.source_state().setdefault(
+         "custom_fields_mapping", {}
+     )
+     for entity, fields_entity, _ in ENTITY_MAPPINGS:
+         if fields_entity is None:
+             continue
+         custom_fields_mapping[entity] = _get_pages_for_rename(
+             entity, fields_entity, pipedrive_api_key
+         )
+
+     yield custom_fields_mapping
+
+
+ @dlt.transformer(
+     name="custom_fields_mapping",
+     write_disposition="replace",
+     columns={"options": {"data_type": "json"}},
+ )
+ def parsed_mapping(
+     custom_fields_mapping: Dict[str, Any],
+ ) -> Optional[Iterator[List[Dict[str, str]]]]:
+     """
+     Parses and yields custom fields' mapping in order to be stored in destiny by dlt
+     """
+     for endpoint, data_item_mapping in custom_fields_mapping.items():
+         yield [
+             {
+                 "endpoint": endpoint,
+                 "hash_string": hash_string,
+                 "name": names["name"],
+                 "normalized_name": names["normalized_name"],
+                 "options": names["options"],
+                 "field_type": names["field_type"],
+             }
+             for hash_string, names in data_item_mapping.items()
+         ]
+
+
+ @dlt.resource(primary_key="id", write_disposition="merge")
+ def leads(
+     pipedrive_api_key: str = dlt.secrets.value,
+     update_time: dlt.sources.incremental[str] = dlt.sources.incremental(
+         "update_time", "1970-01-01 00:00:00"
+     ),
+ ) -> Iterator[TDataPage]:
+     """Resource to incrementally load pipedrive leads by update_time"""
+     # Leads inherit custom fields from deals
+     fields_mapping = (
+         dlt.current.source_state().get("custom_fields_mapping", {}).get("deals", {})
+     )
+     # Load leads pages sorted from newest to oldest and stop loading when
+     # last incremental value is reached
+     pages = get_pages(
+         "leads",
+         pipedrive_api_key,
+         extra_params={"sort": "update_time DESC"},
+     )
+     for page in pages:
+         yield rename_fields(page, fields_mapping)
+
+         if update_time.start_out_of_range:
+             return
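Because the custom-field renames are read from source state, the `custom_fields_mapping` resource has to be loaded together with (or before) the entity resources, as the docstring notes. A hedged selection sketch, with placeholder token and names:

```python
import dlt

from ingestr.src.pipedrive import pipedrive_source

pipeline = dlt.pipeline(
    pipeline_name="pipedrive",
    destination="duckdb",
    dataset_name="pipedrive_raw",
)

source = pipedrive_source(pipedrive_api_key="<api-token>")

# keep custom_fields_mapping selected so custom fields on deals get renamed
pipeline.run(source.with_resources("custom_fields_mapping", "deals", "deals_flow"))
```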
ingestr/src/pipedrive/helpers/__init__.py
@@ -0,0 +1,23 @@
+ """Pipedrive source helpers"""
+
+ from itertools import groupby
+ from typing import Any, Dict, Iterable, List, Tuple, cast  # noqa: F401
+
+ from dlt.common import pendulum  # noqa: F401
+
+
+ def _deals_flow_group_key(item: Dict[str, Any]) -> str:
+     return item["object"]  # type: ignore[no-any-return]
+
+
+ def group_deal_flows(
+     pages: Iterable[Iterable[Dict[str, Any]]],
+ ) -> Iterable[Tuple[str, List[Dict[str, Any]]]]:
+     for page in pages:
+         for entity, items in groupby(
+             sorted(page, key=_deals_flow_group_key), key=_deals_flow_group_key
+         ):
+             yield (
+                 entity,
+                 [dict(item["data"], timestamp=item["timestamp"]) for item in items],
+             )
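`group_deal_flows` turns each flow page into per-entity batches, merging the nested `data` payload with the flow `timestamp`. A tiny illustrative call; the page contents below are made up:

```python
from ingestr.src.pipedrive.helpers import group_deal_flows

pages = [
    [
        {"object": "dealChange", "timestamp": "2024-01-01 10:00:00", "data": {"id": 1}},
        {"object": "note", "timestamp": "2024-01-01 11:00:00", "data": {"id": 2}},
        {"object": "dealChange", "timestamp": "2024-01-02 09:00:00", "data": {"id": 3}},
    ]
]

for entity, items in group_deal_flows(pages):
    print(entity, items)
# dealChange [{'id': 1, 'timestamp': '2024-01-01 10:00:00'}, {'id': 3, 'timestamp': '2024-01-02 09:00:00'}]
# note [{'id': 2, 'timestamp': '2024-01-01 11:00:00'}]
```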
ingestr/src/pipedrive/helpers/custom_fields_munger.py
@@ -0,0 +1,102 @@
+ from typing import Any, Dict, Optional, TypedDict
+
+ import dlt
+
+ from ..typing import TDataPage
+
+
+ class TFieldMapping(TypedDict):
+     name: str
+     normalized_name: str
+     options: Optional[Dict[str, str]]
+     field_type: str
+
+
+ def update_fields_mapping(
+     new_fields_mapping: TDataPage, existing_fields_mapping: Dict[str, Any]
+ ) -> Dict[str, Any]:
+     """
+     Specific function to perform data munging and push changes to custom fields' mapping stored in dlt's state
+     The endpoint must be an entity fields' endpoint
+     """
+     for data_item in new_fields_mapping:
+         # 'edit_flag' field contains a boolean value, which is set to 'True' for custom fields and 'False' otherwise.
+         if data_item.get("edit_flag"):
+             # Regarding custom fields, 'key' field contains pipedrive's hash string representation of its name
+             # We assume that pipedrive's hash strings are meant to be an univoque representation of custom fields' name, so dlt's state shouldn't be updated while those values
+             # remain unchanged
+             existing_fields_mapping = _update_field(data_item, existing_fields_mapping)
+         # Built in enum and set fields are mapped if their options have int ids
+         # Enum fields with bool and string key options are left intact
+         elif data_item.get("field_type") in {"set", "enum"}:
+             options = data_item.get("options", [])
+             first_option = options[0]["id"] if len(options) >= 1 else None
+             if isinstance(first_option, int) and not isinstance(first_option, bool):
+                 existing_fields_mapping = _update_field(
+                     data_item, existing_fields_mapping
+                 )
+     return existing_fields_mapping
+
+
+ def _update_field(
+     data_item: Dict[str, Any],
+     existing_fields_mapping: Optional[Dict[str, TFieldMapping]],
+ ) -> Dict[str, TFieldMapping]:
+     """Create or update the given field's info the custom fields state
+     If the field hash already exists in the state from previous runs the name is not updated.
+     New enum options (if any) are appended to the state.
+     """
+     existing_fields_mapping = existing_fields_mapping or {}
+     key = data_item["key"]
+     options = data_item.get("options", [])
+     new_options_map = {str(o["id"]): o["label"] for o in options}
+     existing_field = existing_fields_mapping.get(key)
+     if not existing_field:
+         existing_fields_mapping[key] = dict(
+             name=data_item["name"],
+             normalized_name=_normalized_name(data_item["name"]),
+             options=new_options_map,
+             field_type=data_item["field_type"],
+         )
+         return existing_fields_mapping
+     existing_options = existing_field.get("options", {})
+     if not existing_options or existing_options == new_options_map:
+         existing_field["options"] = new_options_map
+         existing_field["field_type"] = data_item[
+             "field_type"
+         ]  # Add for backwards compat
+         return existing_fields_mapping
+     # Add new enum options to the existing options array
+     # so that when option is renamed the original label remains valid
+     new_option_keys = set(new_options_map) - set(existing_options)
+     for key in new_option_keys:
+         existing_options[key] = new_options_map[key]
+     existing_field["options"] = existing_options
+     return existing_fields_mapping
+
+
+ def _normalized_name(name: str) -> str:
+     source_schema = dlt.current.source_schema()
+     normalized_name = name.strip()  # remove leading and trailing spaces
+     return source_schema.naming.normalize_identifier(normalized_name)
+
+
+ def rename_fields(data: TDataPage, fields_mapping: Dict[str, Any]) -> TDataPage:
+     if not fields_mapping:
+         return data
+     for data_item in data:
+         for hash_string, field in fields_mapping.items():
+             if hash_string not in data_item:
+                 continue
+             field_value = data_item.pop(hash_string)
+             field_name = field["name"]
+             options_map = field["options"]
+             # Get label instead of ID for 'enum' and 'set' fields
+             if field_value and field["field_type"] == "set":  # Multiple choice
+                 field_value = [
+                     options_map.get(str(enum_id), enum_id) for enum_id in field_value
+                 ]
+             elif field_value and field["field_type"] == "enum":
+                 field_value = options_map.get(str(field_value), field_value)
+             data_item[field_name] = field_value
+     return data
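`rename_fields` swaps Pipedrive's custom-field hash keys for their human-readable names and, for enum/set fields, the option ids for their labels. A hedged example with a made-up hash and mapping:

```python
from ingestr.src.pipedrive.helpers.custom_fields_munger import rename_fields

fields_mapping = {
    "9dc80c50d78a15643bfc4ca79d9156cac740d6dc": {
        "name": "Deal stage color",
        "normalized_name": "deal_stage_color",
        "options": {"1": "red", "2": "green"},
        "field_type": "enum",
    }
}

page = [{"id": 42, "9dc80c50d78a15643bfc4ca79d9156cac740d6dc": 2}]
print(rename_fields(page, fields_mapping))
# [{'id': 42, 'Deal stage color': 'green'}]
```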