omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,212 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Highly customizable source for Pipedrive, supports endpoint addition, selection and column rename
16
+
17
+ Pipedrive api docs: https://developers.pipedrive.com/docs/api/v1
18
+
19
+ Pipedrive changes or deprecates fields and endpoints without versioning the api.
20
+ If something breaks, it's a good idea to check the changelog.
21
+ Api changelog: https://developers.pipedrive.com/changelog
22
+
23
+ To get an api key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
24
+ """
25
+
26
+ from typing import Any, Dict, Iterator, List, Optional, Union # noqa: F401
27
+
28
+ import dlt
29
+ from dlt.common import pendulum
30
+ from dlt.common.time import ensure_pendulum_datetime
31
+ from dlt.sources import DltResource, TDataItems
32
+
33
+ from .helpers import group_deal_flows
34
+ from .helpers.custom_fields_munger import rename_fields, update_fields_mapping
35
+ from .helpers.pages import get_pages, get_recent_items_incremental
36
+ from .settings import ENTITY_MAPPINGS, RECENTS_ENTITIES
37
+ from .typing import TDataPage
38
+
39
+
40
@dlt.source(name="pipedrive", max_table_nesting=0)
def pipedrive_source(
    pipedrive_api_key: str = dlt.secrets.value,
    since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
) -> Iterator[DltResource]:
    """
    Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.

    Args:
        pipedrive_api_key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
        since_timestamp: Starting timestamp for incremental loading. By default complete history
            is loaded on first run. When `None` (or empty) no explicit start is passed and every
            resource falls back to the initial value declared on its own incremental argument.

    Returns resources:
        custom_fields_mapping
        activities
        activity_types
        deals
        deals_flow
        deals_participants
        files
        filters
        notes
        persons
        organizations
        pipelines
        products
        stages
        users
        leads

    For custom fields rename the `custom_fields_mapping` resource must be selected or loaded before other resources.

    Resources that depend on another resource are implemented as transformers
    so they can re-use the original resource data without re-downloading.
    Examples: deals_participants, deals_flow
    """

    # yield nice rename mapping
    yield create_state(pipedrive_api_key) | parsed_mapping

    # Parse the timestamp only when one was supplied: the parameter is declared
    # Optional, and ensure_pendulum_datetime() cannot coerce None.
    resource_kwargs: Dict[str, Any] = {}
    leads_kwargs: Dict[str, Any] = {}
    if since_timestamp:
        since_timestamp = ensure_pendulum_datetime(since_timestamp).strftime(
            "%Y-%m-%d %H:%M:%S"
        )
        resource_kwargs["since_timestamp"] = since_timestamp
        leads_kwargs["update_time"] = since_timestamp

    # create resources for all endpoints
    endpoints_resources = {}
    for entity, resource_name in RECENTS_ENTITIES.items():
        endpoints_resources[resource_name] = dlt.resource(
            get_recent_items_incremental,
            name=resource_name,
            primary_key="id",
            write_disposition="merge",
        )(entity, pipedrive_api_key, **resource_kwargs)

    yield from endpoints_resources.values()

    # create transformers for deals to participants and flows
    yield endpoints_resources["deals"] | dlt.transformer(
        name="deals_participants", write_disposition="merge", primary_key="id"
    )(_get_deals_participants)(pipedrive_api_key)

    yield endpoints_resources["deals"] | dlt.transformer(
        name="deals_flow", write_disposition="merge", primary_key="id"
    )(_get_deals_flow)(pipedrive_api_key)

    yield leads(pipedrive_api_key, **leads_kwargs)
111
+
112
+
113
def _get_deals_flow(
    deals_page: TDataPage, pipedrive_api_key: str
) -> Iterator[TDataItems]:
    """Yield the flow items of every deal in `deals_page`, grouped per flow entity.

    For each deal the `deals/<id>/flow` endpoint is paged through; every group of
    items is renamed with the custom fields mapping stored in the source state for
    that entity and routed to the table `deals_flow_<entity>`.
    """
    mapping_by_entity = dlt.current.source_state().get("custom_fields_mapping", {})
    for deal in deals_page:
        flow_pages = get_pages(f"deals/{deal['id']}/flow", pipedrive_api_key)
        for entity, items in group_deal_flows(flow_pages):
            renamed_items = rename_fields(items, mapping_by_entity.get(entity, {}))
            yield dlt.mark.with_table_name(renamed_items, "deals_flow_" + entity)
125
+
126
+
127
def _get_deals_participants(
    deals_page: TDataPage, pipedrive_api_key: str
) -> Iterator[TDataPage]:
    """Yield every page of the `deals/<id>/participants` endpoint for each deal in `deals_page`."""
    for deal in deals_page:
        participants_endpoint = f"deals/{deal['id']}/participants"
        for participants_page in get_pages(participants_endpoint, pipedrive_api_key):
            yield participants_page
133
+
134
+
135
@dlt.resource(selected=False)
def create_state(pipedrive_api_key: str) -> Iterator[Dict[str, Any]]:
    """Refresh the custom fields mapping kept in the dlt source state and yield it.

    For every entry of ENTITY_MAPPINGS that names a `*Fields` endpoint, all pages of
    that endpoint are folded into the mapping stored under the entity's key.
    The complete mapping (all entities) is yielded once at the end.
    """
    # gets all *Fields data and stores in state
    state_mapping = dlt.current.source_state().setdefault("custom_fields_mapping", {})
    for entity, fields_endpoint, _unused in ENTITY_MAPPINGS:
        if fields_endpoint is None:
            continue
        entity_mapping: Dict[str, Dict[str, str]] = state_mapping.setdefault(
            entity, {}
        )
        # we need to process all pages before yielding
        for fields_page in get_pages(fields_endpoint, pipedrive_api_key):
            entity_mapping = update_fields_mapping(fields_page, entity_mapping)
        state_mapping[entity] = entity_mapping

    yield state_mapping
162
+
163
+
164
@dlt.transformer(
    name="custom_fields_mapping",
    write_disposition="replace",
    columns={"options": {"data_type": "json"}},
)
def parsed_mapping(
    custom_fields_mapping: Dict[str, Any],
) -> Optional[Iterator[List[Dict[str, str]]]]:
    """
    Flatten the custom fields mapping into rows so dlt can store it in the destination.

    One list of rows is yielded per endpoint; each row describes a single field
    (its hash string, names, options and field type).
    """
    for endpoint, data_item_mapping in custom_fields_mapping.items():
        rows = []
        for hash_string, names in data_item_mapping.items():
            rows.append(
                {
                    "endpoint": endpoint,
                    "hash_string": hash_string,
                    "name": names["name"],
                    "normalized_name": names["normalized_name"],
                    "options": names["options"],
                    "field_type": names["field_type"],
                }
            )
        yield rows
187
+
188
+
189
@dlt.resource(primary_key="id", write_disposition="merge")
def leads(
    pipedrive_api_key: str = dlt.secrets.value,
    update_time: dlt.sources.incremental[str] = dlt.sources.incremental(
        "update_time", "1970-01-01 00:00:00"
    ),
) -> Iterator[TDataPage]:
    """Resource to incrementally load pipedrive leads by update_time

    Args:
        pipedrive_api_key: Pipedrive API token.
        update_time: Incremental cursor on the `update_time` field of each lead.

    Yields:
        Pages of leads with custom field hashes renamed using the deal fields mapping.
    """
    # Leads inherit custom fields from deals. The state written by `create_state`
    # is keyed by the singular entity names of ENTITY_MAPPINGS ("deal"), so that
    # key is looked up first; "deals" is kept as a fallback for any state that
    # was stored under the plural name.
    all_mappings = dlt.current.source_state().get("custom_fields_mapping", {})
    fields_mapping = all_mappings.get("deal") or all_mappings.get("deals", {})
    # Load leads pages sorted from newest to oldest and stop loading when
    # last incremental value is reached
    pages = get_pages(
        "leads",
        pipedrive_api_key,
        extra_params={"sort": "update_time DESC"},
    )
    for page in pages:
        yield rename_fields(page, fields_mapping)

        if update_time.start_out_of_range:
            return
@@ -0,0 +1,37 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Pipedrive source helpers"""
16
+
17
+ from itertools import groupby
18
+ from typing import Any, Dict, Iterable, List, Tuple, cast # noqa: F401
19
+
20
+ from dlt.common import pendulum # noqa: F401
21
+
22
+
23
def _deals_flow_group_key(item: Dict[str, Any]) -> str:
    """Return the value of the flow item's `object` key, used to sort and group items."""
    entity: str = item["object"]
    return entity


def group_deal_flows(
    pages: Iterable[Iterable[Dict[str, Any]]],
) -> Iterable[Tuple[str, List[Dict[str, Any]]]]:
    """Group the items of each page by their `object` value.

    Grouping is done page by page (the same entity may be yielded again for a
    later page). Every yielded row is the item's `data` dict with the item's
    `timestamp` added; an existing `timestamp` key in `data` is overwritten.
    """
    for page in pages:
        # groupby only merges adjacent items, hence the sort on the same key
        ordered_items = sorted(page, key=_deals_flow_group_key)
        for entity, grouped_items in groupby(ordered_items, key=_deals_flow_group_key):
            rows = []
            for flow_item in grouped_items:
                rows.append(dict(flow_item["data"], timestamp=flow_item["timestamp"]))
            yield entity, rows
@@ -0,0 +1,116 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict, Optional, TypedDict
16
+
17
+ import dlt
18
+
19
+ from ..typing import TDataPage
20
+
21
+
22
class TFieldMapping(TypedDict):
    """Shape of one field entry in the custom fields mapping kept in dlt state."""

    # field name as reported by the Pipedrive fields endpoint
    name: str
    # `name` stripped and passed through the source schema's naming convention
    normalized_name: str
    # option id (as string) -> option label
    options: Optional[Dict[str, str]]
    # Pipedrive field type, e.g. "enum" or "set"
    field_type: str
27
+
28
+
29
def update_fields_mapping(
    new_fields_mapping: TDataPage, existing_fields_mapping: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Fold one page of an entity fields endpoint into the custom fields mapping kept in dlt's state.

    Custom fields are always mapped. Built-in `set`/`enum` fields are mapped only
    when the id of their first option is an integer; all other fields are skipped.
    Returns the updated mapping.
    """
    for data_item in new_fields_mapping:
        # 'edit_flag' is truthy for custom fields. Their 'key' holds Pipedrive's hash
        # string for the field, which is assumed to identify the field uniquely.
        if data_item.get("edit_flag"):
            existing_fields_mapping = _update_field(data_item, existing_fields_mapping)
            continue
        if data_item.get("field_type") not in {"set", "enum"}:
            continue
        # Built in enum and set fields are mapped if their options have int ids
        # Enum fields with bool and string key options are left intact
        options = data_item.get("options", [])
        first_option = options[0]["id"] if len(options) >= 1 else None
        # bool is a subclass of int, so it has to be excluded explicitly
        if isinstance(first_option, bool) or not isinstance(first_option, int):
            continue
        existing_fields_mapping = _update_field(data_item, existing_fields_mapping)
    return existing_fields_mapping
53
+
54
+
55
def _update_field(
    data_item: Dict[str, Any],
    existing_fields_mapping: Optional[Dict[str, "TFieldMapping"]],
) -> Dict[str, "TFieldMapping"]:
    """Create or update the given field's info in the custom fields state.

    If the field hash already exists in the state from previous runs the name is not updated.
    New enum options (if any) are appended to the state; labels of options that are
    already known are kept, so that when an option is renamed the original label
    remains valid. `field_type` is refreshed on every update so that entries written
    before `field_type` was tracked always end up with one.

    Args:
        data_item: One field description from a Pipedrive `*Fields` endpoint.
        existing_fields_mapping: Mapping of field hash -> field info; `None` or an
            empty mapping starts a new one.

    Returns:
        The mapping containing the created or updated entry.
    """
    existing_fields_mapping = existing_fields_mapping or {}
    field_key = data_item["key"]
    # `options` may be missing or null for fields without predefined choices
    options = data_item.get("options") or []
    new_options_map = {str(o["id"]): o["label"] for o in options}
    existing_field = existing_fields_mapping.get(field_key)
    if not existing_field:
        existing_fields_mapping[field_key] = dict(
            name=data_item["name"],
            normalized_name=_normalized_name(data_item["name"]),
            options=new_options_map,
            field_type=data_item["field_type"],
        )
        return existing_fields_mapping

    # Back-fill / refresh on every path: rename_fields relies on this key.
    existing_field["field_type"] = data_item["field_type"]

    existing_options = existing_field.get("options") or {}
    if not existing_options:
        existing_field["options"] = new_options_map
        return existing_fields_mapping

    # Add new enum options to the existing options
    # so that when option is renamed the original label remains valid
    for option_id, label in new_options_map.items():
        existing_options.setdefault(option_id, label)
    existing_field["options"] = existing_options
    return existing_fields_mapping
90
+
91
+
92
def _normalized_name(name: str) -> str:
    """Strip surrounding whitespace from `name` and normalize it with the current source schema's naming convention."""
    naming_convention = dlt.current.source_schema().naming
    return naming_convention.normalize_identifier(name.strip())
96
+
97
+
98
def rename_fields(data: "TDataPage", fields_mapping: Dict[str, Any]) -> "TDataPage":
    """Rename hashed field keys to their field names and translate option ids to labels.

    The items of `data` are modified in place and `data` itself is returned.

    Args:
        data: Page of entity dicts as returned by the API.
        fields_mapping: Mapping of field hash -> field info (`name`, `options`,
            `field_type`). When empty, `data` is returned untouched.

    Returns:
        The same page, with every mapped hash key replaced by the field's name.
        Values of `enum` fields are replaced by the option label; values of `set`
        fields become a list of labels. Ids without a known label are kept as is.
    """
    if not fields_mapping:
        return data
    for data_item in data:
        for hash_string, field in fields_mapping.items():
            if hash_string not in data_item:
                continue
            field_value = data_item.pop(hash_string)
            # Entries written by older versions may lack `field_type`, and
            # `options` may be None; treat both as "nothing to translate".
            options_map = field.get("options") or {}
            field_type = field.get("field_type")
            # Get label instead of ID for 'enum' and 'set' fields
            if field_value and field_type == "set":  # Multiple choice
                if isinstance(field_value, str):
                    # a string holds comma-separated option ids; iterating it
                    # directly would walk over single characters
                    option_ids = [part.strip() for part in field_value.split(",")]
                elif isinstance(field_value, (list, tuple)):
                    option_ids = list(field_value)
                else:
                    option_ids = [field_value]
                field_value = [
                    options_map.get(str(option_id), option_id)
                    for option_id in option_ids
                ]
            elif field_value and field_type == "enum":
                field_value = options_map.get(str(field_value), field_value)
            data_item[field["name"]] = field_value
    return data
@@ -0,0 +1,129 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from itertools import chain
16
+ from typing import (
17
+ Any,
18
+ Dict,
19
+ Iterable,
20
+ Iterator,
21
+ List,
22
+ TypeVar,
23
+ Union,
24
+ )
25
+
26
+ import dlt
27
+ from dlt.sources.helpers import requests
28
+
29
+ from ..typing import TDataPage
30
+ from .custom_fields_munger import rename_fields
31
+
32
+
33
def get_pages(
    entity: str,
    pipedrive_api_key: str,
    extra_params: Union[Dict[str, Any], None] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """
    Generic method to retrieve endpoint data based on the required headers and params.

    Args:
        entity: the endpoint you want to call, relative to the v1 API root
        pipedrive_api_key: API token, sent as the `api_token` query parameter
        extra_params: any needed request params except pagination.

    Yields:
        The non-empty `data` payload of every page of the endpoint.
    """
    headers = {"Content-Type": "application/json"}
    params: Dict[str, Any] = {"api_token": pipedrive_api_key}
    if extra_params:
        params.update(extra_params)
    url = f"https://app.pipedrive.com/v1/{entity}"
    yield from _paginated_get(url, headers=headers, params=params)
53
+
54
+
55
def get_recent_items_incremental(
    entity: str,
    pipedrive_api_key: str,
    since_timestamp: dlt.sources.incremental[str] = dlt.sources.incremental(
        "update_time|modified", "1970-01-01 00:00:00"
    ),
) -> Iterator[TDataPage]:
    """Yield pages of `entity` from /recents, starting at the incremental's last value."""
    start_from = since_timestamp.last_value
    for recent_page in _get_recent_pages(entity, pipedrive_api_key, start_from):
        yield recent_page
64
+
65
+
66
def _paginated_get(
    url: str, headers: Dict[str, Any], params: Dict[str, Any]
) -> Iterator[List[Dict[str, Any]]]:
    """
    Requests and yields data 500 records at a time
    Documentation: https://pipedrive.readme.io/docs/core-api-concepts-pagination

    Args:
        url: Full endpoint url.
        headers: Request headers.
        params: Query parameters without pagination keys; the dict is not modified.

    Yields:
        The `data` payload of each page; pages whose `data` is empty or null are skipped.
    """
    # Work on a copy so the caller's dict is not polluted with pagination keys.
    query = dict(params, start=0, limit=500)
    while True:
        page = requests.get(url, headers=headers, params=query).json()
        # yield data only
        data = page["data"]
        if data:
            yield data
        # check if next page exists; tolerate a null `additional_data`/`pagination`
        pagination_info = (page.get("additional_data") or {}).get("pagination") or {}
        if not pagination_info.get("more_items_in_collection", False):
            break
        next_start = pagination_info.get("next_start")
        if next_start is None:
            # Without an offset the `start` parameter would be sent as None and
            # the same page could be requested again indefinitely.
            break
        query["start"] = next_start
88
+
89
+
90
# Generic element type used by `_list_wrapped`
T = TypeVar("T")


def _extract_recents_data(data: Iterable[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Flatten the `data` payloads of recents results into a single list of entities.

    Each result's `data` key holds either one entity or a list of entities;
    `None` entries are dropped.
    """
    entities: List[Dict[str, Any]] = []
    for recent_item in data:
        for entity in _list_wrapped(recent_item["data"]):
            if entity is not None:
                entities.append(entity)
    return entities


def _list_wrapped(item: Union[List[T], T]) -> List[T]:
    """Return `item` unchanged when it is a list, otherwise wrap it in a one-element list."""
    return item if isinstance(item, list) else [item]
111
+
112
+
113
def _get_recent_pages(
    entity: str, pipedrive_api_key: str, since_timestamp: str
) -> Iterator[TDataPage]:
    """Yield pages of `entity` from the `recents` endpoint with custom fields renamed.

    The rename mapping for `entity` is read from the dlt source state.
    """
    source_state = dlt.current.source_state()
    entity_mapping = source_state.get("custom_fields_mapping", {}).get(entity, {})
    recents_pages = get_pages(
        "recents",
        pipedrive_api_key,
        extra_params={"since_timestamp": since_timestamp, "items": entity},
    )
    for recents_page in recents_pages:
        entities = _extract_recents_data(recents_page)
        yield rename_fields(entities, entity_mapping)
127
+
128
+
129
# NOTE(review): not referenced in this module; presumably read by external
# tooling to identify the source - confirm before renaming or removing.
__source_name__ = "pipedrive"
@@ -0,0 +1,41 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Pipedrive source settings and constants"""
16
+
17
# One tuple per entity: (entity name, custom fields endpoint or None, extra value).
# The entity name is the key under which the entity's custom fields mapping is
# stored in state; entities with `None` as endpoint have no fields to fetch.
# NOTE(review): the third element looks like extra request params; it is not
# used by the state-building code - confirm before relying on it.
ENTITY_MAPPINGS = [
    ("activity", "activityFields", {"user_id": 0}),
    ("organization", "organizationFields", None),
    ("person", "personFields", None),
    ("product", "productFields", None),
    ("deal", "dealFields", None),
    ("pipeline", None, None),
    ("stage", None, None),
    ("user", None, None),
]
27
+
28
# Entity name sent as the `items` parameter of the `recents` endpoint
# -> name of the dlt resource created for that entity.
RECENTS_ENTITIES = {
    "activity": "activities",
    "activityType": "activity_types",
    "deal": "deals",
    "file": "files",
    "filter": "filters",
    "note": "notes",
    "person": "persons",
    "organization": "organizations",
    "pipeline": "pipelines",
    "product": "products",
    "stage": "stages",
    "user": "users",
}
@@ -0,0 +1,17 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict, List
16
+
17
# One page of API results: a list of entity dicts
TDataPage = List[Dict[str, Any]]