ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Highly customizable source for Pipedrive, supports endpoint addition, selection and column rename
|
|
2
|
+
|
|
3
|
+
Pipedrive api docs: https://developers.pipedrive.com/docs/api/v1
|
|
4
|
+
|
|
5
|
+
Pipedrive changes or deprecates fields and endpoints without versioning the api.
|
|
6
|
+
If something breaks, it's a good idea to check the changelog.
|
|
7
|
+
Api changelog: https://developers.pipedrive.com/changelog
|
|
8
|
+
|
|
9
|
+
To get an api key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from typing import Any, Dict, Iterator, List, Optional, Union # noqa: F401
|
|
13
|
+
|
|
14
|
+
import dlt
|
|
15
|
+
from dlt.common import pendulum
|
|
16
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
17
|
+
from dlt.sources import DltResource, TDataItems
|
|
18
|
+
|
|
19
|
+
from .helpers import group_deal_flows
|
|
20
|
+
from .helpers.custom_fields_munger import rename_fields, update_fields_mapping
|
|
21
|
+
from .helpers.pages import get_pages, get_recent_items_incremental
|
|
22
|
+
from .settings import ENTITY_MAPPINGS, RECENTS_ENTITIES
|
|
23
|
+
from .typing import TDataPage
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dlt.source(name="pipedrive", max_table_nesting=0)
def pipedrive_source(
    pipedrive_api_key: str = dlt.secrets.value,
    since_timestamp: Optional[Union[pendulum.DateTime, str]] = "1970-01-01 00:00:00",
) -> Iterator[DltResource]:
    """
    Get data from the Pipedrive API. Supports incremental loading and custom fields mapping.

    Args:
        pipedrive_api_key: https://pipedrive.readme.io/docs/how-to-find-the-api-token
        since_timestamp: Starting timestamp for incremental loading. By default complete
            history is loaded on first run. When `None` (or an empty value) is passed,
            no start value is forwarded and every resource uses its own default
            incremental start.

    Returns resources:
        custom_fields_mapping
        activities
        activity_types
        deals
        deals_flow
        deals_participants
        files
        filters
        notes
        persons
        organizations
        pipelines
        products
        stages
        users
        leads

    For custom fields rename the `custom_fields_mapping` resource must be selected or loaded before other resources.

    Resources that depend on another resource are implemented as transformers
    so they can re-use the original resource data without re-downloading.
    Examples: deals_participants, deals_flow
    """

    # yield nice rename mapping
    yield create_state(pipedrive_api_key) | parsed_mapping

    # Normalize the start timestamp to the "YYYY-MM-DD HH:MM:SS" string form.
    # The annotation allows None, so only parse when a value was actually given;
    # parsing None unconditionally would raise before any resource is created.
    start_value: Optional[str] = None
    if since_timestamp:
        start_value = ensure_pendulum_datetime(since_timestamp).strftime(
            "%Y-%m-%d %H:%M:%S"
        )
    resource_kwargs: Any = {"since_timestamp": start_value} if start_value else {}
    leads_kwargs: Any = {"update_time": start_value} if start_value else {}

    # create resources for all endpoints served by the recents endpoint
    endpoints_resources = {}
    for entity, resource_name in RECENTS_ENTITIES.items():
        endpoints_resources[resource_name] = dlt.resource(
            get_recent_items_incremental,
            name=resource_name,
            primary_key="id",
            write_disposition="merge",
        )(entity, pipedrive_api_key, **resource_kwargs)

    yield from endpoints_resources.values()

    # create transformers for deals to participants and flows
    yield endpoints_resources["deals"] | dlt.transformer(
        name="deals_participants", write_disposition="merge", primary_key="id"
    )(_get_deals_participants)(pipedrive_api_key)

    yield endpoints_resources["deals"] | dlt.transformer(
        name="deals_flow", write_disposition="merge", primary_key="id"
    )(_get_deals_flow)(pipedrive_api_key)

    yield leads(pipedrive_api_key, **leads_kwargs)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _get_deals_flow(
    deals_page: TDataPage, pipedrive_api_key: str
) -> Iterator[TDataItems]:
    """Yield the flow (update history) items of every deal in a page.

    For each deal the `deals/<id>/flow` endpoint is fetched, its items are
    grouped per entity by `group_deal_flows`, custom fields are renamed with
    the mapping kept in the dlt source state, and each group is routed to the
    table `deals_flow_<entity>`.
    """
    mapping_by_entity = dlt.current.source_state().get("custom_fields_mapping", {})
    for deal in deals_page:
        flow_pages = get_pages(f"deals/{deal['id']}/flow", pipedrive_api_key)
        for entity, items in group_deal_flows(flow_pages):
            renamed_items = rename_fields(items, mapping_by_entity.get(entity, {}))
            yield dlt.mark.with_table_name(renamed_items, "deals_flow_" + entity)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _get_deals_participants(
    deals_page: TDataPage, pipedrive_api_key: str
) -> Iterator[TDataPage]:
    """Yield the participants pages of every deal contained in `deals_page`."""
    for deal in deals_page:
        endpoint = f"deals/{deal['id']}/participants"
        for participants_page in get_pages(endpoint, pipedrive_api_key):
            yield participants_page
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@dlt.resource(selected=False)
def create_state(pipedrive_api_key: str) -> Iterator[Dict[str, Any]]:
    """Refresh the custom fields mapping kept in the dlt source state.

    For every entity in `ENTITY_MAPPINGS` that has a fields endpoint, all
    pages of that endpoint are folded into the entity's mapping with
    `update_fields_mapping`. The complete mapping is yielded once at the end.
    """
    # gets all *Fields data and stores in state
    state_mapping = dlt.current.source_state().setdefault(
        "custom_fields_mapping", {}
    )
    for entity, fields_entity, _ in ENTITY_MAPPINGS:
        if fields_entity is None:
            continue
        entity_mapping: Dict[str, Dict[str, str]] = state_mapping.setdefault(
            entity, {}
        )
        # we need to process all pages before yielding
        for fields_page in get_pages(fields_entity, pipedrive_api_key):
            entity_mapping = update_fields_mapping(fields_page, entity_mapping)
        state_mapping[entity] = entity_mapping

    yield state_mapping
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dlt.transformer(
    name="custom_fields_mapping",
    write_disposition="replace",
    columns={"options": {"data_type": "json"}},
)
def parsed_mapping(
    custom_fields_mapping: Dict[str, Any],
) -> Optional[Iterator[List[Dict[str, str]]]]:
    """
    Flatten the custom fields mapping into rows so dlt can store it in the destination.

    One list of rows is yielded per endpoint; every row describes a single
    field identified by its Pipedrive hash string.
    """
    for endpoint, fields_by_hash in custom_fields_mapping.items():
        rows = [
            dict(
                endpoint=endpoint,
                hash_string=hash_string,
                name=field["name"],
                normalized_name=field["normalized_name"],
                options=field["options"],
                field_type=field["field_type"],
            )
            for hash_string, field in fields_by_hash.items()
        ]
        yield rows
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@dlt.resource(primary_key="id", write_disposition="merge")
def leads(
    pipedrive_api_key: str = dlt.secrets.value,
    update_time: dlt.sources.incremental[str] = dlt.sources.incremental(
        "update_time", "1970-01-01 00:00:00"
    ),
) -> Iterator[TDataPage]:
    """Resource to incrementally load pipedrive leads by update_time

    Args:
        pipedrive_api_key: Pipedrive API token.
        update_time: Incremental cursor on the `update_time` field.

    Yields:
        Pages of leads with custom fields renamed.
    """
    # Leads inherit custom fields from deals. The mapping is stored in the
    # source state under the entity name used by ENTITY_MAPPINGS ("deal");
    # "deals" is kept as a fallback for state written under that key.
    state_mapping = dlt.current.source_state().get("custom_fields_mapping", {})
    fields_mapping = state_mapping.get("deal") or state_mapping.get("deals", {})
    # Load leads pages sorted from newest to oldest and stop loading when
    # last incremental value is reached
    pages = get_pages(
        "leads",
        pipedrive_api_key,
        extra_params={"sort": "update_time DESC"},
    )
    for page in pages:
        yield rename_fields(page, fields_mapping)

        if update_time.start_out_of_range:
            return
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Pipedrive source helpers"""
|
|
2
|
+
|
|
3
|
+
from itertools import groupby
|
|
4
|
+
from typing import Any, Dict, Iterable, List, Tuple, cast # noqa: F401
|
|
5
|
+
|
|
6
|
+
from dlt.common import pendulum # noqa: F401
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _deals_flow_group_key(item: Dict[str, Any]) -> str:
|
|
10
|
+
return item["object"] # type: ignore[no-any-return]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def group_deal_flows(
|
|
14
|
+
pages: Iterable[Iterable[Dict[str, Any]]],
|
|
15
|
+
) -> Iterable[Tuple[str, List[Dict[str, Any]]]]:
|
|
16
|
+
for page in pages:
|
|
17
|
+
for entity, items in groupby(
|
|
18
|
+
sorted(page, key=_deals_flow_group_key), key=_deals_flow_group_key
|
|
19
|
+
):
|
|
20
|
+
yield (
|
|
21
|
+
entity,
|
|
22
|
+
[dict(item["data"], timestamp=item["timestamp"]) for item in items],
|
|
23
|
+
)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from typing import Any, Dict, Optional, TypedDict
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
|
|
5
|
+
from ..typing import TDataPage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TFieldMapping(TypedDict):
    """Stored description of a single Pipedrive field, keyed elsewhere by its hash string."""

    # field name as delivered by Pipedrive
    name: str
    # field name after normalization with the source schema naming convention
    normalized_name: str
    # option id (as string) -> option label
    options: Optional[Dict[str, str]]
    # Pipedrive field type, e.g. "enum" or "set"
    field_type: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def update_fields_mapping(
    new_fields_mapping: "TDataPage", existing_fields_mapping: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Specific function to perform data munging and push changes to custom fields' mapping stored in dlt's state
    The endpoint must be an entity fields' endpoint

    Args:
        new_fields_mapping: A page of field definitions from a `*Fields` endpoint.
        existing_fields_mapping: The mapping collected so far, keyed by field key.

    Returns:
        The updated mapping.
    """
    for data_item in new_fields_mapping:
        # 'edit_flag' field contains a boolean value, which is set to 'True' for custom fields and 'False' otherwise.
        if data_item.get("edit_flag"):
            # Regarding custom fields, 'key' field contains pipedrive's hash string representation of its name
            # We assume that pipedrive's hash strings are meant to be a unique representation of custom fields' name,
            # so dlt's state shouldn't be updated while those values remain unchanged
            existing_fields_mapping = _update_field(data_item, existing_fields_mapping)
        # Built in enum and set fields are mapped if their options have int ids
        # Enum fields with bool and string key options are left intact
        elif data_item.get("field_type") in {"set", "enum"}:
            # `options` may be present but null; treat that like "no options"
            options = data_item.get("options") or []
            first_option_id = options[0]["id"] if options else None
            # bool is a subclass of int, so it has to be excluded explicitly
            if isinstance(first_option_id, int) and not isinstance(
                first_option_id, bool
            ):
                existing_fields_mapping = _update_field(
                    data_item, existing_fields_mapping
                )
    return existing_fields_mapping
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _update_field(
|
|
42
|
+
data_item: Dict[str, Any],
|
|
43
|
+
existing_fields_mapping: Optional[Dict[str, TFieldMapping]],
|
|
44
|
+
) -> Dict[str, TFieldMapping]:
|
|
45
|
+
"""Create or update the given field's info the custom fields state
|
|
46
|
+
If the field hash already exists in the state from previous runs the name is not updated.
|
|
47
|
+
New enum options (if any) are appended to the state.
|
|
48
|
+
"""
|
|
49
|
+
existing_fields_mapping = existing_fields_mapping or {}
|
|
50
|
+
key = data_item["key"]
|
|
51
|
+
options = data_item.get("options", [])
|
|
52
|
+
new_options_map = {str(o["id"]): o["label"] for o in options}
|
|
53
|
+
existing_field = existing_fields_mapping.get(key)
|
|
54
|
+
if not existing_field:
|
|
55
|
+
existing_fields_mapping[key] = dict(
|
|
56
|
+
name=data_item["name"],
|
|
57
|
+
normalized_name=_normalized_name(data_item["name"]),
|
|
58
|
+
options=new_options_map,
|
|
59
|
+
field_type=data_item["field_type"],
|
|
60
|
+
)
|
|
61
|
+
return existing_fields_mapping
|
|
62
|
+
existing_options = existing_field.get("options", {})
|
|
63
|
+
if not existing_options or existing_options == new_options_map:
|
|
64
|
+
existing_field["options"] = new_options_map
|
|
65
|
+
existing_field["field_type"] = data_item[
|
|
66
|
+
"field_type"
|
|
67
|
+
] # Add for backwards compat
|
|
68
|
+
return existing_fields_mapping
|
|
69
|
+
# Add new enum options to the existing options array
|
|
70
|
+
# so that when option is renamed the original label remains valid
|
|
71
|
+
new_option_keys = set(new_options_map) - set(existing_options)
|
|
72
|
+
for key in new_option_keys:
|
|
73
|
+
existing_options[key] = new_options_map[key]
|
|
74
|
+
existing_field["options"] = existing_options
|
|
75
|
+
return existing_fields_mapping
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _normalized_name(name: str) -> str:
    """Normalize `name` with the naming convention of the current dlt source schema.

    Leading and trailing spaces are removed before normalization.
    """
    naming = dlt.current.source_schema().naming
    return naming.normalize_identifier(name.strip())
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def rename_fields(data: "TDataPage", fields_mapping: Dict[str, Any]) -> "TDataPage":
    """Replace hashed field keys by field names and option ids by labels, in place.

    Args:
        data: Page of items; the items are modified in place.
        fields_mapping: Field hash string -> field info (`name`, `options`, `field_type`).

    Returns:
        The same `data` object.
    """
    if not fields_mapping:
        return data
    for data_item in data:
        for hash_string, field in fields_mapping.items():
            if hash_string not in data_item:
                continue
            field_value = data_item.pop(hash_string)
            field_name = field["name"]
            # Entries from older state may lack `field_type` or carry null options
            field_type = field.get("field_type")
            options_map = field.get("options") or {}
            # Get label instead of ID for 'enum' and 'set' fields
            if field_value and field_type == "set":  # Multiple choice
                if isinstance(field_value, str):
                    # Multiple-option values arrive as a comma-separated string
                    # of option ids; iterating the string itself would walk it
                    # character by character.
                    option_ids = [
                        part.strip() for part in field_value.split(",") if part.strip()
                    ]
                else:
                    option_ids = field_value
                field_value = [
                    options_map.get(str(option_id), option_id)
                    for option_id in option_ids
                ]
            elif field_value and field_type == "enum":
                field_value = options_map.get(str(field_value), field_value)
            data_item[field_name] = field_value
    return data
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from itertools import chain
|
|
2
|
+
from typing import (
|
|
3
|
+
Any,
|
|
4
|
+
Dict,
|
|
5
|
+
Iterable,
|
|
6
|
+
Iterator,
|
|
7
|
+
List,
|
|
8
|
+
TypeVar,
|
|
9
|
+
Union,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
import dlt
|
|
13
|
+
from dlt.sources.helpers import requests
|
|
14
|
+
|
|
15
|
+
from ..typing import TDataPage
|
|
16
|
+
from .custom_fields_munger import rename_fields
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_pages(
    entity: str,
    pipedrive_api_key: str,
    extra_params: Union[Dict[str, Any], None] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """
    Generic method to retrieve endpoint data based on the required headers and params.

    Args:
        entity: the endpoint you want to call
        pipedrive_api_key: API token, sent as the `api_token` query parameter
        extra_params: any needed request params except pagination.

    Returns:
        An iterator over the non-empty `data` pages of the endpoint.
    """
    headers = {"Content-Type": "application/json"}
    params: Dict[str, Any] = {"api_token": pipedrive_api_key}
    if extra_params:
        params.update(extra_params)
    url = f"https://app.pipedrive.com/v1/{entity}"
    yield from _paginated_get(url, headers=headers, params=params)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_recent_items_incremental(
    entity: str,
    pipedrive_api_key: str,
    since_timestamp: dlt.sources.incremental[str] = dlt.sources.incremental(
        "update_time|modified", "1970-01-01 00:00:00"
    ),
) -> Iterator[TDataPage]:
    """Get a specific entity type from /recents with incremental state.

    The incremental's `last_value` is used as the start timestamp of the request.
    """
    start_from = since_timestamp.last_value
    for page in _get_recent_pages(entity, pipedrive_api_key, start_from):
        yield page
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _paginated_get(
    url: str, headers: Dict[str, Any], params: Dict[str, Any]
) -> Iterator[List[Dict[str, Any]]]:
    """
    Requests and yields data 500 records at a time
    Documentation: https://pipedrive.readme.io/docs/core-api-concepts-pagination

    Args:
        url: Full endpoint url.
        headers: Request headers.
        params: Query parameters without pagination; not modified.

    Yields:
        The `data` value of every response in which it is non-empty.
    """
    # Work on a copy so the pagination cursor does not leak into the caller's dict.
    # pagination start and page limit
    query = dict(params, start=0, limit=500)
    while True:
        page = requests.get(url, headers=headers, params=query).json()
        # yield data only
        data = page["data"]
        if data:
            yield data
        # check if next page exists; `additional_data` may be missing or null
        pagination_info = (page.get("additional_data") or {}).get("pagination") or {}
        # more_items_in_collection is set to True or False
        if not pagination_info.get("more_items_in_collection", False):
            break
        next_start = pagination_info.get("next_start")
        if next_start is None:
            # Without a cursor the same page would be requested again forever.
            break
        query["start"] = next_start
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
T = TypeVar("T")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _extract_recents_data(data: Iterable[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
80
|
+
"""Results from recents endpoint contain `data` key which is either a single entity or list of entities
|
|
81
|
+
|
|
82
|
+
This returns a flat list of entities from an iterable of recent results
|
|
83
|
+
"""
|
|
84
|
+
return [
|
|
85
|
+
data_item
|
|
86
|
+
for data_item in chain.from_iterable(
|
|
87
|
+
(_list_wrapped(item["data"]) for item in data)
|
|
88
|
+
)
|
|
89
|
+
if data_item is not None
|
|
90
|
+
]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _list_wrapped(item: Union[List[T], T]) -> List[T]:
|
|
94
|
+
if isinstance(item, list):
|
|
95
|
+
return item
|
|
96
|
+
return [item]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _get_recent_pages(
    entity: str, pipedrive_api_key: str, since_timestamp: str
) -> Iterator[TDataPage]:
    """Yield pages of `entity` items from the `recents` endpoint.

    Every page is flattened with `_extract_recents_data` and its custom
    fields are renamed using the entity's mapping from the dlt source state.
    """
    state = dlt.current.source_state()
    entity_fields = state.get("custom_fields_mapping", {}).get(entity, {})
    request_params = {"since_timestamp": since_timestamp, "items": entity}
    raw_pages = get_pages("recents", pipedrive_api_key, extra_params=request_params)
    for raw_page in raw_pages:
        yield rename_fields(_extract_recents_data(raw_page), entity_fields)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
__source_name__ = "pipedrive"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Pipedrive source settings and constants"""
|
|
2
|
+
|
|
3
|
+
# Each tuple is (entity, fields endpoint, extra).
# The fields endpoint is None for entities without a `*Fields` endpoint.
# NOTE(review): the third element looks like extra request params; confirm against callers.
ENTITY_MAPPINGS = [
    ("activity", "activityFields", {"user_id": 0}),
    ("organization", "organizationFields", None),
    ("person", "personFields", None),
    ("product", "productFields", None),
    ("deal", "dealFields", None),
    ("pipeline", None, None),
    ("stage", None, None),
    ("user", None, None),
]

# Entity name (as requested from the recents endpoint) -> dlt resource name.
RECENTS_ENTITIES = {
    "activity": "activities",
    "activityType": "activity_types",
    "deal": "deals",
    "file": "files",
    "filter": "filters",
    "note": "notes",
    "person": "persons",
    "organization": "organizations",
    "pipeline": "pipelines",
    "product": "products",
    "stage": "stages",
    "user": "users",
}
|