ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin_max/__init__.py +6 -4
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +37 -10
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +508 -27
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +107 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +15 -8
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +2933 -245
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.13.dist-info/RECORD +0 -115
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/factory.py
CHANGED
|
@@ -7,58 +7,108 @@ from ingestr.src.destinations import (
|
|
|
7
7
|
AthenaDestination,
|
|
8
8
|
BigQueryDestination,
|
|
9
9
|
ClickhouseDestination,
|
|
10
|
+
CrateDBDestination,
|
|
10
11
|
CsvDestination,
|
|
11
12
|
DatabricksDestination,
|
|
12
13
|
DuckDBDestination,
|
|
14
|
+
ElasticsearchDestination,
|
|
15
|
+
GCSDestination,
|
|
16
|
+
MongoDBDestination,
|
|
17
|
+
MotherduckDestination,
|
|
13
18
|
MsSQLDestination,
|
|
19
|
+
MySqlDestination,
|
|
14
20
|
PostgresDestination,
|
|
15
21
|
RedshiftDestination,
|
|
22
|
+
S3Destination,
|
|
16
23
|
SnowflakeDestination,
|
|
24
|
+
SqliteDestination,
|
|
17
25
|
SynapseDestination,
|
|
26
|
+
TrinoDestination,
|
|
18
27
|
)
|
|
19
28
|
from ingestr.src.sources import (
|
|
20
29
|
AdjustSource,
|
|
21
30
|
AirtableSource,
|
|
31
|
+
AlliumSource,
|
|
32
|
+
AnthropicSource,
|
|
22
33
|
AppleAppStoreSource,
|
|
23
34
|
ApplovinMaxSource,
|
|
24
35
|
AppLovinSource,
|
|
25
36
|
AppsflyerSource,
|
|
26
37
|
ArrowMemoryMappedSource,
|
|
27
38
|
AsanaSource,
|
|
39
|
+
AttioSource,
|
|
28
40
|
ChessSource,
|
|
41
|
+
ClickupSource,
|
|
42
|
+
CouchbaseSource,
|
|
43
|
+
CursorSource,
|
|
44
|
+
DoceboSource,
|
|
29
45
|
DynamoDBSource,
|
|
46
|
+
ElasticsearchSource,
|
|
30
47
|
FacebookAdsSource,
|
|
48
|
+
FluxxSource,
|
|
49
|
+
FrankfurterSource,
|
|
50
|
+
FreshdeskSource,
|
|
51
|
+
FundraiseupSource,
|
|
31
52
|
GCSSource,
|
|
32
53
|
GitHubSource,
|
|
33
54
|
GoogleAdsSource,
|
|
34
55
|
GoogleAnalyticsSource,
|
|
35
56
|
GoogleSheetsSource,
|
|
36
57
|
GorgiasSource,
|
|
58
|
+
HostawaySource,
|
|
59
|
+
HttpSource,
|
|
37
60
|
HubspotSource,
|
|
61
|
+
InfluxDBSource,
|
|
62
|
+
IntercomSource,
|
|
63
|
+
IsocPulseSource,
|
|
64
|
+
JiraSource,
|
|
38
65
|
KafkaSource,
|
|
66
|
+
KinesisSource,
|
|
39
67
|
KlaviyoSource,
|
|
68
|
+
LinearSource,
|
|
40
69
|
LinkedInAdsSource,
|
|
41
70
|
LocalCsvSource,
|
|
71
|
+
MailchimpSource,
|
|
72
|
+
MixpanelSource,
|
|
73
|
+
MondaySource,
|
|
42
74
|
MongoDbSource,
|
|
43
75
|
NotionSource,
|
|
44
76
|
PersonioSource,
|
|
77
|
+
PhantombusterSource,
|
|
78
|
+
PinterestSource,
|
|
79
|
+
PipedriveSource,
|
|
80
|
+
PlusVibeAISource,
|
|
81
|
+
QuickBooksSource,
|
|
82
|
+
RevenueCatSource,
|
|
45
83
|
S3Source,
|
|
46
84
|
SalesforceSource,
|
|
85
|
+
SFTPSource,
|
|
47
86
|
ShopifySource,
|
|
48
87
|
SlackSource,
|
|
88
|
+
SmartsheetSource,
|
|
89
|
+
SnapchatAdsSource,
|
|
90
|
+
SocrataSource,
|
|
91
|
+
SolidgateSource,
|
|
49
92
|
SqlSource,
|
|
50
93
|
StripeAnalyticsSource,
|
|
51
94
|
TikTokSource,
|
|
95
|
+
TrustpilotSource,
|
|
96
|
+
WiseSource,
|
|
52
97
|
ZendeskSource,
|
|
98
|
+
ZoomSource,
|
|
53
99
|
)
|
|
54
100
|
|
|
55
101
|
SQL_SOURCE_SCHEMES = [
|
|
56
102
|
"bigquery",
|
|
103
|
+
"crate",
|
|
57
104
|
"duckdb",
|
|
58
105
|
"mssql",
|
|
106
|
+
"mssql+pyodbc",
|
|
59
107
|
"mysql",
|
|
60
108
|
"mysql+pymysql",
|
|
61
109
|
"mysql+mysqlconnector",
|
|
110
|
+
"md",
|
|
111
|
+
"motherduck",
|
|
62
112
|
"postgres",
|
|
63
113
|
"postgresql",
|
|
64
114
|
"postgresql+psycopg2",
|
|
@@ -68,8 +118,13 @@ SQL_SOURCE_SCHEMES = [
|
|
|
68
118
|
"sqlite",
|
|
69
119
|
"oracle",
|
|
70
120
|
"oracle+cx_oracle",
|
|
121
|
+
"oracle+oracledb",
|
|
71
122
|
"hana",
|
|
72
123
|
"clickhouse",
|
|
124
|
+
"databricks",
|
|
125
|
+
"db2",
|
|
126
|
+
"spanner",
|
|
127
|
+
"trino",
|
|
73
128
|
]
|
|
74
129
|
|
|
75
130
|
|
|
@@ -108,8 +163,16 @@ class SourceDestinationFactory:
|
|
|
108
163
|
source_scheme: str
|
|
109
164
|
destination_scheme: str
|
|
110
165
|
sources: Dict[str, Type[SourceProtocol]] = {
|
|
166
|
+
"allium": AlliumSource,
|
|
167
|
+
"anthropic": AnthropicSource,
|
|
111
168
|
"csv": LocalCsvSource,
|
|
169
|
+
"couchbase": CouchbaseSource,
|
|
170
|
+
"cursor": CursorSource,
|
|
171
|
+
"docebo": DoceboSource,
|
|
172
|
+
"http": HttpSource,
|
|
173
|
+
"https": HttpSource,
|
|
112
174
|
"mongodb": MongoDbSource,
|
|
175
|
+
"mongodb+srv": MongoDbSource,
|
|
113
176
|
"notion": NotionSource,
|
|
114
177
|
"gsheets": GoogleSheetsSource,
|
|
115
178
|
"shopify": ShopifySource,
|
|
@@ -118,10 +181,15 @@ class SourceDestinationFactory:
|
|
|
118
181
|
"chess": ChessSource,
|
|
119
182
|
"stripe": StripeAnalyticsSource,
|
|
120
183
|
"facebookads": FacebookAdsSource,
|
|
184
|
+
"fluxx": FluxxSource,
|
|
121
185
|
"slack": SlackSource,
|
|
186
|
+
"hostaway": HostawaySource,
|
|
122
187
|
"hubspot": HubspotSource,
|
|
188
|
+
"intercom": IntercomSource,
|
|
189
|
+
"jira": JiraSource,
|
|
123
190
|
"airtable": AirtableSource,
|
|
124
191
|
"klaviyo": KlaviyoSource,
|
|
192
|
+
"mixpanel": MixpanelSource,
|
|
125
193
|
"appsflyer": AppsflyerSource,
|
|
126
194
|
"kafka": KafkaSource,
|
|
127
195
|
"adjust": AdjustSource,
|
|
@@ -136,15 +204,44 @@ class SourceDestinationFactory:
|
|
|
136
204
|
"appstore": AppleAppStoreSource,
|
|
137
205
|
"gs": GCSSource,
|
|
138
206
|
"linkedinads": LinkedInAdsSource,
|
|
207
|
+
"linear": LinearSource,
|
|
139
208
|
"applovin": AppLovinSource,
|
|
140
209
|
"applovinmax": ApplovinMaxSource,
|
|
141
210
|
"salesforce": SalesforceSource,
|
|
142
211
|
"personio": PersonioSource,
|
|
212
|
+
"kinesis": KinesisSource,
|
|
213
|
+
"pipedrive": PipedriveSource,
|
|
214
|
+
"frankfurter": FrankfurterSource,
|
|
215
|
+
"freshdesk": FreshdeskSource,
|
|
216
|
+
"fundraiseup": FundraiseupSource,
|
|
217
|
+
"trustpilot": TrustpilotSource,
|
|
218
|
+
"phantombuster": PhantombusterSource,
|
|
219
|
+
"elasticsearch": ElasticsearchSource,
|
|
220
|
+
"attio": AttioSource,
|
|
221
|
+
"solidgate": SolidgateSource,
|
|
222
|
+
"quickbooks": QuickBooksSource,
|
|
223
|
+
"isoc-pulse": IsocPulseSource,
|
|
224
|
+
"smartsheet": SmartsheetSource,
|
|
225
|
+
"sftp": SFTPSource,
|
|
226
|
+
"pinterest": PinterestSource,
|
|
227
|
+
"revenuecat": RevenueCatSource,
|
|
228
|
+
"socrata": SocrataSource,
|
|
229
|
+
"snapchatads": SnapchatAdsSource,
|
|
230
|
+
"zoom": ZoomSource,
|
|
231
|
+
"clickup": ClickupSource,
|
|
232
|
+
"influxdb": InfluxDBSource,
|
|
233
|
+
"wise": WiseSource,
|
|
234
|
+
"plusvibeai": PlusVibeAISource,
|
|
235
|
+
"monday": MondaySource,
|
|
236
|
+
"mailchimp": MailchimpSource,
|
|
143
237
|
}
|
|
144
238
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
145
239
|
"bigquery": BigQueryDestination,
|
|
240
|
+
"cratedb": CrateDBDestination,
|
|
146
241
|
"databricks": DatabricksDestination,
|
|
147
242
|
"duckdb": DuckDBDestination,
|
|
243
|
+
"motherduck": MotherduckDestination,
|
|
244
|
+
"md": MotherduckDestination,
|
|
148
245
|
"mssql": MsSQLDestination,
|
|
149
246
|
"postgres": PostgresDestination,
|
|
150
247
|
"postgresql": PostgresDestination,
|
|
@@ -158,12 +255,20 @@ class SourceDestinationFactory:
|
|
|
158
255
|
"athena": AthenaDestination,
|
|
159
256
|
"clickhouse+native": ClickhouseDestination,
|
|
160
257
|
"clickhouse": ClickhouseDestination,
|
|
258
|
+
"elasticsearch": ElasticsearchDestination,
|
|
259
|
+
"mongodb": MongoDBDestination,
|
|
260
|
+
"mongodb+srv": MongoDBDestination,
|
|
261
|
+
"s3": S3Destination,
|
|
262
|
+
"gs": GCSDestination,
|
|
263
|
+
"sqlite": SqliteDestination,
|
|
264
|
+
"mysql": MySqlDestination,
|
|
265
|
+
"mysql+pymysql": MySqlDestination,
|
|
266
|
+
"trino": TrinoDestination,
|
|
161
267
|
}
|
|
162
268
|
|
|
163
269
|
def __init__(self, source_uri: str, destination_uri: str):
|
|
164
270
|
self.source_uri = source_uri
|
|
165
|
-
|
|
166
|
-
self.source_scheme = source_fields.scheme
|
|
271
|
+
self.source_scheme = parse_scheme_from_uri(source_uri)
|
|
167
272
|
|
|
168
273
|
self.destination_uri = destination_uri
|
|
169
274
|
self.destination_scheme = parse_scheme_from_uri(destination_uri)
|
|
@@ -37,9 +37,14 @@ def readers(
|
|
|
37
37
|
file_glob (str, optional): The filter to apply to the files in glob format. by default lists all files in bucket_url non-recursively
|
|
38
38
|
"""
|
|
39
39
|
filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
|
|
41
|
+
# NOTE: incremental support is disabled until we can figure out
|
|
42
|
+
# how to support incremental loads per matching file, rather
|
|
43
|
+
# than a blanket threshold.
|
|
44
|
+
#
|
|
45
|
+
# filesystem_resource.apply_hints(
|
|
46
|
+
# incremental=dlt.sources.incremental("modification_date"),
|
|
47
|
+
# )
|
|
43
48
|
return (
|
|
44
49
|
filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
|
|
45
50
|
filesystem_resource | dlt.transformer(name="read_jsonl")(_read_jsonl),
|
ingestr/src/filters.py
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
from dlt.common.libs.sql_alchemy import Table
|
|
2
|
-
|
|
3
|
-
|
|
4
1
|
def cast_set_to_list(row):
|
|
5
2
|
# this handles just the sqlalchemy backend for now
|
|
6
3
|
if isinstance(row, dict):
|
|
@@ -10,10 +7,56 @@ def cast_set_to_list(row):
|
|
|
10
7
|
return row
|
|
11
8
|
|
|
12
9
|
|
|
10
|
+
def cast_spanner_types(row):
|
|
11
|
+
if not isinstance(row, dict):
|
|
12
|
+
return row
|
|
13
|
+
|
|
14
|
+
from google.cloud.spanner_v1.data_types import JsonObject
|
|
15
|
+
|
|
16
|
+
for key in row.keys():
|
|
17
|
+
if isinstance(row[key], JsonObject):
|
|
18
|
+
import json
|
|
19
|
+
|
|
20
|
+
row[key] = json.loads(row[key].serialize())
|
|
21
|
+
return row
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def handle_mysql_empty_dates(row):
|
|
25
|
+
# MySQL returns empty dates as 0000-00-00, which is not a valid date, we handle them here.
|
|
26
|
+
if not isinstance(row, dict):
|
|
27
|
+
return row
|
|
28
|
+
|
|
29
|
+
for key in row.keys():
|
|
30
|
+
if not isinstance(row[key], str):
|
|
31
|
+
continue
|
|
32
|
+
|
|
33
|
+
if row[key] == "0000-00-00":
|
|
34
|
+
from datetime import date
|
|
35
|
+
|
|
36
|
+
row[key] = date(1970, 1, 1)
|
|
37
|
+
|
|
38
|
+
elif row[key] == "0000-00-00 00:00:00":
|
|
39
|
+
from datetime import datetime
|
|
40
|
+
|
|
41
|
+
row[key] = datetime(1970, 1, 1, 0, 0, 0)
|
|
42
|
+
return row
|
|
43
|
+
|
|
44
|
+
|
|
13
45
|
def table_adapter_exclude_columns(cols: list[str]):
|
|
46
|
+
from dlt.common.libs.sql_alchemy import Table
|
|
47
|
+
|
|
14
48
|
def excluder(table: Table):
|
|
15
49
|
cols_to_remove = [col for col in table._columns if col.name in cols] # type: ignore
|
|
16
50
|
for col in cols_to_remove:
|
|
17
51
|
table._columns.remove(col) # type: ignore
|
|
18
52
|
|
|
19
53
|
return excluder
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def create_masking_filter(mask_configs: list[str]):
|
|
57
|
+
from ingestr.src.masking import create_masking_mapper
|
|
58
|
+
|
|
59
|
+
if not mask_configs:
|
|
60
|
+
return lambda x: x
|
|
61
|
+
|
|
62
|
+
return create_masking_mapper(mask_configs)
|