ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/factory.py
CHANGED
|
@@ -7,54 +7,108 @@ from ingestr.src.destinations import (
|
|
|
7
7
|
AthenaDestination,
|
|
8
8
|
BigQueryDestination,
|
|
9
9
|
ClickhouseDestination,
|
|
10
|
+
CrateDBDestination,
|
|
10
11
|
CsvDestination,
|
|
11
12
|
DatabricksDestination,
|
|
12
13
|
DuckDBDestination,
|
|
14
|
+
ElasticsearchDestination,
|
|
15
|
+
GCSDestination,
|
|
16
|
+
MongoDBDestination,
|
|
17
|
+
MotherduckDestination,
|
|
13
18
|
MsSQLDestination,
|
|
19
|
+
MySqlDestination,
|
|
14
20
|
PostgresDestination,
|
|
15
21
|
RedshiftDestination,
|
|
22
|
+
S3Destination,
|
|
16
23
|
SnowflakeDestination,
|
|
24
|
+
SqliteDestination,
|
|
17
25
|
SynapseDestination,
|
|
26
|
+
TrinoDestination,
|
|
18
27
|
)
|
|
19
28
|
from ingestr.src.sources import (
|
|
20
29
|
AdjustSource,
|
|
21
30
|
AirtableSource,
|
|
31
|
+
AlliumSource,
|
|
32
|
+
AnthropicSource,
|
|
22
33
|
AppleAppStoreSource,
|
|
34
|
+
ApplovinMaxSource,
|
|
35
|
+
AppLovinSource,
|
|
23
36
|
AppsflyerSource,
|
|
24
37
|
ArrowMemoryMappedSource,
|
|
25
38
|
AsanaSource,
|
|
39
|
+
AttioSource,
|
|
26
40
|
ChessSource,
|
|
41
|
+
ClickupSource,
|
|
42
|
+
CouchbaseSource,
|
|
43
|
+
CursorSource,
|
|
44
|
+
DoceboSource,
|
|
27
45
|
DynamoDBSource,
|
|
46
|
+
ElasticsearchSource,
|
|
28
47
|
FacebookAdsSource,
|
|
48
|
+
FluxxSource,
|
|
49
|
+
FrankfurterSource,
|
|
50
|
+
FreshdeskSource,
|
|
51
|
+
FundraiseupSource,
|
|
29
52
|
GCSSource,
|
|
30
53
|
GitHubSource,
|
|
31
54
|
GoogleAdsSource,
|
|
32
55
|
GoogleAnalyticsSource,
|
|
33
56
|
GoogleSheetsSource,
|
|
34
57
|
GorgiasSource,
|
|
58
|
+
HostawaySource,
|
|
59
|
+
HttpSource,
|
|
35
60
|
HubspotSource,
|
|
61
|
+
InfluxDBSource,
|
|
62
|
+
IntercomSource,
|
|
63
|
+
IsocPulseSource,
|
|
64
|
+
JiraSource,
|
|
36
65
|
KafkaSource,
|
|
66
|
+
KinesisSource,
|
|
37
67
|
KlaviyoSource,
|
|
68
|
+
LinearSource,
|
|
38
69
|
LinkedInAdsSource,
|
|
39
70
|
LocalCsvSource,
|
|
71
|
+
MailchimpSource,
|
|
72
|
+
MixpanelSource,
|
|
73
|
+
MondaySource,
|
|
40
74
|
MongoDbSource,
|
|
41
75
|
NotionSource,
|
|
76
|
+
PersonioSource,
|
|
77
|
+
PhantombusterSource,
|
|
78
|
+
PinterestSource,
|
|
79
|
+
PipedriveSource,
|
|
80
|
+
PlusVibeAISource,
|
|
81
|
+
QuickBooksSource,
|
|
82
|
+
RevenueCatSource,
|
|
42
83
|
S3Source,
|
|
84
|
+
SalesforceSource,
|
|
85
|
+
SFTPSource,
|
|
43
86
|
ShopifySource,
|
|
44
87
|
SlackSource,
|
|
88
|
+
SmartsheetSource,
|
|
89
|
+
SnapchatAdsSource,
|
|
90
|
+
SocrataSource,
|
|
91
|
+
SolidgateSource,
|
|
45
92
|
SqlSource,
|
|
46
93
|
StripeAnalyticsSource,
|
|
47
94
|
TikTokSource,
|
|
95
|
+
TrustpilotSource,
|
|
96
|
+
WiseSource,
|
|
48
97
|
ZendeskSource,
|
|
98
|
+
ZoomSource,
|
|
49
99
|
)
|
|
50
100
|
|
|
51
101
|
SQL_SOURCE_SCHEMES = [
|
|
52
102
|
"bigquery",
|
|
103
|
+
"crate",
|
|
53
104
|
"duckdb",
|
|
54
105
|
"mssql",
|
|
106
|
+
"mssql+pyodbc",
|
|
55
107
|
"mysql",
|
|
56
108
|
"mysql+pymysql",
|
|
57
109
|
"mysql+mysqlconnector",
|
|
110
|
+
"md",
|
|
111
|
+
"motherduck",
|
|
58
112
|
"postgres",
|
|
59
113
|
"postgresql",
|
|
60
114
|
"postgresql+psycopg2",
|
|
@@ -64,7 +118,13 @@ SQL_SOURCE_SCHEMES = [
|
|
|
64
118
|
"sqlite",
|
|
65
119
|
"oracle",
|
|
66
120
|
"oracle+cx_oracle",
|
|
121
|
+
"oracle+oracledb",
|
|
67
122
|
"hana",
|
|
123
|
+
"clickhouse",
|
|
124
|
+
"databricks",
|
|
125
|
+
"db2",
|
|
126
|
+
"spanner",
|
|
127
|
+
"trino",
|
|
68
128
|
]
|
|
69
129
|
|
|
70
130
|
|
|
@@ -103,8 +163,16 @@ class SourceDestinationFactory:
|
|
|
103
163
|
source_scheme: str
|
|
104
164
|
destination_scheme: str
|
|
105
165
|
sources: Dict[str, Type[SourceProtocol]] = {
|
|
166
|
+
"allium": AlliumSource,
|
|
167
|
+
"anthropic": AnthropicSource,
|
|
106
168
|
"csv": LocalCsvSource,
|
|
169
|
+
"couchbase": CouchbaseSource,
|
|
170
|
+
"cursor": CursorSource,
|
|
171
|
+
"docebo": DoceboSource,
|
|
172
|
+
"http": HttpSource,
|
|
173
|
+
"https": HttpSource,
|
|
107
174
|
"mongodb": MongoDbSource,
|
|
175
|
+
"mongodb+srv": MongoDbSource,
|
|
108
176
|
"notion": NotionSource,
|
|
109
177
|
"gsheets": GoogleSheetsSource,
|
|
110
178
|
"shopify": ShopifySource,
|
|
@@ -113,10 +181,15 @@ class SourceDestinationFactory:
|
|
|
113
181
|
"chess": ChessSource,
|
|
114
182
|
"stripe": StripeAnalyticsSource,
|
|
115
183
|
"facebookads": FacebookAdsSource,
|
|
184
|
+
"fluxx": FluxxSource,
|
|
116
185
|
"slack": SlackSource,
|
|
186
|
+
"hostaway": HostawaySource,
|
|
117
187
|
"hubspot": HubspotSource,
|
|
188
|
+
"intercom": IntercomSource,
|
|
189
|
+
"jira": JiraSource,
|
|
118
190
|
"airtable": AirtableSource,
|
|
119
191
|
"klaviyo": KlaviyoSource,
|
|
192
|
+
"mixpanel": MixpanelSource,
|
|
120
193
|
"appsflyer": AppsflyerSource,
|
|
121
194
|
"kafka": KafkaSource,
|
|
122
195
|
"adjust": AdjustSource,
|
|
@@ -131,11 +204,44 @@ class SourceDestinationFactory:
|
|
|
131
204
|
"appstore": AppleAppStoreSource,
|
|
132
205
|
"gs": GCSSource,
|
|
133
206
|
"linkedinads": LinkedInAdsSource,
|
|
207
|
+
"linear": LinearSource,
|
|
208
|
+
"applovin": AppLovinSource,
|
|
209
|
+
"applovinmax": ApplovinMaxSource,
|
|
210
|
+
"salesforce": SalesforceSource,
|
|
211
|
+
"personio": PersonioSource,
|
|
212
|
+
"kinesis": KinesisSource,
|
|
213
|
+
"pipedrive": PipedriveSource,
|
|
214
|
+
"frankfurter": FrankfurterSource,
|
|
215
|
+
"freshdesk": FreshdeskSource,
|
|
216
|
+
"fundraiseup": FundraiseupSource,
|
|
217
|
+
"trustpilot": TrustpilotSource,
|
|
218
|
+
"phantombuster": PhantombusterSource,
|
|
219
|
+
"elasticsearch": ElasticsearchSource,
|
|
220
|
+
"attio": AttioSource,
|
|
221
|
+
"solidgate": SolidgateSource,
|
|
222
|
+
"quickbooks": QuickBooksSource,
|
|
223
|
+
"isoc-pulse": IsocPulseSource,
|
|
224
|
+
"smartsheet": SmartsheetSource,
|
|
225
|
+
"sftp": SFTPSource,
|
|
226
|
+
"pinterest": PinterestSource,
|
|
227
|
+
"revenuecat": RevenueCatSource,
|
|
228
|
+
"socrata": SocrataSource,
|
|
229
|
+
"snapchatads": SnapchatAdsSource,
|
|
230
|
+
"zoom": ZoomSource,
|
|
231
|
+
"clickup": ClickupSource,
|
|
232
|
+
"influxdb": InfluxDBSource,
|
|
233
|
+
"wise": WiseSource,
|
|
234
|
+
"plusvibeai": PlusVibeAISource,
|
|
235
|
+
"monday": MondaySource,
|
|
236
|
+
"mailchimp": MailchimpSource,
|
|
134
237
|
}
|
|
135
238
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
136
239
|
"bigquery": BigQueryDestination,
|
|
240
|
+
"cratedb": CrateDBDestination,
|
|
137
241
|
"databricks": DatabricksDestination,
|
|
138
242
|
"duckdb": DuckDBDestination,
|
|
243
|
+
"motherduck": MotherduckDestination,
|
|
244
|
+
"md": MotherduckDestination,
|
|
139
245
|
"mssql": MsSQLDestination,
|
|
140
246
|
"postgres": PostgresDestination,
|
|
141
247
|
"postgresql": PostgresDestination,
|
|
@@ -149,12 +255,20 @@ class SourceDestinationFactory:
|
|
|
149
255
|
"athena": AthenaDestination,
|
|
150
256
|
"clickhouse+native": ClickhouseDestination,
|
|
151
257
|
"clickhouse": ClickhouseDestination,
|
|
258
|
+
"elasticsearch": ElasticsearchDestination,
|
|
259
|
+
"mongodb": MongoDBDestination,
|
|
260
|
+
"mongodb+srv": MongoDBDestination,
|
|
261
|
+
"s3": S3Destination,
|
|
262
|
+
"gs": GCSDestination,
|
|
263
|
+
"sqlite": SqliteDestination,
|
|
264
|
+
"mysql": MySqlDestination,
|
|
265
|
+
"mysql+pymysql": MySqlDestination,
|
|
266
|
+
"trino": TrinoDestination,
|
|
152
267
|
}
|
|
153
268
|
|
|
154
269
|
def __init__(self, source_uri: str, destination_uri: str):
|
|
155
270
|
self.source_uri = source_uri
|
|
156
|
-
|
|
157
|
-
self.source_scheme = source_fields.scheme
|
|
271
|
+
self.source_scheme = parse_scheme_from_uri(source_uri)
|
|
158
272
|
|
|
159
273
|
self.destination_uri = destination_uri
|
|
160
274
|
self.destination_scheme = parse_scheme_from_uri(destination_uri)
|
|
@@ -37,9 +37,14 @@ def readers(
|
|
|
37
37
|
file_glob (str, optional): The filter to apply to the files in glob format. by default lists all files in bucket_url non-recursively
|
|
38
38
|
"""
|
|
39
39
|
filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
|
|
41
|
+
# NOTE: incremental support is disabled until we can figure out
|
|
42
|
+
# how to support incremental loads per matching file, rather
|
|
43
|
+
# than a blanket threshold.
|
|
44
|
+
#
|
|
45
|
+
# filesystem_resource.apply_hints(
|
|
46
|
+
# incremental=dlt.sources.incremental("modification_date"),
|
|
47
|
+
# )
|
|
43
48
|
return (
|
|
44
49
|
filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
|
|
45
50
|
filesystem_resource | dlt.transformer(name="read_jsonl")(_read_jsonl),
|
ingestr/src/filters.py
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
from dlt.common.libs.sql_alchemy import Table
|
|
2
|
-
|
|
3
|
-
|
|
4
1
|
def cast_set_to_list(row):
|
|
5
2
|
# this handles just the sqlalchemy backend for now
|
|
6
3
|
if isinstance(row, dict):
|
|
@@ -10,10 +7,56 @@ def cast_set_to_list(row):
|
|
|
10
7
|
return row
|
|
11
8
|
|
|
12
9
|
|
|
10
|
+
def cast_spanner_types(row):
|
|
11
|
+
if not isinstance(row, dict):
|
|
12
|
+
return row
|
|
13
|
+
|
|
14
|
+
from google.cloud.spanner_v1.data_types import JsonObject
|
|
15
|
+
|
|
16
|
+
for key in row.keys():
|
|
17
|
+
if isinstance(row[key], JsonObject):
|
|
18
|
+
import json
|
|
19
|
+
|
|
20
|
+
row[key] = json.loads(row[key].serialize())
|
|
21
|
+
return row
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def handle_mysql_empty_dates(row):
|
|
25
|
+
# MySQL returns empty dates as 0000-00-00, which is not a valid date, we handle them here.
|
|
26
|
+
if not isinstance(row, dict):
|
|
27
|
+
return row
|
|
28
|
+
|
|
29
|
+
for key in row.keys():
|
|
30
|
+
if not isinstance(row[key], str):
|
|
31
|
+
continue
|
|
32
|
+
|
|
33
|
+
if row[key] == "0000-00-00":
|
|
34
|
+
from datetime import date
|
|
35
|
+
|
|
36
|
+
row[key] = date(1970, 1, 1)
|
|
37
|
+
|
|
38
|
+
elif row[key] == "0000-00-00 00:00:00":
|
|
39
|
+
from datetime import datetime
|
|
40
|
+
|
|
41
|
+
row[key] = datetime(1970, 1, 1, 0, 0, 0)
|
|
42
|
+
return row
|
|
43
|
+
|
|
44
|
+
|
|
13
45
|
def table_adapter_exclude_columns(cols: list[str]):
|
|
46
|
+
from dlt.common.libs.sql_alchemy import Table
|
|
47
|
+
|
|
14
48
|
def excluder(table: Table):
|
|
15
49
|
cols_to_remove = [col for col in table._columns if col.name in cols] # type: ignore
|
|
16
50
|
for col in cols_to_remove:
|
|
17
51
|
table._columns.remove(col) # type: ignore
|
|
18
52
|
|
|
19
53
|
return excluder
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def create_masking_filter(mask_configs: list[str]):
|
|
57
|
+
from ingestr.src.masking import create_masking_mapper
|
|
58
|
+
|
|
59
|
+
if not mask_configs:
|
|
60
|
+
return lambda x: x
|
|
61
|
+
|
|
62
|
+
return create_masking_mapper(mask_configs)
|