ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/factory.py CHANGED
@@ -7,54 +7,108 @@ from ingestr.src.destinations import (
7
7
  AthenaDestination,
8
8
  BigQueryDestination,
9
9
  ClickhouseDestination,
10
+ CrateDBDestination,
10
11
  CsvDestination,
11
12
  DatabricksDestination,
12
13
  DuckDBDestination,
14
+ ElasticsearchDestination,
15
+ GCSDestination,
16
+ MongoDBDestination,
17
+ MotherduckDestination,
13
18
  MsSQLDestination,
19
+ MySqlDestination,
14
20
  PostgresDestination,
15
21
  RedshiftDestination,
22
+ S3Destination,
16
23
  SnowflakeDestination,
24
+ SqliteDestination,
17
25
  SynapseDestination,
26
+ TrinoDestination,
18
27
  )
19
28
  from ingestr.src.sources import (
20
29
  AdjustSource,
21
30
  AirtableSource,
31
+ AlliumSource,
32
+ AnthropicSource,
22
33
  AppleAppStoreSource,
34
+ ApplovinMaxSource,
35
+ AppLovinSource,
23
36
  AppsflyerSource,
24
37
  ArrowMemoryMappedSource,
25
38
  AsanaSource,
39
+ AttioSource,
26
40
  ChessSource,
41
+ ClickupSource,
42
+ CouchbaseSource,
43
+ CursorSource,
44
+ DoceboSource,
27
45
  DynamoDBSource,
46
+ ElasticsearchSource,
28
47
  FacebookAdsSource,
48
+ FluxxSource,
49
+ FrankfurterSource,
50
+ FreshdeskSource,
51
+ FundraiseupSource,
29
52
  GCSSource,
30
53
  GitHubSource,
31
54
  GoogleAdsSource,
32
55
  GoogleAnalyticsSource,
33
56
  GoogleSheetsSource,
34
57
  GorgiasSource,
58
+ HostawaySource,
59
+ HttpSource,
35
60
  HubspotSource,
61
+ InfluxDBSource,
62
+ IntercomSource,
63
+ IsocPulseSource,
64
+ JiraSource,
36
65
  KafkaSource,
66
+ KinesisSource,
37
67
  KlaviyoSource,
68
+ LinearSource,
38
69
  LinkedInAdsSource,
39
70
  LocalCsvSource,
71
+ MailchimpSource,
72
+ MixpanelSource,
73
+ MondaySource,
40
74
  MongoDbSource,
41
75
  NotionSource,
76
+ PersonioSource,
77
+ PhantombusterSource,
78
+ PinterestSource,
79
+ PipedriveSource,
80
+ PlusVibeAISource,
81
+ QuickBooksSource,
82
+ RevenueCatSource,
42
83
  S3Source,
84
+ SalesforceSource,
85
+ SFTPSource,
43
86
  ShopifySource,
44
87
  SlackSource,
88
+ SmartsheetSource,
89
+ SnapchatAdsSource,
90
+ SocrataSource,
91
+ SolidgateSource,
45
92
  SqlSource,
46
93
  StripeAnalyticsSource,
47
94
  TikTokSource,
95
+ TrustpilotSource,
96
+ WiseSource,
48
97
  ZendeskSource,
98
+ ZoomSource,
49
99
  )
50
100
 
51
101
  SQL_SOURCE_SCHEMES = [
52
102
  "bigquery",
103
+ "crate",
53
104
  "duckdb",
54
105
  "mssql",
106
+ "mssql+pyodbc",
55
107
  "mysql",
56
108
  "mysql+pymysql",
57
109
  "mysql+mysqlconnector",
110
+ "md",
111
+ "motherduck",
58
112
  "postgres",
59
113
  "postgresql",
60
114
  "postgresql+psycopg2",
@@ -64,7 +118,13 @@ SQL_SOURCE_SCHEMES = [
64
118
  "sqlite",
65
119
  "oracle",
66
120
  "oracle+cx_oracle",
121
+ "oracle+oracledb",
67
122
  "hana",
123
+ "clickhouse",
124
+ "databricks",
125
+ "db2",
126
+ "spanner",
127
+ "trino",
68
128
  ]
69
129
 
70
130
 
@@ -103,8 +163,16 @@ class SourceDestinationFactory:
103
163
  source_scheme: str
104
164
  destination_scheme: str
105
165
  sources: Dict[str, Type[SourceProtocol]] = {
166
+ "allium": AlliumSource,
167
+ "anthropic": AnthropicSource,
106
168
  "csv": LocalCsvSource,
169
+ "couchbase": CouchbaseSource,
170
+ "cursor": CursorSource,
171
+ "docebo": DoceboSource,
172
+ "http": HttpSource,
173
+ "https": HttpSource,
107
174
  "mongodb": MongoDbSource,
175
+ "mongodb+srv": MongoDbSource,
108
176
  "notion": NotionSource,
109
177
  "gsheets": GoogleSheetsSource,
110
178
  "shopify": ShopifySource,
@@ -113,10 +181,15 @@ class SourceDestinationFactory:
113
181
  "chess": ChessSource,
114
182
  "stripe": StripeAnalyticsSource,
115
183
  "facebookads": FacebookAdsSource,
184
+ "fluxx": FluxxSource,
116
185
  "slack": SlackSource,
186
+ "hostaway": HostawaySource,
117
187
  "hubspot": HubspotSource,
188
+ "intercom": IntercomSource,
189
+ "jira": JiraSource,
118
190
  "airtable": AirtableSource,
119
191
  "klaviyo": KlaviyoSource,
192
+ "mixpanel": MixpanelSource,
120
193
  "appsflyer": AppsflyerSource,
121
194
  "kafka": KafkaSource,
122
195
  "adjust": AdjustSource,
@@ -131,11 +204,44 @@ class SourceDestinationFactory:
131
204
  "appstore": AppleAppStoreSource,
132
205
  "gs": GCSSource,
133
206
  "linkedinads": LinkedInAdsSource,
207
+ "linear": LinearSource,
208
+ "applovin": AppLovinSource,
209
+ "applovinmax": ApplovinMaxSource,
210
+ "salesforce": SalesforceSource,
211
+ "personio": PersonioSource,
212
+ "kinesis": KinesisSource,
213
+ "pipedrive": PipedriveSource,
214
+ "frankfurter": FrankfurterSource,
215
+ "freshdesk": FreshdeskSource,
216
+ "fundraiseup": FundraiseupSource,
217
+ "trustpilot": TrustpilotSource,
218
+ "phantombuster": PhantombusterSource,
219
+ "elasticsearch": ElasticsearchSource,
220
+ "attio": AttioSource,
221
+ "solidgate": SolidgateSource,
222
+ "quickbooks": QuickBooksSource,
223
+ "isoc-pulse": IsocPulseSource,
224
+ "smartsheet": SmartsheetSource,
225
+ "sftp": SFTPSource,
226
+ "pinterest": PinterestSource,
227
+ "revenuecat": RevenueCatSource,
228
+ "socrata": SocrataSource,
229
+ "snapchatads": SnapchatAdsSource,
230
+ "zoom": ZoomSource,
231
+ "clickup": ClickupSource,
232
+ "influxdb": InfluxDBSource,
233
+ "wise": WiseSource,
234
+ "plusvibeai": PlusVibeAISource,
235
+ "monday": MondaySource,
236
+ "mailchimp": MailchimpSource,
134
237
  }
135
238
  destinations: Dict[str, Type[DestinationProtocol]] = {
136
239
  "bigquery": BigQueryDestination,
240
+ "cratedb": CrateDBDestination,
137
241
  "databricks": DatabricksDestination,
138
242
  "duckdb": DuckDBDestination,
243
+ "motherduck": MotherduckDestination,
244
+ "md": MotherduckDestination,
139
245
  "mssql": MsSQLDestination,
140
246
  "postgres": PostgresDestination,
141
247
  "postgresql": PostgresDestination,
@@ -149,12 +255,20 @@ class SourceDestinationFactory:
149
255
  "athena": AthenaDestination,
150
256
  "clickhouse+native": ClickhouseDestination,
151
257
  "clickhouse": ClickhouseDestination,
258
+ "elasticsearch": ElasticsearchDestination,
259
+ "mongodb": MongoDBDestination,
260
+ "mongodb+srv": MongoDBDestination,
261
+ "s3": S3Destination,
262
+ "gs": GCSDestination,
263
+ "sqlite": SqliteDestination,
264
+ "mysql": MySqlDestination,
265
+ "mysql+pymysql": MySqlDestination,
266
+ "trino": TrinoDestination,
152
267
  }
153
268
 
154
269
  def __init__(self, source_uri: str, destination_uri: str):
155
270
  self.source_uri = source_uri
156
- source_fields = urlparse(source_uri)
157
- self.source_scheme = source_fields.scheme
271
+ self.source_scheme = parse_scheme_from_uri(source_uri)
158
272
 
159
273
  self.destination_uri = destination_uri
160
274
  self.destination_scheme = parse_scheme_from_uri(destination_uri)
ingestr/src/filesystem/__init__.py CHANGED
@@ -37,9 +37,14 @@ def readers(
37
37
  file_glob (str, optional): The filter to apply to the files in glob format. by default lists all files in bucket_url non-recursively
38
38
  """
39
39
  filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
40
- filesystem_resource.apply_hints(
41
- incremental=dlt.sources.incremental("modification_date"),
42
- )
40
+
41
+ # NOTE: incremental support is disabled until we can figure out
42
+ # how to support incremental loads per matching file, rather
43
+ # than a blanket threshold.
44
+ #
45
+ # filesystem_resource.apply_hints(
46
+ # incremental=dlt.sources.incremental("modification_date"),
47
+ # )
43
48
  return (
44
49
  filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
45
50
  filesystem_resource | dlt.transformer(name="read_jsonl")(_read_jsonl),
ingestr/src/filters.py CHANGED
@@ -1,6 +1,3 @@
1
- from dlt.common.libs.sql_alchemy import Table
2
-
3
-
4
1
  def cast_set_to_list(row):
5
2
  # this handles just the sqlalchemy backend for now
6
3
  if isinstance(row, dict):
@@ -10,10 +7,56 @@ def cast_set_to_list(row):
10
7
  return row
11
8
 
12
9
 
10
+ def cast_spanner_types(row):
11
+ if not isinstance(row, dict):
12
+ return row
13
+
14
+ from google.cloud.spanner_v1.data_types import JsonObject
15
+
16
+ for key in row.keys():
17
+ if isinstance(row[key], JsonObject):
18
+ import json
19
+
20
+ row[key] = json.loads(row[key].serialize())
21
+ return row
22
+
23
+
24
+ def handle_mysql_empty_dates(row):
25
+ # MySQL returns empty dates as 0000-00-00, which is not a valid date, we handle them here.
26
+ if not isinstance(row, dict):
27
+ return row
28
+
29
+ for key in row.keys():
30
+ if not isinstance(row[key], str):
31
+ continue
32
+
33
+ if row[key] == "0000-00-00":
34
+ from datetime import date
35
+
36
+ row[key] = date(1970, 1, 1)
37
+
38
+ elif row[key] == "0000-00-00 00:00:00":
39
+ from datetime import datetime
40
+
41
+ row[key] = datetime(1970, 1, 1, 0, 0, 0)
42
+ return row
43
+
44
+
13
45
  def table_adapter_exclude_columns(cols: list[str]):
46
+ from dlt.common.libs.sql_alchemy import Table
47
+
14
48
  def excluder(table: Table):
15
49
  cols_to_remove = [col for col in table._columns if col.name in cols] # type: ignore
16
50
  for col in cols_to_remove:
17
51
  table._columns.remove(col) # type: ignore
18
52
 
19
53
  return excluder
54
+
55
+
56
+ def create_masking_filter(mask_configs: list[str]):
57
+ from ingestr.src.masking import create_masking_mapper
58
+
59
+ if not mask_configs:
60
+ return lambda x: x
61
+
62
+ return create_masking_mapper(mask_configs)