ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin_max/__init__.py +6 -4
  10. ingestr/src/appsflyer/__init__.py +325 -0
  11. ingestr/src/appsflyer/client.py +49 -45
  12. ingestr/src/appstore/__init__.py +1 -0
  13. ingestr/src/arrow/__init__.py +9 -1
  14. ingestr/src/asana_source/__init__.py +1 -1
  15. ingestr/src/attio/__init__.py +102 -0
  16. ingestr/src/attio/helpers.py +65 -0
  17. ingestr/src/blob.py +37 -10
  18. ingestr/src/buildinfo.py +1 -1
  19. ingestr/src/chess/__init__.py +1 -1
  20. ingestr/src/clickup/__init__.py +85 -0
  21. ingestr/src/clickup/helpers.py +47 -0
  22. ingestr/src/collector/spinner.py +43 -0
  23. ingestr/src/couchbase_source/__init__.py +118 -0
  24. ingestr/src/couchbase_source/helpers.py +135 -0
  25. ingestr/src/cursor/__init__.py +83 -0
  26. ingestr/src/cursor/helpers.py +188 -0
  27. ingestr/src/destinations.py +508 -27
  28. ingestr/src/docebo/__init__.py +589 -0
  29. ingestr/src/docebo/client.py +435 -0
  30. ingestr/src/docebo/helpers.py +97 -0
  31. ingestr/src/elasticsearch/__init__.py +80 -0
  32. ingestr/src/elasticsearch/helpers.py +138 -0
  33. ingestr/src/errors.py +8 -0
  34. ingestr/src/facebook_ads/__init__.py +47 -28
  35. ingestr/src/facebook_ads/helpers.py +59 -37
  36. ingestr/src/facebook_ads/settings.py +2 -0
  37. ingestr/src/facebook_ads/utils.py +39 -0
  38. ingestr/src/factory.py +107 -2
  39. ingestr/src/filesystem/__init__.py +8 -3
  40. ingestr/src/filters.py +46 -3
  41. ingestr/src/fluxx/__init__.py +9906 -0
  42. ingestr/src/fluxx/helpers.py +209 -0
  43. ingestr/src/frankfurter/__init__.py +157 -0
  44. ingestr/src/frankfurter/helpers.py +48 -0
  45. ingestr/src/freshdesk/__init__.py +89 -0
  46. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  47. ingestr/src/freshdesk/settings.py +9 -0
  48. ingestr/src/fundraiseup/__init__.py +95 -0
  49. ingestr/src/fundraiseup/client.py +81 -0
  50. ingestr/src/github/__init__.py +41 -6
  51. ingestr/src/github/helpers.py +5 -5
  52. ingestr/src/google_analytics/__init__.py +22 -4
  53. ingestr/src/google_analytics/helpers.py +124 -6
  54. ingestr/src/google_sheets/__init__.py +4 -4
  55. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  56. ingestr/src/hostaway/__init__.py +302 -0
  57. ingestr/src/hostaway/client.py +288 -0
  58. ingestr/src/http/__init__.py +35 -0
  59. ingestr/src/http/readers.py +114 -0
  60. ingestr/src/http_client.py +24 -0
  61. ingestr/src/hubspot/__init__.py +66 -23
  62. ingestr/src/hubspot/helpers.py +52 -22
  63. ingestr/src/hubspot/settings.py +14 -7
  64. ingestr/src/influxdb/__init__.py +46 -0
  65. ingestr/src/influxdb/client.py +34 -0
  66. ingestr/src/intercom/__init__.py +142 -0
  67. ingestr/src/intercom/helpers.py +674 -0
  68. ingestr/src/intercom/settings.py +279 -0
  69. ingestr/src/isoc_pulse/__init__.py +159 -0
  70. ingestr/src/jira_source/__init__.py +340 -0
  71. ingestr/src/jira_source/helpers.py +439 -0
  72. ingestr/src/jira_source/settings.py +170 -0
  73. ingestr/src/kafka/__init__.py +4 -1
  74. ingestr/src/kinesis/__init__.py +139 -0
  75. ingestr/src/kinesis/helpers.py +82 -0
  76. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  77. ingestr/src/linear/__init__.py +634 -0
  78. ingestr/src/linear/helpers.py +111 -0
  79. ingestr/src/linkedin_ads/helpers.py +0 -1
  80. ingestr/src/mailchimp/__init__.py +126 -0
  81. ingestr/src/mailchimp/helpers.py +226 -0
  82. ingestr/src/mailchimp/settings.py +164 -0
  83. ingestr/src/masking.py +344 -0
  84. ingestr/src/mixpanel/__init__.py +62 -0
  85. ingestr/src/mixpanel/client.py +99 -0
  86. ingestr/src/monday/__init__.py +246 -0
  87. ingestr/src/monday/helpers.py +392 -0
  88. ingestr/src/monday/settings.py +328 -0
  89. ingestr/src/mongodb/__init__.py +72 -8
  90. ingestr/src/mongodb/helpers.py +915 -38
  91. ingestr/src/partition.py +32 -0
  92. ingestr/src/phantombuster/__init__.py +65 -0
  93. ingestr/src/phantombuster/client.py +87 -0
  94. ingestr/src/pinterest/__init__.py +82 -0
  95. ingestr/src/pipedrive/__init__.py +198 -0
  96. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  97. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  98. ingestr/src/pipedrive/helpers/pages.py +115 -0
  99. ingestr/src/pipedrive/settings.py +27 -0
  100. ingestr/src/pipedrive/typing.py +3 -0
  101. ingestr/src/plusvibeai/__init__.py +335 -0
  102. ingestr/src/plusvibeai/helpers.py +544 -0
  103. ingestr/src/plusvibeai/settings.py +252 -0
  104. ingestr/src/quickbooks/__init__.py +117 -0
  105. ingestr/src/resource.py +40 -0
  106. ingestr/src/revenuecat/__init__.py +83 -0
  107. ingestr/src/revenuecat/helpers.py +237 -0
  108. ingestr/src/salesforce/__init__.py +15 -8
  109. ingestr/src/shopify/__init__.py +1 -17
  110. ingestr/src/smartsheets/__init__.py +82 -0
  111. ingestr/src/snapchat_ads/__init__.py +489 -0
  112. ingestr/src/snapchat_ads/client.py +72 -0
  113. ingestr/src/snapchat_ads/helpers.py +535 -0
  114. ingestr/src/socrata_source/__init__.py +83 -0
  115. ingestr/src/socrata_source/helpers.py +85 -0
  116. ingestr/src/socrata_source/settings.py +8 -0
  117. ingestr/src/solidgate/__init__.py +219 -0
  118. ingestr/src/solidgate/helpers.py +154 -0
  119. ingestr/src/sources.py +2933 -245
  120. ingestr/src/stripe_analytics/__init__.py +49 -21
  121. ingestr/src/stripe_analytics/helpers.py +286 -1
  122. ingestr/src/stripe_analytics/settings.py +62 -10
  123. ingestr/src/telemetry/event.py +10 -9
  124. ingestr/src/tiktok_ads/__init__.py +12 -6
  125. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  126. ingestr/src/trustpilot/__init__.py +48 -0
  127. ingestr/src/trustpilot/client.py +48 -0
  128. ingestr/src/wise/__init__.py +68 -0
  129. ingestr/src/wise/client.py +63 -0
  130. ingestr/src/zoom/__init__.py +99 -0
  131. ingestr/src/zoom/helpers.py +102 -0
  132. ingestr/tests/unit/test_smartsheets.py +133 -0
  133. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
  134. ingestr-0.14.104.dist-info/RECORD +203 -0
  135. ingestr/src/appsflyer/_init_.py +0 -24
  136. ingestr-0.13.13.dist-info/RECORD +0 -115
  137. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  138. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  139. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/factory.py CHANGED
@@ -7,58 +7,108 @@ from ingestr.src.destinations import (
7
7
  AthenaDestination,
8
8
  BigQueryDestination,
9
9
  ClickhouseDestination,
10
+ CrateDBDestination,
10
11
  CsvDestination,
11
12
  DatabricksDestination,
12
13
  DuckDBDestination,
14
+ ElasticsearchDestination,
15
+ GCSDestination,
16
+ MongoDBDestination,
17
+ MotherduckDestination,
13
18
  MsSQLDestination,
19
+ MySqlDestination,
14
20
  PostgresDestination,
15
21
  RedshiftDestination,
22
+ S3Destination,
16
23
  SnowflakeDestination,
24
+ SqliteDestination,
17
25
  SynapseDestination,
26
+ TrinoDestination,
18
27
  )
19
28
  from ingestr.src.sources import (
20
29
  AdjustSource,
21
30
  AirtableSource,
31
+ AlliumSource,
32
+ AnthropicSource,
22
33
  AppleAppStoreSource,
23
34
  ApplovinMaxSource,
24
35
  AppLovinSource,
25
36
  AppsflyerSource,
26
37
  ArrowMemoryMappedSource,
27
38
  AsanaSource,
39
+ AttioSource,
28
40
  ChessSource,
41
+ ClickupSource,
42
+ CouchbaseSource,
43
+ CursorSource,
44
+ DoceboSource,
29
45
  DynamoDBSource,
46
+ ElasticsearchSource,
30
47
  FacebookAdsSource,
48
+ FluxxSource,
49
+ FrankfurterSource,
50
+ FreshdeskSource,
51
+ FundraiseupSource,
31
52
  GCSSource,
32
53
  GitHubSource,
33
54
  GoogleAdsSource,
34
55
  GoogleAnalyticsSource,
35
56
  GoogleSheetsSource,
36
57
  GorgiasSource,
58
+ HostawaySource,
59
+ HttpSource,
37
60
  HubspotSource,
61
+ InfluxDBSource,
62
+ IntercomSource,
63
+ IsocPulseSource,
64
+ JiraSource,
38
65
  KafkaSource,
66
+ KinesisSource,
39
67
  KlaviyoSource,
68
+ LinearSource,
40
69
  LinkedInAdsSource,
41
70
  LocalCsvSource,
71
+ MailchimpSource,
72
+ MixpanelSource,
73
+ MondaySource,
42
74
  MongoDbSource,
43
75
  NotionSource,
44
76
  PersonioSource,
77
+ PhantombusterSource,
78
+ PinterestSource,
79
+ PipedriveSource,
80
+ PlusVibeAISource,
81
+ QuickBooksSource,
82
+ RevenueCatSource,
45
83
  S3Source,
46
84
  SalesforceSource,
85
+ SFTPSource,
47
86
  ShopifySource,
48
87
  SlackSource,
88
+ SmartsheetSource,
89
+ SnapchatAdsSource,
90
+ SocrataSource,
91
+ SolidgateSource,
49
92
  SqlSource,
50
93
  StripeAnalyticsSource,
51
94
  TikTokSource,
95
+ TrustpilotSource,
96
+ WiseSource,
52
97
  ZendeskSource,
98
+ ZoomSource,
53
99
  )
54
100
 
55
101
  SQL_SOURCE_SCHEMES = [
56
102
  "bigquery",
103
+ "crate",
57
104
  "duckdb",
58
105
  "mssql",
106
+ "mssql+pyodbc",
59
107
  "mysql",
60
108
  "mysql+pymysql",
61
109
  "mysql+mysqlconnector",
110
+ "md",
111
+ "motherduck",
62
112
  "postgres",
63
113
  "postgresql",
64
114
  "postgresql+psycopg2",
@@ -68,8 +118,13 @@ SQL_SOURCE_SCHEMES = [
68
118
  "sqlite",
69
119
  "oracle",
70
120
  "oracle+cx_oracle",
121
+ "oracle+oracledb",
71
122
  "hana",
72
123
  "clickhouse",
124
+ "databricks",
125
+ "db2",
126
+ "spanner",
127
+ "trino",
73
128
  ]
74
129
 
75
130
 
@@ -108,8 +163,16 @@ class SourceDestinationFactory:
108
163
  source_scheme: str
109
164
  destination_scheme: str
110
165
  sources: Dict[str, Type[SourceProtocol]] = {
166
+ "allium": AlliumSource,
167
+ "anthropic": AnthropicSource,
111
168
  "csv": LocalCsvSource,
169
+ "couchbase": CouchbaseSource,
170
+ "cursor": CursorSource,
171
+ "docebo": DoceboSource,
172
+ "http": HttpSource,
173
+ "https": HttpSource,
112
174
  "mongodb": MongoDbSource,
175
+ "mongodb+srv": MongoDbSource,
113
176
  "notion": NotionSource,
114
177
  "gsheets": GoogleSheetsSource,
115
178
  "shopify": ShopifySource,
@@ -118,10 +181,15 @@ class SourceDestinationFactory:
118
181
  "chess": ChessSource,
119
182
  "stripe": StripeAnalyticsSource,
120
183
  "facebookads": FacebookAdsSource,
184
+ "fluxx": FluxxSource,
121
185
  "slack": SlackSource,
186
+ "hostaway": HostawaySource,
122
187
  "hubspot": HubspotSource,
188
+ "intercom": IntercomSource,
189
+ "jira": JiraSource,
123
190
  "airtable": AirtableSource,
124
191
  "klaviyo": KlaviyoSource,
192
+ "mixpanel": MixpanelSource,
125
193
  "appsflyer": AppsflyerSource,
126
194
  "kafka": KafkaSource,
127
195
  "adjust": AdjustSource,
@@ -136,15 +204,44 @@ class SourceDestinationFactory:
136
204
  "appstore": AppleAppStoreSource,
137
205
  "gs": GCSSource,
138
206
  "linkedinads": LinkedInAdsSource,
207
+ "linear": LinearSource,
139
208
  "applovin": AppLovinSource,
140
209
  "applovinmax": ApplovinMaxSource,
141
210
  "salesforce": SalesforceSource,
142
211
  "personio": PersonioSource,
212
+ "kinesis": KinesisSource,
213
+ "pipedrive": PipedriveSource,
214
+ "frankfurter": FrankfurterSource,
215
+ "freshdesk": FreshdeskSource,
216
+ "fundraiseup": FundraiseupSource,
217
+ "trustpilot": TrustpilotSource,
218
+ "phantombuster": PhantombusterSource,
219
+ "elasticsearch": ElasticsearchSource,
220
+ "attio": AttioSource,
221
+ "solidgate": SolidgateSource,
222
+ "quickbooks": QuickBooksSource,
223
+ "isoc-pulse": IsocPulseSource,
224
+ "smartsheet": SmartsheetSource,
225
+ "sftp": SFTPSource,
226
+ "pinterest": PinterestSource,
227
+ "revenuecat": RevenueCatSource,
228
+ "socrata": SocrataSource,
229
+ "snapchatads": SnapchatAdsSource,
230
+ "zoom": ZoomSource,
231
+ "clickup": ClickupSource,
232
+ "influxdb": InfluxDBSource,
233
+ "wise": WiseSource,
234
+ "plusvibeai": PlusVibeAISource,
235
+ "monday": MondaySource,
236
+ "mailchimp": MailchimpSource,
143
237
  }
144
238
  destinations: Dict[str, Type[DestinationProtocol]] = {
145
239
  "bigquery": BigQueryDestination,
240
+ "cratedb": CrateDBDestination,
146
241
  "databricks": DatabricksDestination,
147
242
  "duckdb": DuckDBDestination,
243
+ "motherduck": MotherduckDestination,
244
+ "md": MotherduckDestination,
148
245
  "mssql": MsSQLDestination,
149
246
  "postgres": PostgresDestination,
150
247
  "postgresql": PostgresDestination,
@@ -158,12 +255,20 @@ class SourceDestinationFactory:
158
255
  "athena": AthenaDestination,
159
256
  "clickhouse+native": ClickhouseDestination,
160
257
  "clickhouse": ClickhouseDestination,
258
+ "elasticsearch": ElasticsearchDestination,
259
+ "mongodb": MongoDBDestination,
260
+ "mongodb+srv": MongoDBDestination,
261
+ "s3": S3Destination,
262
+ "gs": GCSDestination,
263
+ "sqlite": SqliteDestination,
264
+ "mysql": MySqlDestination,
265
+ "mysql+pymysql": MySqlDestination,
266
+ "trino": TrinoDestination,
161
267
  }
162
268
 
163
269
  def __init__(self, source_uri: str, destination_uri: str):
164
270
  self.source_uri = source_uri
165
- source_fields = urlparse(source_uri)
166
- self.source_scheme = source_fields.scheme
271
+ self.source_scheme = parse_scheme_from_uri(source_uri)
167
272
 
168
273
  self.destination_uri = destination_uri
169
274
  self.destination_scheme = parse_scheme_from_uri(destination_uri)
ingestr/src/filesystem/__init__.py CHANGED
@@ -37,9 +37,14 @@ def readers(
37
37
  file_glob (str, optional): The filter to apply to the files in glob format. by default lists all files in bucket_url non-recursively
38
38
  """
39
39
  filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
40
- filesystem_resource.apply_hints(
41
- incremental=dlt.sources.incremental("modification_date"),
42
- )
40
+
41
+ # NOTE: incremental support is disabled until we can figure out
42
+ # how to support incremental loads per matching file, rather
43
+ # than a blanket threshold.
44
+ #
45
+ # filesystem_resource.apply_hints(
46
+ # incremental=dlt.sources.incremental("modification_date"),
47
+ # )
43
48
  return (
44
49
  filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
45
50
  filesystem_resource | dlt.transformer(name="read_jsonl")(_read_jsonl),
ingestr/src/filters.py CHANGED
@@ -1,6 +1,3 @@
1
- from dlt.common.libs.sql_alchemy import Table
2
-
3
-
4
1
  def cast_set_to_list(row):
5
2
  # this handles just the sqlalchemy backend for now
6
3
  if isinstance(row, dict):
@@ -10,10 +7,56 @@ def cast_set_to_list(row):
10
7
  return row
11
8
 
12
9
 
10
+ def cast_spanner_types(row):
11
+ if not isinstance(row, dict):
12
+ return row
13
+
14
+ from google.cloud.spanner_v1.data_types import JsonObject
15
+
16
+ for key in row.keys():
17
+ if isinstance(row[key], JsonObject):
18
+ import json
19
+
20
+ row[key] = json.loads(row[key].serialize())
21
+ return row
22
+
23
+
24
+ def handle_mysql_empty_dates(row):
25
+ # MySQL returns empty dates as 0000-00-00, which is not a valid date, we handle them here.
26
+ if not isinstance(row, dict):
27
+ return row
28
+
29
+ for key in row.keys():
30
+ if not isinstance(row[key], str):
31
+ continue
32
+
33
+ if row[key] == "0000-00-00":
34
+ from datetime import date
35
+
36
+ row[key] = date(1970, 1, 1)
37
+
38
+ elif row[key] == "0000-00-00 00:00:00":
39
+ from datetime import datetime
40
+
41
+ row[key] = datetime(1970, 1, 1, 0, 0, 0)
42
+ return row
43
+
44
+
13
45
  def table_adapter_exclude_columns(cols: list[str]):
46
+ from dlt.common.libs.sql_alchemy import Table
47
+
14
48
  def excluder(table: Table):
15
49
  cols_to_remove = [col for col in table._columns if col.name in cols] # type: ignore
16
50
  for col in cols_to_remove:
17
51
  table._columns.remove(col) # type: ignore
18
52
 
19
53
  return excluder
54
+
55
+
56
+ def create_masking_filter(mask_configs: list[str]):
57
+ from ingestr.src.masking import create_masking_mapper
58
+
59
+ if not mask_configs:
60
+ return lambda x: x
61
+
62
+ return create_masking_mapper(mask_configs)