ingestr 0.13.75__py3-none-any.whl → 0.14.98__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of ingestr has been flagged as potentially problematic.

Files changed (79)
  1. ingestr/main.py +22 -3
  2. ingestr/src/adjust/__init__.py +4 -4
  3. ingestr/src/allium/__init__.py +128 -0
  4. ingestr/src/anthropic/__init__.py +277 -0
  5. ingestr/src/anthropic/helpers.py +525 -0
  6. ingestr/src/appstore/__init__.py +1 -0
  7. ingestr/src/asana_source/__init__.py +1 -1
  8. ingestr/src/buildinfo.py +1 -1
  9. ingestr/src/chess/__init__.py +1 -1
  10. ingestr/src/couchbase_source/__init__.py +118 -0
  11. ingestr/src/couchbase_source/helpers.py +135 -0
  12. ingestr/src/cursor/__init__.py +83 -0
  13. ingestr/src/cursor/helpers.py +188 -0
  14. ingestr/src/destinations.py +169 -1
  15. ingestr/src/docebo/__init__.py +589 -0
  16. ingestr/src/docebo/client.py +435 -0
  17. ingestr/src/docebo/helpers.py +97 -0
  18. ingestr/src/elasticsearch/helpers.py +138 -0
  19. ingestr/src/errors.py +8 -0
  20. ingestr/src/facebook_ads/__init__.py +26 -23
  21. ingestr/src/facebook_ads/helpers.py +47 -1
  22. ingestr/src/factory.py +48 -0
  23. ingestr/src/filesystem/__init__.py +8 -3
  24. ingestr/src/filters.py +9 -0
  25. ingestr/src/fluxx/__init__.py +9906 -0
  26. ingestr/src/fluxx/helpers.py +209 -0
  27. ingestr/src/frankfurter/__init__.py +157 -163
  28. ingestr/src/frankfurter/helpers.py +3 -3
  29. ingestr/src/freshdesk/__init__.py +25 -8
  30. ingestr/src/freshdesk/freshdesk_client.py +40 -5
  31. ingestr/src/fundraiseup/__init__.py +49 -0
  32. ingestr/src/fundraiseup/client.py +81 -0
  33. ingestr/src/github/__init__.py +6 -4
  34. ingestr/src/google_analytics/__init__.py +1 -1
  35. ingestr/src/hostaway/__init__.py +302 -0
  36. ingestr/src/hostaway/client.py +288 -0
  37. ingestr/src/http/__init__.py +35 -0
  38. ingestr/src/http/readers.py +114 -0
  39. ingestr/src/hubspot/__init__.py +6 -12
  40. ingestr/src/influxdb/__init__.py +1 -0
  41. ingestr/src/intercom/__init__.py +142 -0
  42. ingestr/src/intercom/helpers.py +674 -0
  43. ingestr/src/intercom/settings.py +279 -0
  44. ingestr/src/jira_source/__init__.py +340 -0
  45. ingestr/src/jira_source/helpers.py +439 -0
  46. ingestr/src/jira_source/settings.py +170 -0
  47. ingestr/src/klaviyo/__init__.py +5 -5
  48. ingestr/src/linear/__init__.py +553 -116
  49. ingestr/src/linear/helpers.py +77 -38
  50. ingestr/src/mailchimp/__init__.py +126 -0
  51. ingestr/src/mailchimp/helpers.py +226 -0
  52. ingestr/src/mailchimp/settings.py +164 -0
  53. ingestr/src/masking.py +344 -0
  54. ingestr/src/monday/__init__.py +246 -0
  55. ingestr/src/monday/helpers.py +392 -0
  56. ingestr/src/monday/settings.py +328 -0
  57. ingestr/src/mongodb/__init__.py +5 -2
  58. ingestr/src/mongodb/helpers.py +384 -10
  59. ingestr/src/plusvibeai/__init__.py +335 -0
  60. ingestr/src/plusvibeai/helpers.py +544 -0
  61. ingestr/src/plusvibeai/settings.py +252 -0
  62. ingestr/src/revenuecat/__init__.py +83 -0
  63. ingestr/src/revenuecat/helpers.py +237 -0
  64. ingestr/src/salesforce/__init__.py +15 -8
  65. ingestr/src/shopify/__init__.py +1 -1
  66. ingestr/src/smartsheets/__init__.py +33 -5
  67. ingestr/src/socrata_source/__init__.py +83 -0
  68. ingestr/src/socrata_source/helpers.py +85 -0
  69. ingestr/src/socrata_source/settings.py +8 -0
  70. ingestr/src/sources.py +1418 -54
  71. ingestr/src/stripe_analytics/__init__.py +2 -19
  72. ingestr/src/wise/__init__.py +68 -0
  73. ingestr/src/wise/client.py +63 -0
  74. ingestr/tests/unit/test_smartsheets.py +6 -9
  75. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/METADATA +24 -12
  76. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/RECORD +79 -37
  77. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/WHEEL +0 -0
  78. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/entry_points.txt +0 -0
  79. {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/facebook_ads/__init__.py CHANGED
@@ -22,13 +22,8 @@ from .settings import (
     DEFAULT_ADCREATIVE_FIELDS,
     DEFAULT_ADSET_FIELDS,
     DEFAULT_CAMPAIGN_FIELDS,
-    DEFAULT_INSIGHT_FIELDS,
     DEFAULT_LEAD_FIELDS,
     INSIGHT_FIELDS_TYPES,
-    INSIGHTS_BREAKDOWNS_OPTIONS,
-    INSIGHTS_PRIMARY_KEY,
-    INVALID_INSIGHTS_FIELDS,
-    TInsightsBreakdownOptions,
     TInsightsLevels,
 )
 
@@ -106,10 +101,9 @@ def facebook_insights_source(
     account_id: str = dlt.config.value,
     access_token: str = dlt.secrets.value,
     initial_load_past_days: int = 1,
-    fields: Sequence[str] = DEFAULT_INSIGHT_FIELDS,
-    attribution_window_days_lag: int = 7,
+    dimensions: Sequence[str] = None,
+    fields: Sequence[str] = None,
     time_increment_days: int = 1,
-    breakdowns: TInsightsBreakdownOptions = "ads_insights",
     action_breakdowns: Sequence[str] = ALL_ACTION_BREAKDOWNS,
     level: TInsightsLevels = "ad",
     action_attribution_windows: Sequence[str] = ALL_ACTION_ATTRIBUTION_WINDOWS,
@@ -118,6 +112,9 @@ def facebook_insights_source(
     app_api_version: str = None,
     start_date: pendulum.DateTime | None = None,
     end_date: pendulum.DateTime | None = None,
+    insights_max_wait_to_finish_seconds: int = 60 * 60 * 4,
+    insights_max_wait_to_start_seconds: int = 60 * 30,
+    insights_max_async_sleep_seconds: int = 20,
 ) -> DltResource:
     """Incrementally loads insight reports with defined granularity level, fields, breakdowns etc.
 
@@ -153,14 +150,19 @@ def facebook_insights_source(
     if start_date is None:
         start_date = pendulum.today().subtract(days=initial_load_past_days)
 
+    if dimensions is None:
+        dimensions = []
+    if fields is None:
+        fields = []
+
     columns = {}
     for field in fields:
         if field in INSIGHT_FIELDS_TYPES:
             columns[field] = INSIGHT_FIELDS_TYPES[field]
 
     @dlt.resource(
-        primary_key=INSIGHTS_PRIMARY_KEY,
         write_disposition="merge",
+        merge_key="date_start",
         columns=columns,
     )
     def facebook_insights(
@@ -175,25 +177,24 @@ def facebook_insights_source(
         ),
     ) -> Iterator[TDataItems]:
         start_date = date_start.last_value
-        end_date = (
-            pendulum.instance(date_start.end_value)
-            if date_start.end_value
-            else pendulum.now()
-        )
+        if date_start.end_value:
+            end_date_val = pendulum.instance(date_start.end_value)
+
+            end_date = (
+                end_date_val
+                if isinstance(end_date_val, pendulum.Date)
+                else end_date_val.date()
+            )
+        else:
+            end_date = pendulum.now().date()
 
         while start_date <= end_date:
             query = {
                 "level": level,
                 "action_breakdowns": list(action_breakdowns),
-                "breakdowns": list(
-                    INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["breakdowns"]
-                ),
+                "breakdowns": dimensions,
                 "limit": batch_size,
-                "fields": list(
-                    set(fields)
-                    .union(INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["fields"])
-                    .difference(INVALID_INSIGHTS_FIELDS)
-                ),
+                "fields": fields,
                 "time_increment": time_increment_days,
                 "action_attribution_windows": list(action_attribution_windows),
                 "time_ranges": [
@@ -207,7 +208,9 @@ def facebook_insights_source(
             }
             job = execute_job(
                 account.get_insights(params=query, is_async=True),
-                insights_max_async_sleep_seconds=20,
+                insights_max_async_sleep_seconds=insights_max_async_sleep_seconds,
+                insights_max_wait_to_finish_seconds=insights_max_wait_to_finish_seconds,
+                insights_max_wait_to_start_seconds=insights_max_wait_to_start_seconds,
             )
             output = list(map(process_report_item, job.get_result()))
             yield output
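
For context, a minimal sketch of how the reworked source might be called after this change. It is based only on the signature shown above; the account id, access token, field names, and the dlt pipeline wiring are illustrative placeholders rather than values taken from this release.

import dlt

from ingestr.src.facebook_ads import facebook_insights_source

source = facebook_insights_source(
    account_id="1234567890",        # placeholder ad account id
    access_token="EAAB...",         # placeholder token
    dimensions=["country"],         # passed straight through as the "breakdowns" query field
    fields=["impressions", "spend", "date_start"],  # illustrative metric list
    level="ad",
    insights_max_wait_to_finish_seconds=60 * 60,    # the async-job timeouts are now per-call arguments
)

pipeline = dlt.pipeline(pipeline_name="fb_insights", destination="duckdb", dataset_name="facebook_insights")
pipeline.run(source)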
ingestr/src/facebook_ads/helpers.py CHANGED
@@ -144,7 +144,7 @@ def execute_job(
             raise InsightsJobTimeout(
                 "facebook_insights",
                 pretty_error_message.format(
-                    job_id, insights_max_wait_to_finish_seconds // 60
+                    job_id, insights_max_wait_to_finish_seconds
                 ),
             )
 
@@ -229,3 +229,49 @@ def notify_on_token_expiration(access_token_expires_at: int = None) -> None:
         logger.error(
             f"Access Token expires in {humanize.precisedelta(pendulum.now() - expires_at)}. Replace the token now!"
         )
+
+
+def parse_insights_table_to_source_kwargs(table: str) -> DictStrAny:
+    import typing
+
+    from ingestr.src.facebook_ads.settings import (
+        INSIGHTS_BREAKDOWNS_OPTIONS,
+        TInsightsBreakdownOptions,
+        TInsightsLevels,
+    )
+
+    parts = table.split(":")
+
+    source_kwargs = {}
+
+    breakdown_type = parts[1]
+
+    valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
+    if breakdown_type in valid_breakdowns:
+        dimensions = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["breakdowns"]
+        fields = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["fields"]
+        source_kwargs["dimensions"] = dimensions
+        source_kwargs["fields"] = fields
+    else:
+        dimensions = breakdown_type.split(",")
+        valid_levels = list(typing.get_args(TInsightsLevels))
+        level = None
+        for valid_level in reversed(valid_levels):
+            if valid_level in dimensions:
+                level = valid_level
+                dimensions.remove(valid_level)
+                break
+
+        source_kwargs["level"] = level
+        source_kwargs["dimensions"] = dimensions
+
+    # If custom metrics are provided, parse them
+    if len(parts) == 3:
+        fields = [f.strip() for f in parts[2].split(",") if f.strip()]
+        if not fields:
+            raise ValueError(
+                "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
+            )
+        source_kwargs["fields"] = fields
+
+    return source_kwargs
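
To make the new table-string convention concrete, here is a hedged trace of the parser above. The import path assumes the helper lives in facebook_ads/helpers.py as this hunk suggests, and the concrete names "ads_insights", "campaign", "country", "impressions", and "spend" are assumptions about what settings.py defines as valid breakdown presets, levels, and metrics.

from ingestr.src.facebook_ads.helpers import parse_insights_table_to_source_kwargs

# Preset breakdown: the second segment matches a TInsightsBreakdownOptions value,
# so dimensions and fields come from INSIGHTS_BREAKDOWNS_OPTIONS (assumed preset name).
parse_insights_table_to_source_kwargs("facebook_insights:ads_insights")
# -> {"dimensions": [...], "fields": [...]}  taken from the preset

# Custom breakdown: comma-separated dimensions, an optional level name, and
# optional metrics after a second colon.
parse_insights_table_to_source_kwargs("facebook_insights:campaign,country:impressions,spend")
# -> {"level": "campaign", "dimensions": ["country"], "fields": ["impressions", "spend"]}
#    (assuming "campaign" is one of the TInsightsLevels values)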
ingestr/src/factory.py CHANGED
@@ -11,7 +11,10 @@ from ingestr.src.destinations import (
     CsvDestination,
     DatabricksDestination,
     DuckDBDestination,
+    ElasticsearchDestination,
     GCSDestination,
+    MongoDBDestination,
+    MotherduckDestination,
     MsSQLDestination,
     MySqlDestination,
     PostgresDestination,
@@ -20,10 +23,13 @@ from ingestr.src.destinations import (
     SnowflakeDestination,
     SqliteDestination,
     SynapseDestination,
+    TrinoDestination,
 )
 from ingestr.src.sources import (
     AdjustSource,
     AirtableSource,
+    AlliumSource,
+    AnthropicSource,
     AppleAppStoreSource,
     ApplovinMaxSource,
     AppLovinSource,
@@ -33,45 +39,60 @@ from ingestr.src.sources import (
     AttioSource,
     ChessSource,
     ClickupSource,
+    CouchbaseSource,
+    CursorSource,
+    DoceboSource,
     DynamoDBSource,
     ElasticsearchSource,
     FacebookAdsSource,
+    FluxxSource,
     FrankfurterSource,
     FreshdeskSource,
+    FundraiseupSource,
     GCSSource,
     GitHubSource,
     GoogleAdsSource,
     GoogleAnalyticsSource,
     GoogleSheetsSource,
     GorgiasSource,
+    HostawaySource,
+    HttpSource,
     HubspotSource,
     InfluxDBSource,
+    IntercomSource,
     IsocPulseSource,
+    JiraSource,
     KafkaSource,
     KinesisSource,
     KlaviyoSource,
     LinearSource,
     LinkedInAdsSource,
     LocalCsvSource,
+    MailchimpSource,
     MixpanelSource,
+    MondaySource,
     MongoDbSource,
     NotionSource,
     PersonioSource,
     PhantombusterSource,
     PinterestSource,
     PipedriveSource,
+    PlusVibeAISource,
     QuickBooksSource,
+    RevenueCatSource,
     S3Source,
     SalesforceSource,
     SFTPSource,
     ShopifySource,
     SlackSource,
     SmartsheetSource,
+    SocrataSource,
     SolidgateSource,
     SqlSource,
     StripeAnalyticsSource,
     TikTokSource,
     TrustpilotSource,
+    WiseSource,
     ZendeskSource,
     ZoomSource,
 )
@@ -85,6 +106,8 @@ SQL_SOURCE_SCHEMES = [
     "mysql",
     "mysql+pymysql",
     "mysql+mysqlconnector",
+    "md",
+    "motherduck",
     "postgres",
     "postgresql",
     "postgresql+psycopg2",
@@ -99,6 +122,7 @@ SQL_SOURCE_SCHEMES = [
     "databricks",
     "db2",
     "spanner",
+    "trino",
 ]
 
 
@@ -137,7 +161,14 @@ class SourceDestinationFactory:
     source_scheme: str
     destination_scheme: str
     sources: Dict[str, Type[SourceProtocol]] = {
+        "allium": AlliumSource,
+        "anthropic": AnthropicSource,
         "csv": LocalCsvSource,
+        "couchbase": CouchbaseSource,
+        "cursor": CursorSource,
+        "docebo": DoceboSource,
+        "http": HttpSource,
+        "https": HttpSource,
         "mongodb": MongoDbSource,
         "mongodb+srv": MongoDbSource,
         "notion": NotionSource,
@@ -148,8 +179,12 @@ class SourceDestinationFactory:
         "chess": ChessSource,
         "stripe": StripeAnalyticsSource,
         "facebookads": FacebookAdsSource,
+        "fluxx": FluxxSource,
         "slack": SlackSource,
+        "hostaway": HostawaySource,
         "hubspot": HubspotSource,
+        "intercom": IntercomSource,
+        "jira": JiraSource,
         "airtable": AirtableSource,
         "klaviyo": KlaviyoSource,
         "mixpanel": MixpanelSource,
@@ -176,6 +211,7 @@ class SourceDestinationFactory:
         "pipedrive": PipedriveSource,
         "frankfurter": FrankfurterSource,
         "freshdesk": FreshdeskSource,
+        "fundraiseup": FundraiseupSource,
         "trustpilot": TrustpilotSource,
         "phantombuster": PhantombusterSource,
         "elasticsearch": ElasticsearchSource,
@@ -186,15 +222,23 @@ class SourceDestinationFactory:
         "smartsheet": SmartsheetSource,
         "sftp": SFTPSource,
         "pinterest": PinterestSource,
+        "revenuecat": RevenueCatSource,
+        "socrata": SocrataSource,
         "zoom": ZoomSource,
         "clickup": ClickupSource,
         "influxdb": InfluxDBSource,
+        "wise": WiseSource,
+        "plusvibeai": PlusVibeAISource,
+        "monday": MondaySource,
+        "mailchimp": MailchimpSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
         "cratedb": CrateDBDestination,
         "databricks": DatabricksDestination,
         "duckdb": DuckDBDestination,
+        "motherduck": MotherduckDestination,
+        "md": MotherduckDestination,
         "mssql": MsSQLDestination,
         "postgres": PostgresDestination,
         "postgresql": PostgresDestination,
@@ -208,11 +252,15 @@ class SourceDestinationFactory:
         "athena": AthenaDestination,
         "clickhouse+native": ClickhouseDestination,
         "clickhouse": ClickhouseDestination,
+        "elasticsearch": ElasticsearchDestination,
+        "mongodb": MongoDBDestination,
+        "mongodb+srv": MongoDBDestination,
         "s3": S3Destination,
         "gs": GCSDestination,
         "sqlite": SqliteDestination,
         "mysql": MySqlDestination,
         "mysql+pymysql": MySqlDestination,
+        "trino": TrinoDestination,
     }
 
     def __init__(self, source_uri: str, destination_uri: str):
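
A minimal sketch of what these registry additions mean in practice: the factory resolves source and destination classes by URI scheme, so each new key becomes a routable scheme. The URIs below are placeholders, and the direct dictionary lookup is a simplification of what SourceDestinationFactory does internally.

from urllib.parse import urlparse

from ingestr.src.factory import SourceDestinationFactory

source_uri = "monday://?api_token=..."   # placeholder source URI
destination_uri = "md:///analytics"      # "md" and "motherduck" now map to MotherduckDestination

# Simplified view of the dispatch: scheme -> registered class.
source_cls = SourceDestinationFactory.sources[urlparse(source_uri).scheme]                  # MondaySource
destination_cls = SourceDestinationFactory.destinations[urlparse(destination_uri).scheme]   # MotherduckDestination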
ingestr/src/filesystem/__init__.py CHANGED
@@ -37,9 +37,14 @@ def readers(
         file_glob (str, optional): The filter to apply to the files in glob format. by default lists all files in bucket_url non-recursively
     """
     filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
-    filesystem_resource.apply_hints(
-        incremental=dlt.sources.incremental("modification_date"),
-    )
+
+    # NOTE: incremental support is disabled until we can figure out
+    # how to support incremental loads per matching file, rather
+    # than a blanket threshold.
+    #
+    # filesystem_resource.apply_hints(
+    #     incremental=dlt.sources.incremental("modification_date"),
+    # )
     return (
         filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
         filesystem_resource | dlt.transformer(name="read_jsonl")(_read_jsonl),
ingestr/src/filters.py CHANGED
@@ -51,3 +51,12 @@ def table_adapter_exclude_columns(cols: list[str]):
                 table._columns.remove(col)  # type: ignore
 
     return excluder
+
+
+def create_masking_filter(mask_configs: list[str]):
+    from ingestr.src.masking import create_masking_mapper
+
+    if not mask_configs:
+        return lambda x: x
+
+    return create_masking_mapper(mask_configs)
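
A hedged sketch of how the new helper could be wired into a dlt resource: create_masking_filter returns either an identity function or the mapper built by ingestr.src.masking.create_masking_mapper, so it can be attached with dlt's standard add_map hook. The "email:hash" config string is purely illustrative; the actual config syntax is defined in the new masking.py, which this diff does not show.

import dlt

from ingestr.src.filters import create_masking_filter


@dlt.resource(name="users")
def users():
    yield {"id": 1, "email": "jane@example.com"}


# Hypothetical mask config; see ingestr/src/masking.py for the real syntax.
mask = create_masking_filter(["email:hash"])

# add_map applies the returned mapper to every item the resource yields.
masked_users = users().add_map(mask)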