ingestr 0.13.75__py3-none-any.whl → 0.14.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +22 -3
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +169 -1
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +26 -23
- ingestr/src/facebook_ads/helpers.py +47 -1
- ingestr/src/factory.py +48 -0
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +9 -0
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -163
- ingestr/src/frankfurter/helpers.py +3 -3
- ingestr/src/freshdesk/__init__.py +25 -8
- ingestr/src/freshdesk/freshdesk_client.py +40 -5
- ingestr/src/fundraiseup/__init__.py +49 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +6 -4
- ingestr/src/google_analytics/__init__.py +1 -1
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/hubspot/__init__.py +6 -12
- ingestr/src/influxdb/__init__.py +1 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/klaviyo/__init__.py +5 -5
- ingestr/src/linear/__init__.py +553 -116
- ingestr/src/linear/helpers.py +77 -38
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +5 -2
- ingestr/src/mongodb/helpers.py +384 -10
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +15 -8
- ingestr/src/shopify/__init__.py +1 -1
- ingestr/src/smartsheets/__init__.py +33 -5
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/sources.py +1418 -54
- ingestr/src/stripe_analytics/__init__.py +2 -19
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/tests/unit/test_smartsheets.py +6 -9
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/METADATA +24 -12
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/RECORD +79 -37
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/WHEEL +0 -0
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.75.dist-info → ingestr-0.14.98.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -22,13 +22,8 @@ from .settings import (
|
|
|
22
22
|
DEFAULT_ADCREATIVE_FIELDS,
|
|
23
23
|
DEFAULT_ADSET_FIELDS,
|
|
24
24
|
DEFAULT_CAMPAIGN_FIELDS,
|
|
25
|
-
DEFAULT_INSIGHT_FIELDS,
|
|
26
25
|
DEFAULT_LEAD_FIELDS,
|
|
27
26
|
INSIGHT_FIELDS_TYPES,
|
|
28
|
-
INSIGHTS_BREAKDOWNS_OPTIONS,
|
|
29
|
-
INSIGHTS_PRIMARY_KEY,
|
|
30
|
-
INVALID_INSIGHTS_FIELDS,
|
|
31
|
-
TInsightsBreakdownOptions,
|
|
32
27
|
TInsightsLevels,
|
|
33
28
|
)
|
|
34
29
|
|
|
@@ -106,10 +101,9 @@ def facebook_insights_source(
|
|
|
106
101
|
account_id: str = dlt.config.value,
|
|
107
102
|
access_token: str = dlt.secrets.value,
|
|
108
103
|
initial_load_past_days: int = 1,
|
|
109
|
-
|
|
110
|
-
|
|
104
|
+
dimensions: Sequence[str] = None,
|
|
105
|
+
fields: Sequence[str] = None,
|
|
111
106
|
time_increment_days: int = 1,
|
|
112
|
-
breakdowns: TInsightsBreakdownOptions = "ads_insights",
|
|
113
107
|
action_breakdowns: Sequence[str] = ALL_ACTION_BREAKDOWNS,
|
|
114
108
|
level: TInsightsLevels = "ad",
|
|
115
109
|
action_attribution_windows: Sequence[str] = ALL_ACTION_ATTRIBUTION_WINDOWS,
|
|
@@ -118,6 +112,9 @@ def facebook_insights_source(
|
|
|
118
112
|
app_api_version: str = None,
|
|
119
113
|
start_date: pendulum.DateTime | None = None,
|
|
120
114
|
end_date: pendulum.DateTime | None = None,
|
|
115
|
+
insights_max_wait_to_finish_seconds: int = 60 * 60 * 4,
|
|
116
|
+
insights_max_wait_to_start_seconds: int = 60 * 30,
|
|
117
|
+
insights_max_async_sleep_seconds: int = 20,
|
|
121
118
|
) -> DltResource:
|
|
122
119
|
"""Incrementally loads insight reports with defined granularity level, fields, breakdowns etc.
|
|
123
120
|
|
|
@@ -153,14 +150,19 @@ def facebook_insights_source(
|
|
|
153
150
|
if start_date is None:
|
|
154
151
|
start_date = pendulum.today().subtract(days=initial_load_past_days)
|
|
155
152
|
|
|
153
|
+
if dimensions is None:
|
|
154
|
+
dimensions = []
|
|
155
|
+
if fields is None:
|
|
156
|
+
fields = []
|
|
157
|
+
|
|
156
158
|
columns = {}
|
|
157
159
|
for field in fields:
|
|
158
160
|
if field in INSIGHT_FIELDS_TYPES:
|
|
159
161
|
columns[field] = INSIGHT_FIELDS_TYPES[field]
|
|
160
162
|
|
|
161
163
|
@dlt.resource(
|
|
162
|
-
primary_key=INSIGHTS_PRIMARY_KEY,
|
|
163
164
|
write_disposition="merge",
|
|
165
|
+
merge_key="date_start",
|
|
164
166
|
columns=columns,
|
|
165
167
|
)
|
|
166
168
|
def facebook_insights(
|
|
@@ -175,25 +177,24 @@ def facebook_insights_source(
|
|
|
175
177
|
),
|
|
176
178
|
) -> Iterator[TDataItems]:
|
|
177
179
|
start_date = date_start.last_value
|
|
178
|
-
|
|
179
|
-
pendulum.instance(date_start.end_value)
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
180
|
+
if date_start.end_value:
|
|
181
|
+
end_date_val = pendulum.instance(date_start.end_value)
|
|
182
|
+
|
|
183
|
+
end_date = (
|
|
184
|
+
end_date_val
|
|
185
|
+
if isinstance(end_date_val, pendulum.Date)
|
|
186
|
+
else end_date_val.date()
|
|
187
|
+
)
|
|
188
|
+
else:
|
|
189
|
+
end_date = pendulum.now().date()
|
|
183
190
|
|
|
184
191
|
while start_date <= end_date:
|
|
185
192
|
query = {
|
|
186
193
|
"level": level,
|
|
187
194
|
"action_breakdowns": list(action_breakdowns),
|
|
188
|
-
"breakdowns":
|
|
189
|
-
INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["breakdowns"]
|
|
190
|
-
),
|
|
195
|
+
"breakdowns": dimensions,
|
|
191
196
|
"limit": batch_size,
|
|
192
|
-
"fields":
|
|
193
|
-
set(fields)
|
|
194
|
-
.union(INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["fields"])
|
|
195
|
-
.difference(INVALID_INSIGHTS_FIELDS)
|
|
196
|
-
),
|
|
197
|
+
"fields": fields,
|
|
197
198
|
"time_increment": time_increment_days,
|
|
198
199
|
"action_attribution_windows": list(action_attribution_windows),
|
|
199
200
|
"time_ranges": [
|
|
@@ -207,7 +208,9 @@ def facebook_insights_source(
|
|
|
207
208
|
}
|
|
208
209
|
job = execute_job(
|
|
209
210
|
account.get_insights(params=query, is_async=True),
|
|
210
|
-
insights_max_async_sleep_seconds=
|
|
211
|
+
insights_max_async_sleep_seconds=insights_max_async_sleep_seconds,
|
|
212
|
+
insights_max_wait_to_finish_seconds=insights_max_wait_to_finish_seconds,
|
|
213
|
+
insights_max_wait_to_start_seconds=insights_max_wait_to_start_seconds,
|
|
211
214
|
)
|
|
212
215
|
output = list(map(process_report_item, job.get_result()))
|
|
213
216
|
yield output
|
|
@@ -144,7 +144,7 @@ def execute_job(
|
|
|
144
144
|
raise InsightsJobTimeout(
|
|
145
145
|
"facebook_insights",
|
|
146
146
|
pretty_error_message.format(
|
|
147
|
-
job_id, insights_max_wait_to_finish_seconds
|
|
147
|
+
job_id, insights_max_wait_to_finish_seconds
|
|
148
148
|
),
|
|
149
149
|
)
|
|
150
150
|
|
|
@@ -229,3 +229,49 @@ def notify_on_token_expiration(access_token_expires_at: int = None) -> None:
|
|
|
229
229
|
logger.error(
|
|
230
230
|
f"Access Token expires in {humanize.precisedelta(pendulum.now() - expires_at)}. Replace the token now!"
|
|
231
231
|
)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def parse_insights_table_to_source_kwargs(table: str) -> DictStrAny:
|
|
235
|
+
import typing
|
|
236
|
+
|
|
237
|
+
from ingestr.src.facebook_ads.settings import (
|
|
238
|
+
INSIGHTS_BREAKDOWNS_OPTIONS,
|
|
239
|
+
TInsightsBreakdownOptions,
|
|
240
|
+
TInsightsLevels,
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
parts = table.split(":")
|
|
244
|
+
|
|
245
|
+
source_kwargs = {}
|
|
246
|
+
|
|
247
|
+
breakdown_type = parts[1]
|
|
248
|
+
|
|
249
|
+
valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
|
|
250
|
+
if breakdown_type in valid_breakdowns:
|
|
251
|
+
dimensions = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["breakdowns"]
|
|
252
|
+
fields = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["fields"]
|
|
253
|
+
source_kwargs["dimensions"] = dimensions
|
|
254
|
+
source_kwargs["fields"] = fields
|
|
255
|
+
else:
|
|
256
|
+
dimensions = breakdown_type.split(",")
|
|
257
|
+
valid_levels = list(typing.get_args(TInsightsLevels))
|
|
258
|
+
level = None
|
|
259
|
+
for valid_level in reversed(valid_levels):
|
|
260
|
+
if valid_level in dimensions:
|
|
261
|
+
level = valid_level
|
|
262
|
+
dimensions.remove(valid_level)
|
|
263
|
+
break
|
|
264
|
+
|
|
265
|
+
source_kwargs["level"] = level
|
|
266
|
+
source_kwargs["dimensions"] = dimensions
|
|
267
|
+
|
|
268
|
+
# If custom metrics are provided, parse them
|
|
269
|
+
if len(parts) == 3:
|
|
270
|
+
fields = [f.strip() for f in parts[2].split(",") if f.strip()]
|
|
271
|
+
if not fields:
|
|
272
|
+
raise ValueError(
|
|
273
|
+
"Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
|
|
274
|
+
)
|
|
275
|
+
source_kwargs["fields"] = fields
|
|
276
|
+
|
|
277
|
+
return source_kwargs
|
ingestr/src/factory.py
CHANGED
|
@@ -11,7 +11,10 @@ from ingestr.src.destinations import (
|
|
|
11
11
|
CsvDestination,
|
|
12
12
|
DatabricksDestination,
|
|
13
13
|
DuckDBDestination,
|
|
14
|
+
ElasticsearchDestination,
|
|
14
15
|
GCSDestination,
|
|
16
|
+
MongoDBDestination,
|
|
17
|
+
MotherduckDestination,
|
|
15
18
|
MsSQLDestination,
|
|
16
19
|
MySqlDestination,
|
|
17
20
|
PostgresDestination,
|
|
@@ -20,10 +23,13 @@ from ingestr.src.destinations import (
|
|
|
20
23
|
SnowflakeDestination,
|
|
21
24
|
SqliteDestination,
|
|
22
25
|
SynapseDestination,
|
|
26
|
+
TrinoDestination,
|
|
23
27
|
)
|
|
24
28
|
from ingestr.src.sources import (
|
|
25
29
|
AdjustSource,
|
|
26
30
|
AirtableSource,
|
|
31
|
+
AlliumSource,
|
|
32
|
+
AnthropicSource,
|
|
27
33
|
AppleAppStoreSource,
|
|
28
34
|
ApplovinMaxSource,
|
|
29
35
|
AppLovinSource,
|
|
@@ -33,45 +39,60 @@ from ingestr.src.sources import (
|
|
|
33
39
|
AttioSource,
|
|
34
40
|
ChessSource,
|
|
35
41
|
ClickupSource,
|
|
42
|
+
CouchbaseSource,
|
|
43
|
+
CursorSource,
|
|
44
|
+
DoceboSource,
|
|
36
45
|
DynamoDBSource,
|
|
37
46
|
ElasticsearchSource,
|
|
38
47
|
FacebookAdsSource,
|
|
48
|
+
FluxxSource,
|
|
39
49
|
FrankfurterSource,
|
|
40
50
|
FreshdeskSource,
|
|
51
|
+
FundraiseupSource,
|
|
41
52
|
GCSSource,
|
|
42
53
|
GitHubSource,
|
|
43
54
|
GoogleAdsSource,
|
|
44
55
|
GoogleAnalyticsSource,
|
|
45
56
|
GoogleSheetsSource,
|
|
46
57
|
GorgiasSource,
|
|
58
|
+
HostawaySource,
|
|
59
|
+
HttpSource,
|
|
47
60
|
HubspotSource,
|
|
48
61
|
InfluxDBSource,
|
|
62
|
+
IntercomSource,
|
|
49
63
|
IsocPulseSource,
|
|
64
|
+
JiraSource,
|
|
50
65
|
KafkaSource,
|
|
51
66
|
KinesisSource,
|
|
52
67
|
KlaviyoSource,
|
|
53
68
|
LinearSource,
|
|
54
69
|
LinkedInAdsSource,
|
|
55
70
|
LocalCsvSource,
|
|
71
|
+
MailchimpSource,
|
|
56
72
|
MixpanelSource,
|
|
73
|
+
MondaySource,
|
|
57
74
|
MongoDbSource,
|
|
58
75
|
NotionSource,
|
|
59
76
|
PersonioSource,
|
|
60
77
|
PhantombusterSource,
|
|
61
78
|
PinterestSource,
|
|
62
79
|
PipedriveSource,
|
|
80
|
+
PlusVibeAISource,
|
|
63
81
|
QuickBooksSource,
|
|
82
|
+
RevenueCatSource,
|
|
64
83
|
S3Source,
|
|
65
84
|
SalesforceSource,
|
|
66
85
|
SFTPSource,
|
|
67
86
|
ShopifySource,
|
|
68
87
|
SlackSource,
|
|
69
88
|
SmartsheetSource,
|
|
89
|
+
SocrataSource,
|
|
70
90
|
SolidgateSource,
|
|
71
91
|
SqlSource,
|
|
72
92
|
StripeAnalyticsSource,
|
|
73
93
|
TikTokSource,
|
|
74
94
|
TrustpilotSource,
|
|
95
|
+
WiseSource,
|
|
75
96
|
ZendeskSource,
|
|
76
97
|
ZoomSource,
|
|
77
98
|
)
|
|
@@ -85,6 +106,8 @@ SQL_SOURCE_SCHEMES = [
|
|
|
85
106
|
"mysql",
|
|
86
107
|
"mysql+pymysql",
|
|
87
108
|
"mysql+mysqlconnector",
|
|
109
|
+
"md",
|
|
110
|
+
"motherduck",
|
|
88
111
|
"postgres",
|
|
89
112
|
"postgresql",
|
|
90
113
|
"postgresql+psycopg2",
|
|
@@ -99,6 +122,7 @@ SQL_SOURCE_SCHEMES = [
|
|
|
99
122
|
"databricks",
|
|
100
123
|
"db2",
|
|
101
124
|
"spanner",
|
|
125
|
+
"trino",
|
|
102
126
|
]
|
|
103
127
|
|
|
104
128
|
|
|
@@ -137,7 +161,14 @@ class SourceDestinationFactory:
|
|
|
137
161
|
source_scheme: str
|
|
138
162
|
destination_scheme: str
|
|
139
163
|
sources: Dict[str, Type[SourceProtocol]] = {
|
|
164
|
+
"allium": AlliumSource,
|
|
165
|
+
"anthropic": AnthropicSource,
|
|
140
166
|
"csv": LocalCsvSource,
|
|
167
|
+
"couchbase": CouchbaseSource,
|
|
168
|
+
"cursor": CursorSource,
|
|
169
|
+
"docebo": DoceboSource,
|
|
170
|
+
"http": HttpSource,
|
|
171
|
+
"https": HttpSource,
|
|
141
172
|
"mongodb": MongoDbSource,
|
|
142
173
|
"mongodb+srv": MongoDbSource,
|
|
143
174
|
"notion": NotionSource,
|
|
@@ -148,8 +179,12 @@ class SourceDestinationFactory:
|
|
|
148
179
|
"chess": ChessSource,
|
|
149
180
|
"stripe": StripeAnalyticsSource,
|
|
150
181
|
"facebookads": FacebookAdsSource,
|
|
182
|
+
"fluxx": FluxxSource,
|
|
151
183
|
"slack": SlackSource,
|
|
184
|
+
"hostaway": HostawaySource,
|
|
152
185
|
"hubspot": HubspotSource,
|
|
186
|
+
"intercom": IntercomSource,
|
|
187
|
+
"jira": JiraSource,
|
|
153
188
|
"airtable": AirtableSource,
|
|
154
189
|
"klaviyo": KlaviyoSource,
|
|
155
190
|
"mixpanel": MixpanelSource,
|
|
@@ -176,6 +211,7 @@ class SourceDestinationFactory:
|
|
|
176
211
|
"pipedrive": PipedriveSource,
|
|
177
212
|
"frankfurter": FrankfurterSource,
|
|
178
213
|
"freshdesk": FreshdeskSource,
|
|
214
|
+
"fundraiseup": FundraiseupSource,
|
|
179
215
|
"trustpilot": TrustpilotSource,
|
|
180
216
|
"phantombuster": PhantombusterSource,
|
|
181
217
|
"elasticsearch": ElasticsearchSource,
|
|
@@ -186,15 +222,23 @@ class SourceDestinationFactory:
|
|
|
186
222
|
"smartsheet": SmartsheetSource,
|
|
187
223
|
"sftp": SFTPSource,
|
|
188
224
|
"pinterest": PinterestSource,
|
|
225
|
+
"revenuecat": RevenueCatSource,
|
|
226
|
+
"socrata": SocrataSource,
|
|
189
227
|
"zoom": ZoomSource,
|
|
190
228
|
"clickup": ClickupSource,
|
|
191
229
|
"influxdb": InfluxDBSource,
|
|
230
|
+
"wise": WiseSource,
|
|
231
|
+
"plusvibeai": PlusVibeAISource,
|
|
232
|
+
"monday": MondaySource,
|
|
233
|
+
"mailchimp": MailchimpSource,
|
|
192
234
|
}
|
|
193
235
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
194
236
|
"bigquery": BigQueryDestination,
|
|
195
237
|
"cratedb": CrateDBDestination,
|
|
196
238
|
"databricks": DatabricksDestination,
|
|
197
239
|
"duckdb": DuckDBDestination,
|
|
240
|
+
"motherduck": MotherduckDestination,
|
|
241
|
+
"md": MotherduckDestination,
|
|
198
242
|
"mssql": MsSQLDestination,
|
|
199
243
|
"postgres": PostgresDestination,
|
|
200
244
|
"postgresql": PostgresDestination,
|
|
@@ -208,11 +252,15 @@ class SourceDestinationFactory:
|
|
|
208
252
|
"athena": AthenaDestination,
|
|
209
253
|
"clickhouse+native": ClickhouseDestination,
|
|
210
254
|
"clickhouse": ClickhouseDestination,
|
|
255
|
+
"elasticsearch": ElasticsearchDestination,
|
|
256
|
+
"mongodb": MongoDBDestination,
|
|
257
|
+
"mongodb+srv": MongoDBDestination,
|
|
211
258
|
"s3": S3Destination,
|
|
212
259
|
"gs": GCSDestination,
|
|
213
260
|
"sqlite": SqliteDestination,
|
|
214
261
|
"mysql": MySqlDestination,
|
|
215
262
|
"mysql+pymysql": MySqlDestination,
|
|
263
|
+
"trino": TrinoDestination,
|
|
216
264
|
}
|
|
217
265
|
|
|
218
266
|
def __init__(self, source_uri: str, destination_uri: str):
|
|
@@ -37,9 +37,14 @@ def readers(
|
|
|
37
37
|
file_glob (str, optional): The filter to apply to the files in glob format. by default lists all files in bucket_url non-recursively
|
|
38
38
|
"""
|
|
39
39
|
filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
|
|
41
|
+
# NOTE: incremental support is disabled until we can figure out
|
|
42
|
+
# how to support incremental loads per matching file, rather
|
|
43
|
+
# than a blanket threshold.
|
|
44
|
+
#
|
|
45
|
+
# filesystem_resource.apply_hints(
|
|
46
|
+
# incremental=dlt.sources.incremental("modification_date"),
|
|
47
|
+
# )
|
|
43
48
|
return (
|
|
44
49
|
filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
|
|
45
50
|
filesystem_resource | dlt.transformer(name="read_jsonl")(_read_jsonl),
|
ingestr/src/filters.py
CHANGED
|
@@ -51,3 +51,12 @@ def table_adapter_exclude_columns(cols: list[str]):
|
|
|
51
51
|
table._columns.remove(col) # type: ignore
|
|
52
52
|
|
|
53
53
|
return excluder
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def create_masking_filter(mask_configs: list[str]):
|
|
57
|
+
from ingestr.src.masking import create_masking_mapper
|
|
58
|
+
|
|
59
|
+
if not mask_configs:
|
|
60
|
+
return lambda x: x
|
|
61
|
+
|
|
62
|
+
return create_masking_mapper(mask_configs)
|