omniload 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omniload/conftest.py +72 -0
- omniload/main.py +810 -0
- omniload/src/.gitignore +10 -0
- omniload/src/adjust/__init__.py +108 -0
- omniload/src/adjust/adjust_helpers.py +122 -0
- omniload/src/airtable/__init__.py +84 -0
- omniload/src/allium/__init__.py +128 -0
- omniload/src/anthropic/__init__.py +277 -0
- omniload/src/anthropic/helpers.py +525 -0
- omniload/src/applovin/__init__.py +316 -0
- omniload/src/applovin_max/__init__.py +117 -0
- omniload/src/appsflyer/__init__.py +325 -0
- omniload/src/appsflyer/client.py +110 -0
- omniload/src/appstore/__init__.py +142 -0
- omniload/src/appstore/client.py +126 -0
- omniload/src/appstore/errors.py +15 -0
- omniload/src/appstore/models.py +117 -0
- omniload/src/appstore/resources.py +179 -0
- omniload/src/arrow/__init__.py +81 -0
- omniload/src/asana_source/__init__.py +281 -0
- omniload/src/asana_source/helpers.py +30 -0
- omniload/src/asana_source/settings.py +158 -0
- omniload/src/attio/__init__.py +102 -0
- omniload/src/attio/helpers.py +65 -0
- omniload/src/blob.py +95 -0
- omniload/src/bruin/__init__.py +76 -0
- omniload/src/chess/__init__.py +180 -0
- omniload/src/chess/helpers.py +35 -0
- omniload/src/chess/settings.py +18 -0
- omniload/src/clickup/__init__.py +85 -0
- omniload/src/clickup/helpers.py +47 -0
- omniload/src/collector/spinner.py +43 -0
- omniload/src/couchbase_source/__init__.py +118 -0
- omniload/src/couchbase_source/helpers.py +135 -0
- omniload/src/cursor/__init__.py +83 -0
- omniload/src/cursor/helpers.py +188 -0
- omniload/src/customer_io/__init__.py +486 -0
- omniload/src/customer_io/helpers.py +530 -0
- omniload/src/destinations.py +982 -0
- omniload/src/docebo/__init__.py +589 -0
- omniload/src/docebo/client.py +435 -0
- omniload/src/docebo/helpers.py +97 -0
- omniload/src/dune/__init__.py +104 -0
- omniload/src/dune/helpers.py +108 -0
- omniload/src/dynamodb/__init__.py +86 -0
- omniload/src/elasticsearch/__init__.py +80 -0
- omniload/src/elasticsearch/helpers.py +141 -0
- omniload/src/errors.py +26 -0
- omniload/src/facebook_ads/__init__.py +403 -0
- omniload/src/facebook_ads/exceptions.py +19 -0
- omniload/src/facebook_ads/helpers.py +296 -0
- omniload/src/facebook_ads/settings.py +224 -0
- omniload/src/facebook_ads/utils.py +53 -0
- omniload/src/factory.py +305 -0
- omniload/src/filesystem/__init__.py +133 -0
- omniload/src/filesystem/helpers.py +114 -0
- omniload/src/filesystem/readers.py +187 -0
- omniload/src/filters.py +62 -0
- omniload/src/fireflies/__init__.py +151 -0
- omniload/src/fireflies/helpers.py +753 -0
- omniload/src/fluxx/__init__.py +10013 -0
- omniload/src/fluxx/helpers.py +233 -0
- omniload/src/frankfurter/__init__.py +157 -0
- omniload/src/frankfurter/helpers.py +48 -0
- omniload/src/freshdesk/__init__.py +103 -0
- omniload/src/freshdesk/freshdesk_client.py +151 -0
- omniload/src/freshdesk/settings.py +23 -0
- omniload/src/fundraiseup/__init__.py +95 -0
- omniload/src/fundraiseup/client.py +81 -0
- omniload/src/github/__init__.py +202 -0
- omniload/src/github/helpers.py +207 -0
- omniload/src/github/queries.py +129 -0
- omniload/src/github/settings.py +24 -0
- omniload/src/google_ads/__init__.py +198 -0
- omniload/src/google_ads/field.py +17 -0
- omniload/src/google_ads/metrics.py +254 -0
- omniload/src/google_ads/predicates.py +37 -0
- omniload/src/google_ads/reports.py +411 -0
- omniload/src/google_ads/test_google_ads.py +184 -0
- omniload/src/google_analytics/__init__.py +144 -0
- omniload/src/google_analytics/helpers.py +312 -0
- omniload/src/google_sheets/README.md +95 -0
- omniload/src/google_sheets/__init__.py +166 -0
- omniload/src/google_sheets/helpers/__init__.py +15 -0
- omniload/src/google_sheets/helpers/api_calls.py +160 -0
- omniload/src/google_sheets/helpers/data_processing.py +316 -0
- omniload/src/gorgias/__init__.py +595 -0
- omniload/src/gorgias/helpers.py +166 -0
- omniload/src/hostaway/__init__.py +302 -0
- omniload/src/hostaway/client.py +288 -0
- omniload/src/http/__init__.py +38 -0
- omniload/src/http/readers.py +146 -0
- omniload/src/http_client.py +24 -0
- omniload/src/hubspot/__init__.py +800 -0
- omniload/src/hubspot/helpers.py +417 -0
- omniload/src/hubspot/settings.py +329 -0
- omniload/src/indeed/__init__.py +153 -0
- omniload/src/indeed/helpers.py +228 -0
- omniload/src/influxdb/__init__.py +46 -0
- omniload/src/influxdb/client.py +34 -0
- omniload/src/intercom/__init__.py +142 -0
- omniload/src/intercom/helpers.py +674 -0
- omniload/src/intercom/settings.py +279 -0
- omniload/src/isoc_pulse/__init__.py +159 -0
- omniload/src/jira_source/__init__.py +377 -0
- omniload/src/jira_source/helpers.py +510 -0
- omniload/src/jira_source/settings.py +184 -0
- omniload/src/kafka/__init__.py +120 -0
- omniload/src/kafka/helpers.py +241 -0
- omniload/src/kinesis/__init__.py +153 -0
- omniload/src/kinesis/helpers.py +96 -0
- omniload/src/klaviyo/__init__.py +237 -0
- omniload/src/klaviyo/client.py +212 -0
- omniload/src/klaviyo/helpers.py +19 -0
- omniload/src/linear/__init__.py +634 -0
- omniload/src/linear/helpers.py +111 -0
- omniload/src/linkedin_ads/__init__.py +266 -0
- omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
- omniload/src/linkedin_ads/helpers.py +246 -0
- omniload/src/loader.py +69 -0
- omniload/src/mailchimp/__init__.py +126 -0
- omniload/src/mailchimp/helpers.py +226 -0
- omniload/src/mailchimp/settings.py +164 -0
- omniload/src/masking.py +344 -0
- omniload/src/mixpanel/__init__.py +62 -0
- omniload/src/mixpanel/client.py +104 -0
- omniload/src/monday/__init__.py +246 -0
- omniload/src/monday/helpers.py +392 -0
- omniload/src/monday/settings.py +325 -0
- omniload/src/mongodb/__init__.py +281 -0
- omniload/src/mongodb/helpers.py +975 -0
- omniload/src/notion/__init__.py +69 -0
- omniload/src/notion/helpers/__init__.py +14 -0
- omniload/src/notion/helpers/client.py +178 -0
- omniload/src/notion/helpers/database.py +92 -0
- omniload/src/notion/settings.py +17 -0
- omniload/src/partition.py +32 -0
- omniload/src/personio/__init__.py +345 -0
- omniload/src/personio/helpers.py +100 -0
- omniload/src/phantombuster/__init__.py +65 -0
- omniload/src/phantombuster/client.py +87 -0
- omniload/src/pinterest/__init__.py +82 -0
- omniload/src/pipedrive/__init__.py +212 -0
- omniload/src/pipedrive/helpers/__init__.py +37 -0
- omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
- omniload/src/pipedrive/helpers/pages.py +129 -0
- omniload/src/pipedrive/settings.py +41 -0
- omniload/src/pipedrive/typing.py +17 -0
- omniload/src/plusvibeai/__init__.py +335 -0
- omniload/src/plusvibeai/helpers.py +544 -0
- omniload/src/plusvibeai/settings.py +252 -0
- omniload/src/primer/__init__.py +45 -0
- omniload/src/primer/helpers.py +79 -0
- omniload/src/quickbooks/__init__.py +117 -0
- omniload/src/reddit_ads/__init__.py +183 -0
- omniload/src/reddit_ads/helpers.py +232 -0
- omniload/src/resource.py +40 -0
- omniload/src/revenuecat/__init__.py +83 -0
- omniload/src/revenuecat/helpers.py +237 -0
- omniload/src/salesforce/__init__.py +170 -0
- omniload/src/salesforce/helpers.py +78 -0
- omniload/src/shopify/__init__.py +1953 -0
- omniload/src/shopify/exceptions.py +17 -0
- omniload/src/shopify/helpers.py +202 -0
- omniload/src/shopify/settings.py +19 -0
- omniload/src/slack/__init__.py +290 -0
- omniload/src/slack/helpers.py +218 -0
- omniload/src/slack/settings.py +36 -0
- omniload/src/smartsheets/__init__.py +82 -0
- omniload/src/snapchat_ads/__init__.py +455 -0
- omniload/src/snapchat_ads/client.py +72 -0
- omniload/src/snapchat_ads/helpers.py +630 -0
- omniload/src/snapchat_ads/settings.py +130 -0
- omniload/src/socrata_source/__init__.py +83 -0
- omniload/src/socrata_source/helpers.py +85 -0
- omniload/src/socrata_source/settings.py +8 -0
- omniload/src/solidgate/__init__.py +219 -0
- omniload/src/solidgate/helpers.py +154 -0
- omniload/src/sources.py +5408 -0
- omniload/src/sql_database/__init__.py +0 -0
- omniload/src/sql_database/callbacks.py +66 -0
- omniload/src/stripe_analytics/__init__.py +183 -0
- omniload/src/stripe_analytics/helpers.py +386 -0
- omniload/src/stripe_analytics/settings.py +80 -0
- omniload/src/table_definition.py +15 -0
- omniload/src/testdata/fakebqcredentials.json +14 -0
- omniload/src/tiktok_ads/__init__.py +150 -0
- omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
- omniload/src/time.py +11 -0
- omniload/src/trustpilot/__init__.py +48 -0
- omniload/src/trustpilot/client.py +48 -0
- omniload/src/version.py +6 -0
- omniload/src/wise/__init__.py +68 -0
- omniload/src/wise/client.py +63 -0
- omniload/src/zendesk/__init__.py +480 -0
- omniload/src/zendesk/helpers/__init__.py +39 -0
- omniload/src/zendesk/helpers/api_helpers.py +119 -0
- omniload/src/zendesk/helpers/credentials.py +68 -0
- omniload/src/zendesk/helpers/talk_api.py +132 -0
- omniload/src/zendesk/settings.py +71 -0
- omniload/src/zoom/__init__.py +99 -0
- omniload/src/zoom/helpers.py +102 -0
- omniload/testdata/.gitignore +2 -0
- omniload/testdata/create_replace.csv +21 -0
- omniload/testdata/delete_insert_expected.csv +6 -0
- omniload/testdata/delete_insert_part1.csv +5 -0
- omniload/testdata/delete_insert_part2.csv +6 -0
- omniload/testdata/merge_expected.csv +5 -0
- omniload/testdata/merge_part1.csv +4 -0
- omniload/testdata/merge_part2.csv +5 -0
- omniload/tests/unit/test_smartsheets.py +133 -0
- omniload-0.0.0.dev0.dist-info/METADATA +439 -0
- omniload-0.0.0.dev0.dist-info/RECORD +218 -0
- omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
- omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
- omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING, Any, Iterator, List, Optional
|
|
16
|
+
|
|
17
|
+
from dlt.common import json
|
|
18
|
+
from dlt.common.typing import copy_sig
|
|
19
|
+
from dlt.sources import DltResource, DltSource, TDataItems
|
|
20
|
+
from dlt.sources.filesystem import FileItemDict
|
|
21
|
+
|
|
22
|
+
from .helpers import fetch_arrow, fetch_json
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _read_csv(
|
|
26
|
+
items: Iterator[FileItemDict], chunksize: int = 10000, **pandas_kwargs: Any
|
|
27
|
+
) -> Iterator[TDataItems]:
|
|
28
|
+
"""Reads csv file with Pandas chunk by chunk.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
chunksize (int): Number of records to read in one chunk
|
|
32
|
+
**pandas_kwargs: Additional keyword arguments passed to Pandas.read_csv
|
|
33
|
+
Returns:
|
|
34
|
+
TDataItem: The file content
|
|
35
|
+
"""
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# apply defaults to pandas kwargs
|
|
39
|
+
kwargs = {**{"header": "infer", "chunksize": chunksize}, **pandas_kwargs}
|
|
40
|
+
|
|
41
|
+
for file_obj in items:
|
|
42
|
+
# Here we use pandas chunksize to read the file in chunks and avoid loading the whole file
|
|
43
|
+
# in memory.
|
|
44
|
+
with file_obj.open() as file:
|
|
45
|
+
for df in pd.read_csv(file, **kwargs):
|
|
46
|
+
yield df.to_dict(orient="records")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _read_csv_headless(
|
|
50
|
+
items: Iterator[FileItemDict],
|
|
51
|
+
chunksize: int = 10000,
|
|
52
|
+
column_names: Optional[List[str]] = None,
|
|
53
|
+
**pandas_kwargs: Any,
|
|
54
|
+
) -> Iterator[TDataItems]:
|
|
55
|
+
"""Reads csv file without headers, using provided column names or generating them.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
chunksize (int): Number of records to read in one chunk
|
|
59
|
+
column_names (list[str], optional): Column names for the CSV. If not provided,
|
|
60
|
+
columns will be named unknown_col_0, unknown_col_1, etc.
|
|
61
|
+
**pandas_kwargs: Additional keyword arguments passed to Pandas.read_csv
|
|
62
|
+
Returns:
|
|
63
|
+
TDataItem: The file content
|
|
64
|
+
"""
|
|
65
|
+
import pandas as pd
|
|
66
|
+
|
|
67
|
+
for file_obj in items:
|
|
68
|
+
with file_obj.open() as file:
|
|
69
|
+
# Determine column names
|
|
70
|
+
if column_names:
|
|
71
|
+
names = column_names
|
|
72
|
+
else:
|
|
73
|
+
# Count columns from first row
|
|
74
|
+
first_row = pd.read_csv(file, header=None, nrows=1)
|
|
75
|
+
num_columns = len(first_row.columns)
|
|
76
|
+
names = [f"unknown_col_{i}" for i in range(num_columns)]
|
|
77
|
+
file.seek(0) # Reset file pointer after reading first row
|
|
78
|
+
|
|
79
|
+
kwargs = {
|
|
80
|
+
**{"header": None, "names": names, "chunksize": chunksize},
|
|
81
|
+
**pandas_kwargs,
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
for df in pd.read_csv(file, **kwargs):
|
|
85
|
+
yield df.to_dict(orient="records")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _read_jsonl(
    items: Iterator[FileItemDict], chunksize: int = 1000
) -> Iterator[TDataItems]:
    """Parse JSON-lines files and yield the decoded records in batches.

    Args:
        items (Iterator[FileItemDict]): Files to read; each is opened via ``open()``.
        chunksize (int, optional): The number of JSON lines to load and yield at once,
            defaults to 1000

    Yields:
        TDataItems: A list of decoded lines. A batch is emitted once it holds
        at least ``chunksize`` records; whatever is left at the end of a file
        is emitted as a final, shorter batch.
    """
    for jsonl_item in items:
        with jsonl_item.open() as stream:
            batch = []
            for raw_line in stream:
                batch.append(json.loadb(raw_line))
                if len(batch) < chunksize:
                    continue
                yield batch
                # Start a fresh list; the yielded one now belongs to the consumer.
                batch = []
        # Flush the remainder so batches never span two files.
        if batch:
            yield batch
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _read_parquet(
    items: Iterator[FileItemDict],
    chunksize: int = 10,
) -> Iterator[TDataItems]:
    """Read parquet files and yield their rows batch by batch.

    Args:
        items (Iterator[FileItemDict]): Files to read; each is opened via ``open()``.
        chunksize (int, optional): Passed as ``batch_size`` to
            ``ParquetFile.iter_batches``, defaults to 10.

    Yields:
        TDataItems: One record batch converted with ``to_pylist()``.
    """
    from pyarrow import parquet as pq

    for parquet_item in items:
        with parquet_item.open() as handle:
            reader = pq.ParquetFile(handle)
            yield from (
                batch.to_pylist()
                for batch in reader.iter_batches(batch_size=chunksize)
            )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _read_csv_duckdb(
    items: Iterator[FileItemDict],
    chunk_size: Optional[int] = 5000,
    use_pyarrow: bool = False,
    **duckdb_kwargs: Any,
) -> Iterator[TDataItems]:
    """Extract data from the given CSV files using the DuckDB engine.

    Each file is loaded with ``duckdb.from_csv_auto`` and the result is
    handed to a fetch helper that produces the yielded chunks.

    Args:
        items (Iterator[FileItemDict]): CSV files to read.
        chunk_size (Optional[int]):
            Forwarded to the fetch helper as the chunk size.
            Defaults to 5000.
        use_pyarrow (bool):
            When True the ``fetch_arrow`` helper is used; otherwise
            (the default) the ``fetch_json`` helper is used.
        duckdb_kwargs (Dict):
            Additional keyword arguments passed to ``duckdb.from_csv_auto``.

    Yields:
        TDataItems: Whatever the selected fetch helper yields for each file.
    """
    import duckdb

    if use_pyarrow:
        fetch_chunks = fetch_arrow
    else:
        fetch_chunks = fetch_json

    for csv_item in items:
        with csv_item.open() as handle:
            relation = duckdb.from_csv_auto(handle, **duckdb_kwargs)  # type: ignore

            # Fetch inside the `with` so the handle is still open while
            # chunks are being produced.
            yield from fetch_chunks(relation, chunk_size)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Under static analysis, ReadersSource is a stub class whose methods borrow
# the signatures of the reader functions via `copy_sig`. At runtime it is
# simply an alias of DltSource.
if TYPE_CHECKING:

    class ReadersSource(DltSource):
        """This is a typing stub that provides docstrings and signatures to the resources in the `readers` source"""

        @copy_sig(_read_csv)
        def read_csv(self) -> DltResource: ...

        @copy_sig(_read_csv_headless)
        def read_csv_headless(self) -> DltResource: ...

        @copy_sig(_read_jsonl)
        def read_jsonl(self) -> DltResource: ...

        @copy_sig(_read_parquet)
        def read_parquet(self) -> DltResource: ...

        @copy_sig(_read_csv_duckdb)
        def read_csv_duckdb(self) -> DltResource: ...

else:
    # No stub needed at runtime.
    ReadersSource = DltSource
|
omniload/src/filters.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
def cast_set_to_list(row):
    """Replace every ``set`` value of a dict row with a ``list``, in place.

    Non-dict rows are returned untouched. Only direct values that are
    ``set`` instances are converted; nested containers are not inspected.

    Returns:
        The same ``row`` object that was passed in.
    """
    # this handles just the sqlalchemy backend for now
    if not isinstance(row, dict):
        return row

    for column, value in row.items():
        if isinstance(value, set):
            # Re-assigning an existing key does not resize the dict, so this
            # is safe while iterating.
            row[column] = list(value)
    return row
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def cast_spanner_types(row):
    """Convert Spanner ``JsonObject`` values of a dict row to plain Python data.

    Each value that is a ``JsonObject`` is replaced, in place, by the result
    of ``json.loads(value.serialize())``. Non-dict rows are returned as-is
    without importing the Spanner client library.

    Returns:
        The same ``row`` object that was passed in.
    """
    if isinstance(row, dict):
        import json

        # Imported lazily so the Spanner dependency is only needed for dict rows.
        from google.cloud.spanner_v1.data_types import JsonObject

        for column, value in row.items():
            if isinstance(value, JsonObject):
                row[column] = json.loads(value.serialize())
    return row
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def handle_mysql_empty_dates(row):
    """Swap MySQL "zero" date strings in a dict row for epoch values, in place.

    MySQL returns empty dates as 0000-00-00, which is not a valid date.
    A string value equal to ``"0000-00-00"`` becomes ``date(1970, 1, 1)`` and
    one equal to ``"0000-00-00 00:00:00"`` becomes
    ``datetime(1970, 1, 1, 0, 0, 0)``. Everything else, including non-dict
    rows, is left untouched.

    Returns:
        The same ``row`` object that was passed in.
    """
    if not isinstance(row, dict):
        return row

    from datetime import date, datetime

    replacements = {
        "0000-00-00": date(1970, 1, 1),
        "0000-00-00 00:00:00": datetime(1970, 1, 1, 0, 0, 0),
    }

    for column, value in row.items():
        # Only string values can be one of the placeholder literals.
        if isinstance(value, str) and value in replacements:
            row[column] = replacements[value]
    return row
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def table_adapter_exclude_columns(cols: list[str]):
    """Build a table-adapter callback that drops the named columns from a table.

    Args:
        cols: Names of the columns to remove.

    Returns:
        A function taking a ``Table`` and removing, in place, every column
        whose ``name`` is in ``cols``. The callback itself returns nothing.
    """
    from dlt.common.libs.sql_alchemy import Table

    def excluder(table: Table):
        # Collect the matches first so the column collection is not mutated
        # while it is being iterated.
        unwanted = []
        for column in table._columns:  # type: ignore
            if column.name in cols:
                unwanted.append(column)

        for column in unwanted:
            table._columns.remove(column)  # type: ignore

    return excluder
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def create_masking_filter(mask_configs: list[str]):
    """Return a row mapper that applies the given masking configuration.

    Args:
        mask_configs: Masking configuration entries, passed unchanged to
            ``create_masking_mapper``.

    Returns:
        The mapper built by ``create_masking_mapper`` when ``mask_configs`` is
        non-empty; otherwise an identity function that returns its argument.
    """
    from omniload.src.masking import create_masking_mapper

    if mask_configs:
        return create_masking_mapper(mask_configs)

    # Nothing to mask: hand back a pass-through.
    return lambda x: x
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from typing import Iterable, List, Optional
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
6
|
+
from dlt.common.typing import TDataItem
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
|
|
9
|
+
from .helpers import FirefliesAPI
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dlt.source(name="fireflies", max_table_nesting=0)
def fireflies_source(
    api_key: str,
    start_datetime: Optional[pendulum.DateTime],
    end_datetime: Optional[pendulum.DateTime],
    granularity: Optional[str] = None,
) -> List[DltResource]:
    """dlt source exposing Fireflies data as eight resources.

    Args:
        api_key: Key used to construct the ``FirefliesAPI`` client.
        start_datetime: Initial value of the incremental cursors. When falsy,
            1970-01-01 UTC is used instead.
        end_datetime: End value of the incremental cursors. When falsy the
            range is open-ended and requests run up to the current UTC time.
        granularity: Selects the analytics fetch method: ``"DAY"``,
            ``"HOUR"`` or ``"MONTH"`` pick the daily/hourly/monthly variant;
            any other value uses ``fetch_analytics``.

    Returns:
        The resources ``active_meetings``, ``analytics``, ``channels``,
        ``users``, ``transcripts``, ``user_groups``, ``bites`` and
        ``contacts``. ``analytics`` and ``transcripts`` are merged
        incrementally; the others are replaced on each run.
    """
    fireflies_api = FirefliesAPI(api_key=api_key)

    if start_datetime:
        start_datetime = ensure_pendulum_datetime(start_datetime)
    else:
        start_datetime = None
    if end_datetime:
        end_datetime = ensure_pendulum_datetime(end_datetime)
    else:
        end_datetime = None

    # Select fetch method based on granularity
    def get_analytics_fetcher():
        method_by_granularity = {
            "DAY": "fetch_analytics_daily",
            "HOUR": "fetch_analytics_hourly",
            "MONTH": "fetch_analytics_monthly",
        }
        method_name = method_by_granularity.get(granularity, "fetch_analytics")
        return getattr(fireflies_api, method_name)

    def _cursor(field):
        # Shared incremental definition for the two merge resources; only the
        # cursor field differs between them.
        return dlt.sources.incremental(
            field,
            initial_value=start_datetime or pendulum.datetime(1970, 1, 1, tz="UTC"),
            end_value=end_datetime if end_datetime else None,
            range_end="closed" if end_datetime else "open",
            range_start="closed",
        )

    def _iso_window(cursor):
        # Turn the incremental state into (from, to) ISO-8601 strings,
        # falling back to "now" when no end value is set.
        lower = cursor.last_value
        upper = cursor.end_value if cursor.end_value else pendulum.now(tz="UTC")
        lower_iso = lower.to_iso8601_string() if lower else None
        upper_iso = upper.to_iso8601_string() if upper else None
        return lower_iso, upper_iso

    @dlt.resource(write_disposition="replace")
    def active_meetings() -> Iterable[TDataItem]:
        for page in fireflies_api.fetch_active_meetings():
            yield from page

    @dlt.resource(
        write_disposition="merge",
        primary_key=["start_time", "end_time"],
    )
    def analytics(
        updated_at: dlt.sources.incremental[pendulum.DateTime] = _cursor("end_time"),
    ) -> Iterable[TDataItem]:
        from_date_iso, to_date_iso = _iso_window(updated_at)

        fetch_method = get_analytics_fetcher()
        for page in fetch_method(from_date=from_date_iso, to_date=to_date_iso):
            for item in page:
                # Parse string timestamps so the cursor field is a datetime.
                if "end_time" in item and isinstance(item["end_time"], str):
                    item["end_time"] = pendulum.parse(item["end_time"])
                yield item

    @dlt.resource(write_disposition="replace")
    def channels() -> Iterable[TDataItem]:
        for page in fireflies_api.fetch_channels():
            yield from page

    @dlt.resource(write_disposition="replace")
    def users() -> Iterable[TDataItem]:
        for page in fireflies_api.fetch_users():
            yield from page

    @dlt.resource(write_disposition="replace")
    def user_groups() -> Iterable[TDataItem]:
        for page in fireflies_api.fetch_user_groups():
            yield from page

    @dlt.resource(
        write_disposition="merge",
        primary_key="id",
    )
    def transcripts(
        updated_at: dlt.sources.incremental[pendulum.DateTime] = _cursor("date"),
    ) -> Iterable[TDataItem]:
        from_date_iso, to_date_iso = _iso_window(updated_at)

        for page in fireflies_api.fetch_transcripts(
            from_date=from_date_iso,
            to_date=to_date_iso,
        ):
            for item in page:
                # Numeric dates are divided by 1000 before conversion
                # (presumably epoch milliseconds - confirm against the API).
                if "date" in item and isinstance(item["date"], (int, float)):
                    item["date"] = pendulum.from_timestamp(item["date"] / 1000)
                yield item

    @dlt.resource(write_disposition="replace")
    def bites() -> Iterable[TDataItem]:
        for page in fireflies_api.fetch_bites():
            yield from page

    @dlt.resource(write_disposition="replace")
    def contacts() -> Iterable[TDataItem]:
        for page in fireflies_api.fetch_contacts():
            yield from page

    return [
        active_meetings,
        analytics,
        channels,
        users,
        transcripts,
        user_groups,
        bites,
        contacts,
    ]
|