ingestr 0.14.5__py3-none-any.whl → 0.14.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +5 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/sources.py +83 -0
- {ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/METADATA +1 -1
- {ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/RECORD +13 -8
- {ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/WHEEL +0 -0
- {ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/entry_points.txt +0 -0
- {ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.14.5"
+version = "v0.14.6"
ingestr/src/factory.py
CHANGED
@@ -52,6 +52,7 @@ from ingestr.src.sources import (
     GoogleAnalyticsSource,
     GoogleSheetsSource,
     GorgiasSource,
+    HttpSource,
     HubspotSource,
     InfluxDBSource,
     IntercomSource,
@@ -64,6 +65,7 @@ from ingestr.src.sources import (
     LinkedInAdsSource,
     LocalCsvSource,
     MixpanelSource,
+    MondaySource,
     MongoDbSource,
     NotionSource,
     PersonioSource,
@@ -156,6 +158,8 @@ class SourceDestinationFactory:
         "anthropic": AnthropicSource,
         "csv": LocalCsvSource,
         "docebo": DoceboSource,
+        "http": HttpSource,
+        "https": HttpSource,
         "mongodb": MongoDbSource,
         "mongodb+srv": MongoDbSource,
         "notion": NotionSource,
@@ -214,6 +218,7 @@ class SourceDestinationFactory:
         "influxdb": InfluxDBSource,
         "wise": WiseSource,
         "plusvibeai": PlusVibeAISource,
+        "monday": MondaySource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
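For orientation (not part of the diff itself): the new entries mean that http://, https://, and monday:// source URIs now resolve to the HttpSource and MondaySource classes added later in this diff, in ingestr/src/sources.py. A minimal sketch of that mapping, assuming the class-level dict is named `sources` in parallel to the `destinations` dict visible in the hunk above (the attribute name is an assumption):

    from ingestr.src.factory import SourceDestinationFactory

    # Assumption: the scheme-to-source mapping is a class attribute named `sources`,
    # mirroring the `destinations` dict shown in this hunk.
    assert SourceDestinationFactory.sources["http"] is SourceDestinationFactory.sources["https"]
    assert "monday" in SourceDestinationFactory.sources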
ingestr/src/http/__init__.py
ADDED
@@ -0,0 +1,35 @@
+"""HTTP source for reading CSV, JSON, and Parquet files from public URLs"""
+
+from typing import Any, Optional
+
+import dlt
+from dlt.sources import DltResource
+
+from .readers import HttpReader
+
+
+@dlt.source
+def http_source(
+    url: str,
+    file_format: Optional[str] = None,
+    **kwargs: Any,
+) -> DltResource:
+    """Source for reading files from HTTP URLs.
+
+    Supports CSV, JSON, and Parquet file formats.
+
+    Args:
+        url (str): The HTTP(S) URL to the file
+        file_format (str, optional): File format ('csv', 'json', 'parquet').
+            If not provided, will be inferred from URL extension.
+        **kwargs: Additional arguments passed to the reader functions
+
+    Returns:
+        DltResource: A dlt resource that yields the file data
+    """
+    reader = HttpReader(url, file_format)
+
+    return dlt.resource(
+        reader.read_file(**kwargs),
+        name="http_data",
+    )
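As a rough usage sketch (not part of the release itself), the new source plugs into an ordinary dlt pipeline; the pipeline name, DuckDB destination, and URL below are illustrative placeholders:

    import dlt

    from ingestr.src.http import http_source

    pipeline = dlt.pipeline(
        pipeline_name="http_demo", destination="duckdb", dataset_name="raw"
    )

    # file_format is inferred from the ".csv" suffix; pass file_format="csv" to force it.
    load_info = pipeline.run(http_source(url="https://example.com/data.csv"))
    print(load_info)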
ingestr/src/http/readers.py
ADDED
@@ -0,0 +1,114 @@
+"""Readers for HTTP file sources"""
+
+import io
+from typing import Any, Iterator, Optional
+from urllib.parse import urlparse
+
+import requests
+from dlt.sources import TDataItems
+
+
+class HttpReader:
+    """Reader for HTTP-based file sources"""
+
+    def __init__(self, url: str, file_format: Optional[str] = None):
+        self.url = url
+        self.file_format = file_format or self._infer_format(url)
+
+        if self.file_format not in ["csv", "json", "parquet"]:
+            raise ValueError(
+                f"Unsupported file format: {self.file_format}. "
+                "Supported formats: csv, json, parquet"
+            )
+
+    def _infer_format(self, url: str) -> str:
+        """Infer file format from URL extension"""
+        parsed = urlparse(url)
+        path = parsed.path.lower()
+
+        if path.endswith(".csv"):
+            return "csv"
+        elif path.endswith(".json") or path.endswith(".jsonl"):
+            return "json"
+        elif path.endswith(".parquet"):
+            return "parquet"
+        else:
+            raise ValueError(
+                f"Cannot infer file format from URL: {url}. "
+                "Please specify file_format parameter."
+            )
+
+    def _download_file(self) -> bytes:
+        """Download file from URL"""
+        response = requests.get(self.url, stream=True, timeout=30)
+        response.raise_for_status()
+        return response.content
+
+    def read_file(self, **kwargs: Any) -> Iterator[TDataItems]:
+        """Read file and yield data in chunks"""
+        content = self._download_file()
+
+        if self.file_format == "csv":
+            yield from self._read_csv(content, **kwargs)
+        elif self.file_format == "json":
+            yield from self._read_json(content, **kwargs)
+        elif self.file_format == "parquet":
+            yield from self._read_parquet(content, **kwargs)
+
+    def _read_csv(
+        self, content: bytes, chunksize: int = 10000, **pandas_kwargs: Any
+    ) -> Iterator[TDataItems]:
+        """Read CSV file with Pandas chunk by chunk"""
+        import pandas as pd  # type: ignore
+
+        kwargs = {**{"header": "infer", "chunksize": chunksize}, **pandas_kwargs}
+
+        file_obj = io.BytesIO(content)
+        for df in pd.read_csv(file_obj, **kwargs):
+            yield df.to_dict(orient="records")
+
+    def _read_json(
+        self, content: bytes, chunksize: int = 1000, **kwargs: Any
+    ) -> Iterator[TDataItems]:
+        """Read JSON or JSONL file"""
+        from dlt.common import json
+
+        file_obj = io.BytesIO(content)
+        text = file_obj.read().decode("utf-8")
+
+        # Try to detect if it's JSONL format (one JSON object per line)
+        lines = text.strip().split("\n")
+
+        if len(lines) > 1:
+            # Likely JSONL format
+            lines_chunk = []
+            for line in lines:
+                if line.strip():
+                    lines_chunk.append(json.loads(line))
+                if len(lines_chunk) >= chunksize:
+                    yield lines_chunk
+                    lines_chunk = []
+            if lines_chunk:
+                yield lines_chunk
+        else:
+            # Single JSON object or array
+            data = json.loads(text)
+            if isinstance(data, list):
+                # Chunk the list
+                for i in range(0, len(data), chunksize):
+                    yield data[i : i + chunksize]
+            else:
+                # Single object
+                yield [data]
+
+    def _read_parquet(
+        self, content: bytes, chunksize: int = 10000, **kwargs: Any
+    ) -> Iterator[TDataItems]:
+        """Read Parquet file"""
+        from pyarrow import parquet as pq  # type: ignore
+
+        file_obj = io.BytesIO(content)
+        parquet_file = pq.ParquetFile(file_obj)
+
+        for batch in parquet_file.iter_batches(batch_size=chunksize):
+            yield batch.to_pylist()
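For orientation, HttpReader can also be driven directly; read_file() yields one list of row dictionaries per chunk. A small sketch with a placeholder URL:

    from ingestr.src.http.readers import HttpReader

    reader = HttpReader("https://example.com/events.csv")  # format inferred from ".csv"

    # Each iteration yields a list of dicts (one pandas chunk; 10,000 rows by default).
    for rows in reader.read_file(chunksize=500):
        print(len(rows))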
ingestr/src/monday/__init__.py
ADDED
@@ -0,0 +1,246 @@
+"""
+Monday.com source for data extraction via GraphQL API.
+
+This source provides access to Monday.com app installation data.
+"""
+
+from typing import Any, Iterable, Iterator, Optional
+
+import dlt
+from dlt.sources import DltResource
+
+from .helpers import MondayClient, normalize_dict
+
+
+@dlt.source(max_table_nesting=0, name="monday_source")
+def monday_source(
+    api_token: str,
+    params: list[str],
+    start_date: Optional[str] = None,
+    end_date: Optional[str] = None,
+) -> Iterable[DltResource]:
+    """
+    Monday.com data source.
+
+    Args:
+        api_token: Monday.com API token for authentication
+        params: Table-specific parameters in format [table_type, ...params]
+        start_date: Optional start date for date-filtered queries (YYYY-MM-DD)
+        end_date: Optional end date for date-filtered queries (YYYY-MM-DD)
+
+    Yields:
+        DltResource: Data resource for the requested table
+    """
+    monday_client = MondayClient(api_token)
+
+    @dlt.resource(
+        name="account",
+        write_disposition="replace",
+    )
+    def fetch_account() -> Iterator[dict[str, Any]]:
+        """
+        Fetch account information from Monday.com.
+
+        Table format: account (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Account table must be in the format `account`")
+
+        yield normalize_dict(monday_client.get_account())
+
+    @dlt.resource(
+        name="account_roles",
+        write_disposition="replace",
+    )
+    def fetch_account_roles() -> Iterator[dict[str, Any]]:
+        """
+        Fetch account roles from Monday.com.
+
+        Table format: account_roles (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError(
+                "Account roles table must be in the format `account_roles`"
+            )
+
+        yield from monday_client.get_account_roles()
+
+    @dlt.resource(
+        name="users",
+        write_disposition="replace",
+    )
+    def fetch_users() -> Iterator[dict[str, Any]]:
+        """
+        Fetch users from Monday.com.
+
+        Table format: users (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Users table must be in the format `users`")
+
+        yield from monday_client.get_users()
+
+    @dlt.resource(
+        name="boards",
+        write_disposition="merge",
+        primary_key="id",
+    )
+    def fetch_boards(
+        updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "updated_at", initial_value=start_date
+        ),
+    ) -> Iterator[dict[str, Any]]:
+        """
+        Fetch boards from Monday.com.
+
+        Table format: boards (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Boards table must be in the format `boards`")
+
+        yield from monday_client.get_boards()
+
+    @dlt.resource(
+        name="workspaces",
+        write_disposition="replace",
+    )
+    def fetch_workspaces() -> Iterator[dict[str, Any]]:
+        """
+        Fetch workspaces from Monday.com.
+
+        Table format: workspaces (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Workspaces table must be in the format `workspaces`")
+
+        yield from monday_client.get_workspaces()
+
+    @dlt.resource(
+        name="webhooks",
+        write_disposition="replace",
+    )
+    def fetch_webhooks() -> Iterator[dict[str, Any]]:
+        """
+        Fetch webhooks from Monday.com.
+
+        Table format: webhooks (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Webhooks table must be in the format `webhooks`")
+
+        yield from monday_client.get_webhooks()
+
+    @dlt.resource(
+        name="updates",
+        write_disposition="merge",
+        primary_key="id",
+    )
+    def fetch_updates(
+        updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "updated_at", initial_value=start_date
+        ),
+    ) -> Iterator[dict[str, Any]]:
+        """
+        Fetch updates from Monday.com.
+
+        Table format: updates (no parameters needed)
+        Requires start_date and end_date parameters
+        """
+        if len(params) != 0:
+            raise ValueError("Updates table must be in the format `updates`")
+
+        yield from monday_client.get_updates(start_date=start_date, end_date=end_date)
+
+    @dlt.resource(
+        name="teams",
+        write_disposition="replace",
+    )
+    def fetch_teams() -> Iterator[dict[str, Any]]:
+        """
+        Fetch teams from Monday.com.
+
+        Table format: teams (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Teams table must be in the format `teams`")
+
+        yield from monday_client.get_teams()
+
+    @dlt.resource(
+        name="tags",
+        write_disposition="replace",
+    )
+    def fetch_tags() -> Iterator[dict[str, Any]]:
+        """
+        Fetch tags from Monday.com.
+
+        Table format: tags (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Tags table must be in the format `tags`")
+
+        yield from monday_client.get_tags()
+
+    @dlt.resource(
+        name="custom_activities",
+        write_disposition="replace",
+    )
+    def fetch_custom_activities() -> Iterator[dict[str, Any]]:
+        """
+        Fetch custom activities from Monday.com.
+
+        Table format: custom_activities (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError(
+                "Custom activities table must be in the format `custom_activities`"
+            )
+
+        yield from monday_client.get_custom_activities()
+
+    @dlt.resource(
+        name="board_columns",
+        write_disposition="replace",
+    )
+    def fetch_board_columns() -> Iterator[dict[str, Any]]:
+        """
+        Fetch board columns from Monday.com.
+
+        Table format: board_columns (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError(
+                "Board columns table must be in the format `board_columns`"
+            )
+
+        yield from monday_client.get_board_columns()
+
+    @dlt.resource(
+        name="board_views",
+        write_disposition="replace",
+    )
+    def fetch_board_views() -> Iterator[dict[str, Any]]:
+        """
+        Fetch board views from Monday.com.
+
+        Table format: board_views (no parameters needed)
+        """
+        if len(params) != 0:
+            raise ValueError("Board views table must be in the format `board_views`")
+
+        yield from monday_client.get_board_views()
+
+    return (
+        fetch_account,
+        fetch_account_roles,
+        fetch_users,
+        fetch_boards,
+        fetch_workspaces,
+        fetch_webhooks,
+        fetch_updates,
+        fetch_teams,
+        fetch_tags,
+        fetch_custom_activities,
+        fetch_board_columns,
+        fetch_board_views,
+    )
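As a hedged sketch (the token, pipeline name, and DuckDB destination below are placeholders), the source can be run through dlt directly and a single resource selected with with_resources:

    import dlt

    from ingestr.src.monday import monday_source

    source = monday_source(api_token="MONDAY_API_TOKEN", params=[], start_date="2024-01-01")

    pipeline = dlt.pipeline(
        pipeline_name="monday_demo", destination="duckdb", dataset_name="monday"
    )

    # Load only the boards table; users, workspaces, etc. work the same way.
    pipeline.run(source.with_resources("boards"))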
ingestr/src/monday/helpers.py
ADDED
@@ -0,0 +1,392 @@
+from typing import Any, Dict, Iterator, Optional
+
+from ingestr.src.http_client import create_client
+
+from .settings import (
+    ACCOUNT_QUERY,
+    ACCOUNT_ROLES_QUERY,
+    BOARD_COLUMNS_QUERY,
+    BOARD_VIEWS_QUERY,
+    BOARDS_QUERY,
+    CUSTOM_ACTIVITIES_QUERY,
+    MAX_PAGE_SIZE,
+    TAGS_QUERY,
+    TEAMS_QUERY,
+    UPDATES_QUERY,
+    USERS_QUERY,
+    WEBHOOKS_QUERY,
+    WORKSPACES_QUERY,
+)
+
+
+def _paginate(
+    client: "MondayClient",
+    query: str,
+    field_name: str,
+    limit: int = 100,
+    extra_variables: Optional[Dict[str, Any]] = None,
+) -> Iterator[Dict[str, Any]]:
+    """
+    Helper function to paginate through Monday.com API results.
+
+    Args:
+        client: MondayClient instance
+        query: GraphQL query with $limit and $page variables
+        field_name: Name of the field in the response to extract
+        limit: Number of results per page
+        extra_variables: Additional variables to pass to the query
+
+    Yields:
+        Normalized dictionaries from the API response
+    """
+    page = 1
+
+    while True:
+        variables = {
+            "limit": min(limit, MAX_PAGE_SIZE),
+            "page": page,
+        }
+
+        if extra_variables:
+            variables.update(extra_variables)
+
+        data = client._execute_query(query, variables)
+        items = data.get(field_name, [])
+
+        if not items:
+            break
+
+        for item in items:
+            yield normalize_dict(item)
+
+        if len(items) < limit:
+            break
+
+        page += 1
+
+
+def _get_all_board_ids(client: "MondayClient") -> list[str]:
+    """
+    Collect all board IDs from the Monday.com API.
+
+    Args:
+        client: MondayClient instance
+
+    Returns:
+        List of board IDs as strings
+    """
+    board_ids = []
+    for board in _paginate(client, BOARDS_QUERY, "boards", MAX_PAGE_SIZE):
+        board_id = board.get("id")
+        if board_id:
+            board_ids.append(str(board_id))
+    return board_ids
+
+
+def _fetch_nested_board_data(
+    client: "MondayClient", query: str, nested_field: str
+) -> Iterator[Dict[str, Any]]:
+    """
+    Fetch nested data from boards (columns, views, etc).
+
+    Args:
+        client: MondayClient instance
+        query: GraphQL query to execute
+        nested_field: Name of the nested field to extract (e.g., "columns", "views")
+
+    Yields:
+        Dict containing nested data with board_id added
+    """
+    board_ids = _get_all_board_ids(client)
+
+    if not board_ids:
+        return
+
+    for board_id in board_ids:
+        variables = {"board_ids": [board_id]}
+        data = client._execute_query(query, variables)
+        boards = data.get("boards", [])
+
+        for board in boards:
+            nested_items = board.get(nested_field, [])
+
+            if nested_items and isinstance(nested_items, list):
+                for item in nested_items:
+                    item_data = item.copy()
+                    item_data["board_id"] = board.get("id")
+                    yield normalize_dict(item_data)
+
+
+def _fetch_simple_list(
+    client: "MondayClient", query: str, field_name: str
+) -> Iterator[Dict[str, Any]]:
+    """
+    Fetch a simple list of items from Monday.com API without pagination.
+
+    Args:
+        client: MondayClient instance
+        query: GraphQL query to execute
+        field_name: Name of the field in the response to extract
+
+    Yields:
+        Normalized dictionaries from the API response
+    """
+    data = client._execute_query(query)
+    items = data.get(field_name, [])
+
+    for item in items:
+        yield normalize_dict(item)
+
+
+def normalize_dict(data: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Normalize dictionary fields by detecting their structure:
+    - Convert nested objects with 'id' field to {field_name}_id
+    - Convert objects with other fields to flattened {field_name}_{subfield}
+    - Convert arrays to JSON strings for storage
+    - Preserve null values
+
+    Args:
+        data: The dictionary to normalize
+
+    Returns:
+        Normalized dictionary with flattened structure
+
+    Example:
+        >>> normalize_dict({"user": {"id": "123"}, "plan": {"tier": "pro"}})
+        {"user_id": "123", "plan_tier": "pro"}
+    """
+    import json
+
+    normalized: Dict[str, Any] = {}
+
+    for key, value in data.items():
+        if value is None:
+            # Keep null values as-is
+            normalized[key] = None
+        elif isinstance(value, dict):
+            # If the dict has only an 'id' field, replace with {key}_id
+            if "id" in value and len(value) == 1:
+                normalized[f"{key}_id"] = value["id"]
+            # If dict has multiple fields, flatten them
+            elif value:
+                for subkey, subvalue in value.items():
+                    normalized[f"{key}_{subkey}"] = subvalue
+        elif isinstance(value, list):
+            # If list contains dicts with only 'id' field, extract ids
+            if value and isinstance(value[0], dict) and list(value[0].keys()) == ["id"]:
+                normalized[key] = [item["id"] for item in value]
+            else:
+                # Convert other lists to JSON strings for storage
+                normalized[key] = json.dumps(value)
+        else:
+            # Add scalar values directly
+            normalized[key] = value
+
+    return normalized
+
+
+class MondayClient:
+    """Monday.com GraphQL API client."""
+
+    def __init__(self, api_token: str) -> None:
+        self.api_token = api_token
+        self.base_url = "https://api.monday.com/v2"
+        self.session = create_client()
+
+    def _headers(self) -> Dict[str, str]:
+        return {
+            "Authorization": self.api_token,
+            "Content-Type": "application/json",
+        }
+
+    def _execute_query(
+        self, query: str, variables: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """Execute a GraphQL query against Monday.com API."""
+        payload: Dict[str, Any] = {"query": query}
+        if variables:
+            payload["variables"] = variables
+
+        response = self.session.post(
+            self.base_url,
+            headers=self._headers(),
+            json=payload,
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        if "errors" in data:
+            raise Exception(f"GraphQL errors: {data['errors']}")
+
+        return data.get("data", {})
+
+    def get_account(self) -> Dict[str, Any]:
+        """
+        Fetch account information from Monday.com API.
+
+        Returns:
+            Dict containing account data
+        """
+        data = self._execute_query(ACCOUNT_QUERY)
+        account = data.get("account", {})
+
+        if not account:
+            raise Exception("No account data returned from Monday.com API")
+
+        return normalize_dict(account)
+
+    def get_account_roles(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch account roles from Monday.com API.
+
+        Yields:
+            Dict containing account role data
+        """
+        yield from _fetch_simple_list(self, ACCOUNT_ROLES_QUERY, "account_roles")
+
+    def get_users(self, limit: int = MAX_PAGE_SIZE) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch users from Monday.com API with pagination.
+
+        Args:
+            limit: Number of results per page (max 100)
+
+        Yields:
+            Dict containing user data
+        """
+        yield from _paginate(self, USERS_QUERY, "users", limit)
+
+    def get_boards(self, limit: int = MAX_PAGE_SIZE) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch boards from Monday.com API with pagination.
+
+        Args:
+            limit: Number of results per page (max 100)
+
+        Yields:
+            Dict containing board data
+        """
+        yield from _paginate(self, BOARDS_QUERY, "boards", limit)
+
+    def get_workspaces(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch workspaces from Monday.com API.
+        First gets all boards to extract unique workspace IDs,
+        then fetches workspace details.
+
+        Yields:
+            Dict containing workspace data
+        """
+        # Collect unique workspace IDs from boards
+        workspace_ids = set()
+        for board in _paginate(self, BOARDS_QUERY, "boards", MAX_PAGE_SIZE):
+            workspace_id = board.get("workspace_id")
+            if workspace_id:
+                workspace_ids.add(str(workspace_id))
+
+        if not workspace_ids:
+            return
+
+        # Fetch workspace details
+        variables = {"ids": list(workspace_ids)}
+        data = self._execute_query(WORKSPACES_QUERY, variables)
+        workspaces = data.get("workspaces", [])
+
+        for workspace in workspaces:
+            yield normalize_dict(workspace)
+
+    def get_webhooks(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch webhooks from Monday.com API.
+        First gets all board IDs, then fetches webhooks for each board.
+
+        Yields:
+            Dict containing webhook data
+        """
+        board_ids = _get_all_board_ids(self)
+
+        for board_id in board_ids:
+            variables = {"board_id": board_id}
+            data = self._execute_query(WEBHOOKS_QUERY, variables)
+            webhooks = data.get("webhooks", [])
+
+            for webhook in webhooks:
+                yield normalize_dict(webhook)
+
+    def get_updates(
+        self,
+        limit: int = MAX_PAGE_SIZE,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
+    ) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch updates from Monday.com API.
+
+        Args:
+            limit: Number of results (max 100)
+            start_date: Start date in YYYY-MM-DD format
+            end_date: End date in YYYY-MM-DD format
+
+        Yields:
+            Dict containing update data
+        """
+        variables: Dict[str, Any] = {"limit": min(limit, MAX_PAGE_SIZE)}
+
+        if start_date:
+            variables["from_date"] = start_date
+        if end_date:
+            variables["to_date"] = end_date
+
+        data = self._execute_query(UPDATES_QUERY, variables)
+        updates = data.get("updates", [])
+
+        for update in updates:
+            yield normalize_dict(update)
+
+    def get_teams(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch teams from Monday.com API.
+
+        Yields:
+            Dict containing team data
+        """
+        yield from _fetch_simple_list(self, TEAMS_QUERY, "teams")
+
+    def get_tags(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch tags from Monday.com API.
+
+        Yields:
+            Dict containing tag data
+        """
+        yield from _fetch_simple_list(self, TAGS_QUERY, "tags")
+
+    def get_custom_activities(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch custom activities from Monday.com API.
+
+        Yields:
+            Dict containing custom activity data
+        """
+        yield from _fetch_simple_list(self, CUSTOM_ACTIVITIES_QUERY, "custom_activity")
+
+    def get_board_columns(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch board columns from Monday.com API.
+        First gets all board IDs, then fetches columns for each board.
+
+        Yields:
+            Dict containing board column data with board_id
+        """
+        yield from _fetch_nested_board_data(self, BOARD_COLUMNS_QUERY, "columns")
+
+    def get_board_views(self) -> Iterator[Dict[str, Any]]:
+        """
+        Fetch board views from Monday.com API.
+        First gets all board IDs, then fetches views for each board.
+
+        Yields:
+            Dict containing board view data with board_id
+        """
+        yield from _fetch_nested_board_data(self, BOARD_VIEWS_QUERY, "views")
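The normalization rules in normalize_dict are easiest to see on a concrete record; the values below are made up for illustration:

    from ingestr.src.monday.helpers import normalize_dict

    record = {
        "name": "Roadmap",                            # scalar: kept as-is
        "creator": {"id": "42"},                      # id-only object -> creator_id
        "plan": {"tier": "pro", "period": "yearly"},  # multi-field object -> plan_tier, plan_period
        "owners": [{"id": "1"}, {"id": "2"}],         # list of id-only dicts -> list of ids
        "labels": ["alpha", "beta"],                  # other lists -> JSON string
        "archived_at": None,                          # nulls preserved
    }

    print(normalize_dict(record))
    # {'name': 'Roadmap', 'creator_id': '42', 'plan_tier': 'pro', 'plan_period': 'yearly',
    #  'owners': ['1', '2'], 'labels': '["alpha", "beta"]', 'archived_at': None}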
ingestr/src/monday/settings.py
ADDED
@@ -0,0 +1,328 @@
+"""Monday.com source settings and constants"""
+
+# GraphQL query for fetching app installs
+APP_INSTALLS_QUERY = """
+query ($app_id: ID!, $account_id: ID, $limit: Int!, $page: Int!) {
+  app_installs(
+    app_id: $app_id
+    account_id: $account_id
+    limit: $limit
+    page: $page
+  ) {
+    app_id
+    timestamp
+    app_install_account {
+      id
+    }
+    app_install_user {
+      id
+    }
+    app_version {
+      major
+      minor
+      patch
+      type
+      text
+    }
+    permissions {
+      approved_scopes
+      required_scopes
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching account information
+ACCOUNT_QUERY = """
+query {
+  account {
+    id
+    name
+    slug
+    tier
+    country_code
+    first_day_of_the_week
+    show_timeline_weekends
+    sign_up_product_kind
+    active_members_count
+    logo
+    plan {
+      max_users
+      period
+      tier
+      version
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching account roles
+ACCOUNT_ROLES_QUERY = """
+query {
+  account_roles {
+    id
+    name
+    roleType
+  }
+}
+"""
+
+# GraphQL query for fetching users
+USERS_QUERY = """
+query ($limit: Int!, $page: Int!) {
+  users(limit: $limit, page: $page) {
+    id
+    name
+    email
+    enabled
+    is_admin
+    is_guest
+    is_pending
+    is_view_only
+    created_at
+    birthday
+    country_code
+    join_date
+    location
+    mobile_phone
+    phone
+    photo_original
+    photo_thumb
+    photo_tiny
+    time_zone_identifier
+    title
+    url
+    utc_hours_diff
+    current_language
+    account {
+      id
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching boards
+BOARDS_QUERY = """
+query ($limit: Int!, $page: Int!) {
+  boards(limit: $limit, page: $page) {
+    id
+    name
+    description
+    state
+    board_kind
+    board_folder_id
+    workspace_id
+    permissions
+    item_terminology
+    items_count
+    updated_at
+    url
+    communication
+    object_type_unique_key
+    type
+    creator {
+      id
+    }
+    owners {
+      id
+    }
+    subscribers {
+      id
+    }
+    team_owners {
+      id
+    }
+    team_subscribers {
+      id
+    }
+    tags {
+      id
+
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching custom activities
+CUSTOM_ACTIVITIES_QUERY = """
+query {
+  custom_activity {
+    id
+    name
+    type
+    color
+    icon_id
+  }
+}
+"""
+
+# GraphQL query for fetching board columns
+BOARD_COLUMNS_QUERY = """
+query ($board_ids: [ID!]) {
+  boards(ids: $board_ids) {
+    id
+    columns {
+      id
+      title
+      type
+      archived
+      description
+      settings_str
+      width
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching board views
+BOARD_VIEWS_QUERY = """
+query ($board_ids: [ID!]) {
+  boards(ids: $board_ids) {
+    id
+    views {
+      id
+      name
+      type
+      settings_str
+      view_specific_data_str
+      source_view_id
+      access_level
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching workspaces by IDs
+WORKSPACES_QUERY = """
+query ($ids: [ID!]) {
+  workspaces(ids: $ids) {
+    id
+    name
+    kind
+    description
+    created_at
+    is_default_workspace
+    state
+    account_product {
+      id
+    }
+    owners_subscribers {
+      id
+    }
+    team_owners_subscribers {
+      id
+    }
+    teams_subscribers {
+      id
+    }
+    users_subscribers {
+      id
+    }
+    settings {
+      icon
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching webhooks by board ID
+WEBHOOKS_QUERY = """
+query ($board_id: ID!) {
+  webhooks(board_id: $board_id) {
+    id
+    event
+    board_id
+    config
+  }
+}
+"""
+
+# GraphQL query for fetching updates
+UPDATES_QUERY = """
+query ($limit: Int!, $from_date: String, $to_date: String) {
+  updates(limit: $limit, from_date: $from_date, to_date: $to_date) {
+    id
+    body
+    text_body
+    created_at
+    updated_at
+    edited_at
+    creator_id
+    item_id
+    creator {
+      id
+    }
+    item {
+      id
+    }
+    assets {
+      id
+      name
+      file_extension
+      file_size
+      public_url
+      url
+      url_thumbnail
+      created_at
+      original_geometry
+      uploaded_by {
+        id
+      }
+    }
+    replies {
+      id
+      body
+      text_body
+      created_at
+      updated_at
+      creator_id
+      creator {
+        id
+      }
+    }
+    likes {
+      id
+    }
+    pinned_to_top {
+      item_id
+    }
+    viewers {
+      medium
+      user_id
+      user {
+        id
+      }
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching teams
+TEAMS_QUERY = """
+query {
+  teams {
+    id
+    name
+    picture_url
+    users {
+      id
+      created_at
+      phone
+    }
+  }
+}
+"""
+
+# GraphQL query for fetching tags
+TAGS_QUERY = """
+query {
+  tags {
+    id
+    name
+    color
+  }
+}
+"""
+
+# Maximum number of results per page
+MAX_PAGE_SIZE = 100
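For orientation, the paginated queries above take $limit and $page variables capped at MAX_PAGE_SIZE. A minimal sketch of issuing one page by hand; the token is a placeholder, and _execute_query is the same internal helper the pagination code in helpers.py uses:

    from ingestr.src.monday.helpers import MondayClient
    from ingestr.src.monday.settings import MAX_PAGE_SIZE, USERS_QUERY

    client = MondayClient("MONDAY_API_TOKEN")

    # One page of users; _paginate() in helpers.py loops this until a short page is returned.
    data = client._execute_query(USERS_QUERY, {"limit": MAX_PAGE_SIZE, "page": 1})
    for user in data.get("users", []):
        print(user["id"], user.get("email"))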
ingestr/src/sources.py
CHANGED
@@ -3878,3 +3878,86 @@ class IntercomSource:
             start_date=start_date,
             end_date=end_date,
         ).with_resources(table)
+
+
+class HttpSource:
+    """Source for reading CSV, JSON, and Parquet files from HTTP URLs"""
+
+    def handles_incrementality(self) -> bool:
+        return False
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        """
+        Create a dlt source for reading files from HTTP URLs.
+
+        URI format: http://example.com/file.csv or https://example.com/file.json
+
+        Args:
+            uri: HTTP(S) URL to the file
+            table: Not used for HTTP source (files are read directly)
+            **kwargs: Additional arguments:
+                - file_format: Optional file format override ('csv', 'json', 'parquet')
+                - chunksize: Number of records to process at once (default varies by format)
+                - merge_key: Merge key for the resource
+
+        Returns:
+            DltResource for the HTTP file
+        """
+        from ingestr.src.http import http_source
+
+        # Extract the actual URL (remove the http:// or https:// scheme if duplicated)
+        url = uri
+        if uri.startswith("http://http://") or uri.startswith("https://https://"):
+            url = uri.split("://", 1)[1]
+
+        file_format = kwargs.get("file_format")
+        chunksize = kwargs.get("chunksize")
+        merge_key = kwargs.get("merge_key")
+
+        reader_kwargs = {}
+        if chunksize is not None:
+            reader_kwargs["chunksize"] = chunksize
+
+        source = http_source(url=url, file_format=file_format, **reader_kwargs)
+
+        if merge_key:
+            source.apply_hints(merge_key=merge_key)
+
+        return source
+
+
+class MondaySource:
+    def handles_incrementality(self) -> bool:
+        return False
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        query_params = parse_qs(parsed_uri.query)
+        api_token = query_params.get("api_token")
+
+        if api_token is None:
+            raise MissingValueError("api_token", "Monday")
+
+        parts = table.replace(" ", "").split(":")
+        table_name = parts[0]
+        params = parts[1:]
+
+        # Get interval_start and interval_end from kwargs (command line args)
+        interval_start = kwargs.get("interval_start")
+        interval_end = kwargs.get("interval_end")
+
+        # Convert datetime to string format YYYY-MM-DD
+        start_date = interval_start.strftime("%Y-%m-%d") if interval_start else None
+        end_date = interval_end.strftime("%Y-%m-%d") if interval_end else None
+
+        from ingestr.src.monday import monday_source
+
+        try:
+            return monday_source(
+                api_token=api_token[0],
+                params=params,
+                start_date=start_date,
+                end_date=end_date,
+            ).with_resources(table_name)
+        except ResourcesNotFoundError:
+            raise UnsupportedResourceError(table_name, "Monday")

{ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.14.5
+Version: 0.14.6
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues

{ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/RECORD
CHANGED
@@ -2,17 +2,17 @@ ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
 ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=
+ingestr/src/buildinfo.py,sha256=dazsjzReTYtam8X7FVSN4WAYUmvlPNZ0XztT57SOHTU,20
 ingestr/src/destinations.py,sha256=QtjE0AGs0WkPHaI2snWPHJ8HHi4lwXUBYLJPklz8Mvk,27772
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=03eGDe2rL6qyT5sGmTGZi-XIwJbbdoedE_KjW3ZF7QY,7661
 ingestr/src/filters.py,sha256=0n0sNAVG_f-B_1r7lW5iNtw9z_G1bxWzPaiL1i6tnbU,1665
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=yQhmgIIfzMr8qHxQr-yDmzowti_q59khRzBDPY0Kw-I,138486
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -83,6 +83,8 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
 ingestr/src/google_sheets/helpers/data_processing.py,sha256=RNt2MYfdJhk4bRahnQVezpNg2x9z0vx60YFq2ukZ8vI,11004
 ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
 ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
+ingestr/src/http/__init__.py,sha256=Y9mQIE0RolHOh6dPjW41qzYXSG8BC0GPKxEtz2CJGpU,902
+ingestr/src/http/readers.py,sha256=rgBwYG5SOQ7P2uzBAFMOQIevKxV51ZW41VSiRTZ0Xvo,3863
 ingestr/src/hubspot/__init__.py,sha256=FCqjLeOjijdc9JC_NoDwtRqy3FDyY-szDi6UV7CdDN0,11548
 ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
 ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
@@ -109,6 +111,9 @@ ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffW
 ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
 ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
 ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
+ingestr/src/monday/__init__.py,sha256=ZNdGCC_1CEYlgxAef-5QO56Drm9IMP82-rZpEvbD8aY,6918
+ingestr/src/monday/helpers.py,sha256=xkAYTFIwjbU-dQTa4d41oQm6kFvCHv74AhCmN-H8aPE,11572
+ingestr/src/monday/settings.py,sha256=5TC0OrTHQO52AifwP3Z2xsh4D8SDUq0WxqY5AQMjcns,5667
 ingestr/src/mongodb/__init__.py,sha256=6-DvvaKL7XOPPRwItI7lSpoMQLEPzYubV6dKhpzbuME,7494
 ingestr/src/mongodb/helpers.py,sha256=BKb0F-AUWjFJikE9OPP9z5wFuMmJsf8YsyWhvQ9dC1k,38076
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -175,8 +180,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=zf3DXT29Y4TH2lNPBFphdjlaelUUyPJcsW2UO68RzDs,4862
-ingestr-0.14.
-ingestr-0.14.
-ingestr-0.14.
-ingestr-0.14.
-ingestr-0.14.
+ingestr-0.14.6.dist-info/METADATA,sha256=3akmbk91m4xi1AMYFkuMPKmAtlcUSzwCOsYbeXwFlsk,15265
+ingestr-0.14.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.14.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.14.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.14.6.dist-info/RECORD,,

{ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/WHEEL: file without changes
{ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/entry_points.txt: file without changes
{ingestr-0.14.5.dist-info → ingestr-0.14.6.dist-info}/licenses/LICENSE.md: file without changes