amochka 0.1.9__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amochka/__init__.py +28 -0
- amochka/client.py +1375 -0
- amochka/etl.py +302 -0
- {amochka-0.1.9.dist-info → amochka-0.3.0.dist-info}/METADATA +5 -3
- amochka-0.3.0.dist-info/RECORD +14 -0
- amochka-0.3.0.dist-info/top_level.txt +2 -0
- etl/__init__.py +7 -0
- etl/config.py +236 -0
- etl/extractors.py +354 -0
- etl/loaders.py +813 -0
- etl/migrations/001_create_tables.sql +346 -0
- etl/run_etl.py +684 -0
- etl/transformers.py +470 -0
- amochka-0.1.9.dist-info/RECORD +0 -4
- amochka-0.1.9.dist-info/top_level.txt +0 -1
- {amochka-0.1.9.dist-info → amochka-0.3.0.dist-info}/WHEEL +0 -0
amochka/etl.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Callable, Iterable, List, Optional, Sequence, Set, Union
|
|
4
|
+
|
|
5
|
+
from .client import AmoCRMClient
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _ensure_path(path: Union[str, Path]) -> Path:
    """Return *path* as a ``Path`` after making sure its parent directory exists.

    Missing parent directories are created recursively; directories that are
    already present are left untouched. The file itself is not created.
    """
    target = Path(path)
    parent_dir = target.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    return target
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _resolve_timestamp(record: dict, timestamp_fields: Sequence[str]) -> Optional[Union[int, float, str]]:
    """Return the first non-``None`` value found in *record* under *timestamp_fields*.

    Field names are tried in the given order; empty field names are skipped.
    Falsy values other than ``None`` (for example ``0``) count as found.
    Returns ``None`` when no field yields a value.
    """
    candidates = (record.get(name) for name in timestamp_fields if name)
    return next((value for value in candidates if value is not None), None)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def write_ndjson(
    records: Iterable[dict],
    output_path: Union[str, Path],
    *,
    entity: str,
    account_id: Optional[Union[int, str]] = None,
    timestamp_fields: Sequence[str] = ("updated_at", "created_at"),
    transform: Optional[Callable[[dict], dict]] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Write *records* to *output_path* as NDJSON, one envelope per line.

    Each line is a JSON object with the keys ``entity``, ``account_id``,
    ``updated_at`` and ``payload``. The file is opened in write mode, so any
    existing content is replaced; parent directories are created if needed.

    Args:
        records: Source records, consumed lazily.
        output_path: Destination file.
        entity: Value stored under ``entity`` on every line.
        account_id: Value stored under ``account_id`` on every line.
        timestamp_fields: Record keys tried in order to fill ``updated_at``.
            The lookup uses the original record, not the transformed payload.
        transform: Optional function producing the ``payload`` from a record;
            without it the record itself is the payload.
        on_record: Optional callback invoked with the original record after
            its line has been written.

    Returns:
        The number of lines written.
    """
    destination = _ensure_path(output_path)
    written = 0
    with destination.open("w", encoding="utf-8") as stream:
        for record in records:
            if transform:
                body = transform(record)
            else:
                body = record
            envelope = {
                "entity": entity,
                "account_id": account_id,
                "updated_at": _resolve_timestamp(record, timestamp_fields),
                "payload": body,
            }
            stream.write(json.dumps(envelope, ensure_ascii=False) + "\n")
            written += 1
            if on_record:
                on_record(record)
    return written
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def export_leads_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    start=None,
    end=None,
    pipeline_ids=None,
    include_contacts: bool = True,
    include=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export leads and write them to an NDJSON file.

    ``start`` and ``end`` are forwarded to ``client.iter_leads`` as
    ``updated_from`` / ``updated_to``; the remaining filter arguments are
    passed through under their own names. Every line is tagged with the
    entity ``"lead"``.

    Returns:
        The number of lines written.
    """
    query = {
        "updated_from": start,
        "updated_to": end,
        "pipeline_ids": pipeline_ids,
        "include_contacts": include_contacts,
        "include": include,
        "limit": limit,
        "extra_params": extra_params,
    }
    lead_stream = client.iter_leads(**query)
    return write_ndjson(
        lead_stream,
        output_path,
        entity="lead",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def export_contacts_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    start=None,
    end=None,
    contact_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export contacts and write them to an NDJSON file.

    Two modes are supported:

    * With a non-empty ``contact_ids``, contacts are requested by ID from
      ``/api/v4/contacts``. IDs that the listing did not return are then
      fetched one by one through ``client.get_contact_by_id`` on a
      best-effort basis. ``start`` and ``end`` are not used in this mode.
    * Otherwise (``contact_ids`` is ``None`` or an empty collection) contacts
      come from ``client.iter_contacts`` with ``start`` / ``end`` forwarded as
      ``updated_from`` / ``updated_to``.

    Args:
        client: API client used for all requests.
        output_path: Destination NDJSON file.
        account_id: Value stored under ``account_id`` on every line.
        start: Lower bound of the update window (date-window mode only).
        end: Upper bound of the update window (date-window mode only).
        contact_ids: A single ID or any iterable of IDs; ``None`` items are
            dropped and every ID is converted with ``int``.
        limit: Page size passed to the API.
        extra_params: Additional query parameters.
        on_record: Optional callback invoked for every written contact.

    Returns:
        The number of lines written.
    """
    import logging

    logger = logging.getLogger(__name__)

    contact_id_list: Optional[List[int]] = None
    if contact_ids is not None:
        if isinstance(contact_ids, (str, bytes)):
            # Strings are iterable, but here they denote one ID.
            contact_id_list = [int(contact_ids)]
        else:
            try:
                id_iterator = iter(contact_ids)
            except TypeError:
                # Not iterable: a single scalar ID.
                contact_id_list = [int(contact_ids)]
            else:
                contact_id_list = [int(cid) for cid in id_iterator if cid is not None]

    def _iter_contacts():
        if not contact_id_list:
            yield from client.iter_contacts(
                updated_from=start,
                updated_to=end,
                contact_ids=None,
                limit=limit,
                extra_params=extra_params,
            )
            return

        seen: Set[int] = set()
        params = dict(extra_params or {})
        params["filter[id][]"] = [str(cid) for cid in contact_id_list]
        params["page"] = 1
        params["limit"] = limit
        while True:
            response = client._make_request("GET", "/api/v4/contacts", params=params) or {}
            # "_embedded" may be absent or null; treat both as "no contacts".
            contacts = (response.get("_embedded") or {}).get("contacts") or []
            if not contacts:
                break
            for contact in contacts:
                cid = contact.get("id")
                if cid is not None:
                    seen.add(int(cid))
                yield contact
            # Without "_page_count" the current page is assumed to be the
            # last one; anything not returned is picked up by the fallback.
            total_pages = response.get("_page_count", params["page"])
            if params["page"] >= total_pages:
                break
            params["page"] += 1

        # Best-effort fallback for IDs the listing did not return.
        for cid in contact_id_list:
            if cid in seen:
                continue
            try:
                contact = client.get_contact_by_id(cid)
            except Exception:
                # Deliberately non-fatal, but leave a trace instead of
                # dropping the failure silently.
                logger.warning("Failed to fetch contact %s by id; skipping", cid, exc_info=True)
                continue
            if not contact:
                continue
            retrieved_id = contact.get("id")
            if retrieved_id is not None and int(retrieved_id) not in seen:
                seen.add(int(retrieved_id))
                yield contact

    return write_ndjson(
        _iter_contacts(),
        output_path,
        entity="contact",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def export_notes_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    entity: str = "lead",
    start=None,
    end=None,
    note_type=None,
    entity_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export notes and write them to an NDJSON file.

    ``start`` and ``end`` are forwarded to ``client.iter_notes`` as
    ``updated_from`` / ``updated_to``. Lines are tagged ``"<entity>_note"``,
    or plain ``"note"`` when ``entity`` is empty.

    Returns:
        The number of lines written.
    """
    query = {
        "entity": entity,
        "updated_from": start,
        "updated_to": end,
        "note_type": note_type,
        "entity_ids": entity_ids,
        "limit": limit,
        "extra_params": extra_params,
    }
    note_stream = client.iter_notes(**query)
    if entity:
        envelope_entity = f"{entity}_note"
    else:
        envelope_entity = "note"
    return write_ndjson(
        note_stream,
        output_path,
        entity=envelope_entity,
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def export_events_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    entity: Optional[str] = "lead",
    start=None,
    end=None,
    event_type=None,
    entity_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export events and write them to an NDJSON file.

    ``start`` and ``end`` are forwarded to ``client.iter_events`` as
    ``created_from`` / ``created_to``. Lines are tagged ``"<entity>_event"``,
    or plain ``"event"`` when ``entity`` is empty. Unlike the other exporters,
    the envelope timestamp prefers ``created_at`` over ``updated_at``.

    Returns:
        The number of lines written.
    """
    query = {
        "entity": entity,
        "entity_ids": entity_ids,
        "event_type": event_type,
        "created_from": start,
        "created_to": end,
        "limit": limit,
        "extra_params": extra_params,
    }
    event_stream = client.iter_events(**query)
    if entity:
        envelope_entity = f"{entity}_event"
    else:
        envelope_entity = "event"
    return write_ndjson(
        event_stream,
        output_path,
        entity=envelope_entity,
        account_id=account_id,
        timestamp_fields=("created_at", "updated_at"),
        on_record=on_record,
    )
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def export_users_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export users and write them to an NDJSON file.

    Records come from ``client.iter_users`` and are tagged with the entity
    ``"user"``.

    Returns:
        The number of lines written.
    """
    user_stream = client.iter_users(extra_params=extra_params, limit=limit)
    envelope_options = {
        "entity": "user",
        "account_id": account_id,
        "timestamp_fields": ("updated_at", "created_at"),
        "on_record": on_record,
    }
    return write_ndjson(user_stream, output_path, **envelope_options)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def export_pipelines_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export pipelines and write them to an NDJSON file.

    Records come from ``client.iter_pipelines`` and are tagged with the
    entity ``"pipeline"``.

    Returns:
        The number of lines written.
    """
    pipeline_stream = client.iter_pipelines(extra_params=extra_params, limit=limit)
    envelope_options = {
        "entity": "pipeline",
        "account_id": account_id,
        "timestamp_fields": ("updated_at", "created_at"),
        "on_record": on_record,
    }
    return write_ndjson(pipeline_stream, output_path, **envelope_options)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# Public API of this module.
__all__ = [
    # Generic writer
    "write_ndjson",
    # Per-entity exporters
    "export_leads_to_ndjson",
    "export_contacts_to_ndjson",
    "export_notes_to_ndjson",
    "export_events_to_ndjson",
    "export_users_to_ndjson",
    "export_pipelines_to_ndjson",
]
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: amochka
|
|
3
|
-
Version: 0.1.9
|
|
4
|
-
Summary: Python library for working with amoCRM API
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Python library for working with amoCRM API with ETL capabilities
|
|
5
5
|
Author-email: Timur <timurdt@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/yourusername/amochka
|
|
8
8
|
Project-URL: Documentation, https://github.com/yourusername/amochka
|
|
9
9
|
Project-URL: Repository, https://github.com/yourusername/amochka
|
|
10
10
|
Project-URL: Bug Tracker, https://github.com/yourusername/amochka/issues
|
|
11
|
-
Keywords: amocrm,crm,api,client,automation
|
|
11
|
+
Keywords: amocrm,crm,api,client,automation,etl
|
|
12
12
|
Classifier: Development Status :: 4 - Beta
|
|
13
13
|
Classifier: Intended Audience :: Developers
|
|
14
14
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -27,6 +27,8 @@ Requires-Python: >=3.6
|
|
|
27
27
|
Description-Content-Type: text/markdown
|
|
28
28
|
Requires-Dist: requests>=2.25.0
|
|
29
29
|
Requires-Dist: ratelimit>=2.2.0
|
|
30
|
+
Requires-Dist: psycopg2-binary>=2.9.0
|
|
31
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
30
32
|
|
|
31
33
|
# amochka
|
|
32
34
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
amochka/__init__.py,sha256=NFAgMbhBnrx3nF--MeY9Chpu5gtZ5kVn-QYnmO3Nhpk,620
|
|
2
|
+
amochka/client.py,sha256=mGoDEE0XOt-c9f2FmqFVzDMrXQel_zComtyAFXdCHjg,66155
|
|
3
|
+
amochka/etl.py,sha256=N8rXNFbtmlKfsYpgr7HDcP4enoj63XQPWuTDxGuMhw4,8901
|
|
4
|
+
etl/__init__.py,sha256=bp9fPqbKlOc7xzs27diHEvysy1FgBrwlpX6GnR6GL9U,255
|
|
5
|
+
etl/config.py,sha256=YY6M7pib_XD7bjPW8J_iWzt2hoXzDpx6yIvZHmlUlrE,8873
|
|
6
|
+
etl/extractors.py,sha256=-QCBZ6PoJ51j0drNQaH5bLfvjPqAQmfVgaT1D_ZSwjI,12909
|
|
7
|
+
etl/loaders.py,sha256=nQx6TDwnuHVWzjmuKY9v-23hmmk4Ex6ZoGtb3PApO1k,31974
|
|
8
|
+
etl/run_etl.py,sha256=p_2NxJwXMiACMETvRsjrozMgz66U9ezDNSWZXUieNMs,26262
|
|
9
|
+
etl/transformers.py,sha256=OwYJ_9l3oqvy2Y3-umXjAGweOIqlfRI0iSiCFPrcQ8E,17867
|
|
10
|
+
etl/migrations/001_create_tables.sql,sha256=YrSaZjpofC1smjYx0bM4eHQumboruIBY3fwRDlJLLSo,15749
|
|
11
|
+
amochka-0.3.0.dist-info/METADATA,sha256=5dZFPOs2wupzc0VsYVXyk1pJ-F4Kwm997N9EA_Z7htE,6371
|
|
12
|
+
amochka-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
amochka-0.3.0.dist-info/top_level.txt,sha256=grRX8aLFG-yYKPsAqCD6sUBmdLSQeOMHsc9Dl6S7Lzo,12
|
|
14
|
+
amochka-0.3.0.dist-info/RECORD,,
|
etl/__init__.py
ADDED
etl/config.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Конфигурация ETL для amoCRM.
|
|
3
|
+
|
|
4
|
+
Настройки загружаются из переменных окружения или .env файла.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _load_env_file(path: Path) -> Dict[str, str]:
    """Parse a ``.env``-style file into a dictionary.

    Supported syntax, one entry per line:

    * ``KEY=value`` with optional surrounding whitespace;
    * an optional leading ``export `` prefix;
    * values wrapped in a *matching* pair of single or double quotes, which
      are removed;
    * blank lines and lines starting with ``#`` are ignored.

    Lines without ``=`` or with an empty key are skipped. Later occurrences
    of a key overwrite earlier ones.

    Args:
        path: Location of the file.

    Returns:
        The parsed variables, or an empty dict when *path* is not a regular
        file (missing, or a directory such as a virtualenv named ``.env``).
    """
    if not path.is_file():
        return {}

    env: Dict[str, str] = {}
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise
    # become part of the first key.
    with path.open("r", encoding="utf-8-sig") as handler:
        for raw_line in handler:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("export "):
                line = line[len("export "):].lstrip()
            key, separator, value = line.partition("=")
            key = key.strip()
            # An empty key cannot be exported to os.environ, so drop it here.
            if not separator or not key:
                continue
            value = value.strip()
            # Only strip quotes that actually form a pair.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
                value = value[1:-1]
            env[key] = value
    return env
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DatabaseConfig:
    """PostgreSQL connection settings."""

    host: str
    port: int
    dbname: str
    user: str
    # Kept out of the generated repr so the secret does not end up in logs.
    password: str = field(repr=False)
    schema: str = "public"
    sslmode: Optional[str] = None
    connect_timeout: int = 30

    def connection_kwargs(self) -> Dict[str, Any]:
        """Return keyword arguments for the database driver's ``connect()``.

        ``schema`` is not part of the result, and ``sslmode`` is only
        included when it is set to a non-empty value.
        """
        kwargs: Dict[str, Any] = {
            "host": self.host,
            "port": self.port,
            "dbname": self.dbname,
            "user": self.user,
            "password": self.password,
            "connect_timeout": self.connect_timeout,
        }
        if self.sslmode:
            kwargs["sslmode"] = self.sslmode
        return kwargs

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "DatabaseConfig":
        """Build a configuration from a dictionary (e.g. for an Airflow DAG).

        Missing keys, ``None`` and empty strings fall back to the defaults,
        matching ``from_env``. Numeric zero is preserved as given.
        """

        def _value(key: str, default: Any) -> Any:
            raw = d.get(key)
            return default if raw is None or raw == "" else raw

        return cls(
            host=_value("host", "localhost"),
            port=int(_value("port", 5432)),
            dbname=_value("dbname", "amocrm"),
            user=_value("user", "postgres"),
            password=_value("password", ""),
            schema=_value("schema", "public"),
            sslmode=d.get("sslmode"),
            connect_timeout=int(_value("connect_timeout", 30)),
        )

    @classmethod
    def from_env(cls, env_path: Optional[Path] = None) -> "DatabaseConfig":
        """Build a configuration from ``ETL_DB_*`` environment variables.

        When *env_path* is given, the file is loaded first and its entries
        are added to ``os.environ`` without overriding variables that are
        already set. Unset or empty variables fall back to the defaults.

        Raises:
            ValueError: If ``ETL_DB_PORT`` or ``ETL_DB_CONNECT_TIMEOUT`` is
                not an integer.
        """
        if env_path:
            file_env = _load_env_file(env_path)
            for key, value in file_env.items():
                os.environ.setdefault(key, value)

        def _get(key: str, default: Optional[str] = None) -> Optional[str]:
            return os.environ.get(key, default)

        return cls(
            host=_get("ETL_DB_HOST", "localhost") or "localhost",
            port=int(_get("ETL_DB_PORT", "5432") or "5432"),
            dbname=_get("ETL_DB_NAME", "amocrm") or "amocrm",
            user=_get("ETL_DB_USER", "postgres") or "postgres",
            password=_get("ETL_DB_PASSWORD", "") or "",
            schema=_get("ETL_DB_SCHEMA", "public") or "public",
            sslmode=_get("ETL_DB_SSLMODE"),
            connect_timeout=int(_get("ETL_DB_CONNECT_TIMEOUT", "30") or "30"),
        )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
class AmoCRMAccount:
    """Configuration of a single amoCRM account."""

    id: int  # amoCRM account ID (from the URL or the API)
    name: str
    base_url: str
    token_path: Path
    mybi_account_id: int  # internal account_id as used by mybi.ru (kept for compatibility)
    pipeline_ids: Optional[List[int]] = None  # None means all pipelines
    cache_dir: Optional[Path] = None

    def __post_init__(self) -> None:
        """Convert string paths to ``Path`` objects."""
        if isinstance(self.token_path, str):
            self.token_path = Path(self.token_path)
        if isinstance(self.cache_dir, str):
            self.cache_dir = Path(self.cache_dir)

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "AmoCRMAccount":
        """Build an account configuration from a dictionary (e.g. for an Airflow DAG).

        Missing keys, ``None`` and empty values fall back to the defaults.
        A list, tuple or set under ``pipeline_ids`` has its items converted
        to ``int``; any other value is passed through unchanged.
        """
        pipeline_ids = d.get("pipeline_ids")
        if isinstance(pipeline_ids, (list, tuple, set)):
            pipeline_ids = [int(pid) for pid in pipeline_ids]

        cache_dir = d.get("cache_dir")

        return cls(
            id=int(d.get("id") or 0),
            name=d.get("name") or "account",
            base_url=d.get("base_url") or "",
            token_path=Path(d.get("token_path") or "token.json"),
            mybi_account_id=int(d.get("mybi_account_id") or 0),
            pipeline_ids=pipeline_ids,
            cache_dir=Path(cache_dir) if cache_dir else None,
        )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass
class ETLConfig:
    """Top-level ETL configuration."""

    database: DatabaseConfig
    accounts: List[AmoCRMAccount]
    batch_size: int = 100
    window_minutes: int = 120  # default extraction window in minutes (2 hours)
    log_level: str = "INFO"

    @classmethod
    def from_env(cls, env_path: Optional[Path] = None, max_accounts: int = 9) -> "ETLConfig":
        """Build the configuration from environment variables.

        When *env_path* is given, the file is loaded first and its entries
        are added to ``os.environ`` without overriding variables that are
        already set.

        Accounts are read from ``AMO_ACCOUNT_<n>_ID``, ``..._NAME``,
        ``..._URL``, ``..._TOKEN_PATH``, ``..._MYBI_ACCOUNT_ID``,
        ``..._PIPELINE_IDS`` (comma-separated) and ``..._CACHE_DIR`` for
        ``n`` from 1 to *max_accounts*. Indexes without an ``ID`` variable
        are skipped, so gaps in the numbering are allowed.

        Args:
            env_path: Optional ``.env`` file to load first.
            max_accounts: Highest account index that is probed.

        Raises:
            ValueError: If an account has no ``MYBI_ACCOUNT_ID`` or a numeric
                variable cannot be parsed as an integer.
        """
        if env_path:
            for key, value in _load_env_file(env_path).items():
                os.environ.setdefault(key, value)

        # The file, if any, is already merged into os.environ above, so the
        # database section does not need to read it a second time.
        db_config = DatabaseConfig.from_env()

        accounts: List[AmoCRMAccount] = []
        for i in range(1, max_accounts + 1):
            prefix = f"AMO_ACCOUNT_{i}_"
            account_id = os.environ.get(f"{prefix}ID")
            if not account_id:
                continue

            pipeline_ids_str = os.environ.get(f"{prefix}PIPELINE_IDS", "")
            pipeline_ids = [int(pid.strip()) for pid in pipeline_ids_str.split(",") if pid.strip()]

            mybi_id = os.environ.get(f"{prefix}MYBI_ACCOUNT_ID")
            if not mybi_id:
                raise ValueError(f"Не указан {prefix}MYBI_ACCOUNT_ID для аккаунта {account_id}")

            accounts.append(
                AmoCRMAccount(
                    id=int(account_id),
                    name=os.environ.get(f"{prefix}NAME") or f"account_{i}",
                    base_url=os.environ.get(f"{prefix}URL") or "",
                    token_path=Path(os.environ.get(f"{prefix}TOKEN_PATH") or f"token_{i}.json"),
                    mybi_account_id=int(mybi_id),
                    # An empty list means "no filter", i.e. all pipelines.
                    pipeline_ids=pipeline_ids or None,
                    cache_dir=Path(os.environ.get(f"{prefix}CACHE_DIR") or ".cache"),
                )
            )

        return cls(
            database=db_config,
            accounts=accounts,
            batch_size=int(os.environ.get("ETL_BATCH_SIZE") or "100"),
            window_minutes=int(os.environ.get("ETL_WINDOW_MINUTES") or "120"),
            log_level=os.environ.get("ETL_LOG_LEVEL") or "INFO",
        )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# Example configuration for development (can be overridden through .env).
# NOTE(review): the account below looks like a specific real account rather
# than a placeholder - confirm it is meant to ship as the package default.
DEFAULT_CONFIG = ETLConfig(
    accounts=[
        AmoCRMAccount(
            name="bneginskogo",
            id=30019651,
            mybi_account_id=53859,  # internal ID from mybi.ru
            base_url="https://bneginskogo.amocrm.ru",
            pipeline_ids=[5987164, 6241334],
            token_path=Path("token.json"),
            cache_dir=Path(".cache"),
        ),
        # Add further accounts here.
    ],
    database=DatabaseConfig(
        user="postgres",
        password="",
        host="localhost",
        port=5432,
        dbname="amocrm",
        schema="public",
    ),
    window_minutes=120,
    batch_size=100,
)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_config(env_path: Optional[Union[str, Path]] = None) -> ETLConfig:
    """Return the ETL configuration.

    Sources are tried in this order:

    1. *env_path*, when given;
    2. a ``.env`` file in the current working directory;
    3. the process environment, when any non-empty ``ETL_DB_*`` or
       ``AMO_ACCOUNT_*`` variable is set;
    4. ``DEFAULT_CONFIG``.

    Args:
        env_path: Optional path to a ``.env`` file.

    Returns:
        The resolved configuration. In the last case this is the shared
        module-level ``DEFAULT_CONFIG`` object itself, not a copy.
    """
    if env_path:
        return ETLConfig.from_env(Path(env_path))

    # Only a regular file counts: a directory called ".env" (a common
    # virtualenv name) is not a settings file.
    default_env = Path(".env")
    if default_env.is_file():
        return ETLConfig.from_env(default_env)

    # Any configured database or account variable means the environment is
    # the intended source, not only the first account or the DB host.
    env_prefixes = ("ETL_DB_", "AMO_ACCOUNT_")
    if any(value and name.startswith(env_prefixes) for name, value in os.environ.items()):
        return ETLConfig.from_env()

    return DEFAULT_CONFIG
|