amochka 0.1.8__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amochka/__init__.py +1 -1
- amochka/client.py +305 -31
- amochka-0.3.0.dist-info/METADATA +126 -0
- amochka-0.3.0.dist-info/RECORD +14 -0
- {amochka-0.1.8.dist-info → amochka-0.3.0.dist-info}/top_level.txt +1 -0
- etl/__init__.py +7 -0
- etl/config.py +236 -0
- etl/extractors.py +354 -0
- etl/loaders.py +813 -0
- etl/migrations/001_create_tables.sql +346 -0
- etl/run_etl.py +684 -0
- etl/transformers.py +470 -0
- amochka-0.1.8.dist-info/METADATA +0 -40
- amochka-0.1.8.dist-info/RECORD +0 -7
- {amochka-0.1.8.dist-info → amochka-0.3.0.dist-info}/WHEEL +0 -0
etl/run_etl.py
ADDED
|
@@ -0,0 +1,684 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Главный скрипт ETL для выгрузки данных из amoCRM в PostgreSQL.
|
|
4
|
+
|
|
5
|
+
Использование:
|
|
6
|
+
python -m etl.run_etl [--env .env] [--migrate] [--full] [--entities leads,contacts,events] [--window MINUTES] [--account NAME] [--log-level LEVEL]
|
|
7
|
+
|
|
8
|
+
Примеры:
|
|
9
|
+
# Инкрементальная выгрузка сделок
|
|
10
|
+
python -m etl.run_etl
|
|
11
|
+
|
|
12
|
+
# Запуск миграций + полная выгрузка
|
|
13
|
+
python -m etl.run_etl --migrate --full
|
|
14
|
+
|
|
15
|
+
# Только события за последние 24 часа
|
|
16
|
+
python -m etl.run_etl --entities events --window 1440
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import logging
|
|
21
|
+
import sys
|
|
22
|
+
from datetime import datetime, timedelta, timezone
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import List, Optional, Set
|
|
25
|
+
|
|
26
|
+
from .config import AmoCRMAccount, ETLConfig, get_config
|
|
27
|
+
from .extractors import AmoCRMExtractor
|
|
28
|
+
from .loaders import PostgresLoader
|
|
29
|
+
from .transformers import (
|
|
30
|
+
ContactTransformer,
|
|
31
|
+
EventTransformer,
|
|
32
|
+
LeadTransformer,
|
|
33
|
+
NoteTransformer,
|
|
34
|
+
PipelineTransformer,
|
|
35
|
+
UserTransformer,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def setup_logging(level: str = "INFO") -> None:
    """Configure root logging for the ETL run.

    Args:
        level: Name of a standard ``logging`` level, case-insensitive
            (e.g. ``"INFO"`` or ``"debug"``).

    Raises:
        ValueError: If ``level`` does not name a numeric ``logging`` level.
    """
    # A bare getattr() either fails with an opaque AttributeError or happily
    # returns a non-level attribute (e.g. logging.BASIC_FORMAT), so require
    # an actual integer level before handing it to basicConfig.
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError("Unknown log level: {!r}".format(level))

    logging.basicConfig(
        level=numeric_level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def run_migrations(loader: PostgresLoader, migrations_dir: Path) -> None:
    """Log the migrations directory and delegate to ``loader.run_migrations``.

    Args:
        loader: Loader whose ``run_migrations`` method is invoked.
        migrations_dir: Directory passed through to the loader unchanged.
    """
    logger.info("Выполняем миграции из %s", migrations_dir)
    loader.run_migrations(migrations_dir)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def sync_pipelines_and_users(
    extractor: AmoCRMExtractor,
    loader: PostgresLoader,
    mybi_account_id: int,
) -> None:
    """Upsert pipelines, their statuses and users, then commit once.

    Args:
        extractor: Source of pipelines (``iter_pipelines``) and users
            (``iter_users``).
        loader: Provides the DB connection and the ``upsert_*`` operations.
        mybi_account_id: Account id handed to both transformers.
    """
    pipelines = PipelineTransformer(mybi_account_id)
    users = UserTransformer(mybi_account_id)

    with loader.connection() as conn, conn.cursor() as cursor:
        # Pipelines together with their statuses
        logger.info("Синхронизируем воронки и статусы")
        for raw_pipeline in extractor.iter_pipelines():
            record, status_records = pipelines.transform_pipeline(raw_pipeline)
            loader.upsert_pipeline(cursor, record)
            for status_record in status_records:
                loader.upsert_status(cursor, status_record)

        # Users
        logger.info("Синхронизируем пользователей")
        for raw_user in extractor.iter_users():
            loader.upsert_user(cursor, users.transform(raw_user))

        conn.commit()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def sync_leads_with_contacts(
    extractor: AmoCRMExtractor,
    loader: PostgresLoader,
    mybi_account_id: int,
    updated_from: Optional[datetime] = None,
    updated_to: Optional[datetime] = None,
    pipeline_ids: Optional[List[int]] = None,
    batch_size: int = 100,
) -> dict:
    """Load leads together with the contacts they reference.

    Steps:
        1. Pull leads from the API and collect the contact ids they mention.
        2. Load the contacts that are not in the database yet.
        3. Load the leads, passing user/contact id maps built from the
           database, and store the highest ``updated_at`` seen in the ETL state.

    Args:
        extractor: API extractor used for leads, contacts and reference data.
        loader: Database loader (connection, id maps, load/upsert operations).
        mybi_account_id: Account id used for transformers and DB lookups.
        updated_from: Lower bound passed to ``iter_leads``; also the initial
            value of the tracked maximum ``updated_at``.
        updated_to: Upper bound passed to ``iter_leads``.
        pipeline_ids: Pipeline filter passed to ``iter_leads``.
        batch_size: Number of records between intermediate commits.

    Returns:
        Dict with keys ``leads_count`` and ``contacts_count``.
    """
    contact_transformer = ContactTransformer(mybi_account_id)

    # Reference data the lead transformer needs for denormalisation
    pipelines_map, statuses_map = extractor.load_pipelines_and_statuses()
    lead_transformer = LeadTransformer(mybi_account_id, pipelines_map, statuses_map)

    # 1. Collect leads and the contact ids they reference
    leads_list, contact_ids = extractor.collect_contact_ids_from_leads(
        extractor.iter_leads(
            updated_from=updated_from,
            updated_to=updated_to,
            pipeline_ids=pipeline_ids,
            include_contacts=True,
        )
    )

    if not leads_list:
        logger.info("Нет сделок для загрузки")
        return {"leads_count": 0, "contacts_count": 0}

    logger.info(
        "Найдено %d сделок, %d уникальных контактов",
        len(leads_list),
        len(contact_ids),
    )

    # 2. Load contacts that the database does not know yet
    contacts_loaded = 0
    if contact_ids:
        with loader.connection() as conn, conn.cursor() as cursor:
            known_contacts = loader.build_contact_id_map(cursor, mybi_account_id)
            missing_contact_ids = contact_ids - set(known_contacts.keys())

            if missing_contact_ids:
                logger.info("Загружаем %d недостающих контактов", len(missing_contact_ids))

                missing_contacts = extractor.iter_contacts(contact_ids=list(missing_contact_ids))
                for raw_contact in missing_contacts:
                    loader.load_transformed_contact(
                        cursor, contact_transformer.transform(raw_contact)
                    )
                    contacts_loaded += 1

                    if contacts_loaded % batch_size == 0:
                        conn.commit()

                conn.commit()
                logger.info("Загружено %d контактов", contacts_loaded)

    # 3. Load the leads themselves
    leads_loaded = 0
    max_updated_at = updated_from or datetime.min.replace(tzinfo=timezone.utc)

    with loader.connection() as conn, conn.cursor() as cursor:
        # Id maps are rebuilt here so they include the contacts loaded in step 2
        user_id_map = loader.build_user_id_map(cursor, mybi_account_id)
        contact_id_map = loader.build_contact_id_map(cursor, mybi_account_id)
        logger.debug("Маппинги: users=%d, contacts=%d", len(user_id_map), len(contact_id_map))

        for position, lead in enumerate(leads_list, start=1):
            loader.load_transformed_lead(
                cursor, lead_transformer.transform(lead), user_id_map, contact_id_map
            )
            leads_loaded += 1

            # Track the highest updated_at among the loaded leads
            updated_ts = lead.get("updated_at")
            if updated_ts:
                max_updated_at = max(
                    max_updated_at,
                    datetime.fromtimestamp(updated_ts, tz=timezone.utc),
                )

            if position % batch_size == 0:
                conn.commit()
                logger.debug("Закоммичено %d сделок", position)

        # Persist the ETL state
        if leads_loaded > 0:
            # The state row is tied to a pipeline only when exactly one is configured
            single_pipeline_id = (
                pipeline_ids[0] if pipeline_ids and len(pipeline_ids) == 1 else None
            )
            loader.update_etl_state(
                cursor,
                entity_type="leads",
                account_id=mybi_account_id,
                last_updated_at=max_updated_at,
                records_loaded=leads_loaded,
                pipeline_id=single_pipeline_id,
            )

        conn.commit()

    logger.info("Загружено %d сделок, %d контактов", leads_loaded, contacts_loaded)
    return {"leads_count": leads_loaded, "contacts_count": contacts_loaded}
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def mark_deleted_leads(
    extractor: AmoCRMExtractor,
    loader: PostgresLoader,
    mybi_account_id: int,
    pipeline_ids: Optional[List[int]] = None,
) -> int:
    """Flag leads reported as deleted by amoCRM (``is_deleted = true``).

    Requests leads with ``only_deleted=True`` from the extractor and updates
    the matching rows of ``amocrm_leads`` that are not flagged yet.

    Args:
        extractor: API extractor used to list deleted leads.
        loader: Database loader providing the connection.
        mybi_account_id: Account id used in the ``UPDATE`` filter.
        pipeline_ids: Pipeline filter passed to ``iter_leads``.

    Returns:
        Number of leads whose row was actually updated.
    """
    # Ids of deleted leads as reported by amoCRM
    deleted_lead_ids = [
        lead.get("id")
        for lead in extractor.iter_leads(pipeline_ids=pipeline_ids, only_deleted=True)
    ]

    if not deleted_lead_ids:
        logger.info("Нет удалённых сделок для пометки")
        return 0

    logger.info("Найдено %d удалённых сделок в amoCRM", len(deleted_lead_ids))

    # Flag them in the database, counting only rows that changed
    marked_count = 0
    with loader.connection() as conn, conn.cursor() as cursor:
        for lead_id in deleted_lead_ids:
            cursor.execute(
                """
                UPDATE amocrm_leads
                SET is_deleted = true
                WHERE account_id = %s AND lead_id = %s AND is_deleted = false
                """,
                (mybi_account_id, lead_id),
            )
            if cursor.rowcount > 0:
                marked_count += 1

        conn.commit()

    logger.info("Помечено как удалённые: %d сделок", marked_count)
    return marked_count
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def sync_contacts(
    extractor: AmoCRMExtractor,
    loader: PostgresLoader,
    mybi_account_id: int,
    contact_ids: Optional[Set[int]] = None,
    updated_from: Optional[datetime] = None,
    updated_to: Optional[datetime] = None,
    batch_size: int = 100,
) -> int:
    """Load contacts updated within a period, optionally restricted by id.

    Args:
        extractor: API extractor used to iterate contacts.
        loader: Database loader (connection, contact load, ETL state).
        mybi_account_id: Account id used for the transformer and ETL state.
        contact_ids: If given (and non-empty), only contacts whose id is in
            this set are loaded; the rest are counted as skipped.
        updated_from: Start of the ``updated_at`` period.
        updated_to: End of the ``updated_at`` period.
        batch_size: Number of loaded contacts between intermediate commits.

    Returns:
        Number of contacts loaded.
    """
    contact_transformer = ContactTransformer(mybi_account_id)

    # All contacts updated within the period
    updated_contacts = extractor.iter_contacts(
        updated_from=updated_from,
        updated_to=updated_to,
    )

    loaded_count = 0
    skipped_count = 0
    # ETL state is written only if the tracked maximum moves past this floor
    watermark_floor = updated_from or datetime.min.replace(tzinfo=timezone.utc)
    max_updated_at = watermark_floor

    with loader.connection() as conn, conn.cursor() as cursor:
        for contact in updated_contacts:
            contact_id = contact.get("id")

            # Apply the optional id filter
            if contact_ids and contact_id not in contact_ids:
                skipped_count += 1
                continue

            loader.load_transformed_contact(cursor, contact_transformer.transform(contact))
            loaded_count += 1

            # Track the highest updated_at among the loaded contacts
            updated_ts = contact.get("updated_at")
            if updated_ts:
                max_updated_at = max(
                    max_updated_at,
                    datetime.fromtimestamp(updated_ts, tz=timezone.utc),
                )

            if loaded_count % batch_size == 0:
                conn.commit()
                logger.debug("Закоммичено %d контактов", loaded_count)

        # Persist the ETL state when the maximum advanced
        if max_updated_at > watermark_floor:
            loader.update_etl_state(
                cursor,
                entity_type="contacts",
                account_id=mybi_account_id,
                last_updated_at=max_updated_at,
                records_loaded=loaded_count,
            )

        conn.commit()

    if contact_ids:
        logger.info("Загружено %d контактов (пропущено %d - не в воронках)", loaded_count, skipped_count)
    else:
        logger.info("Загружено %d контактов", loaded_count)
    return loaded_count
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def sync_events(
    extractor: AmoCRMExtractor,
    loader: PostgresLoader,
    mybi_account_id: int,
    created_from: Optional[datetime] = None,
    created_to: Optional[datetime] = None,
    event_types: Optional[List[str]] = None,
    batch_size: int = 100,
) -> int:
    """Load lead events and link them to leads already stored in the database.

    IMPORTANT: events must be loaded AFTER leads. ``leads_id`` is filled from
    the map returned by ``loader.build_lead_id_map`` (amoCRM lead id ->
    internal id); events whose lead is not in that map are skipped.

    Args:
        extractor: API extractor used to iterate events.
        loader: Database loader (connection, lead id map, upsert, ETL state).
        mybi_account_id: Account id used for the transformer and DB lookups.
        created_from: Start of the ``created_at`` period; also the initial
            value of the tracked maximum ``created_at``.
        created_to: End of the ``created_at`` period.
        event_types: Event type filter; defaults to ``lead_added`` and
            ``lead_status_changed``.
        batch_size: Number of loaded events between intermediate commits.

    Returns:
        Number of events loaded.
    """
    if event_types is None:
        event_types = ["lead_added", "lead_status_changed"]

    event_transformer = EventTransformer(mybi_account_id)

    events_iter = extractor.iter_events(
        entity_type="lead",
        event_types=event_types,
        created_from=created_from,
        created_to=created_to,
    )

    loaded_count = 0
    skipped_count = 0
    max_created_at = created_from or datetime.min.replace(tzinfo=timezone.utc)

    with loader.connection() as conn:
        with conn.cursor() as cursor:
            # Build the amoCRM lead_id -> internal id map
            logger.info("Загружаем маппинг lead_id -> internal_id")
            lead_id_map = loader.build_lead_id_map(cursor, mybi_account_id)
            logger.info("Загружено %d сделок в маппинг", len(lead_id_map))

            for event in events_iter:
                transformed = event_transformer.transform(event, entity_type="lead")

                # Replace the amoCRM entity_id with the internal lead id
                amo_lead_id = event.get("entity_id")
                internal_lead_id = lead_id_map.get(amo_lead_id)

                if internal_lead_id is None:
                    # The lead is not in the database: skip the event
                    skipped_count += 1
                    continue

                transformed.event["leads_id"] = internal_lead_id
                loader.upsert_event(cursor, transformed.event)
                loaded_count += 1

                # Track the highest created_at among the loaded events
                event_created = event.get("created_at")
                if event_created:
                    event_dt = datetime.fromtimestamp(event_created, tz=timezone.utc)
                    if event_dt > max_created_at:
                        max_created_at = event_dt

                # Key the batch commit on loaded events. The stream index
                # would miss a boundary whenever the event at that position
                # is skipped, and would count skipped events in the log.
                if loaded_count % batch_size == 0:
                    conn.commit()
                    logger.debug("Закоммичено %d событий", loaded_count)

            if loaded_count > 0:
                loader.update_etl_state(
                    cursor,
                    entity_type="events",
                    account_id=mybi_account_id,
                    last_updated_at=max_created_at,
                    records_loaded=loaded_count,
                )

            conn.commit()

    logger.info("Загружено %d событий (пропущено %d - сделки не найдены)", loaded_count, skipped_count)
    return loaded_count
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def sync_notes(
    extractor: AmoCRMExtractor,
    loader: PostgresLoader,
    mybi_account_id: int,
    updated_from: Optional[datetime] = None,
    updated_to: Optional[datetime] = None,
    note_type: Optional[str] = "common",
    batch_size: int = 100,
) -> int:
    """Load lead notes and link them to leads already stored in the database.

    IMPORTANT: notes must be loaded AFTER leads. ``leads_id`` is filled from
    the map returned by ``loader.build_lead_id_map`` (amoCRM lead id ->
    internal id); notes whose lead is not in that map are skipped.
    No ETL state is written for notes.

    Args:
        extractor: API extractor used to iterate notes.
        loader: Database loader (connection, lead id map, upsert).
        mybi_account_id: Account id used for the transformer and DB lookups.
        updated_from: Start of the ``updated_at`` period.
        updated_to: End of the ``updated_at`` period.
        note_type: Note type filter passed to ``iter_notes``.
        batch_size: Number of loaded notes between intermediate commits.

    Returns:
        Number of notes loaded.
    """
    note_transformer = NoteTransformer(mybi_account_id)

    notes_iter = extractor.iter_notes(
        entity_type="lead",
        note_type=note_type,
        updated_from=updated_from,
        updated_to=updated_to,
    )

    loaded_count = 0
    skipped_count = 0

    with loader.connection() as conn:
        with conn.cursor() as cursor:
            # Build the amoCRM lead_id -> internal id map
            lead_id_map = loader.build_lead_id_map(cursor, mybi_account_id)

            for note in notes_iter:
                note_record = note_transformer.transform(note, entity_type="lead")

                # Replace the amoCRM entity_id with the internal lead id
                amo_lead_id = note.get("entity_id")
                internal_lead_id = lead_id_map.get(amo_lead_id)

                if internal_lead_id is None:
                    skipped_count += 1
                    continue

                note_record["leads_id"] = internal_lead_id
                loader.upsert_note(cursor, note_record)
                loaded_count += 1

                # Key the batch commit on loaded notes. The stream index
                # would miss a boundary whenever the note at that position
                # is skipped.
                if loaded_count % batch_size == 0:
                    conn.commit()

            conn.commit()

    logger.info("Загружено %d примечаний (пропущено %d)", loaded_count, skipped_count)
    return loaded_count
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _load_etl_watermark(
    loader: PostgresLoader,
    entity_type: str,
    mybi_account_id: int,
) -> Optional[datetime]:
    """Read the stored ETL state for one entity type via ``loader.get_etl_state``.

    Returns whatever the loader returns; callers treat a falsy value as
    "no state stored yet".
    """
    with loader.connection() as conn:
        with conn.cursor() as cursor:
            return loader.get_etl_state(cursor, entity_type, mybi_account_id)


def run_etl_for_account(
    account: AmoCRMAccount,
    loader: PostgresLoader,
    entities: List[str],
    window_minutes: int,
    full_sync: bool = False,
    batch_size: int = 100,
) -> dict:
    """Run the ETL for a single account.

    Args:
        account: Account configuration (name, ids, pipeline ids).
        loader: Database loader shared by all sync steps.
        entities: Names of the entities to synchronise (``leads``,
            ``contacts``, ``events``, ``notes``, ``pipelines``, ``users``).
        window_minutes: Size of the incremental window, counted back from now.
        full_sync: If true, no date filter is applied and stored ETL state
            is ignored.
        batch_size: Number of records between intermediate commits.

    Returns:
        Statistics about the loaded records, keyed by step name.
    """
    mybi_id = account.mybi_account_id

    logger.info("=" * 60)
    logger.info("Начинаем ETL для аккаунта: %s (amo_id=%d, mybi_id=%d)", account.name, account.id, mybi_id)
    logger.info("=" * 60)

    extractor = AmoCRMExtractor(account)
    stats = {}

    # Determine the time window
    now = datetime.now(timezone.utc)

    if full_sync:
        updated_from = None
        updated_to = None
        logger.info("Режим полной синхронизации (без фильтра по дате)")
    else:
        updated_from = now - timedelta(minutes=window_minutes)
        updated_to = now
        logger.info("Инкрементальная синхронизация: %s - %s", updated_from.isoformat(), updated_to.isoformat())

    # Reference data is refreshed whenever any of these entities is requested
    if "pipelines" in entities or "users" in entities or "leads" in entities or "contacts" in entities:
        sync_pipelines_and_users(extractor, loader, mybi_id)
        stats["pipelines_users"] = "synced"

    # Leads + contacts (loaded together so contacts_id can be mapped)
    if "leads" in entities:
        leads_updated_from = updated_from
        if not full_sync:
            last_updated = _load_etl_watermark(loader, "leads", mybi_id)
            if last_updated:
                leads_updated_from = last_updated
                logger.info("Используем last_updated_at из БД: %s", leads_updated_from.isoformat())

        # Main load: leads and the contacts they reference
        result = sync_leads_with_contacts(
            extractor,
            loader,
            mybi_id,
            updated_from=leads_updated_from,
            updated_to=updated_to,
            pipeline_ids=account.pipeline_ids,
            batch_size=batch_size,
        )
        stats["leads"] = result["leads_count"]
        stats["contacts_from_leads"] = result["contacts_count"]

        # Flag deleted leads (is_deleted = true)
        deleted_count = mark_deleted_leads(
            extractor,
            loader,
            mybi_id,
            pipeline_ids=account.pipeline_ids,
        )
        stats["leads_marked_deleted"] = deleted_count

    # Updated contacts (only those that have leads in the configured pipelines)
    if "contacts" in entities or "leads" in entities:
        contacts_updated_from = updated_from
        if not full_sync:
            last_updated = _load_etl_watermark(loader, "contacts", mybi_id)
            if last_updated:
                contacts_updated_from = last_updated

        # Contact ids referenced by leads in the configured pipelines
        with loader.connection() as conn:
            with conn.cursor() as cursor:
                contact_ids_in_pipelines = loader.get_contact_ids_from_leads(
                    cursor, mybi_id, account.pipeline_ids
                )

        if contact_ids_in_pipelines:
            stats["contacts_updated"] = sync_contacts(
                extractor,
                loader,
                mybi_id,
                contact_ids=contact_ids_in_pipelines,  # only contacts from the configured pipelines
                updated_from=contacts_updated_from,
                updated_to=updated_to,
                batch_size=batch_size,
            )
        else:
            stats["contacts_updated"] = 0

    # Events
    if "events" in entities:
        # Use a dedicated variable: overwriting the shared ``updated_from``
        # would leak the events watermark into the notes window below.
        events_created_from = updated_from
        if not full_sync:
            last_updated = _load_etl_watermark(loader, "events", mybi_id)
            if last_updated:
                events_created_from = last_updated

        stats["events"] = sync_events(
            extractor,
            loader,
            mybi_id,
            created_from=events_created_from,
            created_to=updated_to,
            batch_size=batch_size,
        )

    # Notes (no stored state: always the plain window)
    if "notes" in entities:
        stats["notes"] = sync_notes(
            extractor,
            loader,
            mybi_id,
            updated_from=updated_from,
            updated_to=updated_to,
            batch_size=batch_size,
        )

    logger.info("ETL для аккаунта %s завершён. Статистика: %s", account.name, stats)
    return stats
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def main():
    """Command-line entry point: parse arguments and run the ETL per account.

    Every selected account is processed even if an earlier one fails.
    Exits with status 1 when no account matches the configuration or when
    at least one account failed, so schedulers can detect the failure.
    """
    parser = argparse.ArgumentParser(description="ETL amoCRM → PostgreSQL")
    parser.add_argument("--env", type=str, help="Путь к .env файлу")
    parser.add_argument("--migrate", action="store_true", help="Выполнить SQL-миграции")
    parser.add_argument("--full", action="store_true", help="Полная синхронизация (без инкремента)")
    parser.add_argument(
        "--entities",
        type=str,
        default="leads,contacts,events",
        help="Сущности для синхронизации (через запятую): leads,contacts,events,notes,pipelines,users",
    )
    parser.add_argument("--window", type=int, help="Окно выгрузки в минутах (перезаписывает конфиг)")
    parser.add_argument("--account", type=str, help="Имя конкретного аккаунта (по умолчанию все)")
    parser.add_argument("--log-level", type=str, default="INFO", help="Уровень логирования")

    args = parser.parse_args()

    # Configure logging
    setup_logging(args.log_level)

    # Load configuration
    env_path = Path(args.env) if args.env else None
    config = get_config(env_path)

    if args.window:
        config.window_minutes = args.window

    # Drop empty tokens produced by stray commas (e.g. "leads,,events")
    entities = [name for name in (part.strip() for part in args.entities.split(",")) if name]
    logger.info("Сущности для синхронизации: %s", entities)

    # Initialise the loader
    loader = PostgresLoader(config.database)

    # Run migrations if requested
    if args.migrate:
        migrations_dir = Path(__file__).parent / "migrations"
        run_migrations(loader, migrations_dir)

    # Select the accounts to process
    accounts = config.accounts
    if args.account:
        accounts = [a for a in accounts if a.name == args.account]
        if not accounts:
            logger.error("Аккаунт '%s' не найден в конфигурации", args.account)
            sys.exit(1)

    if not accounts:
        logger.error("Нет аккаунтов для обработки. Проверьте конфигурацию.")
        sys.exit(1)

    # Run the ETL for each account; one failure must not stop the others
    all_stats = {}
    failed = False
    for account in accounts:
        try:
            stats = run_etl_for_account(
                account,
                loader,
                entities,
                config.window_minutes,
                full_sync=args.full,
                batch_size=config.batch_size,
            )
            all_stats[account.name] = stats
        except Exception as e:
            logger.exception("Ошибка при обработке аккаунта %s: %s", account.name, e)
            all_stats[account.name] = {"error": str(e)}
            failed = True

    # Final summary
    logger.info("=" * 60)
    logger.info("ETL завершён. Итоговая статистика:")
    for account_name, stats in all_stats.items():
        logger.info("  %s: %s", account_name, stats)
    logger.info("=" * 60)

    # Report failure through the exit status instead of exiting 0
    if failed:
        sys.exit(1)
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
# Script entry point: run the CLI when the module is executed directly
# (e.g. ``python -m etl.run_etl``).
if __name__ == "__main__":
    main()
|