amochka 0.1.9__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amochka/__init__.py +28 -0
- amochka/client.py +1375 -0
- amochka/etl.py +302 -0
- {amochka-0.1.9.dist-info → amochka-0.3.0.dist-info}/METADATA +5 -3
- amochka-0.3.0.dist-info/RECORD +14 -0
- amochka-0.3.0.dist-info/top_level.txt +2 -0
- etl/__init__.py +7 -0
- etl/config.py +236 -0
- etl/extractors.py +354 -0
- etl/loaders.py +813 -0
- etl/migrations/001_create_tables.sql +346 -0
- etl/run_etl.py +684 -0
- etl/transformers.py +470 -0
- amochka-0.1.9.dist-info/RECORD +0 -4
- amochka-0.1.9.dist-info/top_level.txt +0 -1
- {amochka-0.1.9.dist-info → amochka-0.3.0.dist-info}/WHEEL +0 -0
etl/extractors.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Выгрузка данных из amoCRM через библиотеку amochka.
|
|
3
|
+
|
|
4
|
+
Обёртки для инкрементальной выгрузки с поддержкой фильтрации.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Union
|
|
11
|
+
|
|
12
|
+
from .config import AmoCRMAccount
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AmoCRMExtractor:
    """Extract data from amoCRM through the amochka client.

    The amochka client is created lazily on first access to ``client``.
    Pipeline and status lookups are cached on the instance once they have
    been loaded completely.
    """

    def __init__(self, account: "AmoCRMAccount", cache_lifetime_hours: int = 24):
        """Initialize the extractor.

        Args:
            account: amoCRM account configuration.
            cache_lifetime_hours: Lifetime of the custom_fields cache, in hours.
        """
        self.account = account
        self.cache_lifetime_hours = cache_lifetime_hours
        self._client = None
        self._pipelines_map: Dict[int, str] = {}
        self._statuses_map: Dict[int, Dict[str, Any]] = {}

    def _ensure_client(self):
        """Create the amochka client on first use (lazy initialization).

        Raises:
            ImportError: If the amochka package cannot be imported.
        """
        if self._client is not None:
            return

        try:
            from amochka import AmoCRMClient, CacheConfig
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError("amochka не установлена. Установите: pip install amochka") from exc

        # Fall back to a local ".cache" directory when the account has none.
        cache_dir = str(self.account.cache_dir) if self.account.cache_dir else ".cache"
        cache_config = CacheConfig.file_cache(
            base_dir=cache_dir,
            lifetime_hours=self.cache_lifetime_hours,
        )

        self._client = AmoCRMClient(
            base_url=self.account.base_url,
            token_file=str(self.account.token_path),
            cache_config=cache_config,
        )

    @property
    def client(self):
        """Return the amoCRM client, creating it if necessary."""
        self._ensure_client()
        return self._client

    def load_pipelines_and_statuses(self) -> Tuple[Dict[int, str], Dict[int, Dict[str, Any]]]:
        """Load pipelines and statuses used for denormalization.

        The result is cached on the instance; once both maps are non-empty,
        later calls return the cached maps without calling the client.

        Returns:
            Tuple[pipelines_map, statuses_map]
            - pipelines_map: {pipeline_id: name}
            - statuses_map: {status_id: {"name": ..., "sort": ..., "pipeline_id": ...}}
        """
        if self._pipelines_map and self._statuses_map:
            return self._pipelines_map, self._statuses_map

        logger.info("Загружаем воронки и статусы для аккаунта %s", self.account.name)

        # Build into locals and publish only after a complete pass, so a
        # failure mid-iteration cannot leave a partial cache that later calls
        # would mistake for the full data set.
        pipelines: Dict[int, str] = {}
        statuses: Dict[int, Dict[str, Any]] = {}

        for pipeline in self.client.iter_pipelines():
            pipeline_id = pipeline.get("id")
            pipelines[pipeline_id] = pipeline.get("name")

            # Statuses come nested under "_embedded"; tolerate explicit nulls.
            embedded = pipeline.get("_embedded") or {}
            for status in embedded.get("statuses") or []:
                statuses[status.get("id")] = {
                    "name": status.get("name"),
                    "sort": status.get("sort"),
                    "pipeline_id": pipeline_id,
                }

        self._pipelines_map = pipelines
        self._statuses_map = statuses

        logger.info("Загружено %d воронок, %d статусов", len(self._pipelines_map), len(self._statuses_map))
        return self._pipelines_map, self._statuses_map

    def iter_leads(
        self,
        updated_from: Optional[datetime] = None,
        updated_to: Optional[datetime] = None,
        pipeline_ids: Optional[List[int]] = None,
        include_contacts: bool = True,
        only_deleted: bool = False,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over leads.

        Args:
            updated_from: Start of the period (by updated_at).
            updated_to: End of the period (by updated_at).
            pipeline_ids: Pipeline filter (None = take it from the account config).
            include_contacts: Include embedded contacts.
            only_deleted: Export only deleted leads (from the trash).

        Yields:
            Dict with lead data from the amoCRM API.
        """
        # Use the account's pipeline_ids unless the caller passed them explicitly.
        if pipeline_ids is None:
            pipeline_ids = self.account.pipeline_ids

        logger.info(
            "Выгружаем сделки: updated_from=%s, updated_to=%s, pipeline_ids=%s, only_deleted=%s",
            updated_from,
            updated_to,
            pipeline_ids,
            only_deleted,
        )

        # extra_params is only populated for the only_deleted filter;
        # otherwise None is passed to the client.
        extra_params = {"filter[only_deleted]": "true"} if only_deleted else None

        count = 0
        for lead in self.client.iter_leads(
            updated_from=updated_from,
            updated_to=updated_to,
            pipeline_ids=pipeline_ids,
            include_contacts=include_contacts,
            extra_params=extra_params,
        ):
            count += 1
            if count % 100 == 0:
                logger.debug("Выгружено %d сделок", count)
            yield lead

        logger.info("Всего выгружено %d сделок", count)

    def iter_contacts(
        self,
        contact_ids: Optional[List[int]] = None,
        updated_from: Optional[datetime] = None,
        updated_to: Optional[datetime] = None,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over contacts.

        Args:
            contact_ids: List of contact IDs to export.
            updated_from: Start of the period (by updated_at).
            updated_to: End of the period (by updated_at).

        Yields:
            Dict with contact data from the amoCRM API.
        """
        logger.info(
            "Выгружаем контакты: contact_ids=%s, updated_from=%s, updated_to=%s",
            f"{len(contact_ids)} шт" if contact_ids else "все",
            updated_from,
            updated_to,
        )

        count = 0
        for contact in self.client.iter_contacts(
            contact_ids=contact_ids,
            updated_from=updated_from,
            updated_to=updated_to,
        ):
            count += 1
            if count % 100 == 0:
                logger.debug("Выгружено %d контактов", count)
            yield contact

        logger.info("Всего выгружено %d контактов", count)

    def iter_events(
        self,
        entity_type: Optional[str] = "lead",
        event_types: Optional[List[str]] = None,
        created_from: Optional[datetime] = None,
        created_to: Optional[datetime] = None,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over events.

        When exactly one event type is given it is passed to the client. With
        several types the client is queried without a type filter and events
        are filtered locally by their ``type`` field.

        Args:
            entity_type: Entity type (lead, contact, company, etc.).
            event_types: Event type filter.
            created_from: Start of the period (by created_at).
            created_to: End of the period (by created_at).

        Yields:
            Dict with event data from the amoCRM API.
        """
        logger.info(
            "Выгружаем события: entity_type=%s, event_types=%s, created_from=%s",
            entity_type,
            event_types,
            created_from,
        )

        single_type = event_types[0] if event_types and len(event_types) == 1 else None
        # Local filtering is only needed when several types were requested.
        allowed_types = frozenset(event_types) if event_types and len(event_types) > 1 else None

        count = 0
        for event in self.client.iter_events(
            entity_type=entity_type,
            event_type=single_type,
            start=created_from,
            end=created_to,
        ):
            if allowed_types is not None and event.get("type") not in allowed_types:
                continue

            count += 1
            if count % 500 == 0:
                logger.debug("Выгружено %d событий", count)
            yield event

        logger.info("Всего выгружено %d событий", count)

    def iter_notes(
        self,
        entity_type: str = "lead",
        note_type: Optional[str] = None,
        entity_ids: Optional[List[int]] = None,
        updated_from: Optional[datetime] = None,
        updated_to: Optional[datetime] = None,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over notes.

        Args:
            entity_type: Entity type (lead, contact, company).
            note_type: Note type (common, call_in, call_out, etc.).
            entity_ids: List of entity IDs; when non-empty, only notes whose
                ``entity_id`` is in this list are yielded. None or an empty
                list means no filtering.
            updated_from: Start of the period.
            updated_to: End of the period.

        Yields:
            Dict with note data from the amoCRM API.
        """
        logger.info(
            "Выгружаем примечания: entity_type=%s, note_type=%s, entity_ids=%s",
            entity_type,
            note_type,
            f"{len(entity_ids)} шт" if entity_ids else "все",
        )

        # entity_ids is not forwarded to the client, so apply it locally.
        # NOTE(review): assumes every note dict carries an "entity_id" key, as
        # in the amoCRM v4 notes payload - confirm against the amochka client.
        wanted_ids = set(entity_ids) if entity_ids else None

        count = 0
        for note in self.client.iter_notes(
            entity=entity_type,
            note_type=note_type,
            start=updated_from,
            end=updated_to,
        ):
            if wanted_ids is not None and note.get("entity_id") not in wanted_ids:
                continue

            count += 1
            if count % 100 == 0:
                logger.debug("Выгружено %d примечаний", count)
            yield note

        logger.info("Всего выгружено %d примечаний", count)

    def iter_users(self, with_groups: bool = True, with_roles: bool = True) -> Iterator[Dict[str, Any]]:
        """Iterate over users.

        Args:
            with_groups: Include group information.
            with_roles: Include role information.

        Yields:
            Dict with user data from the amoCRM API.
        """
        logger.info("Выгружаем пользователей")

        # Build the comma-separated "with" parameter from the enabled flags.
        with_parts = [
            part
            for part, enabled in (("groups", with_groups), ("roles", with_roles))
            if enabled
        ]
        extra_params = {"with": ",".join(with_parts)} if with_parts else {}

        count = 0
        for user in self.client.iter_users(extra_params=extra_params):
            count += 1
            yield user

        logger.info("Всего выгружено %d пользователей", count)

    def iter_pipelines(self) -> Iterator[Dict[str, Any]]:
        """Iterate over pipelines.

        Yields:
            Dict with pipeline data, with statuses under ``_embedded``.
        """
        logger.info("Выгружаем воронки")

        count = 0
        for pipeline in self.client.iter_pipelines():
            count += 1
            yield pipeline

        logger.info("Всего выгружено %d воронок", count)

    def collect_contact_ids_from_leads(
        self,
        leads: Iterator[Dict[str, Any]],
    ) -> Tuple[List[Dict[str, Any]], Set[int]]:
        """Collect contact IDs from leads.

        Useful for exporting the linked contacts afterwards. The leads
        iterator is fully consumed and materialized into a list.

        Args:
            leads: Iterator of leads.

        Returns:
            Tuple[leads_list, contact_ids_set]
        """
        leads_list: List[Dict[str, Any]] = []
        contact_ids: Set[int] = set()

        for lead in leads:
            leads_list.append(lead)

            # "_embedded" or "contacts" may be missing or explicitly null.
            embedded = lead.get("_embedded") or {}
            for contact in embedded.get("contacts") or []:
                contact_id = contact.get("id")
                if contact_id:
                    contact_ids.add(int(contact_id))

        logger.info("Собрано %d уникальных контактов из %d сделок", len(contact_ids), len(leads_list))
        return leads_list, contact_ids
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def create_extractor(account: AmoCRMAccount, **kwargs) -> AmoCRMExtractor:
    """Build an ``AmoCRMExtractor`` for the given account.

    Args:
        account: Account configuration.
        **kwargs: Extra keyword arguments forwarded to ``AmoCRMExtractor``.

    Returns:
        The newly constructed extractor.
    """
    extractor = AmoCRMExtractor(account, **kwargs)
    return extractor
|