amochka 0.1.8__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amochka/__init__.py +1 -1
- amochka/client.py +305 -31
- amochka-0.3.0.dist-info/METADATA +126 -0
- amochka-0.3.0.dist-info/RECORD +14 -0
- {amochka-0.1.8.dist-info → amochka-0.3.0.dist-info}/top_level.txt +1 -0
- etl/__init__.py +7 -0
- etl/config.py +236 -0
- etl/extractors.py +354 -0
- etl/loaders.py +813 -0
- etl/migrations/001_create_tables.sql +346 -0
- etl/run_etl.py +684 -0
- etl/transformers.py +470 -0
- amochka-0.1.8.dist-info/METADATA +0 -40
- amochka-0.1.8.dist-info/RECORD +0 -7
- {amochka-0.1.8.dist-info → amochka-0.3.0.dist-info}/WHEEL +0 -0
etl/config.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Конфигурация ETL для amoCRM.
|
|
3
|
+
|
|
4
|
+
Настройки загружаются из переменных окружения или .env файла.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _load_env_file(path: Path) -> Dict[str, str]:
    """Parse a ``.env``-style file into a dictionary.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Every other line is split on the first ``=``; key and value are
    stripped of surrounding whitespace. A value wrapped in a *matching* pair
    of single or double quotes has that pair removed.

    Args:
        path: Location of the env file.

    Returns:
        Mapping of variable names to values; empty when ``path`` does not
        exist.
    """
    if not path.exists():
        return {}

    env: Dict[str, str] = {}
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise become part of the first key.
    with path.open("r", encoding="utf-8-sig") as handler:
        for raw_line in handler:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            if not separator:
                continue
            value = value.strip()
            # Unquote only when both ends carry the same quote character, so
            # a value such as "abc' is kept verbatim instead of being clipped.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            env[key.strip()] = value
    return env
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DatabaseConfig:
    """PostgreSQL connection settings.

    ``schema`` is stored on the config but is not part of the keyword
    arguments produced by :meth:`connection_kwargs`.
    """

    host: str
    port: int
    dbname: str
    user: str
    password: str
    schema: str = "public"
    sslmode: Optional[str] = None
    connect_timeout: int = 30

    def connection_kwargs(self) -> Dict[str, Any]:
        """Build the keyword arguments for ``psycopg.connect()``.

        ``sslmode`` is added only when it holds a non-empty value.
        """
        params: Dict[str, Any] = dict(
            host=self.host,
            port=self.port,
            dbname=self.dbname,
            user=self.user,
            password=self.password,
            connect_timeout=self.connect_timeout,
        )
        if not self.sslmode:
            return params
        params["sslmode"] = self.sslmode
        return params

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "DatabaseConfig":
        """Create a config from a dictionary (used for Airflow DAGs).

        Missing keys fall back to local-development defaults; ``port`` and
        ``connect_timeout`` are converted with ``int()``.
        """
        lookup = d.get
        return cls(
            host=lookup("host", "localhost"),
            port=int(lookup("port", 5432)),
            dbname=lookup("dbname", "amocrm"),
            user=lookup("user", "postgres"),
            password=lookup("password", ""),
            schema=lookup("schema", "public"),
            sslmode=lookup("sslmode"),
            connect_timeout=int(lookup("connect_timeout", 30)),
        )

    @classmethod
    def from_env(cls, env_path: Optional[Path] = None) -> "DatabaseConfig":
        """Create a config from ``ETL_DB_*`` environment variables.

        When ``env_path`` is given, the file is read first and its entries
        are copied into ``os.environ`` without overriding variables that are
        already set. Unset or empty variables fall back to defaults.
        """
        if env_path:
            for name, file_value in _load_env_file(env_path).items():
                os.environ.setdefault(name, file_value)

        environ = os.environ

        def setting(name: str, fallback: str) -> str:
            # An empty string counts as "not configured".
            return environ.get(name) or fallback

        return cls(
            host=setting("ETL_DB_HOST", "localhost"),
            port=int(setting("ETL_DB_PORT", "5432")),
            dbname=setting("ETL_DB_NAME", "amocrm"),
            user=setting("ETL_DB_USER", "postgres"),
            password=setting("ETL_DB_PASSWORD", ""),
            schema=setting("ETL_DB_SCHEMA", "public"),
            sslmode=environ.get("ETL_DB_SSLMODE"),
            connect_timeout=int(setting("ETL_DB_CONNECT_TIMEOUT", "30")),
        )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
class AmoCRMAccount:
    """Configuration of a single amoCRM account."""

    id: int  # amoCRM account ID (from the URL or the API)
    name: str
    base_url: str
    token_path: Path
    mybi_account_id: int  # Internal account_id as used by mybi.ru (for compatibility)
    pipeline_ids: Optional[List[int]] = None  # None = all pipelines
    cache_dir: Optional[Path] = None

    def __post_init__(self):
        """Convert string path fields to ``Path`` objects."""
        for attr in ("token_path", "cache_dir"):
            current = getattr(self, attr)
            if isinstance(current, str):
                setattr(self, attr, Path(current))

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "AmoCRMAccount":
        """Create an account config from a dictionary (used for Airflow DAGs).

        Missing keys fall back to defaults; ``cache_dir`` becomes ``None``
        when absent or empty.
        """
        raw_cache_dir = d.get("cache_dir")
        return cls(
            id=int(d.get("id", 0)),
            name=d.get("name", "account"),
            base_url=d.get("base_url", ""),
            token_path=Path(d.get("token_path", "token.json")),
            mybi_account_id=int(d.get("mybi_account_id", 0)),
            pipeline_ids=d.get("pipeline_ids"),
            cache_dir=Path(raw_cache_dir) if raw_cache_dir else None,
        )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass
class ETLConfig:
    """Top-level ETL configuration: database, accounts and run settings."""

    database: DatabaseConfig
    accounts: List[AmoCRMAccount]
    batch_size: int = 100
    window_minutes: int = 120  # Default extraction window (2 hours)
    log_level: str = "INFO"

    @classmethod
    def from_env(cls, env_path: Optional[Path] = None) -> "ETLConfig":
        """Create the configuration from environment variables.

        When ``env_path`` is given, the file is read first and its entries
        are copied into ``os.environ`` without overriding variables that are
        already set.

        Accounts are described by ``AMO_ACCOUNT_<n>_*`` variables for ``n``
        from 1 to 9 (``ID``, ``NAME``, ``URL``, ``TOKEN_PATH``,
        ``MYBI_ACCOUNT_ID``, ``PIPELINE_IDS``, ``CACHE_DIR``). A slot without
        a non-empty ``ID`` is skipped.

        Raises:
            ValueError: If an account has an ``ID`` but no
                ``MYBI_ACCOUNT_ID``, or a numeric variable cannot be parsed.
        """
        if env_path:
            for name, file_value in _load_env_file(env_path).items():
                os.environ.setdefault(name, file_value)

        database = DatabaseConfig.from_env(env_path)
        environ = os.environ

        # Account variables: AMO_ACCOUNT_1_ID, AMO_ACCOUNT_1_NAME, AMO_ACCOUNT_1_URL, etc.
        accounts = []
        for index in range(1, 10):  # Up to 9 accounts are supported
            prefix = f"AMO_ACCOUNT_{index}_"
            account_id = environ.get(prefix + "ID")
            if not account_id:
                continue

            # Comma-separated list; blank items are ignored.
            raw_pipelines = environ.get(prefix + "PIPELINE_IDS", "")
            pipeline_ids = [
                int(chunk.strip())
                for chunk in raw_pipelines.split(",")
                if chunk.strip()
            ]

            mybi_id = environ.get(prefix + "MYBI_ACCOUNT_ID")
            if not mybi_id:
                raise ValueError(f"Не указан {prefix}MYBI_ACCOUNT_ID для аккаунта {account_id}")

            default_name = f"account_{index}"
            default_token = f"token_{index}.json"
            accounts.append(
                AmoCRMAccount(
                    id=int(account_id),
                    name=environ.get(prefix + "NAME") or default_name,
                    base_url=environ.get(prefix + "URL") or "",
                    token_path=Path(environ.get(prefix + "TOKEN_PATH") or default_token),
                    mybi_account_id=int(mybi_id),
                    pipeline_ids=pipeline_ids or None,  # empty list means "all pipelines"
                    cache_dir=Path(environ.get(prefix + "CACHE_DIR") or ".cache"),
                )
            )

        return cls(
            database=database,
            accounts=accounts,
            batch_size=int(environ.get("ETL_BATCH_SIZE") or "100"),
            window_minutes=int(environ.get("ETL_WINDOW_MINUTES") or "120"),
            log_level=environ.get("ETL_LOG_LEVEL") or "INFO",
        )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# Example configuration for development (can be overridden via .env).
# get_config() in this module returns this object when no env file is found
# and neither ETL_DB_HOST nor AMO_ACCOUNT_1_ID is set.
DEFAULT_CONFIG = ETLConfig(
    database=DatabaseConfig(
        host="localhost",
        port=5432,
        dbname="amocrm",
        user="postgres",
        password="",
        schema="public",
    ),
    accounts=[
        AmoCRMAccount(
            id=30019651,
            name="bneginskogo",
            base_url="https://bneginskogo.amocrm.ru",
            token_path=Path("token.json"),
            mybi_account_id=53859,  # Internal ID from mybi.ru
            pipeline_ids=[5987164, 6241334],
            cache_dir=Path(".cache"),
        ),
        # Add the remaining accounts here
    ],
    batch_size=100,
    window_minutes=120,
)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_config(env_path: Optional[Union[str, Path]] = None) -> ETLConfig:
    """Resolve the ETL configuration.

    Sources are tried in this order:

    1. ``env_path``, when given;
    2. a ``.env`` file in the current working directory, when it exists;
    3. the process environment, when ``ETL_DB_HOST`` or
       ``AMO_ACCOUNT_1_ID`` is set to a non-empty value;
    4. ``DEFAULT_CONFIG``.
    """
    if env_path:
        return ETLConfig.from_env(Path(env_path))

    local_env = Path(".env")
    if local_env.exists():
        return ETLConfig.from_env(local_env)

    marker_names = ("ETL_DB_HOST", "AMO_ACCOUNT_1_ID")
    configured_via_env = any(os.environ.get(name) for name in marker_names)
    return ETLConfig.from_env() if configured_via_env else DEFAULT_CONFIG
|
etl/extractors.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Выгрузка данных из amoCRM через библиотеку amochka.
|
|
3
|
+
|
|
4
|
+
Обёртки для инкрементальной выгрузки с поддержкой фильтрации.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Union
|
|
11
|
+
|
|
12
|
+
from .config import AmoCRMAccount
|
|
13
|
+
|
|
14
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AmoCRMExtractor:
    """Extracts data from amoCRM through an ``amochka`` client.

    The client is created lazily on first access of :attr:`client`. Each
    ``iter_*`` method is a generator: it forwards its filters to the client,
    logs progress and yields the records it receives.
    """

    def __init__(self, account: AmoCRMAccount, cache_lifetime_hours: int = 24):
        """Initialise the extractor.

        Args:
            account: amoCRM account configuration.
            cache_lifetime_hours: Lifetime of the custom_fields cache, in hours.
        """
        self.account = account
        self.cache_lifetime_hours = cache_lifetime_hours
        self._client = None
        self._pipelines_map: Dict[int, str] = {}
        self._statuses_map: Dict[int, Dict[str, Any]] = {}

    def _ensure_client(self):
        """Create the amochka client on first use.

        Raises:
            ImportError: If the ``amochka`` package cannot be imported.
        """
        if self._client is not None:
            return

        # Imported here so the module can be loaded without amochka installed.
        try:
            from amochka import AmoCRMClient, CacheConfig
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError("amochka не установлена. Установите: pip install amochka") from exc

        cache_config = CacheConfig.file_cache(
            base_dir=str(self.account.cache_dir) if self.account.cache_dir else ".cache",
            lifetime_hours=self.cache_lifetime_hours,
        )

        self._client = AmoCRMClient(
            base_url=self.account.base_url,
            token_file=str(self.account.token_path),
            cache_config=cache_config,
        )

    @property
    def client(self):
        """Return the amoCRM client, creating it if necessary."""
        self._ensure_client()
        return self._client

    def load_pipelines_and_statuses(self) -> Tuple[Dict[int, str], Dict[int, Dict[str, Any]]]:
        """Load pipelines and statuses for denormalisation.

        The result is kept on the instance; once both maps are non-empty,
        later calls return them without contacting the API.

        Returns:
            Tuple ``(pipelines_map, statuses_map)``:
                - pipelines_map: ``{pipeline_id: name}``
                - statuses_map: ``{status_id: {"name": ..., "sort": ..., "pipeline_id": ...}}``
        """
        if self._pipelines_map and self._statuses_map:
            return self._pipelines_map, self._statuses_map

        logger.info("Загружаем воронки и статусы для аккаунта %s", self.account.name)

        for pipeline in self.client.iter_pipelines():
            pipeline_id = pipeline.get("id")
            self._pipelines_map[pipeline_id] = pipeline.get("name")

            # Statuses come nested under _embedded; tolerate the key being
            # present with a null value.
            embedded = pipeline.get("_embedded") or {}
            for status in embedded.get("statuses") or []:
                self._statuses_map[status.get("id")] = {
                    "name": status.get("name"),
                    "sort": status.get("sort"),
                    "pipeline_id": pipeline_id,
                }

        logger.info("Загружено %d воронок, %d статусов", len(self._pipelines_map), len(self._statuses_map))
        return self._pipelines_map, self._statuses_map

    def iter_leads(
        self,
        updated_from: Optional[datetime] = None,
        updated_to: Optional[datetime] = None,
        pipeline_ids: Optional[List[int]] = None,
        include_contacts: bool = True,
        only_deleted: bool = False,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over leads.

        Args:
            updated_from: Start of the period (by updated_at).
            updated_to: End of the period (by updated_at).
            pipeline_ids: Pipeline filter (None = take it from the account config).
            include_contacts: Include embedded contacts.
            only_deleted: Export only deleted leads (from the trash).

        Yields:
            Lead records as returned by the client.
        """
        # Fall back to the account's pipelines when none are given explicitly.
        if pipeline_ids is None:
            pipeline_ids = self.account.pipeline_ids

        logger.info(
            "Выгружаем сделки: updated_from=%s, updated_to=%s, pipeline_ids=%s, only_deleted=%s",
            updated_from,
            updated_to,
            pipeline_ids,
            only_deleted,
        )

        # The extra filter is sent only when deleted leads are requested.
        extra_params = {"filter[only_deleted]": "true"} if only_deleted else None

        count = 0
        for lead in self.client.iter_leads(
            updated_from=updated_from,
            updated_to=updated_to,
            pipeline_ids=pipeline_ids,
            include_contacts=include_contacts,
            extra_params=extra_params,
        ):
            count += 1
            if count % 100 == 0:
                logger.debug("Выгружено %d сделок", count)
            yield lead

        logger.info("Всего выгружено %d сделок", count)

    def iter_contacts(
        self,
        contact_ids: Optional[List[int]] = None,
        updated_from: Optional[datetime] = None,
        updated_to: Optional[datetime] = None,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over contacts.

        Args:
            contact_ids: IDs of the contacts to export.
            updated_from: Start of the period (by updated_at).
            updated_to: End of the period (by updated_at).

        Yields:
            Contact records as returned by the client.
        """
        logger.info(
            "Выгружаем контакты: contact_ids=%s, updated_from=%s, updated_to=%s",
            f"{len(contact_ids)} шт" if contact_ids else "все",
            updated_from,
            updated_to,
        )

        count = 0
        for contact in self.client.iter_contacts(
            contact_ids=contact_ids,
            updated_from=updated_from,
            updated_to=updated_to,
        ):
            count += 1
            if count % 100 == 0:
                logger.debug("Выгружено %d контактов", count)
            yield contact

        logger.info("Всего выгружено %d контактов", count)

    def iter_events(
        self,
        entity_type: Optional[str] = "lead",
        event_types: Optional[List[str]] = None,
        created_from: Optional[datetime] = None,
        created_to: Optional[datetime] = None,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over events.

        A single event type is passed to the client as its ``event_type``
        argument. With several types the client is called without a type
        filter and events are filtered here by their ``"type"`` field.

        Args:
            entity_type: Entity type (lead, contact, company, etc.).
            event_types: Event type filter.
            created_from: Start of the period (by created_at).
            created_to: End of the period (by created_at).

        Yields:
            Event records as returned by the client.
        """
        logger.info(
            "Выгружаем события: entity_type=%s, event_types=%s, created_from=%s",
            entity_type,
            event_types,
            created_from,
        )

        single_type = event_types[0] if event_types and len(event_types) == 1 else None
        filter_locally = bool(event_types) and len(event_types) > 1

        count = 0
        for event in self.client.iter_events(
            entity_type=entity_type,
            event_type=single_type,
            start=created_from,
            end=created_to,
        ):
            if filter_locally and event.get("type") not in event_types:
                continue

            count += 1
            if count % 500 == 0:
                logger.debug("Выгружено %d событий", count)
            yield event

        logger.info("Всего выгружено %d событий", count)

    def iter_notes(
        self,
        entity_type: str = "lead",
        note_type: Optional[str] = None,
        entity_ids: Optional[List[int]] = None,
        updated_from: Optional[datetime] = None,
        updated_to: Optional[datetime] = None,
    ) -> Iterator[Dict[str, Any]]:
        """Iterate over notes.

        Args:
            entity_type: Entity type (lead, contact, company).
            note_type: Note type (common, call_in, call_out, etc.).
            entity_ids: IDs of the entities whose notes are wanted; ``None``
                or an empty list means no restriction.
            updated_from: Start of the period.
            updated_to: End of the period.

        Yields:
            Note records as returned by the client, restricted to
            ``entity_ids`` when given.
        """
        logger.info(
            "Выгружаем примечания: entity_type=%s, note_type=%s, entity_ids=%s",
            entity_type,
            note_type,
            f"{len(entity_ids)} шт" if entity_ids else "все",
        )

        # entity_ids is not forwarded to the client call below, so the
        # restriction is applied here on each note's "entity_id" field.
        # NOTE(review): assumes notes carry an integer "entity_id" — confirm
        # against the amoCRM notes payload.
        wanted_ids = {int(entity_id) for entity_id in entity_ids} if entity_ids else None

        count = 0
        for note in self.client.iter_notes(
            entity=entity_type,
            note_type=note_type,
            start=updated_from,
            end=updated_to,
        ):
            if wanted_ids is not None and note.get("entity_id") not in wanted_ids:
                continue

            count += 1
            if count % 100 == 0:
                logger.debug("Выгружено %d примечаний", count)
            yield note

        logger.info("Всего выгружено %d примечаний", count)

    def iter_users(self, with_groups: bool = True, with_roles: bool = True) -> Iterator[Dict[str, Any]]:
        """Iterate over users.

        Args:
            with_groups: Include group information.
            with_roles: Include role information.

        Yields:
            User records as returned by the client.
        """
        logger.info("Выгружаем пользователей")

        # Requested extras are sent as a comma-separated "with" parameter.
        with_parts = []
        if with_groups:
            with_parts.append("groups")
        if with_roles:
            with_parts.append("roles")

        extra_params = {}
        if with_parts:
            extra_params["with"] = ",".join(with_parts)

        count = 0
        for user in self.client.iter_users(extra_params=extra_params):
            count += 1
            yield user

        logger.info("Всего выгружено %d пользователей", count)

    def iter_pipelines(self) -> Iterator[Dict[str, Any]]:
        """Iterate over pipelines.

        Yields:
            Pipeline records with their statuses under ``_embedded``.
        """
        logger.info("Выгружаем воронки")

        count = 0
        for pipeline in self.client.iter_pipelines():
            count += 1
            yield pipeline

        logger.info("Всего выгружено %d воронок", count)

    def collect_contact_ids_from_leads(
        self,
        leads: Iterator[Dict[str, Any]],
    ) -> Tuple[List[Dict[str, Any]], Set[int]]:
        """Materialise leads and collect the IDs of their embedded contacts.

        Useful for a follow-up export of the linked contacts. The iterator is
        fully consumed.

        Args:
            leads: Iterator of leads.

        Returns:
            Tuple ``(leads_list, contact_ids_set)``.
        """
        leads_list = []
        contact_ids: Set[int] = set()

        for lead in leads:
            leads_list.append(lead)

            # Tolerate "_embedded" / "contacts" being present with a null value.
            embedded = lead.get("_embedded") or {}
            for contact in embedded.get("contacts") or []:
                contact_id = contact.get("id")
                if contact_id:
                    contact_ids.add(int(contact_id))

        logger.info("Собрано %d уникальных контактов из %d сделок", len(contact_ids), len(leads_list))
        return leads_list, contact_ids
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def create_extractor(account: AmoCRMAccount, **kwargs) -> AmoCRMExtractor:
    """Build an extractor for the given account.

    Args:
        account: Account configuration.
        **kwargs: Extra keyword arguments forwarded to ``AmoCRMExtractor``.

    Returns:
        A new ``AmoCRMExtractor`` instance.
    """
    extractor = AmoCRMExtractor(account, **kwargs)
    return extractor
|