amochka 0.1.9__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
amochka/etl.py ADDED
@@ -0,0 +1,302 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Callable, Iterable, List, Optional, Sequence, Set, Union
4
+
5
+ from .client import AmoCRMClient
6
+
7
+
8
def _ensure_path(path: Union[str, Path]) -> Path:
    """Return *path* as a ``Path`` after making sure its parent directory exists.

    Only the parent directories are created; the file itself is not touched.
    """
    target = Path(path)
    # parents=True builds every missing ancestor; exist_ok=True makes the
    # call a no-op when the directory is already there.
    target.parent.mkdir(parents=True, exist_ok=True)
    return target
12
+
13
+
14
def _resolve_timestamp(record: dict, timestamp_fields: Sequence[str]) -> Optional[Union[int, float, str]]:
    """Return the first non-``None`` value found under *timestamp_fields*.

    Field names are tried in the given order; empty/falsy names are skipped.
    ``None`` is returned when no listed field holds a value.
    """
    # Lazy lookup: fields after the first hit are never read from the record.
    candidates = (record.get(name) for name in timestamp_fields if name)
    return next((value for value in candidates if value is not None), None)
22
+
23
+
24
def write_ndjson(
    records: Iterable[dict],
    output_path: Union[str, Path],
    *,
    entity: str,
    account_id: Optional[Union[int, str]] = None,
    timestamp_fields: Sequence[str] = ("updated_at", "created_at"),
    transform: Optional[Callable[[dict], dict]] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Write *records* to *output_path* as NDJSON (one JSON object per line).

    Each output line is an envelope with the keys ``entity``, ``account_id``,
    ``updated_at`` and ``payload``.

    Args:
        records: Iterable of source records; consumed lazily, one at a time.
        output_path: Destination file. Missing parent directories are created
            and an existing file is overwritten.
        entity: Value stored under the ``entity`` key of every line.
        account_id: Value stored under the ``account_id`` key of every line.
        timestamp_fields: Record keys tried in order; the first non-``None``
            value becomes the line's ``updated_at``.
        transform: Optional callable applied to each record to produce the
            ``payload``; the record itself is used when omitted.
        on_record: Optional callback invoked with the original (untransformed)
            record after its line has been written.

    Returns:
        The number of lines written.
    """
    path = _ensure_path(output_path)
    count = 0
    # newline="\n" switches off platform newline translation, so the record
    # separator is a bare LF on every OS (text mode would emit CRLF on Windows).
    with path.open("w", encoding="utf-8", newline="\n") as handler:
        for original in records:
            payload = transform(original) if transform else original
            line = {
                "entity": entity,
                "account_id": account_id,
                # The timestamp is always read from the untransformed record.
                "updated_at": _resolve_timestamp(original, timestamp_fields),
                "payload": payload,
            }
            # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file.
            handler.write(json.dumps(line, ensure_ascii=False) + "\n")
            count += 1
            if on_record:
                on_record(original)
    return count
57
+
58
+
59
def export_leads_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    start=None,
    end=None,
    pipeline_ids=None,
    include_contacts: bool = True,
    include=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export leads to an NDJSON file and return the number of lines written.

    ``start`` and ``end`` are handed to ``client.iter_leads`` as
    ``updated_from`` / ``updated_to``; the remaining filter arguments are
    forwarded under their own names. Every line is written with the entity
    label ``"lead"``.
    """
    lead_query = {
        "updated_from": start,
        "updated_to": end,
        "pipeline_ids": pipeline_ids,
        "include_contacts": include_contacts,
        "include": include,
        "limit": limit,
        "extra_params": extra_params,
    }
    return write_ndjson(
        client.iter_leads(**lead_query),
        output_path,
        entity="lead",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
93
+
94
+
95
def export_contacts_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    start=None,
    end=None,
    contact_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export contacts to an NDJSON file and return the number of lines written.

    The export runs in one of two modes:

    * ``contact_ids`` holds at least one non-``None`` id: the ids are requested
      from ``/api/v4/contacts`` with a ``filter[id][]`` parameter. ``start`` and
      ``end`` are not applied in this mode. Ids the listing did not return are
      then fetched one by one with ``client.get_contact_by_id``; an id whose
      individual fetch fails is logged and skipped instead of aborting the
      export.
    * otherwise (``None``, an empty collection, or only ``None`` entries):
      contacts come from ``client.iter_contacts`` with ``start`` / ``end``
      passed as ``updated_from`` / ``updated_to``.

    Args:
        client: API client used for all requests.
        output_path: Destination NDJSON file.
        account_id: Stored under ``account_id`` on every line.
        start: Lower bound, forwarded as ``updated_from`` (unfiltered mode only).
        end: Upper bound, forwarded as ``updated_to`` (unfiltered mode only).
        contact_ids: A single id, or a list/tuple/set of ids, to export.
        limit: Page size for listing requests.
        extra_params: Extra query parameters; copied, never mutated.
        on_record: Optional callback invoked with each exported contact.

    Returns:
        The number of contacts written.
    """
    # Function-scope import: the module's top-level imports do not provide it.
    import logging

    logger = logging.getLogger(__name__)

    contact_id_list: Optional[List[int]] = None
    if contact_ids is not None:
        if isinstance(contact_ids, (list, tuple, set)):
            contact_id_list = [int(cid) for cid in contact_ids if cid is not None]
        else:
            contact_id_list = [int(contact_ids)]

    def _iter_contacts():
        # Unfiltered mode: stream straight from the client. No bookkeeping of
        # seen ids is needed because the per-id fallback never runs here.
        if not contact_id_list:
            yield from client.iter_contacts(
                updated_from=start,
                updated_to=end,
                contact_ids=None,
                limit=limit,
                extra_params=extra_params,
            )
            return

        seen: Set[int] = set()
        params = dict(extra_params or {})  # copy so the caller's dict stays intact
        params["filter[id][]"] = [str(cid) for cid in contact_id_list]
        params["page"] = 1
        params["limit"] = limit
        while True:
            response = client._make_request("GET", "/api/v4/contacts", params=params)
            embedded = (response or {}).get("_embedded", {})
            contacts = embedded.get("contacts") or []
            if not contacts:
                break
            for contact in contacts:
                cid = contact.get("id")
                if cid is not None:
                    seen.add(int(cid))
                yield contact
            # When the response carries no "_page_count", the default makes the
            # loop stop after the current page; ids still missing are picked up
            # by the per-id fallback below.
            total_pages = response.get("_page_count", params["page"])
            if params["page"] >= total_pages:
                break
            params["page"] += 1

        # Fallback: fetch individually every requested id the listing missed.
        for cid in contact_id_list:
            if cid in seen:
                continue
            try:
                contact = client.get_contact_by_id(cid)
            except Exception:
                # Best effort: the exception types raised by the client are not
                # visible here, so catch broadly but leave a trace in the log.
                logger.warning(
                    "Skipping contact %s: get_contact_by_id failed", cid, exc_info=True
                )
                continue
            if not isinstance(contact, dict):
                logger.warning(
                    "Skipping contact %s: get_contact_by_id returned %r", cid, contact
                )
                continue
            retrieved_id = contact.get("id")
            if retrieved_id is not None and int(retrieved_id) not in seen:
                seen.add(int(retrieved_id))
                yield contact

    return write_ndjson(
        _iter_contacts(),
        output_path,
        entity="contact",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
172
+
173
+
174
def export_notes_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    entity: str = "lead",
    start=None,
    end=None,
    note_type=None,
    entity_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export notes to an NDJSON file and return the number of lines written.

    ``start`` and ``end`` are handed to ``client.iter_notes`` as
    ``updated_from`` / ``updated_to``. Lines are labelled ``"<entity>_note"``,
    or plain ``"note"`` when ``entity`` is empty.
    """
    note_stream = client.iter_notes(
        entity=entity,
        updated_from=start,
        updated_to=end,
        note_type=note_type,
        entity_ids=entity_ids,
        limit=limit,
        extra_params=extra_params,
    )
    if entity:
        entity_label = f"{entity}_note"
    else:
        entity_label = "note"
    return write_ndjson(
        note_stream,
        output_path,
        entity=entity_label,
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
209
+
210
+
211
def export_events_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    entity: Optional[str] = "lead",
    start=None,
    end=None,
    event_type=None,
    entity_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export events to an NDJSON file and return the number of lines written.

    ``start`` and ``end`` are handed to ``client.iter_events`` as
    ``created_from`` / ``created_to``. Lines are labelled ``"<entity>_event"``,
    or plain ``"event"`` when ``entity`` is empty or ``None``. For the line
    timestamp, ``created_at`` is preferred over ``updated_at``.
    """
    event_stream = client.iter_events(
        entity=entity,
        entity_ids=entity_ids,
        event_type=event_type,
        created_from=start,
        created_to=end,
        limit=limit,
        extra_params=extra_params,
    )
    if entity:
        entity_label = f"{entity}_event"
    else:
        entity_label = "event"
    return write_ndjson(
        event_stream,
        output_path,
        entity=entity_label,
        account_id=account_id,
        timestamp_fields=("created_at", "updated_at"),
        on_record=on_record,
    )
246
+
247
+
248
def export_users_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export users to an NDJSON file and return the number of lines written.

    Records come from ``client.iter_users`` and are labelled ``"user"``.
    """
    return write_ndjson(
        client.iter_users(limit=limit, extra_params=extra_params),
        output_path,
        entity="user",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
269
+
270
+
271
def export_pipelines_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export pipelines to an NDJSON file and return the number of lines written.

    Records come from ``client.iter_pipelines`` and are labelled ``"pipeline"``.
    """
    return write_ndjson(
        client.iter_pipelines(limit=limit, extra_params=extra_params),
        output_path,
        entity="pipeline",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
292
+
293
+
294
# Public API of this module: the generic NDJSON writer plus one export
# helper per entity type.
__all__ = [
    "write_ndjson",
    "export_leads_to_ndjson",
    "export_contacts_to_ndjson",
    "export_notes_to_ndjson",
    "export_events_to_ndjson",
    "export_users_to_ndjson",
    "export_pipelines_to_ndjson",
]
@@ -0,0 +1,205 @@
1
+ Metadata-Version: 2.4
2
+ Name: amochka
3
+ Version: 0.3.1
4
+ Summary: Python library for working with amoCRM API with ETL capabilities
5
+ Author-email: Timur <timurdt@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yourusername/amochka
8
+ Project-URL: Documentation, https://github.com/yourusername/amochka
9
+ Project-URL: Repository, https://github.com/yourusername/amochka
10
+ Project-URL: Bug Tracker, https://github.com/yourusername/amochka/issues
11
+ Keywords: amocrm,crm,api,client,automation,etl
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.6
18
+ Classifier: Programming Language :: Python :: 3.7
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Classifier: Topic :: Internet :: WWW/HTTP
26
+ Requires-Python: >=3.6
27
+ Description-Content-Type: text/markdown
28
+ Requires-Dist: requests>=2.25.0
29
+ Requires-Dist: ratelimit>=2.2.0
30
+ Requires-Dist: psycopg2-binary>=2.9.0
31
+ Requires-Dist: python-dotenv>=1.0.0
32
+
33
+ # amochka
34
+
35
+ Официальная документация API amoCRM — https://www.amocrm.ru/developers/content/crm_platform/api-reference
36
+
37
+ **amochka** — библиотека для работы с API amoCRM на Python. Она поддерживает:
38
+ - Получение данных сделок с вложенными сущностями (контакты, компании, теги и т. д.)
39
+ - Редактирование сделок, включая обновление стандартных и кастомных полей
40
+ - Поддержку нескольких amoCRM-аккаунтов с персистентным кэшированием кастомных полей для каждого аккаунта отдельно
41
+ - Ограничение запросов (7 запросов в секунду) с использованием декораторов из библиотеки `ratelimit`
42
+ - **Полнофункциональный ETL-модуль** для синхронизации данных amoCRM в PostgreSQL
43
+
44
+ ## Возможности
45
+
46
+ ### API-клиент
47
+
48
+ - `get_deal_by_id(deal_id)` — получение детальной информации по сделке
49
+ - `get_pipelines()` — список воронок и статусов
50
+ - `fetch_updated_leads_raw(pipeline_id, updated_from, ...)` — выгрузка необработанных сделок за период
51
+
52
+ ### ETL-модуль
53
+
54
+ - **Extractors**: извлечение данных из amoCRM (сделки, контакты, события, примечания)
55
+ - **Transformers**: преобразование в табличный формат для БД
56
+ - **Loaders**: загрузка в PostgreSQL с UPSERT логикой и сохранением внутренних ID
57
+ - **Migrations**: автоматическое создание таблиц и схем
58
+ - **Incremental sync**: инкрементальная синхронизация по updated_at
59
+ - Интеграция с **Apache Airflow** для автоматизации ETL-процессов
60
+
61
+ ## Требования к окружению
62
+
63
+ Python 3.6 или новее.
64
+
65
+ ## Установка
66
+
67
+ ```bash
68
+ pip install amochka
69
+ ```
70
+
71
+ Для использования ETL-модуля установите дополнительные зависимости:
72
+
73
+ ```bash
74
+ pip install amochka psycopg2-binary python-dotenv
75
+ ```
76
+
77
+ ## Кэширование кастомных полей
78
+
79
+ Для уменьшения количества запросов к API кастомные поля кэшируются персистентно. Если параметр `cache_file` не указан, имя файла кэша генерируется автоматически на основе домена amoCRM-аккаунта. Вы можете принудительно обновить кэш, передав параметр `force_update=True` в метод `get_custom_fields_mapping()`, или настроить время жизни кэша (по умолчанию — 24 часа).
80
+
81
+ ## Примеры использования
82
+
83
+ ### Быстрый старт: выгрузка обновленных сделок
84
+
85
+ ```python
86
+ from datetime import datetime, timedelta
87
+ from amochka import AmoCRMClient, CacheConfig
88
+
89
+ client = AmoCRMClient(
90
+ base_url="https://example.amocrm.ru",
91
+ token_file="token.json",
92
+ cache_config=CacheConfig.disabled(),
93
+ disable_logging=True
94
+ )
95
+
96
+ three_hours_ago = datetime.utcnow() - timedelta(hours=3)
97
+ leads = client.fetch_updated_leads_raw(
98
+ pipeline_id=123456,
99
+ updated_from=three_hours_ago,
100
+ save_to_file="leads.json",
101
+ include_contacts=True
102
+ )
103
+ ```
104
+
105
+ ### ETL: синхронизация в PostgreSQL
106
+
107
+ ```python
108
+ from etl.config import DatabaseConfig, AmoCRMAccount
109
+ from etl.extractors import AmoCRMExtractor
110
+ from etl.loaders import PostgresLoader
111
+ from etl.run_etl import sync_leads_with_contacts
112
+ from datetime import datetime, timezone
113
+
114
+ # Настройка БД
115
+ db_config = DatabaseConfig(
116
+ host="localhost",
117
+ port=5432,
118
+ dbname="amocrm",
119
+ user="postgres",
120
+ password="password",
121
+ schema="public"
122
+ )
123
+
124
+ # Настройка amoCRM аккаунта
125
+ account = AmoCRMAccount(
126
+ id=1,
127
+ name="main",
128
+ base_url="https://example.amocrm.ru",
129
+ token_path="token.json",
130
+ mybi_account_id=1,
131
+ pipeline_ids=[123456]
132
+ )
133
+
134
+ # ETL процесс
135
+ loader = PostgresLoader(db_config)
136
+ extractor = AmoCRMExtractor(account)
137
+
138
+ result = sync_leads_with_contacts(
139
+ extractor=extractor,
140
+ loader=loader,
141
+ mybi_account_id=1,
142
+ updated_from=datetime(2025, 1, 1, tzinfo=timezone.utc),
143
+ updated_to=datetime.now(timezone.utc),
144
+ pipeline_ids=[123456]
145
+ )
146
+
147
+ print(f"Загружено сделок: {result['leads_count']}")
148
+ print(f"Загружено контактов: {result['contacts_count']}")
149
+ ```
150
+
151
+ ### Пример структуры данных
152
+
153
+ ```json
154
+ [
155
+ {
156
+ "id": 12345678,
157
+ "name": "Сделка: Заявка от клиента",
158
+ "custom_fields_values": [
159
+ {
160
+ "field_name": "utm_source",
161
+ "values": [{"value": "google"}]
162
+ }
163
+ ],
164
+ "_embedded": {
165
+ "tags": [
166
+ {"id": 123, "name": "Приоритетный клиент"}
167
+ ]
168
+ }
169
+ }
170
+ ]
171
+ ```
172
+
173
+ ## Интеграция с Apache Airflow
174
+
175
+ Модуль ETL разработан для использования в Airflow DAG. Пример минимального DAG:
176
+
177
+ ```python
178
+ from airflow.decorators import dag, task
179
+ from etl.config import DatabaseConfig, AmoCRMAccount
180
+ from etl.run_etl import sync_leads_with_contacts
181
+
182
+ @dag(schedule_interval=None)
183
+ def amocrm_sync():
184
+ @task
185
+ def sync_data():
186
+ db_config = DatabaseConfig.from_env()
187
+ account = AmoCRMAccount.from_env()
188
+ # ... ETL процесс
189
+
190
+ amocrm_sync()
191
+ ```
192
+
193
+ ## Тесты
194
+
195
+ Запустить тесты можно командой:
196
+
197
+ ```bash
198
+ pytest -q
199
+ ```
200
+
201
+ Тесты проверяют основную функциональность API клиента и помогают убедиться, что изменения в коде не ломают работу библиотеки.
202
+
203
+ ## Лицензия
204
+
205
+ MIT
@@ -0,0 +1,14 @@
1
+ amochka/__init__.py,sha256=NFAgMbhBnrx3nF--MeY9Chpu5gtZ5kVn-QYnmO3Nhpk,620
2
+ amochka/client.py,sha256=mGoDEE0XOt-c9f2FmqFVzDMrXQel_zComtyAFXdCHjg,66155
3
+ amochka/etl.py,sha256=N8rXNFbtmlKfsYpgr7HDcP4enoj63XQPWuTDxGuMhw4,8901
4
+ etl/__init__.py,sha256=bp9fPqbKlOc7xzs27diHEvysy1FgBrwlpX6GnR6GL9U,255
5
+ etl/config.py,sha256=YY6M7pib_XD7bjPW8J_iWzt2hoXzDpx6yIvZHmlUlrE,8873
6
+ etl/extractors.py,sha256=-QCBZ6PoJ51j0drNQaH5bLfvjPqAQmfVgaT1D_ZSwjI,12909
7
+ etl/loaders.py,sha256=nQx6TDwnuHVWzjmuKY9v-23hmmk4Ex6ZoGtb3PApO1k,31974
8
+ etl/run_etl.py,sha256=p_2NxJwXMiACMETvRsjrozMgz66U9ezDNSWZXUieNMs,26262
9
+ etl/transformers.py,sha256=OwYJ_9l3oqvy2Y3-umXjAGweOIqlfRI0iSiCFPrcQ8E,17867
10
+ etl/migrations/001_create_tables.sql,sha256=YrSaZjpofC1smjYx0bM4eHQumboruIBY3fwRDlJLLSo,15749
11
+ amochka-0.3.1.dist-info/METADATA,sha256=CF7UAIgc6PvyJv9I8CULTlBQOV8BITZlwscrUo3BM10,7562
12
+ amochka-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ amochka-0.3.1.dist-info/top_level.txt,sha256=grRX8aLFG-yYKPsAqCD6sUBmdLSQeOMHsc9Dl6S7Lzo,12
14
+ amochka-0.3.1.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ amochka
2
+ etl
etl/__init__.py ADDED
@@ -0,0 +1,7 @@
1
"""
ETL connector for exporting data from amoCRM to PostgreSQL.

The table structure is compatible with mybi.ru so that existing dbt models keep working.
"""

__version__ = "0.1.0"