amochka 0.1.9__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
amochka/etl.py ADDED
@@ -0,0 +1,302 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Callable, Iterable, List, Optional, Sequence, Set, Union
4
+
5
+ from .client import AmoCRMClient
6
+
7
+
8
+ def _ensure_path(path: Union[str, Path]) -> Path:
9
+ output_path = Path(path)
10
+ output_path.parent.mkdir(parents=True, exist_ok=True)
11
+ return output_path
12
+
13
+
14
+ def _resolve_timestamp(record: dict, timestamp_fields: Sequence[str]) -> Optional[Union[int, float, str]]:
15
+ for field in timestamp_fields:
16
+ if not field:
17
+ continue
18
+ value = record.get(field)
19
+ if value is not None:
20
+ return value
21
+ return None
22
+
23
+
24
def write_ndjson(
    records: Iterable[dict],
    output_path: Union[str, Path],
    *,
    entity: str,
    account_id: Optional[Union[int, str]] = None,
    timestamp_fields: Sequence[str] = ("updated_at", "created_at"),
    transform: Optional[Callable[[dict], dict]] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Write *records* to *output_path* as NDJSON and return the number of lines written.

    Each line is a JSON object with the keys ``entity``, ``account_id``,
    ``updated_at`` and ``payload``.

    Args:
        records: Source records; consumed once, in order.
        output_path: Destination file. It is opened in write mode, so an
            existing file is overwritten; parent directories are created.
        entity: Value stored under ``entity`` on every line.
        account_id: Value stored under ``account_id`` on every line.
        timestamp_fields: Field names looked up on the untransformed record
            to fill ``updated_at``.
        transform: Optional callable producing the ``payload`` from a record;
            without it the record itself is the payload.
        on_record: Optional callback invoked with the untransformed record
            after its line has been written.
    """
    destination = _ensure_path(output_path)
    written = 0
    with destination.open("w", encoding="utf-8") as sink:
        for source_record in records:
            # The transform runs before the timestamp lookup, as in the
            # original statement order.
            body = source_record if not transform else transform(source_record)
            envelope = {
                "entity": entity,
                "account_id": account_id,
                "updated_at": _resolve_timestamp(source_record, timestamp_fields),
                "payload": body,
            }
            sink.write(json.dumps(envelope, ensure_ascii=False) + "\n")
            written += 1
            if on_record:
                on_record(source_record)
    return written
57
+
58
+
59
def export_leads_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    start=None,
    end=None,
    pipeline_ids=None,
    include_contacts: bool = True,
    include=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export leads to an NDJSON file and return the number of lines written.

    ``start`` and ``end`` are handed to ``client.iter_leads`` as
    ``updated_from`` / ``updated_to``; the remaining filter arguments are
    forwarded under their own names. Every line is tagged with the entity
    name ``"lead"``.
    """
    lead_query = {
        "updated_from": start,
        "updated_to": end,
        "pipeline_ids": pipeline_ids,
        "include_contacts": include_contacts,
        "include": include,
        "limit": limit,
        "extra_params": extra_params,
    }
    lead_stream = client.iter_leads(**lead_query)
    return write_ndjson(
        lead_stream,
        output_path,
        entity="lead",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
93
+
94
+
95
def export_contacts_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    start=None,
    end=None,
    contact_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export contacts to an NDJSON file and return the number of lines written.

    The function works in one of two modes:

    * ``contact_ids`` yields at least one ID: ``/api/v4/contacts`` is
      requested with an ID filter, page by page. IDs that the listing did not
      return are then fetched one at a time with ``client.get_contact_by_id``.
      ``start`` and ``end`` are not used in this mode.
    * ``contact_ids`` is ``None`` or empty: contacts come from
      ``client.iter_contacts`` with ``start`` / ``end`` passed as
      ``updated_from`` / ``updated_to``.

    Args:
        client: amoCRM client used for all requests.
        output_path: Destination NDJSON file.
        account_id: Value stored under ``account_id`` on every line.
        start: Lower bound of the update window (second mode only).
        end: Upper bound of the update window (second mode only).
        contact_ids: A single ID or any non-string iterable of IDs;
            ``None`` entries are ignored.
        limit: Page size forwarded to the API.
        extra_params: Extra query parameters; copied, never mutated.
        on_record: Optional callback invoked for every written contact.

    Individual fetch failures in the per-ID fallback are logged and skipped so
    that one bad ID does not abort the whole export.
    """
    import logging  # function-scope import: the module has no file-level logging import

    logger = logging.getLogger(__name__)

    contact_id_list: Optional[List[int]] = None
    if contact_ids is not None:
        # Accept any iterable of IDs (list, tuple, set, frozenset, range,
        # generator, ...). Strings and bytes are single scalar IDs, not
        # collections of characters.
        if isinstance(contact_ids, Iterable) and not isinstance(contact_ids, (str, bytes)):
            contact_id_list = [int(cid) for cid in contact_ids if cid is not None]
        else:
            contact_id_list = [int(contact_ids)]

    def _iter_contacts():
        if not contact_id_list:
            # No explicit IDs: stream by update window.
            yield from client.iter_contacts(
                updated_from=start,
                updated_to=end,
                contact_ids=None,
                limit=limit,
                extra_params=extra_params,
            )
            return

        seen: Set[int] = set()
        params = dict(extra_params or {})
        params["filter[id][]"] = [str(cid) for cid in contact_id_list]
        params["page"] = 1
        params["limit"] = limit
        while True:
            response = client._make_request("GET", "/api/v4/contacts", params=params) or {}
            # "_embedded" may be missing or null; both mean "no contacts".
            contacts = (response.get("_embedded") or {}).get("contacts") or []
            if not contacts:
                break
            for contact in contacts:
                cid = contact.get("id")
                if cid is not None:
                    seen.add(int(cid))
                yield contact
            # Without "_page_count" the current page is treated as the last
            # one; anything not returned so far is picked up by the per-ID
            # fallback below.
            total_pages = response.get("_page_count", params["page"])
            if params["page"] >= total_pages:
                break
            params["page"] += 1

        for cid in contact_id_list:
            if cid in seen:
                continue
            try:
                contact = client.get_contact_by_id(cid)
            except Exception:
                # Best-effort fallback: keep exporting, but leave a trace.
                logger.warning(
                    "Could not fetch contact %s individually; skipping it",
                    cid,
                    exc_info=True,
                )
                continue
            if not contact:
                continue
            retrieved_id = contact.get("id")
            if retrieved_id is not None and int(retrieved_id) not in seen:
                seen.add(int(retrieved_id))
                yield contact

    return write_ndjson(
        _iter_contacts(),
        output_path,
        entity="contact",
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
172
+
173
+
174
def export_notes_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    entity: str = "lead",
    start=None,
    end=None,
    note_type=None,
    entity_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export notes to an NDJSON file and return the number of lines written.

    ``start`` and ``end`` are handed to ``client.iter_notes`` as
    ``updated_from`` / ``updated_to``. Lines are tagged ``"<entity>_note"``,
    or plain ``"note"`` when *entity* is empty.
    """
    note_query = {
        "entity": entity,
        "updated_from": start,
        "updated_to": end,
        "note_type": note_type,
        "entity_ids": entity_ids,
        "limit": limit,
        "extra_params": extra_params,
    }
    note_stream = client.iter_notes(**note_query)
    if entity:
        tag = f"{entity}_note"
    else:
        tag = "note"
    return write_ndjson(
        note_stream,
        output_path,
        entity=tag,
        account_id=account_id,
        timestamp_fields=("updated_at", "created_at"),
        on_record=on_record,
    )
209
+
210
+
211
def export_events_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    entity: Optional[str] = "lead",
    start=None,
    end=None,
    event_type=None,
    entity_ids=None,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export events to an NDJSON file and return the number of lines written.

    ``start`` and ``end`` are handed to ``client.iter_events`` as
    ``created_from`` / ``created_to``. Lines are tagged ``"<entity>_event"``,
    or plain ``"event"`` when *entity* is empty. The line timestamp is looked
    up under ``created_at`` first, then ``updated_at``.
    """
    event_query = {
        "entity": entity,
        "entity_ids": entity_ids,
        "event_type": event_type,
        "created_from": start,
        "created_to": end,
        "limit": limit,
        "extra_params": extra_params,
    }
    event_stream = client.iter_events(**event_query)
    if entity:
        tag = f"{entity}_event"
    else:
        tag = "event"
    return write_ndjson(
        event_stream,
        output_path,
        entity=tag,
        account_id=account_id,
        timestamp_fields=("created_at", "updated_at"),
        on_record=on_record,
    )
246
+
247
+
248
def export_users_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export users to an NDJSON file and return the number of lines written.

    Users come from ``client.iter_users``; every line is tagged ``"user"``.
    """
    ndjson_options = {
        "entity": "user",
        "account_id": account_id,
        "timestamp_fields": ("updated_at", "created_at"),
        "on_record": on_record,
    }
    user_stream = client.iter_users(limit=limit, extra_params=extra_params)
    return write_ndjson(user_stream, output_path, **ndjson_options)
269
+
270
+
271
def export_pipelines_to_ndjson(
    client: AmoCRMClient,
    output_path: Union[str, Path],
    account_id: Union[int, str],
    *,
    limit: int = 250,
    extra_params: Optional[dict] = None,
    on_record: Optional[Callable[[dict], None]] = None,
) -> int:
    """Export pipelines to an NDJSON file and return the number of lines written.

    Pipelines come from ``client.iter_pipelines``; every line is tagged
    ``"pipeline"``.
    """
    ndjson_options = {
        "entity": "pipeline",
        "account_id": account_id,
        "timestamp_fields": ("updated_at", "created_at"),
        "on_record": on_record,
    }
    pipeline_stream = client.iter_pipelines(limit=limit, extra_params=extra_params)
    return write_ndjson(pipeline_stream, output_path, **ndjson_options)
292
+
293
+
294
# Public API of this module: the generic NDJSON writer plus one export helper
# per entity type. The underscore-prefixed helpers are intentionally not listed.
__all__ = [
    "write_ndjson",
    "export_leads_to_ndjson",
    "export_contacts_to_ndjson",
    "export_notes_to_ndjson",
    "export_events_to_ndjson",
    "export_users_to_ndjson",
    "export_pipelines_to_ndjson",
]
@@ -1,14 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amochka
3
- Version: 0.1.9
4
- Summary: Python library for working with amoCRM API
3
+ Version: 0.3.0
4
+ Summary: Python library for working with amoCRM API with ETL capabilities
5
5
  Author-email: Timur <timurdt@gmail.com>
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/yourusername/amochka
8
8
  Project-URL: Documentation, https://github.com/yourusername/amochka
9
9
  Project-URL: Repository, https://github.com/yourusername/amochka
10
10
  Project-URL: Bug Tracker, https://github.com/yourusername/amochka/issues
11
- Keywords: amocrm,crm,api,client,automation
11
+ Keywords: amocrm,crm,api,client,automation,etl
12
12
  Classifier: Development Status :: 4 - Beta
13
13
  Classifier: Intended Audience :: Developers
14
14
  Classifier: License :: OSI Approved :: MIT License
@@ -27,6 +27,8 @@ Requires-Python: >=3.6
27
27
  Description-Content-Type: text/markdown
28
28
  Requires-Dist: requests>=2.25.0
29
29
  Requires-Dist: ratelimit>=2.2.0
30
+ Requires-Dist: psycopg2-binary>=2.9.0
31
+ Requires-Dist: python-dotenv>=1.0.0
30
32
 
31
33
  # amochka
32
34
 
@@ -0,0 +1,14 @@
1
+ amochka/__init__.py,sha256=NFAgMbhBnrx3nF--MeY9Chpu5gtZ5kVn-QYnmO3Nhpk,620
2
+ amochka/client.py,sha256=mGoDEE0XOt-c9f2FmqFVzDMrXQel_zComtyAFXdCHjg,66155
3
+ amochka/etl.py,sha256=N8rXNFbtmlKfsYpgr7HDcP4enoj63XQPWuTDxGuMhw4,8901
4
+ etl/__init__.py,sha256=bp9fPqbKlOc7xzs27diHEvysy1FgBrwlpX6GnR6GL9U,255
5
+ etl/config.py,sha256=YY6M7pib_XD7bjPW8J_iWzt2hoXzDpx6yIvZHmlUlrE,8873
6
+ etl/extractors.py,sha256=-QCBZ6PoJ51j0drNQaH5bLfvjPqAQmfVgaT1D_ZSwjI,12909
7
+ etl/loaders.py,sha256=nQx6TDwnuHVWzjmuKY9v-23hmmk4Ex6ZoGtb3PApO1k,31974
8
+ etl/run_etl.py,sha256=p_2NxJwXMiACMETvRsjrozMgz66U9ezDNSWZXUieNMs,26262
9
+ etl/transformers.py,sha256=OwYJ_9l3oqvy2Y3-umXjAGweOIqlfRI0iSiCFPrcQ8E,17867
10
+ etl/migrations/001_create_tables.sql,sha256=YrSaZjpofC1smjYx0bM4eHQumboruIBY3fwRDlJLLSo,15749
11
+ amochka-0.3.0.dist-info/METADATA,sha256=5dZFPOs2wupzc0VsYVXyk1pJ-F4Kwm997N9EA_Z7htE,6371
12
+ amochka-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ amochka-0.3.0.dist-info/top_level.txt,sha256=grRX8aLFG-yYKPsAqCD6sUBmdLSQeOMHsc9Dl6S7Lzo,12
14
+ amochka-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ amochka
2
+ etl
etl/__init__.py ADDED
@@ -0,0 +1,7 @@
1
"""
ETL connector for exporting data from amoCRM into PostgreSQL.

The table structure is compatible with mybi.ru so that existing dbt models keep working.
"""

__version__ = "0.1.0"
etl/config.py ADDED
@@ -0,0 +1,236 @@
1
+ """
2
+ Конфигурация ETL для amoCRM.
3
+
4
+ Настройки загружаются из переменных окружения или .env файла.
5
+ """
6
+
7
+ import os
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+
13
+ def _load_env_file(path: Path) -> Dict[str, str]:
14
+ """Загружает переменные из .env файла."""
15
+ if not path.exists():
16
+ return {}
17
+
18
+ env: Dict[str, str] = {}
19
+ with path.open("r", encoding="utf-8") as handler:
20
+ for raw_line in handler:
21
+ line = raw_line.strip()
22
+ if not line or line.startswith("#"):
23
+ continue
24
+ if "=" not in line:
25
+ continue
26
+ key, value = line.split("=", 1)
27
+ key = key.strip()
28
+ value = value.strip()
29
+ if value.startswith(('"', "'")) and value.endswith(('"', "'")) and len(value) >= 2:
30
+ value = value[1:-1]
31
+ env[key] = value
32
+ return env
33
+
34
+
35
@dataclass
class DatabaseConfig:
    """Connection settings for a PostgreSQL database."""

    host: str
    port: int
    dbname: str
    user: str
    password: str
    schema: str = "public"
    sslmode: Optional[str] = None
    connect_timeout: int = 30

    def connection_kwargs(self) -> Dict[str, Any]:
        """Build the keyword arguments for ``psycopg.connect()``.

        ``sslmode`` is included only when it is set to a non-empty value;
        ``schema`` is never part of the result.
        """
        params: Dict[str, Any] = dict(
            host=self.host,
            port=self.port,
            dbname=self.dbname,
            user=self.user,
            password=self.password,
            connect_timeout=self.connect_timeout,
        )
        if self.sslmode:
            params["sslmode"] = self.sslmode
        return params

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "DatabaseConfig":
        """Create a configuration from a dictionary (for an Airflow DAG).

        Missing keys fall back to local-development defaults; ``port`` and
        ``connect_timeout`` are converted with ``int``.
        """
        port_number = int(d.get("port", 5432))
        timeout_seconds = int(d.get("connect_timeout", 30))
        return cls(
            host=d.get("host", "localhost"),
            port=port_number,
            dbname=d.get("dbname", "amocrm"),
            user=d.get("user", "postgres"),
            password=d.get("password", ""),
            schema=d.get("schema", "public"),
            sslmode=d.get("sslmode"),
            connect_timeout=timeout_seconds,
        )

    @classmethod
    def from_env(cls, env_path: Optional[Path] = None) -> "DatabaseConfig":
        """Create a configuration from ``ETL_DB_*`` environment variables.

        When *env_path* is given, the file is loaded first and its entries are
        added to ``os.environ`` without overriding variables already set.
        """
        if env_path:
            for name, file_value in _load_env_file(env_path).items():
                os.environ.setdefault(name, file_value)

        def setting(name: str, fallback: str) -> str:
            # Unset and empty variables both resolve to the fallback.
            return os.environ.get(name, fallback) or fallback

        return cls(
            host=setting("ETL_DB_HOST", "localhost"),
            port=int(setting("ETL_DB_PORT", "5432")),
            dbname=setting("ETL_DB_NAME", "amocrm"),
            user=setting("ETL_DB_USER", "postgres"),
            password=setting("ETL_DB_PASSWORD", ""),
            schema=setting("ETL_DB_SCHEMA", "public"),
            # Passed through untouched: may be None or an empty string.
            sslmode=os.environ.get("ETL_DB_SSLMODE"),
            connect_timeout=int(setting("ETL_DB_CONNECT_TIMEOUT", "30")),
        )
97
+
98
+
99
@dataclass
class AmoCRMAccount:
    """Settings for a single amoCRM account."""

    id: int  # amoCRM account ID (from the URL or the API)
    name: str
    base_url: str
    token_path: Path
    mybi_account_id: int  # internal account_id as used by mybi.ru (kept for compatibility)
    pipeline_ids: Optional[List[int]] = None  # None means all pipelines
    cache_dir: Optional[Path] = None

    def __post_init__(self):
        """Convert ``token_path`` and ``cache_dir`` to ``Path`` when given as strings."""
        for attr_name in ("token_path", "cache_dir"):
            current = getattr(self, attr_name)
            if isinstance(current, str):
                setattr(self, attr_name, Path(current))

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "AmoCRMAccount":
        """Create an account configuration from a dictionary (for an Airflow DAG).

        Missing keys fall back to defaults; ``cache_dir`` stays ``None`` when
        the key is absent or empty.
        """
        raw_cache_dir = d.get("cache_dir")
        cache_location = Path(raw_cache_dir) if raw_cache_dir else None
        return cls(
            id=int(d.get("id", 0)),
            name=d.get("name", "account"),
            base_url=d.get("base_url", ""),
            token_path=Path(d.get("token_path", "token.json")),
            mybi_account_id=int(d.get("mybi_account_id", 0)),
            pipeline_ids=d.get("pipeline_ids"),
            cache_dir=cache_location,
        )
129
+
130
+
131
@dataclass
class ETLConfig:
    """Top-level ETL configuration."""

    database: DatabaseConfig
    accounts: List[AmoCRMAccount]
    batch_size: int = 100
    window_minutes: int = 120  # default export window (2 hours)
    log_level: str = "INFO"

    @classmethod
    def from_env(cls, env_path: Optional[Path] = None) -> "ETLConfig":
        """Create the configuration from environment variables and an optional env file.

        When *env_path* is given, its entries are added to ``os.environ``
        without overriding variables that are already set. Accounts are read
        from ``AMO_ACCOUNT_<n>_*`` variables for ``n`` from 1 to 9; a slot is
        used only when its ``..._ID`` variable is non-empty.

        Raises:
            ValueError: If a configured account has no
                ``AMO_ACCOUNT_<n>_MYBI_ACCOUNT_ID``, or a numeric variable
                cannot be converted with ``int``.
        """
        if env_path:
            for name, file_value in _load_env_file(env_path).items():
                os.environ.setdefault(name, file_value)

        database_settings = DatabaseConfig.from_env(env_path)
        env = os.environ

        # Variable layout: AMO_ACCOUNT_1_ID, AMO_ACCOUNT_1_NAME, AMO_ACCOUNT_1_URL, etc.
        configured_accounts = []
        for slot in range(1, 10):  # up to 9 accounts are supported
            key = f"AMO_ACCOUNT_{slot}_"
            raw_id = env.get(f"{key}ID")
            if not raw_id:
                continue

            # Comma-separated list; blank entries are ignored and an empty
            # result means "no pipeline filter".
            funnel_ids = [
                int(chunk.strip())
                for chunk in env.get(f"{key}PIPELINE_IDS", "").split(",")
                if chunk.strip()
            ]

            raw_mybi_id = env.get(f"{key}MYBI_ACCOUNT_ID")
            if not raw_mybi_id:
                raise ValueError(f"Не указан {key}MYBI_ACCOUNT_ID для аккаунта {raw_id}")

            fallback_name = f"account_{slot}"
            fallback_token = f"token_{slot}.json"
            account = AmoCRMAccount(
                id=int(raw_id),
                name=env.get(f"{key}NAME", fallback_name) or fallback_name,
                base_url=env.get(f"{key}URL", "") or "",
                token_path=Path(env.get(f"{key}TOKEN_PATH", fallback_token) or fallback_token),
                mybi_account_id=int(raw_mybi_id),
                pipeline_ids=funnel_ids or None,
                cache_dir=Path(env.get(f"{key}CACHE_DIR", ".cache") or ".cache"),
            )
            configured_accounts.append(account)

        return cls(
            database=database_settings,
            accounts=configured_accounts,
            batch_size=int(env.get("ETL_BATCH_SIZE", "100") or "100"),
            window_minutes=int(env.get("ETL_WINDOW_MINUTES", "120") or "120"),
            log_level=env.get("ETL_LOG_LEVEL", "INFO") or "INFO",
        )
188
+
189
+
190
# Example configuration for development (can be overridden via .env).
# NOTE(review): get_config() returns this object when no .env file exists and
# neither ETL_DB_HOST nor AMO_ACCOUNT_1_ID is set. The account below looks like
# a real tenant rather than a placeholder — confirm it is meant to ship.
DEFAULT_CONFIG = ETLConfig(
    database=DatabaseConfig(
        host="localhost",
        port=5432,
        dbname="amocrm",
        user="postgres",
        password="",
        schema="public",
    ),
    accounts=[
        AmoCRMAccount(
            id=30019651,
            name="bneginskogo",
            base_url="https://bneginskogo.amocrm.ru",
            token_path=Path("token.json"),
            mybi_account_id=53859,  # internal ID from mybi.ru
            pipeline_ids=[5987164, 6241334],
            cache_dir=Path(".cache"),
        ),
        # Add the remaining accounts here
    ],
    batch_size=100,
    window_minutes=120,
)
215
+
216
+
217
def get_config(env_path: Optional[Union[str, Path]] = None) -> ETLConfig:
    """Return the ETL configuration.

    Sources are tried in this order:

    1. *env_path*, when given: settings are loaded from that file.
    2. A ``.env`` file in the current working directory.
    3. Environment variables, when ``ETL_DB_HOST`` or ``AMO_ACCOUNT_1_ID``
       is set to a non-empty value.
    4. The module-level ``DEFAULT_CONFIG`` object.
    """
    if env_path:
        return ETLConfig.from_env(Path(env_path))

    local_env_file = Path(".env")
    if local_env_file.exists():
        return ETLConfig.from_env(local_env_file)

    probe_variables = ("ETL_DB_HOST", "AMO_ACCOUNT_1_ID")
    configured_via_environment = any(os.environ.get(name) for name in probe_variables)
    return ETLConfig.from_env() if configured_via_environment else DEFAULT_CONFIG