chu4hel-plogger 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Chu4hel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chu4hel-plogger
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: PLogger: A Python logging utility for CSV files with intelligent, metric-based rotation strategies to preserve significant data.
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Chu4hel
|
|
8
|
+
Author-email: 106600877+Chu4hel@users.noreply.github.com
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# PLogger: An Intelligent Logger for Preserving What Matters Most
|
|
20
|
+
|
|
21
|
+
[Русская версия README](docs/README_RU.md)
|
|
22
|
+
|
|
23
|
+
**PLogger** is a universal Python logger designed for collecting and analyzing performance metrics, experimental
|
|
24
|
+
results, or any other structured data. Its main feature is intelligent rotation strategies that allow you to preserve
|
|
25
|
+
the most unique and interesting records (outliers) while removing redundant data.
|
|
26
|
+
|
|
27
|
+
Unlike standard loggers that simply delete old entries, PLogger analyzes the data and retains those that stand out most
|
|
28
|
+
from the rest.
|
|
29
|
+
|
|
30
|
+
## Key Features
|
|
31
|
+
|
|
32
|
+
- **Intelligent Rotation**: Built-in strategies (`by_metric`, `by_group_metric`) to preserve the most significant data
|
|
33
|
+
when the log limit is reached.
|
|
34
|
+
- **Thread-Safety**: Secure logging from multiple threads without the risk of data corruption.
|
|
35
|
+
- **Ease of Use**: A concise API for quick integration into any project.
|
|
36
|
+
- **CSV Logging**: Data is stored in a universal and easily readable CSV format.
|
|
37
|
+
- **Zero Configuration**: Automatic creation of log directories and files.
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install chu4hel-plogger
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
*(Note: available once the package is published to PyPI.)*
|
|
46
|
+
|
|
47
|
+
## Basic Usage
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from pathlib import Path
|
|
51
|
+
from plogger import PLogger
|
|
52
|
+
|
|
53
|
+
# Initialize the logger
|
|
54
|
+
# Retain up to 100 entries, then remove the "least interesting" based on 'latency_ms' metric
|
|
55
|
+
logger = PLogger(
|
|
56
|
+
log_path=Path("performance_logs.csv"),
|
|
57
|
+
header=["request_id", "latency_ms", "status_code"],
|
|
58
|
+
max_entries=100,
|
|
59
|
+
rotation_strategy='by_metric',
|
|
60
|
+
metric_column='latency_ms'
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Log regular requests
|
|
64
|
+
logger.log({"request_id": "abc-123", "latency_ms": 52, "status_code": 200})
|
|
65
|
+
logger.log({"request_id": "def-456", "latency_ms": 55, "status_code": 200})
|
|
66
|
+
|
|
67
|
+
# ...after many entries...
|
|
68
|
+
|
|
69
|
+
# Log an unusually long request.
|
|
70
|
+
# Thanks to the 'by_metric' strategy, this record is highly likely
|
|
71
|
+
# to be preserved in the log even after rotation.
|
|
72
|
+
logger.log({"request_id": "xyz-789", "latency_ms": 5300, "status_code": 500})
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Rotation Strategies
|
|
76
|
+
|
|
77
|
+
- `fifo`: Standard "first-in, first-out" strategy. Removes the oldest entry.
|
|
78
|
+
- `by_metric`: Finds a pair of entries with the smallest difference in a numerical metric (`metric_column`) and removes
|
|
79
|
+
the older one from that pair. Ideal for preserving outliers and unique values.
|
|
80
|
+
- `by_group_metric`: First finds the largest group of entries (by `group_column`), and then applies the `by_metric`
|
|
81
|
+
logic within that group. Useful when you need to analyze anomalies within the context of a specific category.
|
|
82
|
+
|
|
83
|
+
## License
|
|
84
|
+
|
|
85
|
+
The project is distributed under the [MIT License](LICENSE).
|
|
86
|
+
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# PLogger: An Intelligent Logger for Preserving What Matters Most
|
|
2
|
+
|
|
3
|
+
[Русская версия README](docs/README_RU.md)
|
|
4
|
+
|
|
5
|
+
**PLogger** is a universal Python logger designed for collecting and analyzing performance metrics, experimental
|
|
6
|
+
results, or any other structured data. Its main feature is intelligent rotation strategies that allow you to preserve
|
|
7
|
+
the most unique and interesting records (outliers) while removing redundant data.
|
|
8
|
+
|
|
9
|
+
Unlike standard loggers that simply delete old entries, PLogger analyzes the data and retains those that stand out most
|
|
10
|
+
from the rest.
|
|
11
|
+
|
|
12
|
+
## Key Features
|
|
13
|
+
|
|
14
|
+
- **Intelligent Rotation**: Built-in strategies (`by_metric`, `by_group_metric`) to preserve the most significant data
|
|
15
|
+
when the log limit is reached.
|
|
16
|
+
- **Thread-Safety**: Secure logging from multiple threads without the risk of data corruption.
|
|
17
|
+
- **Ease of Use**: A concise API for quick integration into any project.
|
|
18
|
+
- **CSV Logging**: Data is stored in a universal and easily readable CSV format.
|
|
19
|
+
- **Zero Configuration**: Automatic creation of log directories and files.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install chu4hel-plogger
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
*(Note: available once the package is published to PyPI.)*
|
|
28
|
+
|
|
29
|
+
## Basic Usage
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from plogger import PLogger
|
|
34
|
+
|
|
35
|
+
# Initialize the logger
|
|
36
|
+
# Retain up to 100 entries, then remove the "least interesting" based on 'latency_ms' metric
|
|
37
|
+
logger = PLogger(
|
|
38
|
+
log_path=Path("performance_logs.csv"),
|
|
39
|
+
header=["request_id", "latency_ms", "status_code"],
|
|
40
|
+
max_entries=100,
|
|
41
|
+
rotation_strategy='by_metric',
|
|
42
|
+
metric_column='latency_ms'
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Log regular requests
|
|
46
|
+
logger.log({"request_id": "abc-123", "latency_ms": 52, "status_code": 200})
|
|
47
|
+
logger.log({"request_id": "def-456", "latency_ms": 55, "status_code": 200})
|
|
48
|
+
|
|
49
|
+
# ...after many entries...
|
|
50
|
+
|
|
51
|
+
# Log an unusually long request.
|
|
52
|
+
# Thanks to the 'by_metric' strategy, this record is highly likely
|
|
53
|
+
# to be preserved in the log even after rotation.
|
|
54
|
+
logger.log({"request_id": "xyz-789", "latency_ms": 5300, "status_code": 500})
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Rotation Strategies
|
|
58
|
+
|
|
59
|
+
- `fifo`: Standard "first-in, first-out" strategy. Removes the oldest entry.
|
|
60
|
+
- `by_metric`: Finds a pair of entries with the smallest difference in a numerical metric (`metric_column`) and removes
|
|
61
|
+
the older one from that pair. Ideal for preserving outliers and unique values.
|
|
62
|
+
- `by_group_metric`: First finds the largest group of entries (by `group_column`), and then applies the `by_metric`
|
|
63
|
+
logic within that group. Useful when you need to analyze anomalies within the context of a specific category.
|
|
64
|
+
|
|
65
|
+
## License
|
|
66
|
+
|
|
67
|
+
The project is distributed under the [MIT License](LICENSE).
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "chu4hel-plogger"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "PLogger: A Python logging utility for CSV files with intelligent, metric-based rotation strategies to preserve significant data."
|
|
5
|
+
authors = ["Chu4hel <106600877+Chu4hel@users.noreply.github.com>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
packages = [{ include = "plogger", from = "src" }]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
exclude = [
|
|
10
|
+
"tests/",
|
|
11
|
+
"site/",
|
|
12
|
+
"examples/",
|
|
13
|
+
"docs/",
|
|
14
|
+
".github/",
|
|
15
|
+
".idea/",
|
|
16
|
+
"__pycache__/",
|
|
17
|
+
"*.pyc",
|
|
18
|
+
"*.log",
|
|
19
|
+
"*.egg-info/",
|
|
20
|
+
".vscode/",
|
|
21
|
+
".ruff_cache/",
|
|
22
|
+
"coverage.xml",
|
|
23
|
+
".coverage",
|
|
24
|
+
"PUBLISHING.md",
|
|
25
|
+
"project.txt",
|
|
26
|
+
"changelog.txt",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
[tool.poetry.dependencies]
|
|
31
|
+
python = ">=3.10"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
[build-system]
|
|
35
|
+
requires = ["poetry-core"]
|
|
36
|
+
build-backend = "poetry.core.masonry.api"
|
|
37
|
+
|
|
38
|
+
[tool.pytest.ini_options]
|
|
39
|
+
pythonpath = [
|
|
40
|
+
"src"
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[tool.poetry.group.dev.dependencies]
|
|
44
|
+
pytest = "^9.0.2"
|
|
45
|
+
pytest-cov = "^5.0.0"
|
|
46
|
+
pytest-mock = "^3.14.0"
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PLogger - A Universal Performance Logging Utility for Python.
|
|
3
|
+
|
|
4
|
+
Provides the `PLogger` class for thread-safe logging to CSV files with
|
|
5
|
+
intelligent, metric-based rotation strategies. Ideal for collecting
|
|
6
|
+
performance metrics, experimental results, or any structured data
|
|
7
|
+
where it's important to preserve unique values (outliers) while
|
|
8
|
+
managing log size.
|
|
9
|
+
|
|
10
|
+
Key Features:
|
|
11
|
+
- Thread-safe writes for use in concurrent applications.
|
|
12
|
+
- Multiple rotation strategies:
|
|
13
|
+
- `fifo`: Standard first-in, first-out.
|
|
14
|
+
- `by_metric`: Keeps outliers by removing entries with the smallest metric difference to their neighbors.
|
|
15
|
+
- `by_group_metric`: Applies `by_metric` logic within the largest group of entries.
|
|
16
|
+
- Simple, dictionary-based logging.
|
|
17
|
+
- Automatic creation of log files and directories.
|
|
18
|
+
|
|
19
|
+
Basic Usage:
|
|
20
|
+
------------
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from plogger import PLogger
|
|
23
|
+
|
|
24
|
+
# Initialize the logger
|
|
25
|
+
logger = PLogger(
|
|
26
|
+
log_path=Path("performance_logs.csv"),
|
|
27
|
+
header=["request_id", "latency_ms"],
|
|
28
|
+
max_entries=100,
|
|
29
|
+
rotation_strategy='by_metric',
|
|
30
|
+
metric_column='latency_ms'
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Log data
|
|
34
|
+
logger.log({"request_id": "abc-123", "latency_ms": 150.5})
|
|
35
|
+
"""
|
|
36
|
+
from importlib import metadata
|
|
37
|
+
|
|
38
|
+
from .plogger import PLogger
|
|
39
|
+
|
|
40
|
+
try:
|
|
41
|
+
# This will read the version from the installed package's metadata
|
|
42
|
+
__version__ = metadata.version("plogger")
|
|
43
|
+
except metadata.PackageNotFoundError:
|
|
44
|
+
# This happens when the package is not installed, e.g., when running tests
|
|
45
|
+
# or in a development environment.
|
|
46
|
+
__version__ = "0.0.0-dev"
|
|
47
|
+
|
|
48
|
+
__all__ = ["PLogger"]
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Универсальный модуль для логирования метрик производительности.
|
|
2
|
+
|
|
3
|
+
Этот модуль предоставляет класс `PLogger` для сбора и записи метрик
|
|
4
|
+
производительности в CSV файлы с поддержкой потокобезопасной записи
|
|
5
|
+
и настраиваемых стратегий ротации логов.
|
|
6
|
+
"""
|
|
7
|
+
import csv
import logging
import threading
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PLogger:
    """
    Universal service for collecting performance metrics into a single CSV file.

    Provides thread-safe writes and intelligent log rotation: when the log
    exceeds ``max_entries``, a configurable strategy picks which row to drop,
    allowing outliers/unique values to be preserved instead of the newest data.
    """

    def __init__(
            self,
            log_path: Path,
            header: List[str],
            max_entries: int,
            rotation_strategy: str = 'fifo',  # 'fifo', 'by_metric', 'by_group_metric'
            metric_column: Optional[str] = None,
            group_column: Optional[str] = None
    ):
        """
        Initialize the logger for a single file.

        Args:
            log_path: Full path to the log file.
            header: Column headers for the CSV file; a ``timestamp`` column
                is automatically prepended.
            max_entries: Maximum number of entries in the log before rotation.
                A non-positive value disables rotation.
            rotation_strategy: Rotation strategy ('fifo', 'by_metric', 'by_group_metric').
            metric_column: Name of the column used as the metric during rotation.
            group_column: Name of the column used for grouping during rotation.

        Raises:
            ValueError: If ``rotation_strategy`` is not a supported name.
        """
        self.log_path: Path = log_path
        self.header: List[str] = ["timestamp"] + header
        self._max_entries: int = max_entries
        self.metric_column: Optional[str] = metric_column
        self.group_column: Optional[str] = group_column
        self._lock: threading.Lock = threading.Lock()

        # Dispatch table: each strategy maps (header, lines) -> index of the
        # row to discard. FIFO always discards the oldest row (index 0).
        self._rotation_strategies: Dict[str, Callable] = {
            'fifo': lambda h, lines: 0,
            'by_metric': self.__get_index_to_discard_by_metric,
            'by_group_metric': self.__get_index_to_discard_by_group_metric,
        }
        if rotation_strategy not in self._rotation_strategies:
            raise ValueError(f"Unknown rotation strategy: {rotation_strategy}")
        self._rotation_strategy: Callable = self._rotation_strategies[rotation_strategy]

        self.log_path.parent.mkdir(exist_ok=True, parents=True)
        self.__init_log_file()

    @property
    def max_entries(self) -> int:
        """Maximum number of rows kept in the log before rotation."""
        return self._max_entries

    @max_entries.setter
    def max_entries(self, value: int) -> None:
        # Take the lock so the limit cannot change mid-way through a log() call.
        with self._lock:
            self._max_entries = value

    def log(self, data: Dict[str, Any]) -> None:
        """
        Write a single row of data to the log.

        Args:
            data: Mapping of column names from ``header`` (except 'timestamp')
                to values. ``None`` values and unknown keys are skipped;
                missing columns are written as empty strings.
        """
        with self._lock:
            try:
                # Drop None values so they are written as '' rather than "None".
                clean_data = {k: v for k, v in data.items() if v is not None}

                header, lines = self.__read_log_data()

                # Build the row in header order, starting with the timestamp.
                new_row = [datetime.now().isoformat()]
                # Skip 'timestamp' in self.header when mapping values.
                for col in self.header[1:]:
                    new_row.append(str(clean_data.get(col, '')))

                lines.append(new_row)

                # Rotation (disabled when max_entries <= 0).
                if 0 < self.max_entries < len(lines):
                    lines = self.__rotate(lines, header)

                self.__write_log_data(self.log_path, header, lines)
            except Exception:
                # Best-effort by design: a logging failure must never crash the
                # caller. Use the logging module (with traceback) instead of
                # print(), so the error is visible in application logs.
                logging.getLogger(__name__).exception(
                    "Ошибка при записи в лог %s", self.log_path.name
                )

    def __init_log_file(self) -> None:
        """Check that the file exists and create it with the header if absent/empty."""
        if not self.log_path.exists() or self.log_path.stat().st_size == 0:
            with self.log_path.open('w', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(self.header)

    def __rotate(self, lines: List[List[str]], header: List[str]) -> List[List[str]]:
        """Apply the selected strategy to remove excess entries."""
        while len(lines) > self.max_entries:
            idx = self._rotation_strategy(header, lines)
            lines.pop(idx)
        return lines

    # --- I/O block ---
    def __read_log_data(self) -> tuple[List[str], List[List[str]]]:
        """Return (header, rows) read from the log file, or (self.header, []) if empty."""
        if self.log_path.exists() and self.log_path.stat().st_size > 0:
            with self.log_path.open('r', newline='', encoding='utf-8') as f:
                reader = csv.reader(f)
                header = next(reader, [])
                return header, list(reader)
        return self.header, []

    @staticmethod
    def __write_log_data(file_path: Path, header: List[str], lines: List[List[str]]) -> None:
        """Rewrite the whole log file with the given header and rows."""
        with file_path.open('w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(lines)

    # --- Rotation algorithms block ---
    def __get_index_to_discard_by_metric(self, header: List[str], lines: List[List[str]]) -> int:
        """Find the index of the entry whose metric is closest to a neighbor's.

        Rows whose metric cannot be parsed as a float are ignored; of the pair
        with the smallest metric difference, the older row is discarded, so
        outliers survive rotation. Falls back to FIFO (index 0) when no metric
        column is configured or fewer than two rows have usable metrics.
        """
        if self.metric_column is None:
            return 0  # Fallback to FIFO if no metric column is specified

        try:
            m_idx = header.index(self.metric_column)
            indexed_metrics = []
            for i, line in enumerate(lines):
                try:
                    indexed_metrics.append((float(line[m_idx]), i))
                except (ValueError, IndexError):
                    continue

            if len(indexed_metrics) < 2:
                return 0

            indexed_metrics.sort()

            min_diff = float('inf')
            target_idx = 0
            for i in range(len(indexed_metrics) - 1):
                diff = indexed_metrics[i + 1][0] - indexed_metrics[i][0]
                if diff < min_diff:
                    min_diff = diff
                    # Of the closest pair, drop the older row (smaller index).
                    target_idx = min(indexed_metrics[i][1], indexed_metrics[i + 1][1])
            return target_idx

        except (ValueError, IndexError):
            return 0

    def __get_index_to_discard_by_group_metric(self, header: List[str], lines: List[List[str]]) -> int:
        """Group by ``group_column`` and search for close metrics within the largest group.

        Falls back to FIFO (index 0) when either column is unconfigured or
        missing from the header.
        """
        if self.group_column is None or self.metric_column is None:
            return 0  # Fallback to FIFO

        try:
            name_idx = header.index(self.group_column)
            groups = defaultdict(list)
            for i, line in enumerate(lines):
                groups[line[name_idx]].append(i)

            largest_group_name = max(groups, key=lambda k: len(groups[k]))
            group_indices = groups[largest_group_name]

            if len(group_indices) < 2:
                # If the largest group has fewer than 2 elements, simply remove
                # the oldest entry of that group.
                return group_indices[0]

            group_lines = [lines[i] for i in group_indices]
            sub_idx = self.__get_index_to_discard_by_metric(header, group_lines)
            # Map the index within the group back to the full line list.
            return group_indices[sub_idx]

        except (ValueError, IndexError):
            return 0