cognitor 0.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognitor-0.0.0/PKG-INFO +112 -0
- cognitor-0.0.0/README.md +84 -0
- cognitor-0.0.0/cognitor/__init__.py +3 -0
- cognitor-0.0.0/cognitor/database.py +58 -0
- cognitor-0.0.0/cognitor/monitor.py +238 -0
- cognitor-0.0.0/cognitor/service.py +80 -0
- cognitor-0.0.0/cognitor/utils.py +51 -0
- cognitor-0.0.0/cognitor.egg-info/PKG-INFO +112 -0
- cognitor-0.0.0/cognitor.egg-info/SOURCES.txt +14 -0
- cognitor-0.0.0/cognitor.egg-info/dependency_links.txt +1 -0
- cognitor-0.0.0/cognitor.egg-info/requires.txt +14 -0
- cognitor-0.0.0/cognitor.egg-info/top_level.txt +1 -0
- cognitor-0.0.0/pyproject.toml +45 -0
- cognitor-0.0.0/setup.cfg +4 -0
- cognitor-0.0.0/tests/test_integration.py +46 -0
- cognitor-0.0.0/tests/test_monitor.py +70 -0
cognitor-0.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cognitor
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: Python SDK to extract relevant metrics from Small Language Model inference calls.
|
|
5
|
+
Author-email: Riccardo <riccardo@tanaos.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/riccardo/cognitor-py
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/riccardo/cognitor-py/issues
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: transformers
|
|
16
|
+
Requires-Dist: psutil
|
|
17
|
+
Requires-Dist: torch
|
|
18
|
+
Requires-Dist: pydantic
|
|
19
|
+
Requires-Dist: psycopg2-binary
|
|
20
|
+
Requires-Dist: sqlalchemy
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest; extra == "dev"
|
|
23
|
+
Requires-Dist: black; extra == "dev"
|
|
24
|
+
Requires-Dist: isort; extra == "dev"
|
|
25
|
+
Requires-Dist: build; extra == "dev"
|
|
26
|
+
Requires-Dist: twine; extra == "dev"
|
|
27
|
+
Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
|
|
28
|
+
|
|
29
|
+
# cognitor-py
|
|
30
|
+
|
|
31
|
+
`cognitor-py` is a Python SDK that wraps `transformers` inference calls to extract useful metadata and performance metrics.
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- **Model Information**: Automatically captures the model name.
|
|
36
|
+
- **Performance Metrics**: Tracks CPU and RAM usage during inference.
|
|
37
|
+
- **GPU Monitoring**: Captures peak GPU memory usage (if CUDA is available).
|
|
38
|
+
- **Token Counting**: Calculates input and output token counts for common pipeline tasks.
|
|
39
|
+
- **Latency Tracking**: Measures inference duration.
|
|
40
|
+
- **Error Handling**: Captures and reports errors during inference.
|
|
41
|
+
- **Flexible Logging Targets**: Automatically saves all inference logs to either a local PostgreSQL database or a local file (JSON lines).
|
|
42
|
+
- **Graceful Error Handling**: Ensures the program continues to run even if the database is unreachable.
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install cognitor-py
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
### Using the Inference Monitor
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from transformers import pipeline, AutoTokenizer
|
|
56
|
+
from cognitor import Cognitor
|
|
57
|
+
|
|
58
|
+
# Initialize your model and tokenizer
|
|
59
|
+
model_name = "gpt2"
|
|
60
|
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
61
|
+
pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer)
|
|
62
|
+
|
|
63
|
+
# Initialize Cognitor with PostgreSQL configuration (default)
|
|
64
|
+
# Or use log_type="file" and log_path="logs.jsonl" for file logging
|
|
65
|
+
cognitor = Cognitor(
|
|
66
|
+
model_name=model_name,
|
|
67
|
+
tokenizer=tokenizer,
|
|
68
|
+
log_type="database", # or "file"
|
|
69
|
+
host="localhost",
|
|
70
|
+
port=5432,
|
|
71
|
+
user="postgres",
|
|
72
|
+
password="postgres",
|
|
73
|
+
dbname="cognitor"
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Run inference within the monitor context
|
|
77
|
+
with cognitor.monitor() as m:
|
|
78
|
+
input_text = "Once upon a time,"
|
|
79
|
+
# Use track() to capture only the inference duration
|
|
80
|
+
with m.track():
|
|
81
|
+
output = pipe(input_text, max_length=50)
|
|
82
|
+
m.capture(input_data=input_text, output=output)
|
|
83
|
+
|
|
84
|
+
# The metadata is now available via the cognitor instance
|
|
85
|
+
metadata = cognitor.get_last_metadata()
|
|
86
|
+
print(output)
|
|
87
|
+
print(metadata)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Metadata Structure
|
|
91
|
+
|
|
92
|
+
The extracted metadata follows this structure:
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
{
|
|
96
|
+
"model_name": "gpt2",
|
|
97
|
+
"timestamp": "2026-04-01T14:34:14.123456+00:00",
|
|
98
|
+
"input_tokens": 5,
|
|
99
|
+
"output_tokens": 45,
|
|
100
|
+
"cpu_percent": 12.5,
|
|
101
|
+
"ram_usage_percent": 1.2,
|
|
102
|
+
"gpu_usage_percent": 5.5, # Optional
|
|
103
|
+
"duration": 0.45, # Inference-only duration
|
|
104
|
+
"input": "Once upon a time,",
|
|
105
|
+
"output": [...],
|
|
106
|
+
"error": None
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT
|
cognitor-0.0.0/README.md
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# cognitor-py
|
|
2
|
+
|
|
3
|
+
`cognitor-py` is a Python SDK that wraps `transformers` inference calls to extract useful metadata and performance metrics.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Model Information**: Automatically captures the model name.
|
|
8
|
+
- **Performance Metrics**: Tracks CPU and RAM usage during inference.
|
|
9
|
+
- **GPU Monitoring**: Captures peak GPU memory usage (if CUDA is available).
|
|
10
|
+
- **Token Counting**: Calculates input and output token counts for common pipeline tasks.
|
|
11
|
+
- **Latency Tracking**: Measures inference duration.
|
|
12
|
+
- **Error Handling**: Captures and reports errors during inference.
|
|
13
|
+
- **Flexible Logging Targets**: Automatically saves all inference logs to either a local PostgreSQL database or a local file (JSON lines).
|
|
14
|
+
- **Graceful Error Handling**: Ensures the program continues to run even if the database is unreachable.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install cognitor-py
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
### Using the Inference Monitor
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from transformers import pipeline, AutoTokenizer
|
|
28
|
+
from cognitor import Cognitor
|
|
29
|
+
|
|
30
|
+
# Initialize your model and tokenizer
|
|
31
|
+
model_name = "gpt2"
|
|
32
|
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
33
|
+
pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer)
|
|
34
|
+
|
|
35
|
+
# Initialize Cognitor with PostgreSQL configuration (default)
|
|
36
|
+
# Or use log_type="file" and log_path="logs.jsonl" for file logging
|
|
37
|
+
cognitor = Cognitor(
|
|
38
|
+
model_name=model_name,
|
|
39
|
+
tokenizer=tokenizer,
|
|
40
|
+
log_type="database", # or "file"
|
|
41
|
+
host="localhost",
|
|
42
|
+
port=5432,
|
|
43
|
+
user="postgres",
|
|
44
|
+
password="postgres",
|
|
45
|
+
dbname="cognitor"
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# Run inference within the monitor context
|
|
49
|
+
with cognitor.monitor() as m:
|
|
50
|
+
input_text = "Once upon a time,"
|
|
51
|
+
# Use track() to capture only the inference duration
|
|
52
|
+
with m.track():
|
|
53
|
+
output = pipe(input_text, max_length=50)
|
|
54
|
+
m.capture(input_data=input_text, output=output)
|
|
55
|
+
|
|
56
|
+
# The metadata is now available via the cognitor instance
|
|
57
|
+
metadata = cognitor.get_last_metadata()
|
|
58
|
+
print(output)
|
|
59
|
+
print(metadata)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Metadata Structure
|
|
63
|
+
|
|
64
|
+
The extracted metadata follows this structure:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
{
|
|
68
|
+
"model_name": "gpt2",
|
|
69
|
+
"timestamp": "2026-04-01T14:34:14.123456+00:00",
|
|
70
|
+
"input_tokens": 5,
|
|
71
|
+
"output_tokens": 45,
|
|
72
|
+
"cpu_percent": 12.5,
|
|
73
|
+
"ram_usage_percent": 1.2,
|
|
74
|
+
"gpu_usage_percent": 5.5, # Optional
|
|
75
|
+
"duration": 0.45, # Inference-only duration
|
|
76
|
+
"input": "Once upon a time,",
|
|
77
|
+
"output": [...],
|
|
78
|
+
"error": None
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## License
|
|
83
|
+
|
|
84
|
+
MIT
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
from sqlalchemy import create_engine, Column, Integer, String, Float, Text
|
|
5
|
+
from sqlalchemy.orm import sessionmaker, declarative_base
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
Base = declarative_base()
|
|
10
|
+
|
|
11
|
+
class InferenceLog(Base):
    """One database row per monitored inference call.

    Mirrors the fields of the InferenceMetadata record produced by the
    monitor. Complex values (input, output, extra) are stored as
    JSON-encoded text, and the timestamp is stored as an ISO-8601 string
    rather than a native DB datetime.
    """
    __tablename__ = 'inference_logs'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Name of the monitored model.
    model_name = Column(String)
    # ISO-8601 timestamp string recorded when monitoring started.
    timestamp = Column(String)
    input_tokens = Column(Integer)
    output_tokens = Column(Integer)
    # System-wide CPU usage percentage sampled at the end of monitoring.
    cpu_percent = Column(Float)
    # This process's RAM usage as a percentage of total memory.
    ram_usage_percent = Column(Float)
    # Peak GPU memory as % of device capacity; NULL when CUDA is unavailable.
    gpu_usage_percent = Column(Float, nullable=True)
    # Inference-only duration in seconds (from the track() timer).
    duration = Column(Float)
    # JSON-encoded inference input.
    input = Column(Text)
    # JSON-encoded inference output.
    output = Column(Text)
    # String form of any exception raised inside the monitored block.
    error = Column(Text, nullable=True)
    # JSON-encoded dict of arbitrary extra fields.
    extra = Column(Text)
|
|
27
|
+
|
|
28
|
+
class DatabaseManager:
    """
    Manages the database connection and schema initialization using SQLAlchemy.
    """
    def __init__(self, host: str = "localhost", port: int = 5432, user: str = "postgres", password: str = "postgres", dbname: str = "cognitor") -> None:
        """
        Initializes the DatabaseManager with connection parameters.

        Args:
            host (str): The database host address.
            port (int): The database port number.
            user (str): The database username.
            password (str): The database password.
            dbname (str): The name of the database.
        """
        self.db_url: str = f"postgresql://{user}:{password}@{host}:{port}/{dbname}"
        self.engine: Any = create_engine(self.db_url)
        self.Session: Any = sessionmaker(bind=self.engine)
        # Flipped to True only after the schema is created successfully;
        # LoggingService checks this flag before attempting any DB write.
        self._initialized: bool = False

    def init_db(self) -> None:
        """
        Initializes the database schema by creating all defined tables.

        If the database is unreachable, the error is logged and
        ``_initialized`` stays False (so database logging is skipped), but
        the host program keeps running. The previous implementation called
        ``exit(0)`` here, terminating the caller's process — the opposite
        of the documented "program continues to run even if the database
        is unreachable" behavior.
        """
        try:
            Base.metadata.create_all(self.engine)
            self._initialized = True
        except Exception as e:
            # Logging is best-effort: never kill the host application
            # because the logging backend is down.
            logger.error("Database unreachable at %s: %s", self.engine.url, e)
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Any, Dict, Optional, Union, List
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
from .utils import get_resource_usage, Timer
|
|
5
|
+
from .database import DatabaseManager
|
|
6
|
+
from .service import LoggingService
|
|
7
|
+
import torch
|
|
8
|
+
|
|
9
|
+
class InferenceMetadata(BaseModel):
    """Validated record of the metrics captured around one inference call."""

    # Name of the monitored model (as passed to Cognitor).
    model_name: str
    # ISO-8601 string recorded when monitoring started (Timer.timestamp).
    timestamp: str
    input_tokens: int
    output_tokens: int
    # System-wide CPU usage percentage sampled at the end of monitoring.
    cpu_percent: float
    # This process's RAM usage as a percentage of total memory.
    ram_usage_percent: float
    # Peak GPU memory as % of device capacity; None when CUDA is unavailable.
    gpu_usage_percent: Optional[float] = None
    # Inference-only duration in seconds (from the track() timer).
    duration: float
    # Raw inference input; any pipeline-compatible object.
    input: Any
    # Raw inference output; any pipeline-compatible object.
    output: Any
    # String form of any exception raised inside the monitored block.
    error: Optional[str] = None
    # Arbitrary extra fields. Pydantic deep-copies field defaults per
    # instance, so the shared-mutable-default pitfall does not apply here.
    extra: Dict[str, Any] = {}
|
|
22
|
+
|
|
23
|
+
class InferenceMonitor:
    """
    Context manager wrapping a single inference call.

    On entry it snapshots resource usage and starts the overall timer; on
    exit it asks the parent Cognitor to assemble an InferenceMetadata
    record and forwards it to the configured logging target.
    """
    def __init__(self, cognitor: 'Cognitor') -> None:
        """
        Bind this monitor to its parent Cognitor.

        Args:
            cognitor (Cognitor): The parent Cognitor instance.
        """
        self.cognitor: 'Cognitor' = cognitor
        self.input: Any = None
        self.output: Any = None
        self.error: Optional[str] = None
        self.start_usage: Optional[Dict[str, float]] = None
        # Timer over the whole with-block; its timestamp marks monitoring start.
        self.full_timer: Timer = Timer()
        # Timer over just the inference call (driven via track()).
        self.inference_timer: Timer = Timer()
        self.metadata: Optional[InferenceMetadata] = None

    def __enter__(self) -> 'InferenceMonitor':
        """
        Snapshot resource usage, start the overall timer, and return self.

        Returns:
            InferenceMonitor: The monitor instance.
        """
        self.start_usage = get_resource_usage()
        self.full_timer.__enter__()
        return self

    def track(self) -> Timer:
        """
        Hand out the inference-only timer as a context manager.

        Returns:
            Timer: The inference timer context manager.
        """
        return self.inference_timer

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """
        Stop timing, build the metadata record, and log it.

        Args:
            exc_type: The exception type if an error occurred.
            exc_val: The exception value if an error occurred.
            exc_tb: The traceback if an error occurred.
        """
        self.full_timer.__exit__(exc_type, exc_val, exc_tb)
        if exc_type is not None:
            self.error = str(exc_val)

        record = self.cognitor._generate_metadata(
            input_data=self.input,
            output=self.output,
            error=self.error,
            full_timer=self.full_timer,
            inference_timer=self.inference_timer,
            start_usage=self.start_usage or {},
        )
        self.metadata = record
        self.cognitor._last_metadata = record

        # Forward the record to the configured target (database or file).
        service = self.cognitor.logging_service
        if service:
            service.log_inference(record.model_dump())

    def capture(self, output: Any, input_data: Any = None) -> None:
        """
        Record the inference output and, optionally, its input.

        Args:
            output (Any): The inference output data.
            input_data (Any, optional): The inference input data. Defaults to None.
        """
        self.output = output
        if input_data is not None:
            self.input = input_data
|
|
96
|
+
|
|
97
|
+
class Cognitor:
    """
    The main SDK class for monitoring SLM inference calls.

    Wraps `transformers`-style inference callables, collecting token counts,
    resource usage, latency and errors, and forwards each record to the
    configured logging target (PostgreSQL database or JSON-lines file).
    """

    # Output-dict keys produced by common `transformers` pipeline tasks
    # whose text should be token-counted. Only the first matching key of an
    # item is counted (mirrors the original elif chain).
    _OUTPUT_TEXT_KEYS = ("generated_text", "summary_text", "translation_text")

    def __init__(self, model_name: str, tokenizer: Any = None, log_type: str = "database", log_path: Optional[str] = None, host: str = "localhost", port: int = 5432, user: str = "postgres", password: str = "postgres", dbname: str = "cognitor") -> None:
        """
        Initializes the Cognitor instance with model, tokenizer, and logging configuration.

        Args:
            model_name (str): The name of the model being monitored.
            tokenizer (Any, optional): The tokenizer for token counting. Defaults to None.
            log_type (str): The logging target ('database' or 'file'). Defaults to "database".
            log_path (Optional[str]): The path for file logging. Defaults to None.
            host (str): The database host. Defaults to "localhost".
            port (int): The database port. Defaults to 5432.
            user (str): The database user. Defaults to "postgres".
            password (str): The database password. Defaults to "postgres".
            dbname (str): The database name. Defaults to "cognitor".

        Raises:
            ValueError: If log_type is neither 'database' nor 'file'.
        """
        self.model_name: str = model_name
        self.tokenizer: Any = tokenizer
        self._last_metadata: Optional["InferenceMetadata"] = None

        self.db_manager: Optional["DatabaseManager"] = None
        self.logging_service: Optional["LoggingService"] = None

        if log_type == "database":
            self.db_manager = DatabaseManager(host=host, port=port, user=user, password=password, dbname=dbname)
            self.db_manager.init_db()
            self.logging_service = LoggingService(db_manager=self.db_manager)
        elif log_type == "file":
            # Default the log file name to one derived from the model name.
            self.logging_service = LoggingService(log_file=log_path or f"{model_name}_logs.jsonl")
        else:
            raise ValueError(f"Invalid log_type: {log_type}. Must be 'database' or 'file'.")

    def monitor(self) -> "InferenceMonitor":
        """
        Returns a context manager to monitor an inference call.

        Returns:
            InferenceMonitor: The inference monitor context manager.
        """
        return InferenceMonitor(self)

    def get_last_metadata(self) -> Optional[Dict[str, Any]]:
        """
        Returns the metadata from the last monitored inference.

        Returns:
            Optional[Dict[str, Any]]: The metadata dictionary, or None if no
            inference has been monitored.
        """
        return self._last_metadata.model_dump() if self._last_metadata else None

    def monitor_inference(self, func: Any, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Monitors a single inference call using the context manager.

        Args:
            func (Callable): The inference function to monitor.
            *args: Positional arguments for the inference function.
            **kwargs: Keyword arguments for the inference function.

        Returns:
            Dict[str, Any]: A dictionary containing the 'output' and 'metadata'.

        Note:
            The result is now returned *after* the with-block closes. The
            previous version returned from inside it, before
            InferenceMonitor.__exit__ had generated the metadata, so the
            returned "metadata" was always empty.
        """
        input_data: Any = args[0] if args else kwargs
        with self.monitor() as m:
            with m.track():
                output = func(*args, **kwargs)
            m.capture(input_data=input_data, output=output)
        # m.metadata is populated by InferenceMonitor.__exit__ above.
        return {"output": output, "metadata": m.metadata.model_dump() if m.metadata else {}}

    def _count_input_tokens(self, input_data: Any) -> int:
        """Count tokens in string, list-of-string, or chat-style inputs (0 if unrecognized)."""
        if isinstance(input_data, str):
            return len(self.tokenizer.encode(input_data))
        if isinstance(input_data, list):
            if all(isinstance(i, str) for i in input_data):
                return sum(len(self.tokenizer.encode(i)) for i in input_data)
            if all(isinstance(i, dict) for i in input_data):
                # Chat-like inputs: count each message's "content" field.
                return sum(
                    len(self.tokenizer.encode(msg["content"]))
                    for msg in input_data
                    if "content" in msg
                )
        return 0

    def _count_output_tokens(self, output: Any) -> int:
        """Count tokens in common pipeline output shapes (0 if unrecognized)."""
        if isinstance(output, list):
            total = 0
            for item in output:
                if isinstance(item, dict):
                    for key in self._OUTPUT_TEXT_KEYS:
                        if key in item:
                            total += len(self.tokenizer.encode(item[key]))
                            break
                elif isinstance(item, str):
                    total += len(self.tokenizer.encode(item))
            return total
        if isinstance(output, dict):
            if "generated_text" in output:
                return len(self.tokenizer.encode(output["generated_text"]))
            return 0
        if isinstance(output, str):
            return len(self.tokenizer.encode(output))
        return 0

    @staticmethod
    def _peak_gpu_usage_percent() -> Optional[float]:
        """Peak GPU memory since the last reset as a % of device capacity, or None without CUDA."""
        if not torch.cuda.is_available():
            return None
        device = torch.cuda.current_device()
        total_mem = torch.cuda.get_device_properties(device).total_memory
        peak_mem = torch.cuda.max_memory_allocated(device)
        # Reset so the next inference measures its own peak.
        torch.cuda.reset_peak_memory_stats(device)
        return (peak_mem / total_mem) * 100

    def _generate_metadata(self, input_data: Any, output: Any, error: Optional[str], full_timer: "Timer", inference_timer: "Timer", start_usage: Dict[str, float]) -> "InferenceMetadata":
        """
        Internal method to generate InferenceMetadata from collected metrics.

        Args:
            input_data (Any): The inference input data.
            output (Any): The inference output data.
            error (Optional[str]): The error message if an error occurred.
            full_timer (Timer): The timer for the full context duration.
            inference_timer (Timer): The timer for the specific inference duration.
            start_usage (Dict[str, float]): The initial resource usage metrics.
                NOTE(review): currently unused — kept for interface
                compatibility; only end-of-monitoring usage is reported.

        Returns:
            InferenceMetadata: The generated metadata object.
        """
        end_usage: Dict[str, float] = get_resource_usage()

        input_tokens: int = self._count_input_tokens(input_data) if self.tokenizer else 0
        # `output` truthiness mirrors the original guard: falsy outputs
        # (None, "", []) are not token-counted.
        output_tokens: int = self._count_output_tokens(output) if (self.tokenizer and output) else 0

        return InferenceMetadata(
            model_name=self.model_name,
            timestamp=full_timer.timestamp,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cpu_percent=end_usage["cpu_percent"],
            ram_usage_percent=end_usage["ram_usage_percent"],
            gpu_usage_percent=self._peak_gpu_usage_percent(),
            duration=inference_timer.duration,
            input=input_data,
            output=output,
            error=error,
        )
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
from .database import DatabaseManager, InferenceLog
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
class LoggingService:
    """
    Service responsible for logging inference metadata to either a database
    or a local file.

    All failures are logged and swallowed so that logging never crashes the
    host program.
    """
    def __init__(self, db_manager: Optional["DatabaseManager"] = None, log_file: Optional[str] = None) -> None:
        """
        Initializes the LoggingService with a DatabaseManager and/or a log file path.

        Args:
            db_manager (Optional[DatabaseManager]): The database manager instance for DB logging.
            log_file (Optional[str]): The path to the local file for file-based logging.
        """
        self.db_manager: Optional["DatabaseManager"] = db_manager
        self.log_file: Optional[str] = log_file

    def log_inference(self, metadata: Dict[str, Any]) -> None:
        """
        Saves inference metadata to every configured target.

        Database logging is skipped unless the schema was initialized
        successfully (DatabaseManager._initialized).

        Args:
            metadata (Dict[str, Any]): The inference metadata to be logged.
        """
        if self.db_manager and self.db_manager._initialized:
            self._log_to_db(metadata)

        if self.log_file:
            self._log_to_file(metadata)

    def _log_to_db(self, metadata: Dict[str, Any]) -> None:
        """
        Internal method to save inference metadata to the database using SQLAlchemy.

        Rolls back and warns (never raises) on failure.

        Args:
            metadata (Dict[str, Any]): The inference metadata to be logged.
        """
        if not self.db_manager:
            return

        session: Any = self.db_manager.Session()
        try:
            log_entry = InferenceLog(
                model_name=metadata.get("model_name"),
                timestamp=metadata.get("timestamp"),
                input_tokens=metadata.get("input_tokens"),
                output_tokens=metadata.get("output_tokens"),
                cpu_percent=metadata.get("cpu_percent"),
                ram_usage_percent=metadata.get("ram_usage_percent"),
                gpu_usage_percent=metadata.get("gpu_usage_percent"),
                duration=metadata.get("duration"),
                # default=str: arbitrary pipeline inputs/outputs may not be
                # JSON-serializable; stringify them instead of dropping the
                # whole record.
                input=json.dumps(metadata.get("input"), default=str),
                output=json.dumps(metadata.get("output"), default=str),
                error=metadata.get("error"),
                extra=json.dumps(metadata.get("extra", {}), default=str)
            )
            session.add(log_entry)
            session.commit()
        except Exception as e:
            session.rollback()
            # Use the module logger (previously defined but unused in favor
            # of print); logging stays best-effort.
            logger.warning("Failed to log to database: %s", e)
        finally:
            session.close()

    def _log_to_file(self, metadata: Dict[str, Any]) -> None:
        """
        Internal method to save inference metadata to a local file in JSON lines format.

        Warns (never raises) on failure.

        Args:
            metadata (Dict[str, Any]): The inference metadata to be logged.
        """
        if not self.log_file:
            return

        try:
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(metadata, default=str) + "\n")
        except Exception as e:
            logger.warning("Failed to log to file %s: %s", self.log_file, e)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import psutil
|
|
2
|
+
import time
|
|
3
|
+
import os
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Dict, Any
|
|
6
|
+
|
|
7
|
+
def get_resource_usage() -> Dict[str, float]:
    """
    Snapshot current resource usage.

    Returns:
        Dict[str, float]: 'cpu_percent' (system-wide CPU usage) and
        'ram_usage_percent' (this process's share of total memory).
    """
    current_process = psutil.Process(os.getpid())
    cpu = psutil.cpu_percent(interval=None)
    ram = current_process.memory_percent()
    return {"cpu_percent": cpu, "ram_usage_percent": ram}
|
|
18
|
+
|
|
19
|
+
class Timer:
    """
    Context manager measuring elapsed wall time and recording an ISO-8601
    UTC timestamp at the start of the measured span.
    """
    def __init__(self) -> None:
        """Create an idle timer with zeroed readings."""
        self.start: float = 0.0
        self.end: float = 0.0
        self.duration: float = 0.0
        self.timestamp: str = ""

    def __enter__(self) -> 'Timer':
        """
        Record the UTC start timestamp and begin timing.

        Returns:
            Timer: The timer instance.
        """
        # perf_counter: monotonic, high-resolution; suitable for durations.
        self.start = time.perf_counter()
        self.timestamp = datetime.now(timezone.utc).isoformat()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """
        Stop timing and compute the elapsed duration in seconds.

        Args:
            exc_type: The exception type if an error occurred.
            exc_val: The exception value if an error occurred.
            exc_tb: The traceback if an error occurred.
        """
        self.end = time.perf_counter()
        self.duration = self.end - self.start
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cognitor
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: Python SDK to extract relevant metrics from Small Language Model inference calls.
|
|
5
|
+
Author-email: Riccardo <riccardo@tanaos.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/riccardo/cognitor-py
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/riccardo/cognitor-py/issues
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: transformers
|
|
16
|
+
Requires-Dist: psutil
|
|
17
|
+
Requires-Dist: torch
|
|
18
|
+
Requires-Dist: pydantic
|
|
19
|
+
Requires-Dist: psycopg2-binary
|
|
20
|
+
Requires-Dist: sqlalchemy
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest; extra == "dev"
|
|
23
|
+
Requires-Dist: black; extra == "dev"
|
|
24
|
+
Requires-Dist: isort; extra == "dev"
|
|
25
|
+
Requires-Dist: build; extra == "dev"
|
|
26
|
+
Requires-Dist: twine; extra == "dev"
|
|
27
|
+
Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
|
|
28
|
+
|
|
29
|
+
# cognitor-py
|
|
30
|
+
|
|
31
|
+
`cognitor-py` is a Python SDK that wraps `transformers` inference calls to extract useful metadata and performance metrics.
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- **Model Information**: Automatically captures the model name.
|
|
36
|
+
- **Performance Metrics**: Tracks CPU and RAM usage during inference.
|
|
37
|
+
- **GPU Monitoring**: Captures peak GPU memory usage (if CUDA is available).
|
|
38
|
+
- **Token Counting**: Calculates input and output token counts for common pipeline tasks.
|
|
39
|
+
- **Latency Tracking**: Measures inference duration.
|
|
40
|
+
- **Error Handling**: Captures and reports errors during inference.
|
|
41
|
+
- **Flexible Logging Targets**: Automatically saves all inference logs to either a local PostgreSQL database or a local file (JSON lines).
|
|
42
|
+
- **Graceful Error Handling**: Ensures the program continues to run even if the database is unreachable.
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install cognitor-py
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
### Using the Inference Monitor
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from transformers import pipeline, AutoTokenizer
|
|
56
|
+
from cognitor import Cognitor
|
|
57
|
+
|
|
58
|
+
# Initialize your model and tokenizer
|
|
59
|
+
model_name = "gpt2"
|
|
60
|
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
61
|
+
pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer)
|
|
62
|
+
|
|
63
|
+
# Initialize Cognitor with PostgreSQL configuration (default)
|
|
64
|
+
# Or use log_type="file" and log_path="logs.jsonl" for file logging
|
|
65
|
+
cognitor = Cognitor(
|
|
66
|
+
model_name=model_name,
|
|
67
|
+
tokenizer=tokenizer,
|
|
68
|
+
log_type="database", # or "file"
|
|
69
|
+
host="localhost",
|
|
70
|
+
port=5432,
|
|
71
|
+
user="postgres",
|
|
72
|
+
password="postgres",
|
|
73
|
+
dbname="cognitor"
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Run inference within the monitor context
|
|
77
|
+
with cognitor.monitor() as m:
|
|
78
|
+
input_text = "Once upon a time,"
|
|
79
|
+
# Use track() to capture only the inference duration
|
|
80
|
+
with m.track():
|
|
81
|
+
output = pipe(input_text, max_length=50)
|
|
82
|
+
m.capture(input_data=input_text, output=output)
|
|
83
|
+
|
|
84
|
+
# The metadata is now available via the cognitor instance
|
|
85
|
+
metadata = cognitor.get_last_metadata()
|
|
86
|
+
print(output)
|
|
87
|
+
print(metadata)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Metadata Structure
|
|
91
|
+
|
|
92
|
+
The extracted metadata follows this structure:
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
{
|
|
96
|
+
"model_name": "gpt2",
|
|
97
|
+
"timestamp": "2026-04-01T14:34:14.123456+00:00",
|
|
98
|
+
"input_tokens": 5,
|
|
99
|
+
"output_tokens": 45,
|
|
100
|
+
"cpu_percent": 12.5,
|
|
101
|
+
"ram_usage_percent": 1.2,
|
|
102
|
+
"gpu_usage_percent": 5.5, # Optional
|
|
103
|
+
"duration": 0.45, # Inference-only duration
|
|
104
|
+
"input": "Once upon a time,",
|
|
105
|
+
"output": [...],
|
|
106
|
+
"error": None
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
cognitor/__init__.py
|
|
4
|
+
cognitor/database.py
|
|
5
|
+
cognitor/monitor.py
|
|
6
|
+
cognitor/service.py
|
|
7
|
+
cognitor/utils.py
|
|
8
|
+
cognitor.egg-info/PKG-INFO
|
|
9
|
+
cognitor.egg-info/SOURCES.txt
|
|
10
|
+
cognitor.egg-info/dependency_links.txt
|
|
11
|
+
cognitor.egg-info/requires.txt
|
|
12
|
+
cognitor.egg-info/top_level.txt
|
|
13
|
+
tests/test_integration.py
|
|
14
|
+
tests/test_monitor.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cognitor
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cognitor"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Python SDK to extract relevant metrics from Small Language Model inference calls."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Riccardo", email = "riccardo@tanaos.com" },
|
|
13
|
+
]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"transformers",
|
|
23
|
+
"psutil",
|
|
24
|
+
"torch",
|
|
25
|
+
"pydantic",
|
|
26
|
+
"psycopg2-binary",
|
|
27
|
+
"sqlalchemy",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
"Homepage" = "https://github.com/riccardo/cognitor-py"
|
|
32
|
+
"Bug Tracker" = "https://github.com/riccardo/cognitor-py/issues"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools_scm]
|
|
35
|
+
version_file = "cognitor/__version__.py"
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
dev = [
|
|
39
|
+
"pytest",
|
|
40
|
+
"black",
|
|
41
|
+
"isort",
|
|
42
|
+
"build",
|
|
43
|
+
"twine",
|
|
44
|
+
"python-dotenv>=1.0.1",
|
|
45
|
+
]
|
cognitor-0.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from transformers import pipeline, AutoTokenizer
|
|
3
|
+
from cognitor import Cognitor
|
|
4
|
+
from unittest.mock import patch
|
|
5
|
+
|
|
6
|
+
@patch("cognitor.monitor.DatabaseManager")
def test_text_classification_pipeline(mock_db_manager, tmp_path):
    """End-to-end check: a sentiment model run under Cognitor yields metadata."""
    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    # 'text-classification' is the canonical task alias for sentiment-analysis.
    classifier = pipeline("text-classification", model=checkpoint, tokenizer=tok)

    sdk = Cognitor(model_name=checkpoint, tokenizer=tok)

    prompt = "I love this library!"
    with sdk.monitor() as session:
        with session.track():
            output = classifier(prompt)
        session.capture(input_data=prompt, output=output)

    assert output is not None
    meta = sdk.get_last_metadata()
    assert meta["model_name"] == checkpoint
    assert meta["input_tokens"] > 0
    assert meta["duration"] > 0
|
|
26
|
+
|
|
27
|
+
@patch("cognitor.monitor.DatabaseManager")
def test_text_generation_pipeline(mock_db_manager, tmp_path):
    """End-to-end check: GPT-2 generation under Cognitor records token counts."""
    checkpoint = "gpt2"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    generator = pipeline("text-generation", model=checkpoint, tokenizer=tok)

    sdk = Cognitor(model_name=checkpoint, tokenizer=tok)

    prompt = "Once upon a time,"
    with sdk.monitor() as session:
        with session.track():
            # do_sample=False keeps generation deterministic for the test run.
            output = generator(prompt, max_length=10, do_sample=False)
        session.capture(input_data=prompt, output=output)

    assert output is not None
    meta = sdk.get_last_metadata()
    assert meta["model_name"] == checkpoint
    assert meta["input_tokens"] > 0
    assert meta["output_tokens"] > 0
    assert meta["duration"] > 0
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from cognitor import Cognitor
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
@patch("cognitor.monitor.DatabaseManager")
def test_metrics_collection(mock_db_manager, tmp_path):
    """A trivial computation run under the monitor produces system metrics."""
    sdk = Cognitor(model_name="test-model")

    with sdk.monitor() as session:
        value_in = 5
        with session.track():
            result = value_in * 2
        session.capture(input_data=value_in, output=result)

    assert result == 10
    meta = sdk.get_last_metadata()
    assert meta["model_name"] == "test-model"
    # Resource fields must always be present, whatever their measured values.
    for field in ("cpu_percent", "ram_usage_percent", "duration"):
        assert field in meta
    assert meta["input"] == 5
|
|
22
|
+
|
|
23
|
+
@patch("cognitor.monitor.DatabaseManager")
def test_token_counting(mock_db_manager, tmp_path):
    """Token counts come from the tokenizer: here, one token per whitespace word."""
    fake_tokenizer = MagicMock()
    # Pretend each whitespace-separated word encodes to exactly one token.
    fake_tokenizer.encode.side_effect = lambda text: [1] * len(text.split())

    sdk = Cognitor(model_name="test-model", tokenizer=fake_tokenizer)

    prompt = "This is a test input"
    with sdk.monitor() as session:
        with session.track():
            result = {"generated_text": "This is a test output"}
        session.capture(input_data=prompt, output=result)

    meta = sdk.get_last_metadata()
    assert meta["input_tokens"] == 5   # "This is a test input" -> 5 words
    assert meta["output_tokens"] == 5  # "This is a test output" -> 5 words
|
|
39
|
+
|
|
40
|
+
@patch("cognitor.monitor.DatabaseManager")
def test_error_handling(mock_db_manager, tmp_path):
    """An exception raised inside the monitor propagates and lands in metadata."""
    sdk = Cognitor(model_name="test-model")

    with pytest.raises(ValueError):
        with sdk.monitor() as session:
            with session.track():
                # Capture first so the record exists when the failure is logged.
                session.capture(input_data=5, output=None)
                raise ValueError("Inference failed")

    meta = sdk.get_last_metadata()
    assert meta["error"] == "Inference failed"
|
|
52
|
+
|
|
53
|
+
def test_file_logging(tmp_path):
    """With log_type='file', a completed monitor session appends a JSONL record."""
    import json

    target = tmp_path / "test_logs.jsonl"
    sdk = Cognitor(model_name="test-model", log_type="file", log_path=str(target))

    with sdk.monitor() as session:
        value_in = 5
        with session.track():
            result = value_in * 2
        session.capture(input_data=value_in, output=result)

    assert target.exists()
    with open(target, "r") as handle:
        record = json.loads(handle.readline())
    assert record["model_name"] == "test-model"
    assert record["input"] == 5
    assert record["output"] == 10
|