cognitor 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,112 @@
1
+ Metadata-Version: 2.4
2
+ Name: cognitor
3
+ Version: 0.0.0
4
+ Summary: Python SDK to extract relevant metrics from Small Language Model inference calls.
5
+ Author-email: Riccardo <riccardo@tanaos.com>
6
+ Project-URL: Homepage, https://github.com/riccardo/cognitor-py
7
+ Project-URL: Bug Tracker, https://github.com/riccardo/cognitor-py/issues
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: transformers
16
+ Requires-Dist: psutil
17
+ Requires-Dist: torch
18
+ Requires-Dist: pydantic
19
+ Requires-Dist: psycopg2-binary
20
+ Requires-Dist: sqlalchemy
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest; extra == "dev"
23
+ Requires-Dist: black; extra == "dev"
24
+ Requires-Dist: isort; extra == "dev"
25
+ Requires-Dist: build; extra == "dev"
26
+ Requires-Dist: twine; extra == "dev"
27
+ Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
28
+
29
+ # cognitor-py
30
+
31
+ `cognitor-py` is a Python SDK that wraps `transformers` inference calls to extract useful metadata and performance metrics.
32
+
33
+ ## Features
34
+
35
+ - **Model Information**: Automatically captures the model name.
36
+ - **Performance Metrics**: Tracks CPU and RAM usage during inference.
37
+ - **GPU Monitoring**: Captures peak GPU memory usage (if CUDA is available).
38
+ - **Token Counting**: Calculates input and output token counts for common pipeline tasks.
39
+ - **Latency Tracking**: Measures inference duration.
40
+ - **Error Handling**: Captures and reports errors during inference.
41
+ - **Flexible Logging Targets**: Automatically saves all inference logs to either a local PostgreSQL database or a local file (JSON lines).
42
+ - **Graceful Error Handling**: Ensures the program continues to run even if the database is unreachable.
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ pip install cognitor-py
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ ### Using the Inference Monitor
53
+
54
+ ```python
55
+ from transformers import pipeline, AutoTokenizer
56
+ from cognitor import Cognitor
57
+
58
+ # Initialize your model and tokenizer
59
+ model_name = "gpt2"
60
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
61
+ pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer)
62
+
63
+ # Initialize Cognitor with PostgreSQL configuration (default)
64
+ # Or use log_type="file" and log_path="logs.jsonl" for file logging
65
+ cognitor = Cognitor(
66
+ model_name=model_name,
67
+ tokenizer=tokenizer,
68
+ log_type="database", # or "file"
69
+ host="localhost",
70
+ port=5432,
71
+ user="postgres",
72
+ password="postgres",
73
+ dbname="cognitor"
74
+ )
75
+
76
+ # Run inference within the monitor context
77
+ with cognitor.monitor() as m:
78
+ input_text = "Once upon a time,"
79
+ # Use track() to capture only the inference duration
80
+ with m.track():
81
+ output = pipe(input_text, max_length=50)
82
+ m.capture(input_data=input_text, output=output)
83
+
84
+ # The metadata is now available via the cognitor instance
85
+ metadata = cognitor.get_last_metadata()
86
+ print(output)
87
+ print(metadata)
88
+ ```
89
+
90
+ ### Metadata Structure
91
+
92
+ The extracted metadata follows this structure:
93
+
94
+ ```python
95
+ {
96
+ "model_name": "gpt2",
97
+ "timestamp": "2026-04-01T14:34:14.123456+00:00",
98
+ "input_tokens": 5,
99
+ "output_tokens": 45,
100
+ "cpu_percent": 12.5,
101
+ "ram_usage_percent": 1.2,
102
+ "gpu_usage_percent": 5.5, # Optional
103
+ "duration": 0.45, # Inference-only duration
104
+ "input": "Once upon a time,",
105
+ "output": [...],
106
+ "error": None
107
+ }
108
+ ```
109
+
110
+ ## License
111
+
112
+ MIT
@@ -0,0 +1,84 @@
1
+ # cognitor-py
2
+
3
+ `cognitor-py` is a Python SDK that wraps `transformers` inference calls to extract useful metadata and performance metrics.
4
+
5
+ ## Features
6
+
7
+ - **Model Information**: Automatically captures the model name.
8
+ - **Performance Metrics**: Tracks CPU and RAM usage during inference.
9
+ - **GPU Monitoring**: Captures peak GPU memory usage (if CUDA is available).
10
+ - **Token Counting**: Calculates input and output token counts for common pipeline tasks.
11
+ - **Latency Tracking**: Measures inference duration.
12
+ - **Error Handling**: Captures and reports errors during inference.
13
+ - **Flexible Logging Targets**: Automatically saves all inference logs to either a local PostgreSQL database or a local file (JSON lines).
14
+ - **Graceful Error Handling**: Ensures the program continues to run even if the database is unreachable.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install cognitor-py
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ### Using the Inference Monitor
25
+
26
+ ```python
27
+ from transformers import pipeline, AutoTokenizer
28
+ from cognitor import Cognitor
29
+
30
+ # Initialize your model and tokenizer
31
+ model_name = "gpt2"
32
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
33
+ pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer)
34
+
35
+ # Initialize Cognitor with PostgreSQL configuration (default)
36
+ # Or use log_type="file" and log_path="logs.jsonl" for file logging
37
+ cognitor = Cognitor(
38
+ model_name=model_name,
39
+ tokenizer=tokenizer,
40
+ log_type="database", # or "file"
41
+ host="localhost",
42
+ port=5432,
43
+ user="postgres",
44
+ password="postgres",
45
+ dbname="cognitor"
46
+ )
47
+
48
+ # Run inference within the monitor context
49
+ with cognitor.monitor() as m:
50
+ input_text = "Once upon a time,"
51
+ # Use track() to capture only the inference duration
52
+ with m.track():
53
+ output = pipe(input_text, max_length=50)
54
+ m.capture(input_data=input_text, output=output)
55
+
56
+ # The metadata is now available via the cognitor instance
57
+ metadata = cognitor.get_last_metadata()
58
+ print(output)
59
+ print(metadata)
60
+ ```
61
+
62
+ ### Metadata Structure
63
+
64
+ The extracted metadata follows this structure:
65
+
66
+ ```python
67
+ {
68
+ "model_name": "gpt2",
69
+ "timestamp": "2026-04-01T14:34:14.123456+00:00",
70
+ "input_tokens": 5,
71
+ "output_tokens": 45,
72
+ "cpu_percent": 12.5,
73
+ "ram_usage_percent": 1.2,
74
+ "gpu_usage_percent": 5.5, # Optional
75
+ "duration": 0.45, # Inference-only duration
76
+ "input": "Once upon a time,",
77
+ "output": [...],
78
+ "error": None
79
+ }
80
+ ```
81
+
82
+ ## License
83
+
84
+ MIT
@@ -0,0 +1,3 @@
1
# Public package surface: re-export the SDK entry points so users can write
# `from cognitor import Cognitor` instead of importing submodules directly.
from .monitor import Cognitor, InferenceMetadata

__all__ = ["Cognitor", "InferenceMetadata"]
@@ -0,0 +1,58 @@
1
+ import json
2
+ import logging
3
+ from typing import Any, Dict, Optional
4
+ from sqlalchemy import create_engine, Column, Integer, String, Float, Text
5
+ from sqlalchemy.orm import sessionmaker, declarative_base
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ Base = declarative_base()
10
+
11
class InferenceLog(Base):
    """ORM model for one row of the inference log table.

    Columns mirror the fields of the InferenceMetadata record; complex values
    (input, output, extra) are stored as JSON-serialized text by
    LoggingService._log_to_db.
    """
    __tablename__ = 'inference_logs'

    id = Column(Integer, primary_key=True, autoincrement=True)
    model_name = Column(String)
    timestamp = Column(String)            # ISO-8601 string, not a DateTime column
    input_tokens = Column(Integer)
    output_tokens = Column(Integer)
    cpu_percent = Column(Float)
    ram_usage_percent = Column(Float)
    gpu_usage_percent = Column(Float, nullable=True)  # None when CUDA is unavailable
    duration = Column(Float)
    input = Column(Text)                  # JSON-encoded inference input
    output = Column(Text)                 # JSON-encoded inference output
    error = Column(Text, nullable=True)   # error message if the call failed
    extra = Column(Text)                  # JSON-encoded free-form metadata
27
+
28
class DatabaseManager:
    """
    Manages the database connection and schema initialization using SQLAlchemy.

    Degrades gracefully: if the database is unreachable, ``_initialized``
    stays False and LoggingService skips database writes, so the host
    application keeps running (as promised by the package README).
    """
    def __init__(self, host: str = "localhost", port: int = 5432, user: str = "postgres", password: str = "postgres", dbname: str = "cognitor") -> None:
        """
        Initializes the DatabaseManager with connection parameters.

        Note: SQLAlchemy creates the engine lazily; no connection is attempted
        until init_db() (or a session) is actually used.

        Args:
            host (str): The database host address.
            port (int): The database port number.
            user (str): The database username.
            password (str): The database password.
            dbname (str): The name of the database.
        """
        self.db_url: str = f"postgresql://{user}:{password}@{host}:{port}/{dbname}"
        self.engine: Any = create_engine(self.db_url)
        self.Session: Any = sessionmaker(bind=self.engine)
        # Flipped to True only after the schema is created successfully;
        # LoggingService consults this flag before attempting DB writes.
        self._initialized: bool = False

    def init_db(self) -> None:
        """
        Initializes the database schema by creating all defined tables.

        If the database is unreachable, the error is logged and
        ``_initialized`` remains False so subsequent logging calls skip the
        database instead of crashing — or killing — the host application.
        """
        try:
            Base.metadata.create_all(self.engine)
            self._initialized = True
        except Exception as e:
            # BUG FIX: the previous implementation called exit(0) here, which
            # terminated the entire host process — the opposite of the
            # documented "graceful" behavior. Log and continue instead.
            logger.warning(
                "Database unreachable at %s; database logging disabled. Error: %s",
                self.engine.url, e,
            )
@@ -0,0 +1,238 @@
1
+ import time
2
+ from typing import Any, Dict, Optional, Union, List
3
+ from pydantic import BaseModel
4
+ from .utils import get_resource_usage, Timer
5
+ from .database import DatabaseManager
6
+ from .service import LoggingService
7
+ import torch
8
+
9
class InferenceMetadata(BaseModel):
    """Validated record describing one monitored inference call.

    Instances are produced by Cognitor._generate_metadata and serialized via
    model_dump() before being written to the database or a JSONL file.
    """
    model_name: str                    # name of the monitored model
    timestamp: str                     # ISO-8601 UTC string from Timer.__enter__
    input_tokens: int                  # 0 when no tokenizer is configured
    output_tokens: int                 # 0 when no tokenizer is configured
    cpu_percent: float                 # CPU usage sampled at end of the call
    ram_usage_percent: float           # process RAM share sampled at end of the call
    gpu_usage_percent: Optional[float] = None  # None when CUDA is unavailable
    duration: float                    # inference-only duration in seconds
    input: Any                         # raw captured input
    output: Any                        # raw captured output
    error: Optional[str] = None        # stringified exception, if the call failed
    extra: Dict[str, Any] = {}         # free-form extras (pydantic copies this default per instance)
22
+
23
class InferenceMonitor:
    """Context manager that captures metrics and metadata for one inference call.

    On exit it delegates to the parent Cognitor to build the metadata record
    and forwards that record to the configured logging target.
    """
    def __init__(self, cognitor: 'Cognitor') -> None:
        """Bind this monitor to its parent Cognitor.

        Args:
            cognitor (Cognitor): The parent Cognitor instance.
        """
        self.cognitor: 'Cognitor' = cognitor
        self.input: Any = None
        self.output: Any = None
        self.error: Optional[str] = None
        self.start_usage: Optional[Dict[str, float]] = None
        # full_timer spans the whole `with` block; inference_timer covers only
        # the span the caller wraps in track().
        self.full_timer: Timer = Timer()
        self.inference_timer: Timer = Timer()
        self.metadata: Optional[InferenceMetadata] = None

    def __enter__(self) -> 'InferenceMonitor':
        """Record baseline resource usage and start the overall timer.

        Returns:
            InferenceMonitor: This monitor instance.
        """
        self.start_usage = get_resource_usage()
        self.full_timer.__enter__()
        return self

    def track(self) -> Timer:
        """Expose the inference-only timer as a context manager.

        Returns:
            Timer: The timer measuring just the wrapped inference call.
        """
        return self.inference_timer

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Stop timing, assemble the metadata record, and log it.

        Does not suppress exceptions: any error raised inside the block is
        recorded as a string and then propagates to the caller.

        Args:
            exc_type: Exception class, if one was raised inside the block.
            exc_val: Exception instance, if one was raised.
            exc_tb: Traceback, if an exception was raised.
        """
        self.full_timer.__exit__(exc_type, exc_val, exc_tb)
        if exc_type:
            self.error = str(exc_val)

        baseline = self.start_usage or {}
        self.metadata = self.cognitor._generate_metadata(
            input_data=self.input,
            output=self.output,
            error=self.error,
            full_timer=self.full_timer,
            inference_timer=self.inference_timer,
            start_usage=baseline,
        )
        self.cognitor._last_metadata = self.metadata

        # Forward the record to the configured target (database or file).
        service = self.cognitor.logging_service
        if service:
            service.log_inference(self.metadata.model_dump())

    def capture(self, output: Any, input_data: Any = None) -> None:
        """Record the inference output (and, optionally, the input).

        Args:
            output (Any): The inference output data.
            input_data (Any, optional): The inference input data. Defaults to None.
        """
        if input_data is not None:
            self.input = input_data
        self.output = output
96
+
97
class Cognitor:
    """
    The main SDK class for monitoring SLM inference calls.

    Configures a logging target (PostgreSQL database or JSON-lines file) and
    hands out InferenceMonitor context managers that collect per-call metrics.
    """
    def __init__(self, model_name: str, tokenizer: Any = None, log_type: str = "database", log_path: Optional[str] = None, host: str = "localhost", port: int = 5432, user: str = "postgres", password: str = "postgres", dbname: str = "cognitor") -> None:
        """
        Initializes the Cognitor instance with model, tokenizer, and logging configuration.

        Args:
            model_name (str): The name of the model being monitored.
            tokenizer (Any, optional): The tokenizer for token counting. Defaults to None.
            log_type (str): The logging target ('database' or 'file'). Defaults to "database".
            log_path (Optional[str]): The path for file logging. Defaults to None.
            host (str): The database host. Defaults to "localhost".
            port (int): The database port. Defaults to 5432.
            user (str): The database user. Defaults to "postgres".
            password (str): The database password. Defaults to "postgres".
            dbname (str): The database name. Defaults to "cognitor".

        Raises:
            ValueError: If log_type is neither 'database' nor 'file'.
        """
        self.model_name: str = model_name
        self.tokenizer: Any = tokenizer
        self._last_metadata: Optional["InferenceMetadata"] = None

        self.db_manager: Optional["DatabaseManager"] = None
        self.logging_service: Optional["LoggingService"] = None

        if log_type == "database":
            self.db_manager = DatabaseManager(host=host, port=port, user=user, password=password, dbname=dbname)
            self.db_manager.init_db()
            self.logging_service = LoggingService(db_manager=self.db_manager)
        elif log_type == "file":
            if not log_path:
                # Default file target is derived from the model name.
                log_path = f"{model_name}_logs.jsonl"
            self.logging_service = LoggingService(log_file=log_path)
        else:
            raise ValueError(f"Invalid log_type: {log_type}. Must be 'database' or 'file'.")

    def monitor(self) -> "InferenceMonitor":
        """
        Returns a context manager to monitor an inference call.

        Returns:
            InferenceMonitor: The inference monitor context manager.
        """
        return InferenceMonitor(self)

    def get_last_metadata(self) -> Optional[Dict[str, Any]]:
        """
        Returns the metadata from the last monitored inference.

        Returns:
            Optional[Dict[str, Any]]: The metadata dictionary, or None if no
            inference has been monitored yet.
        """
        return self._last_metadata.model_dump() if self._last_metadata else None

    def monitor_inference(self, func: Any, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Monitors a single inference call using the context manager.

        Args:
            func (Callable): The inference function to monitor.
            *args: Positional arguments for the inference function.
            **kwargs: Keyword arguments for the inference function.

        Returns:
            Dict[str, Any]: A dictionary containing the 'output' and 'metadata'.
        """
        input_data: Any = args[0] if args else kwargs
        with self.monitor() as m:
            with m.track():
                output = func(*args, **kwargs)
            m.capture(input_data=input_data, output=output)
        # BUG FIX: the result must be assembled *after* the with-block.  The
        # previous implementation returned from inside the block, so the
        # return expression was evaluated before InferenceMonitor.__exit__
        # populated m.metadata — the call always returned an empty metadata
        # dict.  (The old `except Exception: raise e` was a no-op and was
        # removed; exceptions still propagate unchanged.)
        return {"output": output, "metadata": m.metadata.model_dump() if m.metadata else {}}

    def _count_input_tokens(self, input_data: Any) -> int:
        """
        Counts input tokens with the configured tokenizer.

        Supports a plain string, a list of strings, and chat-style lists of
        dicts with a "content" key; anything else counts as 0.
        """
        if isinstance(input_data, str):
            return len(self.tokenizer.encode(input_data))
        if isinstance(input_data, list):
            if all(isinstance(i, str) for i in input_data):
                return sum(len(self.tokenizer.encode(i)) for i in input_data)
            if all(isinstance(i, dict) for i in input_data):
                # Chat-like inputs: count each message's "content" field.
                return sum(
                    len(self.tokenizer.encode(msg["content"]))
                    for msg in input_data
                    if "content" in msg
                )
        return 0

    def _count_output_tokens(self, output: Any) -> int:
        """
        Counts output tokens with the configured tokenizer.

        Understands common pipeline result shapes: lists of dicts keyed by
        generated_text / summary_text / translation_text, lists of strings,
        a single dict with generated_text, or a plain string.
        """
        if not output:
            return 0
        if isinstance(output, list):
            total = 0
            for item in output:
                if isinstance(item, dict):
                    # Only the first matching key counts — the keys are
                    # mutually exclusive per pipeline task.
                    for key in ("generated_text", "summary_text", "translation_text"):
                        if key in item:
                            total += len(self.tokenizer.encode(item[key]))
                            break
                elif isinstance(item, str):
                    total += len(self.tokenizer.encode(item))
            return total
        if isinstance(output, dict):
            if "generated_text" in output:
                return len(self.tokenizer.encode(output["generated_text"]))
            return 0
        if isinstance(output, str):
            return len(self.tokenizer.encode(output))
        return 0

    @staticmethod
    def _gpu_usage_percent() -> Optional[float]:
        """
        Returns peak GPU memory usage as a percentage of total device memory,
        or None when CUDA is unavailable.  Resets the peak-memory counter so
        the next inference is measured from a clean baseline.
        """
        if not torch.cuda.is_available():
            return None
        device = torch.cuda.current_device()
        total_mem = torch.cuda.get_device_properties(device).total_memory
        peak_mem = torch.cuda.max_memory_allocated(device)
        torch.cuda.reset_peak_memory_stats(device)
        return (peak_mem / total_mem) * 100

    def _generate_metadata(self, input_data: Any, output: Any, error: Optional[str], full_timer: "Timer", inference_timer: "Timer", start_usage: Dict[str, float]) -> "InferenceMetadata":
        """
        Internal method to generate InferenceMetadata from collected metrics.

        Args:
            input_data (Any): The inference input data.
            output (Any): The inference output data.
            error (Optional[str]): The error message if an error occurred.
            full_timer (Timer): Timer for the full context duration; its
                timestamp marks when monitoring started.
            inference_timer (Timer): Timer for the inference-only duration.
            start_usage (Dict[str, float]): Initial resource usage metrics.
                Currently unused — only the end-of-call sample is reported.

        Returns:
            InferenceMetadata: The generated metadata object.
        """
        end_usage: Dict[str, float] = get_resource_usage()

        # Token counts are only available when a tokenizer was provided.
        input_tokens: int = self._count_input_tokens(input_data) if self.tokenizer else 0
        output_tokens: int = self._count_output_tokens(output) if self.tokenizer else 0

        return InferenceMetadata(
            model_name=self.model_name,
            timestamp=full_timer.timestamp,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cpu_percent=end_usage["cpu_percent"],
            ram_usage_percent=end_usage["ram_usage_percent"],
            gpu_usage_percent=self._gpu_usage_percent(),
            duration=inference_timer.duration,
            input=input_data,
            output=output,
            error=error
        )
@@ -0,0 +1,80 @@
1
+ import json
2
+ import logging
3
+ from typing import Any, Dict, Optional
4
+ from .database import DatabaseManager, InferenceLog
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class LoggingService:
    """
    Service responsible for logging inference metadata to either a database
    or a local JSON-lines file.  Failures in either sink are reported as
    warnings and never propagate to the caller.
    """
    def __init__(self, db_manager: Optional["DatabaseManager"] = None, log_file: Optional[str] = None) -> None:
        """
        Initializes the LoggingService with a DatabaseManager and/or a log file path.

        Args:
            db_manager (Optional[DatabaseManager]): The database manager instance for DB logging.
            log_file (Optional[str]): The path to the local file for file-based logging.
        """
        self.db_manager: Optional["DatabaseManager"] = db_manager
        self.log_file: Optional[str] = log_file

    def log_inference(self, metadata: Dict[str, Any]) -> None:
        """
        Saves inference metadata to the configured target(s).

        Args:
            metadata (Dict[str, Any]): The inference metadata to be logged.
        """
        # Skip the database when its schema never initialized (e.g. the DB
        # was unreachable at startup) so logging degrades gracefully.
        if self.db_manager and self.db_manager._initialized:
            self._log_to_db(metadata)

        if self.log_file:
            self._log_to_file(metadata)

    def _log_to_db(self, metadata: Dict[str, Any]) -> None:
        """
        Internal method to save inference metadata to the database using SQLAlchemy.

        Args:
            metadata (Dict[str, Any]): The inference metadata to be logged.
        """
        if not self.db_manager:
            return

        session: Any = self.db_manager.Session()
        try:
            log_entry = InferenceLog(
                model_name=metadata.get("model_name"),
                timestamp=metadata.get("timestamp"),
                input_tokens=metadata.get("input_tokens"),
                output_tokens=metadata.get("output_tokens"),
                cpu_percent=metadata.get("cpu_percent"),
                ram_usage_percent=metadata.get("ram_usage_percent"),
                gpu_usage_percent=metadata.get("gpu_usage_percent"),
                duration=metadata.get("duration"),
                # Complex values are stored as JSON-encoded text columns.
                input=json.dumps(metadata.get("input")),
                output=json.dumps(metadata.get("output")),
                error=metadata.get("error"),
                extra=json.dumps(metadata.get("extra", {}))
            )
            session.add(log_entry)
            session.commit()
        except Exception as e:
            session.rollback()
            # CONSISTENCY FIX: use the module logger (was print) so host
            # applications can route or silence these warnings through the
            # standard logging configuration.
            logger.warning("Failed to log to database: %s", e)
        finally:
            session.close()

    def _log_to_file(self, metadata: Dict[str, Any]) -> None:
        """
        Internal method to save inference metadata to a local file in JSON lines format.

        Args:
            metadata (Dict[str, Any]): The inference metadata to be logged.
        """
        if not self.log_file:
            return

        try:
            with open(self.log_file, "a") as f:
                f.write(json.dumps(metadata) + "\n")
        except Exception as e:
            logger.warning("Failed to log to file %s: %s", self.log_file, e)
@@ -0,0 +1,51 @@
1
+ import psutil
2
+ import time
3
+ import os
4
+ from datetime import datetime, timezone
5
+ from typing import Dict, Any
6
+
7
def get_resource_usage() -> Dict[str, float]:
    """Return the current CPU and RAM usage percentages.

    Returns:
        Dict[str, float]: 'cpu_percent' (non-blocking psutil sample) and
        'ram_usage_percent' (this process's share of system memory).
    """
    current = psutil.Process(os.getpid())
    usage: Dict[str, float] = {
        "cpu_percent": psutil.cpu_percent(interval=None),
        "ram_usage_percent": current.memory_percent(),
    }
    return usage
18
+
19
class Timer:
    """Context manager that measures elapsed time and stamps its start in UTC."""

    def __init__(self) -> None:
        """Reset all timing fields to their zero values."""
        self.start: float = 0.0
        self.end: float = 0.0
        self.duration: float = 0.0
        self.timestamp: str = ""

    def __enter__(self) -> 'Timer':
        """Record the start moment (ISO-8601 UTC) and begin timing.

        Returns:
            Timer: This timer instance.
        """
        self.timestamp = datetime.now(timezone.utc).isoformat()
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Stop timing and store the elapsed duration in seconds.

        Args:
            exc_type: The exception type if an error occurred (ignored).
            exc_val: The exception value if an error occurred (ignored).
            exc_tb: The traceback if an error occurred (ignored).
        """
        self.end = time.perf_counter()
        self.duration = self.end - self.start
@@ -0,0 +1,112 @@
1
+ Metadata-Version: 2.4
2
+ Name: cognitor
3
+ Version: 0.0.0
4
+ Summary: Python SDK to extract relevant metrics from Small Language Model inference calls.
5
+ Author-email: Riccardo <riccardo@tanaos.com>
6
+ Project-URL: Homepage, https://github.com/riccardo/cognitor-py
7
+ Project-URL: Bug Tracker, https://github.com/riccardo/cognitor-py/issues
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: transformers
16
+ Requires-Dist: psutil
17
+ Requires-Dist: torch
18
+ Requires-Dist: pydantic
19
+ Requires-Dist: psycopg2-binary
20
+ Requires-Dist: sqlalchemy
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest; extra == "dev"
23
+ Requires-Dist: black; extra == "dev"
24
+ Requires-Dist: isort; extra == "dev"
25
+ Requires-Dist: build; extra == "dev"
26
+ Requires-Dist: twine; extra == "dev"
27
+ Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
28
+
29
+ # cognitor-py
30
+
31
+ `cognitor-py` is a Python SDK that wraps `transformers` inference calls to extract useful metadata and performance metrics.
32
+
33
+ ## Features
34
+
35
+ - **Model Information**: Automatically captures the model name.
36
+ - **Performance Metrics**: Tracks CPU and RAM usage during inference.
37
+ - **GPU Monitoring**: Captures peak GPU memory usage (if CUDA is available).
38
+ - **Token Counting**: Calculates input and output token counts for common pipeline tasks.
39
+ - **Latency Tracking**: Measures inference duration.
40
+ - **Error Handling**: Captures and reports errors during inference.
41
+ - **Flexible Logging Targets**: Automatically saves all inference logs to either a local PostgreSQL database or a local file (JSON lines).
42
+ - **Graceful Error Handling**: Ensures the program continues to run even if the database is unreachable.
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ pip install cognitor-py
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ ### Using the Inference Monitor
53
+
54
+ ```python
55
+ from transformers import pipeline, AutoTokenizer
56
+ from cognitor import Cognitor
57
+
58
+ # Initialize your model and tokenizer
59
+ model_name = "gpt2"
60
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
61
+ pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer)
62
+
63
+ # Initialize Cognitor with PostgreSQL configuration (default)
64
+ # Or use log_type="file" and log_path="logs.jsonl" for file logging
65
+ cognitor = Cognitor(
66
+ model_name=model_name,
67
+ tokenizer=tokenizer,
68
+ log_type="database", # or "file"
69
+ host="localhost",
70
+ port=5432,
71
+ user="postgres",
72
+ password="postgres",
73
+ dbname="cognitor"
74
+ )
75
+
76
+ # Run inference within the monitor context
77
+ with cognitor.monitor() as m:
78
+ input_text = "Once upon a time,"
79
+ # Use track() to capture only the inference duration
80
+ with m.track():
81
+ output = pipe(input_text, max_length=50)
82
+ m.capture(input_data=input_text, output=output)
83
+
84
+ # The metadata is now available via the cognitor instance
85
+ metadata = cognitor.get_last_metadata()
86
+ print(output)
87
+ print(metadata)
88
+ ```
89
+
90
+ ### Metadata Structure
91
+
92
+ The extracted metadata follows this structure:
93
+
94
+ ```python
95
+ {
96
+ "model_name": "gpt2",
97
+ "timestamp": "2026-04-01T14:34:14.123456+00:00",
98
+ "input_tokens": 5,
99
+ "output_tokens": 45,
100
+ "cpu_percent": 12.5,
101
+ "ram_usage_percent": 1.2,
102
+ "gpu_usage_percent": 5.5, # Optional
103
+ "duration": 0.45, # Inference-only duration
104
+ "input": "Once upon a time,",
105
+ "output": [...],
106
+ "error": None
107
+ }
108
+ ```
109
+
110
+ ## License
111
+
112
+ MIT
@@ -0,0 +1,14 @@
1
+ README.md
2
+ pyproject.toml
3
+ cognitor/__init__.py
4
+ cognitor/database.py
5
+ cognitor/monitor.py
6
+ cognitor/service.py
7
+ cognitor/utils.py
8
+ cognitor.egg-info/PKG-INFO
9
+ cognitor.egg-info/SOURCES.txt
10
+ cognitor.egg-info/dependency_links.txt
11
+ cognitor.egg-info/requires.txt
12
+ cognitor.egg-info/top_level.txt
13
+ tests/test_integration.py
14
+ tests/test_monitor.py
@@ -0,0 +1,14 @@
1
+ transformers
2
+ psutil
3
+ torch
4
+ pydantic
5
+ psycopg2-binary
6
+ sqlalchemy
7
+
8
+ [dev]
9
+ pytest
10
+ black
11
+ isort
12
+ build
13
+ twine
14
+ python-dotenv>=1.0.1
@@ -0,0 +1 @@
1
+ cognitor
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "setuptools-scm>=8"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "cognitor"
7
+ dynamic = ["version"]
8
+ description = "Python SDK to extract relevant metrics from Small Language Model inference calls."
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ authors = [
12
+ { name = "Riccardo", email = "riccardo@tanaos.com" },
13
+ ]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Topic :: Software Development :: Libraries :: Python Modules",
19
+ "Operating System :: OS Independent",
20
+ ]
21
+ dependencies = [
22
+ "transformers",
23
+ "psutil",
24
+ "torch",
25
+ "pydantic",
26
+ "psycopg2-binary",
27
+ "sqlalchemy",
28
+ ]
29
+
30
+ [project.urls]
31
+ "Homepage" = "https://github.com/riccardo/cognitor-py"
32
+ "Bug Tracker" = "https://github.com/riccardo/cognitor-py/issues"
33
+
34
+ [tool.setuptools_scm]
35
+ version_file = "cognitor/__version__.py"
36
+
37
+ [project.optional-dependencies]
38
+ dev = [
39
+ "pytest",
40
+ "black",
41
+ "isort",
42
+ "build",
43
+ "twine",
44
+ "python-dotenv>=1.0.1",
45
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,46 @@
1
+ import pytest
2
+ from transformers import pipeline, AutoTokenizer
3
+ from cognitor import Cognitor
4
+ from unittest.mock import patch
5
+
6
@patch("cognitor.monitor.DatabaseManager")
def test_text_classification_pipeline(mock_db_manager, tmp_path):
    # DatabaseManager is patched out so Cognitor's default "database"
    # log_type does not attempt a real PostgreSQL connection.
    # NOTE(review): this test downloads a Hugging Face model on first run.
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Use 'text-classification' which is the standard name for sentiment-analysis
    pipe = pipeline("text-classification", model=model_name, tokenizer=tokenizer)

    cognitor = Cognitor(model_name=model_name, tokenizer=tokenizer)

    input_text = "I love this library!"
    with cognitor.monitor() as m:
        with m.track():
            output = pipe(input_text)
        m.capture(input_data=input_text, output=output)

    assert output is not None
    metadata = cognitor.get_last_metadata()
    assert metadata["model_name"] == model_name
    assert metadata["input_tokens"] > 0
    assert metadata["duration"] > 0
26
+
27
@patch("cognitor.monitor.DatabaseManager")
def test_text_generation_pipeline(mock_db_manager, tmp_path):
    # DatabaseManager is patched out so the default "database" log_type does
    # not open a real PostgreSQL connection during the test.
    # NOTE(review): this test downloads the gpt2 model on first run.
    model_name = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer)

    cognitor = Cognitor(model_name=model_name, tokenizer=tokenizer)

    input_text = "Once upon a time,"
    with cognitor.monitor() as m:
        with m.track():
            # do_sample=False keeps the generation deterministic.
            output = pipe(input_text, max_length=10, do_sample=False)
        m.capture(input_data=input_text, output=output)

    assert output is not None
    metadata = cognitor.get_last_metadata()
    assert metadata["model_name"] == model_name
    assert metadata["input_tokens"] > 0
    assert metadata["output_tokens"] > 0
    assert metadata["duration"] > 0
@@ -0,0 +1,70 @@
1
+ import pytest
2
+ from cognitor import Cognitor
3
+ from unittest.mock import MagicMock, patch
4
+
5
@patch("cognitor.monitor.DatabaseManager")
def test_metrics_collection(mock_db_manager, tmp_path):
    """Resource metrics and the captured input appear in the last metadata."""
    cognitor = Cognitor(model_name="test-model")

    with cognitor.monitor() as m:
        value = 5
        with m.track():
            doubled = value * 2
        m.capture(input_data=value, output=doubled)

    assert doubled == 10
    metadata = cognitor.get_last_metadata()
    assert metadata["model_name"] == "test-model"
    for key in ("cpu_percent", "ram_usage_percent", "duration"):
        assert key in metadata
    assert metadata["input"] == 5
22
+
23
@patch("cognitor.monitor.DatabaseManager")
def test_token_counting(mock_db_manager, tmp_path):
    # Fake tokenizer: one token per whitespace-separated word, so the
    # expected counts below can be computed by eye.
    mock_tokenizer = MagicMock()
    mock_tokenizer.encode.side_effect = lambda x: [1] * len(x.split())

    cognitor = Cognitor(model_name="test-model", tokenizer=mock_tokenizer)

    input_text = "This is a test input"
    with cognitor.monitor() as m:
        with m.track():
            # A dict with "generated_text" mimics a text-generation result.
            output = {"generated_text": "This is a test output"}
        m.capture(input_data=input_text, output=output)

    metadata = cognitor.get_last_metadata()
    assert metadata["input_tokens"] == 5  # "This is a test input" -> 5 words
    assert metadata["output_tokens"] == 5  # "This is a test output" -> 5 words
39
+
40
@patch("cognitor.monitor.DatabaseManager")
def test_error_handling(mock_db_manager, tmp_path):
    # DatabaseManager is patched out so the default "database" log_type does
    # not open a real PostgreSQL connection.
    cognitor = Cognitor(model_name="test-model")

    # The monitor's __exit__ records str(exc_val) in the metadata but does
    # not suppress the exception, so it still reaches pytest.raises.
    with pytest.raises(ValueError):
        with cognitor.monitor() as m:
            with m.track():
                m.capture(input_data=5, output=None)
                raise ValueError("Inference failed")

    metadata = cognitor.get_last_metadata()
    assert metadata["error"] == "Inference failed"
52
+
53
def test_file_logging(tmp_path):
    """Inference records are appended to the JSONL file as valid JSON."""
    import json

    log_file = tmp_path / "test_logs.jsonl"
    cognitor = Cognitor(model_name="test-model", log_type="file", log_path=str(log_file))

    with cognitor.monitor() as m:
        value = 5
        with m.track():
            result = value * 2
        m.capture(input_data=value, output=result)

    assert log_file.exists()
    record = json.loads(log_file.read_text().splitlines()[0])
    assert record["model_name"] == "test-model"
    assert record["input"] == 5
    assert record["output"] == 10