ragbandit_core-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragbandit/__init__.py +26 -0
- ragbandit/config/__init__.py +3 -0
- ragbandit/config/llms.py +34 -0
- ragbandit/config/pricing.py +38 -0
- ragbandit/documents/__init__.py +66 -0
- ragbandit/documents/chunkers/__init__.py +18 -0
- ragbandit/documents/chunkers/base_chunker.py +201 -0
- ragbandit/documents/chunkers/fixed_size_chunker.py +174 -0
- ragbandit/documents/chunkers/semantic_chunker.py +205 -0
- ragbandit/documents/document_pipeline.py +350 -0
- ragbandit/documents/embedders/__init__.py +14 -0
- ragbandit/documents/embedders/base_embedder.py +82 -0
- ragbandit/documents/embedders/mistral_embedder.py +129 -0
- ragbandit/documents/ocr/__init__.py +13 -0
- ragbandit/documents/ocr/base_ocr.py +136 -0
- ragbandit/documents/ocr/mistral_ocr.py +147 -0
- ragbandit/documents/processors/__init__.py +16 -0
- ragbandit/documents/processors/base_processor.py +88 -0
- ragbandit/documents/processors/footnotes_processor.py +353 -0
- ragbandit/documents/processors/references_processor.py +408 -0
- ragbandit/documents/utils/__init__.py +11 -0
- ragbandit/documents/utils/secure_file_handler.py +95 -0
- ragbandit/prompt_tools/__init__.py +27 -0
- ragbandit/prompt_tools/footnotes_processor_tools.py +195 -0
- ragbandit/prompt_tools/prompt_tool.py +118 -0
- ragbandit/prompt_tools/references_processor_tools.py +31 -0
- ragbandit/prompt_tools/semantic_chunker_tools.py +56 -0
- ragbandit/schema.py +206 -0
- ragbandit/utils/__init__.py +19 -0
- ragbandit/utils/in_memory_log_handler.py +33 -0
- ragbandit/utils/llm_utils.py +188 -0
- ragbandit/utils/mistral_client.py +76 -0
- ragbandit/utils/token_usage_tracker.py +220 -0
- ragbandit_core-0.1.1.dist-info/METADATA +145 -0
- ragbandit_core-0.1.1.dist-info/RECORD +38 -0
- ragbandit_core-0.1.1.dist-info/WHEEL +5 -0
- ragbandit_core-0.1.1.dist-info/licenses/LICENSE.md +9 -0
- ragbandit_core-0.1.1.dist-info/top_level.txt +1 -0

ragbandit/utils/llm_utils.py
@@ -0,0 +1,188 @@
+"""
+Utility functions for interacting with LLM services.
+
+This module provides standardized ways to make LLM requests with
+consistent error handling, retries, and response parsing.
+"""
+
+import json
+import time
+import logging
+import requests
+from typing import Type, TypeVar
+from pydantic import BaseModel
+from ragbandit.utils.mistral_client import mistral_client_manager
+from ragbandit.config.llms import (
+    DEFAULT_MODEL,
+    DEFAULT_TEMPERATURE,
+    DEFAULT_MAX_RETRIES,
+    DEFAULT_RETRY_DELAY,
+    DEFAULT_BACKOFF_FACTOR,
+)
+from ragbandit.utils.token_usage_tracker import TokenUsageTracker, count_tokens
+
+# Configure logger
+logger = logging.getLogger(__name__)
+
+# Type variable for Pydantic model return types
+T = TypeVar("T", bound=BaseModel)
+
+
+def query_llm(
+    prompt: str,
+    output_schema: Type[T],
+    api_key: str,
+    usage_tracker: TokenUsageTracker | None = None,
+    model: str = DEFAULT_MODEL,
+    temperature: float = DEFAULT_TEMPERATURE,
+    max_retries: int = DEFAULT_MAX_RETRIES,
+    retry_delay: float = DEFAULT_RETRY_DELAY,
+    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
+    track_usage: bool = True,
+) -> T:
+    """
+    Send a query to the LLM with standardized formatting and retry logic.
+
+    Args:
+        prompt: The prompt to send to the LLM
+        output_schema: Pydantic model class for response validation and parsing
+        api_key: API key to use for the request
+        usage_tracker: Optional custom token usage tracker for
+            document-specific tracking.
+            If None, no tracking will be performed even
+            if track_usage is True.
+        model: Model name to use for the request
+        temperature: Sampling temperature (0 = deterministic)
+        max_retries: Maximum number of retry attempts
+        retry_delay: Initial delay between retries in seconds
+        backoff_factor: Multiplier for delay on each retry attempt
+        track_usage: Whether to track token usage and costs
+
+    Returns:
+        Validated instance of the output_schema model
+
+    Raises:
+        ValueError: If response cannot be parsed according to schema
+        RuntimeError: If all retry attempts fail
+    """
+    retry_count = 0
+    current_delay = retry_delay
+
+    # Only track usage if both conditions are met:
+    # 1. User wants to track usage (track_usage=True)
+    # 2. We have a tracker to use (usage_tracker is not None)
+    should_track = track_usage and usage_tracker is not None
+
+    # Count input tokens if tracking is enabled
+    input_tokens = 0
+    if should_track:
+        # Count tokens in the prompt
+        input_tokens = count_tokens(prompt, model)
+        logger.debug(f"Input tokens: {input_tokens} for model {model}")
+
+    while retry_count <= max_retries:
+        try:
+            # Make the API request
+            client = mistral_client_manager.get_client(api_key)
+            chat_response = client.chat.complete(
+                model=model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": prompt,
+                    },
+                ],
+                response_format={
+                    "type": "json_object",
+                    "schema": output_schema.model_json_schema(),
+                },
+                temperature=temperature,
+            )
+
+            # Parse and validate the response
+            response_content = chat_response.choices[0].message.content
+            response_dict = json.loads(response_content)
+
+            # Track token usage if enabled
+            if should_track and hasattr(chat_response, 'usage'):
+                # Get token counts from the API response
+                output_tokens = chat_response.usage.completion_tokens
+                actual_input_tokens = chat_response.usage.prompt_tokens
+
+                # Use the actual input tokens from the API if available
+                if actual_input_tokens > 0:
+                    input_tokens = actual_input_tokens
+
+                # Log token usage
+                logger.debug(
+                    f"Token usage - Input: {input_tokens}, "
+                    f"Output: {output_tokens}, "
+                    f"Model: {model}"
+                )
+
+                # Track in the caller-provided usage tracker
+                usage_tracker.add_usage(input_tokens, output_tokens, model)
+            elif should_track:
+                # If API doesn't return usage stats, estimate output tokens
+                output_tokens = count_tokens(response_content, model)
+                usage_tracker.add_usage(input_tokens, output_tokens, model)
+                logger.debug(
+                    f"Estimated token usage - Input: {input_tokens}, "
+                    f"Output: {output_tokens}, "
+                    f"Model: {model}"
+                )
+
+            return output_schema.model_validate(response_dict)
+
+        except (requests.RequestException, TimeoutError, ConnectionError) as e:
+            # Handle network-related errors (timeouts, connection issues)
+            retry_count += 1
+
+            # If we've exhausted retries, raise the error
+            if retry_count > max_retries:
+                logger.error(f"Failed after {max_retries} retries: {str(e)}")
+                raise RuntimeError(
+                    f"LLM request failed after {max_retries} retries: {str(e)}"
+                )
+
+            # Log the error and retry
+            logger.warning(
+                "LLM request failed "
+                f"(attempt {retry_count}/{max_retries}): {str(e)}. "
+                f"Retrying in {current_delay} seconds..."
+            )
+            time.sleep(current_delay)
+            current_delay *= backoff_factor
+
+        except Exception as e:
+            # Handle other API errors (rate limits, server errors)
+            if "429" in str(e) or "too many requests" in str(e).lower():
+                # Rate limiting error - retry with backoff
+                retry_count += 1
+
+                if retry_count > max_retries:
+                    logger.error(
+                        (
+                            f"Rate limit exceeded after {max_retries} "
+                            f"retries: {str(e)}"
+                        )
+                    )
+                    raise RuntimeError(
+                        f"Rate limit exceeded after {max_retries} retries"
+                    )
+
+                logger.warning(
+                    f"Rate limit hit (attempt {retry_count}/{max_retries}). "
+                    f"Retrying in {current_delay} seconds..."
+                )
+                time.sleep(current_delay)
+                current_delay *= (
+                    backoff_factor * 2
+                )  # More aggressive backoff for rate limits
+            else:
+                # Other API errors - don't retry
+                logger.error(f"API error: {str(e)}")
+                raise RuntimeError(f"LLM API error: {str(e)}")
+
+    # This should never be reached due to the exception in the loop
+    raise RuntimeError("Unexpected error in LLM request retry loop")
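
As a quick orientation for readers of this diff, here is a minimal sketch of how `query_llm` might be called. The `Summary` schema, the prompt, and the key are hypothetical placeholders, not part of the package; any Pydantic model can serve as `output_schema`:

```python
from pydantic import BaseModel

from ragbandit.utils.llm_utils import query_llm
from ragbandit.utils.token_usage_tracker import TokenUsageTracker


class Summary(BaseModel):
    # Hypothetical output schema; the JSON response is validated against it
    title: str
    key_points: list[str]


tracker = TokenUsageTracker()
result = query_llm(
    prompt="Summarize the passage below as JSON: ...",
    output_schema=Summary,
    api_key="YOUR_MISTRAL_API_KEY",
    usage_tracker=tracker,  # pass None (the default) to skip cost tracking
)
print(result.key_points)  # a validated Summary instance, not raw JSON
tracker.log_summary()     # logs accumulated token counts and cost
```

Note the failure modes in the code above: network errors and 429 responses are retried with exponential backoff (more aggressive for rate limits), while other API errors raise `RuntimeError` immediately.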

ragbandit/utils/mistral_client.py
@@ -0,0 +1,76 @@
+"""
+Utility functions for working with the Mistral API.
+
+This module provides helper functions for creating and managing
+Mistral API client instances.
+
+The module exports a singleton instance of MistralClientManager as
+'mistral_client_manager' that should be used throughout the application
+to ensure consistent client caching and management.
+"""
+
+from mistralai import Mistral
+import logging
+
+# Configure logger
+logger = logging.getLogger(__name__)
+
+
+class MistralClientManager:
+    """
+    Manager class for Mistral API clients.
+
+    This class provides a way to cache and
+    reuse Mistral client instances
+    based on API keys, avoiding the need to
+    create a new client for each request.
+    """
+
+    def __init__(self):
+        """Initialize an empty client cache."""
+        self._clients: dict[int, Mistral] = {}
+
+    def get_mistral_client(self, api_key: str) -> Mistral:
+        """
+        Get a configured Mistral client instance.
+
+        Returns:
+            Mistral: Configured client instance
+
+        Raises:
+            ValueError: If api_key is empty or None
+        """
+        if not api_key or not api_key.strip():
+            raise ValueError("Mistral API key cannot be empty or None")
+        return Mistral(api_key=api_key)
+
+    def get_client(self, api_key: str) -> Mistral:
+        """
+        Get a Mistral client for the given API key.
+
+        If a client with this API key already exists in the cache,
+        it will be reused.
+        Otherwise, a new client will be created and cached.
+
+        Args:
+            api_key: Mistral API key to use for authentication
+
+        Returns:
+            Mistral: A configured Mistral client instance
+
+        Raises:
+            ValueError: If api_key is empty or None
+        """
+        # Hash the API key to use as a dictionary key
+        # This avoids storing the actual API key in memory as a dictionary key
+        key_hash = hash(api_key)
+
+        if key_hash not in self._clients:
+            # Create a new client and cache it
+            self._clients[key_hash] = self.get_mistral_client(api_key)
+
+        return self._clients[key_hash]
+
+
+# Global instance of the client manager
+mistral_client_manager = MistralClientManager()
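
A small sketch of the caching contract described in these docstrings, using placeholder keys (constructing a client does not validate the key against the API, so this runs offline):

```python
from ragbandit.utils.mistral_client import mistral_client_manager

# The same API key returns the same cached client instance
a = mistral_client_manager.get_client("key-one")
b = mistral_client_manager.get_client("key-one")
assert a is b

# A different key creates and caches a separate client
c = mistral_client_manager.get_client("key-two")
assert c is not a

# An empty or whitespace-only key is rejected before a client is built
try:
    mistral_client_manager.get_client("   ")
except ValueError as exc:
    print(exc)  # "Mistral API key cannot be empty or None"
```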

ragbandit/utils/token_usage_tracker.py
@@ -0,0 +1,220 @@
+"""
+Cost tracking utilities for LLM API calls.
+
+This module provides functions to calculate token usage and costs
+for different LLM models.
+"""
+
+import logging
+import tiktoken
+from ragbandit.config.pricing import (
+    MODEL_COSTS,
+    EMBEDDING_COSTS,
+    DEFAULT_MODEL
+)
+from ragbandit.schema import TokenUsageMetrics
+
+# Configure logger
+logger = logging.getLogger(__name__)
+
+
+def count_tokens(text: str, model: str = DEFAULT_MODEL) -> int:
+    """
+    Count the number of tokens in a text string for a specific model.
+
+    Args:
+        text: The text to count tokens for
+        model: The model to use for token counting
+
+    Returns:
+        int: Number of tokens
+    """
+    try:
+        # For Mistral models, use cl100k_base encoding (same as GPT-4)
+        encoding = tiktoken.get_encoding("cl100k_base")
+        return len(encoding.encode(text))
+    except Exception as e:
+        logger.warning(
+            f"Error counting tokens: {e}. Using character-based estimate."
+        )
+        # Fallback: rough estimate based on characters (1 token ≈ 4 chars)
+        return len(text) // 4
+
+
+def calculate_cost(
+    input_tokens: int, output_tokens: int, model: str = DEFAULT_MODEL
+) -> tuple[float, dict[str, float]]:
+    """
+    Calculate the cost of an API call based on token usage.
+
+    Args:
+        input_tokens: Number of input tokens
+        output_tokens: Number of output tokens
+        model: Model name
+
+    Returns:
+        Tuple containing:
+        - Total cost in USD
+        - Dictionary with detailed cost breakdown
+    """
+    # Get cost rates, defaulting to mistral-small if model not found
+    input_rate, output_rate = MODEL_COSTS.get(
+        model, MODEL_COSTS[DEFAULT_MODEL]
+    )
+
+    # Calculate costs (rates are per 1M tokens)
+    input_cost = (input_tokens / 1_000_000) * input_rate
+    output_cost = (output_tokens / 1_000_000) * output_rate
+    total_cost = input_cost + output_cost
+
+    cost_details = {
+        "model": model,
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "total_tokens": input_tokens + output_tokens,
+        "input_cost_usd": input_cost,
+        "output_cost_usd": output_cost,
+        "total_cost_usd": total_cost,
+    }
+
+    return total_cost, cost_details
+
+
+class TokenUsageTracker:
+    """Track token usage and costs across multiple API calls."""
+    total_input_tokens: int
+    total_output_tokens: int
+    total_embedding_tokens: int
+    total_cost: float
+    calls_by_model: dict[str, dict[str, int | float]]
+
+    def __init__(self):
+        self.total_input_tokens = 0
+        self.total_output_tokens = 0
+        self.total_embedding_tokens = 0
+        self.total_cost = 0.0
+        self.calls_by_model = {}
+
+    def add_usage(
+        self,
+        input_tokens: int,
+        output_tokens: int,
+        model: str = DEFAULT_MODEL,
+    ) -> None:
+        """
+        Add usage statistics from an API call.
+
+        Args:
+            input_tokens: Number of input tokens
+            output_tokens: Number of output tokens
+            model: Model name
+        """
+        cost, details = calculate_cost(input_tokens, output_tokens, model)
+
+        # Update totals
+        self.total_input_tokens += input_tokens
+        self.total_output_tokens += output_tokens
+        self.total_cost += cost
+
+        # Update per-model tracking
+        if model not in self.calls_by_model:
+            self.calls_by_model[model] = {
+                "calls": 0,
+                "input_tokens": 0,
+                "output_tokens": 0,
+                "cost": 0.0,
+            }
+
+        self.calls_by_model[model]["calls"] += 1
+        self.calls_by_model[model]["input_tokens"] += input_tokens
+        self.calls_by_model[model]["output_tokens"] += output_tokens
+        self.calls_by_model[model]["cost"] += cost
+
+    def add_embedding_tokens(self, tokens: int, model: str) -> None:
+        """
+        Add embedding token usage statistics.
+
+        Args:
+            tokens: Number of tokens processed for embedding
+            model: Embedding model name
+        """
+        # Calculate cost based on embedding model rates
+        # Default to 0.10 if model not found
+        cost_per_million = EMBEDDING_COSTS.get(model, 0.10)
+        cost = (tokens / 1_000_000) * cost_per_million
+
+        # Update totals
+        self.total_embedding_tokens += tokens
+        self.total_cost += cost
+
+        # Update per-model tracking
+        if model not in self.calls_by_model:
+            self.calls_by_model[model] = {
+                "calls": 0,
+                "embedding_tokens": 0,
+                "cost": 0.0,
+            }
+        else:
+            # Add embedding_tokens field if it doesn't exist
+            if "embedding_tokens" not in self.calls_by_model[model]:
+                self.calls_by_model[model]["embedding_tokens"] = 0
+
+        self.calls_by_model[model]["calls"] += 1
+        self.calls_by_model[model]["embedding_tokens"] = (
+            self.calls_by_model[model].get("embedding_tokens", 0) + tokens
+        )
+        self.calls_by_model[model]["cost"] += cost
+
+    def get_summary(self) -> TokenUsageMetrics:
+        """
+        Get a summary of token usage and costs.
+
+        Returns:
+            TokenUsageMetrics object with usage summary
+        """
+        models_converted: dict[str, TokenUsageMetrics.ModelUsage] = {}
+        for model_name, stats in self.calls_by_model.items():
+            models_converted[model_name] = TokenUsageMetrics.ModelUsage(
+                calls=int(stats.get("calls", 0)),
+                input_tokens=int(stats.get("input_tokens", 0)),
+                output_tokens=int(stats.get("output_tokens", 0)),
+                embedding_tokens=int(stats.get("embedding_tokens", 0)),
+                cost=float(stats.get("cost", 0.0)),
+            )
+
+        return TokenUsageMetrics(
+            total_calls=sum(m.calls for m in models_converted.values()),
+            total_input_tokens=self.total_input_tokens,
+            total_output_tokens=self.total_output_tokens,
+            total_embedding_tokens=self.total_embedding_tokens,
+            total_tokens=(
+                self.total_input_tokens +
+                self.total_output_tokens +
+                self.total_embedding_tokens
+            ),
+            total_cost_usd=self.total_cost,
+            models=models_converted,
+        )
+
+    def log_summary(self, level: int = logging.INFO) -> None:
+        """Log a summary of token usage and costs."""
+        summary = self.get_summary()
+
+        # Build log message
+        message = f"API Usage: {summary.total_calls} calls, "
+
+        # Add LLM token counts if any
+        if summary.total_input_tokens > 0 or summary.total_output_tokens > 0:  # noqa
+            message += (
+                f"LLM: {summary.total_input_tokens:,} input + "
+                f"{summary.total_output_tokens:,} output tokens, "
+            )
+
+        # Add embedding token counts if any
+        if summary.total_embedding_tokens > 0:
+            message += f"Embeddings: {summary.total_embedding_tokens:,} tokens, "  # noqa
+
+        # Add total cost
+        message += f"Total: ${summary.total_cost_usd:.4f} USD"
+
+        logger.log(level, message)
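
To make the accumulation logic concrete, a sketch with made-up token counts follows. The model names are illustrative; a model missing from `MODEL_COSTS` falls back to the default LLM rates, and one missing from `EMBEDDING_COSTS` falls back to 0.10 USD per million tokens:

```python
from ragbandit.utils.token_usage_tracker import (
    TokenUsageTracker,
    calculate_cost,
    count_tokens,
)

tracker = TokenUsageTracker()

# Two hypothetical chat calls and one embedding batch
tracker.add_usage(input_tokens=1_200, output_tokens=300, model="mistral-small-latest")
tracker.add_usage(input_tokens=800, output_tokens=150, model="mistral-small-latest")
tracker.add_embedding_tokens(tokens=5_000, model="mistral-embed")

summary = tracker.get_summary()  # a TokenUsageMetrics instance
print(summary.total_calls)       # 3
print(summary.total_tokens)      # 7450 = 1200 + 300 + 800 + 150 + 5000
tracker.log_summary()            # one-line usage/cost log at INFO

# Rates are per 1M tokens, so 1M input tokens cost exactly the input rate
total_usd, breakdown = calculate_cost(1_000_000, 0, model="mistral-small-latest")
print(breakdown["input_cost_usd"] == total_usd)  # True: no output tokens

print(count_tokens("hello world"))  # tiktoken-based estimate
```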

ragbandit_core-0.1.1.dist-info/METADATA
@@ -0,0 +1,145 @@
+Metadata-Version: 2.4
+Name: ragbandit-core
+Version: 0.1.1
+Summary: Core utilities for document processing, RAG configuration, querying, and evaluation.
+Author-email: Martim Chaves <martim@ragbandit.com>
+License: MIT
+Project-URL: Homepage, https://github.com/MartimChaves/ragbandit-core
+Project-URL: Documentation, https://github.com/MartimChaves/ragbandit-core#readme
+Project-URL: Source, https://github.com/MartimChaves/ragbandit-core
+Project-URL: Issues, https://github.com/MartimChaves/ragbandit-core/issues
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE.md
+Requires-Dist: pydantic>=2.11.7
+Requires-Dist: llama-index>=0.12.52
+Requires-Dist: mistralai>=1.7.0
+Requires-Dist: ragas>=0.3.0
+Requires-Dist: cryptography>=44.0.2
+Dynamic: license-file
+
+# ragbandit-core
+
+Core utilities for:
+
+* Document ingestion & processing (OCR, chunking, embedding)
+* Building and running Retrieval-Augmented Generation (RAG) pipelines
+* Evaluating answers with automated metrics
+
+## Quick start
+
+```bash
+pip install ragbandit-core
+```
+
+```python
+from ragbandit.documents import (
+    DocumentPipeline,
+    ReferencesProcessor,
+    FootnoteProcessor,
+    MistralOCRDocument,
+    MistralEmbedder,
+    SemanticChunker
+)
+import os
+import logging
+from dotenv import load_dotenv
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+)
+
+MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
+
+file_path = "./data/raw/[document_name].pdf"
+
+doc_pipeline = DocumentPipeline(
+    chunker=SemanticChunker(min_chunk_size=500, api_key=MISTRAL_API_KEY),
+    embedder=MistralEmbedder(model="mistral-embed", api_key=MISTRAL_API_KEY),  # noqa
+    ocr_processor=MistralOCRDocument(api_key=MISTRAL_API_KEY),
+    processors=[
+        ReferencesProcessor(api_key=MISTRAL_API_KEY),
+        FootnoteProcessor(api_key=MISTRAL_API_KEY),
+    ],
+)
+
+extended_response = doc_pipeline.process(file_path)
+
+```
+
+### Running Steps Manually
+
+For more control, you can run each pipeline step independently:
+
+```python
+from ragbandit.documents import (
+    DocumentPipeline,
+    ReferencesProcessor,
+    MistralOCRDocument,
+    MistralEmbedder,
+    SemanticChunker
+)
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
+file_path = "./data/raw/[document_name].pdf"
+
+# Create pipeline with only the components you need
+pipeline = DocumentPipeline(
+    ocr_processor=MistralOCRDocument(api_key=MISTRAL_API_KEY),
+    processors=[ReferencesProcessor(api_key=MISTRAL_API_KEY)],
+    chunker=SemanticChunker(min_chunk_size=500, api_key=MISTRAL_API_KEY),
+    embedder=MistralEmbedder(model="mistral-embed", api_key=MISTRAL_API_KEY),
+)
+
+# Step 1: Run OCR
+ocr_result = pipeline.run_ocr(file_path)
+
+# Step 2: Run processors (optional)
+processing_results = pipeline.run_processors(ocr_result)
+final_doc = processing_results[-1]  # Get the last processor's output
+
+# Step 3: Chunk the document
+chunk_result = pipeline.run_chunker(final_doc)
+
+# Step 4: Embed chunks
+embedding_result = pipeline.run_embedder(chunk_result)
+```
+
+You can also create separate pipelines for different steps:
+
+```python
+# OCR-only pipeline
+ocr_pipeline = DocumentPipeline(
+    ocr_processor=MistralOCRDocument(api_key=MISTRAL_API_KEY)
+)
+ocr_result = ocr_pipeline.run_ocr(file_path)
+
+# Later, chunk with a different pipeline
+chunk_pipeline = DocumentPipeline(
+    chunker=SemanticChunker(min_chunk_size=500, api_key=MISTRAL_API_KEY)
+)
+chunks = chunk_pipeline.run_chunker(ocr_result)
+```
+
+## Package layout
+
+```
+ragbandit-core/
+├── src/ragbandit/
+│   ├── documents/  # document ingestion, OCR, chunking, embedding
+└── tests/
+```
+
+## License
+
+MIT

ragbandit_core-0.1.1.dist-info/RECORD
@@ -0,0 +1,38 @@
+ragbandit/__init__.py,sha256=W3cOQHgNtVtHh9o0l2myI7Nv8QLfW-5r06ddLJuhhyM,718
+ragbandit/schema.py,sha256=mGoqqluTCgZaAq4yQQxn4evDjr03gJRmEKT6Xn-F3wI,5859
+ragbandit/config/__init__.py,sha256=Xr1-QgP6oUhBR6I3OHP4dgCR6lh2LHcNqwC6AsA2bNI,52
+ragbandit/config/llms.py,sha256=HHB4BbzlgTy7kZxHJ77tpEPdZfhlOU8nlc4rYLw6v7w,816
+ragbandit/config/pricing.py,sha256=0PE4WTsdRUyG3eABUP9Zhme6QpHwGD1RBYOqqSRzD4A,1143
+ragbandit/documents/__init__.py,sha256=0r0zNIz_MzrzpKpLopUh0le8wCzI5xhi9CPjlelXz4Y,1254
+ragbandit/documents/document_pipeline.py,sha256=bDmOjhj8mIbMii3ZTLHA5blzPvdMmD_wTpx319gAjME,12328
+ragbandit/documents/chunkers/__init__.py,sha256=U2ptxUtW-e_OCl50kjg-YmKD2e1eGI6iPAWl8hG2W0s,464
+ragbandit/documents/chunkers/base_chunker.py,sha256=9sCEqn-uVesvYOh2aNbjoN9-mkkAQxml8-nuonr5zUk,6616
+ragbandit/documents/chunkers/fixed_size_chunker.py,sha256=--OQ5XVhATw4v_MEmfTJPkfUQ2fHXCJyYpWktWVi9JY,5764
+ragbandit/documents/chunkers/semantic_chunker.py,sha256=67I2TjytMSB_LWloGWhesOB7Hu4RlYBUsr9T8A9SD2I,7188
+ragbandit/documents/embedders/__init__.py,sha256=6do7BGP8rHCLvIoPIwcK5W751jl8spEfq2smgkoTK3o,418
+ragbandit/documents/embedders/base_embedder.py,sha256=Bdbmhvi82JxRCBtaY6ZFIbmmPkHriTqtZvvRs6k3dfg,2287
+ragbandit/documents/embedders/mistral_embedder.py,sha256=T0FPILc7PKIgWxV_lWQbqyP5_LElkuuGtDvn4-Ec6d8,4242
+ragbandit/documents/ocr/__init__.py,sha256=Dg3R2ClL1fDOA4a6hY8F7gHiR1mIgL0tNSbo88NPsGE,336
+ragbandit/documents/ocr/base_ocr.py,sha256=o3gTEg6WW88JDOAaKSKusqOGco4jQ9Q8nUl-zsStjMg,4375
+ragbandit/documents/ocr/mistral_ocr.py,sha256=074LRKDEIH4PYoRPrwP_3dUboVByefIqnLniqclz3Bg,5327
+ragbandit/documents/processors/__init__.py,sha256=ecWUqcNqSEuloXYczgfMPQwWb5vlehHM7s6jB3uEVwM,541
+ragbandit/documents/processors/base_processor.py,sha256=wVqgIjKDUhaSt8oyktTvY0b8jL9OYHQNvcD0VZcn-Wg,3128
+ragbandit/documents/processors/footnotes_processor.py,sha256=dRb6NXiRaXVMVKB7lijCofmZZUHqqFuyl7JunqEbqC8,13630
+ragbandit/documents/processors/references_processor.py,sha256=Fmg9MwBO9fB7sjrZl2q_ESTqTXNdzikVi1TLMJTAC3U,14520
+ragbandit/documents/utils/__init__.py,sha256=eS9AhbxRSJVnUU7JlHKNfJl3KNKCpLYTyl_Lo_SVaWU,244
+ragbandit/documents/utils/secure_file_handler.py,sha256=XGNd5dBfoqhYWo-jxHQkCWEidhurNI7MA-yR7KQHpF4,3098
+ragbandit/prompt_tools/__init__.py,sha256=PRnA0EF9yKSvErRxVs8esOJ4761UQjgtHvHdcjsaCB4,801
+ragbandit/prompt_tools/footnotes_processor_tools.py,sha256=_SgKN0wn98eBNlM7WGMAkiTEUnFXm9w5X5f7KKapnK8,6098
+ragbandit/prompt_tools/prompt_tool.py,sha256=lN8rEwWnuS3gW6c2l1IYadgRvBnCB0tiU5mkpeoxl3s,3824
+ragbandit/prompt_tools/references_processor_tools.py,sha256=0AqKRrdKiUKWsS0EAnrGlXVDPZM_oLdsr52WriWekNA,1063
+ragbandit/prompt_tools/semantic_chunker_tools.py,sha256=Y66-ttVhpyiHdq6He1D0vaaJthDc8pX45mDbn24ONrs,2088
+ragbandit/utils/__init__.py,sha256=nKj-69XL3HjzbMNOcJdMpsJt0J8N2KhGdQ19V_jQR1g,522
+ragbandit/utils/in_memory_log_handler.py,sha256=vMtCG-Wk9OwiCo2087nQovSIOQCu5ZWXg3lBf0hPEkk,1109
+ragbandit/utils/llm_utils.py,sha256=7motkdeez9D_eEBemY6Mw_tZZCCyvt886GEYHv9ddvs,7079
+ragbandit/utils/mistral_client.py,sha256=VkqFgquyjCmUllBjrHKqQnnmvU3yF0frvCSVIkHH-jQ,2195
+ragbandit/utils/token_usage_tracker.py,sha256=CMAuJolcdJ258CVMkpNVhZAnQIGC-jTnxYkOr2jvp0M,7196
+ragbandit_core-0.1.1.dist-info/licenses/LICENSE.md,sha256=rZBctov8cSToljMmrdApur6WqyMIrX0KjkMKDpqx9w8,1070
+ragbandit_core-0.1.1.dist-info/METADATA,sha256=FsGiCB_o8_vMQohVWsGN_o3gHzdM1usLA9Xw5zEi7bc,3924
+ragbandit_core-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ragbandit_core-0.1.1.dist-info/top_level.txt,sha256=UDjwZ4afIob8DIsuV6D08lU5bHCeN00grjXpzgDhsQ8,10
+ragbandit_core-0.1.1.dist-info/RECORD,,

ragbandit_core-0.1.1.dist-info/licenses/LICENSE.md
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) 2025 Martim Chaves
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

ragbandit_core-0.1.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+ragbandit