isage_middleware-0.2.4.3-cp311-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isage_middleware-0.2.4.3.dist-info/METADATA +266 -0
- isage_middleware-0.2.4.3.dist-info/RECORD +94 -0
- isage_middleware-0.2.4.3.dist-info/WHEEL +5 -0
- isage_middleware-0.2.4.3.dist-info/top_level.txt +1 -0
- sage/middleware/__init__.py +59 -0
- sage/middleware/_version.py +6 -0
- sage/middleware/components/__init__.py +30 -0
- sage/middleware/components/extensions_compat.py +141 -0
- sage/middleware/components/sage_db/__init__.py +116 -0
- sage/middleware/components/sage_db/backend.py +136 -0
- sage/middleware/components/sage_db/service.py +15 -0
- sage/middleware/components/sage_flow/__init__.py +76 -0
- sage/middleware/components/sage_flow/python/__init__.py +14 -0
- sage/middleware/components/sage_flow/python/micro_service/__init__.py +4 -0
- sage/middleware/components/sage_flow/python/micro_service/sage_flow_service.py +88 -0
- sage/middleware/components/sage_flow/python/sage_flow.py +30 -0
- sage/middleware/components/sage_flow/service.py +14 -0
- sage/middleware/components/sage_mem/__init__.py +83 -0
- sage/middleware/components/sage_sias/__init__.py +59 -0
- sage/middleware/components/sage_sias/continual_learner.py +184 -0
- sage/middleware/components/sage_sias/coreset_selector.py +302 -0
- sage/middleware/components/sage_sias/types.py +94 -0
- sage/middleware/components/sage_tsdb/__init__.py +81 -0
- sage/middleware/components/sage_tsdb/python/__init__.py +21 -0
- sage/middleware/components/sage_tsdb/python/_sage_tsdb.pyi +17 -0
- sage/middleware/components/sage_tsdb/python/algorithms/__init__.py +17 -0
- sage/middleware/components/sage_tsdb/python/algorithms/base.py +51 -0
- sage/middleware/components/sage_tsdb/python/algorithms/out_of_order_join.py +248 -0
- sage/middleware/components/sage_tsdb/python/algorithms/window_aggregator.py +296 -0
- sage/middleware/components/sage_tsdb/python/micro_service/__init__.py +7 -0
- sage/middleware/components/sage_tsdb/python/micro_service/sage_tsdb_service.py +365 -0
- sage/middleware/components/sage_tsdb/python/sage_tsdb.py +523 -0
- sage/middleware/components/sage_tsdb/service.py +17 -0
- sage/middleware/components/vector_stores/__init__.py +25 -0
- sage/middleware/components/vector_stores/chroma.py +483 -0
- sage/middleware/components/vector_stores/chroma_adapter.py +185 -0
- sage/middleware/components/vector_stores/milvus.py +677 -0
- sage/middleware/operators/__init__.py +56 -0
- sage/middleware/operators/agent/__init__.py +24 -0
- sage/middleware/operators/agent/planning/__init__.py +5 -0
- sage/middleware/operators/agent/planning/llm_adapter.py +41 -0
- sage/middleware/operators/agent/planning/planner_adapter.py +98 -0
- sage/middleware/operators/agent/planning/router.py +107 -0
- sage/middleware/operators/agent/runtime.py +296 -0
- sage/middleware/operators/agentic/__init__.py +41 -0
- sage/middleware/operators/agentic/config.py +254 -0
- sage/middleware/operators/agentic/planning_operator.py +125 -0
- sage/middleware/operators/agentic/refined_searcher.py +132 -0
- sage/middleware/operators/agentic/runtime.py +241 -0
- sage/middleware/operators/agentic/timing_operator.py +125 -0
- sage/middleware/operators/agentic/tool_selection_operator.py +127 -0
- sage/middleware/operators/context/__init__.py +17 -0
- sage/middleware/operators/context/critic_evaluation.py +16 -0
- sage/middleware/operators/context/model_context.py +565 -0
- sage/middleware/operators/context/quality_label.py +12 -0
- sage/middleware/operators/context/search_query_results.py +61 -0
- sage/middleware/operators/context/search_result.py +42 -0
- sage/middleware/operators/context/search_session.py +79 -0
- sage/middleware/operators/filters/__init__.py +26 -0
- sage/middleware/operators/filters/context_sink.py +387 -0
- sage/middleware/operators/filters/context_source.py +376 -0
- sage/middleware/operators/filters/evaluate_filter.py +83 -0
- sage/middleware/operators/filters/tool_filter.py +74 -0
- sage/middleware/operators/llm/__init__.py +18 -0
- sage/middleware/operators/llm/sagellm_generator.py +432 -0
- sage/middleware/operators/rag/__init__.py +147 -0
- sage/middleware/operators/rag/arxiv.py +331 -0
- sage/middleware/operators/rag/chunk.py +13 -0
- sage/middleware/operators/rag/document_loaders.py +23 -0
- sage/middleware/operators/rag/evaluate.py +658 -0
- sage/middleware/operators/rag/generator.py +340 -0
- sage/middleware/operators/rag/index_builder/__init__.py +48 -0
- sage/middleware/operators/rag/index_builder/builder.py +363 -0
- sage/middleware/operators/rag/index_builder/manifest.py +101 -0
- sage/middleware/operators/rag/index_builder/storage.py +131 -0
- sage/middleware/operators/rag/pipeline.py +46 -0
- sage/middleware/operators/rag/profiler.py +59 -0
- sage/middleware/operators/rag/promptor.py +400 -0
- sage/middleware/operators/rag/refiner.py +231 -0
- sage/middleware/operators/rag/reranker.py +364 -0
- sage/middleware/operators/rag/retriever.py +1308 -0
- sage/middleware/operators/rag/searcher.py +37 -0
- sage/middleware/operators/rag/types.py +28 -0
- sage/middleware/operators/rag/writer.py +80 -0
- sage/middleware/operators/tools/__init__.py +71 -0
- sage/middleware/operators/tools/arxiv_paper_searcher.py +175 -0
- sage/middleware/operators/tools/arxiv_searcher.py +102 -0
- sage/middleware/operators/tools/duckduckgo_searcher.py +105 -0
- sage/middleware/operators/tools/image_captioner.py +104 -0
- sage/middleware/operators/tools/nature_news_fetcher.py +224 -0
- sage/middleware/operators/tools/searcher_tool.py +514 -0
- sage/middleware/operators/tools/text_detector.py +185 -0
- sage/middleware/operators/tools/url_text_extractor.py +104 -0
- sage/middleware/py.typed +2 -0
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from ..algorithms import OutOfOrderStreamJoin, WindowAggregator
|
|
10
|
+
from ..sage_tsdb import AggregationType, SageTSDB, TimeRange
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class SageTSDBServiceConfig:
    """Configuration for SageTSDB service"""

    # Database configuration
    # NOTE(review): neither field below is read anywhere in this module —
    # presumably they are consumed by the underlying SageTSDB engine; confirm.
    enable_compression: bool = False
    max_memory_mb: int = 1024

    # Algorithm defaults, used when callers omit window/aggregation arguments.
    default_window_size: int = 60000  # 1 minute in milliseconds
    default_aggregation: str = "avg"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SageTSDBService:
    """
    A micro-service style wrapper for SageTSDB.

    This service provides a simplified interface for time series operations
    and integrates with SAGE's service ecosystem.

    Methods:
        - add(timestamp, value, tags, fields) -> int
        - add_batch(timestamps, values, tags_list, fields_list) -> list[int]
        - query(start_time, end_time, tags, aggregation, window_size) -> list[dict]
        - stream_join(left_stream, right_stream, window_size, join_key) -> list[dict]
        - window_aggregate(data, window_type, window_size, aggregation) -> list[dict]
        - stats() -> dict
        - reset() -> None

    All dict-producing methods normalize values through ``_format_value`` so
    scalars come back as ``float`` and numpy arrays as plain ``list``s,
    keeping every result JSON-serializable.
    """

    def __init__(self, config: SageTSDBServiceConfig | None = None) -> None:
        """
        Initialize SageTSDB service.

        Args:
            config: Optional service configuration; defaults are used when omitted.
        """
        self._config = config or SageTSDBServiceConfig()
        self._db = SageTSDB()

        # Register default algorithms on the fresh database.
        self._register_default_algorithms()

        # Operation counters exposed via stats().
        self._stats = self._initial_stats()

    @staticmethod
    def _initial_stats() -> dict[str, int]:
        """Return a zeroed counter dict (shared by __init__ and reset)."""
        return {
            "total_writes": 0,
            "total_queries": 0,
            "total_joins": 0,
            "total_aggregations": 0,
        }

    def _register_default_algorithms(self) -> None:
        """Register commonly used algorithms on the underlying database."""
        # Out-of-order stream join
        join_algo = OutOfOrderStreamJoin(
            {
                "window_size": self._config.default_window_size,
                "max_delay": 5000,  # 5 seconds
            }
        )
        self._db.register_algorithm("stream_join", join_algo)

        # Window aggregator
        window_algo = WindowAggregator(
            {
                "window_type": "tumbling",
                "window_size": self._config.default_window_size,
                "aggregation": self._config.default_aggregation,
            }
        )
        self._db.register_algorithm("window_aggregate", window_algo)

    @staticmethod
    def _format_value(value: Any) -> Any:
        """Normalize a stored value for dict output.

        Scalars become ``float``, numpy arrays become ``list``; anything
        else passes through unchanged.
        """
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, np.ndarray):
            return value.tolist()
        return value

    @classmethod
    def _format_point(cls, point: Any) -> dict[str, Any]:
        """Convert a TimeSeriesData-like record into a plain dictionary."""
        return {
            "timestamp": point.timestamp,
            "value": cls._format_value(point.value),
            "tags": dict(point.tags) if point.tags else {},
            "fields": dict(point.fields) if point.fields else {},
        }

    def add(
        self,
        timestamp: int | datetime,
        value: float | np.ndarray | list[float],
        tags: dict[str, str] | None = None,
        fields: dict[str, Any] | None = None,
    ) -> int:
        """
        Add a single time series data point.

        Args:
            timestamp: Unix timestamp (ms) or datetime object
            value: Numeric value or array; plain lists are converted to
                float32 numpy arrays before storage
            tags: Optional tags for indexing
            fields: Optional additional fields

        Returns:
            Index of the added data point
        """
        if isinstance(value, list):
            value = np.array(value, dtype=np.float32)

        idx = self._db.add(timestamp=timestamp, value=value, tags=tags, fields=fields)

        self._stats["total_writes"] += 1
        return idx

    def add_batch(
        self,
        timestamps: list[int] | list[datetime] | np.ndarray,
        values: list[float] | np.ndarray,
        tags_list: list[dict[str, str]] | None = None,
        fields_list: list[dict[str, Any]] | None = None,
    ) -> list[int]:
        """
        Add multiple time series data points.

        Args:
            timestamps: List of timestamps
            values: List of values
            tags_list: Optional list of tags
            fields_list: Optional list of fields

        Returns:
            List of indices for added data points
        """
        indices = self._db.add_batch(
            timestamps=timestamps,
            values=values,
            tags_list=tags_list,
            fields_list=fields_list,
        )

        self._stats["total_writes"] += len(indices)
        return indices

    def query(
        self,
        start_time: int | datetime,
        end_time: int | datetime,
        tags: dict[str, str] | None = None,
        aggregation: str | None = None,
        window_size: int | None = None,
        limit: int | None = None,
    ) -> list[dict[str, Any]]:
        """
        Query time series data.

        Args:
            start_time: Start of time range
            end_time: End of time range
            tags: Optional tags to filter by
            aggregation: Optional aggregation type (sum/avg/min/max/count/etc.)
            window_size: Optional window size for aggregation (ms)
            limit: Optional limit on number of results

        Returns:
            List of matching time series data as dictionaries

        Raises:
            ValueError: if ``aggregation`` is not a valid AggregationType name.
        """
        time_range = TimeRange(start_time=start_time, end_time=end_time)

        # Convert aggregation string to enum if provided.
        agg_type = AggregationType(aggregation) if aggregation else None

        results = self._db.query(
            time_range=time_range,
            tags=tags,
            aggregation=agg_type,
            window_size=window_size,
            limit=limit,
        )

        formatted = [self._format_point(r) for r in results]

        self._stats["total_queries"] += 1
        return formatted

    def stream_join(
        self,
        left_stream: list[dict[str, Any]],
        right_stream: list[dict[str, Any]],
        window_size: int | None = None,
        max_delay: int | None = None,
        join_key: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Perform out-of-order stream join.

        Args:
            left_stream: Data from left stream (list of dicts with timestamp, value, tags)
            right_stream: Data from right stream
            window_size: Join window size in milliseconds
            max_delay: Maximum out-of-order delay in milliseconds
            join_key: Optional tag key for equi-join

        Returns:
            List of joined results, each ``{"left": {...}, "right": {...}}``
        """
        # Fix: use `is None` checks so an explicit 0 is honored rather than
        # silently replaced by the default (`x or default` treated 0 as missing).
        config = {
            "window_size": (
                window_size if window_size is not None else self._config.default_window_size
            ),
            "max_delay": max_delay if max_delay is not None else 5000,
            "join_key": join_key,
        }
        join_algo = OutOfOrderStreamJoin(config)

        # Local import mirrors the original layout (avoids import cycles).
        from ..sage_tsdb import TimeSeriesData

        def to_series(stream: list[dict[str, Any]]) -> list[Any]:
            # Convert plain dicts into TimeSeriesData records.
            return [
                TimeSeriesData(
                    timestamp=item["timestamp"],
                    value=item["value"],
                    tags=item.get("tags"),
                    fields=item.get("fields"),
                )
                for item in stream
            ]

        joined = join_algo.process(
            left_stream=to_series(left_stream),
            right_stream=to_series(right_stream),
        )

        def side(point: Any) -> dict[str, Any]:
            # One joined side; values normalized for consistency with query().
            return {
                "timestamp": point.timestamp,
                "value": self._format_value(point.value),
                "tags": dict(point.tags) if point.tags else {},
            }

        results = [{"left": side(left), "right": side(right)} for left, right in joined]

        self._stats["total_joins"] += 1
        return results

    def window_aggregate(
        self,
        start_time: int | datetime,
        end_time: int | datetime,
        window_type: str = "tumbling",
        window_size: int | None = None,
        aggregation: str = "avg",
        tags: dict[str, str] | None = None,
    ) -> list[dict[str, Any]]:
        """
        Perform window-based aggregation.

        Args:
            start_time: Start of time range
            end_time: End of time range
            window_type: Type of window (tumbling/sliding/session)
            window_size: Window size in milliseconds
            aggregation: Aggregation function (sum/avg/min/max/count/etc.)
            tags: Optional tags to filter by

        Returns:
            List of aggregated results
        """
        # Query matching raw data first.
        time_range = TimeRange(start_time=start_time, end_time=end_time)
        data = self._db.query(time_range=time_range, tags=tags)

        aggregator = WindowAggregator(
            {
                "window_type": window_type,
                # `is None` check so an explicit 0 is not swallowed.
                "window_size": (
                    window_size if window_size is not None else self._config.default_window_size
                ),
                "aggregation": aggregation,
            }
        )

        aggregated = aggregator.process(data)

        results = [self._format_point(item) for item in aggregated]

        self._stats["total_aggregations"] += 1
        return results

    def stats(self) -> dict[str, Any]:
        """
        Get service statistics.

        Returns:
            Dictionary with the service counters plus database info
        """
        # NOTE(review): assumes SageTSDB.get_stats() exposes "size" and
        # "algorithms" keys — confirm against the sage_tsdb implementation.
        db_stats = self._db.get_stats()
        return {
            **self._stats,
            "db_size": db_stats["size"],
            "registered_algorithms": db_stats["algorithms"],
        }

    def reset(self) -> None:
        """Reset service state: fresh database, default algorithms, zeroed counters."""
        self._db = SageTSDB()
        self._register_default_algorithms()
        self._stats = self._initial_stats()
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
# Public API of this module.
__all__ = ["SageTSDBService", "SageTSDBServiceConfig"]
|