isage_middleware-0.2.4.3-cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. isage_middleware-0.2.4.3.dist-info/METADATA +266 -0
  2. isage_middleware-0.2.4.3.dist-info/RECORD +94 -0
  3. isage_middleware-0.2.4.3.dist-info/WHEEL +5 -0
  4. isage_middleware-0.2.4.3.dist-info/top_level.txt +1 -0
  5. sage/middleware/__init__.py +59 -0
  6. sage/middleware/_version.py +6 -0
  7. sage/middleware/components/__init__.py +30 -0
  8. sage/middleware/components/extensions_compat.py +141 -0
  9. sage/middleware/components/sage_db/__init__.py +116 -0
  10. sage/middleware/components/sage_db/backend.py +136 -0
  11. sage/middleware/components/sage_db/service.py +15 -0
  12. sage/middleware/components/sage_flow/__init__.py +76 -0
  13. sage/middleware/components/sage_flow/python/__init__.py +14 -0
  14. sage/middleware/components/sage_flow/python/micro_service/__init__.py +4 -0
  15. sage/middleware/components/sage_flow/python/micro_service/sage_flow_service.py +88 -0
  16. sage/middleware/components/sage_flow/python/sage_flow.py +30 -0
  17. sage/middleware/components/sage_flow/service.py +14 -0
  18. sage/middleware/components/sage_mem/__init__.py +83 -0
  19. sage/middleware/components/sage_sias/__init__.py +59 -0
  20. sage/middleware/components/sage_sias/continual_learner.py +184 -0
  21. sage/middleware/components/sage_sias/coreset_selector.py +302 -0
  22. sage/middleware/components/sage_sias/types.py +94 -0
  23. sage/middleware/components/sage_tsdb/__init__.py +81 -0
  24. sage/middleware/components/sage_tsdb/python/__init__.py +21 -0
  25. sage/middleware/components/sage_tsdb/python/_sage_tsdb.pyi +17 -0
  26. sage/middleware/components/sage_tsdb/python/algorithms/__init__.py +17 -0
  27. sage/middleware/components/sage_tsdb/python/algorithms/base.py +51 -0
  28. sage/middleware/components/sage_tsdb/python/algorithms/out_of_order_join.py +248 -0
  29. sage/middleware/components/sage_tsdb/python/algorithms/window_aggregator.py +296 -0
  30. sage/middleware/components/sage_tsdb/python/micro_service/__init__.py +7 -0
  31. sage/middleware/components/sage_tsdb/python/micro_service/sage_tsdb_service.py +365 -0
  32. sage/middleware/components/sage_tsdb/python/sage_tsdb.py +523 -0
  33. sage/middleware/components/sage_tsdb/service.py +17 -0
  34. sage/middleware/components/vector_stores/__init__.py +25 -0
  35. sage/middleware/components/vector_stores/chroma.py +483 -0
  36. sage/middleware/components/vector_stores/chroma_adapter.py +185 -0
  37. sage/middleware/components/vector_stores/milvus.py +677 -0
  38. sage/middleware/operators/__init__.py +56 -0
  39. sage/middleware/operators/agent/__init__.py +24 -0
  40. sage/middleware/operators/agent/planning/__init__.py +5 -0
  41. sage/middleware/operators/agent/planning/llm_adapter.py +41 -0
  42. sage/middleware/operators/agent/planning/planner_adapter.py +98 -0
  43. sage/middleware/operators/agent/planning/router.py +107 -0
  44. sage/middleware/operators/agent/runtime.py +296 -0
  45. sage/middleware/operators/agentic/__init__.py +41 -0
  46. sage/middleware/operators/agentic/config.py +254 -0
  47. sage/middleware/operators/agentic/planning_operator.py +125 -0
  48. sage/middleware/operators/agentic/refined_searcher.py +132 -0
  49. sage/middleware/operators/agentic/runtime.py +241 -0
  50. sage/middleware/operators/agentic/timing_operator.py +125 -0
  51. sage/middleware/operators/agentic/tool_selection_operator.py +127 -0
  52. sage/middleware/operators/context/__init__.py +17 -0
  53. sage/middleware/operators/context/critic_evaluation.py +16 -0
  54. sage/middleware/operators/context/model_context.py +565 -0
  55. sage/middleware/operators/context/quality_label.py +12 -0
  56. sage/middleware/operators/context/search_query_results.py +61 -0
  57. sage/middleware/operators/context/search_result.py +42 -0
  58. sage/middleware/operators/context/search_session.py +79 -0
  59. sage/middleware/operators/filters/__init__.py +26 -0
  60. sage/middleware/operators/filters/context_sink.py +387 -0
  61. sage/middleware/operators/filters/context_source.py +376 -0
  62. sage/middleware/operators/filters/evaluate_filter.py +83 -0
  63. sage/middleware/operators/filters/tool_filter.py +74 -0
  64. sage/middleware/operators/llm/__init__.py +18 -0
  65. sage/middleware/operators/llm/sagellm_generator.py +432 -0
  66. sage/middleware/operators/rag/__init__.py +147 -0
  67. sage/middleware/operators/rag/arxiv.py +331 -0
  68. sage/middleware/operators/rag/chunk.py +13 -0
  69. sage/middleware/operators/rag/document_loaders.py +23 -0
  70. sage/middleware/operators/rag/evaluate.py +658 -0
  71. sage/middleware/operators/rag/generator.py +340 -0
  72. sage/middleware/operators/rag/index_builder/__init__.py +48 -0
  73. sage/middleware/operators/rag/index_builder/builder.py +363 -0
  74. sage/middleware/operators/rag/index_builder/manifest.py +101 -0
  75. sage/middleware/operators/rag/index_builder/storage.py +131 -0
  76. sage/middleware/operators/rag/pipeline.py +46 -0
  77. sage/middleware/operators/rag/profiler.py +59 -0
  78. sage/middleware/operators/rag/promptor.py +400 -0
  79. sage/middleware/operators/rag/refiner.py +231 -0
  80. sage/middleware/operators/rag/reranker.py +364 -0
  81. sage/middleware/operators/rag/retriever.py +1308 -0
  82. sage/middleware/operators/rag/searcher.py +37 -0
  83. sage/middleware/operators/rag/types.py +28 -0
  84. sage/middleware/operators/rag/writer.py +80 -0
  85. sage/middleware/operators/tools/__init__.py +71 -0
  86. sage/middleware/operators/tools/arxiv_paper_searcher.py +175 -0
  87. sage/middleware/operators/tools/arxiv_searcher.py +102 -0
  88. sage/middleware/operators/tools/duckduckgo_searcher.py +105 -0
  89. sage/middleware/operators/tools/image_captioner.py +104 -0
  90. sage/middleware/operators/tools/nature_news_fetcher.py +224 -0
  91. sage/middleware/operators/tools/searcher_tool.py +514 -0
  92. sage/middleware/operators/tools/text_detector.py +185 -0
  93. sage/middleware/operators/tools/url_text_extractor.py +104 -0
  94. sage/middleware/py.typed +2 -0
@@ -0,0 +1,17 @@
+ """
+ Algorithms for time series processing.
+
+ This module provides a pluggable algorithm interface for various
+ time series processing tasks including stream joins, aggregations,
+ and complex event processing.
+ """
+
+ from .base import TimeSeriesAlgorithm
+ from .out_of_order_join import OutOfOrderStreamJoin
+ from .window_aggregator import WindowAggregator
+
+ __all__ = [
+     "TimeSeriesAlgorithm",
+     "OutOfOrderStreamJoin",
+     "WindowAggregator",
+ ]
@@ -0,0 +1,51 @@
+ """
+ Base algorithm interface for time series processing.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Any
+
+ from ..sage_tsdb import TimeSeriesData
+
+
+ class TimeSeriesAlgorithm(ABC):
+     """
+     Base class for time series processing algorithms.
+
+     All algorithm implementations should inherit from this class and
+     implement the process method.
+     """
+
+     def __init__(self, config: dict[str, Any] | None = None):
+         """
+         Initialize algorithm.
+
+         Args:
+             config: Algorithm-specific configuration
+         """
+         self.config = config or {}
+
+     @abstractmethod
+     def process(self, data: list[TimeSeriesData], **kwargs) -> Any:
+         """
+         Process time series data.
+
+         Args:
+             data: Input time series data points
+             **kwargs: Additional algorithm-specific parameters
+
+         Returns:
+             Processed results (algorithm-specific format)
+         """
+         pass
+
+     def reset(self):  # noqa: B027
+         """Reset algorithm state (for stateful algorithms)"""
+         pass
+
+     def get_stats(self) -> dict[str, Any]:
+         """Get algorithm statistics"""
+         return {}
+
+
+ __all__ = ["TimeSeriesAlgorithm"]
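The contract is intentionally small: a concrete algorithm implements process, and overrides reset and get_stats only when it keeps state. As a minimal sketch of a custom algorithm against the interface just shown (the PointCounter name is hypothetical, not part of the package):

    # Hypothetical subclass, for illustration only.
    class PointCounter(TimeSeriesAlgorithm):
        """Stateless example algorithm: returns how many points it was given."""

        def process(self, data: list[TimeSeriesData], **kwargs) -> int:
            return len(data)

    counter = PointCounter()
    # counter.process(points) returns len(points); reset() and get_stats()
    # fall back to the no-op defaults defined on the base class.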
@@ -0,0 +1,248 @@
+ """
+ Out-of-Order Stream Join Algorithm
+
+ This algorithm handles joining two time series streams that may arrive
+ out of order, using windowing and buffering strategies.
+ """
+
+ from collections import defaultdict
+ from collections.abc import Callable
+ from dataclasses import dataclass
+ from typing import Any
+
+ from ..sage_tsdb import TimeSeriesData
+ from .base import TimeSeriesAlgorithm
+
+
+ @dataclass
+ class JoinConfig:
+     """Configuration for stream join"""
+
+     window_size: int  # milliseconds
+     max_delay: int  # maximum out-of-order delay (ms)
+     join_key: str | None = None  # tag key for join condition
+     join_predicate: Callable[[TimeSeriesData, TimeSeriesData], bool] | None = None
+
+
+ class StreamBuffer:
+     """Buffer for managing out-of-order streams"""
+
+     def __init__(self, max_delay: int):
+         """
+         Initialize stream buffer.
+
+         Args:
+             max_delay: Maximum allowed delay (ms)
+         """
+         self.max_delay = max_delay
+         self.buffer: list[TimeSeriesData] = []
+         self.watermark = 0  # Current watermark timestamp
+
+     def add(self, data: TimeSeriesData):
+         """Add data to buffer"""
+         self.buffer.append(data)
+         self._update_watermark()
+
+     def add_batch(self, data_list: list[TimeSeriesData]):
+         """Add multiple data points to buffer"""
+         self.buffer.extend(data_list)
+         self._update_watermark()
+
+     def _update_watermark(self):
+         """Update watermark based on latest data"""
+         if self.buffer:
+             # Sort buffer by timestamp
+             self.buffer.sort(key=lambda x: x.timestamp)
+             # Watermark is the latest timestamp minus max delay
+             latest = self.buffer[-1].timestamp
+             self.watermark = latest - self.max_delay
+
+     def get_ready_data(self) -> list[TimeSeriesData]:
+         """Get data that's ready for processing (before watermark)"""
+         ready = [d for d in self.buffer if d.timestamp <= self.watermark]
+         # Remove ready data from buffer
+         self.buffer = [d for d in self.buffer if d.timestamp > self.watermark]
+         return ready
+
+     def size(self) -> int:
+         """Get buffer size"""
+         return len(self.buffer)
+
+
+ class OutOfOrderStreamJoin(TimeSeriesAlgorithm):
+     """
+     Out-of-Order Stream Join Algorithm.
+
+     This algorithm joins two time series streams that may arrive out of order.
+     It uses windowing and watermarking to handle late data while maintaining
+     join correctness.
+
+     Features:
+     - Handles out-of-order data arrival
+     - Window-based join semantics
+     - Configurable watermarking for late data
+     - Support for custom join predicates
+     """
+
+     def __init__(self, config: dict[str, Any] | None = None):
+         """
+         Initialize stream join algorithm.
+
+         Args:
+             config: Configuration dictionary with:
+                 - window_size: Join window size in milliseconds
+                 - max_delay: Maximum out-of-order delay in milliseconds
+                 - join_key: Optional tag key for equi-join
+                 - join_predicate: Optional custom join predicate function
+         """
+         super().__init__(config)
+
+         self.window_size = self.config.get("window_size", 10000)  # 10 seconds
+         self.max_delay = self.config.get("max_delay", 5000)  # 5 seconds
+         self.join_key = self.config.get("join_key", None)
+         self.join_predicate = self.config.get("join_predicate", None)
+
+         # Buffers for two streams
+         self.left_buffer = StreamBuffer(self.max_delay)
+         self.right_buffer = StreamBuffer(self.max_delay)
+
+         # Statistics
+         self.stats = {
+             "total_joined": 0,
+             "late_arrivals": 0,
+             "dropped_late": 0,
+         }
+
+     def add_left_stream(self, data: list[TimeSeriesData]):
+         """Add data to left stream"""
+         self.left_buffer.add_batch(data)
+
+     def add_right_stream(self, data: list[TimeSeriesData]):
+         """Add data to right stream"""
+         self.right_buffer.add_batch(data)
+
+     def process(
+         self,
+         data: list[TimeSeriesData] | None = None,
+         left_stream: list[TimeSeriesData] | None = None,
+         right_stream: list[TimeSeriesData] | None = None,
+         **kwargs,
+     ) -> list[tuple[TimeSeriesData, TimeSeriesData]]:
+         """
+         Process stream join.
+
+         Args:
+             data: Not used (for compatibility)
+             left_stream: Data from left stream
+             right_stream: Data from right stream
+             **kwargs: Additional parameters
+
+         Returns:
+             List of joined data pairs
+         """
+         # Add data to buffers
+         if left_stream:
+             self.add_left_stream(left_stream)
+         if right_stream:
+             self.add_right_stream(right_stream)
+
+         # Get ready data from both buffers
+         left_ready = self.left_buffer.get_ready_data()
+         right_ready = self.right_buffer.get_ready_data()
+
+         # Perform join
+         joined = self._join_data(left_ready, right_ready)
+
+         # Update statistics
+         self.stats["total_joined"] += len(joined)
+
+         return joined
+
+     def _join_data(
+         self, left_data: list[TimeSeriesData], right_data: list[TimeSeriesData]
+     ) -> list[tuple[TimeSeriesData, TimeSeriesData]]:
+         """
+         Join data from two streams.
+
+         Args:
+             left_data: Data from left stream
+             right_data: Data from right stream
+
+         Returns:
+             List of joined pairs
+         """
+         joined = []
+
+         # If join key is specified, use hash join
+         if self.join_key:
+             joined = self._hash_join(left_data, right_data)
+         else:
+             # Use nested loop join with window condition
+             joined = self._nested_loop_join(left_data, right_data)
+
+         return joined
+
+     def _hash_join(
+         self, left_data: list[TimeSeriesData], right_data: list[TimeSeriesData]
+     ) -> list[tuple[TimeSeriesData, TimeSeriesData]]:
+         """Hash join on specified key"""
+         joined = []
+
+         # Build hash table for right stream
+         right_hash: dict[str, list[TimeSeriesData]] = defaultdict(list)
+         for right in right_data:
+             key_value = right.tags.get(self.join_key) if self.join_key else None
+             if key_value:
+                 right_hash[key_value].append(right)
+
+         # Probe with left stream
+         for left in left_data:
+             key_value = left.tags.get(self.join_key) if self.join_key else None
+             if key_value and key_value in right_hash:
+                 for right in right_hash[key_value]:
+                     # Check window condition
+                     if abs(left.timestamp - right.timestamp) <= self.window_size:
+                         # Check custom predicate if provided
+                         if self.join_predicate is None or self.join_predicate(left, right):
+                             joined.append((left, right))
+
+         return joined
+
+     def _nested_loop_join(
+         self, left_data: list[TimeSeriesData], right_data: list[TimeSeriesData]
+     ) -> list[tuple[TimeSeriesData, TimeSeriesData]]:
+         """Nested loop join with window condition"""
+         joined = []
+
+         for left in left_data:
+             for right in right_data:
+                 # Check window condition
+                 if abs(left.timestamp - right.timestamp) <= self.window_size:
+                     # Check custom predicate if provided
+                     if self.join_predicate is None or self.join_predicate(left, right):
+                         joined.append((left, right))
+
+         return joined
+
+     def reset(self):
+         """Reset algorithm state"""
+         self.left_buffer = StreamBuffer(self.max_delay)
+         self.right_buffer = StreamBuffer(self.max_delay)
+         self.stats = {
+             "total_joined": 0,
+             "late_arrivals": 0,
+             "dropped_late": 0,
+         }
+
+     def get_stats(self) -> dict[str, Any]:
+         """Get join statistics"""
+         return {
+             **self.stats,
+             "left_buffer_size": self.left_buffer.size(),
+             "right_buffer_size": self.right_buffer.size(),
+             "left_watermark": self.left_buffer.watermark,
+             "right_watermark": self.right_buffer.watermark,
+         }
+
+
+ __all__ = ["OutOfOrderStreamJoin", "JoinConfig", "StreamBuffer"]
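To illustrate how the watermark gates the join: only points at or below watermark = latest_timestamp - max_delay are released from each buffer; newer points stay buffered for a later process() call. A usage sketch follows. The import paths are inferred from the file list above, and the TimeSeriesData keyword constructor is inferred from _aggregate_window in the next file; both are assumptions, not documented API.

    # Usage sketch under the stated assumptions.
    from sage.middleware.components.sage_tsdb.python.algorithms import OutOfOrderStreamJoin
    from sage.middleware.components.sage_tsdb.python.sage_tsdb import TimeSeriesData

    join = OutOfOrderStreamJoin(
        {"window_size": 2000, "max_delay": 1000, "join_key": "host"}
    )

    # Points may arrive out of order; each buffer sorts them on insert.
    left = [TimeSeriesData(timestamp=t, value=1.0, tags={"host": "a"}) for t in (3000, 0, 1000)]
    right = [TimeSeriesData(timestamp=t, value=2.0, tags={"host": "a"}) for t in (500, 2500)]

    pairs = join.process(left_stream=left, right_stream=right)
    # Left watermark is 3000 - 1000 = 2000, so timestamps 0 and 1000 are ready;
    # right watermark is 2500 - 1000 = 1500, so only 500 is ready. The hash join
    # on tags["host"] yields (0, 500) and (1000, 500), since both satisfy
    # abs(l.timestamp - r.timestamp) <= 2000. Points 3000 and 2500 stay buffered.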
@@ -0,0 +1,296 @@
+ """
+ Window Aggregator Algorithm
+
+ Provides various windowing strategies for time series aggregation,
+ including tumbling, sliding, and session windows.
+ """
+
+ from dataclasses import dataclass
+ from enum import Enum
+ from typing import Any
+
+ import numpy as np
+
+ from ..sage_tsdb import AggregationType, TimeSeriesData
+ from .base import TimeSeriesAlgorithm
+
+
+ class WindowType(Enum):
+     """Window types for aggregation"""
+
+     TUMBLING = "tumbling"  # Non-overlapping fixed-size windows
+     SLIDING = "sliding"  # Overlapping fixed-size windows
+     SESSION = "session"  # Dynamic windows based on inactivity gap
+
+
+ @dataclass
+ class WindowConfig:
+     """Configuration for windowing"""
+
+     window_type: WindowType
+     window_size: int  # milliseconds
+     slide_interval: int | None = None  # for sliding windows (ms)
+     session_gap: int | None = None  # for session windows (ms)
+     aggregation: AggregationType = AggregationType.AVG
+
+
+ class WindowAggregator(TimeSeriesAlgorithm):
+     """
+     Window-based aggregation algorithm.
+
+     Supports multiple windowing strategies:
+     - Tumbling windows: Non-overlapping fixed-size windows
+     - Sliding windows: Overlapping windows with configurable slide interval
+     - Session windows: Dynamic windows based on inactivity gaps
+
+     Features:
+     - Multiple aggregation functions (sum, avg, min, max, count, etc.)
+     - Efficient incremental computation
+     - Support for late data handling
+     """
+
+     def __init__(self, config: dict[str, Any] | None = None):
+         """
+         Initialize window aggregator.
+
+         Args:
+             config: Configuration dictionary with:
+                 - window_type: Type of window (tumbling/sliding/session)
+                 - window_size: Window size in milliseconds
+                 - slide_interval: Slide interval for sliding windows (ms)
+                 - session_gap: Inactivity gap for session windows (ms)
+                 - aggregation: Aggregation function to apply
+         """
+         super().__init__(config)
+
+         window_type_str = self.config.get("window_type", "tumbling")
+         self.window_type = WindowType(window_type_str)
+         self.window_size = self.config.get("window_size", 60000)  # 1 minute
+         self.slide_interval = self.config.get("slide_interval", self.window_size)
+         self.session_gap = self.config.get("session_gap", 30000)  # 30 seconds
+
+         agg_str = self.config.get("aggregation", "avg")
+         if isinstance(agg_str, str):
+             self.aggregation = AggregationType(agg_str)
+         else:
+             self.aggregation = agg_str
+
+         # State for incremental processing
+         self.windows: dict[int, list[TimeSeriesData]] = {}
+         self.stats = {
+             "windows_created": 0,
+             "windows_completed": 0,
+             "data_points_processed": 0,
+         }
+
+     def process(self, data: list[TimeSeriesData], **kwargs) -> list[TimeSeriesData]:
+         """
+         Process time series data with windowing.
+
+         Args:
+             data: Input time series data points
+             **kwargs: Additional parameters
+
+         Returns:
+             Aggregated time series data (one point per window)
+         """
+         if not data:
+             return []
+
+         # Sort data by timestamp
+         sorted_data = sorted(data, key=lambda x: x.timestamp)
+
+         # Apply windowing based on type
+         if self.window_type == WindowType.TUMBLING:
+             return self._tumbling_window(sorted_data)
+         elif self.window_type == WindowType.SLIDING:
+             return self._sliding_window(sorted_data)
+         elif self.window_type == WindowType.SESSION:
+             return self._session_window(sorted_data)
+
+         return []
+
+     def _tumbling_window(self, data: list[TimeSeriesData]) -> list[TimeSeriesData]:
+         """Process with tumbling windows"""
+         if not data:
+             return []
+
+         results = []
+         window_start = self._align_to_window(data[0].timestamp)
+         window_data = []
+
+         for point in data:
+             window_key = self._get_window_key(point.timestamp, window_start)
+
+             # Check if point belongs to current window
+             if window_key == window_start:
+                 window_data.append(point)
+             else:
+                 # Complete current window
+                 if window_data:
+                     agg_point = self._aggregate_window(window_data, window_start)
+                     results.append(agg_point)
+                     self.stats["windows_completed"] += 1
+
+                 # Start new window(s)
+                 # Handle potential gaps
+                 while window_key > window_start:
+                     window_start += self.window_size
+
+                 window_data = [point]
+                 self.stats["windows_created"] += 1
+
+         # Complete last window
+         if window_data:
+             agg_point = self._aggregate_window(window_data, window_start)
+             results.append(agg_point)
+             self.stats["windows_completed"] += 1
+
+         self.stats["data_points_processed"] += len(data)
+         return results
+
+     def _sliding_window(self, data: list[TimeSeriesData]) -> list[TimeSeriesData]:
+         """Process with sliding windows"""
+         if not data:
+             return []
+
+         results = []
+
+         # Get first window start
+         first_timestamp = data[0].timestamp
+         window_start = self._align_to_window(first_timestamp)
+
+         # Create windows until we've covered all data
+         last_timestamp = data[-1].timestamp
+
+         while window_start <= last_timestamp:
+             window_end = window_start + self.window_size
+
+             # Get data points in this window
+             window_data = [point for point in data if window_start <= point.timestamp < window_end]
+
+             if window_data:
+                 agg_point = self._aggregate_window(window_data, window_start)
+                 results.append(agg_point)
+                 self.stats["windows_completed"] += 1
+
+             # Slide to next window
+             window_start += self.slide_interval
+             self.stats["windows_created"] += 1
+
+         self.stats["data_points_processed"] += len(data)
+         return results
+
+     def _session_window(self, data: list[TimeSeriesData]) -> list[TimeSeriesData]:
+         """Process with session windows"""
+         if not data:
+             return []
+
+         results = []
+         session_data = []
+         last_timestamp = data[0].timestamp
+         session_start = data[0].timestamp
+
+         for point in data:
+             # Check if point is within session gap
+             if point.timestamp - last_timestamp <= self.session_gap:
+                 session_data.append(point)
+             else:
+                 # Complete current session
+                 if session_data:
+                     agg_point = self._aggregate_window(session_data, session_start)
+                     results.append(agg_point)
+                     self.stats["windows_completed"] += 1
+
+                 # Start new session
+                 session_data = [point]
+                 session_start = point.timestamp
+                 self.stats["windows_created"] += 1
+
+             last_timestamp = point.timestamp
+
+         # Complete last session
+         if session_data:
+             agg_point = self._aggregate_window(session_data, session_start)
+             results.append(agg_point)
+             self.stats["windows_completed"] += 1
+
+         self.stats["data_points_processed"] += len(data)
+         return results
+
+     def _align_to_window(self, timestamp: int) -> int:
+         """Align timestamp to window boundary"""
+         return (timestamp // self.window_size) * self.window_size
+
+     def _get_window_key(self, timestamp: int, reference: int) -> int:
+         """Get window key for timestamp"""
+         return self._align_to_window(timestamp)
+
+     def _aggregate_window(
+         self, data: list[TimeSeriesData], window_timestamp: int
+     ) -> TimeSeriesData:
+         """Aggregate data in a window"""
+         if not data:
+             return TimeSeriesData(timestamp=window_timestamp, value=0.0)
+
+         # Extract values
+         values = []
+         for point in data:
+             # Flatten arrays/lists, append scalars
+             if isinstance(point.value, (list, np.ndarray)):
+                 # Use np.ravel to flatten, then convert to list and extend
+                 values.extend(np.ravel(point.value).tolist())
+             else:
+                 values.append(point.value)
+
+         # Apply aggregation
+         if self.aggregation == AggregationType.SUM:
+             agg_value = sum(values)
+         elif self.aggregation == AggregationType.AVG:
+             agg_value = sum(values) / len(values)
+         elif self.aggregation == AggregationType.MIN:
+             agg_value = min(values)
+         elif self.aggregation == AggregationType.MAX:
+             agg_value = max(values)
+         elif self.aggregation == AggregationType.COUNT:
+             agg_value = len(values)
+         elif self.aggregation == AggregationType.FIRST:
+             agg_value = values[0]
+         elif self.aggregation == AggregationType.LAST:
+             agg_value = values[-1]
+         elif self.aggregation == AggregationType.STDDEV:
+             agg_value = float(np.std(values))
+         else:
+             agg_value = sum(values) / len(values)
+
+         # Merge tags from all data points
+         merged_tags = {}
+         for point in data:
+             if point.tags:
+                 merged_tags.update(point.tags)
+
+         return TimeSeriesData(
+             timestamp=window_timestamp,
+             value=agg_value,
+             tags=merged_tags,
+             fields={"window_size": len(data), "aggregation": self.aggregation.value},
+         )
+
+     def reset(self):
+         """Reset algorithm state"""
+         self.windows = {}
+         self.stats = {
+             "windows_created": 0,
+             "windows_completed": 0,
+             "data_points_processed": 0,
+         }
+
+     def get_stats(self) -> dict[str, Any]:
+         """Get aggregator statistics"""
+         return {
+             **self.stats,
+             "active_windows": len(self.windows),
+         }
+
+
+ __all__ = ["WindowAggregator", "WindowType", "WindowConfig"]
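A usage sketch for the aggregator, under the same assumptions about import paths and the TimeSeriesData constructor as the join example above (the constructor with only timestamp and value is grounded by the empty-window fallback in _aggregate_window):

    from sage.middleware.components.sage_tsdb.python.algorithms import WindowAggregator
    from sage.middleware.components.sage_tsdb.python.sage_tsdb import TimeSeriesData

    # One-minute tumbling windows, averaging the values in each window.
    agg = WindowAggregator(
        {"window_type": "tumbling", "window_size": 60000, "aggregation": "avg"}
    )

    points = [
        TimeSeriesData(timestamp=0, value=1.0),
        TimeSeriesData(timestamp=30000, value=3.0),
        TimeSeriesData(timestamp=61000, value=10.0),
    ]

    out = agg.process(points)
    # Two aggregated points: window [0, 60000) -> value (1.0 + 3.0) / 2 = 2.0,
    # and window [60000, 120000) -> value 10.0. Each result carries
    # fields={"window_size": <points in window>, "aggregation": "avg"}.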
@@ -0,0 +1,7 @@
+ """
+ Micro-service module for SageTSDB
+ """
+
+ from .sage_tsdb_service import SageTSDBService, SageTSDBServiceConfig
+
+ __all__ = ["SageTSDBService", "SageTSDBServiceConfig"]