nodeqmindmap 2.2.0__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/PKG-INFO +1 -1
- nodeqmindmap-2.3.0/nodeqmindmap/pipeline.py +415 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/nodeqmindmap.egg-info/PKG-INFO +1 -1
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/pyproject.toml +1 -1
- nodeqmindmap-2.2.0/nodeqmindmap/pipeline.py +0 -198
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/README.md +0 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/nodeqmindmap/__init__.py +0 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/nodeqmindmap/adapter.py +0 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/nodeqmindmap/types.py +0 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/nodeqmindmap.egg-info/SOURCES.txt +0 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/nodeqmindmap.egg-info/dependency_links.txt +0 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/nodeqmindmap.egg-info/top_level.txt +0 -0
- {nodeqmindmap-2.2.0 → nodeqmindmap-2.3.0}/setup.cfg +0 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
"""PipelineEngine — mirrors pipeline-engine.ts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any, Callable
|
|
8
|
+
|
|
9
|
+
from .types import (
|
|
10
|
+
DataSample,
|
|
11
|
+
ExecutionResult,
|
|
12
|
+
PipelineConfig,
|
|
13
|
+
PipelinePerformance,
|
|
14
|
+
PipelineStats,
|
|
15
|
+
TransformationRule,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# ── Schema analysis helpers ──────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
def _flatten_object(obj: Any, prefix: str = "") -> dict[str, Any]:
|
|
21
|
+
"""Flatten a nested dict into dot-notation paths."""
|
|
22
|
+
out: dict[str, Any] = {}
|
|
23
|
+
if not isinstance(obj, dict):
|
|
24
|
+
return out
|
|
25
|
+
for k, v in obj.items():
|
|
26
|
+
key = f"{prefix}.{k}" if prefix else k
|
|
27
|
+
if isinstance(v, dict) and v is not None:
|
|
28
|
+
out.update(_flatten_object(v, key))
|
|
29
|
+
else:
|
|
30
|
+
out[key] = v
|
|
31
|
+
return out
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _normalise(s: str) -> str:
|
|
35
|
+
"""Strip separators and lowercase for comparison."""
|
|
36
|
+
return s.replace("_", "").replace("-", "").replace(" ", "").lower()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _to_camel_case(s: str) -> str:
|
|
40
|
+
parts = s.replace("-", "_").split("_")
|
|
41
|
+
return parts[0] + "".join(p.capitalize() for p in parts[1:])
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _field_name_similarity(a: str, b: str) -> float:
    """Score how alike two field names are, from 0.0 (unrelated) to 1.0 (same).

    Exact match after separator/case normalisation scores 1.0; a camelCase
    rendering of one name matching the other scores 0.95; containment scores
    0.7; otherwise a shared-prefix ratio (scaled by 0.8) applies when it
    exceeds one half.
    """
    norm_a, norm_b = _normalise(a), _normalise(b)
    if norm_a == norm_b:
        return 1.0
    # Either name, camel-cased, matching the other case-insensitively counts
    # as a near-exact rename.
    if b.lower() == _to_camel_case(a).lower() or a.lower() == _to_camel_case(b).lower():
        return 0.95
    if norm_a in norm_b or norm_b in norm_a:
        return 0.7
    # Fall back to the length of the common prefix relative to the longer name.
    shared = 0
    for ch_a, ch_b in zip(norm_a, norm_b):
        if ch_a != ch_b:
            break
        shared += 1
    longest = max(len(norm_a), len(norm_b))
    ratio = shared / longest if longest else 0
    return ratio * 0.8 if ratio > 0.5 else 0.0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _detect_scaling(in_val: Any, out_val: Any) -> dict[str, Any] | None:
|
|
67
|
+
if not (isinstance(in_val, (int, float)) and isinstance(out_val, (int, float))):
|
|
68
|
+
return None
|
|
69
|
+
if in_val == 0 or out_val == 0:
|
|
70
|
+
return None
|
|
71
|
+
ratio = in_val / out_val
|
|
72
|
+
if abs(ratio - 100) < 0.01:
|
|
73
|
+
return {"type": "divide", "factor": 100}
|
|
74
|
+
if abs(ratio - 1000) < 0.01:
|
|
75
|
+
return {"type": "divide", "factor": 1000}
|
|
76
|
+
if abs(ratio - 0.01) < 0.0001:
|
|
77
|
+
return {"type": "multiply", "factor": 100}
|
|
78
|
+
return None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _generate_rules(
    input_sample: DataSample, output_sample: DataSample
) -> tuple[list[TransformationRule], float]:
    """Infer transformation rules mapping the input sample onto the output.

    For each flattened output field the best-named input field (similarity
    >= 0.7) is mapped via a scaling/rename/convert/direct rule; output fields
    with no plausible source become low-confidence constants. Returns the
    rule list and an overall accuracy estimate capped at 0.99.
    """
    flat_in = _flatten_object(input_sample.data or {})
    flat_out = _flatten_object(output_sample.data or {})

    rules: list[TransformationRule] = []
    matched: set[str] = set()

    for target in flat_out:
        # Pick the input field whose name most resembles this output field;
        # ties keep the first candidate encountered.
        candidate: str | None = None
        candidate_score = 0.0
        for source in flat_in:
            s = _field_name_similarity(source, target)
            if s > candidate_score:
                candidate_score = s
                candidate = source

        if candidate is None or candidate_score < 0.7:
            continue

        src_val = flat_in[candidate]
        dst_val = flat_out[target]
        scaling = _detect_scaling(src_val, dst_val)

        if scaling:
            kind, factor = scaling["type"], scaling.get("factor")
        elif 0.9 <= candidate_score < 1.0:
            # Names are near-identical but not equal: treat as a rename.
            kind, factor = "rename", None
        elif type(src_val) is not type(dst_val) and not (
            isinstance(src_val, (int, float)) and isinstance(dst_val, (int, float))
        ):
            # Genuinely different types (int vs float does not count).
            kind, factor = "convert", None
        else:
            kind, factor = "direct", None

        rules.append(
            TransformationRule(
                source_field=candidate,
                target_field=target,
                type=kind,
                confidence=candidate_score,
                factor=factor,
            )
        )
        matched.add(target)

    # Any output field left unmatched becomes a low-confidence constant rule.
    rules.extend(
        TransformationRule(
            source_field="",
            target_field=target,
            type="constant",
            confidence=0.4,
        )
        for target in flat_out
        if target not in matched
    )

    accuracy = (
        min(sum(r.confidence for r in rules) / len(rules), 0.99) if rules else 0.0
    )
    return rules, accuracy
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _apply_rules(input_data: Any, rules: list[TransformationRule]) -> dict[str, Any]:
    """Run each rule over *input_data* and assemble the transformed output."""
    flat = _flatten_object(input_data or {})
    result: dict[str, Any] = {}

    for rule in rules:
        if rule.type == "constant":
            # Constants carry no source field; emit a None placeholder.
            _set_nested(result, rule.target_field, None)
            continue

        value = flat.get(rule.source_field)
        if value is None:
            # Missing (or explicitly None) source: leave the target unset.
            continue

        is_number = isinstance(value, (int, float))
        if rule.type == "divide" and rule.factor and is_number:
            value = value / rule.factor
        elif rule.type == "multiply" and rule.factor and is_number:
            value = value * rule.factor
        elif rule.type == "convert":
            value = str(value)

        _set_nested(result, rule.target_field, value)

    return result
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _set_nested(obj: dict[str, Any], key: str, value: Any) -> None:
|
|
177
|
+
"""Write a dot-notation key into a nested dict."""
|
|
178
|
+
parts = key.split(".")
|
|
179
|
+
cursor = obj
|
|
180
|
+
for part in parts[:-1]:
|
|
181
|
+
if part not in cursor or not isinstance(cursor[part], dict):
|
|
182
|
+
cursor[part] = {}
|
|
183
|
+
cursor = cursor[part]
|
|
184
|
+
cursor[parts[-1]] = value
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# ── PipelineEngine ────────────────────────────────────────────────────────────
|
|
188
|
+
|
|
189
|
+
class PipelineEngine:
    """Manages in-memory ETL pipelines.

    Pipelines live only in this process (no persistence). A single lock
    guards the registry; transformation work itself runs outside the lock.
    """

    def __init__(self) -> None:
        # Registry of pipelines keyed by generated id; guarded by self._lock.
        self._pipelines: dict[str, PipelineConfig] = {}
        # Monotonic counter used to mint "pipeline_N" ids.
        self._next_id = 1
        self._lock = threading.Lock()

    def create_pipeline(
        self,
        name: str,
        input_sample: DataSample,
        output_sample: DataSample,
        options: dict[str, Any] | None = None,
    ) -> PipelineConfig:
        """Create, register, and return a new pipeline.

        Transformation rules and the accuracy estimate are inferred from the
        two samples. *options* may carry "model_config", "data_sources",
        and "etl_options" overrides; anything missing gets a default.
        """
        options = options or {}
        rules, accuracy = _generate_rules(input_sample, output_sample)

        # Reserve a unique id under the lock; object construction happens
        # outside it so rule-heavy pipelines don't serialize other callers.
        with self._lock:
            pipeline_id = f"pipeline_{self._next_id}"
            self._next_id += 1

        pipeline = PipelineConfig(
            id=pipeline_id,
            name=name,
            input_sample=input_sample,
            output_sample=output_sample,
            transformation_rules=rules,
            model_config=options.get("model_config") or {"type": "auto"},
            accuracy=accuracy,
            version="1.0.0",
            created_at=datetime.now(),  # naive local time — TODO confirm tz policy
            data_sources=options.get("data_sources") or [],
            etl_config=options.get("etl_options") or {},
            # Throughput/latency are fixed estimates; only error_rate is derived.
            performance=PipelinePerformance(
                throughput=1000, latency=5, error_rate=1 - accuracy
            ),
        )

        with self._lock:
            self._pipelines[pipeline_id] = pipeline

        return pipeline

    def update_pipeline(
        self,
        pipeline_id: str,
        input_sample: DataSample | None = None,
        output_sample: DataSample | None = None,
    ) -> PipelineConfig:
        """Replace either sample and regenerate rules/accuracy if one changed.

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")
            if input_sample is not None:
                pipeline.input_sample = input_sample
            if output_sample is not None:
                pipeline.output_sample = output_sample
            if input_sample is not None or output_sample is not None:
                # Re-infer the mapping from the (possibly new) samples.
                rules, accuracy = _generate_rules(pipeline.input_sample, pipeline.output_sample)
                pipeline.transformation_rules = rules
                pipeline.accuracy = accuracy
                pipeline.performance.error_rate = 1 - accuracy
            return pipeline

    def execute_pipeline(self, pipeline_id: str, input_data: Any) -> ExecutionResult:
        """Apply the pipeline's rules to *input_data* and wrap the result.

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")

        # Transformation runs outside the lock; the rule list is read without
        # copying, so a concurrent update_pipeline could swap it mid-run.
        transformed = _apply_rules(input_data, pipeline.transformation_rules)
        return ExecutionResult(
            processed=True,
            data=transformed,
            timestamp=datetime.now(),
            pipeline_id=pipeline_id,
        )

    def get_pipeline(self, pipeline_id: str) -> PipelineConfig | None:
        """Return the pipeline with this id, or None if unknown."""
        with self._lock:
            return self._pipelines.get(pipeline_id)

    def get_all_pipelines(self) -> list[PipelineConfig]:
        """Return a snapshot list of all registered pipelines."""
        with self._lock:
            return list(self._pipelines.values())

    def get_pipeline_stats(self, pipeline_id: str) -> PipelineStats:
        """Return a stats snapshot; last_executed is simply "now".

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")
            return PipelineStats(
                id=pipeline.id,
                name=pipeline.name,
                performance=pipeline.performance,
                version=pipeline.version,
                last_executed=datetime.now(),
            )

    def generate_pipeline_code(self, pipeline_id: str) -> str:
        """Render the pipeline's rules as standalone Python source text.

        NOTE(review): the emitted code indexes flat keys, e.g.
        ``input_data['a.b']``, while _apply_rules resolves dotted paths into
        nested dicts — generated code for nested fields may need manual edits.

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")

        # Function name is derived from the pipeline name; spaces only are
        # replaced, so other non-identifier characters would pass through.
        fn_name = pipeline.name.replace(" ", "_")
        lines = [
            f"# Auto-generated pipeline: {pipeline.name}",
            f"# Accuracy: {pipeline.accuracy * 100:.1f}% | Rules: {len(pipeline.transformation_rules)}",
            f"def {fn_name}(input_data: dict) -> dict:",
            "    out = {}",
        ]
        for rule in pipeline.transformation_rules:
            if rule.type == "constant":
                lines.append(f"    out['{rule.target_field}'] = None  # computed — fill in manually")
            elif rule.type == "divide" and rule.factor:
                lines.append(f"    out['{rule.target_field}'] = input_data['{rule.source_field}'] / {rule.factor}")
            elif rule.type == "multiply" and rule.factor:
                lines.append(f"    out['{rule.target_field}'] = input_data['{rule.source_field}'] * {rule.factor}")
            elif rule.type == "convert":
                lines.append(f"    out['{rule.target_field}'] = str(input_data['{rule.source_field}'])")
            else:
                # direct/rename both copy the value; the comment records type
                # and confidence for the human reader.
                lines.append(
                    f"    out['{rule.target_field}'] = input_data['{rule.source_field}']"
                    f"  # {rule.type} ({int(rule.confidence * 100)}%)"
                )
        lines.append("    return out")
        return "\n".join(lines)

    def start_realtime_processing(
        self,
        pipeline_id: str,
        on_data: Callable[[ExecutionResult], None],
        interval_ms: int = 1000,
    ) -> Callable[[], None]:
        """Start a background thread that calls on_data repeatedly. Returns a stop function."""
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")

        stop_event = threading.Event()

        def _run() -> None:
            # Event.wait doubles as the sleep: it returns True (ending the
            # loop) as soon as the returned stop function is called.
            while not stop_event.wait(interval_ms / 1000):
                # Replays the stored input sample each tick — there is no
                # live data feed; on_data runs on this worker thread.
                result = self.execute_pipeline(pipeline_id, pipeline.input_sample.data)
                on_data(result)

        thread = threading.Thread(target=_run, daemon=True)
        thread.start()
        return stop_event.set

    def get_pipeline_execution_mode(self, _pipeline_id: str) -> str:
        """Always "static" — dynamic execution is not implemented."""
        return "static"

    def is_pipeline_static(self, _pipeline_id: str) -> bool:
        """Always True; companion of get_pipeline_execution_mode."""
        return True

    def pipeline_to_mindmap(self, pipeline: PipelineConfig):
        """Convert a PipelineConfig to a MindMapNode tree."""
        # Imported here rather than at module top — presumably to avoid a
        # circular import with .types; confirm before hoisting.
        from .types import MindMapNode

        mode = self.get_pipeline_execution_mode(pipeline.id)
        is_static = self.is_pipeline_static(pipeline.id)
        mode_label = "Static compiled execution" if is_static else "Dynamic execution"

        children = [
            MindMapNode(
                topic="Execution Mode",
                summary=mode_label,
                skills=[f"Mode: {mode.upper()}"],
            )
        ]

        if pipeline.data_sources:
            # One child node per configured source; label prefers host, then
            # API endpoint, else "Local".
            src_children = []
            for src in pipeline.data_sources:
                t = src.get("type", "")
                conn = src.get("connection", {})
                label = conn.get("host") or conn.get("apiEndpoint") or "Local"
                src_children.append(MindMapNode(topic=t, summary=label))
            children.append(
                MindMapNode(
                    topic="Data Sources",
                    summary=f"{len(pipeline.data_sources)} connected sources",
                    children=src_children,
                )
            )

        children.append(
            MindMapNode(
                topic="Input Schema",
                summary="Data input configuration",
                skills=list(pipeline.input_sample.schema.keys()),
            )
        )

        # One leaf per transformation rule, labelled "source → target".
        rule_children = [
            MindMapNode(
                topic=f"{r.source_field} → {r.target_field}",
                summary=f"{r.type} ({int(r.confidence * 100)}%)",
            )
            for r in pipeline.transformation_rules
        ]
        children.append(
            MindMapNode(
                topic="Transformations",
                summary=f"{len(pipeline.transformation_rules)} rules",
                children=rule_children,
            )
        )

        children.append(
            MindMapNode(
                topic="Output Schema",
                summary="Data output configuration",
                skills=list(pipeline.output_sample.schema.keys()),
            )
        )

        return MindMapNode(
            topic=pipeline.name,
            summary=f"ETL Pipeline - Accuracy: {pipeline.accuracy * 100:.1f}%",
            skills=[f"Version: {pipeline.version}"],
            children=children,
        )
|
|
@@ -1,198 +0,0 @@
|
|
|
1
|
-
"""PipelineEngine — mirrors pipeline-engine.ts."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import threading
|
|
6
|
-
from datetime import datetime
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
from .types import (
|
|
10
|
-
DataSample,
|
|
11
|
-
ExecutionResult,
|
|
12
|
-
PipelineConfig,
|
|
13
|
-
PipelinePerformance,
|
|
14
|
-
PipelineStats,
|
|
15
|
-
TransformationRule,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class PipelineEngine:
    """Manages in-memory ETL pipelines.

    2.2.0 variant: no rule inference — pipelines are created with an empty
    rule list and fixed accuracy, and execution passes data through unchanged.
    """

    def __init__(self) -> None:
        # Registry of pipelines keyed by generated id; guarded by self._lock.
        self._pipelines: dict[str, PipelineConfig] = {}
        # Monotonic counter used to mint "pipeline_N" ids.
        self._next_id = 1
        self._lock = threading.Lock()

    def create_pipeline(
        self,
        name: str,
        input_sample: DataSample,
        output_sample: DataSample,
        options: dict[str, Any] | None = None,
    ) -> PipelineConfig:
        """Create, register, and return a new pipeline with hard-coded
        accuracy/performance figures and no transformation rules."""
        options = options or {}
        # Reserve a unique id under the lock; construction happens outside it.
        with self._lock:
            pipeline_id = f"pipeline_{self._next_id}"
            self._next_id += 1

        model_config: dict[str, Any] = options.get("model_config") or {"type": "auto"}
        data_sources: list[dict[str, Any]] = options.get("data_sources") or []
        etl_config: dict[str, Any] = options.get("etl_options") or {}

        pipeline = PipelineConfig(
            id=pipeline_id,
            name=name,
            input_sample=input_sample,
            output_sample=output_sample,
            transformation_rules=[],  # no inference in this version
            model_config=model_config,
            accuracy=0.85,  # fixed estimate, not derived from the samples
            version="1.0.0",
            created_at=datetime.now(),  # naive local time — TODO confirm tz policy
            data_sources=data_sources,
            etl_config=etl_config,
            performance=PipelinePerformance(throughput=100, latency=50, error_rate=0.01),
        )

        with self._lock:
            self._pipelines[pipeline_id] = pipeline

        return pipeline

    def update_pipeline(
        self,
        pipeline_id: str,
        input_sample: DataSample | None = None,
        output_sample: DataSample | None = None,
    ) -> PipelineConfig:
        """Replace either sample in place. Rules are NOT regenerated here.

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")
            if input_sample is not None:
                pipeline.input_sample = input_sample
            if output_sample is not None:
                pipeline.output_sample = output_sample
            return pipeline

    def execute_pipeline(self, pipeline_id: str, input_data: Any) -> ExecutionResult:
        """Validate the id and echo *input_data* back untransformed.

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            exists = pipeline_id in self._pipelines
        if not exists:
            raise ValueError(f"Pipeline {pipeline_id} not found")
        return ExecutionResult(
            processed=True,
            data=input_data,  # pass-through: no rules are applied
            timestamp=datetime.now(),
            pipeline_id=pipeline_id,
        )

    def get_pipeline(self, pipeline_id: str) -> PipelineConfig | None:
        """Return the pipeline with this id, or None if unknown."""
        with self._lock:
            return self._pipelines.get(pipeline_id)

    def get_all_pipelines(self) -> list[PipelineConfig]:
        """Return a snapshot list of all registered pipelines."""
        with self._lock:
            return list(self._pipelines.values())

    def get_pipeline_stats(self, pipeline_id: str) -> PipelineStats:
        """Return a stats snapshot; last_executed is simply "now".

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")
            return PipelineStats(
                id=pipeline.id,
                name=pipeline.name,
                performance=pipeline.performance,
                version=pipeline.version,
                last_executed=datetime.now(),
            )

    def generate_pipeline_code(self, pipeline_id: str) -> str:
        """Return a stub function body for the pipeline (no rule rendering).

        Raises:
            ValueError: if *pipeline_id* is unknown.
        """
        with self._lock:
            pipeline = self._pipelines.get(pipeline_id)
            if pipeline is None:
                raise ValueError(f"Pipeline {pipeline_id} not found")
            # Spaces only are replaced; other non-identifier characters in the
            # pipeline name would pass through into the def line.
            fn_name = pipeline.name.replace(" ", "_")
            return (
                f"# Generated pipeline code for {pipeline.name}\n"
                f"def {fn_name}(input_data):\n"
                f"    # TODO: add transformation logic\n"
                f"    return input_data\n"
            )

    def get_pipeline_execution_mode(self, _pipeline_id: str) -> str:
        """Always "static" — dynamic execution is not implemented."""
        return "static"

    def is_pipeline_static(self, _pipeline_id: str) -> bool:
        """Always True; companion of get_pipeline_execution_mode."""
        return True

    def pipeline_to_mindmap(self, pipeline: PipelineConfig):
        """Convert a PipelineConfig to a MindMapNode tree (import delayed to avoid circular)."""
        from .types import MindMapNode

        mode = self.get_pipeline_execution_mode(pipeline.id)
        is_static = self.is_pipeline_static(pipeline.id)
        mode_label = "Static compiled execution" if is_static else "Dynamic execution"

        children = [
            MindMapNode(
                topic="Execution Mode",
                summary=mode_label,
                skills=[f"Mode: {mode.upper()}"],
            )
        ]

        if pipeline.data_sources:
            # One child node per configured source; label prefers host, then
            # API endpoint, else "Local".
            src_children = []
            for src in pipeline.data_sources:
                t = src.get("type", "")
                conn = src.get("connection", {})
                label = conn.get("host") or conn.get("apiEndpoint") or "Local"
                src_children.append(MindMapNode(topic=t, summary=label))
            children.append(
                MindMapNode(
                    topic="Data Sources",
                    summary=f"{len(pipeline.data_sources)} connected sources",
                    children=src_children,
                )
            )

        children.append(
            MindMapNode(
                topic="Input Schema",
                summary="Data input configuration",
                skills=list(pipeline.input_sample.schema.keys()),
            )
        )

        # One leaf per transformation rule (always empty in this version).
        rule_children = [
            MindMapNode(
                topic=f"{r.source_field} → {r.target_field}",
                summary=f"{r.type} ({int(r.confidence * 100)}%)",
            )
            for r in pipeline.transformation_rules
        ]
        children.append(
            MindMapNode(
                topic="Transformations",
                summary=f"{len(pipeline.transformation_rules)} rules",
                children=rule_children,
            )
        )

        children.append(
            MindMapNode(
                topic="Output Schema",
                summary="Data output configuration",
                skills=list(pipeline.output_sample.schema.keys()),
            )
        )

        return MindMapNode(
            topic=pipeline.name,
            summary=f"ETL Pipeline - Accuracy: {pipeline.accuracy * 100:.1f}%",
            skills=[f"Version: {pipeline.version}"],
            children=children,
        )
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|