nodeqmindmap 2.2.0__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nodeqmindmap
3
- Version: 2.2.0
3
+ Version: 2.3.0
4
4
  Summary: Python port of nodeq-mindmap: MindMapNode data model and ETL pipeline engine
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/workflow-builder/nodeq-mindmap
@@ -0,0 +1,415 @@
1
+ """PipelineEngine — mirrors pipeline-engine.ts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ from datetime import datetime
7
+ from typing import Any, Callable
8
+
9
+ from .types import (
10
+ DataSample,
11
+ ExecutionResult,
12
+ PipelineConfig,
13
+ PipelinePerformance,
14
+ PipelineStats,
15
+ TransformationRule,
16
+ )
17
+
18
+ # ── Schema analysis helpers ──────────────────────────────────────────────────
19
+
20
+ def _flatten_object(obj: Any, prefix: str = "") -> dict[str, Any]:
21
+ """Flatten a nested dict into dot-notation paths."""
22
+ out: dict[str, Any] = {}
23
+ if not isinstance(obj, dict):
24
+ return out
25
+ for k, v in obj.items():
26
+ key = f"{prefix}.{k}" if prefix else k
27
+ if isinstance(v, dict) and v is not None:
28
+ out.update(_flatten_object(v, key))
29
+ else:
30
+ out[key] = v
31
+ return out
32
+
33
+
34
+ def _normalise(s: str) -> str:
35
+ """Strip separators and lowercase for comparison."""
36
+ return s.replace("_", "").replace("-", "").replace(" ", "").lower()
37
+
38
+
39
+ def _to_camel_case(s: str) -> str:
40
+ parts = s.replace("-", "_").split("_")
41
+ return parts[0] + "".join(p.capitalize() for p in parts[1:])
42
+
43
+
44
def _field_name_similarity(a: str, b: str) -> float:
    """Heuristic similarity score in [0, 1] between two field names.

    Tiers: exact normalised match (1.0), camelCase equivalence (0.95 —
    NOTE(review): rarely reachable since _normalise already collapses the
    same separators; confirm before relying on it), substring containment
    (0.7), then a scaled common-prefix ratio (only when the ratio exceeds
    0.5), else 0.0.
    """
    left, right = _normalise(a), _normalise(b)
    if left == right:
        return 1.0
    if _to_camel_case(a).lower() == b.lower() or _to_camel_case(b).lower() == a.lower():
        return 0.95
    if left in right or right in left:
        return 0.7

    # Length of the shared leading run of characters.
    shared = 0
    limit = min(len(left), len(right))
    while shared < limit and left[shared] == right[shared]:
        shared += 1

    longest = max(len(left), len(right))
    ratio = shared / longest if longest else 0
    return ratio * 0.8 if ratio > 0.5 else 0.0
64
+
65
+
66
+ def _detect_scaling(in_val: Any, out_val: Any) -> dict[str, Any] | None:
67
+ if not (isinstance(in_val, (int, float)) and isinstance(out_val, (int, float))):
68
+ return None
69
+ if in_val == 0 or out_val == 0:
70
+ return None
71
+ ratio = in_val / out_val
72
+ if abs(ratio - 100) < 0.01:
73
+ return {"type": "divide", "factor": 100}
74
+ if abs(ratio - 1000) < 0.01:
75
+ return {"type": "divide", "factor": 1000}
76
+ if abs(ratio - 0.01) < 0.0001:
77
+ return {"type": "multiply", "factor": 100}
78
+ return None
79
+
80
+
81
def _generate_rules(
    input_sample: DataSample, output_sample: DataSample
) -> tuple[list[TransformationRule], float]:
    """Infer transformation rules by matching flattened sample fields.

    Each output field is matched to its most similar input field; matches
    scoring >= 0.7 become scaling/rename/convert/direct rules, and any
    leftover output fields fall back to low-confidence "constant" rules.

    Returns:
        (rules, accuracy) where accuracy is the mean rule confidence,
        capped at 0.99, or 0.0 when no rules were produced.
    """
    source_fields = _flatten_object(input_sample.data or {})
    target_fields = _flatten_object(output_sample.data or {})

    rules: list[TransformationRule] = []
    resolved: set[str] = set()

    for target in target_fields:
        # Pick the highest-scoring source field (first wins on ties).
        candidate: str | None = None
        score = 0.0
        for source in source_fields:
            s = _field_name_similarity(source, target)
            if s > score:
                candidate, score = source, s

        if candidate is None or score < 0.7:
            continue

        src_val = source_fields[candidate]
        dst_val = target_fields[target]
        scaling = _detect_scaling(src_val, dst_val)

        if scaling:
            kind, factor = scaling["type"], scaling.get("factor")
        elif 0.9 <= score < 1.0:
            kind, factor = "rename", None
        elif type(src_val) is not type(dst_val) and not (
            isinstance(src_val, (int, float)) and isinstance(dst_val, (int, float))
        ):
            # Different types (and not merely int vs float) → conversion.
            kind, factor = "convert", None
        else:
            kind, factor = "direct", None

        rules.append(
            TransformationRule(
                source_field=candidate,
                target_field=target,
                type=kind,
                confidence=score,
                factor=factor,
            )
        )
        resolved.add(target)

    # Output fields with no acceptable match become constant placeholders.
    rules.extend(
        TransformationRule(
            source_field="",
            target_field=target,
            type="constant",
            confidence=0.4,
        )
        for target in target_fields
        if target not in resolved
    )

    if not rules:
        return rules, 0.0
    return rules, min(sum(r.confidence for r in rules) / len(rules), 0.99)
149
+
150
+
151
def _apply_rules(input_data: Any, rules: list[TransformationRule]) -> dict[str, Any]:
    """Apply transformation rules to *input_data*, producing a nested dict."""
    flat = _flatten_object(input_data or {})
    result: dict[str, Any] = {}

    for rule in rules:
        if rule.type == "constant":
            # Constant rules have no source field; emit a None placeholder.
            _set_nested(result, rule.target_field, None)
            continue

        value = flat.get(rule.source_field)
        if value is None:
            # Missing inputs (or explicit None values) are skipped.
            continue

        is_number = isinstance(value, (int, float))
        if rule.type == "divide" and rule.factor and is_number:
            value /= rule.factor
        elif rule.type == "multiply" and rule.factor and is_number:
            value *= rule.factor
        elif rule.type == "convert":
            value = str(value)

        _set_nested(result, rule.target_field, value)

    return result
174
+
175
+
176
+ def _set_nested(obj: dict[str, Any], key: str, value: Any) -> None:
177
+ """Write a dot-notation key into a nested dict."""
178
+ parts = key.split(".")
179
+ cursor = obj
180
+ for part in parts[:-1]:
181
+ if part not in cursor or not isinstance(cursor[part], dict):
182
+ cursor[part] = {}
183
+ cursor = cursor[part]
184
+ cursor[parts[-1]] = value
185
+
186
+
187
+ # ── PipelineEngine ────────────────────────────────────────────────────────────
188
+
189
class PipelineEngine:
    """Manages in-memory ETL pipelines.

    Pipelines live in a dict keyed by a generated id (``pipeline_<n>``).
    A single lock guards the registry and the id counter; config objects
    are handed back by reference, so concurrent mutation of a returned
    pipeline is the caller's responsibility.
    """

    def __init__(self) -> None:
        self._pipelines: dict[str, PipelineConfig] = {}  # id -> config
        self._next_id = 1  # counter used to mint pipeline ids
        self._lock = threading.Lock()

    def _get_or_raise(self, pipeline_id: str) -> PipelineConfig:
        """Return the pipeline for *pipeline_id* or raise ValueError.

        Caller must hold ``self._lock``.
        """
        pipeline = self._pipelines.get(pipeline_id)
        if pipeline is None:
            raise ValueError(f"Pipeline {pipeline_id} not found")
        return pipeline

    def create_pipeline(
        self,
        name: str,
        input_sample: DataSample,
        output_sample: DataSample,
        options: dict[str, Any] | None = None,
    ) -> PipelineConfig:
        """Create and register a pipeline inferred from the two samples.

        Transformation rules and the accuracy estimate are derived by
        comparing the flattened input and output samples.

        Args:
            name: Human-readable pipeline name.
            input_sample: Sample of the source data.
            output_sample: Sample of the desired output data.
            options: Optional overrides: ``model_config``,
                ``data_sources``, ``etl_options``.

        Returns:
            The newly registered PipelineConfig.
        """
        options = options or {}
        rules, accuracy = _generate_rules(input_sample, output_sample)

        # Mint the id and insert in ONE critical section (the original
        # took the lock twice, leaving a window between them).
        with self._lock:
            pipeline_id = f"pipeline_{self._next_id}"
            self._next_id += 1
            pipeline = PipelineConfig(
                id=pipeline_id,
                name=name,
                input_sample=input_sample,
                output_sample=output_sample,
                transformation_rules=rules,
                model_config=options.get("model_config") or {"type": "auto"},
                accuracy=accuracy,
                version="1.0.0",
                created_at=datetime.now(),
                data_sources=options.get("data_sources") or [],
                etl_config=options.get("etl_options") or {},
                performance=PipelinePerformance(
                    throughput=1000, latency=5, error_rate=1 - accuracy
                ),
            )
            self._pipelines[pipeline_id] = pipeline

        return pipeline

    def update_pipeline(
        self,
        pipeline_id: str,
        input_sample: DataSample | None = None,
        output_sample: DataSample | None = None,
    ) -> PipelineConfig:
        """Replace either sample and re-derive rules and accuracy.

        Raises:
            ValueError: If *pipeline_id* is not registered.
        """
        with self._lock:
            pipeline = self._get_or_raise(pipeline_id)
            if input_sample is not None:
                pipeline.input_sample = input_sample
            if output_sample is not None:
                pipeline.output_sample = output_sample
            if input_sample is not None or output_sample is not None:
                # Samples changed → recompute the inferred transformation.
                rules, accuracy = _generate_rules(
                    pipeline.input_sample, pipeline.output_sample
                )
                pipeline.transformation_rules = rules
                pipeline.accuracy = accuracy
                pipeline.performance.error_rate = 1 - accuracy
            return pipeline

    def execute_pipeline(self, pipeline_id: str, input_data: Any) -> ExecutionResult:
        """Run *input_data* through the pipeline's transformation rules.

        Raises:
            ValueError: If *pipeline_id* is not registered.
        """
        with self._lock:
            pipeline = self._get_or_raise(pipeline_id)

        # Transformation runs outside the lock — it only reads the rules.
        transformed = _apply_rules(input_data, pipeline.transformation_rules)
        return ExecutionResult(
            processed=True,
            data=transformed,
            timestamp=datetime.now(),
            pipeline_id=pipeline_id,
        )

    def get_pipeline(self, pipeline_id: str) -> PipelineConfig | None:
        """Return the pipeline for *pipeline_id*, or None if absent."""
        with self._lock:
            return self._pipelines.get(pipeline_id)

    def get_all_pipelines(self) -> list[PipelineConfig]:
        """Return a snapshot list of all registered pipelines."""
        with self._lock:
            return list(self._pipelines.values())

    def get_pipeline_stats(self, pipeline_id: str) -> PipelineStats:
        """Return summary stats for a pipeline.

        ``last_executed`` is simply "now" — no execution history is kept.

        Raises:
            ValueError: If *pipeline_id* is not registered.
        """
        with self._lock:
            pipeline = self._get_or_raise(pipeline_id)
        return PipelineStats(
            id=pipeline.id,
            name=pipeline.name,
            performance=pipeline.performance,
            version=pipeline.version,
            last_executed=datetime.now(),
        )

    def generate_pipeline_code(self, pipeline_id: str) -> str:
        """Render the pipeline's rules as standalone Python source.

        Raises:
            ValueError: If *pipeline_id* is not registered.
        """
        with self._lock:
            pipeline = self._get_or_raise(pipeline_id)

        # Bug fix: the original only replaced spaces, so names containing
        # hyphens (or starting with a digit) produced an invalid Python
        # identifier. Replace every non-identifier character and prefix
        # names that would start with a digit.
        raw = pipeline.name.replace(" ", "_")
        fn_name = "".join(ch if ch.isalnum() or ch == "_" else "_" for ch in raw)
        if not fn_name or fn_name[0].isdigit():
            fn_name = f"pipeline_{fn_name}"

        lines = [
            f"# Auto-generated pipeline: {pipeline.name}",
            f"# Accuracy: {pipeline.accuracy * 100:.1f}% | Rules: {len(pipeline.transformation_rules)}",
            f"def {fn_name}(input_data: dict) -> dict:",
            "    out = {}",
        ]
        # NOTE(review): dotted target fields emit flat out['a.b'] keys here,
        # which diverges from _apply_rules' nested writes — confirm intent.
        for rule in pipeline.transformation_rules:
            if rule.type == "constant":
                lines.append(f"    out['{rule.target_field}'] = None  # computed — fill in manually")
            elif rule.type == "divide" and rule.factor:
                lines.append(f"    out['{rule.target_field}'] = input_data['{rule.source_field}'] / {rule.factor}")
            elif rule.type == "multiply" and rule.factor:
                lines.append(f"    out['{rule.target_field}'] = input_data['{rule.source_field}'] * {rule.factor}")
            elif rule.type == "convert":
                lines.append(f"    out['{rule.target_field}'] = str(input_data['{rule.source_field}'])")
            else:
                lines.append(
                    f"    out['{rule.target_field}'] = input_data['{rule.source_field}']"
                    f"  # {rule.type} ({int(rule.confidence * 100)}%)"
                )
        lines.append("    return out")
        return "\n".join(lines)

    def start_realtime_processing(
        self,
        pipeline_id: str,
        on_data: Callable[[ExecutionResult], None],
        interval_ms: int = 1000,
    ) -> Callable[[], None]:
        """Start a daemon thread that re-executes the pipeline on a timer.

        Each tick re-runs the pipeline on its own ``input_sample.data``
        and passes the result to *on_data*.

        Returns:
            A zero-argument stop function (sets the loop's stop event).

        Raises:
            ValueError: If *pipeline_id* is not registered.
        """
        with self._lock:
            pipeline = self._get_or_raise(pipeline_id)

        stop_event = threading.Event()

        def _run() -> None:
            # Event.wait doubles as the sleep; returns True once stopped.
            while not stop_event.wait(interval_ms / 1000):
                result = self.execute_pipeline(pipeline_id, pipeline.input_sample.data)
                on_data(result)

        thread = threading.Thread(target=_run, daemon=True)
        thread.start()
        return stop_event.set

    def get_pipeline_execution_mode(self, _pipeline_id: str) -> str:
        """Execution mode label; currently hard-coded to "static"."""
        return "static"

    def is_pipeline_static(self, _pipeline_id: str) -> bool:
        """Whether the pipeline is statically compiled; currently always True."""
        return True

    def pipeline_to_mindmap(self, pipeline: PipelineConfig):
        """Convert a PipelineConfig to a MindMapNode tree.

        The import is deferred to avoid a circular import with .types.
        """
        from .types import MindMapNode

        mode = self.get_pipeline_execution_mode(pipeline.id)
        is_static = self.is_pipeline_static(pipeline.id)
        mode_label = "Static compiled execution" if is_static else "Dynamic execution"

        children = [
            MindMapNode(
                topic="Execution Mode",
                summary=mode_label,
                skills=[f"Mode: {mode.upper()}"],
            )
        ]

        if pipeline.data_sources:
            src_children = []
            for src in pipeline.data_sources:
                t = src.get("type", "")
                conn = src.get("connection", {})
                # Prefer a host, then an API endpoint, else label as local.
                label = conn.get("host") or conn.get("apiEndpoint") or "Local"
                src_children.append(MindMapNode(topic=t, summary=label))
            children.append(
                MindMapNode(
                    topic="Data Sources",
                    summary=f"{len(pipeline.data_sources)} connected sources",
                    children=src_children,
                )
            )

        children.append(
            MindMapNode(
                topic="Input Schema",
                summary="Data input configuration",
                skills=list(pipeline.input_sample.schema.keys()),
            )
        )

        rule_children = [
            MindMapNode(
                topic=f"{r.source_field} → {r.target_field}",
                summary=f"{r.type} ({int(r.confidence * 100)}%)",
            )
            for r in pipeline.transformation_rules
        ]
        children.append(
            MindMapNode(
                topic="Transformations",
                summary=f"{len(pipeline.transformation_rules)} rules",
                children=rule_children,
            )
        )

        children.append(
            MindMapNode(
                topic="Output Schema",
                summary="Data output configuration",
                skills=list(pipeline.output_sample.schema.keys()),
            )
        )

        return MindMapNode(
            topic=pipeline.name,
            summary=f"ETL Pipeline - Accuracy: {pipeline.accuracy * 100:.1f}%",
            skills=[f"Version: {pipeline.version}"],
            children=children,
        )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nodeqmindmap
3
- Version: 2.2.0
3
+ Version: 2.3.0
4
4
  Summary: Python port of nodeq-mindmap: MindMapNode data model and ETL pipeline engine
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/workflow-builder/nodeq-mindmap
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "nodeqmindmap"
7
- version = "2.2.0"
7
+ version = "2.3.0"
8
8
  description = "Python port of nodeq-mindmap: MindMapNode data model and ETL pipeline engine"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -1,198 +0,0 @@
1
- """PipelineEngine — mirrors pipeline-engine.ts."""
2
-
3
- from __future__ import annotations
4
-
5
- import threading
6
- from datetime import datetime
7
- from typing import Any
8
-
9
- from .types import (
10
- DataSample,
11
- ExecutionResult,
12
- PipelineConfig,
13
- PipelinePerformance,
14
- PipelineStats,
15
- TransformationRule,
16
- )
17
-
18
-
19
- class PipelineEngine:
20
- """Manages in-memory ETL pipelines."""
21
-
22
- def __init__(self) -> None:
23
- self._pipelines: dict[str, PipelineConfig] = {}
24
- self._next_id = 1
25
- self._lock = threading.Lock()
26
-
27
- def create_pipeline(
28
- self,
29
- name: str,
30
- input_sample: DataSample,
31
- output_sample: DataSample,
32
- options: dict[str, Any] | None = None,
33
- ) -> PipelineConfig:
34
- options = options or {}
35
- with self._lock:
36
- pipeline_id = f"pipeline_{self._next_id}"
37
- self._next_id += 1
38
-
39
- model_config: dict[str, Any] = options.get("model_config") or {"type": "auto"}
40
- data_sources: list[dict[str, Any]] = options.get("data_sources") or []
41
- etl_config: dict[str, Any] = options.get("etl_options") or {}
42
-
43
- pipeline = PipelineConfig(
44
- id=pipeline_id,
45
- name=name,
46
- input_sample=input_sample,
47
- output_sample=output_sample,
48
- transformation_rules=[],
49
- model_config=model_config,
50
- accuracy=0.85,
51
- version="1.0.0",
52
- created_at=datetime.now(),
53
- data_sources=data_sources,
54
- etl_config=etl_config,
55
- performance=PipelinePerformance(throughput=100, latency=50, error_rate=0.01),
56
- )
57
-
58
- with self._lock:
59
- self._pipelines[pipeline_id] = pipeline
60
-
61
- return pipeline
62
-
63
- def update_pipeline(
64
- self,
65
- pipeline_id: str,
66
- input_sample: DataSample | None = None,
67
- output_sample: DataSample | None = None,
68
- ) -> PipelineConfig:
69
- with self._lock:
70
- pipeline = self._pipelines.get(pipeline_id)
71
- if pipeline is None:
72
- raise ValueError(f"Pipeline {pipeline_id} not found")
73
- if input_sample is not None:
74
- pipeline.input_sample = input_sample
75
- if output_sample is not None:
76
- pipeline.output_sample = output_sample
77
- return pipeline
78
-
79
- def execute_pipeline(self, pipeline_id: str, input_data: Any) -> ExecutionResult:
80
- with self._lock:
81
- exists = pipeline_id in self._pipelines
82
- if not exists:
83
- raise ValueError(f"Pipeline {pipeline_id} not found")
84
- return ExecutionResult(
85
- processed=True,
86
- data=input_data,
87
- timestamp=datetime.now(),
88
- pipeline_id=pipeline_id,
89
- )
90
-
91
- def get_pipeline(self, pipeline_id: str) -> PipelineConfig | None:
92
- with self._lock:
93
- return self._pipelines.get(pipeline_id)
94
-
95
- def get_all_pipelines(self) -> list[PipelineConfig]:
96
- with self._lock:
97
- return list(self._pipelines.values())
98
-
99
- def get_pipeline_stats(self, pipeline_id: str) -> PipelineStats:
100
- with self._lock:
101
- pipeline = self._pipelines.get(pipeline_id)
102
- if pipeline is None:
103
- raise ValueError(f"Pipeline {pipeline_id} not found")
104
- return PipelineStats(
105
- id=pipeline.id,
106
- name=pipeline.name,
107
- performance=pipeline.performance,
108
- version=pipeline.version,
109
- last_executed=datetime.now(),
110
- )
111
-
112
- def generate_pipeline_code(self, pipeline_id: str) -> str:
113
- with self._lock:
114
- pipeline = self._pipelines.get(pipeline_id)
115
- if pipeline is None:
116
- raise ValueError(f"Pipeline {pipeline_id} not found")
117
- fn_name = pipeline.name.replace(" ", "_")
118
- return (
119
- f"# Generated pipeline code for {pipeline.name}\n"
120
- f"def {fn_name}(input_data):\n"
121
- f" # TODO: add transformation logic\n"
122
- f" return input_data\n"
123
- )
124
-
125
- def get_pipeline_execution_mode(self, _pipeline_id: str) -> str:
126
- return "static"
127
-
128
- def is_pipeline_static(self, _pipeline_id: str) -> bool:
129
- return True
130
-
131
- def pipeline_to_mindmap(self, pipeline: PipelineConfig):
132
- """Convert a PipelineConfig to a MindMapNode tree (import delayed to avoid circular)."""
133
- from .types import MindMapNode
134
-
135
- mode = self.get_pipeline_execution_mode(pipeline.id)
136
- is_static = self.is_pipeline_static(pipeline.id)
137
- mode_label = "Static compiled execution" if is_static else "Dynamic execution"
138
-
139
- children = [
140
- MindMapNode(
141
- topic="Execution Mode",
142
- summary=mode_label,
143
- skills=[f"Mode: {mode.upper()}"],
144
- )
145
- ]
146
-
147
- if pipeline.data_sources:
148
- src_children = []
149
- for src in pipeline.data_sources:
150
- t = src.get("type", "")
151
- conn = src.get("connection", {})
152
- label = conn.get("host") or conn.get("apiEndpoint") or "Local"
153
- src_children.append(MindMapNode(topic=t, summary=label))
154
- children.append(
155
- MindMapNode(
156
- topic="Data Sources",
157
- summary=f"{len(pipeline.data_sources)} connected sources",
158
- children=src_children,
159
- )
160
- )
161
-
162
- children.append(
163
- MindMapNode(
164
- topic="Input Schema",
165
- summary="Data input configuration",
166
- skills=list(pipeline.input_sample.schema.keys()),
167
- )
168
- )
169
-
170
- rule_children = [
171
- MindMapNode(
172
- topic=f"{r.source_field} → {r.target_field}",
173
- summary=f"{r.type} ({int(r.confidence * 100)}%)",
174
- )
175
- for r in pipeline.transformation_rules
176
- ]
177
- children.append(
178
- MindMapNode(
179
- topic="Transformations",
180
- summary=f"{len(pipeline.transformation_rules)} rules",
181
- children=rule_children,
182
- )
183
- )
184
-
185
- children.append(
186
- MindMapNode(
187
- topic="Output Schema",
188
- summary="Data output configuration",
189
- skills=list(pipeline.output_sample.schema.keys()),
190
- )
191
- )
192
-
193
- return MindMapNode(
194
- topic=pipeline.name,
195
- summary=f"ETL Pipeline - Accuracy: {pipeline.accuracy * 100:.1f}%",
196
- skills=[f"Version: {pipeline.version}"],
197
- children=children,
198
- )
File without changes
File without changes