npcpy 1.2.35-py3-none-any.whl → 1.2.37-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcpy/npc_array.py ADDED
@@ -0,0 +1,1294 @@
1
+ """
2
+ npc_array.py - NumPy-like interface for language models and ML at scale
3
+
4
+ This module provides NPCArray, a vectorized abstraction for model populations
5
+ that enables ensemble interactions, evolutionary optimization, and parallel
6
+ inference across heterogeneous model types (LLMs, sklearn, torch, etc.).
7
+
8
+ Core concepts:
9
+ - NPCArray wraps a collection of models (LLMs, ML models, or NPCs)
10
+ - Operations are lazy - they build a computation graph
11
+ - .collect() materializes results with automatic parallelization (like Spark)
12
+ - Same interface for single items (treated as 1D array of length 1)
13
+
14
+ Example:
15
+ >>> models = NPCArray.from_llms(['gpt-4', 'claude-3', 'llama3'])
16
+ >>> result = models.infer(prompts).filter(lambda r: len(r) > 100).vote()
17
+ >>> result.collect()
18
+ """
19
+
20
+ from __future__ import annotations
21
+ import copy
22
+ import itertools
23
+ import pickle
24
+ from concurrent.futures import ThreadPoolExecutor, as_completed
25
+ from dataclasses import dataclass, field
26
+ from typing import (
27
+ Any, Callable, Dict, List, Optional, Tuple, Union,
28
+ TYPE_CHECKING, Literal, Sequence
29
+ )
30
+ from enum import Enum
31
+ import numpy as np
32
+
33
+ if TYPE_CHECKING:
34
+ import polars as pl
35
+ import pandas as pd
36
+
37
+
38
+ # Operation types for the computation graph
39
+ class OpType(Enum):
40
+ SOURCE = "source"
41
+ INFER = "infer"
42
+ PREDICT = "predict"
43
+ FIT = "fit"
44
+ FORWARD = "forward"
45
+ MAP = "map"
46
+ FILTER = "filter"
47
+ REDUCE = "reduce"
48
+ CHAIN = "chain"
49
+ EVOLVE = "evolve"
50
+
51
+
52
+ @dataclass
53
+ class GraphNode:
54
+ """A node in the lazy computation graph"""
55
+ op_type: OpType
56
+ params: Dict[str, Any] = field(default_factory=dict)
57
+ parents: List['GraphNode'] = field(default_factory=list)
58
+ result: Any = None # Populated on compute()
59
+ shape: Optional[Tuple[int, ...]] = None
60
+
61
+
62
+ @dataclass
63
+ class ModelSpec:
64
+ """Specification for a model in the array"""
65
+ model_type: Literal["llm", "sklearn", "torch", "npc", "custom"]
66
+ model_ref: Any # model name, path, fitted object, or NPC
67
+ provider: Optional[str] = None
68
+ config: Dict[str, Any] = field(default_factory=dict)
69
+
70
+ def __hash__(self):
71
+ return hash((self.model_type, str(self.model_ref), self.provider))
72
+
73
+
74
+ @dataclass
75
+ class ResponseTensor:
76
+ """
77
+ Container for vectorized model outputs with shape information.
78
+ Similar to numpy ndarray but for model responses.
79
+ """
80
+ data: np.ndarray # Object array holding responses
81
+ model_specs: List[ModelSpec]
82
+ prompts: Optional[List[str]] = None
83
+ metadata: Dict[str, Any] = field(default_factory=dict)
84
+
85
+ @property
86
+ def shape(self) -> Tuple[int, ...]:
87
+ return self.data.shape
88
+
89
+ def __getitem__(self, key):
90
+ """NumPy-style indexing"""
91
+ result_data = self.data[key]
92
+ if isinstance(result_data, np.ndarray):
93
+ # Slice of models
+ new_prompts = self.prompts
94
+ if isinstance(key, int):
95
+ new_specs = [self.model_specs[key]]
96
+ elif isinstance(key, slice):
97
+ new_specs = self.model_specs[key]
98
+ elif isinstance(key, tuple) and len(key) == 2:
99
+ model_key, prompt_key = key
100
+ if isinstance(model_key, int):
101
+ new_specs = [self.model_specs[model_key]]
102
+ else:
103
+ new_specs = self.model_specs[model_key] if isinstance(model_key, slice) else self.model_specs
104
+ new_prompts = self.prompts[prompt_key] if self.prompts and isinstance(prompt_key, (int, slice)) else self.prompts
105
+ else:
106
+ new_specs = self.model_specs
107
+ return ResponseTensor(
108
+ data=result_data if result_data.ndim > 0 else np.array([result_data]),
109
+ model_specs=new_specs if isinstance(new_specs, list) else [new_specs],
110
+ prompts=new_prompts,
111
+ metadata=self.metadata
112
+ )
113
+ return result_data
114
+
115
+ def tolist(self) -> List:
116
+ """Convert to nested Python list"""
117
+ return self.data.tolist()
118
+
119
+ def flatten(self) -> List:
120
+ """Flatten to 1D list"""
121
+ return self.data.flatten().tolist()
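# A minimal sketch of the NumPy-style indexing described above, built from a
# hand-made 2x2 ResponseTensor. The model names and providers are illustrative
# placeholders, not recommendations.
def _sketch_response_tensor_indexing():
    specs = [
        ModelSpec(model_type="llm", model_ref="gpt-4", provider="openai"),
        ModelSpec(model_type="llm", model_ref="llama3.2", provider="ollama"),
    ]
    data = np.array([["a1", "a2"], ["b1", "b2"]], dtype=object)
    tensor = ResponseTensor(data=data, model_specs=specs, prompts=["p1", "p2"])
    row = tensor[0]          # first model's responses, still a ResponseTensor
    cell = tensor[1, 0]      # a single response string ("b1")
    flat = tensor.flatten()  # ["a1", "a2", "b1", "b2"]
    return row, cell, flat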
122
+
123
+
124
+ class NPCArray:
125
+ """
126
+ NumPy-like array for model populations.
127
+
128
+ Supports:
129
+ - LLMs (via provider/model name)
130
+ - sklearn models (fitted or specs)
131
+ - PyTorch models
132
+ - NPCs (from npcpy)
133
+ - Custom model wrappers
134
+
135
+ All operations are lazy until .collect() (or its alias .compute()) is called.
136
+ """
137
+
138
+ def __init__(
139
+ self,
140
+ specs: List[ModelSpec],
141
+ graph: Optional[GraphNode] = None
142
+ ):
143
+ self._specs = specs
144
+ self._graph = graph or GraphNode(
145
+ op_type=OpType.SOURCE,
146
+ params={"specs": specs},
147
+ shape=(len(specs),)
148
+ )
149
+
150
+ # ==================== Factory Methods ====================
151
+
152
+ @classmethod
153
+ def from_llms(
154
+ cls,
155
+ models: Union[str, List[str]],
156
+ providers: Optional[Union[str, List[str]]] = None,
157
+ **config
158
+ ) -> 'NPCArray':
159
+ """
160
+ Create NPCArray from LLM model names.
161
+
162
+ Args:
163
+ models: Single model name or list of model names
164
+ providers: Optional provider(s) - auto-detected if not provided
165
+ **config: Additional config passed to all models
166
+
167
+ Example:
168
+ >>> arr = NPCArray.from_llms(['gpt-4', 'claude-3', 'llama3'])
169
+ >>> arr = NPCArray.from_llms('gpt-4') # Single model, still array-like
170
+ """
171
+ if isinstance(models, str):
172
+ models = [models]
173
+
174
+ if providers is None:
175
+ providers = [None] * len(models)
176
+ elif isinstance(providers, str):
177
+ providers = [providers] * len(models)
178
+ elif len(providers) == 1:
179
+ providers = providers * len(models)
180
+
181
+ specs = [
182
+ ModelSpec(
183
+ model_type="llm",
184
+ model_ref=model,
185
+ provider=provider,
186
+ config=config.copy()
187
+ )
188
+ for model, provider in zip(models, providers)
189
+ ]
190
+
191
+ return cls(specs)
192
+
193
+ @classmethod
194
+ def from_npcs(cls, npcs: Union[Any, List[Any]]) -> 'NPCArray':
195
+ """
196
+ Create NPCArray from NPC objects.
197
+
198
+ Args:
199
+ npcs: Single NPC or list of NPCs from npcpy
200
+ """
201
+ if not isinstance(npcs, list):
202
+ npcs = [npcs]
203
+
204
+ specs = [
205
+ ModelSpec(
206
+ model_type="npc",
207
+ model_ref=npc,
208
+ provider=getattr(npc, 'provider', None),
209
+ config={"model": getattr(npc, 'model', None)}
210
+ )
211
+ for npc in npcs
212
+ ]
213
+
214
+ return cls(specs)
215
+
216
+ @classmethod
217
+ def from_sklearn(
218
+ cls,
219
+ models: Union[Any, List[Any]],
220
+ fitted: bool = True
221
+ ) -> 'NPCArray':
222
+ """
223
+ Create NPCArray from sklearn models.
224
+
225
+ Args:
226
+ models: Fitted sklearn model(s) or estimator class names
227
+ fitted: Whether models are already fitted
228
+ """
229
+ if not isinstance(models, list):
230
+ models = [models]
231
+
232
+ specs = [
233
+ ModelSpec(
234
+ model_type="sklearn",
235
+ model_ref=model,
236
+ config={"fitted": fitted}
237
+ )
238
+ for model in models
239
+ ]
240
+
241
+ return cls(specs)
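# A small sketch of wrapping already-fitted sklearn estimators and running a
# lazy batch prediction; assumes scikit-learn is installed.
def _sketch_sklearn_ensemble():
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    X = [[0.0], [1.0], [2.0], [3.0]]
    y = [0, 0, 1, 1]
    models = [LogisticRegression().fit(X, y), DecisionTreeClassifier().fit(X, y)]
    arr = NPCArray.from_sklearn(models, fitted=True)
    preds = arr.predict(X).collect()  # one prediction array per model
    return preds.tolist()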
242
+
243
+ @classmethod
244
+ def from_torch(
245
+ cls,
246
+ models: Union[Any, List[Any]],
247
+ device: str = "cpu"
248
+ ) -> 'NPCArray':
249
+ """
250
+ Create NPCArray from PyTorch models.
251
+
252
+ Args:
253
+ models: PyTorch nn.Module(s)
254
+ device: Device to run inference on
255
+ """
256
+ if not isinstance(models, list):
257
+ models = [models]
258
+
259
+ specs = [
260
+ ModelSpec(
261
+ model_type="torch",
262
+ model_ref=model,
263
+ config={"device": device}
264
+ )
265
+ for model in models
266
+ ]
267
+
268
+ return cls(specs)
269
+
270
+ @classmethod
271
+ def from_specs(
272
+ cls,
273
+ specs: List[Dict[str, Any]]
274
+ ) -> 'NPCArray':
275
+ """
276
+ Create NPCArray from model specification dicts.
277
+
278
+ Args:
279
+ specs: List of dicts with 'type', and type-specific params
280
+
281
+ Example:
282
+ >>> specs = [
283
+ ... {'type': 'RandomForest', 'n_estimators': 100},
284
+ ... {'type': 'XGBoost', 'max_depth': 5}
285
+ ... ]
286
+ >>> arr = NPCArray.from_specs(specs)
287
+ """
288
+ model_specs = [
289
+ ModelSpec(
290
+ model_type="sklearn",
291
+ model_ref=spec.get('type'),
292
+ config={k: v for k, v in spec.items() if k != 'type'}
293
+ )
294
+ for spec in specs
295
+ ]
296
+
297
+ return cls(model_specs)
298
+
299
+ @classmethod
300
+ def meshgrid(cls, **param_ranges) -> 'NPCArray':
301
+ """
302
+ Create NPCArray from cartesian product of parameters.
303
+
304
+ Args:
305
+ **param_ranges: Parameter name -> list of values
306
+
307
+ Example:
308
+ >>> arr = NPCArray.meshgrid(
309
+ ... model=['gpt-4', 'claude-3'],
310
+ ... temperature=[0.0, 0.5, 1.0]
311
+ ... )
312
+ >>> arr.shape # (6,) - 2 models * 3 temperatures
313
+ """
314
+ keys = list(param_ranges.keys())
315
+ values = [param_ranges[k] for k in keys]
316
+
317
+ specs = []
318
+ for combo in itertools.product(*values):
319
+ config = dict(zip(keys, combo))
320
+ model = config.pop('model', 'llama3.2')
321
+ provider = config.pop('provider', None)
322
+ specs.append(ModelSpec(
323
+ model_type="llm",
324
+ model_ref=model,
325
+ provider=provider,
326
+ config=config
327
+ ))
328
+
329
+ return cls(specs)
330
+
331
+ # ==================== Properties ====================
332
+
333
+ @property
334
+ def shape(self) -> Tuple[int, ...]:
335
+ """Shape of the model array"""
336
+ return (len(self._specs),)
337
+
338
+ @property
339
+ def specs(self) -> List[ModelSpec]:
340
+ """Model specifications"""
341
+ return self._specs
342
+
343
+ def __len__(self) -> int:
344
+ return len(self._specs)
345
+
346
+ def __repr__(self) -> str:
347
+ types = [s.model_type for s in self._specs]
348
+ return f"NPCArray(shape={self.shape}, types={types})"
349
+
350
+ # ==================== Lazy Operations ====================
351
+
352
+ def infer(
353
+ self,
354
+ prompts: Union[str, List[str]],
355
+ **kwargs
356
+ ) -> 'LazyResult':
357
+ """
358
+ Queue inference across all models for given prompts.
359
+
360
+ Args:
361
+ prompts: Single prompt or list of prompts
362
+ **kwargs: Additional inference params (temperature, etc.)
363
+
364
+ Returns:
365
+ LazyResult with shape (n_models, n_prompts)
366
+ """
367
+ if isinstance(prompts, str):
368
+ prompts = [prompts]
369
+
370
+ new_node = GraphNode(
371
+ op_type=OpType.INFER,
372
+ params={"prompts": prompts, **kwargs},
373
+ parents=[self._graph],
374
+ shape=(len(self._specs), len(prompts))
375
+ )
376
+
377
+ return LazyResult(self._specs, new_node, prompts=prompts)
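# A usage sketch for infer(): nothing runs until collect(). Assumes a locally
# served "llama3.2" model that npcpy's provider resolution can reach.
def _sketch_infer():
    arr = NPCArray.from_llms(["llama3.2"])
    lazy = arr.infer(["What is 2 + 2?", "Name a prime number."], temperature=0.0)
    tensor = lazy.collect(parallel=True)
    return tensor.shape  # (1, 2): one model, two prompts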
378
+
379
+ def predict(
380
+ self,
381
+ X: Any,
382
+ **kwargs
383
+ ) -> 'LazyResult':
384
+ """
385
+ Queue prediction for sklearn/ML models.
386
+
387
+ Args:
388
+ X: Input features (array-like)
389
+ **kwargs: Additional predict params
390
+
391
+ Returns:
392
+ LazyResult with predictions
393
+ """
394
+ new_node = GraphNode(
395
+ op_type=OpType.PREDICT,
396
+ params={"X": X, **kwargs},
397
+ parents=[self._graph],
398
+ shape=(len(self._specs), len(X) if hasattr(X, '__len__') else 1)
399
+ )
400
+
401
+ return LazyResult(self._specs, new_node)
402
+
403
+ def forward(
404
+ self,
405
+ inputs: Any,
406
+ **kwargs
407
+ ) -> 'LazyResult':
408
+ """
409
+ Queue forward pass for PyTorch models.
410
+
411
+ Args:
412
+ inputs: Input tensor(s)
413
+ **kwargs: Additional forward params
414
+
415
+ Returns:
416
+ LazyResult with outputs
417
+ """
418
+ new_node = GraphNode(
419
+ op_type=OpType.FORWARD,
420
+ params={"inputs": inputs, **kwargs},
421
+ parents=[self._graph],
422
+ shape=(len(self._specs),)
423
+ )
424
+
425
+ return LazyResult(self._specs, new_node)
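# A minimal sketch of forward() with tiny PyTorch modules; assumes torch is
# installed. Each model receives the same batch on the configured device.
def _sketch_torch_forward():
    import torch
    arr = NPCArray.from_torch([torch.nn.Linear(4, 2), torch.nn.Linear(4, 2)], device="cpu")
    batch = torch.randn(3, 4)
    out = arr.forward(batch).collect()  # object-dtype ResponseTensor of per-model outputs
    return out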
426
+
427
+ def fit(
428
+ self,
429
+ X: Any,
430
+ y: Optional[Any] = None,
431
+ **kwargs
432
+ ) -> 'NPCArray':
433
+ """
434
+ Queue fitting for all models.
435
+
436
+ For LLMs, this means fine-tuning.
437
+ For sklearn/torch, this means training.
438
+
439
+ Args:
440
+ X: Training features
441
+ y: Training targets (optional for unsupervised)
442
+ **kwargs: Additional fit params (epochs, method, etc.)
443
+
444
+ Returns:
445
+ New NPCArray with fitted model specs
446
+ """
447
+ new_node = GraphNode(
448
+ op_type=OpType.FIT,
449
+ params={"X": X, "y": y, **kwargs},
450
+ parents=[self._graph],
451
+ shape=self.shape
452
+ )
453
+
454
+ # Return new NPCArray that will have fitted models
455
+ return NPCArray(self._specs, new_node)
456
+
457
+ def evolve(
458
+ self,
459
+ fitness_scores: List[float],
460
+ mutate_fn: Optional[Callable] = None,
461
+ crossover_fn: Optional[Callable] = None,
462
+ selection: str = "tournament",
463
+ elite_ratio: float = 0.1
464
+ ) -> 'NPCArray':
465
+ """
466
+ Evolve the model population based on fitness scores.
467
+
468
+ Args:
469
+ fitness_scores: Fitness score for each model
470
+ mutate_fn: Custom mutation function
471
+ crossover_fn: Custom crossover function
472
+ selection: Selection strategy ('tournament', 'roulette', 'rank')
473
+ elite_ratio: Fraction of top performers to keep unchanged
474
+
475
+ Returns:
476
+ New NPCArray with evolved population
477
+ """
478
+ new_node = GraphNode(
479
+ op_type=OpType.EVOLVE,
480
+ params={
481
+ "fitness_scores": fitness_scores,
482
+ "mutate_fn": mutate_fn,
483
+ "crossover_fn": crossover_fn,
484
+ "selection": selection,
485
+ "elite_ratio": elite_ratio
486
+ },
487
+ parents=[self._graph],
488
+ shape=self.shape
489
+ )
490
+
491
+ return NPCArray(self._specs, new_node)
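# A sketch of the callables evolve() expects: mutate_fn maps one ModelSpec to a
# new ModelSpec, crossover_fn merges two parents. The "temperature" config key
# used here is only an assumed illustration.
import random

def _mutate_temperature(spec: ModelSpec) -> ModelSpec:
    # Jitter the sampling temperature, clamped to [0.0, 1.5]
    config = dict(spec.config)
    temp = config.get("temperature", 0.7) + random.uniform(-0.2, 0.2)
    config["temperature"] = min(1.5, max(0.0, temp))
    return ModelSpec(spec.model_type, spec.model_ref, spec.provider, config)

def _crossover_configs(a: ModelSpec, b: ModelSpec) -> ModelSpec:
    # Child keeps parent a's model reference and inherits config keys from both
    return ModelSpec(a.model_type, a.model_ref, a.provider, {**a.config, **b.config})

# e.g. population.evolve(scores, mutate_fn=_mutate_temperature, crossover_fn=_crossover_configs)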
492
+
493
+
494
+ class LazyResult:
495
+ """
496
+ Lazy result from model operations.
497
+
498
+ Builds computation graph without executing until .compute() is called.
499
+ Supports chaining operations like map, filter, reduce.
500
+ """
501
+
502
+ def __init__(
503
+ self,
504
+ specs: List[ModelSpec],
505
+ graph: GraphNode,
506
+ prompts: Optional[List[str]] = None
507
+ ):
508
+ self._specs = specs
509
+ self._graph = graph
510
+ self._prompts = prompts
511
+ self._computed = False
512
+ self._result: Optional[ResponseTensor] = None
513
+
514
+ @property
515
+ def shape(self) -> Optional[Tuple[int, ...]]:
516
+ """Expected shape of result"""
517
+ return self._graph.shape
518
+
519
+ # ==================== Chainable Operations ====================
520
+
521
+ def map(self, fn: Callable[[Any], Any]) -> 'LazyResult':
522
+ """
523
+ Apply function to each response.
524
+
525
+ Args:
526
+ fn: Function to apply to each response
527
+
528
+ Example:
529
+ >>> result.map(lambda r: len(r)) # Get lengths
530
+ >>> result.map(json.loads) # Parse JSON
531
+ """
532
+ new_node = GraphNode(
533
+ op_type=OpType.MAP,
534
+ params={"fn": fn},
535
+ parents=[self._graph],
536
+ shape=self._graph.shape
537
+ )
538
+
539
+ return LazyResult(self._specs, new_node, self._prompts)
540
+
541
+ def filter(self, predicate: Callable[[Any], bool]) -> 'LazyResult':
542
+ """
543
+ Filter responses by predicate.
544
+
545
+ Args:
546
+ predicate: Function returning True for responses to keep
547
+
548
+ Example:
549
+ >>> result.filter(lambda r: len(r) > 100)
550
+ >>> result.filter(lambda r: 'error' not in r.lower())
551
+ """
552
+ new_node = GraphNode(
553
+ op_type=OpType.FILTER,
554
+ params={"predicate": predicate},
555
+ parents=[self._graph],
556
+ shape=None # Unknown until computed
557
+ )
558
+
559
+ return LazyResult(self._specs, new_node, self._prompts)
560
+
561
+ def reduce(
562
+ self,
563
+ method: Union[str, Callable] = "vote",
564
+ axis: int = 0,
565
+ **kwargs
566
+ ) -> 'LazyResult':
567
+ """
568
+ Reduce responses along an axis.
569
+
570
+ Args:
571
+ method: Reduction method or custom function
572
+ - 'vote': Majority voting
573
+ - 'mean': Average (for numeric)
574
+ - 'concat': Concatenate strings
575
+ - 'consensus': LLM-based consensus
576
+ - 'best': Select by score
577
+ - callable: Custom reduction
578
+ axis: Axis to reduce (0=models, 1=prompts)
579
+ **kwargs: Additional params for reduction
580
+
581
+ Example:
582
+ >>> result.reduce('vote', axis=0) # Vote across models
583
+ >>> result.reduce('mean', axis=1) # Average across prompts
584
+ """
585
+ new_node = GraphNode(
586
+ op_type=OpType.REDUCE,
587
+ params={"method": method, "axis": axis, **kwargs},
588
+ parents=[self._graph],
589
+ shape=self._compute_reduced_shape(axis)
590
+ )
591
+
592
+ return LazyResult(self._specs, new_node, self._prompts)
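# A sketch of the custom-callable form of reduce(): np.apply_along_axis hands
# the function a 1-D slice of responses along the reduced axis.
def _longest_response(responses) -> str:
    return max((str(r) for r in responses), key=len)

# e.g. lazy_result.reduce(_longest_response, axis=0)  # longest answer per prompt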
593
+
594
+ def _compute_reduced_shape(self, axis: int) -> Optional[Tuple[int, ...]]:
595
+ """Compute shape after reduction"""
596
+ if self._graph.shape is None:
597
+ return None
598
+ shape = list(self._graph.shape)
599
+ if axis < len(shape):
600
+ shape.pop(axis)
601
+ return tuple(shape) if shape else (1,)
602
+
603
+ def chain(
604
+ self,
605
+ fn: Callable[[List[Any]], str],
606
+ n_rounds: int = 1
607
+ ) -> 'LazyResult':
608
+ """
609
+ Chain outputs through a synthesis function.
610
+
611
+ Useful for debate/discussion patterns where outputs
612
+ feed back as context for next round.
613
+
614
+ Args:
615
+ fn: Function taking all responses, returning synthesis prompt
616
+ n_rounds: Number of chain rounds
617
+
618
+ Example:
619
+ >>> def debate_round(responses):
620
+ ... return f"Consider these perspectives: {responses}. Synthesize."
621
+ >>> result.chain(debate_round, n_rounds=3)
622
+ """
623
+ new_node = GraphNode(
624
+ op_type=OpType.CHAIN,
625
+ params={"fn": fn, "n_rounds": n_rounds},
626
+ parents=[self._graph],
627
+ shape=self._graph.shape
628
+ )
629
+
630
+ return LazyResult(self._specs, new_node, self._prompts)
631
+
632
+ # ==================== Aggregation Helpers ====================
633
+
634
+ def vote(self, axis: int = 0) -> 'LazyResult':
635
+ """Shorthand for reduce('vote', axis)"""
636
+ return self.reduce('vote', axis=axis)
637
+
638
+ def consensus(self, axis: int = 0, model: Optional[str] = None) -> 'LazyResult':
639
+ """Shorthand for reduce('consensus', axis)"""
640
+ return self.reduce('consensus', axis=axis, model=model)
641
+
642
+ def variance(self) -> 'LazyResult':
643
+ """Compute variance/disagreement across models"""
644
+ return self.map(_compute_response_variance)
645
+
646
+ def argmax(self, scores: List[float]) -> 'LazyResult':
647
+ """Select responses corresponding to max scores"""
648
+ return self.reduce('best', scores=scores)
649
+
650
+ # ==================== Execution ====================
651
+
652
+ def explain(self) -> str:
653
+ """
654
+ Print explanation of the computation graph.
655
+
656
+ Returns:
657
+ String representation of the DAG
658
+ """
659
+ lines = ["Computation Graph:"]
660
+ self._explain_node(self._graph, lines, depth=0)
661
+ explanation = "\n".join(lines)
662
+ print(explanation)
663
+ return explanation
664
+
665
+ def _explain_node(self, node: GraphNode, lines: List[str], depth: int):
666
+ indent = " " * depth
667
+ params_str = {k: v for k, v in node.params.items() if k not in ('fn', 'predicate')}
668
+ lines.append(f"{indent}└─ {node.op_type.value}: shape={node.shape}, params={params_str}")
669
+ for parent in node.parents:
670
+ self._explain_node(parent, lines, depth + 1)
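# A small sketch of explain(): the pipeline below is never executed, only its
# DAG is printed (root operation first, SOURCE node last).
def _sketch_explain():
    arr = NPCArray.from_llms(["llama3.2"])
    lazy = arr.infer(["hello"]).map(len).reduce("mean", axis=1)
    return lazy.explain()  # reduce -> map -> infer -> source, no model calls made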
671
+
672
+ def collect(
673
+ self,
674
+ parallel: bool = True,
675
+ max_workers: Optional[int] = None,
676
+ progress: bool = False
677
+ ) -> ResponseTensor:
678
+ """
679
+ Execute the computation graph and return results.
680
+
681
+ Like Spark's collect(), this materializes the lazy computation.
682
+
683
+ Args:
684
+ parallel: Whether to parallelize independent operations
685
+ max_workers: Max parallel workers (default: number of models)
686
+ progress: Show progress bar
687
+
688
+ Returns:
689
+ ResponseTensor with materialized results
690
+ """
691
+ if self._computed and self._result is not None:
692
+ return self._result
693
+
694
+ executor = GraphExecutor(
695
+ parallel=parallel,
696
+ max_workers=max_workers or len(self._specs),
697
+ progress=progress
698
+ )
699
+
700
+ self._result = executor.execute(self._graph, self._specs, self._prompts)
701
+ self._computed = True
702
+
703
+ return self._result
704
+
705
+ def to_list(self) -> List:
706
+ """Collect and return as Python list"""
707
+ return self.collect().tolist()
708
+
709
+ # Alias for backwards compat
710
+ compute = collect
711
+
712
+
713
+ class GraphExecutor:
714
+ """
715
+ Executes the lazy computation graph.
716
+
717
+ Handles:
718
+ - Topological ordering
719
+ - Parallel execution of independent nodes
720
+ - Caching of intermediate results
721
+ """
722
+
723
+ def __init__(
724
+ self,
725
+ parallel: bool = True,
726
+ max_workers: int = 4,
727
+ progress: bool = False
728
+ ):
729
+ self.parallel = parallel
730
+ self.max_workers = max_workers
731
+ self.progress = progress
732
+ self._cache: Dict[int, Any] = {}
733
+
734
+ def execute(
735
+ self,
736
+ root: GraphNode,
737
+ specs: List[ModelSpec],
738
+ prompts: Optional[List[str]] = None
739
+ ) -> ResponseTensor:
740
+ """Execute graph starting from root node"""
741
+
742
+ # Topological sort
743
+ ordered = self._topological_sort(root)
744
+
745
+ # Execute in order
746
+ for node in ordered:
747
+ if id(node) in self._cache:
748
+ continue
749
+
750
+ # Get parent results
751
+ parent_results = [self._cache.get(id(p)) for p in node.parents]
752
+
753
+ # Execute node
754
+ result = self._execute_node(node, specs, prompts, parent_results)
755
+ self._cache[id(node)] = result
756
+
757
+ return self._cache[id(root)]
758
+
759
+ def _topological_sort(self, root: GraphNode) -> List[GraphNode]:
760
+ """Return nodes in execution order (leaves first)"""
761
+ visited = set()
762
+ ordered = []
763
+
764
+ def visit(node):
765
+ if id(node) in visited:
766
+ return
767
+ visited.add(id(node))
768
+ for parent in node.parents:
769
+ visit(parent)
770
+ ordered.append(node)
771
+
772
+ visit(root)
773
+ return ordered
774
+
775
+ def _execute_node(
776
+ self,
777
+ node: GraphNode,
778
+ specs: List[ModelSpec],
779
+ prompts: Optional[List[str]],
780
+ parent_results: List[Any]
781
+ ) -> ResponseTensor:
782
+ """Execute a single graph node"""
783
+
784
+ handlers = {
785
+ OpType.SOURCE: self._exec_source,
786
+ OpType.INFER: self._exec_infer,
787
+ OpType.PREDICT: self._exec_predict,
788
+ OpType.FORWARD: self._exec_forward,
789
+ OpType.FIT: self._exec_fit,
790
+ OpType.MAP: self._exec_map,
791
+ OpType.FILTER: self._exec_filter,
792
+ OpType.REDUCE: self._exec_reduce,
793
+ OpType.CHAIN: self._exec_chain,
794
+ OpType.EVOLVE: self._exec_evolve,
795
+ }
796
+
797
+ handler = handlers.get(node.op_type)
798
+ if handler is None:
799
+ raise ValueError(f"Unknown operation type: {node.op_type}")
800
+
801
+ return handler(node, specs, prompts, parent_results)
802
+
803
+ def _exec_source(self, node, specs, prompts, parents) -> ResponseTensor:
804
+ """Source node - just returns specs wrapped"""
805
+ return ResponseTensor(
806
+ data=np.array([s.model_ref for s in specs], dtype=object),
807
+ model_specs=specs,
808
+ prompts=prompts
809
+ )
810
+
811
+ def _exec_infer(self, node, specs, prompts, parents) -> ResponseTensor:
812
+ """Execute LLM inference across models and prompts"""
813
+ from npcpy.llm_funcs import get_llm_response
814
+
815
+ prompts_list = node.params.get("prompts", prompts or [])
816
+ extra_kwargs = {k: v for k, v in node.params.items() if k != "prompts"}
817
+
818
+ n_models = len(specs)
819
+ n_prompts = len(prompts_list)
820
+
821
+ # Prepare all inference tasks
822
+ tasks = []
823
+ for i, spec in enumerate(specs):
824
+ for j, prompt in enumerate(prompts_list):
825
+ tasks.append((i, j, spec, prompt))
826
+
827
+ # Execute (parallel or sequential)
828
+ results = np.empty((n_models, n_prompts), dtype=object)
829
+
830
+ if self.parallel and len(tasks) > 1:
831
+ with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
832
+ futures = {}
833
+ for i, j, spec, prompt in tasks:
834
+ future = executor.submit(
835
+ self._infer_single, spec, prompt, extra_kwargs
836
+ )
837
+ futures[future] = (i, j)
838
+
839
+ for future in as_completed(futures):
840
+ i, j = futures[future]
841
+ try:
842
+ results[i, j] = future.result()
843
+ except Exception as e:
844
+ results[i, j] = f"Error: {e}"
845
+ else:
846
+ for i, j, spec, prompt in tasks:
847
+ try:
848
+ results[i, j] = self._infer_single(spec, prompt, extra_kwargs)
849
+ except Exception as e:
850
+ results[i, j] = f"Error: {e}"
851
+
852
+ return ResponseTensor(
853
+ data=results,
854
+ model_specs=specs,
855
+ prompts=prompts_list,
856
+ metadata={"operation": "infer", **extra_kwargs}
857
+ )
858
+
859
+ def _infer_single(self, spec: ModelSpec, prompt: str, kwargs: Dict) -> str:
860
+ """Single model inference"""
861
+ from npcpy.llm_funcs import get_llm_response
862
+
863
+ if spec.model_type == "llm":
864
+ response = get_llm_response(
865
+ prompt,
866
+ model=spec.model_ref,
867
+ provider=spec.provider,
868
+ **{**spec.config, **kwargs}
869
+ )
870
+ return response.get("response", "")
871
+
872
+ elif spec.model_type == "npc":
873
+ npc = spec.model_ref
874
+ response = get_llm_response(
875
+ prompt,
876
+ npc=npc,
877
+ **kwargs
878
+ )
879
+ return response.get("response", "")
880
+
881
+ else:
882
+ raise ValueError(f"Cannot infer with model type: {spec.model_type}")
883
+
884
+ def _exec_predict(self, node, specs, prompts, parents) -> ResponseTensor:
885
+ """Execute sklearn/ML prediction"""
886
+ X = node.params.get("X")
887
+
888
+ results = []
889
+ for spec in specs:
890
+ if spec.model_type == "sklearn":
891
+ model = spec.model_ref
892
+ if hasattr(model, 'predict'):
893
+ pred = model.predict(X)
894
+ results.append(pred)
895
+ else:
896
+ results.append(None)
897
+ else:
898
+ results.append(None)
899
+
900
+ return ResponseTensor(
901
+ data=np.array(results, dtype=object),
902
+ model_specs=specs,
903
+ metadata={"operation": "predict"}
904
+ )
905
+
906
+ def _exec_forward(self, node, specs, prompts, parents) -> ResponseTensor:
907
+ """Execute PyTorch forward pass"""
908
+ inputs = node.params.get("inputs")
909
+
910
+ results = []
911
+ for spec in specs:
912
+ if spec.model_type == "torch":
913
+ model = spec.model_ref
914
+ device = spec.config.get("device", "cpu")
915
+ try:
916
+ import torch
917
+ model.to(device)
918
+ model.eval()
919
+ with torch.no_grad():
920
+ output = model(inputs.to(device) if hasattr(inputs, 'to') else inputs)
921
+ results.append(output)
922
+ except Exception as e:
923
+ results.append(f"Error: {e}")
924
+ else:
925
+ results.append(None)
926
+
927
+ return ResponseTensor(
928
+ data=np.array(results, dtype=object),
929
+ model_specs=specs,
930
+ metadata={"operation": "forward"}
931
+ )
932
+
933
+ def _exec_fit(self, node, specs, prompts, parents) -> ResponseTensor:
934
+ """Execute model fitting"""
935
+ X = node.params.get("X")
936
+ y = node.params.get("y")
937
+
938
+ fitted_specs = []
939
+ for spec in specs:
940
+ if spec.model_type == "sklearn":
941
+ model = copy.deepcopy(spec.model_ref)
942
+ if hasattr(model, 'fit'):
943
+ model.fit(X, y)
944
+ new_spec = ModelSpec(
945
+ model_type="sklearn",
946
+ model_ref=model,
947
+ config={**spec.config, "fitted": True}
948
+ )
949
+ fitted_specs.append(new_spec)
950
+ elif spec.model_type == "llm":
951
+ # Fine-tuning would go here
952
+ # For now, just pass through
953
+ fitted_specs.append(spec)
954
+ else:
955
+ fitted_specs.append(spec)
956
+
957
+ return ResponseTensor(
958
+ data=np.array([s.model_ref for s in fitted_specs], dtype=object),
959
+ model_specs=fitted_specs,
960
+ metadata={"operation": "fit"}
961
+ )
962
+
963
+ def _exec_map(self, node, specs, prompts, parents) -> ResponseTensor:
964
+ """Apply function to each result"""
965
+ fn = node.params.get("fn")
966
+ parent_result = parents[0] if parents else None
967
+
968
+ if parent_result is None:
969
+ raise ValueError("Map requires parent result")
970
+
971
+ # Apply fn element-wise
972
+ mapped = np.vectorize(fn, otypes=[object])(parent_result.data)
973
+
974
+ return ResponseTensor(
975
+ data=mapped,
976
+ model_specs=parent_result.model_specs,
977
+ prompts=parent_result.prompts,
978
+ metadata={**parent_result.metadata, "mapped": True}
979
+ )
980
+
981
+ def _exec_filter(self, node, specs, prompts, parents) -> ResponseTensor:
982
+ """Filter results by predicate"""
983
+ predicate = node.params.get("predicate")
984
+ parent_result = parents[0] if parents else None
985
+
986
+ if parent_result is None:
987
+ raise ValueError("Filter requires parent result")
988
+
989
+ # Apply predicate and filter
990
+ mask = np.vectorize(predicate)(parent_result.data)
991
+ filtered_data = parent_result.data[mask]
992
+
993
+ # This changes shape, need to track which specs remain
994
+ return ResponseTensor(
995
+ data=filtered_data,
996
+ model_specs=parent_result.model_specs, # May need adjustment
997
+ prompts=parent_result.prompts,
998
+ metadata={**parent_result.metadata, "filtered": True}
999
+ )
1000
+
1001
+ def _exec_reduce(self, node, specs, prompts, parents) -> ResponseTensor:
1002
+ """Reduce results along axis"""
1003
+ method = node.params.get("method", "vote")
1004
+ axis = node.params.get("axis", 0)
1005
+ parent_result = parents[0] if parents else None
1006
+
1007
+ if parent_result is None:
1008
+ raise ValueError("Reduce requires parent result")
1009
+
1010
+ data = parent_result.data
1011
+
1012
+ if method == "vote":
1013
+ reduced = self._reduce_vote(data, axis)
1014
+ elif method == "mean":
1015
+ reduced = np.mean(data, axis=axis)
1016
+ elif method == "concat":
1017
+ reduced = self._reduce_concat(data, axis)
1018
+ elif method == "consensus":
1019
+ reduced = self._reduce_consensus(data, axis, node.params)
1020
+ elif method == "best":
1021
+ scores = node.params.get("scores", [])
1022
+ reduced = self._reduce_best(data, scores, axis)
1023
+ elif callable(method):
1024
+ reduced = np.apply_along_axis(method, axis, data)
1025
+ else:
1026
+ raise ValueError(f"Unknown reduce method: {method}")
1027
+
1028
+ return ResponseTensor(
1029
+ data=np.atleast_1d(reduced),
1030
+ model_specs=specs if axis != 0 else [specs[0]],
1031
+ prompts=prompts,
1032
+ metadata={**parent_result.metadata, "reduced": method}
1033
+ )
1034
+
1035
+ def _reduce_vote(self, data: np.ndarray, axis: int) -> np.ndarray:
1036
+ """Majority voting reduction"""
1037
+ from collections import Counter
1038
+
1039
+ def vote_fn(arr):
1040
+ counter = Counter(arr)
1041
+ return counter.most_common(1)[0][0] if counter else None
1042
+
1043
+ return np.apply_along_axis(vote_fn, axis, data)
1044
+
1045
+ def _reduce_concat(self, data: np.ndarray, axis: int) -> np.ndarray:
1046
+ """Concatenate strings"""
1047
+ def concat_fn(arr):
1048
+ return "\n---\n".join(str(x) for x in arr)
1049
+
1050
+ return np.apply_along_axis(concat_fn, axis, data)
1051
+
1052
+ def _reduce_consensus(self, data: np.ndarray, axis: int, params: Dict) -> np.ndarray:
1053
+ """LLM-based consensus"""
1054
+ from npcpy.llm_funcs import get_llm_response
1055
+
1056
+ model = params.get("model", "llama3.2")
1057
+
1058
+ def consensus_fn(arr):
1059
+ perspectives = "\n".join(f"- {x}" for x in arr)
1060
+ prompt = f"Given these different perspectives:\n{perspectives}\n\nProvide a consensus synthesis:"
1061
+ response = get_llm_response(prompt, model=model)
1062
+ return response.get("response", "")
1063
+
1064
+ return np.apply_along_axis(consensus_fn, axis, data)
1065
+
1066
+ def _reduce_best(self, data: np.ndarray, scores: List[float], axis: int) -> np.ndarray:
1067
+ """Select best by score"""
1068
+ if axis == 0:
1069
+ best_idx = np.argmax(scores)
1070
+ return data[best_idx]
1071
+ else:
1072
+ return data
1073
+
1074
+ def _exec_chain(self, node, specs, prompts, parents) -> ResponseTensor:
1075
+ """Chain responses through synthesis function"""
1076
+ fn = node.params.get("fn")
1077
+ n_rounds = node.params.get("n_rounds", 1)
1078
+ parent_result = parents[0] if parents else None
1079
+
1080
+ if parent_result is None:
1081
+ raise ValueError("Chain requires parent result")
1082
+
1083
+ current = parent_result.data
1084
+
1085
+ for _ in range(n_rounds):
1086
+ # Apply synthesis function to get new prompt
1087
+ new_prompt = fn(current.tolist())
1088
+
1089
+ # Run inference with new prompt
1090
+ infer_node = GraphNode(
1091
+ op_type=OpType.INFER,
1092
+ params={"prompts": [new_prompt]},
1093
+ shape=(len(specs), 1)
1094
+ )
1095
+ current = self._exec_infer(infer_node, specs, [new_prompt], []).data
1096
+
1097
+ return ResponseTensor(
1098
+ data=current,
1099
+ model_specs=specs,
1100
+ prompts=prompts,
1101
+ metadata={**parent_result.metadata, "chained": n_rounds}
1102
+ )
1103
+
1104
+ def _exec_evolve(self, node, specs, prompts, parents) -> ResponseTensor:
1105
+ """Evolve population based on fitness"""
1106
+ import random
1107
+
1108
+ fitness_scores = node.params.get("fitness_scores", [])
1109
+ mutate_fn = node.params.get("mutate_fn")
1110
+ crossover_fn = node.params.get("crossover_fn")
1111
+ elite_ratio = node.params.get("elite_ratio", 0.1)
1112
+
1113
+ n = len(specs)
1114
+ n_elite = max(1, int(n * elite_ratio))
1115
+
1116
+ # Sort by fitness
1117
+ sorted_indices = np.argsort(fitness_scores)[::-1]
1118
+
1119
+ # Keep elites
1120
+ new_specs = [specs[i] for i in sorted_indices[:n_elite]]
1121
+
1122
+ # Generate rest through mutation/crossover
1123
+ while len(new_specs) < n:
1124
+ if crossover_fn and random.random() < 0.5:
1125
+ parent1 = specs[random.choice(sorted_indices[:n//2])]
1126
+ parent2 = specs[random.choice(sorted_indices[:n//2])]
1127
+ child_spec = crossover_fn(parent1, parent2)
1128
+ else:
1129
+ parent = specs[random.choice(sorted_indices[:n//2])]
1130
+ child_spec = mutate_fn(parent) if mutate_fn else parent
1131
+ new_specs.append(child_spec)
1132
+
1133
+ return ResponseTensor(
1134
+ data=np.array([s.model_ref for s in new_specs], dtype=object),
1135
+ model_specs=new_specs,
1136
+ metadata={"operation": "evolve", "generation": 1}
1137
+ )
1138
+
1139
+
1140
+ def _compute_response_variance(responses: List[str]) -> float:
1141
+ """Compute semantic variance across responses"""
1142
+ # Simple heuristic: pairwise Jaccard word-overlap disagreement
1143
+ if len(responses) == 0:
1144
+ return 0.0
1145
+
1146
+ lengths = [len(r) for r in responses]
1147
+ all_words = set()
1148
+ word_sets = []
1149
+ for r in responses:
1150
+ words = set(str(r).lower().split())
1151
+ word_sets.append(words)
1152
+ all_words.update(words)
1153
+
1154
+ # Jaccard-based disagreement
1155
+ if len(word_sets) < 2:
1156
+ return 0.0
1157
+
1158
+ total_overlap = 0
1159
+ n_pairs = 0
1160
+ for i, ws1 in enumerate(word_sets):
1161
+ for ws2 in word_sets[i+1:]:
1162
+ if ws1 or ws2:
1163
+ overlap = len(ws1 & ws2) / len(ws1 | ws2) if (ws1 | ws2) else 1.0
1164
+ total_overlap += overlap
1165
+ n_pairs += 1
1166
+
1167
+ avg_overlap = total_overlap / n_pairs if n_pairs > 0 else 1.0
1168
+ return 1.0 - avg_overlap
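# A worked sketch of the disagreement heuristic above: identical responses
# score 0.0, responses with no shared words score 1.0.
def _sketch_variance_scores():
    same = _compute_response_variance(["the cat sat", "the cat sat"])      # 0.0
    different = _compute_response_variance(["alpha beta", "gamma delta"])  # 1.0
    return same, different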
1169
+
1170
+
1171
+ # ==================== Polars Integration ====================
1172
+
1173
+ def npc_udf(
1174
+ operation: str,
1175
+ model_array: NPCArray,
1176
+ input_col: 'pl.Expr' = None,
1177
+ **kwargs
1178
+ ) -> 'pl.Expr':
1179
+ """
1180
+ Create a Polars user-defined function for NPC operations.
1181
+
1182
+ Args:
1183
+ operation: 'infer', 'predict', 'forward', 'fit'
1184
+ model_array: NPCArray to use
1185
+ input_col: Polars column expression for input
1186
+ **kwargs: Additional operation params
1187
+
1188
+ Example:
1189
+ >>> result = df.with_columns(
1190
+ ... npc_udf('infer', models, pl.col('text')).alias('response')
1191
+ ... )
1192
+ """
1193
+ try:
1194
+ import polars as pl
1195
+ except ImportError:
1196
+ raise ImportError("Polars required for npc_udf. Install with: pip install polars")
1197
+
1198
+ def apply_fn(inputs: pl.Series) -> pl.Series:
1199
+ input_list = inputs.to_list()
1200
+
1201
+ if operation == "infer":
1202
+ result = model_array.infer(input_list, **kwargs).compute()
1203
+ elif operation == "predict":
1204
+ result = model_array.predict(input_list, **kwargs).compute()
1205
+ elif operation == "forward":
1206
+ result = model_array.forward(input_list, **kwargs).compute()
1207
+ else:
1208
+ raise ValueError(f"Unknown operation: {operation}")
1209
+
1210
+ # Flatten single-model results; with multiple models, fall back to the first model's column
1211
+ output = result.flatten() if result.shape[0] == 1 else result.data[0, :].tolist()
1212
+ return pl.Series(output)
1213
+
1214
+ return input_col.map_batches(apply_fn, return_dtype=pl.Utf8)
1215
+
1216
+
1217
+ def register_polars_namespace():
1218
+ """
1219
+ Register 'npc' namespace on Polars DataFrames.
1220
+
1221
+ After calling this, you can do:
1222
+ >>> df.npc.infer(models, 'text_col')
1223
+ """
1224
+ try:
1225
+ import polars as pl
1226
+
1227
+ @pl.api.register_dataframe_namespace("npc")
1228
+ class NPCNamespace:
1229
+ def __init__(self, df: pl.DataFrame):
1230
+ self._df = df
1231
+
1232
+ def infer(
1233
+ self,
1234
+ models: NPCArray,
1235
+ input_col: str,
1236
+ output_col: str = "response",
1237
+ **kwargs
1238
+ ) -> pl.DataFrame:
1239
+ return self._df.with_columns(
1240
+ npc_udf('infer', models, pl.col(input_col), **kwargs)
1241
+ .alias(output_col)
1242
+ )
1243
+
1244
+ return True
1245
+ except ImportError:
1246
+ return False
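# A sketch of the Polars integration; assumes polars is installed and a local
# "llama3.2" model is reachable. Each row of the text column is sent to the model.
def _sketch_polars_namespace():
    import polars as pl
    register_polars_namespace()
    models = NPCArray.from_llms(["llama3.2"])
    df = pl.DataFrame({"text": ["Summarize: cats purr.", "Summarize: dogs bark."]})
    return df.npc.infer(models, input_col="text", output_col="response")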
1247
+
1248
+
1249
+ # ==================== Convenience Functions ====================
1250
+
1251
+ def infer_matrix(
1252
+ prompts: List[str],
1253
+ models: List[str] = None,
1254
+ providers: List[str] = None,
1255
+ **kwargs
1256
+ ) -> ResponseTensor:
1257
+ """
1258
+ Quick inference across model/prompt matrix.
1259
+
1260
+ Args:
1261
+ prompts: List of prompts
1262
+ models: List of model names
1263
+ providers: List of providers
1264
+ **kwargs: Additional params
1265
+
1266
+ Returns:
1267
+ ResponseTensor of shape (n_models, n_prompts)
1268
+ """
1269
+ if models is None:
1270
+ models = ["llama3.2"]
1271
+
1272
+ arr = NPCArray.from_llms(models, providers)
1273
+ return arr.infer(prompts, **kwargs).compute()
1274
+
1275
+
1276
+ def ensemble_vote(
1277
+ prompt: str,
1278
+ models: List[str],
1279
+ providers: List[str] = None
1280
+ ) -> str:
1281
+ """
1282
+ Quick ensemble voting across models.
1283
+
1284
+ Args:
1285
+ prompt: Single prompt
1286
+ models: List of models to query
1287
+ providers: Optional providers
1288
+
1289
+ Returns:
1290
+ Majority-vote response string
1291
+ """
1292
+ arr = NPCArray.from_llms(models, providers)
1293
+ result = arr.infer(prompt).vote(axis=0).compute()
1294
+ return result.data[0] if result.data.size > 0 else ""
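# A closing usage sketch tying the helpers together; the model names assume
# locally available Ollama-style models and are placeholders only.
def _sketch_ensemble_workflow():
    answer = ensemble_vote(
        "Answer with a single word: what is the capital of France?",
        models=["llama3.2", "gemma2"],
    )
    disagreement = (
        NPCArray.from_llms(["llama3.2", "gemma2"])
        .infer(["Explain overfitting in one sentence."])
        .variance()
        .collect()
    )
    return answer, disagreement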