phykit 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. phykit/__init__.py +0 -0
  2. phykit/__main__.py +6 -0
  3. phykit/helpers/__init__.py +0 -0
  4. phykit/helpers/boolean_argument_parsing.py +12 -0
  5. phykit/helpers/caching.py +201 -0
  6. phykit/helpers/files.py +125 -0
  7. phykit/helpers/parallel.py +305 -0
  8. phykit/helpers/stats_summary.py +64 -0
  9. phykit/helpers/streaming.py +152 -0
  10. phykit/phykit.py +2862 -0
  11. phykit/services/__init__.py +0 -0
  12. phykit/services/alignment/__init__.py +17 -0
  13. phykit/services/alignment/alignment_length.py +16 -0
  14. phykit/services/alignment/alignment_length_no_gaps.py +69 -0
  15. phykit/services/alignment/alignment_recoding.py +89 -0
  16. phykit/services/alignment/base.py +103 -0
  17. phykit/services/alignment/column_score.py +66 -0
  18. phykit/services/alignment/compositional_bias_per_site.py +98 -0
  19. phykit/services/alignment/create_concatenation_matrix.py +254 -0
  20. phykit/services/alignment/dna_threader.py +145 -0
  21. phykit/services/alignment/evolutionary_rate_per_site.py +85 -0
  22. phykit/services/alignment/faidx.py +21 -0
  23. phykit/services/alignment/gc_content.py +94 -0
  24. phykit/services/alignment/pairwise_identity.py +159 -0
  25. phykit/services/alignment/parsimony_informative_sites.py +81 -0
  26. phykit/services/alignment/rcv.py +14 -0
  27. phykit/services/alignment/rcvt.py +47 -0
  28. phykit/services/alignment/rename_fasta_entries.py +53 -0
  29. phykit/services/alignment/sum_of_pairs_score.py +157 -0
  30. phykit/services/alignment/variable_sites.py +54 -0
  31. phykit/services/base.py +9 -0
  32. phykit/services/tree/__init__.py +29 -0
  33. phykit/services/tree/base.py +178 -0
  34. phykit/services/tree/bipartition_support_stats.py +48 -0
  35. phykit/services/tree/branch_length_multiplier.py +37 -0
  36. phykit/services/tree/collapse_branches.py +27 -0
  37. phykit/services/tree/covarying_evolutionary_rates.py +272 -0
  38. phykit/services/tree/dvmc.py +37 -0
  39. phykit/services/tree/evolutionary_rate.py +17 -0
  40. phykit/services/tree/hidden_paralogy_check.py +128 -0
  41. phykit/services/tree/internal_branch_stats.py +77 -0
  42. phykit/services/tree/internode_labeler.py +33 -0
  43. phykit/services/tree/last_common_ancestor_subtree.py +35 -0
  44. phykit/services/tree/lb_score.py +196 -0
  45. phykit/services/tree/monophyly_check.py +106 -0
  46. phykit/services/tree/nearest_neighbor_interchange.py +140 -0
  47. phykit/services/tree/patristic_distances.py +113 -0
  48. phykit/services/tree/polytomy_test.py +546 -0
  49. phykit/services/tree/print_tree.py +28 -0
  50. phykit/services/tree/prune_tree.py +40 -0
  51. phykit/services/tree/rename_tree_tips.py +64 -0
  52. phykit/services/tree/rf_distance.py +136 -0
  53. phykit/services/tree/root_tree.py +35 -0
  54. phykit/services/tree/saturation.py +209 -0
  55. phykit/services/tree/spurious_sequence.py +75 -0
  56. phykit/services/tree/terminal_branch_stats.py +87 -0
  57. phykit/services/tree/tip_labels.py +18 -0
  58. phykit/services/tree/tip_to_tip_distance.py +41 -0
  59. phykit/services/tree/tip_to_tip_node_distance.py +41 -0
  60. phykit/services/tree/total_tree_length.py +25 -0
  61. phykit/services/tree/treeness.py +16 -0
  62. phykit/services/tree/treeness_over_rcv.py +40 -0
  63. phykit/version.py +1 -0
  64. phykit-2.1.0.dist-info/METADATA +150 -0
  65. phykit-2.1.0.dist-info/RECORD +69 -0
  66. phykit-2.1.0.dist-info/WHEEL +5 -0
  67. phykit-2.1.0.dist-info/entry_points.txt +121 -0
  68. phykit-2.1.0.dist-info/licenses/LICENSE.md +7 -0
  69. phykit-2.1.0.dist-info/top_level.txt +1 -0
phykit/__init__.py ADDED
File without changes
phykit/__main__.py ADDED
@@ -0,0 +1,6 @@
"""phykit.__main__: executed when phykit is called as script"""
import sys

from .phykit import Phykit

# NOTE(review): instantiating Phykit appears to run the whole CLI as a
# side effect of __init__ (defined in phykit.py) — confirm against that
# module. `sys` is imported but unused here — TODO confirm it is needed.
Phykit()
File without changes
@@ -0,0 +1,12 @@
1
+ import argparse
2
+
3
+
4
def str2bool(v):
    """Parse a CLI token into a bool for argparse.

    A real bool passes through unchanged. Strings are lowercased and
    matched against "true"/"t"/"1" (True) or "false"/"f"/"0" (False).

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognized boolean.
    """
    if isinstance(v, bool):
        return v
    token = v.lower()
    if token in {"true", "t", "1"}:
        return True
    if token in {"false", "f", "0"}:
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
@@ -0,0 +1,201 @@
1
+ """
2
+ Caching utilities for expensive computations
3
+ """
4
+
5
+ import hashlib
6
+ import pickle
7
+ import os
8
+ import tempfile
9
+ from functools import wraps, lru_cache
10
+ from typing import Any, Callable
11
+ import json
12
+
13
+
14
class ResultCache:
    """File-based cache for expensive computation results.

    Values are pickled to individual ``<key>.pkl`` files under
    ``cache_dir``; keys are MD5 digests built from call arguments.
    """

    def __init__(self, cache_dir: str = None):
        """Initialize the cache directory (created if absent).

        Args:
            cache_dir: Directory for cache files (a ``phykit_cache``
                subdirectory of the system temp dir if None).
        """
        if cache_dir is None:
            cache_dir = os.path.join(tempfile.gettempdir(), 'phykit_cache')

        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)

    def _get_cache_key(self, *args, **kwargs) -> str:
        """Generate a unique cache key from function arguments."""
        key_parts = []

        for arg in args:
            if isinstance(arg, (str, int, float, bool)):
                key_parts.append(str(arg))
            elif hasattr(arg, '__dict__'):
                # For objects, key on their attribute dict (sorted, so
                # logically-equal objects produce the same key).
                key_parts.append(json.dumps(vars(arg), sort_keys=True, default=str))
            else:
                key_parts.append(str(arg))

        for k, v in sorted(kwargs.items()):
            key_parts.append(f"{k}={v}")

        key_string = "_".join(key_parts)
        return hashlib.md5(key_string.encode()).hexdigest()

    def get(self, cache_key: str) -> Any:
        """Return the cached value for ``cache_key``, or None on a miss.

        A corrupted or unreadable cache file counts as a miss and is
        removed (best effort). Note a cached value of ``None`` is
        indistinguishable from a miss.
        """
        cache_file = os.path.join(self.cache_dir, f"{cache_key}.pkl")

        if os.path.exists(cache_file):
            try:
                with open(cache_file, 'rb') as f:
                    return pickle.load(f)
            # Narrow handlers (was a bare ``except:`` that swallowed
            # everything, including KeyboardInterrupt/SystemExit).
            except (pickle.UnpicklingError, EOFError, AttributeError, OSError):
                # Cache corrupted or unreadable -- drop it and miss.
                try:
                    os.remove(cache_file)
                except OSError:
                    # File vanished concurrently or is not removable;
                    # the miss result is still correct.
                    pass

        return None

    def set(self, cache_key: str, value: Any) -> None:
        """Store ``value`` under ``cache_key``; failures are non-fatal."""
        cache_file = os.path.join(self.cache_dir, f"{cache_key}.pkl")

        try:
            with open(cache_file, 'wb') as f:
                pickle.dump(value, f)
        except (pickle.PicklingError, OSError):
            # Unpicklable value or disk problem -- caching is best
            # effort, so continue without caching.
            pass

    def clear(self) -> None:
        """Remove every ``.pkl`` file from the cache directory."""
        for file in os.listdir(self.cache_dir):
            if file.endswith('.pkl'):
                os.remove(os.path.join(self.cache_dir, file))
82
+
83
+
84
def cached_computation(cache_instance: ResultCache = None):
    """Decorator that memoizes a function's results in a ResultCache.

    Usage:
        @cached_computation()
        def expensive_function(param1, param2):
            # Expensive computation
            return result

    Note: ``ResultCache.get`` signals a miss with ``None``, so a
    function that legitimately returns ``None`` is re-executed on
    every call.
    """
    cache = cache_instance if cache_instance is not None else ResultCache()

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Key on the function name plus all call arguments.
            key = cache._get_cache_key(func.__name__, *args, **kwargs)

            hit = cache.get(key)
            if hit is not None:
                return hit

            value = func(*args, **kwargs)
            cache.set(key, value)
            return value

        # Expose cache invalidation on the wrapped function.
        wrapper.clear_cache = cache.clear
        return wrapper

    return decorator
122
+
123
+
124
+ # Specialized caching for tree operations
125
@lru_cache(maxsize=128)
def cached_tree_distance(tree_pickle: bytes, tip1: str, tip2: str) -> float:
    """Cache tree distance calculations.

    The tree is passed pickled because ``bytes`` are hashable, letting
    ``lru_cache`` key on the tree itself; it is unpickled on each cache
    miss. (The redundant in-function ``import pickle`` was removed --
    the module already imports pickle at top level.)

    Args:
        tree_pickle: Pickled tree object; must expose a
            ``distance(tip1, tip2)`` method.
        tip1: First tip name
        tip2: Second tip name

    Returns:
        Distance between tips
    """
    tree = pickle.loads(tree_pickle)
    return tree.distance(tip1, tip2)
141
+
142
+
143
# Specialized caching for alignment operations
class AlignmentCache:
    """Specialized in-memory cache for alignment columns and statistics.

    Storage is plain dicts. An earlier version also wrapped the getters
    in ``@lru_cache``, which was a bug: a ``get_*`` call made before the
    matching ``set_*`` cached the ``None`` miss permanently (the stored
    value could never be observed), and caching bound methods keeps the
    instance alive for the cache's lifetime. Dict lookup is already
    O(1), so the extra layer is simply dropped.
    """

    def __init__(self):
        # column cache: "<alignment_hash>_<column_idx>" -> column string
        self._column_cache = {}
        # stats cache: "<alignment_hash>_<stat_type>" -> computed stats
        self._stats_cache = {}

    def get_column(self, alignment_hash: str, column_idx: int) -> str:
        """Return the cached alignment column, or None if not cached."""
        return self._column_cache.get(f"{alignment_hash}_{column_idx}")

    def set_column(self, alignment_hash: str, column_idx: int, column: str) -> None:
        """Cache one alignment column."""
        self._column_cache[f"{alignment_hash}_{column_idx}"] = column

    def get_stats(self, alignment_hash: str, stat_type: str) -> Any:
        """Return the cached alignment statistic, or None if not cached."""
        return self._stats_cache.get(f"{alignment_hash}_{stat_type}")

    def set_stats(self, alignment_hash: str, stat_type: str, stats: Any) -> None:
        """Cache one alignment statistic."""
        self._stats_cache[f"{alignment_hash}_{stat_type}"] = stats

    def clear(self) -> None:
        """Clear all cached columns and statistics."""
        self._column_cache.clear()
        self._stats_cache.clear()
187
+
188
+
189
# Global cache instances
# NOTE: created eagerly at import time; constructing ResultCache() also
# creates its cache directory on disk as a side effect.
_result_cache = ResultCache()
_alignment_cache = AlignmentCache()


def get_result_cache() -> ResultCache:
    """Get global result cache instance."""
    return _result_cache


def get_alignment_cache() -> AlignmentCache:
    """Get global alignment cache instance."""
    return _alignment_cache
@@ -0,0 +1,125 @@
1
+ from enum import Enum
2
+ import sys
3
+ from typing import Tuple, Optional
4
+ from functools import lru_cache
5
+ import hashlib
6
+ import os
7
+
8
+ from Bio import AlignIO
9
+ from Bio.Align import MultipleSeqAlignment
10
+
11
+
12
class FileFormat(Enum):
    """Alignment file formats recognized by this module.

    Each value is the exact format string handed to ``AlignIO.read``;
    ``get_alignment_and_format`` iterates over all members when
    first-line detection fails.
    """
    fasta = "fasta"
    clustal = "clustal"
    maf = "maf"
    mauve = "mauve"
    phylip = "phylip"
    phylip_seq = "phylip-sequential"
    phylip_rel = "phylip-relaxed"
    stockholm = "stockholm"
21
+
22
+
23
+ def _get_file_hash(file_path: str) -> str:
24
+ """Calculate a hash for file content to use as cache key."""
25
+ # Use file path, size, and modification time for cache key
26
+ # This is faster than hashing file contents
27
+ stat = os.stat(file_path)
28
+ cache_key = f"{file_path}_{stat.st_size}_{stat.st_mtime}"
29
+ return hashlib.md5(cache_key.encode()).hexdigest()
30
+
31
+ def _detect_format_by_content(file_path: str) -> Optional[str]:
32
+ """Attempt to detect file format by examining file content."""
33
+ with open(file_path, 'r') as f:
34
+ first_line = f.readline().strip()
35
+
36
+ # Quick format detection based on first line
37
+ if first_line.startswith('>'):
38
+ return 'fasta'
39
+ elif first_line.startswith('CLUSTAL'):
40
+ return 'clustal'
41
+ elif first_line.startswith('#'):
42
+ # Could be Stockholm
43
+ if 'STOCKHOLM' in first_line:
44
+ return 'stockholm'
45
+ elif first_line.isdigit() or (len(first_line.split()) == 2 and
46
+ first_line.split()[0].isdigit()):
47
+ return 'phylip'
48
+
49
+ return None
50
+
51
@lru_cache(maxsize=32)
def _cached_alignment_read(file_hash: str, file_path: str, file_format: str) -> Tuple[MultipleSeqAlignment, bool]:
    """Cached reading of alignment files.

    ``file_hash`` (a digest of path + size + mtime) is part of the cache
    key so a modified file is re-read even though its path is unchanged.
    On a format mismatch this propagates whatever ``AlignIO.read``
    raises; the caller catches ValueError/AssertionError and tries the
    next format.

    Returns:
        (alignment, is_protein) tuple.
    """
    with open(file_path) as f:
        alignment = AlignIO.read(f, file_format)
    return alignment, is_protein_alignment(alignment)
57
+
58
def get_alignment_and_format(
    alignment_file_path: str
) -> Tuple[MultipleSeqAlignment, str, bool]:
    """Read an alignment file, auto-detecting its format.

    Strategy: (1) guess the format from the file's first line and try
    that first, (2) fall back to attempting every ``FileFormat`` in
    turn. Reads go through ``_cached_alignment_read`` so repeated reads
    of an unchanged file hit the LRU cache.

    Returns:
        (alignment, format_name, is_protein) on success.

    Exits the process with status 2 (after printing a message) when the
    file is missing or no known format parses it.
    """
    # Check if file exists first
    if not os.path.exists(alignment_file_path):
        print(f"{alignment_file_path} corresponds to no such file.")
        print("Please check file name and pathing")
        sys.exit(2)

    # Try to detect format by content first
    detected_format = _detect_format_by_content(alignment_file_path)

    # Get file hash for caching
    file_hash = _get_file_hash(alignment_file_path)

    # If format was detected, try it first
    if detected_format:
        try:
            alignment, is_protein = _cached_alignment_read(
                file_hash, alignment_file_path, detected_format
            )
            return alignment, detected_format, is_protein
        except (ValueError, AssertionError):
            # The heuristic guess was wrong -- fall through to the
            # brute-force loop below.
            pass

    # Fall back to trying all formats
    for fileFormat in FileFormat:
        # Skip the already tried format
        if detected_format and fileFormat.value == detected_format:
            continue

        try:
            alignment, is_protein = _cached_alignment_read(
                file_hash, alignment_file_path, fileFormat.value
            )
            return alignment, fileFormat.value, is_protein
        except (ValueError, AssertionError):
            continue

    # If we get here, no format worked
    print(f"Could not determine format for {alignment_file_path}")
    print("Please ensure the file is in a supported format")
    sys.exit(2)
101
+
102
+
103
def is_protein_alignment(alignment: "MultipleSeqAlignment") -> bool:
    """Heuristically decide whether an alignment holds protein sequences.

    Any record containing a character outside the nucleotide/gap
    alphabet (A, C, G, T, U, -, N, ?, *) marks the whole alignment as
    protein.
    """
    nucleotide_set = {
        "A", "C", "G", "T", "U", "-", "N", "?", "*"
    }

    # Any residue outside the nucleotide alphabet => likely protein.
    return any(
        set(record.seq.upper()) - nucleotide_set
        for record in alignment
    )
116
+
117
+
118
def read_single_column_file_to_list(single_col_file_path: str) -> list:
    """Read a single-column text file into a list of stripped lines.

    Each line loses its trailing newline and surrounding whitespace;
    blank lines are kept as empty strings.

    Exits the process with status 2 when the file does not exist.
    """
    try:
        with open(single_col_file_path) as handle:
            entries = []
            for raw_line in handle:
                entries.append(raw_line.rstrip("\n").strip())
            return entries
    except FileNotFoundError:
        print(f"{single_col_file_path} corresponds to no such file or directory.")
        print("Please check file name and pathing")
        sys.exit(2)
@@ -0,0 +1,305 @@
1
+ """
2
+ Parallel processing utilities for batch operations
3
+ """
4
+
5
import multiprocessing as mp
import sys
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from functools import partial
from typing import Any, Callable, List, Optional, Tuple

import numpy as np
11
+
12
+
13
class ParallelProcessor:
    """
    Utility class for parallel processing of batch operations.
    """

    @staticmethod
    def get_optimal_workers(data_size: int, min_chunk_size: int = 10) -> int:
        """
        Determine optimal number of workers based on data size.

        Args:
            data_size: Size of data to process
            min_chunk_size: Minimum size per chunk

        Returns:
            Optimal number of workers (>= 1, capped at 8)
        """
        max_workers = mp.cpu_count()
        optimal_workers = min(max_workers, max(1, data_size // min_chunk_size))
        return min(optimal_workers, 8)  # Cap at 8 to avoid overhead

    @staticmethod
    def chunk_data(data: List[Any], num_chunks: int) -> List[List[Any]]:
        """
        Split data into at most ``num_chunks`` chunks, order preserved.

        Uses ceiling division for the chunk size. The previous floor
        division produced num_chunks + 1 chunks when len(data) was not
        divisible (e.g. 10 items into 3 chunks -> sizes 3,3,3,1).

        Args:
            data: Data to split
            num_chunks: Desired number of chunks (must be >= 1)

        Returns:
            List of non-empty data chunks

        Raises:
            ValueError: if num_chunks < 1
        """
        if num_chunks < 1:
            raise ValueError("num_chunks must be >= 1")
        chunk_size = max(1, -(-len(data) // num_chunks))  # ceil division
        return [
            data[i:i + chunk_size]
            for i in range(0, len(data), chunk_size)
        ]

    @staticmethod
    def parallel_map(
        func: Callable,
        data: List[Any],
        num_workers: Optional[int] = None,
        use_threads: bool = False,
        show_progress: bool = False
    ) -> List[Any]:
        """
        Apply ``func`` to every item of ``data``, possibly in parallel.

        Small inputs (< 20 items) or num_workers == 1 run sequentially,
        so any callable works there. When worker processes are used
        (use_threads=False), ``func`` must be picklable -- a module-level
        function, not a lambda or closure.

        Args:
            func: Function to apply
            data: Data to process
            num_workers: Number of workers (auto-determined if None)
            use_threads: Use threads instead of processes
            show_progress: Show a tqdm progress bar (only if tqdm is
                installed and stderr is a TTY)

        Returns:
            List of results, in input order
        """
        if not data:
            return []

        # Determine number of workers
        if num_workers is None:
            num_workers = ParallelProcessor.get_optimal_workers(len(data))

        # For small datasets the pool overhead outweighs any benefit.
        if len(data) < 20 or num_workers == 1:
            return [func(item) for item in data]

        # Choose executor type
        executor_class = ThreadPoolExecutor if use_threads else ProcessPoolExecutor

        with executor_class(max_workers=num_workers) as executor:
            if show_progress and sys.stderr.isatty():
                try:
                    from tqdm import tqdm
                except ImportError:
                    # Fallback without progress bar
                    return list(executor.map(func, data))
                futures = {executor.submit(func, item): i for i, item in enumerate(data)}
                results = [None] * len(data)
                for future in tqdm(as_completed(futures), total=len(data), desc="Processing"):
                    # Restore input order via the index recorded at submit.
                    results[futures[future]] = future.result()
                return results
            return list(executor.map(func, data))

    @staticmethod
    def parallel_reduce(
        func: Callable,
        data: List[Any],
        reduce_func: Callable,
        initial_value: Any = None,
        num_workers: Optional[int] = None
    ) -> Any:
        """
        Apply ``func`` to data in parallel, then fold the results.

        Args:
            func: Function to apply to each item
            data: Data to process
            reduce_func: Binary function used to fold the mapped results
            initial_value: Initial accumulator; None means "seed with the
                first result" (an explicit None initializer is therefore
                indistinguishable from no initializer)
            num_workers: Number of workers

        Returns:
            Folded result, or None when data is empty and no
            initial_value was given
        """
        # Apply function in parallel
        results = ParallelProcessor.parallel_map(func, data, num_workers)

        # Reduce results
        if initial_value is not None:
            accumulator = initial_value
            for item in results:
                accumulator = reduce_func(accumulator, item)
            return accumulator

        if not results:
            return None
        accumulator = results[0]
        for item in results[1:]:
            accumulator = reduce_func(accumulator, item)
        return accumulator
147
+
148
+
149
class BatchFileProcessor:
    """
    Process multiple files in parallel.
    """

    @staticmethod
    def _apply_pair(processing_func: Callable, pair: Tuple[str, str]) -> Any:
        """Apply ``processing_func`` to one (first, second) file pair.

        Defined as a staticmethod rather than a local closure so it can
        be pickled and shipped to worker processes.
        """
        return processing_func(pair[0], pair[1])

    @staticmethod
    def process_files(
        file_paths: List[str],
        processing_func: Callable,
        num_workers: Optional[int] = None,
        aggregate_func: Optional[Callable] = None
    ) -> Any:
        """
        Process multiple files in parallel.

        Args:
            file_paths: List of file paths
            processing_func: Function applied to each path (must be
                picklable when worker processes are used)
            num_workers: Number of workers
            aggregate_func: Optional function to aggregate the results

        Returns:
            List of per-file results, or the aggregated result
        """
        if not file_paths:
            return []

        # Process files in parallel
        results = ParallelProcessor.parallel_map(
            processing_func,
            file_paths,
            num_workers,
            show_progress=True
        )

        # Aggregate results if function provided
        if aggregate_func:
            return aggregate_func(results)

        return results

    @staticmethod
    def process_file_pairs(
        file_pairs: List[Tuple[str, str]],
        processing_func: Callable,
        num_workers: Optional[int] = None
    ) -> List[Any]:
        """
        Process pairs of files in parallel.

        Args:
            file_pairs: List of (path_a, path_b) pairs
            processing_func: Two-argument function applied to each pair
                (must be picklable when worker processes are used)
            num_workers: Number of workers

        Returns:
            List of results, in input order
        """
        # partial over a picklable staticmethod replaces the previous
        # local closure, which ProcessPoolExecutor could not pickle.
        return ParallelProcessor.parallel_map(
            partial(BatchFileProcessor._apply_pair, processing_func),
            file_pairs,
            num_workers
        )
215
+
216
+
217
class NumpyParallel:
    """
    Utilities for parallel NumPy operations.
    """

    @staticmethod
    def _apply_to_column(func: Callable, array: np.ndarray, col: int) -> Any:
        """Apply ``func`` to one column of ``array`` (picklable helper)."""
        return func(array[:, col])

    @staticmethod
    def _apply_to_row(func: Callable, array: np.ndarray, row: int) -> Any:
        """Apply ``func`` to one row of ``array`` (picklable helper)."""
        return func(array[row, :])

    @staticmethod
    def _apply_to_pair(operation_func: Callable, pair_data: Tuple) -> Tuple:
        """Apply ``operation_func`` to one (i, j, item_i, item_j) tuple."""
        i, j, item1, item2 = pair_data
        return i, j, operation_func(item1, item2)

    @staticmethod
    def parallel_apply_along_axis(
        func: Callable,
        axis: int,
        array: np.ndarray,
        num_workers: Optional[int] = None
    ) -> np.ndarray:
        """
        Apply function along axis in parallel.

        The per-column/per-row callables are built with ``partial`` over
        picklable staticmethods; the previous lambdas could not be
        pickled for worker processes.

        Args:
            func: Function to apply (must be picklable for processes)
            axis: 0 to apply over columns, 1 over rows
            array: 2-D NumPy array
            num_workers: Number of workers

        Returns:
            Result array (transposed back for axis=0)

        Raises:
            ValueError: for any axis other than 0 or 1
        """
        if axis == 0:
            # Process columns
            results = ParallelProcessor.parallel_map(
                partial(NumpyParallel._apply_to_column, func, array),
                list(range(array.shape[1])),
                num_workers
            )
            return np.array(results).T
        if axis == 1:
            # Process rows
            results = ParallelProcessor.parallel_map(
                partial(NumpyParallel._apply_to_row, func, array),
                list(range(array.shape[0])),
                num_workers
            )
            return np.array(results)
        raise ValueError(f"Unsupported axis: {axis}")

    @staticmethod
    def parallel_pairwise_operation(
        items: List[Any],
        operation_func: Callable,
        num_workers: Optional[int] = None,
        symmetric: bool = True
    ) -> np.ndarray:
        """
        Perform pairwise operations in parallel.

        Args:
            items: List of items
            operation_func: Two-argument function applied to item pairs
                (must be picklable for processes)
            num_workers: Number of workers
            symmetric: When True only the upper triangle (i < j) is
                computed and mirrored; the diagonal stays 0. When False
                all ordered pairs including i == j are computed.

        Returns:
            n x n matrix of pairwise results
        """
        n = len(items)
        result_matrix = np.zeros((n, n))

        # Generate (i, j, item_i, item_j) work units.
        pairs = [
            (i, j, items[i], items[j])
            for i in range(n)
            for j in range(i + 1 if symmetric else 0, n)
        ]

        # Process pairs in parallel via a picklable helper (was a local
        # closure, unpicklable for ProcessPoolExecutor).
        results = ParallelProcessor.parallel_map(
            partial(NumpyParallel._apply_to_pair, operation_func),
            pairs,
            num_workers
        )

        # Fill result matrix
        for i, j, value in results:
            result_matrix[i, j] = value
            if symmetric:
                result_matrix[j, i] = value

        return result_matrix