nettracer3d 0.9.8__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nettracer3d might be problematic. Click here for more details.

nettracer3d/stats.py ADDED
@@ -0,0 +1,861 @@
1
+ import sys
2
+ import pandas as pd
3
+ import numpy as np
4
+ from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
5
+ QHBoxLayout, QLabel, QTableWidget, QTableWidgetItem,
6
+ QPushButton, QComboBox, QTextEdit, QSplitter,
7
+ QListWidget, QListWidgetItem, QFrame, QMessageBox,
8
+ QHeaderView, QAbstractItemView, QCheckBox)
9
+ from PyQt6.QtCore import Qt, QMimeData
10
+ from PyQt6.QtGui import QDragEnterEvent, QDropEvent, QFont, QDrag
11
+ from scipy import stats
12
+ import os
13
+
14
class DragDropListWidget(QListWidget):
    """List of selected column names, filled by dropping text onto it.

    Every entry is rendered as a small row widget holding the column name
    and a round "×" button that removes that entry again.
    """

    # Appearance of the per-row remove button.
    _REMOVE_BUTTON_STYLE = """
            QPushButton {
                background-color: #ff4444;
                color: white;
                border: none;
                border-radius: 10px;
                font-weight: bold;
                font-size: 12px;
            }
            QPushButton:hover {
                background-color: #cc3333;
            }
            QPushButton:pressed {
                background-color: #aa2222;
            }
        """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setAcceptDrops(True)
        self.setDragDropMode(QAbstractItemView.DragDropMode.InternalMove)
        self.setDefaultDropAction(Qt.DropAction.MoveAction)

    def dragEnterEvent(self, event: QDragEnterEvent):
        """Accept the drag only when it carries plain text."""
        if not event.mimeData().hasText():
            event.ignore()
            return
        event.acceptProposedAction()

    def dropEvent(self, event: QDropEvent):
        """Add the dropped text as a column entry unless it is already listed."""
        if not event.mimeData().hasText():
            event.ignore()
            return

        column_name = event.mimeData().text()
        row_widgets = (self.itemWidget(self.item(row)) for row in range(self.count()))
        if any(w and w.column_name == column_name for w in row_widgets):
            # Already listed: leave the list (and the event) untouched.
            return

        self.add_column_item(column_name)
        event.acceptProposedAction()

    def add_column_item(self, column_name):
        """Append a row showing ``column_name`` together with its remove button."""
        from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLabel, QPushButton

        entry = QListWidgetItem()
        self.addItem(entry)

        # The row widget remembers which column it stands for so that
        # dropEvent() and get_selected_columns() can read it back later.
        row_widget = QWidget()
        row_widget.column_name = column_name
        row_layout = QHBoxLayout(row_widget)
        row_layout.setContentsMargins(5, 2, 5, 2)

        name_label = QLabel(column_name)
        name_label.setStyleSheet("QLabel { color: #333; }")
        row_layout.addWidget(name_label)

        remove_button = QPushButton("×")
        remove_button.setMaximumSize(20, 20)
        remove_button.setStyleSheet(self._REMOVE_BUTTON_STYLE)
        remove_button.clicked.connect(lambda: self.remove_column_item(entry))
        row_layout.addWidget(remove_button)

        entry.setSizeHint(row_widget.sizeHint())
        self.setItemWidget(entry, row_widget)

    def remove_column_item(self, item):
        """Take ``item`` out of the list if it is still part of it."""
        index = self.row(item)
        if index < 0:
            return
        self.takeItem(index)

    def get_selected_columns(self):
        """Return the column names of all rows, in list order."""
        row_widgets = [self.itemWidget(self.item(row)) for row in range(self.count())]
        return [w.column_name for w in row_widgets if w and hasattr(w, 'column_name')]
104
+
105
class DragDropTableWidget(QTableWidget):
    """Table whose current column can be dragged out as its header text."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setDragEnabled(True)
        self.setDragDropMode(QAbstractItemView.DragDropMode.DragOnly)
        self.setDefaultDropAction(Qt.DropAction.CopyAction)

    def startDrag(self, supportedActions):
        """Start a copy drag whose text payload is the current column's header.

        Nothing happens when the table has no current column. Columns
        without a header item fall back to the name ``Column_<index>``.
        """
        from PyQt6.QtGui import QPixmap, QPainter

        column = self.currentColumn()
        if column < 0:
            return

        header_item = self.horizontalHeaderItem(column)
        header_text = header_item.text() if header_item else f"Column_{column}"

        # Only the header text travels with the drag.
        payload = QMimeData()
        payload.setText(header_text)

        drag = QDrag(self)
        drag.setMimeData(payload)

        # Small grey tag showing the header text while dragging.
        preview = QPixmap(150, 30)
        preview.fill(Qt.GlobalColor.lightGray)
        painter = QPainter(preview)
        painter.drawText(preview.rect(), Qt.AlignmentFlag.AlignCenter, header_text)
        painter.end()
        drag.setPixmap(preview)

        drag.exec(Qt.DropAction.CopyAction)
138
+
139
class FileDropWidget(QWidget):
    """Drop target that hands a dropped .xlsx/.csv file to its parent window.

    The parent passed to the constructor is stored as ``parent_window`` and
    its ``load_file(path)`` method is called with the dropped file path.
    """

    # File extensions (lower case) that the drop area reacts to.
    _SUPPORTED_SUFFIXES = ('.xlsx', '.csv')

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setAcceptDrops(True)
        self.parent_window = parent

        self.label = QLabel("Drop .xlsx or .csv files here")
        self.label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.label.setStyleSheet("""
            QLabel {
                border: 2px dashed #aaa;
                border-radius: 10px;
                padding: 20px;
                background-color: #f9f9f9;
                font-size: 14px;
                color: #666;
            }
        """)

        layout = QVBoxLayout()
        layout.addWidget(self.label)
        self.setLayout(layout)

    def _first_supported_file(self, event):
        """Return the first local .xlsx/.csv path carried by ``event``, else None."""
        mime = event.mimeData()
        if not mime.hasUrls():
            return None
        for url in mime.urls():
            if not url.isLocalFile():
                continue
            candidate = url.toLocalFile()
            if candidate.lower().endswith(self._SUPPORTED_SUFFIXES):
                return candidate
        return None

    def dragEnterEvent(self, event: QDragEnterEvent):
        """Accept the drag when it contains at least one supported local file."""
        if self._first_supported_file(event) is None:
            event.ignore()
        else:
            event.acceptProposedAction()

    def dropEvent(self, event: QDropEvent):
        """Load the first supported local file of the drop via the parent window."""
        file_path = self._first_supported_file(event)
        if file_path is None:
            event.ignore()
            return
        self.parent_window.load_file(file_path)
        event.acceptProposedAction()
186
+
187
class StatisticalTestGUI(QMainWindow):
    """Main window: load a table, pick columns by drag and drop, run a test.

    The left half shows a file drop area and a preview of the loaded data;
    the right half holds the selected columns, the test chooser, the execute
    button and a read-only text area that receives all results and errors.
    """

    # Maximum number of DataFrame rows rendered in the preview table.
    MAX_DISPLAY_ROWS = 1000
    # Significance threshold used for every RESULT line.
    ALPHA = 0.05

    def __init__(self):
        super().__init__()
        self.current_dataframe = None
        self.init_ui()

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------

    def init_ui(self):
        """Build the window: a horizontal splitter with a left and right panel."""
        self.setWindowTitle("Statistical Testing GUI")
        self.setGeometry(100, 100, 1200, 800)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QHBoxLayout(central_widget)

        splitter = QSplitter(Qt.Orientation.Horizontal)
        main_layout.addWidget(splitter)

        splitter.addWidget(self._build_left_panel())
        splitter.addWidget(self._build_right_panel())
        splitter.setSizes([700, 500])

    @staticmethod
    def _section_label(text):
        """Return a bold 12pt Arial heading label."""
        heading = QLabel(text)
        heading.setFont(QFont("Arial", 12, QFont.Weight.Bold))
        return heading

    def _build_left_panel(self):
        """Create the file staging area and the data preview table."""
        panel = QWidget()
        layout = QVBoxLayout(panel)

        layout.addWidget(self._section_label("Data Staging Area"))
        self.file_drop_widget = FileDropWidget(self)
        self.file_drop_widget.setMaximumHeight(100)
        layout.addWidget(self.file_drop_widget)

        layout.addWidget(self._section_label("Data Display"))
        self.data_table = DragDropTableWidget()
        self.data_table.setAlternatingRowColors(True)
        layout.addWidget(self.data_table)

        return panel

    def _build_right_panel(self):
        """Create the column selection, test chooser, execute button and output."""
        panel = QWidget()
        layout = QVBoxLayout(panel)

        # Column selection area
        layout.addWidget(self._section_label("Dataset Selection Area"))
        hint = QLabel("Drag column headers here to select datasets for comparison")
        hint.setStyleSheet("color: #666; font-style: italic;")
        layout.addWidget(hint)

        self.column_list = DragDropListWidget()
        self.column_list.setMaximumHeight(150)
        layout.addWidget(self.column_list)

        clear_button = QPushButton("Clear Selected Columns")
        clear_button.clicked.connect(self.clear_columns)
        layout.addWidget(clear_button)

        # Test selection
        layout.addWidget(self._section_label("Statistical Test Selection"))
        self.test_combo = QComboBox()
        self.test_combo.addItems([
            "Student's t-test (independent)",
            "Student's t-test (paired)",
            "Welch's t-test (independent)",
            "Welch's t-test (paired)",
            "One-way ANOVA",
            "Mann-Whitney U test",
            "Correlation analysis (Pearson)",
            "Normality test (Shapiro-Wilk)",
            "Chi-square test of independence",
        ])
        self.test_combo.currentTextChanged.connect(self.update_test_info)
        layout.addWidget(self.test_combo)

        self.test_info_label = QLabel()
        self.test_info_label.setStyleSheet("color: #666; font-style: italic; padding: 5px;")
        self.test_info_label.setWordWrap(True)
        layout.addWidget(self.test_info_label)
        # Show the requirements of the initially selected test.
        self.update_test_info()

        # Execute button
        self.execute_button = QPushButton("Execute Statistical Test")
        self.execute_button.clicked.connect(self.execute_test)
        self.execute_button.setStyleSheet("""
            QPushButton {
                background-color: #4CAF50;
                color: white;
                border: none;
                padding: 10px;
                font-size: 14px;
                font-weight: bold;
                border-radius: 5px;
            }
            QPushButton:hover {
                background-color: #45a049;
            }
            QPushButton:pressed {
                background-color: #3e8e41;
            }
        """)
        layout.addWidget(self.execute_button)

        # Output area
        layout.addWidget(self._section_label("Test Results"))
        self.output_text = QTextEdit()
        self.output_text.setReadOnly(True)
        self.output_text.setMaximumHeight(300)
        layout.addWidget(self.output_text)

        return panel

    def update_test_info(self):
        """Show the column requirements of the test selected in the combo box."""
        test_type = self.test_combo.currentText()
        # First matching fragment wins; order mirrors the combo box entries.
        descriptions = (
            ("t-test (independent)",
             "Requires: 2 columns (independent groups)\nCompares means of two separate groups"),
            ("t-test (paired)",
             "Requires: 2 columns (same subjects measured twice)\nCompares paired observations"),
            ("One-way ANOVA",
             "Requires: 2+ columns\nCompares means across multiple groups"),
            ("Mann-Whitney U",
             "Requires: 2 columns (independent groups)\nNon-parametric alternative to independent t-test"),
            ("Correlation",
             "Requires: 2 columns\nMeasures linear relationship between variables"),
            ("Normality test",
             "Requires: 1+ columns\nTests if data follows normal distribution"),
            ("Chi-square",
             "Requires: 2 columns (categorical)\nTests independence between categorical variables"),
        )
        info_text = next((text for fragment, text in descriptions if fragment in test_type), "")
        self.test_info_label.setText(info_text)

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------

    def load_file(self, file_path):
        """Load a .csv or .xlsx file into a DataFrame and show it in the table.

        Any exception raised while reading or displaying the file is reported
        through show_error() instead of propagating.
        """
        try:
            lowered = file_path.lower()
            if lowered.endswith('.csv'):
                df = pd.read_csv(file_path)
            elif lowered.endswith('.xlsx'):
                df = pd.read_excel(file_path)
            else:
                self.show_error("Unsupported file format. Please use .csv or .xlsx files.")
                return

            # Column labels travel through the UI as text (table headers and
            # drag payloads). Normalising them to str keeps non-string labels,
            # e.g. numeric Excel headers, displayable and lets the dragged
            # text be found again in the DataFrame.
            df.columns = [str(label) for label in df.columns]

            self.current_dataframe = df
            self.populate_table(df)

            filename = os.path.basename(file_path)
            self.file_drop_widget.label.setText(f"Loaded: {filename}")
            self.file_drop_widget.label.setStyleSheet("""
                QLabel {
                    border: 2px solid #4CAF50;
                    border-radius: 10px;
                    padding: 20px;
                    background-color: #e8f5e8;
                    font-size: 14px;
                    color: #2e7d2e;
                }
            """)

            self.output_text.append(
                f"Successfully loaded {filename} with {df.shape[0]} rows and {df.shape[1]} columns."
            )

        except Exception as e:
            self.show_error(f"Error loading file: {str(e)}")

    def populate_table(self, df):
        """Fill the preview table with at most MAX_DISPLAY_ROWS rows of ``df``."""
        total_rows, total_cols = df.shape
        shown_rows = min(total_rows, self.MAX_DISPLAY_ROWS)

        self.data_table.setRowCount(shown_rows)
        self.data_table.setColumnCount(total_cols)
        # Header labels must be strings for the table widget.
        self.data_table.setHorizontalHeaderLabels([str(label) for label in df.columns])

        for row in range(shown_rows):
            for col in range(total_cols):
                self.data_table.setItem(row, col, QTableWidgetItem(str(df.iloc[row, col])))

        self.data_table.horizontalHeader().setStretchLastSection(True)
        self.data_table.resizeColumnsToContents()

        if total_rows > self.MAX_DISPLAY_ROWS:
            self.output_text.append(
                f"Note: Displaying first {self.MAX_DISPLAY_ROWS} rows of {total_rows} total rows."
            )

    def clear_columns(self):
        """Clear all selected columns"""
        self.column_list.clear()

    # ------------------------------------------------------------------
    # Column access
    # ------------------------------------------------------------------

    def get_numeric_column_data(self, column_name):
        """Return the numeric values of a column as an array, without NaN.

        Values that cannot be converted to numbers are treated as missing.
        Returns None when no data is loaded or the column does not exist.
        """
        frame = self.current_dataframe
        if frame is None or column_name not in frame.columns:
            return None
        return pd.to_numeric(frame[column_name], errors='coerce').dropna().values

    def get_categorical_column_data(self, column_name):
        """Return the non-missing values of a column as an array.

        Returns None when no data is loaded or the column does not exist.
        """
        frame = self.current_dataframe
        if frame is None or column_name not in frame.columns:
            return None
        return frame[column_name].dropna().values

    def _get_paired_data(self, first, second, numeric=True):
        """Return row-aligned values of two columns.

        Only rows in which BOTH columns hold a value are kept, so element i
        of the first array and element i of the second array always come from
        the same row. With ``numeric=True`` non-numeric cells count as missing.

        Returns a tuple ``(values_first, values_second, incomplete_rows)``
        where ``incomplete_rows`` is the number of rows that had a value in
        exactly one of the two columns.
        """
        frame = self.current_dataframe
        left, right = frame[first], frame[second]
        if numeric:
            left = pd.to_numeric(left, errors='coerce')
            right = pd.to_numeric(right, errors='coerce')
        has_left, has_right = left.notna(), right.notna()
        complete = has_left & has_right
        incomplete_rows = int((has_left ^ has_right).sum())
        return left[complete].values, right[complete].values, incomplete_rows

    # ------------------------------------------------------------------
    # Reporting helpers
    # ------------------------------------------------------------------

    def _report(self, *lines):
        """Append each given line to the results area."""
        for line in lines:
            self.output_text.append(line)

    def _start_report(self, title):
        """Clear the results area and print ``title`` between two rulers."""
        self.output_text.clear()
        ruler = "=" * 50
        self._report(ruler, title, ruler)

    def _report_excluded_rows(self, incomplete_rows):
        """Tell the user how many rows were left out of a paired analysis."""
        if incomplete_rows:
            self._report(
                f"Note: {incomplete_rows} row(s) with a value in only one of the two "
                "columns were excluded.",
                "",
            )

    @staticmethod
    def _welch_degrees_of_freedom(data1, data2):
        """Return the Welch-Satterthwaite degrees of freedom (NaN if undefined)."""
        n1, n2 = len(data1), len(data2)
        if n1 < 2 or n2 < 2:
            return float('nan')
        term1 = np.var(data1, ddof=1) / n1
        term2 = np.var(data2, ddof=1) / n2
        denominator = term1 ** 2 / (n1 - 1) + term2 ** 2 / (n2 - 1)
        if denominator == 0:
            return float('nan')
        return (term1 + term2) ** 2 / denominator

    # ------------------------------------------------------------------
    # Test execution
    # ------------------------------------------------------------------

    def execute_test(self):
        """Run the test chosen in the combo box on the selected columns.

        Exceptions raised by a test are reported through show_error().
        """
        if self.current_dataframe is None:
            self.show_error("Please load a dataset first.")
            return

        selected_columns = self.column_list.get_selected_columns()
        if len(selected_columns) == 0:
            self.show_error("Please select at least one column for testing.")
            return

        test_type = self.test_combo.currentText()
        # First fragment contained in the combo text selects the runner.
        runners = (
            ("Student's t-test (independent)",
             lambda cols: self.execute_ttest(cols, paired=False, equal_var=True)),
            ("Student's t-test (paired)",
             lambda cols: self.execute_ttest(cols, paired=True, equal_var=True)),
            ("Welch's t-test (independent)",
             lambda cols: self.execute_ttest(cols, paired=False, equal_var=False)),
            ("Welch's t-test (paired)",
             lambda cols: self.execute_ttest(cols, paired=True, equal_var=False)),
            ("One-way ANOVA", self.execute_anova),
            ("Mann-Whitney U test", self.execute_mannwhitney),
            ("Correlation analysis", self.execute_correlation),
            ("Normality test", self.execute_normality_test),
            ("Chi-square test", self.execute_chisquare),
        )

        try:
            for fragment, runner in runners:
                if fragment in test_type:
                    runner(selected_columns)
                    break
        except Exception as e:
            self.show_error(f"Error executing test: {str(e)}")

    def execute_ttest(self, selected_columns, paired=False, equal_var=True):
        """Run a two-sample t-test on exactly two selected columns.

        Args:
            selected_columns: names of the two columns to compare.
            paired: use scipy's ttest_rel on row-aligned pairs instead of
                ttest_ind on the two columns' own values.
            equal_var: passed to ttest_ind; False selects Welch's variant.
                It has no effect on the paired computation and only changes
                the heading there.
        """
        if len(selected_columns) != 2:
            self.show_error(f"t-test requires exactly 2 columns. You have selected {len(selected_columns)} columns.")
            return

        first, second = selected_columns
        data1 = self.get_numeric_column_data(first)
        data2 = self.get_numeric_column_data(second)

        if data1 is None or len(data1) == 0:
            self.show_error(f"Column '{first}' contains no numeric data.")
            return
        if data2 is None or len(data2) == 0:
            self.show_error(f"Column '{second}' contains no numeric data.")
            return

        incomplete_rows = 0
        if paired:
            # Pair by row: dropping missing values per column would shift the
            # remaining values against each other and pair unrelated rows.
            data1, data2, incomplete_rows = self._get_paired_data(first, second)
            if len(data1) < 2:
                self.show_error("Paired t-test requires at least 2 complete pairs of numeric observations.")
                return
            statistic, p_value = stats.ttest_rel(data1, data2)
            dof_text = str(len(data1) - 1)
        else:
            statistic, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var)
            if equal_var:
                dof_text = str(len(data1) + len(data2) - 2)
            else:
                # Welch's test does not use the pooled n1 + n2 - 2.
                dof_text = f"{self._welch_degrees_of_freedom(data1, data2):.4f}"

        test_name = "WELCH'S" if not equal_var else "STUDENT'S"
        test_type_desc = "PAIRED" if paired else "INDEPENDENT"

        self._start_report(f"{test_name} T-TEST ({test_type_desc}) RESULTS")
        self._report(
            f"Group 1: {first}",
            f"  Sample size (n₁): {len(data1)}",
            f"  Mean: {np.mean(data1):.4f}",
            f"  Std Dev: {np.std(data1, ddof=1):.4f}",
            "",
            f"Group 2: {second}",
            f"  Sample size (n₂): {len(data2)}",
            f"  Mean: {np.mean(data2):.4f}",
            f"  Std Dev: {np.std(data2, ddof=1):.4f}",
            "",
        )
        self._report_excluded_rows(incomplete_rows)
        self._report(
            "TEST STATISTICS:",
            f"  t-statistic: {statistic:.6f}",
            f"  p-value: {p_value:.6f}",
            f"  Degrees of freedom: {dof_text}",
            "",
        )

        alpha = self.ALPHA
        if p_value < alpha:
            self._report(f"RESULT: Significant difference (p < {alpha})")
        else:
            self._report(f"RESULT: No significant difference (p ≥ {alpha})")

    def execute_anova(self, selected_columns):
        """Run a one-way ANOVA across two or more selected columns."""
        if len(selected_columns) < 2:
            self.show_error(f"One-way ANOVA requires at least 2 columns. You have selected {len(selected_columns)} columns.")
            return

        groups = []
        for col_name in selected_columns:
            values = self.get_numeric_column_data(col_name)
            if values is None or len(values) == 0:
                self.show_error(f"Column '{col_name}' contains no numeric data.")
                return
            groups.append((col_name, values))

        datasets = [values for _, values in groups]
        statistic, p_value = stats.f_oneway(*datasets)

        self._start_report("ONE-WAY ANOVA RESULTS")
        for position, (col_name, values) in enumerate(groups, start=1):
            self._report(
                f"Group {position}: {col_name}",
                f"  Sample size (n): {len(values)}",
                f"  Mean: {np.mean(values):.4f}",
                f"  Std Dev: {np.std(values, ddof=1):.4f}",
                "",
            )

        group_count = len(datasets)
        total_n = sum(len(values) for values in datasets)
        self._report(
            "TEST STATISTICS:",
            f"  F-statistic: {statistic:.6f}",
            f"  p-value: {p_value:.6f}",
            f"  Degrees of freedom (between): {group_count - 1}",
            f"  Degrees of freedom (within): {total_n - group_count}",
            "",
        )

        alpha = self.ALPHA
        if p_value < alpha:
            self._report(f"RESULT: Significant difference between groups (p < {alpha})")
        else:
            self._report(f"RESULT: No significant difference between groups (p ≥ {alpha})")

    def execute_mannwhitney(self, selected_columns):
        """Run a two-sided Mann-Whitney U test on exactly two selected columns."""
        if len(selected_columns) != 2:
            self.show_error(f"Mann-Whitney U test requires exactly 2 columns. You have selected {len(selected_columns)} columns.")
            return

        first, second = selected_columns
        data1 = self.get_numeric_column_data(first)
        data2 = self.get_numeric_column_data(second)

        if data1 is None or len(data1) == 0:
            self.show_error(f"Column '{first}' contains no numeric data.")
            return
        if data2 is None or len(data2) == 0:
            self.show_error(f"Column '{second}' contains no numeric data.")
            return

        statistic, p_value = stats.mannwhitneyu(data1, data2, alternative='two-sided')

        # Rank the pooled sample once; group 1 occupies the leading entries.
        pooled_ranks = stats.rankdata(np.concatenate([data1, data2]))
        ranks1 = pooled_ranks[:len(data1)]
        ranks2 = pooled_ranks[len(data1):]

        self._start_report("MANN-WHITNEY U TEST RESULTS")
        self._report(
            f"Group 1: {first}",
            f"  Sample size (n₁): {len(data1)}",
            f"  Median: {np.median(data1):.4f}",
            f"  Mean rank: {ranks1.mean():.2f}",
            "",
            f"Group 2: {second}",
            f"  Sample size (n₂): {len(data2)}",
            f"  Median: {np.median(data2):.4f}",
            f"  Mean rank: {ranks2.mean():.2f}",
            "",
            "TEST STATISTICS:",
            f"  U-statistic: {statistic:.6f}",
            f"  p-value: {p_value:.6f}",
            "",
        )

        alpha = self.ALPHA
        if p_value < alpha:
            self._report(f"RESULT: Significant difference (p < {alpha})")
        else:
            self._report(f"RESULT: No significant difference (p ≥ {alpha})")

    def execute_correlation(self, selected_columns):
        """Run a Pearson correlation on the row-aligned values of two columns."""
        if len(selected_columns) != 2:
            self.show_error(f"Correlation analysis requires exactly 2 columns. You have selected {len(selected_columns)} columns.")
            return

        first, second = selected_columns
        data1 = self.get_numeric_column_data(first)
        data2 = self.get_numeric_column_data(second)

        if data1 is None or len(data1) == 0:
            self.show_error(f"Column '{first}' contains no numeric data.")
            return
        if data2 is None or len(data2) == 0:
            self.show_error(f"Column '{second}' contains no numeric data.")
            return

        # Correlation is only meaningful for values taken from the same row,
        # so pair by row instead of comparing independently cleaned columns.
        data1, data2, incomplete_rows = self._get_paired_data(first, second)
        if len(data1) < 2:
            self.show_error("Correlation analysis requires at least 2 complete pairs of numeric observations.")
            return

        correlation, p_value = stats.pearsonr(data1, data2)
        undefined = bool(np.isnan(correlation))

        self._start_report("PEARSON CORRELATION ANALYSIS RESULTS")
        self._report(
            f"Variable 1: {first}",
            f"Variable 2: {second}",
            f"Sample size (n): {len(data1)}",
            "",
        )
        self._report_excluded_rows(incomplete_rows)
        self._report(
            "CORRELATION STATISTICS:",
            f"  Pearson correlation coefficient (r): {correlation:.6f}",
            f"  p-value: {p_value:.6f}",
            f"  R-squared (r²): {correlation**2:.6f}",
            "",
        )

        if undefined:
            # r is NaN, e.g. when one variable is constant; every "<"
            # comparison below would be False and mislabel the result.
            strength = direction = "undefined"
        else:
            abs_corr = abs(correlation)
            thresholds = ((0.1, "negligible"), (0.3, "weak"), (0.5, "moderate"), (0.7, "strong"))
            strength = next((label for limit, label in thresholds if abs_corr < limit), "very strong")
            if correlation > 0:
                direction = "positive"
            elif correlation < 0:
                direction = "negative"
            else:
                direction = "none"

        self._report(
            "INTERPRETATION:",
            f"  Correlation strength: {strength}",
            f"  Correlation direction: {direction}",
            "",
        )

        alpha = self.ALPHA
        if undefined:
            self._report("RESULT: Correlation is undefined (is one of the variables constant?)")
        elif p_value < alpha:
            self._report(f"RESULT: Significant correlation (p < {alpha})")
        else:
            self._report(f"RESULT: No significant correlation (p ≥ {alpha})")

    def execute_normality_test(self, selected_columns):
        """Run a Shapiro-Wilk test separately on every selected column.

        Columns without numeric data, with fewer than 3 values or with more
        than 5000 values are reported and skipped.
        """
        if len(selected_columns) == 0:
            self.show_error("Normality test requires at least 1 column.")
            return

        self._start_report("SHAPIRO-WILK NORMALITY TEST RESULTS")
        alpha = self.ALPHA

        for col_name in selected_columns:
            values = self.get_numeric_column_data(col_name)

            if values is None or len(values) == 0:
                self._report(f"Column '{col_name}': No numeric data available", "")
                continue
            if len(values) < 3:
                self._report(f"Column '{col_name}': Insufficient data (n < 3)", "")
                continue
            if len(values) > 5000:
                self._report(
                    f"Column '{col_name}': Sample too large for Shapiro-Wilk (n > 5000)",
                    "Consider using Kolmogorov-Smirnov test for large samples.",
                    "",
                )
                continue

            statistic, p_value = stats.shapiro(values)
            self._report(
                f"Column: {col_name}",
                f"  Sample size (n): {len(values)}",
                f"  Mean: {np.mean(values):.4f}",
                f"  Std Dev: {np.std(values, ddof=1):.4f}",
                f"  Shapiro-Wilk statistic: {statistic:.6f}",
                f"  p-value: {p_value:.6f}",
            )
            if p_value < alpha:
                self._report(f"  RESULT: Data significantly deviates from normal distribution (p < {alpha})")
            else:
                self._report(f"  RESULT: Data appears normally distributed (p ≥ {alpha})")
            self._report("")

    def execute_chisquare(self, selected_columns):
        """Run a chi-square test of independence on two categorical columns."""
        if len(selected_columns) != 2:
            self.show_error(f"Chi-square test requires exactly 2 columns. You have selected {len(selected_columns)} columns.")
            return

        first, second = selected_columns
        data1 = self.get_categorical_column_data(first)
        data2 = self.get_categorical_column_data(second)

        if data1 is None or len(data1) == 0:
            self.show_error(f"Column '{first}' contains no data.")
            return
        if data2 is None or len(data2) == 0:
            self.show_error(f"Column '{second}' contains no data.")
            return

        # Cross-tabulation needs both categories of the SAME row, so pair by
        # row rather than lining up two independently cleaned columns.
        data1, data2, incomplete_rows = self._get_paired_data(first, second, numeric=False)
        if len(data1) == 0:
            self.show_error("Chi-square test requires at least one row with a value in both columns.")
            return

        try:
            contingency_table = pd.crosstab(data1, data2)
            chi2, p_value, dof, expected = stats.chi2_contingency(contingency_table)

            self._start_report("CHI-SQUARE TEST OF INDEPENDENCE RESULTS")
            self._report(
                f"Variable 1: {first}",
                f"Variable 2: {second}",
                f"Sample size (n): {len(data1)}",
                "",
            )
            self._report_excluded_rows(incomplete_rows)
            self._report(
                "CONTINGENCY TABLE:",
                str(contingency_table),
                "",
                "TEST STATISTICS:",
                f"  Chi-square statistic: {chi2:.6f}",
                f"  p-value: {p_value:.6f}",
                f"  Degrees of freedom: {dof}",
                "",
            )

            # Expected-frequency rule of thumb for the chi-square approximation.
            min_expected = np.min(expected)
            cells_below_5 = np.sum(expected < 5)
            total_cells = expected.size

            self._report(
                "ASSUMPTION CHECK:",
                f"  Minimum expected frequency: {min_expected:.2f}",
                f"  Cells with expected frequency < 5: {cells_below_5}/{total_cells}",
            )
            if min_expected < 1 or (cells_below_5 / total_cells) > 0.2:
                self._report(
                    "  WARNING: Chi-square assumptions may be violated!",
                    "  Consider Fisher's exact test for small samples.",
                )
            else:
                self._report("  Chi-square assumptions satisfied.")
            self._report("")

            # Effect size (Cramér's V); undefined when a variable has only
            # one category because the denominator would be zero.
            n = len(data1)
            smaller_dimension = min(contingency_table.shape) - 1
            self._report("EFFECT SIZE:")
            if smaller_dimension > 0:
                cramers_v = np.sqrt(chi2 / (n * smaller_dimension))
                self._report(f"  Cramér's V: {cramers_v:.4f}")
            else:
                self._report("  Cramér's V: undefined (a variable has only one category)")
            self._report("")

            alpha = self.ALPHA
            if p_value < alpha:
                self._report(f"RESULT: Significant association between variables (p < {alpha})")
            else:
                self._report(f"RESULT: No significant association between variables (p ≥ {alpha})")

        except Exception as e:
            self.show_error(f"Error creating contingency table: {str(e)}")

    def show_error(self, message):
        """Show ``message`` in a modal error dialog and log it to the results area."""
        dialog = QMessageBox()
        dialog.setIcon(QMessageBox.Icon.Critical)
        dialog.setWindowTitle("Error")
        dialog.setText(message)
        dialog.exec()
        self.output_text.append(f"ERROR: {message}")
+
845
def main(app=None):
    """Create and show the statistics window.

    Without an ``app`` a QApplication is created, its event loop is run and
    the process exits with the loop's return code. When an ``app`` is passed
    in, no event loop is started here and the shown window is returned.
    """
    owns_event_loop = app is None
    if owns_event_loop:
        app = QApplication(sys.argv)

    window = StatisticalTestGUI()
    window.show()

    if not owns_event_loop:
        return window
    sys.exit(app.exec())


if __name__ == "__main__":
    main()