py2ls 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of py2ls might be problematic. Click here for more details.
- py2ls/.DS_Store +0 -0
- py2ls/.git/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
- py2ls/.git/objects/.DS_Store +0 -0
- py2ls/.git/refs/.DS_Store +0 -0
- py2ls/ImageLoader.py +621 -0
- py2ls/__init__.py +7 -5
- py2ls/apptainer2ls.py +3940 -0
- py2ls/batman.py +164 -42
- py2ls/bio.py +2595 -0
- py2ls/cell_image_clf.py +1632 -0
- py2ls/container2ls.py +4635 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/email/email_html_template.html +88 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/hyper_param_tabrepo_2024.py +1753 -0
- py2ls/data/mygenes_fields_241022.txt +355 -0
- py2ls/data/re_common_pattern.json +173 -0
- py2ls/data/sns_info.json +74 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/styles/stylelib/.DS_Store +0 -0
- py2ls/data/styles/stylelib/grid.mplstyle +15 -0
- py2ls/data/styles/stylelib/high-contrast.mplstyle +6 -0
- py2ls/data/styles/stylelib/high-vis.mplstyle +4 -0
- py2ls/data/styles/stylelib/ieee.mplstyle +15 -0
- py2ls/data/styles/stylelib/light.mplstyl +6 -0
- py2ls/data/styles/stylelib/muted.mplstyle +6 -0
- py2ls/data/styles/stylelib/nature-reviews-latex.mplstyle +616 -0
- py2ls/data/styles/stylelib/nature-reviews.mplstyle +616 -0
- py2ls/data/styles/stylelib/nature.mplstyle +31 -0
- py2ls/data/styles/stylelib/no-latex.mplstyle +10 -0
- py2ls/data/styles/stylelib/notebook.mplstyle +36 -0
- py2ls/data/styles/stylelib/paper.mplstyle +290 -0
- py2ls/data/styles/stylelib/paper2.mplstyle +305 -0
- py2ls/data/styles/stylelib/retro.mplstyle +4 -0
- py2ls/data/styles/stylelib/sans.mplstyle +10 -0
- py2ls/data/styles/stylelib/scatter.mplstyle +7 -0
- py2ls/data/styles/stylelib/science.mplstyle +48 -0
- py2ls/data/styles/stylelib/std-colors.mplstyle +4 -0
- py2ls/data/styles/stylelib/vibrant.mplstyle +6 -0
- py2ls/data/tiles.csv +146 -0
- py2ls/data/usages_pd.json +1417 -0
- py2ls/data/usages_sns.json +31 -0
- py2ls/docker2ls.py +5446 -0
- py2ls/ec2ls.py +61 -0
- py2ls/fetch_update.py +145 -0
- py2ls/ich2ls.py +1955 -296
- py2ls/im2.py +8242 -0
- py2ls/image_ml2ls.py +2100 -0
- py2ls/ips.py +33909 -3418
- py2ls/ml2ls.py +7700 -0
- py2ls/mol.py +289 -0
- py2ls/mount2ls.py +1307 -0
- py2ls/netfinder.py +873 -351
- py2ls/nl2ls.py +283 -0
- py2ls/ocr.py +1581 -458
- py2ls/plot.py +10394 -314
- py2ls/rna2ls.py +311 -0
- py2ls/ssh2ls.md +456 -0
- py2ls/ssh2ls.py +5933 -0
- py2ls/ssh2ls_v01.py +2204 -0
- py2ls/stats.py +66 -172
- py2ls/temp20251124.py +509 -0
- py2ls/translator.py +2 -0
- py2ls/utils/decorators.py +3564 -0
- py2ls/utils_bio.py +3453 -0
- {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/METADATA +113 -224
- {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/RECORD +72 -16
- {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/WHEEL +0 -0
py2ls/stats.py
CHANGED
|
@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
|
|
|
8
8
|
import warnings
|
|
9
9
|
|
|
10
10
|
warnings.filterwarnings("ignore", category=RuntimeWarning)
|
|
11
|
-
|
|
11
|
+
from .ips import df2array
|
|
12
12
|
|
|
13
13
|
# FuncStars --v 0.1.1
|
|
14
14
|
def FuncStars(
|
|
@@ -37,7 +37,7 @@ def FuncStars(
|
|
|
37
37
|
report=None,
|
|
38
38
|
report_scale=-0.1,
|
|
39
39
|
report_loc=None,
|
|
40
|
-
**kwargs
|
|
40
|
+
**kwargs,
|
|
41
41
|
):
|
|
42
42
|
if ax is None:
|
|
43
43
|
ax = plt.gca()
|
|
@@ -53,7 +53,7 @@ def FuncStars(
|
|
|
53
53
|
if y_loc is None:
|
|
54
54
|
y_loc = np.min(ylim) + yscale * (np.max(ylim) - np.min(ylim))
|
|
55
55
|
else:
|
|
56
|
-
y_loc=y_loc+(1-yscale) * np.abs(np.diff(ylim))+0.1 *y_loc
|
|
56
|
+
y_loc = y_loc + (1 - yscale) * np.abs(np.diff(ylim)) + 0.1 * y_loc
|
|
57
57
|
xcenter = np.mean([x1, x2])
|
|
58
58
|
if pval is not None:
|
|
59
59
|
# ns / *
|
|
@@ -79,7 +79,7 @@ def FuncStars(
|
|
|
79
79
|
y_loc,
|
|
80
80
|
symbol,
|
|
81
81
|
ha="center",
|
|
82
|
-
va="top"
|
|
82
|
+
va="top", # "center_baseline",
|
|
83
83
|
fontsize=fontsize,
|
|
84
84
|
fontname=fontname,
|
|
85
85
|
color=symbolcolor,
|
|
@@ -90,28 +90,31 @@ def FuncStars(
|
|
|
90
90
|
y_loc,
|
|
91
91
|
symbol * 2,
|
|
92
92
|
ha="center",
|
|
93
|
-
va="top"
|
|
93
|
+
va="top", # "center_baseline",
|
|
94
94
|
fontsize=fontsize,
|
|
95
95
|
fontname=fontname,
|
|
96
96
|
color=symbolcolor,
|
|
97
97
|
)
|
|
98
|
-
elif 0
|
|
98
|
+
elif 0 <= pval <= 0.001:
|
|
99
99
|
ax.text(
|
|
100
100
|
xcenter,
|
|
101
101
|
y_loc,
|
|
102
102
|
symbol * 3,
|
|
103
103
|
ha="center",
|
|
104
|
-
va="top"
|
|
104
|
+
va="top", # "center_baseline",
|
|
105
105
|
fontsize=fontsize,
|
|
106
106
|
fontname=fontname,
|
|
107
107
|
color=symbolcolor,
|
|
108
108
|
)
|
|
109
109
|
# lines indicators
|
|
110
|
-
if linego and 0
|
|
110
|
+
if linego and 0 <= pval <= 0.05:
|
|
111
111
|
# horizontal line
|
|
112
112
|
if yscale <= 0.99:
|
|
113
113
|
ax.plot(
|
|
114
|
-
[
|
|
114
|
+
[
|
|
115
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
|
116
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
|
117
|
+
],
|
|
115
118
|
[
|
|
116
119
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
|
117
120
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
|
@@ -122,7 +125,10 @@ def FuncStars(
|
|
|
122
125
|
)
|
|
123
126
|
# vertical line
|
|
124
127
|
ax.plot(
|
|
125
|
-
[
|
|
128
|
+
[
|
|
129
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
|
130
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
|
131
|
+
],
|
|
126
132
|
[
|
|
127
133
|
y_loc - np.abs(np.diff(ylim)) * tailindicator[0],
|
|
128
134
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
|
@@ -132,7 +138,10 @@ def FuncStars(
|
|
|
132
138
|
linewidth=linewidth,
|
|
133
139
|
)
|
|
134
140
|
ax.plot(
|
|
135
|
-
[
|
|
141
|
+
[
|
|
142
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
|
143
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
|
144
|
+
],
|
|
136
145
|
[
|
|
137
146
|
y_loc - np.abs(np.diff(ylim)) * tailindicator[1],
|
|
138
147
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
|
@@ -143,7 +152,10 @@ def FuncStars(
|
|
|
143
152
|
)
|
|
144
153
|
else:
|
|
145
154
|
ax.plot(
|
|
146
|
-
[
|
|
155
|
+
[
|
|
156
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
|
157
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
|
158
|
+
],
|
|
147
159
|
[
|
|
148
160
|
np.min(ylim)
|
|
149
161
|
+ 0.95 * (np.max(ylim) - np.min(ylim))
|
|
@@ -158,7 +170,10 @@ def FuncStars(
|
|
|
158
170
|
)
|
|
159
171
|
# vertical line
|
|
160
172
|
ax.plot(
|
|
161
|
-
[
|
|
173
|
+
[
|
|
174
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
|
175
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
|
176
|
+
],
|
|
162
177
|
[
|
|
163
178
|
np.min(ylim)
|
|
164
179
|
+ 0.95 * (np.max(ylim) - np.min(ylim))
|
|
@@ -172,7 +187,10 @@ def FuncStars(
|
|
|
172
187
|
linewidth=linewidth,
|
|
173
188
|
)
|
|
174
189
|
ax.plot(
|
|
175
|
-
[
|
|
190
|
+
[
|
|
191
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
|
192
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
|
193
|
+
],
|
|
176
194
|
[
|
|
177
195
|
np.min(ylim)
|
|
178
196
|
+ 0.95 * (np.max(ylim) - np.min(ylim))
|
|
@@ -312,9 +330,7 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
|
|
|
312
330
|
)
|
|
313
331
|
notes_stat = "paired t test"
|
|
314
332
|
# note: APA FORMAT
|
|
315
|
-
notes_APA = (
|
|
316
|
-
f"t({sum([nX1-1])})={round(stat_value,3)},p={round(pval,3)}"
|
|
317
|
-
)
|
|
333
|
+
notes_APA = f"t({sum([nX1-1])})={round(stat_value,3)},p={round(pval,3)}"
|
|
318
334
|
elif cfg_pmc == "non-parametric":
|
|
319
335
|
if "np" in pair: # Perform Mann-Whitney
|
|
320
336
|
stat_value, pval = stats.mannwhitneyu(
|
|
@@ -324,7 +340,9 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
|
|
|
324
340
|
if nX1 == nX2:
|
|
325
341
|
notes_APA = f"U(n={nX1})={round(stat_value,3)},p={round(pval,3)}"
|
|
326
342
|
else:
|
|
327
|
-
notes_APA =
|
|
343
|
+
notes_APA = (
|
|
344
|
+
f"U(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
|
|
345
|
+
)
|
|
328
346
|
elif "pa" in pair and "np" not in pair: # Wilcoxon signed-rank test
|
|
329
347
|
stat_value, pval = stats.wilcoxon(
|
|
330
348
|
x1, x2, method="exact", nan_policy="omit"
|
|
@@ -333,7 +351,9 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
|
|
|
333
351
|
if nX1 == nX2:
|
|
334
352
|
notes_APA = f"Z(n={nX1})={round(stat_value,3)},p={round(pval,3)}"
|
|
335
353
|
else:
|
|
336
|
-
notes_APA =
|
|
354
|
+
notes_APA = (
|
|
355
|
+
f"Z(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
|
|
356
|
+
)
|
|
337
357
|
|
|
338
358
|
# filling output
|
|
339
359
|
output["stat"] = stat_value
|
|
@@ -408,7 +428,7 @@ def FuncMultiCmpt(
|
|
|
408
428
|
subject=None,
|
|
409
429
|
group=None,
|
|
410
430
|
verbose=True,
|
|
411
|
-
post_hoc=False
|
|
431
|
+
post_hoc=False,
|
|
412
432
|
):
|
|
413
433
|
if group is None:
|
|
414
434
|
group = factor
|
|
@@ -520,12 +540,16 @@ def FuncMultiCmpt(
|
|
|
520
540
|
if "np" in cfg_pair: # 'unpaired'
|
|
521
541
|
res_tab = run_kruskal(data, dv, factor)
|
|
522
542
|
notes_stat = f"Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA"
|
|
523
|
-
notes_APA = [
|
|
543
|
+
notes_APA = [
|
|
544
|
+
f'H({res_tab.ddof1[0]},N={data.shape[0]})={round(res_tab.H[0],3)},p={round(res_tab["p-unc"][0],3)}'
|
|
545
|
+
]
|
|
524
546
|
|
|
525
547
|
elif "pa" in cfg_pair and "np" not in cfg_pair: # 'paired'
|
|
526
548
|
res_tab = run_friedman(data, dv, factor, subject, method="chisq")
|
|
527
549
|
notes_stat = f"Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA"
|
|
528
|
-
notes_APA = [
|
|
550
|
+
notes_APA = [
|
|
551
|
+
f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0],3)},p={round(res_tab["p-unc"][0],3)}'
|
|
552
|
+
]
|
|
529
553
|
|
|
530
554
|
# =============================================================================
|
|
531
555
|
# # Post-hoc
|
|
@@ -542,7 +566,7 @@ def FuncMultiCmpt(
|
|
|
542
566
|
go_mix_within = factor if ("pa" in cfg_pair) or ("np" not in cfg_pair) else None
|
|
543
567
|
|
|
544
568
|
if res_tab["p-unc"][0] <= 0.05:
|
|
545
|
-
post_hoc=True
|
|
569
|
+
post_hoc = True
|
|
546
570
|
if post_hoc:
|
|
547
571
|
# Pairwise Comparisons
|
|
548
572
|
method_post_hoc = [
|
|
@@ -610,9 +634,9 @@ def FuncMultiCmpt(
|
|
|
610
634
|
# # filling output
|
|
611
635
|
# =============================================================================
|
|
612
636
|
|
|
613
|
-
pd.set_option(
|
|
614
|
-
pd.set_option(
|
|
615
|
-
pd.set_option(
|
|
637
|
+
pd.set_option("display.max_columns", None) # Show all columns
|
|
638
|
+
pd.set_option("display.max_colwidth", None) # No limit on column width
|
|
639
|
+
pd.set_option("display.expand_frame_repr", False) # Prevent line-wrapping
|
|
616
640
|
|
|
617
641
|
output["stat"] = notes_stat
|
|
618
642
|
# print(output['APA'])
|
|
@@ -627,7 +651,7 @@ def FuncMultiCmpt(
|
|
|
627
651
|
def display_output(output: dict):
|
|
628
652
|
if isinstance(output, pd.DataFrame):
|
|
629
653
|
output = output.to_dict(orient="list")
|
|
630
|
-
# ['res_posthoc', 'stat', 'APA', 'pval', 'res_tab']
|
|
654
|
+
# ['res_posthoc', 'stat', 'APA', 'pval', 'res_tab']
|
|
631
655
|
|
|
632
656
|
# ? show APA
|
|
633
657
|
# print(f"\n\ndisplay stat_output")
|
|
@@ -641,7 +665,7 @@ def display_output(output: dict):
|
|
|
641
665
|
except:
|
|
642
666
|
pass
|
|
643
667
|
try:
|
|
644
|
-
print(f"APA ⤵\n{output[
|
|
668
|
+
print(f"APA ⤵\n{output['APA'][0]} ⤵\npost-hoc analysis ⤵")
|
|
645
669
|
display(output["res_posthoc"])
|
|
646
670
|
except:
|
|
647
671
|
pass
|
|
@@ -659,14 +683,24 @@ def corr_pair(pair):
|
|
|
659
683
|
|
|
660
684
|
|
|
661
685
|
def check_normality(data, verbose=True):
|
|
662
|
-
|
|
663
|
-
|
|
686
|
+
if len(data) <= 5000:
|
|
687
|
+
# Shapiro-Wilk test is designed to test the normality of a small sample, typically less than 5000 observations.
|
|
688
|
+
stat_shapiro, pval4norm = stats.shapiro(data)
|
|
689
|
+
method = "Shapiro-Wilk test"
|
|
690
|
+
else:
|
|
691
|
+
from scipy.stats import kstest, zscore
|
|
692
|
+
|
|
693
|
+
data_scaled = zscore(data) # a standard normal distribution(mean=0,sd=1)
|
|
694
|
+
stat_kstest, pval4norm = kstest(data_scaled, "norm")
|
|
695
|
+
method = "Kolmogorov–Smirnov test"
|
|
696
|
+
if pval4norm >= 0.05:
|
|
664
697
|
Normality = True
|
|
665
698
|
else:
|
|
666
699
|
Normality = False
|
|
667
700
|
if verbose:
|
|
701
|
+
print(f"'{method}' was used to test for normality")
|
|
668
702
|
(
|
|
669
|
-
print(
|
|
703
|
+
print("\nnormally distributed")
|
|
670
704
|
if Normality
|
|
671
705
|
else print(f"\n NOT normally distributed\n")
|
|
672
706
|
)
|
|
@@ -705,7 +739,7 @@ def extract_apa(res_tab):
|
|
|
705
739
|
for irow in range(res_tab.shape[0]):
|
|
706
740
|
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow],3)},p={round(res_tab["p-unc"][irow],3)}'
|
|
707
741
|
notes_APA.append(note_tmp)
|
|
708
|
-
elif "DF" in res_tab:
|
|
742
|
+
elif "DF" in res_tab:
|
|
709
743
|
for irow in range(res_tab.shape[0] - 1):
|
|
710
744
|
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow],3)},p={round(res_tab["p-unc"][irow],3)}'
|
|
711
745
|
notes_APA.append(note_tmp)
|
|
@@ -867,146 +901,6 @@ def df_wide_long(df):
|
|
|
867
901
|
elif rows > columns:
|
|
868
902
|
return "Long"
|
|
869
903
|
|
|
870
|
-
|
|
871
|
-
def sort_rows_move_nan(arr, sort=False):
|
|
872
|
-
# Handle edge cases where all values are NaN
|
|
873
|
-
if np.all(np.isnan(arr)):
|
|
874
|
-
return arr # Return unchanged if the entire array is NaN
|
|
875
|
-
|
|
876
|
-
if sort:
|
|
877
|
-
# Replace NaNs with a temporary large value for sorting
|
|
878
|
-
temp_value = (
|
|
879
|
-
np.nanmax(arr[np.isfinite(arr)]) + 1 if np.any(np.isfinite(arr)) else np.inf
|
|
880
|
-
)
|
|
881
|
-
arr_no_nan = np.where(np.isnan(arr), temp_value, arr)
|
|
882
|
-
|
|
883
|
-
# Sort each row
|
|
884
|
-
sorted_arr = np.sort(arr_no_nan, axis=1)
|
|
885
|
-
|
|
886
|
-
# Move NaNs to the end
|
|
887
|
-
result_arr = np.where(sorted_arr == temp_value, np.nan, sorted_arr)
|
|
888
|
-
else:
|
|
889
|
-
result_rows = []
|
|
890
|
-
for row in arr:
|
|
891
|
-
# Separate non-NaN and NaN values
|
|
892
|
-
non_nan_values = row[~np.isnan(row)]
|
|
893
|
-
nan_count = np.isnan(row).sum()
|
|
894
|
-
# Create a new row with non-NaN values followed by NaNs
|
|
895
|
-
new_row = np.concatenate([non_nan_values, [np.nan] * nan_count])
|
|
896
|
-
result_rows.append(new_row)
|
|
897
|
-
# Convert the list of rows back into a 2D NumPy array
|
|
898
|
-
result_arr = np.array(result_rows)
|
|
899
|
-
|
|
900
|
-
# Remove rows/columns that contain only NaNs
|
|
901
|
-
clean_arr = result_arr[~np.isnan(result_arr).all(axis=1)]
|
|
902
|
-
clean_arr_ = clean_arr[:, ~np.isnan(clean_arr).all(axis=0)]
|
|
903
|
-
|
|
904
|
-
return clean_arr_
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
def df2array(data: pd.DataFrame, x=None, y=None, hue=None, sort=False):
|
|
908
|
-
if hue is None:
|
|
909
|
-
a = []
|
|
910
|
-
if sort:
|
|
911
|
-
cat_x=np.sort(data[x].unique().tolist()).tolist()
|
|
912
|
-
else:
|
|
913
|
-
cat_x = data[x].unique().tolist()
|
|
914
|
-
for i, x_ in enumerate(cat_x):
|
|
915
|
-
new_ = data.loc[data[x] == x_, y].to_list()
|
|
916
|
-
a = padcat(a, new_, axis=0)
|
|
917
|
-
return sort_rows_move_nan(a).T
|
|
918
|
-
else:
|
|
919
|
-
a = []
|
|
920
|
-
if sort:
|
|
921
|
-
cat_x = np.sort(data[x].unique().tolist()).tolist()
|
|
922
|
-
cat_hue = np.sort(data[hue].unique().tolist()).tolist()
|
|
923
|
-
else:
|
|
924
|
-
cat_x = data[x].unique().tolist()
|
|
925
|
-
cat_hue = data[hue].unique().tolist()
|
|
926
|
-
for i, x_ in enumerate(cat_x):
|
|
927
|
-
for j, hue_ in enumerate(cat_hue):
|
|
928
|
-
new_ = data.loc[(data[x] == x_) & (data[hue] == hue_), y].to_list()
|
|
929
|
-
a = padcat(a, new_, axis=0)
|
|
930
|
-
return sort_rows_move_nan(a).T
|
|
931
|
-
|
|
932
|
-
def array2df(data: np.ndarray):
|
|
933
|
-
df = pd.DataFrame()
|
|
934
|
-
df["group"] = (
|
|
935
|
-
np.tile(
|
|
936
|
-
["group" + str(i) for i in range(1, data.shape[1] + 1)], [data.shape[0], 1]
|
|
937
|
-
)
|
|
938
|
-
.reshape(-1, 1, order="F")[:, 0]
|
|
939
|
-
.tolist()
|
|
940
|
-
)
|
|
941
|
-
df["value"] = data.reshape(-1, 1, order="F")
|
|
942
|
-
return df
|
|
943
|
-
def padcat(*args, fill_value=np.nan, axis=1, order="row"):
|
|
944
|
-
"""
|
|
945
|
-
Concatenate vectors with padding.
|
|
946
|
-
|
|
947
|
-
Parameters:
|
|
948
|
-
*args : variable number of list or 1D arrays
|
|
949
|
-
Input arrays to concatenate.
|
|
950
|
-
fill_value : scalar, optional
|
|
951
|
-
The value to use for padding the shorter lists (default is np.nan).
|
|
952
|
-
axis : int, optional
|
|
953
|
-
The axis along which to concatenate (0 for rows, 1 for columns, default is 1).
|
|
954
|
-
order : str, optional
|
|
955
|
-
The order for flattening when required: "row" or "column" (default is "row").
|
|
956
|
-
|
|
957
|
-
Returns:
|
|
958
|
-
np.ndarray
|
|
959
|
-
A 2D array with the input arrays concatenated along the specified axis,
|
|
960
|
-
padded with fill_value where necessary.
|
|
961
|
-
"""
|
|
962
|
-
# Set the order for processing
|
|
963
|
-
if "ro" in order.lower():
|
|
964
|
-
order = "C" # row-major order
|
|
965
|
-
else:
|
|
966
|
-
order = "F" # column-major order
|
|
967
|
-
|
|
968
|
-
# Process input arrays based on their dimensions
|
|
969
|
-
processed_arrays = []
|
|
970
|
-
for arg in args:
|
|
971
|
-
arr = np.asarray(arg)
|
|
972
|
-
if arr.ndim == 1:
|
|
973
|
-
processed_arrays.append(arr) # Keep 1D arrays as is
|
|
974
|
-
elif arr.ndim == 2:
|
|
975
|
-
if axis == 0:
|
|
976
|
-
# If concatenating along rows, split 2D arrays into 1D arrays row-wise
|
|
977
|
-
processed_arrays.extend(arr)
|
|
978
|
-
elif axis == 1:
|
|
979
|
-
# If concatenating along columns, split 2D arrays into 1D arrays column-wise
|
|
980
|
-
processed_arrays.extend(arr.T)
|
|
981
|
-
else:
|
|
982
|
-
raise ValueError("axis must be 0 or 1")
|
|
983
|
-
else:
|
|
984
|
-
raise ValueError("Input arrays must be 1D or 2D")
|
|
985
|
-
|
|
986
|
-
if axis == 0:
|
|
987
|
-
# Concatenate along rows
|
|
988
|
-
max_len = max(arr.size for arr in processed_arrays)
|
|
989
|
-
result = np.full((len(processed_arrays), max_len), fill_value)
|
|
990
|
-
for i, arr in enumerate(processed_arrays):
|
|
991
|
-
result[i, : arr.size] = arr
|
|
992
|
-
elif axis == 1:
|
|
993
|
-
# Concatenate along columns
|
|
994
|
-
max_len = max(arr.size for arr in processed_arrays)
|
|
995
|
-
result = np.full((max_len, len(processed_arrays)), fill_value)
|
|
996
|
-
for i, arr in enumerate(processed_arrays):
|
|
997
|
-
result[: arr.size, i] = arr
|
|
998
|
-
else:
|
|
999
|
-
raise ValueError("axis must be 0 or 1")
|
|
1000
904
|
|
|
1001
|
-
return result
|
|
1002
905
|
|
|
1003
906
|
|
|
1004
|
-
# # Example usage:
|
|
1005
|
-
# a = [1, np.nan]
|
|
1006
|
-
# b = [1, 3, 4, np.nan, 2, np.nan]
|
|
1007
|
-
# c = [1, 2, 3, 4, 5, 6, 7, 8, 10]
|
|
1008
|
-
# d = padcat(a, b)
|
|
1009
|
-
# result1 = padcat(d, c)
|
|
1010
|
-
# result2 = padcat(a, b, c)
|
|
1011
|
-
# print("Result of padcat(d, c):\n", result1)
|
|
1012
|
-
# print("Result of padcat(a, b, c):\n", result2)
|