emdbva 0.0.1.dev134__py3-none-any.whl → 0.0.1.dev136__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {emdbva-0.0.1.dev134.dist-info → emdbva-0.0.1.dev136.dist-info}/METADATA +1 -1
- {emdbva-0.0.1.dev134.dist-info → emdbva-0.0.1.dev136.dist-info}/RECORD +11 -11
- va/metrics/bars.py +125 -65
- va/metrics/phaserandomization.py +3 -3
- va/metrics/projections.py +64 -1
- va/preparation.py +17 -4
- va/version.py +1 -1
- {emdbva-0.0.1.dev134.dist-info → emdbva-0.0.1.dev136.dist-info}/LICENSE +0 -0
- {emdbva-0.0.1.dev134.dist-info → emdbva-0.0.1.dev136.dist-info}/WHEEL +0 -0
- {emdbva-0.0.1.dev134.dist-info → emdbva-0.0.1.dev136.dist-info}/entry_points.txt +0 -0
- {emdbva-0.0.1.dev134.dist-info → emdbva-0.0.1.dev136.dist-info}/top_level.txt +0 -0
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
va/__init__.py,sha256=gHi6sjjnvd4J1-hcYShVW1RQAAZOcbHYxhQZzo7SX4E,268
|
|
2
2
|
va/mainva.py,sha256=pfO8f9-UKW75Nl5QAb76RTWWuJUvxubuCGchHdY4BRo,12871
|
|
3
|
-
va/preparation.py,sha256=
|
|
3
|
+
va/preparation.py,sha256=yUH8CJJ-lz3yvvgjvcP7ErLBEdVMsyLiXxlV5hzmJeE,111052
|
|
4
4
|
va/qscores.csv,sha256=O_4JWiqyu6g6AgndbWExD7tSbFYGF-HK2VdnS9zBYuw,1283930
|
|
5
5
|
va/validationanalysis.py,sha256=AQWut9RxjqqYHhBLrgjwE0DA0X4ZQOBLqsfoOT1v2e0,325383
|
|
6
|
-
va/version.py,sha256=
|
|
6
|
+
va/version.py,sha256=f4k2xWh9g6-wsQmRTu8gDpXKeU_HntlZKvT9yAvzKbc,760
|
|
7
7
|
va/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
va/metrics/bars.py,sha256=
|
|
8
|
+
va/metrics/bars.py,sha256=MPrVnaf7pr6Q6c4XDVdwnm8Ix1tVQeW2rJEKOGC15nU,23577
|
|
9
9
|
va/metrics/connected_percentage.py,sha256=HwsuC-Ehi767R57AE9NBGW7s-NwcJI0BGJA-3f0ZgsU,2209
|
|
10
10
|
va/metrics/contour_level_predicator.py,sha256=J7d5DCF8Xx1L0T6JC2DcT2G3izh3EHp6QAlUhgNJq4Y,5301
|
|
11
11
|
va/metrics/emda_mmcc.py,sha256=gH43N3J9uroyvVzgJg30cNztxgaolA3NC67H0IldsxY,447
|
|
12
12
|
va/metrics/emringer.py,sha256=IkybpIg2HG-qVfQvemouWpCLpSuVZem4tYYlnb4D4ig,1297
|
|
13
13
|
va/metrics/inclusion.py,sha256=pRpAIXRRutOt_1VfwFL9OiN2QH3eK4rQF6p7XziYbVY,45550
|
|
14
14
|
va/metrics/overlap_percentage.py,sha256=WSbJXq3R_ezf5tzoQte-y1oFDAzbUdYTU7-r9BzN3WE,5966
|
|
15
|
-
va/metrics/phaserandomization.py,sha256=
|
|
15
|
+
va/metrics/phaserandomization.py,sha256=tDL06TGW956VpGgQt7ctiVDWSE5NFRCn1662qkBCSX0,9600
|
|
16
16
|
va/metrics/phenix_cc.py,sha256=sagK8svWJF15NkyrJdsdlo9s8UwM3qQvEqx2e4jabVU,10804
|
|
17
17
|
va/metrics/phenix_mm.py,sha256=rNIsbSqzsWE0wz47yvX9u37KVbh4jEtg3WyHxs9JAno,5916
|
|
18
|
-
va/metrics/projections.py,sha256=
|
|
18
|
+
va/metrics/projections.py,sha256=vVxwGTkgDMiWX5rOdyvXQDPBMzTtGOLrRKdOBqBeR94,20721
|
|
19
19
|
va/metrics/qscore.py,sha256=I4pUy3UN7FVuQpr-5YE8j1cE26QH7r5VAHXi59LpsuQ,43040
|
|
20
20
|
va/metrics/residue_locres.py,sha256=dXLwyfaKVxT8ZQtkN7sOOuPsP-G8Up6k14h8sU-s9JQ,6004
|
|
21
21
|
va/metrics/resmap.py,sha256=DhrXZVQegftTeVC8gzA2ho7uLiCdzYpWoYhMq8jyB3A,19960
|
|
@@ -33,9 +33,9 @@ va/utils/log_utils.py,sha256=6ercMPRiiy_IITkHfgOO4ydyoIp97QV-KB4pUdEy4KY,520
|
|
|
33
33
|
va/utils/misc.py,sha256=dYcZf7J6ubzsvwfpy_QP-6R22RexS8ZawzWasOl7PIE,22010
|
|
34
34
|
va/utils/rescolor.py,sha256=kOcjd1m7ItLGt4dMfJiH-MLRQOzqUVOSTyiKO1vvmV4,1847
|
|
35
35
|
va/utils/stars.py,sha256=oV3zzxp3rty36MwBhTdP8RrZegFOX6Gu9RJfxxfP42U,26462
|
|
36
|
-
emdbva-0.0.1.
|
|
37
|
-
emdbva-0.0.1.
|
|
38
|
-
emdbva-0.0.1.
|
|
39
|
-
emdbva-0.0.1.
|
|
40
|
-
emdbva-0.0.1.
|
|
41
|
-
emdbva-0.0.1.
|
|
36
|
+
emdbva-0.0.1.dev136.dist-info/LICENSE,sha256=EKRtSIAJIDq0FaOStnUlRfcyVCRaNuXBEZWfcjSoAYo,11338
|
|
37
|
+
emdbva-0.0.1.dev136.dist-info/METADATA,sha256=S4p1-4wRUIPGcsE98SpwBAxbMYSxJSgAnZHPdTAxawo,1474
|
|
38
|
+
emdbva-0.0.1.dev136.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
39
|
+
emdbva-0.0.1.dev136.dist-info/entry_points.txt,sha256=3GDgE_RPnedndxYSN7AbQ6ojnQR8dYM3VvJrR_mShSw,38
|
|
40
|
+
emdbva-0.0.1.dev136.dist-info/top_level.txt,sha256=XkRzdik-cabgu1rSDVUmRKNy0pF_RGh-XAviUV9H0p8,3
|
|
41
|
+
emdbva-0.0.1.dev136.dist-info/RECORD,,
|
va/metrics/bars.py
CHANGED
|
@@ -147,15 +147,17 @@ def get_resolution_range(new_entry, df, score_type, column='resolution', resbin=
|
|
|
147
147
|
return None
|
|
148
148
|
|
|
149
149
|
|
|
150
|
-
def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
|
|
150
|
+
def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type, resbin):
|
|
151
151
|
"""
|
|
152
152
|
This function here using matplotlib to produce the Q-score bar image
|
|
153
153
|
"""
|
|
154
154
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
155
|
+
# Safely scale inputs only when they are provided (allow 0)
|
|
156
|
+
if a is not None:
|
|
157
|
+
a = (a * 1.5) / 200.0
|
|
158
|
+
if b is not None:
|
|
159
|
+
b = (b * 1.5) / 200.0
|
|
160
|
+
|
|
159
161
|
# Create a color scale from 0 to 1
|
|
160
162
|
color_scale = np.linspace(0, 1, 199)
|
|
161
163
|
|
|
@@ -173,37 +175,48 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
|
|
|
173
175
|
diamond_height = 0.65
|
|
174
176
|
diamond_half_width = 0.01
|
|
175
177
|
|
|
176
|
-
# Add diamond-shaped
|
|
177
|
-
if a != b:
|
|
178
|
+
# Add diamond-shaped markers depending on availability of a and b
|
|
179
|
+
if a is not None and b is not None and a != b:
|
|
180
|
+
# both present and different
|
|
178
181
|
ax.fill(
|
|
179
182
|
[a - diamond_half_width, a, a + diamond_half_width, a],
|
|
180
183
|
[0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
|
|
181
184
|
color='Black', edgecolor='black'
|
|
182
185
|
)
|
|
183
186
|
|
|
184
|
-
# Add diamond-shaped marker for 'b'
|
|
185
187
|
ax.fill(
|
|
186
188
|
[b - diamond_half_width, b, b + diamond_half_width, b],
|
|
187
189
|
[0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
|
|
188
190
|
facecolor='none', edgecolor='black'
|
|
189
191
|
)
|
|
190
|
-
|
|
191
|
-
#
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
# facecolor='yellow', edgecolor='black'
|
|
195
|
-
# )
|
|
196
|
-
top = np.array([[b-diamond_half_width, 0.5], [b, 0.5 + diamond_height], [b + diamond_half_width, 0.5], [b, 0.5]])
|
|
197
|
-
bottom = np.array([[b-diamond_half_width, 0.5], [b, 0.5], [b + diamond_half_width, 0.5], [b, 0.5 - diamond_height]])
|
|
192
|
+
elif a is not None and b is not None and a == b:
|
|
193
|
+
# both present and overlapped
|
|
194
|
+
top = np.array([[b - diamond_half_width, 0.5], [b, 0.5 + diamond_height], [b + diamond_half_width, 0.5], [b, 0.5]])
|
|
195
|
+
bottom = np.array([[b - diamond_half_width, 0.5], [b, 0.5], [b + diamond_half_width, 0.5], [b, 0.5 - diamond_height]])
|
|
198
196
|
top_patch = patches.Polygon(top, closed=True, facecolor='black', edgecolor='black')
|
|
199
197
|
bottom_patch = patches.Polygon(bottom, closed=True, facecolor='none', edgecolor='black')
|
|
200
198
|
ax.add_patch(top_patch)
|
|
201
199
|
ax.add_patch(bottom_patch)
|
|
200
|
+
elif a is not None:
|
|
201
|
+
# only a present -> solid marker
|
|
202
|
+
ax.fill(
|
|
203
|
+
[a - diamond_half_width, a, a + diamond_half_width, a],
|
|
204
|
+
[0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
|
|
205
|
+
color='Black', edgecolor='black'
|
|
206
|
+
)
|
|
207
|
+
elif b is not None:
|
|
208
|
+
# only b present -> hollow marker
|
|
209
|
+
ax.fill(
|
|
210
|
+
[b - diamond_half_width, b, b + diamond_half_width, b],
|
|
211
|
+
[0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
|
|
212
|
+
facecolor='none', edgecolor='black'
|
|
213
|
+
)
|
|
214
|
+
# else: neither present -> draw no marker
|
|
202
215
|
|
|
203
|
-
# add four values as
|
|
216
|
+
# add four values as annotations
|
|
204
217
|
worse = r'$\it{Worse}$'
|
|
205
218
|
better = r'$\it{Better}$'
|
|
206
|
-
ax.annotate(worse, (0, -0.9), color='black', ha='left', fontsize=10
|
|
219
|
+
ax.annotate(worse, (0, -0.9), color='black', ha='left', fontsize=10)
|
|
207
220
|
ax.annotate(better, (1.5, -0.9), color='black', ha='right', fontsize=10)
|
|
208
221
|
ax.annotate(f'{qscore:.3f}', (1.58, 0.2), color='black', ha='center', fontsize=12)
|
|
209
222
|
|
|
@@ -223,32 +236,18 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
|
|
|
223
236
|
title = plot_name[:-8]
|
|
224
237
|
ax.annotate(title, (0.75, 3.3), color='black', ha='center', fontsize=14, fontweight='bold')
|
|
225
238
|
ax.annotate('Value', (1.58, 1.7), color='black', ha='center', fontsize=14)
|
|
226
|
-
|
|
227
|
-
# ax.annotate(f'{a*100/1.5:.2f}%', (a, 1.4), color='black', ha='left', fontsize=10)
|
|
228
|
-
# #ax.annotate(f'{b:.2f}', (b, -0.8), color='black', ha='center', fontsize=10)
|
|
229
|
-
# #ax.annotate(f'{a*100:.2f}%', (a, 1.4), color='black', ha='center', fontsize=10)
|
|
230
|
-
# ax.annotate(f'{b*100/1.5:.2f}%', (b, 1.4), color='black', ha='right', fontsize=10)
|
|
231
|
-
# else:
|
|
232
|
-
# ax.annotate(f'{a*100/1.5:.2f}%', (a, 1.4), color='black', ha='right', fontsize=10)
|
|
233
|
-
# #ax.annotate(f'{b:.2f}', (b, -0.8), color='black', ha='center', fontsize=10)
|
|
234
|
-
# #ax.annotate(f'{a*100:.2f}%', (a, 1.4), color='black', ha='center', fontsize=10)
|
|
235
|
-
# ax.annotate(f'{b*100/1.5:.2f}%', (b, 1.4), color='black', ha='left', fontsize=10)
|
|
236
|
-
|
|
237
|
-
# Customize the plot
|
|
239
|
+
|
|
238
240
|
ax.set_xlim(-0.4, 1.7)
|
|
239
|
-
# ax.set_ylim(-4.3, 1.8)
|
|
240
|
-
# to fit the EMD id
|
|
241
241
|
ax.set_ylim(-4.3, 3.6)
|
|
242
242
|
ax.set_yticks([])
|
|
243
243
|
|
|
244
244
|
ax.spines['top'].set_visible(False)
|
|
245
245
|
ax.spines['right'].set_visible(False)
|
|
246
|
-
# Remove the left and bottom axis lines (optional)
|
|
247
246
|
ax.spines['left'].set_visible(False)
|
|
248
247
|
ax.spines['bottom'].set_visible(False)
|
|
249
248
|
|
|
250
|
-
|
|
251
|
-
|
|
249
|
+
# Legend / explanatory markers: adapt depending on which markers are available
|
|
250
|
+
if a is not None and b is not None and a != b:
|
|
252
251
|
wa = 0.01
|
|
253
252
|
ha = -2.0
|
|
254
253
|
ax.fill(
|
|
@@ -266,11 +265,9 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
|
|
|
266
265
|
[bha, bha + diamond_height, bha, bha - diamond_height],
|
|
267
266
|
facecolor='none', edgecolor='black'
|
|
268
267
|
)
|
|
269
|
-
ax.annotate('Percentile relative to EM structures of
|
|
268
|
+
ax.annotate(f'Percentile relative to EM structures of $\\pm${resbin} $\\mathrm{{\\AA}}$ (resolution)',
|
|
270
269
|
(bwa + 3 * diamond_half_width, bha - 0.25), color='black', ha='left', fontsize=11)
|
|
271
|
-
|
|
272
|
-
else:
|
|
273
|
-
# Add diamond-shaped marker for legend
|
|
270
|
+
elif a is not None and b is not None and a == b:
|
|
274
271
|
wa = 0.01
|
|
275
272
|
ha = -2.0
|
|
276
273
|
ax.fill(
|
|
@@ -280,7 +277,6 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
|
|
|
280
277
|
)
|
|
281
278
|
ax.annotate('Percentile relative to all EM structures (overlapped)', (wa + 3 * diamond_half_width, ha - 0.25),
|
|
282
279
|
color='black', ha='left', fontsize=11)
|
|
283
|
-
# ax.annotate('Percentile relative to all EM structures (overlapped)', (wa + 3*diamond_half_width, ha-0.25), color='black', ha='left', fontsize=11)
|
|
284
280
|
|
|
285
281
|
bwa = 0.01
|
|
286
282
|
bha = -3.6
|
|
@@ -289,9 +285,28 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
|
|
|
289
285
|
[bha, bha + diamond_height, bha, bha - diamond_height],
|
|
290
286
|
facecolor='none', edgecolor='black'
|
|
291
287
|
)
|
|
292
|
-
ax.annotate('Percentile relative to EM structures of
|
|
288
|
+
ax.annotate(f'Percentile relative to EM structures of $\\pm${resbin} $\\mathrm{{\\AA}}$ (resolution)',
|
|
289
|
+
(bwa + 3 * diamond_half_width, bha - 0.25), color='black', ha='left', fontsize=11)
|
|
290
|
+
elif a is not None:
|
|
291
|
+
wa = 0.01
|
|
292
|
+
ha = -2.0
|
|
293
|
+
ax.fill(
|
|
294
|
+
[wa - diamond_half_width, wa, wa + diamond_half_width, wa],
|
|
295
|
+
[ha, ha + diamond_height, ha, ha - diamond_height],
|
|
296
|
+
color='black', edgecolor='black'
|
|
297
|
+
)
|
|
298
|
+
ax.annotate('Percentile relative to all EM structures', (wa + 3 * diamond_half_width, ha - 0.25),
|
|
299
|
+
color='black', ha='left', fontsize=11)
|
|
300
|
+
elif b is not None:
|
|
301
|
+
bwa = 0.01
|
|
302
|
+
bha = -3.6
|
|
303
|
+
ax.fill(
|
|
304
|
+
[bwa - diamond_half_width, bwa, bwa + diamond_half_width, bwa],
|
|
305
|
+
[bha, bha + diamond_height, bha, bha - diamond_height],
|
|
306
|
+
facecolor='none', edgecolor='black'
|
|
307
|
+
)
|
|
308
|
+
ax.annotate(f'Percentile relative to EM structures of $\\pm${resbin} $\\mathrm{{\\AA}}$ (resolution)',
|
|
293
309
|
(bwa + 3 * diamond_half_width, bha - 0.25), color='black', ha='left', fontsize=11)
|
|
294
|
-
# ax.annotate(f'Percentile relative to EM structures of nearest 1000 (resolution)', (bwa + 3*diamond_half_width, bha-0.25), color='black', ha='left', fontsize=11)
|
|
295
310
|
|
|
296
311
|
ax.tick_params(axis='both', which='both', length=0)
|
|
297
312
|
plt.gca().set_xticklabels([])
|
|
@@ -302,16 +317,23 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
|
|
|
302
317
|
|
|
303
318
|
|
|
304
319
|
def bar(new_entry_dict, score_type, work_dir, score_dir, plot_name, update_bin_file=None):
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
320
|
+
if update_bin_file and os.path.isfile(update_bin_file):
|
|
321
|
+
input_file = update_bin_file
|
|
322
|
+
else:
|
|
323
|
+
candidate = os.path.join(score_dir, 'qscores.csv')
|
|
324
|
+
if os.path.isfile(candidate):
|
|
325
|
+
input_file = candidate
|
|
326
|
+
else:
|
|
327
|
+
raise ValueError('All Qscore file does not exist to produce the slider.')
|
|
328
|
+
print(f'The all {score_type} file is: {input_file}.')
|
|
329
|
+
resbin = current_qscore_resolution_bin(input_file, update_bin_file, work_dir)
|
|
330
|
+
print(f'Current resolution bin size for {score_type} is: {resbin}.')
|
|
309
331
|
|
|
310
332
|
# new_entry_dict = {'id': '8117', 'resolution': 2.95, 'name': '5irx.cif', 'qscore': 0.521}
|
|
311
333
|
qmin = None
|
|
312
334
|
qmax = None
|
|
313
335
|
df = load_score(input_file, new_entry_dict, score_type)
|
|
314
|
-
if score_type and new_entry_dict[score_type]:
|
|
336
|
+
if score_type and score_type in new_entry_dict and not pd.isna(new_entry_dict[score_type]):
|
|
315
337
|
(qmin, qmax), original_value = get_score(df, new_entry_dict[score_type], score_type)
|
|
316
338
|
target_value = int(match_to_newscale((0, sum(original_value)), (0, 199), original_value[0]))
|
|
317
339
|
to_whole = round(target_value/200., 3)
|
|
@@ -326,7 +348,6 @@ def bar(new_entry_dict, score_type, work_dir, score_dir, plot_name, update_bin_f
|
|
|
326
348
|
whole_res_low = None
|
|
327
349
|
whole_res_hight = None
|
|
328
350
|
|
|
329
|
-
#df1000 = get_nearest_onethousand(new_entry_dict, df, 500, score_type)
|
|
330
351
|
if new_entry_dict['resolution']:
|
|
331
352
|
df1000 = get_resolution_range(new_entry_dict, df, score_type, 'resolution', resbin)
|
|
332
353
|
(sqmin, sqmax), ovalue = get_score(df1000, new_entry_dict[score_type], score_type)
|
|
@@ -343,8 +364,8 @@ def bar(new_entry_dict, score_type, work_dir, score_dir, plot_name, update_bin_f
|
|
|
343
364
|
relative_res_low = None
|
|
344
365
|
relative_res_high = None
|
|
345
366
|
|
|
346
|
-
if to_whole and to_two:
|
|
347
|
-
plot_bar_mat(target_value, target_value_two, qmin, qmax, new_entry_dict[score_type], work_dir, plot_name, score_type)
|
|
367
|
+
if to_whole is not None and to_two is not None:
|
|
368
|
+
plot_bar_mat(target_value, target_value_two, qmin, qmax, new_entry_dict[score_type], work_dir, plot_name, score_type, resbin)
|
|
348
369
|
print(f'{score_type} to whole: {to_whole_real}, to relative resolution: {to_two_real}')
|
|
349
370
|
|
|
350
371
|
return ((to_whole_real, to_whole_counts, whole_res_low, whole_res_hight), (to_two_real, to_two_counts, relative_res_low, relative_res_high), resbin)
|
|
@@ -408,18 +429,32 @@ def resolution_qrelative_correlation(df_resolution_sorted, values, score_type='q
|
|
|
408
429
|
correlation_below_5 = []
|
|
409
430
|
correlation_above_5 = []
|
|
410
431
|
col_names = []
|
|
411
|
-
for
|
|
412
|
-
if score_type == 'qscore'
|
|
413
|
-
|
|
414
|
-
# Filter the data for resolution < 5 Å and > 5 Å
|
|
432
|
+
for value in values:
|
|
433
|
+
col_name = f'q_relative_{value}' if score_type == 'qscore' else f'q_relative_{value}'
|
|
434
|
+
# Filter the data for resolution <= 5 Å and > 5 Å
|
|
415
435
|
data_below_5 = df_resolution_sorted[df_resolution_sorted['resolution'] <= 5]
|
|
416
436
|
data_above_5 = df_resolution_sorted[df_resolution_sorted['resolution'] > 5]
|
|
417
437
|
|
|
418
|
-
#
|
|
419
|
-
|
|
438
|
+
# If the column doesn't exist, record NaN and continue
|
|
439
|
+
if col_name not in df_resolution_sorted.columns:
|
|
440
|
+
correlation_below_5.append(np.nan)
|
|
441
|
+
correlation_above_5.append(np.nan)
|
|
442
|
+
col_names.append(col_name)
|
|
443
|
+
continue
|
|
444
|
+
|
|
445
|
+
def safe_pearson(x, y):
|
|
446
|
+
# Require at least 2 samples
|
|
447
|
+
if x.size < 2 or y.size < 2:
|
|
448
|
+
return np.nan
|
|
449
|
+
try:
|
|
450
|
+
corr, _ = scipy.stats.pearsonr(x, y)
|
|
451
|
+
except Exception:
|
|
452
|
+
return np.nan
|
|
453
|
+
return corr
|
|
454
|
+
|
|
455
|
+
corr_below_5 = safe_pearson(data_below_5['resolution'].to_numpy(), data_below_5[col_name].to_numpy())
|
|
456
|
+
corr_above_5 = safe_pearson(data_above_5['resolution'].to_numpy(), data_above_5[col_name].to_numpy())
|
|
420
457
|
|
|
421
|
-
# Calculate correlation for resolution > 5 Å
|
|
422
|
-
corr_above_5, _ = scipy.stats.pearsonr(data_above_5['resolution'], data_above_5[col_name])
|
|
423
458
|
correlation_below_5.append(corr_below_5)
|
|
424
459
|
correlation_above_5.append(corr_above_5)
|
|
425
460
|
col_names.append(col_name)
|
|
@@ -435,7 +470,7 @@ def get_resolution_bin_size_fromva(score_file):
|
|
|
435
470
|
|
|
436
471
|
return float(current_qscore_resolution_bin)
|
|
437
472
|
|
|
438
|
-
def get_resolution_bin_size_fromfile(input_score_file, score_type='qscore'):
|
|
473
|
+
def get_resolution_bin_size_fromfile(input_score_file, work_dir=None, score_type='qscore'):
|
|
439
474
|
"""
|
|
440
475
|
Get the resolution bin size from the input score file
|
|
441
476
|
"""
|
|
@@ -453,12 +488,40 @@ def get_resolution_bin_size_fromfile(input_score_file, score_type='qscore'):
|
|
|
453
488
|
values = [round(x, 1) for x in np.arange(0.1, 1.6, 0.1)]
|
|
454
489
|
correlatioin_below_5, correlation_above_5, col_names = resolution_qrelative_correlation(df_resolution_sorted,
|
|
455
490
|
values, score_type='qscore')
|
|
491
|
+
print('Correlation above 5A:', correlation_above_5)
|
|
492
|
+
print('Correlation below 5A:', correlatioin_below_5)
|
|
493
|
+
print('Column names:', col_names)
|
|
494
|
+
# create and save a two-curve plot for the correlations
|
|
495
|
+
try:
|
|
496
|
+
xs = [float(c.replace('q_relative_', '')) for c in col_names]
|
|
497
|
+
except Exception:
|
|
498
|
+
xs = list(range(len(col_names)))
|
|
499
|
+
idx = np.argsort(xs)
|
|
500
|
+
xs_sorted = np.array(xs)[idx]
|
|
501
|
+
y_above = np.array(correlation_above_5, dtype=float)[idx]
|
|
502
|
+
y_below = np.array(correlatioin_below_5, dtype=float)[idx]
|
|
503
|
+
|
|
504
|
+
plt.figure(figsize=(7, 4), dpi=150)
|
|
505
|
+
plt.plot(xs_sorted, y_above, marker='o', linestyle='-', label='Correlation above 5 Å')
|
|
506
|
+
plt.plot(xs_sorted, y_below, marker='s', linestyle='--', label='Correlation below 5 Å')
|
|
507
|
+
plt.xlabel('Resolution bin size')
|
|
508
|
+
plt.ylabel('Pearson correlation')
|
|
509
|
+
plt.title('Resolution vs Q_relative correlation')
|
|
510
|
+
plt.legend()
|
|
511
|
+
plt.grid(alpha=0.4, linestyle='--')
|
|
512
|
+
out_fname = f'{work_dir}/bin_size_resolution_correlation.png'
|
|
513
|
+
plt.tight_layout()
|
|
514
|
+
plt.savefig(out_fname)
|
|
515
|
+
plt.close()
|
|
516
|
+
print(f'Correlation plot saved to {os.path.abspath(out_fname)}')
|
|
517
|
+
# saved cur
|
|
518
|
+
|
|
456
519
|
optimal_index = find_optimal_correlation_index(correlatioin_below_5)
|
|
457
520
|
optimal_resolution_bin = col_names[optimal_index].replace('q_relative_', '') if optimal_index is not None else None
|
|
458
521
|
|
|
459
522
|
return float(optimal_resolution_bin)
|
|
460
523
|
|
|
461
|
-
def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None):
|
|
524
|
+
def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None, work_dir=None):
|
|
462
525
|
"""
|
|
463
526
|
Calculate the Q-score resolution bin based on the current all qscore in csv.
|
|
464
527
|
"""
|
|
@@ -481,7 +544,7 @@ def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None):
|
|
|
481
544
|
resolution_bin_size = cfg.get('resolution_bin_size')
|
|
482
545
|
|
|
483
546
|
if update_resolution_bin_file is not None:
|
|
484
|
-
resolution_bin_size = get_resolution_bin_size_fromfile(update_resolution_bin_file)
|
|
547
|
+
resolution_bin_size = get_resolution_bin_size_fromfile(update_resolution_bin_file, work_dir)
|
|
485
548
|
save_and_log(cfg, resolution_bin_size, update_resolution_bin_file)
|
|
486
549
|
return resolution_bin_size
|
|
487
550
|
|
|
@@ -490,9 +553,6 @@ def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None):
|
|
|
490
553
|
return resolution_bin_size
|
|
491
554
|
|
|
492
555
|
print('No resolution bin size found in config file, will calculate from score file.')
|
|
493
|
-
resolution_bin_size = get_resolution_bin_size_fromfile(score_file)
|
|
556
|
+
resolution_bin_size = get_resolution_bin_size_fromfile(score_file, work_dir)
|
|
494
557
|
save_and_log(cfg, resolution_bin_size, score_file)
|
|
495
558
|
return resolution_bin_size
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
va/metrics/phaserandomization.py
CHANGED
|
@@ -183,8 +183,8 @@ def calculate_pixels(angpix):
|
|
|
183
183
|
"""
|
|
184
184
|
|
|
185
185
|
if angpix != 0:
|
|
186
|
-
dilatepx =
|
|
187
|
-
softpx =
|
|
186
|
+
dilatepx = 3
|
|
187
|
+
softpx = 8
|
|
188
188
|
return dilatepx, softpx
|
|
189
189
|
else:
|
|
190
190
|
print('No hard and soft radius for mask as voxel value is 0.')
|
|
@@ -196,7 +196,7 @@ def relion_auto_mask_fast(data, threshold, extend_pixels, edge_width, verbose=Tr
|
|
|
196
196
|
steps = []
|
|
197
197
|
if verbose:
|
|
198
198
|
steps = tqdm(total=3, desc="Masking Steps", unit="step")
|
|
199
|
-
|
|
199
|
+
|
|
200
200
|
# Step 1: Initial binary mask
|
|
201
201
|
mask = (data >= threshold).astype(np.float32)
|
|
202
202
|
if verbose:
|
va/metrics/projections.py
CHANGED
|
@@ -8,6 +8,7 @@ import numpy as np
|
|
|
8
8
|
from math import ceil
|
|
9
9
|
from scipy import ndimage
|
|
10
10
|
from mrcfile.mrcfile import MrcFile
|
|
11
|
+
from PIL import Image
|
|
11
12
|
import inspect
|
|
12
13
|
from va.utils.misc import out_json
|
|
13
14
|
|
|
@@ -179,6 +180,60 @@ class Projections:
|
|
|
179
180
|
|
|
180
181
|
return ind, org, scale
|
|
181
182
|
|
|
183
|
+
def _green_percentage(self, image_obj):
|
|
184
|
+
"""
|
|
185
|
+
image_obj can be a file path, PIL Image, or numpy array.
|
|
186
|
+
Returns a dictionary with:
|
|
187
|
+
- percentage: total green percentage in the whole image
|
|
188
|
+
- diff_vertical: left-half green percentage minus right-half green percentage
|
|
189
|
+
- diff_horizontal: top-half green percentage minus bottom-half green percentage
|
|
190
|
+
"""
|
|
191
|
+
if isinstance(image_obj, str):
|
|
192
|
+
img = Image.open(image_obj).convert("RGB")
|
|
193
|
+
elif isinstance(image_obj, Image.Image):
|
|
194
|
+
img = image_obj.convert("RGB")
|
|
195
|
+
else:
|
|
196
|
+
img = Image.fromarray(image_obj).convert("RGB")
|
|
197
|
+
|
|
198
|
+
img_array = np.array(img)
|
|
199
|
+
green_mask = np.all(img_array == [0, 138, 0], axis=-1)
|
|
200
|
+
height, width = green_mask.shape
|
|
201
|
+
total_pixels = height * width
|
|
202
|
+
if total_pixels == 0:
|
|
203
|
+
return {
|
|
204
|
+
'percentage': 0.0,
|
|
205
|
+
'diff_vertical': 0.0,
|
|
206
|
+
'diff_horizontal': 0.0,
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
green_pixels = np.sum(green_mask)
|
|
210
|
+
proportion_green = (green_pixels / total_pixels) * 100
|
|
211
|
+
|
|
212
|
+
mid_vertical = width // 2
|
|
213
|
+
left_total = height * mid_vertical
|
|
214
|
+
right_total = height * (width - mid_vertical)
|
|
215
|
+
green_pixels_left_half = np.sum(green_mask[:, :mid_vertical])
|
|
216
|
+
green_pixels_right_half = np.sum(green_mask[:, mid_vertical:])
|
|
217
|
+
proportion_green_left_half = (green_pixels_left_half / left_total) * 100 if left_total else 0.0
|
|
218
|
+
proportion_green_right_half = (green_pixels_right_half / right_total) * 100 if right_total else 0.0
|
|
219
|
+
diff_vertical = proportion_green_left_half - proportion_green_right_half
|
|
220
|
+
|
|
221
|
+
mid_horizontal = height // 2
|
|
222
|
+
top_total = mid_horizontal * width
|
|
223
|
+
bottom_total = (height - mid_horizontal) * width
|
|
224
|
+
green_pixels_top_half = np.sum(green_mask[:mid_horizontal, :])
|
|
225
|
+
green_pixels_bottom_half = np.sum(green_mask[mid_horizontal:, :])
|
|
226
|
+
proportion_green_top_half = (green_pixels_top_half / top_total) * 100 if top_total else 0.0
|
|
227
|
+
proportion_green_bottom_half = (green_pixels_bottom_half / bottom_total) * 100 if bottom_total else 0.0
|
|
228
|
+
diff_horizontal = proportion_green_top_half - proportion_green_bottom_half
|
|
229
|
+
|
|
230
|
+
return {
|
|
231
|
+
'percentage': round(proportion_green, 2),
|
|
232
|
+
'diff_vertical': round(diff_vertical, 2),
|
|
233
|
+
'diff_horizontal': round(diff_horizontal, 2),
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
|
|
182
237
|
def orthogonal_projections(self, mapin=None, workdir=None, type=None, label=''):
|
|
183
238
|
map, workdir = self.mapincheck(mapin, workdir)
|
|
184
239
|
if map is not None and workdir is not None:
|
|
@@ -202,6 +257,7 @@ class Projections:
|
|
|
202
257
|
glow_scale_result = {}
|
|
203
258
|
glow_org_final = {}
|
|
204
259
|
glow_scale_final = {}
|
|
260
|
+
green_result = {}
|
|
205
261
|
for axis in range(2, -1, -1):
|
|
206
262
|
ind, org, scale = self.map_to_img(map, axis, type, self.errlist)
|
|
207
263
|
org_result.update(org)
|
|
@@ -212,6 +268,10 @@ class Projections:
|
|
|
212
268
|
glow_ind, glow_org, glow_scale = self.map_to_img(map, axis, type, self.errlist, self.glowimage)
|
|
213
269
|
glow_org_result.update(glow_org)
|
|
214
270
|
glow_scale_result.update(glow_scale)
|
|
271
|
+
if type == 'std':
|
|
272
|
+
for axis_name, image_obj in glow_org.items():
|
|
273
|
+
green_result[axis_name] = self._green_percentage(f'{workdir}/{image_obj}')
|
|
274
|
+
|
|
215
275
|
# if type == 'central' or type == 'largestvariance':
|
|
216
276
|
# glow_ind_result.update(glow_ind)
|
|
217
277
|
# if type == 'central' or type == 'largestvariance':
|
|
@@ -231,7 +291,10 @@ class Projections:
|
|
|
231
291
|
result_dict[f'{label}central_slice'] = {**final_org, **final_scale, **final_ind}
|
|
232
292
|
elif type == 'max' or type == 'projection' or type == 'std':
|
|
233
293
|
if glow_org_final and glow_scale_final:
|
|
234
|
-
|
|
294
|
+
glow_entry = {**glow_org_final, **glow_scale_final}
|
|
295
|
+
if green_result:
|
|
296
|
+
glow_entry['green_percentage'] = green_result
|
|
297
|
+
result_dict[f'{label}orthogonal_glow_{type}'] = glow_entry
|
|
235
298
|
result_dict[f'{label}orthogonal_{type}'] = {**final_org, **final_scale}
|
|
236
299
|
else:
|
|
237
300
|
result_dict[f'{label}orthogonal_{type}'] = {**final_org, **final_scale}
|
va/preparation.py
CHANGED
|
@@ -1367,7 +1367,11 @@ class PreParation:
|
|
|
1367
1367
|
|
|
1368
1368
|
auth_comp_id_map = {}
|
|
1369
1369
|
for chain, resseq, auth_comp_id in zip(chains, resseqs, auth_comp_ids):
|
|
1370
|
-
|
|
1370
|
+
try:
|
|
1371
|
+
resseq_key = int(resseq)
|
|
1372
|
+
except ValueError:
|
|
1373
|
+
resseq_key = resseq
|
|
1374
|
+
key = (chain, resseq_key)
|
|
1371
1375
|
auth_comp_id_map[key] = auth_comp_id
|
|
1372
1376
|
|
|
1373
1377
|
return auth_comp_id_map
|
|
@@ -1391,7 +1395,11 @@ class PreParation:
|
|
|
1391
1395
|
|
|
1392
1396
|
formal_charge_map = {}
|
|
1393
1397
|
for chain, resseq, formal_charge in zip(chains, resseqs, formal_charges):
|
|
1394
|
-
|
|
1398
|
+
try:
|
|
1399
|
+
resseq_key = int(resseq)
|
|
1400
|
+
except ValueError:
|
|
1401
|
+
resseq_key = resseq
|
|
1402
|
+
key = (chain, resseq_key)
|
|
1395
1403
|
formal_charge_map[key] = formal_charge
|
|
1396
1404
|
|
|
1397
1405
|
return formal_charge_map
|
|
@@ -1432,8 +1440,13 @@ class PreParation:
|
|
|
1432
1440
|
|
|
1433
1441
|
# Add auth_comp_id based on the map
|
|
1434
1442
|
for res, chain, resseq in zip(residues, chains, resseqs):
|
|
1435
|
-
|
|
1436
|
-
|
|
1443
|
+
try:
|
|
1444
|
+
resseq_key = int(resseq)
|
|
1445
|
+
except ValueError:
|
|
1446
|
+
resseq_key = resseq
|
|
1447
|
+
key = (chain, resseq_key)
|
|
1448
|
+
auth_comp_id = auth_comp_id_map.get(key, res)
|
|
1449
|
+
formal_charge = formal_charge_map.get(key, '?')
|
|
1437
1450
|
mmcif_dict['_atom_site.auth_comp_id'].append(auth_comp_id)
|
|
1438
1451
|
mmcif_dict['_atom_site.pdbx_formal_charge'].append(formal_charge)
|
|
1439
1452
|
|
va/version.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|