emdbva 0.0.1.dev132__tar.gz → 0.0.1.dev135__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {emdbva-0.0.1.dev132/emdbva.egg-info → emdbva-0.0.1.dev135}/PKG-INFO +1 -1
  2. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135/emdbva.egg-info}/PKG-INFO +1 -1
  3. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/bars.py +125 -65
  4. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/phaserandomization.py +3 -3
  5. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/projections.py +28 -1
  6. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/preparation.py +40 -4
  7. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/version.py +1 -1
  8. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/LICENSE +0 -0
  9. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/MANIFEST.in +0 -0
  10. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/README.rst +0 -0
  11. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/emdbva.egg-info/SOURCES.txt +0 -0
  12. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/emdbva.egg-info/dependency_links.txt +0 -0
  13. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/emdbva.egg-info/entry_points.txt +0 -0
  14. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/emdbva.egg-info/requires.txt +0 -0
  15. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/emdbva.egg-info/top_level.txt +0 -0
  16. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/setup.cfg +0 -0
  17. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/setup.py +0 -0
  18. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/__init__.py +0 -0
  19. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/mainva.py +0 -0
  20. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/__init__.py +0 -0
  21. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/connected_percentage.py +0 -0
  22. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/contour_level_predicator.py +0 -0
  23. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/emda_mmcc.py +0 -0
  24. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/emringer.py +0 -0
  25. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/inclusion.py +0 -0
  26. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/overlap_percentage.py +0 -0
  27. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/phenix_cc.py +0 -0
  28. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/phenix_mm.py +0 -0
  29. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/qscore.py +0 -0
  30. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/residue_locres.py +0 -0
  31. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/resmap.py +0 -0
  32. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/smoc.py +0 -0
  33. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/strudel.py +0 -0
  34. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/surfaces.py +0 -0
  35. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/metrics/threedfsc.py +0 -0
  36. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/qscores.csv +0 -0
  37. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/Checker.py +0 -0
  38. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/ChimeraxViews.py +0 -0
  39. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/MapProcessor.py +0 -0
  40. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/Model.py +0 -0
  41. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/__init__.py +0 -0
  42. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/cl_weights.pth +0 -0
  43. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/log_utils.py +0 -0
  44. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/misc.py +0 -0
  45. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/rescolor.py +0 -0
  46. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/utils/stars.py +0 -0
  47. {emdbva-0.0.1.dev132 → emdbva-0.0.1.dev135}/va/validationanalysis.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emdbva
3
- Version: 0.0.1.dev132
3
+ Version: 0.0.1.dev135
4
4
  Summary: CryoEM validation toolkit
5
5
  Home-page: https://test.pypi.org/project/va/
6
6
  Author: Zhe Wang
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emdbva
3
- Version: 0.0.1.dev132
3
+ Version: 0.0.1.dev135
4
4
  Summary: CryoEM validation toolkit
5
5
  Home-page: https://test.pypi.org/project/va/
6
6
  Author: Zhe Wang
@@ -147,15 +147,17 @@ def get_resolution_range(new_entry, df, score_type, column='resolution', resbin=
147
147
  return None
148
148
 
149
149
 
150
- def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
150
+ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type, resbin):
151
151
  """
152
152
  This function here using matplotlib to produce the Q-score bar image
153
153
  """
154
154
 
155
- a = a*1.5 if a else None
156
- b = b*1.5 if b else None
157
- a = a/200 if a else None
158
- b = b/200 if b else None
155
+ # Safely scale inputs only when they are provided (allow 0)
156
+ if a is not None:
157
+ a = (a * 1.5) / 200.0
158
+ if b is not None:
159
+ b = (b * 1.5) / 200.0
160
+
159
161
  # Create a color scale from 0 to 1
160
162
  color_scale = np.linspace(0, 1, 199)
161
163
 
@@ -173,37 +175,48 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
173
175
  diamond_height = 0.65
174
176
  diamond_half_width = 0.01
175
177
 
176
- # Add diamond-shaped marker for 'a'
177
- if a != b:
178
+ # Add diamond-shaped markers depending on availability of a and b
179
+ if a is not None and b is not None and a != b:
180
+ # both present and different
178
181
  ax.fill(
179
182
  [a - diamond_half_width, a, a + diamond_half_width, a],
180
183
  [0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
181
184
  color='Black', edgecolor='black'
182
185
  )
183
186
 
184
- # Add diamond-shaped marker for 'b'
185
187
  ax.fill(
186
188
  [b - diamond_half_width, b, b + diamond_half_width, b],
187
189
  [0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
188
190
  facecolor='none', edgecolor='black'
189
191
  )
190
- else:
191
- # ax.fill(
192
- # [b - diamond_half_width, b, b + diamond_half_width, b],
193
- # [0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
194
- # facecolor='yellow', edgecolor='black'
195
- # )
196
- top = np.array([[b-diamond_half_width, 0.5], [b, 0.5 + diamond_height], [b + diamond_half_width, 0.5], [b, 0.5]])
197
- bottom = np.array([[b-diamond_half_width, 0.5], [b, 0.5], [b + diamond_half_width, 0.5], [b, 0.5 - diamond_height]])
192
+ elif a is not None and b is not None and a == b:
193
+ # both present and overlapped
194
+ top = np.array([[b - diamond_half_width, 0.5], [b, 0.5 + diamond_height], [b + diamond_half_width, 0.5], [b, 0.5]])
195
+ bottom = np.array([[b - diamond_half_width, 0.5], [b, 0.5], [b + diamond_half_width, 0.5], [b, 0.5 - diamond_height]])
198
196
  top_patch = patches.Polygon(top, closed=True, facecolor='black', edgecolor='black')
199
197
  bottom_patch = patches.Polygon(bottom, closed=True, facecolor='none', edgecolor='black')
200
198
  ax.add_patch(top_patch)
201
199
  ax.add_patch(bottom_patch)
200
+ elif a is not None:
201
+ # only a present -> solid marker
202
+ ax.fill(
203
+ [a - diamond_half_width, a, a + diamond_half_width, a],
204
+ [0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
205
+ color='Black', edgecolor='black'
206
+ )
207
+ elif b is not None:
208
+ # only b present -> hollow marker
209
+ ax.fill(
210
+ [b - diamond_half_width, b, b + diamond_half_width, b],
211
+ [0.5, 0.5 + diamond_height, 0.5, 0.5 - diamond_height],
212
+ facecolor='none', edgecolor='black'
213
+ )
214
+ # else: neither present -> draw no marker
202
215
 
203
- # add four values as annotationso
216
+ # add four values as annotations
204
217
  worse = r'$\it{Worse}$'
205
218
  better = r'$\it{Better}$'
206
- ax.annotate(worse, (0, -0.9), color='black', ha='left', fontsize=10, )
219
+ ax.annotate(worse, (0, -0.9), color='black', ha='left', fontsize=10)
207
220
  ax.annotate(better, (1.5, -0.9), color='black', ha='right', fontsize=10)
208
221
  ax.annotate(f'{qscore:.3f}', (1.58, 0.2), color='black', ha='center', fontsize=12)
209
222
 
@@ -223,32 +236,18 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
223
236
  title = plot_name[:-8]
224
237
  ax.annotate(title, (0.75, 3.3), color='black', ha='center', fontsize=14, fontweight='bold')
225
238
  ax.annotate('Value', (1.58, 1.7), color='black', ha='center', fontsize=14)
226
- # if a >= b:
227
- # ax.annotate(f'{a*100/1.5:.2f}%', (a, 1.4), color='black', ha='left', fontsize=10)
228
- # #ax.annotate(f'{b:.2f}', (b, -0.8), color='black', ha='center', fontsize=10)
229
- # #ax.annotate(f'{a*100:.2f}%', (a, 1.4), color='black', ha='center', fontsize=10)
230
- # ax.annotate(f'{b*100/1.5:.2f}%', (b, 1.4), color='black', ha='right', fontsize=10)
231
- # else:
232
- # ax.annotate(f'{a*100/1.5:.2f}%', (a, 1.4), color='black', ha='right', fontsize=10)
233
- # #ax.annotate(f'{b:.2f}', (b, -0.8), color='black', ha='center', fontsize=10)
234
- # #ax.annotate(f'{a*100:.2f}%', (a, 1.4), color='black', ha='center', fontsize=10)
235
- # ax.annotate(f'{b*100/1.5:.2f}%', (b, 1.4), color='black', ha='left', fontsize=10)
236
-
237
- # Customize the plot
239
+
238
240
  ax.set_xlim(-0.4, 1.7)
239
- # ax.set_ylim(-4.3, 1.8)
240
- # to fit the EMD id
241
241
  ax.set_ylim(-4.3, 3.6)
242
242
  ax.set_yticks([])
243
243
 
244
244
  ax.spines['top'].set_visible(False)
245
245
  ax.spines['right'].set_visible(False)
246
- # Remove the left and bottom axis lines (optional)
247
246
  ax.spines['left'].set_visible(False)
248
247
  ax.spines['bottom'].set_visible(False)
249
248
 
250
- if a != b:
251
- # Add diamond-shaped marker for legend
249
+ # Legend / explanatory markers: adapt depending on which markers are available
250
+ if a is not None and b is not None and a != b:
252
251
  wa = 0.01
253
252
  ha = -2.0
254
253
  ax.fill(
@@ -266,11 +265,9 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
266
265
  [bha, bha + diamond_height, bha, bha - diamond_height],
267
266
  facecolor='none', edgecolor='black'
268
267
  )
269
- ax.annotate('Percentile relative to EM structures of $\pm$1 $\mathrm{\AA}$ (resolution)',
268
+ ax.annotate(f'Percentile relative to EM structures of $\\pm${resbin} $\\mathrm{{\\AA}}$ (resolution)',
270
269
  (bwa + 3 * diamond_half_width, bha - 0.25), color='black', ha='left', fontsize=11)
271
- # ax.annotate(f'Percentile relative to EM structures of nearest 1000 (resolution)', (bwa + 3*diamond_half_width, bha-0.25), color='black', ha='left', fontsize=11)
272
- else:
273
- # Add diamond-shaped marker for legend
270
+ elif a is not None and b is not None and a == b:
274
271
  wa = 0.01
275
272
  ha = -2.0
276
273
  ax.fill(
@@ -280,7 +277,6 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
280
277
  )
281
278
  ax.annotate('Percentile relative to all EM structures (overlapped)', (wa + 3 * diamond_half_width, ha - 0.25),
282
279
  color='black', ha='left', fontsize=11)
283
- # ax.annotate('Percentile relative to all EM structures (overlapped)', (wa + 3*diamond_half_width, ha-0.25), color='black', ha='left', fontsize=11)
284
280
 
285
281
  bwa = 0.01
286
282
  bha = -3.6
@@ -289,9 +285,28 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
289
285
  [bha, bha + diamond_height, bha, bha - diamond_height],
290
286
  facecolor='none', edgecolor='black'
291
287
  )
292
- ax.annotate('Percentile relative to EM structures of $\pm$1 $\mathrm{\AA}$ (resolution)',
288
+ ax.annotate(f'Percentile relative to EM structures of $\\pm${resbin} $\\mathrm{{\\AA}}$ (resolution)',
289
+ (bwa + 3 * diamond_half_width, bha - 0.25), color='black', ha='left', fontsize=11)
290
+ elif a is not None:
291
+ wa = 0.01
292
+ ha = -2.0
293
+ ax.fill(
294
+ [wa - diamond_half_width, wa, wa + diamond_half_width, wa],
295
+ [ha, ha + diamond_height, ha, ha - diamond_height],
296
+ color='black', edgecolor='black'
297
+ )
298
+ ax.annotate('Percentile relative to all EM structures', (wa + 3 * diamond_half_width, ha - 0.25),
299
+ color='black', ha='left', fontsize=11)
300
+ elif b is not None:
301
+ bwa = 0.01
302
+ bha = -3.6
303
+ ax.fill(
304
+ [bwa - diamond_half_width, bwa, bwa + diamond_half_width, bwa],
305
+ [bha, bha + diamond_height, bha, bha - diamond_height],
306
+ facecolor='none', edgecolor='black'
307
+ )
308
+ ax.annotate(f'Percentile relative to EM structures of $\\pm${resbin} $\\mathrm{{\\AA}}$ (resolution)',
293
309
  (bwa + 3 * diamond_half_width, bha - 0.25), color='black', ha='left', fontsize=11)
294
- # ax.annotate(f'Percentile relative to EM structures of nearest 1000 (resolution)', (bwa + 3*diamond_half_width, bha-0.25), color='black', ha='left', fontsize=11)
295
310
 
296
311
  ax.tick_params(axis='both', which='both', length=0)
297
312
  plt.gca().set_xticklabels([])
@@ -302,16 +317,23 @@ def plot_bar_mat(a, b, qmin, qmax, qscore, work_dir, plot_name, score_type):
302
317
 
303
318
 
304
319
  def bar(new_entry_dict, score_type, work_dir, score_dir, plot_name, update_bin_file=None):
305
- input_file = '{}/qscores.csv'.format(score_dir)
306
- print(f'The all Q-score file is: {input_file}.')
307
- resbin = current_qscore_resolution_bin(input_file, update_bin_file)
308
- print(f'Current resolution bin size for Q_relative is: {resbin}.')
320
+ if update_bin_file and os.path.isfile(update_bin_file):
321
+ input_file = update_bin_file
322
+ else:
323
+ candidate = os.path.join(score_dir, 'qscores.csv')
324
+ if os.path.isfile(candidate):
325
+ input_file = candidate
326
+ else:
327
+ raise ValueError('All Qscore file does not exist to produce the slider.')
328
+ print(f'The all {score_type} file is: {input_file}.')
329
+ resbin = current_qscore_resolution_bin(input_file, update_bin_file, work_dir)
330
+ print(f'Current resolution bin size for {score_type} is: {resbin}.')
309
331
 
310
332
  # new_entry_dict = {'id': '8117', 'resolution': 2.95, 'name': '5irx.cif', 'qscore': 0.521}
311
333
  qmin = None
312
334
  qmax = None
313
335
  df = load_score(input_file, new_entry_dict, score_type)
314
- if score_type and new_entry_dict[score_type]:
336
+ if score_type and score_type in new_entry_dict and not pd.isna(new_entry_dict[score_type]):
315
337
  (qmin, qmax), original_value = get_score(df, new_entry_dict[score_type], score_type)
316
338
  target_value = int(match_to_newscale((0, sum(original_value)), (0, 199), original_value[0]))
317
339
  to_whole = round(target_value/200., 3)
@@ -326,7 +348,6 @@ def bar(new_entry_dict, score_type, work_dir, score_dir, plot_name, update_bin_f
326
348
  whole_res_low = None
327
349
  whole_res_hight = None
328
350
 
329
- #df1000 = get_nearest_onethousand(new_entry_dict, df, 500, score_type)
330
351
  if new_entry_dict['resolution']:
331
352
  df1000 = get_resolution_range(new_entry_dict, df, score_type, 'resolution', resbin)
332
353
  (sqmin, sqmax), ovalue = get_score(df1000, new_entry_dict[score_type], score_type)
@@ -343,8 +364,8 @@ def bar(new_entry_dict, score_type, work_dir, score_dir, plot_name, update_bin_f
343
364
  relative_res_low = None
344
365
  relative_res_high = None
345
366
 
346
- if to_whole and to_two:
347
- plot_bar_mat(target_value, target_value_two, qmin, qmax, new_entry_dict[score_type], work_dir, plot_name, score_type)
367
+ if to_whole is not None and to_two is not None:
368
+ plot_bar_mat(target_value, target_value_two, qmin, qmax, new_entry_dict[score_type], work_dir, plot_name, score_type, resbin)
348
369
  print(f'{score_type} to whole: {to_whole_real}, to relative resolution: {to_two_real}')
349
370
 
350
371
  return ((to_whole_real, to_whole_counts, whole_res_low, whole_res_hight), (to_two_real, to_two_counts, relative_res_low, relative_res_high), resbin)
@@ -408,18 +429,32 @@ def resolution_qrelative_correlation(df_resolution_sorted, values, score_type='q
408
429
  correlation_below_5 = []
409
430
  correlation_above_5 = []
410
431
  col_names = []
411
- for i, value in enumerate(values):
412
- if score_type == 'qscore':
413
- col_name = f'q_relative_{value}' # Dynamically create the column name
414
- # Filter the data for resolution < 5 Å and > 5 Å
432
+ for value in values:
433
+ col_name = f'q_relative_{value}' if score_type == 'qscore' else f'q_relative_{value}'
434
+ # Filter the data for resolution <= 5 Å and > 5 Å
415
435
  data_below_5 = df_resolution_sorted[df_resolution_sorted['resolution'] <= 5]
416
436
  data_above_5 = df_resolution_sorted[df_resolution_sorted['resolution'] > 5]
417
437
 
418
- # Calculate correlation for resolution <= 5 Å
419
- corr_below_5, _ = scipy.stats.pearsonr(data_below_5['resolution'], data_below_5[col_name])
438
+ # If the column doesn't exist, record NaN and continue
439
+ if col_name not in df_resolution_sorted.columns:
440
+ correlation_below_5.append(np.nan)
441
+ correlation_above_5.append(np.nan)
442
+ col_names.append(col_name)
443
+ continue
444
+
445
+ def safe_pearson(x, y):
446
+ # Require at least 2 samples
447
+ if x.size < 2 or y.size < 2:
448
+ return np.nan
449
+ try:
450
+ corr, _ = scipy.stats.pearsonr(x, y)
451
+ except Exception:
452
+ return np.nan
453
+ return corr
454
+
455
+ corr_below_5 = safe_pearson(data_below_5['resolution'].to_numpy(), data_below_5[col_name].to_numpy())
456
+ corr_above_5 = safe_pearson(data_above_5['resolution'].to_numpy(), data_above_5[col_name].to_numpy())
420
457
 
421
- # Calculate correlation for resolution > 5 Å
422
- corr_above_5, _ = scipy.stats.pearsonr(data_above_5['resolution'], data_above_5[col_name])
423
458
  correlation_below_5.append(corr_below_5)
424
459
  correlation_above_5.append(corr_above_5)
425
460
  col_names.append(col_name)
@@ -435,7 +470,7 @@ def get_resolution_bin_size_fromva(score_file):
435
470
 
436
471
  return float(current_qscore_resolution_bin)
437
472
 
438
- def get_resolution_bin_size_fromfile(input_score_file, score_type='qscore'):
473
+ def get_resolution_bin_size_fromfile(input_score_file, work_dir=None, score_type='qscore'):
439
474
  """
440
475
  Get the resolution bin size from the input score file
441
476
  """
@@ -453,12 +488,40 @@ def get_resolution_bin_size_fromfile(input_score_file, score_type='qscore'):
453
488
  values = [round(x, 1) for x in np.arange(0.1, 1.6, 0.1)]
454
489
  correlatioin_below_5, correlation_above_5, col_names = resolution_qrelative_correlation(df_resolution_sorted,
455
490
  values, score_type='qscore')
491
+ print('Correlation above 5A:', correlation_above_5)
492
+ print('Correlation below 5A:', correlatioin_below_5)
493
+ print('Column names:', col_names)
494
+ # create and save a two-curve plot for the correlations
495
+ try:
496
+ xs = [float(c.replace('q_relative_', '')) for c in col_names]
497
+ except Exception:
498
+ xs = list(range(len(col_names)))
499
+ idx = np.argsort(xs)
500
+ xs_sorted = np.array(xs)[idx]
501
+ y_above = np.array(correlation_above_5, dtype=float)[idx]
502
+ y_below = np.array(correlatioin_below_5, dtype=float)[idx]
503
+
504
+ plt.figure(figsize=(7, 4), dpi=150)
505
+ plt.plot(xs_sorted, y_above, marker='o', linestyle='-', label='Correlation above 5 Å')
506
+ plt.plot(xs_sorted, y_below, marker='s', linestyle='--', label='Correlation below 5 Å')
507
+ plt.xlabel('Resolution bin size')
508
+ plt.ylabel('Pearson correlation')
509
+ plt.title('Resolution vs Q_relative correlation')
510
+ plt.legend()
511
+ plt.grid(alpha=0.4, linestyle='--')
512
+ out_fname = f'{work_dir}/bin_size_resolution_correlation.png'
513
+ plt.tight_layout()
514
+ plt.savefig(out_fname)
515
+ plt.close()
516
+ print(f'Correlation plot saved to {os.path.abspath(out_fname)}')
517
+ # saved cur
518
+
456
519
  optimal_index = find_optimal_correlation_index(correlatioin_below_5)
457
520
  optimal_resolution_bin = col_names[optimal_index].replace('q_relative_', '') if optimal_index is not None else None
458
521
 
459
522
  return float(optimal_resolution_bin)
460
523
 
461
- def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None):
524
+ def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None, work_dir=None):
462
525
  """
463
526
  Calculate the Q-score resolution bin based on the current all qscore in csv.
464
527
  """
@@ -481,7 +544,7 @@ def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None):
481
544
  resolution_bin_size = cfg.get('resolution_bin_size')
482
545
 
483
546
  if update_resolution_bin_file is not None:
484
- resolution_bin_size = get_resolution_bin_size_fromfile(update_resolution_bin_file)
547
+ resolution_bin_size = get_resolution_bin_size_fromfile(update_resolution_bin_file, work_dir)
485
548
  save_and_log(cfg, resolution_bin_size, update_resolution_bin_file)
486
549
  return resolution_bin_size
487
550
 
@@ -490,9 +553,6 @@ def current_qscore_resolution_bin(score_file, update_resolution_bin_file=None):
490
553
  return resolution_bin_size
491
554
 
492
555
  print('No resolution bin size found in config file, will calculate from score file.')
493
- resolution_bin_size = get_resolution_bin_size_fromfile(score_file)
556
+ resolution_bin_size = get_resolution_bin_size_fromfile(score_file, work_dir)
494
557
  save_and_log(cfg, resolution_bin_size, score_file)
495
558
  return resolution_bin_size
496
-
497
-
498
-
@@ -183,8 +183,8 @@ def calculate_pixels(angpix):
183
183
  """
184
184
 
185
185
  if angpix != 0:
186
- dilatepx = 10 / angpix
187
- softpx = 5 / angpix
186
+ dilatepx = 3
187
+ softpx = 8
188
188
  return dilatepx, softpx
189
189
  else:
190
190
  print('No hard and soft radius for mask as voxel value is 0.')
@@ -196,7 +196,7 @@ def relion_auto_mask_fast(data, threshold, extend_pixels, edge_width, verbose=Tr
196
196
  steps = []
197
197
  if verbose:
198
198
  steps = tqdm(total=3, desc="Masking Steps", unit="step")
199
-
199
+
200
200
  # Step 1: Initial binary mask
201
201
  mask = (data >= threshold).astype(np.float32)
202
202
  if verbose:
@@ -8,6 +8,7 @@ import numpy as np
8
8
  from math import ceil
9
9
  from scipy import ndimage
10
10
  from mrcfile.mrcfile import MrcFile
11
+ from PIL import Image
11
12
  import inspect
12
13
  from va.utils.misc import out_json
13
14
 
@@ -179,6 +180,24 @@ class Projections:
179
180
 
180
181
  return ind, org, scale
181
182
 
183
+ def _green_percentage(self, image_obj):
184
+ """
185
+ image_obj can be a file path, PIL Image, or numpy array.
186
+ Returns green percentage as a float.
187
+ """
188
+ if isinstance(image_obj, str):
189
+ img = Image.open(image_obj).convert("RGB")
190
+ elif isinstance(image_obj, Image.Image):
191
+ img = image_obj.convert("RGB")
192
+ else:
193
+ img = Image.fromarray(image_obj).convert("RGB")
194
+
195
+ img_array = np.array(img)
196
+ green_pixels = np.sum(np.all(img_array == [0, 138, 0], axis=-1))
197
+ total_pixels = img_array.shape[0] * img_array.shape[1]
198
+ proportion_green = (green_pixels / total_pixels) * 100 if total_pixels else 0.0
199
+ return round(proportion_green, 2)
200
+
182
201
  def orthogonal_projections(self, mapin=None, workdir=None, type=None, label=''):
183
202
  map, workdir = self.mapincheck(mapin, workdir)
184
203
  if map is not None and workdir is not None:
@@ -202,6 +221,7 @@ class Projections:
202
221
  glow_scale_result = {}
203
222
  glow_org_final = {}
204
223
  glow_scale_final = {}
224
+ green_result = {}
205
225
  for axis in range(2, -1, -1):
206
226
  ind, org, scale = self.map_to_img(map, axis, type, self.errlist)
207
227
  org_result.update(org)
@@ -212,6 +232,10 @@ class Projections:
212
232
  glow_ind, glow_org, glow_scale = self.map_to_img(map, axis, type, self.errlist, self.glowimage)
213
233
  glow_org_result.update(glow_org)
214
234
  glow_scale_result.update(glow_scale)
235
+ if type == 'std':
236
+ for axis_name, image_obj in glow_org.items():
237
+ green_result[axis_name] = self._green_percentage(f'{workdir}/{image_obj}')
238
+
215
239
  # if type == 'central' or type == 'largestvariance':
216
240
  # glow_ind_result.update(glow_ind)
217
241
  # if type == 'central' or type == 'largestvariance':
@@ -231,7 +255,10 @@ class Projections:
231
255
  result_dict[f'{label}central_slice'] = {**final_org, **final_scale, **final_ind}
232
256
  elif type == 'max' or type == 'projection' or type == 'std':
233
257
  if glow_org_final and glow_scale_final:
234
- result_dict[f'{label}orthogonal_glow_{type}'] = {**glow_org_final, **glow_scale_final}
258
+ glow_entry = {**glow_org_final, **glow_scale_final}
259
+ if green_result:
260
+ glow_entry['green_percentage'] = green_result
261
+ result_dict[f'{label}orthogonal_glow_{type}'] = glow_entry
235
262
  result_dict[f'{label}orthogonal_{type}'] = {**final_org, **final_scale}
236
263
  else:
237
264
  result_dict[f'{label}orthogonal_{type}'] = {**final_org, **final_scale}
@@ -774,6 +774,29 @@ class PreParation:
774
774
  if db_name == 'PDB' and accession_code:
775
775
  db_info['accession'] = accession_code
776
776
 
777
+ # Fallback: if no PDB id found in _database_2, try _em_db_reference
778
+ if 'accession' not in db_info:
779
+ em_access = block.find_value('_em_db_reference.access_code')
780
+ em_dbname = block.find_value('_em_db_reference.db_name')
781
+
782
+ # Case 1: _em_db_reference is a single (non-loop) category
783
+ if em_access and str(em_access).strip() not in ('?', '.', ''):
784
+ if em_dbname and str(em_dbname).strip().upper() == 'PDB':
785
+ db_info['PDB'] = str(em_access).strip().strip("'").strip('"')
786
+
787
+ # Case 2: _em_db_reference is looped (multiple references)
788
+ else:
789
+ em_access_col = block.find_loop('_em_db_reference.access_code')
790
+ em_dbname_col = block.find_loop('_em_db_reference.db_name')
791
+
792
+ if em_access_col and em_dbname_col:
793
+ for acc, name in zip(em_access_col, em_dbname_col):
794
+ acc_s = str(acc).strip().strip("'").strip('"')
795
+ name_s = str(name).strip().strip("'").strip('"')
796
+ if name_s.upper() == 'PDB' and acc_s not in ('', '?', '.'):
797
+ db_info['PDB'] = acc_s
798
+ break # take first PDB accession found
799
+
777
800
  # Final merge and return
778
801
  result = {
779
802
  'resolution': resolution,
@@ -1344,7 +1367,11 @@ class PreParation:
1344
1367
 
1345
1368
  auth_comp_id_map = {}
1346
1369
  for chain, resseq, auth_comp_id in zip(chains, resseqs, auth_comp_ids):
1347
- key = (chain, int(resseq))
1370
+ try:
1371
+ resseq_key = int(resseq)
1372
+ except ValueError:
1373
+ resseq_key = resseq
1374
+ key = (chain, resseq_key)
1348
1375
  auth_comp_id_map[key] = auth_comp_id
1349
1376
 
1350
1377
  return auth_comp_id_map
@@ -1368,7 +1395,11 @@ class PreParation:
1368
1395
 
1369
1396
  formal_charge_map = {}
1370
1397
  for chain, resseq, formal_charge in zip(chains, resseqs, formal_charges):
1371
- key = (chain, int(resseq))
1398
+ try:
1399
+ resseq_key = int(resseq)
1400
+ except ValueError:
1401
+ resseq_key = resseq
1402
+ key = (chain, resseq_key)
1372
1403
  formal_charge_map[key] = formal_charge
1373
1404
 
1374
1405
  return formal_charge_map
@@ -1409,8 +1440,13 @@ class PreParation:
1409
1440
 
1410
1441
  # Add auth_comp_id based on the map
1411
1442
  for res, chain, resseq in zip(residues, chains, resseqs):
1412
- auth_comp_id = auth_comp_id_map.get((chain, int(resseq)), res)
1413
- formal_charge = formal_charge_map.get((chain, int(resseq)), '?')
1443
+ try:
1444
+ resseq_key = int(resseq)
1445
+ except ValueError:
1446
+ resseq_key = resseq
1447
+ key = (chain, resseq_key)
1448
+ auth_comp_id = auth_comp_id_map.get(key, res)
1449
+ formal_charge = formal_charge_map.get(key, '?')
1414
1450
  mmcif_dict['_atom_site.auth_comp_id'].append(auth_comp_id)
1415
1451
  mmcif_dict['_atom_site.pdbx_formal_charge'].append(formal_charge)
1416
1452
 
@@ -18,5 +18,5 @@ under the License.
18
18
 
19
19
  """
20
20
 
21
- __version__ = '0.0.1.dev132'
21
+ __version__ = '0.0.1.dev135'
22
22
  __em_statistics_version__ = '202505.v01'
File without changes
File without changes
File without changes
File without changes
File without changes