pycistem 0.6.1__cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. pycistem/__init__.py +9 -0
  2. pycistem/config.py +20 -0
  3. pycistem/core/__init__.py +1 -0
  4. pycistem/core/core.cpp +1070 -0
  5. pycistem/core/core.cpython-310-x86_64-linux-gnu.so +0 -0
  6. pycistem/core/database.cpp +408 -0
  7. pycistem/core/euler_search.cpp +72 -0
  8. pycistem/core/run_profiles.cpp +114 -0
  9. pycistem/database/__init__.py +301 -0
  10. pycistem/programs/__init__.py +10 -0
  11. pycistem/programs/_cistem_constants.py +23 -0
  12. pycistem/programs/apply_ctf.py +59 -0
  13. pycistem/programs/cistem_program.py +277 -0
  14. pycistem/programs/ctffind.py +254 -0
  15. pycistem/programs/estimate_beamtilt.py +60 -0
  16. pycistem/programs/match_template.py +325 -0
  17. pycistem/programs/reconstruct3d.py +79 -0
  18. pycistem/programs/refine_ctf.py +106 -0
  19. pycistem/programs/refine_template.py +157 -0
  20. pycistem/programs/refine_template_dev.py +35 -0
  21. pycistem/programs/refine_template_niko.py +124 -0
  22. pycistem/programs/resample.py +30 -0
  23. pycistem/programs/run_profile.py +17 -0
  24. pycistem/programs/unblur.py +247 -0
  25. pycistem/programs/unblur_patch.py +204 -0
  26. pycistem/utils/__init__.py +1 -0
  27. pycistem/utils/extract_particles.py +36 -0
  28. pycistem/utils/move_class_into_unbinned_particle_stack.py +34 -0
  29. pycistem/utils/order_by_class_occupancy.py +26 -0
  30. pycistem/utils/plot_class_occupancy.py +51 -0
  31. pycistem/utils/plot_classification_fsc_movie.py +90 -0
  32. pycistem/utils/plot_occupancy_by_condition.py +129 -0
  33. pycistem-0.6.1.dist-info/METADATA +258 -0
  34. pycistem-0.6.1.dist-info/RECORD +84 -0
  35. pycistem-0.6.1.dist-info/WHEEL +5 -0
  36. pycistem-0.6.1.dist-info/licenses/LICENSE +201 -0
  37. pycistem-0.6.1.dist-info/sboms/auditwheel.cdx.json +1 -0
  38. pycistem.libs/libXau-154567c4.so.6.0.0 +0 -0
  39. pycistem.libs/libXcomposite-9a78b2b5.so.1.0.0 +0 -0
  40. pycistem.libs/libXcursor-557eab0f.so.1.0.2 +0 -0
  41. pycistem.libs/libXdamage-45f20f14.so.1.1.0 +0 -0
  42. pycistem.libs/libXfixes-e3b7d94c.so.3.1.0 +0 -0
  43. pycistem.libs/libXi-9178a6bd.so.6.1.0 +0 -0
  44. pycistem.libs/libXinerama-6a3f4a3e.so.1.0.0 +0 -0
  45. pycistem.libs/libXrandr-9f75043e.so.2.2.0 +0 -0
  46. pycistem.libs/libatk-1-3e086f29.0.so.0.22810.1 +0 -0
  47. pycistem.libs/libblkid-a9167753.so.1.1.0 +0 -0
  48. pycistem.libs/libbz2-a1e77c99.so.1.0.6 +0 -0
  49. pycistem.libs/libcairo-dfbea965.so.2.11512.0 +0 -0
  50. pycistem.libs/libdatrie-584ecbbd.so.1.3.2 +0 -0
  51. pycistem.libs/libffi-3a37023a.so.6.0.2 +0 -0
  52. pycistem.libs/libfontconfig-dcb2ce6c.so.1.12.0 +0 -0
  53. pycistem.libs/libfreetype-2f3b32b6.so.6.16.1 +0 -0
  54. pycistem.libs/libfribidi-a2ddad26.so.0.4.0 +0 -0
  55. pycistem.libs/libgdk-x11-2-ae2f4865.0.so.0.2400.32 +0 -0
  56. pycistem.libs/libgdk_pixbuf-2-e875edac.0.so.0.3612.0 +0 -0
  57. pycistem.libs/libgio-2-54f4f0a9.0.so.0.5600.4 +0 -0
  58. pycistem.libs/libgmodule-2-a8eef785.0.so.0.5600.4 +0 -0
  59. pycistem.libs/libgmp-d944b113.so.10.3.2 +0 -0
  60. pycistem.libs/libgnutls-e5fc1c5f.so.30.28.2 +0 -0
  61. pycistem.libs/libgomp-e985bcbb.so.1.0.0 +0 -0
  62. pycistem.libs/libgraphite2-a2b39163.so.3.0.1 +0 -0
  63. pycistem.libs/libgtk-x11-2-76f42ab1.0.so.0.2400.32 +0 -0
  64. pycistem.libs/libharfbuzz-26b3d829.so.0.10705.0 +0 -0
  65. pycistem.libs/libhogweed-cd4c53be.so.4.5 +0 -0
  66. pycistem.libs/libidn2-2f4a5893.so.0.3.6 +0 -0
  67. pycistem.libs/libmount-ec61bd71.so.1.1.0 +0 -0
  68. pycistem.libs/libnettle-37944285.so.6.5 +0 -0
  69. pycistem.libs/libp11-kit-ac9dcd7e.so.0.3.0 +0 -0
  70. pycistem.libs/libpango-1-e80d1584.0.so.0.4200.3 +0 -0
  71. pycistem.libs/libpangocairo-1-e0e0fbcf.0.so.0.4200.3 +0 -0
  72. pycistem.libs/libpangoft2-1-1fa1613e.0.so.0.4200.3 +0 -0
  73. pycistem.libs/libpcre-0dd207b5.so.1.2.10 +0 -0
  74. pycistem.libs/libpcre2-8-516f4c9d.so.0.7.1 +0 -0
  75. pycistem.libs/libpixman-1-06469c37.so.0.38.4 +0 -0
  76. pycistem.libs/libpng16-748299c7.so.16.34.0 +0 -0
  77. pycistem.libs/libselinux-d0805dcb.so.1 +0 -0
  78. pycistem.libs/libtasn1-564de53e.so.6.5.5 +0 -0
  79. pycistem.libs/libthai-cd935638.so.0.3.0 +0 -0
  80. pycistem.libs/libunistring-05abdd40.so.2.1.0 +0 -0
  81. pycistem.libs/libuuid-95b83d40.so.1.3.0 +0 -0
  82. pycistem.libs/libxcb-5ddf6756.so.1.1.0 +0 -0
  83. pycistem.libs/libxcb-render-161c0eb5.so.0.0.0 +0 -0
  84. pycistem.libs/libxcb-shm-0be6dfbf.so.0.0.0 +0 -0
@@ -0,0 +1,301 @@
1
+ import contextlib
2
+ import sqlite3
3
+ from pathlib import Path
4
+ from selectors import EpollSelector
5
+ #from pycistem.core import Project
6
+ from datetime import datetime
7
+
8
+ import mrcfile
9
+ import pandas as pd
10
+ import starfile
11
+ import mdocfile
12
+
13
+ from typing import Union, List, Optional
14
+
15
def create_project(
        project_name: str,
        output_dir: Path):
    """Create a new cisTEM project on disk and return the path to its database.

    A directory ``output_dir/project_name`` is created (parents included) and a
    fresh cisTEM project database ``project_name.db`` is initialized inside it.
    """
    # Imported lazily so that merely importing this module does not require
    # the compiled pycistem.core extension.
    from pycistem.core import Project

    project_dir = Path(output_dir, project_name)
    project_dir.mkdir(parents=True, exist_ok=True)
    database_path = project_dir / f"{project_name}.db"

    project = Project()
    project.CreateNewProject(
        database_path.as_posix(),
        project_dir.as_posix(),
        project_name,
    )
    return database_path
27
+
28
def import_movies(project_path: Union[str, Path], movies: Union[str, Path, List[Union[str, Path]]], pixelsize: float, exposure_dose: float, pattern="*.tif", gain: Union[bool, str, Path] = True, import_metadata: bool = True, bin_to_pixelsize: float = 1.0):
    """Import movie files (and optionally SerialEM mdoc metadata) into a cisTEM project.

    Parameters
    ----------
    project_path : path to the project ``.db`` file
    movies : a single movie file, a directory (searched with ``pattern``) or a list of files
    pixelsize : pixel size of the movies (presumably Angstrom/pixel -- matches cisTEM convention)
    exposure_dose : exposure dose written to the movie asset
    pattern : glob pattern used when ``movies`` is a directory
    gain : ``True`` to auto-detect a ``*.dm4`` gain reference next to each movie,
        ``False`` for no gain reference, or an explicit path applied to all movies
    import_metadata : when True, read ``<movie>.mdoc`` files and store one row per
        movie in MOVIE_ASSETS_METADATA
    bin_to_pixelsize : target pixel size; the stored binning factor is
        ``bin_to_pixelsize / pixelsize``

    Returns
    -------
    int : number of movies imported
    """
    from pycistem.core import Project
    if isinstance(project_path, Path):
        project_path = project_path.as_posix()
    project = Project()
    project.OpenProjectFromFile(project_path)
    # NOTE(review): the existing asset count is queried but the IDs below still
    # start at 1 -- importing into a non-empty project may collide; confirm intent.
    num_movies = project.database.ReturnSingleLongFromSelectCommand(
        "SELECT COUNT(*) FROM MOVIE_ASSETS;"
    )
    # Normalize `movies` to a sorted list of POSIX path strings.
    if isinstance(movies, (str, Path)):
        movies = Path(movies)
        if movies.is_dir():
            movies = list(movies.glob(pattern))
        else:
            movies = [movies]
    movies = [Path(m).as_posix() for m in movies]
    movie_filenames = sorted(movies)

    # Resolve one gain-reference path string per movie. Previously only the
    # `gain is True` case was handled, so `gain=False` or an explicit path
    # raised NameError below.
    if isinstance(gain, bool):
        if gain:
            # Use the first .dm4 file found next to each movie.
            gain_filenames = [list(Path(movie).parent.glob("*.dm4"))[0].as_posix() for movie in movie_filenames]
        else:
            gain_filenames = [""] * len(movie_filenames)
    else:
        # One explicit gain reference shared by all movies.
        gain_filenames = [Path(gain).as_posix()] * len(movie_filenames)

    if import_metadata:
        # One mdoc file per movie, named "<movie>.mdoc"; only its first section
        # is stored. Values are interpolated directly into the SQL (trusted,
        # locally-generated input).
        metadata_entries = []
        for i, movie in enumerate(movie_filenames):
            metadata = mdocfile.read(movie + ".mdoc")
            metadata_entries.append(metadata.iloc[0])
            # Insert data in MOVIE_ASSETS_METADATA using sqlite3
            project.database.ExecuteSQL(
                f"INSERT INTO MOVIE_ASSETS_METADATA "
                f"(MOVIE_ASSET_ID,"
                f"METADATA_SOURCE,"
                f"CONTENT_JSON,"
                f"TILT_ANGLE,"
                f"STAGE_POSITION_X,"
                f"STAGE_POSITION_Y,"
                f"STAGE_POSITION_Z,"
                f"IMAGE_SHIFT_X,"
                f"IMAGE_SHIFT_Y,"
                f"EXPOSURE_DOSE,"
                f"ACQUISITION_TIME)"
                f"VALUES ({i+1},"
                f"'serialem_frames_mdoc',"
                f"'{metadata.iloc[0].to_json(default_handler=str)}',"
                f" {metadata.loc[0,'TiltAngle']},"
                f" {metadata.loc[0,'StagePosition'][0]},"
                f" {metadata.loc[0,'StagePosition'][1]},"
                f" {metadata.loc[0,'StageZ']},"
                f" {metadata.loc[0,'ImageShift'][0]},"
                f" {metadata.loc[0,'ImageShift'][1]},"
                f" {metadata.loc[0,'ExposureDose']},"
                f" {datetime_to_msdos(datetime.strptime(metadata.loc[0,'DateTime'],'%d-%b-%Y %H:%M:%S'))});"
            )

    project.database.BeginMovieAssetInsert()
    for i, movie in enumerate(movie_filenames):
        # Positional arguments follow cisTEM's AddNextMovieAsset signature.
        # NOTE(review): 11520x8184 looks like a hard-coded camera frame size and
        # 300 a fixed voltage (kV?) -- confirm these against the cisTEM API.
        project.database.AddNextMovieAsset(
            i + 1,
            Path(movie).name,
            movie,
            0,
            11520,
            8184,
            34,
            300,
            pixelsize,
            exposure_dose,
            2.7,
            gain_filenames[i],
            "",
            bin_to_pixelsize / pixelsize,
            0,
            0,
            1.0,
            1.0,
            0,
            25,
            1,
        )
    project.database.EndMovieAssetInsert()

    project.database.Close(True)
    return(len(movie_filenames))
109
+
110
+
111
+
112
+
113
def get_image_info_from_db(project,image_asset=None, get_ctf=True):
    """Load image-asset information, optionally joined with CTF fits, from a project db.

    Parameters
    ----------
    project : path to the cisTEM project sqlite database
    image_asset : restrict the query to one IMAGE_ASSET_ID; None selects all images
    get_ctf : when True, inner-join against ESTIMATED_CTF_PARAMETERS

    Returns
    -------
    A DataFrame of all images when ``image_asset`` is None, otherwise the first
    joined row as a Series (or None when no CTF row matches). With
    ``get_ctf=False`` a DataFrame is returned even for a single image_asset.
    """
    with contextlib.closing(sqlite3.connect(project)) as con:
        if image_asset is None:
            images = pd.read_sql_query("SELECT IMAGE_ASSET_ID,MOVIE_ASSET_ID,IMAGE_ASSETS.FILENAME, MOVIE_ASSETS.FILENAME as movie_filename, MOVIE_ASSETS.GAIN_FILENAME, CTF_ESTIMATION_ID , ALIGNMENT_ID, IMAGE_ASSETS.PIXEL_SIZE as image_pixel_size, IMAGE_ASSETS.VOLTAGE, IMAGE_ASSETS.SPHERICAL_ABERRATION, MOVIE_ASSETS.PIXEL_SIZE as movie_pixel_size, IMAGE_ASSETS.X_SIZE, IMAGE_ASSETS.Y_SIZE FROM IMAGE_ASSETS LEFT OUTER JOIN MOVIE_ASSETS ON MOVIE_ASSETS.MOVIE_ASSET_ID == IMAGE_ASSETS.PARENT_MOVIE_ID", con)
        else:
            images = pd.read_sql_query(f"SELECT IMAGE_ASSET_ID,MOVIE_ASSET_ID,IMAGE_ASSETS.FILENAME, MOVIE_ASSETS.FILENAME as movie_filename, CTF_ESTIMATION_ID , ALIGNMENT_ID, IMAGE_ASSETS.PIXEL_SIZE as image_pixel_size, IMAGE_ASSETS.VOLTAGE, IMAGE_ASSETS.SPHERICAL_ABERRATION, MOVIE_ASSETS.PIXEL_SIZE as movie_pixel_size, IMAGE_ASSETS.X_SIZE, IMAGE_ASSETS.Y_SIZE FROM IMAGE_ASSETS LEFT OUTER JOIN MOVIE_ASSETS ON MOVIE_ASSETS.MOVIE_ASSET_ID == IMAGE_ASSETS.PARENT_MOVIE_ID WHERE IMAGE_ASSETS.IMAGE_ASSET_ID = {image_asset} ", con)
        if not get_ctf:
            return images
        ctf_fits = pd.read_sql_query("SELECT CTF_ESTIMATION_ID,DEFOCUS1,DEFOCUS2,DEFOCUS_ANGLE,OUTPUT_DIAGNOSTIC_FILE,SCORE, DETECTED_RING_RESOLUTION, AMPLITUDE_CONTRAST FROM ESTIMATED_CTF_PARAMETERS",con)
    joined = pd.merge(images, ctf_fits, on="CTF_ESTIMATION_ID")
    if image_asset is None:
        return joined
    return joined.iloc[0] if joined.shape[0] > 0 else None
130
+
131
def get_movie_info_from_db(project):
    """Return every row of the MOVIE_ASSETS table as a pandas DataFrame."""
    connection = sqlite3.connect(project)
    with contextlib.closing(connection) as con:
        movie_table = pd.read_sql_query("SELECT * FROM MOVIE_ASSETS", con)
    return movie_table
136
+
137
+
138
def get_tm_info_from_db(project,image_asset,tm_id=None):
    """Fetch template-matching results for one image from the project database.

    Returns a DataFrame of all jobs for ``image_asset`` when ``tm_id`` is None,
    otherwise the first matching row as a Series (or None when nothing matches).
    """
    with contextlib.closing(sqlite3.connect(project)) as con:
        if tm_id is None:
            matches = pd.read_sql_query(f"SELECT * FROM TEMPLATE_MATCH_LIST WHERE IMAGE_ASSET_ID={image_asset}",con)
            return matches
        matches = pd.read_sql_query(f"SELECT * FROM TEMPLATE_MATCH_LIST WHERE IMAGE_ASSET_ID={image_asset} AND TEMPLATE_MATCH_JOB_ID={tm_id}",con)
    return matches.iloc[0] if matches.shape[0] > 0 else None
152
+
153
+
154
+
155
def ensure_template_is_a_volume_asset(project: str, template_filename: str, pixel_size: float) -> int:
    """Return the VOLUME_ASSET_ID for ``template_filename``, registering it if needed.

    When the file is not yet listed in VOLUME_ASSETS, its dimensions are read
    from the MRC header and a new row with the next free id is inserted.
    """
    with contextlib.closing(sqlite3.connect(project)) as con:
        existing = pd.read_sql_query(f"SELECT * FROM VOLUME_ASSETS WHERE FILENAME='{template_filename}'",con)
        if existing.shape[0] > 0:
            return existing.iloc[0]["VOLUME_ASSET_ID"]
        # Not registered yet: read the volume dimensions from the MRC header.
        with mrcfile.open(template_filename) as mrc:
            x_size = mrc.header.nx
            y_size = mrc.header.ny
            z_size = mrc.header.nz
        # Allocate the next id after the current maximum (1 for an empty table).
        max_row = pd.read_sql_query("SELECT MAX(VOLUME_ASSET_ID) as max_id FROM VOLUME_ASSETS",con)
        max_id = max_row.iloc[0]["max_id"]
        vol_id = 1 if max_id is None else max_id + 1
        con.execute(f"INSERT INTO VOLUME_ASSETS (VOLUME_ASSET_ID,NAME,FILENAME,PIXEL_SIZE,X_SIZE,Y_SIZE,Z_SIZE) VALUES ('{vol_id}','{Path(template_filename).stem}','{template_filename}','{pixel_size}','{x_size}','{y_size}','{z_size}')")
        con.commit()
        return vol_id
176
+
177
+
178
def insert_tmpackage_into_db(project, name, path):
    """Register a template-match package (star file) in the project database."""
    statement = f"INSERT INTO TEMPLATE_MATCHES_PACKAGE_ASSETS (NAME,STARFILE_FILENAME) VALUES ('{name}','{path}')"
    connection = sqlite3.connect(project)
    with contextlib.closing(connection) as con:
        con.execute(statement)
        con.commit()
182
+
183
def write_match_template_to_starfile(project, match_template_job_id, filename,overwrite=True, switch_phi_psi=False):
    """Export all peaks of a template-match job to a cisTEM star file.

    Collects every peak of every image processed by ``match_template_job_id``
    together with the imaging parameters used for the match, and writes the
    result via ``starfile.write``.

    Parameters
    ----------
    project : path to the cisTEM project sqlite database
    match_template_job_id : job whose results are exported
    filename : output star file path
    overwrite : passed through to ``starfile.write``
    switch_phi_psi : swap the phi/psi columns to compensate for a bug in
        earlier cisTEM versions that stored them switched in the database
    """

    # Empty, explicitly-typed frame; one row per peak is appended below.
    result_peaks = pd.DataFrame({
        "cisTEMOriginalImageFilename": pd.Series(dtype="object"),
        "cisTEMReference3DFilename": pd.Series(dtype="object"),
        "cisTEMMicroscopeVoltagekV": pd.Series(dtype="float"),
        "cisTEMMicroscopeCsMM": pd.Series(dtype="float"),
        "cisTEMAmplitudeContrast": pd.Series(dtype="float"),
        "cisTEMPhaseShift": pd.Series(dtype="float"),
        "cisTEMDefocus1": pd.Series(dtype="float"),
        "cisTEMDefocus2": pd.Series(dtype="float"),
        "cisTEMDefocusAngle": pd.Series(dtype="float"),
        "cisTEMPositionInStack": pd.Series(dtype="int"),
        "cisTEMOriginalXPosition": pd.Series(dtype="float"),
        "cisTEMOriginalYPosition": pd.Series(dtype="float"),
        "cisTEMAnglePsi": pd.Series(dtype="float"),
        "cisTEMAngleTheta": pd.Series(dtype="float"),
        "cisTEMAnglePhi": pd.Series(dtype="float"),
        "cisTEMPixelSize": pd.Series(dtype="float"),
        "cisTEMScore": pd.Series(dtype="float"),
    })

    with contextlib.closing(sqlite3.connect(project)) as con:
        # One row per processed image for this job; each row points at a
        # per-match peak table TEMPLATE_MATCH_PEAK_LIST_<id>.
        df1 = pd.read_sql_query(f"SELECT * FROM TEMPLATE_MATCH_LIST WHERE TEMPLATE_MATCH_JOB_ID={match_template_job_id}",con)
        for _i, tmres in df1.iterrows():
            image = pd.read_sql_query(f"SELECT FILENAME FROM IMAGE_ASSETS WHERE IMAGE_ASSET_ID = {tmres['IMAGE_ASSET_ID']}",con)
            volume = pd.read_sql_query(f"SELECT FILENAME FROM VOLUME_ASSETS WHERE VOLUME_ASSET_ID = {tmres['REFERENCE_VOLUME_ASSET_ID']}",con)
            df2 = pd.read_sql_query(f"SELECT * FROM TEMPLATE_MATCH_PEAK_LIST_{tmres['TEMPLATE_MATCH_ID']}",con)
            for _j, peakres in df2.iterrows():
                # NOTE(review): filenames are wrapped in literal single quotes
                # here -- presumably required by the star-file consumer; confirm.
                new_peak_series = pd.Series([
                    "'"+image["FILENAME"].iloc[0]+"'",
                    "'"+volume["FILENAME"].iloc[0]+"'",
                    tmres["USED_VOLTAGE"],
                    tmres["USED_SPHERICAL_ABERRATION"],
                    tmres["USED_AMPLITUDE_CONTRAST"],
                    tmres["USED_PHASE_SHIFT"],
                    # Per-peak defocus offset is added to the image-level fit.
                    tmres["USED_DEFOCUS1"] + peakres["DEFOCUS"],
                    tmres["USED_DEFOCUS2"] + peakres["DEFOCUS"],
                    tmres["USED_DEFOCUS_ANGLE"],
                    peakres["PEAK_NUMBER"],
                    peakres["X_POSITION"],
                    peakres["Y_POSITION"],
                    peakres["PSI"],
                    peakres["THETA"],
                    peakres["PHI"],
                    tmres["USED_PIXEL_SIZE"],
                    peakres["PEAK_HEIGHT"]
                ], index = result_peaks.columns)
                # Append in place at the next integer index.
                result_peaks.loc[len(result_peaks.index)] = new_peak_series

    # Due to a bug in cisTEM in earlier matches phi and psi are switched in the
    # database
    if(switch_phi_psi):
        # `temp` keeps a reference to the original phi Series, so assigning the
        # new column before reading it back still performs a correct swap.
        temp = result_peaks["cisTEMAnglePhi"]
        result_peaks["cisTEMAnglePhi"] = result_peaks["cisTEMAnglePsi"]
        result_peaks["cisTEMAnglePsi"] = temp
    starfile.write(result_peaks, filename=filename, overwrite=overwrite)
240
+
241
+
242
def datetime_to_msdos(now):
    """Pack a datetime into the 32-bit MS-DOS date/time format.

    High 16 bits: (year-1980, month, day); low 16 bits: (hour, minute,
    second/2). Seconds are stored with 2-second resolution, as in the FAT
    timestamp layout.
    """
    date_bits = ((now.year - 1980) << 9) | (now.month << 5) | now.day
    time_bits = (now.hour << 11) | (now.minute << 5) | (now.second // 2)
    return (date_bits << 16) | time_bits
249
+
250
def create_peak_lists(con, id: int):
    """Create the per-match peak table and peak-change table for match ``id``.

    The change table carries the same columns as the peak table plus the
    original/new peak-number mapping used when peaks are re-ranked.
    """
    shared_columns = ("PEAK_NUMBER INTEGER PRIMARY KEY AUTOINCREMENT, X_POSITION REAL, "
                      "Y_POSITION REAL, PSI REAL, THETA REAL, PHI REAL, DEFOCUS REAL, "
                      "PIXEL_SIZE REAL, PEAK_HEIGHT REAL")
    cur = con.cursor()
    cur.execute(f"CREATE TABLE TEMPLATE_MATCH_PEAK_LIST_{id} ({shared_columns})")
    con.commit()
    cur.execute(f"CREATE TABLE TEMPLATE_MATCH_PEAK_CHANGE_LIST_{id} ({shared_columns}, ORIGINAL_PEAK_NUMBER REAL, NEW_PEAK_NUMBER REAL)")
    con.commit()
256
+
257
def get_max_match_template_job_id(database):
    """Return the highest TEMPLATE_MATCH_JOB_ID in the project, or None if empty."""
    with contextlib.closing(sqlite3.connect(database)) as con:
        row = con.execute("SELECT MAX(TEMPLATE_MATCH_JOB_ID) FROM TEMPLATE_MATCH_LIST").fetchone()
    return row[0]
263
+
264
def get_already_processed_images(database, match_template_job_id):
    """Return a DataFrame of IMAGE_ASSET_IDs already handled by the given job."""
    with contextlib.closing(sqlite3.connect(database)) as con:
        processed = pd.read_sql_query(f"SELECT IMAGE_ASSET_ID FROM TEMPLATE_MATCH_LIST WHERE TEMPLATE_MATCH_JOB_ID = {match_template_job_id}",con)
    return processed
268
+
269
def get_num_already_processed_images(database, match_template_job_id):
    """Count the images already processed by the given template-match job."""
    query = f"SELECT COUNT(1) FROM TEMPLATE_MATCH_LIST WHERE TEMPLATE_MATCH_JOB_ID = {match_template_job_id}"
    with contextlib.closing(sqlite3.connect(database)) as con:
        (count,) = con.execute(query).fetchone()
    return count
275
+
276
def get_num_matches(database, match_template_job_id):
    """Sum the number of recorded peaks over all matches of one job.

    Each match id has its own TEMPLATE_MATCH_PEAK_LIST_<id> table; the peak
    count is taken as MAX(RowId), which is NULL (skipped) for empty tables.
    """
    with contextlib.closing(sqlite3.connect(database)) as con:
        cur = con.cursor()
        cur.execute(f"SELECT TEMPLATE_MATCH_ID FROM TEMPLATE_MATCH_LIST WHERE TEMPLATE_MATCH_JOB_ID = {match_template_job_id}")
        total = 0
        for (match_id,) in cur.fetchall():
            peak_count = cur.execute(f"SELECT MAX(RowId) FROM TEMPLATE_MATCH_PEAK_LIST_{match_id}").fetchone()[0]
            if peak_count is not None:
                total += peak_count
    return total
288
+
289
def get_num_images(database):
    """Count image assets that have a CTF estimate (CTF_ESTIMATION_ID != -1)."""
    with contextlib.closing(sqlite3.connect(database)) as con:
        row = con.execute("SELECT COUNT(1) FROM IMAGE_ASSETS WHERE CTF_ESTIMATION_ID IS NOT -1").fetchone()
    return row[0]
295
+
296
def get_num_movies(database):
    """Count the movie assets registered in the project database."""
    with contextlib.closing(sqlite3.connect(database)) as con:
        (count,) = con.execute("SELECT COUNT(1) FROM MOVIE_ASSETS").fetchone()
    return count
@@ -0,0 +1,10 @@
1
+ from pycistem.programs.apply_ctf import *
2
+ from pycistem.programs.cistem_program import *
3
+ from pycistem.programs.refine_template import *
4
+ from pycistem.programs.refine_template_niko import *
5
+ from pycistem.programs.refine_template_dev import *
6
+ from pycistem.programs.refine_ctf import *
7
+ from pycistem.programs.estimate_beamtilt import *
8
+ from pycistem.programs.run_profile import *
9
+ from pycistem.programs.reconstruct3d import *
10
+ from pycistem.programs.resample import *
@@ -0,0 +1,23 @@
1
+
2
# 16-byte magic tokens used by cisTEM's socket protocol. Every control message
# on the wire begins with one of these fixed byte strings; the values must
# match the constants compiled into the cisTEM binaries exactly, so never edit
# them.
socket_please_identify = b"JcFG>&P.RuC9,>za"
socket_sending_identification = b"gC2CeZWNb2GPv5qh"
socket_you_are_connected = b"J82zjSwYY^-!bF>4"
socket_send_job_details = b"gr<V>ThBp6w9fzLg"
socket_sending_job_package = b"'8ujA!Lup%PR*!hG"
# NOTE(review): this token is 15 bytes, unlike the 16-byte others -- confirm
# it matches the cisTEM source before relying on fixed-size reads for it.
socket_you_are_the_master = b"eVmYc.3!g}}cZZs"
socket_you_are_a_worker = b"U6u*:z6}W+7nV2g'"
socket_send_next_job = b"z7PnJh=x;[b#f/6L"
socket_time_to_die = b")[czL7$#Sg/d4-*K"
socket_ready_to_send_single_job = b"-TDv(X*kY.:d`D5:"
socket_i_have_an_error = b"8TU.cDc3jr,rb[SN"
socket_i_have_info = b"+5nxvY@zt.!_R#Vn"
socket_job_finished = b"jNA[3!VdLdkb$LwM"
socket_number_of_connections = b"Uu6tsQ,z}M''T`7f"
socket_all_jobs_finished = b"aL)yaH[$3s;9Ymk6"
socket_job_result = b"3F6E_.``L6YC^q[U"
socket_job_result_queue = b"^}`@pF9m;{m9k=$F"
socket_result_with_image_to_write = b"=z4-Y8Ge?vEjh`H^"
socket_program_defined_result = b"e}w<S9hm<3L6Dr+V"
socket_send_thread_timing = b"Kq04etrq1fO2QV4d"
socket_template_match_result_ready = b"EP927e$*cQ^egWq'"
socket_remote_control = b"9^$[9^$[9^$[9^$["
@@ -0,0 +1,59 @@
1
+ import asyncio
2
+ from dataclasses import dataclass
3
+ from typing import Union
4
+
5
+ from pycistem.database import get_image_info_from_db
6
+ from pycistem.programs import cistem_program
7
+ from pycistem.programs._cistem_constants import socket_send_next_job
8
+
9
+
10
+ @dataclass
11
+ class ApplyCtfParameters:
12
+ input_filename: str
13
+ output_filename: str
14
+ pixel_size: float = 1.0
15
+ acceleration_voltage: float = 300.0
16
+ spherical_aberration: float = 2.7
17
+ amplitude_contrast: float = 0.07
18
+ defocus_1: float = 20000
19
+ defocus_2: float = 20000
20
+ astigmatism_angle: float = 0.0
21
+ additional_phase_shift: float = 0.0
22
+ input_ctf_values_from_text_file: bool = False
23
+ text_filename: str = ""
24
+ phase_flip_only: bool = False
25
+ apply_wiener_filter: bool = False
26
+ wiener_filter_falloff_frequency: float = 100.0
27
+ wiener_filter_falloff_fudge_factor: float = 1.0
28
+ wiener_filter_scale_fudge_factor: float = 1.0
29
+ wiener_filter_high_pass_radius: float = 200.0
30
+ maintain_image_contrast: bool = True
31
+
32
def parameters_from_database(database, image_asset_id, output_filename, **kwargs):
    """Build ApplyCtfParameters pre-filled from an image's database record.

    Reads filename, pixel size, optics and CTF-fit values for
    ``image_asset_id`` from the project database; extra keyword arguments are
    forwarded to the ApplyCtfParameters constructor (duplicating one of the
    database-derived fields raises TypeError, as before).
    """
    image_info = get_image_info_from_db(database, image_asset=image_asset_id)
    db_values = {
        "input_filename": image_info["FILENAME"],
        "output_filename": output_filename,
        "pixel_size": image_info["image_pixel_size"],
        "acceleration_voltage": image_info["VOLTAGE"],
        "spherical_aberration": image_info["SPHERICAL_ABERRATION"],
        "amplitude_contrast": image_info["AMPLITUDE_CONTRAST"],
        "defocus_1": image_info["DEFOCUS1"],
        "defocus_2": image_info["DEFOCUS2"],
        "astigmatism_angle": image_info["DEFOCUS_ANGLE"],
    }
    return ApplyCtfParameters(**db_values, **kwargs)
45
+
46
def run(parameters: Union[ApplyCtfParameters,list[ApplyCtfParameters]],**kwargs):
    """Run applyctf synchronously on one parameter set or a list of them."""
    parameter_list = parameters if isinstance(parameters, list) else [parameters]
    asyncio.run(cistem_program.run("applyctf", parameter_list, **kwargs))
52
+
53
async def run_async(parameters: Union[ApplyCtfParameters,list[ApplyCtfParameters]],**kwargs):
    """Awaitable variant of :func:`run` for use inside an existing event loop."""
    parameter_list = parameters if isinstance(parameters, list) else [parameters]
    await cistem_program.run("applyctf", parameter_list, **kwargs)
59
+
@@ -0,0 +1,277 @@
1
+ import asyncio
2
+ import logging
3
+ import secrets
4
+ import socket
5
+ import string
6
+ import struct
7
+ import subprocess
8
+ import time
9
+ from dataclasses import astuple, fields
10
+ from pathlib import Path
11
+ from time import sleep
12
+
13
+ import psutil
14
+
15
+
16
def get_ip_addresses(family):
    """Yield (interface_name, address) pairs for every NIC address of ``family``."""
    for interface_name, addresses in psutil.net_if_addrs().items():
        yield from (
            (interface_name, snic.address)
            for snic in addresses
            if snic.family == family
        )
21
+
22
# Comma-separated list of every local IPv4 address, computed once at import;
# workers are handed this string so they can connect back to us.
HOST = ",".join([a[1] for a in get_ip_addresses(socket.AF_INET)])


from pycistem.config import config

# Library-style logger: NullHandler keeps pycistem silent unless the
# application configures logging itself.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())


# Socket protocol magic tokens (socket_please_identify, socket_time_to_die, ...).
from pycistem.programs._cistem_constants import *
32
+
33
+
34
+ def _encode_parameters(parameters):
35
+ # creates a cisTEM compatible buffer from the parameters class
36
+ buffer = b""
37
+ buffer += int(1).to_bytes(4,"little")
38
+ buffer += len(fields(parameters)).to_bytes(4,"little")
39
+ parameterstuple = astuple(parameters)
40
+ for i,argument in enumerate(fields(parameters)):
41
+ if argument.type == float:
42
+ buffer += int(3).to_bytes(1,"little")
43
+ buffer += struct.pack("<f", parameterstuple[i])
44
+ if argument.type == bool:
45
+ buffer += int(4).to_bytes(1,"little")
46
+ buffer += parameterstuple[i].to_bytes(1,"little")
47
+ if argument.type == str:
48
+ buffer += int(1).to_bytes(1,"little")
49
+ bb = parameterstuple[i].encode("utf-8")
50
+ buffer += len(bb).to_bytes(4,"little")
51
+ buffer += bb
52
+ if argument.type == int:
53
+ buffer += int(2).to_bytes(1,"little")
54
+ buffer += struct.pack("<i", parameterstuple[i])
55
+ return buffer
56
+
57
+
58
async def handle_manager(reader, writer, identity, port):
    """Authenticate a freshly launched cisTEM worker and redirect it to the leader.

    Protocol: send the 16-byte "please identify" token, expect the
    "sending identification" token followed by the 16-character shared secret
    ``identity`` the worker was launched with, then reply with the worker role
    plus the leader's host and port (each as a 4-byte little-endian length
    followed by the UTF-8 string). The worker then reconnects to the leader
    for actual jobs.
    """
    # Handles initial connection from the executable and directs them to the leader
    addr = writer.get_extra_info("peername")
    #logger.info(f"{addr} connected to manager")
    writer.write(socket_please_identify)
    await writer.drain()
    data = await reader.readexactly(16)
    if data != socket_sending_identification:
        log.error(f"{addr!r} {data} is not {socket_sending_identification}")
        writer.close()
        return
    # The worker echoes back the 16-character secret from its command line.
    data = await reader.readexactly(16)
    message= data.decode()
    if message != identity:
        log.error(f"{addr!r} {message} is not {identity}: wrong process connected")
        writer.close()
        return
    writer.write(socket_you_are_a_worker)
    # Length-prefixed host string (may contain several comma-separated IPs).
    host = HOST.encode("utf-8")
    writer.write(len(host).to_bytes(4,"little"))
    writer.write(host)
    # Length-prefixed leader port, transmitted as a decimal string.
    port = str(port).encode("utf-8")
    writer.write(len(port).to_bytes(4,"little"))
    writer.write(port)
    await writer.drain()
    # NOTE(review): time.sleep blocks the whole event loop for one second --
    # presumably to let the worker read before the socket closes; confirm
    # whether asyncio.sleep was intended.
    time.sleep(1)
    #logger.info(f"{addr} connected")
    writer.close()
86
+
87
async def handle_leader(reader, writer, buffers, signal_handlers,results):
    """Serve encoded jobs to one connected worker until the job queue is empty.

    ``buffers`` is a shared list of ``(parameter_index, encoded_job)`` tuples;
    every concurrently connected worker pops from the same list, so the queue
    is drained cooperatively. Results produced by ``signal_handlers``
    callbacks are appended to the shared ``results`` list as
    ``(parameter_index, result)`` tuples.
    """
    # log_str accumulates one-letter markers of protocol events ("J", "R",
    # "j", "N"); it is only interpolated into error messages to help
    # reconstruct what happened on this connection.
    log_str = ""
    # Handles connections from the executable asking for work

    addr = writer.get_extra_info("peername")

    writer.write(socket_you_are_connected)
    await writer.drain()
    data = await reader.readexactly(16)
    if data != socket_send_next_job:
        log.error(f"{addr!r} did not request next job, instead sent {data}")
        writer.close()
        return
    # 8-byte payload that accompanies the first job request; discarded.
    data = await reader.readexactly(8)
    #logger.info(f"{addr} sent {data} as dummy result")
    while len(buffers) > 0:
        # Pop the next job; a length-prefixed buffer follows the "ready" token.
        parameter_index, buffer = buffers.pop(0)
        log.debug(f"Working on parameter set {parameter_index}")
        writer.write(socket_ready_to_send_single_job)
        writer.write(len(buffer).to_bytes(8,"little"))
        writer.write(buffer)
        await writer.drain()

        # check length of signal_handlers
        if len(signal_handlers) > 0:
            # Consume intermediate queue/info messages ("J") until a final
            # token arrives; dispatch that one to its handler ("R") and record
            # the result, then stop listening for this job.
            cont = True
            while cont:
                data = await reader.readexactly(16)
                # log.debug(f"In no sig {addr} sent {data}")
                result = None
                if data != socket_job_result_queue and data != socket_i_have_info:
                    cont = False
                else:
                    await signal_handlers[data](reader,writer,log)
                    log_str += "J"
                    continue
                if data in signal_handlers:
                    #logger.info(f"{addr} sent {data} and I know what to do with it")
                    result = await signal_handlers[data](reader,writer,log)
                    results.append((parameter_index,result))
                    log_str += "R"
                else:
                    log.error(f"{addr} sent {data} and I don't know what to do with it, really {log_str}")

                #logger.error(f"{buffer}")
                break
        # Wait for the worker to ask for the next job before looping, unless a
        # custom handler owns the "send next job" token itself.
        cont = True
        while cont:
            if socket_send_next_job not in signal_handlers:
                data = await reader.readexactly(16)
                # log.debug(f"In cont {addr} sent {data}")
                if data != socket_send_next_job:
                    if data == socket_job_result_queue:
                        # Late queued results can still arrive here ("j").
                        res = await signal_handlers[data](reader,writer,log)
                        log_str += "j"
                        continue
                    else:
                        log.error(f"{addr!r} did not request next job, instead sent {data}")
                # 8-byte payload accompanying the request; discarded ("N").
                data = await reader.readexactly(8)
                log_str += "N"
                cont = False
                #logger.info(f"{addr} sent {data} after requesting next results")
            else:
                cont = False
    # Queue drained: tell the worker to exit and soak up any trailing bytes.
    log.debug(f"{addr} finished, sending time to die")
    writer.write(socket_time_to_die)
    await writer.drain()
    data = await reader.read(16)
    #logger.info(f"{addr} sent {data}")
    data = await reader.read(16)
    #logger.info(f"{addr} sent {data}")
    data = await reader.read(16)
    #logger.info(f"{addr} sent {data}")

    writer.close()
162
+
163
+
164
async def run(executable: str,parameters,signal_handlers={},num_procs=1,num_threads=1, cmd_prefix="", cmd_suffix="", save_output=False, save_output_path="",sleep_time=0.1):
    """Run a cisTEM executable over a list of parameter sets via worker processes.

    Starts two local TCP servers -- a "manager" that authenticates freshly
    launched workers (see handle_manager) and a "leader" that hands out one
    encoded parameter buffer per job (see handle_leader) -- then launches the
    worker processes through the shell, waits for them to exit and returns the
    collected ``(parameter_index, result)`` tuples.

    Parameters
    ----------
    executable : name of the cisTEM binary, resolved under config["CISTEM_PATH"]
    parameters : iterable of parameter dataclasses (see _encode_parameters)
    signal_handlers : maps 16-byte protocol tokens to async callbacks.
        NOTE(review): mutable default argument -- shared between calls; safe
        only while no caller mutates it.
    num_procs : number of identical worker processes, or a RunProfile whose
        run_commands define the launch commands.
        NOTE(review): ``RunProfile`` is not imported in this module --
        presumably ``pycistem.core.RunProfile``; confirm, else that branch
        raises NameError.
    num_threads : threads per worker, passed on the worker command line
    cmd_prefix / cmd_suffix : string (replicated per process) or per-process
        list wrapped around the command line
    save_output : write each worker's stdout to ``save_output_path + "_<i>.txt"``
    sleep_time : blocking stagger (seconds) between process launches
    """
    results = []
    buffers = []
    # Set HOST to current ip address
    #'127.0.0.1' # Standard loopback interface address (localhost)

    # Create secret to identify workers
    alphabet = string.ascii_letters + string.digits
    identity = "".join(secrets.choice(alphabet) for i in range(16))
    # Pre-encode every job; the index is carried along so results can be
    # matched back to their parameter set.
    buffers = [(i,_encode_parameters(parameter)) for i, parameter in enumerate(parameters)]
    log.debug(f"Secret is {identity}")

    # Start the leader, probing ports 3000-4000 until one is free.
    start_port = 3000
    leader_started = False
    while not leader_started:
        try:
            server_leader = await asyncio.start_server(
                lambda r,w : handle_leader(r,w, buffers,signal_handlers,results), "", start_port, family=socket.AF_INET)
            leader_started = True
            start_port += 1
        except OSError:
            log.debug(f"Port {start_port} already in use, trying next port")
            start_port += 1
            if start_port > 4000:
                msg = "No ports available"
                raise OSError(msg)
    port_leader = server_leader.sockets[0].getsockname()[1]
    addrs = ", ".join(str(sock.getsockname()) for sock in server_leader.sockets)
    log.debug(f"Serving leader on {addrs}")

    # Start the manager on the next free port; it redirects workers to the leader.
    manager_started = False
    while not manager_started:
        try:
            server_manager = await asyncio.start_server(
                lambda r,w : handle_manager(r,w,identity,port_leader), "",start_port, family=socket.AF_INET)
            manager_started = True
        except OSError:
            log.debug(f"Port {start_port} already in use, trying next port")
            start_port += 1
            if start_port > 4000:
                msg = "No ports available"
                raise OSError(msg)
    port_manager = server_manager.sockets[0].getsockname()[1]
    addrs = ", ".join(str(sock.getsockname()) for sock in server_manager.sockets)
    log.debug(f"Serving manager on {addrs}")


    # Starting workers: cisTEM executables take manager host/port, the shared
    # secret and a thread count on the command line.
    cmd = str(Path(config["CISTEM_PATH"]) / executable)
    cmd += f" {HOST} {port_manager} {identity} {num_threads}"

    # Test if cmd_prefix is iterable; a plain string is replicated per process.
    if type(num_procs) == int and type(cmd_prefix) == str:
        cmd_prefix = [cmd_prefix for i in range(num_procs)]

    if type(num_procs) == int and type(cmd_suffix) == str:
        cmd_suffix = [cmd_suffix for i in range(num_procs)]



    launch_futures = []
    if type(num_procs) == int:
        tasks = [cmd_prefix[i] + cmd +cmd_suffix[i] for i in range(num_procs)]
    elif type(num_procs) == RunProfile:
        # Launch commands come from the run profile instead; the executable
        # name is substituted into each configured command.
        tasks = []
        num_procs.SubstituteExecutableName(executable)
        for rc in num_procs.run_commands:
            for _i in range(rc.number_of_copies):
                tasks.append(rc.command_to_run)

    for task in tasks:
        # stderr is merged into stdout, so communicate() below returns
        # (stdout, None) for each process.
        launch_futures.append(await asyncio.create_subprocess_shell(
            task,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT))
        # NOTE(review): time.sleep blocks the event loop between launches --
        # presumably a deliberate stagger; asyncio.sleep may be intended.
        sleep(sleep_time)

    # NOTE(review): uses the root logger, unlike the module-level `log` used
    # elsewhere in this file.
    logging.debug(f"Launched {num_procs} processes")


    #if save_output:
    #    while True:
    #        for i, future in enumerate(launch_futures):
    #            line = await future.stdout.readline()
    #            if line:
    #                print(f"Pro {i}:")
    #                print(line)


    result_futures = [
        future.communicate()
        for future in launch_futures
    ]


    try:
        proc_results = await asyncio.gather(*result_futures, return_exceptions=False)
    except Exception as ex:
        print("Caught error executing task", ex)
        raise
    finally:
        # NOTE(review): if gather raises, `proc_results` is unbound here and
        # the save_output branch would fail with NameError before the servers
        # are closed; also, since stderr is merged into stdout above,
        # result[1] is always None and the error-file branch never triggers.
        if save_output:
            for i, result in enumerate(proc_results):
                with open(save_output_path + f"_{i}.txt", "w") as f:
                    f.write(result[0].decode("utf-8"))
                if result[1] and len(result[1]) > 0:
                    with open(save_output_path + f"_{i}_error.txt", "w") as f:
                        f.write(result[1].decode("utf-8"))
        server_leader.close()
        server_manager.close()
    return(results)
277
+