restage 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
restage/__init__.py CHANGED
@@ -12,7 +12,6 @@ from .tables import (SimulationEntry,
12
12
  InstrEntry
13
13
  )
14
14
  from .database import Database
15
- from .cache import DATABASE
16
15
 
17
16
 
18
17
  try:
@@ -28,5 +27,4 @@ __all__ = [
28
27
  'NexusStructureEntry',
29
28
  'InstrEntry',
30
29
  'Database',
31
- 'DATABASE'
32
30
  ]
restage/cache.py CHANGED
@@ -1,24 +1,92 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
3
5
  from mccode_antlr.instr import Instr
4
6
  from .tables import InstrEntry, SimulationTableEntry, SimulationEntry
5
-
6
-
7
- def setup_database(named: str):
8
- from platformdirs import user_cache_path
9
- from .database import Database
10
- db_file = user_cache_path('restage', 'ess', ensure_exists=True).joinpath(f'{named}.db')
11
- db = Database(db_file)
12
- return db
13
-
14
-
15
- # Create the global database object in the module namespace.
16
- DATABASE = setup_database('database')
7
+ from .database import Database
8
+
9
+ @dataclass
10
+ class FileSystem:
11
+ root: Path
12
+ db_fixed: tuple[Database,...]
13
+ db_write: Database
14
+
15
+ @classmethod
16
+ def from_config(cls, named: str):
17
+ from .config import config
18
+ db_fixed = []
19
+ db_write = None
20
+ root = None
21
+ if not named.endswith('.db'):
22
+ named += '.db'
23
+ if config['cache'].exists():
24
+ path = config['cache'].as_path()
25
+ if not path.exists():
26
+ path.mkdir(parents=True)
27
+ db_write = Database(path / named)
28
+ root = path
29
+ if config['fixed'].exists():
30
+ more = [Path(c) for c in config['fixed'].as_str_seq() if Path(c).exists()]
31
+ for m in more:
32
+ db_fixed.append(Database(m / named, readonly=True))
33
+ if db_write is not None and db_write.readonly:
34
+ raise ValueError("Specified writable database location is readonly")
35
+ if db_write is None:
36
+ from platformdirs import user_cache_path
37
+ db_write = Database(user_cache_path('restage', 'ess', ensure_exists=True) / named)
38
+ if root is None:
39
+ from platformdirs import user_data_path
40
+ root = user_data_path('restage', 'ess')
41
+ return cls(root, tuple(db_fixed), db_write)
42
+
43
+ def query(self, method, *args, **kwargs):
44
+ q = [x for r in self.db_fixed for x in getattr(r, method)(*args, **kwargs)]
45
+ q.extend(getattr(self.db_write, method)(*args, **kwargs))
46
+ return q
47
+
48
+ def insert(self, method, *args, **kwargs):
49
+ getattr(self.db_write, method)(*args, **kwargs)
50
+
51
+ def query_instr_file(self, *args, **kwargs):
52
+ query = [x for r in self.db_fixed for x in r.query_instr_file(*args, **kwargs)]
53
+ query.extend(self.db_write.query_instr_file(*args, **kwargs))
54
+ return query
55
+
56
+ def insert_instr_file(self, *args, **kwargs):
57
+ self.db_write.insert_instr_file(*args, **kwargs)
58
+
59
+ def query_simulation_table(self, *args, **kwargs):
60
+ return self.query('query_simulation_table', *args, **kwargs)
61
+
62
+ def retrieve_simulation_table(self, *args, **kwargs):
63
+ return self.query('retrieve_simulation_table', *args, **kwargs)
64
+
65
+ def insert_simulation_table(self, *args, **kwargs):
66
+ self.insert('insert_simulation_table', *args, **kwargs)
67
+
68
+ def insert_simulation(self, *args, **kwargs):
69
+ # By definition, 'self.db_write' is writable and Database.insert_simulation
70
+ # _always_ ensures the presence of the specified table in its database.
71
+ # Therefore this method 'just works'.
72
+ self.insert('insert_simulation', *args, **kwargs)
73
+
74
+ def retrieve_simulation(self, table_id: str, row: SimulationEntry):
75
+ matches = []
76
+ for db in self.db_fixed:
77
+ if len(db.retrieve_simulation_table(table_id, False)) == 1:
78
+ matches.extend(db.retrieve_simulation(table_id, row))
79
+ if len(self.db_write.retrieve_simulation_table(table_id, False)) == 1:
80
+ matches.extend(self.db_write.retrieve_simulation(table_id, row))
81
+ return matches
82
+
83
+
84
+
85
+ FILESYSTEM = FileSystem.from_config('database')
17
86
 
18
87
 
19
88
  def module_data_path(sub: str):
20
- from platformdirs import user_data_path
21
- path = user_data_path('restage', 'ess').joinpath(sub)
89
+ path = FILESYSTEM.root / sub
22
90
  if not path.exists():
23
91
  path.mkdir(parents=True)
24
92
  return path
@@ -40,7 +108,6 @@ def directory_under_module_data_path(sub: str, prefix=None, suffix=None, name=No
40
108
  def _compile_instr(entry: InstrEntry, instr: Instr, config: dict | None = None,
41
109
  mpi: bool = False, acc: bool = False,
42
110
  target=None, generator=None):
43
- from tempfile import mkdtemp
44
111
  from mccode_antlr import __version__
45
112
  from mccode_antlr.compiler.c import compile_instrument, CBinaryTarget
46
113
  if config is None:
@@ -55,7 +122,9 @@ def _compile_instr(entry: InstrEntry, instr: Instr, config: dict | None = None,
55
122
  output = directory_under_module_data_path('bin')
56
123
  # TODO consider adding `dump_source=True` _and_ putting the resulting file into
57
124
  # the cache in order to make debugging future problems a tiny bit easier.
58
- binary_path = compile_instrument(instr, target, output, generator=generator, config=config)
125
+ # FIXME a future mccode-antlr will support setting 'source_file={file_path}'
126
+ # to allow exactly this.
127
+ binary_path = compile_instrument(instr, target, output, generator=generator, config=config, dump_source=True)
59
128
  entry.mccode_version = __version__
60
129
  entry.binary_path = str(binary_path)
61
130
  return entry
@@ -64,9 +133,9 @@ def _compile_instr(entry: InstrEntry, instr: Instr, config: dict | None = None,
64
133
  def cache_instr(instr: Instr, mpi: bool = False, acc: bool = False, mccode_version=None, binary_path=None, **kwargs) -> InstrEntry:
65
134
  instr_contents = str(instr)
66
135
  # the query returns a list[InstrTableEntry]
67
- query = DATABASE.query_instr_file(search={'file_contents': instr_contents, 'mpi': mpi, 'acc': acc})
136
+ query = FILESYSTEM.query_instr_file(search={'file_contents': instr_contents, 'mpi': mpi, 'acc': acc})
68
137
  if len(query) > 1:
69
- raise RuntimeError(f"Multiple entries for {instr_contents} in {DATABASE.instr_file_table}")
138
+ raise RuntimeError(f"Multiple entries for {instr_contents} in {FILESYSTEM}")
70
139
  elif len(query) == 1:
71
140
  return query[0]
72
141
 
@@ -75,10 +144,19 @@ def cache_instr(instr: Instr, mpi: bool = False, acc: bool = False, mccode_versi
75
144
  if binary_path is None:
76
145
  instr_file_entry = _compile_instr(instr_file_entry, instr, mpi=mpi, acc=acc, **kwargs)
77
146
 
78
- DATABASE.insert_instr_file(instr_file_entry)
147
+ FILESYSTEM.insert_instr_file(instr_file_entry)
79
148
  return instr_file_entry
80
149
 
81
150
 
151
+ def cache_get_instr(instr: Instr, mpi: bool = False, acc: bool = False) -> InstrEntry | None:
152
+ query = FILESYSTEM.query_instr_file(search={'file_contents': str(instr), 'mpi': mpi, 'acc': acc})
153
+ if len(query) > 1:
154
+ raise RuntimeError(f"Multiple entries for {instr} in {FILESYSTEM}")
155
+ elif len(query) == 1:
156
+ return query[0]
157
+ return None
158
+
159
+
82
160
  def verify_table_parameters(table, parameters: dict):
83
161
  names = list(parameters.keys())
84
162
  if any(x not in names for x in table.parameters):
@@ -89,108 +167,31 @@ def verify_table_parameters(table, parameters: dict):
89
167
 
90
168
 
91
169
  def cache_simulation_table(entry: InstrEntry, row: SimulationEntry) -> SimulationTableEntry:
92
- query = DATABASE.retrieve_simulation_table(entry.id)
93
- if len(query) > 1:
94
- raise RuntimeError(f"Multiple entries for {entry.id} in {DATABASE.simulations_table}")
95
- elif len(query):
96
- table = verify_table_parameters(query[0], row.parameter_values)
170
+ query = FILESYSTEM.retrieve_simulation_table(entry.id)
171
+ if len(query):
172
+ for q in query:
173
+ verify_table_parameters(q, row.parameter_values)
174
+ table = query[0]
97
175
  else:
98
176
  table = SimulationTableEntry(list(row.parameter_values.keys()), f'pst_{entry.id}', entry.id)
99
- DATABASE.insert_simulation_table(table)
177
+ FILESYSTEM.insert_simulation_table(table)
100
178
  return table
101
179
 
102
180
 
103
181
  def cache_has_simulation(entry: InstrEntry, row: SimulationEntry) -> bool:
104
182
  table = cache_simulation_table(entry, row)
105
- query = DATABASE.retrieve_simulation(table.id, row)
183
+ query = FILESYSTEM.retrieve_simulation(table.id, row)
106
184
  return len(query) > 0
107
185
 
108
186
 
109
187
  def cache_get_simulation(entry: InstrEntry, row: SimulationEntry) -> list[SimulationEntry]:
110
188
  table = cache_simulation_table(entry, row)
111
- query = DATABASE.retrieve_simulation(table.id, row)
189
+ query = FILESYSTEM.retrieve_simulation(table.id, row)
112
190
  if len(query) == 0:
113
- raise RuntimeError(f"Expected 1 or more entry for {table.id} in {DATABASE.simulations_table}, got none")
191
+ raise RuntimeError(f"Expected 1 or more entry for {table.id} in {FILESYSTEM}, got none")
114
192
  return query
115
193
 
116
194
 
117
195
  def cache_simulation(entry: InstrEntry, simulation: SimulationEntry):
118
196
  table = cache_simulation_table(entry, simulation)
119
- DATABASE.insert_simulation(table, simulation)
120
-
121
-
122
- def _cleanup_instr_table(allow_different=True):
123
- """Look through the cache tables and remove any entries which are no longer valid"""
124
- from pathlib import Path
125
- from mccode_antlr import __version__
126
- entries = DATABASE.all_instr_files()
127
- for entry in entries:
128
- if not entry.binary_path or not Path(entry.binary_path).exists():
129
- DATABASE.delete_instr_file(entry.id)
130
- elif allow_different and entry.mccode_version != __version__:
131
- DATABASE.delete_instr_file(entry.id)
132
- # plus remove the binary
133
- Path(entry.binary_path).unlink()
134
- # and its directory if it is empty (it's _probably_ empty, but we should make sure)
135
- if not any(Path(entry.binary_path).parent.iterdir()):
136
- Path(entry.binary_path).parent.rmdir()
137
-
138
-
139
- def _cleanup_simulations_table(keep_empty=False, allow_different=False, cleanup_directories=False):
140
- """Look through the cached table listing simulation tables and remove any entries which are no longer valid"""
141
- from pathlib import Path
142
- for entry in DATABASE.retrieve_all_simulation_tables():
143
- if not DATABASE.table_exists(entry.table_name):
144
- DATABASE.delete_simulation_table(entry.id)
145
- continue
146
-
147
- # clean up the entries of the table
148
- _cleanup_simulations(entry.id, keep_empty=keep_empty, cleanup_directories=cleanup_directories)
149
- # and remove the table if it is empty
150
- if not (keep_empty or len(DATABASE.retrieve_all_simulations(entry.id))):
151
- DATABASE.delete_simulation_table(entry.id)
152
- continue
153
-
154
- # check that the column names all match
155
- if not (allow_different or DATABASE.table_has_columns(entry.table_name, entry.parameters)):
156
- # Remove the simulation output folders for each tabulated simulation:
157
- if cleanup_directories:
158
- for sim in DATABASE.retrieve_all_simulations(entry.id):
159
- sim_path = Path(sim.output_path)
160
- for item in sim_path.iterdir():
161
- item.unlink()
162
- sim_path.rmdir()
163
- DATABASE.delete_simulation_table(entry.id)
164
-
165
-
166
- def _cleanup_nexus_table():
167
- # TODO implement this`
168
- pass
169
-
170
-
171
- def _cleanup_simulations(primary_id: str, keep_empty=False, cleanup_directories=False):
172
- """Look through a cached simulations table's entries and remove any which are no longer valid"""
173
- from pathlib import Path
174
- entries = DATABASE.retrieve_all_simulations(primary_id)
175
- for entry in entries:
176
- # Does the table reference a missing simulation output directory?
177
- if not Path(entry.output_path).exists():
178
- DATABASE.delete_simulation(primary_id, entry.id)
179
- # or an empty one?
180
- elif not keep_empty and not any(Path(entry.output_path).iterdir()):
181
- if cleanup_directories:
182
- Path(entry.output_path).rmdir()
183
- DATABASE.delete_simulation(primary_id, entry.id)
184
- # TODO add a lifetime to check against?
185
-
186
-
187
- def cache_cleanup(keep_empty=False, allow_different=False, cleanup_directories=False):
188
- _cleanup_instr_table(allow_different=allow_different)
189
- _cleanup_nexus_table()
190
- _cleanup_simulations_table(keep_empty=keep_empty, allow_different=allow_different,
191
- cleanup_directories=cleanup_directories)
192
-
193
-
194
- # FIXME auto cleanup is removing cached table entries incorrectly at the moment
195
- # # automatically clean up the cache when the module is loaded
196
- # cache_cleanup()
197
+ FILESYSTEM.insert_simulation(table, simulation)
@@ -0,0 +1,28 @@
1
+ import confuse
2
+ from os import environ
3
+ # Any platform independent configuration settings can go in 'default.yaml'
4
+ config = confuse.LazyConfig('restage', __name__)
5
+
6
+ # use environment variables specified as 'RESTAGE_XYZ' as configuration entries 'xyz'
7
+ config.set_env()
8
+ # Expected environment variables:
9
+ # RESTAGE_FIXED="/loc/one /usr/loc/two"
10
+ # RESTAGE_CACHE="$HOME/loc/three"
11
+
12
+
13
+ def _common_defaults():
14
+ import yaml
15
+ from importlib.resources import files, as_file
16
+
17
+ common_file = files(__name__).joinpath('default.yaml')
18
+ if not common_file.is_file():
19
+ raise RuntimeError(f"Can not locate default.yaml in module files (looking for {common_file})")
20
+ with as_file(common_file) as file:
21
+ with open(file, 'r') as data:
22
+ common_configs = yaml.safe_load(data)
23
+
24
+ return common_configs or {}
25
+
26
+
27
+ # By using the 'add' method, we set these as the *lowest* priority. Any user/system files will override:
28
+ config.add(_common_defaults())
File without changes
restage/database.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  from pathlib import Path
4
5
  from .tables import SimulationEntry, SimulationTableEntry, NexusStructureEntry, InstrEntry
5
6
 
@@ -10,9 +11,13 @@ class Database:
10
11
  nexus_structures_table: str | None = None,
11
12
  simulations_table: str | None = None,
12
13
  # secondary_simulations_table: str = None
14
+ readonly: bool = False
13
15
  ):
14
16
  from sqlite3 import connect
15
- self.db = connect(db_file)
17
+ from os import access, W_OK
18
+ self.readonly = readonly or not access(db_file.parent, W_OK)
19
+ mode = 'ro' if self.readonly else 'rwc'
20
+ self.db = connect(f'file:{db_file}?mode={mode}', uri=True)
16
21
  self.cursor = self.db.cursor()
17
22
  self.instr_file_table = instr_file_table or 'instr_file'
18
23
  self.nexus_structures_table = nexus_structures_table or 'nexus_structures'
@@ -27,8 +32,11 @@ class Database:
27
32
  # (self.secondary_simulations_table, SecondaryInstrSimulationTable)
28
33
  ):
29
34
  if not self.table_exists(table):
30
- self.cursor.execute(tt.create_sql_table(table_name=table))
31
- self.db.commit()
35
+ if not self.readonly:
36
+ self.cursor.execute(tt.create_sql_table(table_name=table))
37
+ self.db.commit()
38
+ else:
39
+ raise ValueError(f'Table {table} does not exist in readonly database {db_file}')
32
40
 
33
41
  def __del__(self):
34
42
  self.db.close()
@@ -46,6 +54,8 @@ class Database:
46
54
  raise RuntimeError(f"Table {table_name} does not exist")
47
55
 
48
56
  def insert_instr_file(self, instr_file: InstrEntry):
57
+ if self.readonly:
58
+ raise ValueError('Cannot insert into readonly database')
49
59
  command = instr_file.insert_sql_table(table_name=self.instr_file_table)
50
60
  self.announce(command)
51
61
  self.cursor.execute(command)
@@ -56,21 +66,39 @@ class Database:
56
66
  return [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
57
67
 
58
68
  def query_instr_file(self, search: dict) -> list[InstrEntry]:
69
+ from .tables import str_hash
70
+ contents = None
71
+ if 'file_contents' in search:
72
+ # direct file content searches are slow (for large contents, at least)
73
+ # Each InstrEntry inserts a hash of its contents, which is probably unique,
74
+ # so pull-back any matches against that and then check full contents below
75
+ contents = search['file_contents']
76
+ del search['file_contents']
77
+ search['file_hash'] = str_hash(contents)
59
78
  query = f"SELECT * FROM {self.instr_file_table} WHERE "
60
79
  query += ' AND '.join([f"{k}='{v}'" if isinstance(v, str) else f"{k}={v}" for k, v in search.items()])
61
80
  self.announce(query)
62
81
  self.cursor.execute(query)
63
- return [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
82
+ results = [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
83
+ if contents is not None:
84
+ # this check is _probably_ redundant, but on the off chance of a hash
85
+ # collision we can guarantee the returned InstrEntry matches:
86
+ results = [x for x in results if x.file_contents == contents]
87
+ return results
64
88
 
65
89
  def all_instr_files(self) -> list[InstrEntry]:
66
90
  self.cursor.execute(f"SELECT * FROM {self.instr_file_table}")
67
91
  return [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
68
92
 
69
93
  def delete_instr_file(self, instr_id: str):
94
+ if self.readonly:
95
+ raise ValueError('Cannot delete from readonly database')
70
96
  self.cursor.execute(f"DELETE FROM {self.instr_file_table} WHERE id='{instr_id}'")
71
97
  self.db.commit()
72
98
 
73
99
  def insert_nexus_structure(self, nexus_structure: NexusStructureEntry):
100
+ if self.readonly:
101
+ raise ValueError('Cannot insert into readonly database')
74
102
  command = nexus_structure.insert_sql_table(table_name=self.nexus_structures_table)
75
103
  self.announce(command)
76
104
  self.cursor.execute(command)
@@ -81,6 +109,8 @@ class Database:
81
109
  return [NexusStructureEntry.from_query_result(x) for x in self.cursor.fetchall()]
82
110
 
83
111
  def insert_simulation_table(self, entry: SimulationTableEntry):
112
+ if self.readonly:
113
+ raise ValueError('Cannot insert into readonly database')
84
114
  command = entry.insert_sql_table(table_name=self.simulations_table)
85
115
  self.announce(command)
86
116
  self.cursor.execute(command)
@@ -94,7 +124,7 @@ class Database:
94
124
  def retrieve_simulation_table(self, primary_id: str, update_access_time=True) -> list[SimulationTableEntry]:
95
125
  self.cursor.execute(f"SELECT * FROM {self.simulations_table} WHERE id='{primary_id}'")
96
126
  entries = [SimulationTableEntry.from_query_result(x) for x in self.cursor.fetchall()]
97
- if update_access_time:
127
+ if not self.readonly and update_access_time:
98
128
  from .tables import utc_timestamp
99
129
  self.cursor.execute(f"UPDATE {self.simulations_table} SET last_access='{utc_timestamp()}' "
100
130
  f"WHERE id='{primary_id}'")
@@ -106,6 +136,8 @@ class Database:
106
136
  return [SimulationTableEntry.from_query_result(x) for x in self.cursor.fetchall()]
107
137
 
108
138
  def delete_simulation_table(self, primary_id: str):
139
+ if self.readonly:
140
+ raise ValueError('Cannot delete from readonly database')
109
141
  matches = self.retrieve_simulation_table(primary_id)
110
142
  if len(matches) != 1:
111
143
  raise RuntimeError(f"Expected exactly one match for id={primary_id}, got {matches}")
@@ -121,6 +153,8 @@ class Database:
121
153
  return [SimulationTableEntry.from_query_result(x) for x in self.cursor.fetchall()]
122
154
 
123
155
  def _insert_simulation(self, sim: SimulationTableEntry, pars: SimulationEntry):
156
+ if self.readonly:
157
+ raise ValueError('Cannot insert into readonly database')
124
158
  if not self.table_exists(sim.table_name):
125
159
  command = sim.create_simulation_sql_table()
126
160
  self.announce(command)
@@ -136,7 +170,7 @@ class Database:
136
170
  query = f"SELECT * FROM {table} WHERE {pars.between_query()}"
137
171
  self.cursor.execute(query)
138
172
  entries = [SimulationEntry.from_query_result(columns, x) for x in self.cursor.fetchall()]
139
- if update_access_time and len(entries):
173
+ if not self.readonly and update_access_time and len(entries):
140
174
  from .tables import utc_timestamp
141
175
  self.cursor.execute(f"UPDATE {table} SET last_access='{utc_timestamp()}' WHERE {pars.between_query()}")
142
176
  self.db.commit()
@@ -161,6 +195,8 @@ class Database:
161
195
  return self._retrieve_simulation(table, columns, pars)
162
196
 
163
197
  def delete_simulation(self, primary_id: str, simulation_id: str):
198
+ if self.readonly:
199
+ raise ValueError('Cannot delete from readonly database')
164
200
  matches = self.retrieve_simulation_table(primary_id)
165
201
  if len(matches) != 1:
166
202
  raise RuntimeError(f"Expected exactly one match for id={primary_id}, got {matches}")
restage/energy.py CHANGED
@@ -12,8 +12,13 @@ def get_and_remove(d: dict, k: str, default=None):
12
12
 
13
13
  def one_generic_energy_to_chopper_parameters(
14
14
  calculate_choppers, chopper_names: tuple[str, ...],
15
- time: float, order: int, parameters: dict):
15
+ time: float, order: int, parameters: dict,
16
+ chopper_parameter_present: bool
17
+ ):
18
+ from loguru import logger
16
19
  if any(x in parameters for x in ('ei', 'wavelength', 'lambda', 'energy', 'e')):
20
+ if chopper_parameter_present:
21
+ logger.warning('Specified chopper parameter(s) overridden by Ei or wavelength.')
17
22
  ei = get_and_remove(parameters, 'ei', get_and_remove(parameters, 'energy', get_and_remove(parameters, 'e')))
18
23
  if ei is None:
19
24
  wavelength = get_and_remove(parameters, 'wavelength', get_and_remove(parameters, 'lambda'))
@@ -28,26 +33,32 @@ def bifrost_translate_energy_to_chopper_parameters(parameters: dict):
28
33
  from .bifrost_choppers import calculate
29
34
  choppers = tuple(f'{a}_chopper_{b}' for a, b in product(['pulse_shaping', 'frame_overlap', 'bandwidth'], [1, 2]))
30
35
  # names = [a+b for a, b in product(('ps', 'fo', 'bw'), ('1', '2'))]
36
+ chopper_parameter_present = False
31
37
  for name in product(choppers, ('speed', 'phase')):
32
38
  name = ''.join(name)
33
39
  if name not in parameters:
34
40
  parameters[name] = 0
41
+ else:
42
+ chopper_parameter_present = True
35
43
  order = get_and_remove(parameters, 'order', 14)
36
44
  time = get_and_remove(parameters, 'time', get_and_remove(parameters, 't', 170/180/(2 * 15 * 14)))
37
- return one_generic_energy_to_chopper_parameters(calculate, choppers, time, order, parameters)
45
+ return one_generic_energy_to_chopper_parameters(calculate, choppers, time, order, parameters, chopper_parameter_present)
38
46
 
39
47
 
40
48
  def cspec_translate_energy_to_chopper_parameters(parameters: dict):
41
49
  from itertools import product
42
50
  from .cspec_choppers import calculate
43
51
  choppers = ('bw1', 'bw2', 'bw3', 's', 'p', 'm1', 'm2')
52
+ chopper_parameter_present = False
44
53
  for name in product(choppers, ('speed', 'phase')):
45
54
  name = ''.join(name)
46
55
  if name not in parameters:
47
56
  parameters[name] = 0
57
+ else:
58
+ chopper_parameter_present = True
48
59
  time = get_and_remove(parameters, 'time', 0.004)
49
60
  order = get_and_remove(parameters, 'order', 16)
50
- return one_generic_energy_to_chopper_parameters(calculate, choppers, time, order, parameters)
61
+ return one_generic_energy_to_chopper_parameters(calculate, choppers, time, order, parameters, chopper_parameter_present)
51
62
 
52
63
 
53
64
  def no_op_translate_energy_to_chopper_parameters(parameters: dict):
restage/instr.py CHANGED
@@ -16,7 +16,7 @@ def load_instr(filepath: Union[str, Path]) -> Instr:
16
16
  if not isinstance(filepath, Path):
17
17
  filepath = Path(filepath)
18
18
  if not filepath.exists() or not filepath.is_file():
19
- raise ValueError('The provided filepath does not exist or is not a file')
19
+ raise ValueError(f'The provided {filepath=} does not exist or is not a file')
20
20
 
21
21
  if filepath.suffix == '.instr':
22
22
  return load_mcstas_instr(filepath)
restage/mcpl.py CHANGED
@@ -4,20 +4,26 @@ from pathlib import Path
4
4
  def mcpl_real_filename(filename: Path) -> Path:
5
5
  """MCPL_output from McCode instruments has the bad habit of changing the output file name silently.
6
6
  Find the _real_ output file name by looking for the expected variants"""
7
- if filename.exists() and filename.is_file():
8
- return filename
9
- if filename.with_suffix('.mcpl').exists() and filename.with_suffix('.mcpl').is_file():
10
- return filename.with_suffix('.mcpl')
11
- if filename.with_suffix('.mcpl.gz').exists() and filename.with_suffix('.mcpl.gz').is_file():
12
- return filename.with_suffix('.mcpl.gz')
7
+ base, ext = filename.parent / filename.stem, filename.suffix
8
+ if ext in ('.gz',):
9
+ ext = base.suffix + ext
10
+ base = base.parent / base.stem
11
+ extensions = {'.mcpl.gz', '.mcpl', ''}
12
+ if ext not in extensions:
13
+ ValueError(f'Unsupported file extension: {ext}')
14
+ for ext in extensions:
15
+ check = base.with_suffix(ext)
16
+ if check.exists() and check.is_file():
17
+ return check
18
+ print(f'{base} -> {check} not found')
13
19
  raise FileNotFoundError(f'Could not find MCPL file {filename}')
14
20
 
15
21
 
16
- # def mcpl_particle_count(filename):
17
- # from mcpl import MCPLFile
18
- # with MCPLFile(mcpl_real_filename(filename)) as f:
19
- # n = f.nparticles
20
- # return n
22
+ def mcpl_real_extension(filename: Path) -> str:
23
+ for ext in ('.mcpl.gz', '.mcpl'):
24
+ if str(filename).endswith(ext):
25
+ return ext
26
+ return ''
21
27
 
22
28
 
23
29
  def mcpl_particle_count(filename):
@@ -52,11 +58,7 @@ def mcpl_merge_files(files: list[Path], filepath: Path, keep_originals: bool = F
52
58
  from subprocess import run
53
59
  real_filenames = [mcpl_real_filename(f) for f in files]
54
60
  # if the real filenames have .mcpl or .mcpl.gz, the merged filename should too
55
- ext = ''
56
- if real_filenames[0].name.endswith('.mcpl.gz'):
57
- ext = '.mcpl.gz'
58
- elif real_filenames[0].name.endswith('.mcpl'):
59
- ext = '.mcpl'
61
+ ext = mcpl_real_extension(real_filenames[0])
60
62
  filename = filepath.with_suffix(ext).as_posix()
61
63
 
62
64
  command = ['mcpltool', '--merge', filename] + [str(f) for f in real_filenames]
@@ -69,13 +71,8 @@ def mcpl_merge_files(files: list[Path], filepath: Path, keep_originals: bool = F
69
71
 
70
72
 
71
73
  def mcpl_rename_file(source: Path, dest: Path, strict: bool = False):
72
- filepath = mcpl_real_filename(source)
73
- filename = filepath.name # this could be '{name}', '{name}.mcpl', or '{name}.mcpl.gz'
74
- ext = ''
75
- if filepath.name.endswith('.mcpl.gz'):
76
- ext = '.mcpl.gz'
77
- elif filepath.name.endswith('.mcpl'):
78
- ext = '.mcpl'
74
+ filepath = mcpl_real_filename(source) # this could be '{name}', '{name}.mcpl', or '{name}.mcpl.gz'
75
+ ext = mcpl_real_extension(filepath)
79
76
 
80
77
  if not dest.name.endswith(ext):
81
78
  if strict:
restage/splitrun.py CHANGED
@@ -121,6 +121,14 @@ def splitrun_from_file(args, parameters, precision):
121
121
  splitrun_args(instr, parameters, precision, args)
122
122
 
123
123
 
124
+ def give_me_an_integer(something):
125
+ if isinstance(something, (list, tuple)):
126
+ return something[0]
127
+ if isinstance(something, int):
128
+ return something
129
+ return 0
130
+
131
+
124
132
  def splitrun_args(instr, parameters, precision, args, **kwargs):
125
133
  splitrun(instr, parameters, precision, split_at=args.split_at[0], grid=args.mesh,
126
134
  seed=args.seed[0] if args.seed is not None else None,
@@ -135,7 +143,7 @@ def splitrun_args(instr, parameters, precision, args, **kwargs):
135
143
  dry_run=args.dryrun,
136
144
  parallel=args.parallel,
137
145
  gpu=args.gpu,
138
- process_count=args.process_count,
146
+ process_count=give_me_an_integer(args.process_count),
139
147
  mcpl_output_component=args.mcpl_output_component[0] if args.mcpl_output_component is not None else None,
140
148
  mcpl_output_parameters=args.mcpl_output_parameters,
141
149
  mcpl_input_component=args.mcpl_input_component[0] if args.mcpl_input_component is not None else None,
@@ -425,6 +433,7 @@ def repeat_simulation_until(count, runner, args: dict, parameters, work_dir: Pat
425
433
  random.seed(args['seed'])
426
434
 
427
435
  files, outputs, counts = [], [], []
436
+ total_count = 0
428
437
  while goal - sum(counts) > 0:
429
438
  if len(counts) and counts[-1] <= 0:
430
439
  log.warn(f'No particles emitted in previous run, stopping')
@@ -441,6 +450,7 @@ def repeat_simulation_until(count, runner, args: dict, parameters, work_dir: Pat
441
450
  # recycle the intended-output mcpl filename to avoid breaking mcpl file-merging
442
451
  runner(_args_pars_mcpl(args, parameters, mcpl_filepath))
443
452
  counts.append(mcpl_particle_count(mcpl_filepath))
453
+ total_count += args['ncount']
444
454
  # rename the outputfile to this run's filename
445
455
  files[-1] = mcpl_rename_file(mcpl_filepath, files[-1])
446
456
 
restage/tables.py CHANGED
@@ -14,6 +14,11 @@ def utc_timestamp() -> float:
14
14
  return datetime.now(timezone.utc).timestamp()
15
15
 
16
16
 
17
+ def str_hash(string):
18
+ from hashlib import sha3_256
19
+ return sha3_256(string.encode('utf-8')).hexdigest()
20
+
21
+
17
22
  COMMON_COLUMNS = ['seed', 'ncount', 'output_path', 'gravitation', 'creation', 'last_access']
18
23
 
19
24
 
@@ -323,27 +328,30 @@ class InstrEntry:
323
328
  id: str = field(default_factory=uuid)
324
329
  creation: float = field(default_factory=utc_timestamp)
325
330
  last_access: float = field(default_factory=utc_timestamp)
331
+ file_hash: str = field(default_factory=str)
326
332
 
327
333
  @classmethod
328
334
  def from_query_result(cls, values):
329
- fid, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access = values
330
- return cls(file_contents, mpi != 0, acc != 0, binary_path, mccode_version, fid, creation, last_access)
335
+ fid, file_hash, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access = values
336
+ return cls(file_contents, mpi != 0, acc != 0, binary_path, mccode_version, fid, creation, last_access, file_hash)
331
337
 
332
338
  def __post_init__(self):
333
339
  if len(self.mccode_version) == 0:
334
340
  from mccode_antlr import __version__
335
341
  self.mccode_version = __version__
342
+ if len(self.file_hash) == 0:
343
+ self.file_hash = str_hash(self.file_contents)
336
344
 
337
345
  @staticmethod
338
346
  def columns():
339
- return ['id', 'file_contents', 'mpi', 'acc', 'binary_path', 'mccode_version', 'creation', 'last_access']
347
+ return ['id', 'file_hash', 'file_contents', 'mpi', 'acc', 'binary_path', 'mccode_version', 'creation', 'last_access']
340
348
 
341
349
  def values(self):
342
- str_values = [f"'{x}'" for x in (self.id, self.file_contents, self.binary_path, self.mccode_version)]
350
+ str_values = [f"'{x}'" for x in (self.id, self.file_hash, self.file_contents, self.binary_path, self.mccode_version)]
343
351
  int_values = [f'{x}' for x in (self.mpi, self.acc)]
344
352
  flt_values = [f'{self.creation}', f'{self.last_access}']
345
- # matches id, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access order
346
- return str_values[:2] + int_values + str_values[2:] + flt_values
353
+ # matches id, file_hash, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access order
354
+ return str_values[:3] + int_values + str_values[3:] + flt_values
347
355
 
348
356
  @classmethod
349
357
  def create_sql_table(cls, table_name: str = 'instr_files'):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: restage
3
- Version: 0.4.1
3
+ Version: 0.5.1
4
4
  Author-email: Gregory Tucker <gregory.tucker@ess.eu>
5
5
  License: BSD-3-Clause
6
6
  Classifier: License :: OSI Approved :: BSD License
@@ -15,8 +15,9 @@ Requires-Python: >=3.9
15
15
  Description-Content-Type: text/markdown
16
16
  Requires-Dist: zenlog>=1.1
17
17
  Requires-Dist: platformdirs>=3.11
18
+ Requires-Dist: confuse
18
19
  Requires-Dist: psutil>=5.9.6
19
- Requires-Dist: mccode-antlr[hdf5]>=0.10.2
20
+ Requires-Dist: mccode-antlr[hdf5]>=0.12.0
20
21
  Provides-Extra: test
21
22
  Requires-Dist: pytest; extra == "test"
22
23
  Requires-Dist: chopcal; extra == "test"
@@ -107,3 +108,35 @@ splitrun my_instrument.instr -n 1000000 -d /data/output sample_angle=1:90 sample
107
108
 
108
109
 
109
110
 
111
+ ## Cached data
112
+ ### Default writable cache
113
+ A `sqlite3` database is used to keep track of instrument stages, their compiled
114
+ binaries, and output file(s) produced by, e.g., `splitrun` simulations.
115
+ The default database location is determined by `platformdirs` under a folder
116
+ set by `user_cache_path('restage', 'ess')` and the default locations for
117
+ `restage`-compiled instrument binaries and simulation output are determined from
118
+ `user_data_path('restage', 'ess')`.
119
+
120
+ ### Override the database and output locations
121
+ These default locations can be overridden by setting the `RESTAGE_CACHE` environment
122
+ variable to a writable folder, e.g., `export RESTAGE_CACHE="/tmp/ephemeral"`.
123
+
124
+ ### Read-only cache database(s)
125
+ Any number of fixed databases can be provided to allow for, e.g., system-wide reuse
126
+ of common staged simulations.
127
+ The location(s) of these database file(s) can be specified as a single
128
+ environment variable containing space-separated folder locations, e.g.,
129
+ `export RESTAGE_FIXED="/usr/local/restage /afs/ess.eu/restage"`.
130
+ If the locations provided include a `database.db` file, they will be used to search
131
+ for instrument binaries and simulation output directories.
132
+
133
+ ### Use a configuration file to set parameters
134
+ Cache configuration information can be provided via a configuration file at,
135
+ e.g., `~/.config/restage/config.yaml`, like
136
+ ```yaml
137
+ cache: /tmp/ephemeral
138
+ fixed: /usr/local/restage /afs/ess.eu/restage
139
+ ```
140
+ The exact location searched to find the configuration file is platform dependent;
141
+ please consult the [`confuse` documentation](https://confuse.readthedocs.io/en/latest/usage.html)
142
+ for the paths used on your system.
@@ -0,0 +1,21 @@
1
+ restage/__init__.py,sha256=HlqvPpL7DKet00NAFyqJBNg9UFO7o05Gt2tFyKBQcsY,744
2
+ restage/bifrost_choppers.py,sha256=xQu21g2NcTLPpZ0ZWOuvN20zh07EWoO4QVoTnoORwZI,6443
3
+ restage/cache.py,sha256=PD07z9pxGwBcxoizgy11zoQBjViF1ZSzKS0686RZ3FI,8115
4
+ restage/cspec_choppers.py,sha256=ZWxyCcwYn4z9ZNqj_r6RC9ImbhVjYc1fmv-Ijm8A2Yk,206
5
+ restage/database.py,sha256=anyOby31fUN7rGAVNsnWDUhAISV0vQ7en8aQwVS5ZwA,11051
6
+ restage/emulate.py,sha256=VrhfZJIbECdbDS-MHklqRuAIy9cRkjZkwPBTKQSQoe0,6164
7
+ restage/energy.py,sha256=w78GUIWcHxANvBl2DTu73FQFawCXfzlK6L32TBQNt4g,3371
8
+ restage/instr.py,sha256=A0ShtXkswt_f7o-cIDtsVbG03_tGELe1aS3WzLxzkJM,2494
9
+ restage/mcpl.py,sha256=MKVY-2TYk5p8hW1lXJib5mxdbnILq4GoYiyAUgOBJmA,3269
10
+ restage/range.py,sha256=TjOf4DSKfgoAIcrWQvv6MrtksQpnGJHdsEjVI5K-UfI,8116
11
+ restage/run.py,sha256=nk8d7cIyIqSt-5pyGm68Zak5H1a-fbo_z2_36eN-08E,1481
12
+ restage/scan.py,sha256=Yx8OQSBG6I2_64sW0LIDb0glVKwWoxUQQznASXgDZFQ,1432
13
+ restage/splitrun.py,sha256=W_pTeiMjc9hhu-zaE6fdetVLG6MGEpnaTOdgmgVkS1g,26061
14
+ restage/tables.py,sha256=mL1SrCbgwfWzG-ezd_R3CxOSIZLNZRoC2r7ht59jGMA,16371
15
+ restage/config/__init__.py,sha256=zFRT9QXgpUJpBncELCQ6by1-kjYp8Li1yJDfqxkHxAA,965
16
+ restage/config/default.yaml,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ restage-0.5.1.dist-info/METADATA,sha256=r51j8rWD3Iwc9y8D-ZWZL46JERjcgEkch-88CYeOt4w,6769
18
+ restage-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ restage-0.5.1.dist-info/entry_points.txt,sha256=gghocSxC2gHHxUCalAibCN1mtkh3trNmAfH5Qwx0KYg,149
20
+ restage-0.5.1.dist-info/top_level.txt,sha256=iM_pb-taTZ0S2WMoDnt_qDMZoNMjmM19z3tTCuVm1IE,8
21
+ restage-0.5.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,19 +0,0 @@
1
- restage/__init__.py,sha256=v0dAUYVkvzjd3j6gjFdbunV_P8U9XxsGgLFGwbxBy6E,787
2
- restage/bifrost_choppers.py,sha256=xQu21g2NcTLPpZ0ZWOuvN20zh07EWoO4QVoTnoORwZI,6443
3
- restage/cache.py,sha256=Ea0e_hb8cN-PKFyyVXyIDo04UbWpcL5ytihzBo34QWQ,8491
4
- restage/cspec_choppers.py,sha256=ZWxyCcwYn4z9ZNqj_r6RC9ImbhVjYc1fmv-Ijm8A2Yk,206
5
- restage/database.py,sha256=pblHu8hCV5u3uyE8aUrnBSsfjDLYrxy9JRtnRuOvTXQ,9152
6
- restage/emulate.py,sha256=VrhfZJIbECdbDS-MHklqRuAIy9cRkjZkwPBTKQSQoe0,6164
7
- restage/energy.py,sha256=FeWyZj6BOH0Ao4BpS39lmZm7Znz-rESk2jTLeMEJpI4,2920
8
- restage/instr.py,sha256=1Yx8WVQOke_gRj7dV4BYPHXuxT1xH8xMQR7k3c7Yp9M,2490
9
- restage/mcpl.py,sha256=BZYxBytughjc8slR6gUaBy3D7gzo7Yl3ACXrXhWgagI,3403
10
- restage/range.py,sha256=TjOf4DSKfgoAIcrWQvv6MrtksQpnGJHdsEjVI5K-UfI,8116
11
- restage/run.py,sha256=nk8d7cIyIqSt-5pyGm68Zak5H1a-fbo_z2_36eN-08E,1481
12
- restage/scan.py,sha256=Yx8OQSBG6I2_64sW0LIDb0glVKwWoxUQQznASXgDZFQ,1432
13
- restage/splitrun.py,sha256=dAwDFz_KShB_OWlmARxpHyCfGhBYNuEyujveuAuS74g,25800
14
- restage/tables.py,sha256=oxHc9TdTVPBC6ZlQUQdoNzlo4CyvPp-pfijK81eAK28,16053
15
- restage-0.4.1.dist-info/METADATA,sha256=7txXTXEZ9UhtEGiXmX7o6voa58v6Jn-nsRXOTJXJ63M,5112
16
- restage-0.4.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
17
- restage-0.4.1.dist-info/entry_points.txt,sha256=gghocSxC2gHHxUCalAibCN1mtkh3trNmAfH5Qwx0KYg,149
18
- restage-0.4.1.dist-info/top_level.txt,sha256=iM_pb-taTZ0S2WMoDnt_qDMZoNMjmM19z3tTCuVm1IE,8
19
- restage-0.4.1.dist-info/RECORD,,