restage 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
restage/__init__.py CHANGED
@@ -12,7 +12,6 @@ from .tables import (SimulationEntry,
12
12
  InstrEntry
13
13
  )
14
14
  from .database import Database
15
- from .cache import DATABASE
16
15
 
17
16
 
18
17
  try:
@@ -28,5 +27,4 @@ __all__ = [
28
27
  'NexusStructureEntry',
29
28
  'InstrEntry',
30
29
  'Database',
31
- 'DATABASE'
32
30
  ]
@@ -82,9 +82,7 @@ def bandwidth_chopper_speeds_phases(energy_minimum: float):
82
82
  return SOURCE_FREQUENCY, phase, -SOURCE_FREQUENCY, phase
83
83
 
84
84
 
85
- def calculate(order: float, time: float, energy: float, names: list[str] | None = None):
86
- if names is None or len(names) != 6:
87
- names = ['ps1', 'ps2', 'fo1', 'fo2', 'bw1', 'bw2']
85
+ def calculate(order: float, time: float, energy: float, names: tuple[str, ...]):
88
86
  a, b, c, d, e, f = names
89
87
  s, p = 'speed', 'phase'
90
88
  r = dict()
@@ -94,9 +92,12 @@ def calculate(order: float, time: float, energy: float, names: list[str] | None
94
92
  return r
95
93
 
96
94
 
97
- def main(order: float, time: float, energy: float, names: list[str] | None = None):
95
+ def main(order: float, time: float, energy: float, names: tuple[str, ...] | None = None):
98
96
  if names is None or len(names) != 6:
99
- names = ['ps1', 'ps2', 'fo1', 'fo2', 'bw1', 'bw2']
97
+ # names = ('ps1', 'ps2', 'fo1', 'fo2', 'bw1', 'bw2')
98
+ names = ('pulse_shaping_chopper_1', 'pulse_shaping_chopper_2',
99
+ 'frame_overlap_chopper_1', 'frame_overlap_chopper_2',
100
+ 'bandwidth_chopper_1', 'bandwidth_chopper_2')
100
101
  rep = calculate(order, time, energy, names)
101
102
  print(' '.join([f'{k}={v}' for k, v in rep.items()]))
102
103
 
restage/cache.py CHANGED
@@ -1,25 +1,92 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
3
5
  from mccode_antlr.instr import Instr
4
6
  from .tables import InstrEntry, SimulationTableEntry, SimulationEntry
5
- from mccode_antlr.compiler.c import CBinaryTarget
6
-
7
-
8
- def setup_database(named: str):
9
- from platformdirs import user_cache_path
10
- from .database import Database
11
- db_file = user_cache_path('restage', 'ess', ensure_exists=True).joinpath(f'{named}.db')
12
- db = Database(db_file)
13
- return db
14
-
15
-
16
- # Create the global database object in the module namespace.
17
- DATABASE = setup_database('database')
7
+ from .database import Database
8
+
9
+ @dataclass
10
+ class FileSystem:
11
+ root: Path
12
+ db_fixed: tuple[Database,...]
13
+ db_write: Database
14
+
15
+ @classmethod
16
+ def from_config(cls, named: str):
17
+ from .config import config
18
+ db_fixed = []
19
+ db_write = None
20
+ root = None
21
+ if not named.endswith('.db'):
22
+ named += '.db'
23
+ if config['cache'].exists():
24
+ path = config['cache'].as_path()
25
+ if not path.exists():
26
+ path.mkdir(parents=True)
27
+ db_write = Database(path / named)
28
+ root = path
29
+ if config['fixed'].exists():
30
+ more = [Path(c) for c in config['fixed'].as_str_seq() if Path(c).exists()]
31
+ for m in more:
32
+ db_fixed.append(Database(m / named, readonly=True))
33
+ if db_write is not None and db_write.readonly:
34
+ raise ValueError("Specified writable database location is readonly")
35
+ if db_write is None:
36
+ from platformdirs import user_cache_path
37
+ db_write = Database(user_cache_path('restage', 'ess', ensure_exists=True) / named)
38
+ if root is None:
39
+ from platformdirs import user_data_path
40
+ root = user_data_path('restage', 'ess')
41
+ return cls(root, tuple(db_fixed), db_write)
42
+
43
+ def query(self, method, *args, **kwargs):
44
+ q = [x for r in self.db_fixed for x in getattr(r, method)(*args, **kwargs)]
45
+ q.extend(getattr(self.db_write, method)(*args, **kwargs))
46
+ return q
47
+
48
+ def insert(self, method, *args, **kwargs):
49
+ getattr(self.db_write, method)(*args, **kwargs)
50
+
51
+ def query_instr_file(self, *args, **kwargs):
52
+ query = [x for r in self.db_fixed for x in r.query_instr_file(*args, **kwargs)]
53
+ query.extend(self.db_write.query_instr_file(*args, **kwargs))
54
+ return query
55
+
56
+ def insert_instr_file(self, *args, **kwargs):
57
+ self.db_write.insert_instr_file(*args, **kwargs)
58
+
59
+ def query_simulation_table(self, *args, **kwargs):
60
+ return self.query('query_simulation_table', *args, **kwargs)
61
+
62
+ def retrieve_simulation_table(self, *args, **kwargs):
63
+ return self.query('retrieve_simulation_table', *args, **kwargs)
64
+
65
+ def insert_simulation_table(self, *args, **kwargs):
66
+ self.insert('insert_simulation_table', *args, **kwargs)
67
+
68
+ def insert_simulation(self, *args, **kwargs):
69
+ # By definition, 'self.db_write' is writable and Database.insert_simulation
70
+ # _always_ ensures the presence of the specified table in its database.
71
+ # Therefore this method 'just works'.
72
+ self.insert('insert_simulation', *args, **kwargs)
73
+
74
+ def retrieve_simulation(self, table_id: str, row: SimulationEntry):
75
+ matches = []
76
+ for db in self.db_fixed:
77
+ if len(db.retrieve_simulation_table(table_id, False)) == 1:
78
+ matches.extend(db.retrieve_simulation(table_id, row))
79
+ if len(self.db_write.retrieve_simulation_table(table_id, False)) == 1:
80
+ matches.extend(self.db_write.retrieve_simulation(table_id, row))
81
+ return matches
82
+
83
+
84
+
85
+ FILESYSTEM = FileSystem.from_config('database')
18
86
 
19
87
 
20
88
  def module_data_path(sub: str):
21
- from platformdirs import user_data_path
22
- path = user_data_path('restage', 'ess').joinpath(sub)
89
+ path = FILESYSTEM.root / sub
23
90
  if not path.exists():
24
91
  path.mkdir(parents=True)
25
92
  return path
@@ -41,7 +108,6 @@ def directory_under_module_data_path(sub: str, prefix=None, suffix=None, name=No
41
108
  def _compile_instr(entry: InstrEntry, instr: Instr, config: dict | None = None,
42
109
  mpi: bool = False, acc: bool = False,
43
110
  target=None, generator=None):
44
- from tempfile import mkdtemp
45
111
  from mccode_antlr import __version__
46
112
  from mccode_antlr.compiler.c import compile_instrument, CBinaryTarget
47
113
  if config is None:
@@ -54,7 +120,11 @@ def _compile_instr(entry: InstrEntry, instr: Instr, config: dict | None = None,
54
120
  generator = MCSTAS_GENERATOR
55
121
 
56
122
  output = directory_under_module_data_path('bin')
57
- binary_path = compile_instrument(instr, target, output, generator=generator, config=config)
123
+ # TODO consider adding `dump_source=True` _and_ putting the resulting file into
124
+ # the cache in order to make debugging future problems a tiny bit easier.
125
+ # FIXME a future mccode-antlr will support setting 'source_file={file_path}'
126
+ # to allow exactly this.
127
+ binary_path = compile_instrument(instr, target, output, generator=generator, config=config, dump_source=True)
58
128
  entry.mccode_version = __version__
59
129
  entry.binary_path = str(binary_path)
60
130
  return entry
@@ -63,9 +133,9 @@ def _compile_instr(entry: InstrEntry, instr: Instr, config: dict | None = None,
63
133
  def cache_instr(instr: Instr, mpi: bool = False, acc: bool = False, mccode_version=None, binary_path=None, **kwargs) -> InstrEntry:
64
134
  instr_contents = str(instr)
65
135
  # the query returns a list[InstrTableEntry]
66
- query = DATABASE.query_instr_file(search={'file_contents': instr_contents, 'mpi': mpi, 'acc': acc})
136
+ query = FILESYSTEM.query_instr_file(search={'file_contents': instr_contents, 'mpi': mpi, 'acc': acc})
67
137
  if len(query) > 1:
68
- raise RuntimeError(f"Multiple entries for {instr_contents} in {DATABASE.instr_file_table}")
138
+ raise RuntimeError(f"Multiple entries for {instr_contents} in {FILESYSTEM}")
69
139
  elif len(query) == 1:
70
140
  return query[0]
71
141
 
@@ -74,10 +144,19 @@ def cache_instr(instr: Instr, mpi: bool = False, acc: bool = False, mccode_versi
74
144
  if binary_path is None:
75
145
  instr_file_entry = _compile_instr(instr_file_entry, instr, mpi=mpi, acc=acc, **kwargs)
76
146
 
77
- DATABASE.insert_instr_file(instr_file_entry)
147
+ FILESYSTEM.insert_instr_file(instr_file_entry)
78
148
  return instr_file_entry
79
149
 
80
150
 
151
+ def cache_get_instr(instr: Instr, mpi: bool = False, acc: bool = False) -> InstrEntry | None:
152
+ query = FILESYSTEM.query_instr_file(search={'file_contents': str(instr), 'mpi': mpi, 'acc': acc})
153
+ if len(query) > 1:
154
+ raise RuntimeError(f"Multiple entries for {instr} in {FILESYSTEM}")
155
+ elif len(query) == 1:
156
+ return query[0]
157
+ return None
158
+
159
+
81
160
  def verify_table_parameters(table, parameters: dict):
82
161
  names = list(parameters.keys())
83
162
  if any(x not in names for x in table.parameters):
@@ -88,108 +167,31 @@ def verify_table_parameters(table, parameters: dict):
88
167
 
89
168
 
90
169
  def cache_simulation_table(entry: InstrEntry, row: SimulationEntry) -> SimulationTableEntry:
91
- query = DATABASE.retrieve_simulation_table(entry.id)
92
- if len(query) > 1:
93
- raise RuntimeError(f"Multiple entries for {entry.id} in {DATABASE.simulations_table}")
94
- elif len(query):
95
- table = verify_table_parameters(query[0], row.parameter_values)
170
+ query = FILESYSTEM.retrieve_simulation_table(entry.id)
171
+ if len(query):
172
+ for q in query:
173
+ verify_table_parameters(q, row.parameter_values)
174
+ table = query[0]
96
175
  else:
97
176
  table = SimulationTableEntry(list(row.parameter_values.keys()), f'pst_{entry.id}', entry.id)
98
- DATABASE.insert_simulation_table(table)
177
+ FILESYSTEM.insert_simulation_table(table)
99
178
  return table
100
179
 
101
180
 
102
181
  def cache_has_simulation(entry: InstrEntry, row: SimulationEntry) -> bool:
103
182
  table = cache_simulation_table(entry, row)
104
- query = DATABASE.retrieve_simulation(table.id, row)
183
+ query = FILESYSTEM.retrieve_simulation(table.id, row)
105
184
  return len(query) > 0
106
185
 
107
186
 
108
187
  def cache_get_simulation(entry: InstrEntry, row: SimulationEntry) -> list[SimulationEntry]:
109
188
  table = cache_simulation_table(entry, row)
110
- query = DATABASE.retrieve_simulation(table.id, row)
189
+ query = FILESYSTEM.retrieve_simulation(table.id, row)
111
190
  if len(query) == 0:
112
- raise RuntimeError(f"Expected 1 or more entry for {table.id} in {DATABASE.simulations_table}, got none")
191
+ raise RuntimeError(f"Expected 1 or more entry for {table.id} in {FILESYSTEM}, got none")
113
192
  return query
114
193
 
115
194
 
116
195
  def cache_simulation(entry: InstrEntry, simulation: SimulationEntry):
117
196
  table = cache_simulation_table(entry, simulation)
118
- DATABASE.insert_simulation(table, simulation)
119
-
120
-
121
- def _cleanup_instr_table(allow_different=True):
122
- """Look through the cache tables and remove any entries which are no longer valid"""
123
- from pathlib import Path
124
- from mccode_antlr import __version__
125
- entries = DATABASE.all_instr_files()
126
- for entry in entries:
127
- if not entry.binary_path or not Path(entry.binary_path).exists():
128
- DATABASE.delete_instr_file(entry.id)
129
- elif allow_different and entry.mccode_version != __version__:
130
- DATABASE.delete_instr_file(entry.id)
131
- # plus remove the binary
132
- Path(entry.binary_path).unlink()
133
- # and its directory if it is empty (it's _probably_ empty, but we should make sure)
134
- if not any(Path(entry.binary_path).parent.iterdir()):
135
- Path(entry.binary_path).parent.rmdir()
136
-
137
-
138
- def _cleanup_simulations_table(keep_empty=False, allow_different=False, cleanup_directories=False):
139
- """Look through the cached table listing simulation tables and remove any entries which are no longer valid"""
140
- from pathlib import Path
141
- for entry in DATABASE.retrieve_all_simulation_tables():
142
- if not DATABASE.table_exists(entry.table_name):
143
- DATABASE.delete_simulation_table(entry.id)
144
- continue
145
-
146
- # clean up the entries of the table
147
- _cleanup_simulations(entry.id, keep_empty=keep_empty, cleanup_directories=cleanup_directories)
148
- # and remove the table if it is empty
149
- if not (keep_empty or len(DATABASE.retrieve_all_simulations(entry.id))):
150
- DATABASE.delete_simulation_table(entry.id)
151
- continue
152
-
153
- # check that the column names all match
154
- if not (allow_different or DATABASE.table_has_columns(entry.table_name, entry.parameters)):
155
- # Remove the simulation output folders for each tabulated simulation:
156
- if cleanup_directories:
157
- for sim in DATABASE.retrieve_all_simulations(entry.id):
158
- sim_path = Path(sim.output_path)
159
- for item in sim_path.iterdir():
160
- item.unlink()
161
- sim_path.rmdir()
162
- DATABASE.delete_simulation_table(entry.id)
163
-
164
-
165
- def _cleanup_nexus_table():
166
- # TODO implement this`
167
- pass
168
-
169
-
170
- def _cleanup_simulations(primary_id: str, keep_empty=False, cleanup_directories=False):
171
- """Look through a cached simulations table's entries and remove any which are no longer valid"""
172
- from pathlib import Path
173
- entries = DATABASE.retrieve_all_simulations(primary_id)
174
- for entry in entries:
175
- # Does the table reference a missing simulation output directory?
176
- if not Path(entry.output_path).exists():
177
- DATABASE.delete_simulation(primary_id, entry.id)
178
- # or an empty one?
179
- elif not keep_empty and not any(Path(entry.output_path).iterdir()):
180
- if cleanup_directories:
181
- Path(entry.output_path).rmdir()
182
- DATABASE.delete_simulation(primary_id, entry.id)
183
- # TODO add a lifetime to check against?
184
-
185
-
186
- def cache_cleanup(keep_empty=False, allow_different=False, cleanup_directories=False):
187
- _cleanup_instr_table(allow_different=allow_different)
188
- _cleanup_nexus_table()
189
- _cleanup_simulations_table(keep_empty=keep_empty, allow_different=allow_different,
190
- cleanup_directories=cleanup_directories)
191
-
192
-
193
- # FIXME auto cleanup is removing cached table entries incorrectly at the moment
194
- # # automatically clean up the cache when the module is loaded
195
- # cache_cleanup()
197
+ FILESYSTEM.insert_simulation(table, simulation)
@@ -0,0 +1,28 @@
1
+ import confuse
2
+ from os import environ
3
+ # Any platform independent configuration settings can go in 'default.yaml'
4
+ config = confuse.LazyConfig('restage', __name__)
5
+
6
+ # use environment variables specified as 'RESTAGE_XYZ' as configuration entries 'xyz'
7
+ config.set_env()
8
+ # Expected environment variables:
9
+ # RESTAGE_FIXED="/loc/one /usr/loc/two"
10
+ # RESTAGE_CACHE="$HOME/loc/three"
11
+
12
+
13
+ def _common_defaults():
14
+ import yaml
15
+ from importlib.resources import files, as_file
16
+
17
+ common_file = files(__name__).joinpath('default.yaml')
18
+ if not common_file.is_file():
19
+ raise RuntimeError(f"Can not locate default.yaml in module files (looking for {common_file})")
20
+ with as_file(common_file) as file:
21
+ with open(file, 'r') as data:
22
+ common_configs = yaml.safe_load(data)
23
+
24
+ return common_configs or {}
25
+
26
+
27
+ # By using the 'add' method, we set these as the *lowest* priority. Any user/system files will override:
28
+ config.add(_common_defaults())
File without changes
restage/database.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  from pathlib import Path
4
5
  from .tables import SimulationEntry, SimulationTableEntry, NexusStructureEntry, InstrEntry
5
6
 
@@ -10,9 +11,13 @@ class Database:
10
11
  nexus_structures_table: str | None = None,
11
12
  simulations_table: str | None = None,
12
13
  # secondary_simulations_table: str = None
14
+ readonly: bool = False
13
15
  ):
14
16
  from sqlite3 import connect
15
- self.db = connect(db_file)
17
+ from os import access, W_OK
18
+ self.readonly = readonly or not access(db_file.parent, W_OK)
19
+ mode = 'ro' if self.readonly else 'rwc'
20
+ self.db = connect(f'file:{db_file}?mode={mode}', uri=True)
16
21
  self.cursor = self.db.cursor()
17
22
  self.instr_file_table = instr_file_table or 'instr_file'
18
23
  self.nexus_structures_table = nexus_structures_table or 'nexus_structures'
@@ -27,8 +32,11 @@ class Database:
27
32
  # (self.secondary_simulations_table, SecondaryInstrSimulationTable)
28
33
  ):
29
34
  if not self.table_exists(table):
30
- self.cursor.execute(tt.create_sql_table(table_name=table))
31
- self.db.commit()
35
+ if not self.readonly:
36
+ self.cursor.execute(tt.create_sql_table(table_name=table))
37
+ self.db.commit()
38
+ else:
39
+ raise ValueError(f'Table {table} does not exist in readonly database {db_file}')
32
40
 
33
41
  def __del__(self):
34
42
  self.db.close()
@@ -46,6 +54,8 @@ class Database:
46
54
  raise RuntimeError(f"Table {table_name} does not exist")
47
55
 
48
56
  def insert_instr_file(self, instr_file: InstrEntry):
57
+ if self.readonly:
58
+ raise ValueError('Cannot insert into readonly database')
49
59
  command = instr_file.insert_sql_table(table_name=self.instr_file_table)
50
60
  self.announce(command)
51
61
  self.cursor.execute(command)
@@ -56,21 +66,39 @@ class Database:
56
66
  return [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
57
67
 
58
68
  def query_instr_file(self, search: dict) -> list[InstrEntry]:
69
+ from .tables import str_hash
70
+ contents = None
71
+ if 'file_contents' in search:
72
+ # direct file content searches are slow (for large contents, at least)
73
+ # Each InstrEntry inserts a hash of its contents, which is probably unique,
74
+ # so pull-back any matches against that and then check full contents below
75
+ contents = search['file_contents']
76
+ del search['file_contents']
77
+ search['file_hash'] = str_hash(contents)
59
78
  query = f"SELECT * FROM {self.instr_file_table} WHERE "
60
79
  query += ' AND '.join([f"{k}='{v}'" if isinstance(v, str) else f"{k}={v}" for k, v in search.items()])
61
80
  self.announce(query)
62
81
  self.cursor.execute(query)
63
- return [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
82
+ results = [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
83
+ if contents is not None:
84
+ # this check is _probably_ redundant, but on the off chance of a hash
85
+ # collision we can guarantee the returned InstrEntry matches:
86
+ results = [x for x in results if x.file_contents == contents]
87
+ return results
64
88
 
65
89
  def all_instr_files(self) -> list[InstrEntry]:
66
90
  self.cursor.execute(f"SELECT * FROM {self.instr_file_table}")
67
91
  return [InstrEntry.from_query_result(x) for x in self.cursor.fetchall()]
68
92
 
69
93
  def delete_instr_file(self, instr_id: str):
94
+ if self.readonly:
95
+ raise ValueError('Cannot delete from readonly database')
70
96
  self.cursor.execute(f"DELETE FROM {self.instr_file_table} WHERE id='{instr_id}'")
71
97
  self.db.commit()
72
98
 
73
99
  def insert_nexus_structure(self, nexus_structure: NexusStructureEntry):
100
+ if self.readonly:
101
+ raise ValueError('Cannot insert into readonly database')
74
102
  command = nexus_structure.insert_sql_table(table_name=self.nexus_structures_table)
75
103
  self.announce(command)
76
104
  self.cursor.execute(command)
@@ -81,6 +109,8 @@ class Database:
81
109
  return [NexusStructureEntry.from_query_result(x) for x in self.cursor.fetchall()]
82
110
 
83
111
  def insert_simulation_table(self, entry: SimulationTableEntry):
112
+ if self.readonly:
113
+ raise ValueError('Cannot insert into readonly database')
84
114
  command = entry.insert_sql_table(table_name=self.simulations_table)
85
115
  self.announce(command)
86
116
  self.cursor.execute(command)
@@ -94,7 +124,7 @@ class Database:
94
124
  def retrieve_simulation_table(self, primary_id: str, update_access_time=True) -> list[SimulationTableEntry]:
95
125
  self.cursor.execute(f"SELECT * FROM {self.simulations_table} WHERE id='{primary_id}'")
96
126
  entries = [SimulationTableEntry.from_query_result(x) for x in self.cursor.fetchall()]
97
- if update_access_time:
127
+ if not self.readonly and update_access_time:
98
128
  from .tables import utc_timestamp
99
129
  self.cursor.execute(f"UPDATE {self.simulations_table} SET last_access='{utc_timestamp()}' "
100
130
  f"WHERE id='{primary_id}'")
@@ -106,6 +136,8 @@ class Database:
106
136
  return [SimulationTableEntry.from_query_result(x) for x in self.cursor.fetchall()]
107
137
 
108
138
  def delete_simulation_table(self, primary_id: str):
139
+ if self.readonly:
140
+ raise ValueError('Cannot delete from readonly database')
109
141
  matches = self.retrieve_simulation_table(primary_id)
110
142
  if len(matches) != 1:
111
143
  raise RuntimeError(f"Expected exactly one match for id={primary_id}, got {matches}")
@@ -121,6 +153,8 @@ class Database:
121
153
  return [SimulationTableEntry.from_query_result(x) for x in self.cursor.fetchall()]
122
154
 
123
155
  def _insert_simulation(self, sim: SimulationTableEntry, pars: SimulationEntry):
156
+ if self.readonly:
157
+ raise ValueError('Cannot insert into readonly database')
124
158
  if not self.table_exists(sim.table_name):
125
159
  command = sim.create_simulation_sql_table()
126
160
  self.announce(command)
@@ -136,7 +170,7 @@ class Database:
136
170
  query = f"SELECT * FROM {table} WHERE {pars.between_query()}"
137
171
  self.cursor.execute(query)
138
172
  entries = [SimulationEntry.from_query_result(columns, x) for x in self.cursor.fetchall()]
139
- if update_access_time and len(entries):
173
+ if not self.readonly and update_access_time and len(entries):
140
174
  from .tables import utc_timestamp
141
175
  self.cursor.execute(f"UPDATE {table} SET last_access='{utc_timestamp()}' WHERE {pars.between_query()}")
142
176
  self.db.commit()
@@ -161,6 +195,8 @@ class Database:
161
195
  return self._retrieve_simulation(table, columns, pars)
162
196
 
163
197
  def delete_simulation(self, primary_id: str, simulation_id: str):
198
+ if self.readonly:
199
+ raise ValueError('Cannot delete from readonly database')
164
200
  matches = self.retrieve_simulation_table(primary_id)
165
201
  if len(matches) != 1:
166
202
  raise RuntimeError(f"Expected exactly one match for id={primary_id}, got {matches}")
restage/energy.py CHANGED
@@ -10,13 +10,20 @@ def get_and_remove(d: dict, k: str, default=None):
10
10
  return default
11
11
 
12
12
 
13
- def one_generic_energy_to_chopper_parameters(calculate_choppers, time: float, order: int, parameters: dict):
13
+ def one_generic_energy_to_chopper_parameters(
14
+ calculate_choppers, chopper_names: tuple[str, ...],
15
+ time: float, order: int, parameters: dict,
16
+ chopper_parameter_present: bool
17
+ ):
18
+ from loguru import logger
14
19
  if any(x in parameters for x in ('ei', 'wavelength', 'lambda', 'energy', 'e')):
20
+ if chopper_parameter_present:
21
+ logger.warning('Specified chopper parameter(s) overridden by Ei or wavelength.')
15
22
  ei = get_and_remove(parameters, 'ei', get_and_remove(parameters, 'energy', get_and_remove(parameters, 'e')))
16
23
  if ei is None:
17
24
  wavelength = get_and_remove(parameters, 'wavelength', get_and_remove(parameters, 'lambda'))
18
25
  ei = _wavelength_angstrom_to_energy_mev(wavelength)
19
- choppers = calculate_choppers(order, time, ei)
26
+ choppers = calculate_choppers(order, time, ei, names=chopper_names)
20
27
  parameters.update(choppers)
21
28
  return parameters
22
29
 
@@ -24,25 +31,34 @@ def one_generic_energy_to_chopper_parameters(calculate_choppers, time: float, or
24
31
  def bifrost_translate_energy_to_chopper_parameters(parameters: dict):
25
32
  from itertools import product
26
33
  from .bifrost_choppers import calculate
27
- for name in product([a+b for a, b in product(('ps', 'fo', 'bw'), ('1', '2'))], ('speed', 'phase')):
34
+ choppers = tuple(f'{a}_chopper_{b}' for a, b in product(['pulse_shaping', 'frame_overlap', 'bandwidth'], [1, 2]))
35
+ # names = [a+b for a, b in product(('ps', 'fo', 'bw'), ('1', '2'))]
36
+ chopper_parameter_present = False
37
+ for name in product(choppers, ('speed', 'phase')):
28
38
  name = ''.join(name)
29
39
  if name not in parameters:
30
40
  parameters[name] = 0
41
+ else:
42
+ chopper_parameter_present = True
31
43
  order = get_and_remove(parameters, 'order', 14)
32
44
  time = get_and_remove(parameters, 'time', get_and_remove(parameters, 't', 170/180/(2 * 15 * 14)))
33
- return one_generic_energy_to_chopper_parameters(calculate, time, order, parameters)
45
+ return one_generic_energy_to_chopper_parameters(calculate, choppers, time, order, parameters, chopper_parameter_present)
34
46
 
35
47
 
36
48
  def cspec_translate_energy_to_chopper_parameters(parameters: dict):
37
49
  from itertools import product
38
50
  from .cspec_choppers import calculate
39
- for name in product(('bw1', 'bw2', 'bw3', 's', 'p', 'm1', 'm2'), ('speed', 'phase')):
51
+ choppers = ('bw1', 'bw2', 'bw3', 's', 'p', 'm1', 'm2')
52
+ chopper_parameter_present = False
53
+ for name in product(choppers, ('speed', 'phase')):
40
54
  name = ''.join(name)
41
55
  if name not in parameters:
42
56
  parameters[name] = 0
57
+ else:
58
+ chopper_parameter_present = True
43
59
  time = get_and_remove(parameters, 'time', 0.004)
44
60
  order = get_and_remove(parameters, 'order', 16)
45
- return one_generic_energy_to_chopper_parameters(calculate, time, order, parameters)
61
+ return one_generic_energy_to_chopper_parameters(calculate, choppers, time, order, parameters, chopper_parameter_present)
46
62
 
47
63
 
48
64
  def no_op_translate_energy_to_chopper_parameters(parameters: dict):
restage/instr.py CHANGED
@@ -6,10 +6,9 @@ from __future__ import annotations
6
6
  from pathlib import Path
7
7
  from typing import Union
8
8
  from mccode_antlr.instr import Instr
9
- from mccode_antlr.reader import Registry
10
9
 
11
10
 
12
- def load_instr(filepath: Union[str, Path], extra_registries: list[Registry] | None = None) -> Instr:
11
+ def load_instr(filepath: Union[str, Path]) -> Instr:
13
12
  """Loads an Instr object from a .instr file or a HDF5 file"""
14
13
  from mccode_antlr.io import load_hdf5
15
14
  from mccode_antlr.loader import load_mcstas_instr
@@ -17,25 +16,12 @@ def load_instr(filepath: Union[str, Path], extra_registries: list[Registry] | No
17
16
  if not isinstance(filepath, Path):
18
17
  filepath = Path(filepath)
19
18
  if not filepath.exists() or not filepath.is_file():
20
- raise ValueError('The provided filepath does not exist or is not a file')
21
-
22
- # FIXME this hack should be removed ASAP
23
- if extra_registries is None:
24
- from mccode_antlr.reader import GitHubRegistry
25
- mcpl_input_once_registry = GitHubRegistry(
26
- name='mcpl_input_once',
27
- url='https://github.com/g5t/mccode-mcpl-input-once',
28
- version='main',
29
- filename='pooch-registry.txt'
30
- )
31
- extra_registries = [mcpl_input_once_registry]
19
+ raise ValueError(f'The provided {filepath=} does not exist or is not a file')
32
20
 
33
21
  if filepath.suffix == '.instr':
34
- return load_mcstas_instr(filepath, registries=extra_registries)
22
+ return load_mcstas_instr(filepath)
35
23
 
36
- instr = load_hdf5(filepath)
37
- instr.registries += tuple(extra_registries)
38
- return instr
24
+ return load_hdf5(filepath)
39
25
 
40
26
 
41
27
  def collect_parameter_dict(instr: Instr, kwargs: dict, strict: bool = True) -> dict:
@@ -60,7 +46,7 @@ def collect_parameter_dict(instr: Instr, kwargs: dict, strict: bool = True) -> d
60
46
  for k, v in kwargs.items():
61
47
  if k not in parameters:
62
48
  if strict:
63
- raise ValueError(f"Parameter {k} is not a valid parameter name")
49
+ raise ValueError(f"Parameter {k} is not a valid parameter name. Valid names are: {', '.join(parameters)}")
64
50
  continue
65
51
  if not isinstance(v, Value):
66
52
  expected_type = parameters[k].data_type
restage/splitrun.py CHANGED
@@ -121,6 +121,14 @@ def splitrun_from_file(args, parameters, precision):
121
121
  splitrun_args(instr, parameters, precision, args)
122
122
 
123
123
 
124
+ def give_me_an_integer(something):
125
+ if isinstance(something, (list, tuple)):
126
+ return something[0]
127
+ if isinstance(something, int):
128
+ return something
129
+ return 0
130
+
131
+
124
132
  def splitrun_args(instr, parameters, precision, args, **kwargs):
125
133
  splitrun(instr, parameters, precision, split_at=args.split_at[0], grid=args.mesh,
126
134
  seed=args.seed[0] if args.seed is not None else None,
@@ -135,7 +143,7 @@ def splitrun_args(instr, parameters, precision, args, **kwargs):
135
143
  dry_run=args.dryrun,
136
144
  parallel=args.parallel,
137
145
  gpu=args.gpu,
138
- process_count=args.process_count,
146
+ process_count=give_me_an_integer(args.process_count),
139
147
  mcpl_output_component=args.mcpl_output_component[0] if args.mcpl_output_component is not None else None,
140
148
  mcpl_output_parameters=args.mcpl_output_parameters,
141
149
  mcpl_input_component=args.mcpl_input_component[0] if args.mcpl_input_component is not None else None,
@@ -425,6 +433,7 @@ def repeat_simulation_until(count, runner, args: dict, parameters, work_dir: Pat
425
433
  random.seed(args['seed'])
426
434
 
427
435
  files, outputs, counts = [], [], []
436
+ total_count = 0
428
437
  while goal - sum(counts) > 0:
429
438
  if len(counts) and counts[-1] <= 0:
430
439
  log.warn(f'No particles emitted in previous run, stopping')
@@ -441,6 +450,7 @@ def repeat_simulation_until(count, runner, args: dict, parameters, work_dir: Pat
441
450
  # recycle the intended-output mcpl filename to avoid breaking mcpl file-merging
442
451
  runner(_args_pars_mcpl(args, parameters, mcpl_filepath))
443
452
  counts.append(mcpl_particle_count(mcpl_filepath))
453
+ total_count += args['ncount']
444
454
  # rename the outputfile to this run's filename
445
455
  files[-1] = mcpl_rename_file(mcpl_filepath, files[-1])
446
456
 
restage/tables.py CHANGED
@@ -14,6 +14,11 @@ def utc_timestamp() -> float:
14
14
  return datetime.now(timezone.utc).timestamp()
15
15
 
16
16
 
17
+ def str_hash(string):
18
+ from hashlib import sha3_256
19
+ return sha3_256(string.encode('utf-8')).hexdigest()
20
+
21
+
17
22
  COMMON_COLUMNS = ['seed', 'ncount', 'output_path', 'gravitation', 'creation', 'last_access']
18
23
 
19
24
 
@@ -323,27 +328,30 @@ class InstrEntry:
323
328
  id: str = field(default_factory=uuid)
324
329
  creation: float = field(default_factory=utc_timestamp)
325
330
  last_access: float = field(default_factory=utc_timestamp)
331
+ file_hash: str = field(default_factory=str)
326
332
 
327
333
  @classmethod
328
334
  def from_query_result(cls, values):
329
- fid, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access = values
330
- return cls(file_contents, mpi != 0, acc != 0, binary_path, mccode_version, fid, creation, last_access)
335
+ fid, file_hash, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access = values
336
+ return cls(file_contents, mpi != 0, acc != 0, binary_path, mccode_version, fid, creation, last_access, file_hash)
331
337
 
332
338
  def __post_init__(self):
333
339
  if len(self.mccode_version) == 0:
334
340
  from mccode_antlr import __version__
335
341
  self.mccode_version = __version__
342
+ if len(self.file_hash) == 0:
343
+ self.file_hash = str_hash(self.file_contents)
336
344
 
337
345
  @staticmethod
338
346
  def columns():
339
- return ['id', 'file_contents', 'mpi', 'acc', 'binary_path', 'mccode_version', 'creation', 'last_access']
347
+ return ['id', 'file_hash', 'file_contents', 'mpi', 'acc', 'binary_path', 'mccode_version', 'creation', 'last_access']
340
348
 
341
349
  def values(self):
342
- str_values = [f"'{x}'" for x in (self.id, self.file_contents, self.binary_path, self.mccode_version)]
350
+ str_values = [f"'{x}'" for x in (self.id, self.file_hash, self.file_contents, self.binary_path, self.mccode_version)]
343
351
  int_values = [f'{x}' for x in (self.mpi, self.acc)]
344
352
  flt_values = [f'{self.creation}', f'{self.last_access}']
345
- # matches id, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access order
346
- return str_values[:2] + int_values + str_values[2:] + flt_values
353
+ # matches id, file_hash, file_contents, mpi, acc, binary_path, mccode_version, creation, last_access order
354
+ return str_values[:3] + int_values + str_values[3:] + flt_values
347
355
 
348
356
  @classmethod
349
357
  def create_sql_table(cls, table_name: str = 'instr_files'):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: restage
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Author-email: Gregory Tucker <gregory.tucker@ess.eu>
5
5
  License: BSD-3-Clause
6
6
  Classifier: License :: OSI Approved :: BSD License
@@ -15,6 +15,7 @@ Requires-Python: >=3.9
15
15
  Description-Content-Type: text/markdown
16
16
  Requires-Dist: zenlog>=1.1
17
17
  Requires-Dist: platformdirs>=3.11
18
+ Requires-Dist: confuse
18
19
  Requires-Dist: psutil>=5.9.6
19
20
  Requires-Dist: mccode-antlr[hdf5]>=0.10.2
20
21
  Provides-Extra: test
@@ -107,3 +108,35 @@ splitrun my_instrument.instr -n 1000000 -d /data/output sample_angle=1:90 sample
107
108
 
108
109
 
109
110
 
111
+ ## Cached data
112
+ ### Default writable cache
113
+ A `sqlite3` database is used to keep track of instrument stages, their compiled
114
+ binaries, and output file(s) produced by, e.g., `splitrun` simulations.
115
+ The default database location is determined by `platformdirs` under a folder
116
+ set by `user_cache_path('restage', 'ess')` and the default locations for
117
+ `restage`-compiled instrument binaries and simulation output are determined from
118
+ `user_data_path('restage', 'ess')`.
119
+
120
+ ### Override the database and output locations
121
+ These default locations can be overridden by setting the `RESTAGE_CACHE` environment
122
+ variable to a writable folder, e.g., `export RESTAGE_CACHE="/tmp/ephemeral"`.
123
+
124
+ ### Read-only cache database(s)
125
+ Any number of fixed databases can be provided to allow for, e.g., system-wide reuse
126
+ of common staged simulations.
127
+ The location(s) of these database file(s) can be specified as a single
128
+ environment variable containing space-separated file locations, e.g.,
129
+ `export RESTAGE_FIXED="/usr/local/restage /afs/ess.eu/restage"`.
130
+ If the locations provided include a `database.db` file, they will be used to search
131
+ for instrument binaries and simulation output directories.
132
+
133
+ ### Use a configuration file to set parameters
134
+ Cache configuration information can be provided via a configuration file at,
135
+ e.g., `~/.config/restage/config.yaml`, like
136
+ ```yaml
137
+ cache: /tmp/ephemeral
138
+ fixed: /usr/local/restage /afs/ess.eu/restage
139
+ ```
140
+ The exact location searched to find the configuration file is platform-dependent;
141
+ please consult the [`confuse` documentation](https://confuse.readthedocs.io/en/latest/usage.html)
142
+ for the paths used on your system.
@@ -0,0 +1,21 @@
1
+ restage/__init__.py,sha256=HlqvPpL7DKet00NAFyqJBNg9UFO7o05Gt2tFyKBQcsY,744
2
+ restage/bifrost_choppers.py,sha256=xQu21g2NcTLPpZ0ZWOuvN20zh07EWoO4QVoTnoORwZI,6443
3
+ restage/cache.py,sha256=PD07z9pxGwBcxoizgy11zoQBjViF1ZSzKS0686RZ3FI,8115
4
+ restage/cspec_choppers.py,sha256=ZWxyCcwYn4z9ZNqj_r6RC9ImbhVjYc1fmv-Ijm8A2Yk,206
5
+ restage/database.py,sha256=anyOby31fUN7rGAVNsnWDUhAISV0vQ7en8aQwVS5ZwA,11051
6
+ restage/emulate.py,sha256=VrhfZJIbECdbDS-MHklqRuAIy9cRkjZkwPBTKQSQoe0,6164
7
+ restage/energy.py,sha256=w78GUIWcHxANvBl2DTu73FQFawCXfzlK6L32TBQNt4g,3371
8
+ restage/instr.py,sha256=A0ShtXkswt_f7o-cIDtsVbG03_tGELe1aS3WzLxzkJM,2494
9
+ restage/mcpl.py,sha256=BZYxBytughjc8slR6gUaBy3D7gzo7Yl3ACXrXhWgagI,3403
10
+ restage/range.py,sha256=TjOf4DSKfgoAIcrWQvv6MrtksQpnGJHdsEjVI5K-UfI,8116
11
+ restage/run.py,sha256=nk8d7cIyIqSt-5pyGm68Zak5H1a-fbo_z2_36eN-08E,1481
12
+ restage/scan.py,sha256=Yx8OQSBG6I2_64sW0LIDb0glVKwWoxUQQznASXgDZFQ,1432
13
+ restage/splitrun.py,sha256=W_pTeiMjc9hhu-zaE6fdetVLG6MGEpnaTOdgmgVkS1g,26061
14
+ restage/tables.py,sha256=mL1SrCbgwfWzG-ezd_R3CxOSIZLNZRoC2r7ht59jGMA,16371
15
+ restage/config/__init__.py,sha256=zFRT9QXgpUJpBncELCQ6by1-kjYp8Li1yJDfqxkHxAA,965
16
+ restage/config/default.yaml,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ restage-0.5.0.dist-info/METADATA,sha256=5MHru2wvnMp3OATaVEEHYBWI8fP16npDpXnXwXjlteo,6769
18
+ restage-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ restage-0.5.0.dist-info/entry_points.txt,sha256=gghocSxC2gHHxUCalAibCN1mtkh3trNmAfH5Qwx0KYg,149
20
+ restage-0.5.0.dist-info/top_level.txt,sha256=iM_pb-taTZ0S2WMoDnt_qDMZoNMjmM19z3tTCuVm1IE,8
21
+ restage-0.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,19 +0,0 @@
1
- restage/__init__.py,sha256=v0dAUYVkvzjd3j6gjFdbunV_P8U9XxsGgLFGwbxBy6E,787
2
- restage/bifrost_choppers.py,sha256=aeAw4JgkGPSewU-mqGlGqo1AmN2dmf6FP9u4VDx-eLo,6338
3
- restage/cache.py,sha256=Z60tn372Z_KlXC4e0UqMPPuM0NR8e6eD3aNbdR_q4rM,8374
4
- restage/cspec_choppers.py,sha256=ZWxyCcwYn4z9ZNqj_r6RC9ImbhVjYc1fmv-Ijm8A2Yk,206
5
- restage/database.py,sha256=pblHu8hCV5u3uyE8aUrnBSsfjDLYrxy9JRtnRuOvTXQ,9152
6
- restage/emulate.py,sha256=VrhfZJIbECdbDS-MHklqRuAIy9cRkjZkwPBTKQSQoe0,6164
7
- restage/energy.py,sha256=eLU6AmD_EfNsE1jOdorj_gqEL9tthRWjS-Oz9IlZ2-s,2665
8
- restage/instr.py,sha256=ZC-IDNSOurmxhHp78JQmMhBJ-L4h-eFBQN9l6_62MEw,3052
9
- restage/mcpl.py,sha256=BZYxBytughjc8slR6gUaBy3D7gzo7Yl3ACXrXhWgagI,3403
10
- restage/range.py,sha256=TjOf4DSKfgoAIcrWQvv6MrtksQpnGJHdsEjVI5K-UfI,8116
11
- restage/run.py,sha256=nk8d7cIyIqSt-5pyGm68Zak5H1a-fbo_z2_36eN-08E,1481
12
- restage/scan.py,sha256=Yx8OQSBG6I2_64sW0LIDb0glVKwWoxUQQznASXgDZFQ,1432
13
- restage/splitrun.py,sha256=dAwDFz_KShB_OWlmARxpHyCfGhBYNuEyujveuAuS74g,25800
14
- restage/tables.py,sha256=oxHc9TdTVPBC6ZlQUQdoNzlo4CyvPp-pfijK81eAK28,16053
15
- restage-0.4.0.dist-info/METADATA,sha256=ajMZ-yjLL6dYo5rvvDyBrkAfTmEPrFH6B74jeuw9isc,5112
16
- restage-0.4.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
17
- restage-0.4.0.dist-info/entry_points.txt,sha256=gghocSxC2gHHxUCalAibCN1mtkh3trNmAfH5Qwx0KYg,149
18
- restage-0.4.0.dist-info/top_level.txt,sha256=iM_pb-taTZ0S2WMoDnt_qDMZoNMjmM19z3tTCuVm1IE,8
19
- restage-0.4.0.dist-info/RECORD,,