lsst-ctrl-execute 30.0.5__tar.gz → 30.0.6__tar.gz

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (70)
  1. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/.github/workflows/build.yaml +2 -2
  2. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/PKG-INFO +1 -1
  3. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/allocationConfig.py +5 -0
  4. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/allocator.py +39 -4
  5. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/allocatorParser.py +10 -1
  6. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/condorConfig.py +2 -0
  7. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/slurmPlugin.py +13 -0
  8. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_allocatorParser.py +6 -0
  9. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_condorConfig.py +2 -0
  10. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_slurmPlugin.py +4 -0
  11. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_condor_slurm.py +2 -0
  12. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_execconfig.py +1 -0
  13. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/.github/dependabot.yml +0 -0
  14. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/.github/workflows/codeql-analysis.yml +0 -0
  15. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/.github/workflows/formatting.yaml +0 -0
  16. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/.github/workflows/rebase_checker.yaml +0 -0
  17. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/.gitignore +0 -0
  18. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/.pre-commit-config.yaml +0 -0
  19. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/COPYRIGHT +0 -0
  20. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/LICENSE +0 -0
  21. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/SConstruct +0 -0
  22. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/bsd_license.txt +0 -0
  23. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/etc/configs/gordon_config.py +0 -0
  24. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/etc/configs/lsst_config.py +0 -0
  25. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/etc/scripts/generateDag.py +0 -0
  26. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/gpl-v3.0.txt +0 -0
  27. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/pyproject.toml +0 -0
  28. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/__init__.py +0 -0
  29. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/__init__.py +0 -0
  30. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/__init__.py +0 -0
  31. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/condorInfoConfig.py +0 -0
  32. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/envString.py +0 -0
  33. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/findPackageFile.py +0 -0
  34. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/libexec/allocateNodes.py +0 -0
  35. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/libexec/dagIdInfo.py +0 -0
  36. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/libexec/qdelete.py +0 -0
  37. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/libexec/qstatus.py +0 -0
  38. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/namedClassFactory.py +0 -0
  39. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/pbsPlugin.py +0 -0
  40. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/qCommand.py +0 -0
  41. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/seqFile.py +0 -0
  42. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/python/lsst/ctrl/execute/templateWriter.py +0 -0
  43. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/setup.cfg +0 -0
  44. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/README +0 -0
  45. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/SConscript +0 -0
  46. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_allocationConfig.py +0 -0
  47. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_condorInfoConfig.py +0 -0
  48. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_dagIdInfo.py +0 -0
  49. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_findPackageFile.py +0 -0
  50. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_seqFile.py +0 -0
  51. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/test_templateWriter.py +0 -0
  52. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/allocator-info1.py +0 -0
  53. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_allocation.py +0 -0
  54. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_allocation_slurm.py +0 -0
  55. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_asserts.py +0 -0
  56. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_condor.py +0 -0
  57. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_condorInfo.py +0 -0
  58. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_condor_getenv.py +0 -0
  59. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_condor_setups.py +0 -0
  60. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/config_pegasus.py +0 -0
  61. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/generic.pbs.template +0 -0
  62. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/generic.pbs.txt +0 -0
  63. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/generic.slurm.template +0 -0
  64. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/generic.slurm.txt +0 -0
  65. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/glidein_condor_config.template +0 -0
  66. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/glidein_condor_config.txt +0 -0
  67. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/templateWriter.template +0 -0
  68. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/templateWriter.txt +0 -0
  69. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/tests/testfiles/test.diamond.dag +0 -0
  70. {lsst_ctrl_execute-30.0.5 → lsst_ctrl_execute-30.0.6}/ups/ctrl_execute.table +0 -0
@@ -40,14 +40,14 @@ jobs:
40
40
  --junitxml=junit.xml -o junit_family=legacy
41
41
 
42
42
  - name: Upload coverage to codecov
43
- uses: codecov/codecov-action@v5
43
+ uses: codecov/codecov-action@v6
44
44
  with:
45
45
  files: ./coverage.xml
46
46
  token: ${{ secrets.CODECOV_TOKEN }}
47
47
 
48
48
  - name: Upload test results to Codecov
49
49
  if: ${{ !cancelled() }}
50
- uses: codecov/codecov-action@v5
50
+ uses: codecov/codecov-action@v6
51
51
  with:
52
52
  report_type: test_results
53
53
  token: ${{ secrets.CODECOV_TOKEN }}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lsst-ctrl-execute
3
- Version: 30.0.5
3
+ Version: 30.0.6
4
4
  Summary: Utilities for executing and managing workloads.
5
5
  Project-URL: Homepage, https://github.com/lsst/ctrl_execute
6
6
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
@@ -43,6 +43,11 @@ class AllocatedPlatformConfig(pexConfig.Config):
43
43
  dtype=str,
44
44
  default=None,
45
45
  )
46
+ collector = pexConfig.Field(
47
+ doc="host where HTCondor collector service is running",
48
+ dtype=str,
49
+ default=None,
50
+ )
46
51
  loginHostName = pexConfig.Field(doc="the host to login and copy files to", dtype=str, default=None)
47
52
  utilityPath = pexConfig.Field(
48
53
  doc="the directory containing the scheduler commands", dtype=str, default=None
@@ -109,15 +109,29 @@ class Allocator:
109
109
  self.defaults["USER_SCRATCH"] = user_scratch
110
110
  self.commandLineDefaults = {}
111
111
  self.commandLineDefaults["NODE_COUNT"] = self.opts.nodeCount
112
- self.commandLineDefaults["COLLECTOR"] = self.opts.collector
112
+ if self.configuration.platform.collector:
113
+ self.commandLineDefaults["COLLECTOR"] = self.configuration.platform.collector
114
+ if self.opts.collector:
115
+ self.commandLineDefaults["COLLECTOR"] = self.opts.collector
113
116
  self.commandLineDefaults["CPORT"] = self.opts.collectorport
117
+ if self.configuration.platform.peakcpus:
118
+ self.commandLineDefaults["PEAKCPUS"] = self.configuration.platform.peakcpus
119
+ else:
120
+ self.commandLineDefaults["PEAKCPUS"] = 256
121
+ if self.configuration.platform.peakmemory:
122
+ self.commandLineDefaults["PEAKMEMORY"] = self.configuration.platform.peakmemory
123
+ else:
124
+ self.commandLineDefaults["PEAKMEMORY"] = 1000000
114
125
  if self.opts.exclusive:
115
126
  self.commandLineDefaults["CPUS"] = self.configuration.platform.peakcpus
116
127
  else:
117
- self.commandLineDefaults["CPUS"] = self.opts.cpus
128
+ if self.opts.cpus < self.configuration.platform.peakcpus:
129
+ self.commandLineDefaults["CPUS"] = self.opts.cpus
130
+ else:
131
+ self.commandLineDefaults["CPUS"] = self.configuration.platform.peakcpus
118
132
  self.commandLineDefaults["WALL_CLOCK"] = self.opts.maximumWallClock
119
133
  self.commandLineDefaults["ACCOUNT"] = self.opts.account
120
- self.commandLineDefaults["MEMPERCORE"] = 4096
134
+ self.commandLineDefaults["MEMPERCORE"] = self.opts.mempercore
121
135
  self.commandLineDefaults["ALLOWEDAUTO"] = 500
122
136
  self.commandLineDefaults["AUTOCPUS"] = 16
123
137
  self.commandLineDefaults["MINAUTOCPUS"] = 15
@@ -223,7 +237,7 @@ class Allocator:
223
237
  if not os.path.exists(self.configDir):
224
238
  os.makedirs(self.configDir)
225
239
  outfile = self.createFile(inputFile, self.submitFileName)
226
- _LOG.debug("Wrote new Slurm submit file to %s", outfile)
240
+ _LOG.debug("Wrote new submit file to %s", outfile)
227
241
  return outfile
228
242
 
229
243
  def createCondorConfigFile(self, input):
@@ -350,6 +364,21 @@ class Allocator:
350
364
  """
351
365
  return self.getParameter("CPUS")
352
366
 
367
+ def getPeakcpus(self):
368
+ """Accessor for PEAKCPUS
369
+ @return the value of PEAKCPUS
370
+ """
371
+ return self.getParameter("PEAKCPUS")
372
+
373
+ def getPeakmemory(self):
374
+ """Accessor for PEAKMEMORY
375
+ @return the value of PEAKMEMORY
376
+ """
377
+ peakmemory = self.getParameter("PEAKMEMORY")
378
+ if self.opts.queue == "torino":
379
+ peakmemory = int(3 * peakmemory / 2)
380
+ return peakmemory
381
+
353
382
  def getAutoCPUs(self):
354
383
  """Size of standard glideins for allocateNodes auto
355
384
  @return the value of autoCPUs
@@ -366,6 +395,12 @@ class Allocator:
366
395
  """
367
396
  return self.getParameter("MINAUTOCPUS")
368
397
 
398
+ def getCollector(self):
399
+ """Accessor for COLLECTOR
400
+ @return the value of COLLECTOR
401
+ """
402
+ return self.getParameter("COLLECTOR")
403
+
369
404
  def getWallClock(self):
370
405
  """Accessor for WALL_CLOCK
371
406
  @return the value of WALL_CLOCK
@@ -123,6 +123,15 @@ class AllocatorParser:
123
123
  type=int,
124
124
  required=False,
125
125
  )
126
+ parser.add_argument(
127
+ "--mempercore",
128
+ action="store",
129
+ default=4096,
130
+ dest="mempercore",
131
+ help="Memory per core in MB to be scheduled by default",
132
+ type=int,
133
+ required=False,
134
+ )
126
135
  parser.add_argument(
127
136
  "-s",
128
137
  "--qos",
@@ -147,7 +156,7 @@ class AllocatorParser:
147
156
  "--queue",
148
157
  action="store",
149
158
  dest="queue",
150
- default="roma,milano",
159
+ default="milano",
151
160
  help="queue / partition name",
152
161
  )
153
162
  parser.add_argument(
@@ -44,6 +44,8 @@ class PlatformConfig(pexConfig.Config):
44
44
  nodeSetRequired = pexConfig.Field(doc="is the nodeset required", dtype=bool, default=False)
45
45
  scheduler = pexConfig.Field(doc="scheduler type", dtype=str, default=None)
46
46
  peakcpus = pexConfig.Field(doc="peakcpus", dtype=int, default=None)
47
+ peakmemory = pexConfig.Field(doc="peakmemory", dtype=int, default=None)
48
+ collector = pexConfig.Field(doc="collector", dtype=str, default=None)
47
49
  manager = pexConfig.Field(doc="workflow manager", dtype=str, default=None)
48
50
  setup_using = pexConfig.Field(doc="environment setup type", dtype=str, default=None)
49
51
  manager_software_home = pexConfig.Field(
@@ -150,6 +150,10 @@ class SlurmPlugin(Allocator):
150
150
  cpus = self.getCPUs()
151
151
  memoryPerCore = self.getMemoryPerCore()
152
152
  totalMemory = cpus * memoryPerCore
153
+ peakMemory = self.getPeakmemory()
154
+ if totalMemory > peakMemory:
155
+ totalMemory = peakMemory
156
+ _LOG.debug("Direct: Setting job memory to peak memory on platform.")
153
157
 
154
158
  # run the sbatch command
155
159
  template = Template(self.getLocalScratchDirectory())
@@ -324,6 +328,11 @@ class SlurmPlugin(Allocator):
324
328
  autoCPUs = cpus
325
329
  memoryPerCore = self.getMemoryPerCore()
326
330
  memoryLimit = autoCPUs * memoryPerCore
331
+ peakMemory = self.getPeakmemory()
332
+ if memoryLimit > peakMemory:
333
+ memoryLimit = peakMemory
334
+ _LOG.debug("Auto: Setting job memory to peak memory on platform.")
335
+
327
336
  auser = self.getUserName()
328
337
  anodeset = self.getNodeset()
329
338
 
@@ -400,6 +409,10 @@ class SlurmPlugin(Allocator):
400
409
  _LOG.debug("\n%d.%d", ajob["ClusterId"], ajob["ProcId"])
401
410
  _LOG.debug("%s", ajob)
402
411
  thisMemory = ajob["RequestMemoryEval"]
412
+ peakMemory = self.getPeakmemory()
413
+ if thisMemory > peakMemory:
414
+ thisMemory = peakMemory
415
+ _LOG.debug("Auto large: Setting job memory to peak memory on platform.")
403
416
  useCores = ajob["RequestCpus"]
404
417
  clusterid = ajob["ClusterId"]
405
418
  procid = ajob["ProcId"]
@@ -46,6 +46,10 @@ class TestAllocatorParser(lsst.utils.tests.TestCase):
46
46
  "sdfmilan003",
47
47
  "--nodelist",
48
48
  "sdfmilan004",
49
+ "--mempercore",
50
+ "6144",
51
+ "--collector",
52
+ "sdfiana039",
49
53
  "-q",
50
54
  "normal",
51
55
  "-O",
@@ -63,6 +67,8 @@ class TestAllocatorParser(lsst.utils.tests.TestCase):
63
67
  self.assertEqual(args.maximumWallClock, "00:30:00")
64
68
  self.assertEqual(args.exclude, "sdfmilan003")
65
69
  self.assertEqual(args.nodelist, "sdfmilan004")
70
+ self.assertEqual(args.mempercore, 6144)
71
+ self.assertEqual(args.collector, "sdfiana039")
66
72
  self.assertEqual(args.queue, "normal")
67
73
  self.assertEqual(args.outputLog, "outlog")
68
74
  self.assertEqual(args.errorLog, "errlog")
@@ -95,6 +95,8 @@ class TestCondorConfig(lsst.utils.tests.TestCase):
95
95
  self.assertEqual(self.config.platform.scheduler, "slurm")
96
96
  self.assertEqual(self.config.platform.setup_using, "getenv")
97
97
  self.assertEqual(self.config.platform.manager, "dagman")
98
+ self.assertEqual(self.config.platform.peakcpus, 120)
99
+ self.assertEqual(self.config.platform.peakmemory, 491520)
98
100
 
99
101
  def test6(self):
100
102
  path = os.path.join("tests", "testfiles", "config_pegasus.py")
@@ -88,12 +88,16 @@ class SlurmPluginTest(lsst.utils.tests.TestCase):
88
88
  scheduler: Allocator = schedulerClass(platform, args, configuration, condor_info_file)
89
89
  self.assertTrue(scheduler)
90
90
 
91
+ peakcpus = scheduler.getPeakcpus()
92
+ peakmemory = scheduler.getPeakmemory()
91
93
  autocpus = scheduler.getAutoCPUs()
92
94
  minautocpus = scheduler.getMinAutoCPUs()
93
95
  cpus = scheduler.getCPUs()
94
96
  nodes = scheduler.getNodes()
95
97
  nodeset = scheduler.getNodeset()
96
98
  wallclock = scheduler.getWallClock()
99
+ self.assertEqual(peakcpus, 120)
100
+ self.assertEqual(peakmemory, 737280)
97
101
  self.assertEqual(autocpus, 16)
98
102
  self.assertEqual(minautocpus, 15)
99
103
  self.assertEqual(cpus, 12)
@@ -1,4 +1,6 @@
1
1
  # flake8: noqa
2
+ config.platform.peakcpus = 120
3
+ config.platform.peakmemory = 491520
2
4
  config.platform.defaultRoot = "/usr"
3
5
  config.platform.localScratch = "./tests/condor_scratch_slurm"
4
6
  config.platform.dataDirectory = "/tmp/data_slurm"
@@ -4,3 +4,4 @@ config.platform.localScratch = "/condor_scratch"
4
4
  config.platform.fileSystemDomain = "slac.stanford.edu"
5
5
  config.platform.scheduler = "slurm"
6
6
  config.platform.peakcpus = 120
7
+ config.platform.peakmemory = 737280