arvados-cli 0.1.20150414014706 → 0.1.20150415200702

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +37 -16
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e2c86950df407714ce2140ccfb36810b6d9916b5
4
- data.tar.gz: 973e4f62af66a78a53b55fd1fad5b4e83eb46536
3
+ metadata.gz: c5a29066057cf677fedb4ac70ed67cc35d6a556c
4
+ data.tar.gz: 90a07dc724dbdfd869e9ce1ba621845f5b2a90f8
5
5
  SHA512:
6
- metadata.gz: 96fc3f35e74215321c8eb90dae49b53dd2bdec3927b5212402792cbd4b77e3aacfde16861e6ac973068c8b3aa4b59395a279466706200431a71cc7d32e390f47
7
- data.tar.gz: 15d1f220b4626c654c084be1cf7828aae91a39ff6fd0e44e42548b4e4ba97acfe67af7aa2e5df2fa324561046ed31aec2c6531f2746a3766b718601399239827
6
+ metadata.gz: 66e9d5b6de7e26a854741e6c3d1303237420de566ab3b22ebc10c42cf1321be030dcbaf653e5c306c49ff98c9ab536ccb25a6b9ea1e2b0317ea35a75e0cb4ccd
7
+ data.tar.gz: a95c578369dcfb2176cdf24e382e8131979a52cf9b6eddfeb856294f9b16bc95fccafafed3ead0f2a22b55b91607d7bd0c8f06510338d5320b30062a42980f22
data/bin/crunch-job CHANGED
@@ -643,12 +643,44 @@ my $thisround_failed_multiple = 0;
643
643
  @jobstep_todo = sort { $jobstep[$a]->{level} <=> $jobstep[$b]->{level}
644
644
  or $a <=> $b } @jobstep_todo;
645
645
  my $level = $jobstep[$jobstep_todo[0]]->{level};
646
- Log (undef, "start level $level");
647
646
 
647
+ my $initial_tasks_this_level = 0;
648
+ foreach my $id (@jobstep_todo) {
649
+ $initial_tasks_this_level++ if ($jobstep[$id]->{level} == $level);
650
+ }
651
+
652
+ # If the number of tasks scheduled at this level #T is smaller than the number
653
+ # of slots available #S, only use the first #T slots, or the first slot on
654
+ # each node, whichever number is greater.
655
+ #
656
+ # When we dispatch tasks later, we'll allocate whole-node resources like RAM
657
+ # based on these numbers. Using fewer slots makes more resources available
658
+ # to each individual task, which should normally be a better strategy when
659
+ # there are fewer of them running with less parallelism.
660
+ #
661
+ # Note that this calculation is not redone if the initial tasks at
662
+ # this level queue more tasks at the same level. This may harm
663
+ # overall task throughput for that level.
664
+ my @freeslot;
665
+ if ($initial_tasks_this_level < @node) {
666
+ @freeslot = (0..$#node);
667
+ } elsif ($initial_tasks_this_level < @slot) {
668
+ @freeslot = (0..$initial_tasks_this_level - 1);
669
+ } else {
670
+ @freeslot = (0..$#slot);
671
+ }
672
+ my $round_num_freeslots = scalar(@freeslot);
648
673
 
674
+ my %round_max_slots = ();
675
+ for (my $ii = $#freeslot; $ii >= 0; $ii--) {
676
+ my $this_slot = $slot[$freeslot[$ii]];
677
+ my $node_name = $this_slot->{node}->{name};
678
+ $round_max_slots{$node_name} ||= $this_slot->{cpu};
679
+ last if (scalar(keys(%round_max_slots)) >= @node);
680
+ }
649
681
 
682
+ Log(undef, "start level $level with $round_num_freeslots slots");
650
683
  my %proc;
651
- my @freeslot = (0..$#slot);
652
684
  my @holdslot;
653
685
  my %reader;
654
686
  my $progress_is_dirty = 1;
@@ -657,12 +689,6 @@ my $progress_stats_updated = 0;
657
689
  update_progress_stats();
658
690
 
659
691
 
660
- my $tasks_this_level = 0;
661
- foreach my $id (@jobstep_todo) {
662
- $tasks_this_level++ if ($jobstep[$id]->{level} == $level);
663
- }
664
-
665
-
666
692
  THISROUND:
667
693
  for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
668
694
  {
@@ -716,16 +742,11 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
716
742
  $ENV{"HOME"} = $ENV{"TASK_WORK"};
717
743
  $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
718
744
  $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
719
- $ENV{"CRUNCH_NODE_SLOTS"} = $slot[$childslot]->{node}->{ncpus};
745
+ $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
720
746
  $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
721
747
 
722
748
  $ENV{"GZIP"} = "-n";
723
749
 
724
- my $max_node_concurrent_tasks = $ENV{CRUNCH_NODE_SLOTS};
725
- if ($tasks_this_level < $max_node_concurrent_tasks) {
726
- $max_node_concurrent_tasks = $tasks_this_level;
727
- }
728
-
729
750
  my @srunargs = (
730
751
  "srun",
731
752
  "--nodelist=".$childnode->{name},
@@ -740,7 +761,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
740
761
  # $command. No tool is expected to read these values directly.
741
762
  .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
742
763
  .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
743
- ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($max_node_concurrent_tasks * 100) )) "
764
+ ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
744
765
  ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
745
766
  $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
746
767
  if ($docker_hash)
@@ -860,7 +881,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
860
881
 
861
882
  while (!@freeslot
862
883
  ||
863
- (@slot > @freeslot && $todo_ptr+1 > $#jobstep_todo))
884
+ ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo))
864
885
  {
865
886
  last THISROUND if $main::please_freeze || defined($main::success);
866
887
  if ($main::please_info)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20150414014706
4
+ version: 0.1.20150415200702
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-14 00:00:00.000000000 Z
11
+ date: 2015-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit 6261cf9003ec37622d38a3c40d94a75eff397922
181
+ description: Arvados command line tools, git commit d2e7a97c8d24ef8ae61d860e9c972626f80cf2b4
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv