build-graph 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,7 +14,7 @@
14
14
  // A parallel merge sort algorithm template implemented using C++11 threads.
15
15
  namespace ParallelMergeSort
16
16
  {
17
- /*
17
+ /*
18
18
  # Parallel Merge Algorithm
19
19
 
20
20
  This parallel merge algorithm uses two threads and requires no synchronisation (i.e. it is lock-free).
@@ -38,12 +38,12 @@ namespace ParallelMergeSort
38
38
  ArrayT & source, & destination;
39
39
  const ComparatorT & comparator;
40
40
  std::size_t lower_bound, middle_bound;
41
-
41
+
42
42
  void operator()() {
43
43
  std::size_t left = lower_bound;
44
44
  std::size_t right = middle_bound;
45
45
  std::size_t offset = lower_bound;
46
-
46
+
47
47
  while (offset < middle_bound) {
48
48
  if (comparator(source[left], source[right])) {
49
49
  destination[offset++] = source[left++];
@@ -53,19 +53,19 @@ namespace ParallelMergeSort
53
53
  }
54
54
  }
55
55
  };
56
-
56
+
57
57
  // This implementation assumes that if there are |j| items on the right side, there are at least |j| - 1 items on the left side.
58
58
  template <typename ArrayT, typename ComparatorT>
59
59
  struct ParallelRightMerge {
60
60
  ArrayT & source, & destination;
61
61
  const ComparatorT & comparator;
62
62
  std::size_t lower_bound, middle_bound, upper_bound;
63
-
63
+
64
64
  void operator()() {
65
65
  std::size_t left = middle_bound-1;
66
66
  std::size_t right = upper_bound-1;
67
67
  std::size_t offset = upper_bound-1;
68
-
68
+
69
69
  while (offset >= middle_bound) {
70
70
  if (comparator(source[left], source[right])) {
71
71
  destination[offset--] = source[right--];
@@ -76,14 +76,14 @@ namespace ParallelMergeSort
76
76
  if (offset >= middle_bound) {
77
77
  destination[offset] = source[right];
78
78
  }
79
-
79
+
80
80
  break;
81
81
  }
82
82
  }
83
83
  }
84
84
  }
85
85
  };
86
-
86
+
87
87
  // Merge two sorted sub-sequences sequentially (from left to right).
88
88
  // Is it possible to merge without copying from source to destination, and what are the performance implications?
89
89
  template <typename ArrayT, typename ComparatorT>
@@ -91,12 +91,12 @@ namespace ParallelMergeSort
91
91
  std::size_t left = lower_bound;
92
92
  std::size_t right = middle_bound;
93
93
  std::size_t offset = lower_bound;
94
-
94
+
95
95
  // We merge both sub-sequences, defined as the half-open ranges [lower_bound, middle_bound) and [middle_bound, upper_bound).
96
96
  while (true) {
97
97
  if (comparator(source[left], source[right])) {
98
98
  destination[offset++] = source[left++];
99
-
99
+
100
100
  // If we have adjusted left, we may have exhausted left side:
101
101
  if (left == middle_bound) {
102
102
  // We have no more elements in lower half.
@@ -105,7 +105,7 @@ namespace ParallelMergeSort
105
105
  }
106
106
  } else {
107
107
  destination[offset++] = source[right++];
108
-
108
+
109
109
  // As above, we may have exhausted right side:
110
110
  if (right == upper_bound) {
111
111
  // We have no more elements in upper half.
@@ -113,32 +113,32 @@ namespace ParallelMergeSort
113
113
  break;
114
114
  }
115
115
  }
116
- }
116
+ }
117
117
  }
118
-
118
+
119
119
  template <typename ArrayT, typename ComparatorT>
120
120
  void partition(ArrayT & array, ArrayT & temporary, const ComparatorT & comparator, std::size_t lower_bound, std::size_t upper_bound, std::size_t threaded);
121
-
121
+
122
122
  // This functor is used for parallelizing the top level partition function.
123
123
  template <typename ArrayT, typename ComparatorT>
124
124
  struct ParallelPartition {
125
125
  ArrayT & array, & temporary;
126
126
  const ComparatorT & comparator;
127
127
  std::size_t lower_bound, upper_bound, threaded;
128
-
128
+
129
129
  void operator()() {
130
130
  partition(array, temporary, comparator, lower_bound, upper_bound, threaded);
131
131
  }
132
132
  };
133
-
133
+
134
134
  /** Recursive Partition Algorithm.
135
-
135
+
136
136
  This algorithm uses O(2n) memory to reduce the number of copies that occur. It does this by using a parity such that at each point in the partition tree we provide a source and destination. Given the functions P (partition) and M (merge), we have the following theorem:
137
-
137
+
138
138
  P(A=source, B=destination) sorts source into destination. A=[...] means that we are considering only a subset of A. Subscript is not given, but should be intuitive given the definition of merge sort. (x) on the left gives the order of each step as performed sequentially.
139
-
139
+
140
140
  == [ PARTITION ] == == [ MERGE ] ==
141
-
141
+
142
142
  (1) P(A=[1,3,4,2], B=[1,3,2,4]) (14) M(A=[1,3,2,4], B): B = [1,2,3,4]
143
143
  |
144
144
  (2) |---P(B=[1,3], A=[1,3]) (7) M(B=[1,3], A): A=[1,3]
@@ -150,16 +150,16 @@ namespace ParallelMergeSort
150
150
  |
151
151
  (9) |---P(A=[4],B=[4]) (10) M(A=[4], B): B=[4]
152
152
  (11) \---P(A=[2],B=[2]) (12) M(A=[2], B): B=[2]
153
-
153
+
154
154
  During merge, we fold back up, and alternate between A and B for the current storage. This avoids the need to dynamically allocate memory during sort and avoids unnecessary copies.
155
-
155
+
156
156
  */
157
-
157
+
158
158
  // Sequential partition algorithm. Provide an array, and an upper and lower bound to sort.
159
159
  template <typename ArrayT, typename ComparatorT>
160
160
  void partition(ArrayT & source, ArrayT & destination, const ComparatorT & comparator, const std::size_t & lower_bound, const std::size_t & upper_bound) {
161
161
  std::size_t count = upper_bound - lower_bound;
162
-
162
+
163
163
  // In the case where count == 1, we are at the very bottom of the tree and both source and destination will be the same.
164
164
  // The same applies when count == 2, but we might need to swap the items around if they are not in the right order.
165
165
  if (count == 2) {
@@ -169,66 +169,66 @@ namespace ParallelMergeSort
169
169
  // After this point, where count > 2, source and destination are different.
170
170
  } else if (count > 2) {
171
171
  std::size_t middle_bound = (lower_bound + upper_bound) / 2;
172
-
172
+
173
173
  // While it is possible to simply call partition, we try to avoid recursion by folding up the bottom two cases:
174
174
  // (count == 1), do nothing
175
175
  // (count == 2), swap if order is not correct
176
- // (count > 2), partition
176
+ // (count > 2), partition
177
177
  // After profiling, I found that the benefit of unrolling (count == 2) was minimal - there was about a 2-3% improvement.
178
178
 
179
179
  std::size_t lower_count = middle_bound - lower_bound;
180
180
  if (lower_count > 1)
181
181
  partition(destination, source, comparator, lower_bound, middle_bound);
182
-
183
- std::size_t upper_count = upper_bound - middle_bound;
182
+
183
+ std::size_t upper_count = upper_bound - middle_bound;
184
184
  if (upper_count > 1)
185
185
  partition(destination, source, comparator, middle_bound, upper_bound);
186
-
186
+
187
187
  merge(source, destination, comparator, lower_bound, middle_bound, upper_bound);
188
188
  }
189
189
  }
190
-
190
+
191
191
  /** Parallel Partition Algorithm
192
-
192
+
193
193
  This parallel partition algorithm which controls the downward descent of the merge sort algorithm is designed for large datasets. Because merge sort follows a binary tree structure, the work is essentially split between two threads at each node in the tree. Firstly, we must recursively call partition on two separate threads. Once this is done, we have two ascending sequences, and we merge these together, again using two threads, one for left sequence and one for right sequence.
194
-
194
+
195
195
  Because higher level threads will be waiting on lower level threads, threaded should be chosen such that 2^threaded == processors for best performance.
196
-
196
+
197
197
  */
198
-
198
+
199
199
  // Use this to control whether parallel partition is used.
200
200
  // For large data sets > 500_000 items, you will see an improvement of about ~50% per thread.
201
201
  const bool PARALLEL_PARTITION = true;
202
-
203
- // Use this to control whether parallel merge is used.
202
+
203
+ // Use this to control whether parallel merge is used.
204
204
  // For large data sets > 1_000_000 items, you will see an improvement of about 15%.
205
205
  const bool PARALLEL_MERGE = true;
206
-
206
+
207
207
  // If you make this number too small, e.g. <= 2, you may cause synchronisation issues, because you will force parallelisation
208
208
  // for base cases which actually need to be sequential to ensure that comparison cache is generated correctly.
209
209
  const std::size_t PARALLEL_MERGE_MINIMUM_COUNT = 128;
210
-
210
+
211
211
  // Provide an array, and an upper and lower bound, along with the number of threads to use.
212
212
  template <typename ArrayT, typename ComparatorT>
213
213
  void partition(ArrayT & source, ArrayT & destination, const ComparatorT & comparator, std::size_t lower_bound, std::size_t upper_bound, std::size_t threaded) {
214
214
  std::size_t count = upper_bound - lower_bound;
215
-
215
+
216
216
  if (count > 1) {
217
217
  std::size_t middle_bound = (lower_bound + upper_bound) / 2;
218
-
218
+
219
219
  //Benchmark::WallTime tp;
220
220
  if (PARALLEL_PARTITION && threaded > 0) {
221
221
  // We could check whether there is any work to do before creating threads, but we assume
222
222
  // that threads will only be created high up in the tree by default, so there *should*
223
223
  // be a significant work available per-thread.
224
- ParallelPartition<ArrayT, ComparatorT>
225
- lower_partition = {destination, source, comparator, lower_bound, middle_bound, threaded - 1},
224
+ ParallelPartition<ArrayT, ComparatorT>
225
+ lower_partition = {destination, source, comparator, lower_bound, middle_bound, threaded - 1},
226
226
  upper_partition = {destination, source, comparator, middle_bound, upper_bound, threaded - 1};
227
-
228
- std::thread
227
+
228
+ std::thread
229
229
  lower_thread(lower_partition),
230
230
  upper_thread(upper_partition);
231
-
231
+
232
232
  upper_thread.join();
233
233
  lower_thread.join();
234
234
  } else {
@@ -237,17 +237,17 @@ namespace ParallelMergeSort
237
237
  partition(destination, source, comparator, middle_bound, upper_bound);
238
238
  }
239
239
  //std::cerr << "Partition Time: " << tp.total() << " [" << lower_bound << " -> " << upper_bound << " : " << threaded << " ]" << std::endl;
240
-
240
+
241
241
  //Benchmark::WallTime tm;
242
242
  if (PARALLEL_MERGE && threaded > 0 && count > PARALLEL_MERGE_MINIMUM_COUNT) {
243
243
  // By the time we get here, we are sure that both left and right partitions have been merged, e.g. we have two ordered sequences [lower_bound, middle_bound] and [middle_bound, upper_bound]. Now, we need to join them together:
244
244
  ParallelLeftMerge<ArrayT, ComparatorT> left_merge = {source, destination, comparator, lower_bound, middle_bound};
245
245
  ParallelRightMerge<ArrayT, ComparatorT> right_merge = {source, destination, comparator, lower_bound, middle_bound, upper_bound};
246
-
246
+
247
247
  std::thread
248
248
  left_thread(left_merge),
249
249
  right_thread(right_merge);
250
-
250
+
251
251
  left_thread.join();
252
252
  right_thread.join();
253
253
  } else {
@@ -257,17 +257,17 @@ namespace ParallelMergeSort
257
257
  //std::cerr << "Merge Time: " << tm.total() << " [" << lower_bound << " -> " << upper_bound << " : " << threaded << " ]" << std::endl;
258
258
  }
259
259
  }
260
-
260
+
261
261
  /** Parallel Merge Sort, main entry point.
262
-
263
- Given an array of items, a comparator functor, use at most 2^threaded threads to sort the items.
264
-
262
+
263
+ Given an array of items, a comparator functor, use at most 2^threaded threads to sort the items.
264
+
265
265
  */
266
266
  template <typename ArrayT, typename ComparatorT>
267
267
  void sort(ArrayT & array, const ComparatorT & comparator, std::size_t threaded = 2) {
268
268
  // Is all this swapping around really necessary?
269
269
  ArrayT temporary(array.begin(), array.end());
270
-
270
+
271
271
  //Benchmark::WallTime ts;
272
272
  if (threaded == 0)
273
273
  partition(temporary, array, comparator, 0, array.size());
@@ -17,18 +17,18 @@ template <typename AnyT>
17
17
  std::ostream& operator<< (std::ostream &o, const std::vector<AnyT> & v)
18
18
  {
19
19
  bool first = true;
20
-
20
+
21
21
  o << "[";
22
22
  for (typename std::vector<AnyT>::const_iterator i = v.begin(); i != v.end(); ++i) {
23
23
  if (first)
24
24
  first = false;
25
25
  else
26
26
  o << ", ";
27
-
27
+
28
28
  o << *i;
29
29
  }
30
30
  o << "]";
31
-
31
+
32
32
  return o;
33
33
  }
34
34
 
@@ -42,18 +42,18 @@ static void test_parallel_merge ()
42
42
  2, 4, 6, 8, 12,
43
43
  1, 3, 5, 10, 11
44
44
  };
45
-
45
+
46
46
  ArrayT a(data, data+(sizeof(data)/sizeof(*data)));
47
47
  ArrayT b(a.size());
48
-
48
+
49
49
  ParallelMergeSort::ParallelLeftMerge<ArrayT, ComparatorT> left_merge = {a, b, comparator, 0, a.size() / 2};
50
50
  left_merge();
51
-
51
+
52
52
  std::cout << "After Left: " << b << std::endl;
53
-
53
+
54
54
  ParallelMergeSort::ParallelRightMerge<ArrayT, ComparatorT> right_merge = {a, b, comparator, 0, a.size() / 2, a.size()};
55
55
  right_merge();
56
-
56
+
57
57
  std::cout << "After Right: " << b << std::endl;
58
58
  }
59
59
 
@@ -62,33 +62,33 @@ static void test_sort ()
62
62
  typedef std::vector<long long> ArrayT;
63
63
  typedef std::less<long long> ComparatorT;
64
64
  ComparatorT comparator;
65
-
65
+
66
66
  const long long data[] = {
67
67
  11, 2, 4, 6, 8, 10, 12, 1, 3, 5, 7, 9, 13
68
68
  };
69
-
69
+
70
70
  std::vector<long long> v(data, data+(sizeof(data)/sizeof(*data)));
71
-
71
+
72
72
  std::cerr << "Sorting " << v << std::endl;
73
73
 
74
74
  ParallelMergeSort::sort(v, comparator, 0);
75
-
76
- std::cerr << "Sorted " << v << std::endl;
75
+
76
+ std::cerr << "Sorted " << v << std::endl;
77
77
  }
78
78
 
79
79
  static void test_dictionary ()
80
80
  {
81
81
  // This defines a dictionary based on ASCII characters.
82
82
  typedef DictionarySort::Dictionary<char, DictionarySort::IndexT[256]> ASCIIDictionaryT;
83
-
83
+
84
84
  // For unicode characters, you could use something like this:
85
85
  // typedef DictionarySort::Dictionary<uint32_t, std::map<uint32_t, DictionarySort::IndexT>> UCS32DictionaryT;
86
- // Be aware that
87
-
86
+ // Be aware that
87
+
88
88
  std::string s = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
89
89
  ASCIIDictionaryT::WordT alphabet(s.begin(), s.end());
90
90
  ASCIIDictionaryT dictionary(alphabet);
91
-
91
+
92
92
  ASCIIDictionaryT::WordsT words, sorted_words;
93
93
  const std::size_t MAX_LENGTH = 25;
94
94
  const std::size_t MAX_COUNT = 2500000;
@@ -99,7 +99,7 @@ static void test_dictionary ()
99
99
  }
100
100
  words.push_back(word);
101
101
  }
102
-
102
+
103
103
  std::cerr << "Sorting " << words.size() << " words..." << std::endl;
104
104
  std::cerr << "Sort mode = " << DictionarySort::SORT_MODE << std::endl;
105
105
 
@@ -115,17 +115,17 @@ static void test_dictionary ()
115
115
  checksum = dictionary.sort(words, sorted_words);
116
116
  }
117
117
  Benchmark::TimeT elapsed_time = t.total() / K;
118
-
118
+
119
119
  std::cerr << "Checksum: " << checksum << " ? " << (checksum == 479465310674138860) << std::endl;
120
120
  std::cerr << "Total Time: " << elapsed_time << std::endl;
121
121
  }
122
122
 
123
123
  int main (int argc, const char * argv[])
124
- {
124
+ {
125
125
  //test_parallel_merge();
126
126
  //test_sort();
127
127
  test_dictionary();
128
-
128
+
129
129
  return 0;
130
130
  }
131
131
 
@@ -29,6 +29,8 @@ module Build::Graph::WalkerSpec
29
29
  include Build::Files
30
30
 
31
31
  describe Build::Graph::Walker do
32
+ let(:logger) {Logger.new($stderr).tap{|logger| logger.level = Logger::DEBUG}}
33
+
32
34
  it "should be unique" do
33
35
  test_glob = Glob.new(__dir__, "*.rb")
34
36
  listing_output = Paths.directory(__dir__, ["listing.txt"])
@@ -36,8 +38,6 @@ module Build::Graph::WalkerSpec
36
38
  node_a = Node.new(test_glob, listing_output, "a")
37
39
  node_b = Node.new(listing_output, Paths::NONE, "b")
38
40
 
39
- nodes = Set.new([node_a, node_b])
40
-
41
41
  sequence = []
42
42
 
43
43
  # A walker runs repeatedly, updating tasks which have been marked as dirty.
@@ -49,7 +49,7 @@ module Build::Graph::WalkerSpec
49
49
  end
50
50
  end
51
51
 
52
- walker.update(nodes)
52
+ walker.update([node_a, node_b])
53
53
 
54
54
  expect(walker.tasks.count).to be == 2
55
55
  expect(walker.failed_tasks.count).to be == 0
@@ -64,8 +64,6 @@ module Build::Graph::WalkerSpec
64
64
  node_a = Node.new(test_glob, listing_output, "a")
65
65
  node_b = Node.new(listing_output, summary_output, "b")
66
66
 
67
- nodes = Set.new([node_a, node_b])
68
-
69
67
  # A walker runs repeatedly, updating tasks which have been marked as dirty.
70
68
  walker = Walker.new do |walker, node|
71
69
  task = Task.new(walker, node)
@@ -77,7 +75,7 @@ module Build::Graph::WalkerSpec
77
75
  end
78
76
  end
79
77
 
80
- walker.update(nodes)
78
+ walker.update([node_a, node_b])
81
79
 
82
80
  expect(walker.tasks.count).to be == 2
83
81
  expect(walker.failed_tasks.count).to be == 2
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: build-graph
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-09 00:00:00.000000000 Z
11
+ date: 2016-02-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: process-group
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.0.1
19
+ version: 1.1.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.0.1
26
+ version: 1.1.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: build-files
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 1.0.2
33
+ version: 1.0.3
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 1.0.2
40
+ version: 1.0.3
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: build-makefile
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -140,13 +140,15 @@ files:
140
140
  - Rakefile
141
141
  - build-graph.gemspec
142
142
  - lib/build/graph.rb
143
+ - lib/build/graph/call_stack.rb
143
144
  - lib/build/graph/edge.rb
144
- - lib/build/graph/error.rb
145
145
  - lib/build/graph/node.rb
146
146
  - lib/build/graph/task.rb
147
147
  - lib/build/graph/version.rb
148
148
  - lib/build/graph/walker.rb
149
149
  - spec/build/graph/build_test.rb
150
+ - spec/build/graph/call_stack_spec.rb
151
+ - spec/build/graph/edge_spec.rb
150
152
  - spec/build/graph/graph_spec.rb
151
153
  - spec/build/graph/inherit_spec.rb
152
154
  - spec/build/graph/node_spec.rb
@@ -178,13 +180,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
180
  version: '0'
179
181
  requirements: []
180
182
  rubyforge_project:
181
- rubygems_version: 2.4.6
183
+ rubygems_version: 2.5.1
182
184
  signing_key:
183
185
  specification_version: 4
184
186
  summary: Build::Graph is a framework for build systems, with specific functionality
185
187
  for dealing with file based processes.
186
188
  test_files:
187
189
  - spec/build/graph/build_test.rb
190
+ - spec/build/graph/call_stack_spec.rb
191
+ - spec/build/graph/edge_spec.rb
188
192
  - spec/build/graph/graph_spec.rb
189
193
  - spec/build/graph/inherit_spec.rb
190
194
  - spec/build/graph/node_spec.rb