RubyGems - build-graph - Versions diffs - 1.0.3 → 1.0.4 - Mend

build-graph 1.0.3 → 1.0.4

Files changed (21) hide show

checksums.yaml +4 -4
data/.travis.yml +9 -6
data/README.md +32 -8
data/build-graph.gemspec +2 -2
data/lib/build/graph.rb +1 -1
data/lib/build/graph/call_stack.rb +51 -0
data/lib/build/graph/edge.rb +1 -2
data/lib/build/graph/task.rb +23 -3
data/lib/build/graph/version.rb +1 -1
data/lib/build/graph/walker.rb +47 -18
data/spec/build/graph/build_test.rb +3 -2
data/spec/build/graph/call_stack_spec.rb +42 -0
data/{lib/build/graph/error.rb → spec/build/graph/edge_spec.rb} +15 -14
data/spec/build/graph/graph_spec.rb +33 -27
data/spec/build/graph/process_graph.rb +10 -6
data/spec/build/graph/program/Benchmark.cpp +3 -3
data/spec/build/graph/program/DictionarySort.h +58 -58
data/spec/build/graph/program/ParallelMergeSort.h +53 -53
data/spec/build/graph/program/main.cpp +21 -21
data/spec/build/graph/walker_spec.rb +4 -6
metadata +12 -8

data/spec/build/graph/program/ParallelMergeSort.h CHANGED Viewed

@@ -14,7 +14,7 @@
 // A parallel merge sort algorithm template implemented using C++11 threads.
 namespace ParallelMergeSort
 {
-	/*
+	/*
 	# Parallel Merge Algorithm
 	This parallel merge algorithm uses two threads and requires no synchronisation (i.e. it is lock free).
@@ -38,12 +38,12 @@ namespace ParallelMergeSort
 		ArrayT & source, & destination;
 		const ComparatorT & comparator;
 		std::size_t lower_bound, middle_bound;
 		void operator()() {
 			std::size_t left = lower_bound;
 			std::size_t right = middle_bound;
 			std::size_t offset = lower_bound;
 			while (offset < middle_bound) {
 				if (comparator(source[left], source[right])) {
 					destination[offset++] = source[left++];
@@ -53,19 +53,19 @@ namespace ParallelMergeSort
 			}
 		}
 	};
 	// This implementation assumes that if there are |j| items on the right side, there are at least |j| - 1 items on the left side.
 	template <typename ArrayT, typename ComparatorT>
 	struct ParallelRightMerge {
 		ArrayT & source, & destination;
 		const ComparatorT & comparator;
 		std::size_t lower_bound, middle_bound, upper_bound;
 		void operator()() {
 			std::size_t left = middle_bound-1;
 			std::size_t right = upper_bound-1;
 			std::size_t offset = upper_bound-1;
 			while (offset >= middle_bound) {
 				if (comparator(source[left], source[right])) {
 					destination[offset--] = source[right--];
@@ -76,14 +76,14 @@ namespace ParallelMergeSort
 						if (offset >= middle_bound) {
 							destination[offset] = source[right];
 						}
 						break;
 					}
 				}
 			}
 		}
 	};
 	// Merge two sorted sub-sequences sequentially (from left to right).
 	// Is it possible to merge without copying from source to destination, and what are the performance implications?
 	template <typename ArrayT, typename ComparatorT>
@@ -91,12 +91,12 @@ namespace ParallelMergeSort
 		std::size_t left = lower_bound;
 		std::size_t right = middle_bound;
 		std::size_t offset = lower_bound;
 		// We merge both sub-sequences, defined as [lower_bound, middle_bound] and [middle_bound, upper_bound].
 		while (true) {
 			if (comparator(source[left], source[right])) {
 				destination[offset++] = source[left++];
 				// If we have adjusted left, we may have exhausted left side:
 				if (left == middle_bound) {
 					// We have no more elements in lower half.
@@ -105,7 +105,7 @@ namespace ParallelMergeSort
 				}
 			} else {
 				destination[offset++] = source[right++];
 				// As above, we may have exhausted right side:
 				if (right == upper_bound) {
 					// We have no more elements in upper half.
@@ -113,32 +113,32 @@ namespace ParallelMergeSort
 					break;
 				}
 			}
-		}
+		}
 	}
 	template <typename ArrayT, typename ComparatorT>
 	void partition(ArrayT & array, ArrayT & temporary, const ComparatorT & comparator, std::size_t lower_bound, std::size_t upper_bound, std::size_t threaded);
 	// This functor is used for parallelizing the top level partition function.
 	template <typename ArrayT, typename ComparatorT>
 	struct ParallelPartition {
 		ArrayT & array, & temporary;
 		const ComparatorT & comparator;
 		std::size_t lower_bound, upper_bound, threaded;
 		void operator()() {
 			partition(array, temporary, comparator, lower_bound, upper_bound, threaded);
 		}
 	};
 	/** Recursive Partition Algorithm.
 	This algorithm uses O(2n) memory to reduce the number of copies that occur. It does this by using a parity such that at each point in the partition tree we provide a source and destination. Given the functions P (partition) and M (merge), we have the following theorem:
 	P(A=source, B=destination) sorts source into destination. A=[...] means that we are considering only a subset of A. Subscript is not given, but should be intuitive given the definition of merge sort. (x) on the left gives the order of each step as performed sequentially.
 	== [ PARTITION ] ==                     == [ MERGE ] ==
 	(1) P(A=[1,3,4,2], B=[1,3,2,4])        (14) M(A=[1,3,2,4], B): B = [1,2,3,4]
 	|
 	(2) |---P(B=[1,3], A=[1,3])             (7) M(B=[1,3], A): A=[1,3]
@@ -150,16 +150,16 @@ namespace ParallelMergeSort
 	|
 	(9)     |---P(A=[4],B=[4])             (10) M(A=[4], B): B=[4]
 	(11)     \---P(A=[2],B=[2])             (12) M(A=[2], B): B=[2]
 	During merge, we fold back up, and alternate between A and B for the current storage. This avoids the need to dynamically allocate memory during sort and avoids unnecessary copies.
 	*/
 	// Sequential partition algorithm. Provide an array, and an upper and lower bound to sort.
 	template <typename ArrayT, typename ComparatorT>
 	void partition(ArrayT & source, ArrayT & destination, const ComparatorT & comparator, const std::size_t & lower_bound, const std::size_t & upper_bound) {
 		std::size_t count = upper_bound - lower_bound;
 		// In the case where count == 1, we are at the very bottom of the tree and both source and destination will be the same.
 		// The same applies when count == 2, but we might need to swap the items around if they are not in the right order.
 		if (count == 2) {
@@ -169,66 +169,66 @@ namespace ParallelMergeSort
 			// After this point, where count > 2, source and destination are different.
 		} else if (count > 2) {
 			std::size_t middle_bound = (lower_bound + upper_bound) / 2;
 			// While it is possible to simply call partition, we try to avoid recursion by folding up the bottom two cases:
 			//  (count == 1), do nothing
 			//  (count == 2), swap if order is not correct
-			//  (count > 2), partition
+			//  (count > 2), partition
 			// After profiling, I found that the benefit of unrolling (count == 2) was minimal - there was about a 2-3% improvement.
 			std::size_t lower_count = middle_bound - lower_bound;
 			if (lower_count > 1)
 				partition(destination, source, comparator, lower_bound, middle_bound);
-			std::size_t upper_count = upper_bound - middle_bound;
+			std::size_t upper_count = upper_bound - middle_bound;
 			if (upper_count > 1)
 				partition(destination, source, comparator, middle_bound, upper_bound);
 			merge(source, destination, comparator, lower_bound, middle_bound, upper_bound);
 		}
 	}
 	/** Parallel Partition Algorithm
 	This parallel partition algorithm, which controls the downward descent of the merge sort algorithm, is designed for large datasets. Because merge sort follows a binary tree structure, the work is essentially split between two threads at each node in the tree. Firstly, we must recursively call partition on two separate threads. Once this is done, we have two ascending sequences, and we merge these together, again using two threads, one for left sequence and one for right sequence.
 	Because higher level threads will be waiting on lower level threads, the value of threaded should be chosen so that 2^threaded == processors for best performance.
 	*/
 	// Use this to control whether parallel partition is used.
 	// For large data sets > 500_000 items, you will see an improvement of about ~50% per thread.
 	const bool PARALLEL_PARTITION = true;
-	// Use this to control whether parallel merge is used.
+	// Use this to control whether parallel merge is used.
 	// For large data sets > 1_000_000 items, you will see an improvement of about 15%.
 	const bool PARALLEL_MERGE = true;
 	// If you make this number too small, e.g. <= 2, you may cause synchronisation issues, because you will force parallelisation
 	// for base cases which actually need to be sequential to ensure that comparison cache is generated correctly.
 	const std::size_t PARALLEL_MERGE_MINIMUM_COUNT = 128;
 	// Provide an array, and an upper and lower bound, along with the number of threads to use.
 	template <typename ArrayT, typename ComparatorT>
 	void partition(ArrayT & source, ArrayT & destination, const ComparatorT & comparator, std::size_t lower_bound, std::size_t upper_bound, std::size_t threaded) {
 		std::size_t count = upper_bound - lower_bound;
 		if (count > 1) {
 			std::size_t middle_bound = (lower_bound + upper_bound) / 2;
 			//Benchmark::WallTime tp;
 			if (PARALLEL_PARTITION && threaded > 0) {
 				// We could check whether there is any work to do before creating threads, but we assume
 				// that threads will only be created high up in the tree by default, so there *should*
 				// be a significant work available per-thread.
-				ParallelPartition<ArrayT, ComparatorT>
-					lower_partition = {destination, source, comparator, lower_bound, middle_bound, threaded - 1},
+				ParallelPartition<ArrayT, ComparatorT>
+					lower_partition = {destination, source, comparator, lower_bound, middle_bound, threaded - 1},
 				upper_partition = {destination, source, comparator, middle_bound, upper_bound, threaded - 1};
-				std::thread
+				std::thread
 					lower_thread(lower_partition),
 				upper_thread(upper_partition);
 				upper_thread.join();
 				lower_thread.join();
 			} else {
@@ -237,17 +237,17 @@ namespace ParallelMergeSort
 				partition(destination, source, comparator, middle_bound, upper_bound);
 			}
 			//std::cerr << "Partition Time: " << tp.total() << " [" << lower_bound << " -> " << upper_bound << " : " << threaded << " ]" << std::endl;
 			//Benchmark::WallTime tm;
 			if (PARALLEL_MERGE && threaded > 0 && count > PARALLEL_MERGE_MINIMUM_COUNT) {
 				// By the time we get here, we are sure that both left and right partitions have been merged, e.g. we have two ordered sequences [lower_bound, middle_bound] and [middle_bound, upper_bound]. Now, we need to join them together:
 				ParallelLeftMerge<ArrayT, ComparatorT> left_merge = {source, destination, comparator, lower_bound, middle_bound};
 				ParallelRightMerge<ArrayT, ComparatorT> right_merge = {source, destination, comparator, lower_bound, middle_bound, upper_bound};
 				std::thread
 					left_thread(left_merge),
 				right_thread(right_merge);
 				left_thread.join();
 				right_thread.join();
 			} else {
@@ -257,17 +257,17 @@ namespace ParallelMergeSort
 			//std::cerr << "Merge Time: " << tm.total() << " [" << lower_bound << " -> " << upper_bound << " : " << threaded << " ]" << std::endl;
 		}
 	}
 	/** Parallel Merge Sort, main entry point.
-	Given an array of items, a comparator functor, use at most 2^threaded threads to sort the items.
+	Given an array of items, a comparator functor, use at most 2^threaded threads to sort the items.
 	*/
 	template <typename ArrayT, typename ComparatorT>
 	void sort(ArrayT & array, const ComparatorT & comparator, std::size_t threaded = 2) {
 		// Is all this swapping around really necessary?
 		ArrayT temporary(array.begin(), array.end());
 		//Benchmark::WallTime ts;
 		if (threaded == 0)
 			partition(temporary, array, comparator, 0, array.size());

data/spec/build/graph/program/main.cpp CHANGED Viewed

@@ -17,18 +17,18 @@ template <typename AnyT>
 std::ostream& operator<< (std::ostream &o, const std::vector<AnyT> & v)
 {
 	bool first = true;
 	o << "[";
 	for (typename std::vector<AnyT>::const_iterator i = v.begin(); i != v.end(); ++i) {
 		if (first)
 			first = false;
 		else
 			o << ", ";
 		o << *i;
 	}
 	o << "]";
 	return o;
 }
@@ -42,18 +42,18 @@ static void test_parallel_merge ()
 		2, 4, 6, 8, 12,
 		1, 3, 5, 10, 11
 	};
 	ArrayT a(data, data+(sizeof(data)/sizeof(*data)));
 	ArrayT b(a.size());
 	ParallelMergeSort::ParallelLeftMerge<ArrayT, ComparatorT> left_merge = {a, b, comparator, 0, a.size() / 2};
 	left_merge();
 	std::cout << "After Left: " << b << std::endl;
 	ParallelMergeSort::ParallelRightMerge<ArrayT, ComparatorT> right_merge = {a, b, comparator, 0, a.size() / 2, a.size()};
 	right_merge();
 	std::cout << "After Right: " << b << std::endl;
 }
@@ -62,33 +62,33 @@ static void test_sort ()
 	typedef std::vector<long long> ArrayT;
 	typedef std::less<long long> ComparatorT;
 	ComparatorT comparator;
 	const long long data[] = {
 		11, 2, 4, 6, 8, 10, 12, 1, 3, 5, 7, 9, 13
 	};
 	std::vector<long long> v(data, data+(sizeof(data)/sizeof(*data)));
 	std::cerr << "Sorting " << v << std::endl;
 	ParallelMergeSort::sort(v, comparator, 0);
-	std::cerr << "Sorted  " << v << std::endl;
+	std::cerr << "Sorted  " << v << std::endl;
 }
 static void test_dictionary ()
 {
 	// This defines a dictionary based on ASCII characters.
 	typedef DictionarySort::Dictionary<char, DictionarySort::IndexT[256]> ASCIIDictionaryT;
 	// For unicode characters, you could use something like this:
 	// typedef DictionarySort::Dictionary<uint32_t, std::map<uint32_t, DictionarySort::IndexT>> UCS32DictionaryT;
-	// Be aware that
+	// Be aware that
 	std::string s = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
 	ASCIIDictionaryT::WordT alphabet(s.begin(), s.end());
 	ASCIIDictionaryT dictionary(alphabet);
 	ASCIIDictionaryT::WordsT words, sorted_words;
 	const std::size_t MAX_LENGTH = 25;
 	const std::size_t MAX_COUNT = 2500000;
@@ -99,7 +99,7 @@ static void test_dictionary ()
 		}
 		words.push_back(word);
 	}
 	std::cerr << "Sorting " << words.size() << " words..." << std::endl;
 	std::cerr << "Sort mode = " << DictionarySort::SORT_MODE << std::endl;
@@ -115,17 +115,17 @@ static void test_dictionary ()
 		checksum = dictionary.sort(words, sorted_words);
 	}
 	Benchmark::TimeT elapsed_time = t.total() / K;
 	std::cerr << "Checksum: " << checksum << " ? " << (checksum == 479465310674138860) << std::endl;
 	std::cerr << "Total Time: " << elapsed_time << std::endl;
 }
 int main (int argc, const char * argv[])
-{
+{
 	//test_parallel_merge();
 	//test_sort();
 	test_dictionary();
 	return 0;
 }

data/spec/build/graph/walker_spec.rb CHANGED Viewed

@@ -29,6 +29,8 @@ module Build::Graph::WalkerSpec
 	include Build::Files
 	describe Build::Graph::Walker do
+		let(:logger) {Logger.new($stderr).tap{|logger| logger.level = Logger::DEBUG}}
 		it "should be unique" do
 			test_glob = Glob.new(__dir__, "*.rb")
 			listing_output = Paths.directory(__dir__, ["listing.txt"])
@@ -36,8 +38,6 @@ module Build::Graph::WalkerSpec
 			node_a = Node.new(test_glob, listing_output, "a")
 			node_b = Node.new(listing_output, Paths::NONE, "b")
-			nodes = Set.new([node_a, node_b])
 			sequence = []
 			# A walker runs repeatedly, updating tasks which have been marked as dirty.
@@ -49,7 +49,7 @@ module Build::Graph::WalkerSpec
 				end
 			end
-			walker.update(nodes)
+			walker.update([node_a, node_b])
 			expect(walker.tasks.count).to be == 2
 			expect(walker.failed_tasks.count).to be == 0
@@ -64,8 +64,6 @@ module Build::Graph::WalkerSpec
 			node_a = Node.new(test_glob, listing_output, "a")
 			node_b = Node.new(listing_output, summary_output, "b")
-			nodes = Set.new([node_a, node_b])
 			# A walker runs repeatedly, updating tasks which have been marked as dirty.
 			walker = Walker.new do |walker, node|
 				task = Task.new(walker, node)
@@ -77,7 +75,7 @@ module Build::Graph::WalkerSpec
 				end
 			end
-			walker.update(nodes)
+			walker.update([node_a, node_b])
 			expect(walker.tasks.count).to be == 2
 			expect(walker.failed_tasks.count).to be == 2

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: build-graph
 version: !ruby/object:Gem::Version
-  version: 1.0.3
+  version: 1.0.4
 platform: ruby
 authors:
 - Samuel Williams
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-01-09 00:00:00.000000000 Z
+date: 2016-02-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: process-group
@@ -16,28 +16,28 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 1.0.1
+        version: 1.1.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 1.0.1
+        version: 1.1.0
 - !ruby/object:Gem::Dependency
   name: build-files
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 1.0.2
+        version: 1.0.3
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 1.0.2
+        version: 1.0.3
 - !ruby/object:Gem::Dependency
   name: build-makefile
   requirement: !ruby/object:Gem::Requirement
@@ -140,13 +140,15 @@ files:
 - Rakefile
 - build-graph.gemspec
 - lib/build/graph.rb
+- lib/build/graph/call_stack.rb
 - lib/build/graph/edge.rb
-- lib/build/graph/error.rb
 - lib/build/graph/node.rb
 - lib/build/graph/task.rb
 - lib/build/graph/version.rb
 - lib/build/graph/walker.rb
 - spec/build/graph/build_test.rb
+- spec/build/graph/call_stack_spec.rb
+- spec/build/graph/edge_spec.rb
 - spec/build/graph/graph_spec.rb
 - spec/build/graph/inherit_spec.rb
 - spec/build/graph/node_spec.rb
@@ -178,13 +180,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.6
+rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: Build::Graph is a framework for build systems, with specific functionality
   for dealing with file based processes.
 test_files:
 - spec/build/graph/build_test.rb
+- spec/build/graph/call_stack_spec.rb
+- spec/build/graph/edge_spec.rb
 - spec/build/graph/graph_spec.rb
 - spec/build/graph/inherit_spec.rb
 - spec/build/graph/node_spec.rb