clusta 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +0 -0
  3. data/VERSION +1 -0
  4. data/bin/clusta +35 -0
  5. data/lib/clusta.rb +24 -0
  6. data/lib/clusta/geometry.rb +25 -0
  7. data/lib/clusta/geometry/arrow.rb +16 -0
  8. data/lib/clusta/geometry/assortativity.rb +31 -0
  9. data/lib/clusta/geometry/degree.rb +28 -0
  10. data/lib/clusta/geometry/directed/arrow.rb +12 -0
  11. data/lib/clusta/geometry/directed/degree.rb +29 -0
  12. data/lib/clusta/geometry/directed/edge.rb +26 -0
  13. data/lib/clusta/geometry/directed/edge_degree_pair.rb +24 -0
  14. data/lib/clusta/geometry/directed/vertex_arrows.rb +25 -0
  15. data/lib/clusta/geometry/edge.rb +58 -0
  16. data/lib/clusta/geometry/edge_degree_pair.rb +21 -0
  17. data/lib/clusta/geometry/element.rb +131 -0
  18. data/lib/clusta/geometry/vertex.rb +11 -0
  19. data/lib/clusta/geometry/vertex_arrows.rb +45 -0
  20. data/lib/clusta/runner.rb +15 -0
  21. data/lib/clusta/transforms.rb +50 -0
  22. data/lib/clusta/transforms/edge_degree_pairs_to_assortativities.rb +42 -0
  23. data/lib/clusta/transforms/edges_to_degrees.rb +42 -0
  24. data/lib/clusta/transforms/edges_to_vertex_arrows.rb +50 -0
  25. data/lib/clusta/transforms/import.rb +17 -0
  26. data/lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb +63 -0
  27. data/spec/clusta/geometry/element_spec.rb +191 -0
  28. data/spec/clusta/transforms/edges_to_degrees_spec.rb +22 -0
  29. data/spec/clusta/transforms/edges_to_vertex_arrows_spec.rb +21 -0
  30. data/spec/data/README.rdoc +54 -0
  31. data/spec/data/degrees/directed.tsv +9 -0
  32. data/spec/data/degrees/undirected.tsv +9 -0
  33. data/spec/data/edges/directed.unweighted.tsv +10 -0
  34. data/spec/data/edges/directed.weighted.tsv +10 -0
  35. data/spec/data/edges/undirected.unweighted.tsv +9 -0
  36. data/spec/data/edges/undirected.weighted.tsv +9 -0
  37. data/spec/data/vertex_arrows/directed.unweighted.tsv +7 -0
  38. data/spec/data/vertex_arrows/directed.weighted.tsv +7 -0
  39. data/spec/data/vertex_arrows/undirected.unweighted.tsv +9 -0
  40. data/spec/data/vertex_arrows/undirected.weighted.tsv +9 -0
  41. data/spec/spec_helper.rb +21 -0
  42. data/spec/support/transforms_spec_helper.rb +120 -0
  43. metadata +123 -0
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
# Runs the "edges_to_degrees" transform over each sample edge list and
# compares what it prints with the matching degree fixture.
describe Clusta::Transforms::EdgesToDegrees do

  # Each row is: example description, input fixture, expected-output fixture.
  # Weighted and unweighted inputs are checked against the same degree fixture.
  [
    ["handles undirected, unweighted edges", "edges/undirected.unweighted.tsv", "degrees/undirected.tsv"],
    ["handles undirected, weighted edges",   "edges/undirected.weighted.tsv",   "degrees/undirected.tsv"],
    ["handles directed, unweighted edges",   "edges/directed.unweighted.tsv",   "degrees/directed.tsv"],
    ["handles directed, weighted edges",     "edges/directed.weighted.tsv",     "degrees/directed.tsv"]
  ].each do |description, input, expected|
    it description do
      transforming(input, :with => "edges_to_degrees").should have_output(expected)
    end
  end

end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
# Runs the "edges_to_vertex_arrows" transform over each sample edge list and
# compares what it prints with the matching vertex-arrows fixture.
describe Clusta::Transforms::EdgesToVertexArrows do

  # Each row is: example description, input fixture, expected-output fixture.
  [
    ["handles undirected, unweighted edges", "edges/undirected.unweighted.tsv", "vertex_arrows/undirected.unweighted.tsv"],
    ["handles undirected, weighted edges",   "edges/undirected.weighted.tsv",   "vertex_arrows/undirected.weighted.tsv"],
    ["handles directed, unweighted edges",   "edges/directed.unweighted.tsv",   "vertex_arrows/directed.unweighted.tsv"],
    ["handles directed, weighted edges",     "edges/directed.weighted.tsv",     "vertex_arrows/directed.weighted.tsv"]
  ].each do |description, input, expected|
    it description do
      transforming(input, :with => "edges_to_vertex_arrows").should have_output(expected)
    end
  end

end
@@ -0,0 +1,54 @@
1
+ This is the sample undirected network:
2
+
3
+ 1 -- 2 -- 3
4
+ |
5
+ 4 -- 5 -- 6
6
+ | |
7
+ 7 -- 8 -- 9
8
+
9
+ This is the sample directed network:
10
+
11
+ 1 --> 2 <-- 3
12
+ ^
13
+ |
14
+ |
15
+ 4 <-- 5 <-- 6
16
+ ^ ^
17
+ | |
18
+ v |
19
+ 7 --> 8 --> 9
20
+
21
+ This is the sample undirected, weighted network:
22
+
23
+ 1 --0.5-- 2 --0.1-- 3
24
+ |
25
+ 0.8
26
+ |
27
+ 4 --0.9-- 5 --0.4-- 6
28
+ | |
29
+ 0.3 0.7
30
+ | |
31
+ 7 --0.0-- 8 --1.0-- 9
32
+
33
+ This is the sample directed, weighted network:
34
+
35
+ 1 --0.5--> 2 <--0.1-- 3
36
+ ^
37
+ |
38
+ |
39
+ 0.8
40
+ |
41
+ |
42
+ 4 <--0.9-- 5 <--0.4-- 6
43
+ | ^
44
+ ^ | |
45
+ | | |
46
+ | | |
47
+ 0.3 0.2 0.7
48
+ | | |
49
+ | | |
50
+ | v |
51
+ | |
52
+ 7 --0.0--> 8 --1.0--> 9
53
+
54
+ Node 0 is always disconnected from the rest of the graph.
@@ -0,0 +1,9 @@
1
+ DirectedDegree 1 1 1
2
+ DirectedDegree 2 2 0
3
+ DirectedDegree 3 0 1
4
+ DirectedDegree 4 1 1
5
+ DirectedDegree 5 2 2
6
+ DirectedDegree 6 1 1
7
+ DirectedDegree 7 1 2
8
+ DirectedDegree 8 1 2
9
+ DirectedDegree 9 1 0
@@ -0,0 +1,9 @@
1
+ Degree 1 2
2
+ Degree 2 2
3
+ Degree 3 1
4
+ Degree 4 2
5
+ Degree 5 3
6
+ Degree 6 2
7
+ Degree 7 2
8
+ Degree 8 3
9
+ Degree 9 1
@@ -0,0 +1,10 @@
1
+ DirectedEdge 1 2
2
+ DirectedEdge 3 2
3
+ DirectedEdge 4 1
4
+ DirectedEdge 5 4
5
+ DirectedEdge 6 5
6
+ DirectedEdge 5 7
7
+ DirectedEdge 7 5
8
+ DirectedEdge 8 6
9
+ DirectedEdge 7 8
10
+ DirectedEdge 8 9
@@ -0,0 +1,10 @@
1
+ DirectedEdge 1 2 0.5
2
+ DirectedEdge 3 2 0.1
3
+ DirectedEdge 4 1 0.8
4
+ DirectedEdge 5 4 0.9
5
+ DirectedEdge 6 5 0.4
6
+ DirectedEdge 5 7 0.2
7
+ DirectedEdge 7 5 0.3
8
+ DirectedEdge 8 6 0.7
9
+ DirectedEdge 7 8 0.0
10
+ DirectedEdge 8 9 1.0
@@ -0,0 +1,9 @@
1
+ Edge 1 2
2
+ Edge 2 3
3
+ Edge 1 4
4
+ Edge 4 5
5
+ Edge 5 6
6
+ Edge 5 7
7
+ Edge 6 8
8
+ Edge 7 8
9
+ Edge 8 9
@@ -0,0 +1,9 @@
1
+ Edge 1 2 0.5
2
+ Edge 2 3 0.1
3
+ Edge 1 4 0.8
4
+ Edge 4 5 0.9
5
+ Edge 5 6 0.4
6
+ Edge 5 7 0.3
7
+ Edge 6 8 0.7
8
+ Edge 7 8 0.0
9
+ Edge 8 9 1.0
@@ -0,0 +1,7 @@
1
+ DirectedVertexArrows 1 DirectedArrow;2
2
+ DirectedVertexArrows 3 DirectedArrow;2
3
+ DirectedVertexArrows 4 DirectedArrow;1
4
+ DirectedVertexArrows 5 DirectedArrow;4 DirectedArrow;7
5
+ DirectedVertexArrows 6 DirectedArrow;5
6
+ DirectedVertexArrows 7 DirectedArrow;5 DirectedArrow;8
7
+ DirectedVertexArrows 8 DirectedArrow;6 DirectedArrow;9
@@ -0,0 +1,7 @@
1
+ DirectedVertexArrows 1 DirectedArrow;2;0.5
2
+ DirectedVertexArrows 3 DirectedArrow;2;0.1
3
+ DirectedVertexArrows 4 DirectedArrow;1;0.8
4
+ DirectedVertexArrows 5 DirectedArrow;4;0.9 DirectedArrow;7;0.2
5
+ DirectedVertexArrows 6 DirectedArrow;5;0.4
6
+ DirectedVertexArrows 7 DirectedArrow;5;0.3 DirectedArrow;8;0.0
7
+ DirectedVertexArrows 8 DirectedArrow;6;0.7 DirectedArrow;9;1.0
@@ -0,0 +1,9 @@
1
+ VertexArrows 1 Arrow;2 Arrow;4
2
+ VertexArrows 2 Arrow;1 Arrow;3
3
+ VertexArrows 3 Arrow;2
4
+ VertexArrows 4 Arrow;1 Arrow;5
5
+ VertexArrows 5 Arrow;4 Arrow;6 Arrow;7
6
+ VertexArrows 6 Arrow;5 Arrow;8
7
+ VertexArrows 7 Arrow;5 Arrow;8
8
+ VertexArrows 8 Arrow;6 Arrow;7 Arrow;9
9
+ VertexArrows 9 Arrow;8
@@ -0,0 +1,9 @@
1
+ VertexArrows 1 Arrow;2;0.5 Arrow;4;0.8
2
+ VertexArrows 2 Arrow;1;0.5 Arrow;3;0.1
3
+ VertexArrows 3 Arrow;2;0.1
4
+ VertexArrows 4 Arrow;1;0.8 Arrow;5;0.9
5
+ VertexArrows 5 Arrow;4;0.9 Arrow;6;0.4 Arrow;7;0.3
6
+ VertexArrows 6 Arrow;5;0.4 Arrow;8;0.7
7
+ VertexArrows 7 Arrow;5;0.3 Arrow;8;0.0
8
+ VertexArrows 8 Arrow;6;0.7 Arrow;7;0.0 Arrow;9;1.0
9
+ VertexArrows 9 Arrow;8;1.0
@@ -0,0 +1,21 @@
1
require 'rspec'

# Absolute paths to the project's main directories, derived from this file's
# location.  Each constant is only assigned when it is not already defined, so
# loading this helper more than once does not trigger redefinition warnings.
CLUSTA_ROOT_DIR      = File.expand_path('../', File.dirname(__FILE__)) unless defined?(CLUSTA_ROOT_DIR)
CLUSTA_LIB_DIR       = File.join(CLUSTA_ROOT_DIR, 'lib')               unless defined?(CLUSTA_LIB_DIR)
CLUSTA_BIN_DIR       = File.join(CLUSTA_ROOT_DIR, 'bin')               unless defined?(CLUSTA_BIN_DIR)
CLUSTA_SPEC_DIR      = File.join(CLUSTA_ROOT_DIR, 'spec')              unless defined?(CLUSTA_SPEC_DIR)
CLUSTA_SPEC_DATA_DIR = File.join(CLUSTA_SPEC_DIR, 'data')              unless defined?(CLUSTA_SPEC_DATA_DIR)

# Put the local lib directory at the FRONT of the load path so that
# `require 'clusta'` resolves to this checkout first.  (`$:.unshift << dir`
# calls unshift with no arguments and then appends, which puts the directory
# at the END of the load path instead.)
$:.unshift(CLUSTA_LIB_DIR) unless $:.include?(CLUSTA_LIB_DIR)
require 'clusta'

module Clusta
  # Error raised by the spec support code, e.g. when a fixture file is missing.
  SpecError = Class.new(Error)
end

# Load every support file; sorted so the load order does not depend on the
# order in which the filesystem happens to list the files.
Dir[File.expand_path('../support/**/*.rb', __FILE__)].sort.each { |path| require path }

RSpec.configure do |config|
  config.mock_with :rspec
  # Mix the helpers into example groups only.  A bare `include` here would run
  # with the top-level `main` object as self and add them to every Object.
  config.include Clusta::TransformsSpecHelper
end
@@ -0,0 +1,120 @@
1
+ require 'diffy'
2
+ require 'open3'
3
+
4
module Clusta
  # RSpec helpers for running a transform over a fixture file and comparing
  # what it prints with another fixture file:
  #
  #   transforming("edges/directed.unweighted.tsv", :with => "edges_to_degrees").
  #     should have_output("degrees/directed.tsv")
  #
  # Fixture paths are resolved relative to CLUSTA_SPEC_DATA_DIR.
  module TransformsSpecHelper

    # Builds a Transformer for the fixture +seg+.  +options+ must carry the
    # transform name under the :with key (checked lazily, when the command
    # line is built).
    def transforming seg, options={}
      Transformer.new(seg, options)
    end

    # Builds a matcher that compares a Transformer's output with the fixture
    # +seg+.  +seg+ is splatted, so a one-element array is accepted as well
    # as a plain string.
    def have_output seg
      HaveOutput.new(*seg)
    end

    # Runs the clusta executable over one fixture file and captures its
    # standard output, standard error and exit status.
    class Transformer

      attr_accessor :seg
      # Only writers are generated for these two: the readers are the lazy
      # #output and #error methods defined below.
      attr_writer :output, :error

      # seg::     fixture path relative to CLUSTA_SPEC_DATA_DIR
      # options:: hash; :with names the transform to run
      #
      # Raises SpecError if the fixture file does not exist.
      def initialize seg, options={}
        self.seg  = seg
        self.path = File.join(CLUSTA_SPEC_DATA_DIR, seg)
        @options  = options
      end

      # Stores the absolute input path, raising SpecError if nothing exists there.
      def path= path
        raise SpecError.new("Spec data path #{path} does not exist.") unless File.exist?(path)
        @path = path
      end

      # Path to the clusta executable under CLUSTA_BIN_DIR.
      def clusta_bin
        File.join(CLUSTA_BIN_DIR, 'clusta')
      end

      # Name of the transform to run.  Raises SpecError if :with was not given.
      def transform_name
        @options[:with] or raise SpecError.new("Must supply a transformation name with the :with option.")
      end

      # Command line handed to Open3: the input path followed by "-" as the
      # final argument.  --run=local is passed once (it used to be repeated).
      def command
        "#{clusta_bin} --run=local --transform=#{transform_name} #{@path} -"
      end

      # Standard output of the command; the command is run on first use.
      def output
        return @output if @output
        run
        @output
      end

      # Standard error of the command with every line prefixed by "STDERR: ".
      # Reuses the result of an earlier run instead of executing the command
      # a second time.
      def error
        return @formatted_error if @formatted_error
        run unless @exit
        @formatted_error = format_error(@error)
      end

      # Prefixes each line of +raw+ with "STDERR: ".
      def format_error raw
        raw.split("\n").map do |line|
          "STDERR: " + line
        end.join("\n")
      end

      # Executes the command and records its output, error stream and exit
      # status.  Open3.capture3 closes the child's stdin and drains stdout and
      # stderr concurrently, so a child that writes a lot to stderr cannot
      # block while we are still waiting on stdout.
      #
      # Returns the Process::Status of the finished command.
      def run
        @output, @error, @exit = Open3.capture3(command)
        @exit
      end

      # True if the command has not been run yet or did not exit successfully.
      def error?
        return true unless @exit
        !@exit.success?
      end

    end

    # Matcher that passes when a Transformer's output is identical to the
    # contents of a fixture file.
    class HaveOutput

      # seg:: fixture path relative to CLUSTA_SPEC_DATA_DIR
      #
      # Raises SpecError if the fixture file does not exist.
      def initialize seg
        @seg = seg
        self.path = File.join(CLUSTA_SPEC_DATA_DIR, seg)
      end

      # Truthy when the VERBOSE environment variable is set; failure messages
      # then always include the command's standard error.
      def verbose?
        ENV["VERBOSE"]
      end

      # Stores the absolute fixture path, raising SpecError if nothing exists there.
      def path= path
        raise SpecError.new("Spec data path #{path} does not exist.") unless File.exist?(path)
        @path = path
      end

      # Contents of the fixture file (read once, then cached).
      def expected
        @expected ||= File.read(@path)
      end

      # Remembers +transformer+ for the failure messages and compares its
      # output with the fixture contents.
      def matches? transformer
        @transformer = transformer
        @transformer.output == expected
      end

      # Diff between the expected and the actual output (built once, then cached).
      def diff
        @diff ||= Diffy::Diff.new(expected, @transformer.output)
      end

      # Message shown when the outputs were expected to match but did not.
      # The command's standard error is appended in verbose mode or when the
      # command failed.
      def failure_message
        "expected #{@transformer.seg} to match #{@seg}:\n\n#{diff}".tap do |m|
          m << "\n\n#{@transformer.error}" if verbose? || @transformer.error?
        end
      end

      # Message shown when the outputs were expected to differ but matched.
      def negative_failure_message
        "expected #{@transformer.seg} to be different than #{@seg}.".tap do |m|
          m << "\n\n#{@transformer.error}" if verbose? || @transformer.error?
        end
      end

      # RSpec 3 asks matchers for this name; the original name is kept for
      # older RSpec versions.
      alias_method :failure_message_when_negated, :negative_failure_message

    end

  end
end
120
+
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clusta
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dhruv Bansal
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-04 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &18809420 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *18809420
25
+ - !ruby/object:Gem::Dependency
26
+ name: diffy
27
+ requirement: &18808980 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *18808980
36
+ - !ruby/object:Gem::Dependency
37
+ name: wukong
38
+ requirement: &18808560 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *18808560
47
+ description: Clusta is a Ruby library that implements network algorithms using Wukong. This
48
+ means you can use and extend these algorithms on your laptop and seamlessly lift
49
+ them into a Hadoop cluster when you're ready.
50
+ email:
51
+ - dhruv@infochimps.com
52
+ executables:
53
+ - clusta
54
+ extensions: []
55
+ extra_rdoc_files: []
56
+ files:
57
+ - bin/clusta
58
+ - lib/clusta.rb
59
+ - lib/clusta/geometry/vertex.rb
60
+ - lib/clusta/geometry/element.rb
61
+ - lib/clusta/geometry/vertex_arrows.rb
62
+ - lib/clusta/geometry/directed/vertex_arrows.rb
63
+ - lib/clusta/geometry/directed/edge.rb
64
+ - lib/clusta/geometry/directed/arrow.rb
65
+ - lib/clusta/geometry/directed/edge_degree_pair.rb
66
+ - lib/clusta/geometry/directed/degree.rb
67
+ - lib/clusta/geometry/edge.rb
68
+ - lib/clusta/geometry/arrow.rb
69
+ - lib/clusta/geometry/edge_degree_pair.rb
70
+ - lib/clusta/geometry/assortativity.rb
71
+ - lib/clusta/geometry/degree.rb
72
+ - lib/clusta/transforms.rb
73
+ - lib/clusta/runner.rb
74
+ - lib/clusta/transforms/import.rb
75
+ - lib/clusta/transforms/edge_degree_pairs_to_assortativities.rb
76
+ - lib/clusta/transforms/edges_to_degrees.rb
77
+ - lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb
78
+ - lib/clusta/transforms/edges_to_vertex_arrows.rb
79
+ - lib/clusta/geometry.rb
80
+ - spec/clusta/geometry/element_spec.rb
81
+ - spec/clusta/transforms/edges_to_degrees_spec.rb
82
+ - spec/clusta/transforms/edges_to_vertex_arrows_spec.rb
83
+ - spec/spec_helper.rb
84
+ - spec/support/transforms_spec_helper.rb
85
+ - spec/data/edges/directed.weighted.tsv
86
+ - spec/data/edges/directed.unweighted.tsv
87
+ - spec/data/edges/undirected.weighted.tsv
88
+ - spec/data/edges/undirected.unweighted.tsv
89
+ - spec/data/vertex_arrows/directed.weighted.tsv
90
+ - spec/data/vertex_arrows/directed.unweighted.tsv
91
+ - spec/data/vertex_arrows/undirected.weighted.tsv
92
+ - spec/data/vertex_arrows/undirected.unweighted.tsv
93
+ - spec/data/degrees/undirected.tsv
94
+ - spec/data/degrees/directed.tsv
95
+ - spec/data/README.rdoc
96
+ - LICENSE
97
+ - README.rdoc
98
+ - VERSION
99
+ homepage: http://github.com/dhruvbansal/clusta
100
+ licenses: []
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ! '>='
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ requirements: []
118
+ rubyforge_project:
119
+ rubygems_version: 1.8.17
120
+ signing_key:
121
+ specification_version: 3
122
+ summary: Scalable network algorithms library built in Ruby using Wukong.
123
+ test_files: []