clusta 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +0 -0
  3. data/VERSION +1 -0
  4. data/bin/clusta +35 -0
  5. data/lib/clusta.rb +24 -0
  6. data/lib/clusta/geometry.rb +25 -0
  7. data/lib/clusta/geometry/arrow.rb +16 -0
  8. data/lib/clusta/geometry/assortativity.rb +31 -0
  9. data/lib/clusta/geometry/degree.rb +28 -0
  10. data/lib/clusta/geometry/directed/arrow.rb +12 -0
  11. data/lib/clusta/geometry/directed/degree.rb +29 -0
  12. data/lib/clusta/geometry/directed/edge.rb +26 -0
  13. data/lib/clusta/geometry/directed/edge_degree_pair.rb +24 -0
  14. data/lib/clusta/geometry/directed/vertex_arrows.rb +25 -0
  15. data/lib/clusta/geometry/edge.rb +58 -0
  16. data/lib/clusta/geometry/edge_degree_pair.rb +21 -0
  17. data/lib/clusta/geometry/element.rb +131 -0
  18. data/lib/clusta/geometry/vertex.rb +11 -0
  19. data/lib/clusta/geometry/vertex_arrows.rb +45 -0
  20. data/lib/clusta/runner.rb +15 -0
  21. data/lib/clusta/transforms.rb +50 -0
  22. data/lib/clusta/transforms/edge_degree_pairs_to_assortativities.rb +42 -0
  23. data/lib/clusta/transforms/edges_to_degrees.rb +42 -0
  24. data/lib/clusta/transforms/edges_to_vertex_arrows.rb +50 -0
  25. data/lib/clusta/transforms/import.rb +17 -0
  26. data/lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb +63 -0
  27. data/spec/clusta/geometry/element_spec.rb +191 -0
  28. data/spec/clusta/transforms/edges_to_degrees_spec.rb +22 -0
  29. data/spec/clusta/transforms/edges_to_vertex_arrows_spec.rb +21 -0
  30. data/spec/data/README.rdoc +54 -0
  31. data/spec/data/degrees/directed.tsv +9 -0
  32. data/spec/data/degrees/undirected.tsv +9 -0
  33. data/spec/data/edges/directed.unweighted.tsv +10 -0
  34. data/spec/data/edges/directed.weighted.tsv +10 -0
  35. data/spec/data/edges/undirected.unweighted.tsv +9 -0
  36. data/spec/data/edges/undirected.weighted.tsv +9 -0
  37. data/spec/data/vertex_arrows/directed.unweighted.tsv +7 -0
  38. data/spec/data/vertex_arrows/directed.weighted.tsv +7 -0
  39. data/spec/data/vertex_arrows/undirected.unweighted.tsv +9 -0
  40. data/spec/data/vertex_arrows/undirected.weighted.tsv +9 -0
  41. data/spec/spec_helper.rb +21 -0
  42. data/spec/support/transforms_spec_helper.rb +120 -0
  43. metadata +123 -0
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
# Runs every edge fixture (directed or undirected, weighted or unweighted)
# through the "edges_to_degrees" transform and compares what comes out with
# the degree fixture for the same direction. Both weightings of a direction
# are checked against the same degree fixture file.
describe Clusta::Transforms::EdgesToDegrees do

  %w[undirected directed].product(%w[unweighted weighted]).each do |direction, weighting|
    it "handles #{direction}, #{weighting} edges" do
      input    = "edges/#{direction}.#{weighting}.tsv"
      expected = "degrees/#{direction}.tsv"
      transforming(input, :with => "edges_to_degrees").should have_output(expected)
    end
  end

end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
# Runs every edge fixture through the "edges_to_vertex_arrows" transform and
# compares what comes out with the vertex-arrows fixture that has the same
# direction and weighting.
describe Clusta::Transforms::EdgesToVertexArrows do

  [
    %w[undirected unweighted],
    %w[undirected weighted],
    %w[directed unweighted],
    %w[directed weighted]
  ].each do |direction, weighting|
    fixture = "#{direction}.#{weighting}.tsv"

    it "handles #{direction}, #{weighting} edges" do
      transforming("edges/#{fixture}", :with => "edges_to_vertex_arrows").should have_output("vertex_arrows/#{fixture}")
    end
  end

end
@@ -0,0 +1,54 @@
1
+ This is the sample undirected network:
2
+
3
+ 1 -- 2 -- 3
4
+ |
5
+ 4 -- 5 -- 6
6
+ | |
7
+ 7 -- 8 -- 9
8
+
9
+ This is the sample directed network:
10
+
11
+ 1 --> 2 <-- 3
12
+ ^
13
+ |
14
+ |
15
+ 4 <-- 5 <-- 6
16
+ ^ ^
17
+ | |
18
+ v |
19
+ 7 --> 8 --> 9
20
+
21
+ This is the sample undirected, weighted network:
22
+
23
+ 1 --0.5-- 2 --0.1-- 3
24
+ |
25
+ 0.8
26
+ |
27
+ 4 --0.9-- 5 --0.4-- 6
28
+ | |
29
+ 0.3 0.7
30
+ | |
31
+ 7 --0.0-- 8 --1.0-- 9
32
+
33
+ This is the sample directed, weighted network:
34
+
35
+ 1 --0.5--> 2 <--0.1-- 3
36
+ ^
37
+ |
38
+ |
39
+ 0.8
40
+ |
41
+ |
42
+ 4 <--0.9-- 5 <--0.4-- 6
43
+ | ^
44
+ ^ | |
45
+ | | |
46
+ | | |
47
+ 0.3 0.2 0.7
48
+ | | |
49
+ | | |
50
+ | v |
51
+ | |
52
+ 7 --0.0--> 8 --1.0--> 9
53
+
54
+ Node 0 is always disconnected from the rest of the graph.
@@ -0,0 +1,9 @@
1
+ DirectedDegree 1 1 1
2
+ DirectedDegree 2 2 0
3
+ DirectedDegree 3 0 1
4
+ DirectedDegree 4 1 1
5
+ DirectedDegree 5 2 2
6
+ DirectedDegree 6 1 1
7
+ DirectedDegree 7 1 2
8
+ DirectedDegree 8 1 2
9
+ DirectedDegree 9 1 0
@@ -0,0 +1,9 @@
1
+ Degree 1 2
2
+ Degree 2 2
3
+ Degree 3 1
4
+ Degree 4 2
5
+ Degree 5 3
6
+ Degree 6 2
7
+ Degree 7 2
8
+ Degree 8 3
9
+ Degree 9 1
@@ -0,0 +1,10 @@
1
+ DirectedEdge 1 2
2
+ DirectedEdge 3 2
3
+ DirectedEdge 4 1
4
+ DirectedEdge 5 4
5
+ DirectedEdge 6 5
6
+ DirectedEdge 5 7
7
+ DirectedEdge 7 5
8
+ DirectedEdge 8 6
9
+ DirectedEdge 7 8
10
+ DirectedEdge 8 9
@@ -0,0 +1,10 @@
1
+ DirectedEdge 1 2 0.5
2
+ DirectedEdge 3 2 0.1
3
+ DirectedEdge 4 1 0.8
4
+ DirectedEdge 5 4 0.9
5
+ DirectedEdge 6 5 0.4
6
+ DirectedEdge 5 7 0.2
7
+ DirectedEdge 7 5 0.3
8
+ DirectedEdge 8 6 0.7
9
+ DirectedEdge 7 8 0.0
10
+ DirectedEdge 8 9 1.0
@@ -0,0 +1,9 @@
1
+ Edge 1 2
2
+ Edge 2 3
3
+ Edge 1 4
4
+ Edge 4 5
5
+ Edge 5 6
6
+ Edge 5 7
7
+ Edge 6 8
8
+ Edge 7 8
9
+ Edge 8 9
@@ -0,0 +1,9 @@
1
+ Edge 1 2 0.5
2
+ Edge 2 3 0.1
3
+ Edge 1 4 0.8
4
+ Edge 4 5 0.9
5
+ Edge 5 6 0.4
6
+ Edge 5 7 0.3
7
+ Edge 6 8 0.7
8
+ Edge 7 8 0.0
9
+ Edge 8 9 1.0
@@ -0,0 +1,7 @@
1
+ DirectedVertexArrows 1 DirectedArrow;2
2
+ DirectedVertexArrows 3 DirectedArrow;2
3
+ DirectedVertexArrows 4 DirectedArrow;1
4
+ DirectedVertexArrows 5 DirectedArrow;4 DirectedArrow;7
5
+ DirectedVertexArrows 6 DirectedArrow;5
6
+ DirectedVertexArrows 7 DirectedArrow;5 DirectedArrow;8
7
+ DirectedVertexArrows 8 DirectedArrow;6 DirectedArrow;9
@@ -0,0 +1,7 @@
1
+ DirectedVertexArrows 1 DirectedArrow;2;0.5
2
+ DirectedVertexArrows 3 DirectedArrow;2;0.1
3
+ DirectedVertexArrows 4 DirectedArrow;1;0.8
4
+ DirectedVertexArrows 5 DirectedArrow;4;0.9 DirectedArrow;7;0.2
5
+ DirectedVertexArrows 6 DirectedArrow;5;0.4
6
+ DirectedVertexArrows 7 DirectedArrow;5;0.3 DirectedArrow;8;0.0
7
+ DirectedVertexArrows 8 DirectedArrow;6;0.7 DirectedArrow;9;1.0
@@ -0,0 +1,9 @@
1
+ VertexArrows 1 Arrow;2 Arrow;4
2
+ VertexArrows 2 Arrow;1 Arrow;3
3
+ VertexArrows 3 Arrow;2
4
+ VertexArrows 4 Arrow;1 Arrow;5
5
+ VertexArrows 5 Arrow;4 Arrow;6 Arrow;7
6
+ VertexArrows 6 Arrow;5 Arrow;8
7
+ VertexArrows 7 Arrow;5 Arrow;8
8
+ VertexArrows 8 Arrow;6 Arrow;7 Arrow;9
9
+ VertexArrows 9 Arrow;8
@@ -0,0 +1,9 @@
1
+ VertexArrows 1 Arrow;2;0.5 Arrow;4;0.8
2
+ VertexArrows 2 Arrow;1;0.5 Arrow;3;0.1
3
+ VertexArrows 3 Arrow;2;0.1
4
+ VertexArrows 4 Arrow;1;0.8 Arrow;5;0.9
5
+ VertexArrows 5 Arrow;4;0.9 Arrow;6;0.4 Arrow;7;0.3
6
+ VertexArrows 6 Arrow;5;0.4 Arrow;8;0.7
7
+ VertexArrows 7 Arrow;5;0.3 Arrow;8;0.0
8
+ VertexArrows 8 Arrow;6;0.7 Arrow;7;0.0 Arrow;9;1.0
9
+ VertexArrows 9 Arrow;8;1.0
@@ -0,0 +1,21 @@
1
require 'rspec'

# Absolute paths to the project's main directories, all derived from the
# location of this file. Each constant is guarded so that loading this file
# more than once does not trigger "already initialized constant" warnings.
CLUSTA_ROOT_DIR      = File.expand_path('../', File.dirname(__FILE__)) unless defined?(CLUSTA_ROOT_DIR)
CLUSTA_LIB_DIR       = File.join(CLUSTA_ROOT_DIR, 'lib')               unless defined?(CLUSTA_LIB_DIR)
CLUSTA_BIN_DIR       = File.join(CLUSTA_ROOT_DIR, 'bin')               unless defined?(CLUSTA_BIN_DIR)
CLUSTA_SPEC_DIR      = File.join(CLUSTA_ROOT_DIR, 'spec')              unless defined?(CLUSTA_SPEC_DIR)
CLUSTA_SPEC_DATA_DIR = File.join(CLUSTA_SPEC_DIR, 'data')              unless defined?(CLUSTA_SPEC_DATA_DIR)

# Put this checkout's lib directory at the FRONT of the load path so that
# `require 'clusta'` below resolves to it first. The previous form,
# `$:.unshift << dir`, called unshift with no arguments (a no-op) and then
# appended the directory to the END of the load path.
$:.unshift(CLUSTA_LIB_DIR) unless $:.include?(CLUSTA_LIB_DIR)
require 'clusta'

module Clusta
  # Error class for problems detected by the spec support code itself.
  SpecError = Class.new(Error)
end

# Load every support file under spec/support; sorted so the load order does
# not depend on the order in which the filesystem lists the files.
Dir[File.expand_path('../support/**/*.rb', __FILE__)].sort.each { |path| require path }

RSpec.configure do |config|
  config.mock_with :rspec
  # Mix the helpers into example groups through RSpec's configuration rather
  # than with a bare `include`, which at this point would add them to Object.
  config.include Clusta::TransformsSpecHelper
end
@@ -0,0 +1,120 @@
1
+ require 'diffy'
2
+ require 'open3'
3
+
4
module Clusta
  # Spec helpers for checking the output of a transform against a fixture.
  #
  # Intended usage inside an example:
  #
  #   transforming("edges/directed.weighted.tsv", :with => "edges_to_degrees").
  #     should have_output("degrees/directed.tsv")
  #
  # Fixture paths are relative to CLUSTA_SPEC_DATA_DIR.
  module TransformsSpecHelper

    # Builds a Transformer for the fixture at +seg+.
    #
    # @param seg [String] input fixture path relative to CLUSTA_SPEC_DATA_DIR
    # @param options [Hash] must contain the transform name under :with
    # @return [Transformer]
    def transforming seg, options={}
      Transformer.new(seg, options)
    end

    # Builds the matcher that compares a Transformer's output with the
    # fixture at +seg+.
    #
    # @param seg [String] expected-output fixture path relative to CLUSTA_SPEC_DATA_DIR
    # @return [HaveOutput]
    def have_output seg
      # Pass the segment straight through; splatting it was unnecessary.
      HaveOutput.new(seg)
    end

    # Runs the clusta executable on one input fixture and captures its
    # stdout, stderr and exit status. The command is executed lazily, the
    # first time #output or #error is asked for, and at most once.
    class Transformer

      attr_accessor :seg
      # Writers are kept for backward compatibility; the readers are defined
      # by hand below because they trigger the (lazy) run.
      attr_writer :output, :error

      # @param seg [String] input fixture path relative to CLUSTA_SPEC_DATA_DIR
      # @param options [Hash] must contain the transform name under :with
      # @raise [SpecError] if the fixture file does not exist
      def initialize seg, options={}
        self.seg  = seg
        self.path = File.join(CLUSTA_SPEC_DATA_DIR, seg)
        @options  = options
      end

      # Sets the absolute input path, refusing paths that do not exist.
      #
      # @raise [SpecError] if +path+ does not exist
      def path= path
        raise SpecError.new("Spec data path #{path} does not exist.") unless File.exist?(path)
        @path = path
      end

      # @return [String] path of the clusta executable under CLUSTA_BIN_DIR
      def clusta_bin
        File.join(CLUSTA_BIN_DIR, 'clusta')
      end

      # @return [String] the transform name given with the :with option
      # @raise [SpecError] if no :with option was supplied
      def transform_name
        @options[:with] or raise SpecError.new("Must supply a transformation name with the :with option.")
      end

      # @return [String] the command line that is executed; the input path is
      #   followed by "-" (the --run=local flag used to be passed twice)
      def command
        "#{clusta_bin} --run=local --transform=#{transform_name} #{@path} -"
      end

      # @return [String] everything the command wrote to stdout
      def output
        run unless @output || ran?
        @output
      end

      # @return [String] the command's stderr with every line prefixed by "STDERR: "
      def error
        return @formatted_error if @formatted_error
        # Reuse the result of an earlier run instead of executing the command again.
        run unless ran?
        @formatted_error = format_error(@error)
      end

      # Prefixes each line of +raw+ with "STDERR: ".
      #
      # @param raw [String, nil] raw stderr text
      # @return [String]
      def format_error raw
        raw.to_s.split("\n").map { |line| "STDERR: " + line }.join("\n")
      end

      # Executes #command and records stdout, stderr and the exit status.
      #
      # Open3.capture3 drains stdout and stderr concurrently and closes the
      # child's stdin, so a child that writes a lot to stderr or waits on
      # stdin cannot deadlock the spec run (reading stdout to EOF before
      # touching stderr, as done previously, could).
      #
      # @return [Process::Status] exit status of the command
      def run
        @output, @error, @exit = Open3.capture3(command)
        @exit
      end

      # @return [Boolean] true if the command has not been run yet or did not
      #   finish successfully
      def error?
        return true unless @exit
        !@exit.success?
      end

      private

      # @return [Boolean] whether the command has already been executed
      def ran?
        !@exit.nil?
      end

    end

    # RSpec matcher that passes when a Transformer's output is identical to
    # the contents of an expected-output fixture file.
    class HaveOutput

      # @param seg [String] expected-output fixture path relative to CLUSTA_SPEC_DATA_DIR
      # @raise [SpecError] if the fixture file does not exist
      def initialize seg
        @seg = seg
        self.path = File.join(CLUSTA_SPEC_DATA_DIR, seg)
      end

      # @return [String, nil] value of the VERBOSE environment variable; when
      #   set, the transform's stderr is always appended to failure messages
      def verbose?
        ENV["VERBOSE"]
      end

      # Sets the absolute path of the expected output, refusing paths that do
      # not exist.
      #
      # @raise [SpecError] if +path+ does not exist
      def path= path
        raise SpecError.new("Spec data path #{path} does not exist.") unless File.exist?(path)
        @path = path
      end

      # @return [String] contents of the expected-output fixture (read once)
      def expected
        @expected ||= File.read(@path)
      end

      # @param transformer [Transformer] the subject under test
      # @return [Boolean] whether its output equals the expected fixture exactly
      def matches? transformer
        @transformer = transformer
        @transformer.output == expected
      end

      # @return [Diffy::Diff] diff from the expected fixture to the actual output
      def diff
        @diff ||= Diffy::Diff.new(expected, @transformer.output)
      end

      # Message shown when the outputs were expected to match but did not.
      def failure_message
        with_stderr("expected #{@transformer.seg} to match #{@seg}:\n\n#{diff}")
      end

      # Message shown when the outputs were expected to differ but matched.
      def negative_failure_message
        with_stderr("expected #{@transformer.seg} to be different than #{@seg}.")
      end

      # The matcher protocol has used different names for these two messages
      # across RSpec major versions; answer to all of them.
      alias_method :failure_message_for_should,     :failure_message
      alias_method :failure_message_for_should_not, :negative_failure_message
      alias_method :failure_message_when_negated,   :negative_failure_message

      private

      # Appends the transform's stderr to +message+ when running verbosely or
      # when the command itself failed.
      def with_stderr message
        return message unless verbose? || @transformer.error?
        message + "\n\n#{@transformer.error}"
      end

    end

  end
end
120
+
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clusta
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dhruv Bansal
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-04 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &18809420 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *18809420
25
+ - !ruby/object:Gem::Dependency
26
+ name: diffy
27
+ requirement: &18808980 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *18808980
36
+ - !ruby/object:Gem::Dependency
37
+ name: wukong
38
+ requirement: &18808560 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *18808560
47
+ description: Clusta is a Ruby library that implements network algorithms using Wukong. This
48
+ means you can use and extend these algorithms on your laptop and seamlessly lift
49
+ them into a Hadoop cluster when you're ready.
50
+ email:
51
+ - dhruv@infochimps.com
52
+ executables:
53
+ - clusta
54
+ extensions: []
55
+ extra_rdoc_files: []
56
+ files:
57
+ - bin/clusta
58
+ - lib/clusta.rb
59
+ - lib/clusta/geometry/vertex.rb
60
+ - lib/clusta/geometry/element.rb
61
+ - lib/clusta/geometry/vertex_arrows.rb
62
+ - lib/clusta/geometry/directed/vertex_arrows.rb
63
+ - lib/clusta/geometry/directed/edge.rb
64
+ - lib/clusta/geometry/directed/arrow.rb
65
+ - lib/clusta/geometry/directed/edge_degree_pair.rb
66
+ - lib/clusta/geometry/directed/degree.rb
67
+ - lib/clusta/geometry/edge.rb
68
+ - lib/clusta/geometry/arrow.rb
69
+ - lib/clusta/geometry/edge_degree_pair.rb
70
+ - lib/clusta/geometry/assortativity.rb
71
+ - lib/clusta/geometry/degree.rb
72
+ - lib/clusta/transforms.rb
73
+ - lib/clusta/runner.rb
74
+ - lib/clusta/transforms/import.rb
75
+ - lib/clusta/transforms/edge_degree_pairs_to_assortativities.rb
76
+ - lib/clusta/transforms/edges_to_degrees.rb
77
+ - lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb
78
+ - lib/clusta/transforms/edges_to_vertex_arrows.rb
79
+ - lib/clusta/geometry.rb
80
+ - spec/clusta/geometry/element_spec.rb
81
+ - spec/clusta/transforms/edges_to_degrees_spec.rb
82
+ - spec/clusta/transforms/edges_to_vertex_arrows_spec.rb
83
+ - spec/spec_helper.rb
84
+ - spec/support/transforms_spec_helper.rb
85
+ - spec/data/edges/directed.weighted.tsv
86
+ - spec/data/edges/directed.unweighted.tsv
87
+ - spec/data/edges/undirected.weighted.tsv
88
+ - spec/data/edges/undirected.unweighted.tsv
89
+ - spec/data/vertex_arrows/directed.weighted.tsv
90
+ - spec/data/vertex_arrows/directed.unweighted.tsv
91
+ - spec/data/vertex_arrows/undirected.weighted.tsv
92
+ - spec/data/vertex_arrows/undirected.unweighted.tsv
93
+ - spec/data/degrees/undirected.tsv
94
+ - spec/data/degrees/directed.tsv
95
+ - spec/data/README.rdoc
96
+ - LICENSE
97
+ - README.rdoc
98
+ - VERSION
99
+ homepage: http://github.com/dhruvbansal/clusta
100
+ licenses: []
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ! '>='
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ requirements: []
118
+ rubyforge_project:
119
+ rubygems_version: 1.8.17
120
+ signing_key:
121
+ specification_version: 3
122
+ summary: Scalable network algorithms library built in Ruby using Wukong.
123
+ test_files: []