clusta 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +0 -0
  3. data/VERSION +1 -0
  4. data/bin/clusta +35 -0
  5. data/lib/clusta.rb +24 -0
  6. data/lib/clusta/geometry.rb +25 -0
  7. data/lib/clusta/geometry/arrow.rb +16 -0
  8. data/lib/clusta/geometry/assortativity.rb +31 -0
  9. data/lib/clusta/geometry/degree.rb +28 -0
  10. data/lib/clusta/geometry/directed/arrow.rb +12 -0
  11. data/lib/clusta/geometry/directed/degree.rb +29 -0
  12. data/lib/clusta/geometry/directed/edge.rb +26 -0
  13. data/lib/clusta/geometry/directed/edge_degree_pair.rb +24 -0
  14. data/lib/clusta/geometry/directed/vertex_arrows.rb +25 -0
  15. data/lib/clusta/geometry/edge.rb +58 -0
  16. data/lib/clusta/geometry/edge_degree_pair.rb +21 -0
  17. data/lib/clusta/geometry/element.rb +131 -0
  18. data/lib/clusta/geometry/vertex.rb +11 -0
  19. data/lib/clusta/geometry/vertex_arrows.rb +45 -0
  20. data/lib/clusta/runner.rb +15 -0
  21. data/lib/clusta/transforms.rb +50 -0
  22. data/lib/clusta/transforms/edge_degree_pairs_to_assortativities.rb +42 -0
  23. data/lib/clusta/transforms/edges_to_degrees.rb +42 -0
  24. data/lib/clusta/transforms/edges_to_vertex_arrows.rb +50 -0
  25. data/lib/clusta/transforms/import.rb +17 -0
  26. data/lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb +63 -0
  27. data/spec/clusta/geometry/element_spec.rb +191 -0
  28. data/spec/clusta/transforms/edges_to_degrees_spec.rb +22 -0
  29. data/spec/clusta/transforms/edges_to_vertex_arrows_spec.rb +21 -0
  30. data/spec/data/README.rdoc +54 -0
  31. data/spec/data/degrees/directed.tsv +9 -0
  32. data/spec/data/degrees/undirected.tsv +9 -0
  33. data/spec/data/edges/directed.unweighted.tsv +10 -0
  34. data/spec/data/edges/directed.weighted.tsv +10 -0
  35. data/spec/data/edges/undirected.unweighted.tsv +9 -0
  36. data/spec/data/edges/undirected.weighted.tsv +9 -0
  37. data/spec/data/vertex_arrows/directed.unweighted.tsv +7 -0
  38. data/spec/data/vertex_arrows/directed.weighted.tsv +7 -0
  39. data/spec/data/vertex_arrows/undirected.unweighted.tsv +9 -0
  40. data/spec/data/vertex_arrows/undirected.weighted.tsv +9 -0
  41. data/spec/spec_helper.rb +21 -0
  42. data/spec/support/transforms_spec_helper.rb +120 -0
  43. metadata +123 -0
@@ -0,0 +1,11 @@
1
module Clusta
  module Geometry

    # A geometry Element that declares exactly one field of its own: +label+.
    class Vertex < Element
      field(:label)
    end

  end
end
10
+
11
+
@@ -0,0 +1,45 @@
1
module Clusta
  module Geometry

    # A Vertex whose values beyond the declared +label+ field are exposed
    # under the name +arrows+, plus helpers that turn those arrows into
    # EdgeDegreePair objects.
    class VertexArrows < Vertex

      # Name the extra (undeclared) input fields +arrows+.
      input_fields(:arrows)

      # Returns the first arrow whose +target_label+ equals the given
      # +target_label+, or nil when no arrow matches.
      def joins?(target_label)
        arrows.find { |candidate| candidate.target_label == target_label }
      end

      # This class always reports itself as undirected.
      def directed?
        false
      end

      # One-element array holding the number of arrows on this vertex.
      def source_degrees
        [arrows.size]
      end

      # One-element array holding the constant 1.
      def target_degrees
        [1]
      end

      # Builds an EdgeDegreePair from the two labels and any extra +args+.
      def edge_degree_pair(source_label, target_label, *args)
        EdgeDegreePair.new(source_label, target_label, *args)
      end

      # One pair per arrow, running from this vertex's label to the arrow's
      # target label, with source degrees listed before target degrees.
      def edge_degrees_pairs
        own_degrees = source_degrees
        arrows.map do |arrow|
          edge_degree_pair(label, arrow.target_label, *(own_degrees + target_degrees))
        end
      end

      # One pair per arrow, running from the arrow's target label back to
      # this vertex's label, with target degrees listed before source degrees.
      def reversed_edge_degree_pairs
        own_degrees = source_degrees
        arrows.map do |arrow|
          edge_degree_pair(arrow.target_label, label, *(target_degrees + own_degrees))
        end
      end

    end

  end
end
@@ -0,0 +1,15 @@
1
module Clusta

  # Holds the argument list it was constructed with and, when run,
  # resolves the global Settings.
  class Runner

    # The arguments given to #initialize. The accessor must be declared
    # here: #initialize assigns through +self.args=+, which would raise
    # NoMethodError if no writer existed.
    attr_accessor :args

    # @param args [Object] the arguments to remember (stored as given).
    def initialize(args)
      self.args = args
    end

    # Resolves Settings. Returns whatever +Settings.resolve!+ returns.
    def run!
      Settings.resolve!
    end

  end

end
@@ -0,0 +1,50 @@
1
module Clusta

  # Registers transform modules for autoloading and looks them up by name.
  module Transforms

    # Stores +klass+ in Wukong::RESOURCE_CLASS_MAP under every alias.
    # When +aliases+ is nil the only alias used is +klass.to_s+.
    def self.register_streamable(klass, aliases = nil)
      names = aliases || [klass.to_s]
      names.each do |name|
        Wukong::RESOURCE_CLASS_MAP[name] = klass
      end
    end

    # Declares constant +name+ on this module as autoloaded from +path+.
    def self.register_transform(name, path)
      autoload(name, path)
    end

    # Register every Ruby file in the sibling "transforms" directory.
    transform_glob = File.join(File.dirname(__FILE__), "transforms/*.rb")
    Dir[transform_glob].each do |path|
      require_name = Clusta.require_name(path)
      register_transform(Clusta.classify(require_name), "clusta/transforms/#{require_name}")
    end

    # Pattern for a "--transform=NAME" argument.
    ARG_REGEXP = /--transform=[\w\d_]+/

    # Looks up the transform named by the text after the last '=' in +arg+.
    def self.from_arg(arg)
      transform_name = arg.split('=').last
      from_name(transform_name)
    end

    # Returns the constant on this module named by the classified +name+.
    # A NameError during lookup is re-raised as Error with a
    # "No such transform" message.
    def self.from_name(name)
      const_get(Clusta.classify(name))
    rescue NameError
      raise Error, "No such transform: '#{name}'"
    end

    # Builds a Wukong::Script from the transform's Mapper and Reducer
    # constants; whichever is not defined is passed as nil.
    def self.script_for(transform)
      mapper  = defined?(transform::Mapper)  ? transform::Mapper  : nil
      reducer = defined?(transform::Reducer) ? transform::Reducer : nil
      Wukong::Script.new(mapper, reducer)
    end

  end

  # Make every geometry element reachable under its fully qualified name,
  # its Geometry-relative name and its bare name.
  Geometry::ELEMENTS.each do |element_name|
    element = Geometry.const_get(element_name)
    element_aliases = [
      "Clusta::Geometry::#{element_name}",
      "Geometry::#{element_name}",
      element_name.to_s
    ]
    Transforms.register_streamable(element, element_aliases)
  end

end
@@ -0,0 +1,42 @@
1
module Clusta
  module Transforms

    # Mapper/Reducer pair that turns edge degree pairs into assortativities
    # and adds together the ones sharing a key.
    module EdgeDegreePairsToAssortativities

      # Emits the +assortativity+ of each record it receives.
      class Mapper < Wukong::Streamer::StructStreamer

        def process(edge_degree_pair, *record)
          emit(edge_degree_pair.assortativity)
        end

      end

      # Keeps a running total in +assortativity+ and emits it on finalize.
      class Reducer < Wukong::Streamer::AccumulatingReducer

        attr_accessor :assortativity

        include Wukong::Streamer::StructRecordizer

        # The key of a record is the record's own +key+.
        def get_key(new_assortativity, *record)
          new_assortativity.key
        end

        # Resets the running total to the record's +zero+ value.
        def start!(new_assortativity, *record)
          self.assortativity = new_assortativity.zero
        end

        # Adds the record onto the running total.
        def accumulate(new_assortativity, *record)
          self.assortativity = assortativity + new_assortativity
        end

        # Emits the running total.
        def finalize(&block)
          emit(assortativity)
        end

      end

    end
  end

end
@@ -0,0 +1,42 @@
1
module Clusta
  module Transforms

    # Mapper/Reducer pair that turns edges into per-vertex degrees.
    module EdgesToDegrees

      # Emits every element of +edge.degrees+, one record each.
      class Mapper < Wukong::Streamer::StructStreamer

        def process(edge, *record)
          edge.degrees.each do |degree|
            emit(degree)
          end
        end

      end

      # Keeps a running total in +degree+ and emits it on finalize.
      class Reducer < Wukong::Streamer::AccumulatingReducer

        attr_accessor :degree

        include Wukong::Streamer::StructRecordizer

        # The key of a record is its +vertex_label+.
        def get_key(new_degree, *record)
          new_degree.vertex_label
        end

        # Starts a fresh total of the record's own class: two zero counters
        # for a directed degree, one otherwise.
        def start!(new_degree, *record)
          self.degree =
            if new_degree.directed?
              new_degree.class.new(key, 0, 0)
            else
              new_degree.class.new(key, 0)
            end
        end

        # Adds the record onto the running total.
        def accumulate(new_degree, *record)
          self.degree = degree + new_degree
        end

        # Emits the running total.
        def finalize(&block)
          emit(degree)
        end

      end

    end
  end

end
@@ -0,0 +1,50 @@
1
module Clusta
  module Transforms

    # Mapper/Reducer pair that collects the arrows of the edges sharing a
    # source label into a single vertex-arrows record.
    module EdgesToVertexArrows

      # Emits each edge, and additionally its reverse when the edge is
      # not directed.
      class Mapper < Wukong::Streamer::StructStreamer

        def process(edge, *record)
          emit(edge)
          return if edge.directed?
          emit(edge.reversed)
        end

      end

      # Gathers arrows in +arrows+ and remembers in +directed+ whether the
      # record seen by start! was directed.
      class Reducer < Wukong::Streamer::AccumulatingReducer

        attr_accessor :arrows, :directed

        include Wukong::Streamer::StructRecordizer

        # The current key doubles as the label of the vertex being built.
        alias_method :vertex_label, :key

        # The key of a record is its +source_label+.
        def get_key(new_edge, *record)
          new_edge.source_label
        end

        # Empties the arrow list and records the edge's directedness.
        def start!(new_edge, *record)
          self.arrows   = []
          self.directed = new_edge.directed?
        end

        # Appends the edge's arrow to the list.
        def accumulate(new_edge, *record)
          arrows << new_edge.arrow
        end

        # Emits a DirectedVertexArrows or a VertexArrows, depending on
        # +directed+, built from the vertex label and the collected arrows.
        def finalize(&block)
          vertex_class = directed ? Geometry::DirectedVertexArrows : Geometry::VertexArrows
          emit(vertex_class.new(vertex_label, *arrows))
        end

      end

    end
  end

end
@@ -0,0 +1,17 @@
1
module Clusta
  module Transforms

    # Mapper-only transform that tags every record with Settings[:as].
    module Import

      class Mapper < Wukong::Streamer::Base

        # Puts the value of Settings[:as] in front of the record's fields
        # and emits the result.
        def process(*record)
          tagged = record.unshift(Settings[:as])
          emit(tagged)
        end
      end

    end
  end

end
@@ -0,0 +1,63 @@
1
module Clusta
  module Transforms

    # Mapper/Reducer pair that expands vertex-arrows records into edge
    # degree pairs and then fills in summed source degrees for each key.
    module VertexArrowsToEdgeDegreePairs

      # Emits every element of the record's +reversed_edge_degree_pairs+.
      class Mapper < Wukong::Streamer::StructStreamer

        def process(vertex_arrows, *record)
          vertex_arrows.reversed_edge_degree_pairs.each do |edge_degree|
            emit(edge_degree)
          end
        end

      end

      # Buffers the pairs it is given while summing their source degree
      # values, then writes the sums back into every pair before emitting.
      class Reducer < Wukong::Streamer::AccumulatingReducer

        attr_accessor :edge_degree_pairs, :source_degree, :source_in_degree, :source_out_degree

        include Wukong::Streamer::StructRecordizer

        # The key of a record is its +source_label+.
        def get_key(new_edge_degree_pair, *record)
          new_edge_degree_pair.source_label
        end

        # Clears the buffer and zeroes the counters that apply to the
        # record: in/out counters when directed, a single counter otherwise.
        def start!(new_edge_degree_pair, *record)
          self.edge_degree_pairs = []
          if new_edge_degree_pair.directed?
            self.source_in_degree  = 0
            self.source_out_degree = 0
          else
            self.source_degree = 0
          end
        end

        # Buffers the pair and adds its source degree value(s) to the totals.
        def accumulate(new_edge_degree_pair, *record)
          edge_degree_pairs << new_edge_degree_pair
          if new_edge_degree_pair.directed?
            self.source_in_degree  = source_in_degree + new_edge_degree_pair.source_in_degree_value
            self.source_out_degree = source_out_degree + new_edge_degree_pair.source_out_degree_value
          else
            self.source_degree = source_degree + new_edge_degree_pair.source_degree_value
          end
        end

        # Overwrites each buffered pair's source degree value(s) with the
        # totals, then emits the pair.
        def finalize(&block)
          edge_degree_pairs.each do |pair|
            if pair.directed?
              pair.source_in_degree_value  = source_in_degree
              pair.source_out_degree_value = source_out_degree
            else
              pair.source_degree_value = source_degree
            end
            emit(pair)
          end
        end

      end

    end
  end

end
63
+
@@ -0,0 +1,191 @@
1
+ require 'spec_helper'
2
+
3
describe Clusta::Geometry::Element do

  # Field declarations belong to the class that makes them and to its
  # descendants, never to its ancestors.
  describe "setting inheritable fields" do

    it "should not define any fields of its own" do
      Clusta::Geometry::Element.fields.should == []
    end

    it "should allow a subclass to set its own fields without polluting the parent" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      Clusta::Geometry::Element.field_names.should_not include('foo')
      klass.field_names.should include('foo')
    end

    it "should allow a subclass of a subclass to set its own fields without polluting the parent" do
      child = Class.new(Clusta::Geometry::Element)
      child.field(:foo)
      grandchild = Class.new(child)
      grandchild.field(:bar)

      Clusta::Geometry::Element.field_names.should_not include('foo')
      Clusta::Geometry::Element.field_names.should_not include('bar')

      child.field_names.should include('foo')
      child.field_names.should_not include('bar')

      grandchild.field_names.should include('foo')
      grandchild.field_names.should include('bar')
    end

    it "should not allow for more than one optional field" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo, :optional => true)
      second_optional = lambda { klass.field(:bar, :optional => true) }
      second_optional.should raise_error
    end
  end

  # Positional constructor arguments are assigned to fields in
  # declaration order.
  describe "initializing" do

    it "should assign declared fields" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      klass.field(:baz)
      element = klass.new("bar", "boof")
      element.foo.should == "bar"
      element.baz.should == "boof"
    end

    it "should allow for an optional field at the end" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      klass.field(:baz, :optional => true)

      without_optional = klass.new("bar")
      without_optional.foo.should == "bar"
      without_optional.baz.should == nil

      with_optional = klass.new("bar", "boof")
      with_optional.foo.should == "bar"
      with_optional.baz.should == "boof"
    end

  end

  # to_flat puts the field values at index 1 onwards.
  describe "serializing" do

    it "constructs an array" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      klass.field(:baz)
      klass.new("bar", "boof").to_flat[1].should == 'bar'
      klass.new("bar", "boof").to_flat[2].should == 'boof'
    end

    it "constructs an array with optional fields" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      klass.field(:baz, :optional => true)
      klass.new("bar").to_flat[2].should == nil
      klass.new("bar", "boof").to_flat[2].should == 'boof'
    end

  end

  # Arguments past the declared fields end up in input_fields.
  describe "dealing with fields beyond those declared" do

    it "should accept additional fields by default" do
      element = Clusta::Geometry::Element.new("foo", "bar", "baz")
      element.input_fields.should include("foo", "bar", "baz")
    end

    it "should serialize additional fields properly" do
      element = Clusta::Geometry::Element.new("foo", "bar", "baz")
      element.to_flat.should include("foo", "bar", "baz")
    end

    it "should accept additional fields on a subclass" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      element = klass.new("foovalue", "bar", "baz")
      element.foo.should == "foovalue"
      element.input_fields.should include("bar", "baz")
    end

    it "should serialize additional fields on a subclass properly" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      element = klass.new("foovalue", "bar", "baz")
      element.to_flat.should include("foovalue", "bar", "baz")
    end

    it "should allow a subclass to alias input_fields" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      klass.input_fields(:bar)
      element = klass.new("foovalue", "bar", "baz")
      element.foo.should == "foovalue"
      element.bar.should == element.input_fields
    end

    it "should behave sensibly with both an optional field and input fields" do
      klass = Class.new(Clusta::Geometry::Element)
      klass.field(:foo)
      klass.field(:bar, :optional => true)

      full = klass.new("foovalue", "barvalue", "extra1", "extra2")
      full.foo.should == 'foovalue'
      full.bar.should == 'barvalue'
      full.input_fields.should == ['extra1', 'extra2']

      minimal = klass.new("foovalue")
      minimal.foo.should == 'foovalue'
      minimal.bar.should be_nil
      minimal.input_fields.should be_empty

    end
  end

  # Elements nested inside other elements, written as "Edge;1;2" strings.
  describe "embedded geometry elements" do

    it "should be able to instantiate embedded elements when named as fields" do
      parent = Class.new(Clusta::Geometry::Element)

      parent.field(:foo)
      parent.field(:child, :type => :geometry)

      element = parent.new("foovalue", "Edge;1;2")
      element.foo.should == 'foovalue'
      element.child.source_label.should == '1'
      element.child.target_label.should == '2'
    end

    it "should be able to serialize embedded elements when named as fields" do
      parent = Class.new(Clusta::Geometry::Element)

      parent.field(:foo)
      parent.field(:child, :type => :geometry)

      edge = Clusta::Geometry::Edge.new('1', '2')
      element = parent.new('foovalue', edge)
      element.to_flat.should include('foovalue', 'Edge;1;2')
    end

    it "should be able to instantiate embedded elements when given as input fields" do
      parent = Class.new(Clusta::Geometry::Element)

      parent.field(:foo)
      element = parent.new("foovalue", "Edge;1;2", "Edge;3;4")

      element.foo.should == 'foovalue'

      element.input_fields.size.should == 2
      element.input_fields[0].source_label.should == '1'
      element.input_fields[0].target_label.should == '2'
      element.input_fields[1].source_label.should == '3'
      element.input_fields[1].target_label.should == '4'
    end

    it "should be able to serialize embedded elements when given as input fields" do
      parent = Class.new(Clusta::Geometry::Element)

      parent.field(:foo)
      first_edge  = Clusta::Geometry::Edge.new('1','2')
      second_edge = Clusta::Geometry::Edge.new('3','4')
      element = parent.new('foovalue', first_edge, second_edge)
      element.to_flat.should include('foovalue', 'Edge;1;2')
    end

  end

end
191
+