clusta 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +0 -0
  3. data/VERSION +1 -0
  4. data/bin/clusta +35 -0
  5. data/lib/clusta.rb +24 -0
  6. data/lib/clusta/geometry.rb +25 -0
  7. data/lib/clusta/geometry/arrow.rb +16 -0
  8. data/lib/clusta/geometry/assortativity.rb +31 -0
  9. data/lib/clusta/geometry/degree.rb +28 -0
  10. data/lib/clusta/geometry/directed/arrow.rb +12 -0
  11. data/lib/clusta/geometry/directed/degree.rb +29 -0
  12. data/lib/clusta/geometry/directed/edge.rb +26 -0
  13. data/lib/clusta/geometry/directed/edge_degree_pair.rb +24 -0
  14. data/lib/clusta/geometry/directed/vertex_arrows.rb +25 -0
  15. data/lib/clusta/geometry/edge.rb +58 -0
  16. data/lib/clusta/geometry/edge_degree_pair.rb +21 -0
  17. data/lib/clusta/geometry/element.rb +131 -0
  18. data/lib/clusta/geometry/vertex.rb +11 -0
  19. data/lib/clusta/geometry/vertex_arrows.rb +45 -0
  20. data/lib/clusta/runner.rb +15 -0
  21. data/lib/clusta/transforms.rb +50 -0
  22. data/lib/clusta/transforms/edge_degree_pairs_to_assortativities.rb +42 -0
  23. data/lib/clusta/transforms/edges_to_degrees.rb +42 -0
  24. data/lib/clusta/transforms/edges_to_vertex_arrows.rb +50 -0
  25. data/lib/clusta/transforms/import.rb +17 -0
  26. data/lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb +63 -0
  27. data/spec/clusta/geometry/element_spec.rb +191 -0
  28. data/spec/clusta/transforms/edges_to_degrees_spec.rb +22 -0
  29. data/spec/clusta/transforms/edges_to_vertex_arrows_spec.rb +21 -0
  30. data/spec/data/README.rdoc +54 -0
  31. data/spec/data/degrees/directed.tsv +9 -0
  32. data/spec/data/degrees/undirected.tsv +9 -0
  33. data/spec/data/edges/directed.unweighted.tsv +10 -0
  34. data/spec/data/edges/directed.weighted.tsv +10 -0
  35. data/spec/data/edges/undirected.unweighted.tsv +9 -0
  36. data/spec/data/edges/undirected.weighted.tsv +9 -0
  37. data/spec/data/vertex_arrows/directed.unweighted.tsv +7 -0
  38. data/spec/data/vertex_arrows/directed.weighted.tsv +7 -0
  39. data/spec/data/vertex_arrows/undirected.unweighted.tsv +9 -0
  40. data/spec/data/vertex_arrows/undirected.weighted.tsv +9 -0
  41. data/spec/spec_helper.rb +21 -0
  42. data/spec/support/transforms_spec_helper.rb +120 -0
  43. metadata +123 -0
@@ -0,0 +1,11 @@
1
module Clusta
  module Geometry
    # A graph vertex: an Element subclass that declares exactly one
    # field, +label+.
    class Vertex < Element
      field(:label)
    end
  end
end
@@ -0,0 +1,45 @@
1
module Clusta
  module Geometry
    # A vertex together with the arrows that leave it.  Everything passed
    # after the vertex label is exposed under the name +arrows+.
    class VertexArrows < Vertex
      input_fields :arrows

      # Returns the first arrow pointing at +target_label+, or nil when
      # there is none (so the result is truthy exactly when one exists).
      def joins?(target_label)
        arrows.find { |candidate| candidate.target_label == target_label }
      end

      # This flavour of vertex/arrows record is undirected.
      def directed?
        false
      end

      # Degree values contributed for this vertex: a one-element list
      # holding the number of arrows.
      def source_degrees
        [arrows.size]
      end

      # Degree values emitted for the vertex at the other end of an arrow.
      # NOTE(review): the constant 1 looks like a per-arrow contribution
      # that a later reduce step sums up -- confirm against the
      # VertexArrowsToEdgeDegreePairs reducer.
      def target_degrees
        [1]
      end

      # Builds the pair object used by both pair-producing methods below.
      def edge_degree_pair(source_label, target_label, *args)
        EdgeDegreePair.new(source_label, target_label, *args)
      end

      # One pair per arrow, oriented from this vertex to the arrow's
      # target: source degrees first, then target degrees.
      def edge_degrees_pairs
        own_degrees = source_degrees
        arrows.map do |arrow|
          degree_values = own_degrees + target_degrees
          edge_degree_pair(label, arrow.target_label, *degree_values)
        end
      end

      # One pair per arrow, oriented from the arrow's target back to this
      # vertex: target degrees first, then this vertex's degrees.
      def reversed_edge_degree_pairs
        own_degrees = source_degrees
        arrows.map do |arrow|
          degree_values = target_degrees + own_degrees
          edge_degree_pair(arrow.target_label, label, *degree_values)
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Clusta
  # Thin wrapper around a command-line invocation: it remembers the
  # argument list it was constructed with and, when run, resolves the
  # global settings.
  class Runner
    # The argument list given to the constructor.  Without this accessor
    # the `self.args = args` assignment in #initialize raises
    # NoMethodError, because no `args=` writer would exist.
    attr_accessor :args

    # @param args [Array<String>] raw arguments to keep on the runner
    def initialize(args)
      self.args = args
    end

    # Resolves the global Settings object.
    # NOTE(review): Settings is defined outside this file (presumably
    # Configliere's settings object) -- confirm what resolve! consumes.
    def run!
      Settings.resolve!
    end
  end
end
@@ -0,0 +1,50 @@
1
module Clusta
  # Registry and lookup helpers for the transforms that live in
  # clusta/transforms/*.rb.
  module Transforms
    # Registers +klass+ in Wukong::RESOURCE_CLASS_MAP under every alias in
    # +aliases+; when no aliases are given, the class's own name is used.
    def self.register_streamable(klass, aliases = nil)
      (aliases || [klass.to_s]).each do |klass_alias|
        Wukong::RESOURCE_CLASS_MAP[klass_alias] = klass
      end
    end

    # Declares constant +name+ on this module as autoloaded from +path+.
    def self.register_transform(name, path)
      autoload name, path
    end

    # Register every file found in the sibling transforms/ directory.
    Dir[File.join(File.dirname(__FILE__), "transforms/*.rb")].each do |path|
      require_name = Clusta.require_name(path)
      register_transform Clusta.classify(require_name), "clusta/transforms/#{require_name}"
    end

    # Shape of the command-line argument that selects a transform.
    ARG_REGEXP = /--transform=[\w\d_]+/

    # Looks up the transform named after the last '=' in +arg+.
    def self.from_arg(arg)
      from_name(arg.split('=').last)
    end

    # Returns the transform module registered under +name+.
    #
    # The lookup is restricted to constants of this module (inherit =
    # false).  With the default inheritance a name such as "string" would
    # resolve to the top-level ::String instead of being rejected.
    #
    # Raises Error when no such transform constant exists.
    def self.from_name(name)
      const_get(Clusta.classify(name), false)
    rescue NameError
      raise Error, "No such transform: '#{name}'"
    end

    # Builds a Wukong::Script from the transform's Mapper and Reducer;
    # whichever of the two the transform does not define is passed as nil.
    def self.script_for(transform)
      mapper  = transform::Mapper  if defined?(transform::Mapper)
      reducer = transform::Reducer if defined?(transform::Reducer)
      Wukong::Script.new(mapper, reducer)
    end
  end

  # Make every geometry element resolvable by its fully qualified,
  # partially qualified and bare name.
  Geometry::ELEMENTS.each do |element_name|
    Transforms.register_streamable Geometry.const_get(element_name), [
      "Clusta::Geometry::#{element_name}",
      "Geometry::#{element_name}",
      element_name.to_s
    ]
  end
end
@@ -0,0 +1,42 @@
1
module Clusta
  module Transforms
    # Turns edge/degree pairs into assortativity records and sums the
    # records that share a key.
    module EdgeDegreePairsToAssortativities
      # Emits the assortativity of each incoming edge/degree pair.
      class Mapper < Wukong::Streamer::StructStreamer
        def process(edge_degree_pair, *_record)
          emit(edge_degree_pair.assortativity)
        end
      end

      # Adds up all assortativity records that share the same key.
      class Reducer < Wukong::Streamer::AccumulatingReducer
        attr_accessor :assortativity

        include Wukong::Streamer::StructRecordizer

        # Records are grouped by the assortativity's own key.
        def get_key(new_assortativity, *_record)
          new_assortativity.key
        end

        # Begin each group from the record's zero value.
        def start!(new_assortativity, *_record)
          self.assortativity = new_assortativity.zero
        end

        def accumulate(new_assortativity, *_record)
          self.assortativity = assortativity + new_assortativity
        end

        # Emits the accumulated total for the group.
        def finalize(&block)
          emit(assortativity)
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
module Clusta
  module Transforms
    # Turns edges into per-vertex degree records.
    module EdgesToDegrees
      # Emits every degree record an edge contributes.
      class Mapper < Wukong::Streamer::StructStreamer
        def process(edge, *_record)
          edge.degrees.each do |degree|
            emit(degree)
          end
        end
      end

      # Sums the degree records that belong to the same vertex.
      class Reducer < Wukong::Streamer::AccumulatingReducer
        attr_accessor :degree

        include Wukong::Streamer::StructRecordizer

        # Records are grouped by vertex label.
        def get_key(new_degree, *_record)
          new_degree.vertex_label
        end

        # Start from an all-zero degree of the same class as the first
        # record: two counters for a directed degree, one otherwise.
        def start!(new_degree, *_record)
          zero_counts = new_degree.directed? ? [0, 0] : [0]
          self.degree = new_degree.class.new(key, *zero_counts)
        end

        def accumulate(new_degree, *_record)
          self.degree = degree + new_degree
        end

        # Emits the summed degree for the vertex.
        def finalize(&block)
          emit(degree)
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module Clusta
  module Transforms
    # Groups edges by source vertex into one vertex/arrows record each.
    module EdgesToVertexArrows
      # Emits each edge; an undirected edge is emitted a second time in
      # reversed form so it is seen from both of its endpoints.
      class Mapper < Wukong::Streamer::StructStreamer
        def process(edge, *_record)
          emit(edge)
          emit(edge.reversed) unless edge.directed?
        end
      end

      # Collects the arrows of all edges that share a source label.
      class Reducer < Wukong::Streamer::AccumulatingReducer
        attr_accessor :arrows, :directed

        include Wukong::Streamer::StructRecordizer

        # The group key is the label of the vertex being assembled.
        alias_method :vertex_label, :key

        def get_key(new_edge, *_record)
          new_edge.source_label
        end

        # Reset the arrow list and remember whether the group's first
        # edge is directed.
        def start!(new_edge, *_record)
          self.arrows = []
          self.directed = new_edge.directed?
        end

        def accumulate(new_edge, *_record)
          arrows << new_edge.arrow
        end

        # Emits the directed or undirected vertex/arrows record for the
        # group, depending on the flag captured in start!.
        def finalize(&block)
          record_class = directed ? Geometry::DirectedVertexArrows : Geometry::VertexArrows
          emit(record_class.new(vertex_label, *arrows))
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Clusta
  module Transforms
    # Tags raw records with the value of the :as setting.
    module Import
      # Emits each incoming record with Settings[:as] prepended as its
      # first field.
      class Mapper < Wukong::Streamer::Base
        def process(*record)
          tagged = [Settings[:as], *record]
          emit(tagged)
        end
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
module Clusta
  module Transforms
    # Turns vertex/arrows records into edge/degree pairs whose source
    # degree values are the totals over every pair with that source.
    module VertexArrowsToEdgeDegreePairs
      # Emits the reversed edge/degree pairs of each vertex/arrows record.
      class Mapper < Wukong::Streamer::StructStreamer
        def process(vertex_arrows, *_record)
          vertex_arrows.reversed_edge_degree_pairs.each do |edge_degree|
            emit(edge_degree)
          end
        end
      end

      # Buffers all pairs sharing a source label while summing their
      # source degree values, then writes the sums back into every
      # buffered pair before emitting it.
      class Reducer < Wukong::Streamer::AccumulatingReducer
        attr_accessor :edge_degree_pairs, :source_degree, :source_in_degree, :source_out_degree

        include Wukong::Streamer::StructRecordizer

        # Pairs are grouped by source label.
        def get_key(new_edge_degree_pair, *_record)
          new_edge_degree_pair.source_label
        end

        # Empty the buffer and zero the counters that apply to the kind
        # of pair (directed or not) that opens the group.
        def start!(new_edge_degree_pair, *_record)
          self.edge_degree_pairs = []
          if new_edge_degree_pair.directed?
            self.source_in_degree = 0
            self.source_out_degree = 0
          else
            self.source_degree = 0
          end
        end

        # Buffer the pair and add its source degree value(s) to the totals.
        def accumulate(new_edge_degree_pair, *_record)
          edge_degree_pairs << new_edge_degree_pair
          if new_edge_degree_pair.directed?
            self.source_in_degree = source_in_degree + new_edge_degree_pair.source_in_degree_value
            self.source_out_degree = source_out_degree + new_edge_degree_pair.source_out_degree_value
          else
            self.source_degree = source_degree + new_edge_degree_pair.source_degree_value
          end
        end

        # Overwrite each buffered pair's source degree value(s) with the
        # group totals and emit it.
        def finalize(&block)
          edge_degree_pairs.each do |buffered_pair|
            if buffered_pair.directed?
              buffered_pair.source_in_degree_value = source_in_degree
              buffered_pair.source_out_degree_value = source_out_degree
            else
              buffered_pair.source_degree_value = source_degree
            end
            emit(buffered_pair)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,191 @@
1
require 'spec_helper'

# Behaviour of Clusta::Geometry::Element: field declaration and
# inheritance, construction, flat serialization, handling of values
# beyond the declared fields, and embedded geometry elements.
describe Clusta::Geometry::Element do

  describe "setting inheritable fields" do

    it "should not define any fields of its own" do
      Clusta::Geometry::Element.fields.should == []
    end

    it "should allow a subclass to set its own fields without polluting the parent" do
      wrapper = Class.new(Clusta::Geometry::Element) { field :foo }

      Clusta::Geometry::Element.field_names.should_not include('foo')
      wrapper.field_names.should include('foo')
    end

    it "should allow a subclass of a subclass to set its own fields without polluting the parent" do
      wrapper1 = Class.new(Clusta::Geometry::Element) { field :foo }
      wrapper2 = Class.new(wrapper1) { field :bar }

      Clusta::Geometry::Element.field_names.should_not include('foo')
      Clusta::Geometry::Element.field_names.should_not include('bar')

      wrapper1.field_names.should include('foo')
      wrapper1.field_names.should_not include('bar')

      wrapper2.field_names.should include('foo')
      wrapper2.field_names.should include('bar')
    end

    it "should not allow for more than one optional field" do
      wrapper = Class.new(Clusta::Geometry::Element) { field :foo, :optional => true }

      # Declaring a second optional field must be rejected.
      lambda { wrapper.field :bar, :optional => true }.should raise_error
    end
  end

  describe "initializing" do

    it "should assign declared fields" do
      wrapper = Class.new(Clusta::Geometry::Element) do
        field :foo
        field :baz
      end

      instance = wrapper.new("bar", "boof")
      instance.foo.should == "bar"
      instance.baz.should == "boof"
    end

    it "should allow for an optional field at the end" do
      wrapper = Class.new(Clusta::Geometry::Element) do
        field :foo
        field :baz, :optional => true
      end

      instance = wrapper.new("bar")
      instance.foo.should == "bar"
      instance.baz.should == nil

      instance = wrapper.new("bar", "boof")
      instance.foo.should == "bar"
      instance.baz.should == "boof"
    end

  end

  describe "serializing" do

    it "constructs an array" do
      wrapper = Class.new(Clusta::Geometry::Element) do
        field :foo
        field :baz
      end

      wrapper.new("bar", "boof").to_flat[1].should == 'bar'
      wrapper.new("bar", "boof").to_flat[2].should == 'boof'
    end

    it "constructs an array with optional fields" do
      wrapper = Class.new(Clusta::Geometry::Element) do
        field :foo
        field :baz, :optional => true
      end

      wrapper.new("bar").to_flat[2].should == nil
      wrapper.new("bar", "boof").to_flat[2].should == 'boof'
    end

  end

  describe "dealing with fields beyond those declared" do

    it "should accept additional fields by default" do
      instance = Clusta::Geometry::Element.new("foo", "bar", "baz")
      instance.input_fields.should include("foo", "bar", "baz")
    end

    it "should serialize additional fields properly" do
      instance = Clusta::Geometry::Element.new("foo", "bar", "baz")
      instance.to_flat.should include("foo", "bar", "baz")
    end

    it "should accept additional fields on a subclass" do
      wrapper = Class.new(Clusta::Geometry::Element) { field :foo }

      instance = wrapper.new("foovalue", "bar", "baz")
      instance.foo.should == "foovalue"
      instance.input_fields.should include("bar", "baz")
    end

    it "should serialize additional fields on a subclass properly" do
      wrapper = Class.new(Clusta::Geometry::Element) { field :foo }

      instance = wrapper.new("foovalue", "bar", "baz")
      instance.to_flat.should include("foovalue", "bar", "baz")
    end

    it "should allow a subclass to alias input_fields" do
      wrapper = Class.new(Clusta::Geometry::Element) do
        field :foo
        input_fields :bar
      end

      instance = wrapper.new("foovalue", "bar", "baz")
      instance.foo.should == "foovalue"
      instance.bar.should == instance.input_fields
    end

    it "should behave sensibly with both an optional field and input fields" do
      wrapper = Class.new(Clusta::Geometry::Element) do
        field :foo
        field :bar, :optional => true
      end

      # All positions supplied: the optional field is filled first and
      # the remainder becomes input fields.
      instance = wrapper.new("foovalue", "barvalue", "extra1", "extra2")
      instance.foo.should == 'foovalue'
      instance.bar.should == 'barvalue'
      instance.input_fields.should == ['extra1', 'extra2']

      # Only the required field supplied.
      instance = wrapper.new("foovalue")
      instance.foo.should == 'foovalue'
      instance.bar.should be_nil
      instance.input_fields.should be_empty
    end
  end

  describe "embedded geometry elements" do

    it "should be able to instantiate embedded elements when named as fields" do
      parent = Class.new(Clusta::Geometry::Element) do
        field :foo
        field :child, :type => :geometry
      end

      instance = parent.new("foovalue", "Edge;1;2")
      instance.foo.should == 'foovalue'
      instance.child.source_label.should == '1'
      instance.child.target_label.should == '2'
    end

    it "should be able to serialize embedded elements when named as fields" do
      parent = Class.new(Clusta::Geometry::Element) do
        field :foo
        field :child, :type => :geometry
      end

      instance = parent.new('foovalue', Clusta::Geometry::Edge.new('1', '2'))
      instance.to_flat.should include('foovalue', 'Edge;1;2')
    end

    it "should be able to instantiate embedded elements when given as input fields" do
      parent = Class.new(Clusta::Geometry::Element) { field :foo }

      instance = parent.new("foovalue", "Edge;1;2", "Edge;3;4")

      instance.foo.should == 'foovalue'

      instance.input_fields.size.should == 2
      instance.input_fields[0].source_label.should == '1'
      instance.input_fields[0].target_label.should == '2'
      instance.input_fields[1].source_label.should == '3'
      instance.input_fields[1].target_label.should == '4'
    end

    it "should be able to serialize embedded elements when given as input fields" do
      parent = Class.new(Clusta::Geometry::Element) { field :foo }

      instance = parent.new('foovalue', Clusta::Geometry::Edge.new('1','2'), Clusta::Geometry::Edge.new('3','4'))
      instance.to_flat.should include('foovalue', 'Edge;1;2')
    end

  end

end