clusta 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. data/README.rdoc +66 -0
  2. data/VERSION +1 -1
  3. data/bin/clusta +1 -28
  4. data/lib/clusta.rb +12 -3
  5. data/lib/clusta/geometry.rb +53 -8
  6. data/lib/clusta/geometry/all.rb +3 -0
  7. data/lib/clusta/geometry/assortativity.rb +2 -2
  8. data/lib/clusta/geometry/degree.rb +3 -1
  9. data/lib/clusta/geometry/{edge_degree_pair.rb → degree_pair.rb} +3 -3
  10. data/lib/clusta/geometry/directed/degree.rb +3 -1
  11. data/lib/clusta/geometry/directed/{edge_degree_pair.rb → degree_pair.rb} +4 -3
  12. data/lib/clusta/geometry/directed/edge.rb +4 -2
  13. data/lib/clusta/geometry/directed/{arrow.rb → neighbor.rb} +1 -1
  14. data/lib/clusta/geometry/directed/neighborhood.rb +31 -0
  15. data/lib/clusta/geometry/edge.rb +6 -4
  16. data/lib/clusta/geometry/element.rb +10 -117
  17. data/lib/clusta/geometry/{arrow.rb → neighbor.rb} +3 -3
  18. data/lib/clusta/geometry/neighborhood.rb +41 -0
  19. data/lib/clusta/geometry/vertex.rb +4 -1
  20. data/lib/clusta/runner.rb +101 -4
  21. data/lib/clusta/schema.rb +100 -0
  22. data/lib/clusta/serialization.rb +63 -0
  23. data/lib/clusta/serialization/json.rb +86 -0
  24. data/lib/clusta/serialization/tsv.rb +81 -0
  25. data/lib/clusta/transforms.rb +59 -26
  26. data/lib/clusta/transforms/{edge_degree_pairs_to_assortativities.rb → degree_pairs_to_assortativities.rb} +7 -3
  27. data/lib/clusta/transforms/edges_to_degrees.rb +5 -0
  28. data/lib/clusta/transforms/{edges_to_vertex_arrows.rb → edges_to_neighborhoods.rb} +11 -6
  29. data/lib/clusta/transforms/import.rb +6 -0
  30. data/lib/clusta/transforms/neighborhoods_to_degree_pairs.rb +70 -0
  31. data/lib/clusta/transforms/pm3d.rb +46 -0
  32. data/lib/clusta/transforms/prune_edges.rb +34 -0
  33. data/spec/clusta/schema_spec.rb +36 -0
  34. data/spec/clusta/serialization/json_spec.rb +133 -0
  35. data/spec/clusta/serialization/tsv_spec.rb +133 -0
  36. data/spec/clusta/serialization_spec.rb +27 -0
  37. data/spec/clusta/transforms/degree_pairs_to_assortativities_spec.rb +13 -0
  38. data/spec/clusta/transforms/{edges_to_vertex_arrows_spec.rb → edges_to_neighborhoods_spec.rb} +5 -5
  39. data/spec/clusta/transforms/import_spec.rb +9 -0
  40. data/spec/clusta/transforms/neighborhoods_to_degree_pairs_spec.rb +21 -0
  41. data/spec/clusta/transforms/prune_edges_spec.rb +22 -0
  42. data/spec/data/assortativities/directed.tsv +4 -0
  43. data/spec/data/assortativities/undirected.tsv +7 -0
  44. data/spec/data/degree_pairs/directed.tsv +10 -0
  45. data/spec/data/degree_pairs/undirected.tsv +18 -0
  46. data/spec/data/external/vertices.tsv +9 -0
  47. data/spec/data/imports/vertices.labeled.tsv +9 -0
  48. data/spec/data/neighborhoods/directed.unweighted.tsv +7 -0
  49. data/spec/data/neighborhoods/directed.weighted.tsv +7 -0
  50. data/spec/data/neighborhoods/undirected.unweighted.tsv +9 -0
  51. data/spec/data/neighborhoods/undirected.weighted.tsv +9 -0
  52. data/spec/data/pruned_edges/directed.unweighted.tsv +1 -0
  53. data/spec/data/pruned_edges/directed.weighted.tsv +3 -0
  54. data/spec/data/pruned_edges/undirected.unweighted.tsv +1 -0
  55. data/spec/data/pruned_edges/undirected.weighted.tsv +3 -0
  56. data/spec/support/transforms_spec_helper.rb +5 -1
  57. metadata +47 -23
  58. data/lib/clusta/geometry/directed/vertex_arrows.rb +0 -25
  59. data/lib/clusta/geometry/vertex_arrows.rb +0 -45
  60. data/lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb +0 -63
  61. data/spec/clusta/geometry/element_spec.rb +0 -191
  62. data/spec/data/vertex_arrows/directed.unweighted.tsv +0 -7
  63. data/spec/data/vertex_arrows/directed.weighted.tsv +0 -7
  64. data/spec/data/vertex_arrows/undirected.unweighted.tsv +0 -9
  65. data/spec/data/vertex_arrows/undirected.weighted.tsv +0 -9
@@ -1,25 +0,0 @@
1
- module Clusta
2
- module Geometry
3
-
4
- class DirectedVertexArrows < VertexArrows
5
-
6
- def directed?
7
- true
8
- end
9
-
10
- def source_degrees
11
- [0, arrows.size]
12
- end
13
-
14
- def target_degrees
15
- [1, 0]
16
- end
17
-
18
- def edge_degree_pair source_label, target_label, *args
19
- DirectedEdgeDegreePair.new(source_label, target_label, *args)
20
- end
21
-
22
- end
23
-
24
- end
25
- end
@@ -1,45 +0,0 @@
1
- module Clusta
2
- module Geometry
3
-
4
- class VertexArrows < Vertex
5
-
6
- input_fields :arrows
7
-
8
- def joins? target_label
9
- arrows.detect { |arrow| arrow.target_label == target_label }
10
- end
11
-
12
- def directed?
13
- false
14
- end
15
-
16
- def source_degrees
17
- [arrows.size]
18
- end
19
-
20
- def target_degrees
21
- [1]
22
- end
23
-
24
- def edge_degree_pair source_label, target_label, *args
25
- EdgeDegreePair.new(source_label, target_label, *args)
26
- end
27
-
28
- def edge_degrees_pairs
29
- sds = source_degrees
30
- arrows.map do |arrow|
31
- edge_degree_pair(label, arrow.target_label, *(sds + target_degrees))
32
- end
33
- end
34
-
35
- def reversed_edge_degree_pairs
36
- sds = source_degrees
37
- arrows.map do |arrow|
38
- edge_degree_pair(arrow.target_label, label, *(target_degrees + sds))
39
- end
40
- end
41
-
42
- end
43
-
44
- end
45
- end
@@ -1,63 +0,0 @@
1
- module Clusta
2
-
3
- module Transforms
4
-
5
- module VertexArrowsToEdgeDegreePairs
6
-
7
- class Mapper < Wukong::Streamer::StructStreamer
8
-
9
- def process vertex_arrows, *record
10
- vertex_arrows.reversed_edge_degree_pairs.each { |edge_degree| emit(edge_degree) }
11
- end
12
-
13
- end
14
-
15
- class Reducer < Wukong::Streamer::AccumulatingReducer
16
-
17
- attr_accessor :edge_degree_pairs, :source_degree, :source_in_degree, :source_out_degree
18
-
19
- include Wukong::Streamer::StructRecordizer
20
-
21
- def get_key new_edge_degree_pair, *record
22
- new_edge_degree_pair.source_label
23
- end
24
-
25
- def start! new_edge_degree_pair, *record
26
- self.edge_degree_pairs = []
27
- if new_edge_degree_pair.directed?
28
- self.source_in_degree = 0
29
- self.source_out_degree = 0
30
- else
31
- self.source_degree = 0
32
- end
33
- end
34
-
35
- def accumulate new_edge_degree_pair, *record
36
- self.edge_degree_pairs << new_edge_degree_pair
37
- if new_edge_degree_pair.directed?
38
- self.source_in_degree += new_edge_degree_pair.source_in_degree_value
39
- self.source_out_degree += new_edge_degree_pair.source_out_degree_value
40
- else
41
- self.source_degree += new_edge_degree_pair.source_degree_value
42
- end
43
- end
44
-
45
- def finalize &block
46
- edge_degree_pairs.each do |edge_degree_pair|
47
- if edge_degree_pair.directed?
48
- edge_degree_pair.source_in_degree_value = source_in_degree
49
- edge_degree_pair.source_out_degree_value = source_out_degree
50
- else
51
- edge_degree_pair.source_degree_value = source_degree
52
- end
53
- emit edge_degree_pair
54
- end
55
- end
56
-
57
- end
58
-
59
- end
60
- end
61
-
62
- end
63
-
@@ -1,191 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Clusta::Geometry::Element do
4
-
5
- describe "setting inheritable fields" do
6
-
7
- it "should not define any fields of its own" do
8
- Clusta::Geometry::Element.fields.should == []
9
- end
10
-
11
- it "should allow a subclass to set its own fields without polluting the parent" do
12
- wrapper = Class.new(Clusta::Geometry::Element)
13
- wrapper.field :foo
14
- Clusta::Geometry::Element.field_names.should_not include('foo')
15
- wrapper.field_names.should include('foo')
16
- end
17
-
18
- it "should allow a subclass of a subclass to set its own fields without polluting the parent" do
19
- wrapper1 = Class.new(Clusta::Geometry::Element)
20
- wrapper1.field :foo
21
- wrapper2 = Class.new(wrapper1)
22
- wrapper2.field :bar
23
-
24
- Clusta::Geometry::Element.field_names.should_not include('foo')
25
- Clusta::Geometry::Element.field_names.should_not include('bar')
26
-
27
- wrapper1.field_names.should include('foo')
28
- wrapper1.field_names.should_not include('bar')
29
-
30
- wrapper2.field_names.should include('foo')
31
- wrapper2.field_names.should include('bar')
32
- end
33
-
34
- it "should not allow for more than one optional field" do
35
- wrapper = Class.new(Clusta::Geometry::Element)
36
- wrapper.field :foo, :optional => true
37
- lambda { wrapper.field :bar, :optional => true }.should raise_error
38
- end
39
- end
40
-
41
- describe "initializing" do
42
-
43
- it "should assign declared fields" do
44
- wrapper = Class.new(Clusta::Geometry::Element)
45
- wrapper.field :foo
46
- wrapper.field :baz
47
- instance = wrapper.new("bar", "boof")
48
- instance.foo.should == "bar"
49
- instance.baz.should == "boof"
50
- end
51
-
52
- it "should allow for an optional field at the end" do
53
- wrapper = Class.new(Clusta::Geometry::Element)
54
- wrapper.field :foo
55
- wrapper.field :baz, :optional => true
56
- instance = wrapper.new("bar")
57
- instance.foo.should == "bar"
58
- instance.baz.should == nil
59
-
60
- instance = wrapper.new("bar", "boof")
61
- instance.foo.should == "bar"
62
- instance.baz.should == "boof"
63
- end
64
-
65
- end
66
-
67
- describe "serializing" do
68
-
69
- it "constructs an array" do
70
- wrapper = Class.new(Clusta::Geometry::Element)
71
- wrapper.field :foo
72
- wrapper.field :baz
73
- wrapper.new("bar", "boof").to_flat[1].should == 'bar'
74
- wrapper.new("bar", "boof").to_flat[2].should == 'boof'
75
- end
76
-
77
- it "constructs an array with optional fields" do
78
- wrapper = Class.new(Clusta::Geometry::Element)
79
- wrapper.field :foo
80
- wrapper.field :baz, :optional => true
81
- wrapper.new("bar").to_flat[2].should == nil
82
- wrapper.new("bar", "boof").to_flat[2].should == 'boof'
83
- end
84
-
85
- end
86
-
87
- describe "dealing with fields beyond those declared" do
88
-
89
- it "should accept additional fields by default" do
90
- instance = Clusta::Geometry::Element.new("foo", "bar", "baz")
91
- instance.input_fields.should include("foo", "bar", "baz")
92
- end
93
-
94
- it "should serialize additional fields properly" do
95
- instance = Clusta::Geometry::Element.new("foo", "bar", "baz")
96
- instance.to_flat.should include("foo", "bar", "baz")
97
- end
98
-
99
- it "should accept additional fields on a subclass" do
100
- wrapper = Class.new(Clusta::Geometry::Element)
101
- wrapper.field :foo
102
- instance = wrapper.new("foovalue", "bar", "baz")
103
- instance.foo.should == "foovalue"
104
- instance.input_fields.should include("bar", "baz")
105
- end
106
-
107
- it "should serialize additional fields on a subclass properly" do
108
- wrapper = Class.new(Clusta::Geometry::Element)
109
- wrapper.field :foo
110
- instance = wrapper.new("foovalue", "bar", "baz")
111
- instance.to_flat.should include("foovalue", "bar", "baz")
112
- end
113
-
114
- it "should allow a subclass to alias input_fields" do
115
- wrapper = Class.new(Clusta::Geometry::Element)
116
- wrapper.field :foo
117
- wrapper.input_fields :bar
118
- instance = wrapper.new("foovalue", "bar", "baz")
119
- instance.foo.should == "foovalue"
120
- instance.bar.should == instance.input_fields
121
- end
122
-
123
- it "should behave sensibly with both an optional field and input fields" do
124
- wrapper = Class.new(Clusta::Geometry::Element)
125
- wrapper.field :foo
126
- wrapper.field :bar, :optional => true
127
-
128
- instance = wrapper.new("foovalue", "barvalue", "extra1", "extra2")
129
- instance.foo.should == 'foovalue'
130
- instance.bar.should == 'barvalue'
131
- instance.input_fields.should == ['extra1', 'extra2']
132
-
133
- instance = wrapper.new("foovalue")
134
- instance.foo.should == 'foovalue'
135
- instance.bar.should be_nil
136
- instance.input_fields.should be_empty
137
-
138
- end
139
- end
140
-
141
- describe "embedded geometry elements" do
142
-
143
- it "should be able to instantiate embedded elements when named as fields" do
144
- parent = Class.new(Clusta::Geometry::Element)
145
-
146
- parent.field :foo
147
- parent.field :child, :type => :geometry
148
-
149
- instance = parent.new("foovalue", "Edge;1;2")
150
- instance.foo.should == 'foovalue'
151
- instance.child.source_label.should == '1'
152
- instance.child.target_label.should == '2'
153
- end
154
-
155
- it "should be able to serialize embedded elements when named as fields" do
156
- parent = Class.new(Clusta::Geometry::Element)
157
-
158
- parent.field :foo
159
- parent.field :child, :type => :geometry
160
-
161
- instance = parent.new('foovalue', Clusta::Geometry::Edge.new('1', '2'))
162
- instance.to_flat.should include('foovalue', 'Edge;1;2')
163
- end
164
-
165
- it "should be able to instantiate embedded elements when given as input fields" do
166
- parent = Class.new(Clusta::Geometry::Element)
167
-
168
- parent.field :foo
169
- instance = parent.new("foovalue", "Edge;1;2", "Edge;3;4")
170
-
171
- instance.foo.should == 'foovalue'
172
-
173
- instance.input_fields.size.should == 2
174
- instance.input_fields[0].source_label.should == '1'
175
- instance.input_fields[0].target_label.should == '2'
176
- instance.input_fields[1].source_label.should == '3'
177
- instance.input_fields[1].target_label.should == '4'
178
- end
179
-
180
- it "should be able to serialize embedded elements when given as input fields" do
181
- parent = Class.new(Clusta::Geometry::Element)
182
-
183
- parent.field :foo
184
- instance = parent.new('foovalue', Clusta::Geometry::Edge.new('1','2'), Clusta::Geometry::Edge.new('3','4'))
185
- instance.to_flat.should include('foovalue', 'Edge;1;2')
186
- end
187
-
188
- end
189
-
190
- end
191
-
@@ -1,7 +0,0 @@
1
- DirectedVertexArrows 1 DirectedArrow;2
2
- DirectedVertexArrows 3 DirectedArrow;2
3
- DirectedVertexArrows 4 DirectedArrow;1
4
- DirectedVertexArrows 5 DirectedArrow;4 DirectedArrow;7
5
- DirectedVertexArrows 6 DirectedArrow;5
6
- DirectedVertexArrows 7 DirectedArrow;5 DirectedArrow;8
7
- DirectedVertexArrows 8 DirectedArrow;6 DirectedArrow;9
@@ -1,7 +0,0 @@
1
- DirectedVertexArrows 1 DirectedArrow;2;0.5
2
- DirectedVertexArrows 3 DirectedArrow;2;0.1
3
- DirectedVertexArrows 4 DirectedArrow;1;0.8
4
- DirectedVertexArrows 5 DirectedArrow;4;0.9 DirectedArrow;7;0.2
5
- DirectedVertexArrows 6 DirectedArrow;5;0.4
6
- DirectedVertexArrows 7 DirectedArrow;5;0.3 DirectedArrow;8;0.0
7
- DirectedVertexArrows 8 DirectedArrow;6;0.7 DirectedArrow;9;1.0
@@ -1,9 +0,0 @@
1
- VertexArrows 1 Arrow;2 Arrow;4
2
- VertexArrows 2 Arrow;1 Arrow;3
3
- VertexArrows 3 Arrow;2
4
- VertexArrows 4 Arrow;1 Arrow;5
5
- VertexArrows 5 Arrow;4 Arrow;6 Arrow;7
6
- VertexArrows 6 Arrow;5 Arrow;8
7
- VertexArrows 7 Arrow;5 Arrow;8
8
- VertexArrows 8 Arrow;6 Arrow;7 Arrow;9
9
- VertexArrows 9 Arrow;8
@@ -1,9 +0,0 @@
1
- VertexArrows 1 Arrow;2;0.5 Arrow;4;0.8
2
- VertexArrows 2 Arrow;1;0.5 Arrow;3;0.1
3
- VertexArrows 3 Arrow;2;0.1
4
- VertexArrows 4 Arrow;1;0.8 Arrow;5;0.9
5
- VertexArrows 5 Arrow;4;0.9 Arrow;6;0.4 Arrow;7;0.3
6
- VertexArrows 6 Arrow;5;0.4 Arrow;8;0.7
7
- VertexArrows 7 Arrow;5;0.3 Arrow;8;0.0
8
- VertexArrows 8 Arrow;6;0.7 Arrow;7;0.0 Arrow;9;1.0
9
- VertexArrows 9 Arrow;8;1.0