clusta 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +0 -0
  3. data/VERSION +1 -0
  4. data/bin/clusta +35 -0
  5. data/lib/clusta.rb +24 -0
  6. data/lib/clusta/geometry.rb +25 -0
  7. data/lib/clusta/geometry/arrow.rb +16 -0
  8. data/lib/clusta/geometry/assortativity.rb +31 -0
  9. data/lib/clusta/geometry/degree.rb +28 -0
  10. data/lib/clusta/geometry/directed/arrow.rb +12 -0
  11. data/lib/clusta/geometry/directed/degree.rb +29 -0
  12. data/lib/clusta/geometry/directed/edge.rb +26 -0
  13. data/lib/clusta/geometry/directed/edge_degree_pair.rb +24 -0
  14. data/lib/clusta/geometry/directed/vertex_arrows.rb +25 -0
  15. data/lib/clusta/geometry/edge.rb +58 -0
  16. data/lib/clusta/geometry/edge_degree_pair.rb +21 -0
  17. data/lib/clusta/geometry/element.rb +131 -0
  18. data/lib/clusta/geometry/vertex.rb +11 -0
  19. data/lib/clusta/geometry/vertex_arrows.rb +45 -0
  20. data/lib/clusta/runner.rb +15 -0
  21. data/lib/clusta/transforms.rb +50 -0
  22. data/lib/clusta/transforms/edge_degree_pairs_to_assortativities.rb +42 -0
  23. data/lib/clusta/transforms/edges_to_degrees.rb +42 -0
  24. data/lib/clusta/transforms/edges_to_vertex_arrows.rb +50 -0
  25. data/lib/clusta/transforms/import.rb +17 -0
  26. data/lib/clusta/transforms/vertex_arrows_to_edge_degree_pairs.rb +63 -0
  27. data/spec/clusta/geometry/element_spec.rb +191 -0
  28. data/spec/clusta/transforms/edges_to_degrees_spec.rb +22 -0
  29. data/spec/clusta/transforms/edges_to_vertex_arrows_spec.rb +21 -0
  30. data/spec/data/README.rdoc +54 -0
  31. data/spec/data/degrees/directed.tsv +9 -0
  32. data/spec/data/degrees/undirected.tsv +9 -0
  33. data/spec/data/edges/directed.unweighted.tsv +10 -0
  34. data/spec/data/edges/directed.weighted.tsv +10 -0
  35. data/spec/data/edges/undirected.unweighted.tsv +9 -0
  36. data/spec/data/edges/undirected.weighted.tsv +9 -0
  37. data/spec/data/vertex_arrows/directed.unweighted.tsv +7 -0
  38. data/spec/data/vertex_arrows/directed.weighted.tsv +7 -0
  39. data/spec/data/vertex_arrows/undirected.unweighted.tsv +9 -0
  40. data/spec/data/vertex_arrows/undirected.weighted.tsv +9 -0
  41. data/spec/spec_helper.rb +21 -0
  42. data/spec/support/transforms_spec_helper.rb +120 -0
  43. metadata +123 -0
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Dhruv Bansal
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
File without changes
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/bin/clusta ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $: << File.expand_path('../lib', File.dirname(__FILE__)) unless $:.include?(File.expand_path('../lib', File.dirname(__FILE__)))
4
+
5
+ require 'clusta'
6
+
7
# Builds the one-line usage banner for this executable.
#
# @return [String] the banner, naming the script by the basename of this file
def usage
  script_name = File.basename(__FILE__)
  "usage: #{script_name} --transform=TRANSFORM_NAME [ARGS ...]"
end
10
+
11
# Returns the first command-line argument that matches
# Clusta::Transforms::ARG_REGEXP.
#
# When no argument matches, the usage banner is written to standard error
# and the process exits with status 1.
#
# @return [String] the matching argument
def extract_transform_arg
  matching = ARGV.find { |candidate| candidate =~ Clusta::Transforms::ARG_REGEXP }
  if matching.nil?
    $stderr.puts(usage)
    exit(1)
  end
  # Removal of the matched argument from ARGV is currently disabled:
  # ARGV.delete_if { |arg| arg =~ Clusta::Transforms::ARG_REGEXP }
  matching
end
20
+
21
# Prepends '--run=local' to ARGV unless some argument already contains
# '--run'.
#
# @return [Array<String>, nil] ARGV when the default was added, nil otherwise
def add_default_run_arg
  return if ARGV.any? { |arg| arg =~ /--run/ }

  ARGV.unshift('--run=local')
end
24
+
25
# Entry point, executed only when this file is run directly (not when it is
# loaded by another script).  Any Clusta::Error is reported on standard
# error and turned into exit status 1.
if __FILE__ == $0
  begin
    add_default_run_arg
    transform_arg = extract_transform_arg
    transform     = Clusta::Transforms.from_arg(transform_arg)
    Clusta::Transforms.script_for(transform).run
  rescue Clusta::Error => error
    $stderr.puts error.message
    exit(1)
  end
end
data/lib/clusta.rb ADDED
@@ -0,0 +1,24 @@
1
+ require 'wukong'
2
+
3
# Top-level namespace for the library.  Holds small string helpers for
# mapping between file names and constant names, the library's error
# classes, and the autoload hooks for the Geometry and Transforms namespaces.
module Clusta

  # Converts a CamelCase name into snake_case.
  #
  #   Clusta.underscore("EdgeDegreePair") # => "edge_degree_pair"
  #   Clusta.underscore("edgeDegree")     # => "edge_degree"
  #
  # @param string [String] the name to convert
  # @return [String] the snake_case form ("" for an empty input)
  def self.underscore(string)
    # Every capital letter gains a leading underscore.  Only an underscore
    # that ends up at the very front is stripped; unconditionally dropping
    # the first character would mangle names that do not start with a
    # capital letter and would return nil for an empty string.
    string.gsub(/([A-Z])/, '_\1').downcase.sub(/\A_/, '')
  end

  # Converts a snake_case name into CamelCase by capitalizing each
  # underscore-separated part and joining the parts.
  #
  #   Clusta.classify("edge_degree_pair") # => "EdgeDegreePair"
  #
  # @param string [String] the name to convert
  # @return [String]
  def self.classify(string)
    string.split('_').map(&:capitalize).join
  end

  # Returns the base name of +path+ with a trailing ".rb" removed.
  #
  #   Clusta.require_name("/lib/clusta/geometry/edge.rb") # => "edge"
  #
  # @param path [String] a file path
  # @return [String]
  def self.require_name(path)
    File.basename(path, '.rb')
  end

  # Base class of every error defined by this library.
  Error                     = Class.new(StandardError)
  # Raised by geometry elements when directed and undirected elements are
  # combined.
  DirectednessMismatchError = Class.new(Error)
  # Raised by the geometry field DSL when a field is declared after an
  # optional one.
  AmbiguousArgumentsError   = Class.new(Error)

  autoload :Geometry,   'clusta/geometry'
  autoload :Transforms, 'clusta/transforms'

end
@@ -0,0 +1,25 @@
1
module Clusta

  # Namespace for graph geometry elements.  Every Ruby file found under
  # geometry/ and geometry/directed/ (relative to this file) is registered
  # for autoloading when this module is loaded.
  module Geometry

    autoload :Element, 'clusta/geometry/element'

    # Names of all geometries registered so far, in registration order.
    ELEMENTS = []

    # Registers +name+ to be autoloaded from +path+ and records the name in
    # ELEMENTS.
    #
    # @param name [String, Symbol] constant name of the geometry
    # @param path [String] path handed to autoload
    # @param geometries [Object] accepted but not used by this method
    # @return [Array] ELEMENTS, after the name has been appended
    def self.register_geometry(name, path, geometries=nil)
      autoload(name, path)
      ELEMENTS.push(name)
    end

    here = File.dirname(__FILE__)

    # Each pair is [constant-name prefix, directory below this file].
    # Undirected geometries are registered first, then the directed ones,
    # whose constant names get a "Directed" prefix.
    [["", "geometry"], ["Directed", "geometry/directed"]].each do |prefix, subdir|
      Dir[File.join(here, subdir, "*.rb")].each do |file|
        base = Clusta.require_name(file)
        register_geometry "#{prefix}#{Clusta.classify(base)}", "clusta/#{subdir}/#{base}"
      end
    end

  end
end
@@ -0,0 +1,16 @@
1
module Clusta
  module Geometry

    # An undirected pointer at a target vertex, with an optional weight.
    # Edge#arrow builds one from an edge's target label and weight.
    class Arrow < Element

      field :target_label
      field :weight, :optional => true

      # @return [false] plain arrows are undirected
      def directed?
        false
      end
    end

  end
end
16
+
@@ -0,0 +1,31 @@
1
module Clusta
  module Geometry

    # A counter keyed by a (source degree, target degree) pair.
    class Assortativity < Element

      field :source_degree_value, :type => :int
      field :target_degree_value, :type => :int
      field :count,               :type => :int

      # @return [false] assortativities are undirected
      def directed?
        false
      end

      # @return [Array] the degree pair: [source_degree_value, target_degree_value]
      def key
        [source_degree_value, target_degree_value]
      end

      # @return [Assortativity] a new element with the same key and a count of 0
      def zero
        arguments = key + [0]
        self.class.new(*arguments)
      end

      # Adds the counts of two assortativities.  The result carries the key
      # of the receiver; the key of +other+ is not consulted.
      #
      # @param other [#directed?, #count]
      # @return [Assortativity] a new element holding the summed count
      # @raise [DirectednessMismatchError] if +other+ is directed
      def +(other)
        raise DirectednessMismatchError if other.directed?

        total = count + other.count
        self.class.new(*(key + [total]))
      end
    end

  end
end
@@ -0,0 +1,28 @@
1
module Clusta
  module Geometry

    # The degree of a single vertex in an undirected graph.
    class Degree < Element

      field :vertex_label
      field :degree, :type => :int

      # @return [false] plain degrees are undirected
      def directed?
        false
      end

      # @return [Degree] a new element for the same vertex with degree 0
      def zero
        self.class.new(vertex_label, 0)
      end

      # @param deg [Integer] degree of the new element (1 by default)
      # @return [Degree] a new element for the same vertex with degree +deg+
      def one(deg=1)
        self.class.new(vertex_label, deg)
      end

      # Sums two degrees.  The result is labelled with the receiver's
      # vertex; the label of +other+ is not consulted.
      #
      # @param other [#directed?, #degree]
      # @return [Degree] a new element holding the summed degree
      # @raise [DirectednessMismatchError] if +other+ is directed
      def +(other)
        raise DirectednessMismatchError if other.directed?

        self.class.new(vertex_label, degree + other.degree)
      end
    end

  end
end
@@ -0,0 +1,12 @@
1
module Clusta
  module Geometry

    # An Arrow that belongs to a directed graph.
    class DirectedArrow < Arrow

      # Overrides Arrow#directed?.  This predicate was previously defined
      # without the trailing question mark, so the inherited Arrow#directed?
      # kept answering false for directed arrows.
      #
      # @return [true]
      def directed?
        true
      end

      # The earlier, unpunctuated spelling is kept so that any existing
      # caller of #directed continues to work.
      alias_method :directed, :directed?
    end

  end
end
12
+
@@ -0,0 +1,29 @@
1
module Clusta
  module Geometry

    # The in-degree and out-degree of a single vertex in a directed graph.
    class DirectedDegree < Element

      field :vertex_label
      field :in_degree,  :type => :int
      field :out_degree, :type => :int

      # @return [true] directed degrees are directed
      def directed?
        true
      end

      # @return [DirectedDegree] a new element for the same vertex with both
      #   degrees set to 0
      def zero
        self.class.new(vertex_label, 0, 0)
      end

      # @param inbound [Integer] in-degree of the new element (1 by default)
      # @param outbound [Integer] out-degree of the new element (0 by default)
      # @return [DirectedDegree] a new element for the same vertex
      def one(inbound=1, outbound=0)
        self.class.new(vertex_label, inbound, outbound)
      end

      # Sums two directed degrees component-wise.  The result is labelled
      # with the receiver's vertex; the label of +other+ is not consulted.
      #
      # @param other [#directed?, #in_degree, #out_degree]
      # @return [DirectedDegree] a new element holding the summed degrees
      # @raise [DirectednessMismatchError] unless +other+ is directed
      def +(other)
        raise DirectednessMismatchError unless other.directed?

        total_in  = in_degree + other.in_degree
        total_out = out_degree + other.out_degree
        self.class.new(vertex_label, total_in, total_out)
      end
    end

  end
end
@@ -0,0 +1,26 @@
1
module Clusta
  module Geometry

    # An Edge whose direction matters: it runs from source_label to
    # target_label.
    class DirectedEdge < Edge

      # @return [true] directed edges are directed
      def directed?
        true
      end

      # @return [DirectedDegree] this edge's contribution to its source
      #   vertex: in-degree 0, out-degree 1
      def source_degree
        DirectedDegree.new(source_label, 0, 1)
      end

      # @return [DirectedDegree] this edge's contribution to its target
      #   vertex: in-degree 1, out-degree 0
      def target_degree
        DirectedDegree.new(target_label, 1, 0)
      end

      # @return [DirectedArrow] an arrow at the target carrying this edge's
      #   weight
      def arrow
        DirectedArrow.new(target_label, weight)
      end
    end

  end
end
@@ -0,0 +1,24 @@
1
module Clusta
  module Geometry

    # A directed edge annotated with the in- and out-degree values of both
    # of its endpoints, plus an optional weight.
    class DirectedEdgeDegreePair < Element

      field :source_label
      field :target_label
      field :source_in_degree_value,  :type => :int
      field :source_out_degree_value, :type => :int
      field :target_in_degree_value,  :type => :int
      field :target_out_degree_value, :type => :int
      field :weight, :optional => true

      # @return [true] directed edge/degree pairs are directed
      def directed?
        true
      end

      # Builds a single-count Assortativity from the source's in-degree
      # value and the target's out-degree value.
      #
      # NOTE(review): confirm that pairing source *in*-degree with target
      # *out*-degree is the intended combination.
      #
      # @return [Assortativity] an assortativity with a count of 1
      def assortativity
        Assortativity.new(source_in_degree_value, target_out_degree_value, 1)
      end
    end

  end
end
@@ -0,0 +1,25 @@
1
module Clusta
  module Geometry

    # Directed variant of VertexArrows.  The parent class and the +arrows+
    # reader used below are defined outside this file.
    class DirectedVertexArrows < VertexArrows

      # @return [true] directed vertex arrows are directed
      def directed?
        true
      end

      # @return [Array] the pair [0, number of arrows]
      #   (presumably [in-degree, out-degree] of the source -- confirm
      #   against VertexArrows)
      def source_degrees
        [0, arrows.size]
      end

      # @return [Array] the constant pair [1, 0]
      def target_degrees
        [1, 0]
      end

      # Builds the directed edge/degree pair for an edge between the two
      # given labels.
      #
      # @param source_label [Object]
      # @param target_label [Object]
      # @param args [Array] remaining constructor arguments, passed through
      #   unchanged
      # @return [DirectedEdgeDegreePair]
      def edge_degree_pair(source_label, target_label, *args)
        DirectedEdgeDegreePair.new(source_label, target_label, *args)
      end
    end

  end
end
@@ -0,0 +1,58 @@
1
module Clusta
  module Geometry

    # An undirected edge between two labelled vertices, with an optional
    # weight.
    class Edge < Element

      field :source_label
      field :target_label
      field :weight, :optional => true

      # @return [Object, nil] the weight itself (truthy when one was given)
      def weighted?
        weight
      end

      # @return [false] plain edges are undirected
      def directed?
        false
      end

      # @param label [Object] a vertex label
      # @return [Boolean] whether +label+ is one of this edge's endpoints
      def joins?(label)
        [source_label, target_label].include?(label)
      end

      # @return [String] both labels joined as "source -> target"
      def labels_string
        "#{source_label} -> #{target_label}"
      end

      # @return [Degree] this edge's contribution (1) to its source vertex
      def source_degree
        Degree.new(source_label, 1)
      end

      # @return [Degree] this edge's contribution (1) to its target vertex
      def target_degree
        Degree.new(target_label, 1)
      end

      # @return [Array] the source degree followed by the target degree
      def degrees
        [source_degree, target_degree]
      end

      # Returns the degree contribution for the endpoint named +label+.
      # The source label is checked first, so it wins when both endpoints
      # carry the same label.
      #
      # @param label [Object] a vertex label
      # @return [Degree]
      # @raise [Error] if +label+ is neither endpoint of this edge
      def degree_of(label)
        case label
        when source_label then return source_degree
        when target_label then return target_degree
        end

        raise Error, "This edge (#{labels_string}) does not contain vertex #{label}"
      end

      # @return [Edge] a new edge of the same class with the endpoints
      #   swapped and the same weight
      def reversed
        self.class.new(target_label, source_label, weight)
      end

      # @return [Arrow] an arrow at the target carrying this edge's weight
      def arrow
        Arrow.new(target_label, weight)
      end
    end

  end
end
@@ -0,0 +1,21 @@
1
module Clusta
  module Geometry

    # An undirected edge annotated with the degree values of both of its
    # endpoints.
    class EdgeDegreePair < Element

      field :source_label
      field :target_label
      field :source_degree_value, :type => :int
      field :target_degree_value, :type => :int

      # @return [false] plain edge/degree pairs are undirected
      def directed?
        false
      end

      # @return [Assortativity] an assortativity for this pair's two degree
      #   values with a count of 1
      def assortativity
        Assortativity.new(source_degree_value, target_degree_value, 1)
      end
    end

  end
end
@@ -0,0 +1,131 @@
1
module Clusta
  module Geometry

    # Base class for geometry elements.
    #
    # Subclasses declare their positional fields with the +field+ class
    # method.  Instances are built from positional arguments in declaration
    # order; arguments beyond the declared fields are kept as
    # +input_fields+.  An element serializes to a ';'-separated string that
    # starts with its stream name.
    class Element

      # Constructor arguments left over after the declared fields.
      attr_accessor :input_fields

      # Field definitions are stored per class (a class-level instance
      # variable), so each subclass owns its own list.
      @fields = []
      class << self ; attr_reader :fields ; end

      # Gives each subclass its own copy of the parent's field list, so
      # fields declared on a subclass do not leak into the parent.
      def self.inherited(subclass)
        subclass.instance_variable_set("@fields", @fields.dup)
        super
      end

      # @return [Array<String>] names of the declared fields, in order
      def self.field_names
        @fields.map { |field| field[:name].to_s }
      end

      # @return [Boolean] whether any declared field is optional
      def self.has_optional_field?
        @fields.any? { |field| field[:optional] }
      end

      # @return [Hash, nil] the first optional field definition, if any
      def self.optional_field
        @fields.detect { |field| field[:optional] }
      end

      # Instantiates an element from a 'klass;field1;field2;...' string.
      # Non-String arguments are returned unchanged.  The class is resolved
      # through Wukong.class_from_resource (behaviour not visible here).
      #
      # @param string [String, Object]
      # @return [Object] the new element, or +string+ itself if it is not a String
      # @raise [ArgumentError] if the string contains no class name
      def self.from_string(string)
        return string unless string.is_a?(String)
        args       = string.split(';')
        klass_name = args.shift
        raise ArgumentError.new("Elements instantiated from a string must match the format 'klass;[field1;[field2;]...]'") unless klass_name
        Wukong.class_from_resource(klass_name).new(*args)
      end

      # Declares a positional field: defines a reader and a writer for it
      # and appends it to this class's field list.
      #
      # The writer converts the assigned value according to options[:type]:
      # :int uses to_i, :float uses to_f, :geometry parses the value with
      # from_string, anything else stores the value as given.
      #
      # @param name [Symbol, String] field name
      # @param options [Hash] may contain :type and :optional
      # @raise [AmbiguousArgumentsError] if an optional field has already
      #   been declared -- so only the last field of a class may be optional
      def self.field(name, options={})
        raise AmbiguousArgumentsError.new("Cannot define a second optional field #{name} because field #{optional_field[:name]} is already optional.") if has_optional_field?
        attr_reader name
        case options[:type]
        when :int
          define_method "#{name}=" do |val|
            instance_variable_set("@#{name}", val.to_i)
          end
        when :float
          define_method "#{name}=" do |val|
            instance_variable_set("@#{name}", val.to_f)
          end
        when :geometry
          define_method "#{name}=" do |val|
            instance_variable_set("@#{name}", self.class.from_string(val))
          end
        else
          define_method "#{name}=" do |val|
            instance_variable_set("@#{name}", val)
          end
        end
        @fields << options.merge(:name => name)
      end

      # @return [Array<Hash>] the field definitions of this element's class
      def fields
        self.class.fields
      end

      # Defines +name+ as an additional instance-level reader for the
      # element's input fields.
      #
      # @param name [Symbol, String] name of the alias
      def self.input_fields(name)
        alias_method name, :input_fields
      end

      # The name used as the first column of a serialized element: the full
      # class name when a Settings constant is defined and
      # Settings[:full_class_names] is set, otherwise the last segment of
      # the class name.
      #
      # @return [String]
      def self.stream_name
        if defined?(Settings) && Settings[:full_class_names]
          to_s
        else
          to_s.split("::").last
        end
      end

      # @return [String] the stream name of this element's class
      def stream_name
        self.class.stream_name
      end

      # Formats a 1-based position as an English ordinal ("1st", "2nd",
      # "3rd", "4th", "11th", "21st", ...).  Used for error messages.
      #
      # @param position [Integer] 1-based position
      # @return [String]
      def self.ordinal(position)
        suffix =
          if (11..13).include?(position % 100)
            'th' # 11th, 12th and 13th are irregular
          else
            { 1 => 'st', 2 => 'nd', 3 => 'rd' }.fetch(position % 10, 'th')
          end
        "#{position}#{suffix}"
      end

      # Assigns positional arguments to the declared fields in order and
      # stores any remaining arguments as input fields.
      #
      # @param args [Array] field values followed by optional extras
      # @raise [ArgumentError] if a non-optional field receives nil
      def initialize(*args)
        self.class.fields.each_with_index do |field, index|
          value  = args[index]
          writer = "#{field[:name]}="
          if field[:optional]
            # An optional field is only assigned when a truthy value is given.
            self.send(writer, value) if value
          elsif value.nil?
            # Report the human (1-based) position of the offending argument.
            raise ArgumentError.new("A #{self.class} requires a non-nil value for #{field[:name]} as its #{self.class.ordinal(index + 1)} argument.")
          else
            self.send(writer, value)
          end
        end
        self.set_input_fields(*(args[self.class.fields.size..-1] || []))
      end

      # Stores the given extras as input fields.  Strings that look like a
      # serialized element (a capitalized name followed by ';') are parsed
      # with from_string; everything else is kept as given.
      #
      # @param input_fields [Array]
      def set_input_fields(*input_fields)
        self.input_fields = input_fields.map do |field|
          # Only Strings are pattern-matched: Object#=~ no longer exists on
          # Ruby 3.2+, so matching an arbitrary object (e.g. an Integer)
          # would raise NoMethodError there.
          if field.is_a?(String) && field =~ /^[A-Z].*;/
            self.class.from_string(field)
          else
            field
          end
        end
      end

      # @return [Array<String>] the input fields converted with to_s
      def output_fields
        input_fields.map(&:to_s)
      end

      # Flattens the element into an array of columns: the stream name, each
      # declared field's value as a string (an optional field whose value is
      # nil is left out), then the output fields.
      #
      # @return [Array<String>]
      def to_flat
        [stream_name].tap do |record|
          fields.each do |field|
            value = send(field[:name])
            record << value.to_s unless value.nil? && field[:optional]
          end
        end.concat(output_fields)
      end

      # @return [String] the flattened columns joined with ';'
      def to_s
        to_flat.join(';')
      end

    end

  end
end
131
+