hexwrench 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,12 @@
1
+ LICENSING warning:
2
+ By using the Weka wrapper, you agree to use Weka and HexWrench in a GNU GPL-compatible way.
3
+
4
+ = HexWrench
5
+
6
+ == Weka
7
+ HexWrench comes with a Weka layer, whose goal is to turn Welo::Resources into
8
+ weka.core.Instances.
9
+
10
+ To use it, you need:
11
+ - JRuby
12
+ - A recent Weka JAR on your $LOAD_PATH
data/Rakefile ADDED
@@ -0,0 +1,46 @@
1
+
2
+ require 'rubygems'
3
+ require 'rake/gempackagetask'
4
+
5
+ $LOAD_PATH.unshift('lib')
6
+ require 'hexwrench'
7
+
8
# Gem specification for hexwrench. Version, author and homepage are read from
# the HexWrench module so they are declared in a single place.
spec = Gem::Specification.new do |s|
  s.name              = 'hexwrench'
  s.rubyforge_project = 'hexwrench'
  s.version           = HexWrench::VERSION
  s.author            = HexWrench::AUTHORS.first
  s.homepage          = HexWrench::WEBSITE
  s.summary           = "A data miner glue layer for your Welo resources"
  s.email             = "crapooze@gmail.com"
  s.platform          = Gem::Platform::RUBY

  # Files shipped in the gem, listed explicitly.
  s.files = [
    'Rakefile',
    'TODO',
    'README',
    'lib/hexwrench.rb',
    'lib/hexwrench/core/explorer.rb',
    'lib/hexwrench/core/feature.rb',
    'lib/hexwrench/core/resource.rb',
    'lib/hexwrench/weka.rb',
    'lib/hexwrench/weka/explorer.rb',
    'lib/hexwrench/weka/feature.rb',
  ]

  s.require_path = 'lib'
  s.bindir       = 'bin'
  s.executables  = []
  s.has_rdoc     = true

  s.add_dependency('welo', '>= 0.0.6')
end

# Packaging task: builds the .gem and also a tarball (need_tar).
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# `rake gem` depends on the packaged gem file and reports the built version.
task :gem => ["pkg/#{spec.name}-#{spec.version}.gem"] do
  puts "generated #{spec.version}"
end
46
+
data/TODO ADDED
@@ -0,0 +1,6 @@
1
+ * related feature:
2
+ * explorer:
3
+ - rename
4
+ * weka-explorer:
5
+ - populate attributes name on the fly
6
+ - prepare sizes of relationships?
@@ -0,0 +1,9 @@
1
+
2
module HexWrench
  # Base explorer: a thin object that remembers the model it was built for.
  class Explorer
    # The model given at construction time (read-only).
    attr_reader :model

    # model:: the object to explore; stored as-is, without validation.
    def initialize(model)
      @model = model
    end
  end
end
@@ -0,0 +1,75 @@
1
+
2
module HexWrench
  # A feature describes one attribute (+sym+) of a +resource+.
  # Concrete subclasses encode the kind of the attribute.
  class Feature
    # Returns the Hash mapping the type symbols accepted by Feature.for to
    # the Feature subclass implementing them. Built on each call because the
    # subclasses are defined after this method.
    def self.mapping
      {
        :numeric => NumericFeature,
        :date    => DateFeature,
        :nominal => NominalFeature,
        :string  => StringFeature,
        :related => RelationFeature
      }
    end

    # Builds the feature of the given +type+ for +sym+ on +resource+.
    #
    # sym::      name of the feature
    # resource:: object the feature belongs to
    # type::     one of the keys of Feature.mapping
    #
    # Raises ArgumentError when +type+ is not a key of Feature.mapping.
    def self.for(sym, resource, type)
      klass = mapping.fetch(type) do
        raise ArgumentError, "not understood type: #{type}"
      end
      klass.new(sym, resource)
    end

    attr_accessor :sym, :resource

    def initialize(sym, resource)
      @resource = resource
      @sym = sym
    end
  end

  # Feature holding a numeric value.
  class NumericFeature < Feature
  end

  # Feature holding a date, with an optional format string.
  class DateFeature < Feature
    # Format string for this feature, or nil when none was set.
    attr_accessor :format

    # Format string callers may use when +format+ is nil.
    def self.default_format
      "%Y-%m-%d"
    end

    def initialize(sym, resource, format = nil)
      super(sym, resource)
      @format = format
    end
  end

  # Feature whose values are taken from a list of allowed labels.
  class NominalFeature < Feature
    attr_accessor :allowed_labels

    def initialize(sym, resource, labels = [])
      super(sym, resource)
      @allowed_labels = labels
    end
  end

  # Feature holding free-form text.
  class StringFeature < Feature
  end

  # Feature pointing at other resources through a relationship of +resource+.
  class RelationFeature < Feature
    # Only writers are generated: the readers below add a fallback, and
    # generating them with attr_accessor would trigger a method-redefinition
    # warning.
    attr_writer :related_model, :related_perspective

    # The relationship object the resource declares under this feature's name.
    def relationship
      resource.relationship(sym)
    end

    # Resolves the model named by the relationship's +klass+.
    # Accepts a Class/Module (returned untouched) or a constant name, which
    # may be namespaced ("A::B") and is resolved from the top level.
    def related_model_from_relationship_klass
      klass = relationship.klass
      return klass if klass.is_a?(Module)

      klass.to_s.split('::').inject(Object) do |scope, const_name|
        # an empty segment comes from a leading "::"; skip it
        const_name.empty? ? scope : scope.const_get(const_name)
      end
    end

    # The explicitly assigned related model, or the one derived from the
    # relationship when none was assigned.
    def related_model
      @related_model || related_model_from_relationship_klass
    end

    # The explicitly assigned perspective, or :default.
    def related_perspective
      @related_perspective || :default
    end
  end
end
@@ -0,0 +1,40 @@
1
+
2
+ require 'welo'
3
+
4
module HexWrench
  # Mixin adding feature declarations on top of Welo::Resource.
  module Resource
    include Welo::Resource

    # Hook run when the module is included: gives +mod+ the Welo class-level
    # API, the feature API below, and a fresh features registry.
    def self.included(mod)
      mod.extend Welo::Resource::ClassMethods
      mod.extend ClassMethods
      mod.features_hash = {}
    end

    module ClassMethods
      attr_writer :features_hash

      # Registry of declared features, keyed by feature name.
      # Lazily initialised so that extenders that did not go through the
      # +included+ hook (e.g. subclasses) get an empty registry instead of nil.
      def features_hash
        @features_hash ||= {}
      end

      # Declares or looks up a feature.
      #
      # With a +type+, builds the feature via Feature.for, yields it to the
      # optional block for further configuration, and registers it under
      # +name+. Without a +type+, only looks +name+ up.
      #
      # Returns the registered feature, or nil when none is registered.
      def feature(name, type = nil)
        if type
          feat = Feature.for(name, self, type)
          yield feat if block_given?
          features_hash[name] = feat
        end
        features_hash[name]
      end

      # Features registered for each field of the perspective +persp+, in
      # field order (nil for fields without a registered feature).
      def features(persp)
        perspective(persp).fields.map { |field| feature(field) }
      end
    end

    # Instance-level shortcut for the class-level +features+.
    def features(persp)
      self.class.features(persp)
    end

    # Instance-level shortcut for the class-level feature lookup.
    def feature(name)
      self.class.feature(name)
    end
  end
end
@@ -0,0 +1,156 @@
1
+
2
+ require 'hexwrench/core/explorer'
3
+
4
module HexWrench
  module Weka
    # Explorer building Weka headers (and through them Weka instances) for a
    # model, one header per perspective.
    class Explorer < HexWrench::Explorer
      # Represents an ARFF-like header: the Weka attributes built from the
      # features of one perspective of a model, plus the Instances filled
      # through #add_resource.
      class Header
        attr_reader :model, :perspective, :perspectives, :resources_cnt, :relation_name

        # model::            model whose perspective/features are read
        # perspective_tree:: either a perspective, or a two-element Array
        #                    [perspective, {feature name => sub-perspective}]
        # resources_cnt::    capacity passed to the Weka Instances constructor
        # name::             relation name, also used as attribute-name prefix
        def initialize(model, perspective_tree, resources_cnt=0, name="dummy")
          @model = model
          if perspective_tree.is_a?(Array)
            @perspective = perspective_tree[0]
            # tolerate a one-element tree: fall back to no sub-perspectives
            @perspectives = perspective_tree[1] || {}
          else
            @perspective = perspective_tree
            @perspectives = {}
          end
          @resources_cnt = resources_cnt
          @relation_name = name
        end

        # Features of the model for each field of the perspective, in order.
        def features
          model.perspective(perspective).fields.map do |sym|
            model.feature(sym)
          end
        end

        # The Weka attribute built for the feature called +name+, or nil.
        def attribute(name)
          pair = attributes_pairs.find { |feat_name, _weka_attr| feat_name == name }
          pair.last if pair
        end

        # All Weka attributes, in feature order.
        def attributes
          attributes_pairs.map { |_feat_name, weka_attr| weka_attr }
        end

        # Memoized list of [feature name, Weka attribute] pairs.
        def attributes_pairs
          @attributes_pairs ||= create_attributes_pairs
        end

        # Memoized FastVector holding the attributes.
        def fast_vector
          @fast_vector ||= create_fast_vector
        end

        # Memoized Weka Instances for this header.
        def instances
          @instances ||= create_instances
        end

        def create_attributes_pairs
          features.map do |feat|
            [feat.sym, attribute_for_feature(feat)]
          end
        end

        # Headers for related attributes, keyed by feature name.
        def headers
          @headers ||= {}
        end

        # The nested header registered for the related feature +name+, or nil.
        def header(name)
          headers[name]
        end

        # Builds the Weka attribute matching the kind of +feat+.
        # For a RelationFeature this also builds and registers a nested
        # Header (see #headers).
        #
        # Raises ArgumentError for an unsupported kind of feature.
        def attribute_for_feature(feat)
          name = File.join(relation_name, feat.sym.to_s)
          case feat
          when NumericFeature
            Weka::Attribute.new(name)
          when DateFeature
            Weka::Attribute.new(name, feat.weka_format)
          when NominalFeature
            Weka::Attribute.new(name, feat.labels_fv)
          when StringFeature
            # the (String, FastVector) constructor is picked explicitly
            # because the second argument is nil
            construct = Weka::Attribute.java_class.constructor(java.lang.String,
                                                               Weka::FastVector)
            construct.new_instance(name, nil).to_java
          when RelationFeature
            related_model = feat.related_model
            related_persp = perspectives[feat.sym] || :default
            cnt = 0 #XXX could be taken more cleverly if support in Welo's relationship
            sub_header = Header.new(related_model, related_persp, cnt, name)
            headers[feat.sym] = sub_header
            construct = Weka::Attribute.java_class.constructor(java.lang.String,
                                                               Weka::Instances)
            construct.new_instance(name, sub_header.instances).to_java
          else
            raise ArgumentError, "don't know how to handle #{feat} to make an attribute"
          end
        end

        def create_fast_vector
          fv = Weka::FastVector.new
          attributes.each do |weka_attr|
            fv.add_element(weka_attr)
          end
          fv
        end

        def create_instances
          Weka::Instances.new(relation_name, fast_vector, resources_cnt)
        end

        # Converts +resource+ to one Weka Instance (weight 1.0) and appends
        # it to #instances. Each feature value is read by sending the
        # feature's name to +resource+ and converted according to the kind of
        # the feature.
        #
        # Raises NotImplementedError when a related feature has no nested
        # header.
        def add_resource(resource)
          values = features.map do |feat|
            rb_val = resource.send(feat.sym)
            weka_attr = attribute(feat.sym)
            case feat
            when DateFeature
              date_str = if rb_val.respond_to?(:strftime)
                           rb_val.strftime(feat.format || DateFeature.default_format)
                         else
                           rb_val
                         end
              weka_attr.parseDate(date_str)
            when NominalFeature
              weka_attr.indexOfValue(rb_val.to_s.to_java)
            when StringFeature
              weka_attr.addStringValue(rb_val.to_java)
            when RelationFeature
              sub_header = header(feat.sym)
              raise NotImplementedError, "no header built for #{feat} yet" unless sub_header
              # NOTE(review): sub_header.instances is memoized, so the same
              # Instances object receives the related resources of every call
              # -- confirm this accumulation is intended.
              if feat.relationship.many?
                rb_val.each do |related|
                  sub_header.add_resource(related)
                end
              else
                sub_header.add_resource(rb_val)
              end
              weka_attr.addRelation(sub_header.instances)
            else
              rb_val
            end
          end
          instances.add(Weka::Instance.new(1.0, values.to_java(Java::double)))
        end
      end

      # Headers already built, keyed by perspective.
      attr_reader :headers

      def initialize(model)
        super(model)
        @headers = {}
      end

      # The header for +persp+, built on first access and then reused.
      def header(persp)
        @headers[persp] ||= create_header(persp)
      end

      def create_header(persp)
        Header.new(model, persp)
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+
2
+ require 'hexwrench/core/feature'
3
+
4
module HexWrench
  # Weka-specific additions to DateFeature.
  class DateFeature < Feature
    attr_writer :weka_format

    # Date pattern used when neither weka_format nor format is set.
    DEFAULT_WEKA_FORMAT = "yyyy-MM-dd"

    # Date pattern handed to the Weka attribute: the explicit weka_format,
    # else the feature's format, else DEFAULT_WEKA_FORMAT.
    # NOTE(review): +format+ looks like a strftime pattern elsewhere in the
    # library while the default here is a Java-style pattern -- confirm the
    # fallback to +format+ is intended.
    def weka_format
      @weka_format || format || DEFAULT_WEKA_FORMAT
    end
  end

  # Weka-specific additions to NominalFeature.
  class NominalFeature < Feature
    # Memoized FastVector holding the allowed labels as strings.
    def labels_fv
      @labels_fv ||= begin
        fv = Weka::FastVector.new
        allowed_labels.each { |label| fv.add_element(label.to_s) }
        fv
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+
2
+ require 'java'
3
+ require 'weka'
4
+
5
+ require 'hexwrench/weka/feature'
6
+
7
module HexWrench
  # Namespace of the Weka layer. The Weka core classes used by the layer are
  # imported here so they can be referenced as Weka::Attribute and so on.
  module Weka
    autoload :Explorer, 'hexwrench/weka/explorer'

    # java_import replaces the deprecated include_class, which later JRuby
    # releases no longer provide.
    java_import 'weka.core.Attribute'
    java_import 'weka.core.FastVector'
    java_import 'weka.core.Instances'
    java_import 'weka.core.Instance'
  end
end
data/lib/hexwrench.rb ADDED
@@ -0,0 +1,10 @@
1
+
2
# Root namespace of the library: project metadata plus lazy loading of the
# core components.
module HexWrench
  VERSION = "0.0.1"
  AUTHORS = ["crapooze"]
  WEBSITE = "https://github.com/crapooze/hexwrench"
  LICENCE = "MIT | GPL"

  # Core components are loaded on first reference to their constant.
  autoload :Resource, 'hexwrench/core/resource'
  autoload :Feature,  'hexwrench/core/feature'
  autoload :Explorer, 'hexwrench/core/explorer'
end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hexwrench
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - crapooze
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-23 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: welo
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 19
30
+ segments:
31
+ - 0
32
+ - 0
33
+ - 6
34
+ version: 0.0.6
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ description:
38
+ email: crapooze@gmail.com
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ files:
46
+ - Rakefile
47
+ - TODO
48
+ - README
49
+ - lib/hexwrench.rb
50
+ - lib/hexwrench/core/explorer.rb
51
+ - lib/hexwrench/core/feature.rb
52
+ - lib/hexwrench/core/resource.rb
53
+ - lib/hexwrench/weka.rb
54
+ - lib/hexwrench/weka/explorer.rb
55
+ - lib/hexwrench/weka/feature.rb
56
+ has_rdoc: true
57
+ homepage: https://github.com/crapooze/hexwrench
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options: []
62
+
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 3
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ requirements: []
84
+
85
+ rubyforge_project: hexwrench
86
+ rubygems_version: 1.3.7
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: A data miner glue layer for your Welo resources
90
+ test_files: []
91
+