hexwrench 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,12 @@
1
+ LICENSING warning:
2
+ by using the Weka wrapper, you agree to use Weka and HexWrench in a GNU GPL compatible way
3
+
4
+ = HexWrench
5
+
6
+ == Weka
7
+ HexWrench comes with a Weka layer, whose goal is to turn Welo::Resources into
8
+ weka.core.Instances.
9
+
10
+ To use it, you need:
11
+ - JRuby
12
+ - A recent Weka JAR on your $LOAD_PATH
data/Rakefile ADDED
@@ -0,0 +1,46 @@
1
+
2
+ require 'rubygems'
3
+ require 'rake/gempackagetask'
4
+
5
+ $LOAD_PATH.unshift('lib')
6
+ require 'hexwrench'
7
+
8
+ spec = Gem::Specification.new do |s|
9
+ s.name = 'hexwrench'
10
+ s.rubyforge_project = 'hexwrench'
11
+ s.version = HexWrench::VERSION
12
+ s.author = HexWrench::AUTHORS.first
13
+ s.homepage = HexWrench::WEBSITE
14
+ s.summary = "A data miner glue layer for your Welo resources"
15
+ s.email = "crapooze@gmail.com"
16
+ s.platform = Gem::Platform::RUBY
17
+
18
+ s.files = [
19
+ 'Rakefile',
20
+ 'TODO',
21
+ 'README',
22
+ 'lib/hexwrench.rb',
23
+ 'lib/hexwrench/core/explorer.rb',
24
+ 'lib/hexwrench/core/feature.rb',
25
+ 'lib/hexwrench/core/resource.rb',
26
+ 'lib/hexwrench/weka.rb',
27
+ 'lib/hexwrench/weka/explorer.rb',
28
+ 'lib/hexwrench/weka/feature.rb',
29
+ ]
30
+
31
+ s.require_path = 'lib'
32
+ s.bindir = 'bin'
33
+ s.executables = []
34
+ s.has_rdoc = true
35
+
36
+ s.add_dependency('welo', '>= 0.0.6')
37
+ end
38
+
39
+ Rake::GemPackageTask.new(spec) do |pkg|
40
+ pkg.need_tar = true
41
+ end
42
+
43
+ task :gem => ["pkg/#{spec.name}-#{spec.version}.gem"] do
44
+ puts "generated #{spec.version}"
45
+ end
46
+
data/TODO ADDED
@@ -0,0 +1,6 @@
1
+ * related feature:
2
+ * explorer:
3
+ - rename
4
+ * weka-explorer:
5
+ - populate attributes name on the fly
6
+ - prepare sizes of relationships?
@@ -0,0 +1,9 @@
1
+
2
+ module HexWrench
3
+ class Explorer
4
+ attr_reader :model
5
+ def initialize(model)
6
+ @model = model
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,75 @@
1
+
2
+ module HexWrench
3
+ class Feature
4
+ def self.mapping
5
+ {:numeric => NumericFeature,
6
+ :date => DateFeature,
7
+ :nominal => NominalFeature,
8
+ :string => StringFeature,
9
+ :related => RelationFeature
10
+ }
11
+ end
12
+
13
+ def self.for(sym, resource, type)
14
+ raise ArgumentError.new, "not understood type: #{type}" unless mapping.has_key?(type)
15
+ klass = mapping[type]
16
+ klass.new(sym, resource)
17
+ end
18
+
19
+ attr_accessor :sym, :resource
20
+
21
+ def initialize(sym, resource)
22
+ @resource = resource
23
+ @sym = sym
24
+ end
25
+ end
26
+
27
+ class NumericFeature < Feature
28
+ end
29
+
30
+ class DateFeature < Feature
31
+ attr_accessor :format
32
+ def self.default_format
33
+ "%Y-%m-%d"
34
+ end
35
+
36
+ def initialize(sym, resource, format=nil)
37
+ super(sym, resource)
38
+ @format = format
39
+ end
40
+ end
41
+
42
+ class NominalFeature < Feature
43
+ attr_accessor :allowed_labels
44
+ def initialize(sym, resource, labels = [])
45
+ super(sym, resource)
46
+ @allowed_labels = labels
47
+ end
48
+ end
49
+
50
+ class StringFeature < Feature
51
+ end
52
+
53
+ class RelationFeature < Feature
54
+ def relationship
55
+ resource.relationship(sym)
56
+ end
57
+
58
+ def related_model_from_relationship_klass
59
+ Kernel.const_get relationship.klass
60
+ end
61
+
62
+ attr_writer :related_model
63
+
64
+ def related_model
65
+ @related_model || related_model_from_relationship_klass
66
+ end
67
+
68
+ attr_accessor :related_perspective
69
+
70
+ def related_perspective
71
+ @related_perspective || :default
72
+ end
73
+ end
74
+
75
+ end
@@ -0,0 +1,40 @@
1
+
2
+ require 'welo'
3
+
4
+ module HexWrench
5
+ module Resource
6
+ include Welo::Resource
7
+
8
+ def self.included(mod)
9
+ mod.extend Welo::Resource::ClassMethods
10
+ mod.extend ClassMethods
11
+ mod.features_hash = {}
12
+ end
13
+
14
+ module ClassMethods
15
+ attr_accessor :features_hash
16
+ def feature(name, type=nil)
17
+ if type
18
+ f = Feature.for(name, self, type)
19
+ yield f if block_given?
20
+ features_hash[name] = f
21
+ end
22
+ features_hash[name]
23
+ end
24
+
25
+ def features(persp)
26
+ perspective(persp).fields.map do |f|
27
+ feature(f)
28
+ end
29
+ end
30
+ end
31
+
32
+ def features(persp)
33
+ self.class.features(persp)
34
+ end
35
+
36
+ def feature(name)
37
+ self.class.feature(name)
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,156 @@
1
+
2
+ require 'hexwrench/core/explorer'
3
+
4
+ module HexWrench
5
+ module Weka
6
+ class Explorer < HexWrench::Explorer
7
+ class Header #represents an ARFF-like header
8
+ attr_reader :model, :perspective, :perspectives, :resources_cnt, :relation_name
9
+ def initialize(model, perspective_tree, resources_cnt=0, name="dummy")
10
+ @model = model
11
+ if perspective_tree.is_a?(Array)
12
+ @perspective = perspective_tree[0]
13
+ @perspectives = perspective_tree[1]
14
+ else
15
+ @perspective = perspective_tree
16
+ @perspectives = {}
17
+ end
18
+ @resources_cnt = resources_cnt
19
+ @relation_name = name
20
+ end
21
+
22
+ def features
23
+ model.perspective(perspective).fields.map do |sym|
24
+ model.feature(sym)
25
+ end
26
+ end
27
+
28
+ def attribute(name)
29
+ pair = attributes_pairs.find{|n, attr| n == name}
30
+ pair.last if pair
31
+ end
32
+
33
+ def attributes
34
+ attributes_pairs.map{|name, attr| attr}
35
+ end
36
+
37
+ def attributes_pairs
38
+ @attributes_pairs ||= create_attributes_pairs
39
+ end
40
+
41
+ def fast_vector
42
+ @fast_vector ||= create_fast_vector
43
+ end
44
+
45
+ def instances
46
+ @instances ||= create_instances
47
+ end
48
+
49
+ def create_attributes_pairs
50
+ features.map do |feat|
51
+ [feat.sym, attribute_for_feature(feat)]
52
+ end
53
+ end
54
+
55
+ # headers for related attributes
56
+ def headers
57
+ @headers ||= {}
58
+ end
59
+
60
+ def header(name)
61
+ headers[name]
62
+ end
63
+
64
+ def attribute_for_feature(feat)
65
+ name = File.join(relation_name, feat.sym.to_s)
66
+ case feat
67
+ when NumericFeature
68
+ Weka::Attribute.new(name)
69
+ when DateFeature
70
+ Weka::Attribute.new(name, feat.weka_format)
71
+ when NominalFeature
72
+ Weka::Attribute.new(name, feat.labels_fv)
73
+ when StringFeature
74
+ construct = Weka::Attribute.java_class.constructor(java.lang.String,
75
+ Weka::FastVector)
76
+ construct.new_instance(name, nil).to_java
77
+ when RelationFeature
78
+ related_model = feat.related_model
79
+ related_persp = perspectives[feat.sym] || :default
80
+ cnt = 0 #XXX could be taken more cleverly if support in Welo's relationship
81
+ header = Header.new(related_model, related_persp, cnt, name)
82
+ headers[feat.sym] = header
83
+ construct = Weka::Attribute.java_class.constructor(java.lang.String,
84
+ Weka::Instances)
85
+ construct.new_instance(name, header.instances).to_java
86
+ else
87
+ raise ArgumentError, "don't know how to handler #{feat} to make an attribute"
88
+ end
89
+ end
90
+
91
+ def create_fast_vector
92
+ fv = Weka::FastVector.new
93
+ attributes.each do |attribute|
94
+ fv.add_element(attribute)
95
+ end
96
+ fv
97
+ end
98
+
99
+ def create_instances
100
+ inst = Weka::Instances.new(relation_name, fast_vector, resources_cnt)
101
+ end
102
+
103
+ def add_resource(resource) # converts one resource into a Weka::Instance row and adds it to #instances
104
+ values = features.map do |feat|
105
+ rb_val = resource.send(feat.sym) # raw Ruby value read from the resource
106
+ attribute = attribute(feat.sym)
107
+ val = case feat
108
+ when DateFeature
109
+ date_str = if rb_val.respond_to?(:strftime)
110
+ fmt = feat.format || DateFeature.default_format
111
+ rb_val.send(:strftime, fmt)
112
+ else
113
+ rb_val # values without strftime are passed to parseDate unchanged
114
+ end
115
+ attribute.parseDate(date_str)
116
+ when NominalFeature
117
+ attribute.indexOfValue(rb_val.to_s.to_java)
118
+ when StringFeature
119
+ attribute.addStringValue(rb_val.to_java)
120
+ when RelationFeature
121
+ header = header(feat.sym) # nested header registered by attribute_for_feature
122
+ raise NotImplementedError, "no header built for #{feat} yet" unless header
123
+ if feat.relationship.many?
124
+ rb_val.each do |rb_val_|
125
+ header.add_resource(rb_val_)
126
+ end
127
+ else
128
+ header.add_resource(rb_val)
129
+ end
130
+ attribute.addRelation(header.instances) # NOTE(review): the same nested header is reused on every call, so rows added for earlier resources may still be in header.instances - confirm
131
+ else
132
+ rb_val # any other feature class (e.g. NumericFeature) is used as-is
133
+ end
134
+ val
135
+ end
136
+ instances.add(Weka::Instance.new(1.0, values.to_java(Java::double))) # all values become a Java double array; 1.0 is the first constructor argument
137
+ end
138
+ end
139
+
140
+ attr_reader :headers
141
+
142
+ def initialize(model)
143
+ super(model)
144
+ @headers = {}
145
+ end
146
+
147
+ def header(persp)
148
+ @headers[persp] ||= create_header(persp)
149
+ end
150
+
151
+ def create_header(persp)
152
+ Header.new(model, persp)
153
+ end
154
+ end
155
+ end
156
+ end
@@ -0,0 +1,22 @@
1
+
2
+ require 'hexwrench/core/feature'
3
+
4
+ module HexWrench
5
+ class DateFeature < Feature
6
+ attr_writer :weka_format
7
+ DEFAULT_WEKA_FORMAT = "yyyy-MM-dd"
8
+ def weka_format # date pattern: explicit @weka_format, else format, else DEFAULT_WEKA_FORMAT
9
+ @weka_format || format || DEFAULT_WEKA_FORMAT # NOTE(review): format is also applied with strftime in Explorer::Header#add_resource; confirm a strftime pattern is acceptable as a Weka date pattern
10
+ end
11
+ end
12
+
13
+ class NominalFeature < Feature
14
+ def labels_fv
15
+ unless @labels_fv
16
+ @labels_fv = Weka::FastVector.new
17
+ allowed_labels.each{|l| @labels_fv.add_element(l.to_s)}
18
+ end
19
+ @labels_fv
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,15 @@
1
+
2
+ require 'java'
3
+ require 'weka'
4
+
5
+ require 'hexwrench/weka/feature'
6
+
7
+ module HexWrench
8
+ module Weka
9
+ autoload :Explorer, 'hexwrench/weka/explorer'
10
+ include_class 'weka.core.Attribute'
11
+ include_class 'weka.core.FastVector'
12
+ include_class 'weka.core.Instances'
13
+ include_class 'weka.core.Instance'
14
+ end
15
+ end
data/lib/hexwrench.rb ADDED
@@ -0,0 +1,10 @@
1
+
2
+ module HexWrench
3
+ VERSION = "0.0.1"
4
+ AUTHORS = ["crapooze"]
5
+ WEBSITE = "https://github.com/crapooze/hexwrench"
6
+ LICENCE = "MIT | GPL"
7
+ autoload :Resource, 'hexwrench/core/resource'
8
+ autoload :Feature, 'hexwrench/core/feature'
9
+ autoload :Explorer, 'hexwrench/core/explorer'
10
+ end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hexwrench
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - crapooze
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-23 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: welo
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 19
30
+ segments:
31
+ - 0
32
+ - 0
33
+ - 6
34
+ version: 0.0.6
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ description:
38
+ email: crapooze@gmail.com
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ files:
46
+ - Rakefile
47
+ - TODO
48
+ - README
49
+ - lib/hexwrench.rb
50
+ - lib/hexwrench/core/explorer.rb
51
+ - lib/hexwrench/core/feature.rb
52
+ - lib/hexwrench/core/resource.rb
53
+ - lib/hexwrench/weka.rb
54
+ - lib/hexwrench/weka/explorer.rb
55
+ - lib/hexwrench/weka/feature.rb
56
+ has_rdoc: true
57
+ homepage: https://github.com/crapooze/hexwrench
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options: []
62
+
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 3
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ requirements: []
84
+
85
+ rubyforge_project: hexwrench
86
+ rubygems_version: 1.3.7
87
+ signing_key:
88
+ specification_version: 3
89
+ summary: A data miner glue layer for your Welo resources
90
+ test_files: []
91
+