hexwrench 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +12 -0
- data/Rakefile +46 -0
- data/TODO +6 -0
- data/lib/hexwrench/core/explorer.rb +9 -0
- data/lib/hexwrench/core/feature.rb +75 -0
- data/lib/hexwrench/core/resource.rb +40 -0
- data/lib/hexwrench/weka/explorer.rb +156 -0
- data/lib/hexwrench/weka/feature.rb +22 -0
- data/lib/hexwrench/weka.rb +15 -0
- data/lib/hexwrench.rb +10 -0
- metadata +91 -0
data/README
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
LICENSING warning:
|
2
|
+
By using the Weka wrapper, you agree to use Weka and HexWrench in a GNU GPL-compatible way.
|
3
|
+
|
4
|
+
= HexWrench
|
5
|
+
|
6
|
+
== Weka
|
7
|
+
HexWrench comes with a Weka layer, whose goal is to turn Welo::Resources into
|
8
|
+
weka.core.Instances.
|
9
|
+
|
10
|
+
To use it, you need:
|
11
|
+
- JRuby
|
12
|
+
- A recent Weka JAR on your $LOAD_PATH
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
|
2
|
+
# Packaging tasks for the hexwrench gem.
require 'rubygems'
# Gem::PackageTask (shipped with RubyGems) replaces Rake::GemPackageTask,
# which was deprecated and later removed from Rake.
require 'rubygems/package_task'

# Make lib/ loadable so the gem metadata below can be read from HexWrench itself.
$LOAD_PATH.unshift('lib')
require 'hexwrench'

spec = Gem::Specification.new do |s|
  s.name = 'hexwrench'
  s.rubyforge_project = 'hexwrench'
  s.version = HexWrench::VERSION
  s.author = HexWrench::AUTHORS.first
  s.homepage = HexWrench::WEBSITE
  s.summary = "A data miner glue layer for your Welo resources"
  s.email = "crapooze@gmail.com"
  s.platform = Gem::Platform::RUBY

  s.files = [
    'Rakefile',
    'TODO',
    'README',
    'lib/hexwrench.rb',
    'lib/hexwrench/core/explorer.rb',
    'lib/hexwrench/core/feature.rb',
    'lib/hexwrench/core/resource.rb',
    'lib/hexwrench/weka.rb',
    'lib/hexwrench/weka/explorer.rb',
    'lib/hexwrench/weka/feature.rb',
  ]

  s.require_path = 'lib'
  s.bindir = 'bin'
  s.executables = []
  s.has_rdoc = true

  s.add_dependency('welo', '>= 0.0.6')
end

# Defines the packaging tasks; a tarball is produced alongside the .gem file.
Gem::PackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# Builds the .gem file (via its file-task prerequisite) and reports the version.
task :gem => ["pkg/#{spec.name}-#{spec.version}.gem"] do
  puts "generated #{spec.version}"
end
|
46
|
+
|
data/TODO
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
|
2
|
+
module HexWrench
  # Base class for a typed description of one attribute (+sym+) of a resource.
  class Feature
    # Returns the Hash mapping each type symbol accepted by Feature.for to the
    # Feature subclass implementing it.
    #
    # Built on every call rather than stored in a constant because the
    # subclasses are only defined further down in this file.
    def self.mapping
      {
        :numeric => NumericFeature,
        :date    => DateFeature,
        :nominal => NominalFeature,
        :string  => StringFeature,
        :related => RelationFeature
      }
    end

    # Builds the feature of the given +type+ for attribute +sym+ of +resource+.
    #
    # sym      - name of the described attribute
    # resource - object owning the attribute
    # type     - one of the keys of Feature.mapping
    #
    # Returns an instance of the mapped Feature subclass.
    # Raises ArgumentError when +type+ is not a key of Feature.mapping.
    def self.for(sym, resource, type)
      klass = mapping.fetch(type) do
        raise ArgumentError, "not understood type: #{type}"
      end
      klass.new(sym, resource)
    end

    attr_accessor :sym, :resource

    def initialize(sym, resource)
      @resource = resource
      @sym = sym
    end
  end

  # Feature registered under the :numeric type; adds nothing to Feature.
  class NumericFeature < Feature
  end

  # Feature registered under the :date type, with an optional format string.
  class DateFeature < Feature
    # Format string for this feature; nil unless one was supplied.
    attr_accessor :format

    # Format string callers can fall back to when #format is nil.
    def self.default_format
      "%Y-%m-%d"
    end

    def initialize(sym, resource, format=nil)
      super(sym, resource)
      @format = format
    end
  end

  # Feature registered under the :nominal type, carrying a list of labels.
  class NominalFeature < Feature
    attr_accessor :allowed_labels

    def initialize(sym, resource, labels = [])
      super(sym, resource)
      @allowed_labels = labels
    end
  end

  # Feature registered under the :string type; adds nothing to Feature.
  class StringFeature < Feature
  end

  # Feature registered under the :related type: the attribute refers to
  # another model through a relationship of the resource.
  class RelationFeature < Feature
    # Asks the resource for the relationship named after this feature.
    def relationship
      resource.relationship(sym)
    end

    # Resolves the related model from the constant named by the
    # relationship's +klass+.
    def related_model_from_relationship_klass
      Kernel.const_get relationship.klass
    end

    attr_writer :related_model

    # Explicitly assigned related model, or the one resolved from the
    # relationship when none was assigned.
    def related_model
      @related_model || related_model_from_relationship_klass
    end

    # Only a writer is generated: the reader is defined by hand just below, so
    # attr_accessor would create a reader that is immediately redefined.
    attr_writer :related_perspective

    # Explicitly assigned perspective, or :default when none was assigned.
    def related_perspective
      @related_perspective || :default
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
require 'welo'
|
3
|
+
|
4
|
+
module HexWrench
  # Mixin layering per-attribute Feature declarations on top of Welo::Resource.
  #
  # Including it extends the including module with both Welo's class methods
  # and the ClassMethods below, and gives it an empty feature registry.
  module Resource
    include Welo::Resource

    def self.included(mod)
      mod.extend Welo::Resource::ClassMethods
      mod.extend ClassMethods
      mod.features_hash = {}
    end

    module ClassMethods
      # Registry of declared features, keyed by attribute name.
      # NOTE(review): only assigned in Resource.included, so a subclass of an
      # including class appears to start with nil here -- confirm.
      attr_accessor :features_hash

      # Declares or looks up the feature for attribute +name+.
      #
      # With a +type+, a new feature is built through Feature.for, yielded to
      # the optional block for further configuration, and stored under +name+
      # (replacing any previous entry). Without a +type+ this is a plain lookup.
      #
      # Returns the feature stored under +name+, or nil when none is stored.
      def feature(name, type=nil)
        if type
          feat = Feature.for(name, self, type)
          yield feat if block_given?
          features_hash[name] = feat
        end
        features_hash[name]
      end

      # Returns the features of every field of perspective +persp+, in field
      # order; fields without a declared feature yield nil entries.
      # +perspective+ is not defined here -- presumably it comes from
      # Welo::Resource::ClassMethods.
      def features(persp)
        perspective(persp).fields.map { |field| feature(field) }
      end
    end

    # Instance-level shortcut for the class-level +features+.
    def features(persp)
      self.class.features(persp)
    end

    # Instance-level shortcut for the class-level feature lookup.
    def feature(name)
      self.class.feature(name)
    end
  end
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
|
2
|
+
require 'hexwrench/core/explorer'
|
3
|
+
|
4
|
+
module HexWrench
  module Weka
    # Explorer keeping one Header (attribute set + Weka Instances) per
    # perspective of its model.
    class Explorer < HexWrench::Explorer
      # Represents an ARFF-like header: the Weka attributes derived from the
      # features of one perspective of a model, together with the Instances
      # object built on those attributes.
      class Header
        attr_reader :model, :perspective, :perspectives, :resources_cnt, :relation_name

        # model            - object answering #perspective(name) and #feature(sym)
        # perspective_tree - either a perspective name, or an Array whose first
        #                    element is the perspective name and whose second
        #                    maps relation feature names to the perspective
        #                    tree used for the related model
        # resources_cnt    - passed as third argument to Weka::Instances.new
        #                    (presumably the initial capacity -- confirm)
        # name             - relation name, also used as attribute-name prefix
        def initialize(model, perspective_tree, resources_cnt=0, name="dummy")
          @model = model
          if perspective_tree.is_a?(Array)
            @perspective = perspective_tree[0]
            @perspectives = perspective_tree[1]
          else
            @perspective = perspective_tree
            @perspectives = {}
          end
          @resources_cnt = resources_cnt
          @relation_name = name
        end

        # Features of the model for each field of the perspective, in order.
        def features
          model.perspective(perspective).fields.map do |sym|
            model.feature(sym)
          end
        end

        # Returns the attribute built for feature +name+, or nil if there is none.
        def attribute(name)
          pair = attributes_pairs.find { |n, _attr| n == name }
          pair.last if pair
        end

        # All attributes, in feature order.
        def attributes
          attributes_pairs.map { |_name, attr| attr }
        end

        # Memoized list of [feature name, attribute] pairs.
        def attributes_pairs
          @attributes_pairs ||= create_attributes_pairs
        end

        # Memoized FastVector holding the attributes.
        def fast_vector
          @fast_vector ||= create_fast_vector
        end

        # Memoized Weka Instances for this header.
        def instances
          @instances ||= create_instances
        end

        def create_attributes_pairs
          features.map do |feat|
            [feat.sym, attribute_for_feature(feat)]
          end
        end

        # Headers for related attributes, keyed by feature name. Filled as a
        # side effect of building the attribute of each RelationFeature.
        def headers
          @headers ||= {}
        end

        # Header built for the relation feature +name+, or nil.
        def header(name)
          headers[name]
        end

        # Builds the Weka attribute matching +feat+; its name is the relation
        # name joined to the feature name with File.join.
        #
        # Raises ArgumentError for a feature of an unsupported class.
        def attribute_for_feature(feat)
          name = File.join(relation_name, feat.sym.to_s)
          case feat
          when NumericFeature
            Weka::Attribute.new(name)
          when DateFeature
            Weka::Attribute.new(name, feat.weka_format)
          when NominalFeature
            Weka::Attribute.new(name, feat.labels_fv)
          when StringFeature
            # The (String, FastVector) constructor is selected explicitly and
            # called with nil in place of the vector (presumably Weka's way of
            # declaring a string attribute -- confirm).
            construct = Weka::Attribute.java_class.constructor(java.lang.String,
                                                               Weka::FastVector)
            construct.new_instance(name, nil).to_java
          when RelationFeature
            related_model = feat.related_model
            # NOTE(review): feat.related_perspective is not consulted here;
            # only the perspective tree given to this header is -- confirm
            # that this is intended.
            related_persp = perspectives[feat.sym] || :default
            cnt = 0 #XXX could be taken more cleverly if support in Welo's relationship
            rel_header = Header.new(related_model, related_persp, cnt, name)
            headers[feat.sym] = rel_header
            construct = Weka::Attribute.java_class.constructor(java.lang.String,
                                                               Weka::Instances)
            construct.new_instance(name, rel_header.instances).to_java
          else
            raise ArgumentError, "don't know how to handle #{feat} to make an attribute"
          end
        end

        def create_fast_vector
          fv = Weka::FastVector.new
          attributes.each do |attribute|
            fv.add_element(attribute)
          end
          fv
        end

        def create_instances
          Weka::Instances.new(relation_name, fast_vector, resources_cnt)
        end

        # Converts +resource+ into one row and appends it to #instances.
        #
        # Each feature's value is read by sending the feature name to the
        # resource, then encoded according to the feature class; the encoded
        # values are passed to Weka as an array of doubles.
        def add_resource(resource)
          values = features.map do |feat|
            rb_val = resource.send(feat.sym)
            attr = attribute(feat.sym)
            case feat
            when DateFeature
              # Values that can format themselves are rendered with the
              # feature's format (or the default one); anything else is handed
              # to the attribute's date parser unchanged.
              date_str = if rb_val.respond_to?(:strftime)
                           fmt = feat.format || DateFeature.default_format
                           rb_val.strftime(fmt)
                         else
                           rb_val
                         end
              attr.parseDate(date_str)
            when NominalFeature
              attr.indexOfValue(rb_val.to_s.to_java)
            when StringFeature
              attr.addStringValue(rb_val.to_java)
            when RelationFeature
              rel_header = header(feat.sym)
              raise NotImplementedError, "no header built for #{feat} yet" unless rel_header
              # NOTE(review): the related header's instances are memoized, so
              # rows of successive resources appear to accumulate in the same
              # Instances object -- confirm this is the intended bag.
              if feat.relationship.many?
                rb_val.each do |related|
                  rel_header.add_resource(related)
                end
              else
                rel_header.add_resource(rb_val)
              end
              attr.addRelation(rel_header.instances)
            else
              rb_val
            end
          end
          instances.add(Weka::Instance.new(1.0, values.to_java(Java::double)))
        end
      end

      attr_reader :headers

      def initialize(model)
        super(model)
        @headers = {}
      end

      # Memoized Header for perspective +persp+.
      def header(persp)
        @headers[persp] ||= create_header(persp)
      end

      def create_header(persp)
        Header.new(model, persp)
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
require 'hexwrench/core/feature'
|
3
|
+
|
4
|
+
module HexWrench
  # Weka-specific additions to DateFeature.
  class DateFeature < Feature
    DEFAULT_WEKA_FORMAT = "yyyy-MM-dd"

    attr_writer :weka_format

    # Date format handed to the Weka attribute: the explicitly assigned one,
    # else the feature's #format, else DEFAULT_WEKA_FORMAT.
    # NOTE(review): #format elsewhere looks like a strftime pattern
    # ("%Y-%m-%d") while the default here looks like a Java date pattern;
    # confirm the fallback to #format is usable by Weka as is.
    def weka_format
      @weka_format || format || DEFAULT_WEKA_FORMAT
    end
  end

  # Weka-specific additions to NominalFeature.
  class NominalFeature < Feature
    # Memoized FastVector holding the string form of each allowed label.
    # Later changes to allowed_labels are not reflected once it is built.
    def labels_fv
      return @labels_fv if @labels_fv

      @labels_fv = Weka::FastVector.new
      allowed_labels.each { |label| @labels_fv.add_element(label.to_s) }
      @labels_fv
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
require 'java'
|
3
|
+
require 'weka'
|
4
|
+
|
5
|
+
require 'hexwrench/weka/feature'
|
6
|
+
|
7
|
+
module HexWrench
  # Namespace of the Weka layer; exposes the Weka Java classes it relies on
  # as constants of this module.
  module Weka
    autoload :Explorer, 'hexwrench/weka/explorer'

    # java_import replaces include_class, which JRuby deprecated and later
    # removed.
    java_import 'weka.core.Attribute'
    java_import 'weka.core.FastVector'
    java_import 'weka.core.Instances'
    java_import 'weka.core.Instance'
  end
end
|
data/lib/hexwrench.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
|
2
|
+
# Root namespace of the gem: metadata constants plus lazily loaded core parts.
module HexWrench
  VERSION = "0.0.1"
  AUTHORS = ["crapooze"]
  WEBSITE = "https://github.com/crapooze/hexwrench"
  LICENCE = "MIT | GPL"

  # Core files are only required on first reference to the constant.
  autoload :Resource, 'hexwrench/core/resource'
  autoload :Feature, 'hexwrench/core/feature'
  autoload :Explorer, 'hexwrench/core/explorer'
end
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hexwrench
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- crapooze
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-02-23 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: welo
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 19
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 0
|
33
|
+
- 6
|
34
|
+
version: 0.0.6
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
description:
|
38
|
+
email: crapooze@gmail.com
|
39
|
+
executables: []
|
40
|
+
|
41
|
+
extensions: []
|
42
|
+
|
43
|
+
extra_rdoc_files: []
|
44
|
+
|
45
|
+
files:
|
46
|
+
- Rakefile
|
47
|
+
- TODO
|
48
|
+
- README
|
49
|
+
- lib/hexwrench.rb
|
50
|
+
- lib/hexwrench/core/explorer.rb
|
51
|
+
- lib/hexwrench/core/feature.rb
|
52
|
+
- lib/hexwrench/core/resource.rb
|
53
|
+
- lib/hexwrench/weka.rb
|
54
|
+
- lib/hexwrench/weka/explorer.rb
|
55
|
+
- lib/hexwrench/weka/feature.rb
|
56
|
+
has_rdoc: true
|
57
|
+
homepage: https://github.com/crapooze/hexwrench
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
requirements: []
|
84
|
+
|
85
|
+
rubyforge_project: hexwrench
|
86
|
+
rubygems_version: 1.3.7
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: A data miner glue layer for your Welo resources
|
90
|
+
test_files: []
|
91
|
+
|