svm_helper 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/.versions.conf +4 -0
- data/.yardopts +3 -0
- data/Gemfile +24 -0
- data/Guardfile +17 -0
- data/LICENSE.txt +22 -0
- data/README.md +41 -0
- data/Rakefile +7 -0
- data/lib/svm_helper.rb +8 -0
- data/lib/svm_helper/feature_vector.rb +17 -0
- data/lib/svm_helper/interface_helper.rb +57 -0
- data/lib/svm_helper/preprocessed_data.rb +17 -0
- data/lib/svm_helper/preprocessors.rb +2 -0
- data/lib/svm_helper/preprocessors/simple.rb +111 -0
- data/lib/svm_helper/preprocessors/with_industry_map.rb +40 -0
- data/lib/svm_helper/selectors.rb +3 -0
- data/lib/svm_helper/selectors/n_gram.rb +31 -0
- data/lib/svm_helper/selectors/simple.rb +163 -0
- data/lib/svm_helper/selectors/with_binary_encoding.rb +42 -0
- data/lib/svm_helper/stopwords/de +127 -0
- data/lib/svm_helper/stopwords/en +119 -0
- data/lib/svm_helper/version.rb +3 -0
- data/spec/factories.rb +35 -0
- data/spec/factories/jobs/tmp.html +42 -0
- data/spec/factories/jobs/tmp2.html +20 -0
- data/spec/factories/jobs/tmp3.html +34 -0
- data/spec/factories/jobs_with_description.rb +20 -0
- data/spec/factories/jobs_with_title.rb +72 -0
- data/spec/preprocessors/simple_spec.rb +138 -0
- data/spec/preprocessors/with_industry_map_spec.rb +16 -0
- data/spec/selectors/n_gram_spec.rb +21 -0
- data/spec/selectors/simple_spec.rb +121 -0
- data/spec/selectors/with_binary_encoding_spec.rb +39 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/support/preprocessor_spec.rb +21 -0
- data/spec/support/selector_spec.rb +21 -0
- data/svm_helper.gemspec +21 -0
- metadata +112 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for Selector::WithBinaryEncoding.
#
# The selector's global dictionary is stubbed with a fixed five-word list so
# that the expected vectors can be written out literally. The let names below
# are kept as-is on purpose: the `before` hook also runs inside the nested
# `it_behaves_like` group, which defines its own `selector`.
describe Selector::WithBinaryEncoding do
  it_behaves_like 'a selector'

  let(:simple)     { Selector::WithBinaryEncoding.new }
  let(:dictionary) { %w[auto pferd haus hase garten] }
  let(:data)       { FactoryGirl.build(:data) }
  let(:vector) do
    # Switch the generated vector to its career-level form before inspecting it.
    simple.generate_vector(data).tap(&:career_level!)
  end

  before do
    simple.stubs(:global_dictionary).returns(dictionary)
  end

  it "should build a feature vector for each dataset with the size of the dictionary plus classifications" do
    # Five dictionary words plus four trailing classification entries.
    vector.data.should have(dictionary.size + 4).things
  end

  it "should set 0 if a word from the dictionary NOT exists at the corresponding index" do
    vector.data.first.should eq(0)
  end

  it "should set 1 if a word from the dictionary exists at the corresponding index" do
    vector.data[1].should eq(1)
  end

  it "should set 0's and 1's for each word in the dictionary" do
    vector.data.first(5).should eq([0, 1, 1, 0, 1])
  end

  it "should add a n-sized array of 0's and 1's to the results" do
    vector.data.last(4).should eq([0, 1, 1, 1])
  end

  it "should call make_vector" do
    simple.expects(:make_vector).once
    simple.generate_vector(data)
  end

  context "custom dictionary" do
    it "should accept a custom dictionary" do
      custom_vector = simple.generate_vector(data, :career_level, %w[pferd flasche glas])
      # Dictionary part followed by the classification part.
      custom_vector.data.should eq([1, 0, 0] + [0, 1, 1, 1])
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler.setup
|
3
|
+
Bundler.require(:default, :test)
|
4
|
+
require 'svm_helper'
|
5
|
+
|
6
|
+
# Global RSpec configuration for the suite: selects Mocha as the mocking
# framework, loads the shared support files and registers the FactoryGirl
# definitions.
RSpec.configure do |config|
  config.mock_with :mocha

  # Requires supporting ruby files with custom matchers and macros, etc,
  # in spec/support/ and its subdirectories.
  # The glob is anchored to this file instead of the current working
  # directory, so the support files are found no matter where the specs are
  # started from; the list is sorted so the load order does not depend on
  # filesystem enumeration order.
  Dir[File.expand_path('../support/**/*.rb', __FILE__)].sort.each { |f| require f }

  FactoryGirl.find_definitions
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Shared examples every preprocessor class is expected to satisfy.
#
# Pull them into a spec with `it_behaves_like 'a preprocessor'`. The class
# under test is taken from `described_class` and is instantiated with an
# `industry_map:` option.
shared_examples_for 'a preprocessor' do
  let(:preprocessor) { described_class.new(industry_map: { 1423 => 3, 523 => 54 }) }
  let(:job)  { FactoryGirl.build(:job) }
  let(:jobs) { [job] }

  before(:each) do
    # Both readers are stubbed without configuring a return value.
    job.stubs(:classification_id)
    job.stubs(:label)
  end

  it { preprocessor.should respond_to :process }

  it "should return a PreprocessedData object" do
    preprocessor.process(job).should be_a(PreprocessedData)
  end

  it "should be able to process multiple jobs" do
    results = preprocessor.process(jobs).to_a
    # Without this check the example would pass vacuously if nothing came
    # back, because the per-element assertion below would never run.
    results.should_not be_empty
    results.each do |result|
      result.should be_a(PreprocessedData)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Shared examples every selector class is expected to satisfy.
#
# Pull them into a spec with `it_behaves_like 'a selector'`. The class under
# test is taken from `described_class` and is instantiated without arguments.
shared_examples_for 'a selector' do
  let(:selector) { described_class.new }
  let(:data)     { FactoryGirl.build(:data) }

  it "should return a FeatureVector object" do
    selector.generate_vector(data).should be_a(FeatureVector)
  end

  it "should create an array with 0 and 1's" do
    vector = selector.generate_vector(data)
    vector.data.each do |entry|
      [0, 1].should include(entry)
    end
  end

  it "should be able to process multiple data entries at once" do
    vectors = selector.generate_vectors([data]).to_a
    # Without this check the example would pass vacuously if nothing came
    # back, because the per-element comparison below would never run.
    vectors.should_not be_empty
    vectors.each do |vector|
      vector.should == selector.generate_vector(data)
    end
  end
end
|
data/svm_helper.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'svm_helper/version'
|
5
|
+
|
6
|
+
# Gem specification for svm_helper. The version comes from
# SvmHelper::VERSION and the packaged file list from `git ls-files`, so the
# gem has to be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name        = 'svm_helper'
  spec.version     = SvmHelper::VERSION
  spec.authors     = ['Andreas Eger']
  spec.email       = ['dev@eger-andreas.de']
  spec.description = 'Shared helper classes for usage in context of SVM at experteer'
  spec.summary     = 'Preprocessor and Selector classes to generate FeatureVectors from Job data'
  spec.homepage    = 'https://github.com/sch1zo/svm_helper'

  tracked_files = `git ls-files`.split($/)

  spec.files         = tracked_files
  # Anything tracked under bin/ is exposed as an executable, by base name.
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'parallel', '~> 0.6.2'
end
|
metadata
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: svm_helper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andreas Eger
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-03-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: parallel
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.6.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.2
|
27
|
+
description: Shared helper classes for usage in context of SVM at experteer
|
28
|
+
email:
|
29
|
+
- dev@eger-andreas.de
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- .gitignore
|
35
|
+
- .rspec
|
36
|
+
- .ruby-version
|
37
|
+
- .travis.yml
|
38
|
+
- .versions.conf
|
39
|
+
- .yardopts
|
40
|
+
- Gemfile
|
41
|
+
- Guardfile
|
42
|
+
- LICENSE.txt
|
43
|
+
- README.md
|
44
|
+
- Rakefile
|
45
|
+
- lib/svm_helper.rb
|
46
|
+
- lib/svm_helper/feature_vector.rb
|
47
|
+
- lib/svm_helper/interface_helper.rb
|
48
|
+
- lib/svm_helper/preprocessed_data.rb
|
49
|
+
- lib/svm_helper/preprocessors.rb
|
50
|
+
- lib/svm_helper/preprocessors/simple.rb
|
51
|
+
- lib/svm_helper/preprocessors/with_industry_map.rb
|
52
|
+
- lib/svm_helper/selectors.rb
|
53
|
+
- lib/svm_helper/selectors/n_gram.rb
|
54
|
+
- lib/svm_helper/selectors/simple.rb
|
55
|
+
- lib/svm_helper/selectors/with_binary_encoding.rb
|
56
|
+
- lib/svm_helper/stopwords/de
|
57
|
+
- lib/svm_helper/stopwords/en
|
58
|
+
- lib/svm_helper/version.rb
|
59
|
+
- spec/factories.rb
|
60
|
+
- spec/factories/jobs/tmp.html
|
61
|
+
- spec/factories/jobs/tmp2.html
|
62
|
+
- spec/factories/jobs/tmp3.html
|
63
|
+
- spec/factories/jobs_with_description.rb
|
64
|
+
- spec/factories/jobs_with_title.rb
|
65
|
+
- spec/preprocessors/simple_spec.rb
|
66
|
+
- spec/preprocessors/with_industry_map_spec.rb
|
67
|
+
- spec/selectors/n_gram_spec.rb
|
68
|
+
- spec/selectors/simple_spec.rb
|
69
|
+
- spec/selectors/with_binary_encoding_spec.rb
|
70
|
+
- spec/spec_helper.rb
|
71
|
+
- spec/support/preprocessor_spec.rb
|
72
|
+
- spec/support/selector_spec.rb
|
73
|
+
- svm_helper.gemspec
|
74
|
+
homepage: https://github.com/sch1zo/svm_helper
|
75
|
+
licenses: []
|
76
|
+
metadata: {}
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options: []
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
requirements: []
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 2.0.0.rc.2
|
94
|
+
signing_key:
|
95
|
+
specification_version: 4
|
96
|
+
summary: Preprocessor and Selector classes to generate FeatureVectors from Job data
|
97
|
+
test_files:
|
98
|
+
- spec/factories.rb
|
99
|
+
- spec/factories/jobs/tmp.html
|
100
|
+
- spec/factories/jobs/tmp2.html
|
101
|
+
- spec/factories/jobs/tmp3.html
|
102
|
+
- spec/factories/jobs_with_description.rb
|
103
|
+
- spec/factories/jobs_with_title.rb
|
104
|
+
- spec/preprocessors/simple_spec.rb
|
105
|
+
- spec/preprocessors/with_industry_map_spec.rb
|
106
|
+
- spec/selectors/n_gram_spec.rb
|
107
|
+
- spec/selectors/simple_spec.rb
|
108
|
+
- spec/selectors/with_binary_encoding_spec.rb
|
109
|
+
- spec/spec_helper.rb
|
110
|
+
- spec/support/preprocessor_spec.rb
|
111
|
+
- spec/support/selector_spec.rb
|
112
|
+
has_rdoc:
|