svm_helper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +9 -0
  6. data/.versions.conf +4 -0
  7. data/.yardopts +3 -0
  8. data/Gemfile +24 -0
  9. data/Guardfile +17 -0
  10. data/LICENSE.txt +22 -0
  11. data/README.md +41 -0
  12. data/Rakefile +7 -0
  13. data/lib/svm_helper.rb +8 -0
  14. data/lib/svm_helper/feature_vector.rb +17 -0
  15. data/lib/svm_helper/interface_helper.rb +57 -0
  16. data/lib/svm_helper/preprocessed_data.rb +17 -0
  17. data/lib/svm_helper/preprocessors.rb +2 -0
  18. data/lib/svm_helper/preprocessors/simple.rb +111 -0
  19. data/lib/svm_helper/preprocessors/with_industry_map.rb +40 -0
  20. data/lib/svm_helper/selectors.rb +3 -0
  21. data/lib/svm_helper/selectors/n_gram.rb +31 -0
  22. data/lib/svm_helper/selectors/simple.rb +163 -0
  23. data/lib/svm_helper/selectors/with_binary_encoding.rb +42 -0
  24. data/lib/svm_helper/stopwords/de +127 -0
  25. data/lib/svm_helper/stopwords/en +119 -0
  26. data/lib/svm_helper/version.rb +3 -0
  27. data/spec/factories.rb +35 -0
  28. data/spec/factories/jobs/tmp.html +42 -0
  29. data/spec/factories/jobs/tmp2.html +20 -0
  30. data/spec/factories/jobs/tmp3.html +34 -0
  31. data/spec/factories/jobs_with_description.rb +20 -0
  32. data/spec/factories/jobs_with_title.rb +72 -0
  33. data/spec/preprocessors/simple_spec.rb +138 -0
  34. data/spec/preprocessors/with_industry_map_spec.rb +16 -0
  35. data/spec/selectors/n_gram_spec.rb +21 -0
  36. data/spec/selectors/simple_spec.rb +121 -0
  37. data/spec/selectors/with_binary_encoding_spec.rb +39 -0
  38. data/spec/spec_helper.rb +14 -0
  39. data/spec/support/preprocessor_spec.rb +21 -0
  40. data/spec/support/selector_spec.rb +21 -0
  41. data/svm_helper.gemspec +21 -0
  42. metadata +112 -0
@@ -0,0 +1,39 @@
1
+ require "spec_helper"
2
+
3
+ describe Selector::WithBinaryEncoding do
4
+ it_behaves_like 'a selector'
5
+ let(:simple) { Selector::WithBinaryEncoding.new }
6
+
7
+ let(:dictionary) { %w(auto pferd haus hase garten) }
8
+ let(:data) { FactoryGirl.build(:data) }
9
+ let(:vector) { simple.generate_vector(data).tap{|e| e.career_level! } }
10
+
11
+ before(:each) do
12
+ simple.stubs(:global_dictionary).returns(dictionary)
13
+ end
14
+ it "should build a feature vector for each dataset with the size of the dictionary plus classifications" do
15
+ vector.data.should have(5+4).things
16
+ end
17
+ it "should set 0 if a word from the dictionary NOT exists at the corresponding index" do
18
+ vector.data[0].should eq(0)
19
+ end
20
+ it "should set 1 if a word from the dictionary exists at the corresponding index" do
21
+ vector.data[1].should eq(1)
22
+ end
23
+ it "should set 0's and 1's for each word in the dictionary" do
24
+ vector.data.first(5).should eq([0,1,1,0,1])
25
+ end
26
+ it "should add a n-sized array of 0's and 1's to the results" do
27
+ vector.data.last(4).should eq([0,1,1,1])
28
+ end
29
+ it "should call make_vector" do
30
+ simple.expects(:make_vector).once
31
+ simple.generate_vector(data)
32
+ end
33
+ context "custom dictionary" do
34
+ it "should accept a custom dictionary" do
35
+ vector = simple.generate_vector(data, :career_level, %w(pferd flasche glas))
36
+ vector.data.should eq([[1,0,0],[0,1,1,1]].flatten)
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,14 @@
1
+ require 'bundler'
2
+ Bundler.setup
3
+ Bundler.require(:default, :test)
4
+ require 'svm_helper'
5
+
6
+ RSpec.configure do |config|
7
+ config.mock_with :mocha
8
+
9
+ # Requires supporting Ruby files with custom matchers, macros, etc.,
10
+ # in spec/support/ and its subdirectories.
11
+ Dir["./spec/support/**/*.rb"].each {|f| require f}
12
+
13
+ FactoryGirl.find_definitions
14
+ end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
+ shared_examples_for 'a preprocessor' do
4
+ let(:preprocessor) { described_class.new(industry_map: {1423=>3, 523=>54}) }
5
+ let(:job) { FactoryGirl.build(:job) }
6
+ let(:jobs) { [job] }
7
+
8
+ before(:each) do
9
+ job.stubs(:classification_id)
10
+ job.stubs(:label)
11
+ end
12
+ it { preprocessor.should respond_to :process }
13
+ it "should return a PreprocessedData object" do
14
+ preprocessor.process(job).should be_a(PreprocessedData)
15
+ end
16
+ it "should be able to process multiple jobs" do
17
+ preprocessor.process(jobs).each do |e|
18
+ e.should be_a(PreprocessedData)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
+ shared_examples_for 'a selector' do
4
+ let(:selector) { described_class.new }
5
+ let(:data) { FactoryGirl.build(:data) }
6
+
7
+ it "should return a FeatureVector object" do
8
+ selector.generate_vector(data).should be_a(FeatureVector)
9
+ end
10
+ it "should create and array with 0 and 1's" do
11
+ vector = selector.generate_vector(data)
12
+ vector.data.each do |e|
13
+ [0,1].should include(e)
14
+ end
15
+ end
16
+ it "should be able to process multiple data entries at once" do
17
+ selector.generate_vectors([data]).each do |e|
18
+ e.should == selector.generate_vector(data)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'svm_helper/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "svm_helper"
8
+ gem.version = SvmHelper::VERSION
9
+ gem.authors = ["Andreas Eger"]
10
+ gem.email = ["dev@eger-andreas.de"]
11
+ gem.description = %q{Shared helper classes for usage in context of SVM at experteer}
12
+ gem.summary = %q{Preprocessor and Selector classes to generate FeatureVectors from Job data}
13
+ gem.homepage = "https://github.com/sch1zo/svm_helper"
14
+
15
+ gem.files = `git ls-files`.split($/)
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = ["lib"]
19
+
20
+ gem.add_dependency('parallel', '~> 0.6.2')
21
+ end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: svm_helper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andreas Eger
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-03-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: parallel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.6.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.2
27
+ description: Shared helper classes for usage in context of SVM at experteer
28
+ email:
29
+ - dev@eger-andreas.de
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - .gitignore
35
+ - .rspec
36
+ - .ruby-version
37
+ - .travis.yml
38
+ - .versions.conf
39
+ - .yardopts
40
+ - Gemfile
41
+ - Guardfile
42
+ - LICENSE.txt
43
+ - README.md
44
+ - Rakefile
45
+ - lib/svm_helper.rb
46
+ - lib/svm_helper/feature_vector.rb
47
+ - lib/svm_helper/interface_helper.rb
48
+ - lib/svm_helper/preprocessed_data.rb
49
+ - lib/svm_helper/preprocessors.rb
50
+ - lib/svm_helper/preprocessors/simple.rb
51
+ - lib/svm_helper/preprocessors/with_industry_map.rb
52
+ - lib/svm_helper/selectors.rb
53
+ - lib/svm_helper/selectors/n_gram.rb
54
+ - lib/svm_helper/selectors/simple.rb
55
+ - lib/svm_helper/selectors/with_binary_encoding.rb
56
+ - lib/svm_helper/stopwords/de
57
+ - lib/svm_helper/stopwords/en
58
+ - lib/svm_helper/version.rb
59
+ - spec/factories.rb
60
+ - spec/factories/jobs/tmp.html
61
+ - spec/factories/jobs/tmp2.html
62
+ - spec/factories/jobs/tmp3.html
63
+ - spec/factories/jobs_with_description.rb
64
+ - spec/factories/jobs_with_title.rb
65
+ - spec/preprocessors/simple_spec.rb
66
+ - spec/preprocessors/with_industry_map_spec.rb
67
+ - spec/selectors/n_gram_spec.rb
68
+ - spec/selectors/simple_spec.rb
69
+ - spec/selectors/with_binary_encoding_spec.rb
70
+ - spec/spec_helper.rb
71
+ - spec/support/preprocessor_spec.rb
72
+ - spec/support/selector_spec.rb
73
+ - svm_helper.gemspec
74
+ homepage: https://github.com/sch1zo/svm_helper
75
+ licenses: []
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ required_rubygems_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project:
93
+ rubygems_version: 2.0.0.rc.2
94
+ signing_key:
95
+ specification_version: 4
96
+ summary: Preprocessor and Selector classes to generate FeatureVectors from Job data
97
+ test_files:
98
+ - spec/factories.rb
99
+ - spec/factories/jobs/tmp.html
100
+ - spec/factories/jobs/tmp2.html
101
+ - spec/factories/jobs/tmp3.html
102
+ - spec/factories/jobs_with_description.rb
103
+ - spec/factories/jobs_with_title.rb
104
+ - spec/preprocessors/simple_spec.rb
105
+ - spec/preprocessors/with_industry_map_spec.rb
106
+ - spec/selectors/n_gram_spec.rb
107
+ - spec/selectors/simple_spec.rb
108
+ - spec/selectors/with_binary_encoding_spec.rb
109
+ - spec/spec_helper.rb
110
+ - spec/support/preprocessor_spec.rb
111
+ - spec/support/selector_spec.rb
112
+ has_rdoc: