svm_helper 0.1.0

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +9 -0
  6. data/.versions.conf +4 -0
  7. data/.yardopts +3 -0
  8. data/Gemfile +24 -0
  9. data/Guardfile +17 -0
  10. data/LICENSE.txt +22 -0
  11. data/README.md +41 -0
  12. data/Rakefile +7 -0
  13. data/lib/svm_helper.rb +8 -0
  14. data/lib/svm_helper/feature_vector.rb +17 -0
  15. data/lib/svm_helper/interface_helper.rb +57 -0
  16. data/lib/svm_helper/preprocessed_data.rb +17 -0
  17. data/lib/svm_helper/preprocessors.rb +2 -0
  18. data/lib/svm_helper/preprocessors/simple.rb +111 -0
  19. data/lib/svm_helper/preprocessors/with_industry_map.rb +40 -0
  20. data/lib/svm_helper/selectors.rb +3 -0
  21. data/lib/svm_helper/selectors/n_gram.rb +31 -0
  22. data/lib/svm_helper/selectors/simple.rb +163 -0
  23. data/lib/svm_helper/selectors/with_binary_encoding.rb +42 -0
  24. data/lib/svm_helper/stopwords/de +127 -0
  25. data/lib/svm_helper/stopwords/en +119 -0
  26. data/lib/svm_helper/version.rb +3 -0
  27. data/spec/factories.rb +35 -0
  28. data/spec/factories/jobs/tmp.html +42 -0
  29. data/spec/factories/jobs/tmp2.html +20 -0
  30. data/spec/factories/jobs/tmp3.html +34 -0
  31. data/spec/factories/jobs_with_description.rb +20 -0
  32. data/spec/factories/jobs_with_title.rb +72 -0
  33. data/spec/preprocessors/simple_spec.rb +138 -0
  34. data/spec/preprocessors/with_industry_map_spec.rb +16 -0
  35. data/spec/selectors/n_gram_spec.rb +21 -0
  36. data/spec/selectors/simple_spec.rb +121 -0
  37. data/spec/selectors/with_binary_encoding_spec.rb +39 -0
  38. data/spec/spec_helper.rb +14 -0
  39. data/spec/support/preprocessor_spec.rb +21 -0
  40. data/spec/support/selector_spec.rb +21 -0
  41. data/svm_helper.gemspec +21 -0
  42. metadata +112 -0
data/spec/selectors/with_binary_encoding_spec.rb ADDED
@@ -0,0 +1,39 @@
1
+ require "spec_helper"
2
+
3
+ describe Selector::WithBinaryEncoding do
4
+ it_behaves_like 'a selector'
5
+ let(:simple) { Selector::WithBinaryEncoding.new }
6
+
7
+ let(:dictionary) { %w(auto pferd haus hase garten) }
8
+ let(:data) { FactoryGirl.build(:data) }
9
+ let(:vector) { simple.generate_vector(data).tap{|e| e.career_level! } }
10
+
11
+ before(:each) do
12
+ simple.stubs(:global_dictionary).returns(dictionary)
13
+ end
14
+ it "should build a feature vector for each dataset with the size of the dictionary plus classifications" do
15
+ vector.data.should have(5+4).things
16
+ end
17
+ it "should set 0 if a word from the dictionary NOT exists at the corresponding index" do
18
+ vector.data[0].should eq(0)
19
+ end
20
+ it "should set 1 if a word from the dictionary exists at the corresponding index" do
21
+ vector.data[1].should eq(1)
22
+ end
23
+ it "should set 0's and 1's for each word in the dictionary" do
24
+ vector.data.first(5).should eq([0,1,1,0,1])
25
+ end
26
+ it "should add a n-sized array of 0's and 1's to the results" do
27
+ vector.data.last(4).should eq([0,1,1,1])
28
+ end
29
+ it "should call make_vector" do
30
+ simple.expects(:make_vector).once
31
+ simple.generate_vector(data)
32
+ end
33
+ context "custom dictionary" do
34
+ it "should accept a custom dictionary" do
35
+ vector = simple.generate_vector(data, :career_level, %w(pferd flasche glas))
36
+ vector.data.should eq([[1,0,0],[0,1,1,1]].flatten)
37
+ end
38
+ end
39
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'bundler'
2
+ Bundler.setup
3
+ Bundler.require(:default, :test)
4
+ require 'svm_helper'
5
+
6
+ RSpec.configure do |config|
7
+ config.mock_with :mocha
8
+
9
+ # Requires supporting ruby files with custom matchers and macros, etc,
10
+ # in spec/support/ and its subdirectories.
11
+ Dir["./spec/support/**/*.rb"].each {|f| require f}
12
+
13
+ FactoryGirl.find_definitions
14
+ end
data/spec/support/preprocessor_spec.rb ADDED
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
+ shared_examples_for 'a preprocessor' do
4
+ let(:preprocessor) { described_class.new(industry_map: {1423=>3, 523=>54}) }
5
+ let(:job) { FactoryGirl.build(:job) }
6
+ let(:jobs) { [job] }
7
+
8
+ before(:each) do
9
+ job.stubs(:classification_id)
10
+ job.stubs(:label)
11
+ end
12
+ it { preprocessor.should respond_to :process }
13
+ it "should return a PreprocessedData object" do
14
+ preprocessor.process(job).should be_a(PreprocessedData)
15
+ end
16
+ it "should be able to process multiple jobs" do
17
+ preprocessor.process(jobs).each do |e|
18
+ e.should be_a(PreprocessedData)
19
+ end
20
+ end
21
+ end
data/spec/support/selector_spec.rb ADDED
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
+ shared_examples_for 'a selector' do
4
+ let(:selector) { described_class.new }
5
+ let(:data) { FactoryGirl.build(:data) }
6
+
7
+ it "should return a FeatureVector object" do
8
+ selector.generate_vector(data).should be_a(FeatureVector)
9
+ end
10
+ it "should create and array with 0 and 1's" do
11
+ vector = selector.generate_vector(data)
12
+ vector.data.each do |e|
13
+ [0,1].should include(e)
14
+ end
15
+ end
16
+ it "should be able to process multiple data entries at once" do
17
+ selector.generate_vectors([data]).each do |e|
18
+ e.should == selector.generate_vector(data)
19
+ end
20
+ end
21
+ end
data/svm_helper.gemspec ADDED
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'svm_helper/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "svm_helper"
8
+ gem.version = SvmHelper::VERSION
9
+ gem.authors = ["Andreas Eger"]
10
+ gem.email = ["dev@eger-andreas.de"]
11
+ gem.description = %q{Shared helper classes for usage in context of SVM at experteer}
12
+ gem.summary = %q{Preprocessor and Selector classes to generate FeatureVectors from Job data}
13
+ gem.homepage = "https://github.com/sch1zo/svm_helper"
14
+
15
+ gem.files = `git ls-files`.split($/)
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = ["lib"]
19
+
20
+ gem.add_dependency('parallel', '~> 0.6.2')
21
+ end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: svm_helper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andreas Eger
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-03-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: parallel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.6.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.2
27
+ description: Shared helper classes for usage in context of SVM at experteer
28
+ email:
29
+ - dev@eger-andreas.de
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - .gitignore
35
+ - .rspec
36
+ - .ruby-version
37
+ - .travis.yml
38
+ - .versions.conf
39
+ - .yardopts
40
+ - Gemfile
41
+ - Guardfile
42
+ - LICENSE.txt
43
+ - README.md
44
+ - Rakefile
45
+ - lib/svm_helper.rb
46
+ - lib/svm_helper/feature_vector.rb
47
+ - lib/svm_helper/interface_helper.rb
48
+ - lib/svm_helper/preprocessed_data.rb
49
+ - lib/svm_helper/preprocessors.rb
50
+ - lib/svm_helper/preprocessors/simple.rb
51
+ - lib/svm_helper/preprocessors/with_industry_map.rb
52
+ - lib/svm_helper/selectors.rb
53
+ - lib/svm_helper/selectors/n_gram.rb
54
+ - lib/svm_helper/selectors/simple.rb
55
+ - lib/svm_helper/selectors/with_binary_encoding.rb
56
+ - lib/svm_helper/stopwords/de
57
+ - lib/svm_helper/stopwords/en
58
+ - lib/svm_helper/version.rb
59
+ - spec/factories.rb
60
+ - spec/factories/jobs/tmp.html
61
+ - spec/factories/jobs/tmp2.html
62
+ - spec/factories/jobs/tmp3.html
63
+ - spec/factories/jobs_with_description.rb
64
+ - spec/factories/jobs_with_title.rb
65
+ - spec/preprocessors/simple_spec.rb
66
+ - spec/preprocessors/with_industry_map_spec.rb
67
+ - spec/selectors/n_gram_spec.rb
68
+ - spec/selectors/simple_spec.rb
69
+ - spec/selectors/with_binary_encoding_spec.rb
70
+ - spec/spec_helper.rb
71
+ - spec/support/preprocessor_spec.rb
72
+ - spec/support/selector_spec.rb
73
+ - svm_helper.gemspec
74
+ homepage: https://github.com/sch1zo/svm_helper
75
+ licenses: []
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ required_rubygems_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project:
93
+ rubygems_version: 2.0.0.rc.2
94
+ signing_key:
95
+ specification_version: 4
96
+ summary: Preprocessor and Selector classes to generate FeatureVectors from Job data
97
+ test_files:
98
+ - spec/factories.rb
99
+ - spec/factories/jobs/tmp.html
100
+ - spec/factories/jobs/tmp2.html
101
+ - spec/factories/jobs/tmp3.html
102
+ - spec/factories/jobs_with_description.rb
103
+ - spec/factories/jobs_with_title.rb
104
+ - spec/preprocessors/simple_spec.rb
105
+ - spec/preprocessors/with_industry_map_spec.rb
106
+ - spec/selectors/n_gram_spec.rb
107
+ - spec/selectors/simple_spec.rb
108
+ - spec/selectors/with_binary_encoding_spec.rb
109
+ - spec/spec_helper.rb
110
+ - spec/support/preprocessor_spec.rb
111
+ - spec/support/selector_spec.rb
112
+ has_rdoc: