svm_helper 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Guardfile CHANGED
@@ -1,5 +1,5 @@
1
- # guard 'rspec', cli: "--color --format p", all_after_pass: false do
2
- guard 'rspec', cli: "--color --format p", all_after_pass: false, rvm:['2.0.0@svm_helper', 'jruby@svm_helper'] do
1
+ guard 'rspec', cli: "--color --format p", all_after_pass: false do
2
+ # guard 'rspec', cli: "--color --format p", all_after_pass: false, rvm:['2.0.0@svm_helper', 'jruby@svm_helper'] do
3
3
  watch(%r{^spec/.+_spec\.rb$})
4
4
  watch(%r{^lib/svm_helper/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
5
5
  watch('spec/spec_helper.rb') { 'spec' }
@@ -5,13 +5,10 @@ require_relative 'interface_helper'
5
5
  # @author Andreas Eger
6
6
  class FeatureVector < InterfaceHelper
7
7
  attribute :word_data
8
- attribute :classification_arrays
9
- attribute :labels
8
+ attribute :classification
9
+ attribute :label
10
10
 
11
- def label
12
- labels[classification]
13
- end
14
11
  def data
15
- word_data + classification_arrays[classification]
12
+ word_data + classification
16
13
  end
17
14
  end
@@ -30,7 +30,6 @@ class InterfaceHelper
30
30
  params.each do |key, value|
31
31
  send("#{key}=", value)
32
32
  end
33
- @_attributes[:classification] ||= :function
34
33
  end
35
34
 
36
35
  #
@@ -41,17 +40,4 @@ class InterfaceHelper
41
40
  def == anOther
42
41
  @_attributes.keys.map{ |sym| self.send(sym) == anOther.send(sym)}.reduce(true){|a,e| a && e }
43
42
  end
44
-
45
- def industry!
46
- @_attributes[:classification] = :industry
47
- end
48
- def function!
49
- @_attributes[:classification] = :function
50
- end
51
- def career_level!
52
- @_attributes[:classification] = :career_level
53
- end
54
- def classification
55
- @_attributes[:classification]
56
- end
57
43
  end
@@ -5,13 +5,6 @@ require_relative 'interface_helper'
5
5
  # @author Andreas Eger
6
6
  class PreprocessedData < InterfaceHelper
7
7
  attribute :data
8
- attribute :ids
9
- attribute :labels
10
-
11
- def id
12
- ids[classification]
13
- end
14
- def label
15
- labels[classification]
16
- end
8
+ attribute :id
9
+ attribute :label
17
10
  end
@@ -0,0 +1,35 @@
1
+ require_relative 'simple'
2
+ module Preprocessor
3
+ #
4
+ # Preprocessor Base Class
5
+ #
6
+ # @author Andreas Eger
7
+ #
8
+ class IDMapping < Simple
9
+ attr_reader :id_map
10
+
11
+ #
12
+ # @param args [Hash] options hash
13
+ # @param id_map [Hash] mapping for the tree-like ids to continuous ones
14
+ def initialize id_map, args={}
15
+ super(args)
16
+ @id_map = id_map
17
+ end
18
+
19
+ def map_id(id)
20
+ @id_map[id]
21
+ end
22
+ def label
23
+ "with_id_mapping"
24
+ end
25
+
26
+ private
27
+ def process_job job
28
+ PreprocessedData.new(
29
+ data: [clean_title(job[:title]), clean_description(job[:description])],
30
+ id: map_id(job[:id]),
31
+ label: job[:label]
32
+ )
33
+ end
34
+ end
35
+ end
@@ -35,21 +35,26 @@ module Preprocessor
35
35
  #
36
36
  # cleans provided jobs
37
37
  # @overload process(jobs, classification)
38
- # @param jobs [Job] single Job
38
+ # @param jobs [Hash] single Job
39
+ # @option title
40
+ # @option description
41
+ # @option id
42
+ # @option label
39
43
  # @param classification [Symbol] in `:industry`, `:function`, `:career_level`
40
44
  # @overload process(jobs, classification)
41
- # @param jobs [Array<Job>] list of Jobs
45
+ # @param jobs [Array<Hash>] list of Jobs
42
46
  # @param classification [Symbol] in `:industry`, `:function`, `:career_level`
43
47
  #
44
48
  # @return [Array<PreprocessedData>] list of processed job data - or single job data
45
- def process jobs, classification=:function
46
- if jobs.respond_to? :map
47
- process_jobs jobs, classification
49
+ def process jobs
50
+ if jobs.is_a? Array
51
+ process_jobs jobs
48
52
  else
49
- process_job jobs, classification
53
+ process_job jobs
50
54
  end
51
55
  end
52
56
 
57
+
53
58
  #
54
59
  # converts string into a cleaner version
55
60
  # @param title [String] job title
@@ -84,28 +89,22 @@ module Preprocessor
84
89
  end
85
90
 
86
91
  private
87
- def process_jobs jobs, classification
92
+ def process_jobs jobs
88
93
  if @parallel && RUBY_PLATFORM == 'java'
89
- Parallel.map(jobs, in_threads: THREAD_COUNT ) {|job| process_job job, classification }
94
+ Parallel.map(jobs, in_threads: THREAD_COUNT ) {|job| process_job job }
90
95
  elsif @parallel
91
- Parallel.map(jobs, in_processes: THREAD_COUNT ) {|job| process_job job, classification }
96
+ Parallel.map(jobs, in_processes: THREAD_COUNT ) {|job| process_job job }
92
97
  else
93
- jobs.map {|job| process_job job, classification }
98
+ jobs.map {|job| process_job job }
94
99
  end
95
100
  end
96
101
 
97
- def process_job job, classification
102
+ def process_job job
98
103
  PreprocessedData.new(
99
- data: [ clean_title(job.title), clean_description(job.description) ],
100
- ids: {
101
- industry: job.classification_id(:industry),
102
- function: job.classification_id(:function),
103
- career_level: job.classification_id(:career_level) },
104
- labels: {
105
- industry: job.label(:industry),
106
- function: job.label(:function),
107
- career_level: job.label(:career_level) }
108
- ).tap{|e| e.send("#{classification}!")}
104
+ data: [clean_title(job[:title]), clean_description(job[:description])],
105
+ id: job[:id],
106
+ label: job[:label]
107
+ )
109
108
  end
110
109
  end
111
110
  end
@@ -1,2 +1,2 @@
1
1
  require_relative 'preprocessors/simple'
2
- require_relative 'preprocessors/with_industry_map'
2
+ require_relative 'preprocessors/id_mapping'
@@ -8,7 +8,7 @@ module Selector
8
8
  class NGram < Selector::Simple
9
9
  attr_reader :gram_size
10
10
 
11
- def initialize args={}
11
+ def initialize classification, args={}
12
12
  super
13
13
  @gram_size = args.fetch(:gram_size) { 2 }
14
14
  end
@@ -26,7 +26,8 @@ module Selector
26
26
 
27
27
  attr_accessor :global_dictionary
28
28
 
29
- def initialize args={}
29
+ def initialize classification, args={}
30
+ @classification = classification
30
31
  @global_dictionary = args.fetch(:global_dictionary) {[]}
31
32
  @language = args.fetch(:language){'en'}
32
33
  @parallel = args.fetch(:parallel){false}
@@ -43,13 +44,13 @@ module Selector
43
44
  # @param dictionary_size [Integer] Size of a dictionary to create if none exists
44
45
  #
45
46
  # @return [Array<FeatureVector>] list of feature vectors and labels
46
- def generate_vectors data_set, classification=:function, dictionary_size=DEFAULT_DICTIONARY_SIZE
47
+ def generate_vectors data_set, dictionary_size=DEFAULT_DICTIONARY_SIZE
47
48
  words_per_data = extract_words data_set
48
49
  generate_global_dictionary words_per_data, dictionary_size
49
50
 
50
51
  make_vectors(words_per_data) do |words,index|
51
52
  word_set = words.uniq
52
- make_vector word_set, data_set[index], classification
53
+ make_vector word_set, data_set[index]
53
54
  end
54
55
  end
55
56
 
@@ -60,9 +61,9 @@ module Selector
60
61
  # @param dictionary [Array] dictionary to use for this selection
61
62
  #
62
63
  # @return [FeatureVector]
63
- def generate_vector data, classification=:function, dictionary=global_dictionary
64
+ def generate_vector data, dictionary=global_dictionary
64
65
  word_set = Set.new extract_words_from_data(data)
65
- make_vector word_set, data, classification, dictionary
66
+ make_vector word_set, data, dictionary
66
67
  end
67
68
 
68
69
  #
@@ -109,8 +110,9 @@ module Selector
109
110
  (data.data.flat_map(&:split) - stopwords).delete_if { |e| e.size <= 3 }
110
111
  end
111
112
 
112
- def reset
113
+ def reset classification
113
114
  @global_dictionary = []
115
+ @classification = classification
114
116
  end
115
117
 
116
118
  private
@@ -120,24 +122,17 @@ module Selector
120
122
  # also adds the label
121
123
  # @param words [Array<String>] list of words
122
124
  # @param data [PreprocessedData]
123
- # @param classification [Symbol] in `:industry`, `:function`, `:career_level`
124
125
  # @param dictionary
125
126
  #
126
127
  # @return [FeatureVector]
127
- def make_vector words, data, classification, dictionary=global_dictionary
128
+ def make_vector words, data, dictionary=global_dictionary
128
129
  FeatureVector.new(
129
130
  word_data: dictionary.map{|dic_word|
130
- words.include?(dic_word) ? 1 : 0
131
- },
132
- classification_arrays: {
133
- function: classification_array(data.ids, :function),
134
- industry: classification_array(data.ids, :industry),
135
- career_level: classification_array(data.ids, :career_level) },
136
- labels: {
137
- function: data.labels[:function] ? 1 : 0,
138
- industry: data.labels[:industry] ? 1 : 0,
139
- career_level: data.labels[:career_level] ? 1 : 0 }
140
- ).tap{|e| e.send("#{classification}!")}
131
+ words.include?(dic_word) ? 1 : 0
132
+ },
133
+ classification: classification_array(data.id),
134
+ label: data.label ? 1 : 0
135
+ )
141
136
  end
142
137
 
143
138
  def make_vectors data, &block
@@ -155,9 +150,8 @@ module Selector
155
150
  # @param ids [Hash] hash with classification ids
156
151
  #
157
152
  # @return [Array<Integer>] list of size=count(classification_ids) with only one non-zero item
158
- def classification_array(ids, classification)
159
- id = ids[classification]
160
- Array.new(CLASSIFICATIONS_SIZE[classification]){|n| n==(id-1) ? 1 : 0}
153
+ def classification_array(id)
154
+ Array.new(CLASSIFICATIONS_SIZE[@classification]){|n| n==(id-1) ? 1 : 0}
161
155
  end
162
156
  end
163
157
  end
@@ -12,7 +12,7 @@ module Selector
12
12
  industry: 16, # max id 65535, currently 14370
13
13
  career_level: 4 } # max id 15, currently 8
14
14
 
15
- def initialize args={}
15
+ def initialize *args
16
16
  super
17
17
  end
18
18
 
@@ -26,9 +26,8 @@ module Selector
26
26
  # @param ids [Hash] hash with classification ids
27
27
  #
28
28
  # @return [Array<Integer>] binary encoded classification id
29
- def classification_array(ids, classification)
30
- id = ids[classification]
31
- number_to_binary_array(id, CLASSIFICATIONS_SIZE[classification])
29
+ def classification_array(id)
30
+ number_to_binary_array(id, CLASSIFICATIONS_SIZE[@classification])
32
31
  end
33
32
 
34
33
  def number_to_binary_array(number, size=8)
@@ -1,3 +1,3 @@
1
1
  module SvmHelper
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
data/spec/factories.rb CHANGED
@@ -7,24 +7,20 @@ FactoryGirl.define do
7
7
  wrong_function_id 4
8
8
  wrong_career_level nil
9
9
  end
10
- factory :job, class: OpenStruct do
10
+ factory :job, class: Hash do
11
11
  title "Meh"
12
12
  description "Foo Bar"
13
- summary "Really lot of work to do"
14
- qc_job_check
15
- end
13
+ id 4
14
+ label true
16
15
 
17
- factory :job_without_job_check, class: OpenStruct do
18
- title "Meh"
19
- description "Foo Bar"
20
- summary "Really lot of work to do"
21
- original_industry_id 1423
16
+ initialize_with { attributes }
22
17
  end
23
18
 
19
+
24
20
  factory :data, class: PreprocessedData do
25
21
  data ["haus fooo garten baaz pferd fooo"]
26
- ids {{function: 3, industry: 43, career_level: 7}}
27
- labels {{function: true, industry: false, career_level: true}}
22
+ id 7
23
+ label true
28
24
  end
29
25
  factory :data_w_short_words, parent: :data do
30
26
  data ["auto foo pferd bz gooo fooo 2"]
@@ -0,0 +1,12 @@
1
+ require 'spec_helper'
2
+
3
+ describe Preprocessor::IDMapping do
4
+ it_behaves_like 'a preprocessor'
5
+ let(:preprocessor) { Preprocessor::IDMapping.new(1423=>3, 523=>54) }
6
+ let(:job) { FactoryGirl.build(:job) }
7
+ let(:jobs) { [job] }
8
+ it "should make use of a industry_map" do
9
+ preprocessor.expects(:map_id)
10
+ preprocessor.process(jobs)
11
+ end
12
+ end
@@ -11,31 +11,18 @@ describe Preprocessor::Simple do
11
11
  context do
12
12
  before(:each) do
13
13
  @jobs = FactoryGirl.build_list :job, 3
14
- @jobs.each{|e| e.stubs(:classification_id)}
15
- @jobs.each{|e| e.stubs(:label)}
16
14
  end
17
15
  it "should work with jobs with quality check" do
18
16
  -> {simple.process(@jobs) }.should_not raise_error
19
17
  end
20
- # it "should set labels to true if quality check exists and no wrong_ label set" do
21
- # simple.process(@jobs).each{|e| e.career_level!; e.label.should be_true}
22
- # end
23
- it "should set labels to false if quality check exists and wrong_ label is set" do
24
- simple.process(@jobs).each{|e| e.function!; e.label.should be_false}
18
+ it "should set labels to true if quality check exists and label was true" do
19
+ @jobs.map!{|e| e[:label] = true;e }
20
+ simple.process(@jobs).each{|e| e.label.should be_true}
21
+ end
22
+ it "should set labels to false if quality check exists and label false" do
23
+ @jobs.map!{|e| e[:label] = false;e }
24
+ simple.process(@jobs).each{|e| e.label.should be_false}
25
25
  end
26
- end
27
-
28
- it "should work with jobs without quality check" do
29
- jobs = FactoryGirl.build_list :job_without_job_check, 3
30
- jobs.each{|e| e.stubs(:classification_id)}
31
- jobs.each{|e| e.stubs(:label)}
32
- -> {simple.process(jobs) }.should_not raise_error
33
- end
34
- it "should set labels to false if no quality check" do
35
- jobs = FactoryGirl.build_list :job_without_job_check, 3
36
- jobs.each{|e| e.stubs(:classification_id)}
37
- jobs.each{|e| e.stubs(:label)}
38
- simple.process(jobs).each{|e| e.career_level!; e.label.should be_false}
39
26
  end
40
27
 
41
28
  context "processing" do
@@ -43,8 +30,6 @@ describe Preprocessor::Simple do
43
30
  before(:each) do
44
31
  simple.stubs(:clean_title)
45
32
  simple.stubs(:clean_description)
46
- jobs.each{|e| e.stubs(:classification_id)}
47
- jobs.each{|e| e.stubs(:label)}
48
33
  end
49
34
  it "should call clean_title on each job" do
50
35
  simple.expects(:clean_title).times(3)
@@ -59,7 +44,7 @@ describe Preprocessor::Simple do
59
44
  context "#clean_title" do
60
45
  it "should be downcased" do
61
46
  job = FactoryGirl.build(:job_title_downcasing)
62
- simple.clean_title(job.title).should eq(job.clean_title)
47
+ simple.clean_title(job[:title]).should eq(job[:clean_title])
63
48
  end
64
49
  [ FactoryGirl.build(:job_title_w_gender),
65
50
  FactoryGirl.build(:job_title_w_gender_brackets),
@@ -77,8 +62,8 @@ describe Preprocessor::Simple do
77
62
  FactoryGirl.build(:job_title_var_0),
78
63
  FactoryGirl.build(:job_title_w_special),
79
64
  FactoryGirl.build(:job_title_w_percent)].each do |job|
80
- it "should cleanup '#{job.title}'" do
81
- simple.clean_title(job.title).should eq(job.clean_title)
65
+ it "should cleanup '#{job[:title]}'" do
66
+ simple.clean_title(job[:title]).should eq(job[:clean_title])
82
67
  end
83
68
  end
84
69
  end
@@ -91,27 +76,27 @@ describe Preprocessor::Simple do
91
76
  FactoryGirl.build(:job_description_w_gender) ]
92
77
  }
93
78
  it "should remove html/xml tags" do
94
- desc = simple.clean_description(jobs[0].description)
79
+ desc = simple.clean_description(jobs[0][:description])
95
80
  desc.should_not match(/<(.*?)>/)
96
81
  end
97
82
  it "should remove new lines" do
98
- desc = simple.clean_description(jobs[0].description)
83
+ desc = simple.clean_description(jobs[0][:description])
99
84
  desc.should_not match(/\r\n|\n|\r/)
100
85
  end
101
86
  it "should remove all special characters" do
102
- desc = simple.clean_description(jobs[2].description)
87
+ desc = simple.clean_description(jobs[2][:description])
103
88
  desc.should_not match(/[^a-z öäü]/i)
104
89
  end
105
90
  it "should remove gender tokens" do
106
- desc = simple.clean_description(jobs[3].description)
91
+ desc = simple.clean_description(jobs[3][:description])
107
92
  desc.should_not match(%r{(\(*(m|w)(\/|\|)(w|m)\)*)|(/-*in)|\(in\)})
108
93
  end
109
94
  it "should remove job code token" do
110
- desc = simple.clean_description(jobs[4].description)
95
+ desc = simple.clean_description(jobs[4][:description])
111
96
  desc.should_not match(/\[.*\]|\(.*\)|\{.*\}|\d+\w+/)
112
97
  end
113
98
  it "should be downcased" do
114
- desc = simple.clean_description(jobs[2].description)
99
+ desc = simple.clean_description(jobs[2][:description])
115
100
  desc.should_not match(/[^a-z öäü]/)
116
101
  end
117
102
  end
@@ -125,13 +110,9 @@ describe Preprocessor::Simple do
125
110
  FactoryGirl.build(:job_description_w_code_token),
126
111
  FactoryGirl.build(:job_description_w_gender) ]
127
112
  }
128
- before(:each) do
129
- jobs.each{|e| e.stubs(:classification_id)}
130
- jobs.each{|e| e.stubs(:label)}
131
- end
132
113
  it "should be the same parallelized" do
133
- single = simple.process(jobs, :function)
134
- p_data = parallel.process(jobs, :function)
114
+ single = simple.process(jobs)
115
+ p_data = parallel.process(jobs)
135
116
  single.each.with_index { |e,i| e.data.should == p_data[i].data }
136
117
  end
137
118
  end
@@ -3,7 +3,7 @@ require "spec_helper"
3
3
  describe Selector::NGram do
4
4
  it_behaves_like 'a selector'
5
5
 
6
- let(:ngram) { Selector::NGram.new(gram_size: 3) }
6
+ let(:ngram) { Selector::NGram.new(:function, gram_size: 3) }
7
7
  context "#extract_words_from_data" do
8
8
  it "should generate a list of words from the data" do
9
9
  words = ngram.extract_words_from_data(FactoryGirl.build(:data))
@@ -2,8 +2,8 @@ require "spec_helper"
2
2
 
3
3
  describe Selector::Simple do
4
4
  it_behaves_like 'a selector'
5
-
6
- let(:simple) { Selector::Simple.new }
5
+
6
+ let(:simple) { Selector::Simple.new(:function) }
7
7
  it "should have select_feature_vector implemented" do
8
8
  expect { simple.generate_vectors([]) }.to_not raise_error
9
9
  end
@@ -53,7 +53,8 @@ describe Selector::Simple do
53
53
  context "#generate_vector" do
54
54
  let(:dictionary) { %w(auto pferd haus hase garten) }
55
55
  let(:data) { FactoryGirl.build(:data) }
56
- let(:vector) { simple.generate_vector(data).tap{|e| e.career_level! } }
56
+ let(:simple) { Selector::Simple.new(:career_level) }
57
+ let(:vector) { simple.generate_vector(data) }
57
58
 
58
59
  before(:each) do
59
60
  simple.stubs(:global_dictionary).returns(dictionary)
@@ -79,7 +80,7 @@ describe Selector::Simple do
79
80
  end
80
81
  context "custom dictionary" do
81
82
  it "should accept a custom dictionary" do
82
- vector = simple.generate_vector(data, :career_level, %w(pferd flasche glas))
83
+ vector = simple.generate_vector(data, %w(pferd flasche glas))
83
84
  vector.data.should eq([[1,0,0],[0,0,0,0,0,0,1,0]].flatten)
84
85
  end
85
86
  end
@@ -106,7 +107,7 @@ describe Selector::Simple do
106
107
  simple.generate_vectors(data)
107
108
  end
108
109
  context "parallel" do
109
- let(:parallel) { Selector::Simple.new(parallel: true) }
110
+ let(:parallel) { Selector::Simple.new(:function, parallel: true) }
110
111
  before(:each) do
111
112
  simple.stubs(:global_dictionary).returns(dictionary)
112
113
  parallel.stubs(:global_dictionary).returns(dictionary)
@@ -2,11 +2,11 @@ require "spec_helper"
2
2
 
3
3
  describe Selector::WithBinaryEncoding do
4
4
  it_behaves_like 'a selector'
5
- let(:simple) { Selector::WithBinaryEncoding.new }
5
+ let(:simple) { Selector::WithBinaryEncoding.new(:career_level) }
6
6
 
7
7
  let(:dictionary) { %w(auto pferd haus hase garten) }
8
8
  let(:data) { FactoryGirl.build(:data) }
9
- let(:vector) { simple.generate_vector(data).tap{|e| e.career_level! } }
9
+ let(:vector) { simple.generate_vector(data) }
10
10
 
11
11
  before(:each) do
12
12
  simple.stubs(:global_dictionary).returns(dictionary)
@@ -32,7 +32,7 @@ describe Selector::WithBinaryEncoding do
32
32
  end
33
33
  context "custom dictionary" do
34
34
  it "should accept a custom dictionary" do
35
- vector = simple.generate_vector(data, :career_level, %w(pferd flasche glas))
35
+ vector = simple.generate_vector(data, %w(pferd flasche glas))
36
36
  vector.data.should eq([[1,0,0],[0,1,1,1]].flatten)
37
37
  end
38
38
  end
@@ -1,14 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for 'a preprocessor' do
4
- let(:preprocessor) { described_class.new(industry_map: {1423=>3, 523=>54}) }
4
+ let(:preprocessor) { described_class.new(1423=>3, 523=>54) }
5
5
  let(:job) { FactoryGirl.build(:job) }
6
6
  let(:jobs) { [job] }
7
7
 
8
- before(:each) do
9
- job.stubs(:classification_id)
10
- job.stubs(:label)
11
- end
12
8
  it { preprocessor.should respond_to :process }
13
9
  it "should return a PreprocessedData object" do
14
10
  preprocessor.process(job).should be_a(PreprocessedData)
@@ -1,7 +1,7 @@
1
1
  require "spec_helper"
2
2
 
3
3
  shared_examples_for 'a selector' do
4
- let(:selector) { described_class.new }
4
+ let(:selector) { described_class.new(:function) }
5
5
  let(:data) { FactoryGirl.build(:data) }
6
6
 
7
7
  it "should return a FeatureVector object" do
metadata CHANGED
@@ -1,18 +1,20 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svm_helper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Andreas Eger
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-03-13 00:00:00.000000000 Z
12
+ date: 2013-03-15 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: parallel
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
19
  - - ~>
18
20
  - !ruby/object:Gem::Version
@@ -20,6 +22,7 @@ dependencies:
20
22
  type: :runtime
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
27
  - - ~>
25
28
  - !ruby/object:Gem::Version
@@ -47,8 +50,8 @@ files:
47
50
  - lib/svm_helper/interface_helper.rb
48
51
  - lib/svm_helper/preprocessed_data.rb
49
52
  - lib/svm_helper/preprocessors.rb
53
+ - lib/svm_helper/preprocessors/id_mapping.rb
50
54
  - lib/svm_helper/preprocessors/simple.rb
51
- - lib/svm_helper/preprocessors/with_industry_map.rb
52
55
  - lib/svm_helper/selectors.rb
53
56
  - lib/svm_helper/selectors/n_gram.rb
54
57
  - lib/svm_helper/selectors/simple.rb
@@ -62,8 +65,8 @@ files:
62
65
  - spec/factories/jobs/tmp3.html
63
66
  - spec/factories/jobs_with_description.rb
64
67
  - spec/factories/jobs_with_title.rb
68
+ - spec/preprocessors/id_mapping_spec.rb
65
69
  - spec/preprocessors/simple_spec.rb
66
- - spec/preprocessors/with_industry_map_spec.rb
67
70
  - spec/selectors/n_gram_spec.rb
68
71
  - spec/selectors/simple_spec.rb
69
72
  - spec/selectors/with_binary_encoding_spec.rb
@@ -73,26 +76,33 @@ files:
73
76
  - svm_helper.gemspec
74
77
  homepage: https://github.com/sch1zo/svm_helper
75
78
  licenses: []
76
- metadata: {}
77
79
  post_install_message:
78
80
  rdoc_options: []
79
81
  require_paths:
80
82
  - lib
81
83
  required_ruby_version: !ruby/object:Gem::Requirement
84
+ none: false
82
85
  requirements:
83
86
  - - '>='
84
87
  - !ruby/object:Gem::Version
85
88
  version: '0'
89
+ segments:
90
+ - 0
91
+ hash: 2037039748537332986
86
92
  required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
87
94
  requirements:
88
95
  - - '>='
89
96
  - !ruby/object:Gem::Version
90
97
  version: '0'
98
+ segments:
99
+ - 0
100
+ hash: 2037039748537332986
91
101
  requirements: []
92
102
  rubyforge_project:
93
- rubygems_version: 2.0.0.rc.2
103
+ rubygems_version: 1.8.25
94
104
  signing_key:
95
- specification_version: 4
105
+ specification_version: 3
96
106
  summary: Preprocessor and Selector classes to generate FeatureVectors from Job data
97
107
  test_files:
98
108
  - spec/factories.rb
@@ -101,8 +111,8 @@ test_files:
101
111
  - spec/factories/jobs/tmp3.html
102
112
  - spec/factories/jobs_with_description.rb
103
113
  - spec/factories/jobs_with_title.rb
114
+ - spec/preprocessors/id_mapping_spec.rb
104
115
  - spec/preprocessors/simple_spec.rb
105
- - spec/preprocessors/with_industry_map_spec.rb
106
116
  - spec/selectors/n_gram_spec.rb
107
117
  - spec/selectors/simple_spec.rb
108
118
  - spec/selectors/with_binary_encoding_spec.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: bbfc13983f715f2f0ab66d2a9dbfed543896329e
4
- data.tar.gz: 2e6f5c32898cc01a2468ebaa2e82470e01012586
5
- SHA512:
6
- metadata.gz: 8c616e62ff4717808e0b9f29e3d6773c03096934194a1fb706e3eb0ded7527c455ea00039e0a4d39fc08e13f735d4f49cd984d26128e624fbe0b956851893c21
7
- data.tar.gz: f28756b0f3f9539f69bcda6ce3f79bdf3ef226ee813f0e5ed7b6f38595f1760e05bbd1abe7ec8ccc5c3a8db8b666871a0d33a755c19c4842a85a600e4749498d
@@ -1,40 +0,0 @@
1
- require_relative 'simple'
2
- module Preprocessor
3
- #
4
- # Preprocessor Base Class
5
- #
6
- # @author Andreas Eger
7
- #
8
- class WithIndustryMap < Simple
9
- attr_reader :industry_map
10
-
11
- #
12
- # @param args [Hash] options hash
13
- # @option args [Hash] :industry_map mapping for the tree like industry ids to continuous ones
14
- def initialize args={}
15
- @industry_map = args.fetch(:industry_map){ Hash[Pjpp::Industry.select(:id).all.map(&:id).sort.map.with_index{|e,i| [e,i]}] }
16
- end
17
-
18
- def map_industry_id(id)
19
- @industry_map[id]
20
- end
21
- def label
22
- "with_industry_map"
23
- end
24
-
25
- private
26
- def process_job job, classification
27
- PreprocessedData.new(
28
- data: [ clean_title(job.title), clean_description(job.description) ],
29
- ids: {
30
- industry: map_industry_id(job.classification_id(:industry)),
31
- function: job.classification_id(:function),
32
- career_level: job.classification_id(:career_level) },
33
- labels: {
34
- industry: job.label(:industry),
35
- function: job.label(:function),
36
- career_level: job.label(:career_level) }
37
- ).tap{|e| e.send("#{classification}!")}
38
- end
39
- end
40
- end
@@ -1,16 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Preprocessor::WithIndustryMap do
4
- it_behaves_like 'a preprocessor'
5
- let(:preprocessor) { Preprocessor::WithIndustryMap.new(industry_map: {1423=>3, 523=>54}) }
6
- let(:job) { FactoryGirl.build(:job) }
7
- let(:jobs) { [job] }
8
- before(:each) do
9
- job.stubs(:classification_id)
10
- job.stubs(:label)
11
- end
12
- it "should make use of a industry_map" do
13
- preprocessor.expects(:map_industry_id)
14
- preprocessor.process(jobs)
15
- end
16
- end