svm_helper 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Guardfile CHANGED
@@ -1,5 +1,5 @@
1
- # guard 'rspec', cli: "--color --format p", all_after_pass: false do
2
- guard 'rspec', cli: "--color --format p", all_after_pass: false, rvm:['2.0.0@svm_helper', 'jruby@svm_helper'] do
1
+ guard 'rspec', cli: "--color --format p", all_after_pass: false do
2
+ # guard 'rspec', cli: "--color --format p", all_after_pass: false, rvm:['2.0.0@svm_helper', 'jruby@svm_helper'] do
3
3
  watch(%r{^spec/.+_spec\.rb$})
4
4
  watch(%r{^lib/svm_helper/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
5
5
  watch('spec/spec_helper.rb') { 'spec' }
@@ -5,13 +5,10 @@ require_relative 'interface_helper'
5
5
  # @author Andreas Eger
6
6
  class FeatureVector < InterfaceHelper
7
7
  attribute :word_data
8
- attribute :classification_arrays
9
- attribute :labels
8
+ attribute :classification
9
+ attribute :label
10
10
 
11
- def label
12
- labels[classification]
13
- end
14
11
  def data
15
- word_data + classification_arrays[classification]
12
+ word_data + classification
16
13
  end
17
14
  end
@@ -30,7 +30,6 @@ class InterfaceHelper
30
30
  params.each do |key, value|
31
31
  send("#{key}=", value)
32
32
  end
33
- @_attributes[:classification] ||= :function
34
33
  end
35
34
 
36
35
  #
@@ -41,17 +40,4 @@ class InterfaceHelper
41
40
  def == anOther
42
41
  @_attributes.keys.map{ |sym| self.send(sym) == anOther.send(sym)}.reduce(true){|a,e| a && e }
43
42
  end
44
-
45
- def industry!
46
- @_attributes[:classification] = :industry
47
- end
48
- def function!
49
- @_attributes[:classification] = :function
50
- end
51
- def career_level!
52
- @_attributes[:classification] = :career_level
53
- end
54
- def classification
55
- @_attributes[:classification]
56
- end
57
43
  end
@@ -5,13 +5,6 @@ require_relative 'interface_helper'
5
5
  # @author Andreas Eger
6
6
  class PreprocessedData < InterfaceHelper
7
7
  attribute :data
8
- attribute :ids
9
- attribute :labels
10
-
11
- def id
12
- ids[classification]
13
- end
14
- def label
15
- labels[classification]
16
- end
8
+ attribute :id
9
+ attribute :label
17
10
  end
@@ -0,0 +1,35 @@
1
+ require_relative 'simple'
2
+ module Preprocessor
3
+ #
4
+ # Preprocessor that maps each job id through a lookup table (id_map)
5
+ #
6
+ # @author Andreas Eger
7
+ #
8
+ class IDMapping < Simple
9
+ attr_reader :id_map
10
+
11
+ #
12
+ # @param args [Hash] options hash
13
+ # @param id_map [Hash] mapping for the tree like ids to continuous ones
14
+ def initialize id_map, args={}
15
+ super(args)
16
+ @id_map = id_map
17
+ end
18
+
19
+ def map_id(id)
20
+ @id_map[id]
21
+ end
22
+ def label
23
+ "with_id_mapping"
24
+ end
25
+
26
+ private
27
+ def process_job job
28
+ PreprocessedData.new(
29
+ data: [clean_title(job[:title]), clean_description(job[:description])],
30
+ id: map_id(job[:id]),
31
+ label: job[:label]
32
+ )
33
+ end
34
+ end
35
+ end
@@ -35,21 +35,26 @@ module Preprocessor
35
35
  #
36
36
  # cleans provided jobs
37
37
  # @overload process(jobs, classification)
38
- # @param jobs [Job] single Job
38
+ # @param jobs [Hash] single Job
39
+ # @option title
40
+ # @option description
41
+ # @option id
42
+ # @option label
39
43
  # @param classification [Symbol] in `:industry`, `:function`, `:career_level`
40
44
  # @overload process(jobs, classification)
41
- # @param jobs [Array<Job>] list of Jobs
45
+ # @param jobs [Array<Hash>] list of Jobs
42
46
  # @param classification [Symbol] in `:industry`, `:function`, `:career_level`
43
47
  #
44
48
  # @return [Array<PreprocessedData>] list of processed job data - or single job data
45
- def process jobs, classification=:function
46
- if jobs.respond_to? :map
47
- process_jobs jobs, classification
49
+ def process jobs
50
+ if jobs.is_a? Array
51
+ process_jobs jobs
48
52
  else
49
- process_job jobs, classification
53
+ process_job jobs
50
54
  end
51
55
  end
52
56
 
57
+
53
58
  #
54
59
  # converts string into a cleaner version
55
60
  # @param title [String] job title
@@ -84,28 +89,22 @@ module Preprocessor
84
89
  end
85
90
 
86
91
  private
87
- def process_jobs jobs, classification
92
+ def process_jobs jobs
88
93
  if @parallel && RUBY_PLATFORM == 'java'
89
- Parallel.map(jobs, in_threads: THREAD_COUNT ) {|job| process_job job, classification }
94
+ Parallel.map(jobs, in_threads: THREAD_COUNT ) {|job| process_job job }
90
95
  elsif @parallel
91
- Parallel.map(jobs, in_processes: THREAD_COUNT ) {|job| process_job job, classification }
96
+ Parallel.map(jobs, in_processes: THREAD_COUNT ) {|job| process_job job }
92
97
  else
93
- jobs.map {|job| process_job job, classification }
98
+ jobs.map {|job| process_job job }
94
99
  end
95
100
  end
96
101
 
97
- def process_job job, classification
102
+ def process_job job
98
103
  PreprocessedData.new(
99
- data: [ clean_title(job.title), clean_description(job.description) ],
100
- ids: {
101
- industry: job.classification_id(:industry),
102
- function: job.classification_id(:function),
103
- career_level: job.classification_id(:career_level) },
104
- labels: {
105
- industry: job.label(:industry),
106
- function: job.label(:function),
107
- career_level: job.label(:career_level) }
108
- ).tap{|e| e.send("#{classification}!")}
104
+ data: [clean_title(job[:title]), clean_description(job[:description])],
105
+ id: job[:id],
106
+ label: job[:label]
107
+ )
109
108
  end
110
109
  end
111
110
  end
@@ -1,2 +1,2 @@
1
1
  require_relative 'preprocessors/simple'
2
- require_relative 'preprocessors/with_industry_map'
2
+ require_relative 'preprocessors/id_mapping'
@@ -8,7 +8,7 @@ module Selector
8
8
  class NGram < Selector::Simple
9
9
  attr_reader :gram_size
10
10
 
11
- def initialize args={}
11
+ def initialize classification, args={}
12
12
  super
13
13
  @gram_size = args.fetch(:gram_size) { 2 }
14
14
  end
@@ -26,7 +26,8 @@ module Selector
26
26
 
27
27
  attr_accessor :global_dictionary
28
28
 
29
- def initialize args={}
29
+ def initialize classification, args={}
30
+ @classification = classification
30
31
  @global_dictionary = args.fetch(:global_dictionary) {[]}
31
32
  @language = args.fetch(:language){'en'}
32
33
  @parallel = args.fetch(:parallel){false}
@@ -43,13 +44,13 @@ module Selector
43
44
  # @param dictionary_size [Integer] Size of a dictionary to create if none exists
44
45
  #
45
46
  # @return [Array<FeatureVector>] list of feature vectors and labels
46
- def generate_vectors data_set, classification=:function, dictionary_size=DEFAULT_DICTIONARY_SIZE
47
+ def generate_vectors data_set, dictionary_size=DEFAULT_DICTIONARY_SIZE
47
48
  words_per_data = extract_words data_set
48
49
  generate_global_dictionary words_per_data, dictionary_size
49
50
 
50
51
  make_vectors(words_per_data) do |words,index|
51
52
  word_set = words.uniq
52
- make_vector word_set, data_set[index], classification
53
+ make_vector word_set, data_set[index]
53
54
  end
54
55
  end
55
56
 
@@ -60,9 +61,9 @@ module Selector
60
61
  # @param dictionary [Array] dictionary to use for this selection
61
62
  #
62
63
  # @return [FeatureVector]
63
- def generate_vector data, classification=:function, dictionary=global_dictionary
64
+ def generate_vector data, dictionary=global_dictionary
64
65
  word_set = Set.new extract_words_from_data(data)
65
- make_vector word_set, data, classification, dictionary
66
+ make_vector word_set, data, dictionary
66
67
  end
67
68
 
68
69
  #
@@ -109,8 +110,9 @@ module Selector
109
110
  (data.data.flat_map(&:split) - stopwords).delete_if { |e| e.size <= 3 }
110
111
  end
111
112
 
112
- def reset
113
+ def reset classification
113
114
  @global_dictionary = []
115
+ @classification = classification
114
116
  end
115
117
 
116
118
  private
@@ -120,24 +122,17 @@ module Selector
120
122
  # also adds the label
121
123
  # @param words [Array<String>] list of words
122
124
  # @param data [PreprocessedData]
123
- # @param classification [Symbol] in `:industry`, `:function`, `:career_level`
124
125
  # @param dictionary
125
126
  #
126
127
  # @return [FeatureVector]
127
- def make_vector words, data, classification, dictionary=global_dictionary
128
+ def make_vector words, data, dictionary=global_dictionary
128
129
  FeatureVector.new(
129
130
  word_data: dictionary.map{|dic_word|
130
- words.include?(dic_word) ? 1 : 0
131
- },
132
- classification_arrays: {
133
- function: classification_array(data.ids, :function),
134
- industry: classification_array(data.ids, :industry),
135
- career_level: classification_array(data.ids, :career_level) },
136
- labels: {
137
- function: data.labels[:function] ? 1 : 0,
138
- industry: data.labels[:industry] ? 1 : 0,
139
- career_level: data.labels[:career_level] ? 1 : 0 }
140
- ).tap{|e| e.send("#{classification}!")}
131
+ words.include?(dic_word) ? 1 : 0
132
+ },
133
+ classification: classification_array(data.id),
134
+ label: data.label ? 1 : 0
135
+ )
141
136
  end
142
137
 
143
138
  def make_vectors data, &block
@@ -155,9 +150,8 @@ module Selector
155
150
  # @param id [Integer] classification id (1-based; position id-1 is set to 1)
156
151
  #
157
152
  # @return [Array<Integer>] list of size=count(classification_ids) with only one non-zero item
158
- def classification_array(ids, classification)
159
- id = ids[classification]
160
- Array.new(CLASSIFICATIONS_SIZE[classification]){|n| n==(id-1) ? 1 : 0}
153
+ def classification_array(id)
154
+ Array.new(CLASSIFICATIONS_SIZE[@classification]){|n| n==(id-1) ? 1 : 0}
161
155
  end
162
156
  end
163
157
  end
@@ -12,7 +12,7 @@ module Selector
12
12
  industry: 16, # max id 65535, currently 14370
13
13
  career_level: 4 } # max id 15, currently 8
14
14
 
15
- def initialize args={}
15
+ def initialize *args
16
16
  super
17
17
  end
18
18
 
@@ -26,9 +26,8 @@ module Selector
26
26
  # @param id [Integer] classification id to encode
27
27
  #
28
28
  # @return [Array<Integer>] binary encoded classification id
29
- def classification_array(ids, classification)
30
- id = ids[classification]
31
- number_to_binary_array(id, CLASSIFICATIONS_SIZE[classification])
29
+ def classification_array(id)
30
+ number_to_binary_array(id, CLASSIFICATIONS_SIZE[@classification])
32
31
  end
33
32
 
34
33
  def number_to_binary_array(number, size=8)
@@ -1,3 +1,3 @@
1
1
  module SvmHelper
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
data/spec/factories.rb CHANGED
@@ -7,24 +7,20 @@ FactoryGirl.define do
7
7
  wrong_function_id 4
8
8
  wrong_career_level nil
9
9
  end
10
- factory :job, class: OpenStruct do
10
+ factory :job, class: Hash do
11
11
  title "Meh"
12
12
  description "Foo Bar"
13
- summary "Really lot of work to do"
14
- qc_job_check
15
- end
13
+ id 4
14
+ label true
16
15
 
17
- factory :job_without_job_check, class: OpenStruct do
18
- title "Meh"
19
- description "Foo Bar"
20
- summary "Really lot of work to do"
21
- original_industry_id 1423
16
+ initialize_with { attributes }
22
17
  end
23
18
 
19
+
24
20
  factory :data, class: PreprocessedData do
25
21
  data ["haus fooo garten baaz pferd fooo"]
26
- ids {{function: 3, industry: 43, career_level: 7}}
27
- labels {{function: true, industry: false, career_level: true}}
22
+ id 7
23
+ label true
28
24
  end
29
25
  factory :data_w_short_words, parent: :data do
30
26
  data ["auto foo pferd bz gooo fooo 2"]
@@ -0,0 +1,12 @@
1
+ require 'spec_helper'
2
+
3
+ describe Preprocessor::IDMapping do
4
+ it_behaves_like 'a preprocessor'
5
+ let(:preprocessor) { Preprocessor::IDMapping.new(1423=>3, 523=>54) }
6
+ let(:job) { FactoryGirl.build(:job) }
7
+ let(:jobs) { [job] }
8
+ it "should make use of a industry_map" do
9
+ preprocessor.expects(:map_id)
10
+ preprocessor.process(jobs)
11
+ end
12
+ end
@@ -11,31 +11,18 @@ describe Preprocessor::Simple do
11
11
  context do
12
12
  before(:each) do
13
13
  @jobs = FactoryGirl.build_list :job, 3
14
- @jobs.each{|e| e.stubs(:classification_id)}
15
- @jobs.each{|e| e.stubs(:label)}
16
14
  end
17
15
  it "should work with jobs with quality check" do
18
16
  -> {simple.process(@jobs) }.should_not raise_error
19
17
  end
20
- # it "should set labels to true if quality check exists and no wrong_ label set" do
21
- # simple.process(@jobs).each{|e| e.career_level!; e.label.should be_true}
22
- # end
23
- it "should set labels to false if quality check exists and wrong_ label is set" do
24
- simple.process(@jobs).each{|e| e.function!; e.label.should be_false}
18
+ it "should set labels to true if quality check exists and label was true" do
19
+ @jobs.map!{|e| e[:label] = true;e }
20
+ simple.process(@jobs).each{|e| e.label.should be_true}
21
+ end
22
+ it "should set labels to false if quality check exists and label false" do
23
+ @jobs.map!{|e| e[:label] = false;e }
24
+ simple.process(@jobs).each{|e| e.label.should be_false}
25
25
  end
26
- end
27
-
28
- it "should work with jobs without quality check" do
29
- jobs = FactoryGirl.build_list :job_without_job_check, 3
30
- jobs.each{|e| e.stubs(:classification_id)}
31
- jobs.each{|e| e.stubs(:label)}
32
- -> {simple.process(jobs) }.should_not raise_error
33
- end
34
- it "should set labels to false if no quality check" do
35
- jobs = FactoryGirl.build_list :job_without_job_check, 3
36
- jobs.each{|e| e.stubs(:classification_id)}
37
- jobs.each{|e| e.stubs(:label)}
38
- simple.process(jobs).each{|e| e.career_level!; e.label.should be_false}
39
26
  end
40
27
 
41
28
  context "processing" do
@@ -43,8 +30,6 @@ describe Preprocessor::Simple do
43
30
  before(:each) do
44
31
  simple.stubs(:clean_title)
45
32
  simple.stubs(:clean_description)
46
- jobs.each{|e| e.stubs(:classification_id)}
47
- jobs.each{|e| e.stubs(:label)}
48
33
  end
49
34
  it "should call clean_title on each job" do
50
35
  simple.expects(:clean_title).times(3)
@@ -59,7 +44,7 @@ describe Preprocessor::Simple do
59
44
  context "#clean_title" do
60
45
  it "should be downcased" do
61
46
  job = FactoryGirl.build(:job_title_downcasing)
62
- simple.clean_title(job.title).should eq(job.clean_title)
47
+ simple.clean_title(job[:title]).should eq(job[:clean_title])
63
48
  end
64
49
  [ FactoryGirl.build(:job_title_w_gender),
65
50
  FactoryGirl.build(:job_title_w_gender_brackets),
@@ -77,8 +62,8 @@ describe Preprocessor::Simple do
77
62
  FactoryGirl.build(:job_title_var_0),
78
63
  FactoryGirl.build(:job_title_w_special),
79
64
  FactoryGirl.build(:job_title_w_percent)].each do |job|
80
- it "should cleanup '#{job.title}'" do
81
- simple.clean_title(job.title).should eq(job.clean_title)
65
+ it "should cleanup '#{job[:title]}'" do
66
+ simple.clean_title(job[:title]).should eq(job[:clean_title])
82
67
  end
83
68
  end
84
69
  end
@@ -91,27 +76,27 @@ describe Preprocessor::Simple do
91
76
  FactoryGirl.build(:job_description_w_gender) ]
92
77
  }
93
78
  it "should remove html/xml tags" do
94
- desc = simple.clean_description(jobs[0].description)
79
+ desc = simple.clean_description(jobs[0][:description])
95
80
  desc.should_not match(/<(.*?)>/)
96
81
  end
97
82
  it "should remove new lines" do
98
- desc = simple.clean_description(jobs[0].description)
83
+ desc = simple.clean_description(jobs[0][:description])
99
84
  desc.should_not match(/\r\n|\n|\r/)
100
85
  end
101
86
  it "should remove all special characters" do
102
- desc = simple.clean_description(jobs[2].description)
87
+ desc = simple.clean_description(jobs[2][:description])
103
88
  desc.should_not match(/[^a-z öäü]/i)
104
89
  end
105
90
  it "should remove gender tokens" do
106
- desc = simple.clean_description(jobs[3].description)
91
+ desc = simple.clean_description(jobs[3][:description])
107
92
  desc.should_not match(%r{(\(*(m|w)(\/|\|)(w|m)\)*)|(/-*in)|\(in\)})
108
93
  end
109
94
  it "should remove job code token" do
110
- desc = simple.clean_description(jobs[4].description)
95
+ desc = simple.clean_description(jobs[4][:description])
111
96
  desc.should_not match(/\[.*\]|\(.*\)|\{.*\}|\d+\w+/)
112
97
  end
113
98
  it "should be downcased" do
114
- desc = simple.clean_description(jobs[2].description)
99
+ desc = simple.clean_description(jobs[2][:description])
115
100
  desc.should_not match(/[^a-z öäü]/)
116
101
  end
117
102
  end
@@ -125,13 +110,9 @@ describe Preprocessor::Simple do
125
110
  FactoryGirl.build(:job_description_w_code_token),
126
111
  FactoryGirl.build(:job_description_w_gender) ]
127
112
  }
128
- before(:each) do
129
- jobs.each{|e| e.stubs(:classification_id)}
130
- jobs.each{|e| e.stubs(:label)}
131
- end
132
113
  it "should be the same parallelized" do
133
- single = simple.process(jobs, :function)
134
- p_data = parallel.process(jobs, :function)
114
+ single = simple.process(jobs)
115
+ p_data = parallel.process(jobs)
135
116
  single.each.with_index { |e,i| e.data.should == p_data[i].data }
136
117
  end
137
118
  end
@@ -3,7 +3,7 @@ require "spec_helper"
3
3
  describe Selector::NGram do
4
4
  it_behaves_like 'a selector'
5
5
 
6
- let(:ngram) { Selector::NGram.new(gram_size: 3) }
6
+ let(:ngram) { Selector::NGram.new(:function, gram_size: 3) }
7
7
  context "#extract_words_from_data" do
8
8
  it "should generate a list of words from the data" do
9
9
  words = ngram.extract_words_from_data(FactoryGirl.build(:data))
@@ -2,8 +2,8 @@ require "spec_helper"
2
2
 
3
3
  describe Selector::Simple do
4
4
  it_behaves_like 'a selector'
5
-
6
- let(:simple) { Selector::Simple.new }
5
+
6
+ let(:simple) { Selector::Simple.new(:function) }
7
7
  it "should have select_feature_vector implemented" do
8
8
  expect { simple.generate_vectors([]) }.to_not raise_error
9
9
  end
@@ -53,7 +53,8 @@ describe Selector::Simple do
53
53
  context "#generate_vector" do
54
54
  let(:dictionary) { %w(auto pferd haus hase garten) }
55
55
  let(:data) { FactoryGirl.build(:data) }
56
- let(:vector) { simple.generate_vector(data).tap{|e| e.career_level! } }
56
+ let(:simple) { Selector::Simple.new(:career_level) }
57
+ let(:vector) { simple.generate_vector(data) }
57
58
 
58
59
  before(:each) do
59
60
  simple.stubs(:global_dictionary).returns(dictionary)
@@ -79,7 +80,7 @@ describe Selector::Simple do
79
80
  end
80
81
  context "custom dictionary" do
81
82
  it "should accept a custom dictionary" do
82
- vector = simple.generate_vector(data, :career_level, %w(pferd flasche glas))
83
+ vector = simple.generate_vector(data, %w(pferd flasche glas))
83
84
  vector.data.should eq([[1,0,0],[0,0,0,0,0,0,1,0]].flatten)
84
85
  end
85
86
  end
@@ -106,7 +107,7 @@ describe Selector::Simple do
106
107
  simple.generate_vectors(data)
107
108
  end
108
109
  context "parallel" do
109
- let(:parallel) { Selector::Simple.new(parallel: true) }
110
+ let(:parallel) { Selector::Simple.new(:function, parallel: true) }
110
111
  before(:each) do
111
112
  simple.stubs(:global_dictionary).returns(dictionary)
112
113
  parallel.stubs(:global_dictionary).returns(dictionary)
@@ -2,11 +2,11 @@ require "spec_helper"
2
2
 
3
3
  describe Selector::WithBinaryEncoding do
4
4
  it_behaves_like 'a selector'
5
- let(:simple) { Selector::WithBinaryEncoding.new }
5
+ let(:simple) { Selector::WithBinaryEncoding.new(:career_level) }
6
6
 
7
7
  let(:dictionary) { %w(auto pferd haus hase garten) }
8
8
  let(:data) { FactoryGirl.build(:data) }
9
- let(:vector) { simple.generate_vector(data).tap{|e| e.career_level! } }
9
+ let(:vector) { simple.generate_vector(data) }
10
10
 
11
11
  before(:each) do
12
12
  simple.stubs(:global_dictionary).returns(dictionary)
@@ -32,7 +32,7 @@ describe Selector::WithBinaryEncoding do
32
32
  end
33
33
  context "custom dictionary" do
34
34
  it "should accept a custom dictionary" do
35
- vector = simple.generate_vector(data, :career_level, %w(pferd flasche glas))
35
+ vector = simple.generate_vector(data, %w(pferd flasche glas))
36
36
  vector.data.should eq([[1,0,0],[0,1,1,1]].flatten)
37
37
  end
38
38
  end
@@ -1,14 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  shared_examples_for 'a preprocessor' do
4
- let(:preprocessor) { described_class.new(industry_map: {1423=>3, 523=>54}) }
4
+ let(:preprocessor) { described_class.new(1423=>3, 523=>54) }
5
5
  let(:job) { FactoryGirl.build(:job) }
6
6
  let(:jobs) { [job] }
7
7
 
8
- before(:each) do
9
- job.stubs(:classification_id)
10
- job.stubs(:label)
11
- end
12
8
  it { preprocessor.should respond_to :process }
13
9
  it "should return a PreprocessedData object" do
14
10
  preprocessor.process(job).should be_a(PreprocessedData)
@@ -1,7 +1,7 @@
1
1
  require "spec_helper"
2
2
 
3
3
  shared_examples_for 'a selector' do
4
- let(:selector) { described_class.new }
4
+ let(:selector) { described_class.new(:function) }
5
5
  let(:data) { FactoryGirl.build(:data) }
6
6
 
7
7
  it "should return a FeatureVector object" do
metadata CHANGED
@@ -1,18 +1,20 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svm_helper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Andreas Eger
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-03-13 00:00:00.000000000 Z
12
+ date: 2013-03-15 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: parallel
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
19
  - - ~>
18
20
  - !ruby/object:Gem::Version
@@ -20,6 +22,7 @@ dependencies:
20
22
  type: :runtime
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
27
  - - ~>
25
28
  - !ruby/object:Gem::Version
@@ -47,8 +50,8 @@ files:
47
50
  - lib/svm_helper/interface_helper.rb
48
51
  - lib/svm_helper/preprocessed_data.rb
49
52
  - lib/svm_helper/preprocessors.rb
53
+ - lib/svm_helper/preprocessors/id_mapping.rb
50
54
  - lib/svm_helper/preprocessors/simple.rb
51
- - lib/svm_helper/preprocessors/with_industry_map.rb
52
55
  - lib/svm_helper/selectors.rb
53
56
  - lib/svm_helper/selectors/n_gram.rb
54
57
  - lib/svm_helper/selectors/simple.rb
@@ -62,8 +65,8 @@ files:
62
65
  - spec/factories/jobs/tmp3.html
63
66
  - spec/factories/jobs_with_description.rb
64
67
  - spec/factories/jobs_with_title.rb
68
+ - spec/preprocessors/id_mapping_spec.rb
65
69
  - spec/preprocessors/simple_spec.rb
66
- - spec/preprocessors/with_industry_map_spec.rb
67
70
  - spec/selectors/n_gram_spec.rb
68
71
  - spec/selectors/simple_spec.rb
69
72
  - spec/selectors/with_binary_encoding_spec.rb
@@ -73,26 +76,33 @@ files:
73
76
  - svm_helper.gemspec
74
77
  homepage: https://github.com/sch1zo/svm_helper
75
78
  licenses: []
76
- metadata: {}
77
79
  post_install_message:
78
80
  rdoc_options: []
79
81
  require_paths:
80
82
  - lib
81
83
  required_ruby_version: !ruby/object:Gem::Requirement
84
+ none: false
82
85
  requirements:
83
86
  - - '>='
84
87
  - !ruby/object:Gem::Version
85
88
  version: '0'
89
+ segments:
90
+ - 0
91
+ hash: 2037039748537332986
86
92
  required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
87
94
  requirements:
88
95
  - - '>='
89
96
  - !ruby/object:Gem::Version
90
97
  version: '0'
98
+ segments:
99
+ - 0
100
+ hash: 2037039748537332986
91
101
  requirements: []
92
102
  rubyforge_project:
93
- rubygems_version: 2.0.0.rc.2
103
+ rubygems_version: 1.8.25
94
104
  signing_key:
95
- specification_version: 4
105
+ specification_version: 3
96
106
  summary: Preprocessor and Selector classes to generate FeatureVectors from Job data
97
107
  test_files:
98
108
  - spec/factories.rb
@@ -101,8 +111,8 @@ test_files:
101
111
  - spec/factories/jobs/tmp3.html
102
112
  - spec/factories/jobs_with_description.rb
103
113
  - spec/factories/jobs_with_title.rb
114
+ - spec/preprocessors/id_mapping_spec.rb
104
115
  - spec/preprocessors/simple_spec.rb
105
- - spec/preprocessors/with_industry_map_spec.rb
106
116
  - spec/selectors/n_gram_spec.rb
107
117
  - spec/selectors/simple_spec.rb
108
118
  - spec/selectors/with_binary_encoding_spec.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: bbfc13983f715f2f0ab66d2a9dbfed543896329e
4
- data.tar.gz: 2e6f5c32898cc01a2468ebaa2e82470e01012586
5
- SHA512:
6
- metadata.gz: 8c616e62ff4717808e0b9f29e3d6773c03096934194a1fb706e3eb0ded7527c455ea00039e0a4d39fc08e13f735d4f49cd984d26128e624fbe0b956851893c21
7
- data.tar.gz: f28756b0f3f9539f69bcda6ce3f79bdf3ef226ee813f0e5ed7b6f38595f1760e05bbd1abe7ec8ccc5c3a8db8b666871a0d33a755c19c4842a85a600e4749498d
@@ -1,40 +0,0 @@
1
- require_relative 'simple'
2
- module Preprocessor
3
- #
4
- # Preprocessor Base Class
5
- #
6
- # @author Andreas Eger
7
- #
8
- class WithIndustryMap < Simple
9
- attr_reader :industry_map
10
-
11
- #
12
- # @param args [Hash] options hash
13
- # @option args [Hash] :industry_map mapping for the tree like industry ids to continuous ones
14
- def initialize args={}
15
- @industry_map = args.fetch(:industry_map){ Hash[Pjpp::Industry.select(:id).all.map(&:id).sort.map.with_index{|e,i| [e,i]}] }
16
- end
17
-
18
- def map_industry_id(id)
19
- @industry_map[id]
20
- end
21
- def label
22
- "with_industry_map"
23
- end
24
-
25
- private
26
- def process_job job, classification
27
- PreprocessedData.new(
28
- data: [ clean_title(job.title), clean_description(job.description) ],
29
- ids: {
30
- industry: map_industry_id(job.classification_id(:industry)),
31
- function: job.classification_id(:function),
32
- career_level: job.classification_id(:career_level) },
33
- labels: {
34
- industry: job.label(:industry),
35
- function: job.label(:function),
36
- career_level: job.label(:career_level) }
37
- ).tap{|e| e.send("#{classification}!")}
38
- end
39
- end
40
- end
@@ -1,16 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Preprocessor::WithIndustryMap do
4
- it_behaves_like 'a preprocessor'
5
- let(:preprocessor) { Preprocessor::WithIndustryMap.new(industry_map: {1423=>3, 523=>54}) }
6
- let(:job) { FactoryGirl.build(:job) }
7
- let(:jobs) { [job] }
8
- before(:each) do
9
- job.stubs(:classification_id)
10
- job.stubs(:label)
11
- end
12
- it "should make use of a industry_map" do
13
- preprocessor.expects(:map_industry_id)
14
- preprocessor.process(jobs)
15
- end
16
- end