feature_set 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use 1.9.2@feature_set --create
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS rather than plain HTTP so downloads cannot be
# tampered with in transit.
source "https://rubygems.org"

# Dependencies are declared in feature_set.gemspec.
gemspec
data/README.markdown ADDED
@@ -0,0 +1,8 @@
1
+ ## FeatureSet
2
+
3
+
4
+ Helpful tasks:
5
+
6
+ - rake build
7
+ - rake install
8
+ - rake release
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Registers the :spec rake task.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs :spec.
task(:default => :spec)
@@ -0,0 +1,26 @@
1
# -*- encoding: utf-8 -*-
# Make lib/ loadable so the version constant can be read below.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "feature_set/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name        = "feature_set"
  spec.version     = FeatureSet::VERSION
  spec.authors     = ["Andrew Cantino"]
  spec.email       = ["andrew@iterationlabs.com"]
  spec.homepage    = "https://github.com/iterationlabs/feature_set"
  spec.summary     = "Generate feature vectors from textual data"
  spec.description = "FeatureSet is a Ruby library for generating feature vectors from textual data. It can output in ARFF format for experimentation with Weka."

  spec.rubyforge_project = "feature_set"

  # Packaged files are whatever git tracks, one path per line of output.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_development_dependency "rspec"
  spec.add_runtime_dependency "wwood-rarff"
  spec.add_runtime_dependency "activesupport"
  spec.add_runtime_dependency "i18n"
end
@@ -0,0 +1,70 @@
1
+ require 'active_support'
2
+ require 'active_support/inflector'
3
+
4
+ require "feature_set/feature_builder/word_vector"
5
+ require "feature_set/feature_builder/cuss"
6
+
7
+ require "feature_set/datum"
8
+
9
module FeatureSet
  # Holds rows of raw data and a list of feature builders, and turns every
  # row into a flat hash of generated features.
  #
  # A row is a Hash of feature name => raw value. The key +:class+ carries the
  # row's label: it is never wrapped in a Datum, never handed to a feature
  # builder, and is copied to the output row unchanged.
  class Builder
    # Builder classes instantiated by add_feature_builders(:all).
    BUILTIN_FEATURE_BUILDERS = %w[FeatureSet::FeatureBuilder::Cuss
                                  FeatureSet::FeatureBuilder::WordVector].map(&:constantize)

    attr_accessor :options, :feature_builders, :data, :features

    # options - stored as-is; not read anywhere in this class.
    def initialize(options = {})
      @options = options
      @feature_builders = []
      @features = []
      @data = []
    end

    # Appends one row (a Hash) or an Array of rows, and discards any
    # previously generated features since they no longer match the data.
    #
    # Returns the full data array.
    def add_data(data)
      clear_features
      (@data << data).flatten!
      # flatten! returns nil when nothing needed flattening (e.g. a single
      # Hash was added), so return the array explicitly.
      @data
    end

    # Removes all rows and all generated features.
    def clear_data
      @data = []
      clear_features
    end

    # Removes generated features but keeps the data.
    def clear_features
      @features = []
    end

    # Runs every registered feature builder over every non-class field of
    # every row and stores the result in #features.
    #
    # opts - :include_original => truthy to also copy each field's raw value
    #        into the output row under its original key.
    #
    # Each generated feature is stored under the Symbol "<key>_<feature>".
    # Returns the Array of output rows (also available via #features).
    def generate_features(opts = {})
      wrapped_data_set = self.class.wrap_dataset(data)

      feature_builders.each { |fb| fb.before_generate_features(wrapped_data_set) }

      @features = wrapped_data_set.map do |row|
        output_row = {}

        row.each do |key, datum|
          if key == :class
            # Copy the label through and skip feature generation. This must
            # not depend on the label's truthiness: a false or nil label is
            # not a Datum and must never reach the code below.
            output_row[:class] = datum
            next
          end

          output_row[key] = datum.value if opts[:include_original]

          feature_builders.each do |builder|
            builder.generate_features(datum, key, row).each do |feature, value|
              output_row["#{key}_#{feature}".to_sym] = value
            end
          end
        end

        output_row
      end
    end

    # Registers feature builder instances. Accepts instances, Arrays of
    # instances, or :all / "all" as the first argument to register a new
    # instance of every class in BUILTIN_FEATURE_BUILDERS.
    #
    # Returns the full list of registered builders.
    def add_feature_builders(*builders)
      builders = BUILTIN_FEATURE_BUILDERS.map(&:new) if [:all, "all"].include?(builders.first)
      (@feature_builders << builders).flatten!
      @feature_builders
    end
    alias_method :add_feature_builder, :add_feature_builders

    # Returns a copy of dataset in which every value except the one stored
    # under :class is wrapped in a Datum.
    def self.wrap_dataset(dataset)
      dataset.map do |row|
        row.inject({}) do |wrapped, (key, value)|
          wrapped[key] = (key == :class ? value : Datum.new(value))
          wrapped
        end
      end
    end
  end
end
@@ -0,0 +1,351 @@
1
+ anus
2
+ arse
3
+ arsehole
4
+ ass
5
+ ass-hat
6
+ asshat
7
+ ass-jabber
8
+ assjabber
9
+ ass-pirate
10
+ asspirate
11
+ assbag
12
+ assbandit
13
+ assbanger
14
+ assbite
15
+ assclown
16
+ asscock
17
+ asscracker
18
+ asses
19
+ assface
20
+ assfuck
21
+ assfucker
22
+ assgoblin
23
+ asshead
24
+ asshole
25
+ asshopper
26
+ assjacker
27
+ asslick
28
+ asslicker
29
+ assmonkey
30
+ assmunch
31
+ assmuncher
32
+ assnigger
33
+ assshit
34
+ assshole
35
+ asssucker
36
+ asswad
37
+ asswipe
38
+ bampot
39
+ bastard
40
+ beaner
41
+ bitch
42
+ bitchass
43
+ bitches
44
+ bitchtits
45
+ bitchy
46
+ blow job
47
+ blowjob
48
+ bollocks
49
+ bollox
50
+ boner
51
+ brotherfucker
52
+ bullshit
53
+ bumblefuck
54
+ butt plug
55
+ buttplug
56
+ butt-pirate
57
+ buttpirate
58
+ buttfucka
59
+ buttfucker
60
+ camel toe
61
+ cameltoe
62
+ carpetmuncher
63
+ chinc
64
+ chink
65
+ choad
66
+ chode
67
+ clit
68
+ clitface
69
+ clitfuck
70
+ clusterfuck
71
+ cock
72
+ cockass
73
+ cockbite
74
+ cockburger
75
+ cockface
76
+ cockfucker
77
+ cockhead
78
+ cockjockey
79
+ cockknoker
80
+ cockmaster
81
+ cockmongler
82
+ cockmongruel
83
+ cockmonkey
84
+ cockmuncher
85
+ cocknose
86
+ cocknugget
87
+ cockshit
88
+ cocksmith
89
+ cocksmoke
90
+ cocksmoker
91
+ cocksniffer
92
+ cocksucker
93
+ cockwaffle
94
+ coochie
95
+ coochy
96
+ coon
97
+ cooter
98
+ cracker
99
+ cum
100
+ cumbubble
101
+ cumdumpster
102
+ cumguzzler
103
+ cumjockey
104
+ cumslut
105
+ cumtart
106
+ cunnie
107
+ cunnilingus
108
+ cunt
109
+ cuntass
110
+ cuntface
111
+ cunthole
112
+ cuntlicker
113
+ cuntrag
114
+ cuntslut
115
+ dago
116
+ damn
117
+ deggo
118
+ dick
119
+ dickbag
120
+ dickbeaters
121
+ dickface
122
+ dickfuck
123
+ dickfucker
124
+ dickhead
125
+ dickhole
126
+ dickjuice
127
+ dickmilk
128
+ dickmonger
129
+ dicks
130
+ dickslap
131
+ dicksucker
132
+ dicksucking
133
+ dickwad
134
+ dickweasel
135
+ dickweed
136
+ dickwod
137
+ dike
138
+ dildo
139
+ dipshit
140
+ doochbag
141
+ dookie
142
+ douche
143
+ douche-fag
144
+ douchefag
145
+ douchebag
146
+ douchewaffle
147
+ dumass
148
+ dumb ass
149
+ dumbass
150
+ dumbfuck
151
+ dumbshit
152
+ dumshit
153
+ dyke
154
+ fag
155
+ fagbag
156
+ fagfucker
157
+ faggit
158
+ faggot
159
+ faggotcock
160
+ fagtard
161
+ fatass
162
+ fellatio
163
+ feltch
164
+ flamer
165
+ fuck
166
+ fuckass
167
+ fuckbag
168
+ fuckboy
169
+ fuckbrain
170
+ fuckbutt
171
+ fucked
172
+ fucker
173
+ fuckersucker
174
+ fuckface
175
+ fuckhead
176
+ fuckhole
177
+ fuckin
178
+ fucking
179
+ fucknut
180
+ fucknutt
181
+ fuckoff
182
+ fucks
183
+ fuckstick
184
+ fucktard
185
+ fucktart
186
+ fuckup
187
+ fuckwad
188
+ fuckwit
189
+ fuckwitt
190
+ fudgepacker
191
+ gay
192
+ gayass
193
+ gaybob
194
+ gaydo
195
+ gayfuck
196
+ gayfuckist
197
+ gaylord
198
+ gaytard
199
+ gaywad
200
+ goddamn
201
+ goddamnit
202
+ gooch
203
+ gook
204
+ gringo
205
+ guido
206
+ handjob
207
+ hard on
208
+ hardon
209
+ heeb
210
+ hell
211
+ ho
212
+ hoe
213
+ homo
214
+ homodumbshit
215
+ honkey
216
+ humping
217
+ jackass
218
+ jap
219
+ jerk off
220
+ jerkoff
221
+ jigaboo
222
+ jizz
223
+ jungle bunny
224
+ junglebunny
225
+ kike
226
+ kooch
227
+ kootch
228
+ kraut
229
+ kunt
230
+ kyke
231
+ lameass
232
+ lesbian
233
+ lesbo
234
+ lezzie
235
+ mcfagget
236
+ mick
237
+ minge
238
+ mothafucka
239
+ mothafuckin\'
240
+ mothafuckin
241
+ motherfucker
242
+ motherfucking
243
+ muff
244
+ muffdiver
245
+ munging
246
+ negro
247
+ nigaboo
248
+ nigga
249
+ nigger
250
+ niggers
251
+ niglet
252
+ nut sack
253
+ nutsack
254
+ paki
255
+ panooch
256
+ pecker
257
+ peckerhead
258
+ penis
259
+ penisbanger
260
+ penisfucker
261
+ penispuffer
262
+ piss
263
+ pissed
264
+ pissed off
265
+ pissedoff
266
+ pissflaps
267
+ polesmoker
268
+ pollock
269
+ poon
270
+ poonani
271
+ poonany
272
+ poontang
273
+ porch monkey
274
+ porchmonkey
275
+ prick
276
+ punanny
277
+ punta
278
+ pussies
279
+ pussy
280
+ pussylicking
281
+ puto
282
+ queef
283
+ queer
284
+ queerbait
285
+ queerhole
286
+ renob
287
+ rimjob
288
+ ruski
289
+ sand nigger
290
+ sandnigger
291
+ schlong
292
+ scrote
293
+ shit
294
+ shitass
295
+ shitbag
296
+ shitbagger
297
+ shitbrains
298
+ shitbreath
299
+ shitcanned
300
+ shitcunt
301
+ shitdick
302
+ shitface
303
+ shitfaced
304
+ shithead
305
+ shithole
306
+ shithouse
307
+ shitspitter
308
+ shitstain
309
+ shitter
310
+ shittiest
311
+ shitting
312
+ shitty
313
+ shiz
314
+ shiznit
315
+ skank
316
+ skeet
317
+ skullfuck
318
+ slut
319
+ slutbag
320
+ smeg
321
+ snatch
322
+ spic
323
+ spick
324
+ splooge
325
+ spook
326
+ suckass
327
+ tard
328
+ testicle
329
+ thundercunt
330
+ tit
331
+ titfuck
332
+ tits
333
+ tittyfuck
334
+ twat
335
+ twatlips
336
+ twats
337
+ twatwaffle
338
+ unclefucker
339
+ va-j-j
340
+ vajj
341
+ vag
342
+ vagina
343
+ vajayjay
344
+ vjayjay
345
+ wank
346
+ wankjob
347
+ wetback
348
+ whore
349
+ whorebag
350
+ whoreface
351
+ wop
@@ -0,0 +1,24 @@
1
module FeatureSet
  # Wraps one raw field value and provides memoized tokenization helpers.
  # The token helpers call String methods on the value, so they are only
  # usable when the value is a String.
  class Datum
    # Token separator: runs of whitespace and/or forward slashes.
    TOKEN_REGEX = /[\s\/]+/
    # Despite the name, this matches every character that is not an ASCII
    # letter, digit, underscore or hyphen. Matches are replaced by a space.
    NON_ASCII_REGEX = /[^a-zA-Z0-9_-]/

    attr_accessor :value

    def initialize(v)
      self.value = v
    end

    # Returns the lower-cased tokens of the value as an Array of Strings.
    #
    # The result is memoized: assigning a new value afterwards does not
    # change what this method returns.
    def tokens
      @tokens ||= begin
        cleaned = value.downcase.gsub(NON_ASCII_REGEX, ' ')
        # Trim only after punctuation has become spaces. Splitting on a
        # regex keeps a leading empty field, so text that starts with
        # punctuation would otherwise yield a bogus "" token.
        cleaned.strip.split(TOKEN_REGEX)
      end
    end

    # Returns a Hash of token => number of occurrences in #tokens.
    # Memoized independently of #tokens.
    def token_counts
      @token_counts ||= tokens.inject({}) do |counts, token|
        counts[token] = (counts[token] || 0) + 1
        counts
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module FeatureSet
  module FeatureBuilder
    # Parent class for feature builders. It stores the options it was given,
    # requires subclasses to provide generate_features, and offers an
    # optional before_generate_features hook that does nothing by default.
    class Base
      attr_accessor :options

      def initialize(options = {})
        self.options = options
      end

      # Must be overridden; this implementation always raises RuntimeError.
      def generate_features(datum, key, row)
        raise RuntimeError, "Please implement 'generate_features' in your subclass of FeatureBuilder::Base."
      end

      # Hook that receives the dataset; intentionally a no-op here.
      def before_generate_features(dataset); end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require "feature_set/feature_builder/base"
2
+
3
module FeatureSet
  module FeatureBuilder
    # Emits a single feature, :cuss_count: how many distinct tokens of a
    # String datum appear in the bundled cuss-word list.
    class Cuss < Base
      # One entry per line of ../data/cusswords.txt (relative to this file),
      # trimmed and lower-cased. Read once, when the class is loaded.
      CUSS_WORDS = File.read(File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'cusswords.txt'))).split("\n").map { |word| word.strip.downcase }

      # Returns {} for non-String values; otherwise a one-entry Hash.
      # Array intersection drops duplicates, so a repeated word counts once.
      def generate_features(datum, key, row)
        if datum.value.is_a?(String)
          matches = datum.tokens & CUSS_WORDS
          { :cuss_count => matches.length }
        else
          {}
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require "feature_set/feature_builder/base"
2
+
3
module FeatureSet
  module FeatureBuilder
    # Produces one tf-idf feature per word seen in a String field.
    #
    # before_generate_features must be given the dataset first so the
    # per-field IDF tables exist; generate_features then scores one datum
    # against the table for its field.
    class WordVector < Base
      # Hash of field key => { token => idf }, filled in by
      # before_generate_features. nil until that has been called.
      attr_accessor :idfs

      # Builds the IDF table for every non-class field holding String values.
      #
      # dataset - Array of rows (Hash of key => Datum, plus an optional
      #           :class entry that is ignored).
      #
      # idf(token) = Math.log(number of rows / rows whose field contains the
      # token). The row count is the whole dataset's length, including rows
      # where the field is not a String.
      #
      # Returns the idfs Hash.
      def before_generate_features(dataset)
        @idfs = {}
        dataset.each do |row|
          row.each do |key, datum|
            next if key == :class
            next unless datum.value.is_a?(String)

            doc_freqs = (idfs[key] ||= {})
            # token_counts has one key per distinct token, so each row adds
            # at most 1 to a token's document frequency.
            datum.token_counts.keys.each do |token|
              doc_freqs[token] = (doc_freqs[token] || 0) + 1
            end
          end
        end

        num_docs = dataset.length
        idfs.each do |feature, freqs|
          freqs.each do |token, doc_freq|
            idfs[feature][token] = Math.log(num_docs / doc_freq.to_f)
          end
        end

        idfs
      end

      # Returns a Hash of word => tf * idf for every word in the IDF table of
      # the given field, where tf = occurrences in this datum / number of
      # tokens in this datum.
      #
      # Returns {} when the value is not a String or when no IDF table exists
      # for key. A datum with no tokens scores 0.0 for every word rather than
      # NaN from 0 / 0.0.
      def generate_features(datum, key, row)
        return {} unless datum.value.is_a?(String)

        word_idfs = idfs && idfs[key]
        return {} unless word_idfs

        num_words = datum.tokens.length.to_f
        counts = datum.token_counts

        word_idfs.inject({}) do |memo, (word, idf)|
          tf = num_words.zero? ? 0.0 : (counts[word] || 0) / num_words
          memo[word] = tf * idf
          memo
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module FeatureSet
  # Gem version string; the gemspec reads this constant.
  VERSION = '0.0.1'
end
@@ -0,0 +1,4 @@
1
+ require "rarff"
2
+
3
+ require "feature_set/version"
4
+ require "feature_set/builder"
@@ -0,0 +1,72 @@
1
+ require 'spec_helper'
2
+
3
describe FeatureSet::Builder do
  let(:builder) { FeatureSet::Builder.new }

  # Row fixtures are methods rather than `let`s so every call returns a fresh
  # Hash; expectations never compare an object with itself.
  def happy_row
    { :status => "I am happy!", :class => :happy }
  end

  def sad_row
    { :status => "I am sad.", :class => :sad }
  end

  def extra_row
    { :status => "Something", :another_feature => "Something else", :class => :awesome }
  end

  describe "adding feature builders" do
    it "can add all known feature builders" do
      builder.add_feature_builders :all

      classes = builder.feature_builders.map { |fb| fb.class }
      classes.should include(FeatureSet::FeatureBuilder::WordVector)

      # One builder per file in the feature_builder directory, minus one
      # (presumably base.rb, which is not itself a usable builder).
      pattern = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "lib", "feature_set", "feature_builder", "*.rb"))
      builder.feature_builders.length.should == Dir[pattern].length - 1
    end

    it "can add individual feature builders" do
      builder.add_feature_builder FeatureSet::FeatureBuilder::WordVector.new
      builder.feature_builders.length.should == 1
    end

    it "can add arrays of feature builders" do
      both = [FeatureSet::FeatureBuilder::WordVector.new, FeatureSet::FeatureBuilder::Cuss.new]
      builder.add_feature_builders both
      builder.feature_builders.length.should == 2
    end
  end

  describe "adding data" do
    it "should accept mappings between one or more strings and their classifications" do
      builder.add_data [happy_row, sad_row]
      builder.data.should == [happy_row, sad_row]

      builder.add_data extra_row
      builder.data.should == [happy_row, sad_row, extra_row]

      builder.clear_data
      builder.data.should == []

      builder.data = [happy_row, sad_row]
      builder.data.should == [happy_row, sad_row]
    end
  end

  describe "generating features" do
    before do
      builder.add_feature_builder FeatureSet::FeatureBuilder::Cuss.new
      builder.add_data :status => "this is some text", :class => :awesome
      builder.add_data :status => "this is some shitty text", :class => :less_awesome
    end

    it "should output a row of features for every line of data" do
      builder.generate_features

      first, second = builder.features[0], builder.features[1]
      first.should == { :status_cuss_count => 0, :class => :awesome }
      second.should == { :status_cuss_count => 1, :class => :less_awesome }
    end

    it "should make it easy to keep the original data" do
      builder.generate_features(:include_original => true)

      first, second = builder.features[0], builder.features[1]
      first.should == { :status => "this is some text", :status_cuss_count => 0, :class => :awesome }
      second.should == { :status => "this is some shitty text", :status_cuss_count => 1, :class => :less_awesome }
    end

    it "should generate features for every string" do
      builder.add_data :status => "text", :foo => "more shitty text", :class => :awesome
      builder.generate_features

      builder.features[1].should == { :status_cuss_count => 1, :class => :less_awesome }
      builder.features[2].should == { :status_cuss_count => 0, :foo_cuss_count => 1, :class => :awesome }
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
describe FeatureSet::Datum do
  describe "tokenize" do
    let(:datum) { FeatureSet::Datum.new("hello world sup?") }
    let(:expected_tokens) { ["hello", "world", "sup"] }

    it "should return an array of tokens" do
      datum.tokens.should =~ expected_tokens
    end

    it "should memoize" do
      datum.tokens.should =~ expected_tokens

      # Changing the value must not invalidate the cached tokens.
      datum.value = "hello"
      datum.tokens.should =~ expected_tokens
    end
  end

  describe "#token_counts" do
    let(:datum) { FeatureSet::Datum.new("hello world sup? hello!") }
    let(:expected_counts) { { "hello" => 2, "world" => 1, "sup" => 1 } }

    it "should provide counts for each token" do
      datum.token_counts.should == expected_counts
    end

    it "should memoize" do
      datum.token_counts.should == expected_counts

      # Neither a new value nor replaced tokens may change the cached counts.
      datum.value = "hello"
      datum.instance_variable_set(:@tokens, ["hello"])
      datum.token_counts.should == expected_counts
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
describe FeatureSet::FeatureBuilder::Cuss do
  let(:cuss_builder) { FeatureSet::FeatureBuilder::Cuss.new }

  # Runs the builder on a freshly wrapped raw value; key and row are unused
  # by this builder, so nil is passed for both.
  def features_for(raw_value)
    cuss_builder.generate_features(FeatureSet::Datum.new(raw_value), nil, nil)
  end

  it "should output :cuss_count as the number of distinct cuss words found" do
    features_for("this fucking shit").should == { :cuss_count => 2 }
    features_for("this fucking fucking fucking shit").should == { :cuss_count => 2 }
  end

  it "should ignore non-string features" do
    features_for(2).should == {}
  end
end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
describe FeatureSet::FeatureBuilder::WordVector do
  let(:word_vector) { FeatureSet::FeatureBuilder::WordVector.new }

  it "should output a named feature for every word in the dataset, after performing tfidf" do
    rows = FeatureSet::Builder.wrap_dataset([
      { :m1 => "hello world. hello!", :m2 => "how goes?", :class => :yes },
      { :m1 => "foo world", :m2 => "how?", :class => :no }
    ])
    first_row = rows.first
    last_row = rows.last

    word_vector.before_generate_features(rows)

    # With two rows, a word found in one row has idf log(2/1) and a word
    # found in both rows has idf log(2/2).
    in_one_row = Math.log(2/1.0)
    in_both_rows = Math.log(2/2.0)

    word_vector.idfs.should == {
      :m1 => { "hello" => in_one_row, "world" => in_both_rows, "foo" => in_one_row },
      :m2 => { "how" => in_both_rows, "goes" => in_one_row }
    }

    word_vector.generate_features(first_row[:m1], :m1, first_row).should == {
      "hello" => (2/3.0) * in_one_row,
      "world" => (1/3.0) * in_both_rows,
      "foo" => 0
    }
    word_vector.generate_features(first_row[:m2], :m2, first_row).should == {
      "how" => (1/2.0) * in_both_rows,
      "goes" => (1/2.0) * in_one_row
    }

    word_vector.generate_features(last_row[:m1], :m1, last_row).should == {
      "hello" => 0,
      "world" => (1/2.0) * in_both_rows,
      "foo" => (1/2.0) * in_one_row
    }
    word_vector.generate_features(last_row[:m2], :m2, last_row).should == {
      "how" => (1/1.0) * in_both_rows,
      "goes" => 0
    }
  end

  it "should ignore non-string features" do
    numeric_rows = [
      { :something => FeatureSet::Datum.new(2), :class => false },
      { :something => FeatureSet::Datum.new(1), :class => true }
    ]
    word_vector.before_generate_features(numeric_rows)

    probe_row = { :something => FeatureSet::Datum.new(2), :class => false }
    word_vector.generate_features(FeatureSet::Datum.new(2), :something, probe_row).should == {}
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1,4 @@
1
+ --colour
2
+ --format s -c
3
+ --loadby mtime
4
+ --reverse
@@ -0,0 +1,5 @@
1
require 'rubygems'
require 'feature_set'

# No custom settings; the empty block is kept as the place to add them.
RSpec.configure { |_config| }
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: feature_set
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Cantino
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-17 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70284888584540 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70284888584540
25
+ - !ruby/object:Gem::Dependency
26
+ name: wwood-rarff
27
+ requirement: &70284888584120 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70284888584120
36
+ - !ruby/object:Gem::Dependency
37
+ name: activesupport
38
+ requirement: &70284888583700 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70284888583700
47
+ - !ruby/object:Gem::Dependency
48
+ name: i18n
49
+ requirement: &70284888583280 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70284888583280
58
+ description: FeatureSet is a Ruby library for generating feature vectors from textual
59
+ data. It can output in ARFF format for experimentation with Weka.
60
+ email:
61
+ - andrew@iterationlabs.com
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - .gitignore
67
+ - .rvmrc
68
+ - Gemfile
69
+ - README.markdown
70
+ - Rakefile
71
+ - feature_set.gemspec
72
+ - lib/feature_set.rb
73
+ - lib/feature_set/builder.rb
74
+ - lib/feature_set/data/cusswords.txt
75
+ - lib/feature_set/datum.rb
76
+ - lib/feature_set/feature_builder/base.rb
77
+ - lib/feature_set/feature_builder/cuss.rb
78
+ - lib/feature_set/feature_builder/word_vector.rb
79
+ - lib/feature_set/version.rb
80
+ - spec/feature_set/builder_spec.rb
81
+ - spec/feature_set/datum_spec.rb
82
+ - spec/feature_set/feature/cuss_spec.rb
83
+ - spec/feature_set/feature/word_vector_spec.rb
84
+ - spec/spec.opts
85
+ - spec/spec_helper.rb
86
+ homepage: https://github.com/iterationlabs/feature_set
87
+ licenses: []
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project: feature_set
106
+ rubygems_version: 1.8.10
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: Generate feature vectors from textual data
110
+ test_files:
111
+ - spec/feature_set/builder_spec.rb
112
+ - spec/feature_set/datum_spec.rb
113
+ - spec/feature_set/feature/cuss_spec.rb
114
+ - spec/feature_set/feature/word_vector_spec.rb
115
+ - spec/spec.opts
116
+ - spec/spec_helper.rb