feature_set 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use 1.9.2@feature_set --create
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in feature_set.gemspec
4
+ gemspec
data/README.markdown ADDED
@@ -0,0 +1,8 @@
1
+ ## FeatureSet
2
+
3
+
4
+ Helpful tasks:
5
+
6
+ - rake build
7
+ - rake install
8
+ - rake release
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "feature_set/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "feature_set"
7
+ s.version = FeatureSet::VERSION
8
+ s.authors = ["Andrew Cantino"]
9
+ s.email = ["andrew@iterationlabs.com"]
10
+ s.homepage = "https://github.com/iterationlabs/feature_set"
11
+ s.summary = %q{Generate feature vectors from textual data}
12
+ s.description = %q{FeatureSet is a Ruby library for generating feature vectors from textual data. It can output in ARFF format for experimentation with Weka.}
13
+
14
+ s.rubyforge_project = "feature_set"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ # specify any dependencies here; for example:
22
+ s.add_development_dependency "rspec"
23
+ s.add_runtime_dependency "wwood-rarff"
24
+ s.add_runtime_dependency "activesupport"
25
+ s.add_runtime_dependency "i18n"
26
+ end
@@ -0,0 +1,70 @@
1
+ require 'active_support'
2
+ require 'active_support/inflector'
3
+
4
+ require "feature_set/feature_builder/word_vector"
5
+ require "feature_set/feature_builder/cuss"
6
+
7
+ require "feature_set/datum"
8
+
9
module FeatureSet
  # Collects labelled data rows, runs the registered feature builders over
  # them and stores the generated feature rows in +features+.
  #
  # Each data row is a Hash. The +:class+ key holds the row's label and is
  # copied through untouched; every other value is wrapped in a Datum and
  # handed to each feature builder.
  class Builder
    # Builder classes instantiated by <tt>add_feature_builders(:all)</tt>.
    BUILTIN_FEATURE_BUILDERS = [FeatureSet::FeatureBuilder::Cuss,
                                FeatureSet::FeatureBuilder::WordVector].freeze

    attr_accessor :options, :feature_builders, :data, :features

    # @param options [Hash] stored as-is; not read by this class itself
    def initialize(options = {})
      @options = options
      @feature_builders = []
      @features = []
      @data = []
    end

    # Appends one row (a Hash) or an Array of rows and discards any
    # previously generated features.
    #
    # @return [Array<Hash>] the accumulated data rows
    def add_data(data)
      clear_features
      # flatten! returns nil when nothing was nested, so the collection is
      # returned explicitly below.
      (@data << data).flatten!
      @data
    end

    # Drops all data rows and all generated features.
    def clear_data
      @data = []
      clear_features
    end

    # Drops the generated features only.
    def clear_features
      @features = []
    end

    # Builds one feature row per data row and stores the result in +features+.
    #
    # Every feature builder first receives the whole wrapped data set through
    # +before_generate_features+. Generated features are stored under the
    # symbol <tt>:"#{key}_#{feature}"</tt>.
    #
    # @param opts [Hash] pass <tt>:include_original => true</tt> to copy the
    #   raw values into the output rows as well
    # @return [Array<Hash>] the generated feature rows
    def generate_features(opts = {})
      wrapped_data_set = self.class.wrap_dataset(data)

      feature_builders.each { |fb| fb.before_generate_features(wrapped_data_set) }

      @features = wrapped_data_set.map { |row| build_feature_row(row, opts) }
    end

    # Registers feature builder instances. Accepts instances, Arrays of
    # instances, or +:all+ / +"all"+ as the first argument to register one
    # instance of every builtin builder.
    #
    # @return [Array] the registered feature builders
    def add_feature_builders(*builders)
      builders = BUILTIN_FEATURE_BUILDERS.map(&:new) if [:all, "all"].include?(builders.first)
      (@feature_builders << builders).flatten!
      @feature_builders
    end
    alias_method :add_feature_builder, :add_feature_builders

    # Wraps every non-label value of every row in a Datum.
    #
    # @param dataset [Array<Hash>] raw data rows
    # @return [Array<Hash>] new rows; the +:class+ value is left unwrapped
    def self.wrap_dataset(dataset)
      dataset.map do |row|
        row.each_with_object({}) do |(key, value), wrapped|
          wrapped[key] = key == :class ? value : Datum.new(value)
        end
      end
    end

    private

    # Builds the output row for one wrapped data row.
    def build_feature_row(row, opts)
      output_row = {}

      row.each do |key, datum|
        if key == :class
          # Copy the label through and skip it unconditionally: a false or
          # nil label must never reach the feature builders as a Datum.
          output_row[:class] = datum
          next
        end

        output_row[key] = datum.value if opts[:include_original]

        feature_builders.each do |builder|
          builder.generate_features(datum, key, row).each do |feature, value|
            output_row["#{key}_#{feature}".to_sym] = value
          end
        end
      end

      output_row
    end
  end
end
@@ -0,0 +1,351 @@
1
+ anus
2
+ arse
3
+ arsehole
4
+ ass
5
+ ass-hat
6
+ asshat
7
+ ass-jabber
8
+ assjabber
9
+ ass-pirate
10
+ asspirate
11
+ assbag
12
+ assbandit
13
+ assbanger
14
+ assbite
15
+ assclown
16
+ asscock
17
+ asscracker
18
+ asses
19
+ assface
20
+ assfuck
21
+ assfucker
22
+ assgoblin
23
+ asshead
24
+ asshole
25
+ asshopper
26
+ assjacker
27
+ asslick
28
+ asslicker
29
+ assmonkey
30
+ assmunch
31
+ assmuncher
32
+ assnigger
33
+ assshit
34
+ assshole
35
+ asssucker
36
+ asswad
37
+ asswipe
38
+ bampot
39
+ bastard
40
+ beaner
41
+ bitch
42
+ bitchass
43
+ bitches
44
+ bitchtits
45
+ bitchy
46
+ blow job
47
+ blowjob
48
+ bollocks
49
+ bollox
50
+ boner
51
+ brotherfucker
52
+ bullshit
53
+ bumblefuck
54
+ butt plug
55
+ buttplug
56
+ butt-pirate
57
+ buttpirate
58
+ buttfucka
59
+ buttfucker
60
+ camel toe
61
+ cameltoe
62
+ carpetmuncher
63
+ chinc
64
+ chink
65
+ choad
66
+ chode
67
+ clit
68
+ clitface
69
+ clitfuck
70
+ clusterfuck
71
+ cock
72
+ cockass
73
+ cockbite
74
+ cockburger
75
+ cockface
76
+ cockfucker
77
+ cockhead
78
+ cockjockey
79
+ cockknoker
80
+ cockmaster
81
+ cockmongler
82
+ cockmongruel
83
+ cockmonkey
84
+ cockmuncher
85
+ cocknose
86
+ cocknugget
87
+ cockshit
88
+ cocksmith
89
+ cocksmoke
90
+ cocksmoker
91
+ cocksniffer
92
+ cocksucker
93
+ cockwaffle
94
+ coochie
95
+ coochy
96
+ coon
97
+ cooter
98
+ cracker
99
+ cum
100
+ cumbubble
101
+ cumdumpster
102
+ cumguzzler
103
+ cumjockey
104
+ cumslut
105
+ cumtart
106
+ cunnie
107
+ cunnilingus
108
+ cunt
109
+ cuntass
110
+ cuntface
111
+ cunthole
112
+ cuntlicker
113
+ cuntrag
114
+ cuntslut
115
+ dago
116
+ damn
117
+ deggo
118
+ dick
119
+ dickbag
120
+ dickbeaters
121
+ dickface
122
+ dickfuck
123
+ dickfucker
124
+ dickhead
125
+ dickhole
126
+ dickjuice
127
+ dickmilk
128
+ dickmonger
129
+ dicks
130
+ dickslap
131
+ dicksucker
132
+ dicksucking
133
+ dickwad
134
+ dickweasel
135
+ dickweed
136
+ dickwod
137
+ dike
138
+ dildo
139
+ dipshit
140
+ doochbag
141
+ dookie
142
+ douche
143
+ douche-fag
144
+ douchefag
145
+ douchebag
146
+ douchewaffle
147
+ dumass
148
+ dumb ass
149
+ dumbass
150
+ dumbfuck
151
+ dumbshit
152
+ dumshit
153
+ dyke
154
+ fag
155
+ fagbag
156
+ fagfucker
157
+ faggit
158
+ faggot
159
+ faggotcock
160
+ fagtard
161
+ fatass
162
+ fellatio
163
+ feltch
164
+ flamer
165
+ fuck
166
+ fuckass
167
+ fuckbag
168
+ fuckboy
169
+ fuckbrain
170
+ fuckbutt
171
+ fucked
172
+ fucker
173
+ fuckersucker
174
+ fuckface
175
+ fuckhead
176
+ fuckhole
177
+ fuckin
178
+ fucking
179
+ fucknut
180
+ fucknutt
181
+ fuckoff
182
+ fucks
183
+ fuckstick
184
+ fucktard
185
+ fucktart
186
+ fuckup
187
+ fuckwad
188
+ fuckwit
189
+ fuckwitt
190
+ fudgepacker
191
+ gay
192
+ gayass
193
+ gaybob
194
+ gaydo
195
+ gayfuck
196
+ gayfuckist
197
+ gaylord
198
+ gaytard
199
+ gaywad
200
+ goddamn
201
+ goddamnit
202
+ gooch
203
+ gook
204
+ gringo
205
+ guido
206
+ handjob
207
+ hard on
208
+ hardon
209
+ heeb
210
+ hell
211
+ ho
212
+ hoe
213
+ homo
214
+ homodumbshit
215
+ honkey
216
+ humping
217
+ jackass
218
+ jap
219
+ jerk off
220
+ jerkoff
221
+ jigaboo
222
+ jizz
223
+ jungle bunny
224
+ junglebunny
225
+ kike
226
+ kooch
227
+ kootch
228
+ kraut
229
+ kunt
230
+ kyke
231
+ lameass
232
+ lesbian
233
+ lesbo
234
+ lezzie
235
+ mcfagget
236
+ mick
237
+ minge
238
+ mothafucka
239
+ mothafuckin\'
240
+ mothafuckin
241
+ motherfucker
242
+ motherfucking
243
+ muff
244
+ muffdiver
245
+ munging
246
+ negro
247
+ nigaboo
248
+ nigga
249
+ nigger
250
+ niggers
251
+ niglet
252
+ nut sack
253
+ nutsack
254
+ paki
255
+ panooch
256
+ pecker
257
+ peckerhead
258
+ penis
259
+ penisbanger
260
+ penisfucker
261
+ penispuffer
262
+ piss
263
+ pissed
264
+ pissed off
265
+ pissedoff
266
+ pissflaps
267
+ polesmoker
268
+ pollock
269
+ poon
270
+ poonani
271
+ poonany
272
+ poontang
273
+ porch monkey
274
+ porchmonkey
275
+ prick
276
+ punanny
277
+ punta
278
+ pussies
279
+ pussy
280
+ pussylicking
281
+ puto
282
+ queef
283
+ queer
284
+ queerbait
285
+ queerhole
286
+ renob
287
+ rimjob
288
+ ruski
289
+ sand nigger
290
+ sandnigger
291
+ schlong
292
+ scrote
293
+ shit
294
+ shitass
295
+ shitbag
296
+ shitbagger
297
+ shitbrains
298
+ shitbreath
299
+ shitcanned
300
+ shitcunt
301
+ shitdick
302
+ shitface
303
+ shitfaced
304
+ shithead
305
+ shithole
306
+ shithouse
307
+ shitspitter
308
+ shitstain
309
+ shitter
310
+ shittiest
311
+ shitting
312
+ shitty
313
+ shiz
314
+ shiznit
315
+ skank
316
+ skeet
317
+ skullfuck
318
+ slut
319
+ slutbag
320
+ smeg
321
+ snatch
322
+ spic
323
+ spick
324
+ splooge
325
+ spook
326
+ suckass
327
+ tard
328
+ testicle
329
+ thundercunt
330
+ tit
331
+ titfuck
332
+ tits
333
+ tittyfuck
334
+ twat
335
+ twatlips
336
+ twats
337
+ twatwaffle
338
+ unclefucker
339
+ va-j-j
340
+ vajj
341
+ vag
342
+ vagina
343
+ vajayjay
344
+ vjayjay
345
+ wank
346
+ wankjob
347
+ wetback
348
+ whore
349
+ whorebag
350
+ whoreface
351
+ wop
@@ -0,0 +1,24 @@
1
module FeatureSet
  # Wraps a single raw feature value and exposes memoized token views of it.
  #
  # Both +tokens+ and +token_counts+ are computed once and cached; assigning a
  # new +value+ afterwards does not refresh them.
  class Datum
    # Separator pattern used to split the normalized text into tokens.
    TOKEN_REGEX = /[\s\/]+/
    # Matches every character that is not a letter, digit, underscore or
    # hyphen (so ASCII punctuation too); matches are replaced by a space.
    NON_ASCII_REGEX = /[^a-zA-Z0-9_-]/

    attr_accessor :value

    # @param v the raw value to wrap; tokenizing calls String methods on it
    def initialize(v)
      self.value = v
    end

    # Lower-cased tokens of +value+ with punctuation removed.
    #
    # Splitting on a regex keeps a leading empty field when the normalized
    # text starts with a separator (e.g. "!hello" becomes " hello"), so empty
    # strings are rejected to keep them out of counts and term frequencies.
    #
    # @return [Array<String>]
    def tokens
      @tokens ||= begin
        normalized = value.strip.downcase.gsub(NON_ASCII_REGEX, ' ')
        normalized.split(TOKEN_REGEX).reject(&:empty?)
      end
    end

    # Number of occurrences of each token.
    #
    # @return [Hash{String => Integer}]
    def token_counts
      @token_counts ||= tokens.each_with_object({}) do |token, counts|
        counts[token] = (counts[token] || 0) + 1
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module FeatureSet
  module FeatureBuilder
    # Common parent of all feature builders: keeps the options hash and
    # declares the two hooks a builder can provide.
    class Base
      attr_accessor :options

      # @param options [Hash] builder-specific settings, stored unchanged
      def initialize(options = {})
        self.options = options
      end

      # Must be overridden by subclasses; this default always raises a
      # RuntimeError telling the author to implement it.
      def generate_features(datum, key, row)
        message = "Please implement 'generate_features' in your subclass of FeatureBuilder::Base."
        raise RuntimeError, message
      end

      # Optional hook invoked with the whole data set before features are
      # generated. The default does nothing and returns nil.
      def before_generate_features(dataset)
        nil
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require "feature_set/feature_builder/base"
2
+
3
module FeatureSet
  module FeatureBuilder
    # Reports how many distinct tokens of a String datum appear in the
    # cuss-word list loaded from data/cusswords.txt.
    class Cuss < Base
      # Word list location, resolved relative to this file.
      CUSS_WORDS_PATH = File.expand_path('../../data/cusswords.txt', __FILE__)

      # One lower-cased, stripped entry per non-blank line of the word list.
      # Frozen so the shared list cannot be mutated at runtime.
      CUSS_WORDS = File.read(CUSS_WORDS_PATH).
        split("\n").
        map { |word| word.strip.downcase }.
        reject(&:empty?).
        freeze

      # @param datum [FeatureSet::Datum] the wrapped value to inspect
      # @param key unused by this builder
      # @param row unused by this builder
      # @return [Hash] +{}+ for non-String values, otherwise
      #   <tt>{ :cuss_count => n }</tt> where n counts distinct matching
      #   tokens (repeated words count once because of the set intersection)
      def generate_features(datum, key, row)
        return {} unless datum.value.is_a?(String)
        { :cuss_count => (datum.tokens & CUSS_WORDS).length }
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require "feature_set/feature_builder/base"
2
+
3
module FeatureSet
  module FeatureBuilder
    # Emits one TF-IDF weighted feature per word seen in the data set.
    #
    # +before_generate_features+ computes, per data key, the inverse document
    # frequency of every token; +generate_features+ multiplies those values
    # by the token's relative frequency within a single datum.
    class WordVector < Base
      # Hash of <tt>key => { token => idf }</tt>, filled by
      # +before_generate_features+.
      attr_accessor :idfs

      # Computes the IDF table for every key whose value is a String.
      #
      # For each key and token, idf = log(number of rows / number of rows
      # whose value under that key contains the token).
      #
      # @param dataset [Array<Hash>] rows whose non-label values are Datum objects
      def before_generate_features(dataset)
        @idfs = {}

        # First pass: count, per key, in how many rows each token occurs.
        dataset.each do |row|
          row.each do |key, datum|
            next if key == :class
            next unless datum.value.is_a?(String)

            doc_counts = (idfs[key] ||= {})
            datum.token_counts.keys.each do |token|
              doc_counts[token] = (doc_counts[token] || 0) + 1
            end
          end
        end

        # Second pass: turn the document counts into IDF values in place
        # (only existing keys are reassigned while iterating).
        num_docs = dataset.length
        idfs.each_value do |doc_counts|
          doc_counts.each do |token, doc_count|
            doc_counts[token] = Math.log(num_docs / doc_count.to_f)
          end
        end
      end

      # Returns the TF-IDF value of every known word of +key+ for one datum.
      #
      # This is a regular instance method. It used to be defined inside
      # +before_generate_features+ because of a misplaced +end+, so the
      # override only existed after that hook had run once.
      #
      # @param datum [FeatureSet::Datum] the wrapped value
      # @param key the data key the datum belongs to
      # @param row unused by this builder
      # @return [Hash{String => Float}] +{}+ for non-String values or when no
      #   IDF table exists for +key+
      def generate_features(datum, key, row)
        return {} unless datum.value.is_a?(String)

        word_idfs = (idfs && idfs[key]) || {}
        num_words = datum.tokens.length.to_f

        word_idfs.inject({}) do |memo, (word, idf)|
          # A datum without tokens has term frequency 0 for every word;
          # dividing by zero here would produce NaN features.
          term_frequency = num_words.zero? ? 0.0 : (datum.token_counts[word] || 0) / num_words
          memo[word] = term_frequency * idf
          memo
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module FeatureSet
  # Version of the feature_set gem; read by the gemspec. Frozen so the
  # shared string cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,4 @@
1
+ require "rarff"
2
+
3
+ require "feature_set/version"
4
+ require "feature_set/builder"
@@ -0,0 +1,72 @@
1
+ require 'spec_helper'
2
+
3
+ describe FeatureSet::Builder do
4
+ describe "adding feature builders" do
5
+ it "can add all known feature builders" do
6
+ builder = FeatureSet::Builder.new
7
+ builder.add_feature_builders :all
8
+ builder.feature_builders.map {|i| i.class}.should include(FeatureSet::FeatureBuilder::WordVector)
9
+ builder.feature_builders.length.should == Dir[File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "lib", "feature_set", "feature_builder", "*.rb"))].length - 1
10
+ end
11
+
12
+ it "can add individual feature builders" do
13
+ builder = FeatureSet::Builder.new
14
+ builder.add_feature_builder FeatureSet::FeatureBuilder::WordVector.new
15
+ builder.feature_builders.length.should == 1
16
+ end
17
+
18
+ it "can add arrays of feature builders" do
19
+ builder = FeatureSet::Builder.new
20
+ builder.add_feature_builders [FeatureSet::FeatureBuilder::WordVector.new, FeatureSet::FeatureBuilder::Cuss.new]
21
+ builder.feature_builders.length.should == 2
22
+ end
23
+ end
24
+
25
+ describe "adding data" do
26
+ it "should accept mappings between one or more strings and their classifications" do
27
+ builder = FeatureSet::Builder.new
28
+ builder.add_data [ { :status => "I am happy!", :class => :happy },
29
+ { :status => "I am sad." , :class => :sad } ]
30
+ builder.data.should == [ { :status => "I am happy!", :class => :happy },
31
+ { :status => "I am sad." , :class => :sad } ]
32
+ builder.add_data :status => "Something", :another_feature => "Something else", :class => :awesome
33
+ builder.data.should == [ { :status => "I am happy!", :class => :happy },
34
+ { :status => "I am sad." , :class => :sad },
35
+ { :status => "Something", :another_feature => "Something else", :class => :awesome } ]
36
+ builder.clear_data
37
+ builder.data.should == []
38
+ builder.data = [ { :status => "I am happy!", :class => :happy },
39
+ { :status => "I am sad." , :class => :sad } ]
40
+ builder.data.should == [ { :status => "I am happy!", :class => :happy },
41
+ { :status => "I am sad." , :class => :sad } ]
42
+ end
43
+ end
44
+
45
+ describe "generating features" do
46
+ before do
47
+ @builder = FeatureSet::Builder.new
48
+ @builder.add_feature_builder FeatureSet::FeatureBuilder::Cuss.new
49
+ @builder.add_data :status => "this is some text", :class => :awesome
50
+ @builder.add_data :status => "this is some shitty text", :class => :less_awesome
51
+ end
52
+
53
+ it "should output a row of features for every line of data" do
54
+ @builder.generate_features
55
+ @builder.features[0].should == { :status_cuss_count => 0, :class => :awesome }
56
+ @builder.features[1].should == { :status_cuss_count => 1, :class => :less_awesome }
57
+ end
58
+
59
+ it "should make it easy to keep the original data" do
60
+ @builder.generate_features(:include_original => true)
61
+ @builder.features[0].should == { :status => "this is some text", :status_cuss_count => 0, :class => :awesome }
62
+ @builder.features[1].should == { :status => "this is some shitty text", :status_cuss_count => 1, :class => :less_awesome }
63
+ end
64
+
65
+ it "should generate features for every string" do
66
+ @builder.add_data :status => "text", :foo => "more shitty text", :class => :awesome
67
+ @builder.generate_features
68
+ @builder.features[1].should == { :status_cuss_count => 1, :class => :less_awesome }
69
+ @builder.features[2].should == { :status_cuss_count => 0, :foo_cuss_count => 1, :class => :awesome }
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ describe FeatureSet::Datum do
4
+ describe "tokenize" do
5
+ it "should return an array of tokens" do
6
+ FeatureSet::Datum.new("hello world sup?").tokens.should =~ ["hello", "world", "sup"]
7
+ end
8
+
9
+ it "should memoize" do
10
+ datum = FeatureSet::Datum.new("hello world sup?")
11
+ datum.tokens.should =~ ["hello", "world", "sup"]
12
+ datum.value = "hello"
13
+ datum.tokens.should =~ ["hello", "world", "sup"]
14
+ end
15
+ end
16
+
17
+ describe "#token_counts" do
18
+ it "should provide counts for each token" do
19
+ datum = FeatureSet::Datum.new("hello world sup? hello!")
20
+ datum.token_counts.should == { "hello" => 2, "world" => 1, "sup" => 1}
21
+ end
22
+
23
+ it "should memoize" do
24
+ datum = FeatureSet::Datum.new("hello world sup? hello!")
25
+ datum.token_counts.should == { "hello" => 2, "world" => 1, "sup" => 1}
26
+ datum.value = "hello"
27
+ datum.instance_variable_set(:@tokens, ["hello"])
28
+ datum.token_counts.should == { "hello" => 2, "world" => 1, "sup" => 1}
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
+ describe FeatureSet::FeatureBuilder::Cuss do
4
+ before do
5
+ @builder = FeatureSet::FeatureBuilder::Cuss.new
6
+ end
7
+
8
+ it "should output :cuss_count as the number of distinct cuss words found" do
9
+ @builder.generate_features(FeatureSet::Datum.new("this fucking shit"), nil, nil).should == { :cuss_count => 2 }
10
+ @builder.generate_features(FeatureSet::Datum.new("this fucking fucking fucking shit"), nil, nil).should == { :cuss_count => 2 }
11
+ end
12
+
13
+ it "should ignore non-string features" do
14
+ @builder.generate_features(FeatureSet::Datum.new(2), nil, nil).should == {}
15
+ end
16
+ end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ describe FeatureSet::FeatureBuilder::WordVector do
4
+ it "should output a named feature for every word in the dataset, after performing tfidf" do
5
+ builder = FeatureSet::FeatureBuilder::WordVector.new
6
+ dataset = [
7
+ { :m1 => "hello world. hello!", :m2 => "how goes?", :class => :yes },
8
+ { :m1 => "foo world", :m2 => "how?", :class => :no }
9
+ ]
10
+ wrapped_dataset = FeatureSet::Builder.wrap_dataset(dataset)
11
+ builder.before_generate_features(wrapped_dataset)
12
+
13
+ builder.idfs.should == {
14
+ :m1 => { "hello" => Math.log(2/1.0), "world" => Math.log(2/2.0), "foo" => Math.log(2/1.0) },
15
+ :m2 => { "how" => Math.log(2/2.0), "goes" => Math.log(2/1.0) }
16
+ }
17
+
18
+ builder.generate_features(wrapped_dataset.first[:m1], :m1, wrapped_dataset.first).should == { "hello" => (2/3.0) * Math.log(2/1.0), "world" => (1/3.0) * Math.log(2/2.0), "foo" => 0 }
19
+ builder.generate_features(wrapped_dataset.first[:m2], :m2, wrapped_dataset.first).should == { "how" => (1/2.0) * Math.log(2/2.0), "goes" => (1/2.0) * Math.log(2/1.0) }
20
+
21
+ builder.generate_features(wrapped_dataset.last[:m1], :m1, wrapped_dataset.last).should == { "hello" => 0, "world" => (1/2.0) * Math.log(2/2.0), "foo" => (1/2.0) * Math.log(2/1.0) }
22
+ builder.generate_features(wrapped_dataset.last[:m2], :m2, wrapped_dataset.last).should == { "how" => (1/1.0) * Math.log(2/2.0), "goes" => 0 }
23
+ end
24
+
25
+ it "should ignore non-string features" do
26
+ builder = FeatureSet::FeatureBuilder::WordVector.new
27
+ builder.before_generate_features([{ :something => FeatureSet::Datum.new(2), :class => false }, { :something => FeatureSet::Datum.new(1), :class => true }])
28
+ builder.generate_features(FeatureSet::Datum.new(2), :something, { :something => FeatureSet::Datum.new(2), :class => false }).should == {}
29
+ end
30
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,4 @@
1
+ --colour
2
+ --format s -c
3
+ --loadby mtime
4
+ --reverse
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'feature_set'
3
+
4
+ RSpec.configure do |c|
5
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: feature_set
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Cantino
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-17 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70284888584540 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70284888584540
25
+ - !ruby/object:Gem::Dependency
26
+ name: wwood-rarff
27
+ requirement: &70284888584120 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70284888584120
36
+ - !ruby/object:Gem::Dependency
37
+ name: activesupport
38
+ requirement: &70284888583700 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70284888583700
47
+ - !ruby/object:Gem::Dependency
48
+ name: i18n
49
+ requirement: &70284888583280 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70284888583280
58
+ description: FeatureSet is a Ruby library for generating feature vectors from textual
59
+ data. It can output in ARFF format for experimentation with Weka.
60
+ email:
61
+ - andrew@iterationlabs.com
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - .gitignore
67
+ - .rvmrc
68
+ - Gemfile
69
+ - README.markdown
70
+ - Rakefile
71
+ - feature_set.gemspec
72
+ - lib/feature_set.rb
73
+ - lib/feature_set/builder.rb
74
+ - lib/feature_set/data/cusswords.txt
75
+ - lib/feature_set/datum.rb
76
+ - lib/feature_set/feature_builder/base.rb
77
+ - lib/feature_set/feature_builder/cuss.rb
78
+ - lib/feature_set/feature_builder/word_vector.rb
79
+ - lib/feature_set/version.rb
80
+ - spec/feature_set/builder_spec.rb
81
+ - spec/feature_set/datum_spec.rb
82
+ - spec/feature_set/feature/cuss_spec.rb
83
+ - spec/feature_set/feature/word_vector_spec.rb
84
+ - spec/spec.opts
85
+ - spec/spec_helper.rb
86
+ homepage: https://github.com/iterationlabs/feature_set
87
+ licenses: []
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project: feature_set
106
+ rubygems_version: 1.8.10
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: Generate feature vectors from textual data
110
+ test_files:
111
+ - spec/feature_set/builder_spec.rb
112
+ - spec/feature_set/datum_spec.rb
113
+ - spec/feature_set/feature/cuss_spec.rb
114
+ - spec/feature_set/feature/word_vector_spec.rb
115
+ - spec/spec.opts
116
+ - spec/spec_helper.rb