kfold 0.1 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.idea/encodings.xml +5 -0
- data/.idea/kfold.iml +10 -0
- data/.idea/misc.xml +11 -0
- data/.idea/modules.xml +9 -0
- data/.idea/vcs.xml +7 -0
- data/.rspec +2 -0
- data/CHANGELOG +3 -1
- data/Gemfile +4 -0
- data/Manifest +1 -1
- data/README.rdoc +166 -0
- data/Rakefile +1 -17
- data/bin/kfold +102 -26
- data/kfold.gemspec +14 -31
- data/lib/kfold.rb +1 -3
- data/lib/kfold/version.rb +3 -0
- data/spec/kfold/data_file_spec.rb +1 -1
- data/spec/spec_helper.rb +9 -0
- metadata +66 -91
- data.tar.gz.sig +0 -0
- data/README +0 -3
- data/spec/helper.rb +0 -3
- metadata.gz.sig +0 -2
data/.gitignore
ADDED
data/.idea/encodings.xml
ADDED
data/.idea/kfold.iml
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="NewModuleRootManager">
|
4
|
+
<content url="file://$MODULE_DIR$" />
|
5
|
+
<orderEntry type="inheritedJdk" />
|
6
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
7
|
+
<orderEntry type="library" scope="PROVIDED" name="bundler (v1.0.18, ruby-1.9.2-p290) [gem]" level="application" />
|
8
|
+
</component>
|
9
|
+
</module>
|
10
|
+
|
data/.idea/misc.xml
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="DependencyValidationManager">
|
4
|
+
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
|
5
|
+
</component>
|
6
|
+
<component name="ProjectResources">
|
7
|
+
<default-html-doctype>http://www.w3.org/1999/xhtml</default-html-doctype>
|
8
|
+
</component>
|
9
|
+
<component name="ProjectRootManager" version="2" project-jdk-name="ruby-1.9.2-p290" project-jdk-type="RUBY_SDK" />
|
10
|
+
</project>
|
11
|
+
|
data/.idea/modules.xml
ADDED
data/.idea/vcs.xml
ADDED
data/.rspec
ADDED
data/CHANGELOG
CHANGED
data/Gemfile
ADDED
data/Manifest
CHANGED
data/README.rdoc
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
= kfold
|
2
|
+
|
3
|
+
kfold creates K-fold splits from data files and assists in training and testing (useful for cross-validation in supervised machine
|
4
|
+
learning)
|
5
|
+
|
6
|
+
== Command overview
|
7
|
+
|
8
|
+
help Display global or [command] help documentation.
|
9
|
+
split Split a data file into K partitions
|
10
|
+
test Apply trained models on a dataset previously split using kfold
|
11
|
+
train Train models on a dataset previously split using kfold
|
12
|
+
|
13
|
+
== Example usage
|
14
|
+
|
15
|
+
10-fold cross-validation of the standard MaltParser on a treebank named shuffled.c32.conll may be done as follows:
|
16
|
+
|
17
|
+
kfold split -f -i shuffled.c32.conll -d '\n\n'
|
18
|
+
kfold train -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -m learn
|
19
|
+
kfold test -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -o %O -m parse
|
20
|
+
eval07.pl -q -g shuffled.c32.conll -s shuffled.c32.conll.output
|
21
|
+
|
22
|
+
The MaltParser does not like to put its models in a subdirectory, so rather than using the standard model files suggested by kfold (%M), we construct custom non-nested model filenames using %B.model_%N.
|
23
|
+
|
24
|
+
== Command details
|
25
|
+
|
26
|
+
The following is simply the output of the built-in help commands.
|
27
|
+
|
28
|
+
=== Splitting data files
|
29
|
+
|
30
|
+
NAME:
|
31
|
+
|
32
|
+
split
|
33
|
+
|
34
|
+
DESCRIPTION:
|
35
|
+
|
36
|
+
Given the data file INPUT, the partitions are written to files named INPUT.parts/{01..K}
|
37
|
+
|
38
|
+
SYNOPSIS:
|
39
|
+
|
40
|
+
kfold split -i INPUT [options]
|
41
|
+
|
42
|
+
EXAMPLES:
|
43
|
+
|
44
|
+
# Split the file sample.txt into 4 parts
|
45
|
+
kfold split -k4 -i sample.txt
|
46
|
+
|
47
|
+
# Split the double-newline-delimited file sample.conll into 10 parts
|
48
|
+
kfold split -d"\n\n" -i sample.conll
|
49
|
+
|
50
|
+
OPTIONS:
|
51
|
+
|
52
|
+
-i, --input FILE
|
53
|
+
Data file to split
|
54
|
+
|
55
|
+
-k, --parts N
|
56
|
+
The number of partitions desired
|
57
|
+
|
58
|
+
-d, --delimiter DELIM
|
59
|
+
String used to separate individual entries (newline per default)
|
60
|
+
|
61
|
+
-g, --granularity N
|
62
|
+
Ensure the number of entries in each partition is divisible by N (useful for block-structured data)
|
63
|
+
|
64
|
+
-f, --overwrite
|
65
|
+
Remove existing parts prior to executing
|
66
|
+
|
67
|
+
--fold
|
68
|
+
Additionally, create K folds of K-1 parts in another folder
|
69
|
+
|
70
|
+
--parts-name STRING
|
71
|
+
Use the given name as suffix for the partitions folder created
|
72
|
+
|
73
|
+
--folds-name STRING
|
74
|
+
Use the given name as suffix for the folds folder created
|
75
|
+
|
76
|
+
=== Training on the folds
|
77
|
+
|
78
|
+
NAME:
|
79
|
+
|
80
|
+
train
|
81
|
+
|
82
|
+
DESCRIPTION:
|
83
|
+
|
84
|
+
Given training data previously split in K parts and folds, train K models on the K folds
|
85
|
+
|
86
|
+
Certain keywords in the training command and its arguments are interpolated at runtime:
|
87
|
+
|
88
|
+
* %N - fold number, e.g. '01'
|
89
|
+
* %F - fold filename, e.g. 'brown.train/01'
|
90
|
+
* %T - alias for %F
|
91
|
+
* %M - model filename, e.g. 'brown.models/01'
|
92
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
93
|
+
|
94
|
+
|
95
|
+
SYNOPSIS:
|
96
|
+
|
97
|
+
kfold train --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]
|
98
|
+
|
99
|
+
EXAMPLES:
|
100
|
+
|
101
|
+
# Train MaltParser for cross-validation
|
102
|
+
kfold train -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -m learn
|
103
|
+
|
104
|
+
OPTIONS:
|
105
|
+
|
106
|
+
-f, --overwrite
|
107
|
+
Remove existing models prior to executing
|
108
|
+
|
109
|
+
--base NAME
|
110
|
+
Default prefix of training folds and model files
|
111
|
+
|
112
|
+
--folds-name SUFFIX
|
113
|
+
Look for folds {01..K} in the folder BASE.SUFFIX
|
114
|
+
|
115
|
+
--models-name SUFFIX
|
116
|
+
Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M
|
117
|
+
|
118
|
+
=== Testing the models on their corresponding data file parts
|
119
|
+
|
120
|
+
NAME:
|
121
|
+
|
122
|
+
test
|
123
|
+
|
124
|
+
DESCRIPTION:
|
125
|
+
|
126
|
+
Process K parts of a split datafile using K previously trained models.
|
127
|
+
|
128
|
+
Certain keywords in the testing command and its arguments are interpolated at runtime:
|
129
|
+
|
130
|
+
* %N - part number, e.g. '01'
|
131
|
+
* %T - part filename, e.g. 'brown.test/01'
|
132
|
+
* %I - alias for %T
|
133
|
+
* %O - output filename, e.g. 'brown.outputs/01'
|
134
|
+
* %M - model filename, e.g. 'brown.models/01'
|
135
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
136
|
+
|
137
|
+
|
138
|
+
SYNOPSIS:
|
139
|
+
|
140
|
+
kfold test --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]
|
141
|
+
|
142
|
+
EXAMPLES:
|
143
|
+
|
144
|
+
# Apply trained MaltParser models for cross-validation
|
145
|
+
kfold test -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -o %O -m parse
|
146
|
+
|
147
|
+
OPTIONS:
|
148
|
+
|
149
|
+
-f, --overwrite
|
150
|
+
Remove existing test output prior to executing
|
151
|
+
|
152
|
+
--base NAME
|
153
|
+
Default prefix of model files and test outputs
|
154
|
+
|
155
|
+
--parts-name SUFFIX
|
156
|
+
Look for parts {01..K} to be processed in the folder BASE.SUFFIX
|
157
|
+
|
158
|
+
--models-name SUFFIX
|
159
|
+
Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M
|
160
|
+
|
161
|
+
--outputs-name SUFFIX
|
162
|
+
Yield output filenames as BASE.SUFFIX/{01..K} as interpolation pattern %O
|
163
|
+
|
164
|
+
--output-name SUFFIX
|
165
|
+
Put the concatenated output of all models in BASE.SUFFIX
|
166
|
+
|
data/Rakefile
CHANGED
@@ -1,17 +1 @@
|
|
1
|
-
|
2
|
-
require 'rubygems'
|
3
|
-
require 'rake'
|
4
|
-
require 'echoe'
|
5
|
-
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))
|
7
|
-
require 'kfold'
|
8
|
-
|
9
|
-
Echoe.new('kfold', Kfold::VERSION) do |p|
|
10
|
-
p.description = "Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)"
|
11
|
-
p.url = "http://github.com/crishoj/kfold"
|
12
|
-
p.author = "Christian Rishøj"
|
13
|
-
p.email = "christian@rishoj.net"
|
14
|
-
p.ignore_pattern = ["tmp/**/*", "script/*", "nbproject/**/*"]
|
15
|
-
p.runtime_dependencies = ["commander"]
|
16
|
-
p.development_dependencies = []
|
17
|
-
end
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/kfold
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
2
|
require 'commander/import'
|
5
3
|
require 'kfold'
|
6
4
|
require 'fileutils'
|
7
5
|
|
8
6
|
program :version, Kfold::VERSION
|
9
|
-
program :description,
|
10
|
-
|
7
|
+
program :description, "Create K-fold splits from data files and assist in training and testing (useful for cross-validation in supervised machine learning)"
|
8
|
+
|
11
9
|
def do_directory(dir, overwrite = false)
|
12
10
|
if File.exist? dir
|
13
11
|
if overwrite
|
@@ -35,7 +33,7 @@ command :split do |c|
|
|
35
33
|
c.option '--parts-name STRING', String, 'Use the given name as suffix for the partitions folder created'
|
36
34
|
c.option '--folds-name STRING', String, 'Use the given name as suffix for the folds folder created'
|
37
35
|
c.action do |args, options|
|
38
|
-
options.default :parts => 10, :delimiter => "\n", :granularity => 1, :parts_name => '
|
36
|
+
options.default :parts => 10, :delimiter => "\n", :granularity => 1, :parts_name => 'test', :folds_name => 'train'
|
39
37
|
abort "Failed: Please specify input file using -i INPUT" unless options.input
|
40
38
|
abort "Failed: Input file #{options.input} does not exist" unless File.exist? options.input
|
41
39
|
# Interpret newlines, linefeeds and tabs
|
@@ -87,25 +85,103 @@ end
|
|
87
85
|
|
88
86
|
alias_command :fold, :split, '--fold'
|
89
87
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
88
|
+
command :train do |c|
|
89
|
+
c.syntax = 'kfold train --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]'
|
90
|
+
c.summary = 'Train models on a dataset previously split using kfold'
|
91
|
+
c.description = <<-end
|
92
|
+
Given training data previously split in K parts and folds, train K models on the K folds
|
93
|
+
|
94
|
+
Certain keywords in the training command and its arguments are interpolated at runtime:
|
95
|
+
|
96
|
+
* %N - fold number, e.g. '01'
|
97
|
+
* %F - fold filename, e.g. 'brown.train/01'
|
98
|
+
* %I - alias for %F
|
99
|
+
* %M - model filename, e.g. 'brown.models/01'
|
100
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
101
|
+
end
|
102
|
+
c.example 'Train MaltParser for cross-validation', 'kfold train -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -m learn'
|
103
|
+
c.option '-f', '--overwrite', 'Remove existing models prior to executing'
|
104
|
+
c.option '--base NAME', String, 'Default prefix of training folds and model files'
|
105
|
+
c.option '--folds-name SUFFIX', String, 'Look for folds {01..K} in the folder BASE.SUFFIX'
|
106
|
+
c.option '--models-name SUFFIX', String, 'Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M'
|
107
|
+
c.action do |args, options|
|
108
|
+
options.default :folds_name => 'train', :models_name => 'models'
|
109
|
+
raise "Must specify base name using --base" unless options.base
|
110
|
+
folds_dir = "#{options.base}.#{options.folds_name}"
|
111
|
+
models_dir = "#{options.base}.#{options.models_name}"
|
112
|
+
raise "Folds directory not found" unless File.exists?(folds_dir)
|
113
|
+
folds = Dir.glob(File.join(folds_dir, '*')).collect { |file|
|
114
|
+
file if File.basename(file) =~ /^[0-9]+$/
|
115
|
+
}.compact
|
116
|
+
raise "No folds found in #{folds_dir}" if folds.count == 0
|
117
|
+
say "Found #{folds.count} folds in #{folds_dir}"
|
118
|
+
cmds = folds.collect do |fold|
|
119
|
+
num = File.basename(fold)
|
120
|
+
model = File.join(models_dir, num)
|
121
|
+
args.collect { |arg|
|
122
|
+
arg.gsub(/%F/, fold).gsub(/%T/, fold).gsub(/%N/, num).gsub('%M', model).gsub('%B', options.base)
|
123
|
+
}.join(' ')
|
124
|
+
end
|
125
|
+
do_directory(models_dir, options.overwrite) if cmds.first.match(models_dir)
|
126
|
+
cmds.each do |cmd|
|
127
|
+
say "[exec] #{cmd}"
|
128
|
+
system cmd
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
command :test do |c|
|
134
|
+
c.syntax = 'kfold test --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]'
|
135
|
+
c.summary = 'Apply trained models on a dataset previously split using kfold'
|
136
|
+
c.option '-f', '--overwrite', 'Remove existing test output prior to executing'
|
137
|
+
c.option '--base NAME', String, 'Default prefix of model files and test outputs'
|
138
|
+
c.option '--parts-name SUFFIX', String, 'Look for parts {01..K} to be processed in the folder BASE.SUFFIX'
|
139
|
+
c.option '--models-name SUFFIX', String, 'Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M'
|
140
|
+
c.option '--outputs-name SUFFIX', String, 'Yield output filenames as BASE.SUFFIX/{01..K} as interpolation pattern %O'
|
141
|
+
c.option '--output-name SUFFIX', String, 'Put the concatenated output of all models in BASE.SUFFIX'
|
142
|
+
c.description = <<-end
|
143
|
+
Process K parts of a split datafile using K previously trained models.
|
144
|
+
|
145
|
+
Certain keywords in the testing command and its arguments are interpolated at runtime:
|
111
146
|
|
147
|
+
* %N - part number, e.g. '01'
|
148
|
+
* %T - part filename, e.g. 'brown.test/01'
|
149
|
+
* %I - alias for %T
|
150
|
+
* %O - output filename, e.g. 'brown.outputs/01'
|
151
|
+
* %M - model filename, e.g. 'brown.models/01'
|
152
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
153
|
+
end
|
154
|
+
c.example 'Apply trained MaltParser models for cross-validation', 'kfold test -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -o %O -m parse'
|
155
|
+
c.action do |args, options|
|
156
|
+
options.default :parts_name => 'test', :models_name => 'models', :outputs_name => 'outputs', :output_name => 'output'
|
157
|
+
raise "Must specify base name using --base" unless options.base
|
158
|
+
parts_dir = "#{options.base}.#{options.parts_name}"
|
159
|
+
models_dir = "#{options.base}.#{options.models_name}"
|
160
|
+
output_dir = "#{options.base}.#{options.outputs_name}"
|
161
|
+
output_file = "#{options.base}.#{options.output_name}"
|
162
|
+
raise "Parts directory not found" unless File.exists?(parts_dir)
|
163
|
+
raise "Model directory not found" unless File.exists?(models_dir)
|
164
|
+
parts = Dir.glob(File.join(parts_dir, '*')).collect { |file|
|
165
|
+
file if File.basename(file) =~ /^[0-9]+$/
|
166
|
+
}.compact
|
167
|
+
raise "No parts found in #{parts_dir}" if parts.count == 0
|
168
|
+
say "Found #{parts.count} parts in #{parts_dir}"
|
169
|
+
outputs = []
|
170
|
+
cmds = parts.collect do |part|
|
171
|
+
num = File.basename(part)
|
172
|
+
model = File.join(models_dir, num)
|
173
|
+
output = File.join(output_dir, num)
|
174
|
+
outputs << output
|
175
|
+
args.collect { |arg|
|
176
|
+
arg.gsub(/%O/, output).gsub(/%I/, part).gsub(/%T/, part).gsub(/%N/, num).gsub('%M', model).gsub('%B', options.base)
|
177
|
+
}.join(' ')
|
178
|
+
end
|
179
|
+
do_directory(output_dir, options.overwrite)
|
180
|
+
cmds.each do |cmd|
|
181
|
+
say "[exec] #{cmd}"
|
182
|
+
system cmd
|
183
|
+
end
|
184
|
+
say "[join] #{outputs.join(' ')} => #{output_file}"
|
185
|
+
system "cat #{outputs.join(' ')} > #{output_file}"
|
186
|
+
end
|
187
|
+
end
|
data/kfold.gemspec
CHANGED
@@ -1,37 +1,20 @@
|
|
1
|
-
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require "kfold/version"
|
2
3
|
|
3
4
|
Gem::Specification.new do |s|
|
4
|
-
s.name
|
5
|
-
s.version
|
5
|
+
s.name = "kfold"
|
6
|
+
s.version = Kfold::VERSION
|
7
|
+
s.authors = ["Christian Rishoj"]
|
8
|
+
s.email = ["christian@rishoj.net"]
|
9
|
+
s.homepage = "http://github.com/crishoj/kfold"
|
10
|
+
s.summary = %q{Split your data and process it in parallel}
|
11
|
+
s.description = %q{Create K-fold splits from data files and assist in training and testing (useful for cross-validation in supervised machine learning)}
|
6
12
|
|
7
|
-
s.
|
8
|
-
s.
|
9
|
-
s.
|
10
|
-
s.date = %q{2010-12-30}
|
11
|
-
s.default_executable = %q{kfold}
|
12
|
-
s.description = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}
|
13
|
-
s.email = %q{christian@rishoj.net}
|
14
|
-
s.executables = ["kfold"]
|
15
|
-
s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb"]
|
16
|
-
s.files = ["CHANGELOG", "LICENSE", "Manifest", "README", "Rakefile", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb", "spec/helper.rb", "spec/kfold/data_file_spec.rb", "spec/kfold/sample_data_file.conll", "kfold.gemspec"]
|
17
|
-
s.homepage = %q{http://github.com/crishoj/kfold}
|
18
|
-
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Kfold", "--main", "README"]
|
13
|
+
s.files = `git ls-files`.split("\n")
|
14
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
16
|
s.require_paths = ["lib"]
|
20
|
-
s.rubyforge_project = %q{kfold}
|
21
|
-
s.rubygems_version = %q{1.3.7}
|
22
|
-
s.signing_key = %q{/Users/crjensen/Documents/Certificates/gem-private_key.pem}
|
23
|
-
s.summary = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}
|
24
17
|
|
25
|
-
|
26
|
-
|
27
|
-
s.specification_version = 3
|
28
|
-
|
29
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
30
|
-
s.add_runtime_dependency(%q<commander>, [">= 0"])
|
31
|
-
else
|
32
|
-
s.add_dependency(%q<commander>, [">= 0"])
|
33
|
-
end
|
34
|
-
else
|
35
|
-
s.add_dependency(%q<commander>, [">= 0"])
|
36
|
-
end
|
18
|
+
s.add_development_dependency "rspec"
|
19
|
+
s.add_runtime_dependency "commander"
|
37
20
|
end
|
data/lib/kfold.rb
CHANGED
data/spec/spec_helper.rb
ADDED
metadata
CHANGED
@@ -1,118 +1,93 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: kfold
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 1
|
8
|
-
version: "0.1"
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.3'
|
5
|
+
prerelease:
|
9
6
|
platform: ruby
|
10
|
-
authors:
|
11
|
-
-
|
7
|
+
authors:
|
8
|
+
- Christian Rishoj
|
12
9
|
autorequire:
|
13
10
|
bindir: bin
|
14
|
-
cert_chain:
|
15
|
-
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
GDHt9mtVmQiD3fBCfPJhysaRSUyaUA7jEFljgRTxuH1GWLyGE24/c6zVBknLPSC+
|
27
|
-
hFW0Ib3/AgMBAAGjOTA3MAkGA1UdEwQCMAAwHQYDVR0OBBYEFNtKnxiLUpy9a406
|
28
|
-
diOk3lm5ISTEMAsGA1UdDwQEAwIEsDANBgkqhkiG9w0BAQUFAAOCAQEAiMhO6NXU
|
29
|
-
f/uTInOl2fFxIQsakyCXoWE2u7U2NLLW5R5DCYFK/EH+OYsum1Khu+Jt/n5loV7e
|
30
|
-
B4QlCbwlhwztW7sWA/sQQoLQsroZi2kmYwnkYLeqRgUre3E+YpD3S9QAWIFYpkBZ
|
31
|
-
b9mIToqxb0m+WiLCysrg3sfDymrfuNDdtQcVPcJ5W2+Mj6LJJN65bAvqqExVpr63
|
32
|
-
qbn/bmiocEIbQUsPSVuw+FSIiR6be/Ty3QpWQgxXnbHsfHFWPpADuOwTYPLxWqBg
|
33
|
-
4izI+lCFvIjAaa5WjKVW8PV3XIvgr4+/ESIzs1OOVW7ktQNwu7GXt/kR2KQH9FRC
|
34
|
-
VSyVlp5OZP6OoA==
|
35
|
-
-----END CERTIFICATE-----
|
36
|
-
|
37
|
-
date: 2010-12-30 00:00:00 +07:00
|
38
|
-
default_executable:
|
39
|
-
dependencies:
|
40
|
-
- !ruby/object:Gem::Dependency
|
41
|
-
name: commander
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-10-15 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &70121742826740 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
42
23
|
prerelease: false
|
43
|
-
|
24
|
+
version_requirements: *70121742826740
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: commander
|
27
|
+
requirement: &70121742826200 !ruby/object:Gem::Requirement
|
44
28
|
none: false
|
45
|
-
requirements:
|
46
|
-
- -
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
|
49
|
-
- 0
|
50
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
51
33
|
type: :runtime
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70121742826200
|
36
|
+
description: Create K-fold splits from data files and assist in training and testing
|
37
|
+
(useful for cross-validation in supervised machine learning)
|
38
|
+
email:
|
39
|
+
- christian@rishoj.net
|
40
|
+
executables:
|
56
41
|
- kfold
|
57
42
|
extensions: []
|
58
|
-
|
59
|
-
|
60
|
-
-
|
61
|
-
-
|
62
|
-
-
|
63
|
-
-
|
64
|
-
-
|
65
|
-
-
|
66
|
-
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- .idea/encodings.xml
|
47
|
+
- .idea/kfold.iml
|
48
|
+
- .idea/misc.xml
|
49
|
+
- .idea/modules.xml
|
50
|
+
- .idea/vcs.xml
|
51
|
+
- .rspec
|
67
52
|
- CHANGELOG
|
53
|
+
- Gemfile
|
68
54
|
- LICENSE
|
69
55
|
- Manifest
|
70
|
-
- README
|
56
|
+
- README.rdoc
|
71
57
|
- Rakefile
|
72
58
|
- bin/kfold
|
59
|
+
- kfold.gemspec
|
73
60
|
- lib/kfold.rb
|
74
61
|
- lib/kfold/data_file.rb
|
75
|
-
-
|
62
|
+
- lib/kfold/version.rb
|
76
63
|
- spec/kfold/data_file_spec.rb
|
77
64
|
- spec/kfold/sample_data_file.conll
|
78
|
-
-
|
79
|
-
has_rdoc: true
|
65
|
+
- spec/spec_helper.rb
|
80
66
|
homepage: http://github.com/crishoj/kfold
|
81
67
|
licenses: []
|
82
|
-
|
83
68
|
post_install_message:
|
84
|
-
rdoc_options:
|
85
|
-
|
86
|
-
- --inline-source
|
87
|
-
- --title
|
88
|
-
- Kfold
|
89
|
-
- --main
|
90
|
-
- README
|
91
|
-
require_paths:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
92
71
|
- lib
|
93
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
73
|
none: false
|
95
|
-
requirements:
|
96
|
-
- -
|
97
|
-
- !ruby/object:Gem::Version
|
98
|
-
|
99
|
-
|
100
|
-
version: "0"
|
101
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
79
|
none: false
|
103
|
-
requirements:
|
104
|
-
- -
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
|
107
|
-
- 1
|
108
|
-
- 2
|
109
|
-
version: "1.2"
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
110
84
|
requirements: []
|
111
|
-
|
112
|
-
|
113
|
-
rubygems_version: 1.3.7
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 1.8.10
|
114
87
|
signing_key:
|
115
88
|
specification_version: 3
|
116
|
-
summary:
|
117
|
-
test_files:
|
118
|
-
|
89
|
+
summary: Split your data and process it in parallel
|
90
|
+
test_files:
|
91
|
+
- spec/kfold/data_file_spec.rb
|
92
|
+
- spec/kfold/sample_data_file.conll
|
93
|
+
- spec/spec_helper.rb
|
data.tar.gz.sig
DELETED
Binary file
|
data/README
DELETED
data/spec/helper.rb
DELETED
metadata.gz.sig
DELETED