kfold 0.1 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.idea/encodings.xml +5 -0
- data/.idea/kfold.iml +10 -0
- data/.idea/misc.xml +11 -0
- data/.idea/modules.xml +9 -0
- data/.idea/vcs.xml +7 -0
- data/.rspec +2 -0
- data/CHANGELOG +3 -1
- data/Gemfile +4 -0
- data/Manifest +1 -1
- data/README.rdoc +166 -0
- data/Rakefile +1 -17
- data/bin/kfold +102 -26
- data/kfold.gemspec +14 -31
- data/lib/kfold.rb +1 -3
- data/lib/kfold/version.rb +3 -0
- data/spec/kfold/data_file_spec.rb +1 -1
- data/spec/spec_helper.rb +9 -0
- metadata +66 -91
- data.tar.gz.sig +0 -0
- data/README +0 -3
- data/spec/helper.rb +0 -3
- metadata.gz.sig +0 -2
data/.gitignore
ADDED
data/.idea/encodings.xml
ADDED
data/.idea/kfold.iml
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="NewModuleRootManager">
|
4
|
+
<content url="file://$MODULE_DIR$" />
|
5
|
+
<orderEntry type="inheritedJdk" />
|
6
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
7
|
+
<orderEntry type="library" scope="PROVIDED" name="bundler (v1.0.18, ruby-1.9.2-p290) [gem]" level="application" />
|
8
|
+
</component>
|
9
|
+
</module>
|
10
|
+
|
data/.idea/misc.xml
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="DependencyValidationManager">
|
4
|
+
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
|
5
|
+
</component>
|
6
|
+
<component name="ProjectResources">
|
7
|
+
<default-html-doctype>http://www.w3.org/1999/xhtml</default-html-doctype>
|
8
|
+
</component>
|
9
|
+
<component name="ProjectRootManager" version="2" project-jdk-name="ruby-1.9.2-p290" project-jdk-type="RUBY_SDK" />
|
10
|
+
</project>
|
11
|
+
|
data/.idea/modules.xml
ADDED
data/.idea/vcs.xml
ADDED
data/.rspec
ADDED
data/CHANGELOG
CHANGED
data/Gemfile
ADDED
data/Manifest
CHANGED
data/README.rdoc
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
= kfold
|
2
|
+
|
3
|
+
kfold creates K-fold splits from data files and assists in training and testing (useful for cross-validation in supervised machine
|
4
|
+
learning)
|
5
|
+
|
6
|
+
== Command overview
|
7
|
+
|
8
|
+
help Display global or [command] help documentation.
|
9
|
+
split Split a data file into K partitions
|
10
|
+
test Apply trained models on a dataset previously split using kfold
|
11
|
+
train Train models on a dataset previously split using kfold
|
12
|
+
|
13
|
+
== Example usage
|
14
|
+
|
15
|
+
10-fold cross-validation of the standard MaltParser on a treebank named shuffled.c32.conll may be done as follows:
|
16
|
+
|
17
|
+
kfold split -f -i shuffled.c32.conll -d '\n\n'
|
18
|
+
kfold train -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -m learn
|
19
|
+
kfold test -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -o %O -m parse
|
20
|
+
eval07.pl -q -g shuffled.c32.conll -s shuffled.c32.conll.output
|
21
|
+
|
22
|
+
The MaltParser does not like to put its models in a subdirectory, so rather than using the standard model files suggested by kfold (%M), we construct custom non-nested model filenames using %B.model_%N.
|
23
|
+
|
24
|
+
== Command details
|
25
|
+
|
26
|
+
The following is simply the output of the built-in help commands.
|
27
|
+
|
28
|
+
=== Splitting data files
|
29
|
+
|
30
|
+
NAME:
|
31
|
+
|
32
|
+
split
|
33
|
+
|
34
|
+
DESCRIPTION:
|
35
|
+
|
36
|
+
Given the data file INPUT, the partitions are written to files named INPUT.parts/{01..K}
|
37
|
+
|
38
|
+
SYNOPSIS:
|
39
|
+
|
40
|
+
kfold split -i INPUT [options]
|
41
|
+
|
42
|
+
EXAMPLES:
|
43
|
+
|
44
|
+
# Split the file sample.txt into 4 parts
|
45
|
+
kfold split -k4 -i sample.txt
|
46
|
+
|
47
|
+
# Split the double-newline-delimited file sample.conll into 10 parts
|
48
|
+
kfold split -d"\n\n" -i sample.conll
|
49
|
+
|
50
|
+
OPTIONS:
|
51
|
+
|
52
|
+
-i, --input FILE
|
53
|
+
Data file to split
|
54
|
+
|
55
|
+
-k, --parts N
|
56
|
+
The number of partitions desired
|
57
|
+
|
58
|
+
-d, --delimiter DELIM
|
59
|
+
String used to separate individual entries (newline per default)
|
60
|
+
|
61
|
+
-g, --granularity N
|
62
|
+
Ensure the number of entries in each partition is divisible by N (useful for block-structured data)
|
63
|
+
|
64
|
+
-f, --overwrite
|
65
|
+
Remove existing parts prior to executing
|
66
|
+
|
67
|
+
--fold
|
68
|
+
Additionally, create K folds of K-1 parts in another folder
|
69
|
+
|
70
|
+
--parts-name STRING
|
71
|
+
Use the given name as suffix for the partitions folder created
|
72
|
+
|
73
|
+
--folds-name STRING
|
74
|
+
Use the given name as suffix for the folds folder created
|
75
|
+
|
76
|
+
=== Training on the folds
|
77
|
+
|
78
|
+
NAME:
|
79
|
+
|
80
|
+
train
|
81
|
+
|
82
|
+
DESCRIPTION:
|
83
|
+
|
84
|
+
Given training data previously split in K parts and folds, train K models on the K folds
|
85
|
+
|
86
|
+
Certain keywords in the training command and its arguments are interpolated at runtime:
|
87
|
+
|
88
|
+
* %N - fold number, e.g. '01'
|
89
|
+
* %F - fold filename, e.g. 'brown.train/01'
|
90
|
+
* %T - alias for %F (the form used in the examples)
|
91
|
+
* %M - model filename, e.g. 'brown.models/01'
|
92
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
93
|
+
|
94
|
+
|
95
|
+
SYNOPSIS:
|
96
|
+
|
97
|
+
kfold train --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]
|
98
|
+
|
99
|
+
EXAMPLES:
|
100
|
+
|
101
|
+
# Train MaltParser for cross-validation
|
102
|
+
kfold train -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -m learn
|
103
|
+
|
104
|
+
OPTIONS:
|
105
|
+
|
106
|
+
-f, --overwrite
|
107
|
+
Remove existing models prior to executing
|
108
|
+
|
109
|
+
--base NAME
|
110
|
+
Default prefix of training folds and model files
|
111
|
+
|
112
|
+
--folds-name SUFFIX
|
113
|
+
Look for folds {01..K} in the folder BASE.SUFFIX
|
114
|
+
|
115
|
+
--models-name SUFFIX
|
116
|
+
Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M
|
117
|
+
|
118
|
+
=== Testing the models on their reciprocal data file parts
|
119
|
+
|
120
|
+
NAME:
|
121
|
+
|
122
|
+
test
|
123
|
+
|
124
|
+
DESCRIPTION:
|
125
|
+
|
126
|
+
Process K parts of a split datafile using K previously trained models.
|
127
|
+
|
128
|
+
Certain keywords in the testing command and its arguments are interpolated at runtime:
|
129
|
+
|
130
|
+
* %N - part number, e.g. '01'
|
131
|
+
* %T - part filename, e.g. 'brown.test/01'
|
132
|
+
* %I - alias for %T
|
133
|
+
* %O - output filename, e.g. 'brown.outputs/01'
|
134
|
+
* %M - model filename, e.g. 'brown.models/01'
|
135
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
136
|
+
|
137
|
+
|
138
|
+
SYNOPSIS:
|
139
|
+
|
140
|
+
kfold test --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]
|
141
|
+
|
142
|
+
EXAMPLES:
|
143
|
+
|
144
|
+
# Apply trained MaltParser models for cross-validation
|
145
|
+
kfold test -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -o %O -m parse
|
146
|
+
|
147
|
+
OPTIONS:
|
148
|
+
|
149
|
+
-f, --overwrite
|
150
|
+
Remove existing test output prior to executing
|
151
|
+
|
152
|
+
--base NAME
|
153
|
+
Default prefix of model files and test outputs
|
154
|
+
|
155
|
+
--parts-name SUFFIX
|
156
|
+
Look for parts {01..K} to be processed in the folder BASE.SUFFIX
|
157
|
+
|
158
|
+
--models-name SUFFIX
|
159
|
+
Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M
|
160
|
+
|
161
|
+
--outputs-name SUFFIX
|
162
|
+
Yield output filenames as BASE.SUFFIX/{01..K} as interpolation pattern %O
|
163
|
+
|
164
|
+
--output-name SUFFIX
|
165
|
+
Put the concatenated output of all models in BASE.SUFFIX
|
166
|
+
|
data/Rakefile
CHANGED
@@ -1,17 +1 @@
|
|
1
|
-
|
2
|
-
require 'rubygems'
|
3
|
-
require 'rake'
|
4
|
-
require 'echoe'
|
5
|
-
|
6
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))
|
7
|
-
require 'kfold'
|
8
|
-
|
9
|
-
Echoe.new('kfold', Kfold::VERSION) do |p|
|
10
|
-
p.description = "Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)"
|
11
|
-
p.url = "http://github.com/crishoj/kfold"
|
12
|
-
p.author = "Christian Rishøj"
|
13
|
-
p.email = "christian@rishoj.net"
|
14
|
-
p.ignore_pattern = ["tmp/**/*", "script/*", "nbproject/**/*"]
|
15
|
-
p.runtime_dependencies = ["commander"]
|
16
|
-
p.development_dependencies = []
|
17
|
-
end
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/kfold
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
2
|
require 'commander/import'
|
5
3
|
require 'kfold'
|
6
4
|
require 'fileutils'
|
7
5
|
|
8
6
|
program :version, Kfold::VERSION
|
9
|
-
program :description,
|
10
|
-
|
7
|
+
program :description, "Create K-fold splits from data files and assist in training and testing (useful for cross-validation in supervised machine learning)"
|
8
|
+
|
11
9
|
def do_directory(dir, overwrite = false)
|
12
10
|
if File.exist? dir
|
13
11
|
if overwrite
|
@@ -35,7 +33,7 @@ command :split do |c|
|
|
35
33
|
c.option '--parts-name STRING', String, 'Use the given name as suffix for the partitions folder created'
|
36
34
|
c.option '--folds-name STRING', String, 'Use the given name as suffix for the folds folder created'
|
37
35
|
c.action do |args, options|
|
38
|
-
options.default :parts => 10, :delimiter => "\n", :granularity => 1, :parts_name => '
|
36
|
+
options.default :parts => 10, :delimiter => "\n", :granularity => 1, :parts_name => 'test', :folds_name => 'train'
|
39
37
|
abort "Failed: Please specify input file using -i INPUT" unless options.input
|
40
38
|
abort "Failed: Input file #{options.input} does not exist" unless File.exist? options.input
|
41
39
|
# Interpret newlines, linefeeds and tabs
|
@@ -87,25 +85,103 @@ end
|
|
87
85
|
|
88
86
|
alias_command :fold, :split, '--fold'
|
89
87
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
88
|
+
command :train do |c|
|
89
|
+
c.syntax = 'kfold train --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]'
|
90
|
+
c.summary = 'Train models on a dataset previously split using kfold'
|
91
|
+
c.description = <<-end
|
92
|
+
Given training data previously split in K parts and folds, train K models on the K folds
|
93
|
+
|
94
|
+
Certain keywords in the training command and its arguments are interpolated at runtime:
|
95
|
+
|
96
|
+
* %N - fold number, e.g. '01'
|
97
|
+
* %F - fold filename, e.g. 'brown.train/01'
|
98
|
+
* %I - alias for %F
|
99
|
+
* %M - model filename, e.g. 'brown.models/01'
|
100
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
101
|
+
end
|
102
|
+
c.example 'Train MaltParser for cross-validation', 'kfold train -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -m learn'
|
103
|
+
c.option '-f', '--overwrite', 'Remove existing models prior to executing'
|
104
|
+
c.option '--base NAME', String, 'Default prefix of training folds and model files'
|
105
|
+
c.option '--folds-name SUFFIX', String, 'Look for folds {01..K} in the folder BASE.SUFFIX'
|
106
|
+
c.option '--models-name SUFFIX', String, 'Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M'
|
107
|
+
c.action do |args, options|
|
108
|
+
options.default :folds_name => 'train', :models_name => 'models'
|
109
|
+
raise "Must specify base name using --base" unless options.base
|
110
|
+
folds_dir = "#{options.base}.#{options.folds_name}"
|
111
|
+
models_dir = "#{options.base}.#{options.models_name}"
|
112
|
+
raise "Folds directory not found" unless File.exists?(folds_dir)
|
113
|
+
folds = Dir.glob(File.join(folds_dir, '*')).collect { |file|
|
114
|
+
file if File.basename(file) =~ /^[0-9]+$/
|
115
|
+
}.compact
|
116
|
+
raise "No folds found in #{folds_dir}" if folds.count == 0
|
117
|
+
say "Found #{folds.count} folds in #{folds_dir}"
|
118
|
+
cmds = folds.collect do |fold|
|
119
|
+
num = File.basename(fold)
|
120
|
+
model = File.join(models_dir, num)
|
121
|
+
args.collect { |arg|
|
122
|
+
arg.gsub(/%F/, fold).gsub(/%T/, fold).gsub(/%N/, num).gsub('%M', model).gsub('%B', options.base)
|
123
|
+
}.join(' ')
|
124
|
+
end
|
125
|
+
do_directory(models_dir, options.overwrite) if cmds.first.match(models_dir)
|
126
|
+
cmds.each do |cmd|
|
127
|
+
say "[exec] #{cmd}"
|
128
|
+
system cmd
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
command :test do |c|
|
134
|
+
c.syntax = 'kfold test --base NAME [options] -- CMD [--CMD-OPTIONS] [CMD-ARGS]'
|
135
|
+
c.summary = 'Apply trained models on a dataset previously split using kfold'
|
136
|
+
c.option '-f', '--overwrite', 'Remove existing test output prior to executing'
|
137
|
+
c.option '--base NAME', String, 'Default prefix of model files and test outputs'
|
138
|
+
c.option '--parts-name SUFFIX', String, 'Look for parts {01..K} to be processed in the folder BASE.SUFFIX'
|
139
|
+
c.option '--models-name SUFFIX', String, 'Yield model names as BASE.SUFFIX/{01..K} as interpolation pattern %M'
|
140
|
+
c.option '--outputs-name SUFFIX', String, 'Yield output filenames as BASE.SUFFIX/{01..K} as interpolation pattern %O'
|
141
|
+
c.option '--output-name SUFFIX', String, 'Put the concatenated output of all models in BASE.SUFFIX'
|
142
|
+
c.description = <<-end
|
143
|
+
Process K parts of a split datafile using K previously trained models.
|
144
|
+
|
145
|
+
Certain keywords in the testing command and its arguments are interpolated at runtime:
|
111
146
|
|
147
|
+
* %N - part number, e.g. '01'
|
148
|
+
* %T - part filename, e.g. 'brown.test/01'
|
149
|
+
* %I - alias for %T
|
150
|
+
* %O - output filename, e.g. 'brown.outputs/01'
|
151
|
+
* %M - model filename, e.g. 'brown.models/01'
|
152
|
+
* %B - basename (as specified on the command line), e.g. 'brown'
|
153
|
+
end
|
154
|
+
c.example 'Apply trained MaltParser models for cross-validation', 'kfold test -f --base shuffled.c32.conll -- java -jar ~/Tools/malt-1.4.1/malt.jar -c %B.model_%N -i %T -o %O -m parse'
|
155
|
+
c.action do |args, options|
|
156
|
+
options.default :parts_name => 'test', :models_name => 'models', :outputs_name => 'outputs', :output_name => 'output'
|
157
|
+
raise "Must specify base name using --base" unless options.base
|
158
|
+
parts_dir = "#{options.base}.#{options.parts_name}"
|
159
|
+
models_dir = "#{options.base}.#{options.models_name}"
|
160
|
+
output_dir = "#{options.base}.#{options.outputs_name}"
|
161
|
+
output_file = "#{options.base}.#{options.output_name}"
|
162
|
+
raise "Parts directory not found" unless File.exists?(parts_dir)
|
163
|
+
raise "Model directory not found" unless File.exists?(models_dir)
|
164
|
+
parts = Dir.glob(File.join(parts_dir, '*')).collect { |file|
|
165
|
+
file if File.basename(file) =~ /^[0-9]+$/
|
166
|
+
}.compact
|
167
|
+
raise "No parts found in #{parts_dir}" if parts.count == 0
|
168
|
+
say "Found #{parts.count} parts in #{parts_dir}"
|
169
|
+
outputs = []
|
170
|
+
cmds = parts.collect do |part|
|
171
|
+
num = File.basename(part)
|
172
|
+
model = File.join(models_dir, num)
|
173
|
+
output = File.join(output_dir, num)
|
174
|
+
outputs << output
|
175
|
+
args.collect { |arg|
|
176
|
+
arg.gsub(/%O/, output).gsub(/%I/, part).gsub(/%T/, part).gsub(/%N/, num).gsub('%M', model).gsub('%B', options.base)
|
177
|
+
}.join(' ')
|
178
|
+
end
|
179
|
+
do_directory(output_dir, options.overwrite)
|
180
|
+
cmds.each do |cmd|
|
181
|
+
say "[exec] #{cmd}"
|
182
|
+
system cmd
|
183
|
+
end
|
184
|
+
say "[join] #{outputs.join(' ')} => #{output_file}"
|
185
|
+
system "cat #{outputs.join(' ')} > #{output_file}"
|
186
|
+
end
|
187
|
+
end
|
data/kfold.gemspec
CHANGED
@@ -1,37 +1,20 @@
|
|
1
|
-
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require "kfold/version"
|
2
3
|
|
3
4
|
Gem::Specification.new do |s|
|
4
|
-
s.name
|
5
|
-
s.version
|
5
|
+
s.name = "kfold"
|
6
|
+
s.version = Kfold::VERSION
|
7
|
+
s.authors = ["Christian Rishoj"]
|
8
|
+
s.email = ["christian@rishoj.net"]
|
9
|
+
s.homepage = "http://github.com/crishoj/kfold"
|
10
|
+
s.summary = %q{Split your data and process it in parallel}
|
11
|
+
s.description = %q{Create K-fold splits from data files and assist in training and testing (useful for cross-validation in supervised machine learning)}
|
6
12
|
|
7
|
-
s.
|
8
|
-
s.
|
9
|
-
s.
|
10
|
-
s.date = %q{2010-12-30}
|
11
|
-
s.default_executable = %q{kfold}
|
12
|
-
s.description = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}
|
13
|
-
s.email = %q{christian@rishoj.net}
|
14
|
-
s.executables = ["kfold"]
|
15
|
-
s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb"]
|
16
|
-
s.files = ["CHANGELOG", "LICENSE", "Manifest", "README", "Rakefile", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb", "spec/helper.rb", "spec/kfold/data_file_spec.rb", "spec/kfold/sample_data_file.conll", "kfold.gemspec"]
|
17
|
-
s.homepage = %q{http://github.com/crishoj/kfold}
|
18
|
-
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Kfold", "--main", "README"]
|
13
|
+
s.files = `git ls-files`.split("\n")
|
14
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
16
|
s.require_paths = ["lib"]
|
20
|
-
s.rubyforge_project = %q{kfold}
|
21
|
-
s.rubygems_version = %q{1.3.7}
|
22
|
-
s.signing_key = %q{/Users/crjensen/Documents/Certificates/gem-private_key.pem}
|
23
|
-
s.summary = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}
|
24
17
|
|
25
|
-
|
26
|
-
|
27
|
-
s.specification_version = 3
|
28
|
-
|
29
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
30
|
-
s.add_runtime_dependency(%q<commander>, [">= 0"])
|
31
|
-
else
|
32
|
-
s.add_dependency(%q<commander>, [">= 0"])
|
33
|
-
end
|
34
|
-
else
|
35
|
-
s.add_dependency(%q<commander>, [">= 0"])
|
36
|
-
end
|
18
|
+
s.add_development_dependency "rspec"
|
19
|
+
s.add_runtime_dependency "commander"
|
37
20
|
end
|
data/lib/kfold.rb
CHANGED
data/spec/spec_helper.rb
ADDED
metadata
CHANGED
@@ -1,118 +1,93 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: kfold
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 1
|
8
|
-
version: "0.1"
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.3'
|
5
|
+
prerelease:
|
9
6
|
platform: ruby
|
10
|
-
authors:
|
11
|
-
-
|
7
|
+
authors:
|
8
|
+
- Christian Rishoj
|
12
9
|
autorequire:
|
13
10
|
bindir: bin
|
14
|
-
cert_chain:
|
15
|
-
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
GDHt9mtVmQiD3fBCfPJhysaRSUyaUA7jEFljgRTxuH1GWLyGE24/c6zVBknLPSC+
|
27
|
-
hFW0Ib3/AgMBAAGjOTA3MAkGA1UdEwQCMAAwHQYDVR0OBBYEFNtKnxiLUpy9a406
|
28
|
-
diOk3lm5ISTEMAsGA1UdDwQEAwIEsDANBgkqhkiG9w0BAQUFAAOCAQEAiMhO6NXU
|
29
|
-
f/uTInOl2fFxIQsakyCXoWE2u7U2NLLW5R5DCYFK/EH+OYsum1Khu+Jt/n5loV7e
|
30
|
-
B4QlCbwlhwztW7sWA/sQQoLQsroZi2kmYwnkYLeqRgUre3E+YpD3S9QAWIFYpkBZ
|
31
|
-
b9mIToqxb0m+WiLCysrg3sfDymrfuNDdtQcVPcJ5W2+Mj6LJJN65bAvqqExVpr63
|
32
|
-
qbn/bmiocEIbQUsPSVuw+FSIiR6be/Ty3QpWQgxXnbHsfHFWPpADuOwTYPLxWqBg
|
33
|
-
4izI+lCFvIjAaa5WjKVW8PV3XIvgr4+/ESIzs1OOVW7ktQNwu7GXt/kR2KQH9FRC
|
34
|
-
VSyVlp5OZP6OoA==
|
35
|
-
-----END CERTIFICATE-----
|
36
|
-
|
37
|
-
date: 2010-12-30 00:00:00 +07:00
|
38
|
-
default_executable:
|
39
|
-
dependencies:
|
40
|
-
- !ruby/object:Gem::Dependency
|
41
|
-
name: commander
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-10-15 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &70121742826740 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
42
23
|
prerelease: false
|
43
|
-
|
24
|
+
version_requirements: *70121742826740
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: commander
|
27
|
+
requirement: &70121742826200 !ruby/object:Gem::Requirement
|
44
28
|
none: false
|
45
|
-
requirements:
|
46
|
-
- -
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
|
49
|
-
- 0
|
50
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
51
33
|
type: :runtime
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70121742826200
|
36
|
+
description: Create K-fold splits from data files and assist in training and testing
|
37
|
+
(useful for cross-validation in supervised machine learning)
|
38
|
+
email:
|
39
|
+
- christian@rishoj.net
|
40
|
+
executables:
|
56
41
|
- kfold
|
57
42
|
extensions: []
|
58
|
-
|
59
|
-
|
60
|
-
-
|
61
|
-
-
|
62
|
-
-
|
63
|
-
-
|
64
|
-
-
|
65
|
-
-
|
66
|
-
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- .idea/encodings.xml
|
47
|
+
- .idea/kfold.iml
|
48
|
+
- .idea/misc.xml
|
49
|
+
- .idea/modules.xml
|
50
|
+
- .idea/vcs.xml
|
51
|
+
- .rspec
|
67
52
|
- CHANGELOG
|
53
|
+
- Gemfile
|
68
54
|
- LICENSE
|
69
55
|
- Manifest
|
70
|
-
- README
|
56
|
+
- README.rdoc
|
71
57
|
- Rakefile
|
72
58
|
- bin/kfold
|
59
|
+
- kfold.gemspec
|
73
60
|
- lib/kfold.rb
|
74
61
|
- lib/kfold/data_file.rb
|
75
|
-
-
|
62
|
+
- lib/kfold/version.rb
|
76
63
|
- spec/kfold/data_file_spec.rb
|
77
64
|
- spec/kfold/sample_data_file.conll
|
78
|
-
-
|
79
|
-
has_rdoc: true
|
65
|
+
- spec/spec_helper.rb
|
80
66
|
homepage: http://github.com/crishoj/kfold
|
81
67
|
licenses: []
|
82
|
-
|
83
68
|
post_install_message:
|
84
|
-
rdoc_options:
|
85
|
-
|
86
|
-
- --inline-source
|
87
|
-
- --title
|
88
|
-
- Kfold
|
89
|
-
- --main
|
90
|
-
- README
|
91
|
-
require_paths:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
92
71
|
- lib
|
93
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
73
|
none: false
|
95
|
-
requirements:
|
96
|
-
- -
|
97
|
-
- !ruby/object:Gem::Version
|
98
|
-
|
99
|
-
|
100
|
-
version: "0"
|
101
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
79
|
none: false
|
103
|
-
requirements:
|
104
|
-
- -
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
|
107
|
-
- 1
|
108
|
-
- 2
|
109
|
-
version: "1.2"
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
110
84
|
requirements: []
|
111
|
-
|
112
|
-
|
113
|
-
rubygems_version: 1.3.7
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 1.8.10
|
114
87
|
signing_key:
|
115
88
|
specification_version: 3
|
116
|
-
summary:
|
117
|
-
test_files:
|
118
|
-
|
89
|
+
summary: Split your data and process it in parallel
|
90
|
+
test_files:
|
91
|
+
- spec/kfold/data_file_spec.rb
|
92
|
+
- spec/kfold/sample_data_file.conll
|
93
|
+
- spec/spec_helper.rb
|
data.tar.gz.sig
DELETED
Binary file
|
data/README
DELETED
data/spec/helper.rb
DELETED
metadata.gz.sig
DELETED