kfold 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/CHANGELOG +1 -0
- data/LICENSE +0 -0
- data/Manifest +11 -0
- data/README +3 -0
- data/Rakefile +17 -0
- data/bin/kfold +111 -0
- data/kfold.gemspec +37 -0
- data/lib/kfold.rb +7 -0
- data/lib/kfold/data_file.rb +67 -0
- data/spec/helper.rb +3 -0
- data/spec/kfold/data_file_spec.rb +65 -0
- data/spec/kfold/sample_data_file.conll +108 -0
- metadata +118 -0
- metadata.gz.sig +2 -0
data.tar.gz.sig
ADDED
Binary file
|
data/CHANGELOG
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
v0.1. Commands for K-fold splitting and folding
|
data/LICENSE
ADDED
Binary file
|
data/Manifest
ADDED
data/README
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: UTF-8
# Rake setup: loads Echoe and configures it with the kfold gem's metadata.
require 'rubygems'
require 'rake'
require 'echoe'

# Put this checkout's lib/ first on the load path before requiring kfold,
# so Kfold::VERSION below is read from the local sources.
lib_dir = File.join(File.dirname(__FILE__), 'lib')
$LOAD_PATH.unshift(lib_dir)
require 'kfold'

Echoe.new('kfold', Kfold::VERSION) do |gem|
  gem.description = "Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)"
  gem.url = "http://github.com/crishoj/kfold"
  gem.author = "Christian Rishøj"
  gem.email = "christian@rishoj.net"
  gem.ignore_pattern = %w[tmp/**/* script/* nbproject/**/*]
  gem.runtime_dependencies = %w[commander]
  gem.development_dependencies = []
end
|
data/bin/kfold
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'commander/import'
|
5
|
+
require 'kfold'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
# Program-level metadata registered through Commander's `program` DSL.
program :version, Kfold::VERSION
# Describe what the tool does (same summary as the gem's own description)
# instead of a placeholder string.
program :description, 'Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)'
|
10
|
+
|
11
|
+
# Creates the directory +dir+, dealing with a pre-existing path first.
#
# If +dir+ already exists and +overwrite+ is truthy, the existing path is
# removed recursively before the directory is created again. If it exists
# and +overwrite+ is falsy, the program aborts with an error message.
def do_directory(dir, overwrite = false)
  already_there = File.exist?(dir)
  abort "Failed: Directory #{dir} exists" if already_there && !overwrite

  if already_there
    say "Removing existing #{dir}"
    FileUtils.rm_rf(dir)
  end
  FileUtils.mkdir dir
end
|
22
|
+
|
23
|
+
# Defines the `split` command.
#
# The input file is read through Kfold::DataFile and its entries are written
# to numbered part files inside "INPUT.<parts_name>". With --fold, a second
# directory "INPUT.<folds_name>" is created in which each fold file is the
# concatenation of every part except the one sharing the fold's name.
command :split do |c|
  c.syntax = 'kfold split -i INPUT [options]'
  c.summary = 'Split a data file into K partitions'
  c.description = 'Given the data file INPUT, the partitions are written to files named INPUT.parts/{01..K}'
  # The input file is mandatory and is passed with -i, so the examples show it.
  c.example 'Split the file sample.txt into 4 parts', 'kfold split -k4 -i sample.txt'
  c.example 'Split the double-newline-delimited file sample.conll into 10 parts', 'kfold split -d"\n\n" -i sample.conll'
  c.option '-i', '--input FILE', String, 'Data file to split'
  c.option '-k', '--parts N', Integer, 'The number of partitions desired'
  c.option '-d', '--delimiter DELIM', String, 'String used to separate individual entries (newline per default)'
  c.option '-g', '--granularity N', Integer, 'Ensure the number of entries in each partition is divisible by N (useful for block-structured data)'
  c.option '-f', '--overwrite', 'Remove existing parts prior to executing'
  c.option '--fold', 'Additionally, create K folds of K-1 parts in another folder'
  c.option '--parts-name STRING', String, 'Use the given name as suffix for the partitions folder created'
  c.option '--folds-name STRING', String, 'Use the given name as suffix for the folds folder created'
  c.action do |_args, options|
    # :folds_name needs a default too; without one the folds directory would
    # be named "INPUT." whenever --folds-name is not given.
    options.default :parts => 10, :delimiter => "\n", :granularity => 1,
                    :parts_name => 'parts', :folds_name => 'folds'
    abort "Failed: Please specify input file using -i INPUT" unless options.input
    abort "Failed: Input file #{options.input} does not exist" unless File.exist? options.input
    # Zero or negative values would otherwise surface as arithmetic errors
    # from the partitioning maths.
    abort "Failed: Number of parts must be a positive integer" unless options.parts > 0
    abort "Failed: Granularity must be a positive integer" unless options.granularity > 0

    # Interpret escaped newlines, carriage returns and tabs in the delimiter.
    # Non-mutating gsub is used so the option value itself is left untouched.
    delimiter = options.delimiter.gsub('\n', "\n").gsub('\r', "\r").gsub('\t', "\t")

    parts_dir = "#{options.input}.#{options.parts_name}"
    do_directory(parts_dir, options.overwrite)
    df = Kfold::DataFile.new(options.input, delimiter, options.granularity)
    say "Partitioning breakdown: #{df.breakdown options.parts}"

    part = part_file = last_part_num = nil
    part_names = []
    part_entries = 0
    begin
      df.each_entry_in_parts(options.parts) do |part_num, entry|
        if part_num != last_part_num
          # A new part starts: finish the previous file (if any) and open the next.
          if part
            part.close
            say "[part] Wrote #{part_entries} entries to #{part_file}"
          end
          # Zero-pad part names to the width of the requested part count.
          part_name = part_num.to_s.rjust(options.parts.to_s.size, '0')
          part_names << part_name
          part_file = File.join(parts_dir, part_name)
          part = File.open(part_file, 'w')
          last_part_num = part_num
          part_entries = 0
        end
        part << entry
        part_entries += 1
      end
    ensure
      # Close the last part file even if reading or writing raised.
      part.close if part && !part.closed?
    end
    # No part file is ever opened when the input yields no entries.
    say "[part] Wrote #{part_entries} entries to #{part_file}" if part

    if options.fold
      folds_dir = "#{options.input}.#{options.folds_name}"
      do_directory(folds_dir, options.overwrite)
      part_names.each do |fold_name|
        # Each fold holds every part except the one it is named after.
        fold_parts = part_names - [fold_name]
        fold_file = File.join(folds_dir, fold_name)
        File.open(fold_file, 'w') do |fold|
          fold_parts.each do |included_part|
            # File.read opens and closes the part file itself.
            fold << File.read(File.join(parts_dir, included_part))
          end
        end
        say "[fold] Wrote parts #{fold_parts * ' '} to #{fold_file}"
      end
    end
  end
end
|
87
|
+
|
88
|
+
# Register `fold` as an alias of the `split` command with '--fold' supplied.
alias_command :fold, :split, '--fold'
|
89
|
+
|
90
|
+
#command :train do |c|
|
91
|
+
# c.syntax = 'kfold train [options]'
|
92
|
+
# c.summary = ''
|
93
|
+
# c.description = ''
|
94
|
+
# c.example 'description', 'command example'
|
95
|
+
# c.option '--some-switch', 'Some switch that does something'
|
96
|
+
# c.action do |args, options|
|
97
|
+
# # Do something or c.when_called Kfold::Commands::Train
|
98
|
+
# end
|
99
|
+
#end
|
100
|
+
#
|
101
|
+
#command :test do |c|
|
102
|
+
# c.syntax = 'kfold test [options]'
|
103
|
+
# c.summary = ''
|
104
|
+
# c.description = ''
|
105
|
+
# c.example 'description', 'command example'
|
106
|
+
# c.option '--some-switch', 'Some switch that does something'
|
107
|
+
# c.action do |args, options|
|
108
|
+
# # Do something or c.when_called Kfold::Commands::Test
|
109
|
+
# end
|
110
|
+
#end
|
111
|
+
|
data/kfold.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for kfold 0.1: metadata, packaged files and the runtime
# dependency on commander.
Gem::Specification.new do |s|
  s.name = %q{kfold}
  s.version = "0.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Christian Rishøj"]
  # NOTE(review): cert_chain and signing_key below are absolute paths on the
  # author's machine; confirm how signing is meant to work elsewhere.
  s.cert_chain = ["/Users/crjensen/Documents/Certificates/gem-public_cert.pem"]
  s.date = %q{2010-12-30}
  # default_executable= is a deprecated setter; guard it the same way the
  # other optional setters in this file are guarded, so the spec still loads
  # on RubyGems versions that do not define it.
  s.default_executable = %q{kfold} if s.respond_to? :default_executable=
  s.description = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}
  s.email = %q{christian@rishoj.net}
  s.executables = ["kfold"]
  s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb"]
  s.files = ["CHANGELOG", "LICENSE", "Manifest", "README", "Rakefile", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb", "spec/helper.rb", "spec/kfold/data_file_spec.rb", "spec/kfold/sample_data_file.conll", "kfold.gemspec"]
  s.homepage = %q{http://github.com/crishoj/kfold}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Kfold", "--main", "README"]
  s.require_paths = ["lib"]
  # rubyforge_project= is likewise deprecated; same guard as above.
  s.rubyforge_project = %q{kfold} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.7}
  s.signing_key = %q{/Users/crjensen/Documents/Certificates/gem-private_key.pem}
  s.summary = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Declare commander as a runtime dependency where RubyGems supports the
    # distinction; otherwise fall back to a plain dependency.
    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<commander>, [">= 0"])
    else
      s.add_dependency(%q<commander>, [">= 0"])
    end
  else
    s.add_dependency(%q<commander>, [">= 0"])
  end
end
|
data/lib/kfold.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
module Kfold
  # A delimited data file whose entries can be distributed over K parts.
  #
  # Entries are read with File.foreach using +delimiter+ as the record
  # separator, so each entry string includes its trailing delimiter.
  # +granularity+ is the number of consecutive entries that make up one
  # block; a part boundary is only ever placed at the end of a block.
  class DataFile
    attr_reader :filename, :delimiter, :granularity

    # filename    - path of the file to read
    # delimiter   - record separator passed to File.foreach
    # granularity - number of entries per block
    def initialize(filename, delimiter = "\n", granularity = 1)
      @filename = filename
      @delimiter = delimiter
      @granularity = granularity
    end

    # Number of entries in the file. The file is read once and the count is
    # memoized.
    def num_entries
      @num_entries ||= count_entries
    end

    # Number of blocks of +granularity+ entries; a trailing partial block
    # counts as a full one (the division is rounded up).
    def num_blocks
      @num_blocks ||= (num_entries.to_f / granularity.to_f).ceil
    end

    # Returns a human-readable summary of how the entries are divided over
    # +parts+ parts.
    def breakdown(parts = 10)
      blocks_per_part, rest = num_blocks.divmod(parts)
      msg = "#{num_entries} entries into #{parts} parts, #{blocks_per_part} blocks of #{granularity} entries per part"
      msg += " (plus #{rest} extra blocks in last part)" if rest > 0
      msg
    end

    # Yields +part_number, entry+ for every entry in the file, in file order.
    # Part numbers start at 1 and advance after every
    # +num_blocks / parts+ completed blocks; the last part keeps receiving
    # entries until the file ends. Returns an Enumerator when no block is
    # given.
    #
    # NOTE: when +parts+ exceeds +num_blocks+ the per-part block count is 0,
    # the boundary condition below never holds, and every entry is yielded
    # under part 1.
    def each_entry_in_parts(parts = 10)
      return enum_for(:each_entry_in_parts, parts) unless block_given?

      blocks_per_part = num_blocks.divmod(parts).first
      cur_part = 1
      cur_block = 1
      block_entries = 0
      File.foreach(filename, delimiter) do |entry|
        yield cur_part, entry
        block_entries += 1
        next unless block_entries == granularity

        # End of this block
        block_entries = 0
        if cur_block == blocks_per_part && cur_part != parts
          # End of this part
          cur_part += 1
          cur_block = 1
        else
          cur_block += 1
        end
      end
    end

    protected

    # Counts the entries by reading the whole file once. An empty final
    # entry, should one be yielded, is not counted.
    def count_entries
      count = 0
      last_empty = false
      File.foreach(filename, delimiter) do |entry|
        last_empty = entry.empty?
        count += 1
      end
      last_empty ? count - 1 : count
    end
  end
end
|
data/spec/helper.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
|
2
|
+
require 'kfold/data_file'
|
3
|
+
|
4
|
+
# Specs for Kfold::DataFile, run against the sample CoNLL file that sits next
# to this spec; entries in that file are delimited by "\n\n".
describe Kfold::DataFile do
  before(:each) do
    spec_dir = File.dirname(__FILE__)
    @sample_file = File.join(spec_dir, 'sample_data_file.conll')
    @df = Kfold::DataFile.new(@sample_file, "\n\n")
  end

  it "should contain 11 entries" do
    @df.num_entries.should == 11
  end

  it "should yield consecutive part numbers" do
    # With one part per entry, each yielded part number must be the
    # previous one plus one.
    expected = nil
    @df.each_entry_in_parts(@df.num_entries) do |part_num, _entry|
      part_num.should == expected if expected
      expected = part_num + 1
    end
  end

  it "should yield all the entries in the file" do
    yielded = 0
    seen_parts = []
    @df.each_entry_in_parts(1) do |part_num, _entry|
      yielded += 1
      # Record a part each time the part number changes.
      seen_parts << part_num unless seen_parts.last == part_num
    end
    yielded.should == @df.num_entries
    seen_parts.size.should == 1
  end

  it "should yield one entry per part if so requested" do
    yielded = 0
    seen_parts = []
    @df.each_entry_in_parts(@df.num_entries) do |part_num, _entry|
      yielded += 1
      seen_parts << part_num unless seen_parts.last == part_num
    end
    yielded.should == @df.num_entries
    seen_parts.size.should == @df.num_entries
  end

  it "should yield the number of parts requested" do
    seen_parts = []
    @df.each_entry_in_parts(3) do |part_num, _entry|
      seen_parts << part_num unless seen_parts.last == part_num
    end
    seen_parts.size.should == 3
  end
end
|
65
|
+
|
@@ -0,0 +1,108 @@
|
|
1
|
+
1 tashiro _ NAME NAMEper _ 2 HD 2 HD
|
2
|
+
2 yasuko _ NAME NAMEper _ 3 COMP 3 COMP
|
3
|
+
3 desu _ PV PVfin u 0 ROOT 0 ROOT
|
4
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
5
|
+
|
6
|
+
1 hayakawa _ NAME NAMEper _ 2 HD 2 HD
|
7
|
+
2 ryou _ NAME NAMEper _ 3 COMP 3 COMP
|
8
|
+
3 desu _ PV PVfin u 0 ROOT 0 ROOT
|
9
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
10
|
+
|
11
|
+
1 hayakawa _ NAME NAMEper _ 2 COMP 2 COMP
|
12
|
+
2 saN _ P PNsf _ 6 ADJ 6 ADJ
|
13
|
+
3 ima _ N Ntmp _ 6 ADJ 6 ADJ
|
14
|
+
4 chotto _ ADV ADV _ 6 ADJ 6 ADJ
|
15
|
+
5 ojikaN _ N NN _ 6 SBJ 6 SBJ
|
16
|
+
6 yoroshii _ ADJ ADJifin _ 7 COMP 7 COMP
|
17
|
+
7 desu _ PV PVfin u 0 ROOT 0 ROOT
|
18
|
+
8 ka _ PS PSE _ 7 MRK 7 MRK
|
19
|
+
9 chotto _ ADV ADV _ 16 ADJ 16 ADJ
|
20
|
+
10 doitsu _ NAME NAMEloc _ 11 COMP 11 COMP
|
21
|
+
11 no _ P Pgen _ 12 ADJ 12 ADJ
|
22
|
+
12 shucchou _ N VN _ 13 COMP 13 COMP
|
23
|
+
13 no _ P Pgen _ 14 COMP 14 COMP
|
24
|
+
14 koto _ N NF _ 15 COMP 15 COMP
|
25
|
+
15 de _ P P _ 16 ADJ 16 ADJ
|
26
|
+
16 gosoudaN _ N VN _ 17 COMP 17 COMP
|
27
|
+
17 shitai _ VADJ VADJi _ 18 COMP 18 COMP
|
28
|
+
18 no _ N NF _ 19 COMP 19 COMP
|
29
|
+
19 desu _ PV PVfin u 20 COMP 20 COMP
|
30
|
+
20 ga _ PS PSSb _ 0 ROOT 0 ROOT
|
31
|
+
21 . _ . . _ 20 PUNCT 20 PUNCT
|
32
|
+
|
33
|
+
1 hai _ ITJ ITJ _ 0 ROOT 0 ROOT
|
34
|
+
2 . _ . . _ 1 PUNCT 1 PUNCT
|
35
|
+
|
36
|
+
1 daijoubu _ ADJ ADJ_n _ 2 COMP 2 COMP
|
37
|
+
2 desu _ PV PVfin u 3 COMP 3 COMP
|
38
|
+
3 ga _ PS PSSb _ 0 ROOT 0 ROOT
|
39
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
40
|
+
|
41
|
+
1 de _ CNJ CNJ _ 21 ADJ 21 ADJ
|
42
|
+
2 dekimashitara _ V Vcnd _ 9 ADJ 9 ADJ
|
43
|
+
3 koNkai _ N Ntmp _ 4 COMP 4 COMP
|
44
|
+
4 no _ P Pgen _ 5 ADJ 5 ADJ
|
45
|
+
5 shucchou _ N VN _ 7 SBJ 7 SBJ
|
46
|
+
6 juuichigatsu _ CD CDdate _ 7 COMP 7 COMP
|
47
|
+
7 ni _ P P _ 8 COMP 8 COMP
|
48
|
+
8 to _ P PQ _ 9 COMP 9 COMP
|
49
|
+
9 omotte _ V Vte _ 10 COMP 10 COMP
|
50
|
+
10 iru _ VAUX VAUXfin u 11 COMP 11 COMP
|
51
|
+
11 no _ N NF _ 12 COMP 12 COMP
|
52
|
+
12 desu _ PV PVfin u 13 COMP 13 COMP
|
53
|
+
13 ga _ PS PSSb _ 21 ADJ 21 ADJ
|
54
|
+
14 juuichigatsu _ CD CDdate _ 15 COMP 15 COMP
|
55
|
+
15 desu _ PV PVfin u 16 COMP 16 COMP
|
56
|
+
16 to _ PS PSSa _ 21 ADJ 21 ADJ
|
57
|
+
17 goyotei _ N NN _ 18 COMP 18 COMP
|
58
|
+
18 no _ P Pgen _ 19 COMP 19 COMP
|
59
|
+
19 hou _ N NF _ 21 SBJ 21 SBJ
|
60
|
+
20 ikaga _ ADV ADVwh _ 21 COMP 21 COMP
|
61
|
+
21 deshou _ PV PVfin u 0 ROOT 0 ROOT
|
62
|
+
22 ka _ PS PSE _ 21 MRK 21 MRK
|
63
|
+
23 . _ . . _ 22 PUNCT 22 PUNCT
|
64
|
+
|
65
|
+
1 juuichigatsu _ CD CDdate _ 2 COMP 2 COMP
|
66
|
+
2 desu _ PV PVfin u 0 ROOT 0 ROOT
|
67
|
+
3 ka _ PS PSE _ 2 MRK 2 MRK
|
68
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
69
|
+
|
70
|
+
1 juuichigatsu _ CD CDdate _ 7 ADJ 7 ADJ
|
71
|
+
2 wa _ P Pfoc _ 1 MRK 1 MRK
|
72
|
+
3 zeNhaN _ N Ntmp _ 7 ADJ 7 ADJ
|
73
|
+
4 nara _ P Pfoc _ 3 MRK 3 MRK
|
74
|
+
5 jikaN _ N NN _ 6 COMP 6 COMP
|
75
|
+
6 ga _ P Pnom _ 7 SBJ 7 SBJ
|
76
|
+
7 toreru _ V Vfin u 9 COMP 9 COMP
|
77
|
+
8 ka _ PS PSE _ 7 MRK 7 MRK
|
78
|
+
9 to _ P PQ _ 10 COMP 10 COMP
|
79
|
+
10 omoimasu _ V Vfin u 11 COMP 11 COMP
|
80
|
+
11 ga _ PS PSSb _ 0 ROOT 0 ROOT
|
81
|
+
12 . _ . . _ 11 PUNCT 11 PUNCT
|
82
|
+
|
83
|
+
1 tsuki _ N NN _ 2 COMP 2 COMP
|
84
|
+
2 no _ P Pgen _ 3 ADJ 3 ADJ
|
85
|
+
3 zeNhaN _ N Ntmp _ 5 ADJ 5 ADJ
|
86
|
+
4 nara _ P Pfoc _ 3 MRK 3 MRK
|
87
|
+
5 yoroshii _ ADJ ADJifin _ 6 COMP 6 COMP
|
88
|
+
6 desu _ PV PVfin u 0 ROOT 0 ROOT
|
89
|
+
7 ka _ PS PSE _ 6 MRK 6 MRK
|
90
|
+
8 . _ . . _ 7 PUNCT 7 PUNCT
|
91
|
+
|
92
|
+
1 hai _ ITJ ITJ _ 0 ROOT 0 ROOT
|
93
|
+
2 . _ . . _ 1 PUNCT 1 PUNCT
|
94
|
+
|
95
|
+
1 sou _ ADV ADVdem _ 2 COMP 2 COMP
|
96
|
+
2 shimasu _ V Vfin u 3 COMP 3 COMP
|
97
|
+
3 to _ PS PSSa _ 11 ADJ 11 ADJ
|
98
|
+
4 kokonoka _ CD CDdate _ 5 COMP 5 COMP
|
99
|
+
5 no _ P Pgen _ 7 ADJ 7 ADJ
|
100
|
+
6 getsuyou _ NT NT _ 7 COMP 7 COMP
|
101
|
+
7 hatsu _ N Nsf _ 11 SBJ 11 SBJ
|
102
|
+
8 toka _ P Pcnj _ 7 MRK 7 MRK
|
103
|
+
9 wa _ P Pfoc _ 7 MRK 7 MRK
|
104
|
+
10 ikaga _ ADV ADVwh _ 11 COMP 11 COMP
|
105
|
+
11 deshou _ PV PVfin u 0 ROOT 0 ROOT
|
106
|
+
12 ka _ PS PSE _ 11 MRK 11 MRK
|
107
|
+
13 . _ . . _ 12 PUNCT 12 PUNCT
|
108
|
+
|
metadata
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kfold
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
version: "0.1"
|
9
|
+
platform: ruby
|
10
|
+
authors:
|
11
|
+
- "Christian Rish\xC3\xB8j"
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain:
|
15
|
+
- |
|
16
|
+
-----BEGIN CERTIFICATE-----
|
17
|
+
MIIDNjCCAh6gAwIBAgIBADANBgkqhkiG9w0BAQUFADBBMRIwEAYDVQQDDAljaHJp
|
18
|
+
c3RpYW4xFjAUBgoJkiaJk/IsZAEZFgZyaXNob2oxEzARBgoJkiaJk/IsZAEZFgNu
|
19
|
+
ZXQwHhcNMTAxMjMwMTAwMjUzWhcNMTExMjMwMTAwMjUzWjBBMRIwEAYDVQQDDAlj
|
20
|
+
aHJpc3RpYW4xFjAUBgoJkiaJk/IsZAEZFgZyaXNob2oxEzARBgoJkiaJk/IsZAEZ
|
21
|
+
FgNuZXQwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCg2y4Tc0otb09T
|
22
|
+
/UucTm3DbQilOfQwN9QCzjjnPT6071YeyHsEmv4zokUCBSW5n+H/D0tSAQobT6D+
|
23
|
+
APet6eFLGRW0L9MGsi7do5w+GY4B2nr6CGaouJaoj7XvWKqgigZ6YwfI9RAdpVNT
|
24
|
+
UhF5VeUs9KZ8dMgc9YeezrB9yZMNHM9nDKDEH6bK53xH8OeUkaUBhASEYJ07zMj+
|
25
|
+
hioKQQB/BUCAxpc5ElE9dmdJuinHFqD7KmqU7KSp/0wHvvgT5sqzS32xzlovOSxK
|
26
|
+
GDHt9mtVmQiD3fBCfPJhysaRSUyaUA7jEFljgRTxuH1GWLyGE24/c6zVBknLPSC+
|
27
|
+
hFW0Ib3/AgMBAAGjOTA3MAkGA1UdEwQCMAAwHQYDVR0OBBYEFNtKnxiLUpy9a406
|
28
|
+
diOk3lm5ISTEMAsGA1UdDwQEAwIEsDANBgkqhkiG9w0BAQUFAAOCAQEAiMhO6NXU
|
29
|
+
f/uTInOl2fFxIQsakyCXoWE2u7U2NLLW5R5DCYFK/EH+OYsum1Khu+Jt/n5loV7e
|
30
|
+
B4QlCbwlhwztW7sWA/sQQoLQsroZi2kmYwnkYLeqRgUre3E+YpD3S9QAWIFYpkBZ
|
31
|
+
b9mIToqxb0m+WiLCysrg3sfDymrfuNDdtQcVPcJ5W2+Mj6LJJN65bAvqqExVpr63
|
32
|
+
qbn/bmiocEIbQUsPSVuw+FSIiR6be/Ty3QpWQgxXnbHsfHFWPpADuOwTYPLxWqBg
|
33
|
+
4izI+lCFvIjAaa5WjKVW8PV3XIvgr4+/ESIzs1OOVW7ktQNwu7GXt/kR2KQH9FRC
|
34
|
+
VSyVlp5OZP6OoA==
|
35
|
+
-----END CERTIFICATE-----
|
36
|
+
|
37
|
+
date: 2010-12-30 00:00:00 +07:00
|
38
|
+
default_executable:
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: commander
|
42
|
+
prerelease: false
|
43
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
44
|
+
none: false
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
segments:
|
49
|
+
- 0
|
50
|
+
version: "0"
|
51
|
+
type: :runtime
|
52
|
+
version_requirements: *id001
|
53
|
+
description: Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)
|
54
|
+
email: christian@rishoj.net
|
55
|
+
executables:
|
56
|
+
- kfold
|
57
|
+
extensions: []
|
58
|
+
|
59
|
+
extra_rdoc_files:
|
60
|
+
- CHANGELOG
|
61
|
+
- LICENSE
|
62
|
+
- README
|
63
|
+
- bin/kfold
|
64
|
+
- lib/kfold.rb
|
65
|
+
- lib/kfold/data_file.rb
|
66
|
+
files:
|
67
|
+
- CHANGELOG
|
68
|
+
- LICENSE
|
69
|
+
- Manifest
|
70
|
+
- README
|
71
|
+
- Rakefile
|
72
|
+
- bin/kfold
|
73
|
+
- lib/kfold.rb
|
74
|
+
- lib/kfold/data_file.rb
|
75
|
+
- spec/helper.rb
|
76
|
+
- spec/kfold/data_file_spec.rb
|
77
|
+
- spec/kfold/sample_data_file.conll
|
78
|
+
- kfold.gemspec
|
79
|
+
has_rdoc: true
|
80
|
+
homepage: http://github.com/crishoj/kfold
|
81
|
+
licenses: []
|
82
|
+
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options:
|
85
|
+
- --line-numbers
|
86
|
+
- --inline-source
|
87
|
+
- --title
|
88
|
+
- Kfold
|
89
|
+
- --main
|
90
|
+
- README
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
segments:
|
99
|
+
- 0
|
100
|
+
version: "0"
|
101
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
|
+
none: false
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
segments:
|
107
|
+
- 1
|
108
|
+
- 2
|
109
|
+
version: "1.2"
|
110
|
+
requirements: []
|
111
|
+
|
112
|
+
rubyforge_project: kfold
|
113
|
+
rubygems_version: 1.3.7
|
114
|
+
signing_key:
|
115
|
+
specification_version: 3
|
116
|
+
summary: Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)
|
117
|
+
test_files: []
|
118
|
+
|
metadata.gz.sig
ADDED