kfold 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/CHANGELOG +1 -0
- data/LICENSE +0 -0
- data/Manifest +11 -0
- data/README +3 -0
- data/Rakefile +17 -0
- data/bin/kfold +111 -0
- data/kfold.gemspec +37 -0
- data/lib/kfold.rb +7 -0
- data/lib/kfold/data_file.rb +67 -0
- data/spec/helper.rb +3 -0
- data/spec/kfold/data_file_spec.rb +65 -0
- data/spec/kfold/sample_data_file.conll +108 -0
- metadata +118 -0
- metadata.gz.sig +2 -0
data.tar.gz.sig
ADDED
Binary file
|
data/CHANGELOG
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
v0.1. Commands for K-fold splitting and folding
|
data/LICENSE
ADDED
Binary file
|
data/Manifest
ADDED
data/README
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: UTF-8
# Gem packaging configuration for kfold, declared through Echoe.
require 'rubygems'
require 'rake'
require 'echoe'

# Put the working tree's lib/ first on the load path so that Kfold::VERSION
# below is read from this checkout.
lib_dir = File.join(File.dirname(__FILE__), 'lib')
$LOAD_PATH.unshift(lib_dir)
require 'kfold'

Echoe.new('kfold', Kfold::VERSION) do |gem|
  gem.description = "Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)"
  gem.url = "http://github.com/crishoj/kfold"
  gem.author = "Christian Rishøj"
  gem.email = "christian@rishoj.net"
  # Glob patterns handed to Echoe as its ignore list
  gem.ignore_pattern = ["tmp/**/*", "script/*", "nbproject/**/*"]
  gem.runtime_dependencies = ["commander"]
  gem.development_dependencies = []
end
|
data/bin/kfold
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'commander/import'
|
5
|
+
require 'kfold'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
program :version, Kfold::VERSION
|
9
|
+
program :description, 'Do stuff'
|
10
|
+
|
11
|
+
def do_directory(dir, overwrite = false)
|
12
|
+
if File.exist? dir
|
13
|
+
if overwrite
|
14
|
+
say "Removing existing #{dir}"
|
15
|
+
FileUtils.rm_rf(dir)
|
16
|
+
else
|
17
|
+
abort "Failed: Directory #{dir} exists"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
FileUtils.mkdir dir
|
21
|
+
end
|
22
|
+
|
23
|
+
# `kfold split`: writes the entries of an input file into K part files under
# INPUT.<parts_name>/ and, with --fold, additionally writes K fold files under
# INPUT.<folds_name>/, each being the concatenation of all parts except one.
command :split do |c|
  c.syntax = 'kfold split -i INPUT [options]'
  c.summary = 'Split a data file into K partitions'
  c.description = 'Given the data file INPUT, the partitions are written to files named INPUT.parts/{01..K}'
  c.example 'Split the file sample.txt into 4 parts', 'kfold split -k4 sample.txt'
  c.example 'Split the double-newline-delimited file sample.conll into 10 parts', 'kfold split -d"\n\n" sample.conll'
  c.option '-i', '--input FILE', String, 'Data file to split'
  c.option '-k', '--parts N', Integer, 'The number of partitions desired'
  c.option '-d', '--delimiter DELIM', String, 'String used to separate individual entries (newline per default)'
  c.option '-g', '--granularity N', Integer, 'Ensure the number of entries in each partition is divisible by N (useful for block-structured data)'
  c.option '-f', '--overwrite', 'Remove existing parts prior to executing'
  c.option '--fold', 'Additionally, create K folds of K-1 parts in a another folder'
  c.option '--parts-name STRING', String, 'Use the given name as suffix for the partitions folder created'
  c.option '--folds-name STRING', String, 'Use the given name as suffix for the folds folder created'
  c.action do |args, options|
    # :folds_name needs a default too, otherwise --fold writes to "INPUT."
    options.default :parts => 10, :delimiter => "\n", :granularity => 1,
                    :parts_name => 'parts', :folds_name => 'folds'

    # The examples above pass the input positionally, so accept both forms
    input = options.input || args.first
    abort "Failed: Please specify input file using -i INPUT" unless input
    abort "Failed: Input file #{input} does not exist" unless File.exist? input

    # Interpret escaped newlines, carriage returns and tabs.
    # Non-mutating gsub: the option string itself is left untouched.
    delimiter = options.delimiter.gsub('\n', "\n").gsub('\r', "\r").gsub('\t', "\t")

    parts_dir = "#{input}.#{options.parts_name}"
    do_directory(parts_dir, options.overwrite)
    df = Kfold::DataFile.new(input, delimiter, options.granularity)
    say "Partitioning breakdown: #{df.breakdown options.parts}"

    part = part_file = last_part_num = nil
    part_names = []
    part_entries = 0
    begin
      df.each_entry_in_parts(options.parts) do |part_num, entry|
        if part_num != last_part_num
          # Part number changed: finish the previous part file and open the next
          if part
            part.close
            say "[part] Wrote #{part_entries} entries to #{part_file}"
          end
          # Zero-pad so part names have as many digits as the requested part count
          part_name = part_num.to_s.rjust(options.parts.to_s.size, '0')
          part_names << part_name
          part_file = File.join(parts_dir, part_name)
          part = File.open(part_file, 'w')
          last_part_num = part_num
          part_entries = 0
        end
        part << entry
        part_entries += 1
      end
    ensure
      # Also covers an error mid-split; part stays nil when nothing was yielded
      part.close if part && !part.closed?
    end
    say "[part] Wrote #{part_entries} entries to #{part_file}" if part_file

    if options.fold
      folds_dir = "#{input}.#{options.folds_name}"
      do_directory(folds_dir, options.overwrite)
      part_names.each do |fold_name|
        # Fold N is every part except part N
        fold_parts = part_names - [fold_name]
        fold_file = File.join(folds_dir, fold_name)
        File.open(fold_file, 'w') do |fold|
          fold_parts.each do |part_name|
            # File.read closes the part file again, unlike File.open(...).read
            fold << File.read(File.join(parts_dir, part_name))
          end
        end
        say "[fold] Wrote parts #{fold_parts * ' '} to #{fold_file}"
      end
    end
  end
end

# `kfold fold` is `kfold split --fold`
alias_command :fold, :split, '--fold'
|
89
|
+
|
90
|
+
#command :train do |c|
|
91
|
+
# c.syntax = 'kfold train [options]'
|
92
|
+
# c.summary = ''
|
93
|
+
# c.description = ''
|
94
|
+
# c.example 'description', 'command example'
|
95
|
+
# c.option '--some-switch', 'Some switch that does something'
|
96
|
+
# c.action do |args, options|
|
97
|
+
# # Do something or c.when_called Kfold::Commands::Train
|
98
|
+
# end
|
99
|
+
#end
|
100
|
+
#
|
101
|
+
#command :test do |c|
|
102
|
+
# c.syntax = 'kfold test [options]'
|
103
|
+
# c.summary = ''
|
104
|
+
# c.description = ''
|
105
|
+
# c.example 'description', 'command example'
|
106
|
+
# c.option '--some-switch', 'Some switch that does something'
|
107
|
+
# c.action do |args, options|
|
108
|
+
# # Do something or c.when_called Kfold::Commands::Test
|
109
|
+
# end
|
110
|
+
#end
|
111
|
+
|
data/kfold.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for kfold 0.1.
Gem::Specification.new do |s|
  s.name = %q{kfold}
  s.version = "0.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Christian Rishøj"]
  s.date = %q{2010-12-30}
  s.description = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}
  s.email = %q{christian@rishoj.net}
  s.executables = ["kfold"]
  s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb"]
  s.files = ["CHANGELOG", "LICENSE", "Manifest", "README", "Rakefile", "bin/kfold", "lib/kfold.rb", "lib/kfold/data_file.rb", "spec/helper.rb", "spec/kfold/data_file_spec.rb", "spec/kfold/sample_data_file.conll", "kfold.gemspec"]
  s.homepage = %q{http://github.com/crishoj/kfold}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Kfold", "--main", "README"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)}

  # Legacy setters: guarded like the other optional setters in this spec so
  # that evaluation does not fail on a RubyGems that does not provide them.
  s.default_executable = %q{kfold} if s.respond_to? :default_executable=
  s.rubyforge_project = %q{kfold} if s.respond_to? :rubyforge_project=

  # The signing material lives at absolute paths on the author's machine.
  # Configure signing only when both files are present, so the spec can still
  # be evaluated and built (unsigned) everywhere else.
  cert_file = "/Users/crjensen/Documents/Certificates/gem-public_cert.pem"
  key_file = %q{/Users/crjensen/Documents/Certificates/gem-private_key.pem}
  if File.exist?(cert_file) && File.exist?(key_file)
    s.cert_chain = [cert_file]
    s.signing_key = key_file
  end

  s.specification_version = 3 if s.respond_to? :specification_version=

  # Single runtime dependency; fall back to add_dependency where
  # add_runtime_dependency is not available.
  if s.respond_to? :add_runtime_dependency
    s.add_runtime_dependency(%q<commander>, [">= 0"])
  else
    s.add_dependency(%q<commander>, [">= 0"])
  end
end
|
data/lib/kfold.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
module Kfold
  # A data file made of entries separated by a delimiter, which can be walked
  # entry by entry while assigning each entry to one of K consecutive parts.
  #
  # Entries are grouped into blocks of +granularity+ entries; parts are made of
  # whole blocks, and blocks left over after an even division go to the last part.
  class DataFile
    attr_reader :filename, :delimiter, :granularity

    # filename    - path of the data file
    # delimiter   - record separator passed to File.foreach (newline by default)
    # granularity - number of entries per block (1 by default)
    def initialize(filename, delimiter = "\n", granularity = 1)
      @filename, @delimiter, @granularity = filename, delimiter, granularity
    end

    # Number of entries in the file (counted once, then memoized).
    def num_entries
      @num_entries ||= count_entries
    end

    # Number of blocks of +granularity+ entries, rounding a partial block up.
    def num_blocks
      @num_blocks ||= (num_entries.to_f / granularity.to_f).ceil
    end

    # Returns a human-readable summary of how the file divides into +parts+ parts.
    def breakdown(parts = 10)
      blocks_per_part, rest = part_layout(parts)
      msg = "#{num_entries} entries into #{parts} parts, #{blocks_per_part} blocks of #{granularity} entries per part"
      msg += " (plus #{rest} extra blocks in last part)" if rest > 0
      # Fewer blocks than parts: the trailing parts never receive an entry
      msg += " (only #{num_blocks} parts will be non-empty)" if num_blocks < parts
      msg
    end

    # Yields +part_number, entry+ for every entry in the file, in file order.
    # Part numbers start at 1 and only ever increase. Without a block, an
    # Enumerator over the same pairs is returned.
    def each_entry_in_parts(parts = 10)
      return enum_for(:each_entry_in_parts, parts) unless block_given?

      blocks_per_part, _rest = part_layout(parts)
      cur_part = 1
      cur_block = 1
      block_entries = 0
      File.foreach(filename, delimiter) do |entry|
        yield cur_part, entry
        block_entries += 1
        next unless block_entries == granularity

        # End of this block
        if cur_block == blocks_per_part && cur_part != parts
          # End of this part; the last part keeps absorbing all remaining blocks
          cur_part += 1
          cur_block = 1
        else
          cur_block += 1
        end
        block_entries = 0
      end
    end

    protected

    # Returns [blocks per part, leftover blocks] for a split into +parts+ parts.
    # When there are fewer blocks than parts the even share would be zero, a
    # part boundary would never be reached and every entry would land in part 1;
    # in that case each block gets a part of its own instead.
    def part_layout(parts)
      per_part, rest = num_blocks.divmod(parts)
      return [1, 0] if per_part.zero? && num_blocks > 0
      [per_part, rest]
    end

    # Counts the entries by reading the file once with the configured delimiter.
    def count_entries
      count = 0
      File.foreach(filename, delimiter) { |_entry| count += 1 }
      count
    end
  end
end
|
data/spec/helper.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
|
2
|
+
require 'kfold/data_file'

# Exercises Kfold::DataFile against sample_data_file.conll, read with a
# blank-line ("\n\n") delimiter.
describe Kfold::DataFile do
  before(:each) do
    @sample_file = File.join(File.dirname(__FILE__), 'sample_data_file.conll')
    @df = Kfold::DataFile.new(@sample_file, "\n\n")
  end

  # Splits into +parts+ parts and returns
  # [number of entries yielded, number of times the part number changed].
  def tally_split(parts)
    entries = 0
    part_changes = 0
    previous_part = nil
    @df.each_entry_in_parts(parts) do |part_num, _entry|
      entries += 1
      next if part_num == previous_part
      part_changes += 1
      previous_part = part_num
    end
    [entries, part_changes]
  end

  it "should contain 11 entries" do
    @df.num_entries.should == 11
  end

  it "should yield consecutive part numbers" do
    # One entry per part, so each yielded part number must follow the previous one
    part_nums = []
    @df.each_entry_in_parts(@df.num_entries) { |part_num, _entry| part_nums << part_num }
    part_nums.each_cons(2) do |earlier, later|
      later.should == earlier + 1
    end
  end

  it "should yield all the entries in the file" do
    yield_count, part_count = tally_split(1)
    yield_count.should == @df.num_entries
    part_count.should == 1
  end

  it "should yield one entry per part if so requested" do
    yield_count, part_count = tally_split(@df.num_entries)
    yield_count.should == @df.num_entries
    part_count.should == @df.num_entries
  end

  it "should yield the number of parts requested" do
    _yield_count, part_count = tally_split(3)
    part_count.should == 3
  end
end
|
65
|
+
|
@@ -0,0 +1,108 @@
|
|
1
|
+
1 tashiro _ NAME NAMEper _ 2 HD 2 HD
|
2
|
+
2 yasuko _ NAME NAMEper _ 3 COMP 3 COMP
|
3
|
+
3 desu _ PV PVfin u 0 ROOT 0 ROOT
|
4
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
5
|
+
|
6
|
+
1 hayakawa _ NAME NAMEper _ 2 HD 2 HD
|
7
|
+
2 ryou _ NAME NAMEper _ 3 COMP 3 COMP
|
8
|
+
3 desu _ PV PVfin u 0 ROOT 0 ROOT
|
9
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
10
|
+
|
11
|
+
1 hayakawa _ NAME NAMEper _ 2 COMP 2 COMP
|
12
|
+
2 saN _ P PNsf _ 6 ADJ 6 ADJ
|
13
|
+
3 ima _ N Ntmp _ 6 ADJ 6 ADJ
|
14
|
+
4 chotto _ ADV ADV _ 6 ADJ 6 ADJ
|
15
|
+
5 ojikaN _ N NN _ 6 SBJ 6 SBJ
|
16
|
+
6 yoroshii _ ADJ ADJifin _ 7 COMP 7 COMP
|
17
|
+
7 desu _ PV PVfin u 0 ROOT 0 ROOT
|
18
|
+
8 ka _ PS PSE _ 7 MRK 7 MRK
|
19
|
+
9 chotto _ ADV ADV _ 16 ADJ 16 ADJ
|
20
|
+
10 doitsu _ NAME NAMEloc _ 11 COMP 11 COMP
|
21
|
+
11 no _ P Pgen _ 12 ADJ 12 ADJ
|
22
|
+
12 shucchou _ N VN _ 13 COMP 13 COMP
|
23
|
+
13 no _ P Pgen _ 14 COMP 14 COMP
|
24
|
+
14 koto _ N NF _ 15 COMP 15 COMP
|
25
|
+
15 de _ P P _ 16 ADJ 16 ADJ
|
26
|
+
16 gosoudaN _ N VN _ 17 COMP 17 COMP
|
27
|
+
17 shitai _ VADJ VADJi _ 18 COMP 18 COMP
|
28
|
+
18 no _ N NF _ 19 COMP 19 COMP
|
29
|
+
19 desu _ PV PVfin u 20 COMP 20 COMP
|
30
|
+
20 ga _ PS PSSb _ 0 ROOT 0 ROOT
|
31
|
+
21 . _ . . _ 20 PUNCT 20 PUNCT
|
32
|
+
|
33
|
+
1 hai _ ITJ ITJ _ 0 ROOT 0 ROOT
|
34
|
+
2 . _ . . _ 1 PUNCT 1 PUNCT
|
35
|
+
|
36
|
+
1 daijoubu _ ADJ ADJ_n _ 2 COMP 2 COMP
|
37
|
+
2 desu _ PV PVfin u 3 COMP 3 COMP
|
38
|
+
3 ga _ PS PSSb _ 0 ROOT 0 ROOT
|
39
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
40
|
+
|
41
|
+
1 de _ CNJ CNJ _ 21 ADJ 21 ADJ
|
42
|
+
2 dekimashitara _ V Vcnd _ 9 ADJ 9 ADJ
|
43
|
+
3 koNkai _ N Ntmp _ 4 COMP 4 COMP
|
44
|
+
4 no _ P Pgen _ 5 ADJ 5 ADJ
|
45
|
+
5 shucchou _ N VN _ 7 SBJ 7 SBJ
|
46
|
+
6 juuichigatsu _ CD CDdate _ 7 COMP 7 COMP
|
47
|
+
7 ni _ P P _ 8 COMP 8 COMP
|
48
|
+
8 to _ P PQ _ 9 COMP 9 COMP
|
49
|
+
9 omotte _ V Vte _ 10 COMP 10 COMP
|
50
|
+
10 iru _ VAUX VAUXfin u 11 COMP 11 COMP
|
51
|
+
11 no _ N NF _ 12 COMP 12 COMP
|
52
|
+
12 desu _ PV PVfin u 13 COMP 13 COMP
|
53
|
+
13 ga _ PS PSSb _ 21 ADJ 21 ADJ
|
54
|
+
14 juuichigatsu _ CD CDdate _ 15 COMP 15 COMP
|
55
|
+
15 desu _ PV PVfin u 16 COMP 16 COMP
|
56
|
+
16 to _ PS PSSa _ 21 ADJ 21 ADJ
|
57
|
+
17 goyotei _ N NN _ 18 COMP 18 COMP
|
58
|
+
18 no _ P Pgen _ 19 COMP 19 COMP
|
59
|
+
19 hou _ N NF _ 21 SBJ 21 SBJ
|
60
|
+
20 ikaga _ ADV ADVwh _ 21 COMP 21 COMP
|
61
|
+
21 deshou _ PV PVfin u 0 ROOT 0 ROOT
|
62
|
+
22 ka _ PS PSE _ 21 MRK 21 MRK
|
63
|
+
23 . _ . . _ 22 PUNCT 22 PUNCT
|
64
|
+
|
65
|
+
1 juuichigatsu _ CD CDdate _ 2 COMP 2 COMP
|
66
|
+
2 desu _ PV PVfin u 0 ROOT 0 ROOT
|
67
|
+
3 ka _ PS PSE _ 2 MRK 2 MRK
|
68
|
+
4 . _ . . _ 3 PUNCT 3 PUNCT
|
69
|
+
|
70
|
+
1 juuichigatsu _ CD CDdate _ 7 ADJ 7 ADJ
|
71
|
+
2 wa _ P Pfoc _ 1 MRK 1 MRK
|
72
|
+
3 zeNhaN _ N Ntmp _ 7 ADJ 7 ADJ
|
73
|
+
4 nara _ P Pfoc _ 3 MRK 3 MRK
|
74
|
+
5 jikaN _ N NN _ 6 COMP 6 COMP
|
75
|
+
6 ga _ P Pnom _ 7 SBJ 7 SBJ
|
76
|
+
7 toreru _ V Vfin u 9 COMP 9 COMP
|
77
|
+
8 ka _ PS PSE _ 7 MRK 7 MRK
|
78
|
+
9 to _ P PQ _ 10 COMP 10 COMP
|
79
|
+
10 omoimasu _ V Vfin u 11 COMP 11 COMP
|
80
|
+
11 ga _ PS PSSb _ 0 ROOT 0 ROOT
|
81
|
+
12 . _ . . _ 11 PUNCT 11 PUNCT
|
82
|
+
|
83
|
+
1 tsuki _ N NN _ 2 COMP 2 COMP
|
84
|
+
2 no _ P Pgen _ 3 ADJ 3 ADJ
|
85
|
+
3 zeNhaN _ N Ntmp _ 5 ADJ 5 ADJ
|
86
|
+
4 nara _ P Pfoc _ 3 MRK 3 MRK
|
87
|
+
5 yoroshii _ ADJ ADJifin _ 6 COMP 6 COMP
|
88
|
+
6 desu _ PV PVfin u 0 ROOT 0 ROOT
|
89
|
+
7 ka _ PS PSE _ 6 MRK 6 MRK
|
90
|
+
8 . _ . . _ 7 PUNCT 7 PUNCT
|
91
|
+
|
92
|
+
1 hai _ ITJ ITJ _ 0 ROOT 0 ROOT
|
93
|
+
2 . _ . . _ 1 PUNCT 1 PUNCT
|
94
|
+
|
95
|
+
1 sou _ ADV ADVdem _ 2 COMP 2 COMP
|
96
|
+
2 shimasu _ V Vfin u 3 COMP 3 COMP
|
97
|
+
3 to _ PS PSSa _ 11 ADJ 11 ADJ
|
98
|
+
4 kokonoka _ CD CDdate _ 5 COMP 5 COMP
|
99
|
+
5 no _ P Pgen _ 7 ADJ 7 ADJ
|
100
|
+
6 getsuyou _ NT NT _ 7 COMP 7 COMP
|
101
|
+
7 hatsu _ N Nsf _ 11 SBJ 11 SBJ
|
102
|
+
8 toka _ P Pcnj _ 7 MRK 7 MRK
|
103
|
+
9 wa _ P Pfoc _ 7 MRK 7 MRK
|
104
|
+
10 ikaga _ ADV ADVwh _ 11 COMP 11 COMP
|
105
|
+
11 deshou _ PV PVfin u 0 ROOT 0 ROOT
|
106
|
+
12 ka _ PS PSE _ 11 MRK 11 MRK
|
107
|
+
13 . _ . . _ 12 PUNCT 12 PUNCT
|
108
|
+
|
metadata
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kfold
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
version: "0.1"
|
9
|
+
platform: ruby
|
10
|
+
authors:
|
11
|
+
- "Christian Rish\xC3\xB8j"
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain:
|
15
|
+
- |
|
16
|
+
-----BEGIN CERTIFICATE-----
|
17
|
+
MIIDNjCCAh6gAwIBAgIBADANBgkqhkiG9w0BAQUFADBBMRIwEAYDVQQDDAljaHJp
|
18
|
+
c3RpYW4xFjAUBgoJkiaJk/IsZAEZFgZyaXNob2oxEzARBgoJkiaJk/IsZAEZFgNu
|
19
|
+
ZXQwHhcNMTAxMjMwMTAwMjUzWhcNMTExMjMwMTAwMjUzWjBBMRIwEAYDVQQDDAlj
|
20
|
+
aHJpc3RpYW4xFjAUBgoJkiaJk/IsZAEZFgZyaXNob2oxEzARBgoJkiaJk/IsZAEZ
|
21
|
+
FgNuZXQwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCg2y4Tc0otb09T
|
22
|
+
/UucTm3DbQilOfQwN9QCzjjnPT6071YeyHsEmv4zokUCBSW5n+H/D0tSAQobT6D+
|
23
|
+
APet6eFLGRW0L9MGsi7do5w+GY4B2nr6CGaouJaoj7XvWKqgigZ6YwfI9RAdpVNT
|
24
|
+
UhF5VeUs9KZ8dMgc9YeezrB9yZMNHM9nDKDEH6bK53xH8OeUkaUBhASEYJ07zMj+
|
25
|
+
hioKQQB/BUCAxpc5ElE9dmdJuinHFqD7KmqU7KSp/0wHvvgT5sqzS32xzlovOSxK
|
26
|
+
GDHt9mtVmQiD3fBCfPJhysaRSUyaUA7jEFljgRTxuH1GWLyGE24/c6zVBknLPSC+
|
27
|
+
hFW0Ib3/AgMBAAGjOTA3MAkGA1UdEwQCMAAwHQYDVR0OBBYEFNtKnxiLUpy9a406
|
28
|
+
diOk3lm5ISTEMAsGA1UdDwQEAwIEsDANBgkqhkiG9w0BAQUFAAOCAQEAiMhO6NXU
|
29
|
+
f/uTInOl2fFxIQsakyCXoWE2u7U2NLLW5R5DCYFK/EH+OYsum1Khu+Jt/n5loV7e
|
30
|
+
B4QlCbwlhwztW7sWA/sQQoLQsroZi2kmYwnkYLeqRgUre3E+YpD3S9QAWIFYpkBZ
|
31
|
+
b9mIToqxb0m+WiLCysrg3sfDymrfuNDdtQcVPcJ5W2+Mj6LJJN65bAvqqExVpr63
|
32
|
+
qbn/bmiocEIbQUsPSVuw+FSIiR6be/Ty3QpWQgxXnbHsfHFWPpADuOwTYPLxWqBg
|
33
|
+
4izI+lCFvIjAaa5WjKVW8PV3XIvgr4+/ESIzs1OOVW7ktQNwu7GXt/kR2KQH9FRC
|
34
|
+
VSyVlp5OZP6OoA==
|
35
|
+
-----END CERTIFICATE-----
|
36
|
+
|
37
|
+
date: 2010-12-30 00:00:00 +07:00
|
38
|
+
default_executable:
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: commander
|
42
|
+
prerelease: false
|
43
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
44
|
+
none: false
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
segments:
|
49
|
+
- 0
|
50
|
+
version: "0"
|
51
|
+
type: :runtime
|
52
|
+
version_requirements: *id001
|
53
|
+
description: Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)
|
54
|
+
email: christian@rishoj.net
|
55
|
+
executables:
|
56
|
+
- kfold
|
57
|
+
extensions: []
|
58
|
+
|
59
|
+
extra_rdoc_files:
|
60
|
+
- CHANGELOG
|
61
|
+
- LICENSE
|
62
|
+
- README
|
63
|
+
- bin/kfold
|
64
|
+
- lib/kfold.rb
|
65
|
+
- lib/kfold/data_file.rb
|
66
|
+
files:
|
67
|
+
- CHANGELOG
|
68
|
+
- LICENSE
|
69
|
+
- Manifest
|
70
|
+
- README
|
71
|
+
- Rakefile
|
72
|
+
- bin/kfold
|
73
|
+
- lib/kfold.rb
|
74
|
+
- lib/kfold/data_file.rb
|
75
|
+
- spec/helper.rb
|
76
|
+
- spec/kfold/data_file_spec.rb
|
77
|
+
- spec/kfold/sample_data_file.conll
|
78
|
+
- kfold.gemspec
|
79
|
+
has_rdoc: true
|
80
|
+
homepage: http://github.com/crishoj/kfold
|
81
|
+
licenses: []
|
82
|
+
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options:
|
85
|
+
- --line-numbers
|
86
|
+
- --inline-source
|
87
|
+
- --title
|
88
|
+
- Kfold
|
89
|
+
- --main
|
90
|
+
- README
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
segments:
|
99
|
+
- 0
|
100
|
+
version: "0"
|
101
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
|
+
none: false
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
segments:
|
107
|
+
- 1
|
108
|
+
- 2
|
109
|
+
version: "1.2"
|
110
|
+
requirements: []
|
111
|
+
|
112
|
+
rubyforge_project: kfold
|
113
|
+
rubygems_version: 1.3.7
|
114
|
+
signing_key:
|
115
|
+
specification_version: 3
|
116
|
+
summary: Create k-fold splits from datafiles (useful for cross-validation in supervised machine learning)
|
117
|
+
test_files: []
|
118
|
+
|
metadata.gz.sig
ADDED